Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	Update backend.py
Browse files
Corrected a few typos and completed a few missing rules.
This is a work in progress.
- backend.py +11 -10
    	
        backend.py
    CHANGED
    
    | @@ -33,19 +33,20 @@ class KabyleASR: | |
| 33 | 
             
                        str: The post-processed text with correct annexation dashes.
         | 
| 34 | 
             
                    """
         | 
| 35 | 
             
                    # Dictionaries for a set of particles based on the rules document v4, with corrections.
         | 
| 36 | 
            -
                     | 
| 37 | 
            -
                     | 
| 38 | 
            -
             | 
| 39 | 
            -
             | 
|  | |
| 40 | 
             
                    # Split StPa into a main group and a special group based on user feedback
         | 
| 41 | 
             
                    StPaSp = {'i', 'am', 'at', 's', 'neɣ', 'aɣ'}
         | 
| 42 | 
             
                    StPa = {'ak', 'as', 'aneɣ', 'anteɣ', 'awen', 'awent', 'asen', 'asent',
         | 
| 43 | 
            -
                            'k', 'm', ' | 
| 44 | 
            -
                            ' | 
| 45 | 
            -
                    DePa = {'a', 'agi', 'nni', 'ihin', ' | 
| 46 | 
             
                    DiPa = {'id', 'in'}
         | 
| 47 | 
            -
                    FuPa = {'ad', 'ara'}
         | 
| 48 | 
            -
                    DiObPa = {'yi', 'k', 'kem', 't', 'tt', ' | 
| 49 | 
             
                              'iyi', 'ik', 'ikem', 'it', 'itt', 'iken', 'ikent', 'iten', 'itent'}
         | 
| 50 | 
             
                    InObPa = {'yi', 'yak', 'yam', 'yas', 'yaɣ', 'yawen', 'yawent', 'yasen', 'yasent'}
         | 
| 51 | 
             
                    # Combined set for general lookup, including both StPa groups
         | 
| @@ -53,7 +54,7 @@ class KabyleASR: | |
| 53 | 
             
                    # The set of particles that can be annexed according to Rule 9, now correctly excluding StPaSp
         | 
| 54 | 
             
                    rule_9_particles = DiObPa.union(InObPa).union(DiPa).union(StPa)
         | 
| 55 | 
             
                    # The full set of state particles for other rules (like Rule 5 and 11)
         | 
| 56 | 
            -
                    full_stpa_set = StPa.union(StPaSp)
         | 
| 57 | 
             
                    # Particles that can be part of the chain after FuPa (Rule 11)
         | 
| 58 | 
             
                    rule_11_particles = DiObPa.union(DiPa).union(full_stpa_set)
         | 
| 59 | 
             
                    # First, tokenize the text by splitting on spaces and existing dashes
         | 
|  | |
| 33 | 
             
                        str: The post-processed text with correct annexation dashes.
         | 
| 34 | 
             
                    """
         | 
| 35 | 
             
                    # Dictionaries for a set of particles based on the rules document v4, with corrections.
         | 
| 36 | 
            +
                    # Based on kabyle_asr_optimized.py
         | 
| 37 | 
            +
                    CoPa = {'d', 'n', 's'}
         | 
| 38 | 
            +
                    PoPro = {'inu', 'inem', 'ines', 'nneɣ', 'nteɣ', 'nwen', 'nwent', 'nsen', 'nsent',
         | 
| 39 | 
            +
                             'iw', 'ik', 'im', 'is', 'w', 'k', 'm', 'tneɣ', 'tenteɣ', 'twen', 'twent', 'tsen', 'tsent'}
         | 
| 40 | 
            +
                    SpWo = {'deg', 'gar', 'ɣer', 'ɣur', 'fell', 'ɣef', 'ddaw', 'nnig', 'yid', 'yes', 'yis', 'aql', 'sɣur', 'sennig', 'deffir', 'sdat'}
         | 
| 41 | 
             
                    # Split StPa into a main group and a special group based on user feedback
         | 
| 42 | 
             
                    StPaSp = {'i', 'am', 'at', 's', 'neɣ', 'aɣ'}
         | 
| 43 | 
             
                    StPa = {'ak', 'as', 'aneɣ', 'anteɣ', 'awen', 'awent', 'asen', 'asent',
         | 
| 44 | 
            +
                            'k', 'm', 'nteɣ', 'wen', 'went', 'sen', 'sent', 'atneɣ', 'atenteɣ',
         | 
| 45 | 
            +
                            'atwen', 'atwent', 'atsen', 'atsent'}
         | 
| 46 | 
            +
                    DePa = {'a', 'agi', 'nni', 'ihin', 'nniḍen'}
         | 
| 47 | 
             
                    DiPa = {'id', 'in'}
         | 
| 48 | 
            +
                    FuPa = {'ad', 'ara','ur'}
         | 
| 49 | 
            +
                    DiObPa = {'yi', 'k', 'kem', 't', 'tt', 'aɣ', 'ken', 'kent', 'ten', 'tent',
         | 
| 50 | 
             
                              'iyi', 'ik', 'ikem', 'it', 'itt', 'iken', 'ikent', 'iten', 'itent'}
         | 
| 51 | 
             
                    InObPa = {'yi', 'yak', 'yam', 'yas', 'yaɣ', 'yawen', 'yawent', 'yasen', 'yasent'}
         | 
| 52 | 
             
                    # Combined set for general lookup, including both StPa groups
         | 
|  | |
| 54 | 
             
                    # The set of particles that can be annexed according to Rule 9, now correctly excluding StPaSp
         | 
| 55 | 
             
                    rule_9_particles = DiObPa.union(InObPa).union(DiPa).union(StPa)
         | 
| 56 | 
             
                    # The full set of state particles for other rules (like Rule 5 and 11)
         | 
| 57 | 
            +
                    full_stpa_set = StPa.union(StPaSp).union(DiObPa) # Added DiObPa for Rule 5
         | 
| 58 | 
             
                    # Particles that can be part of the chain after FuPa (Rule 11)
         | 
| 59 | 
             
                    rule_11_particles = DiObPa.union(DiPa).union(full_stpa_set)
         | 
| 60 | 
             
                    # First, tokenize the text by splitting on spaces and existing dashes
         |