Spaces:

patruff
/

word-phone

Configuration error

App Files Files Community

patruff committed on Jan 20

Commit

b1c9207

verified ·

1 Parent(s): 24809f4

Upload tool

Browse files

Files changed (1) hide show

tool.py +170 -17

tool.py CHANGED Viewed

@@ -1,38 +1,191 @@
 from smolagents.tools import Tool
 import pronouncing
 import string
-import json
 class WordPhoneTool(Tool):
-    # Tool that looks up one word with the `pronouncing` package and reports
-    # its phones, syllable count and stress pattern as a JSON string.
     name = "word_phonetic_analyzer"
-    description = "Analyzes the pronunciation of a word using the CMU dictionary to get its phonemes, syllable count and stress pattern"
-    inputs = {'word': {'type': 'string', 'description': 'The word to analyze for pronunciation patterns'}}
     output_type = "string"
-    def forward(self, word: str) -> str:
-        # Returns json.dumps(..., indent=2) of a dict. Miss: 'word', 'found'
-        # (False), 'error'. Hit: 'word', 'found' (True), 'syllable_count',
-        # 'phones' (list of phone strings), 'stresses'.
         import pronouncing
-        import string
         import json
-        # Normalise: lowercase, then trim leading/trailing ASCII punctuation.
-        # Inner punctuation is kept and whitespace is not trimmed.
-        word = word.lower().strip(string.punctuation)
-        phones = pronouncing.phones_for_word(word)
         if not phones:
             result = {
-                'word': word,
                 'found': False,
                 'error': 'Word not found in dictionary'
             }
-        else:
-            # Only the first pronunciation returned for the word is analysed.
-            primary_phones = phones[0]
-            result = {
-                'word': word,
-                'found': True,
-                'syllable_count': pronouncing.syllable_count(primary_phones),
-                'phones': primary_phones.split(),
-                'stresses': pronouncing.stresses(primary_phones)
-            }
         return json.dumps(result, indent=2)

 from smolagents.tools import Tool
+import json
 import pronouncing
 import string
+import difflib
 class WordPhoneTool(Tool):
+    """Phonetic analysis of a single word, with an optional rhyme comparison.
+
+    ``forward`` looks the word up with the ``pronouncing`` package and returns
+    a JSON string holding its phones, syllable count and stress pattern. When
+    ``compare_to`` is supplied, the second word is analysed the same way and a
+    similarity block (rhyme, syllable and spelling scores) is added.
+
+    Modules are imported inside the methods that use them, which is the
+    convention this file already follows.
+    """
     name = "word_phonetic_analyzer"
+    description = """Analyzes word pronunciation using CMU dictionary to get phonemes, syllables, and stress patterns.
+    Can also compare two words for phonetic similarity."""
+    inputs = {'word': {'type': 'string', 'description': 'Primary word to analyze for pronunciation patterns'}, 'compare_to': {'type': 'string', 'description': 'Optional word to compare against for similarity scoring', 'nullable': True}}
     output_type = "string"
+    # Vowel phones that count as matching for rhyme purposes: groups are
+    # separated by "|", members of a group by ",". A phone may appear in more
+    # than one group (UH, AO and OW do), so matching is not transitive.
+    VOWEL_REF = "AH,UH,AX|AE,EH|IY,IH|AO,AA|UW,UH|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
+    def _get_vowel_groups(self):
+        """Return VOWEL_REF parsed into a list of groups (lists of phone names)."""
+        return [group.split(",") for group in self.VOWEL_REF.split("|")]
+    def _get_last_syllable(self, phones):
+        """Find the last vowel in ``phones`` and the phones that follow it.
+
+        A phone counts as a vowel only if, with its stress digits removed, it
+        is listed in VOWEL_REF.
+
+        Args:
+            phones: list of phone strings, stress digits allowed (e.g. "AE1").
+
+        Returns:
+            ``(vowel, tail)`` where ``vowel`` is the stress-free last vowel and
+            ``tail`` is the list of phones after it (stress digits untouched),
+            or ``(None, [])`` when no vowel is present.
+        """
+        known_vowels = {v for group in self._get_vowel_groups() for v in group}
+        bare_phones = self._strip_stress(phones)
+        # Scan from the end: the first hit is the last vowel.
+        for idx in range(len(bare_phones) - 1, -1, -1):
+            if bare_phones[idx] in known_vowels:
+                return bare_phones[idx], list(phones[idx + 1:])
+        return None, []
+    def _strip_stress(self, phones):
+        """Return ``phones`` with the characters 0, 1 and 2 removed from each entry."""
+        return ["".join(ch for ch in phone if ch not in "012") for phone in phones]
+    def _vowels_match(self, v1, v2):
+        """Tell whether two vowel phones are equal or share a VOWEL_REF group.
+
+        Stress digits on either argument are ignored.
+        """
+        first, second = self._strip_stress([v1, v2])
+        if first == second:
+            return True
+        return any(
+            first in group and second in group
+            for group in self._get_vowel_groups()
+        )
+    def _calculate_similarity(self, word1, phones1, word2, phones2):
+        """Score how alike two words sound and look.
+
+        Args:
+            word1, word2: the cleaned spellings of the two words.
+            phones1, phones2: their pronunciations as space-separated phone
+                strings.
+
+        Returns:
+            dict with ``similarity`` (weighted total), ``rhyme_score``,
+            ``syllable_match`` (bool) and ``string_similarity``; the three
+            numbers are rounded to 3 places.
+
+        Scoring:
+            * rhyme (weight 0.6): 0.0 if the last vowels do not match, 0.6 if
+              they match but the trailing phones differ, 1.0 if the trailing
+              phones match too, 1.2 if in addition the spellings have equal
+              length and are identical after the first letter.
+            * syllables (weight 0.25): 1.0 when the counts are equal, else 0.0.
+            * spelling (weight 0.15): ``SequenceMatcher`` ratio of the words
+              without their first letter (of the whole words when either has
+              a single character or fewer).
+
+        Because of the 1.2 rhyme bonus ``similarity`` can reach 1.12.
+        """
         import pronouncing
+        from difflib import SequenceMatcher
+        last_vowel1, word1_end = self._get_last_syllable(phones1.split())
+        last_vowel2, word2_end = self._get_last_syllable(phones2.split())
+        rhyme_score = 0.0
+        if last_vowel1 and last_vowel2 and self._vowels_match(last_vowel1, last_vowel2):
+            if self._strip_stress(word1_end) == self._strip_stress(word2_end):
+                rhyme_score = 1.0
+                # Bonus for words spelled alike apart from the first letter.
+                if len(word1) == len(word2) and word1[1:] == word2[1:]:
+                    rhyme_score = 1.2
+            else:
+                rhyme_score = 0.6
+        syllable_match = (
+            pronouncing.syllable_count(phones1) == pronouncing.syllable_count(phones2)
+        )
+        syllable_score = 1.0 if syllable_match else 0.0
+        # The first letter is skipped so that an onset change alone does not
+        # lower the spelling score; very short words are compared whole.
+        if len(word1) > 1 and len(word2) > 1:
+            string_similarity = SequenceMatcher(None, word1[1:], word2[1:]).ratio()
+        else:
+            string_similarity = SequenceMatcher(None, word1, word2).ratio()
+        total_similarity = (rhyme_score * 0.6) + (syllable_score * 0.25) + (string_similarity * 0.15)
+        return {
+            "similarity": round(total_similarity, 3),
+            "rhyme_score": round(rhyme_score, 3),
+            "syllable_match": syllable_match,
+            "string_similarity": round(string_similarity, 3)
+        }
+    def forward(self, word, compare_to=None):
+        """Analyse ``word`` and optionally compare it with ``compare_to``.
+
+        Both words are lower-cased and stripped of leading/trailing
+        punctuation and whitespace before lookup. Only the first
+        pronunciation returned for a word is used.
+
+        Args:
+            word: the word to analyse.
+            compare_to: optional second word; ignored when ``None`` or empty.
+
+        Returns:
+            A JSON string (indent=2). If ``word`` is not found it holds
+            ``word``, ``found`` (false) and ``error``. Otherwise it holds
+            ``word``, ``found`` (true), ``syllable_count``, ``phones`` and
+            ``stresses``; with a comparison word it also holds ``comparison``
+            (that word's analysis, or an ``error`` entry if it is not found)
+            and, when both words are found, ``similarity``.
+        """
         import json
+        import string
+        import pronouncing
+        # Trim whitespace together with punctuation: otherwise "cat " or
+        # "cat!\n" keeps its trailing characters and the lookup fails.
+        trim_chars = string.punctuation + string.whitespace
+        word_clean = word.lower().strip(trim_chars)
+        phones = pronouncing.phones_for_word(word_clean)
         if not phones:
             result = {
+                'word': word_clean,
                 'found': False,
                 'error': 'Word not found in dictionary'
             }
+            return json.dumps(result, indent=2)
+        primary_phones = phones[0]
+        result = {
+            'word': word_clean,
+            'found': True,
+            'syllable_count': pronouncing.syllable_count(primary_phones),
+            'phones': primary_phones.split(),
+            'stresses': pronouncing.stresses(primary_phones)
+        }
+        if compare_to:
+            compare_clean = compare_to.lower().strip(trim_chars)
+            compare_phones = pronouncing.phones_for_word(compare_clean)
+            if not compare_phones:
+                result['comparison'] = {
+                    'error': f'Comparison word "{compare_clean}" not found in dictionary'
+                }
+            else:
+                compare_primary = compare_phones[0]
+                result['comparison'] = {
+                    'word': compare_clean,
+                    'syllable_count': pronouncing.syllable_count(compare_primary),
+                    'phones': compare_primary.split(),
+                    'stresses': pronouncing.stresses(compare_primary)
+                }
+                result['similarity'] = self._calculate_similarity(
+                    word_clean, primary_phones,
+                    compare_clean, compare_primary
+                )
         return json.dumps(result, indent=2)