"""Pinyin final (yunmu) extraction plus a pretty-printer for rhyme candidates.

Run as a script to regenerate ``rules/syllable_to_yunmu.json`` from
``rules/ALL_SYLLABLES.txt``.
"""
from collections import defaultdict  # kept: part of the module's original namespace
import json

try:
    from colorama import Fore, Style, init
except ImportError:
    # Colour is purely cosmetic; without colorama fall back to plain text so
    # get_yunmu() and the JSON generation remain usable.
    class _PlainText:
        """Stand-in for colorama's ``Fore``/``Style``: every attribute is ''."""

        def __getattr__(self, name):
            return ''

    Fore = Style = _PlainText()

    def init(*args, **kwargs):
        """No-op replacement for ``colorama.init``."""

init(autoreset=True)

# Labels that get_yunmu() may return.  Besides ordinary finals the table holds
# whole syllables (zhi, chi, shi, ri, zi, ci, si, yi, wu, yu, ...) that are
# reported as their own label.  'v' stands for 'ü'.
YUNMU_LIST = ['a', 'o', 'e', 'i', 'u', 'v', 'ai', 'ei', 'ao', 'ou', 'ia', 'ie',
              'iao', 'iu', 'ua', 'uo', 'uai', 'ui', 've', 'an', 'en', 'in',
              'un', 'vn', 'ian', 'uan', 'vuan', 'ang', 'eng', 'ing', 'ong',
              'zhi', 'chi', 'shi', 'ri', 'zi', 'ci', 'si', 'yi', 'wu', 'yu',
              'yin', 'yun', 'ye', 'yue', 'yuan', 'ying']

# Derived lookup structures, built once instead of on every get_yunmu() call.
_YUNMU_SET = frozenset(YUNMU_LIST)
_YUNMU_LONGEST_FIRST = tuple(sorted(YUNMU_LIST, key=len, reverse=True))

# Initials, two-letter ones first so 'zh' is tried before 'z'.
_SHENGMU_LONGEST_FIRST = (
    'zh', 'ch', 'sh',
    'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h',
    'j', 'q', 'x', 'z', 'c', 's', 'r', 'y', 'w',
)

# After j/q/x the letter 'u' is pronounced 'ü'.  Values are the labels that
# YUNMU_LIST uses for the ü-finals (note 'vuan', not 'van').
_UMLAUT_INITIALS = frozenset(('j', 'q', 'x'))
_U_AS_UMLAUT = {'u': 'v', 'ue': 've', 'un': 'vn', 'uan': 'vuan'}


def get_yunmu(syllable):
    """Return the final (yunmu) label of a toneless pinyin syllable.

    The syllable is lower-cased and 'ü' is rewritten as 'v' first.  Lookup
    order:

    1. A syllable that is itself in ``YUNMU_LIST`` is returned unchanged.
    2. Otherwise each matching initial (longest first) is stripped.  After
       j/q/x a leading 'u' means 'ü', so u/ue/un/uan map to v/ve/vn/vuan;
       any other remainder is returned if it is in ``YUNMU_LIST``.
    3. Otherwise the longest ``YUNMU_LIST`` entry that is a suffix of the
       syllable is returned (e.g. 'xiong' -> 'ong').

    Args:
        syllable: pinyin syllable as ``str`` (any case, 'ü' or 'v').

    Returns:
        The matching label from ``YUNMU_LIST``, or ``None`` if nothing matches.
    """
    syllable = syllable.lower().replace('ü', 'v')
    if syllable in _YUNMU_SET:
        return syllable

    for shengmu in _SHENGMU_LONGEST_FIRST:
        if not syllable.startswith(shengmu):
            continue
        remainder = syllable[len(shengmu):]
        # Must run before the direct match: 'u', 'un' and 'uan' are valid
        # finals themselves and would otherwise hide the ü reading.
        if shengmu in _UMLAUT_INITIALS and remainder in _U_AS_UMLAUT:
            return _U_AS_UMLAUT[remainder]
        if remainder in _YUNMU_SET:
            return remainder

    # Last resort: longest known final that the syllable ends with.
    for yunmu in _YUNMU_LONGEST_FIRST:
        if syllable.endswith(yunmu):
            return yunmu
    return None


def print_results(rhymer, text, target_rhyme, top_results=8, beam_width=20,
                  num_candidates=5000):
    """Print the best rhyme candidates for ``text`` with the filled part in red.

    Args:
        rhymer: object exposing ``get_rhymes(text, target_rhyme, beam_width=...,
            num_candidates=...)`` that returns a sliceable sequence of
            ``(seq, log_prob)`` pairs, where ``seq`` is a string.
        text: prompt containing zero or more ``[M]`` mask markers.
        target_rhyme: rhyme label, passed to ``get_rhymes`` and shown in the
            header.
        top_results: number of leading candidates to print.
        beam_width: forwarded to ``rhymer.get_rhymes``.
        num_candidates: forwarded to ``rhymer.get_rhymes``.
    """
    out = rhymer.get_rhymes(text, target_rhyme, beam_width=beam_width,
                            num_candidates=num_candidates)
    mask_count = text.count("[M]")
    context = text.split('[M]')[0]

    print(f"======= 韵脚: |{target_rhyme}|")
    for i, (seq, log_prob) in enumerate(out[:top_results]):
        # The last `mask_count` characters of the candidate are the filled-in
        # part.  Guard the zero case: seq[-0:] would be the whole string.
        filled = seq[-mask_count:] if mask_count else ''
        colored_rhymes = ''.join(
            Fore.RED + part + Style.RESET_ALL for part in filled.split()
        )
        print(f"{i+1}. {context}{colored_rhymes} (score: {log_prob:.3f})")
    print("=" + "=" * 40)


def _load_syllables(path):
    """Read a whitespace-separated syllable list from a UTF-8 text file."""
    with open(path, 'r', encoding='utf-8') as f:
        # split() without arguments already drops empty tokens.
        return f.read().split()


def main():
    """Regenerate ``rules/syllable_to_yunmu.json`` from the syllable list."""
    # Loaded here rather than at import time so importing this module for
    # get_yunmu()/print_results() does not require the rules files.
    all_syllables = _load_syllables('rules/ALL_SYLLABLES.txt')

    syllable_to_yunmu = {}
    for syllable in all_syllables:
        yunmu = get_yunmu(syllable)
        if yunmu:  # syllables without a recognisable final are left out
            syllable_to_yunmu[syllable] = yunmu

    with open('rules/syllable_to_yunmu.json', 'w', encoding='utf-8') as f:
        json.dump(syllable_to_yunmu, f, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    main()