from collections import defaultdict
import json
from colorama import Fore, Style, init

init(autoreset=True)

# with open('rules/ALL_SYLLABLES.txt', 'r', encoding='utf-8') as f:
#     ALL_SYLLABLES = f.read().strip().split()
#     ALL_SYLLABLES = [syllable for syllable in ALL_SYLLABLES if syllable]

# Finals (yunmu) that get_yunmu can return, written with 'v' standing in for 'ü'.
# The last two rows are whole syllables that are kept as their own class rather
# than being split into initial + final.
YUNMU_LIST = ['a', 'o', 'e', 'i', 'u', 'v',
              'ai', 'ei', 'ao', 'ou', 'ia', 'ie', 'iao', 'iu', 'ua', 'uo', 'uai', 'ui', 've',
              # 'van' is üan (it replaces 'vuan', which is not a pinyin final).
              'an', 'en', 'in', 'un', 'vn', 'ian', 'uan', 'van',
              'ang', 'eng', 'ing', 'ong',
              'zhi', 'chi', 'shi', 'ri', 'zi', 'ci', 'si',
              'yi', 'wu', 'yu', 'yin', 'yun', 'ye', 'yue', 'yuan', 'ying']

# Initials (shengmu), two-letter ones first so 'zh' is tried before 'z'.
_SHENGMU_LIST = (
    'zh', 'ch', 'sh', 'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h',
    'j', 'q', 'x', 'z', 'c', 's', 'r', 'y', 'w',
)

# Initials after which a written 'u' stands for 'ü'.
_UMLAUT_INITIALS = frozenset({'j', 'q', 'x', 'y'})


def get_yunmu(syllable):
    """Return the final (yunmu) of a pinyin syllable, or None if none matches.

    The syllable is lower-cased and 'ü' is rewritten as 'v' before matching.
    Matching then proceeds in this order:

    1. If the whole syllable is an entry of YUNMU_LIST it is returned as is
       (e.g. 'zhi', 'yuan').
    2. Each known initial that prefixes the syllable is stripped. After
       j/q/x/y a leading 'u' in the remainder is read as 'ü' ('v') when that
       form is listed, so 'ju' -> 'v', 'qun' -> 'vn', 'xuan' -> 'van'.
       Otherwise the remainder is returned if it is listed.
    3. As a fallback, the longest entry of YUNMU_LIST that the syllable ends
       with is returned (e.g. 'liang' -> 'ang').

    Args:
        syllable: Pinyin syllable as a string, with 'ü' or 'v' for ü.

    Returns:
        The matching entry of YUNMU_LIST, or None when nothing matches.
    """
    syllable = syllable.lower().replace('ü', 'v')
    finals = set(YUNMU_LIST)

    if syllable in finals:
        return syllable

    for shengmu in _SHENGMU_LIST:
        if not syllable.startswith(shengmu):
            continue
        rest = syllable[len(shengmu):]

        # Try the ü reading first: 'u', 'un' and 'uan' are finals in their own
        # right, so a literal match would otherwise shadow 'v', 'vn' and 'van'.
        if shengmu in _UMLAUT_INITIALS and rest.startswith('u'):
            umlaut_form = 'v' + rest[1:]
            if umlaut_form in finals:
                return umlaut_form

        if rest in finals:
            return rest

    # No initial + final split worked; fall back to the longest listed suffix.
    return max(
        (final for final in YUNMU_LIST if syllable.endswith(final)),
        key=len,
        default=None,
    )


def print_results(rhymer, text, target_rhyme, top_results=8, beam_width=20, num_candidates=5000):
    """Print the top rhyme candidates for ``text`` with the filled-in tail in red.

    Args:
        rhymer: Object with a ``get_rhymes(text, target_rhyme, beam_width=...,
            num_candidates=...)`` method. The result is sliced and unpacked as
            ``(sequence, log_prob)`` pairs, where ``sequence`` is a string.
        text: Prompt containing ``[M]`` placeholders. Everything before the
            first placeholder is echoed as the context of every result line.
        target_rhyme: Rhyme passed to ``rhymer`` and shown in the header line.
        top_results: Maximum number of candidates to print.
        beam_width: Forwarded to ``rhymer.get_rhymes``.
        num_candidates: Forwarded to ``rhymer.get_rhymes``.

    Returns:
        None. All output goes to stdout.
    """
    out = rhymer.get_rhymes(text, target_rhyme, beam_width=beam_width, num_candidates=num_candidates)
    mask_count = text.count("[M]")
    context = text.split('[M]')[0]

    print(f"======= 韵脚: |{target_rhyme}|")
    for rank, (seq, log_prob) in enumerate(out[:top_results], start=1):
        # The last `mask_count` characters of the sequence are taken as the
        # filled-in part (presumably one character per [M] -- confirm against
        # the rhymer). Guard the zero case: seq[-0:] would be the whole string.
        tail = seq[-mask_count:] if mask_count else ""
        colored_rhymes = ''.join(Fore.RED + part + Style.RESET_ALL for part in tail.split())

        print(f"{rank}. {context}{colored_rhymes} (score: {log_prob:.3f})")
    print("=" + "=" * 40)


if __name__ == "__main__":
    # Script mode: rebuild rules/syllable_to_yunmu.json from the syllable list.
    # The list is loaded here, not at import time, so importing this module
    # for get_yunmu / print_results does not require the rules files.
    with open('rules/ALL_SYLLABLES.txt', 'r', encoding='utf-8') as f:
        # split() with no arguments already drops empty tokens.
        ALL_SYLLABLES = f.read().split()

    # Syllables for which get_yunmu finds no final are left out of the map.
    syllable_to_yunmu = {}
    for syllable in ALL_SYLLABLES:
        yunmu = get_yunmu(syllable)
        if yunmu:
            syllable_to_yunmu[syllable] = yunmu

    with open('rules/syllable_to_yunmu.json', 'w', encoding='utf-8') as f:
        json.dump(syllable_to_yunmu, f, ensure_ascii=False, indent=4)