from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# Load the fine-tuned Qwen model used to translate emojis into Chinese text.
emoji_translator = pipeline(
    "text-generation",
    model="JenniferHJF/qwen1.5-emoji-finetuned",
    tokenizer="JenniferHJF/qwen1.5-emoji-finetuned",
    max_new_tokens=20,
    trust_remote_code=True,
)

# Offensive-language classifier (example model; may be swapped for a larger
# model such as ChatGLM3, DeepSeek, etc.).
offensive_classifier = pipeline(
    "text-classification",
    model="s-nlp/roberta-offensive-language-detection",
)


def classify_text_with_emoji(raw_text):
    """Translate emojis in *raw_text* to Chinese, then classify offensiveness.

    Parameters
    ----------
    raw_text : str
        Input text that may contain emojis.

    Returns
    -------
    tuple[str, str, float]
        ``(translated_text, label, score)`` where ``label`` and ``score``
        come from the offensive-language classifier.
    """
    # Step 1: convert emojis -> Chinese via the fine-tuned generator.
    prompt = f"输入:{raw_text}\n输出:"
    converted = emoji_translator(prompt)[0]["generated_text"]

    # The text-generation pipeline returns prompt + completion by default.
    # Drop the prompt so the trailing "输出:" label is not left glued to
    # the translated text (the previous last-line heuristic kept it).
    if converted.startswith(prompt):
        completion = converted[len(prompt):].strip()
    else:
        completion = converted.strip()

    # Take the last generated line; fall back to the raw input when the
    # model produced nothing, avoiding IndexError on an empty splitlines().
    if completion:
        translated_text = completion.splitlines()[-1]
    else:
        translated_text = raw_text

    # Step 2: run the offensive-language classifier on the converted text.
    result = offensive_classifier(translated_text)[0]
    label = result["label"]
    score = result["score"]
    return translated_text, label, score