from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# Load the fine-tuned Qwen model used to translate emojis into Chinese text.
emoji_translator = pipeline(
    "text-generation",
    model="JenniferHJF/qwen1.5-emoji-finetuned",
    tokenizer="JenniferHJF/qwen1.5-emoji-finetuned",
    max_new_tokens=20,
    trust_remote_code=True,
)

# Offensive-language classifier (example model; may be swapped for a larger
# model such as ChatGLM3, DeepSeek, etc.).
offensive_classifier = pipeline(
    "text-classification",
    model="s-nlp/roberta-offensive-language-detection",
)


def classify_text_with_emoji(raw_text):
    """Translate emojis in *raw_text* to Chinese, then classify offensiveness.

    Parameters
    ----------
    raw_text : str
        Input text that may contain emojis.

    Returns
    -------
    tuple[str, str, float]
        ``(translated_text, label, score)`` where ``label`` and ``score``
        come from the offensive-language classifier.
    """
    # Step 1: convert emojis -> Chinese via the fine-tuned generator.
    prompt = f"输入:{raw_text}\n输出:"
    converted = emoji_translator(prompt)[0]["generated_text"]

    # The text-generation pipeline returns prompt + completion by default.
    # Drop the prompt so the trailing "输出:" label is not left glued to
    # the translated text (the previous last-line heuristic kept it).
    if converted.startswith(prompt):
        completion = converted[len(prompt):].strip()
    else:
        completion = converted.strip()

    # Take the last generated line; fall back to the raw input when the
    # model produced nothing, avoiding IndexError on an empty splitlines().
    if completion:
        translated_text = completion.splitlines()[-1]
    else:
        translated_text = raw_text

    # Step 2: run the offensive-language classifier on the converted text.
    result = offensive_classifier(translated_text)[0]
    label = result["label"]
    score = result["score"]
    return translated_text, label, score