# -*- coding: utf-8 -*-
"""
鸟类知识科普系统（Qwen3优化版） by [你的名字]
ISOM5240 Group Project
"""


import transformers
import os
import torch
import gradio as gr
from PIL import Image
from transformers import (
    pipeline,
    AutoConfig,
    AutoImageProcessor
)

# ========== 环境配置 ==========
# Ask transformers to keep downloaded models under ./model_cache.
# NOTE(review): this runs after `import transformers` at the top of the file;
# confirm the library still honours the variable when it is set this late.
os.environ.update(TRANSFORMERS_CACHE="./model_cache")

# Use the GPU when torch reports CUDA support, otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(f"当前设备: {DEVICE.upper()}")

# ========== 模型初始化 ==========
def init_models():
    """Load the three pipelines the app depends on.

    Returns:
        tuple: ``(classifier, text_generator, tts)`` where
        ``classifier`` is an image-classification pipeline for
        ``chriamue/bird-species-classifier``, ``text_generator`` is a
        text-generation pipeline for ``Qwen/Qwen-7B-Chat`` and ``tts`` is a
        text-to-speech pipeline for ``facebook/mms-tts-eng``.

    Raises:
        RuntimeError: If any of the models fails to load. The original
            exception is chained as ``__cause__``.
    """
    try:
        # 1. Bird classifier, with an explicitly configured image processor.
        image_processor = AutoImageProcessor.from_pretrained(
            "chriamue/bird-species-classifier",
            use_fast=True,  # request the fast processor implementation
        )
        classifier = pipeline(
            task="image-classification",
            model="chriamue/bird-species-classifier",
            # Pass it as `image_processor` (not the legacy `feature_extractor`)
            # so the processor built above is the one the pipeline uses.
            image_processor=image_processor,
            device=DEVICE,
        )

        # 2. Text generator. The Qwen checkpoint ships custom modelling code,
        # so remote code must be trusted for the config, model and tokenizer.
        qwen_config = AutoConfig.from_pretrained(
            "Qwen/Qwen-7B-Chat",
            trust_remote_code=True,
        )
        text_generator = pipeline(
            task="text-generation",
            model="Qwen/Qwen-7B-Chat",
            config=qwen_config,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            # `revision` and `trust_remote_code` are pipeline-level arguments
            # that the pipeline forwards to the model/tokenizer loaders itself;
            # repeating `revision` inside `model_kwargs` supplies the keyword
            # twice.
            revision="main",
            trust_remote_code=True,
            model_kwargs={"cache_dir": "./model_cache"},
        )

        # 3. Speech synthesis.
        tts = pipeline(
            task="text-to-speech",
            model="facebook/mms-tts-eng",
            device=DEVICE,
        )

        return classifier, text_generator, tts

    except Exception as e:
        # Re-raise under one exception type while keeping the original cause.
        raise RuntimeError(f"模型加载失败: {e}") from e

# ========== 核心逻辑 ==========
def generate_child_friendly_text(bird_name):
    """Generate a short, child-friendly description of a bird.

    Args:
        bird_name: Name of the bird to describe; interpolated into the prompt.

    Returns:
        str: The generated description with surrounding whitespace removed,
        or a message starting with ``"文本生成失败: "`` if generation raised.
    """
    prompt = f"""用6-12岁儿童能理解的语言描述{bird_name}：
    ★ 使用比喻（例如：羽毛像彩虹糖纸）
    ★ 包含趣味冷知识（例如：每天吃自身体重30%的食物）
    ★ 语句简短（每句不超过15个英文单词）
    ★ 避免专业术语"""

    try:
        response = text_generator(
            prompt,
            max_new_tokens=120,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            # Return only the continuation. Splitting the full text on "描述"
            # left the rest of the prompt (bird name and bullet instructions)
            # in the result and truncated any output that itself contained
            # that word.
            return_full_text=False,
        )
        return response[0]["generated_text"].strip()
    except Exception as e:
        # Best effort: surface the failure as text instead of raising.
        return f"文本生成失败: {str(e)}"

def process_image(img):
    """Run the classify -> describe -> speak flow for one image.

    Args:
        img: The uploaded image, or ``None`` when the input has been cleared.

    Returns:
        tuple: ``(bird_name, description, audio)``. ``audio`` is a
        ``(sampling_rate, waveform)`` pair for the audio output component.
        When ``img`` is ``None`` the result is ``("", "", None)``. When any
        step raises, the result is ``("错误", "处理失败: ...", None)``.
    """
    # The change event also fires when the image is removed; reset the outputs
    # instead of reporting a classification error.
    if img is None:
        return "", "", None

    try:
        # 1. Classify the image and keep the first label.
        classification = classifier(img)
        bird_name = classification[0]['label']

        # 2. Generate the description.
        description = generate_child_friendly_text(bird_name)

        # 3. Synthesize speech.
        speech = tts(description, forward_params={"speaker_id": 6})

        # The audio component needs the sampling rate alongside the samples.
        # The pipeline documents audio as (channels, length), so transpose to
        # (length, channels); for a 1-D array `.T` is a no-op.
        waveform = speech["audio"].T
        return bird_name, description, (speech["sampling_rate"], waveform)

    except Exception as e:
        # Report the failure in the text outputs and leave the audio empty.
        return "错误", f"处理失败: {str(e)}", None

# ========== 初始化验证 ==========
if __name__ == "__main__":
    # Load all models before building the UI so a load failure stops startup.
    classifier, text_generator, tts = init_models()

    page_theme = gr.themes.Soft()
    page_css = ".gradio-container {max-width: 800px}"
    sample_images = ["test_images/eagle.jpg", "test_images/penguin.jpg"]

    # Build the Gradio interface.
    with gr.Blocks(theme=page_theme, css=page_css) as demo:
        gr.Markdown("# 🐦 鸟类知识小课堂（稳定版）")

        # Row: image upload next to the audio player.
        with gr.Row():
            img_input = gr.Image(type="pil", label="上传鸟类图片", height=300)
            audio_output = gr.Audio(label="语音讲解", autoplay=True)

        # Column: recognised name and generated description.
        with gr.Column():
            name_output = gr.Textbox(label="识别结果")
            desc_output = gr.Textbox(label="趣味知识", lines=4)

        gr.Examples(
            examples=sample_images,
            inputs=img_input,
            label="示例图片",
        )

        # Re-run the whole flow whenever the image input changes.
        result_components = [name_output, desc_output, audio_output]
        img_input.change(
            process_image,
            inputs=img_input,
            outputs=result_components,
        )

    # Listen on all interfaces at port 7860 and request a share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    demo.launch(**launch_options)