import gradio as gr
import torch
from diffusers import DiffusionPipeline
from transformers import AutoProcessor, AutoModelForImageTextToText, TextIteratorStreamer
import random
import os
import sys
import time
from threading import Thread

# Set PyTorch MPS fallback for Apple Silicon compatibility
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'

# Check for dev mode
DEV_MODE = "--dev" in sys.argv

# Import spaces for HuggingFace deployment
try:
    import spaces
    HF_SPACES = True
    print("🚀 Running on HuggingFace Spaces with ZeroGPU")
except ImportError:
    HF_SPACES = False
    print("🏠 Running locally - spaces module not available")

# MCP is always enabled
print("🔌 MCP protocol enabled - tools available for external access")

MAX_SEED = 2**32 - 1

def load_flux_model():
    dtype = torch.bfloat16
    
    # For HuggingFace Spaces, prioritize CUDA
    if HF_SPACES and torch.cuda.is_available():
        device = "cuda"
    # For local development, prioritize MPS for Apple Silicon
    elif torch.backends.mps.is_available():
        device = "mps"
    elif torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    
    print(f"Using device for FLUX: {device}")
    
    pipe = DiffusionPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-schnell", 
        torch_dtype=dtype
    ).to(device)
    
    # Print tokenizer info for debugging
    if hasattr(pipe, 'tokenizer'):
        print(f"FLUX Tokenizer max length: {pipe.tokenizer.model_max_length}")
    if hasattr(pipe, 'tokenizer_2'):
        print(f"FLUX Tokenizer 2 max length: {pipe.tokenizer_2.model_max_length}")
    
    return pipe

def load_gemma_model():
    print("Loading Gemma-3n-E2B-it model...")
    
    model_id = "google/gemma-3n-E2B-it"
    processor = AutoProcessor.from_pretrained(model_id)
    
    if HF_SPACES:
        # Don't load model in main process for ZeroGPU
        print("ZeroGPU mode: Model will be loaded in GPU functions")
        return processor, None
    else:
        model = AutoModelForImageTextToText.from_pretrained(
            model_id, 
            device_map="auto", 
            torch_dtype=torch.bfloat16
        )
        print(f"Using device for Gemma-E2B: {model.device}")
        return processor, model

flux_pipe = load_flux_model()
gemma_processor, gemma_model = load_gemma_model()

# Model loading function for GPU contexts with caching
_cached_gpu_model = None

def _load_gemma_model_gpu():
    """Load model inside GPU context with caching"""
    global _cached_gpu_model
    if _cached_gpu_model is None:
        print("🔄 Loading Gemma model in GPU context...")
        model_id = "google/gemma-3n-E2B-it"
        _cached_gpu_model = AutoModelForImageTextToText.from_pretrained(
            model_id, 
            device_map="auto", 
            torch_dtype=torch.bfloat16
        )
        print("✅ Gemma model loaded and cached")
    else:
        print("♻️ Using cached Gemma model")
    return _cached_gpu_model

# Multilingual support
def get_translations():
    return {
        "en": {
            "title": "🎭 Avatar Generator - Chinese Portrait",
            "subtitle": "Complete at least the first 3 groups to generate your personalized avatar.",
            "portrait_title": "📝 Chinese Portrait (first 3 groups required)",
            "group": "Group",
            "required": "Required",
            "optional": "Optional",
            "if_i_was": "If I was",
            "i_would_be": "I would be",
            "generate_btn": "🎨 Generate Avatar",
            "avatar_title": "🖼️ Generated Avatar",
            "your_avatar": "Your Avatar",
            "information": "Information",
            "error_required": "Error: The first 3 groups of fields are required.",
            "success": "Avatar generated successfully!",
            "prompt_used": "Prompt used:",
            "error_generation": "Error during generation:",
            "footer": "Avatar generated with FLUX.1-schnell",
            "quality_normal": "Normal Quality (4 steps, 512x512)",
            "quality_high": "High Quality (8 steps, 512x512)",
            "quality_label": "Quality:",
            "tab_form": "📝 Form Mode",
            "tab_chat": "💬 Chat Mode",
            "chat_title": "🤖 AI Assistant - Avatar Creator",
            "chat_subtitle": "Let me guide you through creating your Chinese portrait!",
            "thinking": "Thinking...",
            "placeholders": {
                "animal": "an animal...",
                "animal_answer": "a lion...",
                "color": "a color...",
                "color_answer": "red...",
                "object": "an object...",
                "object_answer": "a sword...",
                "feeling": "a feeling...",
                "feeling_answer": "joy...",
                "element": "an element...",
                "element_answer": "fire..."
            }
        },
        "fr": {
            "title": "🎭 Générateur d'Avatar - Portrait Chinois",
            "subtitle": "Complétez au minimum les 3 premiers groupes pour générer votre avatar personnalisé.",
            "portrait_title": "📝 Portrait Chinois (3 premiers groupes obligatoires)",
            "group": "Groupe",
            "required": "Obligatoire",
            "optional": "Optionnel",
            "if_i_was": "Si j'étais",
            "i_would_be": "Je serais",
            "generate_btn": "🎨 Générer l'Avatar",
            "avatar_title": "🖼️ Avatar Généré",
            "your_avatar": "Votre Avatar",
            "information": "Informations",
            "error_required": "Erreur: Les 3 premiers groupes de champs sont obligatoires.",
            "success": "Avatar généré avec succès!",
            "prompt_used": "Prompt utilisé:",
            "error_generation": "Erreur lors de la génération:",
            "footer": "Avatar généré avec FLUX.1-schnell",
            "quality_normal": "Qualité Normale (4 étapes, 512x512)",
            "quality_high": "Haute Qualité (8 étapes, 512x512)",
            "quality_label": "Qualité:",
            "tab_form": "📝 Mode Formulaire",
            "tab_chat": "💬 Mode Chat",
            "chat_title": "🤖 Assistant IA - Créateur d'Avatar",
            "chat_subtitle": "Laissez-moi vous guider pour créer votre portrait chinois!",
            "thinking": "Réflexion...",
            "placeholders": {
                "animal": "un animal...",
                "animal_answer": "un lion...",
                "color": "une couleur...",
                "color_answer": "rouge...",
                "object": "un objet...",
                "object_answer": "une épée...",
                "feeling": "un sentiment...",
                "feeling_answer": "la joie...",
                "element": "un élément...",
                "element_answer": "le feu..."
            }
        }
    }

# Dev mode default values
def get_dev_defaults():
    return {
        "if1": "an animal", "would1": "a majestic wolf",
        "if2": "a color", "would2": "deep purple",
        "if3": "an object", "would3": "an ancient sword",
        "if4": "a feeling", "would4": "fierce determination",
        "if5": "an element", "would5": "lightning"
    }

# Apply ZeroGPU decorator if available
if HF_SPACES:
    @spaces.GPU()
    def generate_avatar(if1: str, would1: str, if2: str, would2: str, if3: str, would3: str, if4: str = "", would4: str = "", if5: str = "", would5: str = "", language: str = "en", quality: str = "normal"):
        """
        Generate a personalized avatar from Chinese portrait elements.
        
        Args:
            if1: First category (e.g., "an animal")
            would1: First answer (e.g., "a majestic wolf")
            if2: Second category (e.g., "a color") 
            would2: Second answer (e.g., "deep purple")
            if3: Third category (e.g., "an object")
            would3: Third answer (e.g., "an ancient sword")
            if4: Fourth category (optional, e.g., "a feeling")
            would4: Fourth answer (optional, e.g., "fierce determination")
            if5: Fifth category (optional, e.g., "an element")
            would5: Fifth answer (optional, e.g., "lightning")
            language: Interface language ("en" or "fr")
            quality: Generation quality ("normal" or "high")
            
        Returns:
            tuple: (generated_image, info_text)
        """
        return _generate_avatar_impl(if1, would1, if2, would2, if3, would3, if4, would4, if5, would5, language, quality)
else:
    def generate_avatar(if1: str, would1: str, if2: str, would2: str, if3: str, would3: str, if4: str = "", would4: str = "", if5: str = "", would5: str = "", language: str = "en", quality: str = "normal"):
        """
        Generate a personalized avatar from Chinese portrait elements.
        
        Args:
            if1: First category (e.g., "an animal")
            would1: First answer (e.g., "a majestic wolf")
            if2: Second category (e.g., "a color") 
            would2: Second answer (e.g., "deep purple")
            if3: Third category (e.g., "an object")
            would3: Third answer (e.g., "an ancient sword")
            if4: Fourth category (optional, e.g., "a feeling")
            would4: Fourth answer (optional, e.g., "fierce determination")
            if5: Fifth category (optional, e.g., "an element")
            would5: Fifth answer (optional, e.g., "lightning")
            language: Interface language ("en" or "fr")
            quality: Generation quality ("normal" or "high")
            
        Returns:
            tuple: (generated_image, info_text)
        """
        return _generate_avatar_impl(if1, would1, if2, would2, if3, would3, if4, would4, if5, would5, language, quality)

@spaces.GPU() if HF_SPACES else lambda x: x
def _generate_avatar_impl(if1, would1, if2, would2, if3, would3, if4, would4, if5, would5, language, quality):
    translations = get_translations()
    t = translations.get(language, translations["en"])
    
    # Validation des champs obligatoires
    if not if1 or not would1 or not if2 or not would2 or not if3 or not would3:
        return None, t["error_required"]
    
    # Construction du prompt portrait chinois amélioré
    portrait_parts = []
    portrait_parts.append(f"If I was {if1}, I would be {would1}")
    portrait_parts.append(f"If I was {if2}, I would be {would2}")
    portrait_parts.append(f"If I was {if3}, I would be {would3}")
    
    if if4 and would4:
        portrait_parts.append(f"If I was {if4}, I would be {would4}")
    if if5 and would5:
        portrait_parts.append(f"If I was {if5}, I would be {would5}")
    
    chinese_portrait = ". ".join(portrait_parts)
    
    # Prompt optimisé pour rester sous 77 tokens CLIP
    elements = [f"{if1}→{would1}", f"{if2}→{would2}", f"{if3}→{would3}"]
    if if4 and would4:
        elements.append(f"{if4}→{would4}")
    if if5 and would5:
        elements.append(f"{if5}→{would5}")
    
    elements_str = ", ".join(elements)
    
    # Prompt concis pour éviter la troncature CLIP
    prompt = f"Artistic character portrait: {elements_str}. High-quality digital art, fantasy style, detailed with dramatic lighting."
    
    try:
        # Configuration selon la qualité
        if quality == "high":
            width, height, steps = 512, 512, 8
        else:
            width, height, steps = 512, 512, 4
            
        # Génération avec seed aléatoire
        seed = random.randint(0, MAX_SEED)
        generator = torch.Generator(device=flux_pipe.device).manual_seed(seed)
        
        image = flux_pipe(
            prompt=prompt,
            width=width,
            height=height,
            num_inference_steps=steps,
            guidance_scale=0.0,
            generator=generator
        ).images[0]
        
        return image, f"{t['success']}\n{t['prompt_used']} {prompt}\nSeed: {seed}\nQuality: {quality} ({steps} steps, {width}x{height})"
    
    except Exception as e:
        return None, f"{t['error_generation']} {str(e)}"

# Separate GPU function for generation only (no generator)
if HF_SPACES:
    @spaces.GPU()
    def gemma_generate_response(message, history, language):
        return _gemma_generate_response_impl(message, history, language)
else:
    def gemma_generate_response(message, history, language):
        return _gemma_generate_response_impl(message, history, language)

# Non-GPU streaming function
def gemma_chat_stream(message, history, language):
    return _gemma_chat_stream_impl(message, history, language)

def _gemma_generate_response_impl(message, history, language):
    """Generate response using GPU - returns complete response"""
    # Load model in GPU context if needed
    model = gemma_model if gemma_model is not None else _load_gemma_model_gpu()
    
    # Prepare messages in the format expected by the processor
    messages = []
    
    # Add history (which already includes the initial system prompt as first user message)
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": [{"type": "text", "text": user_msg}]})
        messages.append({"role": "assistant", "content": [{"type": "text", "text": assistant_msg}]})
    
    # Add current message
    messages.append({"role": "user", "content": [{"type": "text", "text": message}]})
    
    # Apply chat template and tokenize
    inputs = gemma_processor.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    )
    
    # Generate response
    with torch.no_grad():
        # Move to device without dtype conversion to avoid issues
        device_inputs = {k: v.to(model.device) for k, v in inputs.items()}
        
        generate_kwargs = dict(
            device_inputs,
            max_new_tokens=150,
            do_sample=False,
            disable_compile=True,
        )
        
        outputs = model.generate(**generate_kwargs)
        response = gemma_processor.decode(outputs[0][device_inputs['input_ids'].shape[1]:], skip_special_tokens=True)
    
    # Clean response
    response = clean_chat_response(response)
    
    return response

def _gemma_chat_stream_impl(message, history, language):
    """Streaming function that calls GPU function and simulates streaming"""
    # For ZeroGPU, we can't use threading with streaming, so fall back to non-streaming
    if HF_SPACES:
        # Get complete response from GPU function
        response = gemma_generate_response(message, history, language)
        
        # Simulate streaming by yielding progressively
        words = response.split()
        partial_response = ""
        updated_history = history + [[message, ""]]
        
        for i, word in enumerate(words):
            partial_response += word + " "
            updated_history[-1][1] = partial_response.strip()
            yield updated_history.copy()
            if i % 3 == 0:  # Pause tous les 3 mots
                time.sleep(0.1)
    else:
        # Local development - use real streaming
        # Load model in GPU context if needed
        model = gemma_model if gemma_model is not None else _load_gemma_model_gpu()
        
        # Prepare messages in the format expected by the processor
        messages = []
        
        # Add history (which already includes the initial system prompt as first user message)
        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": [{"type": "text", "text": user_msg}]})
            messages.append({"role": "assistant", "content": [{"type": "text", "text": assistant_msg}]})
        
        # Add current message
        messages.append({"role": "user", "content": [{"type": "text", "text": message}]})
        
        # Apply chat template and tokenize
        inputs = gemma_processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        )
        
        # Set up streaming generation
        streamer = TextIteratorStreamer(gemma_processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
        
        # Move to device without dtype conversion to avoid issues
        device_inputs = {k: v.to(model.device) for k, v in inputs.items()}
        
        generate_kwargs = dict(
            device_inputs,
            streamer=streamer,
            max_new_tokens=150,
            do_sample=False,
            disable_compile=True,
        )
        
        # Generate text in a separate thread
        t = Thread(target=model.generate, kwargs=generate_kwargs)
        t.start()
        
        # Stream output
        updated_history = history + [[message, ""]]
        output = ""
        try:
            for delta in streamer:
                output += delta
                # Clean the response as it streams
                cleaned_output = clean_chat_response(output)
                updated_history[-1][1] = cleaned_output
                yield updated_history.copy()
        except Exception as e:
            # Fallback to non-streaming if streaming fails
            t.join()  # Wait for thread to complete
            response = gemma_generate_response(message, history, language)
            updated_history[-1][1] = response
            yield updated_history.copy()

def clean_assistant_response(response):
    """
    Nettoie la réponse de l'assistant pour éviter les faux dialogues
    """
    import re
    
    # Enlever les patterns de faux dialogue
    patterns_to_remove = [
        r'User:\s*[^\n]+',  # Enlever "User: ..."
        r'Assistant:\s*[^\n]+',  # Enlever "Assistant: ..."
        r'Human:\s*[^\n]+',  # Enlever "Human: ..."
        r'AI:\s*[^\n]+',    # Enlever "AI: ..."
    ]
    
    cleaned = response.strip()
    
    for pattern in patterns_to_remove:
        cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE)
    
    # Enlever les lignes vides multiples
    cleaned = re.sub(r'\n\s*\n', '\n', cleaned)
    
    # Si la réponse contient encore des patterns de dialogue, couper au premier
    dialogue_patterns = [
        r'(?i)\buser\b.*?:',
        r'(?i)\bassistant\b.*?:',
        r'(?i)\bhuman\b.*?:',
        r'(?i)\bai\b.*?:'
    ]
    
    for pattern in dialogue_patterns:
        match = re.search(pattern, cleaned)
        if match:
            # Couper juste avant le pattern trouvé
            cleaned = cleaned[:match.start()].strip()
            break
    
    # Limiter à 500 caractères max pour éviter les réponses trop longues
    if len(cleaned) > 500:
        # Couper à la dernière phrase complète
        sentences = cleaned[:500].split('.')
        if len(sentences) > 1:
            cleaned = '.'.join(sentences[:-1]) + '.'
        else:
            cleaned = cleaned[:500] + '...'
    
    return cleaned.strip()

def clean_chat_response(response):
    """
    Nettoie la réponse du chat sans limiter la taille autant
    """
    import re
    
    # Enlever les patterns de faux dialogue
    patterns_to_remove = [
        r'User:\s*[^\n]+',  # Enlever "User: ..."
        r'Assistant:\s*[^\n]+',  # Enlever "Assistant: ..."
        r'Human:\s*[^\n]+',  # Enlever "Human: ..."
        r'AI:\s*[^\n]+',    # Enlever "AI: ..."
    ]
    
    cleaned = response.strip()
    
    for pattern in patterns_to_remove:
        cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE)
    
    # Enlever les lignes vides multiples
    cleaned = re.sub(r'\n\s*\n', '\n', cleaned)
    
    # Si la réponse contient encore des patterns de dialogue, couper au premier
    dialogue_patterns = [
        r'(?i)\buser\b.*?:',
        r'(?i)\bassistant\b.*?:',
        r'(?i)\bhuman\b.*?:',
        r'(?i)\bai\b.*?:'
    ]
    
    for pattern in dialogue_patterns:
        match = re.search(pattern, cleaned)
        if match:
            # Couper juste avant le pattern trouvé
            cleaned = cleaned[:match.start()].strip()
            break
    
    # Limiter à 200 caractères max pour le chat (plus généreux que 500 pour l'analyse)
    if len(cleaned) > 200:
        # Couper à la dernière phrase complète
        sentences = cleaned[:200].split('.')
        if len(sentences) > 1:
            cleaned = '.'.join(sentences[:-1]) + '.'
        else:
            cleaned = cleaned[:200] + '...'
    
    return cleaned.strip()

def extract_portrait_from_conversation(history, language="en"):
    """
    Utilise le LLM pour analyser la conversation et synthétiser un prompt d'image dynamique
    """
    # Nettoyer l'historique : enlever le dernier message si c'est une question du modèle sans réponse
    cleaned_history = history.copy()
    if cleaned_history and cleaned_history[-1][1] and not cleaned_history[-1][0]:
        # Si le dernier message a une réponse du modèle mais pas de message utilisateur
        # (dernière entrée est ["", "question_du_modèle"]), on l'enlève
        cleaned_history = cleaned_history[:-1]
    
    # Combiner tout le texte de la conversation nettoyée
    conversation_text = ""
    for user_msg, assistant_msg in cleaned_history:
        if user_msg:  # S'assurer qu'il y a un message utilisateur
            conversation_text += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
    
    # Prompt compact pour synthèse directe
    analysis_prompt = f"""Based on the following conversation, generate a compact character description in the style of a Chinese Portrait, formatted as:

Artistic character portrait: [category1] → [answer1], [category2] → [answer2], ...

Only include clear and relevant answers. Skip any incomplete or vague ones.
Do not repeat the full conversation.
Keep the result short (max ~40 tokens), using simple words.

Conversation: {conversation_text}"""

    try:
        # Load model in GPU context if needed
        model = gemma_model if gemma_model is not None else _load_gemma_model_gpu()
        
        # Prepare messages for the new processor format
        messages = [{"role": "user", "content": [{"type": "text", "text": analysis_prompt}]}]
        
        # Apply chat template and tokenize
        inputs = gemma_processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        )
        
        # Move inputs to device and generate
        with torch.no_grad():
            # Move to device without dtype conversion to avoid issues
            device_inputs = {k: v.to(model.device) for k, v in inputs.items()}
            
            outputs = model.generate(
                **device_inputs,
                max_new_tokens=100,
                do_sample=False,
                disable_compile=True,
            )
            
        response = gemma_processor.decode(outputs[0][device_inputs['input_ids'].shape[1]:], skip_special_tokens=True)

        # Nettoyage léger pour l'analyse (ne pas limiter à 500 caractères)
        response = response.strip()
        # Enlever seulement les patterns de faux dialogue évidents
        import re
        response = re.sub(r'User:\s*[^\n]+', '', response, flags=re.IGNORECASE)
        response = re.sub(r'Assistant:\s*[^\n]+', '', response, flags=re.IGNORECASE)
        response = re.sub(r'Human:\s*[^\n]+', '', response, flags=re.IGNORECASE)
        response = re.sub(r'AI:\s*[^\n]+', '', response, flags=re.IGNORECASE)
        response = re.sub(r'\n\s*\n', '\n', response).strip()
        
        # Extraire le prompt d'image du format attendu
        image_prompt = ""
        
        # Chercher le format "Artistic character portrait:"
        if 'artistic character portrait:' in response.lower():
            # Extraire tout ce qui suit "Artistic character portrait:"
            portrait_index = response.lower().find('artistic character portrait:')
            image_prompt = response[portrait_index:].strip()
        else:
            # Si pas du bon format, prendre la réponse complète et l'ajuster
            image_prompt = response.strip()
            if image_prompt and not image_prompt.lower().startswith('artistic character portrait'):
                image_prompt = f"Artistic character portrait: {image_prompt}"
        
        # S'assurer que le prompt est bien formaté pour FLUX
        if image_prompt:
            # Extraire les éléments pour l'affichage AVANT d'ajouter les éléments artistiques
            elements = []
            if '→' in image_prompt:
                # Parser les éléments au format "category → value"
                parts = image_prompt.split(':')[-1]  # Prendre après ":"
                pairs = parts.split(',')
                for pair in pairs:
                    if '→' in pair and not any(art_word in pair.lower() for art_word in ['high-quality', 'digital art', 'detailed', 'fantasy']):
                        try:
                            category, value = pair.split('→', 1)
                            category = category.strip()
                            value = value.strip().rstrip('.')
                            # Nettoyer la valeur pour enlever les informations d'art
                            value = re.sub(r'\.\s*(high-quality|digital art|fantasy|detailed).*', '', value, flags=re.IGNORECASE).strip()
                            if category and value and not value.startswith('('):
                                elements.append((category, value))
                        except:
                            continue
            
            # Ajouter des éléments artistiques si manquants
            if not any(word in image_prompt.lower() for word in ['detailed', 'high-quality', 'digital art']):
                image_prompt += ". High-quality digital art, fantasy style, detailed illustration"
            
            return image_prompt, elements
        else:
            fallback_prompt = "Artistic character portrait of a unique individual. High-quality digital art, fantasy style, detailed illustration"
            return fallback_prompt, [('style', 'unique individual')]
        
    except Exception as e:
        # Fallback simple en cas d'erreur
        fallback_prompt = "Artistic character portrait of a unique individual. High-quality digital art, fantasy style, detailed illustration with dramatic lighting"
        return fallback_prompt, [('style', 'artistic portrait')]

@spaces.GPU() if HF_SPACES else lambda x: x
def generate_avatar_from_chat(history: list, language: str = "en", quality: str = "normal"):
    """
    Generate avatar from conversation history with AI assistant.
    
    Args:
        history: List of conversation turns [[user_msg, assistant_msg], ...]
        language: Interface language ("en" or "fr")
        quality: Generation quality ("normal" or "high")
        
    Returns:
        tuple: (generated_image, info_text)
    """
    # Extraire le prompt d'image et les éléments de la conversation
    prompt, elements = extract_portrait_from_conversation(history, language)
    
    if not prompt:
        return None, "Could not analyze conversation. Please continue chatting to build your portrait."
    
    try:
        # Configuration selon la qualité
        if quality == "high":
            width, height, steps = 512, 512, 8
        else:
            width, height, steps = 512, 512, 4
            
        # Génération avec seed aléatoire
        seed = random.randint(0, MAX_SEED)
        generator = torch.Generator(device=flux_pipe.device).manual_seed(seed)
        
        image = flux_pipe(
            prompt=prompt,
            width=width,
            height=height,
            num_inference_steps=steps,
            guidance_scale=0.0,
            generator=generator
        ).images[0]
        
        elements_text = "\n".join([f"- {category.title()}: {value}" for category, value in elements])
        
        return image, f"Avatar generated from conversation!\n\nLLM Analysis:\n{elements_text}\n\nPrompt: {prompt}\nSeed: {seed}\nQuality: {quality} ({steps} steps, {width}x{height})"
    
    except Exception as e:
        return None, f"Error during generation: {str(e)}"

def create_form_interface(language="en"):
    translations = get_translations()
    t = translations.get(language, translations["en"])
    dev_defaults = get_dev_defaults() if DEV_MODE else {}
    
    with gr.Column() as form_interface:
        gr.Markdown(f"### {t['portrait_title']}")
        
        # Commutateur de qualité
        quality_radio = gr.Radio(
            choices=["normal", "high"],
            value="normal",
            label=t["quality_label"]
        )
        
        # Groupe 1 (obligatoire)
        gr.Markdown(f"**{t['group']} 1** ⭐ *{t['required']}*")
        with gr.Row():
            if1 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["animal"], 
                           value=dev_defaults.get("if1", ""), scale=1)
            would1 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["animal_answer"], 
                              value=dev_defaults.get("would1", ""), scale=1)
        
        # Groupe 2 (obligatoire)
        gr.Markdown(f"**{t['group']} 2** ⭐ *{t['required']}*")
        with gr.Row():
            if2 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["color"], 
                           value=dev_defaults.get("if2", ""), scale=1)
            would2 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["color_answer"], 
                              value=dev_defaults.get("would2", ""), scale=1)
        
        # Groupe 3 (obligatoire)
        gr.Markdown(f"**{t['group']} 3** ⭐ *{t['required']}*")
        with gr.Row():
            if3 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["object"], 
                           value=dev_defaults.get("if3", ""), scale=1)
            would3 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["object_answer"], 
                              value=dev_defaults.get("would3", ""), scale=1)
        
        # Groupe 4 (optionnel)
        gr.Markdown(f"**{t['group']} 4** ✨ *{t['optional']}*")
        with gr.Row():
            if4 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["feeling"], 
                           value=dev_defaults.get("if4", ""), scale=1)
            would4 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["feeling_answer"], 
                              value=dev_defaults.get("would4", ""), scale=1)
        
        # Groupe 5 (optionnel)
        gr.Markdown(f"**{t['group']} 5** ✨ *{t['optional']}*")
        with gr.Row():
            if5 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["element"], 
                           value=dev_defaults.get("if5", ""), scale=1)
            would5 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["element_answer"], 
                              value=dev_defaults.get("would5", ""), scale=1)
        
        generate_btn = gr.Button(t["generate_btn"], variant="primary", size="lg")
        
        gr.Markdown(f"### {t['avatar_title']}")
        output_image = gr.Image(label=t["your_avatar"], height=400)
        output_text = gr.Textbox(label=t["information"], lines=4, interactive=False)
        
        # Hidden state for language
        lang_state = gr.State(value=language)
        
        generate_btn.click(
            fn=generate_avatar,
            inputs=[if1, would1, if2, would2, if3, would3, if4, would4, if5, would5, lang_state, quality_radio],
            outputs=[output_image, output_text]
        )
        
    return form_interface

def create_chat_interface(language="en"):
    translations = get_translations()
    t = translations.get(language, translations["en"])
    
    with gr.Column() as chat_interface:
        gr.Markdown(f"### {t['chat_title']}")
        gr.Markdown(t["chat_subtitle"])
        
        chatbot = gr.Chatbot(height=400, show_copy_button=True)
        
        # Zone de message avec bouton d'envoi
        with gr.Row():
            msg = gr.Textbox(label="Message", placeholder="Type your response here...", visible=False, scale=4)
            send_btn = gr.Button("📤", visible=False, scale=1, min_width=50)
        
        # Boutons de contrôle - en dessous du chat
        with gr.Row():
            start_btn = gr.Button("🚀 Start New Conversation", variant="primary", scale=1)
            avatar_btn = gr.Button("🎨 Get My Avatar", variant="secondary", scale=1)
            quality_chat = gr.Radio(choices=["normal", "high"], value="normal", label="Quality", scale=1)
        
        # Résultats de génération d'avatar
        avatar_output = gr.Image(label="Generated Avatar", visible=False)
        avatar_info = gr.Textbox(label="Avatar Info", lines=4, interactive=False, visible=False)
        
        # Hidden state for language
        lang_state = gr.State(value=language)
        
        def respond(message: str, history: list, language: str = "en"):
            """
            Process user message and generate streaming AI response for chat interface.
            
            Args:
                message: User's input message
                history: List of previous conversation turns [[user_msg, bot_msg], ...]
                language: Interface language ("en" or "fr")
                
            Yields:
                tuple: ("", updated_history) for streaming chat interface
            """
            
            # Convert history format if needed
            if history is None:
                history = []
            
            # Process streaming response - yield (empty_text, updated_history)
            last_response = None
            for response in gemma_chat_stream(message, history, language):
                last_response = response
                yield "", response
            
            # Ensure we have a final state
            if last_response is not None:
                yield "", last_response
        
        def start_conversation(language):
            """Démarre la conversation avec le prompt système comme premier message utilisateur"""
            # Le prompt système devient le premier message utilisateur
            system_prompt = """You are running a simple "Chinese Portrait" game. Your ONLY job is to ask questions.

STRICT RULES - NEVER BREAK THESE:
1. Ask ONLY: "If you were a [category], what would you be?"
2. After user answers, ask the NEXT question immediately
3. NO comments, NO reactions, NO explanations
4. Use random categories: animal, color, object, emotion, weather, plant, tool, fabric, planet, smell, sound, etc.
5. NEVER repeat a category

EXAMPLE PATTERN:
User: "ready"
You: "If you were an animal, what would you be?"
User: "wolf"
You: "If you were a color, what would you be?"
User: "purple"
You: "If you were a planet, what would you be?"

FORBIDDEN:
- Don't say "interesting", "nice", "cool"
- Don't explain anything
- Don't comment on answers
- Don't ask why or how
- Don't make conversations

JUST ASK THE NEXT QUESTION. Start the game now."""
            
            # Utiliser la fonction respond pour générer la première question
            history = []
            responses = list(gemma_chat_stream(system_prompt, history, language))
            
            if responses:
                # Prendre la dernière réponse générée
                final_history = responses[-1]
                return final_history, gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
            else:
                # Fallback en cas d'erreur
                fallback_message = [[system_prompt, "If you were an animal, what would you be?"]]
                return fallback_message, gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
        
        def show_avatar_interface():
            """Affiche immédiatement l'interface avatar pour montrer que ça calcule"""
            return gr.update(visible=True), gr.update(visible=True, value="Generating your avatar...")
        
        def generate_avatar_from_conversation(history, language, quality):
            if not history:
                return None, "No conversation found. Please start a conversation first."
            
            image, info = generate_avatar_from_chat(history, language, quality)
            return image, info
        
        # Événements
        start_btn.click(
            fn=start_conversation,
            inputs=[lang_state],
            outputs=[chatbot, msg, send_btn, avatar_output, avatar_info]
        )
        
        # Envoi via Enter ou bouton
        msg.submit(
            respond, 
            [msg, chatbot, lang_state], 
            [msg, chatbot], 
            queue=True
        )
        
        send_btn.click(
            respond,
            [msg, chatbot, lang_state],
            [msg, chatbot],
            queue=True
        )
        
        # Affichage immédiat de l'interface puis génération
        avatar_btn.click(
            show_avatar_interface,
            outputs=[avatar_output, avatar_info]
        ).then(
            generate_avatar_from_conversation,
            inputs=[chatbot, lang_state, quality_chat],
            outputs=[avatar_output, avatar_info]
        )
        
        gr.Markdown("*Click 'Start New Conversation' to begin, then 'Get My Avatar' when you've completed your portrait!*")
    
    return chat_interface


def detect_browser_language():
    """Détecte la langue du navigateur via JavaScript injecté"""
    # Par défaut anglais, sera override par le JavaScript
    return "en"

def create_interface(language="en"):
    translations = get_translations()
    t = translations.get(language, translations["en"])
    
    with gr.Blocks(title=t["title"], theme="gstaff/xkcd") as demo:
        gr.Markdown(f"# {t['title']}")
        gr.Markdown(t["subtitle"])
        
        with gr.Tabs():
            with gr.Tab(t["tab_form"]):
                create_form_interface(language)
            
            with gr.Tab(t["tab_chat"]):
                create_chat_interface(language)
        
        gr.Markdown("---")
        gr.Markdown(f"*{t['footer']}*")
        
        return demo


# Create the main web interface with MCP tools integrated
with gr.Blocks(title="🎭 Avatar Generator") as demo:
    gr.Markdown("# 🎭 Avatar Generator - Chinese Portrait")
    gr.Markdown("Generate personalized avatars from Chinese portrait descriptions using FLUX.1-schnell and Gemma-3n-E2B-it")
    
    with gr.Tabs():
        # Main application tabs
        with gr.Tab("📝 Form Mode"):
            create_form_interface("en")
        
        with gr.Tab("💬 Chat Mode"):
            create_chat_interface("en")
        
        
    gr.Markdown("---")
    gr.Markdown("🔌 **MCP Integration**: This app exposes tools via MCP protocol at `/gradio_api/mcp/sse`")
    gr.Markdown("*Avatar generated with FLUX.1-schnell*")

if __name__ == "__main__":
    if DEV_MODE:
        print("🚀 Running in DEV MODE with pre-filled values")
    
    print("🔌 Starting server with MCP support...")
    print("📡 MCP endpoint available at: http://localhost:7860/gradio_api/mcp/sse")
    print("🌐 Web interface available at: http://localhost:7860")
    
    demo.launch(mcp_server=True, show_api=True)