import gradio as gr
import torch
from diffusers import DiffusionPipeline
from transformers import AutoProcessor, AutoModelForImageTextToText, TextIteratorStreamer
import random
import os
import sys
import time
from threading import Thread
# Set PyTorch MPS fallback for Apple Silicon compatibility
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'
# Check for dev mode
DEV_MODE = "--dev" in sys.argv
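# Example (illustrative): launching with `python app.py --dev` pre-fills the form
# fields with the sample values from get_dev_defaults() defined below.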
# Import spaces for HuggingFace deployment
try:
import spaces
HF_SPACES = True
print("🚀 Running on HuggingFace Spaces with ZeroGPU")
except ImportError:
HF_SPACES = False
print("🏠 Running locally - spaces module not available")
# MCP is always enabled
print("🔌 MCP protocol enabled - tools available for external access")
MAX_SEED = 2**32 - 1
def load_flux_model():
dtype = torch.bfloat16
# For HuggingFace Spaces, prioritize CUDA
if HF_SPACES and torch.cuda.is_available():
device = "cuda"
# For local development, prioritize MPS for Apple Silicon
elif torch.backends.mps.is_available():
device = "mps"
elif torch.cuda.is_available():
device = "cuda"
else:
device = "cpu"
print(f"Using device for FLUX: {device}")
pipe = DiffusionPipeline.from_pretrained(
"black-forest-labs/FLUX.1-schnell",
torch_dtype=dtype
).to(device)
# Print tokenizer info for debugging
if hasattr(pipe, 'tokenizer'):
print(f"FLUX Tokenizer max length: {pipe.tokenizer.model_max_length}")
if hasattr(pipe, 'tokenizer_2'):
print(f"FLUX Tokenizer 2 max length: {pipe.tokenizer_2.model_max_length}")
return pipe
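# Illustrative standalone use of the pipeline loaded above (not executed here; the
# prompt and settings are examples that mirror the calls made further down):
#   image = flux_pipe(
#       prompt="Artistic character portrait: an animal: a majestic wolf, a color: deep purple",
#       width=512, height=512, num_inference_steps=4, guidance_scale=0.0,
#   ).images[0]
#   image.save("avatar.png")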
def load_gemma_model():
print("Loading Gemma-3n-E2B-it model...")
model_id = "google/gemma-3n-E2B-it"
processor = AutoProcessor.from_pretrained(model_id)
if HF_SPACES:
# Don't load model in main process for ZeroGPU
print("ZeroGPU mode: Model will be loaded in GPU functions")
return processor, None
else:
model = AutoModelForImageTextToText.from_pretrained(
model_id,
device_map="auto",
torch_dtype=torch.bfloat16
)
print(f"Using device for Gemma-E2B: {model.device}")
return processor, model
flux_pipe = load_flux_model()
gemma_processor, gemma_model = load_gemma_model()
# Model loading function for GPU contexts with caching
_cached_gpu_model = None
def _load_gemma_model_gpu():
"""Load model inside GPU context with caching"""
global _cached_gpu_model
if _cached_gpu_model is None:
print("🔄 Loading Gemma model in GPU context...")
model_id = "google/gemma-3n-E2B-it"
_cached_gpu_model = AutoModelForImageTextToText.from_pretrained(
model_id,
device_map="auto",
torch_dtype=torch.bfloat16
)
print("✅ Gemma model loaded and cached")
else:
print("♻️ Using cached Gemma model")
return _cached_gpu_model
# Multilingual support
def get_translations():
return {
"en": {
"title": "🎭 Avatar Generator - Chinese Portrait",
"subtitle": "Complete at least the first 3 groups to generate your personalized avatar.",
"portrait_title": "📝 Chinese Portrait (first 3 groups required)",
"group": "Group",
"required": "Required",
"optional": "Optional",
"if_i_was": "If I was",
"i_would_be": "I would be",
"generate_btn": "🎨 Generate Avatar",
"avatar_title": "🖼️ Generated Avatar",
"your_avatar": "Your Avatar",
"information": "Information",
"error_required": "Error: The first 3 groups of fields are required.",
"success": "Avatar generated successfully!",
"prompt_used": "Prompt used:",
"error_generation": "Error during generation:",
"footer": "Avatar generated with FLUX.1-schnell",
"quality_normal": "Normal Quality (4 steps, 512x512)",
"quality_high": "High Quality (8 steps, 512x512)",
"quality_label": "Quality:",
"tab_form": "📝 Form Mode",
"tab_chat": "💬 Chat Mode",
"chat_title": "🤖 AI Assistant - Avatar Creator",
"chat_subtitle": "Let me guide you through creating your Chinese portrait!",
"thinking": "Thinking...",
"placeholders": {
"animal": "an animal...",
"animal_answer": "a lion...",
"color": "a color...",
"color_answer": "red...",
"object": "an object...",
"object_answer": "a sword...",
"feeling": "a feeling...",
"feeling_answer": "joy...",
"element": "an element...",
"element_answer": "fire..."
}
},
"fr": {
"title": "🎭 Générateur d'Avatar - Portrait Chinois",
"subtitle": "Complétez au minimum les 3 premiers groupes pour générer votre avatar personnalisé.",
"portrait_title": "📝 Portrait Chinois (3 premiers groupes obligatoires)",
"group": "Groupe",
"required": "Obligatoire",
"optional": "Optionnel",
"if_i_was": "Si j'étais",
"i_would_be": "Je serais",
"generate_btn": "🎨 Générer l'Avatar",
"avatar_title": "🖼️ Avatar Généré",
"your_avatar": "Votre Avatar",
"information": "Informations",
"error_required": "Erreur: Les 3 premiers groupes de champs sont obligatoires.",
"success": "Avatar généré avec succès!",
"prompt_used": "Prompt utilisé:",
"error_generation": "Erreur lors de la génération:",
"footer": "Avatar généré avec FLUX.1-schnell",
"quality_normal": "Qualité Normale (4 étapes, 512x512)",
"quality_high": "Haute Qualité (8 étapes, 512x512)",
"quality_label": "Qualité:",
"tab_form": "📝 Mode Formulaire",
"tab_chat": "💬 Mode Chat",
"chat_title": "🤖 Assistant IA - Créateur d'Avatar",
"chat_subtitle": "Laissez-moi vous guider pour créer votre portrait chinois!",
"thinking": "Réflexion...",
"placeholders": {
"animal": "un animal...",
"animal_answer": "un lion...",
"color": "une couleur...",
"color_answer": "rouge...",
"object": "un objet...",
"object_answer": "une épée...",
"feeling": "un sentiment...",
"feeling_answer": "la joie...",
"element": "un élément...",
"element_answer": "le feu..."
}
}
}
# Dev mode default values
def get_dev_defaults():
return {
"if1": "an animal", "would1": "a majestic wolf",
"if2": "a color", "would2": "deep purple",
"if3": "an object", "would3": "an ancient sword",
"if4": "a feeling", "would4": "fierce determination",
"if5": "an element", "would5": "lightning"
}
# Apply ZeroGPU decorator if available
if HF_SPACES:
@spaces.GPU()
def generate_avatar(if1: str, would1: str, if2: str, would2: str, if3: str, would3: str, if4: str = "", would4: str = "", if5: str = "", would5: str = "", language: str = "en", quality: str = "normal"):
"""
Generate a personalized avatar from Chinese portrait elements.
Args:
if1: First category (e.g., "an animal")
would1: First answer (e.g., "a majestic wolf")
if2: Second category (e.g., "a color")
would2: Second answer (e.g., "deep purple")
if3: Third category (e.g., "an object")
would3: Third answer (e.g., "an ancient sword")
if4: Fourth category (optional, e.g., "a feeling")
would4: Fourth answer (optional, e.g., "fierce determination")
if5: Fifth category (optional, e.g., "an element")
would5: Fifth answer (optional, e.g., "lightning")
language: Interface language ("en" or "fr")
quality: Generation quality ("normal" or "high")
Returns:
tuple: (generated_image, info_text)
"""
return _generate_avatar_impl(if1, would1, if2, would2, if3, would3, if4, would4, if5, would5, language, quality)
else:
def generate_avatar(if1: str, would1: str, if2: str, would2: str, if3: str, would3: str, if4: str = "", would4: str = "", if5: str = "", would5: str = "", language: str = "en", quality: str = "normal"):
"""
Generate a personalized avatar from Chinese portrait elements.
Args:
if1: First category (e.g., "an animal")
would1: First answer (e.g., "a majestic wolf")
if2: Second category (e.g., "a color")
would2: Second answer (e.g., "deep purple")
if3: Third category (e.g., "an object")
would3: Third answer (e.g., "an ancient sword")
if4: Fourth category (optional, e.g., "a feeling")
would4: Fourth answer (optional, e.g., "fierce determination")
if5: Fifth category (optional, e.g., "an element")
would5: Fifth answer (optional, e.g., "lightning")
language: Interface language ("en" or "fr")
quality: Generation quality ("normal" or "high")
Returns:
tuple: (generated_image, info_text)
"""
return _generate_avatar_impl(if1, would1, if2, would2, if3, would3, if4, would4, if5, would5, language, quality)
@spaces.GPU() if HF_SPACES else lambda x: x
def _generate_avatar_impl(if1, would1, if2, would2, if3, would3, if4, would4, if5, would5, language, quality):
translations = get_translations()
t = translations.get(language, translations["en"])
# Validate the required fields
if not if1 or not would1 or not if2 or not would2 or not if3 or not would3:
return None, t["error_required"]
# Build the enriched Chinese portrait text
portrait_parts = []
portrait_parts.append(f"If I was {if1}, I would be {would1}")
portrait_parts.append(f"If I was {if2}, I would be {would2}")
portrait_parts.append(f"If I was {if3}, I would be {would3}")
if if4 and would4:
portrait_parts.append(f"If I was {if4}, I would be {would4}")
if if5 and would5:
portrait_parts.append(f"If I was {if5}, I would be {would5}")
chinese_portrait = ". ".join(portrait_parts)
# Compact elements, optimized to stay under the 77-token CLIP limit
elements = [f"{if1}: {would1}", f"{if2}: {would2}", f"{if3}: {would3}"]
if if4 and would4:
elements.append(f"{if4}: {would4}")
if if5 and would5:
elements.append(f"{if5}: {would5}")
elements_str = ", ".join(elements)
# Concise prompt to avoid CLIP truncation
prompt = f"Artistic character portrait: {elements_str}. High-quality digital art, fantasy style, detailed with dramatic lighting."
try:
# Configure generation according to the requested quality
if quality == "high":
width, height, steps = 512, 512, 8
else:
width, height, steps = 512, 512, 4
# Generate with a random seed
seed = random.randint(0, MAX_SEED)
generator = torch.Generator(device=flux_pipe.device).manual_seed(seed)
image = flux_pipe(
prompt=prompt,
width=width,
height=height,
num_inference_steps=steps,
guidance_scale=0.0,
generator=generator
).images[0]
return image, f"{t['success']}\n{t['prompt_used']} {prompt}\nSeed: {seed}\nQuality: {quality} ({steps} steps, {width}x{height})"
except Exception as e:
return None, f"{t['error_generation']} {str(e)}"
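# Illustrative direct call of the tool above (not executed here; the values are examples only):
#   image, info = generate_avatar(
#       "an animal", "a majestic wolf",
#       "a color", "deep purple",
#       "an object", "an ancient sword",
#       language="en", quality="high",
#   )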
# Separate GPU function that returns the complete response (not a Python generator)
if HF_SPACES:
@spaces.GPU()
def gemma_generate_response(message, history, language):
return _gemma_generate_response_impl(message, history, language)
else:
def gemma_generate_response(message, history, language):
return _gemma_generate_response_impl(message, history, language)
# Non-GPU streaming function
def gemma_chat_stream(message, history, language):
return _gemma_chat_stream_impl(message, history, language)
def _gemma_generate_response_impl(message, history, language):
"""Generate response using GPU - returns complete response"""
# Load model in GPU context if needed
model = gemma_model if gemma_model is not None else _load_gemma_model_gpu()
# Prepare messages in the format expected by the processor
messages = []
# Add history (which already includes the initial system prompt as first user message)
for user_msg, assistant_msg in history:
messages.append({"role": "user", "content": [{"type": "text", "text": user_msg}]})
messages.append({"role": "assistant", "content": [{"type": "text", "text": assistant_msg}]})
# Add current message
messages.append({"role": "user", "content": [{"type": "text", "text": message}]})
# Apply chat template and tokenize
inputs = gemma_processor.apply_chat_template(
messages,
add_generation_prompt=True,
tokenize=True,
return_dict=True,
return_tensors="pt",
)
# Generate response
with torch.no_grad():
# Move to device without dtype conversion to avoid issues
device_inputs = {k: v.to(model.device) for k, v in inputs.items()}
generate_kwargs = dict(
device_inputs,
max_new_tokens=150,
do_sample=False,
disable_compile=True,
)
outputs = model.generate(**generate_kwargs)
response = gemma_processor.decode(outputs[0][device_inputs['input_ids'].shape[1]:], skip_special_tokens=True)
# Clean response
response = clean_chat_response(response)
return response
def _gemma_chat_stream_impl(message, history, language):
"""Streaming function that calls GPU function and simulates streaming"""
# For ZeroGPU, we can't use threading with streaming, so fall back to non-streaming
if HF_SPACES:
# Get complete response from GPU function
response = gemma_generate_response(message, history, language)
# Simulate streaming by yielding progressively
words = response.split()
partial_response = ""
updated_history = history + [[message, ""]]
for i, word in enumerate(words):
partial_response += word + " "
updated_history[-1][1] = partial_response.strip()
yield updated_history.copy()
if i % 3 == 0: # Pause every 3 words
time.sleep(0.1)
else:
# Local development - use real streaming
# Load model in GPU context if needed
model = gemma_model if gemma_model is not None else _load_gemma_model_gpu()
# Prepare messages in the format expected by the processor
messages = []
# Add history (which already includes the initial system prompt as first user message)
for user_msg, assistant_msg in history:
messages.append({"role": "user", "content": [{"type": "text", "text": user_msg}]})
messages.append({"role": "assistant", "content": [{"type": "text", "text": assistant_msg}]})
# Add current message
messages.append({"role": "user", "content": [{"type": "text", "text": message}]})
# Apply chat template and tokenize
inputs = gemma_processor.apply_chat_template(
messages,
add_generation_prompt=True,
tokenize=True,
return_dict=True,
return_tensors="pt",
)
# Set up streaming generation
streamer = TextIteratorStreamer(gemma_processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
# Move to device without dtype conversion to avoid issues
device_inputs = {k: v.to(model.device) for k, v in inputs.items()}
generate_kwargs = dict(
device_inputs,
streamer=streamer,
max_new_tokens=150,
do_sample=False,
disable_compile=True,
)
# Generate text in a separate thread
t = Thread(target=model.generate, kwargs=generate_kwargs)
t.start()
# Stream output
updated_history = history + [[message, ""]]
output = ""
try:
for delta in streamer:
output += delta
# Clean the response as it streams
cleaned_output = clean_chat_response(output)
updated_history[-1][1] = cleaned_output
yield updated_history.copy()
except Exception as e:
# Fallback to non-streaming if streaming fails
t.join() # Wait for thread to complete
response = gemma_generate_response(message, history, language)
updated_history[-1][1] = response
yield updated_history.copy()
def clean_assistant_response(response):
"""
Clean the assistant response to avoid fake dialogue turns
"""
import re
# Remove fake-dialogue patterns
patterns_to_remove = [
r'User:\s*[^\n]+', # Remove "User: ..."
r'Assistant:\s*[^\n]+', # Remove "Assistant: ..."
r'Human:\s*[^\n]+', # Remove "Human: ..."
r'AI:\s*[^\n]+', # Remove "AI: ..."
]
cleaned = response.strip()
for pattern in patterns_to_remove:
cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE)
# Collapse multiple blank lines
cleaned = re.sub(r'\n\s*\n', '\n', cleaned)
# If the response still contains dialogue patterns, cut at the first occurrence
dialogue_patterns = [
r'(?i)\buser\b.*?:',
r'(?i)\bassistant\b.*?:',
r'(?i)\bhuman\b.*?:',
r'(?i)\bai\b.*?:'
]
for pattern in dialogue_patterns:
match = re.search(pattern, cleaned)
if match:
# Cut just before the matched pattern
cleaned = cleaned[:match.start()].strip()
break
# Cap at 500 characters to avoid overly long responses
if len(cleaned) > 500:
# Cut at the last complete sentence
sentences = cleaned[:500].split('.')
if len(sentences) > 1:
cleaned = '.'.join(sentences[:-1]) + '.'
else:
cleaned = cleaned[:500] + '...'
return cleaned.strip()
def clean_chat_response(response):
"""
Clean the chat response (capped at 200 characters, tighter than the analysis cleaner's 500)
"""
import re
# Remove fake-dialogue patterns
patterns_to_remove = [
r'User:\s*[^\n]+', # Remove "User: ..."
r'Assistant:\s*[^\n]+', # Remove "Assistant: ..."
r'Human:\s*[^\n]+', # Remove "Human: ..."
r'AI:\s*[^\n]+', # Remove "AI: ..."
]
cleaned = response.strip()
for pattern in patterns_to_remove:
cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE)
# Collapse multiple blank lines
cleaned = re.sub(r'\n\s*\n', '\n', cleaned)
# If the response still contains dialogue patterns, cut at the first occurrence
dialogue_patterns = [
r'(?i)\buser\b.*?:',
r'(?i)\bassistant\b.*?:',
r'(?i)\bhuman\b.*?:',
r'(?i)\bai\b.*?:'
]
for pattern in dialogue_patterns:
match = re.search(pattern, cleaned)
if match:
# Cut just before the matched pattern
cleaned = cleaned[:match.start()].strip()
break
# Cap at 200 characters for chat (stricter than the 500-character cap used for analysis)
if len(cleaned) > 200:
# Cut at the last complete sentence
sentences = cleaned[:200].split('.')
if len(sentences) > 1:
cleaned = '.'.join(sentences[:-1]) + '.'
else:
cleaned = cleaned[:200] + '...'
return cleaned.strip()
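# Illustrative behavior of the cleaner above (not executed): a trailing fake dialogue turn is stripped.
#   clean_chat_response("If you were a color, what would you be?\nUser: blue")
#   -> "If you were a color, what would you be?"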
def extract_portrait_from_conversation(history, language="en"):
"""
Use the LLM to analyze the conversation and synthesize a dynamic image prompt
"""
# Clean the history: drop the last turn if it is an unanswered question from the model
cleaned_history = history.copy()
if cleaned_history and cleaned_history[-1][1] and not cleaned_history[-1][0]:
# If the last turn has a model response but no user message
# (i.e. the last entry is ["", "model_question"]), remove it
cleaned_history = cleaned_history[:-1]
# Combine all text from the cleaned conversation
conversation_text = ""
for user_msg, assistant_msg in cleaned_history:
if user_msg: # Make sure there is a user message
conversation_text += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
# Compact prompt for direct synthesis
analysis_prompt = f"""Based on the following conversation, generate a compact character description in the style of a Chinese Portrait, formatted as:
Artistic character portrait: [category1] → [answer1], [category2] → [answer2], ...
Only include clear and relevant answers. Skip any incomplete or vague ones.
Do not repeat the full conversation.
Keep the result short (max ~40 tokens), using simple words.
Conversation: {conversation_text}"""
try:
# Load model in GPU context if needed
model = gemma_model if gemma_model is not None else _load_gemma_model_gpu()
# Prepare messages for the new processor format
messages = [{"role": "user", "content": [{"type": "text", "text": analysis_prompt}]}]
# Apply chat template and tokenize
inputs = gemma_processor.apply_chat_template(
messages,
add_generation_prompt=True,
tokenize=True,
return_dict=True,
return_tensors="pt",
)
# Move inputs to device and generate
with torch.no_grad():
# Move to device without dtype conversion to avoid issues
device_inputs = {k: v.to(model.device) for k, v in inputs.items()}
outputs = model.generate(
**device_inputs,
max_new_tokens=100,
do_sample=False,
disable_compile=True,
)
response = gemma_processor.decode(outputs[0][device_inputs['input_ids'].shape[1]:], skip_special_tokens=True)
# Light cleanup for the analysis (do not cap at 500 characters)
response = response.strip()
# Remove only the obvious fake-dialogue patterns
import re
response = re.sub(r'User:\s*[^\n]+', '', response, flags=re.IGNORECASE)
response = re.sub(r'Assistant:\s*[^\n]+', '', response, flags=re.IGNORECASE)
response = re.sub(r'Human:\s*[^\n]+', '', response, flags=re.IGNORECASE)
response = re.sub(r'AI:\s*[^\n]+', '', response, flags=re.IGNORECASE)
response = re.sub(r'\n\s*\n', '\n', response).strip()
# Extract the image prompt from the expected format
image_prompt = ""
# Look for the "Artistic character portrait:" marker
if 'artistic character portrait:' in response.lower():
# Extract everything that follows "Artistic character portrait:"
portrait_index = response.lower().find('artistic character portrait:')
image_prompt = response[portrait_index:].strip()
else:
# If not in the expected format, take the full response and adjust it
image_prompt = response.strip()
if image_prompt and not image_prompt.lower().startswith('artistic character portrait'):
image_prompt = f"Artistic character portrait: {image_prompt}"
# Make sure the prompt is well formatted for FLUX
if image_prompt:
# Extract the elements for display BEFORE adding the artistic keywords
elements = []
if '→' in image_prompt:
# Parse elements in the "category → value" format
parts = image_prompt.split(':')[-1] # Take what follows ":"
pairs = parts.split(',')
for pair in pairs:
if '→' in pair and not any(art_word in pair.lower() for art_word in ['high-quality', 'digital art', 'detailed', 'fantasy']):
try:
category, value = pair.split('→', 1)
category = category.strip()
value = value.strip().rstrip('.')
# Clean the value to strip art-style keywords
value = re.sub(r'\.\s*(high-quality|digital art|fantasy|detailed).*', '', value, flags=re.IGNORECASE).strip()
if category and value and not value.startswith('('):
elements.append((category, value))
except Exception:
continue
# Add artistic keywords if they are missing
if not any(word in image_prompt.lower() for word in ['detailed', 'high-quality', 'digital art']):
image_prompt += ". High-quality digital art, fantasy style, detailed illustration"
return image_prompt, elements
else:
fallback_prompt = "Artistic character portrait of a unique individual. High-quality digital art, fantasy style, detailed illustration"
return fallback_prompt, [('style', 'unique individual')]
except Exception as e:
# Simple fallback in case of error
fallback_prompt = "Artistic character portrait of a unique individual. High-quality digital art, fantasy style, detailed illustration with dramatic lighting"
return fallback_prompt, [('style', 'artistic portrait')]
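# Illustrative parsing example for the function above (not executed): an LLM reply such as
#   "Artistic character portrait: animal → wolf, color → purple"
# yields elements [("animal", "wolf"), ("color", "purple")] and a final prompt with
# ". High-quality digital art, fantasy style, detailed illustration" appended.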
@spaces.GPU() if HF_SPACES else lambda x: x
def generate_avatar_from_chat(history: list, language: str = "en", quality: str = "normal"):
"""
Generate avatar from conversation history with AI assistant.
Args:
history: List of conversation turns [[user_msg, assistant_msg], ...]
language: Interface language ("en" or "fr")
quality: Generation quality ("normal" or "high")
Returns:
tuple: (generated_image, info_text)
"""
# Extract the image prompt and the elements from the conversation
prompt, elements = extract_portrait_from_conversation(history, language)
if not prompt:
return None, "Could not analyze conversation. Please continue chatting to build your portrait."
try:
# Configure generation according to the requested quality
if quality == "high":
width, height, steps = 512, 512, 8
else:
width, height, steps = 512, 512, 4
# Generate with a random seed
seed = random.randint(0, MAX_SEED)
generator = torch.Generator(device=flux_pipe.device).manual_seed(seed)
image = flux_pipe(
prompt=prompt,
width=width,
height=height,
num_inference_steps=steps,
guidance_scale=0.0,
generator=generator
).images[0]
elements_text = "\n".join([f"- {category.title()}: {value}" for category, value in elements])
return image, f"Avatar generated from conversation!\n\nLLM Analysis:\n{elements_text}\n\nPrompt: {prompt}\nSeed: {seed}\nQuality: {quality} ({steps} steps, {width}x{height})"
except Exception as e:
return None, f"Error during generation: {str(e)}"
def create_form_interface(language="en"):
translations = get_translations()
t = translations.get(language, translations["en"])
dev_defaults = get_dev_defaults() if DEV_MODE else {}
with gr.Column() as form_interface:
gr.Markdown(f"### {t['portrait_title']}")
# Quality toggle
quality_radio = gr.Radio(
choices=["normal", "high"],
value="normal",
label=t["quality_label"]
)
# Group 1 (required)
gr.Markdown(f"**{t['group']} 1** ⭐ *{t['required']}*")
with gr.Row():
if1 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["animal"],
value=dev_defaults.get("if1", ""), scale=1)
would1 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["animal_answer"],
value=dev_defaults.get("would1", ""), scale=1)
# Group 2 (required)
gr.Markdown(f"**{t['group']} 2** ⭐ *{t['required']}*")
with gr.Row():
if2 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["color"],
value=dev_defaults.get("if2", ""), scale=1)
would2 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["color_answer"],
value=dev_defaults.get("would2", ""), scale=1)
# Group 3 (required)
gr.Markdown(f"**{t['group']} 3** ⭐ *{t['required']}*")
with gr.Row():
if3 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["object"],
value=dev_defaults.get("if3", ""), scale=1)
would3 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["object_answer"],
value=dev_defaults.get("would3", ""), scale=1)
# Group 4 (optional)
gr.Markdown(f"**{t['group']} 4** ✨ *{t['optional']}*")
with gr.Row():
if4 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["feeling"],
value=dev_defaults.get("if4", ""), scale=1)
would4 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["feeling_answer"],
value=dev_defaults.get("would4", ""), scale=1)
# Group 5 (optional)
gr.Markdown(f"**{t['group']} 5** ✨ *{t['optional']}*")
with gr.Row():
if5 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["element"],
value=dev_defaults.get("if5", ""), scale=1)
would5 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["element_answer"],
value=dev_defaults.get("would5", ""), scale=1)
generate_btn = gr.Button(t["generate_btn"], variant="primary", size="lg")
gr.Markdown(f"### {t['avatar_title']}")
output_image = gr.Image(label=t["your_avatar"], height=400)
output_text = gr.Textbox(label=t["information"], lines=4, interactive=False)
# Hidden state for language
lang_state = gr.State(value=language)
generate_btn.click(
fn=generate_avatar,
inputs=[if1, would1, if2, would2, if3, would3, if4, would4, if5, would5, lang_state, quality_radio],
outputs=[output_image, output_text]
)
return form_interface
def create_chat_interface(language="en"):
translations = get_translations()
t = translations.get(language, translations["en"])
with gr.Column() as chat_interface:
gr.Markdown(f"### {t['chat_title']}")
gr.Markdown(t["chat_subtitle"])
chatbot = gr.Chatbot(height=400, show_copy_button=True)
# Message box with send button
with gr.Row():
msg = gr.Textbox(label="Message", placeholder="Type your response here...", visible=False, scale=4)
send_btn = gr.Button("📤", visible=False, scale=1, min_width=50)
# Control buttons - below the chat
with gr.Row():
start_btn = gr.Button("🚀 Start New Conversation", variant="primary", scale=1)
avatar_btn = gr.Button("🎨 Get My Avatar", variant="secondary", scale=1)
quality_chat = gr.Radio(choices=["normal", "high"], value="normal", label="Quality", scale=1)
# Avatar generation outputs
avatar_output = gr.Image(label="Generated Avatar", visible=False)
avatar_info = gr.Textbox(label="Avatar Info", lines=4, interactive=False, visible=False)
# Hidden state for language
lang_state = gr.State(value=language)
def respond(message: str, history: list, language: str = "en"):
"""
Process user message and generate streaming AI response for chat interface.
Args:
message: User's input message
history: List of previous conversation turns [[user_msg, bot_msg], ...]
language: Interface language ("en" or "fr")
Yields:
tuple: ("", updated_history) for streaming chat interface
"""
# Convert history format if needed
if history is None:
history = []
# Process streaming response - yield (empty_text, updated_history)
last_response = None
for response in gemma_chat_stream(message, history, language):
last_response = response
yield "", response
# Ensure we have a final state
if last_response is not None:
yield "", last_response
def start_conversation(language):
"""Start the conversation with the system prompt as the first user message"""
# The system prompt becomes the first user message
system_prompt = """You are running a simple "Chinese Portrait" game. Your ONLY job is to ask questions.
STRICT RULES - NEVER BREAK THESE:
1. Ask ONLY: "If you were a [category], what would you be?"
2. After user answers, ask the NEXT question immediately
3. NO comments, NO reactions, NO explanations
4. Use random categories: animal, color, object, emotion, weather, plant, tool, fabric, planet, smell, sound, etc.
5. NEVER repeat a category
EXAMPLE PATTERN:
User: "ready"
You: "If you were an animal, what would you be?"
User: "wolf"
You: "If you were a color, what would you be?"
User: "purple"
You: "If you were a planet, what would you be?"
FORBIDDEN:
- Don't say "interesting", "nice", "cool"
- Don't explain anything
- Don't comment on answers
- Don't ask why or how
- Don't make conversations
JUST ASK THE NEXT QUESTION. Start the game now."""
# Use the chat streaming function to generate the first question
history = []
responses = list(gemma_chat_stream(system_prompt, history, language))
if responses:
# Take the last generated response
final_history = responses[-1]
return final_history, gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
else:
# Fallback in case of error
fallback_message = [[system_prompt, "If you were an animal, what would you be?"]]
return fallback_message, gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
def show_avatar_interface():
"""Immediately show the avatar interface to signal that generation is in progress"""
return gr.update(visible=True), gr.update(visible=True, value="Generating your avatar...")
def generate_avatar_from_conversation(history, language, quality):
if not history:
return None, "No conversation found. Please start a conversation first."
image, info = generate_avatar_from_chat(history, language, quality)
return image, info
# Events
start_btn.click(
fn=start_conversation,
inputs=[lang_state],
outputs=[chatbot, msg, send_btn, avatar_output, avatar_info]
)
# Send via Enter or the button
msg.submit(
respond,
[msg, chatbot, lang_state],
[msg, chatbot],
queue=True
)
send_btn.click(
respond,
[msg, chatbot, lang_state],
[msg, chatbot],
queue=True
)
# Show the interface immediately, then generate
avatar_btn.click(
show_avatar_interface,
outputs=[avatar_output, avatar_info]
).then(
generate_avatar_from_conversation,
inputs=[chatbot, lang_state, quality_chat],
outputs=[avatar_output, avatar_info]
)
gr.Markdown("*Click 'Start New Conversation' to begin, then 'Get My Avatar' when you've completed your portrait!*")
return chat_interface
def detect_browser_language():
"""Detect the browser language via injected JavaScript"""
# Defaults to English; intended to be overridden by the injected JavaScript
return "en"
def create_interface(language="en"):
translations = get_translations()
t = translations.get(language, translations["en"])
with gr.Blocks(title=t["title"], theme="gstaff/xkcd") as demo:
gr.Markdown(f"# {t['title']}")
gr.Markdown(t["subtitle"])
with gr.Tabs():
with gr.Tab(t["tab_form"]):
create_form_interface(language)
with gr.Tab(t["tab_chat"]):
create_chat_interface(language)
gr.Markdown("---")
gr.Markdown(f"*{t['footer']}*")
return demo
# Create the main web interface with MCP tools integrated
with gr.Blocks(title="🎭 Avatar Generator") as demo:
gr.Markdown("# 🎭 Avatar Generator - Chinese Portrait")
gr.Markdown("Generate personalized avatars from Chinese portrait descriptions using FLUX.1-schnell and Gemma-3n-E2B-it")
with gr.Tabs():
# Main application tabs
with gr.Tab("📝 Form Mode"):
create_form_interface("en")
with gr.Tab("💬 Chat Mode"):
create_chat_interface("en")
gr.Markdown("---")
gr.Markdown("🔌 **MCP Integration**: This app exposes tools via MCP protocol at `/gradio_api/mcp/sse`")
gr.Markdown("*Avatar generated with FLUX.1-schnell*")
if __name__ == "__main__":
if DEV_MODE:
print("🚀 Running in DEV MODE with pre-filled values")
print("🔌 Starting server with MCP support...")
print("📡 MCP endpoint available at: http://localhost:7860/gradio_api/mcp/sse")
print("🌐 Web interface available at: http://localhost:7860")
demo.launch(mcp_server=True, show_api=True)
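# Illustrative MCP client configuration (the exact shape depends on your MCP client;
# the URL matches the endpoint printed above, everything else is an assumption):
#   {"mcpServers": {"avatar-generator": {"url": "http://localhost:7860/gradio_api/mcp/sse"}}}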