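"""Avatar Generator - Chinese Portrait.

A Gradio app that turns "Chinese portrait" answers ("If I was an animal, I
would be...") into a personalized avatar, using FLUX.1-schnell for image
generation and Gemma-3n-E2B-it for the guided chat mode. It runs locally or
on HuggingFace Spaces with ZeroGPU, and exposes its tools over the MCP
protocol. (Module docstring added for clarity; wording based on the app's own
descriptions below.)
"""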
import gradio as gr
import torch
from diffusers import DiffusionPipeline
from transformers import AutoProcessor, AutoModelForImageTextToText, TextIteratorStreamer
import random
import os
import sys
import time
from threading import Thread

# Set PyTorch MPS fallback for Apple Silicon compatibility
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'

# Check for dev mode
DEV_MODE = "--dev" in sys.argv

# Import spaces for HuggingFace deployment
try:
    import spaces
    HF_SPACES = True
    print("🚀 Running on HuggingFace Spaces with ZeroGPU")
except ImportError:
    HF_SPACES = False
    print("🏠 Running locally - spaces module not available")

# MCP is always enabled
print("🔌 MCP protocol enabled - tools available for external access")

MAX_SEED = 2**32 - 1
def load_flux_model():
    dtype = torch.bfloat16

    # For HuggingFace Spaces, prioritize CUDA
    if HF_SPACES and torch.cuda.is_available():
        device = "cuda"
    # For local development, prioritize MPS for Apple Silicon
    elif torch.backends.mps.is_available():
        device = "mps"
    elif torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    print(f"Using device for FLUX: {device}")

    pipe = DiffusionPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-schnell",
        torch_dtype=dtype
    ).to(device)

    # Print tokenizer info for debugging
    if hasattr(pipe, 'tokenizer'):
        print(f"FLUX Tokenizer max length: {pipe.tokenizer.model_max_length}")
    if hasattr(pipe, 'tokenizer_2'):
        print(f"FLUX Tokenizer 2 max length: {pipe.tokenizer_2.model_max_length}")

    return pipe
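
# Note (added for context): FLUX.1-schnell is a timestep-distilled model meant
# for very low step counts, which is why the generation calls further down use
# only 4-8 inference steps with guidance_scale=0.0.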
def load_gemma_model():
    print("Loading Gemma-3n-E2B-it model...")
    model_id = "google/gemma-3n-E2B-it"
    processor = AutoProcessor.from_pretrained(model_id)

    if HF_SPACES:
        # Don't load the model in the main process for ZeroGPU
        print("ZeroGPU mode: Model will be loaded in GPU functions")
        return processor, None
    else:
        model = AutoModelForImageTextToText.from_pretrained(
            model_id,
            device_map="auto",
            torch_dtype=torch.bfloat16
        )
        print(f"Using device for Gemma-E2B: {model.device}")
        return processor, model


flux_pipe = load_flux_model()
gemma_processor, gemma_model = load_gemma_model()
# Model loading function for GPU contexts with caching
_cached_gpu_model = None

def _load_gemma_model_gpu():
    """Load model inside GPU context with caching"""
    global _cached_gpu_model
    if _cached_gpu_model is None:
        print("🔄 Loading Gemma model in GPU context...")
        model_id = "google/gemma-3n-E2B-it"
        _cached_gpu_model = AutoModelForImageTextToText.from_pretrained(
            model_id,
            device_map="auto",
            torch_dtype=torch.bfloat16
        )
        print("✅ Gemma model loaded and cached")
    else:
        print("♻️ Using cached Gemma model")
    return _cached_gpu_model
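
# Note (added for context): on ZeroGPU Spaces a GPU is only attached while a
# @spaces.GPU-decorated call is running, which is why the Gemma model is loaded
# lazily inside those calls rather than at import time, and cached so repeated
# chat turns don't pay the loading cost again.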
# Multilingual support
def get_translations():
    return {
        "en": {
            "title": "🎭 Avatar Generator - Chinese Portrait",
            "subtitle": "Complete at least the first 3 groups to generate your personalized avatar.",
            "portrait_title": "📝 Chinese Portrait (first 3 groups required)",
            "group": "Group",
            "required": "Required",
            "optional": "Optional",
            "if_i_was": "If I was",
            "i_would_be": "I would be",
            "generate_btn": "🎨 Generate Avatar",
            "avatar_title": "🖼️ Generated Avatar",
            "your_avatar": "Your Avatar",
            "information": "Information",
            "error_required": "Error: The first 3 groups of fields are required.",
            "success": "Avatar generated successfully!",
            "prompt_used": "Prompt used:",
            "error_generation": "Error during generation:",
            "footer": "Avatar generated with FLUX.1-schnell",
            "quality_normal": "Normal Quality (4 steps, 512x512)",
            "quality_high": "High Quality (8 steps, 512x512)",
            "quality_label": "Quality:",
            "tab_form": "📝 Form Mode",
            "tab_chat": "💬 Chat Mode",
            "chat_title": "🤖 AI Assistant - Avatar Creator",
            "chat_subtitle": "Let me guide you through creating your Chinese portrait!",
            "thinking": "Thinking...",
            "placeholders": {
                "animal": "an animal...",
                "animal_answer": "a lion...",
                "color": "a color...",
                "color_answer": "red...",
                "object": "an object...",
                "object_answer": "a sword...",
                "feeling": "a feeling...",
                "feeling_answer": "joy...",
                "element": "an element...",
                "element_answer": "fire..."
            }
        },
        "fr": {
            "title": "🎭 Générateur d'Avatar - Portrait Chinois",
            "subtitle": "Complétez au minimum les 3 premiers groupes pour générer votre avatar personnalisé.",
            "portrait_title": "📝 Portrait Chinois (3 premiers groupes obligatoires)",
            "group": "Groupe",
            "required": "Obligatoire",
            "optional": "Optionnel",
            "if_i_was": "Si j'étais",
            "i_would_be": "Je serais",
            "generate_btn": "🎨 Générer l'Avatar",
            "avatar_title": "🖼️ Avatar Généré",
            "your_avatar": "Votre Avatar",
            "information": "Informations",
            "error_required": "Erreur: Les 3 premiers groupes de champs sont obligatoires.",
            "success": "Avatar généré avec succès!",
            "prompt_used": "Prompt utilisé:",
            "error_generation": "Erreur lors de la génération:",
            "footer": "Avatar généré avec FLUX.1-schnell",
            "quality_normal": "Qualité Normale (4 étapes, 512x512)",
            "quality_high": "Haute Qualité (8 étapes, 512x512)",
            "quality_label": "Qualité:",
            "tab_form": "📝 Mode Formulaire",
            "tab_chat": "💬 Mode Chat",
            "chat_title": "🤖 Assistant IA - Créateur d'Avatar",
            "chat_subtitle": "Laissez-moi vous guider pour créer votre portrait chinois!",
            "thinking": "Réflexion...",
            "placeholders": {
                "animal": "un animal...",
                "animal_answer": "un lion...",
                "color": "une couleur...",
                "color_answer": "rouge...",
                "object": "un objet...",
                "object_answer": "une épée...",
                "feeling": "un sentiment...",
                "feeling_answer": "la joie...",
                "element": "un élément...",
                "element_answer": "le feu..."
            }
        }
    }
# Dev mode default values
def get_dev_defaults():
    return {
        "if1": "an animal", "would1": "a majestic wolf",
        "if2": "a color", "would2": "deep purple",
        "if3": "an object", "would3": "an ancient sword",
        "if4": "a feeling", "would4": "fierce determination",
        "if5": "an element", "would5": "lightning"
    }
# Apply ZeroGPU decorator if available
if HF_SPACES:
    @spaces.GPU  # ZeroGPU decorator: request a GPU for the duration of this call
    def generate_avatar(if1: str, would1: str, if2: str, would2: str, if3: str, would3: str, if4: str = "", would4: str = "", if5: str = "", would5: str = "", language: str = "en", quality: str = "normal"):
        """
        Generate a personalized avatar from Chinese portrait elements.

        Args:
            if1: First category (e.g., "an animal")
            would1: First answer (e.g., "a majestic wolf")
            if2: Second category (e.g., "a color")
            would2: Second answer (e.g., "deep purple")
            if3: Third category (e.g., "an object")
            would3: Third answer (e.g., "an ancient sword")
            if4: Fourth category (optional, e.g., "a feeling")
            would4: Fourth answer (optional, e.g., "fierce determination")
            if5: Fifth category (optional, e.g., "an element")
            would5: Fifth answer (optional, e.g., "lightning")
            language: Interface language ("en" or "fr")
            quality: Generation quality ("normal" or "high")

        Returns:
            tuple: (generated_image, info_text)
        """
        return _generate_avatar_impl(if1, would1, if2, would2, if3, would3, if4, would4, if5, would5, language, quality)
else:
    def generate_avatar(if1: str, would1: str, if2: str, would2: str, if3: str, would3: str, if4: str = "", would4: str = "", if5: str = "", would5: str = "", language: str = "en", quality: str = "normal"):
        """
        Generate a personalized avatar from Chinese portrait elements.

        Args:
            if1: First category (e.g., "an animal")
            would1: First answer (e.g., "a majestic wolf")
            if2: Second category (e.g., "a color")
            would2: Second answer (e.g., "deep purple")
            if3: Third category (e.g., "an object")
            would3: Third answer (e.g., "an ancient sword")
            if4: Fourth category (optional, e.g., "a feeling")
            would4: Fourth answer (optional, e.g., "fierce determination")
            if5: Fifth category (optional, e.g., "an element")
            would5: Fifth answer (optional, e.g., "lightning")
            language: Interface language ("en" or "fr")
            quality: Generation quality ("normal" or "high")

        Returns:
            tuple: (generated_image, info_text)
        """
        return _generate_avatar_impl(if1, would1, if2, would2, if3, would3, if4, would4, if5, would5, language, quality)
def _generate_avatar_impl(if1, would1, if2, would2, if3, would3, if4, would4, if5, would5, language, quality):
    translations = get_translations()
    t = translations.get(language, translations["en"])

    # Validate the required fields
    if not if1 or not would1 or not if2 or not would2 or not if3 or not would3:
        return None, t["error_required"]

    # Build the full Chinese-portrait description
    portrait_parts = []
    portrait_parts.append(f"If I was {if1}, I would be {would1}")
    portrait_parts.append(f"If I was {if2}, I would be {would2}")
    portrait_parts.append(f"If I was {if3}, I would be {would3}")

    if if4 and would4:
        portrait_parts.append(f"If I was {if4}, I would be {would4}")
    if if5 and would5:
        portrait_parts.append(f"If I was {if5}, I would be {would5}")

    chinese_portrait = ". ".join(portrait_parts)

    # Compact prompt optimized to stay under CLIP's 77-token limit
    elements = [f"{if1}→{would1}", f"{if2}→{would2}", f"{if3}→{would3}"]
    if if4 and would4:
        elements.append(f"{if4}→{would4}")
    if if5 and would5:
        elements.append(f"{if5}→{would5}")

    elements_str = ", ".join(elements)

    # Concise prompt to avoid CLIP truncation
    prompt = f"Artistic character portrait: {elements_str}. High-quality digital art, fantasy style, detailed with dramatic lighting."
    try:
        # Settings depending on the requested quality
        if quality == "high":
            width, height, steps = 512, 512, 8
        else:
            width, height, steps = 512, 512, 4

        # Generate with a random seed
        seed = random.randint(0, MAX_SEED)
        generator = torch.Generator(device=flux_pipe.device).manual_seed(seed)

        image = flux_pipe(
            prompt=prompt,
            width=width,
            height=height,
            num_inference_steps=steps,
            guidance_scale=0.0,
            generator=generator
        ).images[0]

        return image, f"{t['success']}\n{t['prompt_used']} {prompt}\nSeed: {seed}\nQuality: {quality} ({steps} steps, {width}x{height})"

    except Exception as e:
        return None, f"{t['error_generation']} {str(e)}"
# Separate GPU function for generation only (returns a complete response, not a generator)
if HF_SPACES:
    @spaces.GPU  # ZeroGPU decorator: request a GPU for the duration of this call
    def gemma_generate_response(message, history, language):
        return _gemma_generate_response_impl(message, history, language)
else:
    def gemma_generate_response(message, history, language):
        return _gemma_generate_response_impl(message, history, language)

# Non-GPU streaming function
def gemma_chat_stream(message, history, language):
    return _gemma_chat_stream_impl(message, history, language)
def _gemma_generate_response_impl(message, history, language):
    """Generate response using GPU - returns complete response"""
    # Load model in GPU context if needed
    model = gemma_model if gemma_model is not None else _load_gemma_model_gpu()

    # Prepare messages in the format expected by the processor
    messages = []

    # Add history (which already includes the initial system prompt as first user message)
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": [{"type": "text", "text": user_msg}]})
        messages.append({"role": "assistant", "content": [{"type": "text", "text": assistant_msg}]})

    # Add current message
    messages.append({"role": "user", "content": [{"type": "text", "text": message}]})

    # Apply chat template and tokenize
    inputs = gemma_processor.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    )

    # Generate response
    with torch.no_grad():
        # Move to device without dtype conversion to avoid issues
        device_inputs = {k: v.to(model.device) for k, v in inputs.items()}
        generate_kwargs = dict(
            device_inputs,
            max_new_tokens=150,
            do_sample=False,
            disable_compile=True,
        )
        outputs = model.generate(**generate_kwargs)

    response = gemma_processor.decode(outputs[0][device_inputs['input_ids'].shape[1]:], skip_special_tokens=True)

    # Clean response
    response = clean_chat_response(response)
    return response
def _gemma_chat_stream_impl(message, history, language):
    """Streaming function that calls the GPU function and simulates streaming"""
    # For ZeroGPU, we can't use threading with streaming, so fall back to non-streaming
    if HF_SPACES:
        # Get the complete response from the GPU function
        response = gemma_generate_response(message, history, language)

        # Simulate streaming by yielding progressively
        words = response.split()
        partial_response = ""
        updated_history = history + [[message, ""]]

        for i, word in enumerate(words):
            partial_response += word + " "
            updated_history[-1][1] = partial_response.strip()
            yield updated_history.copy()
            if i % 3 == 0:  # Pause every 3 words
                time.sleep(0.1)
    else:
        # Local development - use real streaming
        # Load model in GPU context if needed
        model = gemma_model if gemma_model is not None else _load_gemma_model_gpu()

        # Prepare messages in the format expected by the processor
        messages = []

        # Add history (which already includes the initial system prompt as first user message)
        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": [{"type": "text", "text": user_msg}]})
            messages.append({"role": "assistant", "content": [{"type": "text", "text": assistant_msg}]})

        # Add current message
        messages.append({"role": "user", "content": [{"type": "text", "text": message}]})

        # Apply chat template and tokenize
        inputs = gemma_processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        )

        # Set up streaming generation
        streamer = TextIteratorStreamer(gemma_processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)

        # Move to device without dtype conversion to avoid issues
        device_inputs = {k: v.to(model.device) for k, v in inputs.items()}
        generate_kwargs = dict(
            device_inputs,
            streamer=streamer,
            max_new_tokens=150,
            do_sample=False,
            disable_compile=True,
        )

        # Generate text in a separate thread
        t = Thread(target=model.generate, kwargs=generate_kwargs)
        t.start()

        # Stream output
        updated_history = history + [[message, ""]]
        output = ""
        try:
            for delta in streamer:
                output += delta
                # Clean the response as it streams
                cleaned_output = clean_chat_response(output)
                updated_history[-1][1] = cleaned_output
                yield updated_history.copy()
        except Exception as e:
            # Fall back to non-streaming if streaming fails
            t.join()  # Wait for the thread to complete
            response = gemma_generate_response(message, history, language)
            updated_history[-1][1] = response
            yield updated_history.copy()
def clean_assistant_response(response):
    """
    Clean the assistant's response to remove hallucinated dialogue turns
    """
    import re

    # Remove fake-dialogue patterns
    patterns_to_remove = [
        r'User:\s*[^\n]+',       # Remove "User: ..."
        r'Assistant:\s*[^\n]+',  # Remove "Assistant: ..."
        r'Human:\s*[^\n]+',      # Remove "Human: ..."
        r'AI:\s*[^\n]+',         # Remove "AI: ..."
    ]

    cleaned = response.strip()
    for pattern in patterns_to_remove:
        cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE)

    # Collapse multiple blank lines
    cleaned = re.sub(r'\n\s*\n', '\n', cleaned)

    # If the response still contains dialogue patterns, cut at the first one
    dialogue_patterns = [
        r'(?i)\buser\b.*?:',
        r'(?i)\bassistant\b.*?:',
        r'(?i)\bhuman\b.*?:',
        r'(?i)\bai\b.*?:'
    ]
    for pattern in dialogue_patterns:
        match = re.search(pattern, cleaned)
        if match:
            # Cut just before the matched pattern
            cleaned = cleaned[:match.start()].strip()
            break

    # Cap at 500 characters to avoid overly long responses
    if len(cleaned) > 500:
        # Cut at the last complete sentence
        sentences = cleaned[:500].split('.')
        if len(sentences) > 1:
            cleaned = '.'.join(sentences[:-1]) + '.'
        else:
            cleaned = cleaned[:500] + '...'

    return cleaned.strip()
def clean_chat_response(response):
    """
    Clean a chat response, keeping it shorter than the analysis cleaner above
    """
    import re

    # Remove fake-dialogue patterns
    patterns_to_remove = [
        r'User:\s*[^\n]+',       # Remove "User: ..."
        r'Assistant:\s*[^\n]+',  # Remove "Assistant: ..."
        r'Human:\s*[^\n]+',      # Remove "Human: ..."
        r'AI:\s*[^\n]+',         # Remove "AI: ..."
    ]

    cleaned = response.strip()
    for pattern in patterns_to_remove:
        cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE)

    # Collapse multiple blank lines
    cleaned = re.sub(r'\n\s*\n', '\n', cleaned)

    # If the response still contains dialogue patterns, cut at the first one
    dialogue_patterns = [
        r'(?i)\buser\b.*?:',
        r'(?i)\bassistant\b.*?:',
        r'(?i)\bhuman\b.*?:',
        r'(?i)\bai\b.*?:'
    ]
    for pattern in dialogue_patterns:
        match = re.search(pattern, cleaned)
        if match:
            # Cut just before the matched pattern
            cleaned = cleaned[:match.start()].strip()
            break

    # Cap at 200 characters for chat replies (stricter than the 500-character cap used for analysis)
    if len(cleaned) > 200:
        # Cut at the last complete sentence
        sentences = cleaned[:200].split('.')
        if len(sentences) > 1:
            cleaned = '.'.join(sentences[:-1]) + '.'
        else:
            cleaned = cleaned[:200] + '...'

    return cleaned.strip()
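
# Worked example (added for clarity) of the cleaners above:
#   clean_chat_response('If you were a color, what would you be? User: blue')
#   -> 'If you were a color, what would you be?'
# Any hallucinated "User:" / "Assistant:" continuation is stripped, and replies
# longer than the cap are cut back to the last complete sentence.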
def extract_portrait_from_conversation(history, language="en"):
    """
    Use the LLM to analyze the conversation and synthesize a dynamic image prompt
    """
    # Clean the history: drop the last turn if it is a model question with no user reply
    cleaned_history = history.copy()
    if cleaned_history and cleaned_history[-1][1] and not cleaned_history[-1][0]:
        # The last entry has a model response but no user message
        # (i.e. the last entry is ["", "model_question"]), so remove it
        cleaned_history = cleaned_history[:-1]

    # Combine all the text from the cleaned conversation
    conversation_text = ""
    for user_msg, assistant_msg in cleaned_history:
        if user_msg:  # Make sure there is a user message
            conversation_text += f"User: {user_msg}\nAssistant: {assistant_msg}\n"

    # Compact prompt for direct synthesis
    analysis_prompt = f"""Based on the following conversation, generate a compact character description in the style of a Chinese Portrait, formatted as:
Artistic character portrait: [category1] → [answer1], [category2] → [answer2], ...
Only include clear and relevant answers. Skip any incomplete or vague ones.
Do not repeat the full conversation.
Keep the result short (max ~40 tokens), using simple words.
Conversation: {conversation_text}"""

    try:
        # Load model in GPU context if needed
        model = gemma_model if gemma_model is not None else _load_gemma_model_gpu()

        # Prepare messages for the new processor format
        messages = [{"role": "user", "content": [{"type": "text", "text": analysis_prompt}]}]

        # Apply chat template and tokenize
        inputs = gemma_processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        )

        # Move inputs to device and generate
        with torch.no_grad():
            # Move to device without dtype conversion to avoid issues
            device_inputs = {k: v.to(model.device) for k, v in inputs.items()}
            outputs = model.generate(
                **device_inputs,
                max_new_tokens=100,
                do_sample=False,
                disable_compile=True,
            )

        response = gemma_processor.decode(outputs[0][device_inputs['input_ids'].shape[1]:], skip_special_tokens=True)

        # Light cleanup for analysis (no 500-character cap here)
        response = response.strip()

        # Remove only the obvious fake-dialogue patterns
        import re
        response = re.sub(r'User:\s*[^\n]+', '', response, flags=re.IGNORECASE)
        response = re.sub(r'Assistant:\s*[^\n]+', '', response, flags=re.IGNORECASE)
        response = re.sub(r'Human:\s*[^\n]+', '', response, flags=re.IGNORECASE)
        response = re.sub(r'AI:\s*[^\n]+', '', response, flags=re.IGNORECASE)
        response = re.sub(r'\n\s*\n', '\n', response).strip()

        # Extract the image prompt from the expected format
        image_prompt = ""

        # Look for the "Artistic character portrait:" format
        if 'artistic character portrait:' in response.lower():
            # Extract everything that follows "Artistic character portrait:"
            portrait_index = response.lower().find('artistic character portrait:')
            image_prompt = response[portrait_index:].strip()
        else:
            # If the format is wrong, take the whole response and adjust it
            image_prompt = response.strip()
            if image_prompt and not image_prompt.lower().startswith('artistic character portrait'):
                image_prompt = f"Artistic character portrait: {image_prompt}"

        # Make sure the prompt is well formatted for FLUX
        if image_prompt:
            # Extract the elements for display BEFORE adding the artistic keywords
            elements = []
            if '→' in image_prompt:
                # Parse elements in the "category → value" format
                parts = image_prompt.split(':')[-1]  # Take what follows ":"
                pairs = parts.split(',')
                for pair in pairs:
                    if '→' in pair and not any(art_word in pair.lower() for art_word in ['high-quality', 'digital art', 'detailed', 'fantasy']):
                        try:
                            category, value = pair.split('→', 1)
                            category = category.strip()
                            value = value.strip().rstrip('.')
                            # Strip any trailing art-style keywords from the value
                            value = re.sub(r'\.\s*(high-quality|digital art|fantasy|detailed).*', '', value, flags=re.IGNORECASE).strip()
                            if category and value and not value.startswith('('):
                                elements.append((category, value))
                        except Exception:
                            continue

            # Add the artistic keywords if they are missing
            if not any(word in image_prompt.lower() for word in ['detailed', 'high-quality', 'digital art']):
                image_prompt += ". High-quality digital art, fantasy style, detailed illustration"

            return image_prompt, elements
        else:
            fallback_prompt = "Artistic character portrait of a unique individual. High-quality digital art, fantasy style, detailed illustration"
            return fallback_prompt, [('style', 'unique individual')]

    except Exception as e:
        # Simple fallback in case of error
        fallback_prompt = "Artistic character portrait of a unique individual. High-quality digital art, fantasy style, detailed illustration with dramatic lighting"
        return fallback_prompt, [('style', 'artistic portrait')]
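
# Illustrative return values (added for clarity; the exact text depends on the model output).
# If the model answers "Artistic character portrait: animal → wolf, color → purple",
# the function returns roughly:
#   image_prompt = "Artistic character portrait: animal → wolf, color → purple. High-quality digital art, fantasy style, detailed illustration"
#   elements     = [("animal", "wolf"), ("color", "purple")]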
def generate_avatar_from_chat(history: list, language: str = "en", quality: str = "normal"):
    """
    Generate avatar from conversation history with AI assistant.

    Args:
        history: List of conversation turns [[user_msg, assistant_msg], ...]
        language: Interface language ("en" or "fr")
        quality: Generation quality ("normal" or "high")

    Returns:
        tuple: (generated_image, info_text)
    """
    # Extract the image prompt and the portrait elements from the conversation
    prompt, elements = extract_portrait_from_conversation(history, language)

    if not prompt:
        return None, "Could not analyze conversation. Please continue chatting to build your portrait."

    try:
        # Settings depending on the requested quality
        if quality == "high":
            width, height, steps = 512, 512, 8
        else:
            width, height, steps = 512, 512, 4

        # Generate with a random seed
        seed = random.randint(0, MAX_SEED)
        generator = torch.Generator(device=flux_pipe.device).manual_seed(seed)

        image = flux_pipe(
            prompt=prompt,
            width=width,
            height=height,
            num_inference_steps=steps,
            guidance_scale=0.0,
            generator=generator
        ).images[0]

        elements_text = "\n".join([f"- {category.title()}: {value}" for category, value in elements])

        return image, f"Avatar generated from conversation!\n\nLLM Analysis:\n{elements_text}\n\nPrompt: {prompt}\nSeed: {seed}\nQuality: {quality} ({steps} steps, {width}x{height})"

    except Exception as e:
        return None, f"Error during generation: {str(e)}"
def create_form_interface(language="en"):
    translations = get_translations()
    t = translations.get(language, translations["en"])
    dev_defaults = get_dev_defaults() if DEV_MODE else {}

    with gr.Column() as form_interface:
        gr.Markdown(f"### {t['portrait_title']}")

        # Quality switch
        quality_radio = gr.Radio(
            choices=["normal", "high"],
            value="normal",
            label=t["quality_label"]
        )

        # Group 1 (required)
        gr.Markdown(f"**{t['group']} 1** ⭐ *{t['required']}*")
        with gr.Row():
            if1 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["animal"],
                             value=dev_defaults.get("if1", ""), scale=1)
            would1 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["animal_answer"],
                                value=dev_defaults.get("would1", ""), scale=1)

        # Group 2 (required)
        gr.Markdown(f"**{t['group']} 2** ⭐ *{t['required']}*")
        with gr.Row():
            if2 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["color"],
                             value=dev_defaults.get("if2", ""), scale=1)
            would2 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["color_answer"],
                                value=dev_defaults.get("would2", ""), scale=1)

        # Group 3 (required)
        gr.Markdown(f"**{t['group']} 3** ⭐ *{t['required']}*")
        with gr.Row():
            if3 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["object"],
                             value=dev_defaults.get("if3", ""), scale=1)
            would3 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["object_answer"],
                                value=dev_defaults.get("would3", ""), scale=1)

        # Group 4 (optional)
        gr.Markdown(f"**{t['group']} 4** ✨ *{t['optional']}*")
        with gr.Row():
            if4 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["feeling"],
                             value=dev_defaults.get("if4", ""), scale=1)
            would4 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["feeling_answer"],
                                value=dev_defaults.get("would4", ""), scale=1)

        # Group 5 (optional)
        gr.Markdown(f"**{t['group']} 5** ✨ *{t['optional']}*")
        with gr.Row():
            if5 = gr.Textbox(label=t["if_i_was"], placeholder=t["placeholders"]["element"],
                             value=dev_defaults.get("if5", ""), scale=1)
            would5 = gr.Textbox(label=t["i_would_be"], placeholder=t["placeholders"]["element_answer"],
                                value=dev_defaults.get("would5", ""), scale=1)

        generate_btn = gr.Button(t["generate_btn"], variant="primary", size="lg")

        gr.Markdown(f"### {t['avatar_title']}")
        output_image = gr.Image(label=t["your_avatar"], height=400)
        output_text = gr.Textbox(label=t["information"], lines=4, interactive=False)

        # Hidden state for language
        lang_state = gr.State(value=language)

        generate_btn.click(
            fn=generate_avatar,
            inputs=[if1, would1, if2, would2, if3, would3, if4, would4, if5, would5, lang_state, quality_radio],
            outputs=[output_image, output_text]
        )

    return form_interface
def create_chat_interface(language="en"):
    translations = get_translations()
    t = translations.get(language, translations["en"])

    with gr.Column() as chat_interface:
        gr.Markdown(f"### {t['chat_title']}")
        gr.Markdown(t["chat_subtitle"])

        chatbot = gr.Chatbot(height=400, show_copy_button=True)

        # Message box with a send button
        with gr.Row():
            msg = gr.Textbox(label="Message", placeholder="Type your response here...", visible=False, scale=4)
            send_btn = gr.Button("📤", visible=False, scale=1, min_width=50)

        # Control buttons - below the chat
        with gr.Row():
            start_btn = gr.Button("🚀 Start New Conversation", variant="primary", scale=1)
            avatar_btn = gr.Button("🎨 Get My Avatar", variant="secondary", scale=1)
            quality_chat = gr.Radio(choices=["normal", "high"], value="normal", label="Quality", scale=1)

        # Avatar generation results
        avatar_output = gr.Image(label="Generated Avatar", visible=False)
        avatar_info = gr.Textbox(label="Avatar Info", lines=4, interactive=False, visible=False)

        # Hidden state for language
        lang_state = gr.State(value=language)

        def respond(message: str, history: list, language: str = "en"):
            """
            Process user message and generate streaming AI response for chat interface.

            Args:
                message: User's input message
                history: List of previous conversation turns [[user_msg, bot_msg], ...]
                language: Interface language ("en" or "fr")

            Yields:
                tuple: ("", updated_history) for streaming chat interface
            """
            # Convert history format if needed
            if history is None:
                history = []

            # Process streaming response - yield (empty_text, updated_history)
            last_response = None
            for response in gemma_chat_stream(message, history, language):
                last_response = response
                yield "", response

            # Ensure we have a final state
            if last_response is not None:
                yield "", last_response

        def start_conversation(language):
            """Start the conversation with the system prompt sent as the first user message"""
            # The system prompt becomes the first user message
            system_prompt = """You are running a simple "Chinese Portrait" game. Your ONLY job is to ask questions.
STRICT RULES - NEVER BREAK THESE:
1. Ask ONLY: "If you were a [category], what would you be?"
2. After user answers, ask the NEXT question immediately
3. NO comments, NO reactions, NO explanations
4. Use random categories: animal, color, object, emotion, weather, plant, tool, fabric, planet, smell, sound, etc.
5. NEVER repeat a category
EXAMPLE PATTERN:
User: "ready"
You: "If you were an animal, what would you be?"
User: "wolf"
You: "If you were a color, what would you be?"
User: "purple"
You: "If you were a planet, what would you be?"
FORBIDDEN:
- Don't say "interesting", "nice", "cool"
- Don't explain anything
- Don't comment on answers
- Don't ask why or how
- Don't make conversations
JUST ASK THE NEXT QUESTION. Start the game now."""

            # Use the streaming chat function to generate the first question
            history = []
            responses = list(gemma_chat_stream(system_prompt, history, language))

            if responses:
                # Take the last generated response
                final_history = responses[-1]
                return final_history, gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
            else:
                # Fallback in case of error
                fallback_message = [[system_prompt, "If you were an animal, what would you be?"]]
                return fallback_message, gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)

        def show_avatar_interface():
            """Immediately show the avatar widgets so the user sees that generation is running"""
            return gr.update(visible=True), gr.update(visible=True, value="Generating your avatar...")

        def generate_avatar_from_conversation(history, language, quality):
            if not history:
                return None, "No conversation found. Please start a conversation first."
            image, info = generate_avatar_from_chat(history, language, quality)
            return image, info

        # Events
        start_btn.click(
            fn=start_conversation,
            inputs=[lang_state],
            outputs=[chatbot, msg, send_btn, avatar_output, avatar_info]
        )

        # Send via Enter or the send button
        msg.submit(
            respond,
            [msg, chatbot, lang_state],
            [msg, chatbot],
            queue=True
        )
        send_btn.click(
            respond,
            [msg, chatbot, lang_state],
            [msg, chatbot],
            queue=True
        )

        # Show the avatar widgets immediately, then run the generation
        avatar_btn.click(
            show_avatar_interface,
            outputs=[avatar_output, avatar_info]
        ).then(
            generate_avatar_from_conversation,
            inputs=[chatbot, lang_state, quality_chat],
            outputs=[avatar_output, avatar_info]
        )

        gr.Markdown("*Click 'Start New Conversation' to begin, then 'Get My Avatar' when you've completed your portrait!*")

    return chat_interface
def detect_browser_language():
    """Detect the browser language via injected JavaScript"""
    # Defaults to English; meant to be overridden by JavaScript
    return "en"

def create_interface(language="en"):
    translations = get_translations()
    t = translations.get(language, translations["en"])

    with gr.Blocks(title=t["title"], theme="gstaff/xkcd") as demo:
        gr.Markdown(f"# {t['title']}")
        gr.Markdown(t["subtitle"])

        with gr.Tabs():
            with gr.Tab(t["tab_form"]):
                create_form_interface(language)
            with gr.Tab(t["tab_chat"]):
                create_chat_interface(language)

        gr.Markdown("---")
        gr.Markdown(f"*{t['footer']}*")

    return demo
# Create the main web interface with MCP tools integrated
with gr.Blocks(title="🎭 Avatar Generator") as demo:
    gr.Markdown("# 🎭 Avatar Generator - Chinese Portrait")
    gr.Markdown("Generate personalized avatars from Chinese portrait descriptions using FLUX.1-schnell and Gemma-3n-E2B-it")

    with gr.Tabs():
        # Main application tabs
        with gr.Tab("📝 Form Mode"):
            create_form_interface("en")
        with gr.Tab("💬 Chat Mode"):
            create_chat_interface("en")

    gr.Markdown("---")
    gr.Markdown("🔌 **MCP Integration**: This app exposes tools via MCP protocol at `/gradio_api/mcp/sse`")
    gr.Markdown("*Avatar generated with FLUX.1-schnell*")

if __name__ == "__main__":
    if DEV_MODE:
        print("🚀 Running in DEV MODE with pre-filled values")

    print("🔌 Starting server with MCP support...")
    print("📡 MCP endpoint available at: http://localhost:7860/gradio_api/mcp/sse")
    print("🌐 Web interface available at: http://localhost:7860")

    demo.launch(mcp_server=True, show_api=True)
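
# Usage sketch (added for clarity; assumes this file is the Space's app.py):
#   python app.py          # launch the web UI at http://localhost:7860
#   python app.py --dev    # launch with the pre-filled Chinese-portrait values
# MCP clients can reach the exposed tools at the SSE endpoint printed above,
# http://localhost:7860/gradio_api/mcp/sse.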