Spaces:

nicolasmarques
/

gpt2-app-nicolas

Runtime error

App Files Files Community

nicolasmarques committed 24 days ago

Commit

7cc7b40

verified ·

1 Parent(s): fbd2fbc

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -67

app.py CHANGED Viewed

@@ -1,105 +1,80 @@
 import re
 from langdetect import detect, DetectorFactory
-from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 import gradio as gr
-# Define aleatoriedade fixa para a detecção de idioma
 DetectorFactory.seed = 0
-# Modelo T5 em português da Unicamp (compatível com Hugging Face)
-model_name = "unicamp-dl/ptt5-base-portuguese-vocab"
-# Força uso do tokenizer "slow", que evita erro de conversão
-tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
-model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-# Pipeline principal
-generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer, device=-1)
-# Comandos reconhecidos
 COMMANDS = {
-    "resumo":   ["resuma", "resumo", "resumir"],
     "reescrever": ["reescreva", "reformule", "reformular"],
     "expandir": ["expanda", "expansão", "expandir", "detalhe"],
     "corrigir": ["corrija", "corrigir", "melhore", "revise"]
 }
-# Prompts humanizadores
-HUMANIZE_PROMPT = {
-    "pt": "Por favor, torne o texto a seguir mais natural e humano:\n\n",
-    "en": "Please make the following text more natural and human-like:\n\n",
-    "fr": "Veuillez rendre le texte suivant plus naturel et humain :\n\n"
 }
-def detect_language(text: str) -> str:
     try:
         lang = detect(text)
-        return lang if lang in HUMANIZE_PROMPT else "pt"
     except:
         return "pt"
-def find_command(text: str, lang: str) -> str:
-    low = text.lower()
-    for cmd, kws in COMMANDS.items():
-        for kw in kws:
-            if kw in low:
                 return cmd
     return "gerar"
-def clean_text(text: str) -> str:
-    txt = re.sub(r"\s+", " ", text)
-    for kws in COMMANDS.values():
-        for kw in kws:
-            txt = re.sub(rf"\b{kw}\b", "", txt, flags=re.IGNORECASE)
-    return txt.strip()
-def build_prompt(core: str, cmd: str, lang: str) -> str:
-    if cmd == "resumo":
-        head = {
-            "pt": "Resuma o texto a seguir de forma concisa:\n\n",
-            "en": "Summarize the following text concisely:\n\n",
-            "fr": "Résumez le texte suivant de manière concise :\n\n"
-        }
-    elif cmd == "reescrever":
-        head = {
-            "pt": "Reescreva este texto com mais clareza e estilo:\n\n",
-            "en": "Rewrite this text with more clarity and style:\n\n",
-            "fr": "Réécrivez ce texte avec plus de clarté et de style :\n\n"
-        }
-    elif cmd == "expandir":
-        head = {
-            "pt": "Expanda este texto, adicionando detalhes e explicações:\n\n",
-            "en": "Expand this text, adding details and explanations:\n\n",
-            "fr": "Développez ce texte en ajoutant des détails et des explications :\n\n"
-        }
-    elif cmd == "corrigir":
-        head = {
-            "pt": "Corrija gramática, ortografia e estilo deste texto:\n\n",
-            "en": "Correct the grammar, spelling, and style of this text:\n\n",
-            "fr": "Corrigez la grammaire, l'orthographe et le style de ce texte :\n\n"
-        }
-    else:
-        head = {"pt": "", "en": "", "fr": ""}
-    return head[lang] + core + "\n\n"
-def gerar_resposta(texto: str) -> str:
     lang = detect_language(texto)
-    cmd  = find_command(texto, lang)
     core = clean_text(texto)
-    prompt = build_prompt(core, cmd, lang)
     output = generator(prompt, max_new_tokens=256, temperature=0.7, top_p=0.9)[0]["generated_text"]
     resposta = output.replace(prompt, "").strip()
-    return resposta
-# Interface Gradio
 demo = gr.Interface(
     fn=gerar_resposta,
-    inputs=gr.Textbox(lines=6, placeholder="Digite um texto com: resuma, expanda, corrija...", label="Entrada"),
     outputs=gr.Textbox(label="Resposta da IA"),
-    title="🧠 IA Multilingue do Sr. Nicolas",
-    description="Detecta idioma e comando embutido. Usa modelo PTT5 treinado em português.",
     allow_flagging="never"
 )

 import re
 from langdetect import detect, DetectorFactory
+from transformers import pipeline
 import gradio as gr
+# Garante consistência nos resultados do langdetect
 DetectorFactory.seed = 0
+# Carrega modelo de texto em português
+generator = pipeline(
+    "text2text-generation",
+    model="unicamp-dl/ptt5-base-portuguese-vocab",
+    tokenizer="unicamp-dl/ptt5-base-portuguese-vocab"
+)
+# Mapeamento de comandos (em português)
 COMMANDS = {
+    "resumo": ["resuma", "resumo", "resumir"],
     "reescrever": ["reescreva", "reformule", "reformular"],
     "expandir": ["expanda", "expansão", "expandir", "detalhe"],
     "corrigir": ["corrija", "corrigir", "melhore", "revise"]
 }
+PROMPTS = {
+    "resumo": "Resuma o texto a seguir:\n\n",
+    "reescrever": "Reescreva com mais clareza e estilo:\n\n",
+    "expandir": "Expanda este texto, adicionando detalhes e explicações:\n\n",
+    "corrigir": "Corrija a gramática, ortografia e estilo do seguinte texto:\n\n",
+    "gerar": ""
 }
+def detect_language(text):
     try:
         lang = detect(text)
+        return lang if lang in ["pt", "en", "fr"] else "pt"
     except:
         return "pt"
+def find_command(text, lang):
+    text_lower = text.lower()
+    for cmd, keywords in COMMANDS.items():
+        for kw in keywords:
+            if kw in text_lower:
                 return cmd
     return "gerar"
+def clean_text(text):
+    text = re.sub(r"\s+", " ", text).strip()
+    for keywords in COMMANDS.values():
+        for kw in keywords:
+            text = re.sub(rf"\b{kw}\b", "", text, flags=re.IGNORECASE)
+    return text.strip(": ").strip()
+def build_prompt(core, cmd):
+    return PROMPTS.get(cmd, "") + core
+def gerar_resposta(texto):
     lang = detect_language(texto)
+    cmd = find_command(texto, lang)
     core = clean_text(texto)
+    prompt = build_prompt(core, cmd)
     output = generator(prompt, max_new_tokens=256, temperature=0.7, top_p=0.9)[0]["generated_text"]
     resposta = output.replace(prompt, "").strip()
+    if resposta.lower().startswith(core.lower()):
+        resposta = resposta[len(core):].strip()
+    return resposta or "⚠️ A IA não conseguiu processar esse texto. Tente reformular."
+# Interface
 demo = gr.Interface(
     fn=gerar_resposta,
+    inputs=gr.Textbox(lines=6, placeholder="Digite algo como: Corrija: O menino está brincano...", label="Entrada"),
     outputs=gr.Textbox(label="Resposta da IA"),
+    title="🧠 IA Multilingue Sr. Nicolas",
+    description="Detecta comando embutido e devolve resposta humanizada em português.",
     allow_flagging="never"
 )