Space status: Runtime error
Update app.py
app.py (CHANGED)
@@ -11,27 +11,28 @@ MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
-
+
+
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 
-
+
 if torch.cuda.is_available():
     model_id = "AndreaAlessandrelli4/AvvoChat_AITA_v04"
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
 
+
 @spaces.GPU
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
-    system_prompt: str
+    system_prompt: str,
     max_new_tokens: int = 1024,
-    temperature: float = 0.
+    temperature: float = 0.6,
     top_p: float = 0.9,
     top_k: int = 50,
-    do_sample: bool = False,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
     conversation = []
@@ -55,7 +56,7 @@ def generate(
         {"input_ids": input_ids},
         streamer=streamer,
         max_new_tokens=max_new_tokens,
-        do_sample=
+        do_sample=True,
         top_p=top_p,
         top_k=top_k,
         temperature=temperature,
@@ -70,12 +71,11 @@ def generate(
         outputs.append(text)
         yield "".join(outputs)
 
-image_path = "AvvoVhat.png"
-
 
 chat_interface = gr.ChatInterface(
-    fn
-    additional_inputs
+    fn=generate,
+    additional_inputs=[
+        gr.Textbox(label="System prompt", lines=6),
         gr.Slider(
             label="Max new tokens",
             minimum=1,
@@ -104,10 +104,6 @@ chat_interface = gr.ChatInterface(
             step=1,
             value=50,
         ),
-        gr.Checkbox(
-            label="Do-sample (False)",
-            value=False,
-        ),
         gr.Slider(
             label="Repetition penalty",
             minimum=1.0,
@@ -116,31 +112,22 @@ chat_interface = gr.ChatInterface(
             value=1.2,
         ),
     ],
-    stop_btn
-    examples
-        ["
-        ["
-        ["
-        ["
-        ["
-    ],
+    stop_btn=None,
+    examples=[
+        ["Hello there! How are you doing?"],
+        ["Can you explain briefly to me what is the Python programming language?"],
+        ["Explain the plot of Cinderella in a sentence."],
+        ["How many hours does it take a man to eat a Helicopter?"],
+        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
+    ],
 )
 
-
-
-
-
-
-
-with gr.Blocks() as demo:
+with gr.Blocks(css="style.css") as demo:
     gr.Markdown("# AvvoChat")
     gr.Markdown("Fai una domanda riguardante la legge italiana all'AvvoChat e ricevi una spiegazione semplice al tuo dubbio.")
-    #gr.Image(image_path, width=50, height=200)
     chat_interface.render()
-
-
 
-
 if __name__ == "__main__":
-    demo.queue(max_size=20).launch(
+    demo.queue(max_size=20).launch()
+
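The hunks at lines 56-62 and 71-72 only graze generate(); the visible context lines (streamer=streamer, outputs.append(text), yield "".join(outputs)) match the stock Hugging Face chat-Space template, where they sit inside a TextIteratorStreamer loop. A minimal sketch of that surrounding pattern, assuming the template layout (stream_completion is a hypothetical name; model and tokenizer are the module-level objects loaded above):

from threading import Thread

from transformers import TextIteratorStreamer

def stream_completion(input_ids, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
    # Assumes module-level `model` and `tokenizer` as in the diff above.
    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        {"input_ids": input_ids},
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,  # hard-coded by this commit; the old do_sample parameter is gone
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
    )
    # model.generate() blocks, so it runs in a worker thread while this
    # thread drains the streamer and yields the reply as it grows.
    Thread(target=model.generate, kwargs=generate_kwargs).start()
    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)

Note that gr.ChatInterface passes additional_inputs to fn positionally after (message, chat_history), so the Textbox and Slider order in the list must mirror generate()'s parameter order; that is why removing the do_sample parameter from the signature also required dropping the Do-sample checkbox from additional_inputs.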
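Separately from this commit, the retained line 22 (AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)) uses the load_in_4bit shortcut that recent transformers releases have deprecated in favour of an explicit BitsAndBytesConfig; whether that explains the Space's "Runtime error" badge cannot be told from this page alone. A sketch of the explicit equivalent (the compute dtype is an added assumption, the app never sets one):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "AndreaAlessandrelli4/AvvoChat_AITA_v04"
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # assumption: not specified in app.py
)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.use_default_system_prompt = False

bitsandbytes quantization also requires a CUDA GPU at load time, which fits the guard in the first hunk: on CPU the model is never loaded and the demo only shows the "Running on CPU" notice.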