AndreaAlessandrelli4 committed (verified)
Commit 3a5ce72 · Parent(s): bd3fd9f

Update app.py

Files changed (1):
  1. app.py +21 -34
app.py CHANGED
@@ -11,27 +11,28 @@ MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
-# Controllo della disponibilità della GPU
+
+
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 
-# Caricamento del modello e del tokenizer se la GPU è disponibile
+
 if torch.cuda.is_available():
     model_id = "AndreaAlessandrelli4/AvvoChat_AITA_v04"
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
 
+
 @spaces.GPU
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
-    system_prompt: str = "",
+    system_prompt: str,
     max_new_tokens: int = 1024,
-    temperature: float = 0.01,
+    temperature: float = 0.6,
     top_p: float = 0.9,
     top_k: int = 50,
-    do_sample: bool = False,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
     conversation = []
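A note on the unchanged loading line in this hunk: load_in_4bit=True relies on the bitsandbytes package being installed in the Space, and recent transformers releases prefer an explicit quantization config over the bare flag. A minimal equivalent sketch (not part of this commit):

# Sketch: explicit 4-bit quantization, equivalent to load_in_4bit=True.
# Assumes bitsandbytes and accelerate are available at runtime.
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "AndreaAlessandrelli4/AvvoChat_AITA_v04"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",  # accelerate places layers on the available GPU(s)
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

Also note the signature change above: system_prompt loses its empty-string default and becomes a required argument, which pairs with the new "System prompt" textbox added further down.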
@@ -55,7 +56,7 @@ def generate(
         {"input_ids": input_ids},
         streamer=streamer,
         max_new_tokens=max_new_tokens,
-        do_sample=False,
+        do_sample=True,
         top_p=top_p,
         top_k=top_k,
         temperature=temperature,
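This one-line flip is the behavioural core of the commit: with do_sample=False, transformers decodes greedily and ignores temperature, top_p and top_k, so every question got a deterministic answer and the old temperature=0.01 had no effect. With do_sample=True, the new default temperature of 0.6 and the exposed sliders actually shape generation. A minimal sketch of the difference, using a small placeholder model rather than the Space's checkpoint:

# Sketch: greedy vs. sampled decoding ("gpt2" is a stand-in model).
from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")
lm = AutoModelForCausalLM.from_pretrained("gpt2")
inputs = tok("La legge italiana prevede", return_tensors="pt")

# Old behaviour: deterministic output, sampling knobs ignored.
greedy = lm.generate(**inputs, max_new_tokens=30, do_sample=False)

# New behaviour: stochastic output shaped by temperature/top_p/top_k.
sampled = lm.generate(
    **inputs,
    max_new_tokens=30,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    top_k=50,
    repetition_penalty=1.2,
)
print(tok.decode(sampled[0], skip_special_tokens=True))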
@@ -70,12 +71,11 @@
         outputs.append(text)
         yield "".join(outputs)
 
-image_path = "AvvoVhat.png"
-
 
 chat_interface = gr.ChatInterface(
-    fn = generate,
-    additional_inputs = [
+    fn=generate,
+    additional_inputs=[
+        gr.Textbox(label="System prompt", lines=6),
         gr.Slider(
             label="Max new tokens",
             minimum=1,
@@ -104,10 +104,6 @@ chat_interface = gr.ChatInterface(
             step=1,
             value=50,
         ),
-        gr.Checkbox(
-            label="Do-sample (False)",
-            value=False,
-        ),
         gr.Slider(
             label="Repetition penalty",
             minimum=1.0,
@@ -116,31 +112,22 @@ chat_interface = gr.ChatInterface(
             value=1.2,
         ),
     ],
-    stop_btn = None,
-    examples = [
-        ["Posso fare un barbecue sul balcone di casa?"],
-        ["Posso essere multato se esco di casa senza documento d'identità?"],
-        ["Una persona single può adottare un bambino?"],
-        ["Posso usare un immagine creada con l'intelligenza artificiale?"],
-        ["Se il mio pallone da calcio cade in un giardino di un'abitazione privata, poss scavalcare il concello per riprendermelo?"],
-    ],
+    stop_btn=None,
+    examples=[
+        ["Hello there! How are you doing?"],
+        ["Can you explain briefly to me what is the Python programming language?"],
+        ["Explain the plot of Cinderella in a sentence."],
+        ["How many hours does it take a man to eat a Helicopter?"],
+        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
+    ],
 )
 
-
-
-
-
-
-
-with gr.Blocks() as demo:
+with gr.Blocks(css="style.css") as demo:
     gr.Markdown("# AvvoChat")
     gr.Markdown("Fai una domanda riguardante la legge italiana all'AvvoChat e ricevi una spiegazione semplice al tuo dubbio.")
-    #gr.Image(image_path, width=50, height=200)
     chat_interface.render()
-
-
 
-
 
 if __name__ == "__main__":
-    demo.queue(max_size=20).launch(share=True)
+    demo.queue(max_size=20).launch()
+
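The UI wiring behind these hunks: gr.ChatInterface calls fn(message, history, *additional_input_values) in the order the components are listed, so the new Textbox feeds the now-required system_prompt parameter, the redundant "Do-sample" checkbox disappears, and the Italian example questions are swapped for Gradio's stock English ones. A minimal sketch of that calling convention, assuming a Gradio 4.x ChatInterface:

# Sketch: additional_inputs values are passed positionally after (message, history).
import gradio as gr

def echo(message, history, system_prompt, max_new_tokens):
    # Echo back the inputs so the wiring is visible in the chat window.
    return f"(system={system_prompt!r}, max_new_tokens={max_new_tokens}) {message}"

demo = gr.ChatInterface(
    fn=echo,
    additional_inputs=[
        gr.Textbox(label="System prompt", lines=6),
        gr.Slider(label="Max new tokens", minimum=1, maximum=2048, value=1024),
    ],
)

if __name__ == "__main__":
    demo.queue(max_size=20).launch()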
 
 
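Two smaller changes round out the commit: the Blocks wrapper now loads a custom stylesheet via gr.Blocks(css="style.css"), which assumes a style.css file exists in the Space repository, and launch() drops share=True, which is unnecessary on Hugging Face Spaces since the Space URL is already publicly served. A compact sketch of the resulting entry point (placeholder chat function):

# Sketch: Blocks wrapper with custom CSS (style.css assumed to exist).
import gradio as gr

chat_interface = gr.ChatInterface(fn=lambda message, history: message)  # placeholder fn

with gr.Blocks(css="style.css") as demo:
    gr.Markdown("# AvvoChat")
    gr.Markdown("Fai una domanda riguardante la legge italiana all'AvvoChat "
                "e ricevi una spiegazione semplice al tuo dubbio.")
    chat_interface.render()

if __name__ == "__main__":
    # share=True is omitted: Spaces already expose the app publicly.
    demo.queue(max_size=20).launch()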