merterbak committed on
Commit
6ce8b1e
·
verified ·
1 Parent(s): 2870fe9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -8
app.py CHANGED
@@ -3,6 +3,7 @@ import torch
3
  from threading import Thread
4
  import gradio as gr
5
  import spaces
 
6
 
7
  model_id = "openai/gpt-oss-20b"
8
 
@@ -32,20 +33,34 @@ def generate_response(input_data, chat_history, max_new_tokens, system_prompt, t
32
 
33
  streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)
34
  generation_kwargs = {
35
- "streamer": streamer,
36
  "max_new_tokens": max_new_tokens,
37
  "do_sample": True,
38
  "temperature": temperature,
39
  "top_p": top_p,
40
  "top_k": top_k,
41
- "repetition_penalty": repetition_penalty
 
42
  }
43
  thread = Thread(target=pipe, args=(messages,), kwargs=generation_kwargs)
44
  thread.start()
45
- outputs = []
46
- for text_chunk in streamer:
47
- outputs.append(text_chunk)
48
- yield "".join(outputs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  demo = gr.ChatInterface(
51
  fn=generate_response,
@@ -63,7 +78,7 @@ demo = gr.ChatInterface(
63
  gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.0)
64
  ],
65
  examples=[
66
- [{"text": "Explain Newton laws clearly and concisely."}],
67
  [{"text": "Write a Python function to calculate the Fibonacci sequence"}],
68
  [{"text": "What are the benefits of open weight AI models"}],
69
  ],
@@ -71,7 +86,7 @@ demo = gr.ChatInterface(
71
  type="messages",
72
  description="""
73
  # gpt-oss-20b
74
- You can adjust reasoning level in the system prompt like "Reasoning: high".
75
  """,
76
  fill_height=True,
77
  textbox=gr.Textbox(
 
3
  from threading import Thread
4
  import gradio as gr
5
  import spaces
6
+ import re
7
 
8
  model_id = "openai/gpt-oss-20b"
9
 
 
33
 
34
  streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)
35
  generation_kwargs = {
 
36
  "max_new_tokens": max_new_tokens,
37
  "do_sample": True,
38
  "temperature": temperature,
39
  "top_p": top_p,
40
  "top_k": top_k,
41
+ "repetition_penalty": repetition_penalty,
42
+ "streamer": streamer
43
  }
44
  thread = Thread(target=pipe, args=(messages,), kwargs=generation_kwargs)
45
  thread.start()
46
+ #streaming try #1
47
+ buffer = ""
48
+ full_response = ""
49
+ for chunk in streamer:
50
+ buffer += chunk
51
+ parts = re.split(r'(\s+)', buffer)
52
+ if re.match(r'\s+', parts[-1]) is not None:
53
+ to_append = ''.join(parts)
54
+ buffer = ""
55
+ else:
56
+ to_append = ''.join(parts[:-1])
57
+ buffer = parts[-1]
58
+ if to_append:
59
+ full_response += to_append
60
+ yield full_response
61
+ if buffer:
62
+ full_response += buffer
63
+ yield full_response
64
 
65
  demo = gr.ChatInterface(
66
  fn=generate_response,
 
78
  gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.0)
79
  ],
80
  examples=[
81
+ [{"text": "Explain Newton laws clearly and concisely"}],
82
  [{"text": "Write a Python function to calculate the Fibonacci sequence"}],
83
  [{"text": "What are the benefits of open weight AI models"}],
84
  ],
 
86
  type="messages",
87
  description="""
88
  # gpt-oss-20b
89
+ Wait couple of seconds initially. You can adjust reasoning level in the system prompt like "Reasoning: high.
90
  """,
91
  fill_height=True,
92
  textbox=gr.Textbox(