prakhardoneria committed on
Commit
e18e0ef
·
verified ·
1 Parent(s): 34e03e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -55
app.py CHANGED
@@ -1,57 +1,53 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
- import json
4
-
5
- MODEL_NAME = "tiiuae/falcon-rw-1b" # lightweight and fast
6
-
7
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
8
- model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
9
-
10
- def generate_code(prompt, history):
11
- history = history or []
12
- system_instruction = (
13
- "You are a coding assistant. "
14
- "Respond only in valid JSON format like this:\n"
15
- "{\n"
16
- " \"filename\": \"index.html\",\n"
17
- " \"html\": \"...HTML code...\",\n"
18
- " \"css\": \"...CSS code...\",\n"
19
- " \"js\": \"...JavaScript code...\"\n"
20
- "}\n"
21
  )
22
- full_prompt = system_instruction + "\nPrompt: " + prompt
23
- inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=512)
24
- outputs = model.generate(**inputs, max_new_tokens=512)
25
- decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
26
-
27
- try:
28
- parsed = json.loads(decoded)
29
- html = parsed.get("html", "")
30
- css = parsed.get("css", "")
31
- js = parsed.get("js", "")
32
- except Exception:
33
- html, css, js = "", "", ""
34
-
35
- history.append((prompt, decoded))
36
- return html, css, js, history
37
-
38
- def clear_history():
39
- return "", "", "", []
40
-
41
- with gr.Blocks() as demo:
42
- chat_history = gr.State([])
43
-
44
- with gr.Row():
45
- inp = gr.Textbox(label="Prompt", lines=2)
46
- send_btn = gr.Button("Generate")
47
- clear_btn = gr.Button("New Chat")
48
-
49
- with gr.Row():
50
- html_out = gr.Code(label="HTML", language="html")
51
- css_out = gr.Code(label="CSS", language="css")
52
- js_out = gr.Code(label="JavaScript", language="javascript")
53
-
54
- send_btn.click(generate_code, [inp, chat_history], [html_out, css_out, js_out, chat_history])
55
- clear_btn.click(clear_history, outputs=[html_out, css_out, js_out])
56
-
57
- demo.launch()
 
1
  import gradio as gr
2
+ import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+
5
+ model_name = "sarvamai/sarvam-m"
6
+
7
+ # Load tokenizer and model
8
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
9
+ model = AutoModelForCausalLM.from_pretrained(
10
+ model_name, torch_dtype="auto", device_map="auto"
11
+ )
12
+
13
+ def generate_response(prompt):
14
+ messages = [{"role": "user", "content": prompt}]
15
+ text = tokenizer.apply_chat_template(
16
+ messages,
17
+ tokenize=False,
18
+ enable_thinking=True,
 
 
19
  )
20
+ model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
21
+
22
+ # Generate output with temperature=0.2
23
+ generated_ids = model.generate(
24
+ **model_inputs,
25
+ max_new_tokens=8192,
26
+ temperature=0.2
27
+ )
28
+
29
+ output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
30
+ output_text = tokenizer.decode(output_ids)
31
+
32
+ if "</think>" in output_text:
33
+ reasoning_content = output_text.split("</think>")[0].rstrip("\n")
34
+ content = output_text.split("</think>")[-1].lstrip("\n").rstrip("</s>")
35
+ else:
36
+ reasoning_content = ""
37
+ content = output_text.rstrip("</s>")
38
+
39
+ return reasoning_content, content
40
+
41
+ # Gradio UI
42
+ iface = gr.Interface(
43
+ fn=generate_response,
44
+ inputs=gr.Textbox(lines=5, label="Enter your prompt"),
45
+ outputs=[
46
+ gr.Textbox(label="Reasoning"),
47
+ gr.Textbox(label="Response")
48
+ ],
49
+ title="Sarvam-M Chat Interface",
50
+ description="Enter a prompt and receive both the internal reasoning and the final answer from the Sarvam-M model."
51
+ )
52
+
53
+ iface.launch()