Sarvam_m_indian_chat

Running

App Files Community

prakhardoneria committed 13 days ago

Commit

e18e0ef

verified ·

1 Parent(s): 34e03e1

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -55

app.py CHANGED Viewed

@@ -1,57 +1,53 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-import json
-MODEL_NAME = "tiiuae/falcon-rw-1b"  # lightweight and fast
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
-def generate_code(prompt, history):
-    history = history or []
-    system_instruction = (
-        "You are a coding assistant. "
-        "Respond only in valid JSON format like this:\n"
-        "{\n"
-        "  \"filename\": \"index.html\",\n"
-        "  \"html\": \"...HTML code...\",\n"
-        "  \"css\": \"...CSS code...\",\n"
-        "  \"js\": \"...JavaScript code...\"\n"
-        "}\n"
     )
-    full_prompt = system_instruction + "\nPrompt: " + prompt
-    inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=512)
-    outputs = model.generate(**inputs, max_new_tokens=512)
-    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    try:
-        parsed = json.loads(decoded)
-        html = parsed.get("html", "")
-        css = parsed.get("css", "")
-        js = parsed.get("js", "")
-    except Exception:
-        html, css, js = "", "", ""
-    history.append((prompt, decoded))
-    return html, css, js, history
-def clear_history():
-    return "", "", "", []
-with gr.Blocks() as demo:
-    chat_history = gr.State([])
-    with gr.Row():
-        inp = gr.Textbox(label="Prompt", lines=2)
-        send_btn = gr.Button("Generate")
-        clear_btn = gr.Button("New Chat")
-    with gr.Row():
-        html_out = gr.Code(label="HTML", language="html")
-        css_out = gr.Code(label="CSS", language="css")
-        js_out = gr.Code(label="JavaScript", language="javascript")
-    send_btn.click(generate_code, [inp, chat_history], [html_out, css_out, js_out, chat_history])
-    clear_btn.click(clear_history, outputs=[html_out, css_out, js_out])
-demo.launch()

 import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+# Checkpoint identifier passed to both `from_pretrained` calls below.
+model_name = "sarvamai/sarvam-m"
+# Load tokenizer and model
+# Both are created once at module import and reused by `generate_response`.
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+# NOTE(review): the "auto" values presumably let transformers pick the
+# checkpoint's dtype and place weights on the available device(s);
+# `device_map` typically requires `accelerate` to be installed — confirm.
+model = AutoModelForCausalLM.from_pretrained(
+    model_name, torch_dtype="auto", device_map="auto"
+)
+def generate_response(prompt):
+    """Run one user prompt through the model and split reasoning from answer.
+
+    Args:
+        prompt: Text of the single user message sent to the model.
+
+    Returns:
+        A ``(reasoning_content, content)`` tuple of strings.
+        ``reasoning_content`` is the text generated before ``</think>``
+        (empty when that marker is absent); ``content`` is the text after
+        the last ``</think>`` with a trailing ``</s>`` marker removed.
+    """
+    messages = [{"role": "user", "content": prompt}]
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        enable_thinking=True,
     )
+    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+    # Generate output with temperature=0.2. `do_sample=True` is passed
+    # explicitly because `temperature` only takes effect in sampling modes;
+    # without it the setting can be silently ignored.
+    generated_ids = model.generate(
+        **model_inputs,
+        max_new_tokens=8192,
+        do_sample=True,
+        temperature=0.2,
+    )
+    # Skip the first prompt-length ids so only newly generated tokens are
+    # decoded.
+    prompt_length = len(model_inputs.input_ids[0])
+    output_ids = generated_ids[0][prompt_length:].tolist()
+    output_text = tokenizer.decode(output_ids)
+    # Remove the end-of-sequence marker as a whole suffix. The previous
+    # `rstrip("</s>")` treated its argument as a character set and also ate
+    # any trailing '<', '/', 's' or '>' (e.g. "yes" -> "ye", "</b>" -> "</b").
+    eos_marker = "</s>"
+    if output_text.endswith(eos_marker):
+        output_text = output_text[: -len(eos_marker)]
+    think_end = "</think>"
+    if think_end in output_text:
+        # Split once: the first segment is the reasoning, the last segment
+        # is the final answer.
+        segments = output_text.split(think_end)
+        reasoning_content = segments[0].rstrip("\n")
+        content = segments[-1].lstrip("\n")
+    else:
+        reasoning_content = ""
+        content = output_text
+    return reasoning_content, content
+# Gradio UI: one multi-line prompt box in, two text boxes out (the order
+# matches the `(reasoning_content, content)` tuple from `generate_response`).
+prompt_box = gr.Textbox(lines=5, label="Enter your prompt")
+result_boxes = [
+    gr.Textbox(label="Reasoning"),
+    gr.Textbox(label="Response"),
+]
+iface = gr.Interface(
+    fn=generate_response,
+    inputs=prompt_box,
+    outputs=result_boxes,
+    title="Sarvam-M Chat Interface",
+    description="Enter a prompt and receive both the internal reasoning and the final answer from the Sarvam-M model.",
+)
+# Start the web app as soon as the module is executed.
+iface.launch()