transformer_models

Sleeping

App Files Files Community

TakiTakiTa committed on Feb 12

Commit

01ccf7c

verified ·

1 Parent(s): 86a6ee8

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -41

app.py CHANGED Viewed

@@ -2,41 +2,43 @@ import gradio as gr
 import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-# Global dictionary to store loaded models, keyed by model name.
-loaded_models = {}
-# Global variable to track the currently loaded model's name.
-current_model_name = ""
 @spaces.GPU
 def load_model(model_name: str):
-    global loaded_models, current_model_name
     try:
-        model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            torch_dtype=torch.bfloat16,
-            device_map="auto"
-        )
-        tokenizer = AutoTokenizer.from_pretrained(model_name)
-        loaded_models[model_name] = (model, tokenizer)
-        current_model_name = model_name  # update global state
-        return f"Model '{model_name}' loaded successfully."
     except Exception as e:
-        return f"Failed to load model '{model_name}': {str(e)}"
 @spaces.GPU
-def generate(prompt, history):
-    global loaded_models, current_model_name
-    print("loaded models: ", loaded_models)
-    print("current model: ", current_model_name)
-    if current_model_name == "" or current_model_name not in loaded_models:
-        return "Please load a model first by entering a model name and clicking the Load Model button."
-    model, tokenizer = loaded_models[current_model_name]
-    # Prepare the messages (with a system prompt and the user's prompt)
     messages = [
-        {"role": "system", "content": "Je bent een vriendelijke, behulpzame assistent."},
         {"role": "user", "content": prompt}
     ]
     text = tokenizer.apply_chat_template(
@@ -44,38 +46,47 @@ def generate(prompt, history):
         tokenize=False,
         add_generation_prompt=True
     )
-    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
     generated_ids = model.generate(
-        **model_inputs,
         max_new_tokens=512
     )
-    # Remove the input tokens from the generated tokens.
     generated_ids = [
-        output_ids[len(input_ids):]
-        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
     ]
     response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    return response
-# Build the Gradio UI using Blocks.
 with gr.Blocks() as demo:
     gr.Markdown("## Model Loader")
     with gr.Row():
-        model_name_input = gr.Textbox(
             label="Model Name",
             value="agentica-org/DeepScaleR-1.5B-Preview",
             placeholder="Enter model name (e.g., agentica-org/DeepScaleR-1.5B-Preview)"
         )
         load_button = gr.Button("Load Model")
-    load_status = gr.Textbox(label="Status", interactive=False)
-    # When the Load Model button is clicked, load_model is called.
-    load_button.click(fn=load_model, inputs=model_name_input, outputs=load_status)
     gr.Markdown("## Chat Interface")
-    # Create the chat interface without extra_inputs.
-    chat_interface = gr.ChatInterface(fn=generate)
 demo.launch(share=True)

 import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
+from functools import lru_cache
+# Cache the loaded model and tokenizer based on the model name.
+@lru_cache(maxsize=1)
+def get_model(model_name: str):
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.bfloat16,
+        device_map="auto"
+    )
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    print("Cached model loaded for:", model_name)
+    return model, tokenizer
 @spaces.GPU
 def load_model(model_name: str):
     try:
+        # Call the caching function. (This will load the model if not already cached.)
+        model, tokenizer = get_model(model_name)
+        # Print to verify caching (will show up in the logs).
+        print("Loaded model:", model_name)
+        return f"Model '{model_name}' loaded successfully.", model_name
     except Exception as e:
+        return f"Failed to load model '{model_name}': {str(e)}", ""
 @spaces.GPU
+def generate_response(prompt, chat_history, current_model_name):
+    if current_model_name == "":
+        return "Please load a model first by entering a model name and clicking the Load Model button.", current_model_name, chat_history
+    try:
+        model, tokenizer = get_model(current_model_name)
+    except Exception as e:
+        return f"Error loading model: {str(e)}", current_model_name, chat_history
+    # Prepare conversation messages.
     messages = [
+        {"role": "system", "content": "You are a friendly, helpful assistant."},
         {"role": "user", "content": prompt}
     ]
     text = tokenizer.apply_chat_template(
         tokenize=False,
         add_generation_prompt=True
     )
+    inputs = tokenizer([text], return_tensors="pt").to(model.device)
     generated_ids = model.generate(
+        **inputs,
         max_new_tokens=512
     )
+    # Strip out the prompt tokens.
     generated_ids = [
+        output_ids[len(input_ids):]
+        for input_ids, output_ids in zip(inputs.input_ids, generated_ids)
     ]
     response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    chat_history.append([prompt, response])
+    return "", current_model_name, chat_history
 with gr.Blocks() as demo:
     gr.Markdown("## Model Loader")
     with gr.Row():
+        model_input = gr.Textbox(
             label="Model Name",
             value="agentica-org/DeepScaleR-1.5B-Preview",
             placeholder="Enter model name (e.g., agentica-org/DeepScaleR-1.5B-Preview)"
         )
         load_button = gr.Button("Load Model")
+    status_output = gr.Textbox(label="Status", interactive=False)
+    # Hidden state for the model name.
+    model_state = gr.State("")
+    # When the load button is clicked, update status and state.
+    load_button.click(fn=load_model, inputs=model_input, outputs=[status_output, model_state])
     gr.Markdown("## Chat Interface")
+    chatbot = gr.Chatbot()
+    prompt_box = gr.Textbox(placeholder="Enter your prompt here...")
+    def chat_submit(prompt, history, current_model_name):
+        output, updated_state, history = generate_response(prompt, history, current_model_name)
+        return "", updated_state, history
+    # When a prompt is submitted, clear the prompt textbox and update chat history and model state.
+    prompt_box.submit(fn=chat_submit, inputs=[prompt_box, chatbot, model_state],
+                      outputs=[prompt_box, model_state, chatbot])
 demo.launch(share=True)