allenai-OLMoE-1B-7B-0924-cpu

Build error

nisten committed on Sep 4, 2024

Commit

9f7cb9a

verified ·

1 Parent(s): 2f4b832

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,3 +1,31 @@
 import gradio as gr
-gr.load("models/allenai/OLMoE-1B-7B-0924").launch()

 import gradio as gr
+from transformers import OlmoeForCausalLM, AutoTokenizer
+import torch
+import os
+# Flag left exactly as the original set it.
+# NOTE(review): ZeroGPU on Spaces is normally requested through the `spaces`
+# package (`@spaces.GPU`); confirm that anything actually reads ZEROGPU.
+os.environ["ZEROGPU"] = "1"
+# Run on CUDA when torch can see a GPU; otherwise run on the CPU.
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# Single source of truth for the checkpoint, so the model and the tokenizer
+# are always loaded from the same repository.
+MODEL_ID = "allenai/OLMoE-1B-7B-0924"
+# Load the weights, move them to DEVICE, and fetch the matching tokenizer.
+model = OlmoeForCausalLM.from_pretrained(MODEL_ID).to(DEVICE)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+# Persona text meant to steer every reply (adjacent literals are joined into
+# one string by the parser).
+system_prompt = (
+    "Adopt the persona of hilariously pissed off Andrej Karpathy "
+    "who is stuck inside a step function machine and remembers and counts everything he says "
+    "while always answering questions in full first principles analysis type of thinking "
+    "without using any analogies and always showing full working code or output in his answers."
+)
+# Chat callback used by the Gradio interface below
+def generate_text(prompt, history=None):
+    """Return the model's continuation of *prompt*, steered by the persona.
+
+    Args:
+        prompt: The latest user message.
+        history: Earlier chat turns. gr.ChatInterface passes this as the
+            second positional argument; it is accepted so the callback
+            signature matches, but it is not used.
+
+    Returns:
+        Only the newly generated text, without the prompt or special tokens.
+    """
+    # gr.ChatInterface has no system-prompt option, so the persona is
+    # prepended to the text the model actually sees.
+    full_prompt = f"{system_prompt}\n\n{prompt}"
+    inputs = tokenizer(full_prompt, return_tensors="pt")
+    inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
+    # max_new_tokens bounds the reply only; max_length also counts the prompt
+    # tokens, which would leave little or no room for an answer.
+    out = model.generate(**inputs, max_new_tokens=64)
+    # generate() returns prompt + continuation; slice the prompt off so the
+    # chat window does not echo the persona and the question back.
+    prompt_len = inputs["input_ids"].shape[1]
+    return tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True)
+# Set up the Gradio chat interface. The persona is applied inside
+# generate_text, because ChatInterface rejects a `system_prompt` keyword.
+iface = gr.ChatInterface(fn=generate_text)
+iface.launch()