Update app.py
app.py
CHANGED
@@ -1,23 +1,24 @@
 import os, time, tarfile, shutil, threading, subprocess
 from urllib.request import urlopen
+
 import gradio as gr
 import ollama

+# ----------------------------------------
+# Config
+# ----------------------------------------
 MODEL_ID = os.environ.get(
     "MODEL_ID",
+    "hf.co/Nadhari/gemma-3n-swahili-E2B-it-gguf:Q8_0",  # swap to :Q8_0 if you insist
 )
 os.environ.setdefault("OLLAMA_HOST", "http://127.0.0.1:11434")

+_ready = threading.Event()

+# ----------------------------------------
+# Ollama bootstrap (user-space install + serve + pull)
+# ----------------------------------------
+def _ensure_ollama_bin() -> str:
     found = shutil.which("ollama")
     if found:
         return found

@@ -45,83 +46,92 @@ def ensure_ollama_bin() -> str:
     os.chmod(bin_path, 0o755)
     return bin_path

+def _is_server_up() -> bool:
     try:
         ollama.list()
         return True
     except Exception:
         return False

+def _boot_ollama():
+    bin_path = _ensure_ollama_bin()
     env = os.environ.copy()
     env.setdefault("OLLAMA_MODELS", os.path.abspath("./.ollama_models"))
     env.setdefault("OLLAMA_NUM_PARALLEL", "1")
     env.setdefault("OLLAMA_MAX_LOADED_MODELS", "1")
     os.makedirs(env["OLLAMA_MODELS"], exist_ok=True)

+    if not _is_server_up():
         subprocess.Popen([bin_path, "serve"], env=env,
                          stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
     for _ in range(120):
+        if _is_server_up():
             break
         time.sleep(1)

+    # Ensure model is present
     try:
         ollama.show(model=MODEL_ID)
     except Exception:
         for _ in ollama.pull(model=MODEL_ID, stream=True):
             pass  # quiet pull

+    _ready.set()

+threading.Thread(target=_boot_ollama, daemon=True).start()

+# ----------------------------------------
+# Chat (same UX as your original sample)
+# ----------------------------------------
 def respond(message, history, system_message, max_tokens, temperature, top_p):
+    # Wait silently until model is ready (first run = downloads)
+    _ready.wait()

+    # Normalize history to OpenAI-style messages
+    messages = [{"role": "system", "content": system_message}]
+    if history and isinstance(history[0], (list, tuple)):  # tuples format
         for u, a in history:
+            if u:
+                messages.append({"role": "user", "content": u})
+            if a:
+                messages.append({"role": "assistant", "content": a})
+    else:  # already messages format
+        messages.extend(history or [])
+    messages.append({"role": "user", "content": message})
+
+    response = ""
+    for chunk in ollama.chat(
+        model=MODEL_ID,
+        messages=messages,
+        stream=True,
+        options={
+            "num_predict": int(max_tokens),
+            "temperature": float(temperature),
+            "top_p": float(top_p),
+            "num_ctx": 4096,
+        },
+    ):
+        token = chunk.get("message", {}).get("content", "") or chunk.get("delta", "")
+        if token:
+            response += token
+            yield response
+
+# ----------------------------------------
+# UI (your original ChatInterface vibe)
+# ----------------------------------------
 demo = gr.ChatInterface(
     respond,
+    type="messages",  # fix deprecation + align formats
     title="Gemma-3n-Swahili demo (E2B-it, GGUF)",
     description="Text-only interface for the Gemma-3n Swahili model. Uliza maswali kwa Kiswahili au Kiingereza!",
     additional_inputs=[
         gr.Textbox(value="Wewe ni msaidizi unaejua na kuongea Kiswahili.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
 )

 if __name__ == "__main__":
+    # Disable SSR on Spaces to avoid the h11 Content-Length crash
+    demo.queue().launch(ssr_mode=False)
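For a quick local smoke test, note that the updated respond is an ordinary Python generator, so it can be driven outside Gradio once the Ollama server is serving and the model has been pulled. A minimal sketch, assuming the file above is importable as a module named app (hypothetical name) and that the bootstrap thread is allowed to finish:

import app  # hypothetical module name for the app.py above; importing starts the Ollama bootstrap thread

# Stream a single-turn reply with an empty history and the same defaults the UI exposes.
final = ""
for partial in app.respond(
    message="Habari! Unaweza kunisaidia?",
    history=[],
    system_message="Wewe ni msaidizi unaejua na kuongea Kiswahili.",
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
):
    final = partial  # respond yields the accumulated reply so far

print(final)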