Commit 0a38286
0 Parent(s)
qwen: Initial.

Files changed:
- .gitattributes +35 -0
- Dockerfile +20 -0
- README.md +12 -0
- app.py +173 -0
- src/__init__.py +0 -0
- src/assets/__init__.py +0 -0
- src/assets/css/__init__.py +0 -0
- src/assets/css/reasoning.py +47 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile
ADDED
@@ -0,0 +1,20 @@
#
# SPDX-FileCopyrightText: Hadad <[email protected]>
# SPDX-License-Identifier: Apache-2.0
#

# Use a specific container image for the application
FROM hadadrjt/ai:latest

# Set the main working directory inside the container
WORKDIR /usr/src/app

# Copy all files and directories from the build context on the
# host machine into the working directory in the container
COPY . .

# Open the port so the application can be accessed
EXPOSE 7860

# Define the default command to start the application
CMD ["python", "app.py"]
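For local use the image can presumably be built and run in the usual way, for example with "docker build -t qwen-space ." followed by "docker run -p 7860:7860 -e auth='[...]' qwen-space"; the tag qwen-space is only illustrative, and the auth value is the JSON credential list that app.py reads at startup (a sketch of its assumed shape is given after the app.py listing below).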
README.md
ADDED
@@ -0,0 +1,12 @@
---
title: UltimaX Intelligence | Qwen
short_description: Qwen3-Coder-480B-A35B-Instruct
emoji: ⚡
colorFrom: green
colorTo: indigo
sdk: docker
app_port: 7860
pinned: false
models:
- Qwen/Qwen3-Coder-480B-A35B-Instruct
---
app.py
ADDED
@@ -0,0 +1,173 @@
#
# SPDX-FileCopyrightText: Hadad <[email protected]>
# SPDX-License-Identifier: Apache-2.0
#

import os  # Used for accessing environment variables such as API credentials
import json  # Used for encoding and decoding JSON data
import httpx  # Used to make HTTP requests with support for HTTP/2
import gradio as gr  # Used to create the user interface for the chatbot
import random  # Used to randomly shuffle available server hosts
from datetime import datetime, timedelta  # Used to manage time for session expiry
from src.assets.css.reasoning import styles  # Function to apply CSS styling to reasoning output

# Load authentication information stored as a JSON string from an environment variable
auth = json.loads(os.getenv("auth"))

# This dictionary is used to track which server sessions are currently marked as busy
# The key is the session name and the value is the expiration time of the busy state
busy = {}

def server():
    """
    Clean up any expired server sessions from the 'busy' tracking dictionary.

    This function checks if any server marked as busy has passed its timeout period.
    If so, it removes them from the busy list, making them available again for use.
    """
    now = datetime.now()
    for session, expiry in list(busy.items()):
        if expiry <= now:
            del busy[session]

def setup(user_message, history):
    """
    Append the current user message to the conversation history.

    Parameters:
    - user_message: The new message entered by the user.
    - history: A list of dictionaries containing previous conversation turns.

    Returns:
    - A new message list with the latest user message added.
    """
    messages = history.copy()
    messages.append({"role": "user", "content": user_message})
    return messages

def connection(host, messages):
    """
    Send the conversation to a specific server and receive a streamed response.

    This function prepares the headers and request payload, then opens a stream
    to the target host using the HTTP/2 protocol. It handles incremental data
    and separates reasoning content from main response content.

    Parameters:
    - host: A dictionary containing host settings such as token, model, and endpoint.
    - messages: A list of conversation history messages in proper format.

    Yields:
    - Incremental reasoning or content responses as they are received from the stream.
    """
    headers = {
        "Authorization": f"Bearer {host['token']}",  # Use bearer token for authentication
        "Content-Type": "application/json"  # Set the request body to JSON format
    }
    payload = {
        "model": host["model"],  # Specify the AI model to use
        "messages": messages,  # Provide the chat history including the user input
        "stream": True,  # Enable streaming mode to receive response in real time
        "temperature": 0.7,  # Control randomness of output, higher values = more random
        "top_p": 0.8,  # Control diversity via nucleus sampling
        "top_k": 20,  # Consider the top-k most likely tokens at each step
        "repetition_penalty": 1.05  # Penalize repeated tokens to reduce redundancy
    }

    reasoning = ""  # This variable stores any intermediate explanation or reasoning
    content = ""  # This variable accumulates the final response content

    # Use a streaming HTTP client with support for HTTP/2
    with httpx.Client(http2=True) as client:
        with client.stream("POST", host["endpoint"], headers=headers, json=payload) as resp:
            resp.raise_for_status()  # Raise an exception if the response indicates an error
            # Process each line of the streaming response
            for line in resp.iter_lines():
                if not line:
                    continue  # Skip empty lines
                # Remove the "data: " prefix if it exists
                raw = line[6:] if line.startswith("data: ") else line
                try:
                    data = json.loads(raw)  # Parse the line as JSON
                    # Extract incremental reasoning if available
                    delta = data["choices"][0]["delta"]
                    reasoning_response = delta.get("reasoning")
                    if reasoning_response:
                        reasoning += reasoning_response
                        # Yield current accumulated reasoning
                        yield styles(reasoning=reasoning, expanded=True)
                    content_response = delta.get("content")
                    # Extract incremental content response if available
                    if content_response:
                        content += content_response
                        # If reasoning exists, yield both reasoning and content
                        if reasoning:
                            yield styles(reasoning=reasoning, expanded=False) + content
                        else:
                            yield content
                    # Stop streaming if the finish condition is met
                    if data["choices"][0].get("finish_reason") == "stop":
                        return
                except json.JSONDecodeError:
                    continue  # Skip malformed JSON lines

def request(user_message, history):
    """
    Main request handler that distributes the user's question to an available server.

    This function validates the input, prepares the message history, rotates through
    available hosts, and forwards the message to one that is not currently busy.
    If a server fails due to a known error, it is temporarily marked as unavailable.

    Parameters:
    - user_message: The latest message input by the user.
    - history: The chat history containing all prior messages.

    Yields:
    - Either the generated reply or a busy message if all hosts are unavailable.
    """
    # Ignore empty or whitespace-only input
    if not user_message or not user_message.strip():
        yield []
        return
    # Clean up expired server sessions before handling the new request
    server()
    # Append the current message to the conversation
    messages = setup(user_message, history)
    # Identify servers that are not currently marked as busy
    available = [h for h in auth if h["jarvis"] not in busy]
    # Shuffle the available list to randomly balance load across servers
    random.shuffle(available)
    # Try each available host one by one until one succeeds
    for host in available:
        try:
            # Attempt to connect and stream a response
            yield from connection(host, messages)
            return  # Stop after successful response
        except httpx.HTTPStatusError as e:
            # If the failure matches the expected error code, mark the host as busy
            if e.response.status_code == host.get("error"):
                busy[host["jarvis"]] = datetime.now() + timedelta(hours=1)
        except Exception:
            continue  # Ignore all other errors and try the next server
    # If all hosts fail or are busy, notify the user
    yield "The server is currently busy. Please wait a moment or try again later."

# Create a Gradio application
gr.ChatInterface(
    fn=request,  # The function that handles user messages
    type="messages",  # OpenAI-style message format
    chatbot=gr.Chatbot(
        type="messages",  # OpenAI-style message format (duplicated here to silence a warning log)
        show_copy_button=True,  # Enable a button for users to copy responses
        scale=1  # Use standard display scaling
    ),
    examples=[
        ["Please introduce yourself."],
        ["Give me a short introduction to large language models."],
        ["Please generate a highly complex code snippet on any topic."],
        ["Explain quantum computers."]
    ],  # Provide sample inputs for users to try
    cache_examples=False,  # Disable caching to ensure responses are always fresh
    concurrency_limit=2  # Queue limit
).launch()  # Start the app and open the interface
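The shape of the auth environment variable is not documented in this commit; the sketch below is only inferred from the fields the code reads (host["jarvis"], host["token"], host["model"], host["endpoint"], host.get("error")), and every value in it is a placeholder rather than a real credential or endpoint.

# Hypothetical example of the "auth" value expected by app.py; all values are placeholders.
import json
import os

os.environ["auth"] = json.dumps([
    {
        "jarvis": "host-1",  # Session name used as the key in the busy dictionary
        "token": "sk-placeholder",  # Bearer token sent in the Authorization header
        "model": "Qwen/Qwen3-Coder-480B-A35B-Instruct",  # Model name sent in the request payload
        "endpoint": "https://example.invalid/v1/chat/completions",  # Assumed OpenAI-compatible URL
        "error": 429  # Status code that should mark this host as busy for an hour (placeholder)
    }
])

The /v1/chat/completions path is itself an assumption based on how connection() parses the stream: each non-empty line is handled as an OpenAI-style chat-completion chunk, so a single streamed line is expected to look roughly like data: {"choices": [{"delta": {"reasoning": "...", "content": "..."}, "finish_reason": null}]}.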
src/__init__.py
ADDED
File without changes
src/assets/__init__.py
ADDED
File without changes
src/assets/css/__init__.py
ADDED
File without changes
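These empty __init__.py files simply mark src, src.assets, and src.assets.css as Python packages so that the import "from src.assets.css.reasoning import styles" in app.py resolves.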
src/assets/css/reasoning.py
ADDED
@@ -0,0 +1,47 @@
#
# SPDX-FileCopyrightText: Hadad <[email protected]>
# SPDX-License-Identifier: Apache-2.0
#

def styles(reasoning: str, expanded: bool = False) -> str:
    """
    Generates a styled HTML collapsible section using
    <details> and <summary> tags.

    Parameters:
    * reasoning (str): The explanatory or descriptive text to
      display inside the section.

    * expanded (bool): If True, the section is expanded by default.
      Otherwise, it remains collapsed.

    Returns:
    str: A string containing HTML and inline CSS to render
    the styled collapsible block.
    """
    open_attr = "open" if expanded else ""
    emoji = "🧠"  # Brain emoji shown in the summary label
    return f"""
<details {open_attr} style="
    /* Sans-serif font stack for a clean, modern look */
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
">
    <summary style="
        font-weight: 700; /* Bold summary title */
        font-size: 14px !important; /* Fixed font size */
        cursor: pointer; /* Pointer on hover */
        user-select: none; /* Prevent text selection */
    ">
        {emoji} Reasoning
    </summary>
    <div style="
        margin-top: 6px; /* Space above content */
        padding-top: 6px; /* Inner top padding */
        font-size: 11px !important; /* Smaller text */
        line-height: 1.7; /* Improve readability */
        letter-spacing: 0.02em; /* Slight spacing */
    ">
        {reasoning}
    </div>
</details>
"""
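A minimal usage sketch of this helper, mirroring how app.py calls it while streaming; the reasoning text is a made-up placeholder, and the returned value is raw HTML that Gradio renders as a collapsible "🧠 Reasoning" block.

from src.assets.css.reasoning import styles

# While reasoning tokens are still streaming, app.py shows the section expanded.
partial = styles(reasoning="Outline the approach, then write the code.", expanded=True)

# Once answer content starts arriving, the section is collapsed and the answer
# text is appended after the HTML block.
final = styles(reasoning="Outline the approach, then write the code.", expanded=False) + "Here is the code..."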