Spaces:

umint
/

Kimi-K2-Instruct

Paused

App Files Files Community

hadadrjt committed on 27 days ago

Commit

b4f33b6

0 Parent(s):

moonshot: Initial.

Browse files

Files changed (8) hide show

.gitattributes +35 -0
Dockerfile +20 -0
README.md +12 -0
app.py +170 -0
src/__init__.py +0 -0
src/assets/__init__.py +0 -0
src/assets/css/__init__.py +0 -0
src/assets/css/reasoning.py +47 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

Dockerfile ADDED Viewed

	@@ -0,0 +1,20 @@

+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+# Use a specific container image for the application
+FROM hadadrjt/ai:latest
+# Set the main working directory inside the container
+WORKDIR /usr/src/app
+# Copy all files and directories from the build context on the
+# Host machine into the working directory in the container
+COPY . .
+# Open the port so the application can be accessed
+EXPOSE 7860
+# Define the default command to start the application
+CMD ["python", "app.py"]

README.md ADDED Viewed

	@@ -0,0 +1,12 @@

+---
+title: UltimaX Intelligence | Moonshot AI
+short_description: Kimi-K2-Instruct
+emoji: ⚡
+colorFrom: green
+colorTo: indigo
+sdk: docker
+app_port: 7860
+pinned: false
+models:
+  - moonshotai/Kimi-K2-Instruct
+---

app.py ADDED Viewed

	@@ -0,0 +1,170 @@

+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+import os  # Used for accessing environment variables such as API credentials
+import json  # Used for encoding and decoding JSON data
+import httpx  # Used to make asynchronous HTTP requests with support for HTTP/2
+import gradio as gr  # Used to create the user interface for the chatbot
+import random  # Used to randomly shuffle available server hosts
+from datetime import datetime, timedelta  # Used to manage time for session expiry
+from src.assets.css.reasoning import styles  # Function to apply CSS styling to reasoning output
+# Load the host configuration from the "auth" environment variable, which must contain a JSON string
+# The rest of this module iterates over the decoded value and reads the keys "jarvis", "token",
+# "model" and "endpoint" from each entry, plus the optional key "error"
+# NOTE(review): os.getenv returns None when the variable is unset, and json.loads(None) raises
+# TypeError at import time — confirm the variable is always provided in deployment
+auth = json.loads(os.getenv("auth"))
+# Tracks the hosts that are temporarily skipped
+# The key is the host's "jarvis" value and the value is the datetime at which the busy state expires
+busy = {}
+def server():
+    """
+    Clean up any expired server sessions from the 'busy' tracking dictionary.
+    This function checks if any server marked as busy has passed its timeout period.
+    If so, it removes them from the busy list, making them available again for use.
+    """
+    now = datetime.now()
+    for session, expiry in list(busy.items()):
+        if expiry <= now:
+            del busy[session]
+def setup(user_message, history):
+    """
+    Append the current user message to the conversation history.
+    Parameters:
+    - user_message: The new message entered by the user.
+    - history: A list of dictionaries containing previous conversation turns.
+    Returns:
+    - A new message list with the latest user message added.
+    """
+    messages = history.copy()
+    messages.append({"role": "user", "content": user_message})
+    return messages
+def connection(host, messages):
+    """
+    Send the conversation to a specific server and receive a streamed response.
+    This function prepares the headers and request payload, then opens a stream
+    to the target host using the HTTP/2 protocol. It handles incremental data
+    and separates reasoning content from main response content.
+    Parameters:
+    - host: A dictionary containing host settings such as token, model, and endpoint.
+    - messages: A list of conversation history messages in proper format.
+    Yields:
+    - Incremental reasoning or content responses as they are received from the stream.
+    """
+    headers = {
+        "Authorization": f"Bearer {host['token']}",  # Use bearer token for authentication
+        "Content-Type": "application/json"  # Set the request body to JSON format
+    }
+    payload = {
+        "model": host["model"],  # Specify the AI model to use
+        "messages": messages,  # Provide the chat history including the user input
+        "stream": True,  # Enable streaming mode to receive response in real time
+        "temperature": 0.6  # Control randomness of output, higher values = more random
+    }
+    reasoning = ""  # This variable stores any intermediate explanation or reasoning
+    content = ""  # This variable accumulates the final response content
+    # Use a streaming HTTP client with support for HTTP/2
+    with httpx.Client(http2=True) as client:
+        with client.stream("POST", host["endpoint"], headers=headers, json=payload) as resp:
+            resp.raise_for_status()  # Raise an exception if the response indicates an error
+            # Process each line of the streaming response
+            for line in resp.iter_lines():
+                if not line:
+                    continue  # Skip empty lines
+                # Remove the "data: " prefix if it exists
+                raw = line[6:] if line.startswith("data: ") else line
+                try:
+                    data = json.loads(raw)  # Parse the line as JSON
+                    # Extract incremental reasoning if available
+                    delta = data["choices"][0]["delta"]
+                    reasoning_response = delta.get("reasoning")
+                    if reasoning_response:
+                        reasoning += reasoning_response
+                        # Yield current accumulated reasoning
+                        yield styles(reasoning=reasoning, expanded=True)
+                    content_response = delta.get("content")
+                    # Extract incremental content response if available
+                    if content_response:
+                        content += content_response
+                        # If reasoning exists, yield both reasoning and content
+                        if reasoning:
+                            yield styles(reasoning=reasoning, expanded=False) + content
+                        else:
+                            yield content
+                    # Stop streaming if the finish condition is met
+                    if data["choices"][0].get("finish_reason") == "stop":
+                        return
+                except json.JSONDecodeError:
+                    continue  # Skip malformed JSON lines
+def request(user_message, history):
+    """
+    Main request handler that distributes the user's question to an available server.
+    This function validates the input, prepares the message history, rotates through
+    available hosts, and forwards the message to one that is not currently busy.
+    If a server fails due to a known error, it is temporarily marked as unavailable.
+    Parameters:
+    - user_message: The latest message input by the user.
+    - history: The chat history containing all prior messages.
+    Yields:
+    - Either the generated reply or a busy message if all hosts are unavailable.
+    """
+    # Ignore empty or whitespace-only input
+    if not user_message or not user_message.strip():
+        yield []
+        return
+    # Clean up expired server sessions before handling the new request
+    server()
+    # Append the current message to the conversation
+    messages = setup(user_message, history)
+    # Identify servers that are not currently marked as busy
+    available = [h for h in auth if h["jarvis"] not in busy]
+    # Shuffle the available list to randomly balance load across servers
+    random.shuffle(available)
+    # Try each available host one by one until one succeeds
+    for host in available:
+        try:
+            # Attempt to connect and stream a response
+            yield from connection(host, messages)
+            return  # Stop after successful response
+        except httpx.HTTPStatusError as e:
+            # If the failure matches the expected error code, mark the host as busy
+            if e.response.status_code == host.get("error"):
+                busy[host["jarvis"]] = datetime.now() + timedelta(hours=1)
+        except Exception:
+            continue  # Ignore all other errors and try the next server
+    # If all hosts fail or are busy, notify the user
+    yield "The server is currently busy. Please wait a moment or try again later."
+# Create a Gradio application
+gr.ChatInterface(
+    fn=request,  # The function that handles user messages
+    type="messages",  # OpenAI style
+    chatbot=gr.Chatbot(
+        type="messages",  # OpenAI style (duplicate to silence warning log)
+        show_copy_button=True,  # Enable a button for users to copy responses
+        scale=1  # Use standard display scaling
+    ),
+    examples=[
+        ["Please introduce yourself."],
+        ["Give me a short introduction to large language model."],
+        ["Please generate a highly complex code snippet on any topic."],
+        ["Explain about quantum computers."]
+    ],  # Provide sample inputs for users to try
+    cache_examples=False,  # Disable caching to ensure responses are always fresh
+    concurrency_limit=2  # Queue limit
+).launch()  # Start the app and open the interface

src/__init__.py ADDED Viewed

File without changes

src/assets/__init__.py ADDED Viewed

File without changes

src/assets/css/__init__.py ADDED Viewed

File without changes

src/assets/css/reasoning.py ADDED Viewed

	@@ -0,0 +1,47 @@

+#
+# SPDX-FileCopyrightText: Hadad <[email protected]>
+# SPDX-License-Identifier: Apache-2.0
+#
+def styles(reasoning: str, expanded: bool = False) -> str:
+    """
+    Generates a styled HTML collapsible section using
+    <details> and <summary> tags.
+    Parameters:
+    * reasoning (str):  The explanatory or descriptive text to
+                        display inside the section.
+    * expanded (bool):  If True, the section is expanded by default.
+                        Otherwise, it remains collapsed.
+    Returns:
+    str:  A string containing HTML and inline CSS to render
+          the styled collapsible block.
+    """
+    open_attr = "open" if expanded else ""
+    emoji = "&#129504;"  # 🧠
+    return f"""
+<details {open_attr} style="
+    /* Sans-serif font stack for clean modern look */
+    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+">
+  <summary style="
+    font-weight: 700;            /* Bold summary title */
+    font-size: 14px !important;  /* Fixed font size */
+    cursor: pointer;             /* Pointer on hover */
+    user-select: none;           /* Prevent text selection */
+  ">
+    {emoji} Reasoning
+  </summary>
+  <div style="
+    margin-top: 6px;             /* Space above content */
+    padding-top: 6px;            /* Inner top padding */
+    font-size: 11px !important;  /* Smaller text */
+    line-height: 1.7;            /* Improve readability */
+    letter-spacing: 0.02em;      /* Slight spacing */
+  ">
+    {reasoning}
+  </div>
+</details>
+"""