Spaces:

Nymbo
/

Serverless-TextGen-Hub

Running

App Files Files Community

Nymbo committed on Jan 4

Commit

98674ca

verified ·

1 Parent(s): 52ad57a

custom models

Browse files

Files changed (1) hide show

app.py +64 -69

app.py CHANGED Viewed

@@ -21,7 +21,8 @@ def respond(
     temperature,
     top_p,
     frequency_penalty,
-    seed
 ):
     """
     This function handles the chatbot response. It takes in:
@@ -33,6 +34,7 @@ def respond(
     - top_p: top-p (nucleus) sampling
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
     """
     print(f"Received message: {message}")
@@ -40,6 +42,7 @@ def respond(
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
@@ -62,26 +65,30 @@ def respond(
     # Append the latest user message
     messages.append({"role": "user", "content": message})
     # Start with an empty string to build the response as tokens stream in
     response = ""
     print("Sending request to OpenAI API.")
     # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
-        model="meta-llama/Llama-3.3-70B-Instruct",   # You can update this to your specific model
         max_tokens=max_tokens,
-        stream=True,  # Stream the response
         temperature=temperature,
         top_p=top_p,
-        frequency_penalty=frequency_penalty,  # <-- NEW
-        seed=seed,                             # <-- NEW
         messages=messages,
     ):
         # Extract the token text from the response chunk
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
-        # As streaming progresses, yield partial output
         yield response
     print("Completed response generation.")
@@ -90,69 +97,57 @@ def respond(
 chatbot = gr.Chatbot(height=600)
 print("Chatbot interface created.")
-MODELS_LIST = [
-    "meta-llama/Llama-3.1-8B-Instruct",
-    "microsoft/Phi-3.5-mini-instruct",
-]
-def filter_models(search_term):
-    """
-    Simple function to filter the placeholder model list based on the user's input
-    """
-    filtered_models = [m for m in MODELS_LIST if search_term.lower() in m.lower()]
-    return gr.update(choices=filtered_models)
-# --------------------------------------
-# REBUILD THE INTERFACE USING BLOCKS
-# --------------------------------------
-print("Building Gradio interface with Blocks...")
-with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-    # Title
-    gr.Markdown("# Serverless-TextGen-Hub")
-    # Accordion: Parameters (sliders, etc.)
-    with gr.Accordion("Parameters", open=True):
-        system_message = gr.Textbox(value="", label="System message")
-        max_tokens = gr.Slider(minimum=1,   maximum=4096, value=512,   step=1,   label="Max new tokens")
-        temperature = gr.Slider(minimum=0.1, maximum=4.0,  value=0.7,  step=0.1, label="Temperature")
-        top_p = gr.Slider(minimum=0.1, maximum=1.0,  value=0.95, step=0.05, label="Top-P")
-        frequency_penalty = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
-        seed = gr.Slider(minimum=-1,  maximum=65535, value=-1,  step=1,    label="Seed (-1 for random)")
-    # Accordion: Featured Models (Below the parameters)
-    with gr.Accordion("Featured Models", open=False):
-        model_search = gr.Textbox(
-            label="Filter Models",
-            placeholder="Search for a featured model...",
-            lines=1
-        )
-        model_radio = gr.Radio(
-            label="Select a model below",
-            value=MODELS_LIST[0],  # default
-            choices=MODELS_LIST,
-            interactive=True
-        )
-        model_search.change(filter_models, inputs=model_search, outputs=model_radio)
-    # The main ChatInterface
-    chat_interface = gr.ChatInterface(
-        fn=respond,
-        additional_inputs=[
-            system_message,
-            max_tokens,
-            temperature,
-            top_p,
-            frequency_penalty,
-            seed
-        ],
-        fill_height=True,
-        chatbot=chatbot,
-        theme="Nymbo/Nymbo_Theme",
-        title="Serverless-TextGen-Hub",
-        description="A comprehensive UI for text generation using the HF Inference API."
-    )
 print("Gradio interface initialized.")
 if __name__ == "__main__":

     temperature,
     top_p,
     frequency_penalty,
+    seed,
+    custom_model
 ):
     """
     This function handles the chatbot response. It takes in:
     - top_p: top-p (nucleus) sampling
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
+    - custom_model: the user-provided custom model name (if any)
     """
     print(f"Received message: {message}")
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
+    print(f"Custom model: {custom_model}")
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
     # Append the latest user message
     messages.append({"role": "user", "content": message})
+    # Determine which model to use: either custom_model or a default
+    model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
+    print(f"Model selected for inference: {model_to_use}")
     # Start with an empty string to build the response as tokens stream in
     response = ""
     print("Sending request to OpenAI API.")
     # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
+        model=model_to_use,              # Use either the user-provided custom model or default
         max_tokens=max_tokens,
+        stream=True,                     # Stream the response
         temperature=temperature,
         top_p=top_p,
+        frequency_penalty=frequency_penalty,
+        seed=seed,
         messages=messages,
     ):
         # Extract the token text from the response chunk
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
+        # Yield the partial response to Gradio so it can display in real-time
         yield response
     print("Completed response generation.")
 chatbot = gr.Chatbot(height=600)
 print("Chatbot interface created.")
+# Create the Gradio ChatInterface
+# We add two new sliders (Frequency Penalty and Seed) and now a new "Custom Model" text box.
+demo = gr.ChatInterface(
+    fn=respond,
+    additional_inputs=[
+        gr.Textbox(value="", label="System message"),
+        gr.Slider(
+            minimum=1,
+            maximum=4096,
+            value=512,
+            step=1,
+            label="Max new tokens"
+        ),
+        gr.Slider(
+            minimum=0.1,
+            maximum=4.0,
+            value=0.7,
+            step=0.1,
+            label="Temperature"
+        ),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-P"
+        ),
+        gr.Slider(
+            minimum=-2.0,
+            maximum=2.0,
+            value=0.0,
+            step=0.1,
+            label="Frequency Penalty"
+        ),
+        gr.Slider(
+            minimum=-1,
+            maximum=65535,  # Arbitrary upper limit for demonstration
+            value=-1,
+            step=1,
+            label="Seed (-1 for random)"
+        ),
+        gr.Textbox(
+            value="",
+            label="Custom Model",
+            info="(Optional) Provide a custom Hugging Face model path. This will override the default model if not empty."
+        ),
+    ],
+    fill_height=True,
+    chatbot=chatbot,
+    theme="Nymbo/Nymbo_Theme",
+)
 print("Gradio interface initialized.")
 if __name__ == "__main__":