Spaces:

traversaal-internal
/

Alif-1.0-8B-Instruct

Sleeping

App Files Files Community

alishafique committed on Feb 24

Commit

34d5793

verified ·

1 Parent(s): 4a6b784

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -96

app.py CHANGED Viewed

@@ -1,19 +1,16 @@
-import os
-import json
-import subprocess
 import gradio as gr
-from threading import Thread
 from huggingface_hub import hf_hub_download
-from llama_cpp import Llama
-from datetime import datetime
-# Load model from Hugging Face Hub
-MODEL_ID = "large-traversaal/Alif-1.0-8B-Instruct"
-MODEL_FILE = "model-Q8_0.gguf"
-model_path_file = hf_hub_download(MODEL_ID, filename=MODEL_FILE)
-# Initialize Llama model
 llama = Llama(
     model_path=model_path_file,
     n_gpu_layers=40,  # Adjust based on VRAM
@@ -23,17 +20,15 @@ llama = Llama(
     verbose=True  # Enable debug logging
 )
-CHAT_TEMPLATE = "Alif Chat"
-CONTEXT_LENGTH = 4096
-COLOR = "blue"
-EMOJI = "💬"
-DESCRIPTION = "Urdu AI Chatbot powered by Llama.cpp"
-# Function to generate responses
-def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
-    chat_prompt = f"You are an Urdu Chatbot. Write an appropriate response for the given instruction: {message} Response:"
-    response = llama(chat_prompt, max_tokens=max_new_tokens, stop=["Q:", "\n"], echo=False, stream=True)
     text = ""
     for chunk in response:
         content = chunk["choices"][0]["text"]
@@ -41,81 +36,15 @@ def generate_response(message, history, system_prompt, temperature, max_new_toke
             text += content
             yield text
-# Create Gradio interface
-with gr.Blocks() as demo:
-    chatbot = gr.Chatbot(label="Urdu Chatbot", likeable=True, render=False)
-    chat = gr.ChatInterface(
-        generate_response,
-        chatbot=chatbot,
-        title=EMOJI + " " + "Alif-1.0 Chatbot",
-        description=DESCRIPTION,
-        examples=[
-            ["شہر کراچی کے بارے میں بتاؤ"],
-            ["قابل تجدید توانائی کیا ہے؟"],
-            ["پاکستان کی تاریخ کے بارے میں بتائیں۔"]
-        ],
-        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
-        additional_inputs=[
-            gr.Textbox("", label="System prompt", render=False),
-            gr.Slider(0, 1, 0.6, label="Temperature", render=False),
-            gr.Slider(128, CONTEXT_LENGTH, 1024, label="Max new tokens", render=False),
-            gr.Slider(1, 80, 40, step=1, label="Top K sampling", render=False),
-            gr.Slider(0, 2, 1.1, label="Repetition penalty", render=False),
-            gr.Slider(0, 1, 0.95, label="Top P sampling", render=False),
-        ],
-        theme=gr.themes.Soft(primary_hue=COLOR),
-    )
-demo.queue(max_size=20).launch(share=True)
-# import llama_cpp
-# from llama_cpp import Llama
-# # import llama_cpp.llama_tokenizer
-# import gradio as gr
-# from huggingface_hub import hf_hub_download
-# model_name = "large-traversaal/Alif-1.0-8B-Instruct"
-# model_file = "model-Q8_0.gguf"
-# model_path_file = hf_hub_download(model_name,
-#                              filename=model_file,)
-# llama = Llama(
-#     model_path=model_path_file,
-#     n_gpu_layers=40,  # Adjust based on VRAM
-#     n_threads=8,  # Match CPU cores
-#     n_batch=512,  # Optimize for better VRAM usage
-#     n_ctx=4096,  # Context window size
-#     verbose=True  # Enable debug logging
-# )
-# chat_prompt = """You are Urdu Chatbot. Write approriate response for given instruction:{inp} Response:"""
-# # Function to generate text with streaming output
-# def chat_with_ai(prompt):
-#     query = chat_prompt.format(inp=prompt)
-#     #response = llama(prompt, max_tokens=1024, stop=stop_tokens, echo=False, stream=True)  # Enable streaming
-#     response = llama(query, max_tokens=256, stop=["Q:", "\n"], echo=False, stream=True)  # Enable streaming
-#     text = ""
-#     for chunk in response:
-#         content = chunk["choices"][0]["text"]
-#         if content:
-#             text += content
-#             yield text
-# # Gradio UI setup
-# demo = gr.Interface(
-#     fn=chat_with_ai,  # Streaming function
-#     inputs="text",  # User input
-#     outputs="text",  # Model response
-#     title="Streaming Alif-1.0-8B-Instruct Chatbot 🚀",
-#     description="Enter a prompt and get a streamed response."
-# )
-# # Launch the Gradio app
-# demo.launch(share=True)

+import llama_cpp
+from llama_cpp import Llama
+# import llama_cpp.llama_tokenizer
 import gradio as gr
 from huggingface_hub import hf_hub_download
+model_name = "large-traversaal/Alif-1.0-8B-Instruct"
+model_file = "model-Q8_0.gguf"
+model_path_file = hf_hub_download(model_name,
+                             filename=model_file,)
 llama = Llama(
     model_path=model_path_file,
     n_gpu_layers=40,  # Adjust based on VRAM
     verbose=True  # Enable debug logging
 )
+chat_prompt = """You are Urdu Chatbot. Write approriate response for given instruction:{inp} Response:"""
+# Function to generate text with streaming output
+def chat_with_ai(prompt):
+    query = chat_prompt.format(inp=prompt)
+    #response = llama(prompt, max_tokens=1024, stop=stop_tokens, echo=False, stream=True)  # Enable streaming
+    response = llama(query, max_tokens=256, stop=["Q:", "\n"], echo=False, stream=True)  # Enable streaming
     text = ""
     for chunk in response:
         content = chunk["choices"][0]["text"]
             text += content
             yield text
+# Gradio UI setup
+demo = gr.Interface(
+    fn=chat_with_ai,  # Streaming function
+    inputs="text",  # User input
+    outputs="text",  # Model response
+    title="Streaming Alif-1.0-8B-Instruct Chatbot 🚀",
+    description="Enter a prompt and get a streamed response."
+)
+# Launch the Gradio app
+demo.launch(share=True)