Spaces:

arcee-ai
/

Model-Routing

Running on CPU Upgrade

App Files Files Community

Crystalcareai committed 7 days ago

Commit

af60662

verified ·

1 Parent(s): 982d629

Update app.py

Browse files

Files changed (1) hide show

app.py +314 -645

app.py CHANGED Viewed

@@ -1,675 +1,344 @@
 import gradio as gr
 import json
-import os
-import requests
-from cryptography.fernet import Fernet
-from huggingface_hub import HfApi
-from datetime import datetime
-from dataclasses import dataclass, asdict
-from typing import List, Dict, Tuple, Optional
 import asyncio
-import aiohttp
-from concurrent.futures import ThreadPoolExecutor
-import httpx
-@dataclass
-class ModelComparison:
-    name: str
-    nick1: str
-    endpoint1: str
-    api_key1: str  # This will store encrypted key
-    model1: str
-    nick2: str
-    endpoint2: str
-    api_key2: str  # This will store encrypted key
-    model2: str
-    active: bool = True
-    created_at: str = None
-    def __post_init__(self):
-        if self.created_at is None:
-            self.created_at = datetime.now().isoformat()
-    def to_dict(self) -> dict:
-        return asdict(self)
-    @classmethod
-    def from_dict(cls, data: dict) -> 'ModelComparison':
-        return cls(**data)
 @dataclass
-class Vote:
-    comparison_id: str
-    message: str
-    model1_response: List[Dict]
-    model2_response: List[Dict]
-    winner: str
-    vote_info: str = ""
-    timestamp: str = None
-    def __post_init__(self):
-        if self.timestamp is None:
-            self.timestamp = datetime.now().isoformat()
-    def to_dict(self) -> dict:
-        return asdict(self)
-    @classmethod
-    def from_dict(cls, data: dict) -> 'Vote':
-        return cls(**data)
-class VoteManager:
     def __init__(self):
-        self.votes: List[Vote] = []
-    def add_vote(self, vote: Vote):
-        self.votes.append(vote)
-    def get_votes_for_comparison(self, comparison_id: str) -> List[Vote]:
-        return [vote for vote in self.votes if vote.comparison_id == comparison_id]
-    def get_results(self, comparison: ModelComparison) -> dict:
-        comparison_votes = self.get_votes_for_comparison(comparison.name)
-        total_votes = len(comparison_votes)
-        model1_votes = len([vote for vote in comparison_votes if vote.winner == comparison.nick1])
-        model2_votes = len([vote for vote in comparison_votes if vote.winner == comparison.nick2])
-        even_votes = len([vote for vote in comparison_votes if vote.winner == "Even"])
-        return {
-            "total_votes": total_votes,
-            "model1_name": comparison.nick1,
-            "model2_name": comparison.nick2,
-            "model1_votes": model1_votes,
-            "model2_votes": model2_votes,
-            "even_votes": even_votes,
-            "recent_votes": [vote.to_dict() for vote in comparison_votes[-5:]]
-        }
-    def to_dict_list(self) -> List[dict]:
-        return [vote.to_dict() for vote in self.votes]
-    @classmethod
-    def from_dict_list(cls, data: List[dict]) -> 'VoteManager':
-        manager = cls()
-        manager.votes = [Vote.from_dict(vote_data) for vote_data in data]
-        return manager
-class ModelComparisonApp:
-    def __init__(self):
-        # Initialize encryption key from environment variable
-        self.encryption_key = os.environ.get('ENCRYPTION_KEY')
-        if not self.encryption_key:
-            raise ValueError("ENCRYPTION_KEY environment variable not set")
-        self.fernet = Fernet(self.encryption_key.encode())
-        self.hf_token = os.environ.get('HF_TOKEN')
-        self.api = HfApi(token=self.hf_token)
-        self.dataset_repo_id = os.environ.get('REPO_ID')
-        # Initialize datasets
-        self.models_file = "models.json"
-        self.votes_file = "votes.json"
-        self.comparisons: Dict[str, ModelComparison] = {}
-        self.vote_manager = VoteManager()
-        self.load_data()
-    def load_data(self):
-        """Load model configurations and votes from datasets"""
-        try:
-            model_file_path = self.api.hf_hub_download(
-                repo_id=self.dataset_repo_id,
-                filename=self.models_file,
-                repo_type="dataset",
-            )
-            with open(model_file_path, "r") as f:
-                models_data = json.load(f)
-                self.comparisons = {
-                    name: ModelComparison.from_dict(data)
-                    for name, data in models_data.items()
-                }
-        except Exception as e:
-            print(f"Error loading models: {e}")
-            self.comparisons = {}
-        try:
-            votes_file_path = self.api.hf_hub_download(
-                repo_id=self.dataset_repo_id,
-                filename=self.votes_file,
-                repo_type="dataset",
             )
-            with open(votes_file_path, "r") as f:
-                votes_data = json.load(f)
-                self.vote_manager = VoteManager.from_dict_list(votes_data)
-        except Exception as e:
-            print(f"Error loading votes: {e}")
-            self.vote_manager = VoteManager()
-    def update_results_and_status(self, comparison_id):
-        if not comparison_id:
-            return [
-                [[0, "", 0, "", 0]],
-                "No comparison selected",
-                "Activate",
-            ]
-        results = self.get_comparison_results(comparison_id)
-        is_active = self.comparisons[comparison_id].active
-        return [
-            results["total_votes"],
-            results["model1_name"],
-            results["model2_name"],
-            results["model1_votes"],
-            results["model2_votes"],
-            results["even_votes"],
-            "Active" if is_active else "Inactive",
-            "Deactivate" if is_active else "Activate"
-        ]
-    def load_fresh_data(self):
-        """Force a fresh data load from HuggingFace"""
-        self.load_data()  # This reloads from HuggingFace
-    def refresh_results(self, comparison_id: str):
-        """Refresh results for a specific comparison with fresh data"""
-        self.load_fresh_data()
-        return self.update_results_and_status(comparison_id)
-    def save_data(self, file_name: str, data: dict):
-        """Save data to HuggingFace dataset"""
-        self.api.upload_file(
-            path_or_fileobj=json.dumps(data, indent=2).encode(),
-            path_in_repo=file_name,
-            repo_id=self.dataset_repo_id,
-            repo_type="dataset"
-        )
-    def save_models(self):
-        """Save model comparisons to file"""
-        models_dict = {
-            name: comparison.to_dict()
-            for name, comparison in self.comparisons.items()
         }
-        self.save_data(self.models_file, models_dict)
-    def save_votes(self):
-        """Save votes to file"""
-        self.save_data(self.votes_file, self.vote_manager.to_dict_list())
-    def encrypt_api_key(self, api_key: str) -> str:
-        """Encrypt API key using Fernet"""
-        return self.fernet.encrypt(api_key.encode()).decode()
-    def decrypt_api_key(self, encrypted_key: str) -> str:
-        """Decrypt API key using Fernet"""
-        return self.fernet.decrypt(encrypted_key.encode()).decode()
-    def get_active_comparisons(self) -> List[str]:
-        """Get list of active comparison names"""
-        return [
-            name for name, comparison in self.comparisons.items()
-            if comparison.active
-        ]
-    def get_all_comparisons(self) -> List[str]:
-        """Get list of all comparison names"""
-        return list(self.comparisons.keys())
-    def add_model_comparison(self, name, nick1, endpoint1, api_key1, model1, nick2, endpoint2, api_key2, model2):
-        """Add a new model comparison configuration"""
-        if name in self.comparisons:
-            return f"Model comparison '{name}' already exists", None, None
-        comparison = ModelComparison(
-            name=name,
-            nick1=nick1,
-            endpoint1=endpoint1,
-            api_key1=self.encrypt_api_key(api_key1),
-            model1=model1,
-            nick2=nick2,
-            endpoint2=endpoint2,
-            api_key2=self.encrypt_api_key(api_key2),
-            model2=model2
         )
-        self.comparisons[name] = comparison
-        self.save_models()
-        active_comparisons = self.get_active_comparisons()
-        all_comparisons = self.get_all_comparisons()
-        return (
-            "Model comparison added successfully!",
-            gr.update(choices=active_comparisons, value=active_comparisons[0]),
-            gr.update(choices=all_comparisons, value=all_comparisons[0]),
-            gr.update(visible=True)
         )
-    async def get_model_response_async(
-        self,
-        session: aiohttp.ClientSession,
-        endpoint: str,
-        api_key: str,
-        model: str,
-        message: str
-    ) -> Tuple[str, Optional[str]]:
-        """Get response from a model using OpenAI-compatible API asynchronously,
-        now with streaming support for models.arcee.ai endpoints."""
-        try:
-            headers = {
-                "Authorization": f"Bearer {api_key}",
-                "Content-Type": "application/json"
-            }
-            payload = {
-                "model": model,
-                "messages": [{"role": "user", "content": message}],
-                "temperature": 0.7
-            }
-            if not endpoint.endswith("/chat/completions"):
-                if endpoint.endswith("/"):
-                    endpoint = f"{endpoint}chat/completions"
-                else:
-                    endpoint = f"{endpoint}/chat/completions"
-            # For models from models.arcee.ai, switch to streaming via httpx + HTTP/2
-            if "models.arcee.ai" in endpoint:
-                collected_chunks = []
-                async with httpx.AsyncClient(http2=True) as client:
-                    async with client.stream(
-                        "POST",
-                        endpoint,
-                        headers=headers,
-                        json={**payload, "stream": True},  # enable streaming
-                        timeout=30.0
-                    ) as response:
-                        if response.status_code != 200:
-                            error_data = await response.aread()
-                            return "", f"Error: HTTP {response.status_code}, {error_data.decode('utf-8')}"
-                        buffer = []
-                        async for line in response.aiter_lines():
-                            if line.startswith("data: "):
-                                # parse partial chunks
-                                line_data = line.replace("data: ", "").strip()
-                                if line_data == "[DONE]":
-                                    break  # end of stream
-                                try:
-                                    json_response = json.loads(line_data)
-                                    delta = json_response["choices"][0].get("delta", {})
-                                    if "content" in delta:
-                                        buffer.append(delta["content"])
-                                        # If we see any punctuation or have a decent buffer, flush it
-                                        if len(buffer) >= 10 or any(c in ".,!?\n" for c in buffer[-1]):
-                                            collected_chunks.extend(buffer)
-                                            buffer = []
-                                except json.JSONDecodeError:
-                                    continue
-                        # Flush any remaining
-                        if buffer:
-                            collected_chunks.extend(buffer)
-                return "".join(collected_chunks), None
-            else:
-                # Original aiohttp approach for non-arcee endpoints
-                async with session.post(endpoint, headers=headers, json=payload) as response:
-                    if response.status != 200:
-                        error_msg = f"Error: HTTP {response.status}"
-                        try:
-                            error_data = await response.json()
-                            if 'error' in error_data:
-                                error_msg = f"Error: {error_data['error'].get('message', str(error_data))}"
-                        except:
-                            pass
-                        return "", error_msg
-                    response_data = await response.json()
-                    return response_data["choices"][0]["message"]["content"], None
-        except Exception as e:
-            return "", f"Error: {str(e)}"
-    async def compare_models_async(self, comparison_id: str, message: str):
-        """Compare two models concurrently and get their responses"""
-        config = self.comparisons[comparison_id]
-        async with aiohttp.ClientSession() as session:
-            # Create tasks for both API calls
-            task1 = self.get_model_response_async(
-                session,
-                config.endpoint1,
-                self.decrypt_api_key(config.api_key1),
-                config.model1,
-                message
             )
-            task2 = self.get_model_response_async(
-                session,
-                config.endpoint2,
-                self.decrypt_api_key(config.api_key2),
-                config.model2,
-                message
             )
-            # Run both tasks concurrently
-            response1, error1 = await task1
-            response2, error2 = await task2
-        # Format responses, including error messages if any
-        response1_formatted = [
-            {"role": "user", "content": message},
-            {"role": "assistant", "content": response1 if not error1 else f"⚠️ {error1}"}
-        ]
-        response2_formatted = [
-            {"role": "user", "content": message},
-            {"role": "assistant", "content": response2 if not error2 else f"⚠️ {error2}"}
-        ]
-        return (
-            gr.update(type='messages', value=response1_formatted),
-            gr.update(type='messages', value=response2_formatted),
-            gr.update(interactive=True)
         )
-    def compare_models(self, comparison_id: str, message: str):
-        """Synchronous wrapper for the async comparison function"""
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        try:
-            return loop.run_until_complete(
-                self.compare_models_async(comparison_id, message)
-            )
-        finally:
-            loop.close()
-    def toggle_comparison_status(self, comparison_id: str) -> tuple[str, str]:
-        """Toggle a model comparison between active and inactive states"""
-        if comparison_id not in self.comparisons:
-            return "Comparison not found!", "Deactivate"
-        self.comparisons[comparison_id].active = not self.comparisons[comparison_id].active
-        self.save_models()
-        new_status = "active" if self.comparisons[comparison_id].active else "inactive"
-        new_button_text = "Deactivate" if self.comparisons[comparison_id].active else "Activate"
-        return f"Comparison is now {new_status}!", new_button_text
-    def add_vote(self, comparison_id: str, message: str, response1_output: List[Dict], response2_output: List[Dict], winner: str, vote_info: str) -> str:
-        """Record a vote for model comparison"""
-        if winner is None or winner == "":
-            return "Please select a voting option", gr.Button(), gr.Textbox(), None
-        config = self.comparisons[comparison_id]
-        if winner == "Response 1":
-            winner = config.nick1
-        elif winner == "Response 2":
-            winner = config.nick2
-        vote = Vote(
-            comparison_id=comparison_id,
-            message=message,
-            model1_response=response1_output,
-            model2_response=response2_output,
-            winner=winner,
-            vote_info=vote_info,
-        )
-        self.vote_manager.add_vote(vote)
-        self.save_votes()
-        return "Vote recorded successfully!", gr.update(interactive=False), gr.update(value=""), gr.update(value=None)
-    def get_comparison_results(self, comparison_id: str) -> Optional[dict]:
-        """Get voting results for a specific comparison"""
-        if not comparison_id or comparison_id not in self.comparisons:
-            return None
-        comparison = self.comparisons[comparison_id]
-        return self.vote_manager.get_results(comparison)
-    def update_comparison_dropdown(self):
-        active_comparisons = self.get_active_comparisons()
-        return gr.update(
-            choices=active_comparisons,
-            value=active_comparisons[0] if active_comparisons else None
         )
-    def create_interface(self):
-        """Create Gradio interface"""
-        with gr.Blocks() as interface:
-            gr.Markdown("# Model Comparison Tool")
-            # Get initial active comparisons
-            active_comparisons = self.get_active_comparisons()
-            all_comparisons = self.get_all_comparisons()
-            first_active = active_comparisons[0] if active_comparisons else None
-            first_comparison = all_comparisons[0] if all_comparisons else None
-            # Store button and outputs we'll need to reference later
-            add_btn = None
-            add_output = None
-            with gr.Tab("Compare Models", id="compare_models_tab") as compare_models_tab:
-                comparison_dropdown = gr.Dropdown(
-                    choices=active_comparisons,
-                    label="Select Comparison",
-                    value=first_active,
-                    interactive=True,
-                    filterable=False
-                )
-                with gr.Row():
-                    response1_output = gr.Chatbot(label="Model 1", type='messages')
-                    response2_output = gr.Chatbot(label="Model 2", type='messages')
-                message_input = gr.Textbox(
-                    label="Enter your message",
-                    lines=4,
-                    max_lines=10,
-                    placeholder="Press Enter for new line, Shift + Enter to submit",
-                    autofocus=True,
-                )
-                compare_btn = gr.Button("Send", visible=bool(active_comparisons))
-                with gr.Row():
-                    vote_radio = gr.Radio(["Response 1", "Even", "Response 2"], label="Vote for better response")
-                with gr.Row():
-                    vote_info = gr.Textbox(
-                        label="Vote Explanation",
-                        lines=2,
-                        max_lines=5,
-                        placeholder="Optional: Add a reason for your vote",
-                    )
-                vote_btn = gr.Button("Submit Vote", interactive=False)
-                vote_output = gr.Textbox(label="Vote Result")
-                compare_models_tab.select(
-                    fn=self.update_comparison_dropdown,
-                    inputs=[],
-                    outputs=comparison_dropdown
-                )
-            with gr.Tab("Add Model Comparison"):
-                name = gr.Textbox(label="Comparison Name")
-                with gr.Row():
-                    nick1 = gr.Textbox(label="Model 1 nickname")
-                    nick2 = gr.Textbox(label="Model 2 nickname")
-                with gr.Row():
-                    endpoint1 = gr.Textbox(label="Endpoint 1")
-                    endpoint2 = gr.Textbox(label="Endpoint 2")
-                with gr.Row():
-                    api_key1 = gr.Textbox(label="API Key 1", type="password")
-                    api_key2 = gr.Textbox(label="API Key 2", type="password")
-                with gr.Row():
-                    model1 = gr.Textbox(label="Model 1")
-                    model2 = gr.Textbox(label="Model 2")
-                add_btn = gr.Button("Add Comparison")
-                add_output = gr.Textbox(label="Result")
-            with gr.Tab("Results", id="results_tab") as results_tab:
-                self.load_fresh_data()
-                results_comparison_dropdown = gr.Dropdown(
-                    choices=all_comparisons,  # Show all comparisons
-                    label="Select Comparison",
-                    value=first_comparison
-                )
-                initial_results = self.get_comparison_results(first_comparison)
-                with gr.Row():
-                    total_votes = gr.Textbox(label="Total votes", value=initial_results["total_votes"] if initial_results else 0, interactive=False)
-                with gr.Row():
-                    model1_name = gr.Textbox(label="Model 1", value=initial_results["model1_name"] if initial_results else "", interactive=False)
-                    model2_name = gr.Textbox(label="Model 2", value=initial_results["model2_name"] if initial_results else "", interactive=False)
-                with gr.Row():
-                    model1_votes = gr.Textbox(label="Model 1 votes", value=initial_results["model1_votes"] if initial_results else 0, interactive=False)
-                    model2_votes = gr.Textbox(label="Model 2 votes", value=initial_results["model2_votes"] if initial_results else 0, interactive=False)
-                with gr.Row():
-                    even_votes = gr.Textbox(label="Even votes", value=initial_results["even_votes"] if initial_results else 0, interactive=False)
-                # Add status indicator
-                status_text = gr.Textbox(
-                    label="Status",
-                    value="Active" if first_comparison and self.comparisons[first_comparison].active else "Inactive",
-                    interactive=False
-                )
-                toggle_btn = gr.Button(
-                    "Deactivate" if first_comparison and self.comparisons[first_comparison].active else "Activate"
-                )
-                toggle_output = gr.Textbox(label="Toggle Result")
-                results_tab.select(
-                    fn=lambda x: self.refresh_results(x),
-                    inputs=[results_comparison_dropdown],
-                    outputs=[
-                        total_votes,
-                        model1_name,
-                        model2_name,
-                        model1_votes,
-                        model2_votes,
-                        even_votes,
-                        status_text,
-                        toggle_btn
-                    ]
-                )
-                # Update component interactions
-                results_comparison_dropdown.change(
-                    fn=self.refresh_results,
-                    inputs=[results_comparison_dropdown],
-                    outputs=[
-                        total_votes,
-                        model1_name,
-                        model2_name,
-                        model1_votes,
-                        model2_votes,
-                        even_votes,
-                        status_text,
-                        toggle_btn
-                    ]
-                )
-                toggle_btn.click(
-                    fn=self.toggle_comparison_status,
-                    inputs=[results_comparison_dropdown],
-                    outputs=[toggle_output, toggle_btn]
-                ).then(
-                    fn=lambda: (
-                        gr.update(choices=self.get_active_comparisons()),
-                        gr.update(choices=self.get_all_comparisons())
-                    ),
-                    inputs=[],
-                    outputs=[comparison_dropdown, results_comparison_dropdown]
-                ).then(  # Add another refresh after toggle
-                    fn=self.refresh_results,
-                    inputs=[results_comparison_dropdown],
-                    outputs=[
-                        total_votes,
-                        model1_name,
-                        model2_name,
-                        model1_votes,
-                        model2_votes,
-                        even_votes,
-                        status_text,
-                        toggle_btn
-                    ]
-                )
-                comparison_dropdown.change(
-                    fn=lambda: (
-                        gr.update(value=""),
-                        gr.update(value=""),
-                        gr.update(interactive=False),
-                        gr.update(value=""),
-                        gr.update(value=None)
-                    ),
-                    inputs=[],
-                    outputs=[response1_output, response2_output, vote_btn, vote_info, vote_radio]
-                )
-                # Set up comparison tab interactions
-                compare_btn.click(
-                    fn=self.compare_models,
-                    inputs=[comparison_dropdown, message_input],
-                    outputs=[response1_output, response2_output, vote_btn]
-                )
-                message_input.submit(
-                    fn=self.compare_models,
-                    inputs=[comparison_dropdown, message_input],
-                    outputs=[response1_output, response2_output, vote_btn]
-                )
-                vote_btn.click(
-                    fn=self.add_vote,
-                    inputs=[comparison_dropdown, message_input, response1_output, response2_output, vote_radio, vote_info],
-                    outputs=[vote_output, vote_btn, vote_info, vote_radio]
-                )
-                # .then(
-                #     fn=self.refresh_results,  # Use the refresh method that forces data reload
-                #     inputs=[results_comparison_dropdown],
-                #     outputs=[
-                #         total_votes,
-                #         model1_name,
-                #         model2_name,
-                #         model1_votes,
-                #         model2_votes,
-                #         even_votes,
-                #         status_text,
-                #         toggle_btn
-                #     ]
-                # )
-                # Set up add model comparison tab interactions
-                add_btn.click(
-                    fn=self.add_model_comparison,
-                    inputs=[name, nick1, endpoint1, api_key1, model1, nick2, endpoint2, api_key2, model2],
-                    outputs=[add_output, comparison_dropdown, results_comparison_dropdown, compare_btn]
-                )
-        return interface
-def main():
-    app = ModelComparisonApp()
-    interface = app.create_interface()
-    interface.launch()
 if __name__ == "__main__":
-    main()

 import gradio as gr
+from huggingface_hub import InferenceClient
+from typing import Dict, List, Optional, Generator, AsyncGenerator
+from dataclasses import dataclass
+import httpx
 import json
 import asyncio
+import openai
+import os
+# API credentials are read from the environment once, at import time.
+# os.environ.get returns None when a variable is unset, so a missing key is
+# not detected here.
+arcee_api_key = os.environ.get("arcee_api_key")
+openrouter_api_key = os.environ.get("openrouter_api_key")
 @dataclass
+class ModelConfig:
+    """Connection settings for one routed chat model."""
+    # Model identifier sent as the "model" field of the request payload.
+    name: str
+    # URL that requests are POSTed to. The values configured in this file are
+    # full ".../chat/completions" endpoints, not bare base URLs.
+    base_url: str
+    # Token placed in the "Authorization: Bearer ..." header.
+    # NOTE(review): annotated str, but the configured values come from
+    # os.environ.get and can therefore be None.
+    api_key: str
+# Routing table keyed by complexity level (1-4). get_model_response looks up
+# MODEL_CONFIGS[complexity] to pick the model that answers a prompt, so every
+# level a caller can produce must have an entry here.
+MODEL_CONFIGS = {
+    1: ModelConfig(
+        name="virtuoso-small",
+        base_url="https://models.arcee.ai/v1/chat/completions",
+        api_key=arcee_api_key
+    ),
+    2: ModelConfig(
+        name="virtuoso-medium",
+        base_url="https://models.arcee.ai/v1/chat/completions",
+        api_key=arcee_api_key
+    ),
+    3: ModelConfig(
+        name="virtuoso-large",
+        base_url="https://models.arcee.ai/v1/chat/completions",
+        api_key=arcee_api_key
+    ),
+    4: ModelConfig(
+        name="anthropic/claude-3.5-sonnet",
+        base_url="https://openrouter.ai/api/v1/chat/completions",
+        api_key=openrouter_api_key
+    )
+}
+class ModelUsageStats:
+    """Count how many queries were routed to each complexity level (1-4)."""
+    # Display label per complexity level. Level 4 is served by
+    # "anthropic/claude-3.5-sonnet", so its label names that model.
+    _MODEL_LABELS = {
+        1: "virtuoso-small",
+        2: "virtuoso-medium",
+        3: "virtuoso-large",
+        4: "claude-3.5-sonnet",
+    }
     def __init__(self):
+        # Derive the counters from the label table so the two cannot drift.
+        self.usage_counts = {level: 0 for level in self._MODEL_LABELS}
+        self.total_queries = 0
+    def update(self, complexity: int):
+        """Record one query routed at ``complexity``.
+        Raises:
+            KeyError: if ``complexity`` is not one of the tracked levels.
+        """
+        self.usage_counts[complexity] += 1
+        self.total_queries += 1
+    def get_stats(self) -> str:
+        """Return one "<label>: <count> uses (<pct>%)" line per level.
+        Before the first update this returns "No queries processed yet.",
+        which also keeps the percentage from dividing by zero.
+        """
+        if self.total_queries == 0:
+            return "No queries processed yet."
+        return "\n".join(
+            f"{self._MODEL_LABELS[level]}: {count} uses "
+            f"({count / self.total_queries * 100:.1f}%)"
+            for level, count in self.usage_counts.items()
+        )
+# Module-level usage tracker, created once when the module is imported.
+stats = ModelUsageStats()
+async def get_complexity(prompt: str) -> int:
+    """Ask the complexity-scoring service which level should handle ``prompt``.
+    POSTs ``{"prompt": prompt}`` to the scoring endpoint and reads the
+    "complexity" field of the JSON reply.
+    Returns:
+        A complexity level that is a valid key of MODEL_CONFIGS. On any
+        failure (network error, non-2xx status, malformed or out-of-range
+        reply) the error is printed and level 3 is returned instead.
+    """
+    fallback = 3  # level 3 selects "virtuoso-large" in MODEL_CONFIGS
+    try:
+        async with httpx.AsyncClient(http2=True) as client:
+            response = await client.post(
+                "http://185.216.20.86:8000/complexity",
+                headers={"Content-Type": "application/json"},
+                json={"prompt": prompt},
+                timeout=10
             )
+            response.raise_for_status()
+            # Validate before returning: the value is later used to index
+            # MODEL_CONFIGS, so an unexpected level must not escape from here.
+            complexity = int(response.json()["complexity"])
+            if complexity not in MODEL_CONFIGS:
+                raise ValueError(f"unexpected complexity level: {complexity!r}")
+            return complexity
+    except Exception as e:
+        # Best-effort routing: never fail the request because scoring failed.
+        print(f"Error getting complexity: {e}")
+        return fallback
+async def get_model_response(message: str, history: List[Dict[str, str]], complexity: int) -> AsyncGenerator[str, None]:
+    model_config = MODEL_CONFIGS[complexity]
+    headers = {
+        "Content-Type": "application/json"
+    }
+    if "openrouter.ai" in model_config.base_url:
+        headers.update({
+            "HTTP-Referer": "https://github.com/lucataco/gradio-router",
+            "X-Title": "Gradio Router",
+            "Authorization": f"Bearer {model_config.api_key}"
+        })
+    elif "arcee.ai" in model_config.base_url:
+        headers.update({
+            "Authorization": f"Bearer {model_config.api_key}"
+        })
+    try:
+        collected_chunks = []
+        # For Arcee.ai models, use direct API call with HTTP/2
+        if "arcee.ai" in model_config.base_url:
+            messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
+            for msg in history:
+                # Clean content
+                content = msg["content"]
+                if isinstance(content, str):
+                    content = content.split("\n\n<div")[0]
+                messages.append({"role": msg["role"], "content": content})
+            messages.append({"role": "user", "content": message})
+            async with httpx.AsyncClient(http2=True) as client:
+                async with client.stream(
+                    "POST",
+                    model_config.base_url,
+                    headers=headers,
+                    json={
+                        "model": model_config.name,
+                        "messages": messages,
+                        "temperature": 0.7,
+                        "stream": True
+                    },
+                    timeout=30.0
+                ) as response:
+                    response.raise_for_status()
+                    buffer = []
+                    async for line in response.aiter_lines():
+                        if line.startswith("data: "):
+                            try:
+                                json_response = json.loads(line.replace("data: ", ""))
+                                if json_response.get('choices') and json_response['choices'][0].get('delta', {}).get('content'):
+                                    buffer.append(json_response['choices'][0]['delta']['content'])
+                                    if len(buffer) >= 10 or any(c in '.,!?\n' for c in buffer[-1]):
+                                        collected_chunks.extend(buffer)
+                                        yield "".join(collected_chunks)
+                                        buffer = []
+                            except json.JSONDecodeError:
+                                continue
+                    if buffer:  # Yield any remaining content
+                        collected_chunks.extend(buffer)
+                        yield "".join(collected_chunks)
+        # For OpenRouter models, use direct API call with streaming
+        else:
+            messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
+            for msg in history:
+                content = msg["content"]
+                if isinstance(content, str):
+                    content = content.split("\n\n<div")[0]
+                messages.append({"role": msg["role"], "content": content})
+            messages.append({"role": "user", "content": message})
+            async with httpx.AsyncClient(http2=True) as client:
+                async with client.stream(
+                    "POST",
+                    model_config.base_url,
+                    headers=headers,
+                    json={
+                        "model": model_config.name,
+                        "messages": messages,
+                        "temperature": 0.7,
+                        "stream": True
+                    },
+                    timeout=30.0
+                ) as response:
+                    response.raise_for_status()
+                    buffer = []
+                    async for line in response.aiter_lines():
+                        if line.startswith("data: "):
+                            try:
+                                json_response = json.loads(line.replace("data: ", ""))
+                                if json_response.get('choices') and json_response['choices'][0].get('delta', {}).get('content'):
+                                    buffer.append(json_response['choices'][0]['delta']['content'])
+                                    if len(buffer) >= 10 or any(c in '.,!?\n' for c in buffer[-1]):
+                                        collected_chunks.extend(buffer)
+                                        yield "".join(collected_chunks)
+                                        buffer = []
+                            except json.JSONDecodeError:
+                                continue
+                    if buffer:  # Yield any remaining content
+                        collected_chunks.extend(buffer)
+                        yield "".join(collected_chunks)
+    except Exception as e:
+        error_msg = str(e)
+        print(f"Error getting model response: {error_msg}")
+        if "464" in error_msg:
+            yield "Error: Authentication failed. Please check your API key and try again."
+        elif "Internal Server Error" in error_msg:
+            yield "Error: The server encountered an internal error. Please try again later."
+        else:
+            yield f"Error: Unable to get response from {model_config.name}. {error_msg}"
+async def chat_wrapper(
+    message: str,
+    history: List[Dict[str, str]],
+    system_message: str,
+    max_tokens: int,
+    temperature: float,
+    top_p: float,
+    model_usage_stats: str,
+):
+    """Classify a user message, stream the model reply, and emit chat updates.
+
+    The complexity returned by ``get_complexity`` is recorded in ``stats``,
+    used to look up the display name in ``MODEL_CONFIGS``, and passed on to
+    ``get_model_response`` together with a cleaned copy of the history.
+
+    Yields:
+        A ``(messages, stats_text)`` pair for every streamed update, where
+        ``messages`` is the incoming ``history`` followed by the new user
+        turn and the assistant reply so far (with a model-info footer), and
+        ``stats_text`` is the current ``stats.get_stats()`` output.
+
+    Note:
+        ``system_message``, ``max_tokens``, ``temperature``, ``top_p`` and
+        ``model_usage_stats`` are accepted but never read in this function.
+    """
+    complexity = await get_complexity(message)
+    stats.update(complexity)
+    model_name = MODEL_CONFIGS[complexity].name
+    # Rebuild the history for the model: skip entries that are not
+    # role/content dicts and cut off the display-only footer that earlier
+    # assistant replies carry after "\n\n<div".
+    cleaned_history = []
+    for entry in history:
+        if not (isinstance(entry, dict) and "role" in entry and "content" in entry):
+            continue
+        text = entry["content"]
+        if isinstance(text, str):
+            text = text.split("\n\n<div")[0]
+        cleaned_history.append({"role": entry["role"], "content": text})
+    # Every item from the stream is the whole reply so far, so each yield
+    # replaces the previous assistant bubble rather than appending to it.
+    async for reply_so_far in get_model_response(message, cleaned_history, complexity):
+        labelled_reply = f"{reply_so_far}\n\n<div class='model-info'>Model: {model_name}</div>"
+        stats_text = stats.get_stats()
+        user_turn = {"role": "user", "content": message}
+        assistant_turn = {"role": "assistant", "content": labelled_reply}
+        yield [*history, user_turn, assistant_turn], stats_text
+# Build the Gradio UI: a themed Blocks app with custom CSS containing a title,
+# a chat window, an input row, an "Advanced Settings" accordion and a
+# read-only usage-statistics box.
+with gr.Blocks(
+    theme=gr.themes.Soft(
+        primary_hue="blue",
+        secondary_hue="indigo",
+        neutral_hue="slate",
+        font=("Inter", "system-ui", "sans-serif")
+    ),
+    # Styles for the classes attached below via elem_classes ("container",
+    # "title", "subtitle", "stats-box") and for the "model-info" div that
+    # chat_wrapper appends to each assistant reply.
+    css="""
+        .container {
+            max-width: 1000px;
+            margin: auto;
+            padding: 2rem;
         }
+        .title {
+            text-align: center;
+            font-size: 2.5rem;
+            font-weight: 600;
+            margin: 1rem 0;
+            background: linear-gradient(to right, var(--primary-500), var(--secondary-500));
+            -webkit-background-clip: text;
+            -webkit-text-fill-color: transparent;
+        }
+        .subtitle {
+            text-align: center;
+            font-size: 1.1rem;
+            color: var(--neutral-700);
+            margin-bottom: 2rem;
+            font-weight: 400;
+        }
+        .model-info {
+            font-style: italic;
+            color: var(--neutral-500);
+            font-size: 0.85em;
+            margin-top: 1em;
+            padding-top: 0.5em;
+            border-top: 1px solid var(--neutral-200);
+            opacity: 0.8;
+        }
+        .stats-box {
+            margin-top: 1rem;
+            padding: 1rem;
+            border-radius: 0.75rem;
+            background: color-mix(in srgb, var(--background-fill) 80%, transparent);
+            border: 1px solid var(--neutral-200);
+            font-family: monospace;
+            white-space: pre-line;
+        }
+        .message.assistant {
+            padding-bottom: 1.5em !important;
+        }
+    """
+) as demo:
+    with gr.Column(elem_classes="container"):
+        gr.Markdown("# AI Model Router", elem_classes="title")
+        gr.Markdown(
+            "Your message will be routed to the appropriate AI model based on complexity.",
+            elem_classes="subtitle"
         )
+        # Chat window. chat_wrapper yields lists of {"role", "content"} dicts,
+        # which is the history shape used with type="messages".
+        chatbot = gr.Chatbot(
+            value=[],
+            bubble_full_width=False,
+            show_label=False,
+            height=450,
+            container=True,
+            type="messages"
         )
+        # Input row: the message box plus a button that clears both the box
+        # and the chat window.
+        with gr.Row():
+            txt = gr.Textbox(
+                show_label=False,
+                placeholder="Enter your message here...",
+                container=False,
+                scale=7
             )
+            clear = gr.ClearButton(
+                [txt, chatbot],
+                scale=1,
+                variant="secondary",
+                size="sm"
             )
+        # NOTE(review): these four controls are passed to chat_wrapper on
+        # submit, but chat_wrapper's body never reads them, so changing them
+        # appears to have no effect on the request -- confirm and wire up.
+        with gr.Accordion("Advanced Settings", open=False):
+            system_message = gr.Textbox(value="You are a helpful AI assistant.", label="System message")
+            max_tokens = gr.Slider(minimum=16, maximum=4096, value=2048, step=1, label="Max Tokens")
+            temperature = gr.Slider(minimum=0, maximum=2, value=0.7, step=0.1, label="Temperature")
+            top_p = gr.Slider(minimum=0, maximum=1, value=0.9, step=0.1, label="Top P")
+        # Read-only box initialised from stats.get_stats(); it is both an
+        # input and an output of the submit handler below.
+        stats_display = gr.Textbox(
+            value=stats.get_stats(),
+            label="Model Usage Statistics",
+            interactive=False,
+            elem_classes="stats-box"
         )
+        # Set up event handler for streaming
+        # On submit, chat_wrapper's yielded (messages, stats_text) pairs update
+        # the chat window and stats box; the chained step then sets the input
+        # box to an empty string.
+        txt.submit(
+            chat_wrapper,
+            [txt, chatbot, system_message, max_tokens, temperature, top_p, stats_display],
+            [chatbot, stats_display],
+        ).then(
+            lambda: "",
+            None,
+            [txt],
         )
+# Start the app only when this file is run as a script, not when imported.
 if __name__ == "__main__":
+    demo.queue().launch()