import psutil
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
import os
import tarfile
from typing import List, Tuple
import boto3
import logging

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class CustomerSupportBot:
    def __init__(self, model_path="models/customer_support_gpt"):
        """
        Initialize the customer support bot with the fine-tuned model.

        Args:
            model_path (str): Path to the saved model and tokenizer
        """
        self.process = psutil.Process(os.getpid())
        self.model_path = model_path
        self.model_file_path = os.path.join(self.model_path, "model.tar.gz")
        self.s3 = boto3.client("s3")
        self.model_key = "models/model.tar.gz"
        self.bucket_name = "customer-support-gpt"

        # Download and load the model
        self.download_and_load_model()

    def download_and_load_model(self):
        # Create the model directory if it does not exist
        if not os.path.exists(self.model_path):
            os.makedirs(self.model_path)

        # Download model.tar.gz from S3 if not already downloaded
        if not os.path.exists(self.model_file_path):
            print("Downloading model from S3...")
            self.s3.download_file(self.bucket_name, self.model_key, self.model_file_path)
            print("Download complete. Extracting model files...")

            # Extract the model files
            with tarfile.open(self.model_file_path, "r:gz") as tar:
                tar.extractall(self.model_path)

        # Load the model and tokenizer from the extracted files
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
        self.model = AutoModelForCausalLM.from_pretrained(self.model_path)

        # Some causal LM tokenizers (e.g. GPT-2) ship without a pad token;
        # fall back to the EOS token so generate() does not receive None.
        if self.tokenizer.pad_token_id is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        print("Model and tokenizer loaded successfully.")

        # Pin the model to CPU (the CUDA check is intentionally commented out)
        self.device = "cpu"  # "cuda" if torch.cuda.is_available() else "cpu"
        self.model = self.model.to(self.device)
        print(f"Model loaded on device: {self.device}")

    def generate_response(self, message: str, max_length=100, temperature=0.7) -> str:
        try:
            input_text = f"Instruction: {message}\nResponse:"

            # Tokenize the input text
            inputs = self.tokenizer(input_text, return_tensors="pt").to(self.device)

            # Generate a response; note that max_length caps the combined
            # prompt + completion token count
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_length=max_length,
                    temperature=temperature,
                    num_return_sequences=1,
                    pad_token_id=self.tokenizer.pad_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                    do_sample=True,
                    top_p=0.95,
                    top_k=50
                )

            # Decode and keep only the text after the "Response:" marker
            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            response = response.split("Response:")[-1].strip()
            return response
        except Exception as e:
            return f"An error occurred: {str(e)}"

    def monitor_resources(self) -> dict:
        # CPU and resident-memory usage of this process
        usage = {
            "CPU (%)": self.process.cpu_percent(interval=1),
            "RAM (GB)": self.process.memory_info().rss / (1024 ** 3)
        }
        return usage
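
# ---------------------------------------------------------------------------
# Note on the prompt template (descriptive, inferred from generate_response
# above): each user message is wrapped as
#
#     Instruction: <message>
#     Response:
#
# and everything up to the last "Response:" marker is stripped from the
# decoded output. The fine-tuned model is therefore assumed to have been
# trained on instruction/response pairs in this exact format.
# ---------------------------------------------------------------------------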

def create_chat_interface():
    bot = CustomerSupportBot(model_path="/app/models")

    def predict(message: str, history: List[Tuple[str, str]]) -> Tuple[str, List[Tuple[str, str]]]:
        if not message:
            return "", history
        bot_response = bot.generate_response(message)

        # Log per-request resource usage
        usage = bot.monitor_resources()
        print("Resource Usage:", usage)

        history.append((message, bot_response))
        return "", history

    # Build the Gradio interface with custom CSS
    with gr.Blocks(css="""
        .message-box { margin-bottom: 10px; }
        .button-row { display: flex; gap: 10px; margin-top: 10px; }
    """) as interface:
        gr.Markdown("# Customer Support Chatbot")
        gr.Markdown("Welcome! How can I assist you today?")

        chatbot = gr.Chatbot(
            label="Chat History",
            height=500,
            elem_classes="message-box",
            # type="messages"
        )

        with gr.Row():
            msg = gr.Textbox(
                label="Your Message",
                placeholder="Type your message here...",
                lines=2,
                elem_classes="message-box"
            )

        with gr.Row(elem_classes="button-row"):
            submit = gr.Button("Send Message", variant="primary")
            clear = gr.ClearButton([msg, chatbot], value="Clear Chat")

        # Add example queries in a separate row
        with gr.Row():
            gr.Examples(
                examples=[
                    "How do I reset my password?",
                    "What are your shipping policies?",
                    "I want to return a product.",
                    "How can I track my order?",
                    "What payment methods do you accept?"
                ],
                inputs=msg,
                label="Example Questions"
            )

        # Set up event handlers: both the button and Enter in the textbox
        # route through predict
        submit.click(
            predict,
            inputs=[msg, chatbot],
            outputs=[msg, chatbot]
        )

        msg.submit(
            predict,
            inputs=[msg, chatbot],
            outputs=[msg, chatbot]
        )

        # Enable the send button only while the textbox has non-blank text
        msg.change(
            lambda x: gr.update(interactive=bool(x.strip())),
            inputs=[msg],
            outputs=[submit]
        )

    print("Interface created successfully.")

    # Warm-up query so the first real request is not the model's cold start
    print(predict("How are you", []))

    # Report the bot process's resource usage
    print(f"Bot Resource Usage: {bot.monitor_resources()}")

    # Show system-wide usage for comparison
    print(f"CPU Percentage: {psutil.cpu_percent()}")
    print(f"RAM Usage: {psutil.virtual_memory()}")
    print(f"Swap Memory: {psutil.swap_memory()}")

    return interface


if __name__ == "__main__":
    demo = create_chat_interface()
    print("Starting Gradio server...")
    demo.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7860,  # Gradio's default port
        debug=True,
        inline=False
    )
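
# ---------------------------------------------------------------------------
# Optional smoke test (an illustrative sketch, not part of the original app):
# exercise CustomerSupportBot directly, without the Gradio UI. This assumes
# model.tar.gz has already been downloaded and extracted under the default
# path used by the class; adjust model_path for your environment.
#
#   bot = CustomerSupportBot(model_path="models/customer_support_gpt")
#   print(bot.generate_response("How do I reset my password?"))
#   print(bot.monitor_resources())
# ---------------------------------------------------------------------------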