"""TriviaVilla: a Gradio chat app that runs a Nigerian-music trivia game on Groq's LLM API.

The script keeps a single module-level conversation history (this app serves one
session at a time), trims it when it exceeds an approximate token budget, and
resets it after 3 minutes of inactivity via a daemon thread.
"""
from groq import Groq
import gradio as gr
import time
import threading
import os

# API key is read from the environment variable "TriviaVilla".
api_key = os.getenv("TriviaVilla")

# Initialize the Groq client with the API key.
client = Groq(
    api_key=api_key
)

# Shared system prompt: used both for the initial history and for the
# inactivity reset, so the two can never drift apart.
SYSTEM_MESSAGE = {"role": "system", "content": "You are an experienced assistant in a trivia game focused on Nigerian music. With over 40 years of keeping track of Nigerian Music, You have great knowledge of the Nigerian Music Industry"}

# Conversation history sent to the LLM on every request, seeded with the system prompt.
conversation_history = [dict(SYSTEM_MESSAGE)]

# Timestamp of the most recent user interaction (used by the inactivity reset thread).
last_interaction_time = time.time()

# Lock synchronizing access to conversation_history / last_interaction_time
# between the Gradio handler thread and the reset thread.
lock = threading.Lock()


def count_tokens(messages):
    """Approximate the token count of *messages* as the total number of
    whitespace-separated words across all message contents.

    This is a rough proxy (real tokenizers differ), but it is cheap and
    sufficient for keeping the history under the model's context budget.
    """
    return sum(len(message["content"].split()) for message in messages)


def start_trivia_game():
    """Send the game-setup prompt to the LLM and return its opening message.

    Appends both the setup prompt and the assistant's reply to the shared
    conversation history. Called once at module load to pre-fill the UI.
    """
    # Instruction prompt that defines the game's rules and answer format.
    initial_message = "Start a Trivia on Nigerian music. Welcome the user and tell the user to type Start to start. Tell the user to type End to finish the game. Make your questions always concise, no irrelevant text. Expect the precise answer as text or the alphabet of the option from users as the answer. Make 'None of the Above' an option available so that if the answer to the question is not in the options, users can choose 'None of the above'. Assess the answers and provide the percentage score based on the present and previous scores and present the next question to keep it going. The highest percentage is 100% and no percentage assessment should be greater than this."

    with lock:
        conversation_history.append({"role": "user", "content": initial_message})

    # Stream the completion and accumulate the chunks into one string.
    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=conversation_history,
        temperature=0.3,
        max_tokens=4096,
        top_p=1,
        stream=True,
        stop=None,
    )
    llm_output = ""
    for chunk in completion:
        # delta.content can be None on the final chunk; coalesce to "".
        llm_output += chunk.choices[0].delta.content or ""

    with lock:
        conversation_history.append({"role": "assistant", "content": llm_output})

    return llm_output


def continue_trivia_game(user_response):
    """Handle one user turn: record it, trim history to the token budget,
    query the LLM, and return (and record) the assistant's reply.

    Returns a user-friendly error string instead of raising if the API call
    fails (e.g. rate limiting).
    """
    global last_interaction_time

    with lock:
        last_interaction_time = time.time()  # mark activity for the reset thread

    with lock:
        conversation_history.append({"role": "user", "content": user_response})

    # Token-budget management: drop the oldest user/assistant pair (index 1 is
    # the oldest non-system message) until we fit. The length guard lives in
    # the while-condition; the original placed it inside the body, which spun
    # forever if the remaining messages alone exceeded the budget.
    max_tokens = 8000  # approximate context budget for the model
    current_tokens = count_tokens(conversation_history)
    while current_tokens > max_tokens and len(conversation_history) > 2:
        with lock:
            conversation_history.pop(1)  # oldest user message (index 0 is the system prompt)
            conversation_history.pop(1)  # its paired assistant response
        current_tokens = count_tokens(conversation_history)

    try:
        completion = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=conversation_history,
            temperature=0.3,
            max_tokens=8000,
            top_p=1,
            stream=True,
            stop=None,
        )
        llm_output = ""
        for chunk in completion:
            llm_output += chunk.choices[0].delta.content or ""

        with lock:
            conversation_history.append({"role": "assistant", "content": llm_output})

        return llm_output
    except Exception as e:
        # Surface rate-limit errors with a specific message; everything else
        # gets a generic retry hint with the error text attached.
        if "rate_limit_exceeded" in str(e):
            return "You've reached the maximum number of requests. Please wait a few minutes before trying again."
        else:
            return f"An error occurred. Try again in 10 minutes: {str(e)}"


def reset_session():
    """Background loop: wipe the conversation after 3 minutes of inactivity.

    Runs forever in a daemon thread, polling every 10 seconds.
    """
    global conversation_history
    while True:
        time.sleep(10)  # poll interval
        with lock:
            if time.time() - last_interaction_time > 180:  # 3 minutes idle
                conversation_history = [dict(SYSTEM_MESSAGE)]


# Start the inactivity-reset thread (daemon so it won't block shutdown).
reset_thread = threading.Thread(target=reset_session, daemon=True)
reset_thread.start()

# Kick off the game once so the UI opens with the LLM's welcome message.
initial_output = start_trivia_game()

# Build the Gradio interface.
with gr.Blocks() as demo:
    # Title and description. (Model name corrected to match the model actually
    # called above: llama-3.3-70b-versatile.)
    gr.Markdown("# TriviaVilla\n How much do you know about the Nigerian Music Industry? Here is a Trivia to test your knowledge. Developed using Llama 3.3 LLM. This model may hallucinate sometimes, but you can guide it through your prompt.")

    # Read-only box showing the LLM's latest message, pre-filled with the welcome.
    llm_output = gr.Textbox(label="LLM Output", placeholder="", lines=10, value=initial_output)

    # Input box for the user's answer / command.
    user_response = gr.Textbox(label="Your Response", placeholder="Type your response here", lines=3)

    submit_button = gr.Button("Submit")

    def update_llm_output(user_input):
        """Gradio callback: forward the user's input and return the LLM reply."""
        return continue_trivia_game(user_input)

    submit_button.click(fn=update_llm_output, inputs=user_response, outputs=llm_output)

# Launch the Gradio app.
demo.launch()