"""TriviaVilla: a Gradio chat app that runs a Nigerian-music trivia game on Groq's LLM API.

The script keeps a single module-level conversation history (this app serves one
session at a time), trims it when it exceeds an approximate token budget, and
resets it after 3 minutes of inactivity via a daemon thread.
"""
from groq import Groq
import gradio as gr
import time
import threading
import os

# API key is read from the environment variable "TriviaVilla".
api_key = os.getenv("TriviaVilla")

# Initialize the Groq client with the API key.
client = Groq(
    api_key=api_key
)

# Shared system prompt: used both for the initial history and for the
# inactivity reset, so the two can never drift apart.
SYSTEM_MESSAGE = {"role": "system", "content": "You are an experienced assistant in a trivia game focused on Nigerian music. With over 40 years of keeping track of Nigerian Music, You have great knowledge of the Nigerian Music Industry"}

# Conversation history sent to the LLM on every request, seeded with the system prompt.
conversation_history = [dict(SYSTEM_MESSAGE)]

# Timestamp of the most recent user interaction (used by the inactivity reset thread).
last_interaction_time = time.time()

# Lock synchronizing access to conversation_history / last_interaction_time
# between the Gradio handler thread and the reset thread.
lock = threading.Lock()


def count_tokens(messages):
    """Approximate the token count of *messages* as the total number of
    whitespace-separated words across all message contents.

    This is a rough proxy (real tokenizers differ), but it is cheap and
    sufficient for keeping the history under the model's context budget.
    """
    return sum(len(message["content"].split()) for message in messages)


def start_trivia_game():
    """Send the game-setup prompt to the LLM and return its opening message.

    Appends both the setup prompt and the assistant's reply to the shared
    conversation history. Called once at module load to pre-fill the UI.
    """
    # Instruction prompt that defines the game's rules and answer format.
    initial_message = "Start a Trivia on Nigerian music. Welcome the user and tell the user to type Start to start. Tell the user to type End to finish the game. Make your questions always concise, no irrelevant text. Expect the precise answer as text or the alphabet of the option from users as the answer. Make 'None of the Above' an option available so that if the answer to the question is not in the options, users can choose 'None of the above'. Assess the answers and provide the percentage score based on the present and previous scores and present the next question to keep it going. The highest percentage is 100% and no percentage assessment should be greater than this."

    with lock:
        conversation_history.append({"role": "user", "content": initial_message})

    # Stream the completion and accumulate the chunks into one string.
    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=conversation_history,
        temperature=0.3,
        max_tokens=4096,
        top_p=1,
        stream=True,
        stop=None,
    )
    llm_output = ""
    for chunk in completion:
        # delta.content can be None on the final chunk; coalesce to "".
        llm_output += chunk.choices[0].delta.content or ""

    with lock:
        conversation_history.append({"role": "assistant", "content": llm_output})

    return llm_output


def continue_trivia_game(user_response):
    """Handle one user turn: record it, trim history to the token budget,
    query the LLM, and return (and record) the assistant's reply.

    Returns a user-friendly error string instead of raising if the API call
    fails (e.g. rate limiting).
    """
    global last_interaction_time

    with lock:
        last_interaction_time = time.time()  # mark activity for the reset thread

    with lock:
        conversation_history.append({"role": "user", "content": user_response})

    # Token-budget management: drop the oldest user/assistant pair (index 1 is
    # the oldest non-system message) until we fit. The length guard lives in
    # the while-condition; the original placed it inside the body, which spun
    # forever if the remaining messages alone exceeded the budget.
    max_tokens = 8000  # approximate context budget for the model
    current_tokens = count_tokens(conversation_history)
    while current_tokens > max_tokens and len(conversation_history) > 2:
        with lock:
            conversation_history.pop(1)  # oldest user message (index 0 is the system prompt)
            conversation_history.pop(1)  # its paired assistant response
        current_tokens = count_tokens(conversation_history)

    try:
        completion = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=conversation_history,
            temperature=0.3,
            max_tokens=8000,
            top_p=1,
            stream=True,
            stop=None,
        )
        llm_output = ""
        for chunk in completion:
            llm_output += chunk.choices[0].delta.content or ""

        with lock:
            conversation_history.append({"role": "assistant", "content": llm_output})

        return llm_output
    except Exception as e:
        # Surface rate-limit errors with a specific message; everything else
        # gets a generic retry hint with the error text attached.
        if "rate_limit_exceeded" in str(e):
            return "You've reached the maximum number of requests. Please wait a few minutes before trying again."
        else:
            return f"An error occurred. Try again in 10 minutes: {str(e)}"


def reset_session():
    """Background loop: wipe the conversation after 3 minutes of inactivity.

    Runs forever in a daemon thread, polling every 10 seconds.
    """
    global conversation_history
    while True:
        time.sleep(10)  # poll interval
        with lock:
            if time.time() - last_interaction_time > 180:  # 3 minutes idle
                conversation_history = [dict(SYSTEM_MESSAGE)]


# Start the inactivity-reset thread (daemon so it won't block shutdown).
reset_thread = threading.Thread(target=reset_session, daemon=True)
reset_thread.start()

# Kick off the game once so the UI opens with the LLM's welcome message.
initial_output = start_trivia_game()

# Build the Gradio interface.
with gr.Blocks() as demo:
    # Title and description. (Model name corrected to match the model actually
    # called above: llama-3.3-70b-versatile.)
    gr.Markdown("# TriviaVilla\n How much do you know about the Nigerian Music Industry? Here is a Trivia to test your knowledge. Developed using Llama 3.3 LLM. This model may hallucinate sometimes, but you can guide it through your prompt.")

    # Read-only box showing the LLM's latest message, pre-filled with the welcome.
    llm_output = gr.Textbox(label="LLM Output", placeholder="", lines=10, value=initial_output)

    # Input box for the user's answer / command.
    user_response = gr.Textbox(label="Your Response", placeholder="Type your response here", lines=3)

    submit_button = gr.Button("Submit")

    def update_llm_output(user_input):
        """Gradio callback: forward the user's input and return the LLM reply."""
        return continue_trivia_game(user_input)

    submit_button.click(fn=update_llm_output, inputs=user_response, outputs=llm_output)

# Launch the Gradio app.
demo.launch()