import pandas as pd
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig

# Load the configuration and adjust it.
config = AutoConfig.from_pretrained("meta-llama/Meta-Llama-3.1-8B")
if 'rope_scaling' in config.to_dict() and isinstance(config.rope_scaling, dict):
    # Older transformers releases expect rope_scaling as {"type", "factor"};
    # collapse the Llama 3.1 dict to that shape.
    config.rope_scaling = {
        "type": config.rope_scaling.get('rope_type', 'llama3'),
        "factor": config.rope_scaling.get('factor', 8.0)
    }

# Load the tokenizer and the pretrained weights with the adjusted configuration.
# (from_config would build a randomly initialized model, so from_pretrained is used here.)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3.1-8B", config=config)

# Load and preprocess the DataFrame.
df = pd.read_csv('anomalies.csv')
df['Feedback'] = ""
df['ds'] = pd.to_datetime(df['ds']).dt.strftime('%Y-%m-%d')
df['real'] = df['real'].apply(lambda x: f"{x:.2f}")

# Convert each data row into a natural-language sentence.
def tokenize_row(row):
    return f"On {row['ds']}, the expense in the group '{row['Group']}' was ${row['real']}."

df['tokenized'] = df.apply(tokenize_row, axis=1)
print(df)

# Generate a response based on the latest data entries.
def answer_question_with_llama(question):
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    latest_entries = df['tokenized'].tail(10).tolist()
    prompt = f"Based on the following data: {' '.join(latest_entries)} Question: {question} Answer:"

    inputs = tokenizer(prompt, return_tensors='pt', truncation=True, max_length=512)
    generated_ids = model.generate(
        inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_new_tokens=100,          # leave room for the answer beyond the prompt
        do_sample=True,              # required for temperature / top_p to take effect
        temperature=0.7,             # sampling temperature for diversity
        top_p=0.9,
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.pad_token_id
    )

    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    response_part = generated_text.split("Answer:")[1] if "Answer:" in generated_text else "No answer found."
    # Keep only the first sentence of the answer.
    final_response = response_part.split(".")[0].strip() + "."
    return final_response

# Attach user feedback to the matching group in the DataFrame.
def add_feedback(name, feedback):
    global df
    if name in df['Group'].values:
        df.loc[df['Group'] == name, 'Feedback'] = feedback
        return "Feedback successfully added."
    else:
        return "Data not found in DataFrame."

# Gradio interface: one column for questions, one for feedback.
with gr.Blocks() as demo:
    gr.Markdown("# Data Query and Feedback System")
    with gr.Row():
        with gr.Column():
            question_input = gr.Textbox(label="Ask a Question")
            answer_output = gr.Textbox(label="Answer", interactive=False)
            ask_button = gr.Button("Ask")
        with gr.Column():
            name_input = gr.Textbox(label="Name for Feedback")
            feedback_input = gr.Textbox(label="Feedback")
            feedback_result = gr.Textbox(label="Feedback Result", interactive=False)
            submit_button = gr.Button("Submit Feedback")

    ask_button.click(fn=answer_question_with_llama, inputs=question_input, outputs=answer_output)
    submit_button.click(fn=add_feedback, inputs=[name_input, feedback_input], outputs=feedback_result)

demo.launch()
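
# Optional sanity check (an added sketch, not part of the original app): exercise the two
# handler functions directly instead of going through the UI. Since demo.launch() blocks,
# run these lines in a separate session, or uncomment them and place them before launch().
# The group name and question below are illustrative placeholders.
# print(add_feedback(df['Group'].iloc[0], "This spike looks like a one-off billing error."))
# print(answer_question_with_llama("Which group had the highest expense?"))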