import pandas as pd
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig

# Load the configuration and adjust it.
config = AutoConfig.from_pretrained("meta-llama/Meta-Llama-3.1-8B")
if 'rope_scaling' in config.to_dict() and isinstance(config.rope_scaling, dict):
    # Older transformers releases expect rope_scaling as {"type", "factor"};
    # collapse the Llama 3.1 dict to that shape.
    config.rope_scaling = {
        "type": config.rope_scaling.get('rope_type', 'llama3'),
        "factor": config.rope_scaling.get('factor', 8.0)
    }

# Load the tokenizer and the pretrained weights with the adjusted configuration.
# (from_config would build a randomly initialized model, so from_pretrained is used here.)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3.1-8B", config=config)

# Load and preprocess the DataFrame.
df = pd.read_csv('anomalies.csv')
df['Feedback'] = ""
df['ds'] = pd.to_datetime(df['ds']).dt.strftime('%Y-%m-%d')
df['real'] = df['real'].apply(lambda x: f"{x:.2f}")

# Convert each data row into a natural-language sentence.
def tokenize_row(row):
    return f"On {row['ds']}, the expense in the group '{row['Group']}' was ${row['real']}."

df['tokenized'] = df.apply(tokenize_row, axis=1)
print(df)

# Generate a response based on the latest data entries.
def answer_question_with_llama(question):
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    latest_entries = df['tokenized'].tail(10).tolist()
    prompt = f"Based on the following data: {' '.join(latest_entries)} Question: {question} Answer:"

    inputs = tokenizer(prompt, return_tensors='pt', truncation=True, max_length=512)
    generated_ids = model.generate(
        inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_new_tokens=100,          # leave room for the answer beyond the prompt
        do_sample=True,              # required for temperature / top_p to take effect
        temperature=0.7,             # sampling temperature for diversity
        top_p=0.9,
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.pad_token_id
    )

    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    response_part = generated_text.split("Answer:")[1] if "Answer:" in generated_text else "No answer found."
    # Keep only the first sentence of the answer.
    final_response = response_part.split(".")[0].strip() + "."
    return final_response

# Attach user feedback to the matching group in the DataFrame.
def add_feedback(name, feedback):
    global df
    if name in df['Group'].values:
        df.loc[df['Group'] == name, 'Feedback'] = feedback
        return "Feedback successfully added."
    else:
        return "Data not found in DataFrame."

# Gradio interface: one column for questions, one for feedback.
with gr.Blocks() as demo:
    gr.Markdown("# Data Query and Feedback System")
    with gr.Row():
        with gr.Column():
            question_input = gr.Textbox(label="Ask a Question")
            answer_output = gr.Textbox(label="Answer", interactive=False)
            ask_button = gr.Button("Ask")
        with gr.Column():
            name_input = gr.Textbox(label="Name for Feedback")
            feedback_input = gr.Textbox(label="Feedback")
            feedback_result = gr.Textbox(label="Feedback Result", interactive=False)
            submit_button = gr.Button("Submit Feedback")

    ask_button.click(fn=answer_question_with_llama, inputs=question_input, outputs=answer_output)
    submit_button.click(fn=add_feedback, inputs=[name_input, feedback_input], outputs=feedback_result)

demo.launch()
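
# Optional sanity check (an added sketch, not part of the original app): exercise the two
# handler functions directly instead of going through the UI. Since demo.launch() blocks,
# run these lines in a separate session, or uncomment them and place them before launch().
# The group name and question below are illustrative placeholders.
# print(add_feedback(df['Group'].iloc[0], "This spike looks like a one-off billing error."))
# print(answer_question_with_llama("Which group had the highest expense?"))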