import pandas as pd
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig

# Load the model configuration. Llama 3.1 ships an extended rope_scaling
# format; older transformers releases only accept the two-key
# {"type", "factor"} layout, so normalize the dict for compatibility.
config = AutoConfig.from_pretrained("meta-llama/Meta-Llama-3.1-8B")
if isinstance(getattr(config, "rope_scaling", None), dict):
    config.rope_scaling = {
        "type": config.rope_scaling.get('rope_type', 'llama3'),
        "factor": config.rope_scaling.get('factor', 8.0)
    }
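# Note: recent transformers releases accept the Llama 3.1 rope_scaling format
# natively, so the rewrite above is only needed on older versions that reject
# the extended config.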

# Load the tokenizer and the pretrained weights with the adjusted configuration.
# Note: from_config() would build a randomly initialized model; from_pretrained()
# is required to actually load the trained weights.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B")
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3.1-8B",
    config=config,
    torch_dtype="auto"
)

# Load and preprocess the DataFrame (expects 'ds', 'Group', and 'real' columns)
df = pd.read_csv('anomalies.csv')
df['Feedback'] = ""
df['ds'] = pd.to_datetime(df['ds']).dt.strftime('%Y-%m-%d')
df['real'] = df['real'].apply(lambda x: f"{x:.2f}")

# Convert each data row into a natural-language sentence for the prompt
def row_to_sentence(row):
    return f"On {row['ds']}, the expense in the group '{row['Group']}' was ${row['real']}."

df['sentence'] = df.apply(row_to_sentence, axis=1)
print(df)
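# Illustrative example of one generated sentence (hypothetical values):
#   "On 2024-05-01, the expense in the group 'Utilities' was $1532.00."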

# Generate an answer grounded in the most recent data entries
def answer_question_with_llama(question):
    # Llama tokenizers ship without a pad token; reuse EOS so generate() can pad
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    latest_entries = df['sentence'].tail(10).tolist()
    prompt = f"Based on the following data: {' '.join(latest_entries)} Question: {question} Answer:"
    # A single sequence needs no padding; just cap the prompt length
    inputs = tokenizer(prompt, return_tensors='pt', truncation=True, max_length=512)

    generated_ids = model.generate(
        inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_new_tokens=100,  # budget for the answer itself; max_length would count the prompt too
        do_sample=True,      # required for temperature/top_p to take effect
        temperature=0.7,
        top_p=0.9,
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.pad_token_id
    )
    
    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    response_part = generated_text.split("Answer:")[1] if "Answer:" in generated_text else "No answer found."
    final_response = response_part.split(".")[0] + "."
    return final_response
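
# Example usage (assuming anomalies.csv has been loaded above):
#   answer_question_with_llama("Which group had the highest expense recently?")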

# Record feedback against a group name in the DataFrame
def add_feedback(name, feedback):
    global df
    if name in df['Group'].values:
        df.loc[df['Group'] == name, 'Feedback'] = feedback
        return "Feedback successfully added."
    else:
        return "Group not found in DataFrame."

with gr.Blocks() as demo:
    gr.Markdown("# Data Query and Feedback System")
    with gr.Row():
        with gr.Column():
            question_input = gr.Textbox(label="Ask a Question")
            answer_output = gr.Textbox(label="Answer", interactive=False)
            ask_button = gr.Button("Ask")
        with gr.Column():
            name_input = gr.Textbox(label="Group Name for Feedback")
            feedback_input = gr.Textbox(label="Feedback")
            feedback_result = gr.Textbox(label="Feedback Result", interactive=False)
            submit_button = gr.Button("Submit Feedback")

    ask_button.click(fn=answer_question_with_llama, inputs=question_input, outputs=answer_output)
    submit_button.click(fn=add_feedback, inputs=[name_input, feedback_input], outputs=feedback_result)

demo.launch()