import sys

import pandas as pd
import torch
import jsonlines
from transformers import AutoModelForCausalLM, AutoTokenizer
from tqdm.auto import tqdm

# --- Configuration ---
MODEL_NAME = sys.argv[1]
INPUT_FILENAME = "./Vietnamese truthful QA results.xlsx"
OUTPUT_FILENAME = sys.argv[2]
MAX_NEW_TOKENS = 512  # The maximum number of new tokens to generate for each answer.

writer = jsonlines.open(OUTPUT_FILENAME, "w")

# 1. Load data from an XLSX file
try:
    df = pd.read_excel(INPUT_FILENAME)
except FileNotFoundError:
    print(f"Error: The file '{INPUT_FILENAME}' was not found.")
    print("Please make sure your XLSX file is in the same directory as the script.")
    exit()
except Exception as e:
    print(f"An error occurred while reading the Excel file: {e}")
    exit()

# 2. Select relevant columns and validate
if "Question" not in df.columns or "Ground truth" not in df.columns:
    print("Error: Required columns 'Question' and/or 'Ground truth' not found.")
    print(f"Available columns are: {list(df.columns)}")
    exit()

df_processed = df[["Question", "Ground truth"]].copy()

# 3. Load model and tokenizer
print(f"Loading model '{MODEL_NAME}' and tokenizer...")

# Set up the device (use GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load the tokenizer and model from the Hugging Face Hub
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
)
model.to(device)  # Move the model to the selected device

# Set a pad token if one is not defined (some models, such as GPT-2, have no default pad token)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = model.config.eos_token_id

print("Model and tokenizer loaded successfully.")

# 4. Generate answers using the model
answers = []
total_questions = len(df_processed)
print(f"Generating answers for {total_questions} questions...")

for i, question in tqdm(enumerate(df_processed["Question"]), total=total_questions):
    # Build a chat-formatted prompt from the question and tokenize it
    messages = [
        {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
        {"role": "user", "content": question},
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    input_ids = tokenizer([prompt], return_tensors="pt").to(model.device)

    # Generate text using the model.
    # do_sample=False makes the output deterministic (greedy decoding, no randomness).
    output_sequences = model.generate(
        **input_ids,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=False,
        pad_token_id=tokenizer.pad_token_id,
    )

    # Decode the generated token IDs back to a string.
    # The output includes the prompt tokens, so slice them off before decoding.
    full_text = tokenizer.decode(
        output_sequences[0][input_ids["input_ids"].shape[1]:],
        skip_special_tokens=True,
    )
    answer = full_text.strip()
    gold = df_processed["Ground truth"].iloc[i]
    answers.append(answer)

    print(f"Processed question {i + 1}/{total_questions}\nAnswer: {answer}\nGold: {gold}")
    writer.write({
        "question": question,
        "answer": answer,
        "gold": gold,
    })

writer.close()
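
# --- Optional sanity check (added sketch, not part of the original pipeline) ---
# The script is expected to be invoked with the model name and output path as
# positional arguments, e.g. (the script filename and model ID below are
# illustrative assumptions; substitute your own):
#
#   python generate_answers.py Qwen/Qwen2.5-7B-Instruct qwen_answers.jsonl
#
# As a minimal sketch, the output JSONL can be read back after the writer is
# closed to confirm every question produced a record; a truncated run is then
# easy to spot. This block is optional and can be removed.
with jsonlines.open(OUTPUT_FILENAME) as reader:
    records = list(reader)
print(f"Wrote {len(records)} of {total_questions} records to {OUTPUT_FILENAME}.")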