import sys

import pandas as pd
import torch
import jsonlines
from transformers import AutoModelForCausalLM, AutoTokenizer
from tqdm.auto import tqdm

# --- Configuration ---
MODEL_NAME = sys.argv[1]
INPUT_FILENAME = "./Vietnamese truthful QA results.xlsx"
OUTPUT_FILENAME = sys.argv[2]
MAX_NEW_TOKENS = 512  # The maximum number of new tokens to generate for each answer.

writer = jsonlines.open(OUTPUT_FILENAME, "w")

# 1. Load data from an XLSX file
try:
    df = pd.read_excel(INPUT_FILENAME)
except FileNotFoundError:
    print(f"Error: The file '{INPUT_FILENAME}' was not found.")
    print("Please make sure your XLSX file is in the same directory as the script.")
    exit()
except Exception as e:
    print(f"An error occurred while reading the Excel file: {e}")
    exit()

# 2. Select relevant columns and validate
if "Question" not in df.columns or "Ground truth" not in df.columns:
    print("Error: Required columns 'Question' and/or 'Ground truth' not found.")
    print(f"Available columns are: {list(df.columns)}")
    exit()

df_processed = df[["Question", "Ground truth"]].copy()

# 3. Load model and tokenizer
print(f"Loading model '{MODEL_NAME}' and tokenizer...")

# Set up the device (use GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load the tokenizer and model from the Hugging Face Hub
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
)
model.to(device)  # Move the model to the selected device

# Set a pad token if one is not defined (some models, such as GPT-2, have no default pad token)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = model.config.eos_token_id

print("Model and tokenizer loaded successfully.")

# 4. Generate answers using the model
answers = []
total_questions = len(df_processed)
print(f"Generating answers for {total_questions} questions...")

for i, question in tqdm(enumerate(df_processed["Question"]), total=total_questions):
    # Build a chat-formatted prompt from the question and tokenize it
    messages = [
        {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
        {"role": "user", "content": question},
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    input_ids = tokenizer([prompt], return_tensors="pt").to(model.device)

    # Generate text using the model.
    # do_sample=False makes the output deterministic (greedy decoding, no randomness).
    output_sequences = model.generate(
        **input_ids,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=False,
        pad_token_id=tokenizer.pad_token_id,
    )

    # Decode the generated token IDs back to a string.
    # The output includes the prompt tokens, so slice them off before decoding.
    full_text = tokenizer.decode(
        output_sequences[0][input_ids["input_ids"].shape[1]:],
        skip_special_tokens=True,
    )
    answer = full_text.strip()
    gold = df_processed["Ground truth"].iloc[i]
    answers.append(answer)

    print(f"Processed question {i + 1}/{total_questions}\nAnswer: {answer}\nGold: {gold}")
    writer.write({
        "question": question,
        "answer": answer,
        "gold": gold,
    })

writer.close()
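
# --- Optional sanity check (added sketch, not part of the original pipeline) ---
# The script is expected to be invoked with the model name and output path as
# positional arguments, e.g. (the script filename and model ID below are
# illustrative assumptions; substitute your own):
#
#   python generate_answers.py Qwen/Qwen2.5-7B-Instruct qwen_answers.jsonl
#
# As a minimal sketch, the output JSONL can be read back after the writer is
# closed to confirm every question produced a record; a truncated run is then
# easy to spot. This block is optional and can be removed.
with jsonlines.open(OUTPUT_FILENAME) as reader:
    records = list(reader)
print(f"Wrote {len(records)} of {total_questions} records to {OUTPUT_FILENAME}.")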