import math
import os
import sys

import fire
from tqdm import tqdm
import pandas as pd
import torch
import transformers
from peft import PeftModel
import datasets
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer

from utils.callbacks import Iteratorize, Stream
from utils.prompter import Prompter

device = "cuda"


def main(
    load_8bit: bool = True,
    base_model: str = "decapoda-research/llama-7b-hf",
    lora_weights: str = "./lora-alpaca",
    data_path: str = "./data",
    output_path: str = "./output",
    eval_rate: float = 0.1,
    batch_size: int = 32,
    # The prompt template to use; defaults to alpaca.
    prompt_template: str = "alpaca",
):
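    # NOTE (assumed data layout): data_path is read with pd.read_json(..., orient='records'),
    # so it should point to a JSON file holding a list of records with at least
    # 'instruction', 'input', and 'output' fields (the columns accessed below);
    # output_path receives a CSV of model predictions.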
    base_model = base_model or os.environ.get("BASE_MODEL", "")
    assert base_model, "Please specify a --base_model, e.g. --base_model='huggyllama/llama-7b'"

    prompter = Prompter(prompt_template)
    tokenizer = LlamaTokenizer.from_pretrained(base_model)
    if device == "cuda":
        model = LlamaForCausalLM.from_pretrained(
            base_model,
            load_in_8bit=load_8bit,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        model = PeftModel.from_pretrained(
            model,
            lora_weights,
            torch_dtype=torch.float16,
        )

    # unwind broken decapoda-research config
    model.config.pad_token_id = tokenizer.pad_token_id = 0  # unk
    model.config.bos_token_id = 1
    model.config.eos_token_id = 2

    if not load_8bit:
        model.half()  # seems to fix bugs for some users.

    model.eval()
    if torch.__version__ >= "2" and sys.platform != "win32":
        model = torch.compile(model)

    def evaluate_one(
        instruction,
        input=None,
        temperature=0.1,
        top_p=0.75,
        top_k=40,
        num_beams=2,
        max_new_tokens=128,
        **kwargs,
    ):
        prompt = prompter.generate_prompt(instruction, input)
        inputs = tokenizer(prompt, return_tensors="pt")
        input_ids = inputs["input_ids"].to(device)
        generation_config = GenerationConfig(
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            num_beams=num_beams,
            **kwargs,
        )
        # Without streaming
        with torch.no_grad():
            generation_output = model.generate(
                input_ids=input_ids,
                generation_config=generation_config,
                return_dict_in_generate=True,
                output_scores=True,
                max_new_tokens=max_new_tokens,
            )
        s = generation_output.sequences[0]
        output = tokenizer.decode(s, skip_special_tokens=True)
        return prompter.get_response(output)
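
    # Illustrative single-example call (hypothetical instruction/input):
    #   evaluate_one("Classify the sentiment of the input text.", input="I loved this movie.")
    # Only the response portion is returned, as extracted by prompter.get_response.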

    def evaluate_all():
        # data = datasets.load_dataset("json", data_files=data_path)
        # data = data["train"]
        # df = data.to_pandas()
        df = pd.read_json(data_path, orient='records')
        print(df.info())

        # Compute accuracy over the dataset, one example at a time.
        correct = 0
        total = 0
        total_step = len(df)
        pbar = tqdm(total=total_step, unit='batch')
        error = []
        for i in range(total_step):
            instruction = df['instruction'].iloc[i]
            input = df['input'].iloc[i]
            label = df['output'].iloc[i]
            pred = evaluate_one(instruction=instruction, input=input)
            if pred == label:
                correct += 1
            else:
                error.append((label, pred))
            total += 1
            acc = correct / total
            # Update the progress bar
            pbar.set_description(
                f"Testing: Sample [{total}/{total_step}] Acc: {acc:.4f}")
            pbar.update(1)
        for e in error:
            print(e)

    def evaluate_by_batch(
        temperature=0.1,
        top_p=0.75,
        top_k=40,
        num_beams=1,
        max_new_tokens=32,
    ):
        df = pd.read_json(data_path, orient='records')
        # df = df.sample(frac=eval_rate).reset_index(drop=True)
        df['prompt'] = df.apply(lambda x: prompter.generate_prompt(
            x['instruction'], x['input']), axis=1)
        tokenizer.padding_side = "left"  # Allow batched inference
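        # With left padding, every prompt in the batch ends at the same (rightmost)
        # position, so the generated tokens follow each prompt directly and
        # prompter.get_response can strip the prompt from the decoded text.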
        generation_config = GenerationConfig(
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            num_beams=num_beams,
        )
        outputs = []
        total = 0
        total_step = math.ceil(len(df) / batch_size)
        pbar = tqdm(total=total_step, unit='batch')
        # Run generation batch by batch and collect the predictions.
        with torch.no_grad():
            for i in range(total_step):
                batch = df.iloc[i * batch_size:(i + 1) * batch_size]
                inputs = tokenizer(batch['prompt'].tolist(), return_tensors="pt", padding=True)[
                    'input_ids'].to(device)
                generation_outputs = model.generate(
                    input_ids=inputs,
                    generation_config=generation_config,
                    max_new_tokens=max_new_tokens,
                    pad_token_id=tokenizer.pad_token_id,
                )
                for g in generation_outputs:
                    decoded_item = tokenizer.decode(
                        g, skip_special_tokens=True)
                    try:
                        output = prompter.get_response(decoded_item)
                    except Exception:
                        output = decoded_item
                    outputs.append(output)
                    total += 1
                # Update the progress bar
                pbar.set_description(f"Testing: Sample [{total}/{len(df)}] ")
                pbar.update(1)
        df['pred'] = outputs
        df['pred'].to_csv(output_path, index=False)

    evaluate_by_batch()
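
# Usage (assumed): python <this_script>.py <dataset_key>
# <dataset_key> selects an entry in ./configs/evaluate_params.yaml; that entry is
# passed to main() as keyword arguments, so its keys must match main()'s parameters.
# Illustrative (hypothetical) entry:
#
#   my_dataset:
#     base_model: huggyllama/llama-7b
#     lora_weights: ./lora-alpaca
#     data_path: ./data/my_dataset_test.json
#     output_path: ./output/my_dataset_pred.csv
#     batch_size: 32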

if __name__ == "__main__":
    # fire.Fire(main)
    import yaml

    dataset_param = sys.argv[1]
    with open("./configs/evaluate_params.yaml", "r") as stream:
        # try:
        params = yaml.safe_load(stream)
    print('=' * 80)
    print(params[dataset_param])
    print('=' * 80)
    # fire.Fire(train)
    main(**params[dataset_param])