Gül Sena Altıntaş
committed on
Commit
·
aebf6ac
1
Parent(s):
7ebe82f
Updated script a bit
Browse files
app.py
CHANGED
@@ -110,7 +110,7 @@ def load_model_and_tokenizer(model_path, use_cache=True, progress_callback=None)
|
|
110 |
model = AutoModelForCausalLM.from_pretrained(
|
111 |
model_path,
|
112 |
torch_dtype=torch.float16 if device == "cuda" else torch.float32,
|
113 |
-
device_map="auto" if device
|
114 |
trust_remote_code=True,
|
115 |
low_cpu_mem_usage=True
|
116 |
)
|
@@ -131,9 +131,9 @@ def load_model_and_tokenizer(model_path, use_cache=True, progress_callback=None)
|
|
131 |
|
132 |
except Exception as e:
|
133 |
import code
|
134 |
-
code.interact(local=dict(globals(), **locals()))
|
135 |
error_msg = f"❌ Error loading model {model_path}: {str(e)}"
|
136 |
logger.error(error_msg)
|
|
|
137 |
if progress_callback:
|
138 |
progress_callback(0.0, error_msg)
|
139 |
return None
|
@@ -242,7 +242,7 @@ def evaluate_model_on_questions(model_path, questions, progress_callback=None):
|
|
242 |
|
243 |
return results
|
244 |
|
245 |
-
def run_evaluation(dataset_text, selected_predefined, custom_models_text, progress=gr.Progress()):
|
246 |
"""Main evaluation function"""
|
247 |
if not dataset_text.strip():
|
248 |
return (
|
@@ -255,6 +255,8 @@ def run_evaluation(dataset_text, selected_predefined, custom_models_text, progre
|
|
255 |
|
256 |
# Parse custom models
|
257 |
custom_models = []
|
|
|
|
|
258 |
if custom_models_text.strip():
|
259 |
custom_models = [model.strip() for model in custom_models_text.strip().split('\n') if model.strip()]
|
260 |
|
@@ -617,22 +619,22 @@ SAMPLE_DATASETS = {
|
|
617 |
In which country is Llanfair pwllgwyngyll located? Wales Germany France Scotland
|
618 |
In which country is Llanfair PG located? Wales Germany France Scotland""",
|
619 |
"Simple Math": """Question,Correct Answer,Choice1,Choice2,Choice3
|
620 |
-
What is 2+2?,4,3,
|
621 |
-
What is 5*3?,15,12,
|
622 |
-
What is 10-7?,3,
|
623 |
-
What is 8/2?,4,3,
|
624 |
|
625 |
"World Capitals": """Question,Correct Answer,Choice1,Choice2,Choice3
|
626 |
-
What is the capital of France?,Paris,London,Berlin,
|
627 |
-
What is the capital of Japan?,Tokyo,Seoul,
|
628 |
-
What is the capital of Brazil?,Brasília,Rio de Janeiro,
|
629 |
-
What is the capital of Australia?,Canberra,Sydney,Melbourne,
|
630 |
|
631 |
"Science Quiz": """Question,Correct Answer,Choice1,Choice2,Choice3
|
632 |
-
What is the chemical symbol for gold?,Au,Ag,
|
633 |
-
Which planet is closest to the Sun?,Mercury,Venus,
|
634 |
-
What is the speed of light?,299792458 m/s,300000000 m/s,
|
635 |
-
What gas do plants absorb from the atmosphere?,Carbon dioxide,Oxygen,
|
636 |
}
|
637 |
|
638 |
# Custom CSS
|
@@ -679,7 +681,7 @@ with gr.Blocks(title="🤖 Model Performance Comparison", theme=gr.themes.Soft()
|
|
679 |
|
680 |
Example format:
|
681 |
Question,Correct Answer,Choice1,Choice2,Choice3
|
682 |
-
What is 2+2?,4,3,
|
683 |
What is the capital of France?,Paris,London,Berlin,Paris""",
|
684 |
lines=8,
|
685 |
max_lines=15
|
@@ -711,7 +713,7 @@ What is the capital of France?,Paris,London,Berlin,Paris""",
|
|
711 |
microsoft/DialoGPT-medium
|
712 |
bigscience/bloom-560m""",
|
713 |
lines=5,
|
714 |
-
info="Add any HuggingFace model path. One model per line."
|
715 |
)
|
716 |
|
717 |
gr.Markdown("""
|
@@ -737,7 +739,7 @@ bigscience/bloom-560m""",
|
|
737 |
""")
|
738 |
|
739 |
# Results section
|
740 |
-
with gr.Column(visible=
|
741 |
gr.Markdown("## 📊 Results")
|
742 |
|
743 |
summary_output = gr.Markdown(
|
|
|
110 |
model = AutoModelForCausalLM.from_pretrained(
|
111 |
model_path,
|
112 |
torch_dtype=torch.float16 if device == "cuda" else torch.float32,
|
113 |
+
device_map="auto" if device== "cuda" else None,
|
114 |
trust_remote_code=True,
|
115 |
low_cpu_mem_usage=True
|
116 |
)
|
|
|
131 |
|
132 |
except Exception as e:
|
133 |
import code
|
|
|
134 |
error_msg = f"❌ Error loading model {model_path}: {str(e)}"
|
135 |
logger.error(error_msg)
|
136 |
+
# code.interact(local=dict(globals(), **locals()))
|
137 |
if progress_callback:
|
138 |
progress_callback(0.0, error_msg)
|
139 |
return None
|
|
|
242 |
|
243 |
return results
|
244 |
|
245 |
+
def run_evaluation(dataset_text, selected_predefined, custom_models_text="", progress=gr.Progress()):
|
246 |
"""Main evaluation function"""
|
247 |
if not dataset_text.strip():
|
248 |
return (
|
|
|
255 |
|
256 |
# Parse custom models
|
257 |
custom_models = []
|
258 |
+
if custom_models_text is None:
|
259 |
+
custom_models_text = ""
|
260 |
if custom_models_text.strip():
|
261 |
custom_models = [model.strip() for model in custom_models_text.strip().split('\n') if model.strip()]
|
262 |
|
|
|
619 |
In which country is Llanfair pwllgwyngyll located? Wales Germany France Scotland
|
620 |
In which country is Llanfair PG located? Wales Germany France Scotland""",
|
621 |
"Simple Math": """Question,Correct Answer,Choice1,Choice2,Choice3
|
622 |
+
What is 2+2?,4,3,2,5
|
623 |
+
What is 5*3?,15,12,16,18
|
624 |
+
What is 10-7?,3,7,4,2
|
625 |
+
What is 8/2?,4,3,2,5""",
|
626 |
|
627 |
"World Capitals": """Question,Correct Answer,Choice1,Choice2,Choice3
|
628 |
+
What is the capital of France?,Paris,London,Berlin,Rome
|
629 |
+
What is the capital of Japan?,Tokyo,Seoul,Beijing,Bangkok
|
630 |
+
What is the capital of Brazil?,Brasília,Rio de Janeiro,São Paulo,Salvador
|
631 |
+
What is the capital of Australia?,Canberra,Sydney,Melbourne,Perth""",
|
632 |
|
633 |
"Science Quiz": """Question,Correct Answer,Choice1,Choice2,Choice3
|
634 |
+
What is the chemical symbol for gold?,Au,Ag,Ca,K
|
635 |
+
Which planet is closest to the Sun?,Mercury,Venus,Earth,Mars
|
636 |
+
What is the speed of light?,299792458 m/s,300000000 m/s,2992458 m/s,299000000 m/s
|
637 |
+
What gas do plants absorb from the atmosphere?,Carbon dioxide,Oxygen,Nitrogen,Hydrogen"""
|
638 |
}
|
639 |
|
640 |
# Custom CSS
|
|
|
681 |
|
682 |
Example format:
|
683 |
Question,Correct Answer,Choice1,Choice2,Choice3
|
684 |
+
What is 2+2?,4,3,2,5
|
685 |
What is the capital of France?,Paris,London,Berlin,Paris""",
|
686 |
lines=8,
|
687 |
max_lines=15
|
|
|
713 |
microsoft/DialoGPT-medium
|
714 |
bigscience/bloom-560m""",
|
715 |
lines=5,
|
716 |
+
info="Add any HuggingFace model path. One model per line.",
|
717 |
)
|
718 |
|
719 |
gr.Markdown("""
|
|
|
739 |
""")
|
740 |
|
741 |
# Results section
|
742 |
+
with gr.Column(visible=True) as results_section:
|
743 |
gr.Markdown("## 📊 Results")
|
744 |
|
745 |
summary_output = gr.Markdown(
|