Spaces:
Sleeping
Sleeping
update
Browse files- Dockerfile +1 -1
- data/eval_data/byteplus/byteplus/seed-1-6-flash-250615/shenzhen_sase/byteplus_api_key/20250728_113641/agent-bingoplus-ph-200-chat.jsonl +3 -0
- data/eval_data/byteplus/byteplus/seed-1-6-flash-250615/shenzhen_sase/byteplus_api_key/20250728_113641/agent-bingoplus-ph-90-choice.jsonl +3 -0
- data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-200-chat.jsonl +3 -0
- data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-90-choice.jsonl +3 -0
- data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-400-choice.jsonl +3 -0
- data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-80-chat.jsonl +3 -0
- data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/arc-easy-1000-choice.jsonl +3 -0
- data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-200-chat.jsonl +3 -0
- data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-90-choice.jsonl +3 -0
- data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-400-choice.jsonl +3 -0
- data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-80-chat.jsonl +3 -0
- examples/make_raw_dataset/step_3_filter_by_keywords.py +19 -2
- examples/test_metrics/bingoplus_chat_metric.py +2 -2
- examples/test_metrics/lingoace_chat_metric.py +2 -2
- llm_eval_script/byteplus.py +2 -1
- llm_eval_script/byteplus_chat.py +4 -3
- llm_eval_script/gemini_google.py +49 -7
- llm_eval_script/gemini_google_chat.py +8 -4
- main.py +1 -0
Dockerfile
CHANGED
@@ -5,7 +5,7 @@ WORKDIR /code
|
|
5 |
COPY . /code
|
6 |
|
7 |
RUN apt-get update
|
8 |
-
RUN apt-get install -y wget unzip ffmpeg build-essential git
|
9 |
|
10 |
RUN pip install --upgrade pip
|
11 |
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
|
|
5 |
COPY . /code
|
6 |
|
7 |
RUN apt-get update
|
8 |
+
RUN apt-get install -y wget unzip ffmpeg build-essential git git-lfs
|
9 |
|
10 |
RUN pip install --upgrade pip
|
11 |
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
data/eval_data/byteplus/byteplus/seed-1-6-flash-250615/shenzhen_sase/byteplus_api_key/20250728_113641/agent-bingoplus-ph-200-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a574d56126be957ef4d283af06243125886f7544ccaa5bbbe0b01900abe2c62f
|
3 |
+
size 2417697
|
data/eval_data/byteplus/byteplus/seed-1-6-flash-250615/shenzhen_sase/byteplus_api_key/20250728_113641/agent-bingoplus-ph-90-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:118787cf7fd66a6683864ff4b79fc648c7d17c65b420c25092c14857c75674ed
|
3 |
+
size 258515
|
data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-200-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab42fc8b853062a9391db33fe890869e7f61e7f9c118ea2c84e3c3555768ca00
|
3 |
+
size 2419510
|
data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-90-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d738dbb5fa0aef7cc3880b0ec50f2a54143ce586b74bb3c1cffe009f53344dc
|
3 |
+
size 258673
|
data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-400-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ae30069ee95459c290f53eb50dcb72cb2c11a8a7c3691a96006f4d462dd767b
|
3 |
+
size 1211487
|
data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-80-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ea3a2b7e5c28a98464352433baecdb7f6c011046d6853282709f7b62ca1386c
|
3 |
+
size 874387
|
data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/arc-easy-1000-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:970ffc784ca83d2ce6e826d3303590d0646f77395bdd832fa809cf09dad46529
|
3 |
+
size 720927
|
data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-200-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f762c204ac2438aebe08f143bbffddd10d2e94701dd787b103506c09c79f1c1b
|
3 |
+
size 2471787
|
data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-90-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6963aa07be72dff967b2388cb4d0303ed76624ba7b48f3f5861c9b207c08448
|
3 |
+
size 258578
|
data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-400-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b38cb68452d6f237d275aa03a6c589ece653d4f8ecd5e808d41bb0ac729d850
|
3 |
+
size 1211826
|
data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-80-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40aab0bb0dd05948d878e0ffab0cb84eca630530079619bd79744957cf42bef2
|
3 |
+
size 874346
|
examples/make_raw_dataset/step_3_filter_by_keywords.py
CHANGED
@@ -50,12 +50,29 @@ def main():
|
|
50 |
|
51 |
for key_str in [
|
52 |
# "BingoPlus",
|
53 |
-
" COD ",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
]:
|
55 |
if system_prompt.__contains__(key_str) or user_prompt.__contains__(key_str):
|
56 |
print(f"process: {sample_dir.as_posix()}")
|
57 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-bingoplus"
|
58 |
-
tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-cod"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
tgt_dir.mkdir(parents=True, exist_ok=True)
|
60 |
shutil.move(
|
61 |
sample_dir.as_posix(),
|
|
|
50 |
|
51 |
for key_str in [
|
52 |
# "BingoPlus",
|
53 |
+
# " COD ",
|
54 |
+
# "NXPay",
|
55 |
+
# "NX Money",
|
56 |
+
# "Exodus Telecom",
|
57 |
+
# "Exodus Retail",
|
58 |
+
"Exodus Automotive",
|
59 |
+
# "kta kilat", "KTA KILAT",
|
60 |
+
# "NXCloud",
|
61 |
+
# "作为VIP客户",
|
62 |
+
"FedEx",
|
63 |
]:
|
64 |
if system_prompt.__contains__(key_str) or user_prompt.__contains__(key_str):
|
65 |
print(f"process: {sample_dir.as_posix()}")
|
66 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-bingoplus"
|
67 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-cod"
|
68 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxpay"
|
69 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxmoney"
|
70 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-retail"
|
71 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-automotive"
|
72 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-kta"
|
73 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxcloud"
|
74 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-vip"
|
75 |
+
tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-fedex"
|
76 |
tgt_dir.mkdir(parents=True, exist_ok=True)
|
77 |
shutil.move(
|
78 |
sample_dir.as_posix(),
|
examples/test_metrics/bingoplus_chat_metric.py
CHANGED
@@ -38,12 +38,12 @@ python3 azure_openai.py --model_name gpt-4o-mini \
|
|
38 |
)
|
39 |
parser.add_argument(
|
40 |
"--eval_data_file",
|
41 |
-
default=(project_path / "data/eval_data/gemini_google/google/
|
42 |
type=str
|
43 |
)
|
44 |
parser.add_argument(
|
45 |
"--output_file",
|
46 |
-
default=(project_path / "data/eval_data/gemini_google/google/
|
47 |
type=str
|
48 |
)
|
49 |
parser.add_argument(
|
|
|
38 |
)
|
39 |
parser.add_argument(
|
40 |
"--eval_data_file",
|
41 |
+
default=(project_path / "data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-200-chat.jsonl.raw").as_posix(),
|
42 |
type=str
|
43 |
)
|
44 |
parser.add_argument(
|
45 |
"--output_file",
|
46 |
+
default=(project_path / "data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-200-chat.jsonl").as_posix(),
|
47 |
type=str
|
48 |
)
|
49 |
parser.add_argument(
|
examples/test_metrics/lingoace_chat_metric.py
CHANGED
@@ -43,12 +43,12 @@ python3 azure_openai.py --model_name gpt-4o-mini \
|
|
43 |
)
|
44 |
parser.add_argument(
|
45 |
"--eval_data_file",
|
46 |
-
default=(project_path / "data/eval_data/
|
47 |
type=str
|
48 |
)
|
49 |
parser.add_argument(
|
50 |
"--output_file",
|
51 |
-
default=(project_path / "data/eval_data/
|
52 |
type=str
|
53 |
)
|
54 |
parser.add_argument(
|
|
|
43 |
)
|
44 |
parser.add_argument(
|
45 |
"--eval_data_file",
|
46 |
+
default=(project_path / "data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-80-chat.jsonl.raw").as_posix(),
|
47 |
type=str
|
48 |
)
|
49 |
parser.add_argument(
|
50 |
"--output_file",
|
51 |
+
default=(project_path / "data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-80-chat.jsonl").as_posix(),
|
52 |
type=str
|
53 |
)
|
54 |
parser.add_argument(
|
llm_eval_script/byteplus.py
CHANGED
@@ -49,8 +49,9 @@ def get_args():
|
|
49 |
)
|
50 |
parser.add_argument(
|
51 |
"--eval_dataset_name",
|
|
|
52 |
# default="agent-lingoace-zh-400-choice.jsonl",
|
53 |
-
default="arc-easy-1000-choice.jsonl",
|
54 |
type=str
|
55 |
)
|
56 |
parser.add_argument(
|
|
|
49 |
)
|
50 |
parser.add_argument(
|
51 |
"--eval_dataset_name",
|
52 |
+
default="agent-bingoplus-ph-90-choice.jsonl",
|
53 |
# default="agent-lingoace-zh-400-choice.jsonl",
|
54 |
+
# default="arc-easy-1000-choice.jsonl",
|
55 |
type=str
|
56 |
)
|
57 |
parser.add_argument(
|
llm_eval_script/byteplus_chat.py
CHANGED
@@ -42,14 +42,15 @@ def get_args():
|
|
42 |
parser = argparse.ArgumentParser()
|
43 |
parser.add_argument(
|
44 |
"--model_name",
|
45 |
-
default="seed-1-6-250615",
|
46 |
-
|
47 |
# default="deepseek-v3-250324",
|
48 |
type=str
|
49 |
)
|
50 |
parser.add_argument(
|
51 |
"--eval_dataset_name",
|
52 |
-
default="agent-lingoace-zh-80-chat.jsonl",
|
|
|
53 |
type=str
|
54 |
)
|
55 |
parser.add_argument(
|
|
|
42 |
parser = argparse.ArgumentParser()
|
43 |
parser.add_argument(
|
44 |
"--model_name",
|
45 |
+
# default="seed-1-6-250615",
|
46 |
+
default="seed-1-6-flash-250615",
|
47 |
# default="deepseek-v3-250324",
|
48 |
type=str
|
49 |
)
|
50 |
parser.add_argument(
|
51 |
"--eval_dataset_name",
|
52 |
+
# default="agent-lingoace-zh-80-chat.jsonl",
|
53 |
+
default="agent-bingoplus-ph-200-chat.jsonl",
|
54 |
type=str
|
55 |
)
|
56 |
parser.add_argument(
|
llm_eval_script/gemini_google.py
CHANGED
@@ -1,5 +1,25 @@
|
|
1 |
#!/usr/bin/python3
|
2 |
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
import argparse
|
4 |
from datetime import datetime
|
5 |
import json
|
@@ -25,13 +45,17 @@ def get_args():
|
|
25 |
"--model_name",
|
26 |
# default="gemini-2.5-pro", # The model does not support setting thinking_budget to 0.
|
27 |
# default="gemini-2.5-flash",
|
28 |
-
default="gemini-2.5-flash-lite-preview-06-17",
|
|
|
|
|
|
|
|
|
29 |
type=str
|
30 |
)
|
31 |
parser.add_argument(
|
32 |
"--eval_dataset_name",
|
33 |
-
default="agent-bingoplus-ph-90-choice.jsonl",
|
34 |
-
|
35 |
# default="arc-easy-1000-choice.jsonl",
|
36 |
type=str
|
37 |
)
|
@@ -55,6 +79,17 @@ def get_args():
|
|
55 |
default="google_potent_veld_462405_t3",
|
56 |
type=str
|
57 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
args = parser.parse_args()
|
59 |
return args
|
60 |
|
@@ -79,9 +114,13 @@ def main():
|
|
79 |
eval_data_dir = Path(args.eval_data_dir)
|
80 |
eval_data_dir.mkdir(parents=True, exist_ok=True)
|
81 |
|
82 |
-
|
83 |
-
|
84 |
-
|
|
|
|
|
|
|
|
|
85 |
|
86 |
eval_dataset = eval_dataset_dir / args.eval_dataset_name
|
87 |
|
@@ -91,7 +130,8 @@ def main():
|
|
91 |
client = genai.Client(
|
92 |
vertexai=True,
|
93 |
project=project_id,
|
94 |
-
location="global",
|
|
|
95 |
)
|
96 |
generate_content_config = types.GenerateContentConfig(
|
97 |
top_p=0.95,
|
@@ -137,6 +177,8 @@ def main():
|
|
137 |
]
|
138 |
)
|
139 |
]
|
|
|
|
|
140 |
time_begin = time.time()
|
141 |
llm_response: types.GenerateContentResponse = client.models.generate_content(
|
142 |
model=args.model_name,
|
|
|
1 |
#!/usr/bin/python3
|
2 |
# -*- coding: utf-8 -*-
|
3 |
+
"""
|
4 |
+
https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude?hl=zh-cn
|
5 |
+
https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/use-claude?hl=zh-cn
|
6 |
+
|
7 |
+
|
8 |
+
Llama
|
9 |
+
|
10 |
+
https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/llama/use-llama?hl=zh-cn
|
11 |
+
https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/llama/use-llama?hl=zh-cn#regions-quotas
|
12 |
+
|
13 |
+
Model Name
|
14 |
+
llama-4-maverick-17b-128e-instruct-maas
|
15 |
+
llama-4-scout-17b-16e-instruct-maas
|
16 |
+
|
17 |
+
区域选择 us-east5
|
18 |
+
|
19 |
+
|
20 |
+
|
21 |
+
|
22 |
+
"""
|
23 |
import argparse
|
24 |
from datetime import datetime
|
25 |
import json
|
|
|
45 |
"--model_name",
|
46 |
# default="gemini-2.5-pro", # The model does not support setting thinking_budget to 0.
|
47 |
# default="gemini-2.5-flash",
|
48 |
+
# default="gemini-2.5-flash-lite-preview-06-17",
|
49 |
+
# default="claude-opus-4@20250514",
|
50 |
+
# default="claude-sonnet-4@20250514",
|
51 |
+
# default="llama-4-maverick-17b-128e-instruct-maas",
|
52 |
+
default="llama-4-scout-17b-16e-instruct-maas",
|
53 |
type=str
|
54 |
)
|
55 |
parser.add_argument(
|
56 |
"--eval_dataset_name",
|
57 |
+
# default="agent-bingoplus-ph-90-choice.jsonl",
|
58 |
+
default="agent-lingoace-zh-400-choice.jsonl",
|
59 |
# default="arc-easy-1000-choice.jsonl",
|
60 |
type=str
|
61 |
)
|
|
|
79 |
default="google_potent_veld_462405_t3",
|
80 |
type=str
|
81 |
)
|
82 |
+
parser.add_argument(
|
83 |
+
"--create_time_str",
|
84 |
+
# default="null",
|
85 |
+
default="20250731_162116",
|
86 |
+
type=str
|
87 |
+
)
|
88 |
+
parser.add_argument(
|
89 |
+
"--interval",
|
90 |
+
default=1,
|
91 |
+
type=int
|
92 |
+
)
|
93 |
args = parser.parse_args()
|
94 |
return args
|
95 |
|
|
|
114 |
eval_data_dir = Path(args.eval_data_dir)
|
115 |
eval_data_dir.mkdir(parents=True, exist_ok=True)
|
116 |
|
117 |
+
if args.create_time_str == "null":
|
118 |
+
tz = ZoneInfo("Asia/Shanghai")
|
119 |
+
now = datetime.now(tz)
|
120 |
+
create_time_str = now.strftime("%Y%m%d_%H%M%S")
|
121 |
+
# create_time_str = "20250729-interval-5"
|
122 |
+
else:
|
123 |
+
create_time_str = args.create_time_str
|
124 |
|
125 |
eval_dataset = eval_dataset_dir / args.eval_dataset_name
|
126 |
|
|
|
130 |
client = genai.Client(
|
131 |
vertexai=True,
|
132 |
project=project_id,
|
133 |
+
# location="global",
|
134 |
+
location="us-east5",
|
135 |
)
|
136 |
generate_content_config = types.GenerateContentConfig(
|
137 |
top_p=0.95,
|
|
|
177 |
]
|
178 |
)
|
179 |
]
|
180 |
+
time.sleep(args.interval)
|
181 |
+
print(f"sleep: {args.interval}")
|
182 |
time_begin = time.time()
|
183 |
llm_response: types.GenerateContentResponse = client.models.generate_content(
|
184 |
model=args.model_name,
|
llm_eval_script/gemini_google_chat.py
CHANGED
@@ -25,7 +25,9 @@ def get_args():
|
|
25 |
"--model_name",
|
26 |
# default="gemini-2.5-pro", # The model does not support setting thinking_budget to 0.
|
27 |
# default="gemini-2.5-flash",
|
28 |
-
default="gemini-2.5-flash-lite-preview-06-17",
|
|
|
|
|
29 |
type=str
|
30 |
)
|
31 |
parser.add_argument(
|
@@ -57,12 +59,12 @@ def get_args():
|
|
57 |
parser.add_argument(
|
58 |
"--create_time_str",
|
59 |
# default="null",
|
60 |
-
default="
|
61 |
type=str
|
62 |
)
|
63 |
parser.add_argument(
|
64 |
"--interval",
|
65 |
-
default=
|
66 |
type=int
|
67 |
)
|
68 |
args = parser.parse_args()
|
@@ -105,7 +107,9 @@ def main():
|
|
105 |
client = genai.Client(
|
106 |
vertexai=True,
|
107 |
project=project_id,
|
108 |
-
location="global",
|
|
|
|
|
109 |
)
|
110 |
generate_content_config = types.GenerateContentConfig(
|
111 |
top_p=0.95,
|
|
|
25 |
"--model_name",
|
26 |
# default="gemini-2.5-pro", # The model does not support setting thinking_budget to 0.
|
27 |
# default="gemini-2.5-flash",
|
28 |
+
# default="gemini-2.5-flash-lite-preview-06-17",
|
29 |
+
# default="llama-4-maverick-17b-128e-instruct-maas",
|
30 |
+
default="llama-4-scout-17b-16e-instruct-maas",
|
31 |
type=str
|
32 |
)
|
33 |
parser.add_argument(
|
|
|
59 |
parser.add_argument(
|
60 |
"--create_time_str",
|
61 |
# default="null",
|
62 |
+
default="20250731_162116",
|
63 |
type=str
|
64 |
)
|
65 |
parser.add_argument(
|
66 |
"--interval",
|
67 |
+
default=1,
|
68 |
type=int
|
69 |
)
|
70 |
args = parser.parse_args()
|
|
|
107 |
client = genai.Client(
|
108 |
vertexai=True,
|
109 |
project=project_id,
|
110 |
+
# location="global",
|
111 |
+
location="us-east5",
|
112 |
+
|
113 |
)
|
114 |
generate_content_config = types.GenerateContentConfig(
|
115 |
top_p=0.95,
|
main.py
CHANGED
@@ -17,6 +17,7 @@ docker run -itd \
|
|
17 |
--name llm_eval_system_7862 \
|
18 |
--restart=always \
|
19 |
--network host \
|
|
|
20 |
python:3.12 \
|
21 |
/bin/bash
|
22 |
|
|
|
17 |
--name llm_eval_system_7862 \
|
18 |
--restart=always \
|
19 |
--network host \
|
20 |
+
-v /data/tianxing/PycharmProjects/llm_eval_system:/data/tianxing/PycharmProjects/llm_eval_system \
|
21 |
python:3.12 \
|
22 |
/bin/bash
|
23 |
|