Upload 5 files

- README.md +245 -0
- adapter_config.json +32 -0
- adapter_model.safetensors +3 -0
- conda_requirements.txt +182 -0
- config.json +30 -0

README.md
ADDED
@@ -0,0 +1,245 @@
# Uploaded model

- **Developed by:** u9999Yoko
- **License:** CC BY-NC-SA
- **Finetuned from model:** llm-jp/llm-jp-3-13b

## Abstract

This model was created by fine-tuning the llm-jp/llm-jp-3-13b model on the ichikara-instruction data.
It was built for the competition in the Matsuo Lab large language model course.
https://weblab.t.u-tokyo.ac.jp/lecture/course-list/large-language-model/
https://llm-jp.nii.ac.jp/blog/2024/04/30/v2.0-release.html

## Dataset

The following dataset was used (see the sketch after this list for the assumed record shape):
- ichikara-instruction-003-001-1.json
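
Each record is consumed by `formatting_prompts_func` in the Usage section below, which reads the `text` and `output` fields. A hypothetical record in that shape (the values here are invented for illustration):

```python
# Hypothetical example record: real ichikara-instruction entries may carry
# additional fields, but the training code below reads only these two.
example_record = {
    "text": "日本で一番高い山は何ですか?",       # instruction / input
    "output": "日本で一番高い山は富士山です。",  # reference answer
}
```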
## Attention

The ichikara-instruction data is released under the CC BY-NC-SA (Attribution-NonCommercial-ShareAlike) license.
Under this license, non-commercial use is permitted, but commercial use is not.
See the following page for details:
https://llm-jp.nii.ac.jp/blog/2024/04/30/v2.0-release.html

## Usage

Execute the following code in a notebook:

```python
# Install the required libraries first. This is a shell command, not Python;
# run it in a terminal (the file is conda_requirements.txt in this repo):
#   conda install --yes --file conda_requirements.txt

from requests.exceptions import HTTPError
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
)
from peft import (
    LoraConfig,
    get_peft_model,
)
import torch
from datasets import load_dataset
from trl import SFTTrainer
from tqdm import tqdm

# Log in to Hugging Face
from huggingface_hub import notebook_login

# Enter your access token when prompted
notebook_login()

# Model IDs
base_model_id = "llm-jp/llm-jp-3-13b"
new_model_id = "u9999Yoko/llm-jp-3-13b-finetune"


model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,  # half precision for efficiency
)

tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)

"""
peft_config: PEFT configuration
- r
  - LoRA rank (4, 8, 16, 32, ...)
  - Higher ranks can learn more, but also raise the risk of overfitting
- lora_alpha
  - LoRA scaling factor
- lora_dropout
  - Dropout rate (guards against overfitting)
- bias
  - How bias terms are handled ("none": LoRA does not train biases)
- task_type
  - Task type
- target_modules
  - Modules LoRA is applied to (the layers identified earlier)
"""

# LoRA configuration
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],  # layers to train
    lora_dropout=0.1,
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora_config)
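
# Optional check (added for illustration): peft's PeftModel exposes
# print_trainable_parameters(), which shows how small the trained
# LoRA parameter count is relative to the full model.
model.print_trainable_parameters()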

dataset = load_dataset("json", data_files="./ichikara-instruction-003-001-1.json")

# Prompt format used for training
prompt = """### 指示
{}
### 回答
{}"""

"""
formatting_prompts_func: convert each record into the prompt format
"""
EOS_TOKEN = tokenizer.eos_token  # the tokenizer's end-of-sequence token
def formatting_prompts_func(examples):
    input = examples["text"]     # input data
    output = examples["output"]  # output data
    text = prompt.format(input, output) + EOS_TOKEN  # build the prompt
    return {"formatted_text": text}  # return a new "formatted_text" field

# Apply the format to every record
dataset = dataset.map(
    formatting_prompts_func,
    num_proc=4,  # number of parallel workers
)

# Inspect the data
print(dataset["train"]["formatted_text"][3])

# Split the data into train and test sets (test_size sets the held-out fraction)
dataset = dataset["train"].train_test_split(test_size=0.1)
print(dataset)

"""
training_arguments: training settings
output_dir: directory where the trained model is saved
per_device_train_batch_size: training batch size per device
per_device_eval_batch_size: evaluation batch size per device
gradient_accumulation_steps: number of steps to accumulate before each gradient update
optim: optimizer
num_train_epochs: number of epochs
eval_strategy: evaluation strategy ("no"/"steps"/"epoch")
eval_steps: evaluation interval in steps when eval_strategy is "steps"
logging_strategy: logging strategy
logging_steps: logging interval in steps
warmup_steps: number of learning-rate warmup steps
save_steps: checkpoint interval in steps
save_total_limit: number of checkpoints to keep
max_steps: maximum number of training steps
learning_rate: learning rate
fp16: whether to train in 16-bit floating point (the Lecture 8 exercise is a good reference)
bf16: whether to train in BFloat16
group_by_length: group batches by input sequence length (makes training more efficient)
report_to: where to send logs ("wandb", "tensorboard", etc.)
"""

training_arguments = TrainingArguments(
    output_dir=new_model_id,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    # optim="paged_adamw_32bit",
    num_train_epochs=3,
    logging_strategy="steps",
    logging_steps=10,
    warmup_steps=10,
    save_steps=1000,
    save_total_limit=2,
    max_steps=-1,
    learning_rate=5e-5,
    fp16=False,
    bf16=True,
    seed=3407,
    group_by_length=True,
    report_to="none",
)
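
# With these settings, the effective batch size is
# per_device_train_batch_size (1) x gradient_accumulation_steps (8) = 8.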

"""
SFTTrainer: supervised fine-tuning settings
model: the loaded base model
train_dataset: dataset used for training
eval_dataset: dataset used for evaluation
peft_config: PEFT (Parameter-Efficient Fine-Tuning) settings (set when using LoRA)
max_seq_length: maximum number of input tokens per sequence
dataset_text_field: name of the dataset field that holds the training text
tokenizer: tokenizer matching the model
args: training hyperparameters (the TrainingArguments above)
packing: whether to pack input sequences (False treats each input independently)
"""
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    peft_config=lora_config,
    max_seq_length=512,
    dataset_text_field="formatted_text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)

model.config.use_cache = False  # disable the KV cache during training
trainer.train()  # run training
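
# (Assumed follow-up, not part of the original script: the trained adapter can
# be saved locally with trainer.save_model() or uploaded to the Hub with
# model.push_to_hub(new_model_id).)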

# Load the evaluation task data.

import json
datasets = []
with open("./elyza-tasks-100-TV_0.jsonl", "r") as f:
    item = ""
    for line in f:
        line = line.strip()
        item += line
        if item.endswith("}"):  # a record is complete once it closes with "}"
            datasets.append(json.loads(item))
            item = ""
print(datasets)


# Run inference on the tasks with the model.

results = []
for data in tqdm(datasets):

    input = data["input"]

    prompt = f"""### 指示
{input}
### 回答
"""

    tokenized_input = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt").to(model.device)
    attention_mask = torch.ones_like(tokenized_input)

    with torch.no_grad():
        outputs = model.generate(
            tokenized_input,
            attention_mask=attention_mask,
            max_new_tokens=400,
            do_sample=False,
            repetition_penalty=1.2,
            pad_token_id=tokenizer.eos_token_id
        )[0]
    output = tokenizer.decode(outputs[tokenized_input.size(1):], skip_special_tokens=True)

    results.append({"task_id": data["task_id"], "input": input, "output": output})

# Save the results as JSONL.
import re
jsonl_id = re.sub(".*/", "", new_model_id)
with open(f"./{jsonl_id}-outputs.jsonl", 'w', encoding='utf-8') as f:
    for result in results:
        json.dump(result, f, ensure_ascii=False)  # ensure_ascii=False keeps non-ASCII characters readable
        f.write('\n')
```
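
The following is a minimal sketch, not part of the original script, of how the resulting LoRA adapter could be loaded for inference with `peft`, assuming the adapter is published on the Hub as `u9999Yoko/llm-jp-3-13b-finetune`:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_model_id = "llm-jp/llm-jp-3-13b"
adapter_id = "u9999Yoko/llm-jp-3-13b-finetune"  # assumed Hub ID of this adapter

# Load the base model, then attach the trained LoRA weights on top of it.
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
model = PeftModel.from_pretrained(model, adapter_id)
tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)
```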
adapter_config.json
ADDED
@@ -0,0 +1,32 @@
{
  "alpha_pattern": {},
  "auto_mapping": null,
  "base_model_name_or_path": "llm-jp/llm-jp-3-13b",
  "bias": "none",
  "eva_config": null,
  "exclude_modules": null,
  "fan_in_fan_out": false,
  "inference_mode": true,
  "init_lora_weights": true,
  "layer_replication": null,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 16,
  "lora_bias": false,
  "lora_dropout": 0.1,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "LORA",
  "r": 8,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
    "q_proj",
    "v_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,
  "use_rslora": false
}
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6aea77bc32c2c8c6bf62644c7dd7871409417e76c1a24b4ca9b891cffa7067b6
size 26235704
conda_requirements.txt
ADDED
@@ -0,0 +1,182 @@
# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: osx-arm64
# created-by: conda 24.11.0
accelerate=1.2.0=pypi_0
aiohappyeyeballs=2.4.4=pypi_0
aiohttp=3.11.10=pypi_0
aiosignal=1.3.1=pypi_0
airllm=2.11.0=pypi_0
annotated-types=0.7.0=pypi_0
anyio=4.7.0=pypi_0
asttokens=3.0.0=pypi_0
async-timeout=5.0.1=pypi_0
attrs=24.2.0=pypi_0
beautifulsoup4=4.12.3=pypi_0
bitsandbytes=0.42.0=pypi_0
bleach=6.2.0=pypi_0
brotli-python=1.1.0=py310hb4ad77e_2
bzip2=1.0.8=h3422bc3_4
ca-certificates=2022.9.24=h4653dfc_0
certifi=2024.8.30=pypi_0
charset-normalizer=3.4.0=pypi_0
click=8.1.7=pypi_0
coloredlogs=15.0.1=pypi_0
comm=0.2.2=pypi_0
cpython=3.10.16=py310hd8ed1ab_1
datasets=3.1.0=pypi_0
decorator=5.1.1=pypi_0
defusedxml=0.7.1=pypi_0
dill=0.3.8=pypi_0
distro=1.9.0=pypi_0
docker-pycreds=0.4.0=pypi_0
einops=0.8.0=pypi_0
exceptiongroup=1.2.2=pypi_0
executing=2.1.0=pypi_0
fastjsonschema=2.21.1=pypi_0
filelock=3.16.1=pypi_0
freetype=2.12.1=hadb7bae_2
frozenlist=1.5.0=pypi_0
fsspec=2024.9.0=pypi_0
giflib=5.2.2=h93a5062_0
gitdb=4.0.11=pypi_0
gitpython=3.1.43=pypi_0
gmp=6.3.0=h7bae524_2
gmpy2=2.1.5=py310h805dbd7_3
h11=0.14.0=pypi_0
h2=4.1.0=pyhd8ed1ab_1
hpack=4.0.0=pyhd8ed1ab_1
httpcore=1.0.7=pypi_0
httpx=0.28.1=pypi_0
huggingface-hub=0.26.5=pypi_0
humanfriendly=10.0=pypi_0
hyperframe=6.0.1=pyhd8ed1ab_1
idna=3.10=pypi_0
inquirerpy=0.3.4=pypi_0
ipython=8.30.0=pypi_0
ipywidgets=8.1.5=pypi_0
jedi=0.19.2=pypi_0
jinja2=3.1.4=pyhd8ed1ab_1
jiter=0.8.2=pypi_0
jsonlines=4.0.0=pypi_0
jsonschema=4.23.0=pypi_0
jsonschema-specifications=2024.10.1=pypi_0
jupyter-client=8.6.3=pypi_0
jupyter-core=5.7.2=pypi_0
jupyterlab-pygments=0.3.0=pypi_0
jupyterlab-widgets=3.0.13=pypi_0
lcms2=2.16=ha0e7c42_0
lerc=4.0.0=h9a09cb3_0
libblas=3.9.0=16_osxarm64_openblas
libcblas=3.9.0=16_osxarm64_openblas
libcxx=14.0.6=h2692d47_0
libdeflate=1.22=hd74edd7_0
libffi=3.4.2=h3422bc3_5
libgfortran=5.0.0=11_3_0_hd922786_26
libgfortran5=11.3.0=hdaf2cc0_26
libjpeg-turbo=3.0.0=hb547adb_1
liblapack=3.9.0=16_osxarm64_openblas
liblzma=5.6.3=h39f12f2_1
libopenblas=0.3.21=openmp_hc731615_3
libpng=1.6.44=hc14010f_0
libprotobuf=3.21.9=hb5ab8b9_0
libsqlite=3.40.0=h76d750c_0
libtiff=4.7.0=ha962b0a_2
libwebp=1.4.0=h54798ee_0
libwebp-base=1.4.0=h93a5062_0
libxcb=1.17.0=hdb1d25a_0
libzlib=1.2.13=h03a7124_4
llvm-openmp=15.0.5=h7cfbb63_0
markdown-it-py=3.0.0=pypi_0
markupsafe=3.0.2=py310hc74094e_1
matplotlib-inline=0.1.7=pypi_0
mdurl=0.1.2=pypi_0
mistune=3.0.2=pypi_0
mlx=0.21.1=pypi_0
mpc=1.3.1=h8f1351a_1
mpfr=4.2.1=hb693164_3
mpmath=1.3.0=pyhd8ed1ab_1
multidict=6.1.0=pypi_0
multiprocess=0.70.16=pypi_0
nbclient=0.10.1=pypi_0
nbformat=5.10.4=pypi_0
ncurses=6.3=h07bb92c_1
networkx=3.4.2=pyh267e887_2
numpy=2.2.0=pypi_0
openai=1.57.2=pypi_0
openjpeg=2.5.2=h9f1df11_0
openssl=1.1.1s=h03a7124_0
optimum=1.23.3=pypi_0
packaging=24.2=pypi_0
pandas=2.2.3=pypi_0
pandocfilters=1.5.1=pypi_0
parso=0.8.4=pypi_0
peft=0.14.0=pypi_0
pexpect=4.9.0=pypi_0
pfzy=0.3.4=pypi_0
pillow=11.0.0=py310h530beaf_0
pip=24.3.1=pypi_0
platformdirs=4.3.6=pypi_0
prompt-toolkit=3.0.48=pypi_0
propcache=0.2.1=pypi_0
protobuf=4.21.9=py310h0f1eb42_0
psutil=6.1.0=pypi_0
pthread-stubs=0.4=hd74edd7_1002
ptyprocess=0.7.0=pypi_0
pure-eval=0.2.3=pypi_0
pyarrow=18.1.0=pypi_0
pydantic=2.10.3=pypi_0
pydantic-core=2.27.1=pypi_0
pygments=2.18.0=pypi_0
python=3.10.6=hbce4517_0_cpython
python-dateutil=2.9.0.post0=pypi_0
python_abi=3.10=2_cp310
pytorch=2.6.0.dev20241112=py3.10_0
pytz=2024.2=pypi_0
pyyaml=6.0.2=pypi_0
pyzmq=26.2.0=pypi_0
readline=8.1.2=h46ed386_0
referencing=0.35.1=pypi_0
regex=2024.11.6=pypi_0
requests=2.32.3=pypi_0
rich=13.9.4=pypi_0
rpds-py=0.22.3=pypi_0
safetensors=0.4.5=pypi_0
scipy=1.14.1=pypi_0
sentencepiece=0.2.0=pypi_0
sentry-sdk=2.19.2=pypi_0
setproctitle=1.3.4=pypi_0
setuptools=75.6.0=pypi_0
six=1.17.0=pypi_0
smmap=5.0.1=pypi_0
sniffio=1.3.1=pypi_0
soupsieve=2.6=pypi_0
stack-data=0.6.3=pypi_0
sympy=1.13.1=pypi_0
tiktoken=0.8.0=pypi_0
tinycss2=1.4.0=pypi_0
tk=8.6.12=he1e0b03_0
tokenizers=0.20.3=pypi_0
torchaudio=2.5.0.dev20241118=py310_cpu
tornado=6.4.2=pypi_0
tqdm=4.67.1=pypi_0
traitlets=5.14.3=pypi_0
transformers=4.46.3=pypi_0
transformers-stream-generator=0.0.5=pypi_0
trl=0.12.2=pypi_0
typing-extensions=4.12.2=pypi_0
tzdata=2024.2=pypi_0
urllib3=2.2.3=pypi_0
wandb=0.19.0=pypi_0
wcwidth=0.2.13=pypi_0
webencodings=0.5.1=pypi_0
wheel=0.38.4=pyhd8ed1ab_0
widgetsnbextension=4.0.13=pypi_0
xorg-libxau=1.0.11=hd74edd7_1
xorg-libxdmcp=1.1.5=hd74edd7_0
xxhash=3.5.0=pypi_0
xz=5.2.6=h57fd34a_0
yaml=0.2.5=h3422bc3_2
yarl=1.18.3=pypi_0
zstandard=0.23.0=pypi_0
zstd=1.5.5.1=pypi_0
config.json
ADDED
@@ -0,0 +1,30 @@
{
  "_name_or_path": "None",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 5120,
  "initializer_range": 0.02,
  "intermediate_size": 13824,
  "max_position_embeddings": 4096,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 40,
  "num_hidden_layers": 40,
  "num_key_value_heads": 40,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.43.3",
  "use_cache": true,
  "vocab_size": 99584
}