kevinwang676 committed
Commit 94d23fe · verified · 1 Parent(s): 4473929

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes.
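The commit message states the folder was pushed with huggingface_hub. A minimal sketch of how such an upload could be issued, assuming the standard HfApi.upload_folder call and a placeholder repo id (the actual target repository is not shown in this view):

```python
from huggingface_hub import HfApi

api = HfApi()  # uses the token from `huggingface-cli login` / HF_TOKEN
api.upload_folder(
    folder_path=".",                      # local workspace to upload
    repo_id="kevinwang676/example-repo",  # placeholder; substitute the real repo id
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```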
Files changed (50)
  1. .gitattributes +11 -0
  2. .ipynb_checkpoints/Untitled-checkpoint.ipynb +6 -0
  3. .ipynb_checkpoints/Untitled1-checkpoint.ipynb +6 -0
  4. .venv-backups/18897064/backup.log +60 -0
  5. .venv-backups/18897064/venv-main-2025-03-18-0524.txt +68 -0
  6. .venv-backups/18897064/venv-main-2025-03-18-0530.txt +68 -0
  7. .venv-backups/18897064/venv-main-2025-03-18-0600.txt +110 -0
  8. .venv-backups/18897064/venv-main-2025-03-18-0630.txt +110 -0
  9. .venv-backups/18897064/venv-main-2025-03-18-0700.txt +110 -0
  10. .venv-backups/18897064/venv-main-2025-03-18-0730.txt +110 -0
  11. .venv-backups/18897064/venv-main-2025-03-18-0800.txt +110 -0
  12. .venv-backups/18897064/venv-main-2025-03-18-0830.txt +110 -0
  13. .venv-backups/18897064/venv-main-2025-03-18-0900.txt +110 -0
  14. .venv-backups/18897064/venv-main-2025-03-18-0930.txt +110 -0
  15. .venv-backups/18897064/venv-main-2025-03-18-1000.txt +110 -0
  16. .venv-backups/18897064/venv-main-2025-03-18-1030.txt +110 -0
  17. .venv-backups/18897064/venv-main-2025-03-18-1100.txt +110 -0
  18. .venv-backups/18897064/venv-main-2025-03-18-1130.txt +110 -0
  19. .venv-backups/18897064/venv-main-2025-03-18-1200.txt +110 -0
  20. .venv-backups/18897064/venv-main-2025-03-18-1230.txt +110 -0
  21. .venv-backups/18897064/venv-main-2025-03-18-1300.txt +110 -0
  22. .venv-backups/18897064/venv-main-2025-03-18-1330.txt +110 -0
  23. .venv-backups/18897064/venv-main-2025-03-18-1400.txt +110 -0
  24. .venv-backups/18897064/venv-main-2025-03-18-1430.txt +110 -0
  25. .venv-backups/18897064/venv-main-latest.txt +110 -0
  26. Untitled.ipynb +1272 -0
  27. Untitled1.ipynb +101 -0
  28. converted_train/data-00000-of-00001.arrow +3 -0
  29. converted_train/dataset_info.json +12 -0
  30. converted_train/state.json +13 -0
  31. lora_model/README.md +202 -0
  32. lora_model/adapter_config.json +37 -0
  33. lora_model/adapter_model.safetensors +3 -0
  34. lora_model/added_tokens.json +24 -0
  35. lora_model/merges.txt +0 -0
  36. lora_model/special_tokens_map.json +31 -0
  37. lora_model/tokenizer.json +3 -0
  38. lora_model/tokenizer_config.json +209 -0
  39. lora_model/vocab.json +0 -0
  40. onstart.sh +3 -0
  41. outputs/checkpoint-100/README.md +202 -0
  42. outputs/checkpoint-100/adapter_config.json +37 -0
  43. outputs/checkpoint-100/adapter_model.safetensors +3 -0
  44. outputs/checkpoint-100/added_tokens.json +24 -0
  45. outputs/checkpoint-100/merges.txt +0 -0
  46. outputs/checkpoint-100/optimizer.pt +3 -0
  47. outputs/checkpoint-100/rng_state.pth +3 -0
  48. outputs/checkpoint-100/scheduler.pt +3 -0
  49. outputs/checkpoint-100/special_tokens_map.json +31 -0
  50. outputs/checkpoint-100/tokenizer.json +3 -0
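The list above includes a lora_model/ folder with a PEFT adapter (adapter_config.json, adapter_model.safetensors) and its tokenizer files. A minimal sketch of loading such an adapter for inference, assuming the peft and transformers APIs; the base checkpoint name below is an assumption, since the commit itself does not record it:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model_id = "Qwen/Qwen2.5-7B-Instruct"  # assumption: the real base model is not stated in this view

base = AutoModelForCausalLM.from_pretrained(base_model_id, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("lora_model")  # tokenizer files ship alongside the adapter
model = PeftModel.from_pretrained(base, "lora_model")    # attach the LoRA weights from this commit
```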
.gitattributes CHANGED
@@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ lora_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ outputs/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ outputs/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ outputs/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ outputs/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ outputs/checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ outputs/checkpoint-600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ outputs/checkpoint-700/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ outputs/checkpoint-800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ outputs/checkpoint-900/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ outputs/checkpoint-936/tokenizer.json filter=lfs diff=lfs merge=lfs -text
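These rules route every checkpoint's tokenizer.json (in addition to the existing archive patterns) through Git LFS. An illustrative check of which paths the new rules would cover, assuming only Python's standard fnmatch as a stand-in for gitattributes matching:

```python
from fnmatch import fnmatch

lfs_patterns = [
    "lora_model/tokenizer.json",
    "outputs/checkpoint-*/tokenizer.json",  # condensed form of the per-checkpoint rules added above
    "*.zip",
    "*.zst",
]

def tracked_by_lfs(path: str) -> bool:
    """True if the path matches any of the LFS patterns."""
    return any(fnmatch(path, pattern) for pattern in lfs_patterns)

print(tracked_by_lfs("outputs/checkpoint-300/tokenizer.json"))      # True
print(tracked_by_lfs("outputs/checkpoint-300/adapter_config.json")) # False
```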
.ipynb_checkpoints/Untitled-checkpoint.ipynb ADDED
@@ -0,0 +1,6 @@
+ {
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }
.ipynb_checkpoints/Untitled1-checkpoint.ipynb ADDED
@@ -0,0 +1,6 @@
+ {
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }
.venv-backups/18897064/backup.log ADDED
@@ -0,0 +1,60 @@
+ [2025-03-18 05:24:42] Processing virtual environment: /venv/main
+ [2025-03-18 05:24:43] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-0524.txt
+ [2025-03-18 05:24:43] Backup process completed
+ [2025-03-18 05:30:01] Processing virtual environment: /venv/main
+ [2025-03-18 05:30:01] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-0530.txt
+ [2025-03-18 05:30:01] Backup process completed
+ [2025-03-18 06:00:01] Processing virtual environment: /venv/main
+ [2025-03-18 06:00:02] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-0600.txt
+ [2025-03-18 06:00:02] Backup process completed
+ [2025-03-18 06:30:01] Processing virtual environment: /venv/main
+ [2025-03-18 06:30:02] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-0630.txt
+ [2025-03-18 06:30:02] Backup process completed
+ [2025-03-18 07:00:01] Processing virtual environment: /venv/main
+ [2025-03-18 07:00:02] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-0700.txt
+ [2025-03-18 07:00:02] Backup process completed
+ [2025-03-18 07:30:01] Processing virtual environment: /venv/main
+ [2025-03-18 07:30:02] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-0730.txt
+ [2025-03-18 07:30:02] Backup process completed
+ [2025-03-18 08:00:01] Processing virtual environment: /venv/main
+ [2025-03-18 08:00:01] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-0800.txt
+ [2025-03-18 08:00:01] Backup process completed
+ [2025-03-18 08:30:01] Processing virtual environment: /venv/main
+ [2025-03-18 08:30:02] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-0830.txt
+ [2025-03-18 08:30:02] Backup process completed
+ [2025-03-18 09:00:01] Processing virtual environment: /venv/main
+ [2025-03-18 09:00:01] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-0900.txt
+ [2025-03-18 09:00:02] Backup process completed
+ [2025-03-18 09:30:01] Processing virtual environment: /venv/main
+ [2025-03-18 09:30:01] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-0930.txt
+ [2025-03-18 09:30:01] Backup process completed
+ [2025-03-18 10:00:01] Processing virtual environment: /venv/main
+ [2025-03-18 10:00:02] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-1000.txt
+ [2025-03-18 10:00:02] Backup process completed
+ [2025-03-18 10:30:01] Processing virtual environment: /venv/main
+ [2025-03-18 10:30:01] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-1030.txt
+ [2025-03-18 10:30:01] Backup process completed
+ [2025-03-18 11:00:01] Processing virtual environment: /venv/main
+ [2025-03-18 11:00:02] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-1100.txt
+ [2025-03-18 11:00:02] Backup process completed
+ [2025-03-18 11:30:01] Processing virtual environment: /venv/main
+ [2025-03-18 11:30:02] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-1130.txt
+ [2025-03-18 11:30:02] Backup process completed
+ [2025-03-18 12:00:01] Processing virtual environment: /venv/main
+ [2025-03-18 12:00:01] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-1200.txt
+ [2025-03-18 12:00:01] Backup process completed
+ [2025-03-18 12:30:01] Processing virtual environment: /venv/main
+ [2025-03-18 12:30:02] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-1230.txt
+ [2025-03-18 12:30:02] Backup process completed
+ [2025-03-18 13:00:01] Processing virtual environment: /venv/main
+ [2025-03-18 13:00:02] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-1300.txt
+ [2025-03-18 13:00:02] Backup process completed
+ [2025-03-18 13:30:01] Processing virtual environment: /venv/main
+ [2025-03-18 13:30:01] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-1330.txt
+ [2025-03-18 13:30:01] Backup process completed
+ [2025-03-18 14:00:01] Processing virtual environment: /venv/main
+ [2025-03-18 14:00:02] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-1400.txt
+ [2025-03-18 14:00:02] Backup process completed
+ [2025-03-18 14:30:01] Processing virtual environment: /venv/main
+ [2025-03-18 14:30:01] SUCCESS: Created backup at /workspace/.venv-backups/18897064/venv-main-2025-03-18-1430.txt
+ [2025-03-18 14:30:01] Backup process completed
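The log shows a half-hourly job that snapshots /venv/main into timestamped requirement files under /workspace/.venv-backups/18897064/. A minimal sketch of a script that would produce entries in this format, assuming the paths from the log and that the venv's own pip is invoked; the actual backup script is not part of this commit:

```python
import subprocess
from datetime import datetime
from pathlib import Path

BACKUP_DIR = Path("/workspace/.venv-backups/18897064")

def log(message: str) -> None:
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    with open(BACKUP_DIR / "backup.log", "a") as fh:
        fh.write(f"[{stamp}] {message}\n")

def backup_venv(venv: str = "/venv/main") -> None:
    log(f"Processing virtual environment: {venv}")
    target = BACKUP_DIR / f"venv-main-{datetime.now():%Y-%m-%d-%H%M}.txt"
    freeze = subprocess.run([f"{venv}/bin/pip", "freeze"],
                            capture_output=True, text=True, check=True)
    target.write_text(freeze.stdout)
    log(f"SUCCESS: Created backup at {target}")
    log("Backup process completed")

if __name__ == "__main__":
    BACKUP_DIR.mkdir(parents=True, exist_ok=True)
    backup_venv()
```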
.venv-backups/18897064/venv-main-2025-03-18-0524.txt ADDED
@@ -0,0 +1,68 @@
+ asttokens==3.0.0
+ certifi==2025.1.31
+ charset-normalizer==3.4.1
+ comm==0.2.2
+ debugpy==1.8.12
+ decorator==5.1.1
+ exceptiongroup==1.2.2
+ executing==2.2.0
+ filelock==3.17.0
+ fsspec==2025.2.0
+ huggingface-hub==0.28.1
+ idna==3.10
+ inquirerpy==0.3.4
+ ipykernel==6.29.5
+ ipython==8.32.0
+ ipywidgets==8.1.5
+ jedi==0.19.2
+ Jinja2==3.1.4
+ jupyter_client==8.6.3
+ jupyter_core==5.7.2
+ jupyterlab_widgets==3.0.13
+ MarkupSafe==2.1.5
+ matplotlib-inline==0.1.7
+ mpmath==1.3.0
+ nest-asyncio==1.6.0
+ networkx==3.3
+ numpy==2.1.2
+ nvidia-cublas-cu12==12.1.3.1
+ nvidia-cuda-cupti-cu12==12.1.105
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ nvidia-cuda-runtime-cu12==12.1.105
+ nvidia-cudnn-cu12==9.1.0.70
+ nvidia-cufft-cu12==11.0.2.54
+ nvidia-curand-cu12==10.3.2.106
+ nvidia-cusolver-cu12==11.4.5.107
+ nvidia-cusparse-cu12==12.1.0.106
+ nvidia-nccl-cu12==2.21.5
+ nvidia-nvjitlink-cu12==12.1.105
+ nvidia-nvtx-cu12==12.1.105
+ packaging==24.2
+ parso==0.8.4
+ pexpect==4.9.0
+ pfzy==0.3.4
+ pillow==11.0.0
+ platformdirs==4.3.6
+ prompt_toolkit==3.0.50
+ psutil==6.1.1
+ ptyprocess==0.7.0
+ pure_eval==0.2.3
+ Pygments==2.19.1
+ python-dateutil==2.9.0.post0
+ PyYAML==6.0.2
+ pyzmq==26.2.1
+ requests==2.32.3
+ six==1.17.0
+ stack-data==0.6.3
+ sympy==1.13.1
+ torch==2.5.1+cu121
+ torchaudio==2.5.1+cu121
+ torchvision==0.20.1+cu121
+ tornado==6.4.2
+ tqdm==4.67.1
+ traitlets==5.14.3
+ triton==3.1.0
+ typing_extensions==4.12.2
+ urllib3==2.3.0
+ wcwidth==0.2.13
+ widgetsnbextension==4.0.13
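Each snapshot is plain pip freeze output, so the captured environment can be rebuilt from it. A short sketch, assuming the file path committed here; note the +cu121 torch wheels may additionally need the matching PyTorch index URL:

```python
import subprocess

requirements = ".venv-backups/18897064/venv-main-2025-03-18-0524.txt"
# Reinstall the pinned package set into the current environment.
subprocess.run(["pip", "install", "-r", requirements], check=True)
```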
.venv-backups/18897064/venv-main-2025-03-18-0530.txt ADDED
@@ -0,0 +1,68 @@
(Added contents are identical to venv-main-2025-03-18-0524.txt above.)
.venv-backups/18897064/venv-main-2025-03-18-0600.txt ADDED
@@ -0,0 +1,110 @@
+ accelerate==1.5.2
+ aiohappyeyeballs==2.6.1
+ aiohttp==3.11.14
+ aiosignal==1.3.2
+ asttokens==3.0.0
+ async-timeout==5.0.1
+ attrs==25.3.0
+ bitsandbytes==0.45.3
+ certifi==2025.1.31
+ charset-normalizer==3.4.1
+ comm==0.2.2
+ cut-cross-entropy==25.1.1
+ datasets==3.4.1
+ debugpy==1.8.12
+ decorator==5.1.1
+ diffusers==0.32.2
+ dill==0.3.8
+ docstring_parser==0.16
+ exceptiongroup==1.2.2
+ executing==2.2.0
+ filelock==3.17.0
+ frozenlist==1.5.0
+ fsspec==2024.12.0
+ hf_transfer==0.1.9
+ huggingface-hub==0.28.1
+ idna==3.10
+ importlib_metadata==8.6.1
+ inquirerpy==0.3.4
+ ipykernel==6.29.5
+ ipython==8.32.0
+ ipywidgets==8.1.5
+ jedi==0.19.2
+ Jinja2==3.1.4
+ jupyter_client==8.6.3
+ jupyter_core==5.7.2
+ jupyterlab_widgets==3.0.13
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ matplotlib-inline==0.1.7
+ mdurl==0.1.2
+ mpmath==1.3.0
+ multidict==6.2.0
+ multiprocess==0.70.16
+ nest-asyncio==1.6.0
+ networkx==3.3
+ numpy==2.1.2
+ nvidia-cublas-cu12==12.1.3.1
+ nvidia-cuda-cupti-cu12==12.1.105
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ nvidia-cuda-runtime-cu12==12.1.105
+ nvidia-cudnn-cu12==9.1.0.70
+ nvidia-cufft-cu12==11.0.2.54
+ nvidia-curand-cu12==10.3.2.106
+ nvidia-cusolver-cu12==11.4.5.107
+ nvidia-cusparse-cu12==12.1.0.106
+ nvidia-nccl-cu12==2.21.5
+ nvidia-nvjitlink-cu12==12.1.105
+ nvidia-nvtx-cu12==12.1.105
+ packaging==24.2
+ pandas==2.2.3
+ parso==0.8.4
+ peft==0.14.0
+ pexpect==4.9.0
+ pfzy==0.3.4
+ pillow==11.0.0
+ platformdirs==4.3.6
+ prompt_toolkit==3.0.50
+ propcache==0.3.0
+ protobuf==3.20.3
+ psutil==6.1.1
+ ptyprocess==0.7.0
+ pure_eval==0.2.3
+ pyarrow==19.0.1
+ Pygments==2.19.1
+ python-dateutil==2.9.0.post0
+ pytz==2025.1
+ PyYAML==6.0.2
+ pyzmq==26.2.1
+ regex==2024.11.6
+ requests==2.32.3
+ rich==13.9.4
+ safetensors==0.5.3
+ sentencepiece==0.2.0
+ shtab==1.7.1
+ six==1.17.0
+ stack-data==0.6.3
+ sympy==1.13.1
+ tokenizers==0.21.1
+ torch==2.5.1+cu121
+ torchaudio==2.5.1+cu121
+ torchvision==0.20.1+cu121
+ tornado==6.4.2
+ tqdm==4.67.1
+ traitlets==5.14.3
+ transformers==4.49.0
+ triton==3.1.0
+ trl==0.15.2
+ typeguard==4.4.2
+ typing_extensions==4.12.2
+ tyro==0.9.17
+ tzdata==2025.1
+ unsloth @ git+https://github.com/unslothai/unsloth.git@6f7c8c6d0a63caaa129cc0bc6b845d5d8b9c81e8
+ unsloth_zoo==2025.3.12
+ urllib3==2.3.0
+ wcwidth==0.2.13
+ widgetsnbextension==4.0.13
+ xformers==0.0.29.post1
+ xxhash==3.5.0
+ yarl==1.18.3
+ zipp==3.21.0
.venv-backups/18897064/venv-main-2025-03-18-0630.txt ADDED
@@ -0,0 +1,110 @@
(Added contents are identical to venv-main-2025-03-18-0600.txt above.)
.venv-backups/18897064/venv-main-2025-03-18-0700.txt ADDED
@@ -0,0 +1,110 @@
(Added contents are identical to venv-main-2025-03-18-0600.txt above.)
.venv-backups/18897064/venv-main-2025-03-18-0730.txt ADDED
@@ -0,0 +1,110 @@
(Added contents are identical to venv-main-2025-03-18-0600.txt above.)
.venv-backups/18897064/venv-main-2025-03-18-0800.txt ADDED
@@ -0,0 +1,110 @@
(Added contents are identical to venv-main-2025-03-18-0600.txt above.)
.venv-backups/18897064/venv-main-2025-03-18-0830.txt ADDED
@@ -0,0 +1,110 @@
(Added contents are identical to venv-main-2025-03-18-0600.txt above.)
.venv-backups/18897064/venv-main-2025-03-18-0900.txt ADDED
@@ -0,0 +1,110 @@
(Added contents are identical to venv-main-2025-03-18-0600.txt above.)
.venv-backups/18897064/venv-main-2025-03-18-0930.txt ADDED
@@ -0,0 +1,110 @@
(Added contents are identical to venv-main-2025-03-18-0600.txt above.)
.venv-backups/18897064/venv-main-2025-03-18-1000.txt ADDED
@@ -0,0 +1,110 @@
(Added contents are identical to venv-main-2025-03-18-0600.txt above.)
.venv-backups/18897064/venv-main-2025-03-18-1030.txt ADDED
@@ -0,0 +1,110 @@
(Added contents are identical to venv-main-2025-03-18-0600.txt above.)
.venv-backups/18897064/venv-main-2025-03-18-1100.txt ADDED
@@ -0,0 +1,110 @@
(Added contents are identical to venv-main-2025-03-18-0600.txt above.)
.venv-backups/18897064/venv-main-2025-03-18-1130.txt ADDED
@@ -0,0 +1,110 @@
(Added contents are identical to venv-main-2025-03-18-0600.txt above.)
.venv-backups/18897064/venv-main-2025-03-18-1200.txt ADDED
@@ -0,0 +1,110 @@
(Added contents are identical to venv-main-2025-03-18-0600.txt above.)
.venv-backups/18897064/venv-main-2025-03-18-1230.txt ADDED
@@ -0,0 +1,110 @@
(Added contents are identical to venv-main-2025-03-18-0600.txt above.)
.venv-backups/18897064/venv-main-2025-03-18-1300.txt ADDED
@@ -0,0 +1,110 @@
(Added contents are identical to venv-main-2025-03-18-0600.txt above.)
.venv-backups/18897064/venv-main-2025-03-18-1330.txt ADDED
@@ -0,0 +1,110 @@
+ accelerate==1.5.2
+ aiohappyeyeballs==2.6.1
+ aiohttp==3.11.14
+ aiosignal==1.3.2
+ asttokens==3.0.0
+ async-timeout==5.0.1
+ attrs==25.3.0
+ bitsandbytes==0.45.3
+ certifi==2025.1.31
+ charset-normalizer==3.4.1
+ comm==0.2.2
+ cut-cross-entropy==25.1.1
+ datasets==3.4.1
+ debugpy==1.8.12
+ decorator==5.1.1
+ diffusers==0.32.2
+ dill==0.3.8
+ docstring_parser==0.16
+ exceptiongroup==1.2.2
+ executing==2.2.0
+ filelock==3.17.0
+ frozenlist==1.5.0
+ fsspec==2024.12.0
+ hf_transfer==0.1.9
+ huggingface-hub==0.28.1
+ idna==3.10
+ importlib_metadata==8.6.1
+ inquirerpy==0.3.4
+ ipykernel==6.29.5
+ ipython==8.32.0
+ ipywidgets==8.1.5
+ jedi==0.19.2
+ Jinja2==3.1.4
+ jupyter_client==8.6.3
+ jupyter_core==5.7.2
+ jupyterlab_widgets==3.0.13
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ matplotlib-inline==0.1.7
+ mdurl==0.1.2
+ mpmath==1.3.0
+ multidict==6.2.0
+ multiprocess==0.70.16
+ nest-asyncio==1.6.0
+ networkx==3.3
+ numpy==2.1.2
+ nvidia-cublas-cu12==12.1.3.1
+ nvidia-cuda-cupti-cu12==12.1.105
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ nvidia-cuda-runtime-cu12==12.1.105
+ nvidia-cudnn-cu12==9.1.0.70
+ nvidia-cufft-cu12==11.0.2.54
+ nvidia-curand-cu12==10.3.2.106
+ nvidia-cusolver-cu12==11.4.5.107
+ nvidia-cusparse-cu12==12.1.0.106
+ nvidia-nccl-cu12==2.21.5
+ nvidia-nvjitlink-cu12==12.1.105
+ nvidia-nvtx-cu12==12.1.105
+ packaging==24.2
+ pandas==2.2.3
+ parso==0.8.4
+ peft==0.14.0
+ pexpect==4.9.0
+ pfzy==0.3.4
+ pillow==11.0.0
+ platformdirs==4.3.6
+ prompt_toolkit==3.0.50
+ propcache==0.3.0
+ protobuf==3.20.3
+ psutil==6.1.1
+ ptyprocess==0.7.0
+ pure_eval==0.2.3
+ pyarrow==19.0.1
+ Pygments==2.19.1
+ python-dateutil==2.9.0.post0
+ pytz==2025.1
+ PyYAML==6.0.2
+ pyzmq==26.2.1
+ regex==2024.11.6
+ requests==2.32.3
+ rich==13.9.4
+ safetensors==0.5.3
+ sentencepiece==0.2.0
+ shtab==1.7.1
+ six==1.17.0
+ stack-data==0.6.3
+ sympy==1.13.1
+ tokenizers==0.21.1
+ torch==2.5.1+cu121
+ torchaudio==2.5.1+cu121
+ torchvision==0.20.1+cu121
+ tornado==6.4.2
+ tqdm==4.67.1
+ traitlets==5.14.3
+ transformers==4.49.0
+ triton==3.1.0
+ trl==0.15.2
+ typeguard==4.4.2
+ typing_extensions==4.12.2
+ tyro==0.9.17
+ tzdata==2025.1
+ unsloth @ git+https://github.com/unslothai/unsloth.git@6f7c8c6d0a63caaa129cc0bc6b845d5d8b9c81e8
+ unsloth_zoo==2025.3.12
+ urllib3==2.3.0
+ wcwidth==0.2.13
+ widgetsnbextension==4.0.13
+ xformers==0.0.29.post1
+ xxhash==3.5.0
+ yarl==1.18.3
+ zipp==3.21.0
.venv-backups/18897064/venv-main-2025-03-18-1400.txt ADDED
@@ -0,0 +1,110 @@
+ accelerate==1.5.2
+ aiohappyeyeballs==2.6.1
+ aiohttp==3.11.14
+ aiosignal==1.3.2
+ asttokens==3.0.0
+ async-timeout==5.0.1
+ attrs==25.3.0
+ bitsandbytes==0.45.3
+ certifi==2025.1.31
+ charset-normalizer==3.4.1
+ comm==0.2.2
+ cut-cross-entropy==25.1.1
+ datasets==3.4.1
+ debugpy==1.8.12
+ decorator==5.1.1
+ diffusers==0.32.2
+ dill==0.3.8
+ docstring_parser==0.16
+ exceptiongroup==1.2.2
+ executing==2.2.0
+ filelock==3.17.0
+ frozenlist==1.5.0
+ fsspec==2024.12.0
+ hf_transfer==0.1.9
+ huggingface-hub==0.28.1
+ idna==3.10
+ importlib_metadata==8.6.1
+ inquirerpy==0.3.4
+ ipykernel==6.29.5
+ ipython==8.32.0
+ ipywidgets==8.1.5
+ jedi==0.19.2
+ Jinja2==3.1.4
+ jupyter_client==8.6.3
+ jupyter_core==5.7.2
+ jupyterlab_widgets==3.0.13
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ matplotlib-inline==0.1.7
+ mdurl==0.1.2
+ mpmath==1.3.0
+ multidict==6.2.0
+ multiprocess==0.70.16
+ nest-asyncio==1.6.0
+ networkx==3.3
+ numpy==2.1.2
+ nvidia-cublas-cu12==12.1.3.1
+ nvidia-cuda-cupti-cu12==12.1.105
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ nvidia-cuda-runtime-cu12==12.1.105
+ nvidia-cudnn-cu12==9.1.0.70
+ nvidia-cufft-cu12==11.0.2.54
+ nvidia-curand-cu12==10.3.2.106
+ nvidia-cusolver-cu12==11.4.5.107
+ nvidia-cusparse-cu12==12.1.0.106
+ nvidia-nccl-cu12==2.21.5
+ nvidia-nvjitlink-cu12==12.1.105
+ nvidia-nvtx-cu12==12.1.105
+ packaging==24.2
+ pandas==2.2.3
+ parso==0.8.4
+ peft==0.14.0
+ pexpect==4.9.0
+ pfzy==0.3.4
+ pillow==11.0.0
+ platformdirs==4.3.6
+ prompt_toolkit==3.0.50
+ propcache==0.3.0
+ protobuf==3.20.3
+ psutil==6.1.1
+ ptyprocess==0.7.0
+ pure_eval==0.2.3
+ pyarrow==19.0.1
+ Pygments==2.19.1
+ python-dateutil==2.9.0.post0
+ pytz==2025.1
+ PyYAML==6.0.2
+ pyzmq==26.2.1
+ regex==2024.11.6
+ requests==2.32.3
+ rich==13.9.4
+ safetensors==0.5.3
+ sentencepiece==0.2.0
+ shtab==1.7.1
+ six==1.17.0
+ stack-data==0.6.3
+ sympy==1.13.1
+ tokenizers==0.21.1
+ torch==2.5.1+cu121
+ torchaudio==2.5.1+cu121
+ torchvision==0.20.1+cu121
+ tornado==6.4.2
+ tqdm==4.67.1
+ traitlets==5.14.3
+ transformers==4.49.0
+ triton==3.1.0
+ trl==0.15.2
+ typeguard==4.4.2
+ typing_extensions==4.12.2
+ tyro==0.9.17
+ tzdata==2025.1
+ unsloth @ git+https://github.com/unslothai/unsloth.git@6f7c8c6d0a63caaa129cc0bc6b845d5d8b9c81e8
+ unsloth_zoo==2025.3.12
+ urllib3==2.3.0
+ wcwidth==0.2.13
+ widgetsnbextension==4.0.13
+ xformers==0.0.29.post1
+ xxhash==3.5.0
+ yarl==1.18.3
+ zipp==3.21.0
.venv-backups/18897064/venv-main-2025-03-18-1430.txt ADDED
@@ -0,0 +1,110 @@
+ accelerate==1.5.2
+ aiohappyeyeballs==2.6.1
+ aiohttp==3.11.14
+ aiosignal==1.3.2
+ asttokens==3.0.0
+ async-timeout==5.0.1
+ attrs==25.3.0
+ bitsandbytes==0.45.3
+ certifi==2025.1.31
+ charset-normalizer==3.4.1
+ comm==0.2.2
+ cut-cross-entropy==25.1.1
+ datasets==3.4.1
+ debugpy==1.8.12
+ decorator==5.1.1
+ diffusers==0.32.2
+ dill==0.3.8
+ docstring_parser==0.16
+ exceptiongroup==1.2.2
+ executing==2.2.0
+ filelock==3.17.0
+ frozenlist==1.5.0
+ fsspec==2024.12.0
+ hf_transfer==0.1.9
+ huggingface-hub==0.28.1
+ idna==3.10
+ importlib_metadata==8.6.1
+ inquirerpy==0.3.4
+ ipykernel==6.29.5
+ ipython==8.32.0
+ ipywidgets==8.1.5
+ jedi==0.19.2
+ Jinja2==3.1.4
+ jupyter_client==8.6.3
+ jupyter_core==5.7.2
+ jupyterlab_widgets==3.0.13
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ matplotlib-inline==0.1.7
+ mdurl==0.1.2
+ mpmath==1.3.0
+ multidict==6.2.0
+ multiprocess==0.70.16
+ nest-asyncio==1.6.0
+ networkx==3.3
+ numpy==2.1.2
+ nvidia-cublas-cu12==12.1.3.1
+ nvidia-cuda-cupti-cu12==12.1.105
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ nvidia-cuda-runtime-cu12==12.1.105
+ nvidia-cudnn-cu12==9.1.0.70
+ nvidia-cufft-cu12==11.0.2.54
+ nvidia-curand-cu12==10.3.2.106
+ nvidia-cusolver-cu12==11.4.5.107
+ nvidia-cusparse-cu12==12.1.0.106
+ nvidia-nccl-cu12==2.21.5
+ nvidia-nvjitlink-cu12==12.1.105
+ nvidia-nvtx-cu12==12.1.105
+ packaging==24.2
+ pandas==2.2.3
+ parso==0.8.4
+ peft==0.14.0
+ pexpect==4.9.0
+ pfzy==0.3.4
+ pillow==11.0.0
+ platformdirs==4.3.6
+ prompt_toolkit==3.0.50
+ propcache==0.3.0
+ protobuf==3.20.3
+ psutil==6.1.1
+ ptyprocess==0.7.0
+ pure_eval==0.2.3
+ pyarrow==19.0.1
+ Pygments==2.19.1
+ python-dateutil==2.9.0.post0
+ pytz==2025.1
+ PyYAML==6.0.2
+ pyzmq==26.2.1
+ regex==2024.11.6
+ requests==2.32.3
+ rich==13.9.4
+ safetensors==0.5.3
+ sentencepiece==0.2.0
+ shtab==1.7.1
+ six==1.17.0
+ stack-data==0.6.3
+ sympy==1.13.1
+ tokenizers==0.21.1
+ torch==2.5.1+cu121
+ torchaudio==2.5.1+cu121
+ torchvision==0.20.1+cu121
+ tornado==6.4.2
+ tqdm==4.67.1
+ traitlets==5.14.3
+ transformers==4.49.0
+ triton==3.1.0
+ trl==0.15.2
+ typeguard==4.4.2
+ typing_extensions==4.12.2
+ tyro==0.9.17
+ tzdata==2025.1
+ unsloth @ git+https://github.com/unslothai/unsloth.git@6f7c8c6d0a63caaa129cc0bc6b845d5d8b9c81e8
+ unsloth_zoo==2025.3.12
+ urllib3==2.3.0
+ wcwidth==0.2.13
+ widgetsnbextension==4.0.13
+ xformers==0.0.29.post1
+ xxhash==3.5.0
+ yarl==1.18.3
+ zipp==3.21.0
.venv-backups/18897064/venv-main-latest.txt ADDED
@@ -0,0 +1,110 @@
+ accelerate==1.5.2
+ aiohappyeyeballs==2.6.1
+ aiohttp==3.11.14
+ aiosignal==1.3.2
+ asttokens==3.0.0
+ async-timeout==5.0.1
+ attrs==25.3.0
+ bitsandbytes==0.45.3
+ certifi==2025.1.31
+ charset-normalizer==3.4.1
+ comm==0.2.2
+ cut-cross-entropy==25.1.1
+ datasets==3.4.1
+ debugpy==1.8.12
+ decorator==5.1.1
+ diffusers==0.32.2
+ dill==0.3.8
+ docstring_parser==0.16
+ exceptiongroup==1.2.2
+ executing==2.2.0
+ filelock==3.17.0
+ frozenlist==1.5.0
+ fsspec==2024.12.0
+ hf_transfer==0.1.9
+ huggingface-hub==0.28.1
+ idna==3.10
+ importlib_metadata==8.6.1
+ inquirerpy==0.3.4
+ ipykernel==6.29.5
+ ipython==8.32.0
+ ipywidgets==8.1.5
+ jedi==0.19.2
+ Jinja2==3.1.4
+ jupyter_client==8.6.3
+ jupyter_core==5.7.2
+ jupyterlab_widgets==3.0.13
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ matplotlib-inline==0.1.7
+ mdurl==0.1.2
+ mpmath==1.3.0
+ multidict==6.2.0
+ multiprocess==0.70.16
+ nest-asyncio==1.6.0
+ networkx==3.3
+ numpy==2.1.2
+ nvidia-cublas-cu12==12.1.3.1
+ nvidia-cuda-cupti-cu12==12.1.105
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ nvidia-cuda-runtime-cu12==12.1.105
+ nvidia-cudnn-cu12==9.1.0.70
+ nvidia-cufft-cu12==11.0.2.54
+ nvidia-curand-cu12==10.3.2.106
+ nvidia-cusolver-cu12==11.4.5.107
+ nvidia-cusparse-cu12==12.1.0.106
+ nvidia-nccl-cu12==2.21.5
+ nvidia-nvjitlink-cu12==12.1.105
+ nvidia-nvtx-cu12==12.1.105
+ packaging==24.2
+ pandas==2.2.3
+ parso==0.8.4
+ peft==0.14.0
+ pexpect==4.9.0
+ pfzy==0.3.4
+ pillow==11.0.0
+ platformdirs==4.3.6
+ prompt_toolkit==3.0.50
+ propcache==0.3.0
+ protobuf==3.20.3
+ psutil==6.1.1
+ ptyprocess==0.7.0
+ pure_eval==0.2.3
+ pyarrow==19.0.1
+ Pygments==2.19.1
+ python-dateutil==2.9.0.post0
+ pytz==2025.1
+ PyYAML==6.0.2
+ pyzmq==26.2.1
+ regex==2024.11.6
+ requests==2.32.3
+ rich==13.9.4
+ safetensors==0.5.3
+ sentencepiece==0.2.0
+ shtab==1.7.1
+ six==1.17.0
+ stack-data==0.6.3
+ sympy==1.13.1
+ tokenizers==0.21.1
+ torch==2.5.1+cu121
+ torchaudio==2.5.1+cu121
+ torchvision==0.20.1+cu121
+ tornado==6.4.2
+ tqdm==4.67.1
+ traitlets==5.14.3
+ transformers==4.49.0
+ triton==3.1.0
+ trl==0.15.2
+ typeguard==4.4.2
+ typing_extensions==4.12.2
+ tyro==0.9.17
+ tzdata==2025.1
+ unsloth @ git+https://github.com/unslothai/unsloth.git@6f7c8c6d0a63caaa129cc0bc6b845d5d8b9c81e8
+ unsloth_zoo==2025.3.12
+ urllib3==2.3.0
+ wcwidth==0.2.13
+ widgetsnbextension==4.0.13
+ xformers==0.0.29.post1
+ xxhash==3.5.0
+ yarl==1.18.3
+ zipp==3.21.0
Untitled.ipynb ADDED
@@ -0,0 +1,1272 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "id": "de38bf8f-3186-4014-a29e-eca49cb72aea",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
14
+ "🦥 Unsloth Zoo will now patch everything to make training faster!\n",
15
+ "==((====))== Unsloth 2025.3.14: Fast Qwen2 patching. Transformers: 4.49.0.\n",
16
+ " \\\\ /| NVIDIA A100-PCIE-40GB. Num GPUs = 1. Max memory: 39.394 GB. Platform: Linux.\n",
17
+ "O^O/ \\_/ \\ Torch: 2.5.1+cu121. CUDA: 8.0. CUDA Toolkit: 12.1. Triton: 3.1.0\n",
18
+ "\\ / Bfloat16 = TRUE. FA [Xformers = 0.0.29.post1. FA2 = False]\n",
19
+ " \"-____-\" Free license: http://github.com/unslothai/unsloth\n",
20
+ "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n"
21
+ ]
22
+ },
23
+ {
24
+ "data": {
25
+ "application/vnd.jupyter.widget-view+json": {
26
+ "model_id": "38898fae87c94e53ba161b9a7d1c38fb",
27
+ "version_major": 2,
28
+ "version_minor": 0
29
+ },
30
+ "text/plain": [
31
+ "model.safetensors: 0%| | 0.00/3.09G [00:00<?, ?B/s]"
32
+ ]
33
+ },
34
+ "metadata": {},
35
+ "output_type": "display_data"
36
+ },
37
+ {
38
+ "data": {
39
+ "application/vnd.jupyter.widget-view+json": {
40
+ "model_id": "13d3146872b54fb1b270b3ad5b2c0420",
41
+ "version_major": 2,
42
+ "version_minor": 0
43
+ },
44
+ "text/plain": [
45
+ "generation_config.json: 0%| | 0.00/270 [00:00<?, ?B/s]"
46
+ ]
47
+ },
48
+ "metadata": {},
49
+ "output_type": "display_data"
50
+ },
51
+ {
52
+ "data": {
53
+ "application/vnd.jupyter.widget-view+json": {
54
+ "model_id": "bed630fcafaa4a3d9a8f17e660afd439",
55
+ "version_major": 2,
56
+ "version_minor": 0
57
+ },
58
+ "text/plain": [
59
+ "tokenizer_config.json: 0%| | 0.00/7.36k [00:00<?, ?B/s]"
60
+ ]
61
+ },
62
+ "metadata": {},
63
+ "output_type": "display_data"
64
+ },
65
+ {
66
+ "data": {
67
+ "application/vnd.jupyter.widget-view+json": {
68
+ "model_id": "67c7c03659b94137b44e83852b85334b",
69
+ "version_major": 2,
70
+ "version_minor": 0
71
+ },
72
+ "text/plain": [
73
+ "vocab.json: 0%| | 0.00/2.78M [00:00<?, ?B/s]"
74
+ ]
75
+ },
76
+ "metadata": {},
77
+ "output_type": "display_data"
78
+ },
79
+ {
80
+ "data": {
81
+ "application/vnd.jupyter.widget-view+json": {
82
+ "model_id": "e6679e0c6139454894aeac09301d5f45",
83
+ "version_major": 2,
84
+ "version_minor": 0
85
+ },
86
+ "text/plain": [
87
+ "merges.txt: 0%| | 0.00/1.67M [00:00<?, ?B/s]"
88
+ ]
89
+ },
90
+ "metadata": {},
91
+ "output_type": "display_data"
92
+ },
93
+ {
94
+ "data": {
95
+ "application/vnd.jupyter.widget-view+json": {
96
+ "model_id": "9b8b23eaab2146b79e9960dc8caf0ce7",
97
+ "version_major": 2,
98
+ "version_minor": 0
99
+ },
100
+ "text/plain": [
101
+ "added_tokens.json: 0%| | 0.00/605 [00:00<?, ?B/s]"
102
+ ]
103
+ },
104
+ "metadata": {},
105
+ "output_type": "display_data"
106
+ },
107
+ {
108
+ "data": {
109
+ "application/vnd.jupyter.widget-view+json": {
110
+ "model_id": "9d2750b8cb154c9d956bed8d63e3d53b",
111
+ "version_major": 2,
112
+ "version_minor": 0
113
+ },
114
+ "text/plain": [
115
+ "special_tokens_map.json: 0%| | 0.00/614 [00:00<?, ?B/s]"
116
+ ]
117
+ },
118
+ "metadata": {},
119
+ "output_type": "display_data"
120
+ },
121
+ {
122
+ "data": {
123
+ "application/vnd.jupyter.widget-view+json": {
124
+ "model_id": "c037138176bf4917b5897fb27eeec63e",
125
+ "version_major": 2,
126
+ "version_minor": 0
127
+ },
128
+ "text/plain": [
129
+ "tokenizer.json: 0%| | 0.00/11.4M [00:00<?, ?B/s]"
130
+ ]
131
+ },
132
+ "metadata": {},
133
+ "output_type": "display_data"
134
+ }
135
+ ],
136
+ "source": [
137
+ "from unsloth import FastLanguageModel\n",
138
+ "import torch\n",
139
+ "max_seq_length = 8192 # Choose any! We auto support RoPE Scaling internally!\n",
140
+ "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
141
+ "load_in_4bit = False # Use 4bit quantization to reduce memory usage. Can be False.\n",
142
+ "\n",
143
+ "model, tokenizer = FastLanguageModel.from_pretrained(\n",
144
+ " # Can select any from the below:\n",
145
+ " # \"unsloth/Qwen2.5-0.5B\", \"unsloth/Qwen2.5-1.5B\", \"unsloth/Qwen2.5-3B\"\n",
146
+ " # \"unsloth/Qwen2.5-14B\", \"unsloth/Qwen2.5-32B\", \"unsloth/Qwen2.5-72B\",\n",
147
+ " # And also all Instruct versions and Math. Coding versions!\n",
148
+ " model_name = \"unsloth/Qwen2.5-1.5B-Instruct\",\n",
149
+ " max_seq_length = max_seq_length,\n",
150
+ " dtype = dtype,\n",
151
+ " load_in_4bit = load_in_4bit,\n",
152
+ " # token = \"hf_...\", # use one if using gated models like meta-llama/Llama-2-7b-hf\n",
153
+ ")"
154
+ ]
155
+ },
156
+ {
157
+ "cell_type": "code",
158
+ "execution_count": 3,
159
+ "id": "5f94455a-e496-46d8-8e81-30b1ac788b62",
160
+ "metadata": {},
161
+ "outputs": [
162
+ {
163
+ "name": "stderr",
164
+ "output_type": "stream",
165
+ "text": [
166
+ "Unsloth 2025.3.14 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.\n"
167
+ ]
168
+ }
169
+ ],
170
+ "source": [
171
+ "model = FastLanguageModel.get_peft_model(\n",
172
+ " model,\n",
173
+ " r = 32, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n",
174
+ " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
175
+ " \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
176
+ " lora_alpha = 16,\n",
177
+ " lora_dropout = 0, # Supports any, but = 0 is optimized\n",
178
+ " bias = \"none\", # Supports any, but = \"none\" is optimized\n",
179
+ " # [NEW] \"unsloth\" uses 30% less VRAM, fits 2x larger batch sizes!\n",
180
+ " use_gradient_checkpointing = \"unsloth\", # True or \"unsloth\" for very long context\n",
181
+ " random_state = 3407,\n",
182
+ " use_rslora = False, # We support rank stabilized LoRA\n",
183
+ " loftq_config = None, # And LoftQ\n",
184
+ ")"
185
+ ]
186
+ },
187
+ {
188
+ "cell_type": "code",
189
+ "execution_count": 4,
190
+ "id": "3b8cd54e-88de-4247-8788-e068b7239555",
191
+ "metadata": {},
192
+ "outputs": [
193
+ {
194
+ "data": {
195
+ "application/vnd.jupyter.widget-view+json": {
196
+ "model_id": "e94729d01636494bbe2ba05ebe015f46",
197
+ "version_major": 2,
198
+ "version_minor": 0
199
+ },
200
+ "text/plain": [
201
+ "README.md: 0%| | 0.00/5.60k [00:00<?, ?B/s]"
202
+ ]
203
+ },
204
+ "metadata": {},
205
+ "output_type": "display_data"
206
+ },
207
+ {
208
+ "data": {
209
+ "application/vnd.jupyter.widget-view+json": {
210
+ "model_id": "96a81b8332754882830f4a9cb4dbfb2a",
211
+ "version_major": 2,
212
+ "version_minor": 0
213
+ },
214
+ "text/plain": [
215
+ "Resolving data files: 0%| | 0/40 [00:00<?, ?it/s]"
216
+ ]
217
+ },
218
+ "metadata": {},
219
+ "output_type": "display_data"
220
+ },
221
+ {
222
+ "data": {
223
+ "application/vnd.jupyter.widget-view+json": {
224
+ "model_id": "7f1b67fe539743b384fb89f8879550fc",
225
+ "version_major": 2,
226
+ "version_minor": 0
227
+ },
228
+ "text/plain": [
229
+ "Downloading data: 0%| | 0/40 [00:00<?, ?files/s]"
230
+ ]
231
+ },
232
+ "metadata": {},
233
+ "output_type": "display_data"
234
+ },
235
+ {
236
+ "data": {
237
+ "application/vnd.jupyter.widget-view+json": {
238
+ "model_id": "3baeeb8722924eab85527f8654cf9cd4",
239
+ "version_major": 2,
240
+ "version_minor": 0
241
+ },
242
+ "text/plain": [
243
+ "train-000-of-040.parquet: 0%| | 0.00/59.5M [00:00<?, ?B/s]"
244
+ ]
245
+ },
246
+ "metadata": {},
247
+ "output_type": "display_data"
248
+ },
249
+ {
250
+ "data": {
251
+ "application/vnd.jupyter.widget-view+json": {
252
+ "model_id": "98819c05d78748f5be657aef80762114",
253
+ "version_major": 2,
254
+ "version_minor": 0
255
+ },
256
+ "text/plain": [
257
+ "train-001-of-040.parquet: 0%| | 0.00/59.3M [00:00<?, ?B/s]"
258
+ ]
259
+ },
260
+ "metadata": {},
261
+ "output_type": "display_data"
262
+ },
263
+ {
264
+ "data": {
265
+ "application/vnd.jupyter.widget-view+json": {
266
+ "model_id": "65eb471d507f4034b34d2a030f54edcb",
267
+ "version_major": 2,
268
+ "version_minor": 0
269
+ },
270
+ "text/plain": [
271
+ "train-002-of-040.parquet: 0%| | 0.00/60.7M [00:00<?, ?B/s]"
272
+ ]
273
+ },
274
+ "metadata": {},
275
+ "output_type": "display_data"
276
+ },
277
+ {
278
+ "data": {
279
+ "application/vnd.jupyter.widget-view+json": {
280
+ "model_id": "d8c83b7af18349919f3517bbe6bceee8",
281
+ "version_major": 2,
282
+ "version_minor": 0
283
+ },
284
+ "text/plain": [
285
+ "train-003-of-040.parquet: 0%| | 0.00/60.7M [00:00<?, ?B/s]"
286
+ ]
287
+ },
288
+ "metadata": {},
289
+ "output_type": "display_data"
290
+ },
291
+ {
292
+ "data": {
293
+ "application/vnd.jupyter.widget-view+json": {
294
+ "model_id": "53b01e823d7a46a6ab0dd32f09b61a98",
295
+ "version_major": 2,
296
+ "version_minor": 0
297
+ },
298
+ "text/plain": [
299
+ "train-004-of-040.parquet: 0%| | 0.00/60.3M [00:00<?, ?B/s]"
300
+ ]
301
+ },
302
+ "metadata": {},
303
+ "output_type": "display_data"
304
+ },
305
+ {
306
+ "data": {
307
+ "application/vnd.jupyter.widget-view+json": {
308
+ "model_id": "bb1d1cb4d11447358f6b8cbde8176282",
309
+ "version_major": 2,
310
+ "version_minor": 0
311
+ },
312
+ "text/plain": [
313
+ "train-005-of-040.parquet: 0%| | 0.00/58.8M [00:00<?, ?B/s]"
314
+ ]
315
+ },
316
+ "metadata": {},
317
+ "output_type": "display_data"
318
+ },
319
+ {
320
+ "data": {
321
+ "application/vnd.jupyter.widget-view+json": {
322
+ "model_id": "919e663f947c4b2292cb62c7b129c745",
323
+ "version_major": 2,
324
+ "version_minor": 0
325
+ },
326
+ "text/plain": [
327
+ "train-006-of-040.parquet: 0%| | 0.00/60.9M [00:00<?, ?B/s]"
328
+ ]
329
+ },
330
+ "metadata": {},
331
+ "output_type": "display_data"
332
+ },
333
+ {
334
+ "data": {
335
+ "application/vnd.jupyter.widget-view+json": {
336
+ "model_id": "faeb49017154415aa508e9d07403c8e8",
337
+ "version_major": 2,
338
+ "version_minor": 0
339
+ },
340
+ "text/plain": [
341
+ "train-007-of-040.parquet: 0%| | 0.00/59.5M [00:00<?, ?B/s]"
342
+ ]
343
+ },
344
+ "metadata": {},
345
+ "output_type": "display_data"
346
+ },
347
+ {
348
+ "data": {
349
+ "application/vnd.jupyter.widget-view+json": {
350
+ "model_id": "5f550b040edb487b9bc981baa1de6fc0",
351
+ "version_major": 2,
352
+ "version_minor": 0
353
+ },
354
+ "text/plain": [
355
+ "train-008-of-040.parquet: 0%| | 0.00/60.7M [00:00<?, ?B/s]"
356
+ ]
357
+ },
358
+ "metadata": {},
359
+ "output_type": "display_data"
360
+ },
361
+ {
362
+ "data": {
363
+ "application/vnd.jupyter.widget-view+json": {
364
+ "model_id": "bc616079b7b24ae0bc4b825a7ab23f7f",
365
+ "version_major": 2,
366
+ "version_minor": 0
367
+ },
368
+ "text/plain": [
369
+ "train-009-of-040.parquet: 0%| | 0.00/60.1M [00:00<?, ?B/s]"
370
+ ]
371
+ },
372
+ "metadata": {},
373
+ "output_type": "display_data"
374
+ },
375
+ {
376
+ "data": {
377
+ "application/vnd.jupyter.widget-view+json": {
378
+ "model_id": "4ee331e970144d8abb4fe9225eaba3d9",
379
+ "version_major": 2,
380
+ "version_minor": 0
381
+ },
382
+ "text/plain": [
383
+ "train-010-of-040.parquet: 0%| | 0.00/60.2M [00:00<?, ?B/s]"
384
+ ]
385
+ },
386
+ "metadata": {},
387
+ "output_type": "display_data"
388
+ },
389
+ {
390
+ "data": {
391
+ "application/vnd.jupyter.widget-view+json": {
392
+ "model_id": "998481ecd36d4d39998c65a83400f1e1",
393
+ "version_major": 2,
394
+ "version_minor": 0
395
+ },
396
+ "text/plain": [
397
+ "train-011-of-040.parquet: 0%| | 0.00/60.5M [00:00<?, ?B/s]"
398
+ ]
399
+ },
400
+ "metadata": {},
401
+ "output_type": "display_data"
402
+ },
403
+ {
404
+ "data": {
405
+ "application/vnd.jupyter.widget-view+json": {
406
+ "model_id": "6e49db3e42dc4d8782c30adda124bfe7",
407
+ "version_major": 2,
408
+ "version_minor": 0
409
+ },
410
+ "text/plain": [
411
+ "train-012-of-040.parquet: 0%| | 0.00/59.4M [00:00<?, ?B/s]"
412
+ ]
413
+ },
414
+ "metadata": {},
415
+ "output_type": "display_data"
416
+ },
417
+ {
418
+ "data": {
419
+ "application/vnd.jupyter.widget-view+json": {
420
+ "model_id": "a5d5e53a69604e618afc462f5c905801",
421
+ "version_major": 2,
422
+ "version_minor": 0
423
+ },
424
+ "text/plain": [
425
+ "train-013-of-040.parquet: 0%| | 0.00/59.9M [00:00<?, ?B/s]"
426
+ ]
427
+ },
428
+ "metadata": {},
429
+ "output_type": "display_data"
430
+ },
431
+ {
432
+ "data": {
433
+ "application/vnd.jupyter.widget-view+json": {
434
+ "model_id": "d35c5e7a5e214c70812e1d1ff2889a7e",
435
+ "version_major": 2,
436
+ "version_minor": 0
437
+ },
438
+ "text/plain": [
439
+ "train-014-of-040.parquet: 0%| | 0.00/60.9M [00:00<?, ?B/s]"
440
+ ]
441
+ },
442
+ "metadata": {},
443
+ "output_type": "display_data"
444
+ },
445
+ {
446
+ "data": {
447
+ "application/vnd.jupyter.widget-view+json": {
448
+ "model_id": "bad7cd42efb84257a2c5a7d29dbe0fcf",
449
+ "version_major": 2,
450
+ "version_minor": 0
451
+ },
452
+ "text/plain": [
453
+ "train-015-of-040.parquet: 0%| | 0.00/59.3M [00:00<?, ?B/s]"
454
+ ]
455
+ },
456
+ "metadata": {},
457
+ "output_type": "display_data"
458
+ },
459
+ {
460
+ "data": {
461
+ "application/vnd.jupyter.widget-view+json": {
462
+ "model_id": "79eea92c8dcb4e6fa2ebfbcff1627f64",
463
+ "version_major": 2,
464
+ "version_minor": 0
465
+ },
466
+ "text/plain": [
467
+ "train-016-of-040.parquet: 0%| | 0.00/60.3M [00:00<?, ?B/s]"
468
+ ]
469
+ },
470
+ "metadata": {},
471
+ "output_type": "display_data"
472
+ },
473
+ {
474
+ "data": {
475
+ "application/vnd.jupyter.widget-view+json": {
476
+ "model_id": "8337d4bef62b4b14889d84a513f3faef",
477
+ "version_major": 2,
478
+ "version_minor": 0
479
+ },
480
+ "text/plain": [
481
+ "train-017-of-040.parquet: 0%| | 0.00/61.1M [00:00<?, ?B/s]"
482
+ ]
483
+ },
484
+ "metadata": {},
485
+ "output_type": "display_data"
486
+ },
487
+ {
488
+ "data": {
489
+ "application/vnd.jupyter.widget-view+json": {
490
+ "model_id": "12feb5b385c4481596d71a05ffbbe250",
491
+ "version_major": 2,
492
+ "version_minor": 0
493
+ },
494
+ "text/plain": [
495
+ "train-018-of-040.parquet: 0%| | 0.00/60.4M [00:00<?, ?B/s]"
496
+ ]
497
+ },
498
+ "metadata": {},
499
+ "output_type": "display_data"
500
+ },
501
+ {
502
+ "data": {
503
+ "application/vnd.jupyter.widget-view+json": {
504
+ "model_id": "d2d5e3059341457cb3c439bf6190d88f",
505
+ "version_major": 2,
506
+ "version_minor": 0
507
+ },
508
+ "text/plain": [
509
+ "train-019-of-040.parquet: 0%| | 0.00/61.0M [00:00<?, ?B/s]"
510
+ ]
511
+ },
512
+ "metadata": {},
513
+ "output_type": "display_data"
514
+ },
515
+ {
516
+ "data": {
517
+ "application/vnd.jupyter.widget-view+json": {
518
+ "model_id": "87b856c67b754c42ab198346b420eb9b",
519
+ "version_major": 2,
520
+ "version_minor": 0
521
+ },
522
+ "text/plain": [
523
+ "train-020-of-040.parquet: 0%| | 0.00/60.1M [00:00<?, ?B/s]"
524
+ ]
525
+ },
526
+ "metadata": {},
527
+ "output_type": "display_data"
528
+ },
529
+ {
530
+ "data": {
531
+ "application/vnd.jupyter.widget-view+json": {
532
+ "model_id": "35b4ff1538d24a8fa227ca34a23827aa",
533
+ "version_major": 2,
534
+ "version_minor": 0
535
+ },
536
+ "text/plain": [
537
+ "train-021-of-040.parquet: 0%| | 0.00/60.0M [00:00<?, ?B/s]"
538
+ ]
539
+ },
540
+ "metadata": {},
541
+ "output_type": "display_data"
542
+ },
543
+ {
544
+ "data": {
545
+ "application/vnd.jupyter.widget-view+json": {
546
+ "model_id": "62ac2c4fe7ce4c51888a7829008fcce8",
547
+ "version_major": 2,
548
+ "version_minor": 0
549
+ },
550
+ "text/plain": [
551
+ "train-022-of-040.parquet: 0%| | 0.00/60.5M [00:00<?, ?B/s]"
552
+ ]
553
+ },
554
+ "metadata": {},
555
+ "output_type": "display_data"
556
+ },
557
+ {
558
+ "data": {
559
+ "application/vnd.jupyter.widget-view+json": {
560
+ "model_id": "feba865f9932426a8e8fffaaa7a03240",
561
+ "version_major": 2,
562
+ "version_minor": 0
563
+ },
564
+ "text/plain": [
565
+ "train-023-of-040.parquet: 0%| | 0.00/59.8M [00:00<?, ?B/s]"
566
+ ]
567
+ },
568
+ "metadata": {},
569
+ "output_type": "display_data"
570
+ },
571
+ {
572
+ "data": {
573
+ "application/vnd.jupyter.widget-view+json": {
574
+ "model_id": "0f469dcf2b034d1dab3bac74877044d1",
575
+ "version_major": 2,
576
+ "version_minor": 0
577
+ },
578
+ "text/plain": [
579
+ "train-024-of-040.parquet: 0%| | 0.00/61.0M [00:00<?, ?B/s]"
580
+ ]
581
+ },
582
+ "metadata": {},
583
+ "output_type": "display_data"
584
+ },
585
+ {
586
+ "data": {
587
+ "application/vnd.jupyter.widget-view+json": {
588
+ "model_id": "f1def2f3df5142a2be36781243343bbf",
589
+ "version_major": 2,
590
+ "version_minor": 0
591
+ },
592
+ "text/plain": [
593
+ "train-025-of-040.parquet: 0%| | 0.00/60.4M [00:00<?, ?B/s]"
594
+ ]
595
+ },
596
+ "metadata": {},
597
+ "output_type": "display_data"
598
+ },
599
+ {
600
+ "data": {
601
+ "application/vnd.jupyter.widget-view+json": {
602
+ "model_id": "206807ddb201402ba1674679c32e1c56",
603
+ "version_major": 2,
604
+ "version_minor": 0
605
+ },
606
+ "text/plain": [
607
+ "train-026-of-040.parquet: 0%| | 0.00/60.0M [00:00<?, ?B/s]"
608
+ ]
609
+ },
610
+ "metadata": {},
611
+ "output_type": "display_data"
612
+ },
613
+ {
614
+ "data": {
615
+ "application/vnd.jupyter.widget-view+json": {
616
+ "model_id": "51ada6f44fa0443aa27c3c2747098b1f",
617
+ "version_major": 2,
618
+ "version_minor": 0
619
+ },
620
+ "text/plain": [
621
+ "train-027-of-040.parquet: 0%| | 0.00/58.4M [00:00<?, ?B/s]"
622
+ ]
623
+ },
624
+ "metadata": {},
625
+ "output_type": "display_data"
626
+ },
627
+ {
628
+ "data": {
629
+ "application/vnd.jupyter.widget-view+json": {
630
+ "model_id": "174dced4cf8b45ec8cab4d6f46fd951d",
631
+ "version_major": 2,
632
+ "version_minor": 0
633
+ },
634
+ "text/plain": [
635
+ "train-028-of-040.parquet: 0%| | 0.00/60.7M [00:00<?, ?B/s]"
636
+ ]
637
+ },
638
+ "metadata": {},
639
+ "output_type": "display_data"
640
+ },
641
+ {
642
+ "data": {
643
+ "application/vnd.jupyter.widget-view+json": {
644
+ "model_id": "df784421943645139e67640315c8353c",
645
+ "version_major": 2,
646
+ "version_minor": 0
647
+ },
648
+ "text/plain": [
649
+ "train-029-of-040.parquet: 0%| | 0.00/60.8M [00:00<?, ?B/s]"
650
+ ]
651
+ },
652
+ "metadata": {},
653
+ "output_type": "display_data"
654
+ },
655
+ {
656
+ "data": {
657
+ "application/vnd.jupyter.widget-view+json": {
658
+ "model_id": "92ed1df110d64c37b30a12ce790bddf2",
659
+ "version_major": 2,
660
+ "version_minor": 0
661
+ },
662
+ "text/plain": [
663
+ "train-030-of-040.parquet: 0%| | 0.00/60.5M [00:00<?, ?B/s]"
664
+ ]
665
+ },
666
+ "metadata": {},
667
+ "output_type": "display_data"
668
+ },
669
+ {
670
+ "data": {
671
+ "application/vnd.jupyter.widget-view+json": {
672
+ "model_id": "d20ffd505db747b88ef06661e887f762",
673
+ "version_major": 2,
674
+ "version_minor": 0
675
+ },
676
+ "text/plain": [
677
+ "train-031-of-040.parquet: 0%| | 0.00/60.1M [00:00<?, ?B/s]"
678
+ ]
679
+ },
680
+ "metadata": {},
681
+ "output_type": "display_data"
682
+ },
683
+ {
684
+ "data": {
685
+ "application/vnd.jupyter.widget-view+json": {
686
+ "model_id": "047f67bccb024a34977b3cb22a81aa98",
687
+ "version_major": 2,
688
+ "version_minor": 0
689
+ },
690
+ "text/plain": [
691
+ "train-032-of-040.parquet: 0%| | 0.00/61.0M [00:00<?, ?B/s]"
692
+ ]
693
+ },
694
+ "metadata": {},
695
+ "output_type": "display_data"
696
+ },
697
+ {
698
+ "data": {
699
+ "application/vnd.jupyter.widget-view+json": {
700
+ "model_id": "f504b60602eb4540b18b832d4ed68472",
701
+ "version_major": 2,
702
+ "version_minor": 0
703
+ },
704
+ "text/plain": [
705
+ "train-033-of-040.parquet: 0%| | 0.00/59.9M [00:00<?, ?B/s]"
706
+ ]
707
+ },
708
+ "metadata": {},
709
+ "output_type": "display_data"
710
+ },
711
+ {
712
+ "data": {
713
+ "application/vnd.jupyter.widget-view+json": {
714
+ "model_id": "0f37dbbfabcb408d8b1c6e08569fb9d8",
715
+ "version_major": 2,
716
+ "version_minor": 0
717
+ },
718
+ "text/plain": [
719
+ "train-034-of-040.parquet: 0%| | 0.00/61.0M [00:00<?, ?B/s]"
720
+ ]
721
+ },
722
+ "metadata": {},
723
+ "output_type": "display_data"
724
+ },
725
+ {
726
+ "data": {
727
+ "application/vnd.jupyter.widget-view+json": {
728
+ "model_id": "76317e29f6f34a3c89cdc175e013df52",
729
+ "version_major": 2,
730
+ "version_minor": 0
731
+ },
732
+ "text/plain": [
733
+ "train-035-of-040.parquet: 0%| | 0.00/60.1M [00:00<?, ?B/s]"
734
+ ]
735
+ },
736
+ "metadata": {},
737
+ "output_type": "display_data"
738
+ },
739
+ {
740
+ "data": {
741
+ "application/vnd.jupyter.widget-view+json": {
742
+ "model_id": "58601a7dd8f14efb916f460a2417eb4c",
743
+ "version_major": 2,
744
+ "version_minor": 0
745
+ },
746
+ "text/plain": [
747
+ "train-036-of-040.parquet: 0%| | 0.00/59.2M [00:00<?, ?B/s]"
748
+ ]
749
+ },
750
+ "metadata": {},
751
+ "output_type": "display_data"
752
+ },
753
+ {
754
+ "data": {
755
+ "application/vnd.jupyter.widget-view+json": {
756
+ "model_id": "93e8ebdfc7b14ab7ac2cce4b036489fe",
757
+ "version_major": 2,
758
+ "version_minor": 0
759
+ },
760
+ "text/plain": [
761
+ "train-037-of-040.parquet: 0%| | 0.00/59.2M [00:00<?, ?B/s]"
762
+ ]
763
+ },
764
+ "metadata": {},
765
+ "output_type": "display_data"
766
+ },
767
+ {
768
+ "data": {
769
+ "application/vnd.jupyter.widget-view+json": {
770
+ "model_id": "760f576bd0d64c0ea3b0cdaf5819f097",
771
+ "version_major": 2,
772
+ "version_minor": 0
773
+ },
774
+ "text/plain": [
775
+ "train-038-of-040.parquet: 0%| | 0.00/61.0M [00:00<?, ?B/s]"
776
+ ]
777
+ },
778
+ "metadata": {},
779
+ "output_type": "display_data"
780
+ },
781
+ {
782
+ "data": {
783
+ "application/vnd.jupyter.widget-view+json": {
784
+ "model_id": "90be502cdd05496b9a3b6e04bd63e1f1",
785
+ "version_major": 2,
786
+ "version_minor": 0
787
+ },
788
+ "text/plain": [
789
+ "train-039-of-040.parquet: 0%| | 0.00/30.3M [00:00<?, ?B/s]"
790
+ ]
791
+ },
792
+ "metadata": {},
793
+ "output_type": "display_data"
794
+ },
795
+ {
796
+ "data": {
797
+ "application/vnd.jupyter.widget-view+json": {
798
+ "model_id": "79ca9b10f64d44c6af9810930555deda",
799
+ "version_major": 2,
800
+ "version_minor": 0
801
+ },
802
+ "text/plain": [
803
+ "Generating train split: 0%| | 0/394995 [00:00<?, ? examples/s]"
804
+ ]
805
+ },
806
+ "metadata": {},
807
+ "output_type": "display_data"
808
+ },
809
+ {
810
+ "name": "stdout",
811
+ "output_type": "stream",
812
+ "text": [
813
+ "\n",
814
+ "Processing split: train\n",
815
+ "Number of examples in train: 394995\n",
816
+ "Processed 0 examples\n",
817
+ "Processed 1000 examples\n",
818
+ "Processed 2000 examples\n",
819
+ "Processed 3000 examples\n",
820
+ "Processed 4000 examples\n",
821
+ "Processed 5000 examples\n",
822
+ "Processed 6000 examples\n",
823
+ "Processed 7000 examples\n",
824
+ "Processed 8000 examples\n",
825
+ "Processed 9000 examples\n",
826
+ "Conversion complete for train, processed 10000 examples\n",
827
+ "\n",
828
+ "Example of accessing converted data:\n",
829
+ "Dataset[0]['text']:\n",
830
+ "<|im_start|>system\n",
831
+ "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n",
832
+ "<|im_start|>user\n",
833
+ "The cost of five pencils and one pen is $\\$2.50$, and the cost of one pencil and two pens is $\\$1.85$. What is the cost of two pencils and one pen?<|im_end|>\n",
834
+ "<|im_start|>assistant\n",
835
+ "<think>\n",
836
+ "Alright, let's try to solve this problem step by step. So, we have two equations here:\n",
837
+ "\n",
838
+ "1. The cost of five pencils and one pen is $2.50.\n",
839
+ "2. The cost of one pencil and two pens is $1.85.\n",
840
+ "\n",
841
+ "We need t...\n"
842
+ ]
843
+ },
844
+ {
845
+ "data": {
846
+ "application/vnd.jupyter.widget-view+json": {
847
+ "model_id": "3c4bfda39749415a9c1b3a330cf5365b",
848
+ "version_major": 2,
849
+ "version_minor": 0
850
+ },
851
+ "text/plain": [
852
+ "Saving the dataset (0/1 shards): 0%| | 0/10000 [00:00<?, ? examples/s]"
853
+ ]
854
+ },
855
+ "metadata": {},
856
+ "output_type": "display_data"
857
+ },
858
+ {
859
+ "name": "stdout",
860
+ "output_type": "stream",
861
+ "text": [
862
+ "Saved train dataset to disk as 'converted_train'\n"
863
+ ]
864
+ }
865
+ ],
866
+ "source": [
867
+ "from datasets import load_dataset, Dataset\n",
868
+ "import json\n",
869
+ "\n",
870
+ "def convert_format(messages):\n",
871
+ " \"\"\"\n",
872
+ " Convert a single entry from the 'messages' column to the target format.\n",
873
+ " \"\"\"\n",
874
+ " # Create the target format with system prompt\n",
875
+ " target_format = '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n'\n",
876
+ "\n",
877
+ " # Add each message\n",
878
+ " for message in messages:\n",
879
+ " role = message.get('role')\n",
880
+ " content = message.get('content')\n",
881
+ "\n",
882
+ " if role and content:\n",
883
+ " # Add formatted message\n",
884
+ " target_format += f'<|im_start|>{role}\\n{content}<|im_end|>\\n'\n",
885
+ "\n",
886
+ " return target_format\n",
887
+ "\n",
888
+ "def process_dataset():\n",
889
+ " # Load the dataset from Hugging Face\n",
890
+ " dataset = load_dataset(\"oumi-ai/MetaMathQA-R1\")\n",
891
+ "\n",
892
+ " # Create new datasets dict to store the converted data\n",
893
+ " converted_datasets = {}\n",
894
+ "\n",
895
+ " # Process each split in the dataset\n",
896
+ " for split_name in dataset.keys():\n",
897
+ " print(f\"\\nProcessing split: {split_name}\")\n",
898
+ " split = dataset[split_name]\n",
899
+ "\n",
900
+ " # Check if 'messages' column exists in this split\n",
901
+ " if 'messages' in split.column_names:\n",
902
+ " messages_list = split['messages']\n",
903
+ " print(f\"Number of examples in {split_name}: {len(messages_list)}\")\n",
904
+ "\n",
905
+ " # Get the first 10000 examples (or all if less than 10000)\n",
906
+ " num_examples = min(10000, len(messages_list))\n",
907
+ "\n",
908
+ " # Convert each messages entry and store in a list\n",
909
+ " converted_texts = []\n",
910
+ " for i in range(num_examples):\n",
911
+ " # Convert this entry to the target format\n",
912
+ " converted = convert_format(messages_list[i])\n",
913
+ " converted_texts.append(converted)\n",
914
+ "\n",
915
+ " # Print progress\n",
916
+ " if i % 1000 == 0:\n",
917
+ " print(f\"Processed {i} examples\")\n",
918
+ "\n",
919
+ " # Create a new dataset with a 'text' column containing the converted data\n",
920
+ " converted_dataset = Dataset.from_dict({\"text\": converted_texts})\n",
921
+ " converted_datasets[split_name] = converted_dataset\n",
922
+ "\n",
923
+ " print(f\"Conversion complete for {split_name}, processed {num_examples} examples\")\n",
924
+ " else:\n",
925
+ " print(f\"'messages' column not found in {split_name} split\")\n",
926
+ "\n",
927
+ " return converted_datasets\n",
928
+ "\n",
929
+ "if __name__ == \"__main__\":\n",
930
+ " # Process the dataset and get the converted datasets\n",
931
+ " converted_data = process_dataset()\n",
932
+ "\n",
933
+ " # Example of accessing an element (if 'train' split exists)\n",
934
+ " if 'train' in converted_data:\n",
935
+ " print(\"\\nExample of accessing converted data:\")\n",
936
+ " print(\"Dataset[0]['text']:\")\n",
937
+ " print(converted_data['train'][0]['text'][:500] + \"...\") # Show first 500 chars\n",
938
+ "\n",
939
+ " # You can save the datasets if needed\n",
940
+ " for split_name, dataset in converted_data.items():\n",
941
+ " dataset.save_to_disk(f\"converted_{split_name}\")\n",
942
+ " print(f\"Saved {split_name} dataset to disk as 'converted_{split_name}'\")"
943
+ ]
944
+ },
945
+ {
946
+ "cell_type": "code",
947
+ "execution_count": 5,
948
+ "id": "7117b4c2-1dde-4b46-bd51-16bba68b4c44",
949
+ "metadata": {},
950
+ "outputs": [
951
+ {
952
+ "data": {
953
+ "application/vnd.jupyter.widget-view+json": {
954
+ "model_id": "c71365ef63df431fbf20b5838703cad9",
955
+ "version_major": 2,
956
+ "version_minor": 0
957
+ },
958
+ "text/plain": [
959
+ "Unsloth: Tokenizing [\"text\"] (num_proc=2): 0%| | 0/10000 [00:00<?, ? examples/s]"
960
+ ]
961
+ },
962
+ "metadata": {},
963
+ "output_type": "display_data"
964
+ }
965
+ ],
966
+ "source": [
967
+ "from trl import SFTTrainer\n",
968
+ "from transformers import TrainingArguments, DataCollatorForSeq2Seq\n",
969
+ "from unsloth import is_bfloat16_supported\n",
970
+ "\n",
971
+ "trainer = SFTTrainer(\n",
972
+ " model = model,\n",
973
+ " tokenizer = tokenizer,\n",
974
+ " train_dataset = dataset,\n",
975
+ " dataset_text_field = \"text\",\n",
976
+ " max_seq_length = max_seq_length,\n",
977
+ " data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),\n",
978
+ " dataset_num_proc = 2,\n",
979
+ " packing = False, # Can make training 5x faster for short sequences.\n",
980
+ " args = TrainingArguments(\n",
981
+ " per_device_train_batch_size = 16,\n",
982
+ " gradient_accumulation_steps = 4,\n",
983
+ " warmup_steps = 100,\n",
984
+ " num_train_epochs = 6, # Set this for 1 full training run.\n",
985
+ " #max_steps = 60,\n",
986
+ " learning_rate = 2e-4,\n",
987
+ " fp16 = not is_bfloat16_supported(),\n",
988
+ " bf16 = is_bfloat16_supported(),\n",
989
+ " logging_steps = 50,\n",
990
+ " save_steps = 100, # Added checkpoint saving\n",
991
+ " optim = \"adamw_8bit\",\n",
992
+ " weight_decay = 0.01,\n",
993
+ " lr_scheduler_type = \"linear\",\n",
994
+ " seed = 3407,\n",
995
+ " output_dir = \"outputs\",\n",
996
+ " report_to = \"none\", # Use this for WandB etc\n",
997
+ " ),\n",
998
+ ")"
999
+ ]
1000
+ },
1001
+ {
1002
+ "cell_type": "code",
1003
+ "execution_count": 6,
1004
+ "id": "20ed86b5-5945-487e-a538-b0adc6a52999",
1005
+ "metadata": {},
1006
+ "outputs": [
1007
+ {
1008
+ "data": {
1009
+ "application/vnd.jupyter.widget-view+json": {
1010
+ "model_id": "b31f5620d6a54b3eb6b5933c66834c4f",
1011
+ "version_major": 2,
1012
+ "version_minor": 0
1013
+ },
1014
+ "text/plain": [
1015
+ "Map (num_proc=128): 0%| | 0/10000 [00:00<?, ? examples/s]"
1016
+ ]
1017
+ },
1018
+ "metadata": {},
1019
+ "output_type": "display_data"
1020
+ }
1021
+ ],
1022
+ "source": [
1023
+ "from unsloth.chat_templates import train_on_responses_only\n",
1024
+ "trainer = train_on_responses_only(\n",
1025
+ " trainer,\n",
1026
+ " instruction_part = \"<|im_start|>user\\n\",\n",
1027
+ " response_part = \"<|im_start|>assistant\\n\",\n",
1028
+ ")"
1029
+ ]
1030
+ },
1031
+ {
1032
+ "cell_type": "code",
1033
+ "execution_count": 7,
1034
+ "id": "93d21670-3de4-473f-872f-810e77f80d21",
1035
+ "metadata": {},
1036
+ "outputs": [
1037
+ {
1038
+ "data": {
1039
+ "text/plain": [
1040
+ "'<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n<|im_start|>user\\nThe cost of five pencils and one pen is $\\\\$2.50$, and the cost of one pencil and two pens is $\\\\$1.85$. What is the cost of two pencils and one pen?<|im_end|>\\n<|im_start|>assistant\\n<think>\\nAlright, let\\'s try to solve this problem step by step. So, we have two equations here:\\n\\n1. The cost of five pencils and one pen is $2.50.\\n2. The cost of one pencil and two pens is $1.85.\\n\\nWe need to find the cost of two pencils and one pen. Hmm, okay. So first off, let me assign variables to these things to make it easier. Let\\'s say the cost of one pencil is \"p\" dollars, and the cost of one pen is \"n\" dollars. Then we can translate the given information into equations.\\n\\nFirst equation: Five pencils and one pen cost $2.50. That would be 5 times p plus 1 times n equals 2.50. So, mathematically, that\\'s:\\n\\n5p + n = 2.50\\n\\nSecond equation: One pencil and two pens cost $1.85. That translates to 1 times p plus 2 times n equals 1.85. So:\\n\\np + 2n = 1.85\\n\\nOkay, so now we have a system of two equations with two variables. We need to solve for p and n, and once we have those, we can figure out what 2 pencils and 1 pen would cost by calculating 2p + n.\\n\\nSo, let me think about how to solve these equations. There are a couple of methods: substitution and elimination. Which one would be easier here?\\n\\nLooking at the two equations:\\n\\n1) 5p + n = 2.50 \\n2) p + 2n = 1.85\\n\\nMaybe substitution would work. Let\\'s see. Maybe if we solve one of the equations for one variable and substitute into the other. Let\\'s take the first equation, maybe solve for n.\\n\\nFrom equation 1: \\n5p + n = 2.50 \\nSo, subtract 5p from both sides: \\nn = 2.50 - 5p\\n\\nNow, substitute this expression for n into equation 2. Equation 2 is p + 2n = 1.85. Substituting:\\n\\np + 2*(2.50 - 5p) = 1.85\\n\\nLet me compute that. Multiply out the 2:\\n\\np + 5.00 - 10p = 1.85\\n\\nCombine like terms (p - 10p): \\n-9p + 5.00 = 1.85\\n\\nNow, subtract 5.00 from both sides:\\n\\n-9p = 1.85 - 5.00 \\nWhich is:\\n\\n-9p = -3.15\\n\\nDivide both sides by -9:\\n\\np = (-3.15)/(-9) = 3.15 / 9\\n\\nHmm, let me do that division. 3.15 divided by 9. Let\\'s see, 9 goes into 3.15 how many times?\\n\\nWell, 9 x 0.35 is 3.15. Because 9 x 0.30 = 2.70, and 9 x 0.05 = 0.45, so 2.70 + 0.45 = 3.15. Yup. So, p = 0.35. So each pencil is 35 cents.\\n\\nNow, substitute p = 0.35 back into our expression for n. Earlier, we had n = 2.50 - 5p. Let\\'s plug that in:\\n\\nn = 2.50 - 5*(0.35) \\nCalculate 5*0.35: that\\'s 1.75 \\nSo:\\n\\nn = 2.50 - 1.75 \\nWhich is 0.75. So, a pen is 75 cents.\\n\\nLet me check to make sure these values satisfy both equations.\\n\\nFirst equation: 5 pencils and 1 pen. 5*0.35 is 1.75, plus 0.75 is 2.50. That checks out. Second equation: 1 pencil is 0.35, 2 pens is 0.75*2=1.50. Together, 0.35 + 1.50 = 1.85, which matches the second equation. So, p = $0.35 and n = $0.75 are correct.\\n\\nNow, the question asks for the cost of two pencils and one pen. So, 2p + n.\\n\\nLet\\'s compute that: 2*0.35 + 0.75. That is 0.70 + 0.75 = 1.45. So, the answer should be $1.45.\\n\\nWait, let\\'s make sure we didn\\'t make any arithmetic mistakes. Let\\'s retrace:\\n\\n1. Assigned variables: p = pencil, n = pen.\\n\\n2. Equations: 5p + n = 2.50 and p + 2n = 1.85.\\n\\n3. Solved first equation for n: n = 2.50 -5p.\\n\\n4. 
Substituted into second equation: p + 2*(2.50 -5p) = 1.85.\\n\\n5. Calculated that as p + 5.00 -10p =1.85, simplifies to -9p +5.00 =1.85.\\n\\n6. Subtract 5: -9p = -3.15, divide by -9: p=0.35.\\n\\n7. Then n = 2.50 -5*0.35=2.50 -1.75=0.75.\\n\\n8. Cost for two pencils and one pen: 2*0.35 +0.75=0.70+0.75=1.45.\\n\\nYes, that all seems correct.\\n\\nAlternatively, we can test using elimination to see if we get the same result.\\n\\nLet me try the elimination method as a check. Maybe multiply the second equation by five:\\n\\nEquation 1: 5p + n = 2.50 \\nEquation 2: p + 2n = 1.85\\n\\nMultiply equation 2 by 5:\\n\\n5p + 10n = 9.25\\n\\nNow subtract equation 1 from this new equation:\\n\\n(5p + 10n) - (5p +n) = 9.25 -2.50 \\nWhich is 5p +10n -5p -n = 6.75 \\nSimplifies to 9n =6.75 \\nThus, n=6.75 /9=0.75.\\n\\nSo, n=0.75. Then substitute back into equation2: p +2*0.75=1.85 \\np +1.50=1.85 \\np=1.85 -1.50=0.35.\\n\\nSame results. So, that reaffirms p=0.35 and n=0.75. So, 2p +n=0.70 +0.75=1.45. Therefore, the answer is indeed $1.45. So written in dollars, that\\'s \\\\$1.45. Hence, the cost of two pencils and one pen is \\\\$1.45.\\n\\nI can see that both substitution and elimination methods give the same answer. So, unless there\\'s a miscalculation that I\\'m missing, the answer should be \\\\$1.45. Given that cross-verification hasn\\'t shown any problems, I think that is correct.\\n\\n**Final Answer**\\n\\\\boxed{1.45}\\n</think>\\n\\nLet \\\\( p \\\\) be the cost of one pencil in dollars and \\\\( n \\\\) be the cost of one pen in dollars. We are given the following system of equations:\\n\\n1. \\\\( 5p + n = 2.50 \\\\)\\n2. \\\\( p + 2n = 1.85 \\\\)\\n\\nFirst, solve the first equation for \\\\( n \\\\):\\n\\\\[\\nn = 2.50 - 5p\\n\\\\]\\n\\nSubstitute this expression for \\\\( n \\\\) into the second equation:\\n\\\\[\\np + 2(2.50 - 5p) = 1.85\\n\\\\]\\nSimplify and solve for \\\\( p \\\\):\\n\\\\[\\np + 5.00 - 10p = 1.85 \\\\\\\\\\n-9p + 5.00 = 1.85 \\\\\\\\\\n-9p = 1.85 - 5.00 \\\\\\\\\\n-9p = -3.15 \\\\\\\\\\np = \\\\frac{-3.15}{-9} = 0.35\\n\\\\]\\n\\nNow substitute \\\\( p = 0.35 \\\\) back into the expression for \\\\( n \\\\):\\n\\\\[\\nn = 2.50 - 5(0.35) \\\\\\\\\\nn = 2.50 - 1.75 \\\\\\\\\\nn = 0.75\\n\\\\]\\n\\nWe need to find the cost of two pencils and one pen, which is \\\\( 2p + n \\\\):\\n\\\\[\\n2(0.35) + 0.75 = 0.70 + 0.75 = 1.45\\n\\\\]\\n\\nThus, the cost of two pencils and one pen is \\\\(\\\\boxed{1.45}\\\\).<|im_end|>\\n'"
1041
+ ]
1042
+ },
1043
+ "execution_count": 7,
1044
+ "metadata": {},
1045
+ "output_type": "execute_result"
1046
+ }
1047
+ ],
1048
+ "source": [
1049
+ "tokenizer.decode(trainer.train_dataset[0][\"input_ids\"])"
1050
+ ]
1051
+ },
1052
+ {
1053
+ "cell_type": "code",
1054
+ "execution_count": 8,
1055
+ "id": "0625d8df-5034-4597-8650-c06530aabc22",
1056
+ "metadata": {},
1057
+ "outputs": [
1058
+ {
1059
+ "data": {
1060
+ "text/plain": [
1061
+ "' <think>\\nAlright, let\\'s try to solve this problem step by step. So, we have two equations here:\\n\\n1. The cost of five pencils and one pen is $2.50.\\n2. The cost of one pencil and two pens is $1.85.\\n\\nWe need to find the cost of two pencils and one pen. Hmm, okay. So first off, let me assign variables to these things to make it easier. Let\\'s say the cost of one pencil is \"p\" dollars, and the cost of one pen is \"n\" dollars. Then we can translate the given information into equations.\\n\\nFirst equation: Five pencils and one pen cost $2.50. That would be 5 times p plus 1 times n equals 2.50. So, mathematically, that\\'s:\\n\\n5p + n = 2.50\\n\\nSecond equation: One pencil and two pens cost $1.85. That translates to 1 times p plus 2 times n equals 1.85. So:\\n\\np + 2n = 1.85\\n\\nOkay, so now we have a system of two equations with two variables. We need to solve for p and n, and once we have those, we can figure out what 2 pencils and 1 pen would cost by calculating 2p + n.\\n\\nSo, let me think about how to solve these equations. There are a couple of methods: substitution and elimination. Which one would be easier here?\\n\\nLooking at the two equations:\\n\\n1) 5p + n = 2.50 \\n2) p + 2n = 1.85\\n\\nMaybe substitution would work. Let\\'s see. Maybe if we solve one of the equations for one variable and substitute into the other. Let\\'s take the first equation, maybe solve for n.\\n\\nFrom equation 1: \\n5p + n = 2.50 \\nSo, subtract 5p from both sides: \\nn = 2.50 - 5p\\n\\nNow, substitute this expression for n into equation 2. Equation 2 is p + 2n = 1.85. Substituting:\\n\\np + 2*(2.50 - 5p) = 1.85\\n\\nLet me compute that. Multiply out the 2:\\n\\np + 5.00 - 10p = 1.85\\n\\nCombine like terms (p - 10p): \\n-9p + 5.00 = 1.85\\n\\nNow, subtract 5.00 from both sides:\\n\\n-9p = 1.85 - 5.00 \\nWhich is:\\n\\n-9p = -3.15\\n\\nDivide both sides by -9:\\n\\np = (-3.15)/(-9) = 3.15 / 9\\n\\nHmm, let me do that division. 3.15 divided by 9. Let\\'s see, 9 goes into 3.15 how many times?\\n\\nWell, 9 x 0.35 is 3.15. Because 9 x 0.30 = 2.70, and 9 x 0.05 = 0.45, so 2.70 + 0.45 = 3.15. Yup. So, p = 0.35. So each pencil is 35 cents.\\n\\nNow, substitute p = 0.35 back into our expression for n. Earlier, we had n = 2.50 - 5p. Let\\'s plug that in:\\n\\nn = 2.50 - 5*(0.35) \\nCalculate 5*0.35: that\\'s 1.75 \\nSo:\\n\\nn = 2.50 - 1.75 \\nWhich is 0.75. So, a pen is 75 cents.\\n\\nLet me check to make sure these values satisfy both equations.\\n\\nFirst equation: 5 pencils and 1 pen. 5*0.35 is 1.75, plus 0.75 is 2.50. That checks out. Second equation: 1 pencil is 0.35, 2 pens is 0.75*2=1.50. Together, 0.35 + 1.50 = 1.85, which matches the second equation. So, p = $0.35 and n = $0.75 are correct.\\n\\nNow, the question asks for the cost of two pencils and one pen. So, 2p + n.\\n\\nLet\\'s compute that: 2*0.35 + 0.75. That is 0.70 + 0.75 = 1.45. So, the answer should be $1.45.\\n\\nWait, let\\'s make sure we didn\\'t make any arithmetic mistakes. Let\\'s retrace:\\n\\n1. Assigned variables: p = pencil, n = pen.\\n\\n2. Equations: 5p + n = 2.50 and p + 2n = 1.85.\\n\\n3. Solved first equation for n: n = 2.50 -5p.\\n\\n4. Substituted into second equation: p + 2*(2.50 -5p) = 1.85.\\n\\n5. Calculated that as p + 5.00 -10p =1.85, simplifies to -9p +5.00 =1.85.\\n\\n6. Subtract 5: -9p = -3.15, divide by -9: p=0.35.\\n\\n7. Then n = 2.50 -5*0.35=2.50 -1.75=0.75.\\n\\n8. 
Cost for two pencils and one pen: 2*0.35 +0.75=0.70+0.75=1.45.\\n\\nYes, that all seems correct.\\n\\nAlternatively, we can test using elimination to see if we get the same result.\\n\\nLet me try the elimination method as a check. Maybe multiply the second equation by five:\\n\\nEquation 1: 5p + n = 2.50 \\nEquation 2: p + 2n = 1.85\\n\\nMultiply equation 2 by 5:\\n\\n5p + 10n = 9.25\\n\\nNow subtract equation 1 from this new equation:\\n\\n(5p + 10n) - (5p +n) = 9.25 -2.50 \\nWhich is 5p +10n -5p -n = 6.75 \\nSimplifies to 9n =6.75 \\nThus, n=6.75 /9=0.75.\\n\\nSo, n=0.75. Then substitute back into equation2: p +2*0.75=1.85 \\np +1.50=1.85 \\np=1.85 -1.50=0.35.\\n\\nSame results. So, that reaffirms p=0.35 and n=0.75. So, 2p +n=0.70 +0.75=1.45. Therefore, the answer is indeed $1.45. So written in dollars, that\\'s \\\\$1.45. Hence, the cost of two pencils and one pen is \\\\$1.45.\\n\\nI can see that both substitution and elimination methods give the same answer. So, unless there\\'s a miscalculation that I\\'m missing, the answer should be \\\\$1.45. Given that cross-verification hasn\\'t shown any problems, I think that is correct.\\n\\n**Final Answer**\\n\\\\boxed{1.45}\\n</think>\\n\\nLet \\\\( p \\\\) be the cost of one pencil in dollars and \\\\( n \\\\) be the cost of one pen in dollars. We are given the following system of equations:\\n\\n1. \\\\( 5p + n = 2.50 \\\\)\\n2. \\\\( p + 2n = 1.85 \\\\)\\n\\nFirst, solve the first equation for \\\\( n \\\\):\\n\\\\[\\nn = 2.50 - 5p\\n\\\\]\\n\\nSubstitute this expression for \\\\( n \\\\) into the second equation:\\n\\\\[\\np + 2(2.50 - 5p) = 1.85\\n\\\\]\\nSimplify and solve for \\\\( p \\\\):\\n\\\\[\\np + 5.00 - 10p = 1.85 \\\\\\\\\\n-9p + 5.00 = 1.85 \\\\\\\\\\n-9p = 1.85 - 5.00 \\\\\\\\\\n-9p = -3.15 \\\\\\\\\\np = \\\\frac{-3.15}{-9} = 0.35\\n\\\\]\\n\\nNow substitute \\\\( p = 0.35 \\\\) back into the expression for \\\\( n \\\\):\\n\\\\[\\nn = 2.50 - 5(0.35) \\\\\\\\\\nn = 2.50 - 1.75 \\\\\\\\\\nn = 0.75\\n\\\\]\\n\\nWe need to find the cost of two pencils and one pen, which is \\\\( 2p + n \\\\):\\n\\\\[\\n2(0.35) + 0.75 = 0.70 + 0.75 = 1.45\\n\\\\]\\n\\nThus, the cost of two pencils and one pen is \\\\(\\\\boxed{1.45}\\\\).<|im_end|>\\n'"
1062
+ ]
1063
+ },
1064
+ "execution_count": 8,
1065
+ "metadata": {},
1066
+ "output_type": "execute_result"
1067
+ }
1068
+ ],
1069
+ "source": [
1070
+ "space = tokenizer(\" \", add_special_tokens = False).input_ids[0]\n",
1071
+ "tokenizer.decode([space if x == -100 else x for x in trainer.train_dataset[0][\"labels\"]])"
1072
+ ]
1073
+ },
1074
+ {
1075
+ "cell_type": "code",
1076
+ "execution_count": 9,
1077
+ "id": "0e8c8bb6-0907-4106-a3cb-94c8b3c6da1c",
1078
+ "metadata": {},
1079
+ "outputs": [
1080
+ {
1081
+ "name": "stderr",
1082
+ "output_type": "stream",
1083
+ "text": [
1084
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
1085
+ " \\\\ /| Num examples = 10,000 | Num Epochs = 6 | Total steps = 936\n",
1086
+ "O^O/ \\_/ \\ Batch size per device = 16 | Gradient accumulation steps = 4\n",
1087
+ "\\ / Data Parallel GPUs = 1 | Total batch size (16 x 4 x 1) = 64\n",
1088
+ " \"-____-\" Trainable parameters = 36,929,536/1,580,643,840 (2.34% trained)\n"
1089
+ ]
1090
+ },
1091
+ {
1092
+ "name": "stdout",
1093
+ "output_type": "stream",
1094
+ "text": [
1095
+ "Unsloth: Will smartly offload gradients to save VRAM!\n"
1096
+ ]
1097
+ },
1098
+ {
1099
+ "data": {
1100
+ "text/html": [
1101
+ "\n",
1102
+ " <div>\n",
1103
+ " \n",
1104
+ " <progress value='936' max='936' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1105
+ " [936/936 7:52:53, Epoch 5/6]\n",
1106
+ " </div>\n",
1107
+ " <table border=\"1\" class=\"dataframe\">\n",
1108
+ " <thead>\n",
1109
+ " <tr style=\"text-align: left;\">\n",
1110
+ " <th>Step</th>\n",
1111
+ " <th>Training Loss</th>\n",
1112
+ " </tr>\n",
1113
+ " </thead>\n",
1114
+ " <tbody>\n",
1115
+ " <tr>\n",
1116
+ " <td>50</td>\n",
1117
+ " <td>1.074000</td>\n",
1118
+ " </tr>\n",
1119
+ " <tr>\n",
1120
+ " <td>100</td>\n",
1121
+ " <td>0.914600</td>\n",
1122
+ " </tr>\n",
1123
+ " <tr>\n",
1124
+ " <td>150</td>\n",
1125
+ " <td>0.853400</td>\n",
1126
+ " </tr>\n",
1127
+ " <tr>\n",
1128
+ " <td>200</td>\n",
1129
+ " <td>0.823500</td>\n",
1130
+ " </tr>\n",
1131
+ " <tr>\n",
1132
+ " <td>250</td>\n",
1133
+ " <td>0.826000</td>\n",
1134
+ " </tr>\n",
1135
+ " <tr>\n",
1136
+ " <td>300</td>\n",
1137
+ " <td>0.831000</td>\n",
1138
+ " </tr>\n",
1139
+ " <tr>\n",
1140
+ " <td>350</td>\n",
1141
+ " <td>0.811000</td>\n",
1142
+ " </tr>\n",
1143
+ " <tr>\n",
1144
+ " <td>400</td>\n",
1145
+ " <td>0.805300</td>\n",
1146
+ " </tr>\n",
1147
+ " <tr>\n",
1148
+ " <td>450</td>\n",
1149
+ " <td>0.803300</td>\n",
1150
+ " </tr>\n",
1151
+ " <tr>\n",
1152
+ " <td>500</td>\n",
1153
+ " <td>0.790300</td>\n",
1154
+ " </tr>\n",
1155
+ " <tr>\n",
1156
+ " <td>550</td>\n",
1157
+ " <td>0.783200</td>\n",
1158
+ " </tr>\n",
1159
+ " <tr>\n",
1160
+ " <td>600</td>\n",
1161
+ " <td>0.784600</td>\n",
1162
+ " </tr>\n",
1163
+ " <tr>\n",
1164
+ " <td>650</td>\n",
1165
+ " <td>0.784000</td>\n",
1166
+ " </tr>\n",
1167
+ " <tr>\n",
1168
+ " <td>700</td>\n",
1169
+ " <td>0.771100</td>\n",
1170
+ " </tr>\n",
1171
+ " <tr>\n",
1172
+ " <td>750</td>\n",
1173
+ " <td>0.767800</td>\n",
1174
+ " </tr>\n",
1175
+ " <tr>\n",
1176
+ " <td>800</td>\n",
1177
+ " <td>0.773000</td>\n",
1178
+ " </tr>\n",
1179
+ " <tr>\n",
1180
+ " <td>850</td>\n",
1181
+ " <td>0.768000</td>\n",
1182
+ " </tr>\n",
1183
+ " <tr>\n",
1184
+ " <td>900</td>\n",
1185
+ " <td>0.756900</td>\n",
1186
+ " </tr>\n",
1187
+ " </tbody>\n",
1188
+ "</table><p>"
1189
+ ],
1190
+ "text/plain": [
1191
+ "<IPython.core.display.HTML object>"
1192
+ ]
1193
+ },
1194
+ "metadata": {},
1195
+ "output_type": "display_data"
1196
+ }
1197
+ ],
1198
+ "source": [
1199
+ "trainer_stats = trainer.train()"
1200
+ ]
1201
+ },
1202
+ {
1203
+ "cell_type": "code",
1204
+ "execution_count": null,
1205
+ "id": "d5fb9b55-1819-44da-ba3a-1a6bc976b80b",
1206
+ "metadata": {},
1207
+ "outputs": [],
1208
+ "source": [
1209
+ "from huggingface_hub import login\n",
1210
+ "login()"
1211
+ ]
1212
+ },
1213
+ {
1214
+ "cell_type": "code",
1215
+ "execution_count": 10,
1216
+ "id": "51fe7a4e-a22f-4809-883c-811d2799bb42",
1217
+ "metadata": {},
1218
+ "outputs": [
1219
+ {
1220
+ "data": {
1221
+ "text/plain": [
1222
+ "('lora_model/tokenizer_config.json',\n",
1223
+ " 'lora_model/special_tokens_map.json',\n",
1224
+ " 'lora_model/vocab.json',\n",
1225
+ " 'lora_model/merges.txt',\n",
1226
+ " 'lora_model/added_tokens.json',\n",
1227
+ " 'lora_model/tokenizer.json')"
1228
+ ]
1229
+ },
1230
+ "execution_count": 10,
1231
+ "metadata": {},
1232
+ "output_type": "execute_result"
1233
+ }
1234
+ ],
1235
+ "source": [
1236
+ "model.save_pretrained(\"lora_model\") # Local saving\n",
1237
+ "tokenizer.save_pretrained(\"lora_model\")\n",
1238
+ "# model.push_to_hub(\"your_name/lora_model\", token = \"...\") # Online saving\n",
1239
+ "# tokenizer.push_to_hub(\"your_name/lora_model\", token = \"...\") # Online saving"
1240
+ ]
1241
+ },
1242
+ {
1243
+ "cell_type": "code",
1244
+ "execution_count": null,
1245
+ "id": "e8a1585a-5acd-4571-a291-3db9a04d826d",
1246
+ "metadata": {},
1247
+ "outputs": [],
1248
+ "source": []
1249
+ }
1250
+ ],
1251
+ "metadata": {
1252
+ "kernelspec": {
1253
+ "display_name": "Python3 (ipykernel)",
1254
+ "language": "python",
1255
+ "name": "python3"
1256
+ },
1257
+ "language_info": {
1258
+ "codemirror_mode": {
1259
+ "name": "ipython",
1260
+ "version": 3
1261
+ },
1262
+ "file_extension": ".py",
1263
+ "mimetype": "text/x-python",
1264
+ "name": "python",
1265
+ "nbconvert_exporter": "python",
1266
+ "pygments_lexer": "ipython3",
1267
+ "version": "3.10.12"
1268
+ }
1269
+ },
1270
+ "nbformat": 4,
1271
+ "nbformat_minor": 5
1272
+ }
Untitled1.ipynb ADDED
@@ -0,0 +1,101 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "id": "c3c1c0c3-95ce-40fd-aaaf-56427df1b82c",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "data": {
11
+ "application/vnd.jupyter.widget-view+json": {
12
+ "model_id": "3aad272b2d9843d3b374d501e3f86504",
13
+ "version_major": 2,
14
+ "version_minor": 0
15
+ },
16
+ "text/plain": [
17
+ "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
18
+ ]
19
+ },
20
+ "metadata": {},
21
+ "output_type": "display_data"
22
+ }
23
+ ],
24
+ "source": [
25
+ "from huggingface_hub import login\n",
26
+ "login()"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": null,
32
+ "id": "a5dcb236-b4d3-4d59-8eb3-9f7e17abaf91",
33
+ "metadata": {},
34
+ "outputs": [],
35
+ "source": [
36
+ "from huggingface_hub import HfApi\n",
37
+ "api = HfApi()\n",
38
+ "\n",
39
+ "api.upload_folder(\n",
40
+ " folder_path=\"\",\n",
41
+ " repo_id=\"kevinwang676/Qwen2.5-1.5B-Distillation\",\n",
42
+ " repo_type=\"model\",\n",
43
+ ")"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 3,
49
+ "id": "a085135f-2459-4cd9-b1a4-7d37c01e64c0",
50
+ "metadata": {},
51
+ "outputs": [
52
+ {
53
+ "ename": "NameError",
54
+ "evalue": "name 'model' is not defined",
55
+ "output_type": "error",
56
+ "traceback": [
57
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
58
+ "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
59
+ "Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241m.\u001b[39msave_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlora_model\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;66;03m# Local saving\u001b[39;00m\n\u001b[1;32m 2\u001b[0m tokenizer\u001b[38;5;241m.\u001b[39msave_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlora_model\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# model.push_to_hub(\"your_name/lora_model\", token = \"...\") # Online saving\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# tokenizer.push_to_hub(\"your_name/lora_model\", token = \"...\") # Online saving\u001b[39;00m\n",
60
+ "\u001b[0;31mNameError\u001b[0m: name 'model' is not defined"
61
+ ]
62
+ }
63
+ ],
64
+ "source": [
65
+ "model.save_pretrained(\"lora_model\") # Local saving\n",
66
+ "tokenizer.save_pretrained(\"lora_model\")\n",
67
+ "# model.push_to_hub(\"your_name/lora_model\", token = \"...\") # Online saving\n",
68
+ "# tokenizer.push_to_hub(\"your_name/lora_model\", token = \"...\") # Online saving"
69
+ ]
70
+ },
71
+ {
72
+ "cell_type": "code",
73
+ "execution_count": null,
74
+ "id": "fac7f828-b1cd-466a-9e1a-251a74d6847c",
75
+ "metadata": {},
76
+ "outputs": [],
77
+ "source": []
78
+ }
79
+ ],
80
+ "metadata": {
81
+ "kernelspec": {
82
+ "display_name": "Python3 (ipykernel)",
83
+ "language": "python",
84
+ "name": "python3"
85
+ },
86
+ "language_info": {
87
+ "codemirror_mode": {
88
+ "name": "ipython",
89
+ "version": 3
90
+ },
91
+ "file_extension": ".py",
92
+ "mimetype": "text/x-python",
93
+ "name": "python",
94
+ "nbconvert_exporter": "python",
95
+ "pygments_lexer": "ipython3",
96
+ "version": "3.10.12"
97
+ }
98
+ },
99
+ "nbformat": 4,
100
+ "nbformat_minor": 5
101
+ }
converted_train/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:334d9238a8db475c5357f433808c6a65bb791c18300aba7c6a8a98e16fcaa735
3
+ size 64396832
converted_train/dataset_info.json ADDED
@@ -0,0 +1,12 @@
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "text": {
6
+ "dtype": "string",
7
+ "_type": "Value"
8
+ }
9
+ },
10
+ "homepage": "",
11
+ "license": ""
12
+ }
converted_train/state.json ADDED
@@ -0,0 +1,13 @@
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "fdc222bf7b68dfbe",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": null
13
+ }
lora_model/README.md ADDED
@@ -0,0 +1,202 @@
1
+ ---
2
+ base_model: unsloth/Qwen2.5-1.5B-Instruct
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.14.0
lora_model/adapter_config.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "unsloth/Qwen2.5-1.5B-Instruct",
5
+ "bias": "none",
6
+ "eva_config": null,
7
+ "exclude_modules": null,
8
+ "fan_in_fan_out": false,
9
+ "inference_mode": true,
10
+ "init_lora_weights": true,
11
+ "layer_replication": null,
12
+ "layers_pattern": null,
13
+ "layers_to_transform": null,
14
+ "loftq_config": {},
15
+ "lora_alpha": 16,
16
+ "lora_bias": false,
17
+ "lora_dropout": 0,
18
+ "megatron_config": null,
19
+ "megatron_core": "megatron.core",
20
+ "modules_to_save": null,
21
+ "peft_type": "LORA",
22
+ "r": 32,
23
+ "rank_pattern": {},
24
+ "revision": null,
25
+ "target_modules": [
26
+ "q_proj",
27
+ "down_proj",
28
+ "up_proj",
29
+ "o_proj",
30
+ "gate_proj",
31
+ "v_proj",
32
+ "k_proj"
33
+ ],
34
+ "task_type": "CAUSAL_LM",
35
+ "use_dora": false,
36
+ "use_rslora": false
37
+ }
lora_model/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9066b4abf68c6ba235ecb2860df0e9e77d30e17503b0d0f794f6b431c8390f3
3
+ size 147770496
lora_model/added_tokens.json ADDED
@@ -0,0 +1,24 @@
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
lora_model/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
lora_model/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|vision_pad|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
lora_model/tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
lora_model/tokenizer_config.json ADDED
@@ -0,0 +1,209 @@
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
+ "errors": "replace",
202
+ "extra_special_tokens": {},
203
+ "model_max_length": 32768,
204
+ "pad_token": "<|vision_pad|>",
205
+ "padding_side": "right",
206
+ "split_special_tokens": false,
207
+ "tokenizer_class": "Qwen2Tokenizer",
208
+ "unk_token": null
209
+ }
lora_model/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
onstart.sh ADDED
@@ -0,0 +1,3 @@
1
+ #!/bin/bash
2
+ # This file is run on instance start. Output in /var/log/onstart.log
3
+
outputs/checkpoint-100/README.md ADDED
@@ -0,0 +1,202 @@
1
+ ---
2
+ base_model: unsloth/Qwen2.5-1.5B-Instruct
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.14.0
outputs/checkpoint-100/adapter_config.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "unsloth/Qwen2.5-1.5B-Instruct",
5
+ "bias": "none",
6
+ "eva_config": null,
7
+ "exclude_modules": null,
8
+ "fan_in_fan_out": false,
9
+ "inference_mode": true,
10
+ "init_lora_weights": true,
11
+ "layer_replication": null,
12
+ "layers_pattern": null,
13
+ "layers_to_transform": null,
14
+ "loftq_config": {},
15
+ "lora_alpha": 16,
16
+ "lora_bias": false,
17
+ "lora_dropout": 0,
18
+ "megatron_config": null,
19
+ "megatron_core": "megatron.core",
20
+ "modules_to_save": null,
21
+ "peft_type": "LORA",
22
+ "r": 32,
23
+ "rank_pattern": {},
24
+ "revision": null,
25
+ "target_modules": [
26
+ "q_proj",
27
+ "down_proj",
28
+ "up_proj",
29
+ "o_proj",
30
+ "gate_proj",
31
+ "v_proj",
32
+ "k_proj"
33
+ ],
34
+ "task_type": "CAUSAL_LM",
35
+ "use_dora": false,
36
+ "use_rslora": false
37
+ }
outputs/checkpoint-100/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6350b2115aca57a874418662d7e86c1f87b153aad2e3171c836f6fafbf711fa
3
+ size 147770496
outputs/checkpoint-100/added_tokens.json ADDED
@@ -0,0 +1,24 @@
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
outputs/checkpoint-100/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
outputs/checkpoint-100/optimizer.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b42a68b41d97f1fe9c3c3ac09331e137539e00f10e03a831ee497add9d126eb
3
+ size 75471860
outputs/checkpoint-100/rng_state.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:608fccb6c056ce88cdfd5355e6be2046f4d107a24a87c6b0d2c3b200ce6bb4ea
3
+ size 14244
outputs/checkpoint-100/scheduler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e0bfbbe2be6a1399a4c175bf33e1a680e875704102055b2bbfcb74670a4c677
3
+ size 1064
outputs/checkpoint-100/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|vision_pad|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
outputs/checkpoint-100/tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896