Upload folder using huggingface_hub
Browse files- README.md +82 -0
- all_results.json +13 -0
- config.json +237 -0
- eval_results.json +8 -0
- model.safetensors +3 -0
- pred_logit_eval.npy +3 -0
- pred_logit_train.npy +3 -0
- preprocessor_config.json +42 -0
- test_results.json +8 -0
- train_results.json +8 -0
- trainer_state.json +245 -0
- training_args.bin +3 -0
README.md
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: cc-by-nc-4.0
|
3 |
+
base_model: MCG-NJU/videomae-base
|
4 |
+
tags:
|
5 |
+
- video-classification
|
6 |
+
- generated_from_trainer
|
7 |
+
metrics:
|
8 |
+
- accuracy
|
9 |
+
model-index:
|
10 |
+
- name: ucf101_42
|
11 |
+
results: []
|
12 |
+
---
|
13 |
+
|
14 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
15 |
+
should probably proofread and complete it, then remove this comment. -->
|
16 |
+
|
17 |
+
# ucf101_42
|
18 |
+
|
19 |
+
This model is a fine-tuned version of [MCG-NJU/videomae-base](https://huggingface.co/MCG-NJU/videomae-base) on the UCF101 dataset.
|
20 |
+
It achieves the following results on the evaluation set:
|
21 |
+
- Loss: 0.8198
|
22 |
+
- Accuracy: 0.8298
|
23 |
+
|
24 |
+
## Model description
|
25 |
+
|
26 |
+
More information needed
|
27 |
+
|
28 |
+
## Intended uses & limitations
|
29 |
+
|
30 |
+
More information needed
|
31 |
+
|
32 |
+
## Training and evaluation data
|
33 |
+
|
34 |
+
More information needed
|
35 |
+
|
36 |
+
## Training procedure
|
37 |
+
|
38 |
+
### Training hyperparameters
|
39 |
+
|
40 |
+
The following hyperparameters were used during training:
|
41 |
+
- learning_rate: 5e-05
|
42 |
+
- train_batch_size: 8
|
43 |
+
- eval_batch_size: 64
|
44 |
+
- seed: 42
|
45 |
+
- gradient_accumulation_steps: 4
|
46 |
+
- total_train_batch_size: 32
|
47 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
48 |
+
- lr_scheduler_type: linear
|
49 |
+
- num_epochs: 20
|
50 |
+
|
51 |
+
### Training results
|
52 |
+
|
53 |
+
| Training Loss | Epoch | Step | Validation Loss | Accuracy |
|
54 |
+
|:-------------:|:-----:|:----:|:---------------:|:--------:|
|
55 |
+
| No log | 1.01 | 300 | 2.3345 | 0.4887 |
|
56 |
+
| No log | 2.01 | 600 | 1.4956 | 0.6433 |
|
57 |
+
| No log | 3.01 | 900 | 1.1647 | 0.7162 |
|
58 |
+
| 1.5784 | 4.01 | 1200 | 1.1531 | 0.7029 |
|
59 |
+
| 1.5784 | 5.01 | 1500 | 1.0251 | 0.7334 |
|
60 |
+
| 1.5784 | 6.01 | 1800 | 1.0315 | 0.7333 |
|
61 |
+
| 0.1821 | 7.01 | 2100 | 0.9787 | 0.7617 |
|
62 |
+
| 0.1821 | 8.01 | 2400 | 0.8933 | 0.7838 |
|
63 |
+
| 0.1821 | 9.01 | 2700 | 0.8781 | 0.7917 |
|
64 |
+
| 0.0651 | 10.01 | 3000 | 0.9051 | 0.7910 |
|
65 |
+
| 0.0651 | 11.01 | 3300 | 0.9593 | 0.7900 |
|
66 |
+
| 0.0651 | 12.01 | 3600 | 0.8054 | 0.8187 |
|
67 |
+
| 0.0651 | 13.01 | 3900 | 0.8679 | 0.8142 |
|
68 |
+
| 0.0265 | 14.01 | 4200 | 0.8380 | 0.8208 |
|
69 |
+
| 0.0265 | 15.01 | 4500 | 0.8317 | 0.8247 |
|
70 |
+
| 0.0265 | 16.01 | 4800 | 0.8027 | 0.8249 |
|
71 |
+
| 0.0091 | 17.01 | 5100 | 0.8240 | 0.8255 |
|
72 |
+
| 0.0091 | 18.01 | 5400 | 0.8480 | 0.8211 |
|
73 |
+
| 0.0091 | 19.01 | 5700 | 0.8198 | 0.8298 |
|
74 |
+
| 0.0091 | 19.87 | 5960 | 0.8315 | 0.8283 |
|
75 |
+
|
76 |
+
|
77 |
+
### Framework versions
|
78 |
+
|
79 |
+
- Transformers 4.39.3
|
80 |
+
- PyTorch 2.2.2+cu118
|
81 |
+
- Datasets 2.18.0
|
82 |
+
- Tokenizers 0.15.2
|
all_results.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 19.87,
|
3 |
+
"eval_accuracy": 0.8297674418604651,
|
4 |
+
"eval_loss": 0.8198431134223938,
|
5 |
+
"eval_runtime": 374.2016,
|
6 |
+
"eval_samples_per_second": 10.11,
|
7 |
+
"eval_steps_per_second": 0.16,
|
8 |
+
"train_accuracy": 0.9990563070147845,
|
9 |
+
"train_loss": 0.004719822201877832,
|
10 |
+
"train_runtime": 334.4111,
|
11 |
+
"train_samples_per_second": 28.519,
|
12 |
+
"train_steps_per_second": 0.449
|
13 |
+
}
|
config.json
ADDED
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "MCG-NJU/videomae-base",
|
3 |
+
"architectures": [
|
4 |
+
"VideoMAEForVideoClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.0,
|
7 |
+
"decoder_hidden_size": 384,
|
8 |
+
"decoder_intermediate_size": 1536,
|
9 |
+
"decoder_num_attention_heads": 6,
|
10 |
+
"decoder_num_hidden_layers": 4,
|
11 |
+
"hidden_act": "gelu",
|
12 |
+
"hidden_dropout_prob": 0.0,
|
13 |
+
"hidden_size": 768,
|
14 |
+
"id2label": {
|
15 |
+
"0": "ApplyEyeMakeup",
|
16 |
+
"1": "ApplyLipstick",
|
17 |
+
"2": "Archery",
|
18 |
+
"3": "BabyCrawling",
|
19 |
+
"4": "BalanceBeam",
|
20 |
+
"5": "BandMarching",
|
21 |
+
"6": "BaseballPitch",
|
22 |
+
"7": "Basketball",
|
23 |
+
"8": "BasketballDunk",
|
24 |
+
"9": "BenchPress",
|
25 |
+
"10": "Biking",
|
26 |
+
"11": "Billiards",
|
27 |
+
"12": "BlowDryHair",
|
28 |
+
"13": "BlowingCandles",
|
29 |
+
"14": "BodyWeightSquats",
|
30 |
+
"15": "Bowling",
|
31 |
+
"16": "BoxingPunchingBag",
|
32 |
+
"17": "BoxingSpeedBag",
|
33 |
+
"18": "BreastStroke",
|
34 |
+
"19": "BrushingTeeth",
|
35 |
+
"20": "CleanAndJerk",
|
36 |
+
"21": "CliffDiving",
|
37 |
+
"22": "CricketBowling",
|
38 |
+
"23": "CricketShot",
|
39 |
+
"24": "CuttingInKitchen",
|
40 |
+
"25": "Diving",
|
41 |
+
"26": "Drumming",
|
42 |
+
"27": "Fencing",
|
43 |
+
"28": "FieldHockeyPenalty",
|
44 |
+
"29": "FloorGymnastics",
|
45 |
+
"30": "FrisbeeCatch",
|
46 |
+
"31": "FrontCrawl",
|
47 |
+
"32": "GolfSwing",
|
48 |
+
"33": "Haircut",
|
49 |
+
"34": "Hammering",
|
50 |
+
"35": "HammerThrow",
|
51 |
+
"36": "HandstandPushups",
|
52 |
+
"37": "HandstandWalking",
|
53 |
+
"38": "HeadMassage",
|
54 |
+
"39": "HighJump",
|
55 |
+
"40": "HorseRace",
|
56 |
+
"41": "HorseRiding",
|
57 |
+
"42": "HulaHoop",
|
58 |
+
"43": "IceDancing",
|
59 |
+
"44": "JavelinThrow",
|
60 |
+
"45": "JugglingBalls",
|
61 |
+
"46": "JumpingJack",
|
62 |
+
"47": "JumpRope",
|
63 |
+
"48": "Kayaking",
|
64 |
+
"49": "Knitting",
|
65 |
+
"50": "LongJump",
|
66 |
+
"51": "Lunges",
|
67 |
+
"52": "MilitaryParade",
|
68 |
+
"53": "Mixing",
|
69 |
+
"54": "MoppingFloor",
|
70 |
+
"55": "Nunchucks",
|
71 |
+
"56": "ParallelBars",
|
72 |
+
"57": "PizzaTossing",
|
73 |
+
"58": "PlayingCello",
|
74 |
+
"59": "PlayingDaf",
|
75 |
+
"60": "PlayingDhol",
|
76 |
+
"61": "PlayingFlute",
|
77 |
+
"62": "PlayingGuitar",
|
78 |
+
"63": "PlayingPiano",
|
79 |
+
"64": "PlayingSitar",
|
80 |
+
"65": "PlayingTabla",
|
81 |
+
"66": "PlayingViolin",
|
82 |
+
"67": "PoleVault",
|
83 |
+
"68": "PommelHorse",
|
84 |
+
"69": "PullUps",
|
85 |
+
"70": "Punch",
|
86 |
+
"71": "PushUps",
|
87 |
+
"72": "Rafting",
|
88 |
+
"73": "RockClimbingIndoor",
|
89 |
+
"74": "RopeClimbing",
|
90 |
+
"75": "Rowing",
|
91 |
+
"76": "SalsaSpin",
|
92 |
+
"77": "ShavingBeard",
|
93 |
+
"78": "Shotput",
|
94 |
+
"79": "SkateBoarding",
|
95 |
+
"80": "Skiing",
|
96 |
+
"81": "Skijet",
|
97 |
+
"82": "SkyDiving",
|
98 |
+
"83": "SoccerJuggling",
|
99 |
+
"84": "SoccerPenalty",
|
100 |
+
"85": "StillRings",
|
101 |
+
"86": "SumoWrestling",
|
102 |
+
"87": "Surfing",
|
103 |
+
"88": "Swing",
|
104 |
+
"89": "TableTennisShot",
|
105 |
+
"90": "TaiChi",
|
106 |
+
"91": "TennisSwing",
|
107 |
+
"92": "ThrowDiscus",
|
108 |
+
"93": "TrampolineJumping",
|
109 |
+
"94": "Typing",
|
110 |
+
"95": "UnevenBars",
|
111 |
+
"96": "VolleyballSpiking",
|
112 |
+
"97": "WalkingWithDog",
|
113 |
+
"98": "WallPushups",
|
114 |
+
"99": "WritingOnBoard",
|
115 |
+
"100": "YoYo"
|
116 |
+
},
|
117 |
+
"image_size": 224,
|
118 |
+
"initializer_range": 0.02,
|
119 |
+
"intermediate_size": 3072,
|
120 |
+
"label2id": {
|
121 |
+
"ApplyEyeMakeup": 0,
|
122 |
+
"ApplyLipstick": 1,
|
123 |
+
"Archery": 2,
|
124 |
+
"BabyCrawling": 3,
|
125 |
+
"BalanceBeam": 4,
|
126 |
+
"BandMarching": 5,
|
127 |
+
"BaseballPitch": 6,
|
128 |
+
"Basketball": 7,
|
129 |
+
"BasketballDunk": 8,
|
130 |
+
"BenchPress": 9,
|
131 |
+
"Biking": 10,
|
132 |
+
"Billiards": 11,
|
133 |
+
"BlowDryHair": 12,
|
134 |
+
"BlowingCandles": 13,
|
135 |
+
"BodyWeightSquats": 14,
|
136 |
+
"Bowling": 15,
|
137 |
+
"BoxingPunchingBag": 16,
|
138 |
+
"BoxingSpeedBag": 17,
|
139 |
+
"BreastStroke": 18,
|
140 |
+
"BrushingTeeth": 19,
|
141 |
+
"CleanAndJerk": 20,
|
142 |
+
"CliffDiving": 21,
|
143 |
+
"CricketBowling": 22,
|
144 |
+
"CricketShot": 23,
|
145 |
+
"CuttingInKitchen": 24,
|
146 |
+
"Diving": 25,
|
147 |
+
"Drumming": 26,
|
148 |
+
"Fencing": 27,
|
149 |
+
"FieldHockeyPenalty": 28,
|
150 |
+
"FloorGymnastics": 29,
|
151 |
+
"FrisbeeCatch": 30,
|
152 |
+
"FrontCrawl": 31,
|
153 |
+
"GolfSwing": 32,
|
154 |
+
"Haircut": 33,
|
155 |
+
"HammerThrow": 35,
|
156 |
+
"Hammering": 34,
|
157 |
+
"HandstandPushups": 36,
|
158 |
+
"HandstandWalking": 37,
|
159 |
+
"HeadMassage": 38,
|
160 |
+
"HighJump": 39,
|
161 |
+
"HorseRace": 40,
|
162 |
+
"HorseRiding": 41,
|
163 |
+
"HulaHoop": 42,
|
164 |
+
"IceDancing": 43,
|
165 |
+
"JavelinThrow": 44,
|
166 |
+
"JugglingBalls": 45,
|
167 |
+
"JumpRope": 47,
|
168 |
+
"JumpingJack": 46,
|
169 |
+
"Kayaking": 48,
|
170 |
+
"Knitting": 49,
|
171 |
+
"LongJump": 50,
|
172 |
+
"Lunges": 51,
|
173 |
+
"MilitaryParade": 52,
|
174 |
+
"Mixing": 53,
|
175 |
+
"MoppingFloor": 54,
|
176 |
+
"Nunchucks": 55,
|
177 |
+
"ParallelBars": 56,
|
178 |
+
"PizzaTossing": 57,
|
179 |
+
"PlayingCello": 58,
|
180 |
+
"PlayingDaf": 59,
|
181 |
+
"PlayingDhol": 60,
|
182 |
+
"PlayingFlute": 61,
|
183 |
+
"PlayingGuitar": 62,
|
184 |
+
"PlayingPiano": 63,
|
185 |
+
"PlayingSitar": 64,
|
186 |
+
"PlayingTabla": 65,
|
187 |
+
"PlayingViolin": 66,
|
188 |
+
"PoleVault": 67,
|
189 |
+
"PommelHorse": 68,
|
190 |
+
"PullUps": 69,
|
191 |
+
"Punch": 70,
|
192 |
+
"PushUps": 71,
|
193 |
+
"Rafting": 72,
|
194 |
+
"RockClimbingIndoor": 73,
|
195 |
+
"RopeClimbing": 74,
|
196 |
+
"Rowing": 75,
|
197 |
+
"SalsaSpin": 76,
|
198 |
+
"ShavingBeard": 77,
|
199 |
+
"Shotput": 78,
|
200 |
+
"SkateBoarding": 79,
|
201 |
+
"Skiing": 80,
|
202 |
+
"Skijet": 81,
|
203 |
+
"SkyDiving": 82,
|
204 |
+
"SoccerJuggling": 83,
|
205 |
+
"SoccerPenalty": 84,
|
206 |
+
"StillRings": 85,
|
207 |
+
"SumoWrestling": 86,
|
208 |
+
"Surfing": 87,
|
209 |
+
"Swing": 88,
|
210 |
+
"TableTennisShot": 89,
|
211 |
+
"TaiChi": 90,
|
212 |
+
"TennisSwing": 91,
|
213 |
+
"ThrowDiscus": 92,
|
214 |
+
"TrampolineJumping": 93,
|
215 |
+
"Typing": 94,
|
216 |
+
"UnevenBars": 95,
|
217 |
+
"VolleyballSpiking": 96,
|
218 |
+
"WalkingWithDog": 97,
|
219 |
+
"WallPushups": 98,
|
220 |
+
"WritingOnBoard": 99,
|
221 |
+
"YoYo": 100
|
222 |
+
},
|
223 |
+
"layer_norm_eps": 1e-12,
|
224 |
+
"model_type": "videomae",
|
225 |
+
"norm_pix_loss": true,
|
226 |
+
"num_attention_heads": 12,
|
227 |
+
"num_channels": 3,
|
228 |
+
"num_frames": 16,
|
229 |
+
"num_hidden_layers": 12,
|
230 |
+
"patch_size": 16,
|
231 |
+
"problem_type": "single_label_classification",
|
232 |
+
"qkv_bias": true,
|
233 |
+
"torch_dtype": "float32",
|
234 |
+
"transformers_version": "4.39.3",
|
235 |
+
"tubelet_size": 2,
|
236 |
+
"use_mean_pooling": false
|
237 |
+
}
|
eval_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 19.87,
|
3 |
+
"eval_accuracy": 0.8297674418604651,
|
4 |
+
"eval_loss": 0.8198431134223938,
|
5 |
+
"eval_runtime": 374.2016,
|
6 |
+
"eval_samples_per_second": 10.11,
|
7 |
+
"eval_steps_per_second": 0.16
|
8 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e8d059b3e48ff7a7fff343212f9b78876c22e219c60ac0edc0c737de608c0d9
|
3 |
+
size 345241908
|
pred_logit_eval.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d877d4dfb4c0feb8ae3075126b2ee31ba78f3f5e9f12c115e5ca83858f087eb2
|
3 |
+
size 4343128
|
pred_logit_train.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7716cc0c5c91dcd533744af3d195c17fb77e98824b97b7dd36f5c6572d1581a8
|
3 |
+
size 3853076
|
preprocessor_config.json
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_valid_processor_keys": [
|
3 |
+
"videos",
|
4 |
+
"do_resize",
|
5 |
+
"size",
|
6 |
+
"resample",
|
7 |
+
"do_center_crop",
|
8 |
+
"crop_size",
|
9 |
+
"do_rescale",
|
10 |
+
"rescale_factor",
|
11 |
+
"do_normalize",
|
12 |
+
"image_mean",
|
13 |
+
"image_std",
|
14 |
+
"return_tensors",
|
15 |
+
"data_format",
|
16 |
+
"input_data_format"
|
17 |
+
],
|
18 |
+
"crop_size": {
|
19 |
+
"height": 224,
|
20 |
+
"width": 224
|
21 |
+
},
|
22 |
+
"do_center_crop": true,
|
23 |
+
"do_normalize": true,
|
24 |
+
"do_rescale": true,
|
25 |
+
"do_resize": true,
|
26 |
+
"image_mean": [
|
27 |
+
0.485,
|
28 |
+
0.456,
|
29 |
+
0.406
|
30 |
+
],
|
31 |
+
"image_processor_type": "VideoMAEImageProcessor",
|
32 |
+
"image_std": [
|
33 |
+
0.229,
|
34 |
+
0.224,
|
35 |
+
0.225
|
36 |
+
],
|
37 |
+
"resample": 2,
|
38 |
+
"rescale_factor": 0.00392156862745098,
|
39 |
+
"size": {
|
40 |
+
"shortest_edge": 224
|
41 |
+
}
|
42 |
+
}
|
test_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 19.87,
|
3 |
+
"eval_accuracy": 0.8297674418604651,
|
4 |
+
"eval_loss": 0.8198431134223938,
|
5 |
+
"eval_runtime": 374.2016,
|
6 |
+
"eval_samples_per_second": 10.11,
|
7 |
+
"eval_steps_per_second": 0.16
|
8 |
+
}
|
train_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 19.87,
|
3 |
+
"train_accuracy": 0.9990563070147845,
|
4 |
+
"train_loss": 0.004719822201877832,
|
5 |
+
"train_runtime": 334.4111,
|
6 |
+
"train_samples_per_second": 28.519,
|
7 |
+
"train_steps_per_second": 0.449
|
8 |
+
}
|
trainer_state.json
ADDED
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.8297674418604651,
|
3 |
+
"best_model_checkpoint": "video/train/checkpoint/videomae-base/ucf101_42/checkpoint-5700",
|
4 |
+
"epoch": 19.871751886001675,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 5960,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 1.01,
|
13 |
+
"eval_accuracy": 0.4886511627906977,
|
14 |
+
"eval_loss": 2.334453582763672,
|
15 |
+
"eval_runtime": 390.4063,
|
16 |
+
"eval_samples_per_second": 9.69,
|
17 |
+
"eval_steps_per_second": 0.154,
|
18 |
+
"step": 300
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"epoch": 2.01,
|
22 |
+
"eval_accuracy": 0.6433488372093024,
|
23 |
+
"eval_loss": 1.4956246614456177,
|
24 |
+
"eval_runtime": 378.3625,
|
25 |
+
"eval_samples_per_second": 9.998,
|
26 |
+
"eval_steps_per_second": 0.159,
|
27 |
+
"step": 600
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"epoch": 3.01,
|
31 |
+
"eval_accuracy": 0.7161860465116279,
|
32 |
+
"eval_loss": 1.1646597385406494,
|
33 |
+
"eval_runtime": 374.5393,
|
34 |
+
"eval_samples_per_second": 10.1,
|
35 |
+
"eval_steps_per_second": 0.16,
|
36 |
+
"step": 900
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"epoch": 3.34,
|
40 |
+
"grad_norm": 6.9290690422058105,
|
41 |
+
"learning_rate": 4.161073825503356e-05,
|
42 |
+
"loss": 1.5784,
|
43 |
+
"step": 1000
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"epoch": 4.01,
|
47 |
+
"eval_accuracy": 0.7028837209302325,
|
48 |
+
"eval_loss": 1.1531013250350952,
|
49 |
+
"eval_runtime": 376.3709,
|
50 |
+
"eval_samples_per_second": 10.051,
|
51 |
+
"eval_steps_per_second": 0.159,
|
52 |
+
"step": 1200
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"epoch": 5.01,
|
56 |
+
"eval_accuracy": 0.7333953488372092,
|
57 |
+
"eval_loss": 1.0251258611679077,
|
58 |
+
"eval_runtime": 377.2789,
|
59 |
+
"eval_samples_per_second": 10.027,
|
60 |
+
"eval_steps_per_second": 0.159,
|
61 |
+
"step": 1500
|
62 |
+
},
|
63 |
+
{
|
64 |
+
"epoch": 6.01,
|
65 |
+
"eval_accuracy": 0.7333023255813953,
|
66 |
+
"eval_loss": 1.031457781791687,
|
67 |
+
"eval_runtime": 376.6891,
|
68 |
+
"eval_samples_per_second": 10.043,
|
69 |
+
"eval_steps_per_second": 0.159,
|
70 |
+
"step": 1800
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"epoch": 6.67,
|
74 |
+
"grad_norm": 7.911869049072266,
|
75 |
+
"learning_rate": 3.3221476510067115e-05,
|
76 |
+
"loss": 0.1821,
|
77 |
+
"step": 2000
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"epoch": 7.01,
|
81 |
+
"eval_accuracy": 0.7616744186046511,
|
82 |
+
"eval_loss": 0.9787126779556274,
|
83 |
+
"eval_runtime": 379.965,
|
84 |
+
"eval_samples_per_second": 9.956,
|
85 |
+
"eval_steps_per_second": 0.158,
|
86 |
+
"step": 2100
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 8.01,
|
90 |
+
"eval_accuracy": 0.7838139534883721,
|
91 |
+
"eval_loss": 0.8932655453681946,
|
92 |
+
"eval_runtime": 375.8521,
|
93 |
+
"eval_samples_per_second": 10.065,
|
94 |
+
"eval_steps_per_second": 0.16,
|
95 |
+
"step": 2400
|
96 |
+
},
|
97 |
+
{
|
98 |
+
"epoch": 9.01,
|
99 |
+
"eval_accuracy": 0.7917209302325582,
|
100 |
+
"eval_loss": 0.8780561089515686,
|
101 |
+
"eval_runtime": 373.4872,
|
102 |
+
"eval_samples_per_second": 10.129,
|
103 |
+
"eval_steps_per_second": 0.161,
|
104 |
+
"step": 2700
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"epoch": 10.01,
|
108 |
+
"grad_norm": 0.12537457048892975,
|
109 |
+
"learning_rate": 2.4832214765100674e-05,
|
110 |
+
"loss": 0.0651,
|
111 |
+
"step": 3000
|
112 |
+
},
|
113 |
+
{
|
114 |
+
"epoch": 10.01,
|
115 |
+
"eval_accuracy": 0.7909767441860465,
|
116 |
+
"eval_loss": 0.9051322937011719,
|
117 |
+
"eval_runtime": 375.6037,
|
118 |
+
"eval_samples_per_second": 10.072,
|
119 |
+
"eval_steps_per_second": 0.16,
|
120 |
+
"step": 3000
|
121 |
+
},
|
122 |
+
{
|
123 |
+
"epoch": 11.01,
|
124 |
+
"eval_accuracy": 0.790046511627907,
|
125 |
+
"eval_loss": 0.9593069553375244,
|
126 |
+
"eval_runtime": 375.7576,
|
127 |
+
"eval_samples_per_second": 10.068,
|
128 |
+
"eval_steps_per_second": 0.16,
|
129 |
+
"step": 3300
|
130 |
+
},
|
131 |
+
{
|
132 |
+
"epoch": 12.01,
|
133 |
+
"eval_accuracy": 0.8186976744186046,
|
134 |
+
"eval_loss": 0.8053779602050781,
|
135 |
+
"eval_runtime": 377.8682,
|
136 |
+
"eval_samples_per_second": 10.011,
|
137 |
+
"eval_steps_per_second": 0.159,
|
138 |
+
"step": 3600
|
139 |
+
},
|
140 |
+
{
|
141 |
+
"epoch": 13.01,
|
142 |
+
"eval_accuracy": 0.8142325581395349,
|
143 |
+
"eval_loss": 0.8678916692733765,
|
144 |
+
"eval_runtime": 376.9615,
|
145 |
+
"eval_samples_per_second": 10.036,
|
146 |
+
"eval_steps_per_second": 0.159,
|
147 |
+
"step": 3900
|
148 |
+
},
|
149 |
+
{
|
150 |
+
"epoch": 13.34,
|
151 |
+
"grad_norm": 10.020633697509766,
|
152 |
+
"learning_rate": 1.644295302013423e-05,
|
153 |
+
"loss": 0.0265,
|
154 |
+
"step": 4000
|
155 |
+
},
|
156 |
+
{
|
157 |
+
"epoch": 14.01,
|
158 |
+
"eval_accuracy": 0.8208372093023256,
|
159 |
+
"eval_loss": 0.8379742503166199,
|
160 |
+
"eval_runtime": 373.9706,
|
161 |
+
"eval_samples_per_second": 10.116,
|
162 |
+
"eval_steps_per_second": 0.16,
|
163 |
+
"step": 4200
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 15.01,
|
167 |
+
"eval_accuracy": 0.8246511627906977,
|
168 |
+
"eval_loss": 0.8317446708679199,
|
169 |
+
"eval_runtime": 375.9164,
|
170 |
+
"eval_samples_per_second": 10.063,
|
171 |
+
"eval_steps_per_second": 0.16,
|
172 |
+
"step": 4500
|
173 |
+
},
|
174 |
+
{
|
175 |
+
"epoch": 16.01,
|
176 |
+
"eval_accuracy": 0.8249302325581396,
|
177 |
+
"eval_loss": 0.8027446269989014,
|
178 |
+
"eval_runtime": 375.7325,
|
179 |
+
"eval_samples_per_second": 10.068,
|
180 |
+
"eval_steps_per_second": 0.16,
|
181 |
+
"step": 4800
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"epoch": 16.67,
|
185 |
+
"grad_norm": 0.026239760220050812,
|
186 |
+
"learning_rate": 8.053691275167785e-06,
|
187 |
+
"loss": 0.0091,
|
188 |
+
"step": 5000
|
189 |
+
},
|
190 |
+
{
|
191 |
+
"epoch": 17.01,
|
192 |
+
"eval_accuracy": 0.8254883720930233,
|
193 |
+
"eval_loss": 0.8239758610725403,
|
194 |
+
"eval_runtime": 375.8655,
|
195 |
+
"eval_samples_per_second": 10.065,
|
196 |
+
"eval_steps_per_second": 0.16,
|
197 |
+
"step": 5100
|
198 |
+
},
|
199 |
+
{
|
200 |
+
"epoch": 18.01,
|
201 |
+
"eval_accuracy": 0.8211162790697675,
|
202 |
+
"eval_loss": 0.8480401039123535,
|
203 |
+
"eval_runtime": 375.7779,
|
204 |
+
"eval_samples_per_second": 10.067,
|
205 |
+
"eval_steps_per_second": 0.16,
|
206 |
+
"step": 5400
|
207 |
+
},
|
208 |
+
{
|
209 |
+
"epoch": 19.01,
|
210 |
+
"eval_accuracy": 0.8297674418604651,
|
211 |
+
"eval_loss": 0.8198431134223938,
|
212 |
+
"eval_runtime": 372.9411,
|
213 |
+
"eval_samples_per_second": 10.144,
|
214 |
+
"eval_steps_per_second": 0.161,
|
215 |
+
"step": 5700
|
216 |
+
},
|
217 |
+
{
|
218 |
+
"epoch": 19.87,
|
219 |
+
"eval_accuracy": 0.8282790697674418,
|
220 |
+
"eval_loss": 0.8315407037734985,
|
221 |
+
"eval_runtime": 376.729,
|
222 |
+
"eval_samples_per_second": 10.042,
|
223 |
+
"eval_steps_per_second": 0.159,
|
224 |
+
"step": 5960
|
225 |
+
},
|
226 |
+
{
|
227 |
+
"epoch": 19.87,
|
228 |
+
"step": 5960,
|
229 |
+
"total_flos": 2.3636640203347614e+20,
|
230 |
+
"train_loss": 0.3129509675422771,
|
231 |
+
"train_runtime": 26481.3321,
|
232 |
+
"train_samples_per_second": 7.203,
|
233 |
+
"train_steps_per_second": 0.225
|
234 |
+
}
|
235 |
+
],
|
236 |
+
"logging_steps": 1000,
|
237 |
+
"max_steps": 5960,
|
238 |
+
"num_input_tokens_seen": 0,
|
239 |
+
"num_train_epochs": 20,
|
240 |
+
"save_steps": 500,
|
241 |
+
"total_flos": 2.3636640203347614e+20,
|
242 |
+
"train_batch_size": 8,
|
243 |
+
"trial_name": null,
|
244 |
+
"trial_params": null
|
245 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:094ade010886978e5f04bad871ebb4d486ddbf3e9224aee0d2227da3fcde25a1
|
3 |
+
size 5048
|