mateoguaman committed on May 13

Commit

9b53e4a

verified ·

1 Parent(s): 78e86f9

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +5 -0
README.md +58 -0
adapter_config.json +37 -0
adapter_model.safetensors +3 -0
all_results.json +13 -0
checkpoint-157/README.md +202 -0
checkpoint-157/adapter_config.json +37 -0
checkpoint-157/adapter_model.safetensors +3 -0
checkpoint-157/optimizer.pt +3 -0
checkpoint-157/preprocessor_config.json +25 -0
checkpoint-157/rng_state_0.pth +3 -0
checkpoint-157/rng_state_1.pth +3 -0
checkpoint-157/rng_state_2.pth +3 -0
checkpoint-157/rng_state_3.pth +3 -0
checkpoint-157/rng_state_4.pth +3 -0
checkpoint-157/rng_state_5.pth +3 -0
checkpoint-157/rng_state_6.pth +3 -0
checkpoint-157/rng_state_7.pth +3 -0
checkpoint-157/scheduler.pt +3 -0
checkpoint-157/special_tokens_map.json +39 -0
checkpoint-157/tokenizer.json +3 -0
checkpoint-157/tokenizer_config.json +0 -0
checkpoint-157/trainer_state.json +106 -0
checkpoint-157/training_args.bin +3 -0
checkpoint-314/README.md +202 -0
checkpoint-314/adapter_config.json +37 -0
checkpoint-314/adapter_model.safetensors +3 -0
checkpoint-314/optimizer.pt +3 -0
checkpoint-314/preprocessor_config.json +25 -0
checkpoint-314/rng_state_0.pth +3 -0
checkpoint-314/rng_state_1.pth +3 -0
checkpoint-314/rng_state_2.pth +3 -0
checkpoint-314/rng_state_3.pth +3 -0
checkpoint-314/rng_state_4.pth +3 -0
checkpoint-314/rng_state_5.pth +3 -0
checkpoint-314/rng_state_6.pth +3 -0
checkpoint-314/rng_state_7.pth +3 -0
checkpoint-314/scheduler.pt +3 -0
checkpoint-314/special_tokens_map.json +39 -0
checkpoint-314/tokenizer.json +3 -0
checkpoint-314/tokenizer_config.json +0 -0
checkpoint-314/trainer_state.json +179 -0
checkpoint-314/training_args.bin +3 -0
checkpoint-471/README.md +202 -0
checkpoint-471/adapter_config.json +37 -0
checkpoint-471/adapter_model.safetensors +3 -0
checkpoint-471/optimizer.pt +3 -0
checkpoint-471/preprocessor_config.json +25 -0
checkpoint-471/rng_state_0.pth +3 -0
checkpoint-471/rng_state_1.pth +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoint-157/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-314/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-471/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-627/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,58 @@

+---
+base_model: google/paligemma2-3b-pt-224
+datasets: mateoguaman/vlmn_iphone_gates_cotrain_0.1_magicsoup_no_insta_sub5
+library_name: transformers
+model_name: /gscratch/weirdlab/mateogc/projects/vlm-navigation/data/paligemma2-3b-pt-224-sft-lora-magicsoup_no_cfiphone_no_insta_sub5
+tags:
+- generated_from_trainer
+- alignment-handbook
+license: gemma
+---
+# Model Card for paligemma2-3b-pt-224-sft-lora-magicsoup_no_cfiphone_no_insta_sub5
+This model is a fine-tuned version of [google/paligemma2-3b-pt-224](https://huggingface.co/google/paligemma2-3b-pt-224) on the [mateoguaman/vlmn_iphone_gates_cotrain_0.1_magicsoup_no_insta_sub5](https://huggingface.co/datasets/mateoguaman/vlmn_iphone_gates_cotrain_0.1_magicsoup_no_insta_sub5) dataset.
+It has been trained using [TRL](https://github.com/huggingface/trl).
+## Quick start
+```python
+from transformers import pipeline
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
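+generator = pipeline("text-generation", model="<hub-repo-id-of-this-model>", device="cuda")  # replace the placeholder with this model's Hub repo id (the generated card had "None")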
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/mateoguaman/paligemma2-3b-pt-224-sft-lora-iphone_gates_cotrain_0.1_magicsoup_no_insta_sub5/runs/0zrdcxgy)
+This model was trained with SFT.
+### Framework versions
+- TRL: 0.15.2
+- Transformers: 4.49.0
+- Pytorch: 2.6.0
+- Datasets: 3.4.1
+- Tokenizers: 0.21.1
+## Citations
+Cite TRL as:
+```bibtex
+@misc{vonwerra2022trl,
+	title        = {{TRL: Transformer Reinforcement Learning}},
+	author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
+	year         = 2020,
+	journal      = {GitHub repository},
+	publisher    = {GitHub},
+	howpublished = {\url{https://github.com/huggingface/trl}}
+}
+```

adapter_config.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/paligemma2-3b-pt-224",
+  "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "v_proj",
+    "k_proj",
+    "q_proj",
+    "gate_proj",
+    "down_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:86508835b03c8804dbfbdd923eac68bf4d9a05c950c859b45e2d6449b73deb91
+size 95091000

all_results.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+    "eval_loss": 2.694655179977417,
+    "eval_runtime": 66.0317,
+    "eval_samples": 9832,
+    "eval_samples_per_second": 148.898,
+    "eval_steps_per_second": 2.332,
+    "total_flos": 1.6545523447313203e+17,
+    "train_loss": 2.507940781743903,
+    "train_runtime": 1172.0392,
+    "train_samples": 40086,
+    "train_samples_per_second": 34.202,
+    "train_steps_per_second": 0.535
+}

checkpoint-157/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: google/paligemma2-3b-pt-224
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.14.0

checkpoint-157/adapter_config.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/paligemma2-3b-pt-224",
+  "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "v_proj",
+    "k_proj",
+    "q_proj",
+    "gate_proj",
+    "down_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-157/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:86508835b03c8804dbfbdd923eac68bf4d9a05c950c859b45e2d6449b73deb91
+size 95091000

checkpoint-157/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8719799ab83279eb1086f2994596d61a5a2845b9ff52f17db577b98cab3a3c1c
+size 190464380

checkpoint-157/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "do_convert_rgb": null,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "SiglipImageProcessor",
+  "image_seq_length": 256,
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "processor_class": "PaliGemmaProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}

checkpoint-157/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6dcfd9461cce86a60e31cc02a6dabc98038a813635c0c2f652f072f73abb7457
+size 15984

checkpoint-157/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:468b43cd5244c25cd27d5af58b895fd7e0453ac83d2a4afa127df89dc8efc9fe
+size 15984

checkpoint-157/rng_state_2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cb69b79ef5535513041b31c1459923dc31bd592c5b0c43a9d9d3f6d4a0569f87
+size 15984

checkpoint-157/rng_state_3.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d4b4782efdea914a18b5a3d761fb2df5f03a1e1ad035d494dceb0f9686b048cd
+size 15984

checkpoint-157/rng_state_4.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e84bc3ca1b63543b4e2175d133771419310ce68408c3fcde13b8729db627fb21
+size 15984

checkpoint-157/rng_state_5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c1d3ff6bd35590018d05fe31da8a687e0055f31f3b2d80a8ffb39d338a45121f
+size 15984

checkpoint-157/rng_state_6.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3713e147212d2c07741c6b4e303e98eac6bfafacbe2f3d2269319869cf538cf6
+size 15984

checkpoint-157/rng_state_7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ec6b3f61768cbd64ba7b918c6f79e98dffaee06e42858c228298efc650fe7105
+size 15984

checkpoint-157/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:62ced02920ed9f6d3ac1571202a9161eaa6e027712264a31645ea0da9346de74
+size 1064

checkpoint-157/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "additional_special_tokens": [
+    {
+      "content": "<image>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
+  ],
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-157/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b648d11e0879b11659e6b4051f691752c0cef597a865c6fde5b318b9f68c1d05
+size 34600974

checkpoint-157/tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-157/trainer_state.json ADDED Viewed

	@@ -0,0 +1,106 @@

+{
+  "best_metric": 2.694655179977417,
+  "best_model_checkpoint": "data/paligemma2-3b-pt-224-sft-lora-iphone_gates_cotrain_0.1_magicsoup_no_insta_sub5/checkpoint-157",
+  "epoch": 0.2503987240829346,
+  "eval_steps": 157,
+  "global_step": 157,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.001594896331738437,
+      "grad_norm": 3.1208314895629883,
+      "learning_rate": 1.5873015873015873e-06,
+      "loss": 2.5433,
+      "mean_token_accuracy": 0.33664771914482117,
+      "step": 1
+    },
+    {
+      "epoch": 0.03189792663476874,
+      "grad_norm": 2.4424667358398438,
+      "learning_rate": 3.1746031746031745e-05,
+      "loss": 2.5138,
+      "mean_token_accuracy": 0.3535436580055638,
+      "step": 20
+    },
+    {
+      "epoch": 0.06379585326953748,
+      "grad_norm": 2.4620940685272217,
+      "learning_rate": 6.349206349206349e-05,
+      "loss": 2.5051,
+      "mean_token_accuracy": 0.35078124701976776,
+      "step": 40
+    },
+    {
+      "epoch": 0.09569377990430622,
+      "grad_norm": 2.406940221786499,
+      "learning_rate": 9.523809523809524e-05,
+      "loss": 2.5572,
+      "mean_token_accuracy": 0.34765625,
+      "step": 60
+    },
+    {
+      "epoch": 0.12759170653907495,
+      "grad_norm": 3.0474202632904053,
+      "learning_rate": 9.977599647950571e-05,
+      "loss": 2.5707,
+      "mean_token_accuracy": 0.34701704829931257,
+      "step": 80
+    },
+    {
+      "epoch": 0.1594896331738437,
+      "grad_norm": 3.1929140090942383,
+      "learning_rate": 9.894185011967993e-05,
+      "loss": 2.5619,
+      "mean_token_accuracy": 0.3437500029802322,
+      "step": 100
+    },
+    {
+      "epoch": 0.19138755980861244,
+      "grad_norm": 5.141939163208008,
+      "learning_rate": 9.750092174273521e-05,
+      "loss": 2.5659,
+      "mean_token_accuracy": 0.3424005672335625,
+      "step": 120
+    },
+    {
+      "epoch": 0.22328548644338117,
+      "grad_norm": 3.2162439823150635,
+      "learning_rate": 9.547107600693329e-05,
+      "loss": 2.5814,
+      "mean_token_accuracy": 0.34282670766115186,
+      "step": 140
+    },
+    {
+      "epoch": 0.2503987240829346,
+      "eval_loss": 2.694655179977417,
+      "eval_mean_token_accuracy": 0.33972953251230786,
+      "eval_runtime": 73.5337,
+      "eval_samples_per_second": 133.707,
+      "eval_steps_per_second": 2.094,
+      "step": 157
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 627,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 157,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 4.142977968085402e+16,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-157/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d9d17829fb8138ba7d2faee44d77ac1a9b2d65c100167c2c3125a75640d1235
+size 5816

checkpoint-314/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: google/paligemma2-3b-pt-224
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.14.0

checkpoint-314/adapter_config.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/paligemma2-3b-pt-224",
+  "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "v_proj",
+    "k_proj",
+    "q_proj",
+    "gate_proj",
+    "down_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-314/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9366120eef533876be86344f8fd860c00fb79c40322f5d3a4661f013dd01f88d
+size 95091000

checkpoint-314/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c846df6ea77830d3ca54de90c8e0e64369554de3ad2138ad958a6b228ea34686
+size 190464380

checkpoint-314/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "do_convert_rgb": null,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "SiglipImageProcessor",
+  "image_seq_length": 256,
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "processor_class": "PaliGemmaProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}

checkpoint-314/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e77998cea617c5de484099a5f4a9a05eb390593c2875a28d5705a94fcde47181
+size 15984

checkpoint-314/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a73702d0870b7159478807509208de72821e3a81bca6f893b26b62fc1223374f
+size 15984

checkpoint-314/rng_state_2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:939993cdde3497260c9d8ddfda057348f40619567fff7cbdb197f09e4947a457
+size 15984

checkpoint-314/rng_state_3.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c9f6cd143f027df6de16254566bf7cfad4820e729d58210100e9fad0355947fb
+size 15984

checkpoint-314/rng_state_4.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ab75d23f85c246c5b7d65bccc8cf149f29ea74ed279cb3c9cb0f1507e7578850
+size 15984

checkpoint-314/rng_state_5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a3bd5559ed500512ce9d2a7f7822ba674e578567bfa24749f80fc9e6af11774f
+size 15984

checkpoint-314/rng_state_6.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:58a6089af61e86b0556db8e80c29c7f8cbec2efd2846834c2091eb725f22554c
+size 15984

checkpoint-314/rng_state_7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f1cc5745eec9a1324268cc8dc33413473c259b188184912baac89d4876d2f933
+size 15984

checkpoint-314/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d6635f7cda11a84a0eb5da7931de8ec78ee5cd005b656c805a074b645b1c6fda
+size 1064

checkpoint-314/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "additional_special_tokens": [
+    {
+      "content": "<image>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
+  ],
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-314/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b648d11e0879b11659e6b4051f691752c0cef597a865c6fde5b318b9f68c1d05
+size 34600974

checkpoint-314/tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-314/trainer_state.json ADDED Viewed

	@@ -0,0 +1,179 @@

+{
+  "best_metric": 2.694655179977417,
+  "best_model_checkpoint": "data/paligemma2-3b-pt-224-sft-lora-iphone_gates_cotrain_0.1_magicsoup_no_insta_sub5/checkpoint-157",
+  "epoch": 0.5007974481658692,
+  "eval_steps": 157,
+  "global_step": 314,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.001594896331738437,
+      "grad_norm": 3.1208314895629883,
+      "learning_rate": 1.5873015873015873e-06,
+      "loss": 2.5433,
+      "mean_token_accuracy": 0.33664771914482117,
+      "step": 1
+    },
+    {
+      "epoch": 0.03189792663476874,
+      "grad_norm": 2.4424667358398438,
+      "learning_rate": 3.1746031746031745e-05,
+      "loss": 2.5138,
+      "mean_token_accuracy": 0.3535436580055638,
+      "step": 20
+    },
+    {
+      "epoch": 0.06379585326953748,
+      "grad_norm": 2.4620940685272217,
+      "learning_rate": 6.349206349206349e-05,
+      "loss": 2.5051,
+      "mean_token_accuracy": 0.35078124701976776,
+      "step": 40
+    },
+    {
+      "epoch": 0.09569377990430622,
+      "grad_norm": 2.406940221786499,
+      "learning_rate": 9.523809523809524e-05,
+      "loss": 2.5572,
+      "mean_token_accuracy": 0.34765625,
+      "step": 60
+    },
+    {
+      "epoch": 0.12759170653907495,
+      "grad_norm": 3.0474202632904053,
+      "learning_rate": 9.977599647950571e-05,
+      "loss": 2.5707,
+      "mean_token_accuracy": 0.34701704829931257,
+      "step": 80
+    },
+    {
+      "epoch": 0.1594896331738437,
+      "grad_norm": 3.1929140090942383,
+      "learning_rate": 9.894185011967993e-05,
+      "loss": 2.5619,
+      "mean_token_accuracy": 0.3437500029802322,
+      "step": 100
+    },
+    {
+      "epoch": 0.19138755980861244,
+      "grad_norm": 5.141939163208008,
+      "learning_rate": 9.750092174273521e-05,
+      "loss": 2.5659,
+      "mean_token_accuracy": 0.3424005672335625,
+      "step": 120
+    },
+    {
+      "epoch": 0.22328548644338117,
+      "grad_norm": 3.2162439823150635,
+      "learning_rate": 9.547107600693329e-05,
+      "loss": 2.5814,
+      "mean_token_accuracy": 0.34282670766115186,
+      "step": 140
+    },
+    {
+      "epoch": 0.2503987240829346,
+      "eval_loss": 2.694655179977417,
+      "eval_mean_token_accuracy": 0.33972953251230786,
+      "eval_runtime": 73.5337,
+      "eval_samples_per_second": 133.707,
+      "eval_steps_per_second": 2.094,
+      "step": 157
+    },
+    {
+      "epoch": 0.2551834130781499,
+      "grad_norm": 3.2555155754089355,
+      "learning_rate": 9.28774789794947e-05,
+      "loss": 2.5654,
+      "mean_token_accuracy": 0.3451704482237498,
+      "step": 160
+    },
+    {
+      "epoch": 0.28708133971291866,
+      "grad_norm": 2.5101306438446045,
+      "learning_rate": 8.975228612720416e-05,
+      "loss": 2.5233,
+      "mean_token_accuracy": 0.34417613595724106,
+      "step": 180
+    },
+    {
+      "epoch": 0.3189792663476874,
+      "grad_norm": 2.408094882965088,
+      "learning_rate": 8.613424365230287e-05,
+      "loss": 2.5341,
+      "mean_token_accuracy": 0.3462357923388481,
+      "step": 200
+    },
+    {
+      "epoch": 0.3508771929824561,
+      "grad_norm": 2.1550216674804688,
+      "learning_rate": 8.206820811631386e-05,
+      "loss": 2.5484,
+      "mean_token_accuracy": 0.3474431842565536,
+      "step": 220
+    },
+    {
+      "epoch": 0.3827751196172249,
+      "grad_norm": 2.699045181274414,
+      "learning_rate": 7.760459030751284e-05,
+      "loss": 2.5327,
+      "mean_token_accuracy": 0.3442471593618393,
+      "step": 240
+    },
+    {
+      "epoch": 0.41467304625199364,
+      "grad_norm": 3.142629384994507,
+      "learning_rate": 7.279873024698706e-05,
+      "loss": 2.5423,
+      "mean_token_accuracy": 0.3459517046809196,
+      "step": 260
+    },
+    {
+      "epoch": 0.44657097288676234,
+      "grad_norm": 2.2078137397766113,
+      "learning_rate": 6.771021108196912e-05,
+      "loss": 2.5185,
+      "mean_token_accuracy": 0.3458806797862053,
+      "step": 280
+    },
+    {
+      "epoch": 0.4784688995215311,
+      "grad_norm": 2.915557622909546,
+      "learning_rate": 6.240212037280966e-05,
+      "loss": 2.4966,
+      "mean_token_accuracy": 0.35191761404275895,
+      "step": 300
+    },
+    {
+      "epoch": 0.5007974481658692,
+      "eval_loss": 2.7145228385925293,
+      "eval_mean_token_accuracy": 0.3395731867778869,
+      "eval_runtime": 65.5003,
+      "eval_samples_per_second": 150.106,
+      "eval_steps_per_second": 2.351,
+      "step": 314
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 627,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 157,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8.285955927782195e+16,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-314/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d9d17829fb8138ba7d2faee44d77ac1a9b2d65c100167c2c3125a75640d1235
+size 5816

checkpoint-471/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: google/paligemma2-3b-pt-224
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.14.0

checkpoint-471/adapter_config.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/paligemma2-3b-pt-224",
+  "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "v_proj",
+    "k_proj",
+    "q_proj",
+    "gate_proj",
+    "down_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-471/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b1ed32d62b3fc02c2273727f0b50173fc09700907ccfcd0bd3671fa208d70758
+size 95091000

checkpoint-471/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cbc51ca113e69636e67ef0a729eeb3724b23ee7c6396a56b2ff745c5e9e31c2a
+size 190464380

checkpoint-471/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "do_convert_rgb": null,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "SiglipImageProcessor",
+  "image_seq_length": 256,
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "processor_class": "PaliGemmaProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}

checkpoint-471/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d592faee2c0174123b48dfc8f8773166c61cde6339c24a99d64c3b3513d46225
+size 15984

checkpoint-471/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7f81506c0c9b51b584dc873ba330b1f77ee89074812dd56b381e2e9db4aacba
+size 15984