annabeth97c committed · Commit 1146765 · 1 Parent(s): c1e394d

Initial commit
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ adapter_model.safetensors filter=lfs diff=lfs merge=lfs -text
+ non_lora_trainables.bin filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,81 @@
- ---
- license: cc-by-2.5
- ---
+ ---
+ base_model:
+ - mistralai/Mistral-7B-v0.1
+ - m-a-p/MERT-v1-95M
+ library_name: peft
+ license: apache-2.0
+ datasets:
+ - amaai-lab/MusicBench
+ language:
+ - en
+ metrics:
+ - bertscore
+ - bleu
+ pipeline_tag: audio-text-to-text
+ ---
+
+ # Model Card for SonicVerse
+
+ SonicVerse is a music captioning model. Trained with concrete music-feature labels that guide the captioning process, it includes features such as key, presence of vocals, vocal gender, instruments, mood/theme, and genre in the generated caption.
+ The model is trained on 10-second snippets of music for detailed captioning. The [Spaces demo](https://huggingface.co/spaces/annabeth97c/SonicVerse) allows chaining the captions of multiple 10-second chunks into one long, detailed caption.
+
+ ## Model Details
+
+ ### Model Description
+
+ SonicVerse is trained with a multi-task projector that maps music input to aligned language tokens. In addition, feature-extraction heads (e.g. key classification, vocals classification) are trained, and their outputs are projected to language tokens that guide the captioning.
27
+
28
+ - **Developed by:** AMAAI Lab
29
+ - **Funded by [optional]:** [More Information Needed]
30
+ - **Shared by [optional]:** [More Information Needed]
31
+ - **Model type:** Multi-modal Audio Text to Text model
32
+ - **Language(s) (NLP):** English
33
+ - **License:** Apache-2.0
34
+ - **Finetuned from model :** mistralai/Mistral-7B-v0.1
35
+
36
+ ### Model Sources
37
+
38
+ - **Repository:** https://github.com/annabeth97c/sonicverse
39
+ - **Paper [optional]:** [More Information Needed]
40
+ - **Demo :** https://annabeth97c.github.io/sonicverse/
41
+
42
+ ## Uses
43
+
44
+ Model can be used for music-text paired dataset generation
45
+
46
+ ## How to Get Started with the Model
47
+
48
+ Use the instructions provided on the [repository](https://github.com/annabeth97c/sonicverse) to run inference locally. Alternatively try out the model on the [spaces page](https://huggingface.co/spaces/annabeth97c/SonicVerse).
49
+
50
+ ## Citation [optional]
51
+
52
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
53
+
54
+ **BibTeX:**
55
+
56
+ [More Information Needed]
57
+
58
+ **APA:**
59
+
60
+ [More Information Needed]
61
+
62
+ ## Glossary [optional]
63
+
64
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
65
+
66
+ [More Information Needed]
67
+
68
+ ## More Information [optional]
69
+
70
+ [More Information Needed]
71
+
72
+ ## Model Card Authors [optional]
73
+
74
+ [More Information Needed]
75
+
76
+ ## Model Card Contact
77
+
78
+ [More Information Needed]
79
+ ### Framework versions
80
+
81
+ - PEFT 0.10.0
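Since the README defers inference instructions to the repository, a minimal loading sketch may help orient readers. This covers only the LoRA-adapted language model; the full audio pipeline (MERT encoder plus multi-task projector) requires the custom `MistralLMMForCausalLM` class from the GitHub repository. The Hub id `annabeth97c/SonicVerse` is an assumption based on the Spaces page.

```python
# Hypothetical sketch: load only the LoRA-adapted language model with PEFT.
# The full audio pipeline (MERT encoder + multi-task projector) needs the
# custom MistralLMMForCausalLM class from the SonicVerse GitHub repository.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE = "mistralai/Mistral-7B-Instruct-v0.1"   # per adapter_config.json below
ADAPTER = "annabeth97c/SonicVerse"            # assumed Hub id of this repo

tokenizer = AutoTokenizer.from_pretrained(BASE)
base_model = AutoModelForCausalLM.from_pretrained(BASE, torch_dtype=torch.bfloat16)

# Attach the LoRA weights from adapter_model.safetensors in this commit.
model = PeftModel.from_pretrained(base_model, ADAPTER)
model.eval()
```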
adapter_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.1",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 16,
+   "lora_dropout": 0.05,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 64,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "q_proj",
+     "v_proj",
+     "gate_proj",
+     "o_proj",
+     "down_proj",
+     "k_proj",
+     "up_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "use_dora": false,
+   "use_rslora": false
+ }
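For reference, the same adapter settings can be reconstructed directly in PEFT. A sketch follows; the values mirror the JSON above, and everything not listed is left at its `LoraConfig` default:

```python
# Sketch: the LoraConfig equivalent of adapter_config.json above.
from peft import LoraConfig

lora_config = LoraConfig(
    r=64,                      # LoRA rank
    lora_alpha=16,             # scaling factor (alpha / r = 0.25)
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[           # all attention and MLP projections
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)
```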
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ce19c1b7c28e67eb9ee6d49970f55c9b7487ba7e70c336d38ec89e3a51edb2bd
+ size 335605144
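What is committed here is a Git LFS pointer: the actual ~336 MB safetensors blob is fetched by the LFS filter and can be checked against the `oid` above. A small verification sketch, assuming the file has been downloaded to the working directory:

```python
# Sketch: verify a downloaded LFS object against the pointer's sha256 oid.
import hashlib

EXPECTED = "ce19c1b7c28e67eb9ee6d49970f55c9b7487ba7e70c336d38ec89e3a51edb2bd"

sha = hashlib.sha256()
with open("adapter_model.safetensors", "rb") as f:    # assumed local path
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

assert sha.hexdigest() == EXPECTED, "checksum mismatch: incomplete LFS download?"
```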
config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.1",
+   "architectures": [
+     "MistralForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 32768,
+   "modalities": [
+     "audio_mert"
+   ],
+   "modality_builder": "audio_mert",
+   "model_cls": "MistralLMMForCausalLM",
+   "model_type": "mistral-lmm",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "rms_norm_eps": 1e-05,
+   "rope_theta": 10000.0,
+   "sliding_window": 4096,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.40.1",
+   "use_cache": false,
+   "vocab_size": 32000
+ }
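Note the custom `model_type` of `mistral-lmm`: plain `AutoConfig`/`AutoModel` will not resolve it unless the repository's code registers the class first. The standard Mistral-7B geometry is still readable straight from the JSON; a sketch, assuming a local copy of `config.json`:

```python
# Sketch: inspect the custom config without transformers' auto classes.
import json

with open("config.json") as f:      # assumed local path
    cfg = json.load(f)

head_dim = cfg["hidden_size"] // cfg["num_attention_heads"]           # 4096 / 32 = 128
gqa_ratio = cfg["num_attention_heads"] // cfg["num_key_value_heads"]  # 32 / 8 = 4

print(cfg["model_cls"])    # MistralLMMForCausalLM (custom class from the repo)
print(cfg["modalities"])   # ['audio_mert'] -> MERT audio features
print(head_dim, gqa_ratio) # 128, 4 query heads per KV head (grouped-query attention)
```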
non_lora_trainables.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:86c8e3d08b7ae6d6d331aab445f68da813c66494a18ba52a6b81331d48bc905a
+ size 2275096730
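In LLaVA-style training setups, a `non_lora_trainables.bin` like this typically holds the weights trained outside the LoRA adapter, such as the projector; whether SonicVerse follows that layout exactly is an assumption here. A sketch of peeking inside the downloaded file:

```python
# Sketch: inspect the non-LoRA trainable weights (~2.3 GB, per the pointer above).
import torch

# map_location="cpu" avoids needing a GPU just to inspect tensor shapes.
state_dict = torch.load("non_lora_trainables.bin", map_location="cpu")

# Print the first few entries to see which modules were trained outside LoRA.
for name, tensor in list(state_dict.items())[:10]:
    print(name, tuple(tensor.shape))
```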