Training in progress, step 1000

Browse files

Files changed (6) hide show

README.md +3 -48
config.json +2 -2
model.safetensors +1 -1
runs/Aug19_18-11-31_ip-10-192-11-38/events.out.tfevents.1724091092.ip-10-192-11-38.64729.0 +3 -0
special_tokens_map.json +28 -4
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -13,56 +13,11 @@ model-index:
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# speecht5_finetuned_kha for Khasi Text To Speech
 This model is a fine-tuned version of [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) on the audiofolder dataset.
-### Inference with a pipeline
-````python
-from transformers import pipeline
-pipe = pipeline("text-to-speech", model="jefson08/speecht5_finetuned_kha")
-````
-#### Pick a piece of text in Khasi you’d like narrated, e.g. "Kumno phi long?"
-````python
-text = "Kumno phi long?"
-#Convert the given text to lowercase
-text = text.lower()
-print(text)
-````
-### To use SpeechT5 with the pipeline, you’ll need a speaker embedding.
-### Let’s load it from a JSON file that contains an already-saved embedding.
-````python
-from huggingface_hub import hf_hub_download
-hf_hub_download(repo_id="jefson08/speecht5_finetuned_kha", filename="speakerEmbedding.json", local_dir=".")
-import json
-# Opening JSON file
-f = open('speakerEmbedding.json')
-# returns JSON object as
-# a dictionary
-example = json.load(f)
-import torch
-speaker_embeddings = torch.tensor(example["speaker_embeddings"]).unsqueeze(0)
-````
-### Now you can pass the text and speaker embeddings to the pipeline, and it will take care of the rest:
-````python
-forward_params = {"speaker_embeddings": speaker_embeddings}
-output = pipe(text, forward_params=forward_params)
-output
-````
-### You can then listen to the result:
-````python
-from IPython.display import Audio
-Audio(output['audio'], rate=output['sampling_rate'])
-````
 ## Model description

 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# speecht5_finetuned_kha
 This model is a fine-tuned version of [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) on the audiofolder dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.4610
 ## Model description

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "microsoft/speecht5_tts",
   "activation_dropout": 0.1,
   "apply_spec_augment": true,
   "architectures": [
@@ -85,7 +85,7 @@
   "speech_decoder_prenet_layers": 2,
   "speech_decoder_prenet_units": 256,
   "torch_dtype": "float32",
-  "transformers_version": "4.43.0",
   "use_cache": false,
   "use_guided_attention_loss": true,
   "vocab_size": 81

 {
+  "_name_or_path": "speecht5_finetuned_kha",
   "activation_dropout": 0.1,
   "apply_spec_augment": true,
   "architectures": [
   "speech_decoder_prenet_layers": 2,
   "speech_decoder_prenet_units": 256,
   "torch_dtype": "float32",
+  "transformers_version": "4.43.3",
   "use_cache": false,
   "use_guided_attention_loss": true,
   "vocab_size": 81

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2578877844f1d7556bea83e5ee10473dde7fcb92e4a57159eb1f92ee8c157a7a
 size 577789320

 version https://git-lfs.github.com/spec/v1
+oid sha256:921ef0adf38a7361eb229ea279e8f391ec36ba6325fa299abd1030aeefdd68df
 size 577789320

runs/Aug19_18-11-31_ip-10-192-11-38/events.out.tfevents.1724091092.ip-10-192-11-38.64729.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ddd6e4a870c9ba97ae62d1ee834cc375dee3838587946feddf39e4b735a9a2a0
+size 15247

special_tokens_map.json CHANGED Viewed

@@ -1,6 +1,18 @@
 {
-  "bos_token": "<s>",
-  "eos_token": "</s>",
   "mask_token": {
     "content": "<mask>",
     "lstrip": true,
@@ -8,6 +20,18 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<pad>",
-  "unk_token": "<unk>"
 }

 {
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
   "mask_token": {
     "content": "<mask>",
     "lstrip": true,
     "rstrip": false,
     "single_word": false
   },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12892c9f23bfc0d86fe0f9537f02a5cd8a1c4f60bcb10b8a1f78400e122dfff3
 size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:dab5c22181df2afd76215461a02cebbe40c098fe0f932dd9a9d237f1dc467dd6
 size 5368