jefson08 committed on
Commit
957b8e0
·
verified ·
1 Parent(s): b693582

Training in progress, step 1000

Browse files
README.md CHANGED
@@ -13,56 +13,11 @@ model-index:
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
  should probably proofread and complete it, then remove this comment. -->
15
 
16
- # speecht5_finetuned_kha for Khasi Text To Speech
17
 
18
  This model is a fine-tuned version of [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) on the audiofolder dataset.
19
-
20
-
21
- ### Inference with a pipeline
22
- ````python
23
- from transformers import pipeline
24
- pipe = pipeline("text-to-speech", model="jefson08/speecht5_finetuned_kha")
25
- ````
26
-
27
- #### Pick a piece of text in Khasi you’d like narrated, e.g.: "Kumno phi long?"
28
- ````python
29
- text = "Kumno phi long?"
30
- #Convert the given text to lowercase
31
- text = text.lower()
32
- print(text)
33
- ````
34
-
35
- ### To use SpeechT5 with the pipeline, you’ll need a speaker embedding.
36
- ### Let’s load it from a JSON file, i.e. an already saved embedding
37
- ````python
38
- from huggingface_hub import hf_hub_download
39
- hf_hub_download(repo_id="jefson08/speecht5_finetuned_kha", filename="speakerEmbedding.json", local_dir=".")
40
-
41
- import json
42
- # Opening JSON file
43
- f = open('speakerEmbedding.json')
44
-
45
- # returns JSON object as
46
- # a dictionary
47
- example = json.load(f)
48
-
49
- import torch
50
- speaker_embeddings = torch.tensor(example["speaker_embeddings"]).unsqueeze(0)
51
- ````
52
-
53
- ### Now you can pass the text and speaker embeddings to the pipeline, and it will take care of the rest:
54
- ````python
55
- forward_params = {"speaker_embeddings": speaker_embeddings}
56
- output = pipe(text, forward_params=forward_params)
57
- output
58
- ````
59
-
60
-
61
- ### You can then listen to the result:
62
- ````python
63
- from IPython.display import Audio
64
- Audio(output['audio'], rate=output['sampling_rate'])
65
- ````
66
 
67
  ## Model description
68
 
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
  should probably proofread and complete it, then remove this comment. -->
15
 
16
+ # speecht5_finetuned_kha
17
 
18
  This model is a fine-tuned version of [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) on the audiofolder dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 0.4610
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  ## Model description
23
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "microsoft/speecht5_tts",
3
  "activation_dropout": 0.1,
4
  "apply_spec_augment": true,
5
  "architectures": [
@@ -85,7 +85,7 @@
85
  "speech_decoder_prenet_layers": 2,
86
  "speech_decoder_prenet_units": 256,
87
  "torch_dtype": "float32",
88
- "transformers_version": "4.43.0",
89
  "use_cache": false,
90
  "use_guided_attention_loss": true,
91
  "vocab_size": 81
 
1
  {
2
+ "_name_or_path": "speecht5_finetuned_kha",
3
  "activation_dropout": 0.1,
4
  "apply_spec_augment": true,
5
  "architectures": [
 
85
  "speech_decoder_prenet_layers": 2,
86
  "speech_decoder_prenet_units": 256,
87
  "torch_dtype": "float32",
88
+ "transformers_version": "4.43.3",
89
  "use_cache": false,
90
  "use_guided_attention_loss": true,
91
  "vocab_size": 81
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2578877844f1d7556bea83e5ee10473dde7fcb92e4a57159eb1f92ee8c157a7a
3
  size 577789320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:921ef0adf38a7361eb229ea279e8f391ec36ba6325fa299abd1030aeefdd68df
3
  size 577789320
runs/Aug19_18-11-31_ip-10-192-11-38/events.out.tfevents.1724091092.ip-10-192-11-38.64729.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddd6e4a870c9ba97ae62d1ee834cc375dee3838587946feddf39e4b735a9a2a0
3
+ size 15247
special_tokens_map.json CHANGED
@@ -1,6 +1,18 @@
1
  {
2
- "bos_token": "<s>",
3
- "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
4
  "mask_token": {
5
  "content": "<mask>",
6
  "lstrip": true,
@@ -8,6 +20,18 @@
8
  "rstrip": false,
9
  "single_word": false
10
  },
11
- "pad_token": "<pad>",
12
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
13
  }
 
1
  {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
  "mask_token": {
17
  "content": "<mask>",
18
  "lstrip": true,
 
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
+ "pad_token": {
24
+ "content": "<pad>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "<unk>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12892c9f23bfc0d86fe0f9537f02a5cd8a1c4f60bcb10b8a1f78400e122dfff3
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dab5c22181df2afd76215461a02cebbe40c098fe0f932dd9a9d237f1dc467dd6
3
  size 5368