Commit
·
dce0916
1
Parent(s):
5121291
Update README.md
Browse files
README.md
CHANGED
@@ -69,7 +69,7 @@ model = Wav2Vec2ForCTC.from_pretrained("gorkemgoknar/wav2vec2-large-xlsr-53-turk
|
|
69 |
model.to("cuda")
|
70 |
|
71 |
#Note: Not ignoring "'" on this one
|
72 |
-
chars_to_ignore_regex = '[
|
73 |
|
74 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
75 |
# Preprocessing the datasets.
|
@@ -95,4 +95,3 @@ print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"],
|
|
95 |
**Test Result**: TBD %
|
96 |
## Training
|
97 |
The Common Voice `train` and `validation` datasets were used for training. Five additional Turkish movies with subtitles were also used.
|
98 |
-
The script used for training can be found [here](https://colab.research.google.com/drive/1hesw9z_kFFINT93jBvGuFspOLrHx10AE?usp=sharing)
|
|
|
69 |
model.to("cuda")
|
70 |
|
71 |
#Note: Not ignoring "'" on this one
|
72 |
+
chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\‘\”\�]'
|
73 |
|
74 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
75 |
# Preprocessing the datasets.
|
|
|
95 |
**Test Result**: TBD %
|
96 |
## Training
|
97 |
The Common Voice `train` and `validation` datasets were used for training. Five additional Turkish movies with subtitles were also used.
|
|