waveletdeboshir committed on
Commit
e6090eb
·
verified ·
1 Parent(s): 112b81c

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +15 -4
README.md CHANGED
@@ -14,7 +14,7 @@ tags:
14
  - audio
15
  - speech
16
  ---
17
-
18
 
19
  # GigaAM-v2-CTC with ngram LM and beamsearch 🤗 Hugging Face transformers
20
 
@@ -66,8 +66,13 @@ input_features = processor(wav[0], sampling_rate=16000, return_tensors="pt")
66
  with torch.no_grad():
67
  logits = model(**input_features).logits
68
 
69
- # decoding with beamseach and LM
70
- transcription = processor.batch_decode(logits=logits.numpy()).text[0]
 
 
 
 
 
71
 
72
  ```
73
 
@@ -78,7 +83,13 @@ In our case (Conformer) `MODEL_STRIDE = 40` ms per timestamp.
78
 
79
  ```python
80
  MODEL_STRIDE = 40
81
- outputs = processor.batch_decode(logits=logits.numpy(), output_word_offsets=True)
 
 
 
 
 
 
82
  word_ts = [
83
  {
84
  "word": d["word"],
 
14
  - audio
15
  - speech
16
  ---
17
+ [![Use In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/gist/waveletdeboshir/07e39ae96f27331aa3e1e053c2c2f9e8/gigaam-ctc-hf-with-lm.ipynb)
18
 
19
  # GigaAM-v2-CTC with ngram LM and beamsearch 🤗 Hugging Face transformers
20
 
 
66
  with torch.no_grad():
67
  logits = model(**input_features).logits
68
 
69
+ # decoding with beamsearch and LM (tune alpha, beta, beam_width for your data)
70
+ transcription = processor.batch_decode(
71
+ logits=logits.numpy(),
72
+ beam_width=64,
73
+ alpha=0.5,
74
+ beta=0.5,
75
+ ).text[0]
76
 
77
  ```
78
 
 
83
 
84
  ```python
85
  MODEL_STRIDE = 40
86
+ outputs = processor.batch_decode(
87
+ logits=logits.numpy(),
88
+ beam_width=64,
89
+ alpha=0.5,
90
+ beta=0.5,
91
+ output_word_offsets=True
92
+ )
93
  word_ts = [
94
  {
95
  "word": d["word"],