beshiribrahim committed on
Commit
6937eab
·
verified ·
1 Parent(s): a269755

Upload 9 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ sample.wav filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "</s>": 198,
3
+ "<pad>": 200,
4
+ "<s>": 197,
5
+ "<unk>": 199
6
+ }
config.json ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "adapter_act": "relu",
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": true,
7
+ "apply_spec_augment": false,
8
+ "architectures": [
9
+ "Wav2Vec2BertForCTC"
10
+ ],
11
+ "attention_dropout": 0.0,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 768,
14
+ "codevector_dim": 768,
15
+ "conformer_conv_dropout": 0.1,
16
+ "contrastive_logits_temperature": 0.1,
17
+ "conv_depthwise_kernel_size": 31,
18
+ "ctc_loss_reduction": "mean",
19
+ "ctc_zero_infinity": false,
20
+ "diversity_loss_weight": 0.1,
21
+ "eos_token_id": 2,
22
+ "feat_proj_dropout": 0.0,
23
+ "feat_quantizer_dropout": 0.0,
24
+ "feature_projection_input_dim": 160,
25
+ "final_dropout": 0.1,
26
+ "hidden_act": "swish",
27
+ "hidden_dropout": 0.0,
28
+ "hidden_size": 1024,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 4096,
31
+ "layer_norm_eps": 1e-05,
32
+ "layerdrop": 0.0,
33
+ "left_max_position_embeddings": 64,
34
+ "mask_feature_length": 10,
35
+ "mask_feature_min_masks": 0,
36
+ "mask_feature_prob": 0.0,
37
+ "mask_time_length": 10,
38
+ "mask_time_min_masks": 2,
39
+ "mask_time_prob": 0.0,
40
+ "max_source_positions": 5000,
41
+ "model_type": "wav2vec2-bert",
42
+ "num_adapter_layers": 1,
43
+ "num_attention_heads": 16,
44
+ "num_codevector_groups": 2,
45
+ "num_codevectors_per_group": 320,
46
+ "num_hidden_layers": 24,
47
+ "num_negatives": 100,
48
+ "output_hidden_size": 1024,
49
+ "pad_token_id": 196,
50
+ "position_embeddings_type": "relative_key",
51
+ "proj_codevector_dim": 768,
52
+ "right_max_position_embeddings": 8,
53
+ "rotary_embedding_base": 10000,
54
+ "tdnn_dilation": [
55
+ 1,
56
+ 2,
57
+ 3,
58
+ 1,
59
+ 1
60
+ ],
61
+ "tdnn_dim": [
62
+ 512,
63
+ 512,
64
+ 512,
65
+ 512,
66
+ 1500
67
+ ],
68
+ "tdnn_kernel": [
69
+ 5,
70
+ 3,
71
+ 3,
72
+ 1,
73
+ 1
74
+ ],
75
+ "torch_dtype": "float32",
76
+ "transformers_version": "4.55.4",
77
+ "use_intermediate_ffn_before_adapter": false,
78
+ "use_weighted_layer_sum": false,
79
+ "vocab_size": 199,
80
+ "xvector_output_dim": 512
81
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "feature_extractor_type": "SeamlessM4TFeatureExtractor",
3
+ "feature_size": 80,
4
+ "num_mel_bins": 80,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2BertProcessor",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000,
10
+ "stride": 2
11
+ }
sample.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db8e88d68185893ce0bc409cb3ee5b513f8f56b3be02d13d5a5157a34fd70ea1
3
+ size 668204
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "<pad>",
5
+ "unk_token": "<unk>"
6
+ }
tig_lm.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80749043e27b3f75460846e4ce27f83247ab5d82310862b0ef5599ff32bfa5ef
3
+ size 21659616
tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "195": {
4
+ "content": "[UNK]",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": true,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "196": {
12
+ "content": "[PAD]",
13
+ "lstrip": true,
14
+ "normalized": false,
15
+ "rstrip": true,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "197": {
20
+ "content": "<s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "198": {
28
+ "content": "</s>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "199": {
36
+ "content": "<unk>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "200": {
44
+ "content": "<pad>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "bos_token": "<s>",
53
+ "clean_up_tokenization_spaces": false,
54
+ "do_lower_case": false,
55
+ "eos_token": "</s>",
56
+ "extra_special_tokens": {},
57
+ "model_max_length": 1000000000000000019884624838656,
58
+ "pad_token": "<pad>",
59
+ "processor_class": "Wav2Vec2BertProcessor",
60
+ "replace_word_delimiter_char": " ",
61
+ "target_lang": null,
62
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
63
+ "unk_token": "<unk>",
64
+ "word_delimiter_token": "|"
65
+ }
transcribe.ipynb ADDED
@@ -0,0 +1 @@
 
 
1
+ {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"machine_shape":"hm","mount_file_id":"15JwXGAHSNDvOfhIDFaj8h2bt5FijajiD","authorship_tag":"ABX9TyOIpJiulD+u85Vw+4eh9A8m"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","source":[],"metadata":{"id":"Pj64tkijY4tT","executionInfo":{"status":"ok","timestamp":1756685653771,"user_tz":240,"elapsed":5,"user":{"displayName":"Beshir Ibrahim","userId":"16736839346810179639"}}},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["%%capture\n","# Core libraries\n","!pip install torch torchaudio transformers pydub numpy pyctcdecode\n","# If you need mp3 input support\n","!sudo apt-get update -qq\n","!sudo apt-get install -y ffmpeg\n","# For KenLM ARPA/bin support\n","!pip install https://github.com/kpu/kenlm/archive/master.zip"],"metadata":{"id":"d6IIQn8_hEAy","executionInfo":{"status":"ok","timestamp":1756686608615,"user_tz":240,"elapsed":56491,"user":{"displayName":"Beshir Ibrahim","userId":"16736839346810179639"}}},"execution_count":9,"outputs":[]},{"cell_type":"code","source":["MODEL_PATH = \"/content/drive/MyDrive/artifacts/models/hf/hf_tgt/tigre-asr-Wav2Vec2Bert\" # model and processor path\n","PROCESSOR_PATH = MODEL_PATH\n","AUDIO_FILE = MODEL_PATH+\"/sample.wav\"\n","OUTPUT_TXT = None # e.g., \"/path/to/out.txt\" or None to just print\n","# KenLM + lexicon (optional but recommended for beam search)\n","KENLM_ARPA = MODEL_PATH+\"/lm.arpa\" # set to None to decode WITHOUT LM\n","LEXICON_TXT = MODEL_PATH+\"/lexicon.txt\" # used to load unigrams; set to None if not available"],"metadata":{"id":"B81FMlsQlSOh","executionInfo":{"status":"ok","timestamp":1756686366477,"user_tz":240,"elapsed":13,"user":{"displayName":"Beshir Ibrahim","userId":"16736839346810179639"}}},"execution_count":7,"outputs":[]},{"cell_type":"code","source":["import warnings\n","import logging\n","\n","# Silence all Python 
warnings\n","warnings.filterwarnings(\"ignore\")\n","# Silence pyctcdecode logger\n","logging.getLogger(\"pyctcdecode\").setLevel(logging.ERROR)\n","# Silence torchaudio warnings (optionally all)\n","logging.getLogger(\"torchaudio\").setLevel(logging.ERROR)"],"metadata":{"id":"Y90co7BOmK9n","executionInfo":{"status":"ok","timestamp":1756685692591,"user_tz":240,"elapsed":6,"user":{"displayName":"Beshir Ibrahim","userId":"16736839346810179639"}}},"execution_count":2,"outputs":[]},{"cell_type":"code","source":["# Audio / chunking\n","TARGET_SR = 16000\n","CHUNK_SEC = 5 # chunk length in seconds\n","OVERLAP_SEC = 0 # overlap between chunks in seconds (0 for minimal code)\n","# Beam search params\n","BEAM_WIDTH = 150\n","LM_ALPHA = 0.5\n","LM_BETA = 1.0"],"metadata":{"id":"7DOmsFxbnzwK","executionInfo":{"status":"ok","timestamp":1756685693508,"user_tz":240,"elapsed":5,"user":{"displayName":"Beshir Ibrahim","userId":"16736839346810179639"}}},"execution_count":3,"outputs":[]},{"cell_type":"code","source":["import os\n","import torch\n","import numpy as np\n","import torchaudio\n","from typing import List, Optional\n","\n","# Use pydub for robust mp3 handling\n","from pydub import AudioSegment\n","\n","from transformers import Wav2Vec2BertForCTC, Wav2Vec2BertProcessor\n","\n","# Optional LM decoding\n","try:\n"," from pyctcdecode import build_ctcdecoder\n"," _HAS_PYCTC = True\n","except Exception:\n"," _HAS_PYCTC = False\n","\n","# Pick device\n","device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n","\n","def _load_audio(path: str, target_sr: int = 16000) -> torch.Tensor:\n"," \"\"\"Load WAV or MP3 to mono float32 tensor [1, T] at target_sr.\"\"\"\n"," ext = os.path.splitext(path)[1].lower()\n"," if ext == \".mp3\":\n"," audio = AudioSegment.from_file(path, format=\"mp3\")\n"," audio = audio.set_channels(1).set_frame_rate(target_sr)\n"," samples = np.array(audio.get_array_of_samples()).astype(np.float32)\n"," # pydub gives int PCM range; normalize 
if needed (assume 16-bit)\n"," if samples.dtype != np.float32:\n"," samples = samples.astype(np.float32)\n"," # If sample_width==2 (16-bit), divide by 32768\n"," if audio.sample_width == 2:\n"," samples /= 32768.0\n"," return torch.from_numpy(samples).unsqueeze(0)\n"," else:\n"," wav, sr = torchaudio.load(path)\n"," if wav.shape[0] > 1:\n"," wav = wav.mean(dim=0, keepdim=True) # stereo -> mono\n"," if sr != target_sr:\n"," wav = torchaudio.transforms.Resample(sr, target_sr)(wav)\n"," # ensure float32 in [-1,1]\n"," if wav.dtype != torch.float32:\n"," wav = wav.to(torch.float32)\n"," return wav\n","\n","def _chunks(wave: torch.Tensor, sr: int, chunk_sec: int, overlap_sec: int):\n"," \"\"\"Yield possibly-overlapping chunks [1, T_chunk].\"\"\"\n"," chunk = int(chunk_sec * sr)\n"," step = max(1, chunk - int(overlap_sec * sr))\n"," T = wave.size(-1)\n"," for start in range(0, T, step):\n"," end = min(start + chunk, T)\n"," yield wave[:, start:end]\n"," if end >= T:\n"," break\n","\n","def _load_unigrams(lexicon_path: Optional[str]) -> List[str]:\n"," \"\"\"Read first token per line from lexicon into a unigram list.\"\"\"\n"," if not lexicon_path or not os.path.exists(lexicon_path):\n"," return []\n"," words = set()\n"," with open(lexicon_path, \"r\", encoding=\"utf-8\") as f:\n"," for line in f:\n"," w = line.strip().split()\n"," if w:\n"," words.add(w[0])\n"," return sorted(words)\n","\n","def _build_decoder(model, processor):\n"," \"\"\"Build a pyctcdecode decoder from model vocab + KenLM (if configured).\"\"\"\n"," # Build vocab (id -> token)\n"," vocab_size = model.lm_head.out_features\n"," labels = []\n"," for i in range(vocab_size):\n"," tok = processor.tokenizer.convert_ids_to_tokens([i])[0]\n"," # remove common BPE markers\n"," tok = tok.lstrip(\"Ġ\").lstrip(\"▁\")\n"," labels.append(tok)\n","\n"," # No LM? Use labels only; with LM? 
also pass unigrams + alpha/beta\n"," if not _HAS_PYCTC:\n"," return None\n","\n"," if KENLM_ARPA and os.path.exists(KENLM_ARPA):\n"," unigrams = _load_unigrams(LEXICON_TXT)\n"," return build_ctcdecoder(\n"," labels=labels,\n"," kenlm_model_path=KENLM_ARPA,\n"," unigrams=unigrams if unigrams else None,\n"," alpha=LM_ALPHA,\n"," beta=LM_BETA\n"," )\n"," else:\n"," # Fallback to lexicon-less decoder (greedy-ish beam without LM)\n"," return build_ctcdecoder(labels=labels)\n","\n","def _postprocess(text: str) -> str:\n"," \"\"\"Light cleanup: strip special markers, collapse dup words, ensure end punctuation.\"\"\"\n"," text = text.replace(\"<|\", \"\").replace(\"|>\", \"\").replace(\"<>\", \"\").strip()\n"," words, cleaned = text.split(), []\n"," for w in words:\n"," if not cleaned or cleaned[-1] != w:\n"," cleaned.append(w)\n"," out = \" \".join(cleaned).strip()\n"," if out and out[-1] not in \".!?\":\n"," out += \".\"\n"," return out\n","\n","def transcribe_one_file() -> str:\n"," # Load model + processor\n"," model = Wav2Vec2BertForCTC.from_pretrained(MODEL_PATH).to(device).eval()\n"," processor = Wav2Vec2BertProcessor.from_pretrained(PROCESSOR_PATH)\n","\n"," # Optional decoder\n"," decoder = _build_decoder(model, processor)\n","\n"," # Load audio\n"," wav = _load_audio(AUDIO_FILE, TARGET_SR)\n","\n"," # Transcribe by chunks\n"," pieces = []\n"," for chunk in _chunks(wav, TARGET_SR, CHUNK_SEC, OVERLAP_SEC):\n"," # processor for Wav2Vec2Bert expects raw audio -> input_features\n"," inputs = processor(chunk.squeeze().numpy(), sampling_rate=TARGET_SR, return_tensors=\"pt\").to(device)\n"," with torch.no_grad():\n"," logits = model(input_features=inputs.input_features).logits # [1, T, V]\n"," logp = logits[0].cpu().numpy()\n","\n"," if decoder is not None:\n"," hypo = decoder.decode(logp, beam_width=BEAM_WIDTH)\n"," else:\n"," # Greedy fallback if pyctcdecode not available\n"," ids = logp.argmax(axis=-1)\n"," tokens = 
processor.tokenizer.convert_ids_to_tokens(ids.tolist())\n"," hypo = \"\".join(tokens)\n","\n"," if hypo.strip():\n"," pieces.append(hypo.strip())\n","\n"," # cleanup per chunk\n"," del inputs, logits, logp\n","\n"," text = _postprocess(\" \".join(pieces))\n"," return text\n","\n","if __name__ == \"__main__\":\n"," out = transcribe_one_file()\n"," if OUTPUT_TXT:\n"," os.makedirs(os.path.dirname(OUTPUT_TXT), exist_ok=True)\n"," with open(OUTPUT_TXT, \"w\", encoding=\"utf-8\") as f:\n"," f.write(out + \"\\n\")\n"," print(out)\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"W1rQvavueaBI","executionInfo":{"status":"ok","timestamp":1756686391018,"user_tz":240,"elapsed":15969,"user":{"displayName":"Beshir Ibrahim","userId":"16736839346810179639"}},"outputId":"8c5a1a6a-dd57-4b82-f891-0f4e45945a93"},"execution_count":8,"outputs":[{"output_type":"stream","name":"stdout","text":["ሕርጊጎ ምነ ምን ዘበን አትራክ እንዴ አንበተት እብ መረባቤዐ ግሩም ለትሐሌ መዲነት ተ.\n"]}]},{"cell_type":"code","source":[],"metadata":{"id":"qs6x1lHOlthS"},"execution_count":null,"outputs":[]}]}
vocab.json ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<": 1,
3
+ ">": 2,
4
+ "[PAD]": 196,
5
+ "[UNK]": 195,
6
+ "|": 0,
7
+ "ሀ": 3,
8
+ "ሁ": 4,
9
+ "ሂ": 5,
10
+ "ሃ": 6,
11
+ "ሄ": 7,
12
+ "ህ": 8,
13
+ "ሆ": 9,
14
+ "ለ": 10,
15
+ "ሉ": 11,
16
+ "ሊ": 12,
17
+ "ላ": 13,
18
+ "ሌ": 14,
19
+ "ል": 15,
20
+ "ሎ": 16,
21
+ "ሐ": 17,
22
+ "ሑ": 18,
23
+ "ሒ": 19,
24
+ "ሓ": 20,
25
+ "ሔ": 21,
26
+ "ሕ": 22,
27
+ "ሖ": 23,
28
+ "መ": 24,
29
+ "ሙ": 25,
30
+ "ሚ": 26,
31
+ "ማ": 27,
32
+ "ሜ": 28,
33
+ "ም": 29,
34
+ "ሞ": 30,
35
+ "ሣ": 31,
36
+ "ሥ": 32,
37
+ "ረ": 33,
38
+ "ሩ": 34,
39
+ "ሪ": 35,
40
+ "ራ": 36,
41
+ "ሬ": 37,
42
+ "ር": 38,
43
+ "ሮ": 39,
44
+ "ሰ": 40,
45
+ "ሱ": 41,
46
+ "ሲ": 42,
47
+ "ሳ": 43,
48
+ "ሴ": 44,
49
+ "ስ": 45,
50
+ "ሶ": 46,
51
+ "ሸ": 47,
52
+ "ሹ": 48,
53
+ "ሺ": 49,
54
+ "ሻ": 50,
55
+ "ሼ": 51,
56
+ "ሽ": 52,
57
+ "ሾ": 53,
58
+ "ቀ": 54,
59
+ "ቁ": 55,
60
+ "ቂ": 56,
61
+ "ቃ": 57,
62
+ "ቄ": 58,
63
+ "ቅ": 59,
64
+ "ቆ": 60,
65
+ "ቈ": 61,
66
+ "ቍ": 62,
67
+ "ቐ": 63,
68
+ "ቑ": 64,
69
+ "ቒ": 65,
70
+ "ቓ": 66,
71
+ "ቕ": 67,
72
+ "ቖ": 68,
73
+ "ቛ": 69,
74
+ "በ": 70,
75
+ "ቡ": 71,
76
+ "ቢ": 72,
77
+ "ባ": 73,
78
+ "ቤ": 74,
79
+ "ብ": 75,
80
+ "ቦ": 76,
81
+ "ተ": 77,
82
+ "ቱ": 78,
83
+ "ቲ": 79,
84
+ "ታ": 80,
85
+ "ቴ": 81,
86
+ "ት": 82,
87
+ "ቶ": 83,
88
+ "ቹ": 84,
89
+ "ቺ": 85,
90
+ "ች": 86,
91
+ "ነ": 87,
92
+ "ኑ": 88,
93
+ "ኒ": 89,
94
+ "ና": 90,
95
+ "ኔ": 91,
96
+ "ን": 92,
97
+ "ኖ": 93,
98
+ "ኛ": 94,
99
+ "አ": 95,
100
+ "ኡ": 96,
101
+ "ኢ": 97,
102
+ "ኣ": 98,
103
+ "ኤ": 99,
104
+ "እ": 100,
105
+ "ኦ": 101,
106
+ "ከ": 102,
107
+ "ኩ": 103,
108
+ "ኪ": 104,
109
+ "ካ": 105,
110
+ "ኬ": 106,
111
+ "ክ": 107,
112
+ "ኮ": 108,
113
+ "ኰ": 109,
114
+ "ኳ": 110,
115
+ "ኸ": 111,
116
+ "ኺ": 112,
117
+ "ኻ": 113,
118
+ "ኽ": 114,
119
+ "ኾ": 115,
120
+ "ወ": 116,
121
+ "ዉ": 117,
122
+ "ዊ": 118,
123
+ "ዋ": 119,
124
+ "ዌ": 120,
125
+ "ው": 121,
126
+ "ዎ": 122,
127
+ "ዐ": 123,
128
+ "ዑ": 124,
129
+ "ዒ": 125,
130
+ "ዓ": 126,
131
+ "ዕ": 127,
132
+ "ዖ": 128,
133
+ "ዘ": 129,
134
+ "ዙ": 130,
135
+ "ዚ": 131,
136
+ "ዛ": 132,
137
+ "ዜ": 133,
138
+ "ዝ": 134,
139
+ "ዞ": 135,
140
+ "የ": 136,
141
+ "ዩ": 137,
142
+ "ዪ": 138,
143
+ "ያ": 139,
144
+ "ይ": 140,
145
+ "ዮ": 141,
146
+ "ደ": 142,
147
+ "ዱ": 143,
148
+ "ዲ": 144,
149
+ "ዳ": 145,
150
+ "ዴ": 146,
151
+ "ድ": 147,
152
+ "ዶ": 148,
153
+ "ጀ": 149,
154
+ "ጃ": 150,
155
+ "ጅ": 151,
156
+ "ገ": 152,
157
+ "ጉ": 153,
158
+ "ጊ": 154,
159
+ "ጋ": 155,
160
+ "ጌ": 156,
161
+ "ግ": 157,
162
+ "ጎ": 158,
163
+ "ጐ": 159,
164
+ "ጓ": 160,
165
+ "ጠ": 161,
166
+ "ጡ": 162,
167
+ "ጢ": 163,
168
+ "ጣ": 164,
169
+ "ጤ": 165,
170
+ "ጥ": 166,
171
+ "ጦ": 167,
172
+ "ጨ": 168,
173
+ "ጩ": 169,
174
+ "ጪ": 170,
175
+ "ጫ": 171,
176
+ "ጭ": 172,
177
+ "ጮ": 173,
178
+ "ጳ": 174,
179
+ "ጵ": 175,
180
+ "ጸ": 176,
181
+ "ጹ": 177,
182
+ "ጺ": 178,
183
+ "ጻ": 179,
184
+ "ጼ": 180,
185
+ "ጽ": 181,
186
+ "ጾ": 182,
187
+ "ፀ": 183,
188
+ "ፅ": 184,
189
+ "ፆ": 185,
190
+ "ፈ": 186,
191
+ "ፉ": 187,
192
+ "ፊ": 188,
193
+ "ፋ": 189,
194
+ "ፌ": 190,
195
+ "ፍ": 191,
196
+ "ፎ": 192,
197
+ "ፑ": 193,
198
+ "ፔ": 194
199
+ }