Upload 9 files
Browse files
- .gitattributes +1 -0
- added_tokens.json +6 -0
- config.json +81 -0
- preprocessor_config.json +11 -0
- sample.wav +3 -0
- special_tokens_map.json +6 -0
- tig_lm.bin +3 -0
- tokenizer_config.json +65 -0
- transcribe.ipynb +1 -0
- vocab.json +199 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+sample.wav filter=lfs diff=lfs merge=lfs -text
added_tokens.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "</s>": 198,
+  "<pad>": 200,
+  "<s>": 197,
+  "<unk>": 199
+}
config.json
ADDED
@@ -0,0 +1,81 @@
+{
+  "activation_dropout": 0.0,
+  "adapter_act": "relu",
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": true,
+  "apply_spec_augment": false,
+  "architectures": [
+    "Wav2Vec2BertForCTC"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "classifier_proj_size": 768,
+  "codevector_dim": 768,
+  "conformer_conv_dropout": 0.1,
+  "contrastive_logits_temperature": 0.1,
+  "conv_depthwise_kernel_size": 31,
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "eos_token_id": 2,
+  "feat_proj_dropout": 0.0,
+  "feat_quantizer_dropout": 0.0,
+  "feature_projection_input_dim": 160,
+  "final_dropout": 0.1,
+  "hidden_act": "swish",
+  "hidden_dropout": 0.0,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.0,
+  "left_max_position_embeddings": 64,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.0,
+  "max_source_positions": 5000,
+  "model_type": "wav2vec2-bert",
+  "num_adapter_layers": 1,
+  "num_attention_heads": 16,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_hidden_layers": 24,
+  "num_negatives": 100,
+  "output_hidden_size": 1024,
+  "pad_token_id": 196,
+  "position_embeddings_type": "relative_key",
+  "proj_codevector_dim": 768,
+  "right_max_position_embeddings": 8,
+  "rotary_embedding_base": 10000,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.4",
+  "use_intermediate_ffn_before_adapter": false,
+  "use_weighted_layer_sum": false,
+  "vocab_size": 199,
+  "xvector_output_dim": 512
+}
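For orientation, a minimal sketch of loading this checkpoint with transformers, assuming the uploaded files sit in a local folder ./tigre-asr-Wav2Vec2Bert (a placeholder; substitute your own path or hub repo id):

# Sketch only; the local path is an assumption.
from transformers import Wav2Vec2BertForCTC

model = Wav2Vec2BertForCTC.from_pretrained("./tigre-asr-Wav2Vec2Bert").eval()
# The CTC head width follows "vocab_size" in config.json above.
print(model.config.model_type)     # wav2vec2-bert
print(model.lm_head.out_features)  # 199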
preprocessor_config.json
ADDED
@@ -0,0 +1,11 @@
+{
+  "feature_extractor_type": "SeamlessM4TFeatureExtractor",
+  "feature_size": 80,
+  "num_mel_bins": 80,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "Wav2Vec2BertProcessor",
+  "return_attention_mask": true,
+  "sampling_rate": 16000,
+  "stride": 2
+}
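A sketch (same assumed local path) of what this preprocessor does: 16 kHz mono audio becomes 80-bin log-mel frames, and stride 2 stacks adjacent frames into 160-dim features, matching "feature_projection_input_dim": 160 in config.json.

# Sketch only; one second of silence stands in for real audio.
import numpy as np
from transformers import AutoFeatureExtractor

fe = AutoFeatureExtractor.from_pretrained("./tigre-asr-Wav2Vec2Bert")
wave = np.zeros(16000, dtype=np.float32)
feats = fe(wave, sampling_rate=16000, return_tensors="pt")
print(feats.input_features.shape)  # (1, ~50, 160): 80 mel bins stacked by stride 2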
sample.wav
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db8e88d68185893ce0bc409cb3ee5b513f8f56b3be02d13d5a5157a34fd70ea1
+size 668204
special_tokens_map.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}
tig_lm.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80749043e27b3f75460846e4ce27f83247ab5d82310862b0ef5599ff32bfa5ef
+size 21659616
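tig_lm.bin is an LFS-tracked binary; judging by its name and the KenLM install step in transcribe.ipynb below, it is presumably a KenLM language model (an assumption: the upload itself does not say, and the notebook points at lm.arpa instead, so one of the paths may need adjusting). A sketch of wiring such a binary into pyctcdecode:

# Sketch; assumes tig_lm.bin is a KenLM binary and the repo is cloned locally.
from pyctcdecode import build_ctcdecoder
from transformers import Wav2Vec2CTCTokenizer

tok = Wav2Vec2CTCTokenizer.from_pretrained("./tigre-asr-Wav2Vec2Bert")
labels = [tok.convert_ids_to_tokens(i) for i in range(199)]  # must match the logit width
decoder = build_ctcdecoder(
    labels,
    kenlm_model_path="./tigre-asr-Wav2Vec2Bert/tig_lm.bin",
    alpha=0.5,  # LM weight, as in the notebook's LM_ALPHA
    beta=1.0,   # word insertion bonus, as in LM_BETA
)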
tokenizer_config.json
ADDED
@@ -0,0 +1,65 @@
+{
+  "added_tokens_decoder": {
+    "195": {
+      "content": "[UNK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "196": {
+      "content": "[PAD]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "197": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "198": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "199": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "processor_class": "Wav2Vec2BertProcessor",
+  "replace_word_delimiter_char": " ",
+  "target_lang": null,
+  "tokenizer_class": "Wav2Vec2CTCTokenizer",
+  "unk_token": "<unk>",
+  "word_delimiter_token": "|"
+}
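Note that ids 195/196 carry the [UNK]/[PAD] tokens from vocab.json while ids 197-200 carry the <s>/</s>/<unk>/<pad> added tokens, and that config.json's "pad_token_id": 196 points at [PAD] rather than at the tokenizer's declared pad_token <pad> (id 200). A sketch (same assumed local path) of round-tripping text through the tokenizer, with "|" as the word delimiter:

# Sketch; the word_delimiter_token "|" decodes back to a space.
from transformers import Wav2Vec2CTCTokenizer

tok = Wav2Vec2CTCTokenizer.from_pretrained("./tigre-asr-Wav2Vec2Bert")
ids = [tok.convert_tokens_to_ids(c) for c in "ሕርጊጎ|ምነ"]
print(ids)              # [22, 38, 154, 158, 0, 29, 87]
print(tok.decode(ids))  # ሕርጊጎ ምነ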
transcribe.ipynb
ADDED
@@ -0,0 +1 @@
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"machine_shape":"hm","mount_file_id":"15JwXGAHSNDvOfhIDFaj8h2bt5FijajiD","authorship_tag":"ABX9TyOIpJiulD+u85Vw+4eh9A8m"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","source":[],"metadata":{"id":"Pj64tkijY4tT","executionInfo":{"status":"ok","timestamp":1756685653771,"user_tz":240,"elapsed":5,"user":{"displayName":"Beshir Ibrahim","userId":"16736839346810179639"}}},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["%%capture\n","# Core libraries\n","!pip install torch torchaudio transformers pydub numpy pyctcdecode\n","# If you need mp3 input support\n","!sudo apt-get update -qq\n","!sudo apt-get install -y ffmpeg\n","# For KenLM ARPA/bin support\n","!pip install https://github.com/kpu/kenlm/archive/master.zip"],"metadata":{"id":"d6IIQn8_hEAy","executionInfo":{"status":"ok","timestamp":1756686608615,"user_tz":240,"elapsed":56491,"user":{"displayName":"Beshir Ibrahim","userId":"16736839346810179639"}}},"execution_count":9,"outputs":[]},{"cell_type":"code","source":["MODEL_PATH = \"/content/drive/MyDrive/artifacts/models/hf/hf_tgt/tigre-asr-Wav2Vec2Bert\" # model and processor path\n","PROCESSOR_PATH = MODEL_PATH\n","AUDIO_FILE = MODEL_PATH+\"/sample.wav\"\n","OUTPUT_TXT = None # e.g., \"/path/to/out.txt\" or None to just print\n","# KenLM + lexicon (optional but recommended for beam search)\n","KENLM_ARPA = MODEL_PATH+\"/lm.arpa\" # set to None to decode WITHOUT LM\n","LEXICON_TXT = MODEL_PATH+\"/lexicon.txt\" # used to load unigrams; set to None if not available"],"metadata":{"id":"B81FMlsQlSOh","executionInfo":{"status":"ok","timestamp":1756686366477,"user_tz":240,"elapsed":13,"user":{"displayName":"Beshir Ibrahim","userId":"16736839346810179639"}}},"execution_count":7,"outputs":[]},{"cell_type":"code","source":["import warnings\n","import logging\n","\n","# Silence all Python warnings\n","warnings.filterwarnings(\"ignore\")\n","# Silence pyctcdecode logger\n","logging.getLogger(\"pyctcdecode\").setLevel(logging.ERROR)\n","# Silence torchaudio warnings (optionally all)\n","logging.getLogger(\"torchaudio\").setLevel(logging.ERROR)"],"metadata":{"id":"Y90co7BOmK9n","executionInfo":{"status":"ok","timestamp":1756685692591,"user_tz":240,"elapsed":6,"user":{"displayName":"Beshir Ibrahim","userId":"16736839346810179639"}}},"execution_count":2,"outputs":[]},{"cell_type":"code","source":["# Audio / chunking\n","TARGET_SR = 16000\n","CHUNK_SEC = 5 # chunk length in seconds\n","OVERLAP_SEC = 0 # overlap between chunks in seconds (0 for minimal code)\n","# Beam search params\n","BEAM_WIDTH = 150\n","LM_ALPHA = 0.5\n","LM_BETA = 1.0"],"metadata":{"id":"7DOmsFxbnzwK","executionInfo":{"status":"ok","timestamp":1756685693508,"user_tz":240,"elapsed":5,"user":{"displayName":"Beshir Ibrahim","userId":"16736839346810179639"}}},"execution_count":3,"outputs":[]},{"cell_type":"code","source":["import os\n","import torch\n","import numpy as np\n","import torchaudio\n","from typing import List, Optional\n","\n","# Use pydub for robust mp3 handling\n","from pydub import AudioSegment\n","\n","from transformers import Wav2Vec2BertForCTC, Wav2Vec2BertProcessor\n","\n","# Optional LM decoding\n","try:\n"," from pyctcdecode import build_ctcdecoder\n"," _HAS_PYCTC = True\n","except Exception:\n"," _HAS_PYCTC = False\n","\n","# Pick device\n","device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n","\n","def _load_audio(path: str, target_sr: int = 16000) -> torch.Tensor:\n"," \"\"\"Load WAV or MP3 to mono float32 tensor [1, T] at target_sr.\"\"\"\n"," ext = os.path.splitext(path)[1].lower()\n"," if ext == \".mp3\":\n"," audio = AudioSegment.from_file(path, format=\"mp3\")\n"," audio = audio.set_channels(1).set_frame_rate(target_sr)\n"," samples = np.array(audio.get_array_of_samples()).astype(np.float32)\n"," # pydub gives int PCM range; normalize if needed (assume 16-bit)\n"," if samples.dtype != np.float32:\n"," samples = samples.astype(np.float32)\n"," # If sample_width==2 (16-bit), divide by 32768\n"," if audio.sample_width == 2:\n"," samples /= 32768.0\n"," return torch.from_numpy(samples).unsqueeze(0)\n"," else:\n"," wav, sr = torchaudio.load(path)\n"," if wav.shape[0] > 1:\n"," wav = wav.mean(dim=0, keepdim=True) # stereo -> mono\n"," if sr != target_sr:\n"," wav = torchaudio.transforms.Resample(sr, target_sr)(wav)\n"," # ensure float32 in [-1,1]\n"," if wav.dtype != torch.float32:\n"," wav = wav.to(torch.float32)\n"," return wav\n","\n","def _chunks(wave: torch.Tensor, sr: int, chunk_sec: int, overlap_sec: int):\n"," \"\"\"Yield possibly-overlapping chunks [1, T_chunk].\"\"\"\n"," chunk = int(chunk_sec * sr)\n"," step = max(1, chunk - int(overlap_sec * sr))\n"," T = wave.size(-1)\n"," for start in range(0, T, step):\n"," end = min(start + chunk, T)\n"," yield wave[:, start:end]\n"," if end >= T:\n"," break\n","\n","def _load_unigrams(lexicon_path: Optional[str]) -> List[str]:\n"," \"\"\"Read first token per line from lexicon into a unigram list.\"\"\"\n"," if not lexicon_path or not os.path.exists(lexicon_path):\n"," return []\n"," words = set()\n"," with open(lexicon_path, \"r\", encoding=\"utf-8\") as f:\n"," for line in f:\n"," w = line.strip().split()\n"," if w:\n"," words.add(w[0])\n"," return sorted(words)\n","\n","def _build_decoder(model, processor):\n"," \"\"\"Build a pyctcdecode decoder from model vocab + KenLM (if configured).\"\"\"\n"," # Build vocab (id -> token)\n"," vocab_size = model.lm_head.out_features\n"," labels = []\n"," for i in range(vocab_size):\n"," tok = processor.tokenizer.convert_ids_to_tokens([i])[0]\n"," # remove common BPE markers\n"," tok = tok.lstrip(\"Ġ\").lstrip(\"▁\")\n"," labels.append(tok)\n","\n"," # No LM? Use labels only; with LM? also pass unigrams + alpha/beta\n"," if not _HAS_PYCTC:\n"," return None\n","\n"," if KENLM_ARPA and os.path.exists(KENLM_ARPA):\n"," unigrams = _load_unigrams(LEXICON_TXT)\n"," return build_ctcdecoder(\n"," labels=labels,\n"," kenlm_model_path=KENLM_ARPA,\n"," unigrams=unigrams if unigrams else None,\n"," alpha=LM_ALPHA,\n"," beta=LM_BETA\n"," )\n"," else:\n"," # Fallback to lexicon-less decoder (greedy-ish beam without LM)\n"," return build_ctcdecoder(labels=labels)\n","\n","def _postprocess(text: str) -> str:\n"," \"\"\"Light cleanup: strip special markers, collapse dup words, ensure end punctuation.\"\"\"\n"," text = text.replace(\"<|\", \"\").replace(\"|>\", \"\").replace(\"<>\", \"\").strip()\n"," words, cleaned = text.split(), []\n"," for w in words:\n"," if not cleaned or cleaned[-1] != w:\n"," cleaned.append(w)\n"," out = \" \".join(cleaned).strip()\n"," if out and out[-1] not in \".!?\":\n"," out += \".\"\n"," return out\n","\n","def transcribe_one_file() -> str:\n"," # Load model + processor\n"," model = Wav2Vec2BertForCTC.from_pretrained(MODEL_PATH).to(device).eval()\n"," processor = Wav2Vec2BertProcessor.from_pretrained(PROCESSOR_PATH)\n","\n"," # Optional decoder\n"," decoder = _build_decoder(model, processor)\n","\n"," # Load audio\n"," wav = _load_audio(AUDIO_FILE, TARGET_SR)\n","\n"," # Transcribe by chunks\n"," pieces = []\n"," for chunk in _chunks(wav, TARGET_SR, CHUNK_SEC, OVERLAP_SEC):\n"," # processor for Wav2Vec2Bert expects raw audio -> input_features\n"," inputs = processor(chunk.squeeze().numpy(), sampling_rate=TARGET_SR, return_tensors=\"pt\").to(device)\n"," with torch.no_grad():\n"," logits = model(input_features=inputs.input_features).logits # [1, T, V]\n"," logp = logits[0].cpu().numpy()\n","\n"," if decoder is not None:\n"," hypo = decoder.decode(logp, beam_width=BEAM_WIDTH)\n"," else:\n"," # Greedy fallback if pyctcdecode not available\n"," ids = logp.argmax(axis=-1)\n"," tokens = processor.tokenizer.convert_ids_to_tokens(ids.tolist())\n"," hypo = \"\".join(tokens)\n","\n"," if hypo.strip():\n"," pieces.append(hypo.strip())\n","\n"," # cleanup per chunk\n"," del inputs, logits, logp\n","\n"," text = _postprocess(\" \".join(pieces))\n"," return text\n","\n","if __name__ == \"__main__\":\n"," out = transcribe_one_file()\n"," if OUTPUT_TXT:\n"," os.makedirs(os.path.dirname(OUTPUT_TXT), exist_ok=True)\n"," with open(OUTPUT_TXT, \"w\", encoding=\"utf-8\") as f:\n"," f.write(out + \"\\n\")\n"," print(out)\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"W1rQvavueaBI","executionInfo":{"status":"ok","timestamp":1756686391018,"user_tz":240,"elapsed":15969,"user":{"displayName":"Beshir Ibrahim","userId":"16736839346810179639"}},"outputId":"8c5a1a6a-dd57-4b82-f891-0f4e45945a93"},"execution_count":8,"outputs":[{"output_type":"stream","name":"stdout","text":["ሕርጊጎ ምነ ምን ዘበን አትራክ እንዴ አንበተት እብ መረባቤዐ ግሩም ለትሐሌ መዲነት ተ.\n"]}]},{"cell_type":"code","source":[],"metadata":{"id":"qs6x1lHOlthS"},"execution_count":null,"outputs":[]}]}
vocab.json
ADDED
@@ -0,0 +1,199 @@
+{
+  "<": 1,
+  ">": 2,
+  "[PAD]": 196,
+  "[UNK]": 195,
+  "|": 0,
+  "ሀ": 3,
+  "ሁ": 4,
+  "ሂ": 5,
+  "ሃ": 6,
+  "ሄ": 7,
+  "ህ": 8,
+  "ሆ": 9,
+  "ለ": 10,
+  "ሉ": 11,
+  "ሊ": 12,
+  "ላ": 13,
+  "ሌ": 14,
+  "ል": 15,
+  "ሎ": 16,
+  "ሐ": 17,
+  "ሑ": 18,
+  "ሒ": 19,
+  "ሓ": 20,
+  "ሔ": 21,
+  "ሕ": 22,
+  "ሖ": 23,
+  "መ": 24,
+  "ሙ": 25,
+  "ሚ": 26,
+  "ማ": 27,
+  "ሜ": 28,
+  "ም": 29,
+  "ሞ": 30,
+  "ሣ": 31,
+  "ሥ": 32,
+  "ረ": 33,
+  "ሩ": 34,
+  "ሪ": 35,
+  "ራ": 36,
+  "ሬ": 37,
+  "ር": 38,
+  "ሮ": 39,
+  "ሰ": 40,
+  "ሱ": 41,
+  "ሲ": 42,
+  "ሳ": 43,
+  "ሴ": 44,
+  "ስ": 45,
+  "ሶ": 46,
+  "ሸ": 47,
+  "ሹ": 48,
+  "ሺ": 49,
+  "ሻ": 50,
+  "ሼ": 51,
+  "ሽ": 52,
+  "ሾ": 53,
+  "ቀ": 54,
+  "ቁ": 55,
+  "ቂ": 56,
+  "ቃ": 57,
+  "ቄ": 58,
+  "ቅ": 59,
+  "ቆ": 60,
+  "ቈ": 61,
+  "ቍ": 62,
+  "ቐ": 63,
+  "ቑ": 64,
+  "ቒ": 65,
+  "ቓ": 66,
+  "ቕ": 67,
+  "ቖ": 68,
+  "ቛ": 69,
+  "በ": 70,
+  "ቡ": 71,
+  "ቢ": 72,
+  "ባ": 73,
+  "ቤ": 74,
+  "ብ": 75,
+  "ቦ": 76,
+  "ተ": 77,
+  "ቱ": 78,
+  "ቲ": 79,
+  "ታ": 80,
+  "ቴ": 81,
+  "ት": 82,
+  "ቶ": 83,
+  "ቹ": 84,
+  "ቺ": 85,
+  "ች": 86,
+  "ነ": 87,
+  "ኑ": 88,
+  "ኒ": 89,
+  "ና": 90,
+  "ኔ": 91,
+  "ን": 92,
+  "ኖ": 93,
+  "ኛ": 94,
+  "አ": 95,
+  "ኡ": 96,
+  "ኢ": 97,
+  "ኣ": 98,
+  "ኤ": 99,
+  "እ": 100,
+  "ኦ": 101,
+  "ከ": 102,
+  "ኩ": 103,
+  "ኪ": 104,
+  "ካ": 105,
+  "ኬ": 106,
+  "ክ": 107,
+  "ኮ": 108,
+  "ኰ": 109,
+  "ኳ": 110,
+  "ኸ": 111,
+  "ኺ": 112,
+  "ኻ": 113,
+  "ኽ": 114,
+  "ኾ": 115,
+  "ወ": 116,
+  "ዉ": 117,
+  "ዊ": 118,
+  "ዋ": 119,
+  "ዌ": 120,
+  "ው": 121,
+  "ዎ": 122,
+  "ዐ": 123,
+  "ዑ": 124,
+  "ዒ": 125,
+  "ዓ": 126,
+  "ዕ": 127,
+  "ዖ": 128,
+  "ዘ": 129,
+  "ዙ": 130,
+  "ዚ": 131,
+  "ዛ": 132,
+  "ዜ": 133,
+  "ዝ": 134,
+  "ዞ": 135,
+  "የ": 136,
+  "ዩ": 137,
+  "ዪ": 138,
+  "ያ": 139,
+  "ይ": 140,
+  "ዮ": 141,
+  "ደ": 142,
+  "ዱ": 143,
+  "ዲ": 144,
+  "ዳ": 145,
+  "ዴ": 146,
+  "ድ": 147,
+  "ዶ": 148,
+  "ጀ": 149,
+  "ጃ": 150,
+  "ጅ": 151,
+  "ገ": 152,
+  "ጉ": 153,
+  "ጊ": 154,
+  "ጋ": 155,
+  "ጌ": 156,
+  "ግ": 157,
+  "ጎ": 158,
+  "ጐ": 159,
+  "ጓ": 160,
+  "ጠ": 161,
+  "ጡ": 162,
+  "ጢ": 163,
+  "ጣ": 164,
+  "ጤ": 165,
+  "ጥ": 166,
+  "ጦ": 167,
+  "ጨ": 168,
+  "ጩ": 169,
+  "ጪ": 170,
+  "ጫ": 171,
+  "ጭ": 172,
+  "ጮ": 173,
+  "ጳ": 174,
+  "ጵ": 175,
+  "ጸ": 176,
+  "ጹ": 177,
+  "ጺ": 178,
+  "ጻ": 179,
+  "ጼ": 180,
+  "ጽ": 181,
+  "ጾ": 182,
+  "ፀ": 183,
+  "ፅ": 184,
+  "ፆ": 185,
+  "ፈ": 186,
+  "ፉ": 187,
+  "ፊ": 188,
+  "ፋ": 189,
+  "ፌ": 190,
+  "ፍ": 191,
+  "ፎ": 192,
+  "ፑ": 193,
+  "ፔ": 194
+}
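The vocabulary is character-level Ethiopic (Ge'ez script) plus "|" (id 0) as the word boundary; [PAD] (id 196) doubles as the CTC blank, matching "pad_token_id": 196 in config.json. A sketch of greedy CTC decoding over it:

# Sketch; collapse repeats, drop blanks, map "|" to a space.
import json

with open("vocab.json", encoding="utf-8") as f:
    id2tok = {i: t for t, i in json.load(f).items()}

BLANK = 196  # [PAD], per config.json's pad_token_id

def greedy_ctc(ids):
    out, prev = [], None
    for i in ids:
        if i != prev and i != BLANK:
            out.append(" " if id2tok[i] == "|" else id2tok[i])
        prev = i
    return "".join(out)

print(greedy_ctc([22, 22, BLANK, 38, 154, 154, 158, 0, 29, 87]))  # ሕርጊጎ ምነ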