BeardedMonster committed
Commit 8f0685d · verified · 1 Parent(s): e1d904f
Files changed (1): README.md (+45, -0)
README.md CHANGED
@@ -76,6 +76,33 @@ generation_config = GenerationConfig(
 
 repo_name = "BeardedMonster/SabiYarn-125M"
 model = AutoModelForCausalLM.from_pretrained(repo_name, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(repo_name, trust_remote_code=True)
+
+# Test on Urhobo
+input_ids = tokenizer("Eshare nana ri vwo ẹguọnọ rẹ iyono rẹ Aristotle vẹ Plato na,", return_tensors="pt")["input_ids"]
+output = model.generate(input_ids, generation_config=generation_config, max_new_tokens=50)
+input_len = len(input_ids[0])
+print(tokenizer.decode(output[0][input_len:]))
+
+# Output:
+# ọ da tobọ dianẹ ayen rhọnvwe kerhọ-ọ. Ọtiọyena, e de ruiruo aghwoghwo ọkieje. (1 Kọr. 7:9; 1 Kọr. 12:2) Vwọrẹ uyota
+
+# Test on Efik
+input_ids = tokenizer("Ke eyo Jesus ye mme mbet esie, etop emi ama ada ifụre ọsọk", return_tensors="pt")["input_ids"]
+output = model.generate(input_ids, generation_config=generation_config, max_new_tokens=50)
+input_len = len(input_ids[0])
+print(tokenizer.decode(output[0][input_len:]))
+
+# Output:
+# . Edi ediwak nditọ Israel ẹtịn̄ ẹnọ nnyịn mîkemeke ndinam n̄kpọ Abasi.|end_of_text|Ebe foto si, Getty Images Ebe foto si, Getty Images Nkọwa foto, Ndị
+
+# Test on Efik with a longer prompt
+input_ids = tokenizer("Ke eyo Jesus ye mme mbet esie, etop emi ama ada ifụre ọsọk mme Jew oro esịt okobụn̄ọde ke ntak idiọkido ke Israel, oro ẹkenyụn̄ ẹdude ke mfụhọ ke itie-ufụn mme nsunsu ido edinam Ido Ukpono Mme Jew eke akpa isua ikie.", return_tensors="pt")["input_ids"]
+output = model.generate(input_ids, generation_config=generation_config, max_new_tokens=50)
+input_len = len(input_ids[0])
+print(tokenizer.decode(output[0][input_len:]))
+
+# Output:
+# Kûsịn idem nnyịme ndifiọk nditọete nnyịn inemesịt onyụn̄ anam nnyịn ikpọn̄utom nnyịn. (Matt. 26:31; Luke 22:42
 
 # Test on English
 input_ids = tokenizer("How are you?", return_tensors="pt")["input_ids"]
 
@@ -106,6 +133,24 @@ print(tokenizer.decode(output[0][input_len:]))
 Nkọwapụta
 Ebe nrụọrụ weebụ na-ahụ maka gburugburu ebe
 
+
+# Test on Fulfulde/Fulah
+input_ids = tokenizer("Jos un peeta gallure nɗer ɗi woyla caaka ɓanngeere lardu Naajeeriya. Gelle ɗen haa e ɗuuɗiri ɗun kamano", return_tensors="pt")["input_ids"]
+output = model.generate(input_ids, generation_config=generation_config, max_new_tokens=50)
+input_len = len(input_ids[0])
+print(tokenizer.decode(output[0][input_len:]))
+
+# Output:
+# jogiiji maɓɓe nder lesdi Naajeeriya. |end_o|end_of_text|** Muhammadu_Buhari ** Muhammadu Buhari ko leydi e hukuma pamarun e hukuma pamarun e hukuma pamarun e hukuma pamarun e hukum
+
+# Test on Fulfulde/Fulah with a longer prompt
+input_ids = tokenizer("Si hooreejo leydi on (himo wi’ee kadi persidan) accitii laamu, ko woote waɗetee, ɓurɗo jogaade yimɓe on halfinee laamu yeru happu.", return_tensors="pt")["input_ids"]
+output = model.generate(input_ids, generation_config=generation_config, max_new_tokens=50)
+input_len = len(input_ids[0])
+print(tokenizer.decode(output[0][input_len:]))
+
+# Output:
+# |end_of_text|So en nganndii e hitaande 2010, o wiyi : “ko ñalawma hannde golle pulaar walla mbiyen jogiiɗo”. Eɗen mbaawi wiyde «u2008
+
 # Test on Hausa
 input_ids = tokenizer("Ministan ya ƙara da cewa dole ne Mista Netanyahu ya sanya ranar da", return_tensors="pt")["input_ids"]
 output = model.generate(input_ids, generation_config=generation_config, max_new_tokens=50)
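Several of the sample outputs above run past the model's `|end_of_text|` marker and continue into unrelated text. If only the first continuation is wanted, the decoded string can be cut at that marker — a sketch building on the hypothetical `generate_text` helper above; the marker string is taken verbatim from the outputs shown in this diff.

```python
def trim_at_eot(text: str, eot: str = "|end_of_text|") -> str:
    """Keep only the text before the first end-of-text marker, if one appears."""
    return text.split(eot, 1)[0].strip()

# e.g. the Hausa example, trimmed at the end-of-text marker:
print(trim_at_eot(generate_text("Ministan ya ƙara da cewa dole ne Mista Netanyahu ya sanya ranar da")))
```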