DenisT commited on
Commit
e13ec26
·
1 Parent(s): 7cf86f8

add: multiple language support

Browse files
README.md CHANGED
@@ -25,7 +25,7 @@ Check out the configuration reference at <https://huggingface.co/docs/hub/spaces
25
 
26
  ## Introduction
27
 
28
- I love reading manga, and I can't wait for the next chapter of my favorite manga to be released. However, the newest chapters are usually in Japanese, and they are translated to English after some time. I want to read the newest chapters as soon as possible, so I decided to build a manga translator that can translate Japanese manga to English.
29
 
30
  ## GitHub Project
31
 
@@ -37,7 +37,6 @@ I want to translate the text in the manga images from Japanese to English. I wil
37
 
38
  ![Manga Translator](./assets/MangaTranslator.png)
39
 
40
-
41
  ### Data Collection
42
 
43
  This [dataset](https://universe.roboflow.com/speechbubbledetection-y9yz3/bubble-detection-gbjon/dataset/2#) contains over 8500 images of manga pages together with their annotations from Roboflow. I will use this dataset to train `Yolov8` to detect the speech bubbles in the manga images. To use this dataset with Yolov8, I will need to convert the annotations to the YOLO format, which is a text file containing the class label and the bounding box coordinates of the object in the image.
 
25
 
26
  ## Introduction
27
 
28
+ I love reading manga, and I can't wait for the next chapter of my favorite manga to be released. However, the newest chapters are usually in Japanese, and they are translated to English after some time. I want to read the newest chapters as soon as possible, so I decided to build a manga translator that can translate Japanese manga to English. (**NEW**: I've added support for multiple languages, including Arabic, Greek, Cyrillic, and other non-Latin scripts.)
29
 
30
  ## GitHub Project
31
 
 
37
 
38
  ![Manga Translator](./assets/MangaTranslator.png)
39
 
 
40
  ### Data Collection
41
 
42
  This [dataset](https://universe.roboflow.com/speechbubbledetection-y9yz3/bubble-detection-gbjon/dataset/2#) contains over 8500 images of manga pages together with their annotations from Roboflow. I will use this dataset to train `Yolov8` to detect the speech bubbles in the manga images. To use this dataset with Yolov8, I will need to convert the annotations to the YOLO format, which is a text file containing the class label and the bounding box coordinates of the object in the image.
app.py CHANGED
@@ -2,45 +2,58 @@ import numpy as np
2
  from PIL import Image
3
  import gradio as gr
4
 
 
5
  from main import predict
6
 
7
- def process_image(image):
8
- if image is not None:
9
- if not isinstance(image, np.ndarray):
10
- image = np.array(Image.open(image))
11
- print(image)
12
 
13
- translated_image = predict(image)
14
- return translated_image
15
- return None
 
 
 
 
 
 
16
 
17
 
18
  with gr.Blocks() as demo:
19
- gr.Markdown(
20
- """
21
- <div style="display: flex; align-items: center; flex-direction: row; justify-content: center; margin-bottom: 20px; text-align: center;">
22
- <a href="https://github.com/Detopall/manga-translator" target="_blank" rel="noopener noreferrer" style="text-decoration: none;">
23
- <h1 style="display: inline; margin-left: 10px; text-decoration: underline;">Manga Translator</h1>
24
- </a>
25
- </div>
26
- """
27
- )
28
-
29
- with gr.Row():
30
- with gr.Column(scale=1):
31
- image_input = gr.Image()
32
- submit_button = gr.Button("Translate")
33
- with gr.Column(scale=1):
34
- image_output = gr.Image()
35
-
36
- submit_button.click(process_image, inputs=image_input, outputs=image_output)
37
-
38
- examples = gr.Examples(examples=[
39
- ["./examples/ex1.jpg"],
40
- ["./examples/ex2.jpg"],
41
- ["./examples/ex3.jpg"],
42
- ["./examples/ex4.jpg"],
43
- ], inputs=image_input)
 
 
 
 
 
 
 
 
 
 
44
 
45
  if __name__ == "__main__":
46
- demo.launch()
 
from PIL import Image
import gradio as gr

from utils.langs import languages
from main import predict

# Dropdown entries as (label, value) pairs, e.g. ("English", "en-GB"):
# the title-cased language name is displayed, the locale code is the value
# handed to process_image / predict.
language_choices = [(name.title(), code) for name, code in languages.items()]
def process_image(image, target_lang):
    """Gradio callback: translate one manga page into *target_lang*.

    Accepts either a numpy array or anything ``PIL.Image.open`` can read
    (e.g. a file path); returns the translated page, or None when no
    image was supplied.
    """
    if image is None:
        return None
    if not isinstance(image, np.ndarray):
        image = np.array(Image.open(image))
    return predict(image, target_lang=target_lang)
19
 
20
 
21
with gr.Blocks() as demo:
    # Page header linking back to the GitHub repository.
    gr.Markdown(
        """
        <div style="display: flex; align-items: center; flex-direction: row; justify-content: center; margin-bottom: 20px; text-align: center;">
        <a href="https://github.com/Detopall/manga-translator" target="_blank" rel="noopener noreferrer" style="text-decoration: none;">
        <h1 style="display: inline; margin-left: 10px; text-decoration: underline;">Manga Translator</h1>
        </a>
        </div>
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image()
            # `value` is a language *code* matching the second element of the
            # (label, code) tuples in language_choices, not the display label.
            language_dropdown = gr.Dropdown(
                choices=language_choices,
                label="Target Language",
                value="en-GB",
            )
            submit_button = gr.Button("Translate")
        with gr.Column(scale=1):
            image_output = gr.Image()

    # Run the full detect → OCR → translate → redraw pipeline on click.
    submit_button.click(
        process_image, inputs=[image_input, language_dropdown], outputs=image_output
    )

    # Clickable sample pages that pre-fill the input image.
    examples = gr.Examples(
        examples=[
            ["./examples/ex1.jpg"],
            ["./examples/ex2.jpg"],
            ["./examples/ex3.jpg"],
            ["./examples/ex4.jpg"],
        ],
        inputs=image_input,
    )

if __name__ == "__main__":
    demo.launch()
fonts/NotoNaskhArabic-Regular.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46060349550000637973beb3f7cf8e4ae5f6d76e8319f0f9da50c0c1f987d260
3
+ size 327956
fonts/NotoSans-Regular.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe8c022f48d8dd29f17b744d16f9346f4357e16f7d4f7be58b000ae7c291b614
3
+ size 629024
fonts/NotoSansCyrillic-Regular.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac2c786babb3fd1603d174e6108cc222cede9b0968540a16a7b34ac454467d5e
3
+ size 555264
fonts/NotoSansGreek-Regular.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac2c786babb3fd1603d174e6108cc222cede9b0968540a16a7b34ac454467d5e
3
+ size 555264
main.py CHANGED
@@ -1,5 +1,6 @@
1
  import io
2
  import base64
 
3
 
4
  import numpy as np
5
  from PIL import Image
@@ -14,40 +15,42 @@ from utils.write_text_on_image import add_text
14
  MODEL_PATH = "./model_creation/runs/detect/train5/weights/best.pt"
15
  object_detection_model = YOLO(MODEL_PATH)
16
 
17
- def extract_text_from_regions(image: np.ndarray, results: list):
18
 
19
- for result in results:
20
- x1, y1, x2, y2, _, _ = result
21
- detected_image = image[int(y1):int(y2), int(x1):int(x2)]
22
- if detected_image.shape[-1] == 4:
23
- detected_image = detected_image[:, :, :3]
24
- im = Image.fromarray(np.uint8(detected_image * 255))
25
- text = get_text_from_image(im)
26
 
27
- processed_image, cont = process_contour(detected_image)
28
- translated_text = translate_manga(text, source_lang="auto", target_lang="en")
29
- add_text(processed_image, translated_text, cont)
 
 
 
 
30
 
 
 
 
 
 
 
31
 
32
- def convert_image_to_base64(image: Image.Image) -> str:
33
- buff = io.BytesIO()
34
- image.save(buff, format="PNG")
35
- return base64.b64encode(buff.getvalue()).decode("utf-8")
36
 
37
 
38
- def predict(image: np.ndarray):
39
 
40
- image = Image.fromarray(image)
41
- image.save("image.png")
42
 
43
- try:
44
- np_image = np.array(image)
45
 
46
- results = predict_bounding_boxes(object_detection_model, "image.png")
47
- extract_text_from_regions(np_image, results)
48
 
49
- return np_image
50
 
51
- except Exception as e:
52
- print(f"Error: {str(e)}")
53
- return None
 
1
  import io
2
  import base64
3
+ from typing import Dict, Any
4
 
5
  import numpy as np
6
  from PIL import Image
 
15
  MODEL_PATH = "./model_creation/runs/detect/train5/weights/best.pt"
16
  object_detection_model = YOLO(MODEL_PATH)
17
 
 
18
 
19
def extract_text_from_regions(
    image: np.ndarray, target_lang: str, results: list
) -> None:
    """
    OCR, translate and redraw the text inside each detected speech bubble.

    Mutates ``image`` in place via ``add_text``; nothing is returned. (The
    previous ``Dict[str, Any]`` return annotation was wrong — no code path
    ever returned a value.)

    :param image: full manga page as a numpy array; bubble regions are
        overwritten with the translated text.
    :param target_lang: translator target code, e.g. ``"en-GB"``.
    :param results: YOLO detections, each unpackable as
        ``(x1, y1, x2, y2, confidence, class)``.
    """
    for result in results:
        x1, y1, x2, y2, _, _ = result
        detected_image = image[int(y1) : int(y2), int(x1) : int(x2)]
        # Drop an alpha channel if present — OCR expects RGB.
        if detected_image.shape[-1] == 4:
            detected_image = detected_image[:, :, :3]
        # NOTE(review): the * 255 assumes pixel values in [0, 1]; if the page
        # arrives as uint8 [0, 255] this multiply wraps around — confirm the
        # dtype produced by the Gradio input / predict().
        im = Image.fromarray(np.uint8(detected_image * 255))
        text = get_text_from_image(im)

        processed_image, cont = process_contour(detected_image)
        translated_text = translate_manga(
            text, target_lang=target_lang, source_lang="ja-JP"
        )
        # The translator can fail and yield None; draw a placeholder instead
        # of crashing the whole page.
        if translated_text is None:
            translated_text = "Translation failed"

        add_text(processed_image, translated_text, cont)
 
 
 
39
 
40
 
41
def predict(image: np.ndarray, target_lang: str):
    """
    Detect speech bubbles on a manga page and translate their text in place.

    :param image: the page as a numpy array.
    :param target_lang: translator target code, e.g. ``"en-GB"``.
    :return: the annotated numpy image, or ``None`` if any step fails
        (best-effort: the UI then simply shows an empty output).
    """
    import os
    import tempfile

    pil_image = Image.fromarray(image)

    # YOLO inference takes a file path. Use a unique temp file instead of a
    # fixed "image.png" in the CWD, so concurrent requests cannot clobber
    # each other's input, and clean it up afterwards.
    fd, tmp_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        pil_image.save(tmp_path)
        np_image = np.array(pil_image)

        results = predict_bounding_boxes(object_detection_model, tmp_path)
        # Draws the translated text directly into np_image (in-place).
        extract_text_from_regions(np_image, target_lang, results)

        return np_image

    except Exception as e:
        print(f"Error: {str(e)}")
        return None
    finally:
        try:
            os.remove(tmp_path)
        except OSError:
            pass
requirements.txt CHANGED
@@ -3,3 +3,5 @@ ultralytics==8.3.78
3
  manga-ocr==0.1.14
4
  deep-translator==1.11.4
5
  torch==2.6.0
 
 
 
3
  manga-ocr==0.1.14
4
  deep-translator==1.11.4
5
  torch==2.6.0
6
+ python-bidi==0.6.6
7
+ arabic-reshaper==3.0.0
utils/__init__.py ADDED
File without changes
utils/langs.py ADDED
@@ -0,0 +1,325 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Human-readable language names (lowercase) -> MyMemory-style locale codes
# ("<iso-language>-<REGION>"). Consumed by app.py to build the target-language
# dropdown; the code is passed straight through to the translator.
languages = {
    "acehnese": "ace-ID",
    "afrikaans": "af-ZA",
    "akan": "ak-GH",
    "albanian": "sq-AL",
    "amharic": "am-ET",
    "antigua and barbuda creole english": "aig-AG",
    "arabic": "ar-SA",
    "arabic egyptian": "ar-EG",
    "aragonese": "an-ES",
    "armenian": "hy-AM",
    "assamese": "as-IN",
    "asturian": "ast-ES",
    "austrian german": "de-AT",
    "awadhi": "awa-IN",
    "ayacucho quechua": "quy-PE",
    "azerbaijani": "az-AZ",
    "bahamas creole english": "bah-BS",
    "bajan": "bjs-BB",
    "balinese": "ban-ID",
    "balkan gipsy": "rm-RO",
    "bambara": "bm-ML",
    "banjar": "bjn-ID",
    "bashkir": "ba-RU",
    "basque": "eu-ES",
    "belarusian": "be-BY",
    "belgian french": "fr-BE",
    "bemba": "bem-ZM",
    "bengali": "bn-IN",
    "bhojpuri": "bho-IN",
    "bihari": "bh-IN",
    "bislama": "bi-VU",
    "borana": "gax-KE",
    "bosnian": "bs-BA",
    "bosnian (cyrillic)": "bs-Cyrl-BA",
    "breton": "br-FR",
    "buginese": "bug-ID",
    "bulgarian": "bg-BG",
    "burmese": "my-MM",
    "catalan": "ca-ES",
    "catalan valencian": "cav-ES",
    "cebuano": "ceb-PH",
    "central atlas tamazight": "tzm-MA",
    "central aymara": "ayr-BO",
    "central kanuri (latin script)": "knc-NG",
    "chadian arabic": "shu-TD",
    "chamorro": "ch-GU",
    "cherokee": "chr-US",
    "chhattisgarhi": "hne-IN",
    "chinese simplified": "zh-CN",
    "chinese trad. (hong kong)": "zh-HK",
    "chinese traditional": "zh-TW",
    "chinese traditional macau": "zh-MO",
    "chittagonian": "ctg-BD",
    "chokwe": "cjk-AO",
    "classical greek": "grc-GR",
    "comorian ngazidja": "zdj-KM",
    "coptic": "cop-EG",
    "crimean tatar": "crh-RU",
    "crioulo upper guinea": "pov-GW",
    "croatian": "hr-HR",
    "czech": "cs-CZ",
    "danish": "da-DK",
    "dari": "prs-AF",
    "dimli": "diq-TR",
    "dutch": "nl-NL",
    "dyula": "dyu-CI",
    "dzongkha": "dz-BT",
    "eastern yiddish": "ydd-US",
    "emakhuwa": "vmw-MZ",
    "english": "en-GB",
    "english australia": "en-AU",
    "english canada": "en-CA",
    "english india": "en-IN",
    "english ireland": "en-IE",
    "english new zealand": "en-NZ",
    "english singapore": "en-SG",
    "english south africa": "en-ZA",
    "english us": "en-US",
    "esperanto": "eo-EU",
    "estonian": "et-EE",
    "ewe": "ee-GH",
    "fanagalo": "fn-FNG",
    "faroese": "fo-FO",
    "fijian": "fj-FJ",
    "filipino": "fil-PH",
    "finnish": "fi-FI",
    "flemish": "nl-BE",
    "fon": "fon-BJ",
    "french": "fr-FR",
    "french canada": "fr-CA",
    "french swiss": "fr-CH",
    "friulian": "fur-IT",
    "fula": "ff-FUL",
    "galician": "gl-ES",
    "gamargu": "mfi-NG",
    "garo": "grt-IN",
    "georgian": "ka-GE",
    "german": "de-DE",
    "gilbertese": "gil-KI",
    "glavda": "glw-NG",
    "greek": "el-GR",
    "grenadian creole english": "gcl-GD",
    "guarani": "gn-PY",
    "gujarati": "gu-IN",
    "guyanese creole english": "gyn-GY",
    "haitian creole french": "ht-HT",
    "halh mongolian": "khk-MN",
    "hausa": "ha-NE",
    "hawaiian": "haw-US",
    "hebrew": "he-IL",
    "higi": "hig-NG",
    "hiligaynon": "hil-PH",
    "hill mari": "mrj-RU",
    "hindi": "hi-IN",
    "hmong": "hmn-CN",
    "hungarian": "hu-HU",
    "icelandic": "is-IS",
    "igbo ibo": "ibo-NG",
    "igbo ig": "ig-NG",
    "ilocano": "ilo-PH",
    "indonesian": "id-ID",
    "inuktitut greenlandic": "kl-GL",
    "irish gaelic": "ga-IE",
    "italian": "it-IT",
    "italian swiss": "it-CH",
    "jamaican creole english": "jam-JM",
    "japanese": "ja-JP",
    "javanese": "jv-ID",
    "jingpho": "kac-MM",
    "k'iche": "quc-GT",
    "kabiyè": "kbp-TG",
    "kabuverdianu": "kea-CV",
    "kabylian": "kab-DZ",
    "kalenjin": "kln-KE",
    "kamba": "kam-KE",
    "kannada": "kn-IN",
    "kanuri": "kr-KAU",
    "karen": "kar-MM",
    "kashmiri (devanagari script)": "ks-IN",
    "kashmiri (arabic script)": "kas-IN",
    "kazakh": "kk-KZ",
    "khasi": "kha-IN",
    "khmer": "km-KH",
    "kikuyu kik": "kik-KE",
    "kikuyu ki": "ki-KE",
    "kimbundu": "kmb-AO",
    "kinyarwanda": "rw-RW",
    "kirundi": "rn-BI",
    "kisii": "guz-KE",
    "kongo": "kg-CG",
    "konkani": "kok-IN",
    "korean": "ko-KR",
    "northern kurdish": "kmr-TR",
    "kurdish sorani": "ckb-IQ",
    "kyrgyz": "ky-KG",
    "lao": "lo-LA",
    "latgalian": "ltg-LV",
    "latin": "la-XN",
    "latvian": "lv-LV",
    "ligurian": "lij-IT",
    "limburgish": "li-NL",
    "lingala": "ln-LIN",
    "lithuanian": "lt-LT",
    "lombard": "lmo-IT",
    "luba-kasai": "lua-CD",
    "luganda": "lg-UG",
    "luhya": "luy-KE",
    "luo": "luo-KE",
    "luxembourgish": "lb-LU",
    "maa": "mas-KE",
    "macedonian": "mk-MK",
    "magahi": "mag-IN",
    "maithili": "mai-IN",
    "malagasy": "mg-MG",
    "malay": "ms-MY",
    "malayalam": "ml-IN",
    "maldivian": "dv-MV",
    "maltese": "mt-MT",
    "mandara": "mfi-CM",
    "manipuri": "mni-IN",
    "manx gaelic": "gv-IM",
    "maori": "mi-NZ",
    "marathi": "mr-IN",
    "margi": "mrt-NG",
    "mari": "mhr-RU",
    "marshallese": "mh-MH",
    "mende": "men-SL",
    "meru": "mer-KE",
    "mijikenda": "nyf-KE",
    "minangkabau": "min-ID",
    "mizo": "lus-IN",
    "mongolian": "mn-MN",
    "montenegrin": "sr-ME",
    "morisyen": "mfe-MU",
    "moroccan arabic": "ar-MA",
    "mossi": "mos-BF",
    "ndau": "ndc-MZ",
    "ndebele": "nr-ZA",
    "nepali": "ne-NP",
    "nigerian fulfulde": "fuv-NG",
    "niuean": "niu-NU",
    "north azerbaijani": "azj-AZ",
    "sesotho": "nso-ZA",
    "northern uzbek": "uzn-UZ",
    "norwegian bokmål": "nb-NO",
    "norwegian nynorsk": "nn-NO",
    "nuer": "nus-SS",
    "nyanja": "ny-MW",
    "occitan": "oc-FR",
    "occitan aran": "oc-ES",
    "odia": "or-IN",
    "oriya": "ory-IN",
    "urdu": "ur-PK",
    "palauan": "pau-PW",
    "pali": "pi-IN",
    "pangasinan": "pag-PH",
    "papiamentu": "pap-CW",
    "pashto": "ps-PK",
    "persian": "fa-IR",
    "pijin": "pis-SB",
    "plateau malagasy": "plt-MG",
    "polish": "pl-PL",
    "portuguese": "pt-PT",
    "portuguese brazil": "pt-BR",
    "potawatomi": "pot-US",
    "punjabi": "pa-IN",
    "punjabi (pakistan)": "pnb-PK",
    "quechua": "qu-PE",
    "rohingya": "rhg-MM",
    "rohingyalish": "rhl-MM",
    "romanian": "ro-RO",
    "romansh": "roh-CH",
    "rundi": "run-BI",
    "russian": "ru-RU",
    "saint lucian creole french": "acf-LC",
    "samoan": "sm-WS",
    "sango": "sg-CF",
    "sanskrit": "sa-IN",
    "santali": "sat-IN",
    "sardinian": "sc-IT",
    "scots gaelic": "gd-GB",
    "sena": "seh-ZW",
    "serbian cyrillic": "sr-Cyrl-RS",
    "serbian latin": "sr-Latn-RS",
    "seselwa creole french": "crs-SC",
    "setswana (south africa)": "tn-ZA",
    "shan": "shn-MM",
    "shona": "sn-ZW",
    "sicilian": "scn-IT",
    "silesian": "szl-PL",
    "sindhi snd": "snd-PK",
    "sindhi sd": "sd-PK",
    "sinhala": "si-LK",
    "slovak": "sk-SK",
    "slovenian": "sl-SI",
    "somali": "so-SO",
    "sotho southern": "st-LS",
    "south azerbaijani": "azb-AZ",
    "southern pashto": "pbt-PK",
    "southwestern dinka": "dik-SS",
    "spanish": "es-ES",
    "spanish argentina": "es-AR",
    "spanish colombia": "es-CO",
    "spanish latin america": "es-419",
    "spanish mexico": "es-MX",
    "spanish united states": "es-US",
    "sranan tongo": "srn-SR",
    "standard latvian": "lvs-LV",
    "standard malay": "zsm-MY",
    "sundanese": "su-ID",
    "swahili": "sw-KE",
    "swati": "ss-SZ",
    "swedish": "sv-SE",
    "swiss german": "de-CH",
    "syriac (aramaic)": "syc-TR",
    "tagalog": "tl-PH",
    "tahitian": "ty-PF",
    "tajik": "tg-TJ",
    "tamashek (tuareg)": "tmh-DZ",
    "tamasheq": "taq-ML",
    "tamil india": "ta-IN",
    "tamil sri lanka": "ta-LK",
    "taroko": "trv-TW",
    "tatar": "tt-RU",
    "telugu": "te-IN",
    "tetum": "tet-TL",
    "thai": "th-TH",
    "tibetan": "bo-CN",
    "tigrinya": "ti-ET",
    "tok pisin": "tpi-PG",
    "tokelauan": "tkl-TK",
    "tongan": "to-TO",
    "tosk albanian": "als-AL",
    "tsonga": "ts-ZA",
    "tswa": "tsc-MZ",
    "tswana": "tn-BW",
    "tumbuka": "tum-MW",
    "turkish": "tr-TR",
    "turkmen": "tk-TM",
    "tuvaluan": "tvl-TV",
    "twi": "tw-GH",
    "udmurt": "udm-RU",
    "ukrainian": "uk-UA",
    "uma": "ppk-ID",
    "umbundu": "umb-AO",
    "uyghur uig": "uig-CN",
    "uyghur ug": "ug-CN",
    "uzbek": "uz-UZ",
    "venetian": "vec-IT",
    "vietnamese": "vi-VN",
    "vincentian creole english": "svc-VC",
    "virgin islands creole english": "vic-US",
    "wallisian": "wls-WF",
    "waray (philippines)": "war-PH",
    "welsh": "cy-GB",
    "west central oromo": "gaz-ET",
    "western persian": "pes-IR",
    "wolof": "wo-SN",
    "xhosa": "xh-ZA",
    "yiddish": "yi-YD",
    "yoruba": "yo-NG",
    "zulu": "zu-ZA",
}
utils/translate_manga.py CHANGED
@@ -2,10 +2,10 @@
2
  This module is used to translate manga from one language to another.
3
  """
4
 
5
- from deep_translator import GoogleTranslator
6
 
7
 
8
- def translate_manga(text: str, source_lang: str = "ja", target_lang: str = "en") -> str:
9
  """
10
  Translate manga from one language to another.
11
  """
@@ -13,9 +13,13 @@ def translate_manga(text: str, source_lang: str = "ja", target_lang: str = "en")
13
  if source_lang == target_lang:
14
  return text
15
 
16
- translated_text = GoogleTranslator(
17
- source=source_lang, target=target_lang).translate(text)
 
 
 
 
18
  print("Original text:", text)
19
  print("Translated text:", translated_text)
20
 
21
- return translated_text
 
2
  This module is used to translate manga from one language to another.
3
  """
4
 
5
+ from deep_translator import MyMemoryTranslator
6
 
7
 
8
def translate_manga(text: str, target_lang: str, source_lang: str = "ja-JP") -> str:
    """
    Translate OCR'd speech-bubble text from *source_lang* to *target_lang*.

    :param text: text extracted from one speech bubble.
    :param target_lang: MyMemory locale code, e.g. ``"en-GB"``.
    :param source_lang: source locale code; defaults to Japanese.
    :return: the translated text, or *text* unchanged when no translation is
        needed or possible.
    """
    if source_lang == target_lang:
        return text

    # Nothing meaningful to translate: empty/whitespace OCR output would make
    # the translator raise, and a bare ellipsis should pass through as-is.
    if not text.strip() or text == "...":
        return text

    translated_text = MyMemoryTranslator(
        source=source_lang, target=target_lang
    ).translate(text)
    print("Original text:", text)
    print("Translated text:", translated_text)

    # MyMemory occasionally echoes "..." back; keep the original in that case.
    return translated_text if translated_text != "..." else text
utils/write_text_on_image.py CHANGED
@@ -1,59 +1,95 @@
1
  """
2
  This module contains a function to add text to an image with a bounding box.
3
  """
 
 
4
  import textwrap
5
  from PIL import Image, ImageDraw, ImageFont
6
  import numpy as np
7
  import cv2
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- def add_text(image: np.ndarray, text: str, contour: np.ndarray):
11
- """
12
- Add text to an image with a bounding box.
13
- """
14
-
15
- font_path = "./fonts/fonts_animeace_i.ttf"
16
- pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
17
- draw = ImageDraw.Draw(pil_image)
18
-
19
- x, y, w, h = cv2.boundingRect(contour)
20
-
21
- line_height = 16
22
- font_size = 14
23
- wrapping_ratio = 0.075
24
-
25
- wrapped_text = textwrap.fill(text, width=int(w * wrapping_ratio),
26
- break_long_words=True)
27
-
28
- font = ImageFont.truetype(font_path, size=font_size)
29
-
30
- lines = wrapped_text.split('\n')
31
- total_text_height = (len(lines)) * line_height
32
-
33
- while total_text_height > h:
34
- line_height -= 2
35
- font_size -= 2
36
- wrapping_ratio += 0.025
37
 
38
- wrapped_text = textwrap.fill(text, width=int(w * wrapping_ratio),
39
- break_long_words=True)
40
-
41
- font = ImageFont.truetype(font_path, size=font_size)
42
-
43
- lines = wrapped_text.split('\n')
44
- total_text_height = (len(lines)) * line_height
45
-
46
- # Vertical centering
47
- text_y = y + (h - total_text_height) // 2
48
-
49
- for line in lines:
50
- text_length = draw.textlength(line, font=font)
51
-
52
- # Horizontal centering
53
- text_x = x + (w - text_length) // 2
54
-
55
- draw.text((text_x, text_y), line, font=font, fill=(0, 0, 0))
56
-
57
- text_y += line_height
58
-
59
- image[:, :, :] = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
  This module contains a function to add text to an image with a bounding box.
3
  """
4
+
5
+ import unicodedata
6
  import textwrap
7
  from PIL import Image, ImageDraw, ImageFont
8
  import numpy as np
9
  import cv2
10
 
11
+ import arabic_reshaper
12
+ from bidi.algorithm import get_display
13
+
14
+
15
def detect_script(text):
    """
    Best-effort detection of the writing system used in *text*.

    Returns one of "Arabic", "Cyrillic", "Greek", "Hebrew", "Devanagari" or
    "Latin". Non-alphabetic characters are ignored; "Latin" is the fallback
    when no known script is found.

    Fix: the previous version collected all scripts into a ``set`` and
    returned ``list(scripts)[0]``, which is nondeterministic for mixed-script
    text (string hash randomization). This version scans in a fixed priority
    order so the same input always yields the same font choice, preferring
    any non-Latin script it finds.
    """
    markers = (
        ("ARABIC", "Arabic"),
        ("CYRILLIC", "Cyrillic"),
        ("GREEK", "Greek"),
        ("HEBREW", "Hebrew"),
        ("DEVANAGARI", "Devanagari"),
    )
    for char in text:
        if not char.isalpha():
            continue
        name = unicodedata.name(char, "")
        for token, script in markers:
            if token in name:
                return script
    return "Latin"
38
+
39
+
40
def get_font_path(script):
    """Return the bundled Noto font file for *script*; Latin-capable fallback."""
    font_by_script = {
        "Arabic": "./fonts/NotoNaskhArabic-Regular.ttf",
        "Cyrillic": "./fonts/NotoSansCyrillic-Regular.ttf",
        "Greek": "./fonts/NotoSansGreek-Regular.ttf",
    }
    return font_by_script.get(script, "./fonts/NotoSans-Regular.ttf")
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
def add_text(image: np.ndarray, text: str, contour: np.ndarray):
    """
    Draw *text* centered inside the bounding rect of *contour*, shrinking the
    font until it fits, and write the result back into *image* in place.

    :param image: BGR page image (mutated in place).
    :param text: already-translated text to render.
    :param contour: OpenCV contour of the cleaned speech-bubble area.
    """
    script = detect_script(text)
    font_path = get_font_path(script)
    if script == "Arabic":
        # Arabic needs glyph shaping plus right-to-left visual reordering.
        reshaped_text = arabic_reshaper.reshape(text)
        text = get_display(reshaped_text)
    pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(pil_image)

    x, y, w, h = cv2.boundingRect(contour)

    line_height = 16
    font_size = 14
    wrapping_ratio = 0.075

    wrap_width = max(1, int(w * wrapping_ratio))
    wrapped_text = textwrap.fill(text, width=wrap_width, break_long_words=True)

    font = ImageFont.truetype(font_path, size=font_size)

    lines = wrapped_text.split("\n")
    total_text_height = len(lines) * line_height

    # Shrink until the text block fits the bubble. Fix: floor the sizes at 1
    # and stop once the font is minimal — previously a very small bubble could
    # drive font_size to 0/negative, making ImageFont.truetype raise and
    # aborting the whole page via the caller's except handler.
    while total_text_height > h and font_size > 2:
        line_height = max(1, line_height - 2)
        font_size = max(1, font_size - 2)
        wrapping_ratio += 0.025

        wrap_width = max(1, int(w * wrapping_ratio))
        wrapped_text = textwrap.fill(text, width=wrap_width, break_long_words=True)
        font = ImageFont.truetype(font_path, size=font_size)
        lines = wrapped_text.split("\n")
        total_text_height = len(lines) * line_height

    # Vertical centering
    text_y = y + (h - total_text_height) // 2

    for line in lines:
        text_length = draw.textlength(line, font=font)
        # Horizontal centering
        text_x = x + (w - text_length) // 2
        draw.text((text_x, text_y), line, font=font, fill=(0, 0, 0))
        text_y += line_height

    image[:, :, :] = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)