# from fastapi import FastAPI, Request
# from pydantic import BaseModel
# from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
# import torch
# import re
# import time

# app = FastAPI()

# def split_by_words_and_dot(text, min_words=125, max_words=160, fallback_words=150):
#     import re
#     words = re.findall(r'\S+|\n', text)  # keep each \n as its own "word"
#     chunks = []
#     start = 0
#     while start < len(words):
#         end = min(start + max_words, len(words))
#         # Find a sentence-ending period between min_words and max_words
#         dot_idx = -1
#         for i in range(start + min_words, min(start + max_words, len(words))):
#             if words[i] == '.' or (words[i].endswith('.') and words[i] != '\n'):
#                 dot_idx = i
#         if dot_idx != -1:
#             chunk_end = dot_idx + 1
#         elif end - start > fallback_words:
#             chunk_end = start + fallback_words
#         else:
#             chunk_end = end
#         chunk = ' '.join([w if w != '\n' else '\n' for w in words[start:chunk_end]]).replace(' \n ', '\n').replace(' \n', '\n').replace('\n ', '\n')
#         chunks.append(chunk.strip())
#         start = chunk_end
#     return chunks

# # Load model
# model_path = "longvnhue1/facebook-m2m100_418M-fine_tuning"
# tokenizer = M2M100Tokenizer.from_pretrained(model_path)
# model = M2M100ForConditionalGeneration.from_pretrained(model_path)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)

# class TranslateRequest(BaseModel):
#     text: str
#     source_lang: str
#     target_lang: str


# @app.post("/translate")
# def translate_text(req: TranslateRequest):
#     tokenizer.src_lang = req.source_lang
#     text_chunks = split_by_words_and_dot(req.text, min_words=125, max_words=160, fallback_words=150)
#     translated_chunks = []
#     timing_info = []

#     for idx, chunk in enumerate(text_chunks):
#         start_time = time.perf_counter()  # start timing this chunk

#         encoded = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=256).to(device)
#         with torch.inference_mode():
#             generated_tokens = model.generate(
#                 **encoded,
#                 forced_bos_token_id=tokenizer.get_lang_id(req.target_lang),
#                 max_length=256,
#                 num_beams=2,
#                 no_repeat_ngram_size=3,
#             )
#         translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
#         translated_chunks.append(translated_text)

#         end_time = time.perf_counter()  # stop timing this chunk
#         elapsed = end_time - start_time
#         timing_info.append(f"Translated chunk {idx+1}/{len(text_chunks)} in {elapsed:.3f} seconds")

#     full_translation = "\n".join(translated_chunks)
#     print(timing_info)

#     return {
#         "source_text": req.text,
#         "translated_text": full_translation,
#         "src_lang": req.source_lang,
#         "tgt_lang": req.target_lang,
#     }


from fastapi import FastAPI
from pydantic import BaseModel
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
import torch
import re
import time

# Limit PyTorch to a single CPU thread
torch.set_num_threads(1)

app = FastAPI()

@app.on_event("startup")
def startup_event():
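    """Load the tokenizer and model once at startup so every request reuses them."""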
    global tokenizer, model, device

    model_path = "longvnhue1/facebook-m2m100_418M-fine_tuning"
    tokenizer = M2M100Tokenizer.from_pretrained(model_path)
    model = M2M100ForConditionalGeneration.from_pretrained(model_path)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    print("Model loaded and ready.")

# def split_by_words_and_dot(text, min_words=125, max_words=160, fallback_words=150):
#     words = re.findall(r'\S+|\n', text)  # keep each \n as its own "word"
#     chunks = []
#     start = 0
#     while start < len(words):
#         end = min(start + max_words, len(words))
#         dot_idx = -1
#         for i in range(start + min_words, min(start + max_words, len(words))):
#             if words[i] == '.' or (words[i].endswith('.') and words[i] != '\n'):
#                 dot_idx = i
#         if dot_idx != -1:
#             chunk_end = dot_idx + 1
#         elif end - start > fallback_words:
#             chunk_end = start + fallback_words
#         else:
#             chunk_end = end
#         chunk = ' '.join([w if w != '\n' else '\n' for w in words[start:chunk_end]]).replace(' \n ', '\n').replace(' \n', '\n').replace('\n ', '\n')
#         chunks.append(chunk.strip())
#         start = chunk_end
#     return chunks

def split_by_words_and_dot(text, min_words=125, max_words=160, fallback_words=150):
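    """Split text into chunks of at most max_words words, preferring to cut
    after the last sentence-ending mark ('.', '?', '!') found once min_words
    words have accumulated; otherwise hard-cut at fallback_words."""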
    words = re.findall(r'\S+|\n', text)  # keep each \n as its own "word"
    chunks = []
    start = 0

    while start < len(words):
        end = min(start + max_words, len(words))
        dot_idx = -1

        for i in range(start + min_words, end):
            if words[i] in ['.', '?', '!'] or (words[i].endswith(('.', '?', '!')) and words[i] != '\n'):
                dot_idx = i

        if dot_idx != -1:
            chunk_end = dot_idx + 1
        elif end - start > fallback_words:
            chunk_end = start + fallback_words
        else:
            chunk_end = end

        chunk = ' '.join([w if w != '\n' else '\n' for w in words[start:chunk_end]])
        chunk = chunk.replace(' \n ', '\n').replace(' \n', '\n').replace('\n ', '\n')
        chunks.append(chunk.strip())
        start = chunk_end

    return chunks
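
# Quick illustrative check of the chunker (a sketch, not part of the service;
# the sample string below is made up):
#
#   sample = " ".join(["word"] * 300) + "."
#   for i, c in enumerate(split_by_words_and_dot(sample)):
#       print(i, len(c.split()))  # every chunk stays at or under max_words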


class TranslateRequest(BaseModel):
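    """Request body; source_lang / target_lang are M2M100 language codes such as 'en' or 'vi'."""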
    text: str
    source_lang: str
    target_lang: str

@app.post("/translate")
def translate_text(req: TranslateRequest):
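    # Note: setting src_lang mutates the shared tokenizer, so concurrent
    # requests could race here; fine for a single-worker, serial setup.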
    tokenizer.src_lang = req.source_lang
    text_chunks = split_by_words_and_dot(req.text)
    translated_chunks = []
    timing_info = []

    global_start = time.perf_counter()

    for idx, chunk in enumerate(text_chunks):
        start_time = time.perf_counter()

        encoded = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=256).to(device)
        with torch.inference_mode():
            generated_tokens = model.generate(
                **encoded,
                forced_bos_token_id=tokenizer.get_lang_id(req.target_lang),
                max_length=256,              # <-- keep output length moderate
                num_beams=2,                 # <-- light beam search
                no_repeat_ngram_size=3,
            )
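        # Debug output: log the forced BOS token id and source language per chunk.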
        print(f"forced_bos_token_id: {tokenizer.lang_code_to_id[req.target_lang]}")
        print(f"tokenizer.src_lang: {tokenizer.src_lang}")
        translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
        translated_chunks.append(translated_text)

        end_time = time.perf_counter()
        timing_info.append(f"Translated chunk {idx+1}/{len(text_chunks)} in {end_time - start_time:.3f} seconds")

    global_end = time.perf_counter()
    print(f"⚡️ Total translation time: {global_end - global_start:.3f} seconds")
    print(timing_info)

    return {
        "source_text": req.text,
        "translated_text": "\n".join(translated_chunks),
        "src_lang": req.source_lang,
        "tgt_lang": req.target_lang,
    }
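

# Example request (a sketch; assumes the server is launched as
# `uvicorn main:app` on localhost:8000 -- module name and port are assumptions):
#
#   import requests
#   resp = requests.post(
#       "http://localhost:8000/translate",
#       json={"text": "Hello world.", "source_lang": "en", "target_lang": "vi"},
#   )
#   print(resp.json()["translated_text"])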