longvnhue1 committed
Commit a9620d6 · Parent: b701a5b
Files changed (1)
  1. app.py +22 -9
app.py CHANGED
@@ -3,6 +3,7 @@ from pydantic import BaseModel
 from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
 import torch
 import re
+import time
 
 app = FastAPI()
 
@@ -67,21 +68,33 @@ def translate_text(req: TranslateRequest):
     tokenizer.src_lang = req.source_lang
     text_chunks = split_by_words_and_dot(req.text, min_words=125, max_words=160, fallback_words=150)
     translated_chunks = []
-    for chunk in text_chunks:
+    timing_info = []
+
+    for idx, chunk in enumerate(text_chunks):
+        start_time = time.perf_counter()  # start timing this chunk
+
         encoded = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=256).to(device)
-        generated_tokens = model.generate(
-            **encoded,
-            forced_bos_token_id=tokenizer.get_lang_id(req.target_lang),
-            max_length=256,
-            num_beams=2,
-            no_repeat_ngram_size=3,
-        )
+        with torch.inference_mode():
+            generated_tokens = model.generate(
+                **encoded,
+                forced_bos_token_id=tokenizer.get_lang_id(req.target_lang),
+                max_length=256,
+                num_beams=2,
+                no_repeat_ngram_size=3,
+            )
         translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
         translated_chunks.append(translated_text)
+
+        end_time = time.perf_counter()  # stop timing this chunk
+        elapsed = end_time - start_time
+        timing_info.append(f"Translated chunk {idx+1}/{len(text_chunks)} in {elapsed:.3f} seconds")
+
     full_translation = "\n".join(translated_chunks)
+
     return {
         "source_text": req.text,
         "translated_text": full_translation,
         "src_lang": req.source_lang,
-        "tgt_lang": req.target_lang
+        "tgt_lang": req.target_lang,
+        "timing": timing_info
     }
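
Note: the diff calls a split_by_words_and_dot helper defined elsewhere in app.py and not shown in this hunk. A minimal sketch of what such a chunker could look like, assuming it packs whole sentences (split on ".") into chunks of at most max_words words and hard-splits any oversized sentence at fallback_words; the actual helper may behave differently:

import re

def split_by_words_and_dot(text, min_words=125, max_words=160, fallback_words=150):
    # Hypothetical reimplementation for illustration only; the real
    # helper in app.py is not part of this diff.
    sentences = re.split(r"(?<=\.)\s+", text.strip())
    chunks, current = [], []

    def flush():
        if current:
            chunks.append(" ".join(current))
            current.clear()

    for sentence in sentences:
        words = sentence.split()
        # Hard-split a single sentence that alone exceeds max_words.
        while len(words) > max_words:
            flush()
            chunks.append(" ".join(words[:fallback_words]))
            words = words[fallback_words:]
        # Start a new chunk once the current one would grow past max_words
        # (min_words is honored only best-effort in this sketch).
        if current and len(current) + len(words) > max_words:
            flush()
        current.extend(words)
    flush()
    return chunks

Chunking matters here because the tokenizer call uses truncation=True with max_length=256, so overly long chunks would be silently truncated before generation.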
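
For reference, a hedged client-side example of exercising the new timing field. The route decorator for translate_text is not part of this diff, so the "/translate" path and port are assumptions:

import requests

resp = requests.post(
    "http://localhost:8000/translate",  # hypothetical route; not shown in this diff
    json={
        "text": "Long document to translate ...",
        "source_lang": "en",
        "target_lang": "vi",
    },
)
payload = resp.json()
print(payload["translated_text"])
for line in payload["timing"]:  # per-chunk timings added by this commit
    print(line)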