Daemontatox committed on
Commit
01ed945
·
verified ·
1 Parent(s): 11eedf9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -17
app.py CHANGED
@@ -1,13 +1,10 @@
1
- import subprocess
2
 
3
- # Install required dependencies
4
  subprocess.run(
5
  'pip install flash-attn --no-build-isolation',
6
  env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
7
  shell=True
8
  )
9
- subprocess.run('pip install googletrans==4.0.0-rc1 httpx>=0.24.1 gradio>=5.9.1 gradio-client>=1.5.2', shell=True)
10
-
11
  import os
12
  import re
13
  import time
@@ -15,7 +12,6 @@ import torch
15
  import spaces
16
  import gradio as gr
17
  from threading import Thread
18
- from googletrans import Translator
19
  from transformers import (
20
  AutoModelForCausalLM,
21
  AutoTokenizer,
@@ -32,6 +28,7 @@ Writing Style:
32
 
33
  1. Grammar Accuracy: Always ensure translations are grammatically correct.
34
 
 
35
  2. Contextual Suitability: Tailor translations to the context and audience:
36
 
37
  Use concise, clear sentences for medical and legal content.
@@ -40,8 +37,12 @@ Employ engaging, imaginative language for marketing material.
40
 
41
  Preserve formality and eloquence for legal content.
42
 
 
 
43
  3. Proper Structure: Respect Arabic sentence structures and avoid direct replication of source language grammar. Favor "الجملة الفعلية" unless "الجملة الاسمية" is more suitable (e.g., for headlines or disclaimers).
44
 
 
 
45
  Style Choices:
46
 
47
  Use diacritics only when necessary for clarity.
@@ -52,8 +53,10 @@ Transliterate names and drug names unless an Arabic equivalent exists.
52
 
53
  Translate program, department, and agency names when beneficial.
54
 
 
55
  Use Arabic numerals and ensure proper handling of units, addresses, and references.
56
 
 
57
  Punctuation:
58
 
59
  Apply Arabic punctuation rules, ensuring proper readability.
@@ -62,6 +65,8 @@ Use the Arabic comma (،) and semicolon (؛) as per conventions.
62
 
63
  Avoid excessive use of quotation marks and ensure logical placement of colons (:).
64
 
 
 
65
  Common Mistakes to Avoid:
66
 
67
  Avoid translating "is" as "is considered" unless contextually appropriate.
@@ -72,6 +77,7 @@ Minimize repetitive structures; leverage pronouns where applicable.
72
 
73
  Avoid overuse of constructions like "(قام + الفعل)" and "الخاص بـ."
74
 
 
75
  Specific Terminology:
76
 
77
  For legal translations, maintain formal tone and ensure accuracy in terminology.
@@ -80,12 +86,14 @@ For medical translations, simplify technical terms for lay audiences but retain
80
 
81
  For marketing translations, prioritize creativity over literal translation, aligning with the core message.
82
 
 
83
  Formatting Guidelines:
84
 
85
  Consistently follow Arabic typographic standards.
86
 
87
  Preserve the format of critical data (e.g., dates, measurements, and legal citations).
88
 
 
89
  When in doubt, prioritize clarity, consistency, and alignment with the target audience's needs. Always reconcile project-specific instructions with these guidelines, giving precedence to client requirements when conflicts arise.
90
  """
91
  # UI Configuration
@@ -133,9 +141,9 @@ h3 {
133
  def initialize_model():
134
  """Initialize the model with appropriate configurations"""
135
  quantization_config = BitsAndBytesConfig(
136
- load_in_4bit=True,
137
- bnb_4bit_compute_dtype=torch.bfloat16,
138
- bnb_4bit_use_double_quant=True
139
  )
140
 
141
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
@@ -148,6 +156,7 @@ def initialize_model():
148
  device_map="cuda",
149
  attn_implementation="flash_attention_2",
150
  quantization_config=quantization_config
 
151
  )
152
 
153
  return model, tokenizer
@@ -262,14 +271,6 @@ def chat_response(
262
 
263
  yield history, chat_display
264
 
265
- # Translate the final response to Arabic
266
- translator = Translator()
267
- translated_text = translator.translate(buffer, src='en', dest='ar').text
268
- history[-1][1] = translated_text
269
- chat_display = format_chat_history(history)
270
-
271
- yield history, chat_display
272
-
273
  def process_example(example: str) -> tuple:
274
  """Process example query and return empty history and updated display"""
275
  return [], f"User: {example}\n\n"
@@ -404,4 +405,6 @@ def main():
404
 
405
  if __name__ == "__main__":
406
  demo = main()
407
- demo.launch()
 
 
 
1
+ import subprocess
2
 
 
3
  subprocess.run(
4
  'pip install flash-attn --no-build-isolation',
5
  env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
6
  shell=True
7
  )
 
 
8
  import os
9
  import re
10
  import time
 
12
  import spaces
13
  import gradio as gr
14
  from threading import Thread
 
15
  from transformers import (
16
  AutoModelForCausalLM,
17
  AutoTokenizer,
 
28
 
29
  1. Grammar Accuracy: Always ensure translations are grammatically correct.
30
 
31
+
32
  2. Contextual Suitability: Tailor translations to the context and audience:
33
 
34
  Use concise, clear sentences for medical and legal content.
 
37
 
38
  Preserve formality and eloquence for legal content.
39
 
40
+
41
+
42
  3. Proper Structure: Respect Arabic sentence structures and avoid direct replication of source language grammar. Favor "الجملة الفعلية" unless "الجملة الاسمية" is more suitable (e.g., for headlines or disclaimers).
43
 
44
+
45
+
46
  Style Choices:
47
 
48
  Use diacritics only when necessary for clarity.
 
53
 
54
  Translate program, department, and agency names when beneficial.
55
 
56
+
57
  Use Arabic numerals and ensure proper handling of units, addresses, and references.
58
 
59
+
60
  Punctuation:
61
 
62
  Apply Arabic punctuation rules, ensuring proper readability.
 
65
 
66
  Avoid excessive use of quotation marks and ensure logical placement of colons (:).
67
 
68
+
69
+
70
  Common Mistakes to Avoid:
71
 
72
  Avoid translating "is" as "is considered" unless contextually appropriate.
 
77
 
78
  Avoid overuse of constructions like "(قام + الفعل)" and "الخاص بـ."
79
 
80
+
81
  Specific Terminology:
82
 
83
  For legal translations, maintain formal tone and ensure accuracy in terminology.
 
86
 
87
  For marketing translations, prioritize creativity over literal translation, aligning with the core message.
88
 
89
+
90
  Formatting Guidelines:
91
 
92
  Consistently follow Arabic typographic standards.
93
 
94
  Preserve the format of critical data (e.g., dates, measurements, and legal citations).
95
 
96
+
97
  When in doubt, prioritize clarity, consistency, and alignment with the target audience's needs. Always reconcile project-specific instructions with these guidelines, giving precedence to client requirements when conflicts arise.
98
  """
99
  # UI Configuration
 
141
  def initialize_model():
142
  """Initialize the model with appropriate configurations"""
143
  quantization_config = BitsAndBytesConfig(
144
+ load_in_8bit=True,
145
+ bnb_8bit_compute_dtype=torch.bfloat16,
146
+ bnb_8bit_use_double_quant=True
147
  )
148
 
149
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
156
  device_map="cuda",
157
  attn_implementation="flash_attention_2",
158
  quantization_config=quantization_config
159
+
160
  )
161
 
162
  return model, tokenizer
 
271
 
272
  yield history, chat_display
273
 
 
 
 
 
 
 
 
 
274
  def process_example(example: str) -> tuple:
275
  """Process example query and return empty history and updated display"""
276
  return [], f"User: {example}\n\n"
 
405
 
406
  if __name__ == "__main__":
407
  demo = main()
408
+ demo.launch()
409
+
410
+