Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,13 +1,10 @@
|
|
| 1 |
-
import subprocess
|
| 2 |
|
| 3 |
-
# Install required dependencies
|
| 4 |
# Install flash-attn at startup.
# `env=` replaces the child's entire environment, so the flag is merged into
# the current environment instead of being passed on its own; otherwise PATH,
# HOME, proxy and pip configuration variables would be dropped for the install.
# NOTE(review): FLASH_ATTENTION_SKIP_CUDA_BUILD presumably makes the flash-attn
# build skip compiling its CUDA kernels -- confirm against the flash-attn docs.
import os

subprocess.run(
    'pip install flash-attn --no-build-isolation',
    env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
    shell=True
)
|
| 9 |
-
# Pass the arguments as a list (no shell). In a shell command string the
# unquoted ">=" of each version specifier is parsed as an output redirection,
# so pip would receive bare package names without their version constraints
# and stray files such as "=0.24.1" would be created in the working directory.
subprocess.run([
    'pip', 'install',
    'googletrans==4.0.0-rc1',
    'httpx>=0.24.1',
    'gradio>=5.9.1',
    'gradio-client>=1.5.2',
])
|
| 10 |
-
|
| 11 |
import os
|
| 12 |
import re
|
| 13 |
import time
|
|
@@ -15,7 +12,6 @@ import torch
|
|
| 15 |
import spaces
|
| 16 |
import gradio as gr
|
| 17 |
from threading import Thread
|
| 18 |
-
from googletrans import Translator
|
| 19 |
from transformers import (
|
| 20 |
AutoModelForCausalLM,
|
| 21 |
AutoTokenizer,
|
|
@@ -32,6 +28,7 @@ Writing Style:
|
|
| 32 |
|
| 33 |
1. Grammar Accuracy: Always ensure translations are grammatically correct.
|
| 34 |
|
|
|
|
| 35 |
2. Contextual Suitability: Tailor translations to the context and audience:
|
| 36 |
|
| 37 |
Use concise, clear sentences for medical and legal content.
|
|
@@ -40,8 +37,12 @@ Employ engaging, imaginative language for marketing material.
|
|
| 40 |
|
| 41 |
Preserve formality and eloquence for legal content.
|
| 42 |
|
|
|
|
|
|
|
| 43 |
3. Proper Structure: Respect Arabic sentence structures and avoid direct replication of source language grammar. Favor "الجملة الفعلية" unless "الجملة الاسمية" is more suitable (e.g., for headlines or disclaimers).
|
| 44 |
|
|
|
|
|
|
|
| 45 |
Style Choices:
|
| 46 |
|
| 47 |
Use diacritics only when necessary for clarity.
|
|
@@ -52,8 +53,10 @@ Transliterate names and drug names unless an Arabic equivalent exists.
|
|
| 52 |
|
| 53 |
Translate program, department, and agency names when beneficial.
|
| 54 |
|
|
|
|
| 55 |
Use Arabic numerals and ensure proper handling of units, addresses, and references.
|
| 56 |
|
|
|
|
| 57 |
Punctuation:
|
| 58 |
|
| 59 |
Apply Arabic punctuation rules, ensuring proper readability.
|
|
@@ -62,6 +65,8 @@ Use the Arabic comma (،) and semicolon (؛) as per conventions.
|
|
| 62 |
|
| 63 |
Avoid excessive use of quotation marks and ensure logical placement of colons (:).
|
| 64 |
|
|
|
|
|
|
|
| 65 |
Common Mistakes to Avoid:
|
| 66 |
|
| 67 |
Avoid translating "is" as "is considered" unless contextually appropriate.
|
|
@@ -72,6 +77,7 @@ Minimize repetitive structures; leverage pronouns where applicable.
|
|
| 72 |
|
| 73 |
Avoid overuse of constructions like "(قام + الفعل)" and "الخاص بـ."
|
| 74 |
|
|
|
|
| 75 |
Specific Terminology:
|
| 76 |
|
| 77 |
For legal translations, maintain formal tone and ensure accuracy in terminology.
|
|
@@ -80,12 +86,14 @@ For medical translations, simplify technical terms for lay audiences but retain
|
|
| 80 |
|
| 81 |
For marketing translations, prioritize creativity over literal translation, aligning with the core message.
|
| 82 |
|
|
|
|
| 83 |
Formatting Guidelines:
|
| 84 |
|
| 85 |
Consistently follow Arabic typographic standards.
|
| 86 |
|
| 87 |
Preserve the format of critical data (e.g., dates, measurements, and legal citations).
|
| 88 |
|
|
|
|
| 89 |
When in doubt, prioritize clarity, consistency, and alignment with the target audience's needs. Always reconcile project-specific instructions with these guidelines, giving precedence to client requirements when conflicts arise.
|
| 90 |
"""
|
| 91 |
# UI Configuration
|
|
@@ -133,9 +141,9 @@ h3 {
|
|
| 133 |
def initialize_model():
|
| 134 |
"""Initialize the model with appropriate configurations"""
|
| 135 |
quantization_config = BitsAndBytesConfig(
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
)
|
| 140 |
|
| 141 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
|
@@ -148,6 +156,7 @@ def initialize_model():
|
|
| 148 |
device_map="cuda",
|
| 149 |
attn_implementation="flash_attention_2",
|
| 150 |
quantization_config=quantization_config
|
|
|
|
| 151 |
)
|
| 152 |
|
| 153 |
return model, tokenizer
|
|
@@ -262,14 +271,6 @@ def chat_response(
|
|
| 262 |
|
| 263 |
yield history, chat_display
|
| 264 |
|
| 265 |
-
# Translate the final response to Arabic
|
| 266 |
-
translator = Translator()
|
| 267 |
-
translated_text = translator.translate(buffer, src='en', dest='ar').text
|
| 268 |
-
history[-1][1] = translated_text
|
| 269 |
-
chat_display = format_chat_history(history)
|
| 270 |
-
|
| 271 |
-
yield history, chat_display
|
| 272 |
-
|
| 273 |
def process_example(example: str) -> tuple:
|
| 274 |
"""Return an empty history list and a display string showing the example as the user's message."""
|
| 275 |
return [], f"User: {example}\n\n"
|
|
@@ -404,4 +405,6 @@ def main():
|
|
| 404 |
|
| 405 |
if __name__ == "__main__":
|
| 406 |
demo = main()
|
| 407 |
-
demo.launch()
|
|
|
|
|
|
|
|
|
| 1 |
+
import subprocess
|
| 2 |
|
|
|
|
| 3 |
# Install flash-attn at startup.
# `env=` replaces the child's entire environment, so the flag is merged into
# the current environment instead of being passed on its own; otherwise PATH,
# HOME, proxy and pip configuration variables would be dropped for the install.
# NOTE(review): FLASH_ATTENTION_SKIP_CUDA_BUILD presumably makes the flash-attn
# build skip compiling its CUDA kernels -- confirm against the flash-attn docs.
import os

subprocess.run(
    'pip install flash-attn --no-build-isolation',
    env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
    shell=True
)
|
|
|
|
|
|
|
| 8 |
import os
|
| 9 |
import re
|
| 10 |
import time
|
|
|
|
| 12 |
import spaces
|
| 13 |
import gradio as gr
|
| 14 |
from threading import Thread
|
|
|
|
| 15 |
from transformers import (
|
| 16 |
AutoModelForCausalLM,
|
| 17 |
AutoTokenizer,
|
|
|
|
| 28 |
|
| 29 |
1. Grammar Accuracy: Always ensure translations are grammatically correct.
|
| 30 |
|
| 31 |
+
|
| 32 |
2. Contextual Suitability: Tailor translations to the context and audience:
|
| 33 |
|
| 34 |
Use concise, clear sentences for medical and legal content.
|
|
|
|
| 37 |
|
| 38 |
Preserve formality and eloquence for legal content.
|
| 39 |
|
| 40 |
+
|
| 41 |
+
|
| 42 |
3. Proper Structure: Respect Arabic sentence structures and avoid direct replication of source language grammar. Favor "الجملة الفعلية" unless "الجملة الاسمية" is more suitable (e.g., for headlines or disclaimers).
|
| 43 |
|
| 44 |
+
|
| 45 |
+
|
| 46 |
Style Choices:
|
| 47 |
|
| 48 |
Use diacritics only when necessary for clarity.
|
|
|
|
| 53 |
|
| 54 |
Translate program, department, and agency names when beneficial.
|
| 55 |
|
| 56 |
+
|
| 57 |
Use Arabic numerals and ensure proper handling of units, addresses, and references.
|
| 58 |
|
| 59 |
+
|
| 60 |
Punctuation:
|
| 61 |
|
| 62 |
Apply Arabic punctuation rules, ensuring proper readability.
|
|
|
|
| 65 |
|
| 66 |
Avoid excessive use of quotation marks and ensure logical placement of colons (:).
|
| 67 |
|
| 68 |
+
|
| 69 |
+
|
| 70 |
Common Mistakes to Avoid:
|
| 71 |
|
| 72 |
Avoid translating "is" as "is considered" unless contextually appropriate.
|
|
|
|
| 77 |
|
| 78 |
Avoid overuse of constructions like "(قام + الفعل)" and "الخاص بـ."
|
| 79 |
|
| 80 |
+
|
| 81 |
Specific Terminology:
|
| 82 |
|
| 83 |
For legal translations, maintain formal tone and ensure accuracy in terminology.
|
|
|
|
| 86 |
|
| 87 |
For marketing translations, prioritize creativity over literal translation, aligning with the core message.
|
| 88 |
|
| 89 |
+
|
| 90 |
Formatting Guidelines:
|
| 91 |
|
| 92 |
Consistently follow Arabic typographic standards.
|
| 93 |
|
| 94 |
Preserve the format of critical data (e.g., dates, measurements, and legal citations).
|
| 95 |
|
| 96 |
+
|
| 97 |
When in doubt, prioritize clarity, consistency, and alignment with the target audience's needs. Always reconcile project-specific instructions with these guidelines, giving precedence to client requirements when conflicts arise.
|
| 98 |
"""
|
| 99 |
# UI Configuration
|
|
|
|
| 141 |
def initialize_model():
|
| 142 |
"""Initialize the model with appropriate configurations"""
|
| 143 |
# Load the model weights in 8-bit.
# The former bnb_8bit_compute_dtype / bnb_8bit_use_double_quant arguments were
# removed: BitsAndBytesConfig only defines those options for 4-bit loading
# (bnb_4bit_compute_dtype, bnb_4bit_use_double_quant), so the 8-bit spellings
# had no effect on the resulting configuration.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
|
| 148 |
|
| 149 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
|
|
|
| 156 |
device_map="cuda",
|
| 157 |
attn_implementation="flash_attention_2",
|
| 158 |
quantization_config=quantization_config
|
| 159 |
+
|
| 160 |
)
|
| 161 |
|
| 162 |
return model, tokenizer
|
|
|
|
| 271 |
|
| 272 |
yield history, chat_display
|
| 273 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
def process_example(example: str) -> tuple:
|
| 275 |
"""Return an empty history list and a display string showing the example as the user's message."""
|
| 276 |
return [], f"User: {example}\n\n"
|
|
|
|
| 405 |
|
| 406 |
if __name__ == "__main__":
|
| 407 |
demo = main()
|
| 408 |
+
demo.launch()
|
| 409 |
+
|
| 410 |
+
|