Spaces:

asasasaasasa
/

tilmash-gemma3-translator

Build error

App Files Files Community

asasasaasasa committed on Aug 10

Commit

3c3ac9d

verified ·

1 Parent(s): dee3f9b

Upload folder using huggingface_hub

Browse files

Files changed (15) hide show

utils/__pycache__/chunking.cpython-311.pyc +0 -0
utils/__pycache__/file_readers.cpython-311.pyc +0 -0
utils/__pycache__/formatting.cpython-311.pyc +0 -0
utils/__pycache__/gemma_translation.cpython-311.pyc +0 -0
utils/__pycache__/readability_indices.cpython-311.pyc +0 -0
utils/__pycache__/text_processing.cpython-311.pyc +0 -0
utils/__pycache__/tilmash_translation.cpython-311.pyc +0 -0
utils/chunking.py +170 -0
utils/file_readers.py +35 -0
utils/formatting.py +33 -0
utils/gemma_translation.py +661 -0
utils/readability_indices.py +132 -0
utils/sherkala.py +36 -0
utils/text_processing.py +15 -0
utils/tilmash_translation.py +455 -0

utils/__pycache__/chunking.cpython-311.pyc ADDED Viewed

Binary file (7.02 kB). View file

utils/__pycache__/file_readers.cpython-311.pyc ADDED Viewed

Binary file (2.18 kB). View file

utils/__pycache__/formatting.cpython-311.pyc ADDED Viewed

Binary file (1.29 kB). View file

utils/__pycache__/gemma_translation.cpython-311.pyc ADDED Viewed

Binary file (30.7 kB). View file

utils/__pycache__/readability_indices.cpython-311.pyc ADDED Viewed

Binary file (10.1 kB). View file

utils/__pycache__/text_processing.cpython-311.pyc ADDED Viewed

Binary file (586 Bytes). View file

utils/__pycache__/tilmash_translation.cpython-311.pyc ADDED Viewed

Binary file (24.1 kB). View file

utils/chunking.py ADDED Viewed

	@@ -0,0 +1,170 @@

+# utils/chunking.py
+import logging
+from pysbd import Segmenter
+import re
+def chunk_text_with_separators(text, tokenizer, max_tokens, lang):
+    """
+    Group the sentences of ``text`` into chunks that stay within a token budget.
+    Sentences come from ``_split_technical_sentences``. Ordinary sentences are packed
+    greedily, in order, until the next one would push the chunk past ``max_tokens``.
+    A list item or table header closes the chunk being built and is handed to
+    ``_process_special_item``, whose chunks are appended unchanged.
+    Args:
+        text (str): The input text to be chunked.
+        tokenizer: Object exposing ``encode(text, add_special_tokens=False)``; the
+            length of its result is used as the token count.
+        max_tokens (int): Maximum number of tokens allowed per chunk.
+        lang (str): Language of the text, forwarded to the sentence splitter.
+    Returns:
+        list: ``(chunk_text, separators)`` tuples. ``chunk_text`` is the chunk's
+        sentences joined with single spaces; ``separators`` is the concatenation
+        of the separators that accompanied those sentences.
+    """
+    chunks = []
+    pending_sentences = []
+    pending_separators = []
+    pending_tokens = 0
+    def _flush():
+        """Emit the chunk under construction, if any, and start an empty one."""
+        nonlocal pending_tokens
+        if pending_sentences:
+            chunks.append((' '.join(pending_sentences), ''.join(pending_separators)))
+        pending_sentences.clear()
+        pending_separators.clear()
+        pending_tokens = 0
+    for sentence, sep in _split_technical_sentences(text, lang):
+        n_tokens = len(tokenizer.encode(sentence, add_special_tokens=False))
+        if not n_tokens:
+            continue
+        # Lists and table headers never share a chunk with ordinary prose
+        if _is_list_item(sentence) or _is_table_header(sentence):
+            _flush()
+            chunks.extend(_process_special_item(sentence, sep, tokenizer, max_tokens))
+            continue
+        # Close the running chunk first when this sentence would overflow it
+        if pending_tokens + n_tokens > max_tokens:
+            _flush()
+        pending_sentences.append(sentence)
+        pending_separators.append(sep)
+        pending_tokens += n_tokens
+    # Whatever is still pending becomes the final chunk
+    _flush()
+    return chunks
+def _split_technical_sentences(text, lang):
+    """
+    Split ``text`` into sentences, forcing extra breaks around numbered list items
+    and lines that end with a colon.
+    Args:
+        text (str): Text to split.
+        lang (str): ``'russian'`` selects the pysbd ``Segmenter`` (language ``'ru'``);
+            any other value uses a regex split on ``.``, ``!`` and ``?``.
+    Returns:
+        list: ``(sentence, separator)`` tuples with each sentence stripped.
+        NOTE(review): ``current_sep`` is only ever assigned ``''`` in this function,
+        so every separator returned is currently the empty string.
+    """
+    # Insert a '§§§' break marker (preceded by an extra newline) before each
+    # numbered list item such as "\n1."; bullet markers are not matched here
+    text = re.sub(r'(\n\s*\d+\.)', r'\n§§§\1', text)
+    # Insert the same marker after every colon that ends a line
+    text = re.sub(r'(:\s*\n)', r'\1§§§', text)
+    sentences = []
+    separators = []
+    if lang == 'russian':
+        segmenter = Segmenter(language='ru', clean=False)
+        raw_sentences = segmenter.segment(text)
+    else:
+        # Both groups are capturing, so re.split also returns the punctuation
+        # mark and the whitespace after it as separate list items
+        raw_sentences = re.split(r'([.!?])(\s*)', text)
+    # buffer accumulates pieces until a sentence boundary is detected
+    buffer = ''
+    current_sep = ''
+    for part in raw_sentences:
+        if '§§§' in part:
+            # Everything before the last marker is complete; emit each non-empty
+            # piece and keep the tail as the start of the next sentence.
+            # Note that text already held in buffer is replaced, not emitted.
+            parts = part.split('§§§')
+            for p in parts[:-1]:
+                if p.strip():
+                    sentences.append(p.strip())
+                    separators.append(current_sep)
+                current_sep = ''
+            buffer = parts[-1]
+        else:
+            buffer += part
+        # Process buffer when we hit sentence boundaries
+        if lang == 'russian':
+            if buffer.strip() and any(buffer.endswith(c) for c in ['.', '!', '?', ':']):
+                sentences.append(buffer.strip())
+                separators.append(current_sep)
+                buffer = ''
+                current_sep = ''
+        else:
+            # Unlike the Russian branch there is no emptiness check on buffer here
+            if re.search(r'[.!?:]$', buffer):
+                sentences.append(buffer.strip())
+                separators.append(current_sep)
+                buffer = ''
+                current_sep = ''
+    # Trailing text without closing punctuation still counts as a sentence
+    if buffer.strip():
+        sentences.append(buffer.strip())
+        separators.append(current_sep)
+    return list(zip(sentences, separators))
+def _is_list_item(text):
+    """
+    Return a match object when ``text`` starts with a list marker, else ``None``.
+    A marker is optional leading whitespace, then ``N.``, ``-`` or ``*``, followed
+    by one whitespace character.
+    """
+    list_marker = re.compile(r'^\s*(\d+\.|\-|\*)\s')
+    return list_marker.match(text)
+def _is_table_header(text):
+    """
+    Return a truthy match when ``text`` ends with a colon (trailing whitespace
+    allowed) and contains a run of at least three upper-case Latin or Cyrillic
+    letters; otherwise return ``None``.
+    """
+    if not re.search(r':\s*$', text):
+        return None
+    return re.search(r'[A-ZА-Я]{3,}', text)
+def _process_special_item(text, separator, tokenizer, max_tokens):
+    """
+    Split a list item or table header into chunks of at most ``max_tokens`` where possible.
+    The text is broken on newlines. A line that alone exceeds ``max_tokens`` is
+    further broken on ``,`` and ``;`` (the delimiters are kept as pieces). Pieces
+    are packed greedily, in order, and joined with single spaces.
+    Args:
+        text (str): The special item to chunk.
+        separator (str): Separator attached to every chunk produced.
+        tokenizer: Object exposing ``encode(text, add_special_tokens=False)``.
+        max_tokens (int): Token budget per chunk. A single piece that is larger
+            than the budget and cannot be split further is emitted on its own.
+    Returns:
+        list: ``(chunk_text, separator)`` tuples; never contains an empty chunk.
+    """
+    chunks = []
+    current_chunk = []
+    current_length = 0
+    for line in re.split(r'(\n+)', text):
+        # Skips the captured newline runs as well as blank lines
+        if not line.strip():
+            continue
+        line_len = len(tokenizer.encode(line, add_special_tokens=False))
+        if line_len > max_tokens:
+            # Handle oversized items with careful splitting
+            pieces = [
+                (part, len(tokenizer.encode(part, add_special_tokens=False)))
+                for part in re.split(r'([,;])', line)
+                if part.strip()
+            ]
+        else:
+            pieces = [(line, line_len)]
+        for piece, piece_len in pieces:
+            # Flush only a non-empty chunk: previously an oversized first piece
+            # flushed the empty list and produced a spurious ('', separator) entry
+            if current_chunk and current_length + piece_len > max_tokens:
+                chunks.append((' '.join(current_chunk), separator))
+                current_chunk = []
+                current_length = 0
+            current_chunk.append(piece)
+            current_length += piece_len
+    if current_chunk:
+        chunks.append((' '.join(current_chunk), separator))
+    return chunks

utils/file_readers.py ADDED Viewed

	@@ -0,0 +1,35 @@

+# utils/file_readers.py
+import docx
+import PyPDF2
+def read_txt(file_path):
+    """Return the entire contents of the UTF-8 text file at ``file_path``."""
+    with open(file_path, mode='r', encoding='utf-8') as handle:
+        contents = handle.read()
+    return contents
+def read_docx(file_path):
+    """Return the text of every paragraph in the .docx file, one paragraph per line."""
+    document = docx.Document(file_path)
+    return '\n'.join([paragraph.text for paragraph in document.paragraphs])
+def read_pdf(file_path):
+    """
+    Return the text extracted from all pages of the PDF at ``file_path``.
+    Page texts are concatenated with no separator; pages for which
+    ``extract_text()`` returns nothing are skipped.
+    """
+    with open(file_path, 'rb') as pdf_file:
+        pdf = PyPDF2.PdfReader(pdf_file)
+        # Extraction must finish while the underlying file is still open
+        extracted = [page.extract_text() for page in pdf.pages]
+    return ''.join(piece for piece in extracted if piece)
+def read_file(file_path):
+    """
+    Read a .txt, .docx or .pdf file and return its text.
+    The reader is chosen from the file extension, compared case-insensitively so
+    that names such as ``REPORT.PDF`` or ``notes.Txt`` are recognised too.
+    Args:
+        file_path (str): Path of the file to read.
+    Returns:
+        str: The extracted text, or ``""`` when the extension is not supported.
+    """
+    # Compare against the lower-cased path; the original path is what gets opened
+    lowered = file_path.lower()
+    if lowered.endswith('.txt'):
+        return read_txt(file_path)
+    elif lowered.endswith('.docx'):
+        return read_docx(file_path)
+    elif lowered.endswith('.pdf'):
+        return read_pdf(file_path)
+    else:
+        return ""

utils/formatting.py ADDED Viewed

	@@ -0,0 +1,33 @@

+# utils/formatting.py
+def color_code_index(index_name, value):
+    """
+    Wrap a readability score in an HTML ``<span>`` coloured by difficulty band.
+    Bands are cumulative, so fractional scores between two integer band edges
+    (for example a grade level of 5.5) fall into the next band instead of
+    dropping through to "red".
+    Args:
+        index_name (str): "Flesch Reading Ease", "Flesch-Kincaid Grade Level",
+            "Gunning Fog Index" or "SMOG Index"; any other name is shown in black.
+        value (float): The score to display.
+    Returns:
+        str: ``<span style='color: COLOR;'>VALUE</span>`` with the value
+        formatted to two decimals.
+    """
+    if index_name == "Flesch Reading Ease":
+        # Higher scores mean easier text
+        if value >= 90:
+            color = "green"
+        elif value >= 60:
+            color = "lightgreen"
+        elif value >= 30:
+            color = "orange"
+        else:
+            color = "red"
+    elif index_name == "Flesch-Kincaid Grade Level":
+        # Lower grade levels mean easier text
+        if value <= 5:
+            color = "green"
+        elif value <= 10:
+            color = "lightgreen"
+        elif value <= 15:
+            color = "orange"
+        else:
+            color = "red"
+    elif index_name in ["Gunning Fog Index", "SMOG Index"]:
+        if value <= 6:
+            color = "green"
+        elif value <= 12:
+            color = "lightgreen"
+        elif value <= 17:
+            color = "orange"
+        else:
+            color = "red"
+    else:
+        color = "black"
+    return f"<span style='color: {color};'>{value:.2f}</span>"

utils/gemma_translation.py ADDED Viewed

	@@ -0,0 +1,661 @@

+# utils/gemma_translation.py
+import os
+import logging
+from dotenv import load_dotenv
+from llama_cpp import Llama
+import streamlit as st
+from typing import Iterator, Optional, List
+import re
+import time
+import psutil
+import uuid
+import shutil
+import sys
+import contextlib
+# Import configuration defaults
+from config import DEFAULT_CONFIG
+@contextlib.contextmanager
+def suppress_stdout_stderr():
+    """
+    Temporarily point ``sys.stdout`` and ``sys.stderr`` at the OS null device.
+    Only the Python-level stream objects are swapped; the previous objects are
+    put back when the block exits, even if it raises.
+    NOTE(review): output written straight to the process file descriptors (for
+    example by native libraries) presumably bypasses this - confirm.
+    """
+    saved_streams = (sys.stdout, sys.stderr)
+    with open(os.devnull, 'w') as sink:
+        try:
+            sys.stdout = sys.stderr = sink
+            yield
+        finally:
+            # Restore first; the null device is closed when the with-block ends
+            sys.stdout, sys.stderr = saved_streams
+from .chunking import chunk_text_with_separators
+# Load environment variables (python-dotenv)
+load_dotenv()
+# Configure logging for the whole process at INFO level; runs on import
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Model file locations. These paths are hard-coded relative to the working
+# directory; they are not read from DEFAULT_CONFIG.
+ORIGINAL_MODEL_PATH = os.path.join("local_llms", "gemma-3-12b-it-Q4_K_M.gguf")
+MODEL_DIR = os.path.join("local_llms", "instances")
+# Import side effect: make sure the per-session instance directory exists
+os.makedirs(MODEL_DIR, exist_ok=True)
+# Tunables read from DEFAULT_CONFIG
+DEFAULT_CONTEXT_SIZE = DEFAULT_CONFIG["GEMMA_CONTEXT_SIZE"]
+DEFAULT_MAX_TOKENS = DEFAULT_CONFIG["MAX_TOKENS"]
+DEFAULT_CHUNK_SIZE = DEFAULT_CONFIG["CHUNK_SIZE"]  # Max tokens per chunk
+MODEL_INSTANCE_TIMEOUT = DEFAULT_CONFIG["MODEL_INSTANCE_TIMEOUT"]  # described as 30 minutes - TODO confirm value and unit in config
+# Garbage collection for session-specific model files
+def cleanup_model_instances():
+    """
+    Delete ``.gguf`` files in ``MODEL_DIR`` that were last accessed over an hour ago.
+    Best-effort: a file that cannot be removed is logged and skipped, and any
+    other error (for example while listing the directory or reading an access
+    time) is logged and ends the sweep without raising.
+    """
+    max_idle_seconds = 3600  # 3600 seconds = 1 hour
+    try:
+        now = time.time()
+        for filename in os.listdir(MODEL_DIR):
+            file_path = os.path.join(MODEL_DIR, filename)
+            # Only regular model files are candidates
+            if not (filename.endswith(".gguf") and os.path.isfile(file_path)):
+                continue
+            # Recently used instances are kept
+            if now - os.path.getatime(file_path) <= max_idle_seconds:
+                continue
+            try:
+                os.remove(file_path)
+                logger.info(f"Removed unused model instance: {filename}")
+            except Exception as e:
+                logger.error(f"Could not remove model file {filename}: {str(e)}")
+    except Exception as e:
+        logger.error(f"Error in cleanup: {str(e)}")
+# Run cleanup every time module is imported
+cleanup_model_instances()
+class LlamaCppTokenizerAdapter:
+    """
+    Thin wrapper giving a llama-cpp ``Llama`` model the ``encode`` method that
+    the chunking utility calls on its tokenizer argument.
+    """
+    def __init__(self, llama_model):
+        # The wrapped model; only its tokenize() method is used here
+        self.model = llama_model
+    def encode(self, text, add_special_tokens=False):
+        """
+        Return the token IDs for ``text`` as produced by the model's ``tokenize``.
+        Args:
+            text: Text to tokenize; it is UTF-8 encoded before being passed on
+            add_special_tokens: Accepted for interface compatibility and ignored
+        Returns:
+            List of token IDs. If tokenization raises, a warning is logged and a
+            list of zeros of length ``len(text) // 4 + 1`` is returned so that
+            callers still get a usable length estimate.
+        """
+        try:
+            raw_bytes = bytes(text, "utf-8")
+            token_ids = self.model.tokenize(raw_bytes)
+        except Exception as e:
+            logger.warning(f"Tokenization error: {str(e)}")
+            # Fallback to character-based approximate tokenization (4 chars ≈ 1 token)
+            approx_token_count = len(text) // 4 + 1
+            return [0] * approx_token_count
+        return token_ids
+class GemmaTranslator:
+    """
+    Translator using Gemma 3 model in GGUF format with streaming capability.
+    Uses a session-specific model file for complete isolation.
+    """
+    def __init__(self):
+        """
+        Set up an unloaded translator bound to the current Streamlit session.
+        No model is loaded here; the session-specific model file path is
+        resolved immediately via ``_get_session_model_path``.
+        """
+        # Nothing is loaded until load_model() runs
+        self.model = None
+        self.tokenizer = None
+        self.initialized = False
+        self.using_gpu = False
+        # Use the session's id when st.session_state has one, else a fresh UUID
+        fallback_id = str(uuid.uuid4())
+        self.session_id = getattr(st.session_state, 'session_id', fallback_id)
+        # Create a session-specific model path
+        self.model_path = self._get_session_model_path()
+    def _get_session_model_path(self):
+        """
+        Return the path of this session's private copy of the model file.
+        The copy lives in ``MODEL_DIR`` as ``gemma-<session_id>.gguf`` and is
+        created from ``ORIGINAL_MODEL_PATH`` the first time it is requested.
+        Returns:
+            The session copy's path, or ``ORIGINAL_MODEL_PATH`` when copying fails.
+        Raises:
+            FileNotFoundError: The copy is missing and so is the original model file.
+        """
+        session_model_path = os.path.join(MODEL_DIR, f"gemma-{self.session_id}.gguf")
+        # Reuse an existing copy
+        if os.path.exists(session_model_path):
+            return session_model_path
+        if not os.path.exists(ORIGINAL_MODEL_PATH):
+            raise FileNotFoundError(f"Original model file not found: {ORIGINAL_MODEL_PATH}")
+        logger.info(f"Creating session-specific model file for {self.session_id}")
+        try:
+            shutil.copy2(ORIGINAL_MODEL_PATH, session_model_path)
+            logger.info(f"Created session model at {session_model_path}")
+        except Exception as e:
+            logger.error(f"Failed to create session model: {str(e)}")
+            # Fallback to original model if copy fails
+            return ORIGINAL_MODEL_PATH
+        return session_model_path
+    def load_model(self,
+                  n_gpu_layers: int = DEFAULT_CONFIG["GEMMA_GPU_LAYERS"],
+                  context_size: int = DEFAULT_CONTEXT_SIZE) -> None:
+        """
+        Load the Gemma model from ``self.model_path``, falling back to CPU if a GPU load fails.
+        When a model is already loaded it is reused unless the GPU/CPU mode
+        requested differs from the current one, in which case it is unloaded
+        and loaded again.
+        Args:
+            n_gpu_layers: Number of layers to offload to GPU (0 means CPU-only)
+            context_size: Context window size passed to ``Llama`` as ``n_ctx``
+        Raises:
+            FileNotFoundError: ``self.model_path`` does not exist.
+            Exception: Whatever ``Llama`` raised, when loading fails and no
+                fallback succeeded.
+        """
+        # NOTE(review): only the GPU/CPU mode triggers a reload; a different
+        # context_size on an already-loaded model is ignored by this check.
+        if self.initialized:
+            if n_gpu_layers > 0 and not self.using_gpu:
+                # Need to reload in GPU mode
+                logger.info("Reloading model with GPU support...")
+                self.unload_model()
+            elif n_gpu_layers == 0 and self.using_gpu:
+                # Need to reload in CPU mode
+                logger.info("Reloading model in CPU-only mode...")
+                self.unload_model()
+            else:
+                # No need to reload
+                return
+        # Check if model file exists
+        if not os.path.exists(self.model_path):
+            logger.error(f"Model file not found: {self.model_path}")
+            raise FileNotFoundError(f"Model file not found: {self.model_path}")
+        # The outer try only logs and re-raises, so a failure below is logged
+        # by the inner handler(s) and again here before reaching the caller
+        try:
+            logger.info(f"Loading Gemma model from {self.model_path}...")
+            logger.info(f"Using GPU layers: {n_gpu_layers}")
+            # Log current system memory state
+            memory = psutil.virtual_memory()
+            logger.info(f"System memory: {memory.percent}% used, {memory.available / (1024**3):.2f}GB available")
+            # First attempt: load with the requested number of GPU layers
+            try:
+                # Silence Python-level stdout/stderr during model initialization
+                with suppress_stdout_stderr():
+                    self.model = Llama(
+                        model_path=str(self.model_path),
+                        n_ctx=context_size,
+                        n_gpu_layers=n_gpu_layers,
+                        verbose=False
+                    )
+                self.using_gpu = n_gpu_layers > 0
+                # Create tokenizer adapter
+                self.tokenizer = LlamaCppTokenizerAdapter(self.model)
+                self.initialized = True
+                logger.info(f"Gemma model loaded successfully with n_gpu_layers={n_gpu_layers}")
+            except Exception as load_error:
+                logger.error(f"Error during model loading: {str(load_error)}")
+                # If we failed with GPU, try CPU mode
+                if n_gpu_layers > 0:
+                    logger.info("Attempting fallback to CPU-only mode...")
+                    try:
+                        # Silence Python-level stdout/stderr during model initialization
+                        with suppress_stdout_stderr():
+                            self.model = Llama(
+                                model_path=str(self.model_path),
+                                n_ctx=context_size,
+                                n_gpu_layers=0,
+                                verbose=False
+                            )
+                        self.using_gpu = False
+                        # Create tokenizer adapter
+                        self.tokenizer = LlamaCppTokenizerAdapter(self.model)
+                        self.initialized = True
+                        logger.info("Gemma model loaded successfully in CPU-only mode")
+                    except Exception as cpu_error:
+                        logger.error(f"CPU fallback also failed: {str(cpu_error)}")
+                        raise
+                else:
+                    # Already CPU-only: nothing left to fall back to
+                    raise
+        except Exception as e:
+            logger.error(f"Failed to load Gemma model: {str(e)}")
+            raise
+    def unload_model(self):
+        """
+        Drop the loaded model and tokenizer and run a garbage collection pass.
+        Does nothing when no model is loaded.
+        """
+        if not self.initialized:
+            return
+        logger.info("Unloading Gemma model to free memory...")
+        self.initialized = False
+        self.model = None
+        self.tokenizer = None
+        # Force garbage collection so the released references are reclaimed now
+        import gc
+        gc.collect()
+        logger.info("Gemma model unloaded")
+    def __del__(self):
+        """Release the model when the translator object is destroyed (delegates to ``unload_model``)."""
+        self.unload_model()
+    def generate_translation_prompt(self, text: str, src_lang: str, tgt_lang: str) -> str:
+        """
+        Build the translation prompt sent to the model.
+        Args:
+            text: Text to translate
+            src_lang: Source language code ('en', 'ru', 'kk')
+            tgt_lang: Target language code ('en', 'ru', 'kk')
+        Returns:
+            A prompt with a system instruction, the text as the user turn and an
+            open assistant turn. Unrecognised language codes appear as 'Unknown'.
+        NOTE(review): confirm that the ``<|system|>``/``<|user|>``/``<|assistant|>``
+        markers match the chat template expected by the loaded GGUF model.
+        """
+        language_names = {'en': 'English', 'ru': 'Russian', 'kk': 'Kazakh'}
+        source_lang = language_names.get(src_lang, 'Unknown')
+        target_lang = language_names.get(tgt_lang, 'Unknown')
+        system_prompt = (
+            f"Translate the following text from {source_lang} to {target_lang}. "
+            f"Provide only the translated text without explanations, introductions, or comments."
+        )
+        return f"<|system|>\n{system_prompt}\n<|user|>\n{text}\n<|assistant|>\n"
+    def is_text_too_large(self, text: str) -> bool:
+        """
+        Decide whether ``text`` has to be chunked before translation.
+        Loads the model first if it is not loaded yet, because the token count
+        comes from the model's own tokenizer.
+        Args:
+            text: Input text
+        Returns:
+            True when the token count exceeds 90% of ``DEFAULT_CONTEXT_SIZE``,
+            False otherwise
+        """
+        if not self.initialized:
+            self.load_model()
+        try:
+            # Use actual tokenization when possible
+            token_count = len(self.model.tokenize(bytes(text, "utf-8")))
+        except Exception:
+            # Fallback to character-based approximation (about 4 characters per token)
+            token_count = len(text) / 4
+        # The remaining 10% is headroom for the prompt wrapper and the response
+        return token_count > DEFAULT_CONTEXT_SIZE * 0.9
+    def _split_text_into_sentences(self, text: str, lang: str) -> List[str]:
+        """
+        Split text into sentences; used as the simple fallback when full chunking fails.
+        Args:
+            text: Text to split
+            lang: Language code (the same boundary rule is applied to every language)
+        Returns:
+            List of non-empty, stripped sentences
+        """
+        # Break on whitespace that follows '.', '!' or '?'
+        boundary = r'(?<=[.!?])\s+'
+        stripped = (piece.strip() for piece in re.split(boundary, text))
+        return [piece for piece in stripped if piece]
+    def translate(self,
+                 text: str,
+                 src_lang: str,
+                 tgt_lang: str,
+                 temperature: float = 0.1,
+                 top_p: float = 0.95,
+                 max_tokens: int = DEFAULT_MAX_TOKENS) -> str:
+        """
+        Translate text using Gemma model and return the complete result.
+        Text that ``is_text_too_large`` flags is delegated to
+        ``_translate_large_text``; everything else is sent as a single prompt.
+        Args:
+            text: Text to translate
+            src_lang: Source language code ('en', 'ru', 'kk')
+            tgt_lang: Target language code ('en', 'ru', 'kk')
+            temperature: Generation temperature (lower = more deterministic)
+            top_p: Top-p sampling threshold
+            max_tokens: Maximum number of tokens to generate
+        Returns:
+            Translated text, "" when the model returns no choices, or a string
+            of the form "Error: <message>" when generation raises. Generation
+            errors are reported through the return value, not raised.
+        """
+        # is_text_too_large() also loads the model when it is not loaded yet
+        if self.is_text_too_large(text):
+            logger.info("Text is too large, using chunking")
+            return self._translate_large_text(text, src_lang, tgt_lang, temperature, top_p, max_tokens)
+        # Prepare prompt for normal-sized text
+        prompt = self.generate_translation_prompt(text, src_lang, tgt_lang)
+        try:
+            # Generate translation; stop as soon as the model starts a new turn
+            response = self.model(
+                prompt,
+                max_tokens=max_tokens,
+                temperature=temperature,
+                top_p=top_p,
+                stop=["<|user|>", "<|system|>"],
+                echo=False
+            )
+            # Extract translated text from the first choice
+            if response and "choices" in response and len(response["choices"]) > 0:
+                return response["choices"][0]["text"].strip()
+            else:
+                logger.warning("Empty or invalid response from model")
+                return ""
+        except Exception as e:
+            logger.error(f"Translation error: {str(e)}")
+            return f"Error: {str(e)}"
+    def _translate_large_text(self,
+                            text: str,
+                            src_lang: str,
+                            tgt_lang: str,
+                            temperature: float = 0.1,
+                            top_p: float = 0.95,
+                            max_tokens: int = DEFAULT_MAX_TOKENS) -> str:
+        """
+        Translate large text by splitting it into chunks and translating them one by one.
+        Args:
+            text: Text to translate
+            src_lang: Source language code ('en', 'ru', 'kk')
+            tgt_lang: Target language code ('en', 'ru', 'kk')
+            temperature: Generation temperature
+            top_p: Top-p sampling threshold
+            max_tokens: Maximum tokens to generate (applied to each chunk)
+        Returns:
+            The chunk translations and their separators concatenated and passed
+            through ``_postprocess_translation``. A chunk that fails is replaced
+            inline by "[Translation error]" or "[Error: <message>]". If the
+            whole procedure raises, "Error: <message>" is returned instead.
+        """
+        try:
+            # The chunker only distinguishes 'russian' from everything else;
+            # Kazakh is routed through the Russian path
+            lang_for_chunking = 'russian' if src_lang in ['ru', 'kk'] else 'english'
+            # Use the chunking utility to split text
+            try:
+                chunks_with_seps = chunk_text_with_separators(
+                    text=text,
+                    tokenizer=self.tokenizer,
+                    max_tokens=DEFAULT_CHUNK_SIZE,
+                    lang=lang_for_chunking
+                )
+            except Exception as chunk_error:
+                # Fallback to simpler sentence splitting if advanced chunking fails;
+                # every sentence becomes its own chunk followed by a single space
+                logger.warning(f"Advanced chunking failed: {str(chunk_error)}. Using simple sentence splitting.")
+                sentences = self._split_text_into_sentences(text, src_lang)
+                chunks_with_seps = [(sent, " ") for sent in sentences]
+            # Alternating list of translated chunks and separators
+            translations = []
+            for chunk_idx, (chunk, separator) in enumerate(chunks_with_seps):
+                # Whitespace-only chunks are not sent to the model
+                if not chunk.strip():
+                    translations.append(separator)
+                    continue
+                logger.info(f"Translating chunk {chunk_idx + 1} of {len(chunks_with_seps)}")
+                # Translate each chunk
+                prompt = self.generate_translation_prompt(chunk, src_lang, tgt_lang)
+                try:
+                    response = self.model(
+                        prompt,
+                        max_tokens=max_tokens,
+                        temperature=temperature,
+                        top_p=top_p,
+                        stop=["<|user|>", "<|system|>"],
+                        echo=False
+                    )
+                    if response and "choices" in response and len(response["choices"]) > 0:
+                        translated_chunk = response["choices"][0]["text"].strip()
+                        translations.append(translated_chunk)
+                        translations.append(separator)
+                    else:
+                        logger.warning(f"Empty response for chunk {chunk_idx}")
+                        translations.append(f"[Translation error]")
+                        translations.append(separator)
+                except Exception as e:
+                    # One failed chunk does not abort the rest of the document
+                    logger.error(f"Error translating chunk {chunk_idx}: {str(e)}")
+                    translations.append(f"[Error: {str(e)}]")
+                    translations.append(separator)
+            # Combine all translated chunks
+            combined_text = ''.join(translations)
+            # Cleanup and postprocessing
+            return self._postprocess_translation(combined_text)
+        except Exception as e:
+            logger.error(f"Large text translation error: {str(e)}")
+            return f"Error: {str(e)}"
+    def _postprocess_translation(self, text: str) -> str:
+        """
+        Normalize whitespace and punctuation spacing in translated text.
+        Args:
+            text: Raw translated text
+        Returns:
+            The text with every whitespace run collapsed to a single space, no
+            space before '.', ',', '!' or '?', and no padding just inside paired
+            double quotes. Spaces outside the quotes are kept.
+        """
+        # Collapse every whitespace run (newlines included) to one space
+        text = ' '.join(text.split())
+        # Fix punctuation spacing
+        text = text.replace(' .', '.').replace(' ,', ',')
+        text = text.replace(' !', '!').replace(' ?', '?')
+        # Trim padding inside each "..." pair only. Replacing '" ' and ' "'
+        # everywhere also removed the spaces outside the quotes and glued the
+        # quoted text to its neighbours (said "hi" to -> said"hi"to).
+        text = re.sub(r'"\s*([^"]*?)\s*"', r'"\1"', text)
+        return text
+    def translate_streaming(self,
+                           text: str,
+                           src_lang: str,
+                           tgt_lang: str,
+                           temperature: float = 0.1,
+                           top_p: float = 0.95,
+                           max_tokens: int = DEFAULT_MAX_TOKENS) -> Iterator[str]:
+        """
+        Stream translation using Gemma model.
+        This is a generator: nothing below runs until the caller starts iterating.
+        Args:
+            text: Text to translate
+            src_lang: Source language code ('en', 'ru', 'kk')
+            tgt_lang: Target language code ('en', 'ru', 'kk')
+            temperature: Generation temperature (lower = more deterministic)
+            top_p: Top-p sampling threshold
+            max_tokens: Maximum number of tokens to generate
+        Yields:
+            Pieces of translated text as they're generated. If generation raises,
+            a final "Error: <message>" item is yielded instead of propagating.
+        """
+        # is_text_too_large() also loads the model when it is not loaded yet
+        if self.is_text_too_large(text):
+            logger.info("Text is too large, using chunked streaming")
+            yield from self._translate_large_text_streaming(text, src_lang, tgt_lang, temperature, top_p, max_tokens)
+            return
+        # Prepare prompt for normal-sized text
+        prompt = self.generate_translation_prompt(text, src_lang, tgt_lang)
+        try:
+            # Stream translation
+            for chunk in self.model(
+                prompt,
+                max_tokens=max_tokens,
+                temperature=temperature,
+                top_p=top_p,
+                stop=["<|user|>", "<|system|>"],
+                echo=False,
+                stream=True
+            ):
+                if chunk and "choices" in chunk and len(chunk["choices"]) > 0:
+                    token = chunk["choices"][0]["text"]
+                    # Empty text pieces are skipped
+                    if token:
+                        yield token
+        except Exception as e:
+            logger.error(f"Streaming translation error: {str(e)}")
+            yield f"Error: {str(e)}"
+    def _translate_large_text_streaming(self,
+                                      text: str,
+                                      src_lang: str,
+                                      tgt_lang: str,
+                                      temperature: float = 0.1,
+                                      top_p: float = 0.95,
+                                      max_tokens: int = DEFAULT_MAX_TOKENS) -> Iterator[str]:
+        """
+        Stream translation of large text chunk by chunk.
+        Unlike ``_translate_large_text``, the output is not post-processed and a
+        blank line ("\\n\\n") is emitted before every chunk after the first.
+        Args:
+            text: Text to translate
+            src_lang: Source language code ('en', 'ru', 'kk')
+            tgt_lang: Target language code ('en', 'ru', 'kk')
+            temperature: Generation temperature
+            top_p: Top-p sampling threshold
+            max_tokens: Maximum tokens to generate (applied to each chunk)
+        Yields:
+            Pieces of translated text, chunk separators, and inline error
+            markers for chunks that fail
+        """
+        try:
+            # The chunker only distinguishes 'russian' from everything else;
+            # Kazakh is routed through the Russian path
+            lang_for_chunking = 'russian' if src_lang in ['ru', 'kk'] else 'english'
+            # Use the chunking utility to split text
+            try:
+                chunks_with_seps = chunk_text_with_separators(
+                    text=text,
+                    tokenizer=self.tokenizer,
+                    max_tokens=DEFAULT_CHUNK_SIZE,
+                    lang=lang_for_chunking
+                )
+            except Exception as chunk_error:
+                # Fallback to simpler sentence splitting if advanced chunking fails
+                logger.warning(f"Advanced chunking failed: {str(chunk_error)}. Using simple sentence splitting.")
+                sentences = self._split_text_into_sentences(text, src_lang)
+                chunks_with_seps = [(sent, " ") for sent in sentences]
+            for chunk_idx, (chunk, separator) in enumerate(chunks_with_seps):
+                # Whitespace-only chunks are passed through as their separator
+                if not chunk.strip():
+                    yield separator
+                    continue
+                if chunk_idx > 0:
+                    yield "\n\n"  # Add visual separation between chunks
+                # Translate each chunk
+                prompt = self.generate_translation_prompt(chunk, src_lang, tgt_lang)
+                try:
+                    # Stream chunk translation
+                    for token_chunk in self.model(
+                        prompt,
+                        max_tokens=max_tokens,
+                        temperature=temperature,
+                        top_p=top_p,
+                        stop=["<|user|>", "<|system|>"],
+                        echo=False,
+                        stream=True
+                    ):
+                        if token_chunk and "choices" in token_chunk and len(token_chunk["choices"]) > 0:
+                            token = token_chunk["choices"][0]["text"]
+                            if token:
+                                yield token
+                    # Add separator after chunk (only reached when the chunk streamed without error)
+                    yield separator
+                except Exception as e:
+                    # Report the failed chunk inline and carry on with the next one
+                    logger.error(f"Error streaming chunk {chunk_idx}: {str(e)}")
+                    yield f"\n[Error translating part {chunk_idx + 1}: {str(e)}]\n"
+        except Exception as e:
+            logger.error(f"Large text streaming error: {str(e)}")
+            yield f"\nError: {str(e)}"
+def gemma_translate(text: str, src_lang: str, tgt_lang: str, streaming: bool = True) -> Optional[Iterator[str]]:
+    """
+    Main function to translate text using Gemma 3 model.
+    Args:
+        text: Text to translate
+        src_lang: Source language code ('en', 'ru', 'kk')
+        tgt_lang: Target language code ('en', 'ru', 'kk')
+        streaming: Whether to stream the output
+    Returns:
+        If streaming is True: Iterator yielding chunks of translated text
+        If streaming is False: Complete translated text
+    """
+    if not text or not src_lang or not tgt_lang:
+        return "" if not streaming else iter([""])
+    translator = GemmaTranslator()
+    try:
+        if streaming:
+            return translator.translate_streaming(text, src_lang, tgt_lang)
+        else:
+            return translator.translate(text, src_lang, tgt_lang)
+    except Exception as e:
+        logger.error(f"Translation failed: {str(e)}")
+        return "" if not streaming else iter([f"Error: {str(e)}"])
+def display_streaming_translation(text: str, src_lang: str, tgt_lang: str) -> tuple:
+    """
+    Display streaming translation in a Streamlit app.
+    Args:
+        text: Text to translate
+        src_lang: Source language code ('en', 'ru', 'kk')
+        tgt_lang: Target language code ('en', 'ru', 'kk')
+    Returns:
+        tuple: (translated_text, needs_chunking)
+    """
+    if not text:
+        return "", False
+    # Check if text needs chunking
+    translator = GemmaTranslator()
+    if not translator.initialized:
+        translator.load_model()
+    needs_chunking = translator.is_text_too_large(text)
+    # Create placeholder for streaming output
+    placeholder = st.empty()
+    result = ""
+    # Stream translation
+    for token in gemma_translate(text, src_lang, tgt_lang, streaming=True):
+        result += token
+        placeholder.markdown(result)
+    return result, needs_chunking

utils/readability_indices.py ADDED Viewed

	@@ -0,0 +1,132 @@

+# readability_indices.py
+from nltk.tokenize import sent_tokenize, word_tokenize
+import pyphen
+import re
+from IPython.display import display, HTML
+def count_syllables(word, lang):
+    if lang == 'kk':
+        # Используем простой алгоритм для казахского языка
+        word = word.lower()
+        vowels = "аеёиоуыэюяіүұөө"
+        syllables = sum(1 for char in word if char in vowels)
+        return max(1, syllables)
+    else:
+        # Для русского и английского используем Pyphen
+        dic = pyphen.Pyphen(lang=lang)
+        hyphens = dic.inserted(word)
+        return max(1, hyphens.count('-') + 1)
+# Функции для определения сложных слов
+def is_complex_word(word, lang, syllable_threshold=3):
+    syllables = count_syllables(word, lang)
+    return syllables >= syllable_threshold
+# Функции для расчёта индексов удобочитаемости
+def flesch_reading_ease(text, lang):
+    sentences = sent_tokenize(text, language='russian' if lang == 'ru' else 'english')
+    words = word_tokenize(text, language='russian' if lang == 'ru' else 'english')
+    words = [word for word in words if word.isalpha()]
+    num_sentences = max(1, len(sentences))
+    num_words = max(1, len(words))
+    syllable_count = sum([count_syllables(word, lang) for word in words])
+    asl = num_words / num_sentences  # Средняя длина предложения
+    asw = syllable_count / num_words  # Среднее количество слогов в слове
+    if lang == 'ru':
+        fre = 206.835 - (1.3 * asl) - (60.1 * asw)
+    elif lang == 'en':
+        fre = 206.835 - (1.015 * asl) - (84.6 * asw)
+    elif lang == 'kk':
+        # Предположительные коэффициенты для казахского языка
+        fre = 206.835 - (1.2 * asl) - (70 * asw)
+    else:
+        fre = 0
+    return fre
+def flesch_kincaid_grade_level(text, lang):
+    sentences = sent_tokenize(text, language='russian' if lang == 'ru' else 'english')
+    words = word_tokenize(text, language='russian' if lang == 'ru' else 'english')
+    words = [word for word in words if word.isalpha()]
+    num_sentences = max(1, len(sentences))
+    num_words = max(1, len(words))
+    syllable_count = sum([count_syllables(word, lang) for word in words])
+    asl = num_words / num_sentences
+    asw = syllable_count / num_words
+    if lang == 'ru':
+        fkgl = (0.5 * asl) + (8.4 * asw) - 15.59
+    elif lang == 'en':
+        fkgl = (0.39 * asl) + (11.8 * asw) - 15.59
+    elif lang == 'kk':
+        fkgl = (0.5 * asl) + (9 * asw) - 13
+    else:
+        fkgl = 0
+    return fkgl
+def gunning_fog_index(text, lang):
+    sentences = sent_tokenize(text, language='russian' if lang == 'ru' else 'english')
+    words = word_tokenize(text, language='russian' if lang == 'ru' else 'english')
+    words = [word for word in words if word.isalpha()]
+    num_sentences = max(1, len(sentences))
+    num_words = max(1, len(words))
+    complex_words = [word for word in words if is_complex_word(word, lang)]
+    percentage_complex = (len(complex_words) / num_words) * 100
+    asl = num_words / num_sentences
+    fog_index = 0.4 * (asl + percentage_complex)
+    return fog_index
+def smog_index(text, lang):
+    sentences = sent_tokenize(text, language='russian' if lang == 'ru' else 'english')
+    words = word_tokenize(text, language='russian' if lang == 'ru' else 'english')
+    words = [word for word in words if word.isalpha()]
+    num_sentences = len(sentences)
+    complex_words = [word for word in words if is_complex_word(word, lang)]
+    num_complex = len(complex_words)
+    if num_sentences >= 3:
+        smog = 1.0430 * ((num_complex * (30 / num_sentences)) ** 0.5) + 3.1291
+    else:
+        smog = 0
+    return smog
+# Функция для выделения сложных слов и предложений
+def highlight_complex_text(text, lang):
+    sentences = sent_tokenize(text, language='russian' if lang == 'ru' else 'english')
+    highlighted_sentences = []
+    complex_words_list = []
+    for sentence in sentences:
+        words = word_tokenize(sentence, language='russian' if lang == 'ru' else 'english')
+        words_filtered = [word for word in words if word.isalpha()]
+        complex_words = [word for word in words_filtered if is_complex_word(word, lang)]
+        complex_words_list.extend(complex_words)
+        if len(words_filtered) > 0 and (len(complex_words) / len(words_filtered)) > 0.3:
+            highlighted_sentence = f"<mark>{sentence}</mark>"
+        else:
+            highlighted_sentence = sentence
+            for word in complex_words:
+                highlighted_sentence = re.sub(r'\b{}\b'.format(re.escape(word)), f"<b>{word}</b>", highlighted_sentence)
+        highlighted_sentences.append(highlighted_sentence)
+    highlighted_text = ' '.join(highlighted_sentences)
+    return highlighted_text, complex_words_list
+# Основная функция
+def analyze_text(text, lang_code):
+    if lang_code not in ['ru', 'en', 'kk']:
+        print('Unsupported language code. Please use "ru" for Russian, "en" for English, or "kk" for Kazakh.')
+        return
+    fre = flesch_reading_ease(text, lang_code)
+    fkgl = flesch_kincaid_grade_level(text, lang_code)
+    fog = gunning_fog_index(text, lang_code)
+    smog = smog_index(text, lang_code)
+    highlighted_text, complex_words = highlight_complex_text(text, lang_code)
+    # Вывод результатов
+    print(f"Язык: {'Русский' if lang_code == 'ru' else 'Английский' if lang_code == 'en' else 'Казахский'}")
+    print(f"Индекс удобочитаемости Флеша: {fre:.2f}")
+    print(f"Индекс Флеша-Кинкейда: {fkgl:.2f}")
+    print(f"Индекс тумана Ганнинга: {fog:.2f}")
+    print(f"Индекс SMOG: {smog:.2f}")
+    print("\nСложные слова:")
+    print(', '.join(set(complex_words)))
+    print("\nТекст с выделениями:")
+    display(HTML(highlighted_text))

utils/sherkala.py ADDED Viewed

	@@ -0,0 +1,36 @@

+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+model_path="inceptionai/Llama-3.1-Sherkala-8B-Chat"
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, device_map="auto")
+device = "mps" #if torch.cuda.is_available() else "cpu"
+tokenizer.chat_template="{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role']+'<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %} {% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"
+def get_response(text):
+    conversation = [
+        {"role": "user", "content": text}
+    ]
+    input_ids = tokenizer.apply_chat_template(
+        conversation=conversation,
+        tokenize=True,
+        add_generation_prompt=True,
+        return_tensors="pt").to(device)
+    # Generate a response
+    gen_tokens = model.generate(
+        input_ids,
+        max_new_tokens=500,
+        stop_strings=["<|eot_id|>"],
+        tokenizer=tokenizer
+        )
+    # Decode and print the generated text along with generation prompt
+    gen_text = tokenizer.decode(gen_tokens[0][len(input_ids[0]): -1])
+    return gen_text
+# Demo call that runs whenever this module is executed or imported: sends one
+# sample question in Kazakh to the model and prints the reply.
+question = 'Қазақстанның жақсы тағамдарын ұсына аласыз ба?'
+print(get_response(question))

utils/text_processing.py ADDED Viewed

	@@ -0,0 +1,15 @@

+# utils/text_processing.py
+from langdetect import detect, DetectorFactory
+# Pin langdetect's seed - presumably so that repeated detections of the same
+# text give the same answer (the library's README describes detection as
+# non-deterministic without a fixed seed).
+DetectorFactory.seed = 0
+def detect_language(text):
+    try:
+        lang = detect(text)
+        # Convert 'kk' from langdetect if it indeed returns 'kk' for Kazakh
+        if lang not in ['ru', 'en', 'kk']:
+            return None
+        return lang
+    except:
+        return None

utils/tilmash_translation.py ADDED Viewed

	@@ -0,0 +1,455 @@

+# utils/tilmash_translation.py
+import logging
+import re
+import os
+import threading
+import time
+import uuid
+from dotenv import load_dotenv
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, TranslationPipeline
+from .chunking import chunk_text_with_separators
+from huggingface_hub import login
+from typing import Iterator
+from config import DEFAULT_CONFIG
+# Load environment variables from .env file
+load_dotenv()
+hf_token = os.getenv('HF_TOKEN')
+if not hf_token:
+    logging.warning("HF_TOKEN not found in environment variables. Model downloading might fail.")
+else:
+    login(token=hf_token)
+# Global tilmash lock file
+LOCK_DIR = os.path.join("local_llms", "locks")
+os.makedirs(LOCK_DIR, exist_ok=True)
+TILMASH_LOCK_FILE = os.path.join(LOCK_DIR, "tilmash.lock")
+# Get session timeout from config
+SESSION_TIMEOUT = DEFAULT_CONFIG["SESSION_TIMEOUT"]
+class ExclusiveResourceLock:
+    """File-based lock for exclusive GPU resource access across processes."""
+    def __init__(self, lock_file, timeout=SESSION_TIMEOUT):
+        self.lock_file = lock_file
+        self.timeout = timeout
+        self.lock_id = str(uuid.uuid4())
+        self.acquired = False
+    def acquire(self):
+        """Acquire exclusive lock with timeout."""
+        start_time = time.time()
+        while time.time() - start_time < self.timeout:
+            try:
+                # Try to create the lock file
+                if not os.path.exists(self.lock_file):
+                    with open(self.lock_file, 'w') as f:
+                        f.write(f"{self.lock_id}\n{os.getpid()}\n{time.time()}")
+                    # Verify we got the lock
+                    with open(self.lock_file, 'r') as f:
+                        content = f.read().split('\n')
+                        if content and content[0] == self.lock_id:
+                            self.acquired = True
+                            return True
+                # Check if lock file is stale (older than 5 minutes)
+                elif os.path.exists(self.lock_file):
+                    lock_time = os.path.getmtime(self.lock_file)
+                    if time.time() - lock_time > 300:  # 5 minutes
+                        try:
+                            # Remove stale lock
+                            os.remove(self.lock_file)
+                            continue
+                        except:
+                            pass
+                # Wait before retrying
+                time.sleep(1)
+            except Exception as e:
+                logging.error(f"Lock acquisition error: {str(e)}")
+                time.sleep(1)
+        return False
+    def release(self):
+        """Release the lock if we own it."""
+        if not self.acquired:
+            return
+        try:
+            if os.path.exists(self.lock_file):
+                with open(self.lock_file, 'r') as f:
+                    content = f.read().split('\n')
+                    if content and content[0] == self.lock_id:
+                        os.remove(self.lock_file)
+                        self.acquired = False
+        except Exception as e:
+            logging.error(f"Lock release error: {str(e)}")
+    def __enter__(self):
+        self.acquire()
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.release()
+class TilmashTranslator:
+    """
+    Thread-safe translator using Tilmash model
+    """
+    def __init__(self):
+        """Initialize the Tilmash translator."""
+        # Use thread-local lock
+        self._lock = threading.RLock()
+        self.initialized = False
+        self.model = None
+        self.tokenizer = None
+        # Get session ID
+        import streamlit as st
+        self.session_id = getattr(st.session_state, 'session_id', str(uuid.uuid4()))
+    def load_model(self):
+        """Load the Tilmash model if not already loaded."""
+        with self._lock:
+            if self.initialized:
+                return self.model, self.tokenizer
+            try:
+                model_name = "issai/tilmash"
+                cache_dir = "local_llms"
+                # Ensure cache directory exists
+                os.makedirs(cache_dir, exist_ok=True)
+                try:
+                    # First try to load the model locally
+                    logging.info(f"Loading Tilmash model for session {self.session_id[:8]}...")
+                    try:
+                        self.tokenizer = AutoTokenizer.from_pretrained(
+                            model_name,
+                            cache_dir=cache_dir,
+                            local_files_only=True
+                        )
+                        self.model = AutoModelForSeq2SeqLM.from_pretrained(
+                            model_name,
+                            cache_dir=cache_dir,
+                            local_files_only=True
+                        )
+                        logging.info("Successfully loaded model from local cache.")
+                    except OSError:
+                        # If local loading fails, download the model
+                        logging.info("Model not found locally. Downloading from Hugging Face...")
+                        self.tokenizer = AutoTokenizer.from_pretrained(
+                            model_name,
+                            cache_dir=cache_dir,
+                            local_files_only=False
+                        )
+                        self.model = AutoModelForSeq2SeqLM.from_pretrained(
+                            model_name,
+                            cache_dir=cache_dir,
+                            local_files_only=False
+                        )
+                        logging.info("Successfully downloaded and loaded the model.")
+                    self.initialized = True
+                    return self.model, self.tokenizer
+                except ValueError as e:
+                    logging.error(f"Invalid model configuration: {str(e)}")
+                    raise ValueError(f"Failed to load model: {str(e)}")
+                except Exception as e:
+                    logging.error(f"Unexpected error during model initialization: {str(e)}")
+                    raise Exception(f"Failed to load model: {str(e)}")
+            except Exception as e:
+                logging.error(f"Failed to load Tilmash model: {str(e)}")
+                raise
+    def unload_model(self):
+        """Unload the model to free memory"""
+        with self._lock:
+            if self.initialized:
+                logging.info("Unloading Tilmash model to free memory...")
+                self.model = None
+                self.tokenizer = None
+                self.initialized = False
+                # Force garbage collection
+                import gc
+                gc.collect()
+                logging.info("Tilmash model unloaded")
+    def create_pipeline(self, src_lang, tgt_lang, max_length=512):
+        """Create a translation pipeline with the loaded model."""
+        with self._lock:
+            lang_map = {
+                'ru': 'rus_Cyrl',
+                'en': 'eng_Latn',
+                'kk': 'kaz_Cyrl'
+            }
+            # Validate language pair
+            if src_lang not in lang_map or tgt_lang not in lang_map:
+                raise ValueError(f"Unsupported language pair: {src_lang} -> {tgt_lang}")
+            # Make sure model is loaded
+            if not self.initialized:
+                self.load_model()
+            # Configure translation pipeline with optimized parameters
+            pipeline = TranslationPipeline(
+                model=self.model,
+                tokenizer=self.tokenizer,
+                src_lang=lang_map[src_lang],
+                tgt_lang=lang_map[tgt_lang],
+                max_length=max_length,
+                num_beams=7,
+                early_stopping=True,
+                repetition_penalty=1.3,
+                no_repeat_ngram_size=2,
+                length_penalty=1.1,
+                truncation=True,
+                clean_up_tokenization_spaces=True
+            )
+            return pipeline
+    def translate(self, text, src_lang, tgt_lang, max_length=512):
+        """Translate text using the Tilmash model."""
+        with self._lock:
+            try:
+                pipeline = self.create_pipeline(src_lang, tgt_lang, max_length)
+                # Split text into sentences for better quality
+                sentences = re.split(r'(?<=[.!?]) +', text)
+                translated_sentences = []
+                for sentence in sentences:
+                    if sentence.strip():
+                        result = pipeline(sentence)
+                        translated_sentence = _extract_translation(result)
+                        translated_sentences.append(translated_sentence)
+                return ' '.join(translated_sentences)
+            except Exception as e:
+                logging.error(f"Translation error: {str(e)}")
+                return f"Error: {str(e)}"
+    def translate_streaming(self, text, src_lang, tgt_lang, max_length=512) -> Iterator[str]:
+        """Stream translation results sentence by sentence."""
+        try:
+            # Make sure model is loaded - must be done in the locked section
+            with self._lock:
+                if not self.initialized:
+                    self.load_model()
+                pipeline = self.create_pipeline(src_lang, tgt_lang, max_length)
+            # Check if text is too large for single processing
+            # Improved text size detection - check by paragraphs
+            paragraphs = re.split(r'\n\s*\n', text)
+            is_large_text = len(paragraphs) > 3 or len(text) > 1000  # Multiple paragraphs or long text
+            if is_large_text:
+                # Process paragraph by paragraph for structured documents
+                for i, paragraph in enumerate(paragraphs):
+                    if not paragraph.strip():
+                        yield "\n\n"
+                        continue
+                    # If paragraph itself is too large, process it sentence by sentence
+                    if len(paragraph) > 800:
+                        sentences = re.split(r'(?<=[.!?])\s+', paragraph)
+                        for sentence in sentences:
+                            if not sentence.strip():
+                                continue
+                            try:
+                                # Only lock the actual model inference
+                                with self._lock:
+                                    result = pipeline(sentence)
+                                translated = _extract_translation(result)
+                                yield translated + " "
+                            except Exception as e:
+                                logging.error(f"Error translating sentence: {str(e)}")
+                                yield f"[Error: {str(e)}] "
+                    else:
+                        # Process whole paragraph at once
+                        try:
+                            # Only lock the actual model inference
+                            with self._lock:
+                                result = pipeline(paragraph)
+                            translated = _extract_translation(result)
+                            yield translated
+                            # Add paragraph break after each paragraph
+                            if i < len(paragraphs) - 1:
+                                yield "\n\n"
+                        except Exception as e:
+                            logging.error(f"Error translating paragraph: {str(e)}")
+                            yield f"[Error translating paragraph: {str(e)}]\n\n"
+            else:
+                # For short texts, process the entire text at once
+                try:
+                    # Only lock the actual model inference
+                    with self._lock:
+                        result = pipeline(text)
+                    translated = _extract_translation(result)
+                    yield translated
+                except Exception as e:
+                    logging.error(f"Error translating text: {str(e)}")
+                    yield f"[Error: {str(e)}]"
+        except Exception as e:
+            logging.error(f"Streaming translation error: {str(e)}")
+            yield f"Error initializing translation: {str(e)}"
+def tilmash_translate(input_text, src_lang, tgt_lang, max_length=512):
+    """Main translation function with structure preservation"""
+    try:
+        translator = TilmashTranslator()
+        return translator.translate(input_text, src_lang, tgt_lang, max_length)
+    except Exception as e:
+        logging.error(f"Translation failed: {str(e)}")
+        return f"Translation error: {str(e)}"
+def tilmash_translate_streaming(input_text, src_lang, tgt_lang, max_length=512) -> Iterator[str]:
+    """Streaming version of the translation function that yields translated sentences one by one"""
+    try:
+        translator = TilmashTranslator()
+        yield from translator.translate_streaming(input_text, src_lang, tgt_lang, max_length)
+    except Exception as e:
+        logging.error(f"Streaming translation failed: {str(e)}")
+        yield f"Translation error: {str(e)}"
+def display_tilmash_streaming_translation(text: str, src_lang: str, tgt_lang: str) -> tuple:
+    """
+    Display streaming translation in a Streamlit app.
+    Args:
+        text: Text to translate
+        src_lang: Source language code ('en', 'ru', 'kk')
+        tgt_lang: Target language code ('en', 'ru', 'kk')
+    Returns:
+        tuple: (translated_text, needs_chunking)
+    """
+    import streamlit as st
+    if not text:
+        return "", False
+    # Check if text needs chunking
+    needs_chunking = len(text) > 1000  # Roughly 250 tokens
+    # Create placeholder for streaming output
+    placeholder = st.empty()
+    result = ""
+    # Stream translation
+    for sentence in tilmash_translate_streaming(text, src_lang, tgt_lang):
+        result += sentence
+        placeholder.markdown(result)
+    return result, needs_chunking
+def _extract_translation(result):
+    """Safe extraction of translation text from pipeline output"""
+    try:
+        if isinstance(result, list) and len(result) > 0:
+            return result[0].get('translation_text', '').strip()
+        return ""
+    except Exception as e:
+        logging.error(f"Translation extraction error: {str(e)}")
+        return ""
+def _process_large_text(text, src_lang, pipeline, tokenizer, max_length):
+    """Process long documents with structure preservation"""
+    try:
+        chunks_with_seps = chunk_text_with_separators(
+            text=text,
+            tokenizer=tokenizer,
+            max_tokens=int(0.9 * max_length),
+            lang='russian' if src_lang in ['ru', 'kk'] else 'english'
+        )
+    except Exception as e:
+        logging.error(f"Chunking failed: {str(e)}")
+        return ""
+    translations = []
+    prev_separator = None
+    for chunk_idx, (chunk, separator) in enumerate(chunks_with_seps):
+        if not chunk.strip():
+            translations.append(separator)
+            continue
+        try:
+            # Process chunk through translation pipeline
+            result = pipeline(chunk)
+            translated = _extract_translation(result)
+            # Preserve original document structure
+            if prev_separator:
+                translations.append(prev_separator)
+            # Add indentation for list items and tables
+            if _is_structured_element(chunk):
+                translated = _preserve_structure(translated, chunk)
+            translations.append(translated)
+            prev_separator = separator
+        except Exception as e:
+            logging.error(f"Chunk {chunk_idx + 1} error: {str(e)}")
+            translations.append(f"<<ERROR: {chunk[:50]}...>>{separator or ' '}")
+            prev_separator = separator
+    # Assemble final text with cleanup
+    final_text = ''.join(translations).strip()
+    return _postprocess_translation(final_text)
+def _is_structured_element(text):
+    """Check if text contains document structure elements"""
+    return any([
+        re.match(r'^\s*(\d+\.|\-|\*)\s', text),  # List items
+        re.search(r':\s*$', text) and re.search(r'[A-ZА-Я]{3,}', text),  # Headers
+        re.search(r'\|.+\|', text),  # Tables
+        re.search(r'\b(Таблица|Table)\b', text, re.IGNORECASE)  # Table labels
+    ])
+def _preserve_structure(translated, original):
+    """Maintain original formatting in translated structured elements"""
+    # Preserve list indentation
+    if re.match(r'^\s*(\d+\.|\-|\*)\s', original):
+        return '\n' + translated.lstrip()
+    # Preserve table formatting
+    if '|' in original:
+        return translated.replace(' | ', '|').replace('| ', '|').replace(' |', '|')
+    return translated
+def _postprocess_translation(text):
+    """Final cleanup of translated text"""
+    # Fix list numbering
+    text = re.sub(r'\n(\d+)\.\s*\n', r'\n\1. ', text)
+    # Repair table formatting
+    text = re.sub(r'(:\s*)\n(\S)', r'\1\2', text)
+    # Normalize whitespace
+    text = re.sub(r'([,:;])\s+', r'\1 ', text)
+    text = re.sub(r'\s+([.!?])', r'\1', text)
+    # Restore special characters
+    text = text.replace('«', '"').replace('»', '"')
+    return text