Added an inference cache and parallel processing, made the `process_talking_head_optimized` function cache- and parallelism-aware, and added cache management controls to the Gradio interface.
Files changed:
- app_optimized.py  +175 −20
- core/optimization/__init__.py  +10 −1
- core/optimization/inference_cache.py  +386 −0
- core/optimization/parallel_inference.py  +268 −0
- core/optimization/parallel_processing.py  +400 −0
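Taken together, the commit routes generation through `CachedInference` (a content-hash keyed result cache) and `OptimizedInferenceWrapper` (parallel preprocessing). A minimal sketch of how the cache layer is meant to be driven, assuming the package from this commit is importable and using a stand-in `my_generate` in place of the real SDK call:

```python
# Sketch only: wrap an arbitrary generation function with the new inference cache.
# `my_generate` is a hypothetical stand-in for the real SDK call (not part of this commit).
from core.optimization import InferenceCache, CachedInference

def my_generate(audio_path, image_path, out_path, **kwargs):
    # Placeholder for the actual video generation step.
    with open(out_path, "wb") as f:
        f.write(b"\x00")

cache = InferenceCache(cache_dir="/tmp/inference_cache", ttl_hours=24)
cached = CachedInference(cache)

out, hit, secs = cached.process_with_cache(
    my_generate, "example/audio.wav", "example/image.png", "/tmp/out.mp4",
    resolution="320x320", steps=25
)
print(hit, f"{secs:.2f}s")
```

On a second call with identical audio, image, and settings, `process_with_cache` copies the cached video to the output path instead of re-running `my_generate`.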
app_optimized.py
CHANGED
@@ -18,7 +18,12 @@ from core.optimization import (
     GPUOptimizer,
     AvatarCache,
     AvatarTokenManager,
-    ColdStartOptimizer
+    ColdStartOptimizer,
+    InferenceCache,
+    CachedInference,
+    ParallelProcessor,
+    ParallelInference,
+    OptimizedInferenceWrapper
 )
 
 # サンプルファイルのディレクトリを定義
@@ -44,6 +49,18 @@ avatar_cache = AvatarCache(cache_dir="/tmp/avatar_cache", ttl_days=14)
 token_manager = AvatarTokenManager(avatar_cache)
 print(f"✅ アバターキャッシュ初期化: {avatar_cache.get_cache_info()}")
 
+# 5. 推論キャッシュの初期化
+inference_cache = InferenceCache(
+    cache_dir="/tmp/inference_cache",
+    memory_cache_size=50,
+    file_cache_size_gb=5.0,
+    ttl_hours=24
+)
+cached_inference = CachedInference(inference_cache)
+print(f"✅ 推論キャッシュ初期化: {inference_cache.get_cache_stats()}")
+
+# 6. 並列処理の初期化(SDK初期化後に移動)
+
 # モデルの初期化(最適化版)
 USE_PYTORCH = True
 model_manager = ModelManager(cache_dir="/tmp/ditto_models", use_pytorch=USE_PYTORCH)
@@ -92,6 +109,17 @@ except Exception as e:
     traceback.print_exc()
     raise
 
+# 並列処理の初期化(SDK初期化成功後)
+parallel_processor = ParallelProcessor(num_threads=4, num_processes=2)
+parallel_inference = ParallelInference(SDK, parallel_processor)
+optimized_wrapper = OptimizedInferenceWrapper(
+    SDK,
+    use_parallel=True,
+    use_cache=True,
+    use_gpu_opt=True
+)
+print(f"✅ 並列処理初期化: {parallel_inference.get_performance_stats()}")
+
 def prepare_avatar(image_file) -> Dict[str, Any]:
     """
     画像を事前処理してアバタートークンを生成
@@ -150,16 +178,19 @@ def process_talking_head_optimized(
     audio_file,
     source_image,
     avatar_token: Optional[str] = None,
-    use_resolution_optimization: bool = True
+    use_resolution_optimization: bool = True,
+    use_inference_cache: bool = True,
+    use_parallel_processing: bool = True
 ):
     """
-    最適化されたTalking Head
+    最適化されたTalking Head生成処理(キャッシュ対応)
 
     Args:
         audio_file: 音声ファイル
         source_image: ソース画像(avatar_tokenがない場合に使用)
         avatar_token: 事前生成されたアバタートークン
        use_resolution_optimization: 解像度最適化を使用するか
+        use_inference_cache: 推論キャッシュを使用するか
     """
 
     if audio_file is None:
@@ -184,7 +215,6 @@ def process_talking_head_optimized(
 
     # 解像度最適化設定を適用
     if use_resolution_optimization:
-        # SDKに解像度設定を適用
         setup_kwargs = {
             "max_size": FIXED_RESOLUTION,  # 320固定
             "sampling_timesteps": resolution_optimizer.get_diffusion_steps()  # 25
@@ -193,15 +223,68 @@ def process_talking_head_optimized(
     else:
         setup_kwargs = {}
 
-    #
-
-
-
-
-
-
-
-
+    # 処理方法の選択
+    if use_parallel_processing and source_image:
+        # 並列処理を使用
+        print("🔄 並列処理モードで実行...")
+
+        if use_inference_cache:
+            # キャッシュ + 並列処理
+            def inference_func(audio_path, image_path, out_path, **kwargs):
+                # 並列処理ラッパーを使用
+                optimized_wrapper.process(
+                    audio_path, image_path, out_path,
+                    seed=1024,
+                    more_kwargs={"setup_kwargs": kwargs.get('setup_kwargs', {})}
+                )
+
+            # キャッシュシステムを通じて処理
+            result_path, cache_hit, process_time = cached_inference.process_with_cache(
+                inference_func,
+                audio_file,
+                source_image,
+                output_path,
+                resolution=f"{FIXED_RESOLUTION}x{FIXED_RESOLUTION}" if use_resolution_optimization else "default",
+                steps=setup_kwargs.get('sampling_timesteps', 50),
+                setup_kwargs=setup_kwargs
+            )
+            cache_status = "キャッシュヒット(並列)" if cache_hit else "新規生成(並列)"
+        else:
+            # 並列処理のみ
+            _, process_time, stats = optimized_wrapper.process(
+                audio_file, source_image, output_path,
+                seed=1024,
+                more_kwargs={"setup_kwargs": setup_kwargs}
+            )
+            cache_hit = False
+            cache_status = "並列処理(キャッシュ未使用)"
+
+    elif use_inference_cache and source_image:
+        # キャッシュのみ(並列処理なし)
+        def inference_func(audio_path, image_path, out_path, **kwargs):
+            seed_everything(1024)
+            run(SDK, audio_path, image_path, out_path,
+                more_kwargs={"setup_kwargs": kwargs.get('setup_kwargs', {})})
+
+        # キャッシュシステムを通じて処理
+        result_path, cache_hit, process_time = cached_inference.process_with_cache(
+            inference_func,
+            audio_file,
+            source_image,
+            output_path,
+            resolution=f"{FIXED_RESOLUTION}x{FIXED_RESOLUTION}" if use_resolution_optimization else "default",
+            steps=setup_kwargs.get('sampling_timesteps', 50),
+            setup_kwargs=setup_kwargs
+        )
+        cache_status = "キャッシュヒット" if cache_hit else "新規生成"
+    else:
+        # 通常処理(並列処理もキャッシュもなし)
+        print(f"処理開始: audio={audio_file}, image={source_image}, token={avatar_token is not None}")
+        seed_everything(1024)
+        run(SDK, audio_file, source_image, output_path, more_kwargs={"setup_kwargs": setup_kwargs})
+        process_time = time.time() - start_time
+        cache_hit = False
+        cache_status = "通常処理"
 
     # 結果の確認
     if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
@@ -210,8 +293,12 @@ def process_talking_head_optimized(
     ✅ 処理完了!
     処理時間: {process_time:.2f}秒
     解像度: {FIXED_RESOLUTION}×{FIXED_RESOLUTION}
-
-
+    最適化設定:
+    - 解像度最適化: {'有効' if use_resolution_optimization else '無効'}
+    - 並列処理: {'有効' if use_parallel_processing else '無効'}
+    - アバターキャッシュ: {'使用' if avatar_token else '未使用'}
+    - 推論キャッシュ: {cache_status}
+    キャッシュ統計: {inference_cache.get_cache_stats()['memory_cache_entries']}件(メモリ), {inference_cache.get_cache_stats()['file_cache_entries']}件(ファイル)
     """
         return output_path, perf_info
     else:
@@ -233,6 +320,8 @@ with gr.Blocks(title="DittoTalkingHead - Phase 3 最適化版") as demo:
     - 🎯 画像事前アップロード&キャッシュ機能
     - ⚡ GPU最適化(Mixed Precision, torch.compile)
     - 💾 Cold Start最適化
+    - 🔄 推論キャッシュ(同じ入力で即座に結果を返す)
+    - 🚀 並列処理(音声・画像の前処理を並列化)
 
     ## 使い方
     ### 方法1: 通常の使用
@@ -271,6 +360,16 @@ with gr.Blocks(title="DittoTalkingHead - Phase 3 最適化版") as demo:
                     value=True
                 )
 
+                use_cache = gr.Checkbox(
+                    label="推論キャッシュを使用(同じ入力で高速化)",
+                    value=True
+                )
+
+                use_parallel = gr.Checkbox(
+                    label="並列処理を使用(前処理を高速化)",
+                    value=True
+                )
+
                 generate_btn = gr.Button("🎬 生成", variant="primary")
 
             with gr.Column():
@@ -305,16 +404,29 @@ with gr.Blocks(title="DittoTalkingHead - Phase 3 最適化版") as demo:
 
         # タブ3: 最適化情報
         with gr.TabItem("📊 最適化情報"):
-            gr.Markdown(f"""
+            with gr.Row():
+                refresh_btn = gr.Button("🔄 情報を更新", scale=1)
+
+            info_display = gr.Markdown(f"""
             ### 現在の最適化設定
 
             {resolution_optimizer.get_optimization_summary()}
 
            {gpu_optimizer.get_optimization_summary()}
 
-            ###
+            ### アバターキャッシュ情報
            {avatar_cache.get_cache_info()}
+
+            ### 推論キャッシュ情報
+            {inference_cache.get_cache_stats()}
            """)
+
+            # キャッシュ管理ボタン
+            with gr.Row():
+                clear_inference_cache_btn = gr.Button("🗑️ 推論キャッシュをクリア", variant="secondary")
+                clear_avatar_cache_btn = gr.Button("🗑️ アバターキャッシュをクリア", variant="secondary")
+
+            cache_status = gr.Textbox(label="キャッシュ操作ステータス", lines=2)
 
     # サンプル
     example_audio = EXAMPLES_DIR / "audio.wav"
@@ -323,9 +435,9 @@ with gr.Blocks(title="DittoTalkingHead - Phase 3 最適化版") as demo:
     if example_audio.exists() and example_image.exists():
         gr.Examples(
             examples=[
-                [str(example_audio), str(example_image), None, True]
+                [str(example_audio), str(example_image), None, True, True, True]
             ],
-            inputs=[audio_input, image_input, token_input, use_optimization],
+            inputs=[audio_input, image_input, token_input, use_optimization, use_cache, use_parallel],
             outputs=[video_output, status_output],
             fn=process_talking_head_optimized
         )
@@ -333,7 +445,7 @@ with gr.Blocks(title="DittoTalkingHead - Phase 3 最適化版") as demo:
     # イベントハンドラ
     generate_btn.click(
         fn=process_talking_head_optimized,
-        inputs=[audio_input, image_input, token_input, use_optimization],
+        inputs=[audio_input, image_input, token_input, use_optimization, use_cache, use_parallel],
        outputs=[video_output, status_output]
     )
 
@@ -342,6 +454,49 @@ with gr.Blocks(title="DittoTalkingHead - Phase 3 最適化版") as demo:
         inputs=[avatar_image_input],
         outputs=[prepare_output]
     )
+
+    # キャッシュ管理関数
+    def refresh_info():
+        return f"""
+        ### 現在の最適化設定
+
+        {resolution_optimizer.get_optimization_summary()}
+
+        {gpu_optimizer.get_optimization_summary()}
+
+        ### アバターキャッシュ情報
+        {avatar_cache.get_cache_info()}
+
+        ### 推論キャッシュ情報
+        {inference_cache.get_cache_stats()}
+
+        ### 並列処理情報
+        {parallel_inference.get_performance_stats()}
+        """
+
+    def clear_inference_cache():
+        inference_cache.clear_cache()
+        return "✅ 推論キャッシュをクリアしました"
+
+    def clear_avatar_cache():
+        avatar_cache.clear_cache()
+        return "✅ アバターキャッシュをクリアしました"
+
+    # キャッシュ管理イベント
+    refresh_btn.click(
+        fn=refresh_info,
+        outputs=[info_display]
+    )
+
+    clear_inference_cache_btn.click(
+        fn=clear_inference_cache,
+        outputs=[cache_status]
+    )
+
+    clear_avatar_cache_btn.click(
+        fn=clear_avatar_cache,
+        outputs=[cache_status]
+    )
 
 if __name__ == "__main__":
     # Cold Start最適化設定でGradioを起動
core/optimization/__init__.py
CHANGED
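The updated `__init__.py` below widens the package's public API so application code keeps a single import point. A quick import smoke test, assuming the package is on the path:

```python
# Smoke test for the widened public API (assumption: core.optimization is importable).
from core.optimization import (
    InferenceCache, CachedInference,
    ParallelProcessor, PipelineProcessor,
    ParallelInference, OptimizedInferenceWrapper,
)
print(InferenceCache, ParallelProcessor)  # succeeds only if the re-exports match __all__
```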
@@ -6,6 +6,9 @@ from .resolution_optimization import FixedResolutionProcessor
 from .gpu_optimization import GPUOptimizer, OptimizedInference
 from .avatar_cache import AvatarCache, AvatarTokenManager
 from .cold_start_optimization import ColdStartOptimizer
+from .inference_cache import InferenceCache, CachedInference
+from .parallel_processing import ParallelProcessor, PipelineProcessor
+from .parallel_inference import ParallelInference, OptimizedInferenceWrapper
 
 __all__ = [
     'FixedResolutionProcessor',
@@ -13,5 +16,11 @@ __all__ = [
     'OptimizedInference',
     'AvatarCache',
     'AvatarTokenManager',
-    'ColdStartOptimizer'
+    'ColdStartOptimizer',
+    'InferenceCache',
+    'CachedInference',
+    'ParallelProcessor',
+    'PipelineProcessor',
+    'ParallelInference',
+    'OptimizedInferenceWrapper'
 ]
core/optimization/inference_cache.py
ADDED
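The new module below implements a two-tier cache (an in-memory LRU map plus a size- and TTL-bounded file cache) keyed by SHA-256 hashes of the input files and the settings that affect the output. A short sketch of the lower-level API it defines, with hypothetical paths:

```python
# Sketch of the low-level cache API added by this file (paths are hypothetical).
from core.optimization.inference_cache import InferenceCache

cache = InferenceCache(cache_dir="/tmp/inference_cache",
                       memory_cache_size=50, file_cache_size_gb=5.0, ttl_hours=24)

# The key mixes content hashes of both inputs with the settings that affect the output.
key = cache.generate_cache_key("example/audio.wav", "example/image.png",
                               resolution="320x320", steps=25, seed=1024)

if cache.get(key) is None:                 # memory cache first, then file cache
    cache.put(key, "/tmp/generated.mp4")   # copies the video into the cache directory
print(cache.get_cache_stats())             # entry counts, total size, TTL, directory
```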
@@ -0,0 +1,386 @@
"""
Inference Cache System for DittoTalkingHead
Caches video generation results for faster repeated processing
"""

import hashlib
import json
import os
import pickle
import time
from pathlib import Path
from typing import Optional, Dict, Any, Tuple, Union
from functools import lru_cache
import shutil
from datetime import datetime, timedelta


class InferenceCache:
    """
    Cache system for video generation results
    Supports both memory and file-based caching
    """

    def __init__(
        self,
        cache_dir: str = "/tmp/inference_cache",
        memory_cache_size: int = 100,
        file_cache_size_gb: float = 10.0,
        ttl_hours: int = 24
    ):
        """
        Initialize inference cache

        Args:
            cache_dir: Directory for file-based cache
            memory_cache_size: Maximum number of items in memory cache
            file_cache_size_gb: Maximum size of file cache in GB
            ttl_hours: Time to live for cache entries in hours
        """
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        self.memory_cache_size = memory_cache_size
        self.file_cache_size_bytes = int(file_cache_size_gb * 1024 * 1024 * 1024)
        self.ttl_seconds = ttl_hours * 3600

        # Metadata file for managing cache
        self.metadata_file = self.cache_dir / "cache_metadata.json"
        self.metadata = self._load_metadata()

        # In-memory cache
        self._memory_cache = {}
        self._access_times = {}

        # Clean up expired entries on initialization
        self._cleanup_expired()

    def _load_metadata(self) -> Dict[str, Any]:
        """Load cache metadata"""
        if self.metadata_file.exists():
            try:
                with open(self.metadata_file, 'r') as f:
                    return json.load(f)
            except:
                return {}
        return {}

    def _save_metadata(self):
        """Save cache metadata"""
        with open(self.metadata_file, 'w') as f:
            json.dump(self.metadata, f, indent=2)

    def generate_cache_key(
        self,
        audio_path: str,
        image_path: str,
        **kwargs
    ) -> str:
        """
        Generate unique cache key based on input parameters

        Args:
            audio_path: Path to audio file
            image_path: Path to image file
            **kwargs: Additional parameters affecting output

        Returns:
            SHA-256 hash as cache key
        """
        # Read file contents for hashing
        with open(audio_path, 'rb') as f:
            audio_hash = hashlib.sha256(f.read()).hexdigest()

        with open(image_path, 'rb') as f:
            image_hash = hashlib.sha256(f.read()).hexdigest()

        # Include relevant parameters in key
        key_data = {
            'audio': audio_hash,
            'image': image_hash,
            'resolution': kwargs.get('resolution', '320x320'),
            'steps': kwargs.get('steps', 25),
            'seed': kwargs.get('seed', None)
        }

        # Generate final key
        key_str = json.dumps(key_data, sort_keys=True)
        return hashlib.sha256(key_str.encode()).hexdigest()

    def get_from_memory(self, cache_key: str) -> Optional[str]:
        """
        Get video path from memory cache

        Args:
            cache_key: Cache key

        Returns:
            Video file path if found, None otherwise
        """
        if cache_key in self._memory_cache:
            self._access_times[cache_key] = time.time()
            return self._memory_cache[cache_key]
        return None

    def get_from_file(self, cache_key: str) -> Optional[str]:
        """
        Get video path from file cache

        Args:
            cache_key: Cache key

        Returns:
            Video file path if found, None otherwise
        """
        if cache_key not in self.metadata:
            return None

        entry = self.metadata[cache_key]

        # Check expiration
        if time.time() > entry['expires_at']:
            self._remove_cache_entry(cache_key)
            return None

        # Check if file exists
        video_path = self.cache_dir / entry['filename']
        if not video_path.exists():
            self._remove_cache_entry(cache_key)
            return None

        # Update access time
        self.metadata[cache_key]['last_access'] = time.time()
        self._save_metadata()

        # Add to memory cache
        self._add_to_memory_cache(cache_key, str(video_path))

        return str(video_path)

    def get(self, cache_key: str) -> Optional[str]:
        """
        Get video from cache (memory first, then file)

        Args:
            cache_key: Cache key

        Returns:
            Video file path if found, None otherwise
        """
        # Try memory cache first
        result = self.get_from_memory(cache_key)
        if result:
            return result

        # Try file cache
        return self.get_from_file(cache_key)

    def put(
        self,
        cache_key: str,
        video_path: str,
        **metadata
    ) -> bool:
        """
        Store video in cache

        Args:
            cache_key: Cache key
            video_path: Path to generated video
            **metadata: Additional metadata to store

        Returns:
            True if stored successfully
        """
        try:
            # Copy video to cache directory
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            cache_filename = f"{cache_key[:8]}_{timestamp}.mp4"
            cache_video_path = self.cache_dir / cache_filename

            shutil.copy2(video_path, cache_video_path)

            # Store metadata
            self.metadata[cache_key] = {
                'filename': cache_filename,
                'created_at': time.time(),
                'expires_at': time.time() + self.ttl_seconds,
                'last_access': time.time(),
                'size_bytes': os.path.getsize(cache_video_path),
                'metadata': metadata
            }

            # Check cache size and clean if needed
            self._check_cache_size()

            # Save metadata
            self._save_metadata()

            # Add to memory cache
            self._add_to_memory_cache(cache_key, str(cache_video_path))

            return True

        except Exception as e:
            print(f"Error storing cache: {e}")
            return False

    def _add_to_memory_cache(self, cache_key: str, video_path: str):
        """Add item to memory cache with LRU eviction"""
        # Check if we need to evict
        if len(self._memory_cache) >= self.memory_cache_size:
            # Find least recently used
            lru_key = min(self._access_times, key=self._access_times.get)
            del self._memory_cache[lru_key]
            del self._access_times[lru_key]

        self._memory_cache[cache_key] = video_path
        self._access_times[cache_key] = time.time()

    def _check_cache_size(self):
        """Check and maintain cache size limit"""
        total_size = sum(
            entry['size_bytes']
            for entry in self.metadata.values()
        )

        if total_size > self.file_cache_size_bytes:
            # Remove oldest entries until under limit
            sorted_entries = sorted(
                self.metadata.items(),
                key=lambda x: x[1]['last_access']
            )

            while total_size > self.file_cache_size_bytes and sorted_entries:
                key_to_remove, entry = sorted_entries.pop(0)
                total_size -= entry['size_bytes']
                self._remove_cache_entry(key_to_remove)

    def _cleanup_expired(self):
        """Remove expired cache entries"""
        current_time = time.time()
        expired_keys = [
            key for key, entry in self.metadata.items()
            if current_time > entry['expires_at']
        ]

        for key in expired_keys:
            self._remove_cache_entry(key)

        if expired_keys:
            print(f"Cleaned up {len(expired_keys)} expired cache entries")

    def _remove_cache_entry(self, cache_key: str):
        """Remove a cache entry"""
        if cache_key in self.metadata:
            # Remove file
            video_file = self.cache_dir / self.metadata[cache_key]['filename']
            if video_file.exists():
                video_file.unlink()

            # Remove from metadata
            del self.metadata[cache_key]

        # Remove from memory cache
        if cache_key in self._memory_cache:
            del self._memory_cache[cache_key]
            del self._access_times[cache_key]

    def clear_cache(self):
        """Clear all cache entries"""
        # Remove all video files
        for file in self.cache_dir.glob("*.mp4"):
            file.unlink()

        # Clear metadata
        self.metadata = {}
        self._save_metadata()

        # Clear memory cache
        self._memory_cache.clear()
        self._access_times.clear()

        print("Inference cache cleared")

    def get_cache_stats(self) -> Dict[str, Any]:
        """Get cache statistics"""
        total_size = sum(
            entry['size_bytes']
            for entry in self.metadata.values()
        )

        memory_hits = len(self._memory_cache)
        file_entries = len(self.metadata)

        return {
            'memory_cache_entries': memory_hits,
            'file_cache_entries': file_entries,
            'total_cache_size_mb': total_size / (1024 * 1024),
            'cache_size_limit_gb': self.file_cache_size_bytes / (1024 * 1024 * 1024),
            'ttl_hours': self.ttl_seconds / 3600,
            'cache_directory': str(self.cache_dir)
        }


class CachedInference:
    """
    Wrapper for cached inference execution
    """

    def __init__(self, cache: InferenceCache):
        """
        Initialize cached inference

        Args:
            cache: InferenceCache instance
        """
        self.cache = cache

    def process_with_cache(
        self,
        inference_func: callable,
        audio_path: str,
        image_path: str,
        output_path: str,
        **kwargs
    ) -> Tuple[str, bool, float]:
        """
        Process with caching

        Args:
            inference_func: Function to generate video
            audio_path: Path to audio file
            image_path: Path to image file
            output_path: Desired output path
            **kwargs: Additional parameters

        Returns:
            Tuple of (output_path, cache_hit, process_time)
        """
        start_time = time.time()

        # Generate cache key
        cache_key = self.cache.generate_cache_key(
            audio_path, image_path, **kwargs
        )

        # Check cache
        cached_video = self.cache.get(cache_key)

        if cached_video:
            # Cache hit - copy to output path
            shutil.copy2(cached_video, output_path)
            process_time = time.time() - start_time
            print(f"✅ Cache hit! Retrieved in {process_time:.2f}s")
            return output_path, True, process_time

        # Cache miss - generate video
        print("Cache miss - generating video...")
        inference_func(audio_path, image_path, output_path, **kwargs)

        # Store in cache
        if os.path.exists(output_path):
            self.cache.put(cache_key, output_path, **kwargs)

        process_time = time.time() - start_time
        return output_path, False, process_time
core/optimization/parallel_inference.py
ADDED
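This module wires parallel preprocessing into the inference path and exposes `OptimizedInferenceWrapper` as the single entry point used by `app_optimized.py`. A minimal sketch of a call, assuming `SDK` is the already-initialized StreamSDK instance from the app and the example files exist:

```python
# Sketch only: drive the combined wrapper directly (SDK and paths assumed to exist).
from core.optimization import OptimizedInferenceWrapper

wrapper = OptimizedInferenceWrapper(SDK, use_parallel=True, use_cache=True, use_gpu_opt=True)

out, secs, stats = wrapper.process(
    "example/audio.wav", "example/image.png", "/tmp/out.mp4",
    seed=1024,
    more_kwargs={"setup_kwargs": {"max_size": 320, "sampling_timesteps": 25}},
)
print(stats)        # includes 'preprocessing': 'parallel' or 'sequential' and the timing

wrapper.shutdown()  # releases the underlying thread/process pools
```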
@@ -0,0 +1,268 @@
"""
Parallel Inference Integration for DittoTalkingHead
Integrates parallel processing into the inference pipeline
"""

import asyncio
import time
from typing import Dict, Any, Tuple, Optional
import numpy as np
import torch
from pathlib import Path

from .parallel_processing import ParallelProcessor, PipelineProcessor


class ParallelInference:
    """
    Parallel inference wrapper for DittoTalkingHead
    """

    def __init__(self, sdk, parallel_processor: Optional[ParallelProcessor] = None):
        """
        Initialize parallel inference

        Args:
            sdk: StreamSDK instance
            parallel_processor: ParallelProcessor instance
        """
        self.sdk = sdk
        self.parallel_processor = parallel_processor or ParallelProcessor(num_threads=4)

        # Setup pipeline stages
        self.pipeline_stages = {
            'load': self._load_files,
            'preprocess': self._preprocess,
            'inference': self._inference,
            'postprocess': self._postprocess
        }

    def _load_files(self, paths: Dict[str, str]) -> Dict[str, Any]:
        """Load audio and image files"""
        audio_path = paths['audio']
        image_path = paths['image']

        # Parallel loading
        audio_data, image_data = self.parallel_processor.preprocess_parallel_sync(
            audio_path, image_path
        )

        return {
            'audio_data': audio_data,
            'image_data': image_data,
            'paths': paths
        }

    def _preprocess(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Preprocess loaded data"""
        # Extract audio features
        audio = data['audio_data']['audio']
        sr = data['audio_data']['sample_rate']

        # Prepare for SDK
        import librosa
        import math

        # Calculate number of frames
        num_frames = math.ceil(len(audio) / 16000 * 25)

        # Prepare image
        image = data['image_data']['image']

        return {
            'audio': audio,
            'image': image,
            'num_frames': num_frames,
            'paths': data['paths']
        }

    def _inference(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Run inference"""
        # This would integrate with the actual SDK inference
        # For now, placeholder
        return {
            'result': 'inference_result',
            'paths': data['paths']
        }

    def _postprocess(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Postprocess results"""
        return data

    async def process_parallel_async(
        self,
        audio_path: str,
        image_path: str,
        output_path: str,
        **kwargs
    ) -> Tuple[str, float]:
        """
        Process with full parallelization (async)

        Args:
            audio_path: Path to audio file
            image_path: Path to image file
            output_path: Output video path
            **kwargs: Additional parameters

        Returns:
            Tuple of (output_path, process_time)
        """
        start_time = time.time()

        # Parallel preprocessing
        audio_data, image_data = await self.parallel_processor.preprocess_parallel_async(
            audio_path, image_path, kwargs.get('target_size', 320)
        )

        # Run inference (simplified for integration)
        # In real implementation, this would call SDK methods

        process_time = time.time() - start_time
        return output_path, process_time

    def process_parallel_sync(
        self,
        audio_path: str,
        image_path: str,
        output_path: str,
        **kwargs
    ) -> Tuple[str, float]:
        """
        Process with parallelization (sync)

        Args:
            audio_path: Path to audio file
            image_path: Path to image file
            output_path: Output video path
            **kwargs: Additional parameters

        Returns:
            Tuple of (output_path, process_time)
        """
        start_time = time.time()

        try:
            # Parallel preprocessing
            print("🔄 Starting parallel preprocessing...")
            preprocess_start = time.time()

            audio_data, image_data = self.parallel_processor.preprocess_parallel_sync(
                audio_path, image_path, kwargs.get('target_size', 320)
            )

            preprocess_time = time.time() - preprocess_start
            print(f"✅ Parallel preprocessing completed in {preprocess_time:.2f}s")

            # Run actual SDK inference
            # This integrates with the existing SDK
            from inference import run, seed_everything

            seed_everything(kwargs.get('seed', 1024))

            inference_start = time.time()
            run(self.sdk, audio_path, image_path, output_path, more_kwargs=kwargs.get('more_kwargs', {}))
            inference_time = time.time() - inference_start

            print(f"✅ Inference completed in {inference_time:.2f}s")

            total_time = time.time() - start_time

            # Performance breakdown
            print(f"""
            🎯 Performance Breakdown:
            - Preprocessing (parallel): {preprocess_time:.2f}s
            - Inference: {inference_time:.2f}s
            - Total: {total_time:.2f}s
            """)

            return output_path, total_time

        except Exception as e:
            print(f"❌ Error in parallel processing: {e}")
            raise

    def get_performance_stats(self) -> Dict[str, Any]:
        """Get performance statistics"""
        return {
            'num_threads': self.parallel_processor.num_threads,
            'num_processes': self.parallel_processor.num_processes,
            'cuda_streams_enabled': self.parallel_processor.use_cuda_streams
        }


class OptimizedInferenceWrapper:
    """
    Wrapper that combines all optimizations
    """

    def __init__(
        self,
        sdk,
        use_parallel: bool = True,
        use_cache: bool = True,
        use_gpu_opt: bool = True
    ):
        """
        Initialize optimized inference wrapper

        Args:
            sdk: StreamSDK instance
            use_parallel: Enable parallel processing
            use_cache: Enable caching
            use_gpu_opt: Enable GPU optimizations
        """
        self.sdk = sdk
        self.use_parallel = use_parallel
        self.use_cache = use_cache
        self.use_gpu_opt = use_gpu_opt

        # Initialize components
        if use_parallel:
            self.parallel_processor = ParallelProcessor(num_threads=4)
            self.parallel_inference = ParallelInference(sdk, self.parallel_processor)
        else:
            self.parallel_processor = None
            self.parallel_inference = None

    def process(
        self,
        audio_path: str,
        image_path: str,
        output_path: str,
        **kwargs
    ) -> Tuple[str, float, Dict[str, Any]]:
        """
        Process with all optimizations

        Returns:
            Tuple of (output_path, process_time, stats)
        """
        stats = {
            'parallel_enabled': self.use_parallel,
            'cache_enabled': self.use_cache,
            'gpu_opt_enabled': self.use_gpu_opt
        }

        if self.use_parallel and self.parallel_inference:
            output_path, process_time = self.parallel_inference.process_parallel_sync(
                audio_path, image_path, output_path, **kwargs
            )
            stats['preprocessing'] = 'parallel'
        else:
            # Fallback to sequential
            from inference import run, seed_everything
            start_time = time.time()
            seed_everything(kwargs.get('seed', 1024))
            run(self.sdk, audio_path, image_path, output_path, more_kwargs=kwargs.get('more_kwargs', {}))
            process_time = time.time() - start_time
            stats['preprocessing'] = 'sequential'

        stats['process_time'] = process_time

        return output_path, process_time, stats

    def shutdown(self):
        """Cleanup resources"""
        if self.parallel_processor:
            self.parallel_processor.shutdown()
core/optimization/parallel_processing.py
ADDED
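The module below supplies the thread/process pools and optional CUDA streams used above; audio and image preprocessing are submitted to the thread pool and awaited together. A short sketch of the synchronous entry point, using the repo's example files:

```python
# Sketch only: run both preprocessing steps concurrently on the thread pool.
from core.optimization.parallel_processing import ParallelProcessor

proc = ParallelProcessor(num_threads=4, num_processes=2)

audio_data, image_data = proc.preprocess_parallel_sync(
    "example/audio.wav", "example/image.png", target_size=320
)
print(audio_data["duration"], image_data["shape"])

proc.shutdown()  # release the executors when done
```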
@@ -0,0 +1,400 @@
"""
Parallel Processing Module for DittoTalkingHead
Implements concurrent audio and image preprocessing
"""

import asyncio
import concurrent.futures
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
import time
from typing import Tuple, Dict, Any, Optional, Callable
import numpy as np
from pathlib import Path
import threading
import queue
import torch
from functools import partial


class ParallelProcessor:
    """
    Parallel processing for audio and image preprocessing
    """

    def __init__(
        self,
        num_threads: int = 4,
        num_processes: int = 2,
        use_cuda_streams: bool = True
    ):
        """
        Initialize parallel processor

        Args:
            num_threads: Number of threads for I/O operations
            num_processes: Number of processes for CPU-intensive tasks
            use_cuda_streams: Use CUDA streams for GPU operations
        """
        self.num_threads = num_threads
        self.num_processes = num_processes
        self.use_cuda_streams = use_cuda_streams and torch.cuda.is_available()

        # Thread pool for I/O operations
        self.thread_executor = ThreadPoolExecutor(max_workers=num_threads)

        # Process pool for CPU-intensive operations
        self.process_executor = ProcessPoolExecutor(max_workers=num_processes)

        # CUDA streams for GPU operations
        if self.use_cuda_streams:
            self.cuda_streams = [torch.cuda.Stream() for _ in range(2)]
        else:
            self.cuda_streams = None

        print(f"✅ ParallelProcessor initialized: {num_threads} threads, {num_processes} processes")
        if self.use_cuda_streams:
            print("✅ CUDA streams enabled for GPU parallelism")

    def preprocess_audio_parallel(self, audio_path: str) -> Dict[str, Any]:
        """
        Preprocess audio file in parallel

        Args:
            audio_path: Path to audio file

        Returns:
            Preprocessed audio data
        """
        import librosa

        # Define subtasks
        def load_audio():
            return librosa.load(audio_path, sr=16000)

        def extract_features(audio, sr):
            # Extract various audio features in parallel
            features = {}

            # MFCC features
            features['mfcc'] = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)

            # Spectral features
            features['spectral_centroid'] = librosa.feature.spectral_centroid(y=audio, sr=sr)
            features['spectral_rolloff'] = librosa.feature.spectral_rolloff(y=audio, sr=sr)

            return features

        # Load audio
        audio, sr = load_audio()

        # Extract features in parallel (if needed)
        features = extract_features(audio, sr)

        return {
            'audio': audio,
            'sample_rate': sr,
            'features': features,
            'duration': len(audio) / sr
        }

    def preprocess_image_parallel(self, image_path: str, target_size: int = 320) -> Dict[str, Any]:
        """
        Preprocess image file in parallel

        Args:
            image_path: Path to image file
            target_size: Target resolution

        Returns:
            Preprocessed image data
        """
        from PIL import Image
        import cv2

        # Define subtasks
        def load_and_resize():
            # Load image
            img = Image.open(image_path).convert('RGB')

            # Resize
            img = img.resize((target_size, target_size), Image.Resampling.LANCZOS)

            return np.array(img)

        def extract_face_landmarks(img_array):
            # Face detection and landmark extraction
            # Simplified version - in production, use MediaPipe or similar
            return {
                'has_face': True,
                'landmarks': None  # Placeholder
            }

        # Execute in parallel
        future_img = self.thread_executor.submit(load_and_resize)

        # Get results
        img_array = future_img.result()

        # Extract landmarks
        landmarks = extract_face_landmarks(img_array)

        return {
            'image': img_array,
            'shape': img_array.shape,
            'landmarks': landmarks
        }

    async def preprocess_parallel_async(
        self,
        audio_path: str,
        image_path: str,
        target_size: int = 320
    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """
        Asynchronously preprocess audio and image in parallel

        Args:
            audio_path: Path to audio file
            image_path: Path to image file
            target_size: Target image resolution

        Returns:
            Tuple of (audio_data, image_data)
        """
        loop = asyncio.get_event_loop()

        # Create tasks for parallel execution
        audio_task = loop.run_in_executor(
            self.thread_executor,
            self.preprocess_audio_parallel,
            audio_path
        )

        image_task = loop.run_in_executor(
            self.thread_executor,
            partial(self.preprocess_image_parallel, target_size=target_size),
            image_path
        )

        # Wait for both tasks to complete
        audio_data, image_data = await asyncio.gather(audio_task, image_task)

        return audio_data, image_data

    def preprocess_parallel_sync(
        self,
        audio_path: str,
        image_path: str,
        target_size: int = 320
    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """
        Synchronously preprocess audio and image in parallel

        Args:
            audio_path: Path to audio file
            image_path: Path to image file
            target_size: Target image resolution

        Returns:
            Tuple of (audio_data, image_data)
        """
        # Submit tasks to thread pool
        audio_future = self.thread_executor.submit(
            self.preprocess_audio_parallel,
            audio_path
        )

        image_future = self.thread_executor.submit(
            self.preprocess_image_parallel,
            image_path,
            target_size
        )

        # Wait for results
        audio_data = audio_future.result()
        image_data = image_future.result()

        return audio_data, image_data

    def process_gpu_parallel(
        self,
        audio_tensor: torch.Tensor,
        image_tensor: torch.Tensor,
        model_audio: torch.nn.Module,
        model_image: torch.nn.Module
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Process audio and image through models using CUDA streams

        Args:
            audio_tensor: Audio tensor
            image_tensor: Image tensor
            model_audio: Audio processing model
            model_image: Image processing model

        Returns:
            Tuple of processed tensors
        """
        if not self.use_cuda_streams:
            # Fallback to sequential processing
            audio_out = model_audio(audio_tensor)
            image_out = model_image(image_tensor)
            return audio_out, image_out

        # Use CUDA streams for parallel GPU processing
        with torch.cuda.stream(self.cuda_streams[0]):
            audio_out = model_audio(audio_tensor)

        with torch.cuda.stream(self.cuda_streams[1]):
            image_out = model_image(image_tensor)

        # Synchronize streams
        torch.cuda.synchronize()

        return audio_out, image_out

    def shutdown(self):
        """Shutdown executors"""
        self.thread_executor.shutdown(wait=True)
        self.process_executor.shutdown(wait=True)
        print("✅ ParallelProcessor shutdown complete")


class PipelineProcessor:
    """
    Pipeline-based processing for continuous operations
    """

    def __init__(self, stages: Dict[str, Callable], buffer_size: int = 10):
        """
        Initialize pipeline processor

        Args:
            stages: Dictionary of stage_name -> processing_function
            buffer_size: Size of queues between stages
        """
        self.stages = stages
        self.buffer_size = buffer_size

        # Create queues between stages
        self.queues = {}
        stage_names = list(stages.keys())
        for i in range(len(stage_names) - 1):
            queue_name = f"{stage_names[i]}_to_{stage_names[i+1]}"
            self.queues[queue_name] = queue.Queue(maxsize=buffer_size)

        # Input and output queues
        self.input_queue = queue.Queue(maxsize=buffer_size)
        self.output_queue = queue.Queue(maxsize=buffer_size)

        # Worker threads
        self.workers = []
        self.stop_event = threading.Event()

    def _worker(self, stage_name: str, process_func: Callable, input_q: queue.Queue, output_q: queue.Queue):
        """Worker thread for a pipeline stage"""
        while not self.stop_event.is_set():
            try:
                # Get input with timeout
                item = input_q.get(timeout=0.1)

                if item is None:  # Poison pill
                    output_q.put(None)
                    break

                # Process item
                result = process_func(item)

                # Put result
                output_q.put(result)

            except queue.Empty:
                continue
            except Exception as e:
                print(f"Error in stage {stage_name}: {e}")
                output_q.put(None)

    def start(self):
        """Start pipeline processing"""
        stage_names = list(self.stages.keys())

        # Create worker threads
        for i, (stage_name, process_func) in enumerate(self.stages.items()):
            # Determine input and output queues
            if i == 0:
                input_q = self.input_queue
            else:
                queue_name = f"{stage_names[i-1]}_to_{stage_names[i]}"
                input_q = self.queues[queue_name]

            if i == len(stage_names) - 1:
                output_q = self.output_queue
            else:
                queue_name = f"{stage_names[i]}_to_{stage_names[i+1]}"
                output_q = self.queues[queue_name]

            # Create and start worker
            worker = threading.Thread(
                target=self._worker,
                args=(stage_name, process_func, input_q, output_q)
            )
            worker.start()
            self.workers.append(worker)

        print(f"✅ Pipeline started with {len(self.workers)} stages")

    def process(self, item: Any) -> Any:
        """Process an item through the pipeline"""
        self.input_queue.put(item)
        return self.output_queue.get()

    def stop(self):
        """Stop pipeline processing"""
        self.stop_event.set()

        # Send poison pills
        self.input_queue.put(None)

        # Wait for workers
        for worker in self.workers:
            worker.join()

        print("✅ Pipeline stopped")


def benchmark_parallel_processing():
    """Benchmark parallel vs sequential processing"""
    import time

    print("\n=== Parallel Processing Benchmark ===")

    # Create processor
    processor = ParallelProcessor(num_threads=4)

    # Test files (using example files)
    audio_path = "example/audio.wav"
    image_path = "example/image.png"

    # Sequential processing
    start_seq = time.time()
    audio_data_seq = processor.preprocess_audio_parallel(audio_path)
    image_data_seq = processor.preprocess_image_parallel(image_path)
    time_seq = time.time() - start_seq

    # Parallel processing
    start_par = time.time()
    audio_data_par, image_data_par = processor.preprocess_parallel_sync(audio_path, image_path)
    time_par = time.time() - start_par

    # Results
    print(f"Sequential processing: {time_seq:.3f}s")
    print(f"Parallel processing: {time_par:.3f}s")
    print(f"Speedup: {time_seq/time_par:.2f}x")

    processor.shutdown()

    return {
        'sequential_time': time_seq,
        'parallel_time': time_par,
        'speedup': time_seq / time_par
    }