Add the `docs/` folder to `.gitignore` and update the ignored files
Files changed:

- .gitignore +2 -0
- README_hf_space.md +50 -0
- app.py +130 -0
- model_manager.py +267 -0
- requirements.txt +46 -0
- setup.sh +18 -0
.gitignore CHANGED

@@ -37,6 +37,8 @@ log/*
 # Folders to ignore
 example/
 ToDo/
+docs/
+
 
 !example/audio.wav
 !example/image.png
README_hf_space.md ADDED

@@ -0,0 +1,50 @@
---
title: DittoTalkingHead
emoji: 🗣️
colorFrom: blue
colorTo: purple
sdk: gradio
sdk_version: 4.19.0
app_file: app.py
pinned: false
license: apache-2.0
hardware: a100-large
---

# DittoTalkingHead - Talking Head Generation

Generates a realistic talking-head video from an audio clip and a source image.

## Features

- High-quality lip sync
- Natural facial expressions and head motion
- Fast inference with TensorRT
- Automatic model download

## Usage

1. Upload an **audio file** (WAV)
2. Upload a **source image** (PNG/JPG)
3. Click the **Generate** button

## Technical specifications

- **GPU**: NVIDIA A100 (recommended)
- **Framework**: PyTorch
- **Model**: DittoTalkingHead (PyTorch build)
- **Model size**: about 2.5 GB

## Notes

- The first run takes extra time (roughly 10-15 minutes) because the models are downloaded automatically
- Running on a GPU (A100) is recommended
- 16 kHz WAV audio is recommended

## Model source

The models are downloaded automatically from [digital-avatar/ditto-talkinghead](https://huggingface.co/digital-avatar/ditto-talkinghead).

## License

Apache License 2.0
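Once the Space is running, the flow described in the README can also be driven programmatically. A minimal sketch using `gradio_client`; the Space id and the `api_name` below are placeholders inferred from the app code in this commit, not confirmed by it:

```python
# Sketch only: call the Space's Gradio API remotely.
# Assumptions: "your-username/DittoTalkingHead" is a placeholder Space id, and
# the endpoint name is guessed from the handler function defined in app.py.
from gradio_client import Client

client = Client("your-username/DittoTalkingHead")
video_path, status = client.predict(
    "example/audio.wav",              # audio file (WAV)
    "example/image.png",              # source image (PNG/JPG)
    api_name="/process_talking_head", # assumed default endpoint name
)
print(status, video_path)
```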
app.py ADDED

@@ -0,0 +1,130 @@
import gradio as gr
import os
import tempfile
import shutil
from pathlib import Path
from model_manager import ModelManager
from stream_pipeline_offline import StreamSDK
from inference import run, seed_everything

# Initialize the models
print("=== Starting model initialization ===")

# Use the PyTorch models (the TensorRT models are very large)
USE_PYTORCH = True

model_manager = ModelManager(cache_dir="/tmp/ditto_models", use_pytorch=USE_PYTORCH)
if not model_manager.setup_models():
    raise RuntimeError("Model setup failed.")

# Initialize the SDK
if USE_PYTORCH:
    data_root = "./checkpoints/ditto_pytorch"
    # Must match the config file that ModelManager places in checkpoints/ditto_cfg
    cfg_pkl = "./checkpoints/ditto_cfg/v0.4_hubert_cfg.pkl"
else:
    data_root = "./checkpoints/ditto_trt_Ampere_Plus"
    cfg_pkl = "./checkpoints/ditto_cfg/v0.4_hubert_cfg_trt.pkl"

try:
    SDK = StreamSDK(cfg_pkl, data_root)
    print("✅ SDK initialized")
except Exception as e:
    print(f"❌ SDK initialization error: {e}")
    raise

def process_talking_head(audio_file, source_image):
    """Generate a talking-head video from an audio clip and a source image."""

    if audio_file is None:
        return None, "Please upload an audio file."

    if source_image is None:
        return None, "Please upload a source image."

    try:
        # Create a temporary output file
        with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_output:
            output_path = tmp_output.name

        # Run inference
        print(f"Processing started: audio={audio_file}, image={source_image}")
        seed_everything(1024)
        run(SDK, audio_file, source_image, output_path)

        # Check the result
        if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
            return output_path, "✅ Processing finished!"
        else:
            return None, "❌ Processing failed: no output file was generated."

    except Exception as e:
        import traceback
        error_msg = f"❌ An error occurred: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        return None, error_msg

# Gradio UI
with gr.Blocks(title="DittoTalkingHead") as demo:
    gr.Markdown("""
    # DittoTalkingHead - Talking Head Generation

    Generates a realistic talking-head video from an audio clip and a source image.

    ## Usage
    1. Upload an **audio file** (WAV)
    2. Upload a **source image** (PNG/JPG)
    3. Click the **Generate** button

    ⚠️ The first run takes extra time while the models (about 2.5 GB) are downloaded.

    ### Technical specifications
    - **Model**: DittoTalkingHead (PyTorch build)
    - **GPU**: NVIDIA A100 recommended
    - **Models provided by**: [digital-avatar/ditto-talkinghead](https://huggingface.co/digital-avatar/ditto-talkinghead)
    """)

    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(
                label="Audio file (WAV)",
                type="filepath"
            )
            image_input = gr.Image(
                label="Source image",
                type="filepath"
            )
            generate_btn = gr.Button("Generate", variant="primary")

        with gr.Column():
            video_output = gr.Video(
                label="Generated video"
            )
            status_output = gr.Textbox(
                label="Status",
                lines=3
            )

    # Examples
    gr.Examples(
        examples=[
            ["example/audio.wav", "example/image.png"]
        ],
        inputs=[audio_input, image_input],
        outputs=[video_output, status_output],
        fn=process_talking_head,
        cache_examples=True
    )

    # Event handlers
    generate_btn.click(
        fn=process_talking_head,
        inputs=[audio_input, image_input],
        outputs=[video_output, status_output]
    )

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )
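For local debugging without the Gradio UI, the same calls that `process_talking_head` makes can be run directly. A minimal sketch, assuming the checkpoints have already been placed by `ModelManager` and the bundled `example/` files and `output/` directory (created by setup.sh) exist:

```python
# Sketch: run the pipeline once, outside Gradio, using the same objects app.py uses.
from model_manager import ModelManager
from stream_pipeline_offline import StreamSDK
from inference import run, seed_everything

# Download the PyTorch checkpoints if they are not present yet
ModelManager(cache_dir="/tmp/ditto_models", use_pytorch=True).setup_models()

sdk = StreamSDK("./checkpoints/ditto_cfg/v0.4_hubert_cfg.pkl", "./checkpoints/ditto_pytorch")
seed_everything(1024)
run(sdk, "example/audio.wav", "example/image.png", "output/result.mp4")
```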
model_manager.py ADDED

@@ -0,0 +1,267 @@
import os
import shutil
import requests
from tqdm import tqdm
from pathlib import Path
import hashlib
import json
import time

class ModelManager:
    def __init__(self, cache_dir="/tmp/models", use_pytorch=False):
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.use_pytorch = use_pytorch

        # Fetch the models from the official Hugging Face repository
        base_url = "https://huggingface.co/digital-avatar/ditto-talkinghead/resolve/main"

        if use_pytorch:
            # PyTorch model configuration
            self.model_configs = [
                {
                    "name": "appearance_extractor.pth",
                    "url": f"{base_url}/checkpoints/ditto_pytorch/models/appearance_extractor.pth",
                    "dest_dir": "checkpoints/ditto_pytorch/models",
                    "dest_file": "appearance_extractor.pth",
                    "type": "file"
                },
                {
                    "name": "decoder.pth",
                    "url": f"{base_url}/checkpoints/ditto_pytorch/models/decoder.pth",
                    "dest_dir": "checkpoints/ditto_pytorch/models",
                    "dest_file": "decoder.pth",
                    "type": "file"
                },
                {
                    "name": "lmdm_v0.4_hubert.pth",
                    "url": f"{base_url}/checkpoints/ditto_pytorch/models/lmdm_v0.4_hubert.pth",
                    "dest_dir": "checkpoints/ditto_pytorch/models",
                    "dest_file": "lmdm_v0.4_hubert.pth",
                    "type": "file"
                },
                {
                    "name": "motion_extractor.pth",
                    "url": f"{base_url}/checkpoints/ditto_pytorch/models/motion_extractor.pth",
                    "dest_dir": "checkpoints/ditto_pytorch/models",
                    "dest_file": "motion_extractor.pth",
                    "type": "file"
                },
                {
                    "name": "stitch_network.pth",
                    "url": f"{base_url}/checkpoints/ditto_pytorch/models/stitch_network.pth",
                    "dest_dir": "checkpoints/ditto_pytorch/models",
                    "dest_file": "stitch_network.pth",
                    "type": "file"
                },
                {
                    "name": "warp_network.pth",
                    "url": f"{base_url}/checkpoints/ditto_pytorch/models/warp_network.pth",
                    "dest_dir": "checkpoints/ditto_pytorch/models",
                    "dest_file": "warp_network.pth",
                    "type": "file"
                },
                {
                    "name": "v0.4_hubert_cfg.pkl",
                    "url": f"{base_url}/checkpoints/ditto_cfg/v0.4_hubert_cfg.pkl",
                    "dest_dir": "checkpoints/ditto_cfg",
                    "dest_file": "v0.4_hubert_cfg.pkl",
                    "type": "file"
                }
            ]
        else:
            # TensorRT model configuration
            self.model_configs = [
                {
                    "name": "ditto_trt_models",
                    "url": os.environ.get("DITTO_TRT_URL", f"{base_url}/checkpoints/ditto_trt_Ampere_Plus.tar.gz"),
                    "dest_dir": "checkpoints",
                    "type": "archive",
                    "extract_subdir": "ditto_trt_Ampere_Plus"
                },
                {
                    "name": "v0.4_hubert_cfg_trt.pkl",
                    "url": f"{base_url}/checkpoints/ditto_cfg/v0.4_hubert_cfg_trt.pkl",
                    "dest_dir": "checkpoints/ditto_cfg",
                    "dest_file": "v0.4_hubert_cfg_trt.pkl",
                    "type": "file"
                }
            ]

        self.progress_file = self.cache_dir / "download_progress.json"
        self.download_progress = self.load_progress()

    def load_progress(self):
        """Load the download progress record."""
        if self.progress_file.exists():
            with open(self.progress_file, 'r') as f:
                return json.load(f)
        return {}

    def save_progress(self):
        """Save the download progress record."""
        with open(self.progress_file, 'w') as f:
            json.dump(self.download_progress, f)

    def get_file_hash(self, filepath):
        """Compute the SHA-256 hash of a file."""
        sha256_hash = hashlib.sha256()
        with open(filepath, "rb") as f:
            for byte_block in iter(lambda: f.read(4096), b""):
                sha256_hash.update(byte_block)
        return sha256_hash.hexdigest()

    def download_file(self, url, dest_path, retries=3):
        """Download a file, resuming a partial download if one exists."""
        dest_path = Path(dest_path)
        dest_path.parent.mkdir(parents=True, exist_ok=True)

        for attempt in range(retries):
            # Recompute the resume offset on every attempt so a partially
            # written file from a failed attempt is continued, not duplicated.
            headers = {}
            mode = 'wb'
            resume_pos = 0
            if dest_path.exists():
                resume_pos = dest_path.stat().st_size
                headers['Range'] = f'bytes={resume_pos}-'
                mode = 'ab'

            try:
                response = requests.get(url, headers=headers, stream=True, timeout=30)
                response.raise_for_status()

                total_size = int(response.headers.get('content-length', 0))
                if resume_pos > 0:
                    total_size += resume_pos

                with open(dest_path, mode) as f:
                    with tqdm(total=total_size, initial=resume_pos, unit='B', unit_scale=True, desc=dest_path.name) as pbar:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:
                                f.write(chunk)
                                pbar.update(len(chunk))

                return True

            except Exception as e:
                print(f"Download error (attempt {attempt + 1}/{retries}): {e}")
                if attempt < retries - 1:
                    time.sleep(5)  # wait before retrying
                else:
                    raise

        return False

    def extract_archive(self, archive_path, dest_dir, extract_subdir=None):
        """Extract an archive."""
        import tarfile
        import zipfile

        archive_path = Path(archive_path)
        dest_dir = Path(dest_dir)
        temp_dir = dest_dir / "temp_extract"

        try:
            if archive_path.suffix == '.gz' or archive_path.suffix == '.tar' or str(archive_path).endswith('.tar.gz'):
                with tarfile.open(archive_path, 'r:*') as tar:
                    if extract_subdir:
                        # Extract into a temporary directory, then move the subdirectory
                        temp_dir.mkdir(exist_ok=True)
                        tar.extractall(temp_dir)
                        # Move the requested subdirectory into place
                        src_dir = temp_dir / extract_subdir
                        if src_dir.exists():
                            shutil.move(str(src_dir), str(dest_dir / extract_subdir))
                        shutil.rmtree(temp_dir)
                    else:
                        tar.extractall(dest_dir)
            elif archive_path.suffix == '.zip':
                with zipfile.ZipFile(archive_path, 'r') as zip_ref:
                    zip_ref.extractall(dest_dir)
            else:
                raise ValueError(f"Unsupported archive format: {archive_path.suffix}")
        except Exception as e:
            if temp_dir.exists():
                shutil.rmtree(temp_dir)
            raise e

    def check_models_exist(self):
        """Return the configs whose model files are missing."""
        missing_models = []
        for config in self.model_configs:
            if config['type'] == 'file':
                dest_path = Path(config['dest_dir']) / config['dest_file']
                if not dest_path.exists():
                    missing_models.append(config)
            else:  # archive
                # Check the extracted subdirectory itself when one is configured;
                # the parent (checkpoints/) may already contain unrelated files.
                dest_dir = Path(config['dest_dir'])
                if config.get('extract_subdir'):
                    dest_dir = dest_dir / config['extract_subdir']
                if not dest_dir.exists() or not any(dest_dir.iterdir()):
                    missing_models.append(config)
        return missing_models

    def download_models(self):
        """Download any missing models."""
        missing_models = self.check_models_exist()

        if not missing_models:
            print("All models are already present.")
            return True

        print(f"Downloading {len(missing_models)} model(s)...")

        for config in missing_models:
            size_info = config.get('size', 'unknown')
            print(f"\nDownloading {config['name']}... (size: {size_info})")

            # Cache path for the raw download
            cache_filename = f"{config['name']}.download"
            cache_path = self.cache_dir / cache_filename

            try:
                # Download
                if not cache_path.exists() or self.download_progress.get(config['name'], {}).get('status') != 'completed':
                    self.download_file(config['url'], cache_path)
                    self.download_progress[config['name']] = {'status': 'completed'}
                    self.save_progress()

                # Extract or copy into place
                if config['type'] == 'file':
                    dest_dir = Path(config['dest_dir'])
                    dest_dir.mkdir(parents=True, exist_ok=True)
                    dest_path = dest_dir / config['dest_file']
                    shutil.copy2(cache_path, dest_path)
                else:  # archive
                    dest_dir = Path(config['dest_dir'])
                    dest_dir.mkdir(parents=True, exist_ok=True)
                    print(f"Extracting {config['name']}...")
                    extract_subdir = config.get('extract_subdir')
                    self.extract_archive(cache_path, dest_dir, extract_subdir)

                print(f"{config['name']} set up successfully")

            except Exception as e:
                print(f"Error: failed while downloading {config['name']}: {e}")
                return False

        return True

    def setup_models(self):
        """Set up the models (main entry point)."""
        print("=== DittoTalkingHead model setup ===")
        print(f"Cache directory: {self.cache_dir}")

        success = self.download_models()

        if success:
            print("\n✅ All models were set up successfully!")
        else:
            print("\n❌ An error occurred during model setup.")

        return success


if __name__ == "__main__":
    # Test run
    manager = ModelManager()
    manager.setup_models()
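The TensorRT branch of `ModelManager` is used the same way as the PyTorch branch. A short sketch, assuming a reachable archive is supplied through the `DITTO_TRT_URL` environment variable read above (the URL below is only a placeholder):

```python
# Sketch: download the TensorRT checkpoints instead of the PyTorch ones.
import os
from model_manager import ModelManager

os.environ["DITTO_TRT_URL"] = "https://example.com/ditto_trt_Ampere_Plus.tar.gz"  # placeholder URL
manager = ModelManager(cache_dir="/tmp/ditto_models", use_pytorch=False)
if manager.setup_models():
    print("TensorRT checkpoints ready under ./checkpoints/ditto_trt_Ampere_Plus")
```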
requirements.txt ADDED

@@ -0,0 +1,46 @@
# Core dependencies
torch==2.5.1
torchvision==0.20.1
torchaudio==2.5.1
numpy==2.0.1
pillow==11.0.0

# Audio processing
librosa==0.10.2.post1
soundfile==0.13.0
audioread==3.0.1
soxr==0.5.0.post1

# Video/Image processing
opencv-python-headless==4.10.0.84
imageio==2.36.1
imageio-ffmpeg==0.5.1
scikit-image==0.25.0

# Machine learning
scikit-learn==1.6.0
scipy==1.15.0
numba==0.60.0

# TensorRT (GPU acceleration)
tensorrt==8.6.1
tensorrt-bindings==8.6.1
tensorrt-libs==8.6.1
polygraphy
colored

# Web interface
gradio==4.19.0

# Utilities
tqdm==4.67.1
requests==2.32.3
pyyaml==6.0.2
joblib==1.4.2
cython==3.0.11

# CUDA dependencies
cuda-python==12.6.2.post1
nvidia-cublas-cu12==12.6.4.1
nvidia-cuda-runtime-cu12==12.6.77
nvidia-cudnn-cu12==9.6.0.74
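A quick sanity check against these pins after installation; a sketch that only confirms the core packages import and that a GPU is visible:

```python
# Sketch: verify the core pinned dependencies import and CUDA is usable.
import torch
import gradio
import tensorrt

print("torch", torch.__version__, "| CUDA available:", torch.cuda.is_available())
print("gradio", gradio.__version__, "| tensorrt", tensorrt.__version__)
```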
setup.sh ADDED

@@ -0,0 +1,18 @@
#!/bin/bash

# Setup script for Hugging Face Space
echo "=== DittoTalkingHead Setup Script ==="

# Create necessary directories
mkdir -p checkpoints/ditto_cfg
mkdir -p tmp
mkdir -p output

# Install system dependencies if needed
# apt-get update && apt-get install -y ffmpeg

# Run model download (PyTorch models)
echo "Starting model download (PyTorch models)..."
python -c "from model_manager import ModelManager; manager = ModelManager(use_pytorch=True); manager.setup_models()"

echo "Setup complete!"