Spaces:
Running
Running
import os | |
import logging | |
import time | |
import pandas as pd | |
from multiprocessing import Process | |
from synthetic_utils.dia_tts_wrapper import DiaTTSWrapper | |
def process_chunk(chunk_df, emotion, wav_dir, device, chunk_id):
    """Synthesize one audio file per row of ``chunk_df`` inside a worker.

    A ``DiaTTSWrapper`` is created for this call on ``device`` and its
    ``generate_and_save_audio`` method is invoked once per row. Failures of
    that call are logged and do not stop the remaining rows.

    Args:
        chunk_df: DataFrame slice to process; each row must have a "text"
            entry and may have a "video_name" entry.
        emotion: Label used in log messages and in generated file names.
        wav_dir: Directory passed to the TTS wrapper as ``out_dir``.
        device: Device identifier forwarded to ``DiaTTSWrapper``.
        chunk_id: Index of this chunk, used in generated file names.
    """
    synthesizer = DiaTTSWrapper(device=device)

    # Options that are the same for every row of the chunk.
    fixed_options = {
        "out_dir": wav_dir,
        "use_timestamp": False,
        "skip_if_exists": True,
        "max_trim_duration": 10.0,
    }

    for row_label, record in chunk_df.iterrows():
        # A missing "text" entry is deliberately not caught: it raises here.
        phrase = record["text"]
        # Fall back to a generated name when the row has no "video_name".
        stem = record.get("video_name", f"{emotion}_{chunk_id}_{row_label}")
        try:
            outcome = synthesizer.generate_and_save_audio(
                text=phrase,
                filename_prefix=stem,
                **fixed_options,
            )
            # The wrapper returning None is reported as a skipped file.
            if outcome is not None:
                logging.info(f"[{emotion}] ✔ {stem}.wav")
            else:
                logging.info(f"[{emotion}] ⏭️ Пропущено: {stem}.wav")
        except Exception as e:
            logging.error(f"[{emotion}] ❌ Ошибка: {stem} — {e}")
def generate_from_emotion_csv(
    csv_path: str,
    emotion: str,
    output_dir: str,
    device: str = "cuda",
    max_samples: int = None,
    num_processes: int = 1
):
    """Generate audio for the rows of a CSV file using worker processes.

    The CSV is read with pandas, optionally down-sampled, and split into
    ``num_processes`` contiguous chunks (the last chunk also receives the
    remainder rows). Every non-empty chunk is handed to ``process_chunk`` in
    its own process, and the call blocks until all workers have finished.
    Audio is written under ``<output_dir>/<emotion>/wavs``, which is created
    if it does not exist.

    Args:
        csv_path: Path of the CSV file to read.
        emotion: Label used for the output sub-directory and log messages.
        output_dir: Root directory for generated output.
        device: Device identifier forwarded to each worker.
        max_samples: If given, randomly sample at most this many rows.
        num_processes: Number of chunks / worker processes to use.

    Raises:
        ValueError: If ``num_processes`` is less than 1.
    """
    # Fail early with a clear message instead of a ZeroDivisionError or
    # IndexError from the chunking arithmetic below.
    if num_processes < 1:
        raise ValueError(f"num_processes must be >= 1, got {num_processes}")

    out_dir = os.path.join(output_dir, emotion)
    wav_dir = os.path.join(out_dir, "wavs")
    os.makedirs(wav_dir, exist_ok=True)

    logging.info(f"🎙️ Эмоция: '{emotion}' | CSV: {csv_path}")
    logging.info(f"📥 Сохранение в: {wav_dir}")

    df = pd.read_csv(csv_path)
    if max_samples is not None:
        # Sampling without replacement fails when n exceeds the row count,
        # so cap the request at what is actually available.
        df = df.sample(n=min(max_samples, len(df)))

    chunk_size, remainder = divmod(len(df), num_processes)
    chunks = [
        df.iloc[i * chunk_size:(i + 1) * chunk_size]
        for i in range(num_processes)
    ]
    if remainder > 0:
        # Rows left over by the integer division go to the last chunk.
        chunks[-1] = pd.concat([chunks[-1], df.iloc[-remainder:]])

    total_start = time.time()

    workers = []
    for i, chunk in enumerate(chunks):
        # Do not start a worker that has nothing to do. The chunk index is
        # kept as-is so generated default file names stay stable.
        if chunk.empty:
            continue
        p = Process(target=process_chunk, args=(chunk, emotion, wav_dir, device, i))
        p.start()
        workers.append((i, p))

    for i, p in workers:
        p.join()
        # Surface workers that died instead of reporting silent success.
        if p.exitcode != 0:
            logging.error(
                f"[{emotion}] ❌ Ошибка: процесс чанка {i} завершился с кодом {p.exitcode}"
            )

    total_elapsed = time.time() - total_start
    logging.info(f"✅ Эмоция '{emotion}' завершена | чанков: {len(workers)} | ⏱️ {total_elapsed:.1f} сек\n")