"""Fine-tune XTTSv2 with Coqui TTS and upload the result to the Hugging Face Hub.

The script runs top to bottom:

1. Read the Hugging Face token from the ``HF_TOKEN`` environment variable and
   store it with ``HfFolder.save_token``.
2. Launch ``python -m TTS.bin.train`` with ``CONFIG_PATH`` and ``OUTPUT_PATH``.
3. Create the target repository (if needed) and upload ``OUTPUT_PATH`` to it.

Raises:
    RuntimeError: if ``HF_TOKEN`` is not set, or if the training subprocess
        exits with a non-zero status.
"""
import json
import os
import subprocess
import sys

from datasets import load_dataset
from huggingface_hub import HfApi, HfFolder

# === Configuration ===
HF_TOKEN = os.environ.get("HF_TOKEN")  # set it in the Space's Secrets
HF_REPO_ID = "sob111/xttsv2-es-finetuned"  # destination repo
OUTPUT_PATH = "/tmp/output_model"
CONFIG_PATH = "./config.json"

# Fail fast with a clear message: without a token neither the token save
# below nor the final upload can succeed.
if not HF_TOKEN:
    raise RuntimeError(
        "❌ Falta HF_TOKEN. Defínelo en los Secrets del Space."
    )

# === Save the Hugging Face token ===
print("=== Guardando token de Hugging Face ===")
HfFolder.save_token(HF_TOKEN)

# === Download the dataset from Hugging Face (currently disabled) ===
# print("=== Descargando dataset sob111/voxpopuli_es_500 ===")
# ds = load_dataset("sob111/voxpopuli_es_500", split="train", token=HF_TOKEN)
#
# # Write metadata.json in the format expected by Coqui TTS
# os.makedirs("/tmp/voxpopuli_es_500/wav_data", exist_ok=True)
# meta_file = "/tmp/voxpopuli_es_500/metadata.json"
# with open(meta_file, "w", encoding="utf-8") as f:
#     for i, sample in enumerate(ds):
#         # Save each audio clip under wav_data
#         audio_path = f"/tmp/voxpopuli_es_500/wav_data/sample_{i}.wav"
#         array = sample["audio"]["array"]
#         import soundfile as sf
#         sf.write(audio_path, array, sample["audio"]["sampling_rate"])
#         entry = {
#             "audio_file": audio_path,
#             "text": sample.get("text") or sample.get("sentence", ""),
#             "speaker_name": str(sample.get("speaker_id", "speaker"))
#         }
#         f.write(json.dumps(entry, ensure_ascii=False) + "\n")
# print(f"✅ Metadata guardada en {meta_file}")

# === XTTSv2 training ===
print("=== Iniciando entrenamiento XTTSv2 ===")

# Important: use the official train entry point of the TTS package and pass
# only the config. Do not use paths to 'recipes/...'.
# sys.executable keeps the child process on the same interpreter (and thus
# the same installed packages) as this script; fall back to "python" if the
# interpreter path is unavailable.
train_command = [
    sys.executable or "python",
    "-m", "TTS.bin.train",
    "--config_path", CONFIG_PATH,
    "--output_path", OUTPUT_PATH,
]
try:
    subprocess.run(train_command, check=True)
except subprocess.CalledProcessError as e:
    raise RuntimeError(
        "❌ El entrenamiento XTTSv2 falló. Revisa los logs anteriores."
    ) from e

print("=== Entrenamiento finalizado ===")

# === Upload the fine-tuned model to Hugging Face ===
print("=== Subiendo modelo fine-tune a Hugging Face ===")
api = HfApi()
api.create_repo(repo_id=HF_REPO_ID, exist_ok=True, token=HF_TOKEN)
api.upload_folder(
    folder_path=OUTPUT_PATH,
    repo_id=HF_REPO_ID,
    repo_type="model",
    token=HF_TOKEN,
)

# f-string so the actual repo id is printed instead of the literal placeholder.
print(f"✅ Fine-tuning completado y subido a {HF_REPO_ID}")