# xtts-castellano / finetune_xtts_hf.py
# sob111 — Update finetune_xtts_hf.py (9499f80, verified)
# 1.96 kB
import json
import os
import subprocess
import sys

from datasets import load_dataset
from huggingface_hub import HfApi, HfFolder
# === Configuration ===
# Hugging Face access token; define it in the Space secrets.
HF_TOKEN = os.getenv("HF_TOKEN")
# Destination model repository on the Hugging Face Hub.
HF_REPO_ID = "sob111/xttsv2-es-finetuned"
# Folder that gets uploaded to the Hub once training has finished.
OUTPUT_PATH = "./output_model"
# Configuration file handed to the training script.
CONFIG_PATH = "./config.json"
# === Save the Hugging Face token ===
# Fail fast with a clear message when the secret is missing: HF_TOKEN is None
# in that case, and handing None to HfFolder.save_token() would only fail
# later with an error that does not mention the missing secret.
if not HF_TOKEN:
    raise RuntimeError(
        "❌ Falta HF_TOKEN. Defínelo en los Secrets del Space antes de ejecutar."
    )
print("=== Guardando token de Hugging Face ===")
# Store the token in the local Hugging Face token file.
HfFolder.save_token(HF_TOKEN)
# === Download the dataset from Hugging Face ===
print("=== Descargando dataset sob111/voxpopuli_es_500 ===")
ds = load_dataset("sob111/voxpopuli_es_500", split="train", token=HF_TOKEN)

# Write the metadata file: one JSON object per line holding the audio path,
# the transcript and the speaker name of each sample.
# NOTE(review): presumably this is the layout expected by the Coqui TTS
# formatter selected in config.json — confirm against that config.
os.makedirs("./voxpopuli_es_500", exist_ok=True)
meta_file = "./voxpopuli_es_500/metadata.json"
with open(meta_file, "w", encoding="utf-8") as f:
    for sample in ds:
        entry = {
            "audio_file": sample["audio_file"],
            "text": sample["text"],
            # Fall back to a generic speaker when the sample has no such key.
            "speaker_name": sample.get("speaker_name", "speaker"),
        }
        # ensure_ascii=False keeps accented characters readable in the file.
        f.write(json.dumps(entry, ensure_ascii=False) + "\n")
# f-string so the real path is printed instead of the literal "{meta_file}".
print(f"✅ Metadata guardada en {meta_file}")
# === Start XTTSv2 training ===
print("=== Iniciando entrenamiento XTTSv2 ===")
try:
    subprocess.run(
        [
            # Launch the trainer with the interpreter that is running this
            # script, so it sees the same environment and installed packages
            # (a bare "python" may resolve to a different interpreter).
            sys.executable,
            # NOTE(review): relative path, resolved against the current
            # working directory — confirm the script is started from the
            # directory that contains the TTS checkout.
            "TTS/bin/train_tts.py",
            "--config_path",
            CONFIG_PATH,
        ],
        check=True,  # raise CalledProcessError on a non-zero exit status
    )
except subprocess.CalledProcessError as err:
    # Chain the original error so the exit status stays in the traceback.
    raise RuntimeError(
        "❌ El entrenamiento XTTSv2 falló. Revisa los logs anteriores."
    ) from err
print("=== Entrenamiento finalizado ===")
# === Upload the fine-tuned model to Hugging Face ===
print("=== Subiendo modelo fine-tune a Hugging Face ===")
api = HfApi()
# exist_ok=True lets a re-run proceed when the repository already exists.
api.create_repo(repo_id=HF_REPO_ID, exist_ok=True, token=HF_TOKEN)
# Push the whole training output folder to the model repository.
api.upload_folder(
    folder_path=OUTPUT_PATH,
    repo_id=HF_REPO_ID,
    repo_type="model",
    token=HF_TOKEN,
)
# f-string so the repository id is printed instead of the literal "{HF_REPO_ID}".
print(f"✅ Fine-tuning completado y subido a {HF_REPO_ID}")