"""Fine-tune the coqui/XTTS-v2 model on a VoxPopuli subset and upload the result.

Steps: download the base model from the Hugging Face Hub, extract the dataset
archive, patch the model's config.json for the dataset, launch the training
script, and upload the output folder to the Hugging Face Hub.
"""
import os, subprocess, sys, zipfile

# Hugging Face Hub switches. They are set before `huggingface_hub` is imported
# so the library sees them when it reads its configuration.
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
# Kept for backward compatibility with the original script.
os.environ["HF_HUB_DISABLE_HF_TRANSFER"] = "1"
os.environ["HF_HUB_ENABLE_XET"] = "0"
# The switches huggingface_hub documents for the same intent: turn off the
# hf_transfer backend and the Xet backend.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
os.environ["HF_HUB_DISABLE_XET"] = "1"

# Numba: keep its on-disk cache in a writable directory under /tmp and
# disable JIT compilation altogether.
os.environ["NUMBA_CACHE_DIR"] = "/tmp/numba_cache"
os.makedirs("/tmp/numba_cache", exist_ok=True)
os.environ["NUMBA_DISABLE_JIT"] = "1"

from huggingface_hub import HfApi, HfFolder, upload_folder, snapshot_download

# Uninstall hf_transfer if it is present. This is best-effort: pip's exit
# status is deliberately not checked.
subprocess.run(
    [sys.executable, "-m", "pip", "uninstall", "-y", "hf_transfer"],
    check=False,
)

# === Configuration ===
HF_MODEL_ID = "tu_usuario/xtts-v2-finetuned"  # <--- replace with your own HF repo
HF_TOKEN = os.environ.get("HF_TOKEN")  # must be defined in the Space/environment
DATASET_PATH = "/tmp/dataset"  # path to the training dataset
OUTPUT_PATH = "/tmp/output_model"
BASE_MODEL = "coqui/XTTS-v2"
MODEL_DIR = "/tmp/xtts_model"  # the base-model snapshot is downloaded here

# Create the working directories up front and make them world-writable.
for _dir in (
    "/tmp/xtts_cache",
    MODEL_DIR,
    os.path.join(MODEL_DIR, ".huggingface"),
):
    os.makedirs(_dir, exist_ok=True)
    os.chmod(_dir, 0o777)

# Download the base model straight into MODEL_DIR.
# NOTE(review): /tmp/xtts_cache is created above, but the download uses
# /tmp/hf_cache as its cache_dir -- confirm which one is intended.
# NOTE(review): `resume_download` is deprecated in recent huggingface_hub
# releases -- confirm against the pinned version before removing it.
model_dir = snapshot_download(
    repo_id=BASE_MODEL,
    local_dir=MODEL_DIR,
    cache_dir="/tmp/hf_cache",
    resume_download=True,
    token=HF_TOKEN,
)

print(f"✅ Modelo descargado en: {model_dir}")

# Files inside the downloaded snapshot that the later steps read.
CONFIG_PATH = os.path.join(MODEL_DIR, "config.json")
RESTORE_PATH = os.path.join(MODEL_DIR, "model.pth")

# === 1.B Extraer el dataset
def extract_zip(zip_file_path: str, destination_path: str) -> bool:
    """Extract every member of a ZIP archive into a directory.

    The destination directory is created first if it does not exist. Errors
    raised while opening or extracting the archive are reported on stdout
    instead of being raised, so the outcome is signalled through the return
    value.

    Args:
        zip_file_path: Full path to the ZIP file.
        destination_path: Directory the contents are extracted into.

    Returns:
        True if the archive was extracted, False if an error was reported.
    """
    os.makedirs(destination_path, exist_ok=True)

    try:
        with zipfile.ZipFile(zip_file_path, "r") as archive:
            archive.extractall(destination_path)
    except zipfile.BadZipFile:
        print(f"❌ Error: The file '{zip_file_path}' is not a valid ZIP file.")
        return False
    except FileNotFoundError:
        print(f"❌ Error: The file '{zip_file_path}' was not found.")
        return False
    except Exception as e:
        # Deliberately broad: this helper reports problems rather than raising.
        print(f"❌ An unexpected error occurred: {e}")
        return False
    else:
        print(f"✅ Extracted '{zip_file_path}' to '{destination_path}' successfully.")
        return True

# Dataset archive shipped with the app, and the folder it is unpacked into.
# The destination reuses DATASET_PATH so extraction and the training config
# always point at the same directory.
zip_file = "/home/user/app/voxpopuli_es_500.zip"
dataset_folder = DATASET_PATH

# Normalise the destination to an absolute path. This does not by itself
# protect against a malicious archive; the zipfile module strips absolute
# prefixes and ".." components from member names during extraction.
safe_destination = os.path.abspath(dataset_folder)

# Unpack the dataset; extract_zip reports any failure on stdout.
extract_zip(zip_file, safe_destination)

# === 2. Edit the configuration for the VoxPopuli dataset ===
print("=== Editando configuración para fine-tuning con VoxPopuli ===")
import json

# Read with an explicit encoding so the result does not depend on the locale.
with open(CONFIG_PATH, "r", encoding="utf-8") as f:
    config = json.load(f)

# Point the run at the output folder and the extracted dataset.
config["output_path"] = OUTPUT_PATH
# NOTE(review): confirm that metadata.json is in the layout the "ljspeech"
# formatter expects.
config["datasets"] = [
    {
        "formatter": "ljspeech",
        "path": DATASET_PATH,
        "meta_file_train": "metadata.json",
    }
]
config["run_name"] = "xtts-finetune-voxpopuli"
config["lr"] = 1e-5  # lower learning rate for fine-tuning

# Overwrite the downloaded config in place with the patched values.
with open(CONFIG_PATH, "w", encoding="utf-8") as f:
    json.dump(config, f, indent=2)

# === 3. Launch training ===
print("=== Iniciando fine-tuning de XTTS-v2 ===")

import librosa
from librosa.core import spectrum

# Run the trainer with the interpreter that is executing this script, so it
# uses the same environment instead of whichever "python" is first on PATH.
# check=True aborts the script if training exits with a non-zero status.
subprocess.run(
    [
        sys.executable, "/home/user/TTS/TTS/bin/train_tts.py",
        "--config_path", CONFIG_PATH,
        "--restore_path", RESTORE_PATH,
    ],
    check=True,
)

# Alternative entry point that was previously tried, with the same arguments:
#   python -m TTS.bin.train --config_path ... --restore_path ...

# === 4. Upload the resulting model to the Hugging Face Hub ===
print("=== Subiendo modelo fine-tuneado a Hugging Face Hub ===")

# Fail with a clear message when no token is configured, instead of passing
# None on to the token store and the upload call.
if not HF_TOKEN:
    raise RuntimeError(
        "HF_TOKEN is not set; cannot upload the fine-tuned model to the "
        "Hugging Face Hub."
    )

api = HfApi()
HfFolder.save_token(HF_TOKEN)

# Push everything under OUTPUT_PATH to the target model repository.
upload_folder(
    repo_id=HF_MODEL_ID,
    repo_type="model",
    folder_path=OUTPUT_PATH,
    token=HF_TOKEN,
)

print("✅ Fine-tuning completado y modelo subido a Hugging Face.")