Spaces:

ASesYusuf1
/

Jhfhnrqgx-Gxeelqj-Vwxglr

Running on Zero

App Files Files Community

ASesYusuf1 committed on Jun 3

Commit

f092faf

verified ·

1 Parent(s): f9b565a

Update inference.py

Browse files

Files changed (1) hide show

inference.py +129 -78

inference.py CHANGED Viewed

@@ -3,17 +3,21 @@ __author__ = 'Roman Solovyev (ZFTurbo): https://github.com/ZFTurbo/'
 import argparse
 import time
 import librosa
-from tqdm.auto import tqdm
 import sys
 import os
 import glob
 import torch
-import soundfile as sf
 import torch.nn as nn
 import numpy as np
-from assets.i18n.i18n import I18nAuto
 import spaces
 # Colab check
 try:
@@ -22,19 +26,22 @@ try:
 except ImportError:
     IS_COLAB = False
 i18n = I18nAuto()
 current_dir = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(current_dir)
 from utils import demix, get_model_from_config, normalize_audio, denormalize_audio
-from utils import prefer_target_instrument, apply_tta, load_start_checkpoint, load_lora_weights
-import warnings
-warnings.filterwarnings("ignore")
 def shorten_filename(filename, max_length=30):
-    """Dosya adını belirtilen maksimum uzunluğa kısaltır."""
     base, ext = os.path.splitext(filename)
     if len(base) <= max_length:
         return filename
@@ -42,16 +49,22 @@ def shorten_filename(filename, max_length=30):
     return shortened
 def get_soundfile_subtype(pcm_type, is_float=False):
-    """PCM türüne göre uygun soundfile alt türünü belirler."""
-    if is_float:
         return 'FLOAT'
-    subtype_map = {
-        'PCM_16': 'PCM_16',
-        'PCM_24': 'PCM_24',
-        'FLOAT': 'FLOAT'
-    }
     return subtype_map.get(pcm_type, 'FLOAT')
 def run_folder(model, args, config, device, verbose: bool = False, progress=None):
     start_time = time.time()
     model.eval()
@@ -60,7 +73,7 @@ def run_folder(model, args, config, device, verbose: bool = False, progress=None
     sample_rate = getattr(config.audio, 'sample_rate', 44100)
     logging.info(f"Total files found: {len(mixture_paths)} with sample rate: {sample_rate}")
-    print(f"Total files found: {len(mixture_paths)} with sample rate: {sample_rate}")
     instruments = prefer_target_instrument(config)[:]
     store_dir = args.store_dir
@@ -68,49 +81,65 @@ def run_folder(model, args, config, device, verbose: bool = False, progress=None
     total_files = len(mixture_paths)
     processed_files = 0
     for path in mixture_paths:
         try:
             mix, sr = librosa.load(path, sr=sample_rate, mono=False)
             logging.info(f"Loaded audio: {path}, shape: {mix.shape}")
-            print(f"Loaded audio: {path}, shape: {mix.shape}")
-            # Dosya ilerlemesi için başlangıç güncellemesi
             processed_files += 1
-            base_progress = round(((processed_files - 1) / total_files) * 100)  # Önceki dosyalar
             if progress is not None and callable(getattr(progress, '__call__', None)):
-                progress(base_progress / 100, desc=f"Processing file {processed_files}/{total_files}")
-                update_progress_html(f"Processing file {processed_files}/{total_files}", base_progress)
             mix_orig = mix.copy()
-            if 'normalize' in config.inference and config.inference['normalize']:
                 mix, norm_params = normalize_audio(mix)
-            # demix fonksiyonuna progress nesnesini ilet
-            waveforms_orig = demix(config, model, mix, device, model_type=args.model_type, pbar=False, progress=progress)
             if args.use_tta:
-                # apply_tta fonksiyonuna progress nesnesini ilet
-                waveforms_orig = apply_tta(config, model, mix, waveforms_orig, device, args.model_type, progress=progress)
             if args.demud_phaseremix_inst:
                 logging.info(f"Demudding track: {path}")
-                print(f"Demudding track: {path}")
                 instr = 'vocals' if 'vocals' in instruments else instruments[0]
                 instruments.append('instrumental_phaseremix')
                 if 'instrumental' not in instruments and 'Instrumental' not in instruments:
                     mix_modified = mix_orig - 2 * waveforms_orig[instr]
                     mix_modified_ = mix_modified.copy()
-                    waveforms_modified = demix(config, model, mix_modified, device, model_type=args.model_type, pbar=False, progress=progress)
                     if args.use_tta:
-                        waveforms_modified = apply_tta(config, model, mix_modified, waveforms_modified, device, args.model_type, progress=progress)
                     waveforms_orig['instrumental_phaseremix'] = mix_orig + waveforms_modified[instr]
                 else:
                     mix_modified = 2 * waveforms_orig[instr] - mix_orig
                     mix_modified_ = mix_modified.copy()
-                    waveforms_modified = demix(config, model, mix_modified, device, model_type=args.model_type, pbar=False, progress=progress)
                     if args.use_tta:
-                        waveforms_modified = apply_tta(config, model, mix_modified, waveforms_orig, device, args.model_type, progress=progress)
                     waveforms_orig['instrumental_phaseremix'] = mix_orig + mix_modified_ - waveforms_modified[instr]
             if args.extract_instrumental:
@@ -119,96 +148,118 @@ def run_folder(model, args, config, device, verbose: bool = False, progress=None
                 if 'instrumental' not in instruments:
                     instruments.append('instrumental')
-            for instr in instruments:
                 estimates = waveforms_orig[instr]
-                if 'normalize' in config.inference and config.inference['normalize']:
                     estimates = denormalize_audio(estimates, norm_params)
                 is_float = getattr(args, 'export_format', '').startswith('wav FLOAT')
                 codec = 'flac' if getattr(args, 'flac_file', False) else 'wav'
-                subtype = get_soundfile_subtype(args.pcm_type, is_float) if codec == 'flac' else get_soundfile_subtype('FLOAT', is_float)
                 shortened_filename = shorten_filename(os.path.basename(path))
                 output_filename = f"{shortened_filename}_{instr}.{codec}"
                 output_path = os.path.join(store_dir, output_filename)
                 sf.write(output_path, estimates.T, sr, subtype=subtype)
-            # Dosya tamamlandı, ilerleme güncellemesi
-            file_progress = round((processed_files / total_files) * 100)
             if progress is not None and callable(getattr(progress, '__call__', None)):
-                progress(file_progress / 100, desc=f"Completed file {processed_files}/{total_files}")
-                update_progress_html(f"Completed file {processed_files}/{total_files}", file_progress)
         except Exception as e:
             logging.error(f"Cannot read track: {path}. Error: {str(e)}")
-            print(f"Cannot read track: {path}. Error: {str(e)}")
             continue
     elapsed_time = time.time() - start_time
-    logging.info(f"Elapsed time: {elapsed_time:.2f} seconds")
-    print(f"Elapsed time: {elapsed_time:.2f} seconds")
-    # Tüm işlem tamamlandı
     if progress is not None and callable(getattr(progress, '__call__', None)):
-        progress(1.0, desc="Processing complete")
-        update_progress_html("Processing complete", 100)
 @spaces.GPU
-def proc_folder(args):
-    parser = argparse.ArgumentParser(description=i18n("proc_folder_description"))
-    parser.add_argument("--model_type", type=str, default='mdx23c', help=i18n("model_type_help"))
-    parser.add_argument("--config_path", type=str, help=i18n("config_path_help"))
-    parser.add_argument("--demud_phaseremix_inst", action='store_true', help=i18n("demud_phaseremix_help"))
-    parser.add_argument("--start_check_point", type=str, default='', help=i18n("start_checkpoint_help"))
-    parser.add_argument("--input_folder", type=str, help=i18n("input_folder_help"))
-    parser.add_argument("--audio_path", type=str, help=i18n("audio_path_help"))
-    parser.add_argument("--store_dir", type=str, default="", help=i18n("store_dir_help"))
-    parser.add_argument("--device_ids", nargs='+', type=int, default=0, help=i18n("device_ids_help"))
-    parser.add_argument("--extract_instrumental", action='store_true', help=i18n("extract_instrumental_help"))
-    parser.add_argument("--disable_detailed_pbar", action='store_true', help=i18n("disable_detailed_pbar_help"))
-    parser.add_argument("--force_cpu", action='store_true', help=i18n("force_cpu_help"))
-    parser.add_argument("--flac_file", action='store_true', help=i18n("flac_file_help"))
-    parser.add_argument("--export_format", type=str, choices=['wav FLOAT', 'flac PCM_16', 'flac PCM_24'], default='flac PCM_24', help=i18n("export_format_help"))
-    parser.add_argument("--pcm_type", type=str, choices=['PCM_16', 'PCM_24'], default='PCM_24', help=i18n("pcm_type_help"))
-    parser.add_argument("--use_tta", action='store_true', help=i18n("use_tta_help"))
-    parser.add_argument("--lora_checkpoint", type=str, default='', help=i18n("lora_checkpoint_help"))
-    parser.add_argument("--chunk_size", type=int, default=1000000, help="Inference chunk size")
-    parser.add_argument("--overlap", type=int, default=4, help="Inference overlap factor")
-    if args is None:
-        args = parser.parse_args()
-    else:
-        args = parser.parse_args(args)
     device = "cpu"
     if args.force_cpu:
-        device = "cpu"
     elif torch.cuda.is_available():
         print(i18n("cuda_available"))
-        device = f'cuda:{args.device_ids[0]}' if type(args.device_ids) == list else f'cuda:{args.device_ids}'
     elif torch.backends.mps.is_available():
         device = "mps"
     print(i18n("using_device").format(device))
     model_load_start_time = time.time()
     torch.backends.cudnn.benchmark = True
-    model, config = get_model_from_config(args.model_type, args.config_path)
-    if args.start_check_point != '':
-        load_start_checkpoint(args, model, type_='inference')
     print(i18n("instruments_print").format(config.training.instruments))
-    if type(args.device_ids) == list and len(args.device_ids) > 1 and not args.force_cpu:
         model = nn.DataParallel(model, device_ids=args.device_ids)
     model = model.to(device)
-    print(i18n("model_load_time").format(time.time() - model_load_start_time))
-    run_folder(model, args, config, device, verbose=False)
 if __name__ == "__main__":
-    proc_folder(None)

 import argparse
 import time
+import logging
 import librosa
 import sys
 import os
 import glob
 import torch
 import torch.nn as nn
 import numpy as np
+import soundfile as sf
 import spaces
+import warnings
+warnings.filterwarnings("ignore")
+# Logging configuration
+logging.basicConfig(level=logging.DEBUG, filename='utils.log', format='%(asctime)s - %(levelname)s - %(message)s')
 # Colab check
 try:
 except ImportError:
     IS_COLAB = False
# i18n placeholder
class I18nAuto:
    """Stand-in translator: hands every message back untranslated."""

    def __call__(self, message):
        """Return ``message`` exactly as given; no lookup is performed."""
        return message

    def format(self, message, *args):
        """Fill the positional fields of ``message`` with ``args`` via ``str.format``."""
        template = message
        return template.format(*args)
 i18n = I18nAuto()
 current_dir = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(current_dir)
 from utils import demix, get_model_from_config, normalize_audio, denormalize_audio
+from utils import prefer_target_instrument, apply_tta, load_start_checkpoint
 def shorten_filename(filename, max_length=30):
     # Returns `filename` untouched when the part before its extension is at most `max_length` characters long.
     base, ext = os.path.splitext(filename)
     if len(base) <= max_length:
         return filename
     # NOTE(review): the statement that builds `shortened` is not visible in this listing — confirm how long names are cut.
     return shortened
 def get_soundfile_subtype(pcm_type, is_float=False):
+    if pcm_type == 'FLOAT' or is_float:
         return 'FLOAT'
+    subtype_map = {'PCM_16': 'PCM_16', 'PCM_24': 'PCM_24', 'FLOAT': 'FLOAT'}
     return subtype_map.get(pcm_type, 'FLOAT')
def update_progress_html(progress_label, progress_percent):
    """Render the custom progress bar as an HTML fragment.

    Args:
        progress_label: Text shown above the bar. It is HTML-escaped before
            being interpolated, so characters such as ``<`` or ``&`` in a
            label (for example from a file or stem name) cannot inject markup.
        progress_percent: Completion percentage; rounded to an integer and
            clamped to the 0-100 range.

    Returns:
        The HTML string for the progress widget.
    """
    import html  # local import keeps this block self-contained

    progress_percent = min(max(round(progress_percent), 0), 100)
    safe_label = html.escape(str(progress_label))
    return f"""
    <div id="custom-progress" style="margin-top: 10px;">
        <div style="font-size: 1rem; color: #C0C0C0; margin-bottom: 5px;" id="progress-label">{safe_label}</div>
        <div style="width: 100%; background-color: #444; border-radius: 5px; overflow: hidden;">
            <div id="progress-bar" style="width: {progress_percent}%; height: 20px; background-color: #6e8efb; transition: width 0.3s; max-width: 100%;"></div>
        </div>
    </div>
    """
 def run_folder(model, args, config, device, verbose: bool = False, progress=None):
     # Separates every path in `mixture_paths` with `model` and writes one audio file per instrument into `args.store_dir`.
     # `progress`, when provided, is invoked as progress(fraction, desc=...) to report status.
     # NOTE(review): parts of this body are not visible in this listing (e.g. where `mixture_paths` is assigned);
     # the comments below describe only the statements that are shown.
     start_time = time.time()
     model.eval()
     sample_rate = getattr(config.audio, 'sample_rate', 44100)
     logging.info(f"Total files found: {len(mixture_paths)} with sample rate: {sample_rate}")
+    print(i18n("total_files_found").format(len(mixture_paths), sample_rate))
     # Slice-copy so the appends below do not mutate the list returned by prefer_target_instrument.
     instruments = prefer_target_instrument(config)[:]
     store_dir = args.store_dir
     total_files = len(mixture_paths)
     processed_files = 0
     # Share of the 0-100 progress range given to each file; the else branch avoids dividing by zero.
+    base_progress_per_file = 100 / total_files if total_files > 0 else 100
     for path in mixture_paths:
         try:
             mix, sr = librosa.load(path, sr=sample_rate, mono=False)
             logging.info(f"Loaded audio: {path}, shape: {mix.shape}")
+            print(i18n("loaded_audio").format(path, mix.shape))
             processed_files += 1
             # Progress (in percent of the whole run) already earned by the files before this one.
+            base_progress = round((processed_files - 1) * base_progress_per_file)
             if progress is not None and callable(getattr(progress, '__call__', None)):
+                progress(base_progress / 100, desc=i18n("processing_file").format(processed_files, total_files))
                 # NOTE(review): the HTML string returned by update_progress_html is discarded here and in the calls below.
+                update_progress_html(i18n("processing_file").format(processed_files, total_files), base_progress)
             # Keep the un-normalized mixture for the phase-remix arithmetic further down.
             mix_orig = mix.copy()
+            if 'normalize' in config.inference and config.inference.get('normalize', False):
                 mix, norm_params = normalize_audio(mix)
             # The lambdas rescale a sub-step value `p` into a slice of the overall bar (presumably p is in 0..1 — confirm in demix/apply_tta).
             # NOTE(review): the fixed offsets (50/20/15/10/5 points) are added to `base_progress`, which is already a share of the
             # whole run, so with more than one file the reported fraction can exceed 1.0.
+            waveforms_orig = demix(
+                config, model, mix, device, model_type=args.model_type, pbar=False,
+                progress=lambda p, desc: progress((base_progress + p * 50) / 100, desc=desc) if progress else None
+            )
             if args.use_tta:
+                waveforms_orig = apply_tta(
+                    config, model, mix, waveforms_orig, device, args.model_type,
+                    progress=lambda p, desc: progress((base_progress + 50 + p * 20) / 100, desc=desc) if progress else None
+                )
             if args.demud_phaseremix_inst:
                 logging.info(f"Demudding track: {path}")
+                print(i18n("demudding_track").format(path))
                 instr = 'vocals' if 'vocals' in instruments else instruments[0]
                 # NOTE(review): `instruments` lives outside the per-file loop, so this append adds another
                 # 'instrumental_phaseremix' entry for every processed file.
                 instruments.append('instrumental_phaseremix')
                 if 'instrumental' not in instruments and 'Instrumental' not in instruments:
                     mix_modified = mix_orig - 2 * waveforms_orig[instr]
                     mix_modified_ = mix_modified.copy()
+                    waveforms_modified = demix(
+                        config, model, mix_modified, device, model_type=args.model_type, pbar=False,
+                        progress=lambda p, desc: progress((base_progress + 70 + p * 15) / 100, desc=desc) if progress else None
+                    )
                     if args.use_tta:
+                        waveforms_modified = apply_tta(
+                            config, model, mix_modified, waveforms_modified, device, args.model_type,
+                            progress=lambda p, desc: progress((base_progress + 85 + p * 10) / 100, desc=desc) if progress else None
+                        )
                     waveforms_orig['instrumental_phaseremix'] = mix_orig + waveforms_modified[instr]
                 else:
                     mix_modified = 2 * waveforms_orig[instr] - mix_orig
                     mix_modified_ = mix_modified.copy()
+                    waveforms_modified = demix(
+                        config, model, mix_modified, device, model_type=args.model_type, pbar=False,
+                        progress=lambda p, desc: progress((base_progress + 70 + p * 15) / 100, desc=desc) if progress else None
+                    )
                     if args.use_tta:
                         # NOTE(review): this branch passes `waveforms_orig` where the branch above passes `waveforms_modified` — confirm which is intended.
+                        waveforms_modified = apply_tta(
+                            config, model, mix_modified, waveforms_orig, device, args.model_type,
+                            progress=lambda p, desc: progress((base_progress + 85 + p * 10) / 100, desc=desc) if progress else None
+                        )
                     waveforms_orig['instrumental_phaseremix'] = mix_orig + mix_modified_ - waveforms_modified[instr]
             if args.extract_instrumental:
                 if 'instrumental' not in instruments:
                     instruments.append('instrumental')
             # Write one output file per instrument.
+            for i, instr in enumerate(instruments):
                 estimates = waveforms_orig[instr]
                 # Same condition as the normalize step above, so `norm_params` is defined whenever this runs.
+                if 'normalize' in config.inference and config.inference.get('normalize', False):
                     estimates = denormalize_audio(estimates, norm_params)
                 is_float = getattr(args, 'export_format', '').startswith('wav FLOAT')
                 codec = 'flac' if getattr(args, 'flac_file', False) else 'wav'
+                subtype = get_soundfile_subtype(args.pcm_type, is_float=is_float)
                 shortened_filename = shorten_filename(os.path.basename(path))
                 output_filename = f"{shortened_filename}_{instr}.{codec}"
                 output_path = os.path.join(store_dir, output_filename)
                 sf.write(output_path, estimates.T, sr, subtype=subtype)
+                save_progress = round(base_progress + 95 + (i / len(instruments)) * 5)
                 # NOTE(review): 'progress' below is a string literal, not the `progress` argument; a str has no
                 # `__call__` attribute, so getattr returns None and this branch never executes.
+                if progress is not None and callable(getattr('progress', '__call__', None)):
+                    progress(save_progress / 100, desc=i18n("saving_output").format(instr, processed_files, total_files))
+                    update_progress_html(i18n("saving_output").format(instr, processed_files, total_files), save_progress)
+            file_progress = round(processed_files * base_progress_per_file)
             if progress is not None and callable(getattr(progress, '__call__', None)):
+                progress(file_progress / 100, desc=i18n("completed_file").format(processed_files, total_files))
+                update_progress_html(i18n("completed_file").format(processed_files, total_files), file_progress)
         # Any failure inside the try (loading, separation or writing) is logged as "Cannot read track" and the file is skipped.
         except Exception as e:
             logging.error(f"Cannot read track: {path}. Error: {str(e)}")
+            print(i18n("cannot_read_track").format(path))
+            print(i18n("error_message").format(str(e)))
             continue
     elapsed_time = time.time() - start_time
+    logging.info(f"Processing time: {elapsed_time:.2f} seconds")
+    print(i18n("elapsed_time").format(elapsed_time))
     if progress is not None and callable(getattr(progress, '__call__', None)):
+        progress(1.0, desc=i18n("processing_complete"))
+        update_progress_html(i18n("processing_complete"), 100)
 @spaces.GPU
+def proc_folder(args=None, progress=None):
+    try:
+        parser = argparse.ArgumentParser(description=i18n("proc_folder_description"))
+        parser.add_argument("--model_type", type=str, default='melod_band_roformer', help=i18n("model_type_help"))
+        parser.add_argument("--config_path", type=str, required=True, help=i18n("config_path_help"))
+        parser.add_argument("--start_check_point", type=str, required=True, help=i18n("start_checkpoint_help"))
+        parser.add_argument("--input_folder", type=str, required=True, help=i18n("input_folder_help"))
+        parser.add_argument("--store_dir", type=str, required=True, help=i18n("store_dir_help"))
+        parser.add_argument("--chunk_size", type=int, default=352800, help=i18n("chunk_size_help"))
+        parser.add_argument("--overlap", type=int, default=2, help=i18n("overlap_help"))
+        parser.add_argument("--export_format", type=str, default='wav FLOAT', choices=['wav FLOAT', 'flac PCM_16', 'flac PCM_24'], help=i18n("export_format_help"))
+        parser.add_argument("--demud_phaseremix_inst", action='store_true', help=i18n("demud_phaseremix_help"))
+        parser.add_argument("--extract_instrumental", "action='store_true', help=i18n("extract_instrumental_help"))
+        parser.add_argument("--use_tta", action='store_true', help=i18n("use_tta_help"))
+        parser.add_argument("--flac_file", action='store_true', help=i18n("flac_file_help"))
+        parser.add_argument("--pcm_type", type=str, choices=['PCM_16', 'PCM_24'], default='PCM_24', help=i18n("pcm_type_help"))
+        parser.add_argument("--device_ids", nargs='+', type=int, default=[0], help=i18n("device_ids_help"))
+        parser.add_argument("--force_cpu", action='store_true', help=i18n("force_cpu_help"))
+        parser.add_argument("--lora_checkpoint", type=str, default='', help=i18n("lora_checkpoint_help"))
+        args = parser.parse_args(args if args else [])
+    except Exception as e:
+        logging.error(f"Argument parsing failed: {str(e)}")
+        raise ValueError(f"Invalid command-line arguments: {str(e)}")
     device = "cpu"
     if args.force_cpu:
+        logging.info("Forced to use CPU")
     elif torch.cuda.is_available():
+        logging.info("CUDA available")
         print(i18n("cuda_available"))
+        device = f'cuda:{args.device_ids[0]}'
     elif torch.backends.mps.is_available():
         device = "mps"
+    logging.info(f"Using device: {device}")
     print(i18n("using_device").format(device))
     model_load_start_time = time.time()
     torch.backends.cudnn.benchmark = True
+    try:
+        model, config = get_model_from_config(args.model_type, args.config_path)
+    except Exception as e:
+        logging.error(f"Failed to load model: {str(e)}")
+        raise
+    if args.start_check_point:
+        try:
+            load_start_checkpoint(args, model, type_='inference')
+        except Exception as e:
+            logging.error(f"Failed to load checkpoint: {str(e)}")
+            raise
+    logging.info(f"Instruments: {config.training.instruments}")
     print(i18n("instruments_print").format(config.training.instruments))
+    if len(args.device_ids) > 1 and not args.force_cpu:
         model = nn.DataParallel(model, device_ids=args.device_ids)
+        logging.info(f"Using DataParallel with devices: {args.device_ids}")
     model = model.to(device)
+    elapsed_time = time.time() - model_load_start_time
+    logging.info(f"Model load time: {elapsed_time:.2f} seconds")
+    print(i18n("model_load_time").format(elapsed_time))
+    run_folder(model, args, config, device, verbose=False, progress=progress)
+    return "Processing completed"
 if __name__ == "__main__":
+    try:
+        proc_folder(None)
+    except Exception as e:
+        logging.error(f"Main execution failed: {str(e)}")
+        raise