Spaces:
Build error
Build error
| import os | |
| import sys | |
| import logging | |
| import gradio as gr | |
| import shutil | |
| from demucs_handler import DemucsProcessor, check_dependencies, configure_model | |
| from whisper_handler import WhisperTranscriber | |
| import tempfile | |
| import torch | |
| import torchaudio | |
| import soundfile as sf | |
| import librosa | |
| import numpy as np | |
# Configure the root logger once, when this module is loaded: INFO and above,
# each record prefixed with a timestamp and its level name.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
def validate_environment():
    """Check that the audio/ML packages import, logging their versions.

    Imports ``torch``, ``torchaudio`` and ``demucs``; on success the PyTorch
    and Torchaudio versions and CUDA availability are logged at INFO level.

    Raises:
        SystemExit: With exit code 1 when any of the imports fails; the
            failure is logged at ERROR level first.
    """
    try:
        import torch
        import torchaudio
        import demucs  # noqa: F401 -- imported only to prove it is installed

        logging.info(f"PyTorch version: {torch.__version__}")
        logging.info(f"Torchaudio version: {torchaudio.__version__}")
        logging.info(f"CUDA available: {torch.cuda.is_available()}")
    except ImportError as import_err:
        logging.error(f"Environment validation failed: {import_err}")
        sys.exit(1)
def create_interface():
    """Build the Gradio interface that isolates vocals and transcribes lyrics.

    Runs ``validate_environment()`` first, then creates one ``DemucsProcessor``
    and one ``WhisperTranscriber`` that the request handler closes over, so
    both objects are shared by every call to the handler.

    Returns:
        The configured ``gr.Interface``; the caller is responsible for
        launching it.
    """
    validate_environment()
    processor = DemucsProcessor()
    transcriber = WhisperTranscriber()

    def process_audio(audio_file, whisper_model="base", progress=gr.Progress()):
        """Separate the vocals of ``audio_file`` and transcribe them.

        Args:
            audio_file: Path of the uploaded audio file, or ``None`` when
                nothing was uploaded.
            whisper_model: Model size selected in the dropdown.
                NOTE(review): this value is currently not forwarded to
                ``transcriber.transcribe`` -- confirm whether the transcriber
                is expected to honour it.
            progress: Gradio progress tracker used to report stage updates.

        Returns:
            ``((sample_rate, samples), lyrics)`` on success, or
            ``(None, message)`` when no file was given or processing failed.
        """
        if audio_file is None:
            return None, "Please upload an audio file."

        try:
            progress(0, desc="Starting processing")
            logging.info(f"Processing file: {audio_file}")

            # Every intermediate file is written inside this directory, which
            # is deleted when the ``with`` block exits (on return or on error),
            # so no separate per-file cleanup pass is required.
            with tempfile.TemporaryDirectory() as temp_dir:
                temp_audio_path = os.path.join(temp_dir, "input.wav")
                vocals_output_path = os.path.join(temp_dir, "vocals.wav")

                # Normalise the upload to a 44.1 kHz WAV before separation.
                audio, sr = librosa.load(audio_file, sr=44100)
                sf.write(temp_audio_path, audio, samplerate=sr)

                progress(0.1, desc="Separating vocals")
                try:
                    vocals_path = processor.separate_vocals(temp_audio_path)
                    # Copy the separated stem next to the input so it shares
                    # the temporary directory's lifetime.
                    shutil.copy2(vocals_path, vocals_output_path)
                except RuntimeError as e:
                    logging.error(f"Vocal separation failed: {str(e)}")
                    return None, f"Vocal separation failed: {str(e)}"

                # Read the vocals into memory (native sample rate) so they can
                # still be returned after the temporary directory is gone.
                vocals_audio, vocals_sr = librosa.load(vocals_output_path, sr=None)

                progress(0.75, desc="Transcribing")
                lyrics = transcriber.transcribe(vocals_output_path)

                progress(1.0, desc="Processing complete")
                return (vocals_sr, vocals_audio), lyrics
        except Exception as e:
            # Top-level boundary for the UI callback: report the failure to
            # the user instead of crashing, but keep the traceback in the log.
            error_message = f"Processing error: {str(e)}"
            logging.exception(error_message)
            return None, error_message

    interface = gr.Interface(
        fn=process_audio,
        inputs=[
            gr.Audio(label="Upload Audio File", type="filepath"),
            gr.Dropdown(
                choices=["tiny", "base", "small", "medium", "large-v2"],
                value="medium",
                label="Whisper Model Size",
            ),
        ],
        outputs=[
            gr.Audio(label="Isolated Vocals", type="numpy"),
            gr.Textbox(label="Transcribed Lyrics", lines=10, max_lines=20),
        ],
        title="Audio Lyrics Extractor",
        description=(
            "Upload an audio file to extract vocals and transcribe lyrics\n"
            " Created by Ever Olivares - Looking for Summer 2025 Internship Opportunities\n"
            " Connect with me: [LinkedIn](https://www.linkedin.com/in/everolivares/)"
            " Currently not working as intended on HF tested on LightningAI with T4 running largeV2"
        ),
        analytics_enabled=False,
    )
    return interface
if __name__ == "__main__":
    # Script entry point: refuse to start when the dependency check fails.
    if not check_dependencies():
        print("Please install missing dependencies")
        # Use sys.exit rather than the bare exit() helper: exit() is injected
        # by the site module and is absent under ``python -S`` or in frozen
        # builds, whereas sys.exit is always available.
        sys.exit(1)
    interface = create_interface()
    interface.launch()