import random
import librosa
import soundfile as sf
import gradio as gr
from pydub import AudioSegment
from pydub.silence import split_on_silence

def load_audio(file):
    """Decode an audio file with librosa without resampling it.

    Args:
        file: Path (or other source accepted by ``librosa.load``) of the
            audio to read.

    Returns:
        The ``(samples, sample_rate)`` pair produced by ``librosa.load``.
    """
    # sr=None asks librosa to keep the file's own sample rate instead of
    # resampling to its 22050 Hz default.
    return librosa.load(file, sr=None)

def sync_to_same_bpm(instrumental_1, instrumental_2, sr=22050):
    """Time-stretch two instrumentals so both play at the faster one's tempo.

    The tempo of each waveform is estimated with librosa, the higher of the
    two estimates becomes the target, and each waveform is stretched by
    ``target_bpm / own_bpm``.

    Args:
        instrumental_1: Waveform of the first instrumental (as returned by
            ``librosa.load``).
        instrumental_2: Waveform of the second instrumental.
        sr: Sample rate of both waveforms, used for tempo estimation. The
            default matches librosa's own default, so existing two-argument
            calls behave as before; pass the real rate for accurate BPM values.

    Returns:
        A ``(stretched_1, stretched_2, target_bpm)`` tuple.

    Raises:
        ValueError: If a tempo estimate is not positive, which would make the
            stretch rate undefined.
    """
    # librosa >= 0.10 exposes the estimator as librosa.feature.tempo and
    # deprecates librosa.beat.tempo; older releases only have the latter.
    # `or` short-circuits, so the legacy name is touched only when needed.
    estimate_tempo = getattr(librosa.feature, "tempo", None) or librosa.beat.tempo

    # Keyword arguments are required by librosa >= 0.10 and accepted by
    # earlier versions as well.
    bpm_1 = float(estimate_tempo(y=instrumental_1, sr=sr)[0])
    bpm_2 = float(estimate_tempo(y=instrumental_2, sr=sr)[0])
    if bpm_1 <= 0 or bpm_2 <= 0:
        raise ValueError(
            f"Could not estimate a positive tempo (got {bpm_1} and {bpm_2} BPM)"
        )

    # Speed the slower track up rather than slowing the faster one down.
    target_bpm = max(bpm_1, bpm_2)

    inst1_synced = librosa.effects.time_stretch(
        instrumental_1, rate=target_bpm / bpm_1
    )
    inst2_synced = librosa.effects.time_stretch(
        instrumental_2, rate=target_bpm / bpm_2
    )

    return inst1_synced, inst2_synced, target_bpm

def split_vocals_on_silence(vocals_file, min_silence_len=800, silence_margin_db=14):
    """Cut a vocal track into segments wherever it goes quiet.

    Args:
        vocals_file: Path of the vocal recording, in any format
            ``AudioSegment.from_file`` can open.
        min_silence_len: Minimum length of a quiet stretch, in milliseconds,
            for it to count as a split point (default 800 ms = 0.8 s).
        silence_margin_db: How far below the track's average loudness (dBFS)
            audio must fall to be treated as silence (default 14 dB).

    Returns:
        The list of ``AudioSegment`` chunks returned by pydub's
        ``split_on_silence``; it may be empty.
    """
    vocals_audio = AudioSegment.from_file(vocals_file)
    # The threshold is relative to this track's own loudness so that quiet
    # and loud recordings are split comparably.
    return split_on_silence(
        vocals_audio,
        min_silence_len=min_silence_len,
        silence_thresh=vocals_audio.dBFS - silence_margin_db,
    )

def _float_array_to_segment(samples, frame_rate):
    """Wrap a mono float waveform (nominally in [-1, 1]) as a 16-bit AudioSegment."""
    # Scale to the int16 range and clip, because time-stretched audio can
    # overshoot +/-1.0 and would otherwise wrap around when cast.
    pcm = (samples * 32767).round().clip(-32768, 32767).astype("<i2")
    return AudioSegment(
        data=pcm.tobytes(), sample_width=2, frame_rate=frame_rate, channels=1
    )


def create_mashup(opponent_vocals, player_vocals, instrumental_1, instrumental_2,
                  num_turns=10):
    """Build a randomized vocal/instrumental mashup and write it to ``mashup.wav``.

    Both instrumentals are tempo-matched, both vocal tracks are split on
    silence, and ``num_turns`` vocal segments are drawn at random (with
    replacement) from the pooled opponent and player segments. Each drawn
    segment is mixed over instrumental #1 on even turns and instrumental #2 on
    odd turns, and the mixed turns are concatenated.

    Args:
        opponent_vocals: Path of the opponent vocal track.
        player_vocals: Path of the player vocal track.
        instrumental_1: Path of the first instrumental.
        instrumental_2: Path of the second instrumental.
        num_turns: Number of vocal turns in the mashup (default 10).

    Returns:
        The path of the exported WAV file (``"mashup.wav"``).

    Raises:
        ValueError: If ``num_turns`` is smaller than 1.
        gr.Error: If neither vocal track yields any segment.
    """
    if num_turns < 1:
        raise ValueError("num_turns must be at least 1")

    # Load both instrumentals at one shared sample rate: the first keeps its
    # native rate and the second is resampled to it, so a single frame rate
    # is valid for every segment built below.
    inst1_audio, sr = load_audio(instrumental_1)
    sr = int(sr)
    inst2_audio, _ = librosa.load(instrumental_2, sr=sr)
    inst1_sync, inst2_sync, _ = sync_to_same_bpm(inst1_audio, inst2_audio)

    # Convert each instrumental to a pydub segment once, outside the loop.
    instrumentals = (
        _float_array_to_segment(inst1_sync, sr),
        _float_array_to_segment(inst2_sync, sr),
    )

    # Split vocals into segments based on silence detection
    vocal_pool = (
        split_vocals_on_silence(opponent_vocals)
        + split_vocals_on_silence(player_vocals)
    )
    if not vocal_pool:
        raise gr.Error(
            "No vocal segments were found; the vocal tracks appear to be silent."
        )

    # Draw each turn at random from the pooled opponent and player segments.
    vocal_turns = [random.choice(vocal_pool) for _ in range(num_turns)]

    # Mix each vocal turn over the instrumental for that turn (even -> #1,
    # odd -> #2).
    # NOTE(review): overlay() keeps the vocal segment's length and starts the
    # instrumental from its beginning on every turn; confirm that restarting
    # the backing track per turn is the intended sound.
    mashup = [
        vocal_segment.set_frame_rate(sr).set_channels(1).overlay(instrumentals[i % 2])
        for i, vocal_segment in enumerate(vocal_turns)
    ]

    # pydub supports the sum() builtin for concatenating segments end to end.
    final_mashup = sum(mashup)
    output_file = "mashup.wav"
    final_mashup.export(output_file, format="wav")

    return output_file

# Gradio front end: four audio uploads in, one playable mashup out.
iface = gr.Interface(
    fn=create_mashup,
    # Each upload is requested as a file path; the labels are listed in the
    # same order as create_mashup's parameters.
    inputs=[
        gr.Audio(label=input_label, type="filepath")
        for input_label in (
            "Opponent Vocals",
            "Player Vocals",
            "Instrumental #1",
            "Instrumental #2",
        )
    ],
    outputs=gr.Audio(label="Mashup Output"),
    title="FNF Mashup Maker",
    description="Upload FNF opponent and player vocals along with two instrumentals to generate a randomized mashup.",
)

# NOTE(review): launch() runs at import time as well as when executed as a
# script; wrap it in a __main__ guard if this module is ever imported.
iface.launch()