Update app.py
app.py CHANGED
@@ -157,89 +157,6 @@ def resample_waveform(waveform, original_sample_rate, target_sample_rate):
 
 
 
-# def split_audio(waveform, sample_rate):
-#     segment_samples = segment_duration * sample_rate
-#     total_samples = waveform.size(0)
-
-#     segments = []
-#     # If the audio is shorter than the segment duration, just use the entire audio
-#     if total_samples <= segment_samples:
-#         segments.append(waveform)
-#     else:
-#         # Split the audio into segments of the specified duration
-#         for start in range(0, total_samples, segment_samples):
-#             end = min(start + segment_samples, total_samples)
-#             segment = waveform[start:end]
-#             segments.append(segment)
-
-#     # Ensure we have at least one segment with a minimum length
-#     if len(segments) == 0 or all(len(segment) < 100 for segment in segments):
-#         # Create a padded segment if audio is too short
-#         padded_segment = torch.zeros(segment_samples)
-#         if total_samples > 0:
-#             padded_segment[:total_samples] = waveform
-#         segments = [padded_segment]
-
-#     return segments
-
-# def split_audio(waveform, sample_rate):
-#     segment_samples = segment_duration * sample_rate
-#     total_samples = waveform.size(0)
-
-#     segments = []
-#     # If the audio is shorter than the segment duration, just use the entire audio
-#     if total_samples <= segment_samples:
-#         segments.append(waveform)
-#     else:
-#         # Split the audio into segments of the specified duration
-#         for start in range(0, total_samples, segment_samples):
-#             end = min(start + segment_samples, total_samples)
-#             segment = waveform[start:end]
-#             segments.append(segment)
-
-#     # Ensure we have at least one segment
-#     if len(segments) == 0:
-#         segments.append(waveform)
-
-#     return segments
-
-# def split_audio(waveform, sample_rate):
-#     segment_samples = segment_duration * sample_rate
-#     total_samples = waveform.size(0)
-
-#     # Pad if shorter than one segment
-#     if total_samples < segment_samples:
-#         pad_size = segment_samples - total_samples
-#         waveform = torch.nn.functional.pad(waveform, (0, pad_size))
-
-#     segments = []
-#     for start in range(0, waveform.size(0), segment_samples):
-#         end = start + segment_samples
-#         if end <= waveform.size(0):
-#             segment = waveform[start:end]
-#             segments.append(segment)
-
-#     return segments
-
-# def split_audio(waveform, sample_rate, segment_duration=10):
-#     segment_samples = segment_duration * sample_rate
-#     total_samples = waveform.size(0)
-
-#     segments = []
-#     for start in range(0, total_samples, segment_samples):
-#         end = start + segment_samples
-#         if end <= total_samples:
-#             segment = waveform[start:end]
-#             segments.append(segment)
-
-#     # If no full segments were created, pad the short waveform
-#     if len(segments) == 0:
-#         pad_length = segment_samples - total_samples
-#         padded_waveform = torch.nn.functional.pad(waveform, (0, pad_length))
-#         segments.append(padded_waveform)
-
-#     return segments
-
 def split_audio(waveform, sample_rate):
     segment_samples = segment_duration * sample_rate
     total_samples = waveform.size(0)
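The four commented-out drafts removed above all converge on the same idea: slice the waveform into fixed-length windows and pad audio that is shorter than one window. The retained split_audio begins at the context lines shown, but its body lies outside this hunk; a minimal sketch of the pattern, assuming a module-level segment_duration constant (10 seconds, the default used in the last draft), is:

    import torch

    segment_duration = 10  # seconds; assumed module-level constant, not shown in this hunk

    def split_audio(waveform, sample_rate):
        # Split a 1-D waveform tensor into fixed-length segments.
        segment_samples = segment_duration * sample_rate
        total_samples = waveform.size(0)

        segments = []
        for start in range(0, total_samples, segment_samples):
            end = start + segment_samples
            if end <= total_samples:
                segments.append(waveform[start:end])

        # If the audio is shorter than one full segment, zero-pad it on the right.
        if len(segments) == 0:
            pad_length = segment_samples - total_samples
            segments.append(torch.nn.functional.pad(waveform, (0, pad_length)))

        return segments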
@@ -778,13 +695,12 @@ css = """
 
 def analyze_emotions(audio_path, threshold):
     if audio_path is None:
-        return "
+        return {"error": "Please upload a valid audio file"}
     try:
         model_output = music2emo.predict(audio_path, threshold)
         return model_output
     except Exception as e:
-        return
-
+        return {"error": str(e)}
 
 with gr.Blocks(css=css) as demo:
     gr.HTML(f"<h1 style='text-align: center;'>{title}</h1>")
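With this hunk, both failure paths in analyze_emotions return an {"error": ...} dict that a JSON output component can render directly, while the success path still returns whatever music2emo.predict produces. A quick illustration, using a hypothetical file path and threshold value:

    print(analyze_emotions(None, 0.5))
    # {'error': 'Please upload a valid audio file'}

    result = analyze_emotions("example.mp3", 0.5)  # hypothetical path
    # model_output from music2emo.predict on success, {'error': '<message>'} if predict raises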
@@ -816,35 +732,13 @@ with gr.Blocks(css=css) as demo:
 
         # Right Panel (Output)
         with gr.Column(scale=1):
-
-                label="Analysis Results",
-                lines=4,
-                interactive=False  # Prevent user input
-            )
-
-            # Ensure both plots have padding on top
-            with gr.Row(equal_height=True):
-                mood_chart = gr.Plot(label="Mood Probabilities", scale=2, elem_classes=["gr-box"])
-                va_chart = gr.Plot(label="Valence-Arousal Space", scale=1, elem_classes=["gr-box"])
+            output_json = gr.JSON(label="Model Output")
 
-    # predict_btn.click(
-    #     fn=lambda audio, thresh: format_prediction(music2emo.predict(audio, thresh)),
-    #     inputs=[input_audio, threshold],
-    #     outputs=[output_text, va_chart, mood_chart]
-    # )
     predict_btn.click(
-        fn=
+        fn=analyze_emotions,
         inputs=[input_audio, threshold],
-        outputs=
+        outputs=output_json
     )
 
-    demo.load(fn=analyze_emotions, inputs=[input_audio, threshold], outputs=[model_output])
-
 # Launch the App
-demo.queue().launch(share=True, show_error=True)
-
-
-
-
-
-
+demo.queue().launch(share=True, show_error=True)
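Combining the added lines, the output panel and click wiring now read roughly as sketched below. The input_audio, threshold, and predict_btn components are defined earlier in the Blocks context, outside this diff, so the left-panel components shown here (gr.Audio, gr.Slider, gr.Button) are assumptions for illustration only:

    with gr.Blocks(css=css) as demo:
        gr.HTML(f"<h1 style='text-align: center;'>{title}</h1>")

        with gr.Row():
            # Left panel (inputs) -- assumed components, not part of this diff
            with gr.Column(scale=1):
                input_audio = gr.Audio(label="Upload Audio", type="filepath")
                threshold = gr.Slider(0.0, 1.0, value=0.5, label="Threshold")
                predict_btn = gr.Button("Analyze")

            # Right panel (output) -- from the added lines
            with gr.Column(scale=1):
                output_json = gr.JSON(label="Model Output")

        predict_btn.click(
            fn=analyze_emotions,
            inputs=[input_audio, threshold],
            outputs=output_json,
        )

    # Launch the App
    demo.queue().launch(share=True, show_error=True)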