|
""" |
|
La Fréquence du Vivant – Bio acoustic découverte
Comédie des Mondes Hybrides · Gaspard Boréal
------------------------------------------------
• Upload .wav/.mp3 (≤ 30 s)
• Prédit les tags AudioSet (AST 527) + spectrogramme
• Synthèse bio-acoustique : barres horizontales (≥ 0,5 %)
• Boutons : « Nouveau fichier » & « Voir tags (condensé / complet) »
|
""" |
|
|
|
|
|
import json, pathlib, matplotlib.pyplot as plt, pandas as pd |
|
import torch, torchaudio, librosa, gradio as gr |
|
from transformers import pipeline |
|
|
|
|
|
# Load the ontology file that sits next to this script and index it.
# JSON is read explicitly as UTF-8 so parsing does not depend on the
# platform's default locale encoding.
ONTO = json.loads(
    pathlib.Path(__file__).with_name("ontology.json").read_text(encoding="utf-8")
)

# Node lookup by ontology id, and display name -> ontology id.
by_id = {n["id"]: n for n in ONTO}
name2id = {n["name"]: n["id"] for n in ONTO}

# Reverse edges: child id -> list of parent ids, in file order.
parents = {}
for n in ONTO:
    for c in n.get("child_ids", []):
        parents.setdefault(c, []).append(n["id"])
|
def root_parent(mid):
    """Follow first-listed parents from ``mid`` up to a top-level node.

    Args:
        mid: Ontology node id to start from.

    Returns:
        The id of the first ancestor reached that has no parents recorded
        in ``parents`` (``mid`` itself when it has none). If the parent
        links form a cycle, the walk stops at the last node reached before
        a repeat instead of looping forever.
    """
    seen = {mid}
    while parents.get(mid):
        nxt = parents[mid][0]  # a node may list several parents; keep the first
        if nxt in seen:
            break  # cyclic links in the data: stop rather than hang
        seen.add(nxt)
        mid = nxt
    return mid
|
|
|
|
|
# Audio-classification pipeline, built once at import time.
# top_k=None is passed so the pipeline is not limited to a fixed number of
# labels; the model runs on CUDA device 0 when available, otherwise on CPU.
clf = pipeline(
    task="audio-classification",
    model="MIT/ast-finetuned-audioset-10-10-0.4593",
    top_k=None,
    device=(0 if torch.cuda.is_available() else -1),
)
|
|
|
|
|
def load_audio(path, target_sr=16000, max_sec=30):
    """Load an audio file as a mono waveform sampled at ``target_sr``.

    ``.wav`` files are read with torchaudio; every other extension is
    decoded with librosa at the file's native rate.

    Args:
        path: Path of the audio file.
        target_sr: Sample rate of the returned waveform, in Hz.
        max_sec: Maximum duration kept from the start of the file, in
            seconds. Fractional values are accepted.

    Returns:
        ``(wav, sr)`` where ``wav`` is a tensor of shape ``(1, n_samples)``
        and ``sr`` is the sample rate of ``wav``.
    """
    ext = pathlib.Path(path).suffix.lower()
    if ext == ".wav":
        wav, sr = torchaudio.load(path)
    else:
        y, sr = librosa.load(path, sr=None, mono=False)
        wav = torch.tensor(y)
        if wav.ndim == 1:  # mono files come back as a 1-D array
            wav = wav.unsqueeze(0)

    # Truncate first so the downmix and resample only touch what is kept.
    # int(): slice bounds must be integers even when max_sec is fractional.
    wav = wav[:, : int(max_sec * sr)]

    if wav.shape[0] > 1:
        wav = wav.mean(0, keepdim=True)  # downmix channels to mono

    if sr != target_sr:
        wav = torchaudio.functional.resample(wav, sr, target_sr)
        sr = target_sr

    return wav, sr
|
|
|
def top_k_dict(d, k=5):
    """Return the ``k`` highest-valued entries of ``d``, best first.

    Args:
        d: Mapping from key to a sortable score.
        k: Number of entries to keep. ``None`` keeps them all; zero or a
            negative number keeps none.

    Returns:
        A new dict ordered by descending value. Entries with equal values
        keep the relative order they had in ``d``.
    """
    if k is not None and k <= 0:
        # A negative slice bound would drop entries from the end of the
        # ranking instead of returning nothing.
        return {}
    ranked = sorted(d.items(), key=lambda item: item[1], reverse=True)
    return dict(ranked[:k])
|
|
|
|
|
# Labels scoring below this fraction (0.5 %) are dropped before aggregation.
THRESH = 0.005


def analyse(audio_path, expanded):
    """Classify an audio file and build everything the UI displays.

    Args:
        audio_path: Path of the uploaded audio file.
        expanded: When true, ``disp`` holds every retained tag; otherwise
            only the five best.

    Returns:
        A 4-tuple ``(disp, fig, full, df)``:

        * ``disp`` - ``{label: score}`` to show (all of ``full`` or its top 5).
        * ``fig`` - matplotlib figure holding the spectrogram.
        * ``full`` - ``{label: score}`` for every label scoring at least
          ``THRESH``, scores rounded to 4 decimals.
        * ``df`` - DataFrame with columns ``Racine`` (root category name)
          and ``Pourcent`` (share in percent), keeping rows >= 0.5 sorted
          in descending order.
    """
    wav, sr = load_audio(audio_path)
    # load_audio returns a (1, n) tensor; index the single channel instead of
    # squeeze(), which would collapse a one-sample clip to a 0-d array.
    samples = wav.numpy()[0]

    # The pipeline has no ``sampling_rate`` call argument; the rate travels
    # with the samples in the dict input form.
    res = clf({"raw": samples, "sampling_rate": sr})

    # Keep labels at or above the threshold.
    full = {}
    for d in res:
        s = float(d["score"])
        if s > 1:  # tolerate scores expressed as percentages
            s /= 100
        if s >= THRESH:
            full[d["label"]] = round(s, 4)

    # Fold each label into its root category, keeping the best score per
    # root, then normalise so the roots sum to 1.
    synth_raw = {}
    for label, sc in full.items():
        mid = name2id.get(label)
        if mid:
            root = by_id[root_parent(mid)]["name"]
            synth_raw[root] = max(synth_raw.get(root, 0), sc)
    tot = sum(synth_raw.values()) or 1  # avoid dividing by zero when empty
    synth_norm = {k: sc / tot for k, sc in synth_raw.items()}

    disp = full if expanded else top_k_dict(full, 5)

    df = (
        pd.DataFrame({
            "Racine": list(synth_norm.keys()),
            "Pourcent": [round(v * 100, 1) for v in synth_norm.values()],
        })
        .query("Pourcent >= 0.5")
        .sort_values("Pourcent", ascending=False)
    )

    # Build the figure outside pyplot's global registry: figures created with
    # plt.subplots() stay referenced until explicitly closed, so a
    # long-running server would accumulate one per upload.
    fig = plt.Figure(figsize=(6, 2))
    ax = fig.subplots()
    ax.specgram(samples, Fs=sr, NFFT=1024, noverlap=512)
    ax.set_axis_off()
    fig.tight_layout()

    return disp, fig, full, df
|
|
|
|
|
with gr.Blocks(title="La Fréquence du Vivant – Bio acoustic découverte") as demo:

    gr.Markdown("# La Fréquence du Vivant")
    gr.Markdown("### Écoute bio-acoustique : marche techno-sensible entre vivant, humain et machine")

    # State shared between the event handlers below.
    expanded = gr.State(False)            # True -> show every tag, False -> top 5
    full_tags = gr.State({})              # every retained {label: score} pair
    synth_df = gr.State(pd.DataFrame())   # root-category table behind the bar plot

    audio_in = gr.Audio(sources=["upload"], type="filepath",
                        label="🎙️ Charger un fichier .wav / .mp3 (≤ 30 s)")

    gr.Markdown("**Synthèse bio-acoustique (racines)**")
    synth_out = gr.BarPlot(x="Racine", y="Pourcent",
                           y_lim=(0, 100),
                           height=260,
                           title="Répartition par racine (%)")

    gr.Markdown("**Tags AudioSet**")
    with gr.Row():
        reset_btn = gr.Button("🔄 Nouveau fichier / Réinitialiser", size="sm")
        toggle_btn = gr.Button("Voir tous les tags", size="sm", variant="primary")

    tags_out = gr.Label()
    spec_out = gr.Plot(label="Spectrogramme")

    def _txt(exp):
        """Return the toggle-button caption matching the ``expanded`` flag."""
        return "Uniquement les principaux tags" if exp else "Voir tous les tags"

    def flip(b):
        """Invert the ``expanded`` flag."""
        return not b

    def _plot_rows(df):
        """Return the rows the bar plot should draw, or ``None`` to clear it.

        A frame that is empty or has no ``Pourcent`` column (the initial and
        reset state) cannot be filtered, so the plot is cleared instead.
        """
        if df is None or df.empty or "Pourcent" not in df.columns:
            return None
        return df.query("Pourcent >= 0.5").sort_values("Pourcent", ascending=False)

    def refresh(exp, full, df):
        """Recompute button caption, visible tags and bar plot after a toggle."""
        disp = full if exp else top_k_dict(full, 5)
        return _txt(exp), disp, _plot_rows(df)

    def reset_ui():
        """Return cleared values for every output wired to the reset button.

        Order matches the outputs list of ``reset_btn.click``: audio input,
        tag label, spectrogram, bar plot, full-tags state, expanded flag,
        toggle caption, root-table state.
        """
        # Blank placeholder built outside pyplot's registry, so repeated
        # resets do not leave open figures behind.
        blank = plt.Figure(figsize=(6, 2))
        return (None, {}, blank, None, {}, False, "Voir tous les tags",
                pd.DataFrame())

    audio_in.upload(analyse,
                    [audio_in, expanded],
                    [tags_out, spec_out, full_tags, synth_df])

    toggle_btn.click(flip, expanded, expanded)\
              .then(refresh,
                    [expanded, full_tags, synth_df],
                    [toggle_btn, tags_out, synth_out])

    # synth_df is cleared too: otherwise the previous chart comes back on the
    # next toggle, and re-uploading the same file leaves the state unchanged
    # so the change listener below never redraws the chart.
    reset_btn.click(
        reset_ui, None,
        [audio_in, tags_out, spec_out,
         synth_out, full_tags, expanded, toggle_btn, synth_df]
    )

    # Redraw the bar plot whenever the stored root table changes.
    synth_df.change(_plot_rows, synth_df, synth_out)

    gr.Markdown("<center>2025 : Gaspard Boréal / La comédie des mondes hybrides</center>")
|
|
|
|
|
# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch(debug=True)
|
|