"""
La Fréquence du Vivant – Bio acoustic découverte
Comédie des Mondes Hybrides · Gaspard Boréal
------------------------------------------------
• Upload .wav/.mp3 (≤ 30 s)
• Prédit les tags AudioSet (AST 527) + spectrogramme
• Synthèse bio-acoustique : barres horizontales (≥ 0,5 %)
• Boutons : « Nouveau fichier » & « Voir tags (condensé / complet) »
"""
# ---------------------------- Imports ---------------------------------
import json, pathlib, matplotlib.pyplot as plt, pandas as pd
import torch, torchaudio, librosa, gradio as gr
from transformers import pipeline
# ---------------------------- 0. Ontology -----------------------------
ONTO = json.loads(pathlib.Path(__file__).with_name("ontology.json").read_text())
by_id = {n["id"]: n for n in ONTO}
name2id = {n["name"]: n["id"] for n in ONTO}
parents = {}                                   # child id -> list of parent ids
for n in ONTO:
    for c in n.get("child_ids", []):
        parents.setdefault(c, []).append(n["id"])
def root_parent(mid):                          # walk up to the root ID
    while parents.get(mid):
        mid = parents[mid][0]
    return mid
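# Illustration (made-up IDs, not real AudioSet MIDs): with a parent chain
# leaf -> mid -> root, parents == {"leaf": ["mid"], "mid": ["root"]} and
# root_parent("leaf") returns "root"; when a node has several parents,
# only the first listed parent is followed.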
# ---------------------------- 1. Model --------------------------------
clf = pipeline("audio-classification",
               model="MIT/ast-finetuned-audioset-10-10-0.4593",
               top_k=None,                     # None: do not limit to the default top-5
               device=0 if torch.cuda.is_available() else -1)
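# The pipeline returns a list of {"label": str, "score": float} dicts, one per
# AudioSet class, e.g. [{"label": "Speech", "score": 0.82}, ...] (values are
# illustrative only); analyse() below iterates over that list and keeps the
# scores at or above THRESH.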
# ---------------------------- 2. Audio utils --------------------------
def load_audio(path, target_sr=16000, max_sec=30):
    ext = pathlib.Path(path).suffix.lower()
    if ext == ".wav":
        wav, sr = torchaudio.load(path)
    else:
        y, sr = librosa.load(path, sr=None, mono=False)
        wav = torch.tensor(y).unsqueeze(0) if y.ndim == 1 else torch.tensor(y)
    wav = wav[:, : max_sec * sr]               # keep at most max_sec seconds
    if wav.shape[0] > 1:                       # downmix to mono
        wav = wav.mean(0, keepdim=True)
    if sr != target_sr:                        # resample to the model's rate
        wav = torchaudio.functional.resample(wav, sr, target_sr)
        sr = target_sr
    return wav, sr
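# Note: decoding .mp3 through librosa assumes an MP3-capable backend is available
# in the environment (soundfile with a recent libsndfile, or audioread + ffmpeg);
# this is an environment assumption, not something the script checks.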
def top_k_dict(d, k=5):
    return {k_: v_ for k_, v_ in
            sorted(d.items(), key=lambda x: x[1], reverse=True)[:k]}
# ---------------------------- 3. Analysis -----------------------------
THRESH = 0.005                                 # 0.5 % (score >= 0.005)

def analyse(audio_path, expanded):
    wav, sr = load_audio(audio_path)
    res = clf({"raw": wav.numpy().squeeze(), "sampling_rate": sr})
    full = {}
    for d in res:
        # Normalise to [0, 1] in case a score is reported as a percentage
        s = float(d["score"]) / 100 if d["score"] > 1 else float(d["score"])
        if s >= THRESH:
            full[d["label"]] = round(s, 4)
    # Root-level synthesis (max score per root), then normalisation
    synth_raw = {}
    for label, sc in full.items():
        mid = name2id.get(label)
        if mid:
            root = by_id[root_parent(mid)]["name"]
            synth_raw[root] = max(synth_raw.get(root, 0), sc)
    tot = sum(synth_raw.values()) or 1
    synth_norm = {k: sc / tot for k, sc in synth_raw.items()}
    disp = full if expanded else top_k_dict(full, 5)
    # DataFrame filtered for the BarPlot (>= 0.5 %)
    df = (pd.DataFrame({"Racine": list(synth_norm.keys()),
                        "Pourcent": [round(v * 100, 1) for v in synth_norm.values()]})
          .query("Pourcent >= 0.5")
          .sort_values("Pourcent", ascending=False))
    fig, ax = plt.subplots(figsize=(6, 2))
    ax.specgram(wav.numpy()[0], Fs=sr, NFFT=1024, noverlap=512)
    ax.set_axis_off()
    plt.tight_layout()
    return disp, fig, full, df
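# Quick local sanity check (a sketch; "sample.wav" is a hypothetical file name,
# not shipped with the app):
#   disp, fig, full, df = analyse("sample.wav", expanded=False)
#   print(disp)   # top-5 tags at or above the 0.5 % threshold
#   print(df)     # root-level synthesis feeding the BarPlot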
# ---------------------------- 4. Interface ----------------------------
with gr.Blocks(title="La Fréquence du Vivant – Bio acoustic découverte") as demo:
gr.Markdown("# La Fréquence du Vivant")
gr.Markdown("### Écoute bio-acoustique : marche techno-sensible entre vivant, humain et machine")
expanded = gr.State(False)
full_tags = gr.State({})
synth_df = gr.State(pd.DataFrame())
audio_in = gr.Audio(sources=["upload"], type="filepath",
label="🎙️ Charger un fichier .wav / .mp3 (≤ 30 s)")
# Synthèse
gr.Markdown("**Synthèse bio-acoustique (racines)**")
synth_out = gr.BarPlot(x="Racine", y="Pourcent",
y_lim=(0, 100),
height=260,
title="Répartition par racine (%)")
# Détails tags
gr.Markdown("**Tags AudioSet**")
with gr.Row():
reset_btn = gr.Button("🔄 Nouveau fichier / Réinitialiser", size="sm")
toggle_btn = gr.Button("Voir tous les tags", size="sm", variant="primary")
tags_out = gr.Label()
spec_out = gr.Plot(label="Spectrogramme")
# Helpers
def _txt(exp): return "Uniquement les principaux tags" if exp else "Voir tous les tags"
def flip(b): return not b
def refresh(exp, full, df):
disp = full if exp else top_k_dict(full, 5)
return _txt(exp), disp, df
def reset_ui():
fig = plt.figure(figsize=(6, 2)); plt.axis("off")
return None, {}, fig, pd.DataFrame(), {}, False, "Voir tous les tags"
audio_in.upload(analyse,
[audio_in, expanded],
[tags_out, spec_out, full_tags, synth_df])
toggle_btn.click(flip, expanded, expanded)\
.then(refresh,
[expanded, full_tags, synth_df],
[toggle_btn, tags_out, synth_out])
reset_btn.click(
reset_ui, None,
[audio_in, tags_out, spec_out,
synth_out, full_tags, expanded, toggle_btn]
)
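    # Note: the .change listener on the gr.State below assumes a Gradio release
    # that supports change events on State components; on older versions the
    # BarPlot is only refreshed through the explicit event outputs above.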
    synth_df.change(
        lambda d: d.query("Pourcent >= 0.5").sort_values("Pourcent", ascending=False),
        synth_df, synth_out
    )
    gr.Markdown("<center>2025 : Gaspard Boréal / La comédie des mondes hybrides</center>")
# ---------------------------- 5. Run ----------------------------------
if __name__ == "__main__":
    demo.launch(debug=True)