""" La Fréquence du Vivant – Bio acoustic découverte Comédie des Mondes Hybrides · Gaspard Boréal ------------------------------------------------ • Upload .wav/.mp3 (≤ 30 s) • Prédit les tags AudioSet (AST 527) + spectrogramme • Synthèse bio-acoustique : barres horizontales (≥ 0,5 %) • Boutons : « Nouveau fichier » & « Voir tags (condensé / complet) » """ # ---------------------------- Imports --------------------------------- import json, pathlib, matplotlib.pyplot as plt, pandas as pd import torch, torchaudio, librosa, gradio as gr from transformers import pipeline # ---------------------------- 0. Ontology ----------------------------- ONTO = json.loads(pathlib.Path(__file__).with_name("ontology.json").read_text()) by_id = {n["id"]: n for n in ONTO} name2id = {n["name"]: n["id"] for n in ONTO} parents = {} for n in ONTO: for c in n.get("child_ids", []): parents.setdefault(c, []).append(n["id"]) def root_parent(mid): # ID racine while parents.get(mid): mid = parents[mid][0] return mid # ---------------------------- 1. Modèle ------------------------------- clf = pipeline("audio-classification", model="MIT/ast-finetuned-audioset-10-10-0.4593", top_k=None, device=0 if torch.cuda.is_available() else -1) # ---------------------------- 2. Utils audio -------------------------- def load_audio(path, target_sr=16000, max_sec=30): ext = pathlib.Path(path).suffix.lower() if ext == ".wav": wav, sr = torchaudio.load(path) else: y, sr = librosa.load(path, sr=None, mono=False) wav = torch.tensor(y).unsqueeze(0) if y.ndim == 1 else torch.tensor(y) wav = wav[:, : max_sec * sr] if wav.shape[0] > 1: wav = wav.mean(0, keepdim=True) if sr != target_sr: wav = torchaudio.functional.resample(wav, sr, target_sr) sr = target_sr return wav, sr def top_k_dict(d, k=5): return {k_: v_ for k_, v_ in sorted(d.items(), key=lambda x: x[1], reverse=True)[:k]} # ---------------------------- 3. 
# Analyse ------------------------------
THRESH = 0.005  # 0.5 % (score >= 0.005)


def analyse(audio_path, expanded):
    """Classify an audio file and build everything the UI displays.

    Args:
        audio_path: Path of the uploaded audio file.
        expanded: When true, the displayed tags are the full set; otherwise
            only the five best-scoring tags.

    Returns:
        A 4-tuple ``(disp, fig, full, df)``:

        * ``disp`` – ``{label: score}`` to display (full or top 5);
        * ``fig``  – Matplotlib figure holding the spectrogram;
        * ``full`` – every ``{label: score}`` with score >= ``THRESH``;
        * ``df``   – DataFrame with columns ``Racine`` / ``Pourcent``: the
          share of each ontology root, in percent, filtered to >= 0.5 and
          sorted in decreasing order.
    """
    # Imported locally so the figure can be built without pyplot's global
    # figure registry (see below).
    from matplotlib.figure import Figure

    wav, sr = load_audio(audio_path)
    # NOTE(review): confirm that the pipeline honours the `sampling_rate`
    # keyword; its documented way to pass a rate is a
    # {"raw": ..., "sampling_rate": ...} dict input.
    res = clf(wav.numpy().squeeze(), sampling_rate=sr)

    full = {}
    for d in res:
        score = float(d["score"])
        if score > 1:  # score expressed in percent: bring it back to [0, 1]
            score /= 100
        if score >= THRESH:
            full[d["label"]] = round(score, 4)

    # Root summary: keep the best score per root, then normalise to sum 1.
    synth_raw = {}
    for label, sc in full.items():
        mid = name2id.get(label)
        if mid:
            root = by_id[root_parent(mid)]["name"]
            synth_raw[root] = max(synth_raw.get(root, 0), sc)
    tot = sum(synth_raw.values()) or 1  # avoid dividing by zero when empty
    synth_norm = {root: sc / tot for root, sc in synth_raw.items()}

    disp = full if expanded else top_k_dict(full, 5)

    # DataFrame filtered for the BarPlot (>= 0.5 %).
    df = (
        pd.DataFrame({
            "Racine": list(synth_norm),
            "Pourcent": [round(v * 100, 1) for v in synth_norm.values()],
        })
        .query("Pourcent >= 0.5")
        .sort_values("Pourcent", ascending=False)
    )

    # A standalone Figure is not registered with pyplot, so it is released
    # once the caller drops it (plt.subplots would keep one figure alive per
    # request), and tight_layout() cannot hit another request's figure.
    fig = Figure(figsize=(6, 2))
    ax = fig.subplots()
    ax.specgram(wav.numpy()[0], Fs=sr, NFFT=1024, noverlap=512)
    ax.set_axis_off()
    fig.tight_layout()
    return disp, fig, full, df


# ---------------------------- 4.
Interface ---------------------------- with gr.Blocks(title="La Fréquence du Vivant – Bio acoustic découverte") as demo: gr.Markdown("# La Fréquence du Vivant") gr.Markdown("### Écoute bio-acoustique : marche techno-sensible entre vivant, humain et machine") expanded = gr.State(False) full_tags = gr.State({}) synth_df = gr.State(pd.DataFrame()) audio_in = gr.Audio(sources=["upload"], type="filepath", label="🎙️ Charger un fichier .wav / .mp3 (≤ 30 s)") # Synthèse gr.Markdown("**Synthèse bio-acoustique (racines)**") synth_out = gr.BarPlot(x="Racine", y="Pourcent", y_lim=(0, 100), height=260, title="Répartition par racine (%)") # Détails tags gr.Markdown("**Tags AudioSet**") with gr.Row(): reset_btn = gr.Button("🔄 Nouveau fichier / Réinitialiser", size="sm") toggle_btn = gr.Button("Voir tous les tags", size="sm", variant="primary") tags_out = gr.Label() spec_out = gr.Plot(label="Spectrogramme") # Helpers def _txt(exp): return "Uniquement les principaux tags" if exp else "Voir tous les tags" def flip(b): return not b def refresh(exp, full, df): disp = full if exp else top_k_dict(full, 5) return _txt(exp), disp, df def reset_ui(): fig = plt.figure(figsize=(6, 2)); plt.axis("off") return None, {}, fig, pd.DataFrame(), {}, False, "Voir tous les tags" audio_in.upload(analyse, [audio_in, expanded], [tags_out, spec_out, full_tags, synth_df]) toggle_btn.click(flip, expanded, expanded)\ .then(refresh, [expanded, full_tags, synth_df], [toggle_btn, tags_out, synth_out]) reset_btn.click( reset_ui, None, [audio_in, tags_out, spec_out, synth_out, full_tags, expanded, toggle_btn] ) synth_df.change( lambda d: d.query("Pourcent >= 0.5").sort_values("Pourcent", ascending=False), synth_df, synth_out ) gr.Markdown("