"""
La Fréquence du Vivant – Bio acoustic découverte
Comédie des Mondes Hybrides · Gaspard Boréal
------------------------------------------------
• Upload .wav/.mp3 (≤ 30 s)
• Prédit les tags AudioSet (AST 527) + spectrogramme
• Synthèse bio-acoustique : barres horizontales (≥ 0,5 %)
• Boutons : « Nouveau fichier » & « Voir tags (condensé / complet) »
"""
# ---------------------------- Imports ---------------------------------
import json, pathlib, matplotlib.pyplot as plt, pandas as pd
import torch, torchaudio, librosa, gradio as gr
from transformers import pipeline
# ---------------------------- 0. Ontology -----------------------------
ONTO = json.loads(pathlib.Path(__file__).with_name("ontology.json").read_text())
by_id = {n["id"]: n for n in ONTO}
name2id = {n["name"]: n["id"] for n in ONTO}
parents = {}                                   # child id -> list of parent ids
for n in ONTO:
    for c in n.get("child_ids", []):
        parents.setdefault(c, []).append(n["id"])
def root_parent(mid):                          # walk up to the root ID
    while parents.get(mid):
        mid = parents[mid][0]
    return mid
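# Illustration (made-up IDs, not real AudioSet MIDs): with a parent chain
# leaf -> mid -> root, parents == {"leaf": ["mid"], "mid": ["root"]} and
# root_parent("leaf") returns "root"; when a node has several parents,
# only the first listed parent is followed.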
# ---------------------------- 1. Model --------------------------------
clf = pipeline("audio-classification",
               model="MIT/ast-finetuned-audioset-10-10-0.4593",
               top_k=None,                     # None: do not limit to the default top-5
               device=0 if torch.cuda.is_available() else -1)
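# The pipeline returns a list of {"label": str, "score": float} dicts, one per
# AudioSet class, e.g. [{"label": "Speech", "score": 0.82}, ...] (values are
# illustrative only); analyse() below iterates over that list and keeps the
# scores at or above THRESH.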
# ---------------------------- 2. Audio utils --------------------------
def load_audio(path, target_sr=16000, max_sec=30):
    ext = pathlib.Path(path).suffix.lower()
    if ext == ".wav":
        wav, sr = torchaudio.load(path)
    else:
        y, sr = librosa.load(path, sr=None, mono=False)
        wav = torch.tensor(y).unsqueeze(0) if y.ndim == 1 else torch.tensor(y)
    wav = wav[:, : max_sec * sr]               # keep at most max_sec seconds
    if wav.shape[0] > 1:                       # downmix to mono
        wav = wav.mean(0, keepdim=True)
    if sr != target_sr:                        # resample to the model's rate
        wav = torchaudio.functional.resample(wav, sr, target_sr)
        sr = target_sr
    return wav, sr
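# Note: decoding .mp3 through librosa assumes an MP3-capable backend is available
# in the environment (soundfile with a recent libsndfile, or audioread + ffmpeg);
# this is an environment assumption, not something the script checks.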
def top_k_dict(d, k=5):
    return {k_: v_ for k_, v_ in
            sorted(d.items(), key=lambda x: x[1], reverse=True)[:k]}
# ---------------------------- 3. Analysis -----------------------------
THRESH = 0.005                                 # 0.5 % (score >= 0.005)

def analyse(audio_path, expanded):
    wav, sr = load_audio(audio_path)
    res = clf({"raw": wav.numpy().squeeze(), "sampling_rate": sr})
    full = {}
    for d in res:
        # Normalise to [0, 1] in case a score is reported as a percentage
        s = float(d["score"]) / 100 if d["score"] > 1 else float(d["score"])
        if s >= THRESH:
            full[d["label"]] = round(s, 4)
    # Root-level synthesis (max score per root), then normalisation
    synth_raw = {}
    for label, sc in full.items():
        mid = name2id.get(label)
        if mid:
            root = by_id[root_parent(mid)]["name"]
            synth_raw[root] = max(synth_raw.get(root, 0), sc)
    tot = sum(synth_raw.values()) or 1
    synth_norm = {k: sc / tot for k, sc in synth_raw.items()}
    disp = full if expanded else top_k_dict(full, 5)
    # DataFrame filtered for the BarPlot (>= 0.5 %)
    df = (pd.DataFrame({"Racine": list(synth_norm.keys()),
                        "Pourcent": [round(v * 100, 1) for v in synth_norm.values()]})
          .query("Pourcent >= 0.5")
          .sort_values("Pourcent", ascending=False))
    fig, ax = plt.subplots(figsize=(6, 2))
    ax.specgram(wav.numpy()[0], Fs=sr, NFFT=1024, noverlap=512)
    ax.set_axis_off()
    plt.tight_layout()
    return disp, fig, full, df
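# Quick local sanity check (a sketch; "sample.wav" is a hypothetical file name,
# not shipped with the app):
#   disp, fig, full, df = analyse("sample.wav", expanded=False)
#   print(disp)   # top-5 tags at or above the 0.5 % threshold
#   print(df)     # root-level synthesis feeding the BarPlot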
# ---------------------------- 4. Interface ----------------------------
with gr.Blocks(title="La Fréquence du Vivant – Bio acoustic découverte") as demo:
gr.Markdown("# La Fréquence du Vivant")
gr.Markdown("### Écoute bio-acoustique : marche techno-sensible entre vivant, humain et machine")
expanded = gr.State(False)
full_tags = gr.State({})
synth_df = gr.State(pd.DataFrame())
audio_in = gr.Audio(sources=["upload"], type="filepath",
label="🎙️ Charger un fichier .wav / .mp3 (≤ 30 s)")
# Synthèse
gr.Markdown("**Synthèse bio-acoustique (racines)**")
synth_out = gr.BarPlot(x="Racine", y="Pourcent",
y_lim=(0, 100),
height=260,
title="Répartition par racine (%)")
# Détails tags
gr.Markdown("**Tags AudioSet**")
with gr.Row():
reset_btn = gr.Button("🔄 Nouveau fichier / Réinitialiser", size="sm")
toggle_btn = gr.Button("Voir tous les tags", size="sm", variant="primary")
tags_out = gr.Label()
spec_out = gr.Plot(label="Spectrogramme")
# Helpers
def _txt(exp): return "Uniquement les principaux tags" if exp else "Voir tous les tags"
def flip(b): return not b
def refresh(exp, full, df):
disp = full if exp else top_k_dict(full, 5)
return _txt(exp), disp, df
def reset_ui():
fig = plt.figure(figsize=(6, 2)); plt.axis("off")
return None, {}, fig, pd.DataFrame(), {}, False, "Voir tous les tags"
audio_in.upload(analyse,
[audio_in, expanded],
[tags_out, spec_out, full_tags, synth_df])
toggle_btn.click(flip, expanded, expanded)\
.then(refresh,
[expanded, full_tags, synth_df],
[toggle_btn, tags_out, synth_out])
reset_btn.click(
reset_ui, None,
[audio_in, tags_out, spec_out,
synth_out, full_tags, expanded, toggle_btn]
)
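    # Note: the .change listener on the gr.State below assumes a Gradio release
    # that supports change events on State components; on older versions the
    # BarPlot is only refreshed through the explicit event outputs above.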
    synth_df.change(
        lambda d: d.query("Pourcent >= 0.5").sort_values("Pourcent", ascending=False),
        synth_df, synth_out
    )
    gr.Markdown("<center>2025 : Gaspard Boréal / La comédie des mondes hybrides</center>")
# ---------------------------- 5. Run ----------------------------------
if __name__ == "__main__":
    demo.launch(debug=True)