"""Compute and save AST feature statistics for subsets of NSynth.

For every note of pitch ``TARGET_PITCH`` in the selected data split, the raw
audio is loaded, forced to a fixed length, RMS-normalized and passed through
``ASTaudio2feature``.  The features are grouped once per sound quality and
once per instrument family; for each group the ``(mu, sigma)`` pair returned
by ``calculate_statistics`` is stored with ``save_AST_feature`` under the key
``"{data_split}_{group_name}"``.
"""
import json

import librosa
import numpy as np
from tqdm import tqdm
from transformers import AutoProcessor, ASTModel

from metrics.FD import ASTaudio2feature, calculate_statistics, save_AST_feature
from tools import rms_normalize

AST_CHECKPOINT = "MIT/ast-finetuned-audioset-10-10-0.4593"
SAMPLE_RATE = 16000    # sampling rate used for loading and for the AST processor
NUM_SAMPLES = 64000    # every clip is cropped / zero-padded to this many samples
TARGET_PITCH = 52      # only notes whose "pitch" attribute equals this are used

QUALITIES = [
    "bright", "dark", "distortion", "fast_decay", "long_release",
    "multiphonic", "nonlinear_env", "percussive", "reverb", "tempo-synced",
]
INSTRUMENT_FAMILIES = [
    "bass", "brass", "flute", "guitar", "keyboard", "mallet",
    "organ", "reed", "string", "synth_lead", "vocal",
]

device = "cpu"
processor = AutoProcessor.from_pretrained(AST_CHECKPOINT)
AST = ASTModel.from_pretrained(AST_CHECKPOINT).to(device)

data_split = "train"
with open(f'data/NSynth/{data_split}_examples.json') as f:
    data = json.load(f)


def read_signal(note_str):
    """Load one note, fix its length to ``NUM_SAMPLES`` and RMS-normalize it.

    Args:
        note_str: Note identifier; it is the stem of the ``.wav`` file inside
            the ``data/NSynth/nsynth-{data_split}-52/audio`` directory.

    Returns:
        Whatever ``rms_normalize`` returns for the length-adjusted waveform.
    """
    y, _ = librosa.load(
        f"data/NSynth/nsynth-{data_split}-52/audio/{note_str}.wav",
        sr=SAMPLE_RATE,
    )
    if len(y) >= NUM_SAMPLES:
        y = y[:NUM_SAMPLES]
    else:
        # Zero-pad at the end. np.pad keeps the result an ndarray with the
        # loaded dtype, so both branches hand the same type to rms_normalize
        # (the previous list-based padding returned a plain Python list).
        y = np.pad(y, (0, NUM_SAMPLES - len(y)))
    return rms_normalize(y)


def _save_subset_statistics(subset_name, keep):
    """Extract AST features for one subset and save their statistics.

    Args:
        subset_name: Label of the subset; used in the save key
            ``"{data_split}_{subset_name}"``.
        keep: Callable taking a note's attribute dict and returning a truthy
            value when the note belongs to the subset.
    """
    features = []
    # Iterating the dict view directly (instead of wrapping it in enumerate)
    # lets tqdm read the length and display a total.
    for note_str, attributes in tqdm(data.items(), desc=subset_name):
        if attributes["pitch"] != TARGET_PITCH:
            continue
        if not keep(attributes):
            continue
        signal = read_signal(note_str)
        feature_for_one_signal = ASTaudio2feature(
            device, [signal], processor, AST, sampling_rate=SAMPLE_RATE
        )[0]
        features.append(feature_for_one_signal)

    if not features:
        # No note matched: there is nothing to compute statistics from, so
        # skip this subset instead of passing an empty list downstream.
        print(f"No notes found for '{subset_name}' in split '{data_split}'; skipping.")
        return

    mu, sigma = calculate_statistics(features)
    print(np.shape(mu))
    print(np.shape(sigma))
    save_AST_feature(f'{data_split}_{subset_name}', mu.tolist(), sigma.tolist())


for quality in QUALITIES:
    # Bind the loop variable as a default so the predicate does not depend on
    # late-binding closure semantics.
    _save_subset_statistics(
        quality,
        lambda attributes, quality=quality: quality in attributes['qualities_str'],
    )

for instrument_name in INSTRUMENT_FAMILIES:
    _save_subset_statistics(
        instrument_name,
        lambda attributes, instrument_name=instrument_name:
            attributes["instrument_family_str"] == instrument_name,
    )