#!/usr/bin/env python3
"""Decode a single WAV file with a sherpa-onnx offline transducer model.

Usage:
    ./script.py /path/to/mono-16bit.wav

The recognized text, tokens, and token timestamps are printed to stdout.
"""
import sys
import wave
from pathlib import Path
from typing import Tuple

import numpy as np


def read_wave(wave_filename: str) -> Tuple[np.ndarray, int]:
    """Load a mono, 16-bit PCM WAV file as normalized float32 samples.

    Args:
        wave_filename: Path of the WAV file to read.

    Returns:
        A tuple ``(samples, sample_rate)`` where ``samples`` is a 1-D
        float32 array scaled by 1/32768 (so values lie in [-1, 1)) and
        ``sample_rate`` is the frame rate stored in the file header.

    Raises:
        ValueError: If the file is not single-channel or its sample width
            is not 2 bytes.
    """
    with wave.open(wave_filename) as f:
        # Explicit checks instead of ``assert`` so that validation still
        # runs when Python is started with -O.
        num_channels = f.getnchannels()
        if num_channels != 1:
            raise ValueError(
                f"Expected a single-channel wave file, got {num_channels} channels"
            )

        sample_width = f.getsampwidth()  # it is in bytes
        if sample_width != 2:
            raise ValueError(
                f"Expected 16-bit samples, got a sample width of {sample_width} bytes"
            )

        sample_rate = f.getframerate()
        pcm_bytes = f.readframes(f.getnframes())

    # WAV PCM data is little-endian; state the byte order explicitly so the
    # decoding does not depend on the host's native endianness.
    samples_int16 = np.frombuffer(pcm_bytes, dtype="<i2")
    samples_float32 = samples_int16.astype(np.float32) / 32768
    return samples_float32, sample_rate


def main():
    """Decode the WAV file named by ``sys.argv[1]`` and print the result.

    The model files are loaded from hard-coded relative paths
    (``am-onnx/`` and ``lang/tokens.txt``). Exits with an error message
    when the argument is missing or does not name an existing file.
    """
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} /path/to/foo.wav")

    wave_path = Path(sys.argv[1])
    if not wave_path.is_file():
        # Fail before the models are loaded.
        sys.exit(f"No such file: {wave_path}")

    # Imported here rather than at module level so that ``read_wave`` can be
    # imported and used without sherpa_onnx being installed.
    import sherpa_onnx

    recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
        encoder="am-onnx/encoder.onnx",
        decoder="am-onnx/decoder.onnx",
        joiner="am-onnx/joiner.onnx",
        tokens="lang/tokens.txt",
        # NOTE(review): 0 presumably leaves the thread count to the runtime;
        # confirm against the sherpa_onnx documentation.
        num_threads=0,
        provider="cpu",
        sample_rate=16000,
        dither=3e-5,
        max_active_paths=10,
        decoding_method="modified_beam_search",
    )

    samples, sample_rate = read_wave(str(wave_path))

    s = recognizer.create_stream()
    # The file's own sample rate is passed along with the samples.
    s.accept_waveform(sample_rate, samples)
    recognizer.decode_stream(s)

    print("Text:", s.result.text)
    print("Tokens:", s.result.tokens)
    print("Timestamps:", s.result.timestamps)


if __name__ == "__main__":
    main()