# Transcribe a Greek audio file with a Hugging Face speech-recognition
# pipeline and print the recognised text.
from transformers import pipeline

# Path of the recording to transcribe. It is handed to the pipeline *call*
# below, not to the pipeline() factory.
audio = "greek.wav"

# The task identifier is the first argument of pipeline(); the model id and
# remaining options are passed by keyword.
pipe = pipeline(
    "automatic-speech-recognition",
    model="stt-greek",
    return_timestamps=True,
)

# Both generation options must live in the same generate_kwargs dict.
# NOTE(review): "task"/"language" are Whisper-style generation arguments --
# confirm that the "stt-greek" checkpoint accepts them.
output = pipe(
    audio,
    generate_kwargs={"task": "transcribe", "language": "el"},
)

# Only the "text" entry of the pipeline output is kept and printed.
result = output["text"]
print(result)