Spaces:
Sleeping
Sleeping
File size: 1,816 Bytes
10a86b9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import re

import torch
import torchaudio
import whisper
from transformers import pipeline
# 🔹 Whisper transcription
def transcribe_text(audio_path):
    """Transcribe an audio file with Whisper, decoding as Lithuanian.

    Args:
        audio_path: Path of the audio file handed to ``model.transcribe``.

    Returns:
        The value stored under the result's ``"text"`` key with surrounding
        whitespace removed, or an empty string when that key is absent.
    """
    # NOTE: the "base" checkpoint is loaded anew on every call.
    whisper_model = whisper.load_model("base")
    transcription = whisper_model.transcribe(audio_path, language="lt")
    raw_text = transcription.get("text", "")
    return raw_text.strip()
# 🔹 Whisper language recognition (with an additional keyword check)
def recognize_language(audio_path):
    """Detect the language of an audio file using Whisper plus keyword hints.

    The audio is transcribed without a forced language. The lower-cased
    transcript is then scanned for a few well-known words per language; the
    first language (in the order lt, en, de) with a hit wins. Only when no
    keyword is found is Whisper's own ``"language"`` value returned.

    Keywords are matched as whole words/phrases, so a short hint such as
    "ich" does not fire inside unrelated words like "which" or "rich".

    Args:
        audio_path: Path of the audio file handed to ``model.transcribe``.

    Returns:
        ``"lt"``, ``"en"`` or ``"de"`` when a keyword matches; otherwise the
        result's ``"language"`` entry, or ``"unknown"`` if that is missing.
    """
    # Ordered (language, keywords) table: earlier entries take priority.
    keyword_hints = (
        ("lt", ("labas", "ačiū", "draugas", "vardas", "sekasi", "prašau")),
        ("en", ("hello", "name", "how are you", "friend", "please")),
        ("de", ("hallo", "danke", "freund", "ich", "bitte")),
    )

    # NOTE: the "base" checkpoint is loaded anew on every call.
    model = whisper.load_model("base")
    result = model.transcribe(audio_path)
    lower_text = result.get("text", "").strip().lower()

    for lang, keywords in keyword_hints:
        # \b is Unicode-aware for str patterns, so "ačiū"/"prašau" are
        # delimited correctly; re.escape keeps the phrase's spaces literal.
        alternatives = "|".join(re.escape(keyword) for keyword in keywords)
        if re.search(rf"\b(?:{alternatives})\b", lower_text):
            return lang

    return result.get("language", "unknown")
# 🔸 Wav2Vec2 transcription (with language selection)
def transcribe_text_wav2vec(audio_path, kalba):
    """Transcribe an audio file with a language-specific Wav2Vec2 model.

    Args:
        audio_path: Path of the audio file passed to ``torchaudio.load``.
        kalba: Language code selecting the model: ``"lt"``, ``"en"`` or
            ``"de"``.

    Returns:
        The ``"text"`` entry of the speech-recognition pipeline result.

    Raises:
        ValueError: If ``kalba`` is not one of the supported language codes.
    """
    kalbos_modeliai = {
        "lt": "DeividasM/wav2vec2-large-xlsr-53-lithuanian",
        "en": "facebook/wav2vec2-base-960h",
        "de": "jonatasgrosman/wav2vec2-large-xlsr-53-german",
    }
    # Validate first so an unsupported code fails before any model is loaded.
    if kalba not in kalbos_modeliai:
        raise ValueError(f"Nepalaikoma kalba: {kalba}")

    # All input is normalised to this rate before inference.
    target_rate = 16000

    # NOTE: the pipeline (and its model) is constructed anew on every call.
    pipe = pipeline(
        "automatic-speech-recognition",
        model=kalbos_modeliai[kalba],
    )

    speech_array, sampling_rate = torchaudio.load(audio_path)
    if sampling_rate != target_rate:
        resampler = torchaudio.transforms.Resample(
            orig_freq=sampling_rate, new_freq=target_rate
        )
        speech_array = resampler(speech_array)

    # Downmix to mono by averaging over the channel axis instead of keeping
    # only channel 0, so speech present on other channels is not discarded.
    # For single-channel audio the mean equals the original channel.
    speech = speech_array.mean(dim=0).numpy()

    result = pipe(speech)
    return result["text"]
|