# VERTAAL-APP-EAGLE-SHELTER / app-08-04-2024-19u00m.py
# JBHF's picture
# app-08-04-2024-19u00m.py
# 691ed18 verified
# JBHF/VERTAAL-APP-EAGLE-SHELTER/app.py - 08-04-2024, 19u00m
# ALREADY WORKS: the audio recorded with this app (audio.wav) does not necessarily have to be persisted.
# https://github.com/theevann/streamlit-audiorecorder
# An audio Recorder for streamlit
#
# Description
# Audio recorder component for streamlit.
# It creates a button to start the recording and takes three arguments:
# the start button text, the stop button text, and the pause button text.
# If the pause button text is not specified, the pause button is not displayed.
#
# Parameters
# The signature of the component is:
# audiorecorder(start_prompt="Start recording", stop_prompt="Stop recording", pause_prompt="", key=None):
# The prompt parameters are self-explanatory, and the optional key parameter is used internally by streamlit
# to properly distinguish multiple audiorecorders on the page.
#
# Return value
# The component's return value is a pydub AudioSegment.
#
# All AudioSegment methods are available, in particular you can:
# - Play the audio in the frontend with st.audio(audio.export().read())
# - Save the audio to a file with audio.export("audio.wav", format="wav")
# JB: Why don't I see the file "audio.wav" in my HF Spaces environment?
# JB: 08-04-2024 - Caching may already be sufficient (otherwise use file persistence).
# See:
#
# CACHING:
# ========
# STREAMLIT - Caching overview - Streamlit Docs - 07-04-2024 !!!!!
# https://docs.streamlit.io/develop/concepts/architecture/caching
#
# EVERNOTE :
# https://www.evernote.com/shard/s313/nl/41973486/31880952-8bd9-41ef-8047-ca844143e833/
# STREAMLIT - Caching overview - Streamlit Docs - 07-04-2024 !!!!!
#
# 08-04-2024
#
# EN
#
# PERSISTENCE:
# ============
# HF SPACES STREAMLIT APPS - GET PASSWORDS AND ACCESS TOKENS FROM HF ENVIRONMENT ! - PERSISTENT STORAGE ON HF SPACES ! - EAGLE SHELTER VERTAAL APP ETC ! - app.py · julien-c/persistent-data at main - 20-03-2024 !!!!! !!!!! !!!!!
# https://huggingface.co/spaces/julien-c/persistent-data/blob/main/app.py
#
# ——->
#
# DUPLICATED TO:
# https://huggingface.co/spaces/JBHF/persistent-data?logs=container
#
# EVERNOTE :
# https://www.evernote.com/shard/s313/nl/41973486/1b07098e-3376-4316-abb3-b3d0996ebf03/
# HF SPACES STREAMLIT APPS - GET PASSWORDS AND ACCESS TOKENS FROM HF ENVIRONMENT ! - PERSISTENT STORAGE ON HF SPACES ! - EAGLE SHELTER VERTAAL APP ETC ! - app.py · julien-c/persistent-data at main - 20-03-2024 !!!!! !!!!! !!!!!
#
# 08-04-2024
#
###########################################################################################################
#
# Installation:
# pip install streamlit-audiorecorder
# Note: This package uses ffmpeg, so it should be installed for this audiorecorder to work properly.
#
# On ubuntu/debian: sudo apt update && sudo apt install ffmpeg
# On mac: brew install ffmpeg
import streamlit as st
from audiorecorder import audiorecorder
# Page header followed by the recorder widget.
# Component signature (streamlit-audiorecorder):
#   audiorecorder(start_prompt="Start recording", stop_prompt="Stop recording",
#                 pause_prompt="", key=None)
# The widget's return value is a pydub AudioSegment; the script later checks
# len(audio) > 0 before using it.
st.title("Audio Recorder")
audio = audiorecorder(
    start_prompt="Click to record",
    stop_prompt="Click to stop recording",
    pause_prompt="Click to pause recording",
)
# JB:
# https://docs.streamlit.io/develop/concepts/architecture/caching
# @st.cache_data
def audio_export(audio_wav_file, format, segment=None):
    """Write a recording to ``audio_wav_file`` encoded as ``format``.

    This function is deliberately NOT wrapped in ``st.cache_resource``.
    Streamlit's cache keys on the call arguments, and every call here uses
    ("audio.wav", "wav"), so a cached version runs the export only once and
    silently skips it for every later recording, leaving a stale file on disk.

    Args:
        audio_wav_file: Destination path (or file-like object) handed to
            pydub's ``export``.
        format: Output container/codec name, e.g. ``"wav"``. The name shadows
            the builtin but is kept because callers pass it by keyword.
        segment: Object exposing a pydub-style ``export`` method. Defaults to
            the module-level ``audio`` recording when omitted.

    Returns:
        None. The file on disk is the result.
    """
    source = audio if segment is None else segment
    exported = source.export(audio_wav_file, format=format)
    # pydub's export() returns the output file object still open; close it so
    # the data is flushed and the handle is not left to the garbage collector.
    exported.close()
# Only act once something has actually been recorded.
if len(audio) > 0:
    # Play the recording back in the browser. export() with no arguments
    # returns a file object holding the encoded audio; the `with` block closes
    # it as soon as the bytes have been read.
    with audio.export() as preview:
        st.audio(preview.read())

    # Save the recording to disk so the transcription step further down can
    # read it back from "audio.wav".
    audio_export("audio.wav", format="wav")  # JB 08-04-2024

    # Basic properties of the pydub AudioSegment.
    st.write(
        f"Frame rate: {audio.frame_rate}, Frame width: {audio.frame_width}, "
        f"Duration: {audio.duration_seconds} seconds"
    )

# NOTE(review): assumed to be a top-level widget (always visible) - confirm.
# Clicking it triggers a fresh run of the script.
st.button("Rerun")
###########################################################################################################
###########################################################################################################
# TEST
# ZIE:
# infer_faster_whisper_large_v2 (CPU VERSIE !) 08-04-2024-COLAB-CPU-PYTHON3-tvscitechtalk.ipynb
# https://colab.research.google.com/drive/1EreiFx825oIrR2P43XSXjHXx01EWi6ZH#scrollTo=vuLjbPxexPDj&uniqifier=5
import os

from faster_whisper import WhisperModel

# Transcription test, based on the Colab notebook
# "infer_faster_whisper_large_v2 (CPU version)" referenced above.
model_size = "large-v2"


@st.cache_resource
def _load_whisper_model(size):
    """Load the faster-whisper model once and reuse it across script reruns.

    Author's notes on the configuration:
    - device="cuda", compute_type="float16" works on a Colab T4 GPU.
    - device="cpu", compute_type="float16" fails with "ValueError: Requested
      float16 compute type, but the target device or backend do not support
      efficient float16 computation."
    - device="cpu", compute_type="int8" works on the HF Spaces free tier and
      takes roughly 33 s to 1 min to load, which is why the result is cached
      instead of being rebuilt on every rerun.
    - JB: loading the model uses my HF token.

    NOTE(review): cached resources are shared between sessions; this assumes
    concurrent use of a single WhisperModel is safe - confirm.
    """
    return WhisperModel(size, device="cpu", compute_type="int8")


st.write('Loading the WhisperModel: model = WhisperModel(model_size, device="cpu", compute_type="int8")')
model = _load_whisper_model(model_size)
st.write('Ready Loading the WhisperModel: model = WhisperModel(model_size, device="cpu", compute_type="int8")')

# Always defined, so code after this section can rely on the name even when
# there is nothing to transcribe yet.
text_to_transcribe = ""

# Without this guard the very first run (before anything was recorded) fails
# because "audio.wav" does not exist yet.
if os.path.isfile("audio.wav"):
    # JB: audio.wav stays available to the app for the duration of the session.
    segments, info = model.transcribe("audio.wav", beam_size=1)

    st.write(f"Detected language '{info.language}' with probability {info.language_probability:f}")
    st.write("")
    st.write("info.all_language_probs : ", info.all_language_probs)
    # Author observed 99 entries here.
    st.write("len(info.all_language_probs): ", len(info.all_language_probs))
    st.write("")
    st.write("info: ", info)
    # Example from the author's test file (Ukrainian podcast): 233.8249375
    st.write("info.duration: ", info.duration)

    # Split the duration (seconds) into whole minutes plus leftover seconds.
    minutes = int(info.duration / 60)
    seconds = info.duration - minutes * 60
    st.write(minutes, " minutes and ", seconds, " seconds")

    # faster-whisper documents `segments` as a lazy generator, so it is
    # iterated exactly once: show each segment and keep its text.
    segment_texts = []
    for segment in segments:
        st.write(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")
        segment_texts.append(segment.text)

    # Same result as repeatedly appending " " + segment.text (including the
    # leading space), built with a single join.
    text_to_transcribe = "".join(f" {text}" for text in segment_texts)

    # NOTE(review): separator assumed to be printed once after all segments.
    st.write("---------------------------------------------------------------------")
else:
    st.info("No audio.wav found yet - record something above to get a transcription.")

st.write("text_to_transcribe: ", text_to_transcribe)
# Piece of Ukrainian text that was actually recorded with the mic and transcribed, as a test,
# to be translated to Dutch below using the LLM Mixtral-8x7b on Groq:
# text_to_transcribe:
# князем Данилом Романовичем біля Звенигорода і названий на честь його сина Лева Сьогодні Львів має площу 155 квадратних кілометрів з безліччю громадських будинків, кафе, магазинів
###########################################################################################################
# TRANSLATION
# Piece of Ukrainian text that was actually recorded with the mic and transcribed, as a test,
# to be translated to Dutch below using the LLM Mixtral-8x7b on Groq:
# text_to_transcribe:
# князем Данилом Романовичем біля Звенигорода і названий на честь його сина Лева Сьогодні Львів має площу 155 квадратних кілометрів з безліччю громадських будинків, кафе, магазинів
# ...
###########################################################################################################