Spaces:
Runtime error
Runtime error
Commit
·
1c827e2
1
Parent(s):
aaa69b0
new file: app.py
Browse filesnew file: hrsh-test.mp3
new file: karanrecording.mp3
new file: requirements.txt
new file: shaunakrecording.mp3
new file: test_1.mp3
new file: testaudio.mp3
- app.py +128 -0
- hrsh-test.mp3 +0 -0
- karanrecording.mp3 +0 -0
- requirements.txt +144 -0
- shaunakrecording.mp3 +0 -0
- test_1.mp3 +0 -0
- testaudio.mp3 +0 -0
app.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from huggingsound import SpeechRecognitionModel
|
| 3 |
+
from transformers import logging
|
| 4 |
+
from transformers import pipeline
|
| 5 |
+
from transformers import BertTokenizer, BertModel
|
| 6 |
+
from pydub import AudioSegment
|
| 7 |
+
unmasker = pipeline('fill-mask', model='bert-base-uncased')
|
| 8 |
+
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
| 9 |
+
model = BertModel.from_pretrained("bert-base-uncased")
|
| 10 |
+
import os
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def levenshtein_distance(s, t):
    """Return the Levenshtein (edit) distance between two sequences.

    The distance is the minimum number of single-item insertions,
    deletions and substitutions needed to turn ``s`` into ``t``.

    Args:
        s: First sequence (typically a string).
        t: Second sequence (typically a string).

    Returns:
        The edit distance as a non-negative int. If either sequence is
        empty the result is the length of the other one.
    """
    # Only the previous row of the DP table is ever read, so keep two rows
    # instead of the full (len(s)+1) x (len(t)+1) matrix.
    previous = list(range(len(t) + 1))
    for row, s_item in enumerate(s, start=1):
        # Column 0: turning the first `row` items of s into an empty prefix.
        current = [row]
        for col, t_item in enumerate(t, start=1):
            if s_item == t_item:
                # Matching items cost nothing; carry the diagonal value.
                cost = previous[col - 1]
            else:
                cost = 1 + min(
                    previous[col],       # deletion
                    current[col - 1],    # insertion
                    previous[col - 1],   # substitution
                )
            current.append(cost)
        previous = current
    return previous[-1]
|
| 31 |
+
|
| 32 |
+
def collate(input):
    """Join a sequence of word and punctuation tokens into one string.

    Punctuation tokens are appended directly to the preceding text. Word
    tokens are separated by a single space, except for the word that
    immediately follows a ``-`` or ``'`` token, which is attached without
    a space. The first word, and the first word after a ``.`` or ``(``
    token, is capitalized with ``str.capitalize``.

    Args:
        input: Iterable of string tokens (words and punctuation marks).

    Returns:
        The assembled text. No leading space is emitted, and an empty
        input yields an empty string.
    """
    # NOTE(review): "β" looks like an encoding-damaged character (possibly a
    # dash); it is kept unchanged here - confirm the intended mark.
    pun_marks = {",", ".", "?", "!", ";", ":", "-", "β", "(", ")", "[", "]", "{", "}", "'", "\"", "`"}
    pieces = []
    capitalize_next = True
    glue_next = False
    for token in input:
        if token in pun_marks:
            pieces.append(token)
            if token in (".", "("):
                capitalize_next = True
            # Only a hyphen or apostrophe attaches the following word.
            glue_next = token in ("-", "'")
            continue

        # Separate words with a space, but never start the text with one.
        if pieces and not glue_next:
            pieces.append(" ")
        if capitalize_next:
            pieces.append(token.capitalize())
            capitalize_next = False
        else:
            pieces.append(token)
        # The attachment applies to one word only; without this reset every
        # later word would also be glued on without a space.
        glue_next = False
    return "".join(pieces)
|
| 57 |
+
|
| 58 |
+
# https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-english
_W2V_MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-english"
_w2v_model = None


def _get_w2v_model():
    """Return the shared speech-recognition model, loading it on first use."""
    global _w2v_model
    if _w2v_model is None:
        _w2v_model = SpeechRecognitionModel(_W2V_MODEL_ID)
    return _w2v_model


def everything(audio_paths):
    """Transcribe the given audio with the wav2vec2 English model.

    The model is created once and reused on later calls instead of being
    rebuilt for every request.

    Args:
        audio_paths: Passed unchanged to ``SpeechRecognitionModel.transcribe``.
            Presumably a list of audio file paths - confirm against the
            huggingsound API and the caller.

    Returns:
        Whatever ``transcribe`` returns. Presumably a list of dicts with a
        "transcription" key, as the commented-out code after this function
        indexes it that way - confirm.
    """
    w2vmodel = _get_w2v_model()
    # Change 'error' to 'warning', or remove this call, to see the
    # transformers warnings.
    logging.set_verbosity_error()
    # https://huggingface.co/bert-base-uncased
    return w2vmodel.transcribe(audio_paths)
|
| 67 |
+
# input = transcriptions[0]["transcription"]
|
| 68 |
+
# input = input.split()
|
| 69 |
+
|
| 70 |
+
# #(1) is a strategy where tokens are used to determine lexicographic distance
|
| 71 |
+
# #(2) is a strategy where replaced words
|
| 72 |
+
# for t in range(1):
|
| 73 |
+
# # output = [] #(2)
|
| 74 |
+
# for i in range(len(input)):
|
| 75 |
+
# temp = input[i]
|
| 76 |
+
# token = tokenizer(temp)['input_ids'][1]
|
| 77 |
+
# input[i] = "[MASK]"
|
| 78 |
+
# apiint = unmasker(' '.join(input))
|
| 79 |
+
# dist = []
|
| 80 |
+
# for r in range(5):
|
| 81 |
+
# # if (np.abs((apiint[r]['token'] - token)) < 2): #(1)
|
| 82 |
+
# dist.append(levenshtein_distance(temp, apiint[r]['token_str']))
|
| 83 |
+
# lindex = 0
|
| 84 |
+
# l = dist[0]
|
| 85 |
+
# for r in range(5):
|
| 86 |
+
# if dist[r] < l:
|
| 87 |
+
# lindex = r
|
| 88 |
+
|
| 89 |
+
# l = dist[r]
|
| 90 |
+
# if l <= 2:
|
| 91 |
+
# input[i] = apiint[lindex]['token_str']
|
| 92 |
+
# # output.append(apiint[lindex]['token_str']) #(2)
|
| 93 |
+
# else:
|
| 94 |
+
# input[i] = temp
|
| 95 |
+
# # output.append(temp) #(2)
|
| 96 |
+
# # input[i] = temp #(2)
|
| 97 |
+
|
| 98 |
+
# for t in range(1):
|
| 99 |
+
# inndex = 1
|
| 100 |
+
# for i in range(len(input)):
|
| 101 |
+
# input.insert(inndex, "[MASK]")
|
| 102 |
+
# # print(' '.join(input))
|
| 103 |
+
# apiint = unmasker(' '.join(input))
|
| 104 |
+
# if (apiint[0]['token'] < 1500):
|
| 105 |
+
# input[inndex] = apiint[0]["token_str"]
|
| 106 |
+
# inndex += 2
|
| 107 |
+
# else:
|
| 108 |
+
# del input[inndex]
|
| 109 |
+
# inndex += 1
|
| 110 |
+
|
| 111 |
+
# st.write(collate(input))
|
| 112 |
+
|
| 113 |
+
# # In comparison, a plain autocorrect gives this output:
|
| 114 |
+
|
| 115 |
+
# # "The b-movie by Jerry Sinclair, the sound of buzzing
|
| 116 |
+
# # bees, can be heard according to all known laws of
|
| 117 |
+
# # aviation that is no way for b to be able to fly its
|
| 118 |
+
# # wings are too small to get its start little body off
|
| 119 |
+
# # the ground, the be, of course, flies anyway because ``
|
| 120 |
+
# # bees don't care what humans think is possible.
|
| 121 |
+
# # Barbuda is guaranteed one member of the House of
|
| 122 |
+
# # Representatives and two members of the Senate."
|
| 123 |
+
|
| 124 |
+
# # - https://huggingface.co/oliverguhr/spelling-correction-english-base?text=lets+do+a+comparsion
|
| 125 |
+
|
| 126 |
+
# Gradio interface: a single upload control feeding `everything`, with the
# result shown as text. The keyword arguments must be comma-separated; the
# comma between `inputs` and `outputs` was missing, which is a SyntaxError.
# NOTE(review): gr.UploadButton is passed as a class rather than an
# instance - confirm gr.Interface accepts that in the gradio version used.
# NOTE(review): nothing in the visible file calls demo.launch() - confirm
# how the app is started.
demo = gr.Interface(
    fn=everything,
    inputs=[gr.UploadButton],
    outputs=["text"],
)
|
hrsh-test.mp3
ADDED
|
Binary file (109 kB). View file
|
|
|
karanrecording.mp3
ADDED
|
Binary file (70 kB). View file
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# aiohttp==3.8.4
|
| 2 |
+
# aiosignal==1.3.1
|
| 3 |
+
# altair==4.2.2
|
| 4 |
+
# appdirs==1.4.4
|
| 5 |
+
# asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1670263926556/work
|
| 6 |
+
# async-timeout==4.0.2
|
| 7 |
+
# attrs==23.1.0
|
| 8 |
+
# audioread==3.0.0
|
| 9 |
+
# backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work
|
| 10 |
+
# backports.functools-lru-cache @ file:///home/conda/feedstock_root/build_artifacts/backports.functools_lru_cache_1618230623929/work
|
| 11 |
+
# blinker==1.6.2
|
| 12 |
+
# # blur-detector==0.0.6
|
| 13 |
+
# brotlipy==0.7.0
|
| 14 |
+
# cachetools==5.3.0
|
| 15 |
+
# certifi==2022.12.7
|
| 16 |
+
# cffi @ file:///C:/ci/cffi_1625831756778/work
|
| 17 |
+
# chardet @ file:///C:/ci/chardet_1607706937985/work
|
| 18 |
+
# charset-normalizer==3.1.0
|
| 19 |
+
# click==8.1.3
|
| 20 |
+
# colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1666700638685/work
|
| 21 |
+
# conda==4.14.0
|
| 22 |
+
# conda-package-handling @ file:///C:/ci/conda-package-handling_1618262410900/work
|
| 23 |
+
# contourpy==1.0.7
|
| 24 |
+
# cryptography @ file:///C:/ci/cryptography_1616769504165/work
|
| 25 |
+
# cycler==0.11.0
|
| 26 |
+
# datasets==2.11.0
|
| 27 |
+
# debugpy @ file:///C:/ci/debugpy_1637091961445/work
|
| 28 |
+
# decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work
|
| 29 |
+
# dill==0.3.6
|
| 30 |
+
# distlib==0.3.6
|
| 31 |
+
# entrypoints==0.4
|
| 32 |
+
# et-xmlfile==1.1.0
|
| 33 |
+
# executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1667317341051/work
|
| 34 |
+
# filelock==3.12.0
|
| 35 |
+
# fonttools==4.39.3
|
| 36 |
+
# frozenlist==1.3.3
|
| 37 |
+
# fsspec==2023.4.0
|
| 38 |
+
# gitdb==4.0.10
|
| 39 |
+
# GitPython==3.1.31
|
| 40 |
+
# huggingface-hub==0.13.4
|
| 41 |
+
huggingsound==0.1.6
|
| 42 |
+
# idna @ file:///home/linux1/recipes/ci/idna_1610986105248/work
|
| 43 |
+
# imageio==2.27.0
|
| 44 |
+
# importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1679167925176/work
|
| 45 |
+
# importlib-resources==5.12.0
|
| 46 |
+
# install==1.3.5
|
| 47 |
+
# ipykernel @ file:///D:/bld/ipykernel_1655369313836/work
|
| 48 |
+
# ipython @ file:///D:/bld/ipython_1680185618122/work
|
| 49 |
+
# ipywidgets @ file:///home/conda/feedstock_root/build_artifacts/ipywidgets_1680023138361/work
|
| 50 |
+
# jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1669134318875/work
|
| 51 |
+
# Jinja2==3.1.2
|
| 52 |
+
# jiwer==2.6.0
|
| 53 |
+
# joblib==1.2.0
|
| 54 |
+
# jsonschema==4.17.3
|
| 55 |
+
# jupyter-client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1679365123476/work
|
| 56 |
+
# jupyter-core @ file:///C:/b/abs_bd7elvu3w2/croot/jupyter_core_1676538600510/work
|
| 57 |
+
# jupyterlab-widgets @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_widgets_1680020489668/work
|
| 58 |
+
# kiwisolver==1.4.4
|
| 59 |
+
# lazy-loader==0.2
|
| 60 |
+
# librosa==0.9.2
|
| 61 |
+
# llvmlite==0.39.1
|
| 62 |
+
# markdown-it-py==2.2.0
|
| 63 |
+
# MarkupSafe==2.1.2
|
| 64 |
+
# matplotlib==3.7.1
|
| 65 |
+
# matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1660814786464/work
|
| 66 |
+
# mdurl==0.1.2
|
| 67 |
+
# menuinst==1.4.16
|
| 68 |
+
# mpmath==1.3.0
|
| 69 |
+
# msgpack==1.0.5
|
| 70 |
+
# multidict==6.0.4
|
| 71 |
+
# multiprocess==0.70.14
|
| 72 |
+
# nest-asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1664684991461/work
|
| 73 |
+
# networkx==3.1
|
| 74 |
+
# numba==0.56.4
|
| 75 |
+
# numpy==1.23.5
|
| 76 |
+
# oauthlib==3.2.2
|
| 77 |
+
# # opencv-python==4.7.0.72
|
| 78 |
+
# openpyxl==3.1.2
|
| 79 |
+
# packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1673482170163/work
|
| 80 |
+
# pandas==2.0.0
|
| 81 |
+
# parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1638334955874/work
|
| 82 |
+
# pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work
|
| 83 |
+
# Pillow==9.5.0
|
| 84 |
+
# pipenv==2023.4.20
|
| 85 |
+
# platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1679871349196/work
|
| 86 |
+
# pooch==1.6.0
|
| 87 |
+
# prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1677600924538/work
|
| 88 |
+
# protobuf==3.20.3
|
| 89 |
+
# psutil @ file:///C:/Windows/Temp/abs_b2c2fd7f-9fd5-4756-95ea-8aed74d0039flsd9qufz/croots/recipe/psutil_1656431277748/work
|
| 90 |
+
# pure-eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1642875951954/work
|
| 91 |
+
# pyarrow==11.0.0
|
| 92 |
+
# pyasn1==0.4.8
|
| 93 |
+
# pyasn1-modules==0.2.8
|
| 94 |
+
# pycosat==0.6.3
|
| 95 |
+
# pycparser @ file:///tmp/build/80754af9/pycparser_1594388511720/work
|
| 96 |
+
# pydeck==0.8.1b0
|
| 97 |
+
pydub==0.25.1
|
| 98 |
+
# Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1672682006896/work
|
| 99 |
+
# Pympler==1.0.1
|
| 100 |
+
# pyOpenSSL @ file:///tmp/build/80754af9/pyopenssl_1608057966937/work
|
| 101 |
+
# pyparsing==3.0.9
|
| 102 |
+
# pyrsistent==0.19.3
|
| 103 |
+
# PySocks @ file:///C:/ci/pysocks_1605307512533/work
|
| 104 |
+
# python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work
|
| 105 |
+
# pytz==2023.3
|
| 106 |
+
# pytz-deprecation-shim==0.1.0.post0
|
| 107 |
+
# PyWavelets==1.4.1
|
| 108 |
+
# pywin32==228
|
| 109 |
+
# PyYAML==6.0
|
| 110 |
+
# pyzmq @ file:///C:/ci/pyzmq_1657615952984/work
|
| 111 |
+
# rapidfuzz==2.13.7
|
| 112 |
+
# regex==2023.3.23
|
| 113 |
+
# requests @ file:///tmp/build/80754af9/requests_1608241421344/work
|
| 114 |
+
# requests-oauthlib==1.3.1
|
| 115 |
+
# resampy==0.4.2
|
| 116 |
+
# responses==0.18.0
|
| 117 |
+
# rich==13.3.5
|
| 118 |
+
# rsa==4.9
|
| 119 |
+
# ruamel-yaml-conda @ file:///C:/ci/ruamel_yaml_1616016898638/work
|
| 120 |
+
# scikit-image==0.20.0
|
| 121 |
+
# scikit-learn==1.2.2
|
| 122 |
+
# scipy==1.9.1
|
| 123 |
+
# # seaborn==0.12.2
|
| 124 |
+
# six @ file:///tmp/build/80754af9/six_1623709665295/work
|
| 125 |
+
# # sklearn==0.0
|
| 126 |
+
# smmap==5.0.0
|
| 127 |
+
# soundfile==0.12.1
|
| 128 |
+
# soxr==0.3.5
|
| 129 |
+
# stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work
|
| 130 |
+
streamlit==1.22.0
|
| 131 |
+
# sympy==1.11.1
|
| 132 |
+
# tenacity==8.2.2
|
| 133 |
+
# threadpoolctl==3.1.0
|
| 134 |
+
# tifffile==2023.3.21
|
| 135 |
+
# tokenizers==0.13.3
|
| 136 |
+
# toml==0.10.2
|
| 137 |
+
# toolz @ file:///home/conda/feedstock_root/build_artifacts/toolz_1657485559105/work
|
| 138 |
+
# # torch==1.12.1
|
| 139 |
+
# # torchaudio==2.0.1+cu118
|
| 140 |
+
# # torchvision==0.15.1+cu118
|
| 141 |
+
# tornado @ file:///D:/bld/tornado_1656937934674/work
|
| 142 |
+
# tqdm==4.65.0
|
| 143 |
+
# traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1675110562325/work
|
| 144 |
+
transformers==4.28.1
|
shaunakrecording.mp3
ADDED
|
Binary file (553 kB). View file
|
|
|
test_1.mp3
ADDED
|
Binary file (598 kB). View file
|
|
|
testaudio.mp3
ADDED
|
Binary file (127 kB). View file
|
|
|