Spaces:

Hrsh-Venket
/

Corrected-Speech-to-Text

Runtime error

Hrsh-Venket committed on Apr 29, 2023

Commit

1c827e2

1 Parent(s): aaa69b0

new file: app.py

new file: hrsh-test.mp3
new file: karanrecording.mp3
new file: requirements.txt
new file: shaunakrecording.mp3
new file: test_1.mp3
new file: testaudio.mp3

Files changed (7) hide show

app.py +128 -0
hrsh-test.mp3 +0 -0
karanrecording.mp3 +0 -0
requirements.txt +144 -0
shaunakrecording.mp3 +0 -0
test_1.mp3 +0 -0
testaudio.mp3 +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,128 @@

+import gradio as gr
+from huggingsound import SpeechRecognitionModel
+from transformers import logging
+from transformers import pipeline
+from transformers import BertTokenizer, BertModel
+from pydub import AudioSegment
+unmasker = pipeline('fill-mask', model='bert-base-uncased')  # fill-mask pipeline built at import time; no active (uncommented) code in the visible source calls it
+tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')  # loaded at import time; no active (uncommented) code in the visible source calls it
+model = BertModel.from_pretrained("bert-base-uncased")  # loaded at import time; not referenced anywhere else in the visible source
+import os
+def levenshtein_distance(s, t):
+    """Return the Levenshtein edit distance between the sequences s and t.
+
+    The distance is the minimum number of single-element insertions,
+    deletions and substitutions that turn s into t. A full
+    (len(s) + 1) x (len(t) + 1) table is filled in, one column at a time.
+    """
+    rows, cols = len(s) + 1, len(t) + 1
+    # table[r][c] holds the distance between s[:r] and t[:c].
+    table = [[0] * cols for _ in range(rows)]
+    # Turning a prefix into the empty sequence (or back) costs its length.
+    table[0] = list(range(cols))
+    for r in range(1, rows):
+        table[r][0] = r
+    for c, t_item in enumerate(t, start=1):
+        for r, s_item in enumerate(s, start=1):
+            if s_item == t_item:
+                # Matching elements cost nothing extra.
+                table[r][c] = table[r - 1][c - 1]
+                continue
+            cheapest = min(table[r - 1][c], table[r][c - 1], table[r - 1][c - 1])
+            table[r][c] = cheapest + 1
+    return table[-1][-1]
+def collate(input):
+    """Join a list of word and punctuation tokens into one display string.
+
+    Args:
+        input: sequence of string tokens. A token that equals one of the
+            punctuation marks below is attached directly to the text before
+            it; every other token is treated as a word. (The parameter name
+            shadows the builtin but is kept so keyword callers still work.)
+
+    Returns:
+        The joined text. Words are separated by single spaces, the first
+        word and every word after ".", "?", "!" or "(" is capitalised, and
+        the word right after "-" or "'" is attached without a space. The
+        result has no leading space; an empty input gives "".
+    """
+    punctuation = frozenset(
+        [",", ".", "?", "!", ";", ":", "-", "—", "(", ")", "[", "]", "{", "}", "'", "\"", "`"]
+    )
+    capital_triggers = frozenset([".", "?", "!", "("])
+    joiners = frozenset(["-", "'"])
+
+    pieces = []
+    capitalize_next = True
+    glue_next = False
+    for token in input:
+        if token in punctuation:
+            pieces.append(token)
+            if token in capital_triggers:
+                capitalize_next = True
+            glue_next = token in joiners
+            continue
+        # No separator at the very start, nor right after a hyphen/apostrophe.
+        if pieces and not glue_next:
+            pieces.append(" ")
+        # Only the single word after a joiner is glued; later words get their
+        # space back.
+        glue_next = False
+        if capitalize_next:
+            pieces.append(token.capitalize())
+            capitalize_next = False
+        else:
+            pieces.append(token)
+    return "".join(pieces)
+# Model cards:
+# https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-english
+# https://huggingface.co/bert-base-uncased
+_W2V_MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-english"
+_w2v_model = None  # created on first use by _get_w2v_model(), then reused
+
+
+def _get_w2v_model():
+    """Return the shared speech recogniser, building it on the first call."""
+    global _w2v_model
+    if _w2v_model is None:
+        _w2v_model = SpeechRecognitionModel(_W2V_MODEL_ID)
+    return _w2v_model
+
+
+def _as_path_list(audio_paths):
+    """Normalise one path / file object, or a list of them, to a list of paths."""
+    if audio_paths is None:
+        return []
+    items = audio_paths if isinstance(audio_paths, (list, tuple)) else [audio_paths]
+    paths = []
+    for item in items:
+        if isinstance(item, (str, os.PathLike)):
+            paths.append(os.fspath(item))
+        else:
+            # NOTE(review): presumably an uploaded temp-file wrapper whose
+            # `.name` is the path on disk -- confirm against the Gradio
+            # version in use. Anything without `.name` is passed through.
+            paths.append(getattr(item, "name", item))
+    return paths
+
+
+def everything(audio_paths):
+    """Transcribe the given audio file(s) with the wav2vec2 English model.
+
+    Args:
+        audio_paths: a list of audio file paths (the original contract), or
+            a single path / uploaded file object, which is wrapped in a list.
+
+    Returns:
+        Whatever `SpeechRecognitionModel.transcribe` returns for the paths
+        (the earlier draft read `transcriptions[0]["transcription"]` from
+        it), or an empty list when no audio was supplied.
+    """
+    # Set the verbosity before the model is built so that it also applies
+    # while the model loads. Change 'error' to 'warning', or remove this
+    # call, if you want to see the warnings.
+    logging.set_verbosity_error()
+    paths = _as_path_list(audio_paths)
+    if not paths:
+        return []
+    transcriptions = _get_w2v_model().transcribe(paths)
+    return transcriptions
+    # NOTE: an unreachable, commented-out draft of the correction stage used
+    # to follow here. As far as it could be read, it:
+    #   1. split transcriptions[0]["transcription"] into words, masked each
+    #      word in turn with "[MASK]", asked the fill-mask pipeline for
+    #      candidates and kept the one with the smallest
+    #      levenshtein_distance to the original word when that distance
+    #      was <= 2;
+    #   2. inserted "[MASK]" between words and kept the top candidate when
+    #      its token id was below 1500 (presumably punctuation -- confirm);
+    #   3. passed the token list to collate() and displayed the result.
+    # It also quoted, for comparison, the output of a plain autocorrect model:
+    # https://huggingface.co/oliverguhr/spelling-correction-english-base?text=lets+do+a+comparsion
+    # Recover the draft from version control if it is needed again.
+# Gradio UI: one file-upload input, transcription shown as text.
+# NOTE(review): gr.File or gr.Audio(type="filepath") is the more usual input
+# for an Interface -- confirm that UploadButton behaves as intended here.
+demo = gr.Interface(
+    fn=everything,
+    # Interface needs a component instance (or a string shortcut), not the
+    # component class itself.
+    inputs=[gr.UploadButton()],
+    outputs=["text"],
+)
+
+# Start the web server only when run as a script, so importing this module
+# stays free of side effects beyond building the interface.
+if __name__ == "__main__":
+    demo.launch()

hrsh-test.mp3 ADDED Viewed

Binary file (109 kB). View file

karanrecording.mp3 ADDED Viewed

Binary file (70 kB). View file

requirements.txt ADDED Viewed

	@@ -0,0 +1,144 @@

+# aiohttp==3.8.4
+# aiosignal==1.3.1
+# altair==4.2.2
+# appdirs==1.4.4
+# asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1670263926556/work
+# async-timeout==4.0.2
+# attrs==23.1.0
+# audioread==3.0.0
+# backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work
+# backports.functools-lru-cache @ file:///home/conda/feedstock_root/build_artifacts/backports.functools_lru_cache_1618230623929/work
+# blinker==1.6.2
+# # blur-detector==0.0.6
+# brotlipy==0.7.0
+# cachetools==5.3.0
+# certifi==2022.12.7
+# cffi @ file:///C:/ci/cffi_1625831756778/work
+# chardet @ file:///C:/ci/chardet_1607706937985/work
+# charset-normalizer==3.1.0
+# click==8.1.3
+# colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1666700638685/work
+# conda==4.14.0
+# conda-package-handling @ file:///C:/ci/conda-package-handling_1618262410900/work
+# contourpy==1.0.7
+# cryptography @ file:///C:/ci/cryptography_1616769504165/work
+# cycler==0.11.0
+# datasets==2.11.0
+# debugpy @ file:///C:/ci/debugpy_1637091961445/work
+# decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work
+# dill==0.3.6
+# distlib==0.3.6
+# entrypoints==0.4
+# et-xmlfile==1.1.0
+# executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1667317341051/work
+# filelock==3.12.0
+# fonttools==4.39.3
+# frozenlist==1.3.3
+# fsspec==2023.4.0
+# gitdb==4.0.10
+# GitPython==3.1.31
+# huggingface-hub==0.13.4
+huggingsound==0.1.6
+# idna @ file:///home/linux1/recipes/ci/idna_1610986105248/work
+# imageio==2.27.0
+# importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1679167925176/work
+# importlib-resources==5.12.0
+# install==1.3.5
+# ipykernel @ file:///D:/bld/ipykernel_1655369313836/work
+# ipython @ file:///D:/bld/ipython_1680185618122/work
+# ipywidgets @ file:///home/conda/feedstock_root/build_artifacts/ipywidgets_1680023138361/work
+# jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1669134318875/work
+# Jinja2==3.1.2
+# jiwer==2.6.0
+# joblib==1.2.0
+# jsonschema==4.17.3
+# jupyter-client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1679365123476/work
+# jupyter-core @ file:///C:/b/abs_bd7elvu3w2/croot/jupyter_core_1676538600510/work
+# jupyterlab-widgets @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_widgets_1680020489668/work
+# kiwisolver==1.4.4
+# lazy-loader==0.2
+# librosa==0.9.2
+# llvmlite==0.39.1
+# markdown-it-py==2.2.0
+# MarkupSafe==2.1.2
+# matplotlib==3.7.1
+# matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1660814786464/work
+# mdurl==0.1.2
+# menuinst==1.4.16
+# mpmath==1.3.0
+# msgpack==1.0.5
+# multidict==6.0.4
+# multiprocess==0.70.14
+# nest-asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1664684991461/work
+# networkx==3.1
+# numba==0.56.4
+# numpy==1.23.5
+# oauthlib==3.2.2
+# # opencv-python==4.7.0.72
+# openpyxl==3.1.2
+# packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1673482170163/work
+# pandas==2.0.0
+# parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1638334955874/work
+# pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work
+# Pillow==9.5.0
+# pipenv==2023.4.20
+# platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1679871349196/work
+# pooch==1.6.0
+# prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1677600924538/work
+# protobuf==3.20.3
+# psutil @ file:///C:/Windows/Temp/abs_b2c2fd7f-9fd5-4756-95ea-8aed74d0039flsd9qufz/croots/recipe/psutil_1656431277748/work
+# pure-eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1642875951954/work
+# pyarrow==11.0.0
+# pyasn1==0.4.8
+# pyasn1-modules==0.2.8
+# pycosat==0.6.3
+# pycparser @ file:///tmp/build/80754af9/pycparser_1594388511720/work
+# pydeck==0.8.1b0
+pydub==0.25.1
+# Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1672682006896/work
+# Pympler==1.0.1
+# pyOpenSSL @ file:///tmp/build/80754af9/pyopenssl_1608057966937/work
+# pyparsing==3.0.9
+# pyrsistent==0.19.3
+# PySocks @ file:///C:/ci/pysocks_1605307512533/work
+# python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work
+# pytz==2023.3
+# pytz-deprecation-shim==0.1.0.post0
+# PyWavelets==1.4.1
+# pywin32==228
+# PyYAML==6.0
+# pyzmq @ file:///C:/ci/pyzmq_1657615952984/work
+# rapidfuzz==2.13.7
+# regex==2023.3.23
+# requests @ file:///tmp/build/80754af9/requests_1608241421344/work
+# requests-oauthlib==1.3.1
+# resampy==0.4.2
+# responses==0.18.0
+# rich==13.3.5
+# rsa==4.9
+# ruamel-yaml-conda @ file:///C:/ci/ruamel_yaml_1616016898638/work
+# scikit-image==0.20.0
+# scikit-learn==1.2.2
+# scipy==1.9.1
+# # seaborn==0.12.2
+# six @ file:///tmp/build/80754af9/six_1623709665295/work
+# # sklearn==0.0
+# smmap==5.0.0
+# soundfile==0.12.1
+# soxr==0.3.5
+# stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work
+streamlit==1.22.0
+# sympy==1.11.1
+# tenacity==8.2.2
+# threadpoolctl==3.1.0
+# tifffile==2023.3.21
+# tokenizers==0.13.3
+# toml==0.10.2
+# toolz @ file:///home/conda/feedstock_root/build_artifacts/toolz_1657485559105/work
+# # torch==1.12.1
+# # torchaudio==2.0.1+cu118
+# # torchvision==0.15.1+cu118
+# tornado @ file:///D:/bld/tornado_1656937934674/work
+# tqdm==4.65.0
+# traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1675110562325/work
+transformers==4.28.1

shaunakrecording.mp3 ADDED Viewed

Binary file (553 kB). View file

test_1.mp3 ADDED Viewed

Binary file (598 kB). View file

testaudio.mp3 ADDED Viewed

Binary file (127 kB). View file