Spaces:
Sleeping
Sleeping
Commit
·
aec05f5
1
Parent(s):
18a597b
Update app.py
Browse files
app.py
CHANGED
@@ -1,105 +1,38 @@
|
|
1 |
-
import torch
|
2 |
-
import torchaudio
|
3 |
-
import soundfile as sf
|
4 |
-
import speech_recognition as sr
|
5 |
-
from gtts import gTTS
|
6 |
-
import pygame
|
7 |
-
import time
|
8 |
import gradio as gr
|
9 |
-
from transformers import
|
10 |
-
|
11 |
-
|
12 |
-
tokenizer = AutoTokenizer.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
|
13 |
-
|
14 |
-
class AvishkaaramEkta:
|
15 |
-
def __init__(self, model):
|
16 |
-
self.model = model
|
17 |
-
self.tokenizer = tokenizer
|
18 |
-
|
19 |
-
def text_to_speech(self, text, output_file):
|
20 |
-
# Create a gTTS object with the text and desired language
|
21 |
-
tts = gTTS(text=text, lang='en')
|
22 |
-
|
23 |
-
# Save the audio to a file
|
24 |
-
tts.save(output_file)
|
25 |
-
|
26 |
-
def play_mp3(self, file_path):
|
27 |
-
pygame.mixer.init()
|
28 |
-
pygame.mixer.music.load(file_path)
|
29 |
-
pygame.mixer.music.play()
|
30 |
-
while pygame.mixer.music.get_busy():
|
31 |
-
continue
|
32 |
-
|
33 |
-
def ask_question(self, audio_file):
|
34 |
-
print("Recording audio...")
|
35 |
-
waveform, sample_rate = torchaudio.rec(6, sr=44100, channels=1)
|
36 |
-
|
37 |
-
# Save the audio to a file
|
38 |
-
sf.write(audio_file, waveform.squeeze().numpy(), sample_rate)
|
39 |
-
|
40 |
-
print(f"Audio saved to {audio_file}")
|
41 |
-
r = sr.Recognizer()
|
42 |
-
|
43 |
-
with sr.AudioFile(audio_file) as source:
|
44 |
-
audio_data = r.record(source)
|
45 |
-
|
46 |
-
text = ""
|
47 |
-
|
48 |
-
try:
|
49 |
-
text = r.recognize_google(audio_data)
|
50 |
-
print("Transcription:", text)
|
51 |
-
except sr.UnknownValueError:
|
52 |
-
print("Speech recognition could not understand audio")
|
53 |
-
except sr.RequestError as e:
|
54 |
-
print("Could not request results from Google Speech Recognition service; {0}".format(e))
|
55 |
-
|
56 |
-
return text
|
57 |
-
|
58 |
-
def answer_question(self, passage, question):
|
59 |
-
inputs = self.tokenizer(passage, question, return_tensors="pt")
|
60 |
-
outputs = self.model(**inputs)
|
61 |
-
start_logits = outputs.start_logits
|
62 |
-
end_logits = outputs.end_logits
|
63 |
-
start_index = start_logits.argmax(dim=1).item()
|
64 |
-
end_index = end_logits.argmax(dim=1).item()
|
65 |
-
tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
|
66 |
-
answer = self.tokenizer.convert_tokens_to_string(tokens[start_index:end_index+1])
|
67 |
-
return answer
|
68 |
-
|
69 |
-
def question_answer(self, passage, question):
|
70 |
-
passage_audio_file = "passage.mp3"
|
71 |
-
question_audio_file = "question.wav"
|
72 |
-
answer_audio_file = "answer.mp3"
|
73 |
-
|
74 |
-
self.text_to_speech(passage, passage_audio_file)
|
75 |
-
self.play_mp3(passage_audio_file)
|
76 |
-
|
77 |
-
question_text = self.ask_question(question_audio_file)
|
78 |
-
answer = self.answer_question(passage, question_text)
|
79 |
|
80 |
-
|
81 |
-
|
|
|
82 |
|
83 |
-
|
|
|
|
|
|
|
84 |
|
85 |
-
|
|
|
|
|
|
|
86 |
|
87 |
-
|
88 |
-
avishkaaram_ekta = AvishkaaramEkta(model)
|
89 |
|
90 |
-
#
|
91 |
iface = gr.Interface(
|
92 |
-
fn=
|
93 |
inputs=["text", "text"],
|
94 |
-
outputs="
|
95 |
-
|
96 |
-
|
|
|
97 |
examples=[
|
98 |
-
["
|
99 |
-
["
|
100 |
-
]
|
101 |
)
|
102 |
|
103 |
-
#
|
104 |
iface.launch()
|
105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import pipeline
|
3 |
+
from gtts import gTTS
|
4 |
+
import tempfile
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
+
# Build the extractive question-answering pipeline once, at import time,
# so every Gradio request reuses the already-loaded weights.
model_name = "AVISHKAARAM/avishkaarak-ekta-hindi"
model = pipeline(
    "question-answering",
    model=model_name,
    tokenizer=model_name,
)
9 |
|
10 |
+
def question_answering(context, question):
    """Answer *question* from *context* and synthesize the answer as speech.

    Args:
        context: Passage of text to search for the answer.
        question: Natural-language question about the passage.

    Returns:
        Tuple ``(answer, audio_path)`` — the extracted answer string and the
        path of an MP3 file speaking that answer.
    """
    # Use the question-answering model to get the answer
    result = model(context=context, question=question)
    answer = result["answer"]

    # Convert the answer to audio.
    # BUGFIX: the original used tempfile.NamedTemporaryFile(...).name — the
    # unreferenced file object is garbage-collected immediately and, with the
    # default delete=True, removes the file, racing with tts.save() (and
    # failing on Windows, where the still-open file cannot be rewritten).
    # delete=False keeps the path valid for gTTS and for Gradio to serve.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_path = tmp.name
    tts = gTTS(text=answer, lang='en')
    tts.save(audio_path)

    return answer, audio_path
|
|
|
21 |
|
22 |
+
# Assemble the Gradio UI: two text inputs (context, question) mapped through
# question_answering to a text answer plus a downloadable audio rendition.
# NOTE(review): gr.outputs.* and the `layout` kwarg are the legacy pre-3.x
# Gradio API — confirm the pinned gradio version still supports them.
iface = gr.Interface(
    fn=question_answering,
    inputs=["text", "text"],
    outputs=[
        gr.outputs.Textbox(label="Answer"),
        gr.outputs.File(label="Answer Audio"),
    ],
    layout="vertical",
    title="Question Answering - AVISHKAARAK",
    description="Enter a context and a question to get an answer. :)",
    examples=[
        ["The capital of France is Paris.", "What is the capital of France?"],
        ["OpenAI is famous for developing GPT-3.", "What is OpenAI known for?"],
    ],
)

# Start the web server for the demo.
iface.launch()
|
38 |
|