Shashwat2528 committed on
Commit
aec05f5
·
1 Parent(s): 18a597b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -92
app.py CHANGED
@@ -1,105 +1,38 @@
1
- import torch
2
- import torchaudio
3
- import soundfile as sf
4
- import speech_recognition as sr
5
- from gtts import gTTS
6
- import pygame
7
- import time
8
  import gradio as gr
9
- from transformers import AutoTokenizer, AutoModelForQuestionAnswering
10
-
11
- model = AutoModelForQuestionAnswering.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
12
- tokenizer = AutoTokenizer.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
13
-
14
- class AvishkaaramEkta:
15
- def __init__(self, model):
16
- self.model = model
17
- self.tokenizer = tokenizer
18
-
19
- def text_to_speech(self, text, output_file):
20
- # Create a gTTS object with the text and desired language
21
- tts = gTTS(text=text, lang='en')
22
-
23
- # Save the audio to a file
24
- tts.save(output_file)
25
-
26
- def play_mp3(self, file_path):
27
- pygame.mixer.init()
28
- pygame.mixer.music.load(file_path)
29
- pygame.mixer.music.play()
30
- while pygame.mixer.music.get_busy():
31
- continue
32
-
33
- def ask_question(self, audio_file):
34
- print("Recording audio...")
35
- waveform, sample_rate = torchaudio.rec(6, sr=44100, channels=1)
36
-
37
- # Save the audio to a file
38
- sf.write(audio_file, waveform.squeeze().numpy(), sample_rate)
39
-
40
- print(f"Audio saved to {audio_file}")
41
- r = sr.Recognizer()
42
-
43
- with sr.AudioFile(audio_file) as source:
44
- audio_data = r.record(source)
45
-
46
- text = ""
47
-
48
- try:
49
- text = r.recognize_google(audio_data)
50
- print("Transcription:", text)
51
- except sr.UnknownValueError:
52
- print("Speech recognition could not understand audio")
53
- except sr.RequestError as e:
54
- print("Could not request results from Google Speech Recognition service; {0}".format(e))
55
-
56
- return text
57
-
58
- def answer_question(self, passage, question):
59
- inputs = self.tokenizer(passage, question, return_tensors="pt")
60
- outputs = self.model(**inputs)
61
- start_logits = outputs.start_logits
62
- end_logits = outputs.end_logits
63
- start_index = start_logits.argmax(dim=1).item()
64
- end_index = end_logits.argmax(dim=1).item()
65
- tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
66
- answer = self.tokenizer.convert_tokens_to_string(tokens[start_index:end_index+1])
67
- return answer
68
-
69
- def question_answer(self, passage, question):
70
- passage_audio_file = "passage.mp3"
71
- question_audio_file = "question.wav"
72
- answer_audio_file = "answer.mp3"
73
-
74
- self.text_to_speech(passage, passage_audio_file)
75
- self.play_mp3(passage_audio_file)
76
-
77
- question_text = self.ask_question(question_audio_file)
78
- answer = self.answer_question(passage, question_text)
79
 
80
- self.text_to_speech("The answer to the question is: " + answer, answer_audio_file)
81
- self.play_mp3(answer_audio_file)
 
82
 
83
- time.sleep(5) # Wait for 5 seconds before ending
 
 
 
84
 
85
- return answer
 
 
 
86
 
87
- # Create an instance of the AvishkaaramEkta class
88
- avishkaaram_ekta = AvishkaaramEkta(model)
89
 
90
- # Define the Gradio interface
91
  iface = gr.Interface(
92
- fn=avishkaaram_ekta.question_answer,
93
  inputs=["text", "text"],
94
- outputs="text",
95
- title="Audio Question Answering",
96
- description="Ask a question about a given passage using audio input",
 
97
  examples=[
98
- ["In 1960, Dr. Jane Goodall arrived in Gombe, Tanzania, to study the behavior of chimpanzees in the wild."],
99
- ["What was the purpose of Dr. Jane Goodall's visit to Gombe?"]
100
- ]
101
  )
102
 
103
- # Launch the interface
104
  iface.launch()
105
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import pipeline
3
+ from gtts import gTTS
4
+ import tempfile
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
# Load the extractive question-answering pipeline once at import time so
# every request reuses the same model instead of reloading it per call.
model_name = "AVISHKAARAM/avishkaarak-ekta-hindi"
model = pipeline(task="question-answering", model=model_name, tokenizer=model_name)
9
 
10
def question_answering(context, question):
    """Answer ``question`` from ``context`` and speak the answer aloud.

    Args:
        context: Passage of text that contains the answer.
        question: Question to ask about the passage.

    Returns:
        Tuple ``(answer, audio_path)`` — the extracted answer string and the
        path of an MP3 file with the spoken answer.
    """
    # Extractive QA: the pipeline returns a dict; "answer" is the best span.
    result = model(context=context, question=question)
    answer = result["answer"]

    # Convert the answer to audio.
    tts = gTTS(text=answer, lang='en')
    # BUG FIX: the original used tempfile.NamedTemporaryFile(...).name with the
    # default delete=True — the backing file is removed as soon as the
    # unreferenced object is garbage-collected, so the path is unreserved
    # (race-prone) and unusable on Windows while the handle is open.
    # delete=False keeps the file on disk after the handle closes.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_path = tmp.name
    tts.save(audio_path)

    return answer, audio_path
 
21
 
22
# Assemble the Gradio UI: two free-text inputs (context, question) mapped to
# the answer text plus a downloadable MP3 of the spoken answer.
result_components = [
    gr.outputs.Textbox(label="Answer"),
    gr.outputs.File(label="Answer Audio"),
]
iface = gr.Interface(
    fn=question_answering,
    inputs=["text", "text"],
    outputs=result_components,
    layout="vertical",
    title="Question Answering - AVISHKAARAK",
    description="Enter a context and a question to get an answer. :)",
    examples=[
        ["The capital of France is Paris.", "What is the capital of France?"],
        ["OpenAI is famous for developing GPT-3.", "What is OpenAI known for?"],
    ],
)

# Start the web server for the interface.
iface.launch()
38