Shashwat2528 committed on
Commit
18a597b
·
1 Parent(s): cd4bc6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -219
app.py CHANGED
@@ -1,220 +1,3 @@
1
- # import sounddevice as sd
2
- # import soundfile as sf
3
- # import speech_recognition as sr
4
- # from gtts import gTTS
5
- # import pygame
6
- # import time
7
- # import gradio as gr
8
-
9
- # from transformers import AutoTokenizer, AutoModelForQuestionAnswering
10
-
11
- # model = AutoModelForQuestionAnswering.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
12
- # tokenizer = AutoTokenizer.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
13
-
14
- # class AvishkaaramEkta:
15
- # def __init__(self, model):
16
- # self.model = model
17
- # self.tokenizer = tokenizer
18
-
19
- # def text_to_speech(self, text, output_file):
20
- # # Create a gTTS object with the text and desired language
21
- # tts = gTTS(text=text, lang='en')
22
-
23
- # # Save the audio to a file
24
- # tts.save(output_file)
25
-
26
- # def play_mp3(self, file_path):
27
- # pygame.mixer.init()
28
- # pygame.mixer.music.load(file_path)
29
- # pygame.mixer.music.play()
30
- # while pygame.mixer.music.get_busy():
31
- # continue
32
-
33
- # def ask_question(self, audio_file):
34
- # print("Recording audio...")
35
- # audio = sd.rec(int(44100 * 6), samplerate=44100, channels=1)
36
- # sd.wait()
37
-
38
- # # Save the audio to a file
39
- # sf.write(audio_file, audio, 44100)
40
-
41
- # print(f"Audio saved to {audio_file}")
42
- # r = sr.Recognizer()
43
-
44
- # with sr.AudioFile(audio_file) as source:
45
- # audio_data = r.record(source)
46
-
47
- # text = ""
48
-
49
- # try:
50
- # text = r.recognize_google(audio_data)
51
- # print("Transcription:", text)
52
- # except sr.UnknownValueError:
53
- # print("Speech recognition could not understand audio")
54
- # except sr.RequestError as e:
55
- # print("Could not request results from Google Speech Recognition service; {0}".format(e))
56
-
57
- # return text
58
-
59
- # def answer_question(self, passage, question):
60
- # inputs = self.tokenizer(passage, question, return_tensors="pt")
61
- # outputs = self.model(**inputs)
62
- # start_logits = outputs.start_logits
63
- # end_logits = outputs.end_logits
64
- # start_index = start_logits.argmax(dim=1).item()
65
- # end_index = end_logits.argmax(dim=1).item()
66
- # tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
67
- # answer = self.tokenizer.convert_tokens_to_string(tokens[start_index:end_index+1])
68
- # return answer
69
-
70
- # def question_answer(self, passage, question):
71
- # passage_audio_file = "passage.mp3"
72
- # question_audio_file = "question.wav"
73
- # answer_audio_file = "answer.mp3"
74
-
75
- # self.text_to_speech(passage, passage_audio_file)
76
- # self.play_mp3(passage_audio_file)
77
-
78
- # question_text = self.ask_question(question_audio_file)
79
- # answer = self.answer_question(passage, question_text)
80
-
81
- # self.text_to_speech("The answer to the question is: " + answer, answer_audio_file)
82
- # self.play_mp3(answer_audio_file)
83
-
84
- # time.sleep(5) # Wait for 5 seconds before ending
85
-
86
- # return answer
87
-
88
- # # Create an instance of the AvishkaaramEkta class
89
- # avishkaaram_ekta = AvishkaaramEkta(model)
90
-
91
- # # Define the Gradio interface
92
- # iface = gr.Interface(
93
- # fn=avishkaaram_ekta.question_answer,
94
- # inputs=["text", "text"],
95
- # outputs="text",
96
- # title="Audio Question Answering",
97
- # description="Ask a question about a given passage using audio input",
98
- # examples=[
99
- # ["In 1960, Dr. Jane Goodall arrived in Gombe, Tanzania to study chimpanzees.", "What did Dr. Jane Goodall study?"],
100
- # ["The Taj Mahal is located in Agra, India.", "Where is the Taj Mahal situated?"],
101
- # ],
102
- # interpretation="default",
103
- # )
104
-
105
- # # Launch the Gradio interface
106
- # iface.launch()
107
-
108
-
109
- # import torch
110
- # import torchaudio
111
- # import soundfile as sf
112
- # import speech_recognition as sr
113
- # from gtts import gTTS
114
- # import pygame
115
- # import time
116
- # import gradio as gr
117
- # import os
118
-
119
- # from transformers import AutoTokenizer, AutoModelForQuestionAnswering
120
-
121
- # model = AutoModelForQuestionAnswering.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
122
- # tokenizer = AutoTokenizer.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
123
- # os.environ['SDL_AUDIODRIVER'] = 'dsp'
124
- # class AvishkaaramEkta:
125
- # def __init__(self, model):
126
- # self.model = model
127
- # self.tokenizer = tokenizer
128
-
129
- # def text_to_speech(self, text, output_file):
130
- # # Create a gTTS object with the text and desired language
131
- # tts = gTTS(text=text, lang='en')
132
-
133
- # # Save the audio to a file
134
- # tts.save(output_file)
135
-
136
- # def play_mp3(self, file_path):
137
- # pygame.mixer.init()
138
- # pygame.mixer.music.load(file_path)
139
- # pygame.mixer.music.play()
140
- # while pygame.mixer.music.get_busy():
141
- # continue
142
-
143
- # def ask_question(self, audio_file):
144
- # print("Recording audio...")
145
- # waveform, sample_rate = torchaudio.rec(6, sr=44100, channels=1)
146
-
147
- # # Save the audio to a file
148
- # sf.write(audio_file, waveform.squeeze().numpy(), sample_rate)
149
-
150
- # print(f"Audio saved to {audio_file}")
151
- # r = sr.Recognizer()
152
-
153
- # with sr.AudioFile(audio_file) as source:
154
- # audio_data = r.record(source)
155
-
156
- # text = ""
157
-
158
- # try:
159
- # text = r.recognize_google(audio_data)
160
- # print("Transcription:", text)
161
- # except sr.UnknownValueError:
162
- # print("Speech recognition could not understand audio")
163
- # except sr.RequestError as e:
164
- # print("Could not request results from Google Speech Recognition service; {0}".format(e))
165
-
166
- # return text
167
-
168
- # def answer_question(self, passage, question):
169
- # inputs = self.tokenizer(passage, question, return_tensors="pt")
170
- # outputs = self.model(**inputs)
171
- # start_logits = outputs.start_logits
172
- # end_logits = outputs.end_logits
173
- # start_index = start_logits.argmax(dim=1).item()
174
- # end_index = end_logits.argmax(dim=1).item()
175
- # tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
176
- # answer = self.tokenizer.convert_tokens_to_string(tokens[start_index:end_index+1])
177
- # return answer
178
-
179
- # def question_answer(self, passage, question):
180
- # passage_audio_file = "passage.mp3"
181
- # question_audio_file = "question.wav"
182
- # answer_audio_file = "answer.mp3"
183
-
184
- # self.text_to_speech(passage, passage_audio_file)
185
- # self.play_mp3(passage_audio_file)
186
-
187
- # question_text = self.ask_question(question_audio_file)
188
- # answer = self.answer_question(passage, question_text)
189
-
190
- # self.text_to_speech("The answer to the question is: " + answer, answer_audio_file)
191
- # self.play_mp3(answer_audio_file)
192
-
193
- # time.sleep(5) # Wait for 5 seconds before ending
194
-
195
- # return answer
196
-
197
- # # Create an instance of the AvishkaaramEkta class
198
- # avishkaaram_ekta = AvishkaaramEkta(model)
199
-
200
- # # Define the Gradio interface
201
- # iface = gr.Interface(
202
- # fn=avishkaaram_ekta.question_answer,
203
- # inputs=["text", "text"],
204
- # outputs="text",
205
- # title="Audio Question Answering",
206
- # description="Ask a question about a given passage using audio input",
207
- # examples=[
208
- # ["In 1960, Dr. Jane Goodall arrived in Gombe, Tanzania to study chimpanzees.", "What did Dr. Jane Goodall study?"],
209
- # ["The Taj Mahal is located in Agra, India.", "Where is the Taj Mahal situated?"],
210
- # ],
211
- # interpretation="default",
212
- # )
213
-
214
- # # Launch the Gradio interface
215
- # iface.launch()
216
-
217
-
218
  import torch
219
  import torchaudio
220
  import soundfile as sf
@@ -222,10 +5,8 @@ import speech_recognition as sr
222
  from gtts import gTTS
223
  import pygame
224
  import time
225
- import os
226
  import gradio as gr
227
  from transformers import AutoTokenizer, AutoModelForQuestionAnswering
228
- os.environ["SDL_AUDIODRIVER"] = "pulseaudio"
229
 
230
  model = AutoModelForQuestionAnswering.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
231
  tokenizer = AutoTokenizer.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import torch
2
  import torchaudio
3
  import soundfile as sf
 
5
  from gtts import gTTS
6
  import pygame
7
  import time
 
8
  import gradio as gr
9
  from transformers import AutoTokenizer, AutoModelForQuestionAnswering
 
10
 
11
  model = AutoModelForQuestionAnswering.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
12
  tokenizer = AutoTokenizer.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')