Spaces:

Shashwat2528
/

avishkaaram

Sleeping

App Files Files Community

Shashwat2528 committed on Jun 26, 2023

Commit

18a597b

1 Parent(s): cd4bc6f

Update app.py

Browse files

Files changed (1) hide show

app.py +0 -219

app.py CHANGED Viewed

@@ -1,220 +1,3 @@
-# import sounddevice as sd
-# import soundfile as sf
-# import speech_recognition as sr
-# from gtts import gTTS
-# import pygame
-# import time
-# import gradio as gr
-# from transformers import AutoTokenizer, AutoModelForQuestionAnswering
-# model = AutoModelForQuestionAnswering.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
-# tokenizer = AutoTokenizer.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
-# class AvishkaaramEkta:
-#     def __init__(self, model):
-#         self.model = model
-#         self.tokenizer = tokenizer
-#     def text_to_speech(self, text, output_file):
-#         # Create a gTTS object with the text and desired language
-#         tts = gTTS(text=text, lang='en')
-#         # Save the audio to a file
-#         tts.save(output_file)
-#     def play_mp3(self, file_path):
-#         pygame.mixer.init()
-#         pygame.mixer.music.load(file_path)
-#         pygame.mixer.music.play()
-#         while pygame.mixer.music.get_busy():
-#             continue
-#     def ask_question(self, audio_file):
-#         print("Recording audio...")
-#         audio = sd.rec(int(44100 * 6), samplerate=44100, channels=1)
-#         sd.wait()
-#         # Save the audio to a file
-#         sf.write(audio_file, audio, 44100)
-#         print(f"Audio saved to {audio_file}")
-#         r = sr.Recognizer()
-#         with sr.AudioFile(audio_file) as source:
-#             audio_data = r.record(source)
-#         text = ""
-#         try:
-#             text = r.recognize_google(audio_data)
-#             print("Transcription:", text)
-#         except sr.UnknownValueError:
-#             print("Speech recognition could not understand audio")
-#         except sr.RequestError as e:
-#             print("Could not request results from Google Speech Recognition service; {0}".format(e))
-#         return text
-#     def answer_question(self, passage, question):
-#         inputs = self.tokenizer(passage, question, return_tensors="pt")
-#         outputs = self.model(**inputs)
-#         start_logits = outputs.start_logits
-#         end_logits = outputs.end_logits
-#         start_index = start_logits.argmax(dim=1).item()
-#         end_index = end_logits.argmax(dim=1).item()
-#         tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
-#         answer = self.tokenizer.convert_tokens_to_string(tokens[start_index:end_index+1])
-#         return answer
-#     def question_answer(self, passage, question):
-#         passage_audio_file = "passage.mp3"
-#         question_audio_file = "question.wav"
-#         answer_audio_file = "answer.mp3"
-#         self.text_to_speech(passage, passage_audio_file)
-#         self.play_mp3(passage_audio_file)
-#         question_text = self.ask_question(question_audio_file)
-#         answer = self.answer_question(passage, question_text)
-#         self.text_to_speech("The answer to the question is: " + answer, answer_audio_file)
-#         self.play_mp3(answer_audio_file)
-#         time.sleep(5)  # Wait for 5 seconds before ending
-#         return answer
-# # Create an instance of the AvishkaaramEkta class
-# avishkaaram_ekta = AvishkaaramEkta(model)
-# # Define the Gradio interface
-# iface = gr.Interface(
-#     fn=avishkaaram_ekta.question_answer,
-#     inputs=["text", "text"],
-#     outputs="text",
-#     title="Audio Question Answering",
-#     description="Ask a question about a given passage using audio input",
-#     examples=[
-#         ["In 1960, Dr. Jane Goodall arrived in Gombe, Tanzania to study chimpanzees.", "What did Dr. Jane Goodall study?"],
-#         ["The Taj Mahal is located in Agra, India.", "Where is the Taj Mahal situated?"],
-#     ],
-#     interpretation="default",
-# )
-# # Launch the Gradio interface
-# iface.launch()
-# import torch
-# import torchaudio
-# import soundfile as sf
-# import speech_recognition as sr
-# from gtts import gTTS
-# import pygame
-# import time
-# import gradio as gr
-# import os
-# from transformers import AutoTokenizer, AutoModelForQuestionAnswering
-# model = AutoModelForQuestionAnswering.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
-# tokenizer = AutoTokenizer.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
-# os.environ['SDL_AUDIODRIVER'] = 'dsp'
-# class AvishkaaramEkta:
-#     def __init__(self, model):
-#         self.model = model
-#         self.tokenizer = tokenizer
-#     def text_to_speech(self, text, output_file):
-#         # Create a gTTS object with the text and desired language
-#         tts = gTTS(text=text, lang='en')
-#         # Save the audio to a file
-#         tts.save(output_file)
-#     def play_mp3(self, file_path):
-#         pygame.mixer.init()
-#         pygame.mixer.music.load(file_path)
-#         pygame.mixer.music.play()
-#         while pygame.mixer.music.get_busy():
-#             continue
-#     def ask_question(self, audio_file):
-#         print("Recording audio...")
-#         waveform, sample_rate = torchaudio.rec(6, sr=44100, channels=1)
-#         # Save the audio to a file
-#         sf.write(audio_file, waveform.squeeze().numpy(), sample_rate)
-#         print(f"Audio saved to {audio_file}")
-#         r = sr.Recognizer()
-#         with sr.AudioFile(audio_file) as source:
-#             audio_data = r.record(source)
-#         text = ""
-#         try:
-#             text = r.recognize_google(audio_data)
-#             print("Transcription:", text)
-#         except sr.UnknownValueError:
-#             print("Speech recognition could not understand audio")
-#         except sr.RequestError as e:
-#             print("Could not request results from Google Speech Recognition service; {0}".format(e))
-#         return text
-#     def answer_question(self, passage, question):
-#         inputs = self.tokenizer(passage, question, return_tensors="pt")
-#         outputs = self.model(**inputs)
-#         start_logits = outputs.start_logits
-#         end_logits = outputs.end_logits
-#         start_index = start_logits.argmax(dim=1).item()
-#         end_index = end_logits.argmax(dim=1).item()
-#         tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
-#         answer = self.tokenizer.convert_tokens_to_string(tokens[start_index:end_index+1])
-#         return answer
-#     def question_answer(self, passage, question):
-#         passage_audio_file = "passage.mp3"
-#         question_audio_file = "question.wav"
-#         answer_audio_file = "answer.mp3"
-#         self.text_to_speech(passage, passage_audio_file)
-#         self.play_mp3(passage_audio_file)
-#         question_text = self.ask_question(question_audio_file)
-#         answer = self.answer_question(passage, question_text)
-#         self.text_to_speech("The answer to the question is: " + answer, answer_audio_file)
-#         self.play_mp3(answer_audio_file)
-#         time.sleep(5)  # Wait for 5 seconds before ending
-#         return answer
-# # Create an instance of the AvishkaaramEkta class
-# avishkaaram_ekta = AvishkaaramEkta(model)
-# # Define the Gradio interface
-# iface = gr.Interface(
-#     fn=avishkaaram_ekta.question_answer,
-#     inputs=["text", "text"],
-#     outputs="text",
-#     title="Audio Question Answering",
-#     description="Ask a question about a given passage using audio input",
-#     examples=[
-#         ["In 1960, Dr. Jane Goodall arrived in Gombe, Tanzania to study chimpanzees.", "What did Dr. Jane Goodall study?"],
-#         ["The Taj Mahal is located in Agra, India.", "Where is the Taj Mahal situated?"],
-#     ],
-#     interpretation="default",
-# )
-# # Launch the Gradio interface
-# iface.launch()
 import torch
 import torchaudio
 import soundfile as sf
@@ -222,10 +5,8 @@ import speech_recognition as sr
 from gtts import gTTS
 import pygame
 import time
-import os
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForQuestionAnswering
-os.environ["SDL_AUDIODRIVER"] = "pulseaudio"
 model = AutoModelForQuestionAnswering.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
 tokenizer = AutoTokenizer.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')

 import torch
 import torchaudio
 import soundfile as sf
 from gtts import gTTS
 import pygame
 import time
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForQuestionAnswering
 model = AutoModelForQuestionAnswering.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
 tokenizer = AutoTokenizer.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')