import sounddevice as sd
import soundfile as sf
import speech_recognition as sr
from gtts import gTTS
import pygame
import time
import os
import gradio as gr
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
# Point SDL/pygame at PulseAudio so audio playback works in the hosted environment
os.environ["SDL_AUDIODRIVER"] = "pulseaudio"
model = AutoModelForQuestionAnswering.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
tokenizer = AutoTokenizer.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
class AvishkaaramEkta:
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer
def text_to_speech(self, text, output_file):
# Create a gTTS object with the text and desired language
tts = gTTS(text=text, lang='en')
# Save the audio to a file
tts.save(output_file)
    def play_mp3(self, file_path):
        pygame.mixer.init()
        pygame.mixer.music.load(file_path)
        pygame.mixer.music.play()
        # Block until playback finishes, polling at a low rate instead of spinning
        while pygame.mixer.music.get_busy():
            time.sleep(0.1)
    def ask_question(self, audio_file):
        print("Recording audio...")
        # Record 6 seconds of mono audio at 44.1 kHz from the default microphone
        audio = sd.rec(int(44100 * 6), samplerate=44100, channels=1)
        sd.wait()
        # Save the recording so speech_recognition can read it back from disk
        sf.write(audio_file, audio, 44100)
print(f"Audio saved to {audio_file}")
r = sr.Recognizer()
with sr.AudioFile(audio_file) as source:
audio_data = r.record(source)
text = ""
try:
text = r.recognize_google(audio_data)
print("Transcription:", text)
except sr.UnknownValueError:
print("Speech recognition could not understand audio")
except sr.RequestError as e:
print("Could not request results from Google Speech Recognition service; {0}".format(e))
return text
    def answer_question(self, passage, question):
        # Extractive QA: the model scores every input token as a possible start/end of the answer span
        inputs = self.tokenizer(passage, question, return_tensors="pt")
        outputs = self.model(**inputs)
        start_logits = outputs.start_logits
        end_logits = outputs.end_logits
        # Take the highest-scoring start and end positions and decode the tokens between them
        start_index = start_logits.argmax(dim=1).item()
        end_index = end_logits.argmax(dim=1).item()
        tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
        answer = self.tokenizer.convert_tokens_to_string(tokens[start_index:end_index + 1])
        return answer
    def question_answer(self, passage, question):
        passage_audio_file = "passage.mp3"
        question_audio_file = "question.wav"
        answer_audio_file = "answer.mp3"
        # Read the passage aloud before answering
        self.text_to_speech(passage, passage_audio_file)
        self.play_mp3(passage_audio_file)
        # Prefer the typed question; fall back to recording one if the field was left empty
        if question and question.strip():
            question_text = question.strip()
        else:
            question_text = self.ask_question(question_audio_file)
        answer = self.answer_question(passage, question_text)
        self.text_to_speech("The answer to the question is: " + answer, answer_audio_file)
        self.play_mp3(answer_audio_file)
        time.sleep(5)  # Wait for 5 seconds before ending
        return answer
# Create an instance of the AvishkaaramEkta class
avishkaaram_ekta = AvishkaaramEkta(model, tokenizer)
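# A minimal usage sketch (not part of the Space's UI flow): calling answer_question
# directly with a typed passage and question, skipping the text-to-speech round trip.
# The sample strings below are illustrative only.
if __name__ == "__main__":
    _sample_passage = "The Taj Mahal is located in Agra, India."
    _sample_question = "Where is the Taj Mahal situated?"
    print("Direct QA check:", avishkaaram_ekta.answer_question(_sample_passage, _sample_question))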
# Define the Gradio interface
iface = gr.Interface(
fn=avishkaaram_ekta.question_answer,
inputs=["text", "text"],
outputs="text",
title="Audio Question Answering",
    description="Ask a question about a given passage (type it, or leave it blank to record it aloud)",
    examples=[
        ["In 1960, Dr. Jane Goodall arrived in Gombe, Tanzania, to study the behavior of chimpanzees in the wild.", "What was the purpose of Dr. Jane Goodall's visit to Gombe?"],
        ["The Taj Mahal is located in Agra, India.", "Where is the Taj Mahal situated?"],
    ],
)
# Launch the interface
iface.launch()
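# Note: when running outside Hugging Face Spaces (e.g. locally or in a notebook),
# Gradio's standard share=True option can expose a temporary public link:
# iface.launch(share=True)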