Spaces:
Sleeping
Sleeping
Commit
·
aec05f5
1
Parent(s):
18a597b
Update app.py
Browse files
app.py
CHANGED
@@ -1,105 +1,38 @@
|
|
1 |
-
import torch
|
2 |
-
import torchaudio
|
3 |
-
import soundfile as sf
|
4 |
-
import speech_recognition as sr
|
5 |
-
from gtts import gTTS
|
6 |
-
import pygame
|
7 |
-
import time
|
8 |
import gradio as gr
|
9 |
-
from transformers import
|
10 |
-
|
11 |
-
|
12 |
-
tokenizer = AutoTokenizer.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
|
13 |
-
|
14 |
-
class AvishkaaramEkta:
|
15 |
-
def __init__(self, model):
|
16 |
-
self.model = model
|
17 |
-
self.tokenizer = tokenizer
|
18 |
-
|
19 |
-
def text_to_speech(self, text, output_file):
|
20 |
-
# Create a gTTS object with the text and desired language
|
21 |
-
tts = gTTS(text=text, lang='en')
|
22 |
-
|
23 |
-
# Save the audio to a file
|
24 |
-
tts.save(output_file)
|
25 |
-
|
26 |
-
def play_mp3(self, file_path):
|
27 |
-
pygame.mixer.init()
|
28 |
-
pygame.mixer.music.load(file_path)
|
29 |
-
pygame.mixer.music.play()
|
30 |
-
while pygame.mixer.music.get_busy():
|
31 |
-
continue
|
32 |
-
|
33 |
-
def ask_question(self, audio_file):
|
34 |
-
print("Recording audio...")
|
35 |
-
waveform, sample_rate = torchaudio.rec(6, sr=44100, channels=1)
|
36 |
-
|
37 |
-
# Save the audio to a file
|
38 |
-
sf.write(audio_file, waveform.squeeze().numpy(), sample_rate)
|
39 |
-
|
40 |
-
print(f"Audio saved to {audio_file}")
|
41 |
-
r = sr.Recognizer()
|
42 |
-
|
43 |
-
with sr.AudioFile(audio_file) as source:
|
44 |
-
audio_data = r.record(source)
|
45 |
-
|
46 |
-
text = ""
|
47 |
-
|
48 |
-
try:
|
49 |
-
text = r.recognize_google(audio_data)
|
50 |
-
print("Transcription:", text)
|
51 |
-
except sr.UnknownValueError:
|
52 |
-
print("Speech recognition could not understand audio")
|
53 |
-
except sr.RequestError as e:
|
54 |
-
print("Could not request results from Google Speech Recognition service; {0}".format(e))
|
55 |
-
|
56 |
-
return text
|
57 |
-
|
58 |
-
def answer_question(self, passage, question):
|
59 |
-
inputs = self.tokenizer(passage, question, return_tensors="pt")
|
60 |
-
outputs = self.model(**inputs)
|
61 |
-
start_logits = outputs.start_logits
|
62 |
-
end_logits = outputs.end_logits
|
63 |
-
start_index = start_logits.argmax(dim=1).item()
|
64 |
-
end_index = end_logits.argmax(dim=1).item()
|
65 |
-
tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
|
66 |
-
answer = self.tokenizer.convert_tokens_to_string(tokens[start_index:end_index+1])
|
67 |
-
return answer
|
68 |
-
|
69 |
-
def question_answer(self, passage, question):
|
70 |
-
passage_audio_file = "passage.mp3"
|
71 |
-
question_audio_file = "question.wav"
|
72 |
-
answer_audio_file = "answer.mp3"
|
73 |
-
|
74 |
-
self.text_to_speech(passage, passage_audio_file)
|
75 |
-
self.play_mp3(passage_audio_file)
|
76 |
-
|
77 |
-
question_text = self.ask_question(question_audio_file)
|
78 |
-
answer = self.answer_question(passage, question_text)
|
79 |
|
80 |
-
|
81 |
-
|
|
|
82 |
|
83 |
-
|
|
|
|
|
|
|
84 |
|
85 |
-
|
|
|
|
|
|
|
86 |
|
87 |
-
|
88 |
-
avishkaaram_ekta = AvishkaaramEkta(model)
|
89 |
|
90 |
-
#
|
91 |
iface = gr.Interface(
|
92 |
-
fn=
|
93 |
inputs=["text", "text"],
|
94 |
-
outputs="
|
95 |
-
|
96 |
-
|
|
|
97 |
examples=[
|
98 |
-
["
|
99 |
-
["
|
100 |
-
]
|
101 |
)
|
102 |
|
103 |
-
#
|
104 |
iface.launch()
|
105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import pipeline
|
3 |
+
from gtts import gTTS
|
4 |
+
import tempfile
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
+
# Build the extractive question-answering pipeline once, at import time,
# so every Gradio request reuses the already-loaded weights.
model_name = "AVISHKAARAM/avishkaarak-ekta-hindi"
model = pipeline(
    "question-answering",
    model=model_name,
    tokenizer=model_name,
)
9 |
|
10 |
+
def question_answering(context, question):
    """Answer *question* from *context* and synthesize the answer as speech.

    Args:
        context: Passage of text to search for the answer.
        question: Natural-language question about the passage.

    Returns:
        Tuple ``(answer, audio_path)`` — the extracted answer string and the
        path of an MP3 file speaking that answer.
    """
    # Use the question-answering model to get the answer
    result = model(context=context, question=question)
    answer = result["answer"]

    # Convert the answer to audio.
    # BUGFIX: the original used tempfile.NamedTemporaryFile(...).name — the
    # unreferenced file object is garbage-collected immediately and, with the
    # default delete=True, removes the file, racing with tts.save() (and
    # failing on Windows, where the still-open file cannot be rewritten).
    # delete=False keeps the path valid for gTTS and for Gradio to serve.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_path = tmp.name
    tts = gTTS(text=answer, lang='en')
    tts.save(audio_path)

    return answer, audio_path
|
|
|
21 |
|
22 |
+
# Assemble the Gradio UI: two text inputs (context, question) mapped through
# question_answering to a text answer plus a downloadable audio rendition.
# NOTE(review): gr.outputs.* and the `layout` kwarg are the legacy pre-3.x
# Gradio API — confirm the pinned gradio version still supports them.
iface = gr.Interface(
    fn=question_answering,
    inputs=["text", "text"],
    outputs=[
        gr.outputs.Textbox(label="Answer"),
        gr.outputs.File(label="Answer Audio"),
    ],
    layout="vertical",
    title="Question Answering - AVISHKAARAK",
    description="Enter a context and a question to get an answer. :)",
    examples=[
        ["The capital of France is Paris.", "What is the capital of France?"],
        ["OpenAI is famous for developing GPT-3.", "What is OpenAI known for?"],
    ],
)

# Start the web server for the demo.
iface.launch()
|
38 |
|