File size: 3,865 Bytes
c8eb530
546a5e2
 
 
 
dbdec86
9cdcc72
 
c37e7d0
 
 
e912c09
9cdcc72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b64203b
9cdcc72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
649ceaa
9cdcc72
2ecde0f
649ceaa
 
9cdcc72
2ecde0f
9cdcc72
4b8269d
3667eab
4b8269d
3667eab
4b8269d
2ecde0f
9cdcc72
 
 
 
 
 
3667eab
 
9cdcc72
649ceaa
e139dcd
f617c7f
45ffe72
0a1b459
d795229
8f42210
f617c7f
 
d795229
f617c7f
 
deea3a0
c4563cf
 
dea79d3
8f42210
d795229
4bbda62
25143b6
946c688
 
2c7e1d1
9cdcc72
 
 
1e21606
9cdcc72
 
 
 
 
06eda5c
9cdcc72
 
 
 
 
 
 
 
7d5f022
9cdcc72
8c29eb3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126

'''
This script transcribes microphone audio with Whisper and then calls the ada model from the OpenAI API to predict the next few words.
'''
import os
os.system("pip install --upgrade pip")
from pprint import pprint
os.system("pip install git+https://github.com/openai/whisper.git")
import sys
print("Sys: ", sys.executable)
os.system("pip install openai")
import openai
import gradio as gr
import whisper
from transformers import pipeline
import torch
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
import time
# import streaming.py
# from next_word_prediction import GPT2




#gpt2 = AutoModelForCausalLM.from_pretrained("gpt2", return_dict_in_generate=True)
#tokenizer = AutoTokenizer.from_pretrained("gpt2")

### /code snippet


# get gpt2 model
#generator = pipeline('text-generation', model='gpt2')

# Load the "tiny" Whisper checkpoint once, at import time. inference() reads
# this module-level object for both the target device and the decoding call.
model = whisper.load_model("tiny")


        
def inference(audio, state=""):
    """Transcribe a recorded clip with Whisper and predict the next few words.

    The clip is decoded to text with the module-level Whisper ``model``; the
    transcript is appended to a few-shot prompt and sent to the OpenAI
    completion endpoint, which is asked for several short continuations.

    Args:
        audio: Path of the audio file to transcribe (handed to
            ``whisper.load_audio``). ``None`` means nothing was recorded.
        state: Session value; it is returned unchanged.

    Returns:
        A 3-tuple ``(transcript, state, prediction)``. ``transcript`` is the
        decoded text, and ``prediction`` is one completion with newlines
        removed and split on commas into a list of strings (an empty list
        when there is no audio or the API returned no choices).

    Raises:
        KeyError: If the ``Openai_APIkey`` environment variable is not set.
    """
    # Number of completions requested from the API.
    num_completions = 5
    # NOTE(review): the original always surfaced the fourth completion
    # (index 3); that choice is kept here - confirm whether all candidates
    # should be shown instead.
    shown_choice = 3

    # Nothing recorded yet: avoid crashing inside whisper.load_audio.
    if audio is None:
        return "", state, []

    # Load the clip and pad/trim it to the length Whisper expects.
    samples = whisper.pad_or_trim(whisper.load_audio(audio))

    # Log-mel spectrogram on the same device as the model.
    mel = whisper.log_mel_spectrogram(samples).to(model.device)

    # No explicit detect_language() call: its result was never used, and
    # whisper.decode() is expected to detect the language itself when none
    # is specified in the options (confirm against the Whisper docs).
    options = whisper.DecodingOptions(fp16 = False)
    result = whisper.decode(model, mel, options)
    print("result pre gp model from whisper: ", result, ".text ", result.text, "and the data type: ", type(result.text))

    # NOTE(review): the few-shot text below contains typos ("int he",
    # "Frnech", "katsup", "elecrical") that are sent verbatim to the model;
    # it is left unchanged here so the prompt stays byte-identical.
    PROMPT = """The following is an incomplete transcript of a brief conversation. 
    Predict the next few words int he transcript to complete the sentence. 
    A few examples of transcripts and predictions are provided below:
    Transcript: Tomorrow night we're going out to 
    Prediction: The Movies, A Restaurant, A Baseball Game, The Theater, A Party for a friend   
    Transcript: I would like to order a cheeseburger with a side of
    Prediction: Frnech fries, Milkshake, Apple slices, Side salad, Extra katsup 
    Transcript: My friend Savanah is
    Prediction: An elecrical engineer, A marine biologist, A classical musician 
    Transcript: I need to buy a birthday
    Prediction: Present, Gift, Cake, Card
    Given these examples, predict the next few words in the following sentence:   
    """
    text = PROMPT + result.text

    openai.api_key = os.environ["Openai_APIkey"]

    response = openai.Completion.create(
        model="text-ada-001",
        prompt=text,
        temperature=0.9,
        max_tokens=8,
        n=num_completions,
    )

    # Iterate over what the API actually returned instead of assuming
    # exactly five choices.
    choices = response['choices']
    for choice in choices:
        print("print1 ", choice['text'])

    candidates = _split_predictions(choices)
    if not candidates:
        return result.text, state, []

    # Fall back to the last available candidate if fewer than four came back.
    chosen = candidates[min(shown_choice, len(candidates) - 1)]
    return result.text, state, chosen


def _split_predictions(choices):
    """Return one comma-split list of phrases per completion choice.

    Newlines are stripped from each choice's ``text`` before splitting.
    """
    return [choice['text'].replace("\n", "").split(',') for choice in choices]



# get audio from microphone 

# Build and start the live demo: microphone audio (delivered as a file path)
# plus session state go into inference(); the transcript, the state and the
# prediction come back out.
# gr.Interface is the class that accepts fn/inputs/outputs/live. gr.Blocks is
# a layout container and does not wire a function from constructor keywords,
# so the original call never connected inference() to the UI.
gr.Interface(
    fn=inference,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath"),
        "state",
    ],
    outputs=[
        "textbox",
        "state",
        "textbox",
    ],
    live=True,
).launch()