File size: 3,762 Bytes
36c32d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import gradio as gr
import whisper
from transformers import pipeline

# Both models are created once at import time and shared by every request.
# Whisper "base" checkpoint, used for language detection and transcription.
model = whisper.load_model("base")
# Hugging Face text-classification pipeline (PyTorch backend) using the
# SamLowe/roberta-base-go_emotions checkpoint to label the transcript.
sentiment_analysis = pipeline("sentiment-analysis", framework="pt", model="SamLowe/roberta-base-go_emotions")

def analyze_sentiment(text):
    """Classify ``text`` and return a ``{label: score}`` dict.

    Each prediction produced by the module-level ``sentiment_analysis``
    pipeline contributes one entry, keyed by its ``'label'`` with its
    ``'score'`` as the value.
    """
    label_scores = {}
    for prediction in sentiment_analysis(text):
        label_scores[prediction['label']] = prediction['score']
    return label_scores

# Emoji shown next to each emotion label.
#
# The literals are written as ASCII escapes on purpose: the previous raw emoji
# had been corrupted into mojibake (UTF-8 bytes decoded with a legacy code
# page), and escapes cannot be damaged by another re-encoding of this file.
# Built once at import time instead of on every lookup.
#
# NOTE(review): entries marked "byte lost" had their final UTF-8 byte dropped
# by the corruption, so the code point was picked by meaning -- confirm
# against the original file if it is available.
_SENTIMENT_EMOJI = {
    "disappointment": "\U0001F61E",  # disappointed face
    "sadness": "\U0001F622",  # crying face
    "annoyance": "\U0001F620",  # angry face
    "neutral": "\U0001F610",  # neutral face (byte lost)
    "disapproval": "\U0001F44E",  # thumbs down
    "realization": "\U0001F62E",  # face with open mouth
    "nervousness": "\U0001F62C",  # grimacing face
    "approval": "\U0001F44D",  # thumbs up (byte lost)
    "joy": "\U0001F604",  # grinning face with smiling eyes
    "anger": "\U0001F621",  # pouting face
    "embarrassment": "\U0001F633",  # flushed face
    "caring": "\U0001F917",  # hugging face
    "remorse": "\U0001F614",  # pensive face
    "disgust": "\U0001F922",  # nauseated face
    "grief": "\U0001F625",  # sad but relieved face
    "confusion": "\U0001F615",  # confused face
    "relief": "\U0001F60C",  # relieved face
    "desire": "\U0001F60D",  # smiling face with heart eyes (byte lost)
    "admiration": "\U0001F60C",  # relieved face
    "optimism": "\U0001F60A",  # smiling face with smiling eyes
    "fear": "\U0001F628",  # fearful face
    "love": "\u2764\uFE0F",  # red heart (heart + emoji variation selector)
    "excitement": "\U0001F389",  # party popper
    "curiosity": "\U0001F914",  # thinking face
    "amusement": "\U0001F604",  # grinning face with smiling eyes
    "surprise": "\U0001F632",  # astonished face
    "gratitude": "\U0001F64F",  # folded hands (byte lost)
    "pride": "\U0001F981",  # lion (byte lost)
}


def get_sentiment_emoji(sentiment):
    """Return the emoji associated with an emotion label.

    Args:
        sentiment: Emotion label to look up (e.g. ``"joy"``).

    Returns:
        The matching emoji string, or ``""`` when the label has no entry.
    """
    return _SENTIMENT_EMOJI.get(sentiment, "")

def display_sentiment_results(sentiment_results, option):
    """Format sentiment scores as one text line per label.

    Args:
        sentiment_results: Mapping of sentiment label to score.
        option: ``"Sentiment Only"`` prints the label and its emoji;
            ``"Sentiment + Score"`` also appends the score. Any other value
            produces no lines.

    Returns:
        The newline-terminated lines concatenated into a single string
        (empty when nothing was emitted).
    """
    lines = []
    for label, score in sentiment_results.items():
        icon = get_sentiment_emoji(label)
        if option == "Sentiment Only":
            lines.append(f"{label} {icon}\n")
        elif option == "Sentiment + Score":
            lines.append(f"{label} {icon}: {score}\n")
    return "".join(lines)

def inference(audio, sentiment_option):
    """Transcribe an audio clip and report the emotions found in its text.

    Args:
        audio: Path of the audio file to process. Empty/``None`` when the
            user pressed the button without recording or uploading anything.
        sentiment_option: Display mode forwarded unchanged to
            ``display_sentiment_results``.

    Returns:
        A ``(language, transcription, sentiment_text)`` tuple: the upper-cased
        key of the most probable detected language, the decoded text, and the
        formatted sentiment report.

    Raises:
        gr.Error: If no audio was supplied, so the UI shows a readable message
            instead of an opaque failure from the audio loader.
    """
    if not audio:
        raise gr.Error("Please record or upload an audio clip first.")

    waveform = whisper.load_audio(audio)
    # Whisper decodes a fixed-length window, so the clip is padded or cut to
    # that length; anything beyond the window is not transcribed.
    waveform = whisper.pad_or_trim(waveform)

    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # Keep only the language with the highest detection probability.
    _, probs = model.detect_language(mel)
    lang = max(probs, key=probs.get)

    # fp16 is disabled explicitly, so decoding runs in full precision.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    sentiment_results = analyze_sentiment(result.text)
    sentiment_output = display_sentiment_results(sentiment_results, sentiment_option)

    return lang.upper(), result.text, sentiment_output

# Page heading, rendered as raw HTML. The two emoji (U+1F3A4 microphone and
# U+1F4AC speech balloon) are written as ASCII escapes: the previous raw
# characters had been corrupted into mojibake, and escapes survive any
# re-encoding of this file.
title = """<h1 align="center">\U0001F3A4 Sentiment analysis for Voice Calls \U0001F4AC</h1>"""
# Banner image shown next to the description (path relative to the app).
image_path = "genai.png"
description = """ This POC was developed for AI FINTECH HACKATHON @ BHARATPE"""

# Extra CSS handed to gr.Blocks; the selectors target component elem_id values.
custom_css = """
#banner-image {
    display: block;
    margin-left: auto;
    margin-right: auto;
}
#chat-message {
    font-size: 14px;
    min-height: 300px;
}
"""

# Build the single-page UI: heading, banner + description row, then the
# audio input, display-mode selector and the three result boxes.
block = gr.Blocks(css=custom_css)

with block:
    gr.HTML(title)

    with gr.Row():
        with gr.Column():
            gr.Image(image_path, elem_id="banner-image", show_label=False)
        with gr.Column():
            gr.HTML(description)

    with gr.Group():
        with gr.Group():
            audio = gr.Audio(
                label="Input Audio",
                show_label=False,
                type="filepath"
            )

            sentiment_option = gr.Radio(
                choices=["Sentiment Only", "Sentiment + Score"],
                label="Select an option",
                # Pre-select a mode: with nothing selected the callback gets
                # None, which matches neither mode and leaves the sentiment
                # box empty.
                value="Sentiment Only",
            )

            btn = gr.Button("Transcribe")

        lang_str = gr.Textbox(label="Language")

        text = gr.Textbox(label="Transcription")

        sentiment_output = gr.Textbox(label="Sentiment Analysis Results")

        # One click runs the whole pipeline and fills all three result boxes.
        btn.click(inference, inputs=[audio, sentiment_option], outputs=[lang_str, text, sentiment_output])

block.launch()