NihalGazi committed on
Commit
e3b7f9d
·
verified ·
1 Parent(s): f2227a3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +213 -0
app.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import gradio as gr
3
+ import requests
4
+ import random
5
+ import urllib.parse
6
+ import tempfile
7
+ import os
8
+
9
+ # --- Constants ---
10
# Voice identifiers offered in the UI dropdown.
VOICES = [
    # Standard OpenAI voices
    "alloy",
    "echo",
    "fable",
    "onyx",
    "nova",
    "shimmer",
    # Extra voices (presumably Pollinations-specific -- TODO confirm against the API)
    "coral",
    "verse",
    "ballad",
    "ash",
    "sage",
    "amuch",
    "dan",
]

# Text endpoint used as a YES/NO classifier; ``{prompt}`` is filled with the
# percent-encoded user text.
NSFW_URL_TEMPLATE = (
    "https://text.pollinations.ai/"
    'Is this an inappropriate text-to-speech prompt "{prompt}". '
    'If yes then write "YES" only otherwise "NO" only'
)

# Speech endpoint; ``{emotion}`` and ``{prompt}`` are percent-encoded text,
# ``{voice}`` is one of VOICES and ``{seed}`` is an integer.
TTS_URL_TEMPLATE = (
    "https://text.pollinations.ai/"
    "only repeat what i say now say with proper emphasis in a "
    '"{emotion}" emotion this statement - "{prompt}"'
    "?model=openai-audio&voice={voice}&seed={seed}"
)
17
+
18
+ # --- Helper Functions ---
19
+
20
def check_nsfw(prompt: str) -> bool:
    """Ask the Pollinations text endpoint whether *prompt* is inappropriate.

    The prompt is percent-encoded, substituted into ``NSFW_URL_TEMPLATE`` and
    fetched with a 20 second timeout. The reply is expected to be the single
    word ``YES`` or ``NO``.

    Args:
        prompt: Raw user text to classify.

    Returns:
        ``False`` only when the reply is recognisably ``NO``. ``True`` for
        ``YES`` and for any other reply (the check fails closed).

    Raises:
        gr.Error: If the HTTP request fails or any other error occurs while
            performing the check.
    """
    try:
        # safe="" makes quote() escape "/" as well; by default it is left
        # untouched and would be interpreted as a path separator in the URL.
        encoded_prompt = urllib.parse.quote(prompt, safe="")
        url = NSFW_URL_TEMPLATE.format(prompt=encoded_prompt)
        print(f"DEBUG: Checking NSFW URL: {url}")

        response = requests.get(url, timeout=20)
        response.raise_for_status()  # 4xx / 5xx -> HTTPError (a RequestException)

        # Tolerate cosmetic decoration around the verdict ('"NO"', 'No.', ...)
        # so a harmless formatting difference is not treated as "unsafe".
        result = response.text.strip().strip("\"'.!").strip().upper()
        print(f"DEBUG: NSFW Check Response: '{result}'")

        if result == "YES":
            return True
        if result == "NO":
            return False

        # Anything else is ambiguous: be cautious and treat it as NSFW.
        print(f"Warning: Unexpected response from NSFW checker: {response.text}")
        return True

    except requests.exceptions.RequestException as e:
        print(f"Error during NSFW check: {e}")
        raise gr.Error(f"Failed to check prompt safety: {e}") from e
    except Exception as e:
        print(f"Unexpected error during NSFW check: {e}")
        raise gr.Error(f"An unexpected error occurred during safety check: {e}") from e
52
+
53
+
54
def generate_audio(prompt: str, voice: str, emotion: str, seed: int) -> bytes:
    """Fetch synthesised speech for *prompt* from the Pollinations endpoint.

    The prompt, emotion and voice are percent-encoded and substituted into
    ``TTS_URL_TEMPLATE`` together with *seed*; the URL is fetched with a
    60 second timeout.

    Args:
        prompt: Text to be spoken.
        voice: Voice identifier placed in the ``voice`` query parameter.
        emotion: Free-text description of the speaking style.
        seed: Integer placed in the ``seed`` query parameter.

    Returns:
        The raw response body (audio bytes).

    Raises:
        gr.Error: If the request fails, if the response's ``Content-Type``
            does not contain ``audio``, or on any other error while making
            the request.
    """
    try:
        # safe="" also escapes "/", which quote() leaves alone by default and
        # which would otherwise split the URL path.
        encoded_prompt = urllib.parse.quote(prompt, safe="")
        encoded_emotion = urllib.parse.quote(emotion, safe="")
        encoded_voice = urllib.parse.quote(str(voice), safe="")

        url = TTS_URL_TEMPLATE.format(
            prompt=encoded_prompt,
            emotion=encoded_emotion,
            voice=encoded_voice,
            seed=seed,
        )
        print(f"DEBUG: Generating Audio URL: {url}")

        response = requests.get(url, timeout=60)
        response.raise_for_status()

    except requests.exceptions.RequestException as e:
        print(f"Error during audio generation: {e}")
        # Include the start of the server's error body when one is available.
        error_details = ""
        failed_response = getattr(e, "response", None)
        if failed_response is not None:
            error_details = failed_response.text[:200]
        raise gr.Error(f"Failed to generate audio: {e}. Details: {error_details}") from e
    except Exception as e:
        print(f"Unexpected error during audio generation: {e}")
        raise gr.Error(f"An unexpected error occurred during audio generation: {e}") from e

    # This check lives outside the try block on purpose: the gr.Error raised
    # here must reach the caller unchanged instead of being caught by the
    # generic handler above and re-wrapped as an "unexpected error".
    content_type = response.headers.get("content-type", "").lower()
    if "audio" not in content_type:
        print(f"Warning: Unexpected content type received: {content_type}")
        print(f"Response Text: {response.text[:500]}")
        raise gr.Error(f"API did not return audio. Response: {response.text[:200]}")

    return response.content
91
+
92
+ # --- Main Gradio Function ---
93
+
94
def text_to_speech_app(prompt: str, voice: str, emotion: str, use_random_seed: bool, specific_seed: int):
    """Validate the inputs, run the safety check, then synthesise speech.

    Args:
        prompt: Text to convert to speech; must contain non-whitespace text.
        voice: Voice identifier; must be non-empty.
        emotion: Speaking style; falls back to ``"neutral"`` when blank.
        use_random_seed: When true a random seed in ``[0, 2**32 - 1]`` is
            drawn and *specific_seed* is ignored.
        specific_seed: Seed to use when *use_random_seed* is false.

    Returns:
        A ``(audio_path, status_message)`` pair. ``audio_path`` is the path
        of a temporary ``.mp3`` file on success, and ``None`` when the prompt
        is flagged as inappropriate or the safety check / audio generation
        fails (the message then describes the problem).

    Raises:
        gr.Error: If the prompt is blank, no voice is selected, or a fixed
            seed was requested but none was supplied.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Prompt cannot be empty.")
    if not emotion or not emotion.strip():
        emotion = "neutral"
        print("Warning: No emotion provided, defaulting to 'neutral'.")
    if not voice:
        raise gr.Error("Please select a voice.")

    # 1. Determine the seed.
    if use_random_seed:
        seed = random.randint(0, 2**32 - 1)
    else:
        # int(None) would raise a bare TypeError; report it as a user-facing
        # error instead. NOTE(review): presumably the value is None when the
        # number field is left empty -- confirm against gr.Number.
        if specific_seed is None:
            raise gr.Error("Please enter a specific seed or enable 'Use Random Seed'.")
        seed = int(specific_seed)
    print(f"Using Seed: {seed}")

    # 2. Safety check.
    print("Checking prompt safety...")
    try:
        is_nsfw = check_nsfw(prompt)
    except gr.Error as e:
        # Prefer the plain ``message`` attribute when the exception has one;
        # NOTE(review): str() of a gr.Error may add surrounding quotes
        # depending on the installed Gradio version.
        return None, getattr(e, "message", None) or str(e)

    if is_nsfw:
        print("Prompt flagged as inappropriate.")
        return None, "Error: The prompt was flagged as inappropriate and cannot be processed."

    # 3. Generate the audio (only reached for prompts judged safe).
    print("Prompt is safe. Generating audio...")
    try:
        audio_bytes = generate_audio(prompt, voice, emotion, seed)

        # 4. Persist the bytes so the Audio component can be given a path.
        # delete=False keeps the file on disk after the with-block closes it;
        # nothing in this function removes it afterwards. The ".mp3" suffix
        # assumes the API returns MP3 data -- TODO confirm.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
            temp_audio_file.write(audio_bytes)
            temp_file_path = temp_audio_file.name
        print(f"Audio saved temporarily to: {temp_file_path}")

        return temp_file_path, f"Audio generated successfully with voice '{voice}', emotion '{emotion}', and seed {seed}."

    except gr.Error as e:
        return None, getattr(e, "message", None) or str(e)
    except Exception as e:
        print(f"Unexpected error in main function: {e}")
        return None, f"An unexpected error occurred: {e}"
148
+
149
+
150
+ # --- Gradio Interface ---
151
+
152
def toggle_seed_input(use_random_seed):
    """Build the update for the seed field when the random-seed box changes.

    The field is shown only while random seeding is switched off, and its
    value is set back to 12345 on every toggle.
    """
    show_seed_field = not use_random_seed
    return gr.update(value=12345, visible=show_seed_field)
155
+
156
with gr.Blocks() as app:
    # Page heading and short description.
    gr.Markdown("# Text-to-Speech with NSFW Check")
    gr.Markdown(
        "Enter text, choose a voice and emotion, and generate audio. "
        "The text will be checked for appropriateness before generation."
    )

    # Input area: text settings in the wide column, seed settings in the narrow one.
    with gr.Row():
        with gr.Column(scale=2):
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="Enter the text you want to convert to speech...",
            )
            emotion_input = gr.Textbox(
                label="Emotion Style",
                placeholder="e.g., happy, sad, excited, calm...",
            )
            voice_dropdown = gr.Dropdown(label="Voice", choices=VOICES, value="alloy")
        with gr.Column(scale=1):
            random_seed_checkbox = gr.Checkbox(label="Use Random Seed", value=True)
            # Hidden initially because the random-seed box starts ticked.
            seed_input = gr.Number(
                label="Specific Seed",
                value=12345,
                visible=False,
                precision=0,
            )

    submit_button = gr.Button("Generate Audio", variant="primary")

    # Output area: audio player plus a status line for success / error text.
    with gr.Row():
        audio_output = gr.Audio(label="Generated Audio", type="filepath")
        status_output = gr.Textbox(label="Status")

    # Component lists shared by the button handler and the examples table;
    # the order matches text_to_speech_app's parameters and return values.
    tts_inputs = [
        prompt_input,
        voice_dropdown,
        emotion_input,
        random_seed_checkbox,
        seed_input,
    ]
    tts_outputs = [audio_output, status_output]

    # --- Event wiring ---
    random_seed_checkbox.change(
        fn=toggle_seed_input,
        inputs=[random_seed_checkbox],
        outputs=[seed_input],
    )

    submit_button.click(
        fn=text_to_speech_app,
        inputs=tts_inputs,
        outputs=tts_outputs,
    )

    # One row per example: prompt, voice, emotion, use-random-seed, seed.
    example_rows = [
        ["Hello there! This is a test of the text-to-speech system.", "alloy", "neutral", True, 12345],
        ["What a beautiful day to build Gradio apps.", "shimmer", "happy", True, 12345],
        ["I am feeling a bit down today.", "fable", "sad", False, 9876],
        ["This technology is absolutely amazing!", "nova", "excited", True, 12345],
    ]
    gr.Examples(
        examples=example_rows,
        inputs=tts_inputs,
        outputs=tts_outputs,
        fn=text_to_speech_app,
        cache_examples=False,  # example outputs are not cached
    )

# --- Entry point ---
if __name__ == "__main__":
    app.launch()
213
+