Spaces:

tonyshark
/

styletts21

Runtime error

App Files Files Community

tonyshark committed on Sep 17

Commit

ef33da7

verified ·

1 Parent(s): 271cff2

Upload 26 files

Browse files

Files changed (2) hide show

app.py +155 -57
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -236,16 +236,63 @@ EXAMPLES = [
 # ---------------------------
 # Gradio UI
 # ---------------------------
-with gr.Blocks(title="StyleTTS2 Text-to-Speech", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🎙️ StyleTTS2 Text-to-Speech với Hiệu ứng Môi trường")
     # Model status indicator
     if model_loaded:
-        gr.Markdown("✅ **Model đã sẵn sàng** - Bạn có thể bắt đầu tạo giọng nói!")
     else:
-        gr.Markdown("❌ **Lỗi tải model** - Vui lòng kiểm tra file giọng nói tham chiếu và khởi động lại.")
-    gr.Markdown("Sử dụng StyleTTS2 với khả năng thêm hiệu ứng môi trường và điều chỉnh tốc độ nói.")
     with gr.Accordion("📑 Danh sách Tags + Emoji", open=False):
         md = "| Tag | Ý nghĩa |\n|-----|----------|\n"
@@ -253,34 +300,39 @@ with gr.Blocks(title="StyleTTS2 Text-to-Speech", theme=gr.themes.Soft()) as demo
             md += f"| `<{k}>...</{k}>` | {v} |\n"
         gr.Markdown(md)
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown("### ⚙️ Cài đặt")
-            text_in = gr.Textbox(
-                value=EXAMPLES[0],
-                label="📝 Văn bản cần chuyển đổi",
-                lines=4,
-                placeholder="Nhập văn bản của bạn ở đây. Sử dụng tags để tạo cảm xúc..."
-            )
-            with gr.Row():
-                env_in = gr.Dropdown(
-                    choices=["Neutral", "Church", "Hall", "Cafe", "Street", "Phone", "Office", "Supermarket"],
-                    value="Neutral",
-                    label="🌍 Môi trường âm thanh",
-                    info="Chọn môi trường để áp dụng hiệu ứng"
-                )
-            with gr.Row():
-                speed_slider = gr.Slider(
-                    minimum=0.5,
-                    maximum=2.0,
-                    value=1.0,
-                    step=0.1,
-                    label="⚡ Tốc độ nói",
-                    info="1.0 = bình thường, < 1.0 = chậm, > 1.0 = nhanh"
                 )
-            with gr.Row():
                 snr_slider = gr.Slider(
                     0, 30,
                     value=10,
@@ -288,47 +340,93 @@ with gr.Blocks(title="StyleTTS2 Text-to-Speech", theme=gr.themes.Soft()) as demo
                     label="🔊 Mức độ nhiễu (SNR dB)",
                     info="Chỉ áp dụng cho môi trường có tiếng ồn. Cao hơn = ít nhiễu hơn"
                 )
-            btn = gr.Button("🎵 Tạo giọng nói", variant="primary", size="lg")
-            gr.Examples(
-                examples=[[ex] for ex in EXAMPLES],
-                inputs=[text_in],
-                label="💡 Ví dụ nhanh"
-            )
-        with gr.Column(scale=1):
-            gr.Markdown("### 🎧 Kết quả")
-            audio_out = gr.Audio(
-                label="🎵 Âm thanh có hiệu ứng",
-                type="numpy",
-                info="Phiên bản có áp dụng hiệu ứng môi trường"
-            )
-            clean_out = gr.Audio(
-                label="🎤 Âm thanh gốc",
-                type="numpy",
-                info="Phiên bản gốc không có hiệu ứng"
-            )
-            wave_plot = gr.Plot(
-                label="📊 So sánh dạng sóng",
-                info="Biểu đồ so sánh âm thanh gốc và có hiệu ứng"
-            )
     btn.click(fn=synthesize,
               inputs=[text_in, env_in, snr_slider, speed_slider],
               outputs=[audio_out, wave_plot, clean_out])
 # Launch the application
 if __name__ == "__main__":
     try:
         print("🚀 Đang khởi động ứng dụng StyleTTS2...")
         demo.launch(
             server_name="0.0.0.0",
             server_port=7860,
             share=False,
-            show_error=True
         )
     except Exception as e:
         print(f"❌ Lỗi khi khởi động ứng dụng: {e}")
-        print("Vui lòng kiểm tra lại cấu hình và thử lại.")

 # ---------------------------
 # Gradio UI
 # ---------------------------
+with gr.Blocks(
+    title="StyleTTS2 Text-to-Speech",
+    theme=gr.themes.Soft(),
+    css="""
+    .gradio-container {
+        max-width: 1200px !important;
+        margin: auto !important;
+    }
+    .main-header {
+        text-align: center;
+        margin-bottom: 20px;
+    }
+    .status-box {
+        padding: 10px;
+        border-radius: 8px;
+        margin: 10px 0;
+    }
+    .status-success {
+        background-color: #d4edda;
+        border: 1px solid #c3e6cb;
+        color: #155724;
+    }
+    .status-error {
+        background-color: #f8d7da;
+        border: 1px solid #f5c6cb;
+        color: #721c24;
+    }
+    """
+) as demo:
+    # Header
+    gr.HTML("""
+    <div class="main-header">
+        <h1>🎙️ StyleTTS2 Text-to-Speech với Hiệu ứng Môi trường</h1>
+    </div>
+    """)
     # Model status indicator
     if model_loaded:
+        gr.HTML("""
+        <div class="status-box status-success">
+            <strong>✅ Model đã sẵn sàng</strong> - Bạn có thể bắt đầu tạo giọng nói!
+        </div>
+        """)
     else:
+        gr.HTML("""
+        <div class="status-box status-error">
+            <strong>❌ Lỗi tải model</strong> - Vui lòng kiểm tra file giọng nói tham chiếu và khởi động lại.
+        </div>
+        """)
+    gr.Markdown("""
+    ### 📖 Hướng dẫn sử dụng
+    - **Nhập văn bản** vào ô text và sử dụng các tags để tạo cảm xúc
+    - **Chọn môi trường** để áp dụng hiệu ứng âm thanh phù hợp
+    - **Điều chỉnh tốc độ** và mức độ nhiễu theo ý muốn
+    - **Nhấn "Tạo giọng nói"** để tạo audio với hiệu ứng
+    """)
     with gr.Accordion("📑 Danh sách Tags + Emoji", open=False):
         md = "| Tag | Ý nghĩa |\n|-----|----------|\n"
             md += f"| `<{k}>...</{k}>` | {v} |\n"
         gr.Markdown(md)
+    # Main content area
+    with gr.Row(equal_height=True):
+        # Left column - Input controls
+        with gr.Column(scale=1, min_width=400):
+            with gr.Group():
+                gr.Markdown("### ⚙️ Cài đặt đầu vào")
+                text_in = gr.Textbox(
+                    value=EXAMPLES[0],
+                    label="📝 Văn bản cần chuyển đổi",
+                    lines=4,
+                    placeholder="Nhập văn bản của bạn ở đây. Sử dụng tags để tạo cảm xúc...",
+                    max_lines=6
                 )
+                # Environment and Speed in one row
+                with gr.Row():
+                    env_in = gr.Dropdown(
+                        choices=["Neutral", "Church", "Hall", "Cafe", "Street", "Phone", "Office", "Supermarket"],
+                        value="Neutral",
+                        label="🌍 Môi trường âm thanh",
+                        info="Chọn môi trường để áp dụng hiệu ứng"
+                    )
+                    speed_slider = gr.Slider(
+                        minimum=0.5,
+                        maximum=2.0,
+                        value=1.0,
+                        step=0.1,
+                        label="⚡ Tốc độ nói",
+                        info="1.0 = bình thường, < 1.0 = chậm, > 1.0 = nhanh"
+                    )
+                # Noise level slider
                 snr_slider = gr.Slider(
                     0, 30,
                     value=10,
                     label="🔊 Mức độ nhiễu (SNR dB)",
                     info="Chỉ áp dụng cho môi trường có tiếng ồn. Cao hơn = ít nhiễu hơn"
                 )
+                # Generate button
+                btn = gr.Button("🎵 Tạo giọng nói", variant="primary", size="lg", scale=1)
+            # Examples section
+            with gr.Group():
+                gr.Examples(
+                    examples=[[ex] for ex in EXAMPLES],
+                    inputs=[text_in],
+                    label="💡 Ví dụ nhanh"
+                )
+        # Right column - Output results
+        with gr.Column(scale=1, min_width=400):
+            with gr.Group():
+                gr.Markdown("### 🎧 Kết quả")
+                # Audio outputs
+                audio_out = gr.Audio(
+                    label="🎵 Âm thanh có hiệu ứng",
+                    type="numpy",
+                    info="Phiên bản có áp dụng hiệu ứng môi trường"
+                )
+                clean_out = gr.Audio(
+                    label="🎤 Âm thanh gốc",
+                    type="numpy",
+                    info="Phiên bản gốc không có hiệu ứng"
+                )
+            # Waveform plot
+            with gr.Group():
+                wave_plot = gr.Plot(
+                    label="📊 So sánh dạng sóng",
+                    info="Biểu đồ so sánh âm thanh gốc và có hiệu ứng",
+                    show_label=True
+                )
     btn.click(fn=synthesize,
               inputs=[text_in, env_in, snr_slider, speed_slider],
               outputs=[audio_out, wave_plot, clean_out])
+    # Footer with additional information
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("""
+            ---
+            ### ℹ️ Thông tin bổ sung
+            **🎯 Môi trường âm thanh:**
+            - **Neutral**: Không có hiệu ứng
+            - **Church/Hall**: Thêm reverb (tiếng vang)
+            - **Cafe/Street/Office/Supermarket**: Thêm tiếng ồn nền
+            - **Phone**: Giới hạn tần số như điện thoại
+            **📊 SNR (Signal-to-Noise Ratio):**
+            - Giá trị cao = ít nhiễu hơn
+            - Giá trị thấp = nhiều nhiễu hơn
+            - Chỉ áp dụng cho môi trường có tiếng ồn
+            **⚡ Tốc độ nói:**
+            - 0.5x = Nói chậm một nửa
+            - 1.0x = Tốc độ bình thường
+            - 2.0x = Nói nhanh gấp đôi
+            """)
 # Launch the application
 if __name__ == "__main__":
     try:
         print("🚀 Đang khởi động ứng dụng StyleTTS2...")
+        print("📱 Giao diện sẽ mở tại: http://localhost:7860")
+        print("🔄 Để dừng ứng dụng, nhấn Ctrl+C")
+        print("-" * 50)
         demo.launch(
             server_name="0.0.0.0",
             server_port=7860,
             share=False,
+            show_error=True,
+            show_tips=True,
+            enable_queue=True,
+            max_threads=10
         )
+    except KeyboardInterrupt:
+        print("\n👋 Ứng dụng đã được dừng bởi người dùng.")
     except Exception as e:
         print(f"❌ Lỗi khi khởi động ứng dụng: {e}")
+        print("💡 Vui lòng kiểm tra lại:")
+        print("   - File giọng nói tham chiếu có tồn tại không")
+        print("   - Cổng 7860 có bị chiếm dụng không")
+        print("   - Các thư viện đã được cài đặt đầy đủ chưa")

requirements.txt CHANGED Viewed

@@ -6,3 +6,4 @@ scipy
 gradio
 librosa
 matplotlib

 gradio
 librosa
 matplotlib
+phonemizer