Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,1113 +1,35 @@
|
|
1 |
-
import spaces
|
2 |
-
import gradio as gr
|
3 |
import os
|
4 |
-
import
|
5 |
-
import
|
6 |
-
import
|
7 |
-
import json
|
8 |
-
import re
|
9 |
-
import httpx
|
10 |
-
import tempfile
|
11 |
-
import wave
|
12 |
-
import base64
|
13 |
-
import numpy as np
|
14 |
-
import soundfile as sf
|
15 |
-
import subprocess
|
16 |
-
import shutil
|
17 |
-
import requests
|
18 |
-
import logging
|
19 |
-
import random
|
20 |
-
from datetime import datetime, timedelta
|
21 |
-
from typing import List, Tuple, Dict, Optional
|
22 |
-
from pathlib import Path
|
23 |
-
from threading import Thread
|
24 |
-
from dotenv import load_dotenv
|
25 |
|
26 |
-
|
27 |
-
from langchain_community.document_loaders import PyPDFLoader
|
28 |
-
|
29 |
-
# OpenAI imports
|
30 |
-
from openai import OpenAI
|
31 |
-
|
32 |
-
# Transformers imports (for legacy local mode)
|
33 |
-
from transformers import (
|
34 |
-
AutoModelForCausalLM,
|
35 |
-
AutoTokenizer,
|
36 |
-
TextIteratorStreamer,
|
37 |
-
BitsAndBytesConfig,
|
38 |
-
)
|
39 |
-
|
40 |
-
# Llama CPP imports (for new local mode)
|
41 |
-
try:
|
42 |
-
from llama_cpp import Llama
|
43 |
-
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
|
44 |
-
from llama_cpp_agent.providers import LlamaCppPythonProvider
|
45 |
-
from llama_cpp_agent.chat_history import BasicChatHistory
|
46 |
-
from llama_cpp_agent.chat_history.messages import Roles
|
47 |
-
from huggingface_hub import hf_hub_download
|
48 |
-
LLAMA_CPP_AVAILABLE = True
|
49 |
-
except ImportError:
|
50 |
-
LLAMA_CPP_AVAILABLE = False
|
51 |
-
|
52 |
-
# Chatterbox TTS imports
|
53 |
-
try:
|
54 |
-
from chatterbox.src.chatterbox.tts import ChatterboxTTS
|
55 |
-
CHATTERBOX_AVAILABLE = True
|
56 |
-
print("✅ Chatterbox TTS imported successfully from chatterbox.src.chatterbox.tts")
|
57 |
-
except ImportError:
|
58 |
-
try:
|
59 |
-
from chatterbox.tts import ChatterboxTTS
|
60 |
-
CHATTERBOX_AVAILABLE = True
|
61 |
-
print("✅ Chatterbox TTS imported successfully from chatterbox.tts")
|
62 |
-
except ImportError:
|
63 |
-
try:
|
64 |
-
# 다른 가능한 경로 시도
|
65 |
-
import sys
|
66 |
-
sys.path.append('/usr/local/lib/python3.10/site-packages')
|
67 |
-
from chatterbox import ChatterboxTTS
|
68 |
-
CHATTERBOX_AVAILABLE = True
|
69 |
-
print("✅ Chatterbox TTS imported successfully from chatterbox")
|
70 |
-
except ImportError:
|
71 |
-
CHATTERBOX_AVAILABLE = False
|
72 |
-
print("❌ Chatterbox TTS not available - falling back to text-only mode")
|
73 |
-
|
74 |
-
# Import config and prompts
|
75 |
-
from config_prompts import (
|
76 |
-
ConversationConfig,
|
77 |
-
PromptBuilder,
|
78 |
-
DefaultConversations,
|
79 |
-
)
|
80 |
-
|
81 |
-
load_dotenv()
|
82 |
-
|
83 |
-
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
84 |
-
print(f"🚀 Running on device: {DEVICE}")
|
85 |
-
|
86 |
-
# Brave Search API 설정
|
87 |
-
BRAVE_KEY = os.getenv("BSEARCH_API")
|
88 |
-
BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
|
89 |
-
|
90 |
-
|
91 |
-
def set_seed(seed: int):
    """Seed every random number generator the app relies on.

    Seeds torch (CPU and all CUDA devices), Python's ``random`` module and
    NumPy's global generator so that repeated runs with the same seed are
    reproducible.

    Args:
        seed: Seed value; NumPy requires it to be in ``[0, 2**32 - 1]``.
    """
    torch.manual_seed(seed)
    # manual_seed_all is documented as safe (silently ignored) when CUDA is
    # unavailable, so no dependency on a module-level device flag is needed,
    # and it already covers the current device.
    torch.cuda.manual_seed_all(seed)
    random.seed(seed)
    np.random.seed(seed)
|
99 |
-
|
100 |
-
|
101 |
-
def split_text_into_chunks(text: str, max_chars: int = 250) -> list[str]:
    """Split text into chunks of at most ``max_chars`` characters.

    The text is split on sentence boundaries (whitespace following ``.``,
    ``!`` or ``?``) and consecutive sentences are packed into a chunk while
    they fit. A sentence longer than ``max_chars`` is packed word by word,
    and a single word longer than ``max_chars`` is hard-split so that no
    returned chunk ever exceeds the limit.

    Args:
        text: Text to split.
        max_chars: Maximum length of each chunk; must be positive.

    Returns:
        The chunks in order; an empty list for empty/whitespace-only text.

    Raises:
        ValueError: If ``max_chars`` is not positive.
    """
    if max_chars <= 0:
        raise ValueError("max_chars must be a positive integer")

    sentences = re.split(r'(?<=[.!?])\s+', text.strip())

    chunks = []
    current = ""

    for sentence in sentences:
        if not sentence:
            continue

        # The "+ 1" budgets for the joining space (kept even for the first
        # sentence so packing decisions match the historical behaviour).
        if len(current) + len(sentence) + 1 <= max_chars:
            current = f"{current} {sentence}" if current else sentence
            continue

        # The sentence does not fit: flush what has been collected so far.
        if current:
            chunks.append(current)
            current = ""

        if len(sentence) <= max_chars:
            current = sentence
            continue

        # Oversized sentence: pack word by word, hard-splitting any single
        # word that is itself longer than the limit.
        pieces = [
            word[start:start + max_chars]
            for word in sentence.split()
            for start in range(0, len(word), max_chars)
        ]
        packed = ""
        for piece in pieces:
            if len(packed) + len(piece) + 1 <= max_chars:
                packed = f"{packed} {piece}" if packed else piece
            else:
                if packed:
                    chunks.append(packed)
                packed = piece
        current = packed

    # Flush the trailing chunk.
    if current:
        chunks.append(current)

    return chunks
|
147 |
-
|
148 |
-
|
149 |
-
def brave_search(query: str, count: int = 8, freshness_days: int | None = None):
|
150 |
-
"""Brave Search API를 사용하여 최신 정보 검색"""
|
151 |
-
if not BRAVE_KEY:
|
152 |
-
return []
|
153 |
-
params = {"q": query, "count": str(count)}
|
154 |
-
if freshness_days:
|
155 |
-
dt_from = (datetime.utcnow() - timedelta(days=freshness_days)).strftime("%Y-%m-%d")
|
156 |
-
params["freshness"] = dt_from
|
157 |
try:
|
158 |
-
|
159 |
-
|
160 |
-
headers={"Accept": "application/json", "X-Subscription-Token": BRAVE_KEY},
|
161 |
-
params=params,
|
162 |
-
timeout=15
|
163 |
-
)
|
164 |
-
raw = r.json().get("web", {}).get("results") or []
|
165 |
-
return [{
|
166 |
-
"title": r.get("title", ""),
|
167 |
-
"url": r.get("url", r.get("link", "")),
|
168 |
-
"snippet": r.get("description", r.get("text", "")),
|
169 |
-
"host": re.sub(r"https?://(www\.)?", "", r.get("url", "")).split("/")[0]
|
170 |
-
} for r in raw[:count]]
|
171 |
-
except Exception as e:
|
172 |
-
logging.error(f"Brave search error: {e}")
|
173 |
-
return []
|
174 |
-
|
175 |
-
|
176 |
-
def format_search_results(query: str, for_keyword: bool = False) -> str:
    """Run a Brave search and render the top hits as prompt-ready text.

    Keyword mode requests 5 results, keeps 4 and renders a detailed entry
    (title, snippet capped at 200 characters, source host) with no freshness
    filter. Regular mode requests 3 results from the last 7 days, keeps 2 and
    renders a one-line bullet with the snippet capped at 100 characters.

    Returns:
        The entries joined by blank lines plus a trailing newline, or an
        empty string when the search returned nothing.
    """
    if for_keyword:
        fetch_count, keep, snippet_cap, freshness = 5, 4, 200, None
    else:
        fetch_count, keep, snippet_cap, freshness = 3, 2, 100, 7

    hits = brave_search(query, fetch_count, freshness_days=freshness)
    if not hits:
        return ""

    formatted = []
    for hit in hits[:keep]:
        body = hit['snippet']
        if len(body) > snippet_cap:
            body = body[:snippet_cap] + "..."
        if for_keyword:
            formatted.append(f"**{hit['title']}**\n{body}\nSource: {hit['host']}")
        else:
            formatted.append(f"- {hit['title']}: {body}")

    return "\n\n".join(formatted) + "\n"
|
198 |
-
|
199 |
-
|
200 |
-
def extract_keywords_for_search(text: str, language: str = "English") -> List[str]:
    """Pick a single search keyword from the beginning of ``text``.

    Only the first 500 characters are inspected. Surrounding punctuation,
    quotes and brackets are stripped from each word *before* filtering, and
    the longest remaining word that starts with an uppercase letter and has
    more than four characters is returned.

    Args:
        text: Source text.
        language: Accepted for interface compatibility; not used.

    Returns:
        A one-element list with the chosen keyword, or an empty list when no
        word qualifies.
    """
    strip_chars = ".,!?;:\"'()[]"

    candidates = []
    for raw in text[:500].split():
        word = raw.strip(strip_chars)
        # Filter on the cleaned word so trailing punctuation cannot inflate
        # its length and a leading quote cannot hide the capital letter.
        if len(word) > 4 and word[0].isupper():
            candidates.append(word)

    if not candidates:
        return []
    return [max(candidates, key=len)]
|
212 |
-
|
213 |
-
|
214 |
-
def search_and_compile_content(keyword: str, language: str = "English") -> str:
    """Compile source material about ``keyword`` from several web searches.

    Without a Brave API key a generic placeholder text about the keyword is
    returned. Otherwise six query variations are searched, the top three
    hits of each are collected, and generic filler is appended when the
    collected snippets total fewer than 1000 characters.

    Args:
        keyword: Topic to research.
        language: Accepted for interface compatibility; not used.

    Returns:
        Markdown-style text starting with a heading that names the keyword.
    """
    if not BRAVE_KEY:
        # No API key: still hand the caller some baseline content.
        return f"""
Comprehensive information about '{keyword}':

{keyword} is a significant topic in modern society.
This subject impacts our lives in various ways and has been
gaining increasing attention recently.

Key aspects:
1. Technological advancement and innovation
2. Social impact and changes
3. Future prospects and possibilities
4. Practical applications
5. Global trends and developments

Experts predict that {keyword} will become even more important,
and it's crucial to develop a deep understanding of this topic.
"""

    # Use the current year rather than a hard-coded one so the "latest news"
    # query does not drift towards stale results as time passes.
    current_year = datetime.now().year
    queries = [
        f"{keyword} latest news {current_year}",
        f"{keyword} explained comprehensive",
        f"{keyword} trends forecast",
        f"{keyword} advantages disadvantages",
        f"{keyword} how to use",
        f"{keyword} expert opinions",
    ]

    all_content = []
    total_content_length = 0

    for query in queries:
        results = brave_search(query, count=5)
        for r in results[:3]:  # top three hits per query
            all_content.append(f"**{r['title']}**\n{r['snippet']}\nSource: {r['host']}\n")
            total_content_length += len(r['snippet'])

    # Pad with generic material when the searches returned too little text.
    if total_content_length < 1000:
        additional_content = f"""
Additional insights:
Recent developments in {keyword} show rapid advancement in this field.
Many experts are actively researching this topic, and its practical
applications continue to expand.

Key points to note:
- Accelerating technological innovation
- Improving user experience
- Enhanced accessibility
- Increased cost efficiency
- Growing global market

These factors are making the future of {keyword} increasingly promising.
"""
        all_content.append(additional_content)

    compiled = "\n\n".join(all_content)
    intro = f"### Comprehensive information and latest trends about '{keyword}':\n\n"
    return intro + compiled
|
282 |
-
|
283 |
-
|
284 |
-
class UnifiedAudioConverter:
|
285 |
-
def __init__(self, config: ConversationConfig):
    """Store the configuration and reset every lazily created backend.

    No model is loaded here; the client/model attributes start as ``None``.
    """
    self.config = config
    # Builds the prompts/messages sent to the language models.
    self.prompt_builder = PromptBuilder()
    self.device = "cuda" if torch.cuda.is_available() else "cpu"

    # Remote API client.
    self.llm_client = None
    # llama.cpp based local model and the name it was loaded from.
    self.local_llm = None
    self.local_llm_model = None
    # Legacy Hugging Face transformers fallback.
    self.legacy_local_model = None
    self.legacy_tokenizer = None
|
296 |
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
raise RuntimeError("Llama CPP dependencies not available. Please install llama-cpp-python and llama-cpp-agent.")
|
306 |
|
307 |
-
|
308 |
-
|
309 |
-
# 모델 다운로드
|
310 |
-
model_path = hf_hub_download(
|
311 |
-
repo_id=self.config.local_model_repo,
|
312 |
-
filename=self.config.local_model_name,
|
313 |
-
local_dir="./models"
|
314 |
-
)
|
315 |
-
|
316 |
-
model_path_local = os.path.join("./models", self.config.local_model_name)
|
317 |
-
|
318 |
-
if not os.path.exists(model_path_local):
|
319 |
-
raise RuntimeError(f"Model file not found at {model_path_local}")
|
320 |
-
|
321 |
-
# Llama 모델 초기화
|
322 |
-
self.local_llm = Llama(
|
323 |
-
model_path=model_path_local,
|
324 |
-
flash_attn=True,
|
325 |
-
n_gpu_layers=81 if torch.cuda.is_available() else 0,
|
326 |
-
n_batch=1024,
|
327 |
-
n_ctx=16384,
|
328 |
-
)
|
329 |
-
self.local_llm_model = self.config.local_model_name
|
330 |
-
print(f"Local LLM initialized: {model_path_local}")
|
331 |
-
|
332 |
-
except Exception as e:
|
333 |
-
print(f"Failed to initialize local LLM: {e}")
|
334 |
-
raise RuntimeError(f"Failed to initialize local LLM: {e}")
|
335 |
-
|
336 |
-
@spaces.GPU(duration=60)
def initialize_legacy_local_mode(self):
    """Load the legacy Hugging Face model and tokenizer if not yet loaded.

    The model is loaded 4-bit quantised with float16 compute. Model and
    tokenizer are checked independently so that a tokenizer failure on a
    previous call (after the model had already loaded) is retried instead
    of leaving ``self.legacy_tokenizer`` as ``None`` forever.
    """
    if self.legacy_local_model is None:
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
        )
        self.legacy_local_model = AutoModelForCausalLM.from_pretrained(
            self.config.legacy_local_model_name,
            quantization_config=quantization_config,
        )

    if self.legacy_tokenizer is None:
        # NOTE(review): only the tokenizer is pinned to a revision; the model
        # above follows the repo's default branch — confirm this is intended.
        self.legacy_tokenizer = AutoTokenizer.from_pretrained(
            self.config.legacy_local_model_name,
            revision='8ab73a6800796d84448bc936db9bac5ad9f984ae',
        )
|
352 |
-
|
353 |
-
def fetch_text(self, url: str) -> str:
    """Fetch the text content of ``url`` through the configured prefix URL.

    Args:
        url: Absolute ``http://`` or ``https://`` URL.

    Returns:
        The response body as text.

    Raises:
        ValueError: If ``url`` is empty or does not use http/https.
        RuntimeError: If the request fails or returns an error status; the
            original ``httpx`` error is chained as the cause.
    """
    if not url:
        raise ValueError("URL cannot be empty")

    if not url.startswith(("http://", "https://")):
        raise ValueError("URL must start with 'http://' or 'https://'")

    full_url = f"{self.config.prefix_url}{url}"
    try:
        response = httpx.get(full_url, timeout=60.0)
        response.raise_for_status()
        return response.text
    except httpx.HTTPError as e:
        raise RuntimeError(f"Failed to fetch URL: {e}") from e
|
368 |
-
|
369 |
-
def extract_text_from_pdf(self, pdf_file) -> str:
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: A filesystem path (what Gradio passes), or a file-like
            object whose contents are first spooled to a temporary file.

    Returns:
        The page texts joined by newlines.

    Raises:
        RuntimeError: If the PDF cannot be read; the original error is
            chained as the cause.
    """
    is_temp = not isinstance(pdf_file, str)
    pdf_path = None
    try:
        if is_temp:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                # Record the path before writing so a failed write is still
                # cleaned up in the finally clause.
                pdf_path = tmp_file.name
                tmp_file.write(pdf_file.read())
        else:
            pdf_path = pdf_file

        pages = PyPDFLoader(pdf_path).load()
        return "\n".join(page.page_content for page in pages)
    except Exception as e:
        raise RuntimeError(f"Failed to extract text from PDF: {e}") from e
    finally:
        # Remove the temporary copy on success and on failure alike.
        if is_temp and pdf_path and os.path.exists(pdf_path):
            os.unlink(pdf_path)
|
395 |
-
|
396 |
-
def _get_messages_formatter_type(self, model_name):
    """Choose the chat template for ``model_name``.

    Names containing "Mistral" or "BitSix" use CHATML; everything else uses
    the LLAMA_3 formatter.
    """
    chatml_markers = ("Mistral", "BitSix")
    if any(marker in model_name for marker in chatml_markers):
        return MessagesFormatterType.CHATML
    return MessagesFormatterType.LLAMA_3
|
402 |
-
|
403 |
-
@spaces.GPU(duration=120)
def extract_conversation_local(self, text: str, language: str = "English", progress=None) -> Dict:
    """Generate the podcast conversation with the llama.cpp local model.

    Optionally enriches the prompt with web-search context, asks the local
    model for a JSON conversation and parses the first JSON object found in
    the reply. On any failure the legacy local model is used instead.

    Args:
        text: Source material for the conversation.
        language: Passed to the keyword extractor and prompt builder.
        progress: Forwarded unchanged to the legacy fallback.

    Returns:
        The parsed conversation dictionary.
    """
    try:
        # Web-search context is skipped for keyword-compiled input, which
        # already consists of search results.
        search_context = ""
        if BRAVE_KEY and not text.startswith("Keyword-based content:"):
            try:
                found = extract_keywords_for_search(text, language)
                if found:
                    search_query = f"{found[0]} latest news"
                    search_context = format_search_results(search_query)
                    print(f"Search context added for: {search_query}")
            except Exception as e:
                print(f"Search failed, continuing without context: {e}")

        # Try the llama.cpp model first.
        self.initialize_local_mode()

        formatter = self._get_messages_formatter_type(self.config.local_model_name)
        provider = LlamaCppPythonProvider(self.local_llm)

        # English-only system message.
        system_message = (
            "You are a professional podcast scriptwriter creating high-quality, "
            "insightful discussions in English. Create exactly 12 conversation exchanges "
            "with professional expertise. All dialogue must be in English. "
            "Respond only in JSON format."
        )

        agent = LlamaCppAgent(
            provider,
            system_prompt=system_message,
            predefined_messages_formatter_type=formatter,
            debug_output=False,
        )

        settings = provider.get_provider_default_settings()
        sampling_options = {
            "temperature": 0.75,
            "top_k": 40,
            "top_p": 0.95,
            "max_tokens": self.config.max_tokens,
            "repeat_penalty": 1.1,
            "stream": False,
        }
        for option, value in sampling_options.items():
            setattr(settings, option, value)

        history = BasicChatHistory()

        prompt = self.prompt_builder.build_prompt(text, language, search_context)
        response = agent.get_chat_response(
            prompt,
            llm_sampling_settings=settings,
            chat_history=history,
            returns_streaming_generator=False,
            print_output=False,
        )

        # Pull the first JSON object (one level of nested braces) from the reply.
        found_json = re.search(r"\{(?:[^{}]|(?:\{[^{}]*\}))*\}", response)
        if not found_json:
            raise ValueError("No valid JSON found in local LLM response")
        return json.loads(found_json.group())

    except Exception as e:
        print(f"Local LLM failed: {e}, falling back to legacy local method")
        return self.extract_conversation_legacy_local(text, language, progress, search_context)
|
472 |
-
|
473 |
-
@spaces.GPU(duration=120)
def extract_conversation_legacy_local(self, text: str, language: str = "English", progress=None, search_context: str = "") -> Dict:
    """Generate the conversation with the legacy transformers model.

    Generation runs on a worker thread while this method drains the text
    streamer; the first JSON object in the generated text is parsed and
    returned. On any failure the built-in default English conversation is
    returned instead.
    """
    try:
        self.initialize_legacy_local_mode()
        tokenizer = self.legacy_tokenizer

        # English-only chat messages.
        messages = self.prompt_builder.build_messages_for_local(text, language, search_context)

        terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|eot_id|>"),
        ]

        rendered = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        model_inputs = tokenizer([rendered], return_tensors="pt").to(self.device)

        streamer = TextIteratorStreamer(
            tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
        )

        generate_kwargs = dict(
            model_inputs,
            streamer=streamer,
            max_new_tokens=self.config.max_new_tokens,
            do_sample=True,
            temperature=0.75,
            eos_token_id=terminators,
        )

        worker = Thread(target=self.legacy_local_model.generate, kwargs=generate_kwargs)
        worker.start()

        # Draining the streamer collects the complete generated text.
        generated = "".join(streamer)

        found_json = re.search(r"\{(?:[^{}]|(?:\{[^{}]*\}))*\}", generated)
        if not found_json:
            raise ValueError("No valid JSON found in legacy local response")
        return json.loads(found_json.group())

    except Exception as e:
        print(f"Legacy local model also failed: {e}")
        return DefaultConversations.get_conversation("English")
|
523 |
-
|
524 |
-
def extract_conversation_api(self, text: str, language: str = "English") -> Dict:
    """Generate the conversation through the remote chat-completions API.

    Optionally enriches the prompt with web-search context, sends the
    messages to ``self.llm_client`` and parses the first JSON object found
    in the reply.

    Args:
        text: Source material for the conversation.
        language: Passed to the keyword extractor and prompt builder.

    Returns:
        The parsed conversation dictionary.

    Raises:
        RuntimeError: If the API client is not initialised, or if the
            request or parsing fails (original error chained as the cause).
    """
    if not self.llm_client:
        raise RuntimeError("API mode not initialized")

    try:
        # Web-search context is skipped for keyword-compiled input.
        search_context = ""
        if BRAVE_KEY and not text.startswith("Keyword-based content:"):
            try:
                keywords = extract_keywords_for_search(text, language)
                if keywords:
                    search_query = f"{keywords[0]} latest news"
                    search_context = format_search_results(search_query)
                    print(f"Search context added for: {search_query}")
            except Exception as e:
                print(f"Search failed, continuing without context: {e}")

        messages = self.prompt_builder.build_messages_for_local(text, language, search_context)

        chat_completion = self.llm_client.chat.completions.create(
            messages=messages,
            model=self.config.api_model_name,
            temperature=0.75,
        )

        # The message content may be None; treat that as "no JSON" rather
        # than letting re.search fail with a TypeError.
        content = chat_completion.choices[0].message.content or ""
        json_match = re.search(r"\{(?:[^{}]|(?:\{[^{}]*\}))*\}", content)

        if not json_match:
            raise ValueError("No valid JSON found in response")

        return json.loads(json_match.group())
    except Exception as e:
        raise RuntimeError(f"Failed to extract conversation: {e}") from e
|
560 |
-
|
561 |
-
def parse_conversation_text(self, conversation_text: str) -> Dict:
    """Parse ``Speaker: text`` lines back into the conversation structure.

    A line containing a colon with a non-empty speaker starts a new turn.
    Any other non-blank line is treated as a continuation of the previous
    turn (instead of being silently dropped). Turns that end up without any
    text are omitted.

    Args:
        conversation_text: Conversation with one ``Speaker: text`` turn per
            line, possibly followed by continuation lines.

    Returns:
        ``{"conversation": [{"speaker": ..., "text": ...}, ...]}``.
    """
    turns = []
    for raw_line in conversation_text.strip().split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        speaker, sep, spoken = line.partition(':')
        speaker, spoken = speaker.strip(), spoken.strip()

        if sep and speaker:
            turns.append({"speaker": speaker, "text": spoken})
        elif turns:
            # Continuation of the previous turn (wrapped or edited text).
            extra = spoken if sep else line
            if extra:
                previous = turns[-1]
                previous["text"] = f"{previous['text']} {extra}" if previous["text"] else extra

    return {"conversation": [turn for turn in turns if turn["text"]]}
|
575 |
-
|
576 |
-
@spaces.GPU(duration=120)
|
577 |
-
def generate_tts_audio_gpu(
|
578 |
-
self,
|
579 |
-
conversation_json: Dict,
|
580 |
-
audio_prompt_path_input: str,
|
581 |
-
exaggeration_input: float = 0.5,
|
582 |
-
temperature_input: float = 0.8,
|
583 |
-
seed_num_input: int = 0,
|
584 |
-
cfgw_input: float = 0.5,
|
585 |
-
chunk_size_input: int = 250
|
586 |
-
) -> tuple[int, np.ndarray]:
|
587 |
-
"""
|
588 |
-
Chatterbox TTS를 사용하여 대화를 음성으로 변환
|
589 |
-
"""
|
590 |
-
if not CHATTERBOX_AVAILABLE:
|
591 |
-
raise RuntimeError("Chatterbox TTS not available. Please install chatterbox package.")
|
592 |
|
|
|
593 |
try:
|
594 |
-
|
595 |
-
|
596 |
-
|
597 |
-
except Exception as e:
|
598 |
-
raise RuntimeError(f"Failed to load Chatterbox TTS model: {e}")
|
599 |
-
|
600 |
-
if seed_num_input != 0:
|
601 |
-
set_seed(int(seed_num_input))
|
602 |
-
|
603 |
-
audio_segments = []
|
604 |
-
|
605 |
-
for i, turn in enumerate(conversation_json["conversation"]):
|
606 |
-
text = turn["text"]
|
607 |
-
if not text.strip():
|
608 |
-
continue
|
609 |
-
|
610 |
-
print(f"🎙️ 생성 중: Speaker {i+1} - '{text[:50]}...'")
|
611 |
|
612 |
-
try:
|
613 |
-
# 텍스트가 짧으면 단일 생성
|
614 |
-
if len(text) <= 300:
|
615 |
-
wav = model.generate(
|
616 |
-
text,
|
617 |
-
audio_prompt_path=audio_prompt_path_input,
|
618 |
-
exaggeration=exaggeration_input,
|
619 |
-
temperature=temperature_input,
|
620 |
-
cfg_weight=cfgw_input,
|
621 |
-
)
|
622 |
-
wav_chunk = wav.squeeze(0).numpy()
|
623 |
-
audio_segments.append(wav_chunk)
|
624 |
-
else:
|
625 |
-
# 긴 텍스트는 청크로 분할
|
626 |
-
chunks = split_text_into_chunks(text, max_chars=chunk_size_input)
|
627 |
-
print(f"📝 텍스트를 {len(chunks)}개 청크로 분할")
|
628 |
-
|
629 |
-
chunk_audio_segments = []
|
630 |
-
for j, chunk in enumerate(chunks):
|
631 |
-
print(f" 📄 청크 {j+1}/{len(chunks)} 생성 중...")
|
632 |
-
wav = model.generate(
|
633 |
-
chunk,
|
634 |
-
audio_prompt_path=audio_prompt_path_input,
|
635 |
-
exaggeration=exaggeration_input,
|
636 |
-
temperature=temperature_input,
|
637 |
-
cfg_weight=cfgw_input,
|
638 |
-
)
|
639 |
-
wav_chunk = wav.squeeze(0).numpy()
|
640 |
-
chunk_audio_segments.append(wav_chunk)
|
641 |
-
|
642 |
-
# 청크들을 연결
|
643 |
-
if chunk_audio_segments:
|
644 |
-
silence_duration = int(0.1 * model.sr) # 0.1초 무음
|
645 |
-
silence = np.zeros(silence_duration)
|
646 |
-
|
647 |
-
turn_audio = []
|
648 |
-
for j, segment in enumerate(chunk_audio_segments):
|
649 |
-
turn_audio.append(segment)
|
650 |
-
if j < len(chunk_audio_segments) - 1:
|
651 |
-
turn_audio.append(silence)
|
652 |
-
|
653 |
-
concatenated_turn = np.concatenate(turn_audio)
|
654 |
-
audio_segments.append(concatenated_turn)
|
655 |
-
|
656 |
-
except Exception as e:
|
657 |
-
print(f"❌ Speaker {i+1} 생성 중 오류 발생: {e}")
|
658 |
-
# 오류 발생 시 무음으로 대체
|
659 |
-
silence_duration = int(2.0 * model.sr) # 2초 무음
|
660 |
-
silence = np.zeros(silence_duration)
|
661 |
-
audio_segments.append(silence)
|
662 |
-
continue
|
663 |
-
|
664 |
-
if not audio_segments:
|
665 |
-
raise RuntimeError("모든 오디오 생성에 실패했습니다.")
|
666 |
-
|
667 |
-
# 모든 스피커의 오디오 세그먼트 연결
|
668 |
-
speaker_silence_duration = int(0.5 * model.sr) # 스피커 간 0.5초 무음
|
669 |
-
speaker_silence = np.zeros(speaker_silence_duration)
|
670 |
-
|
671 |
-
final_audio = []
|
672 |
-
for i, segment in enumerate(audio_segments):
|
673 |
-
final_audio.append(segment)
|
674 |
-
if i < len(audio_segments) - 1:
|
675 |
-
final_audio.append(speaker_silence)
|
676 |
-
|
677 |
-
concatenated_audio = np.concatenate(final_audio)
|
678 |
-
|
679 |
-
print(f"🎉 오디오 생성 완료! 총 길이: {len(concatenated_audio) / model.sr:.2f}초")
|
680 |
-
return (model.sr, concatenated_audio)
|
681 |
-
|
682 |
-
def _create_output_directory(self) -> str:
|
683 |
-
"""Create a unique output directory"""
|
684 |
-
random_bytes = os.urandom(8)
|
685 |
-
folder_name = base64.urlsafe_b64encode(random_bytes).decode("utf-8")
|
686 |
-
os.makedirs(folder_name, exist_ok=True)
|
687 |
-
return folder_name
|
688 |
-
|
689 |
-
|
690 |
-
# Global converter instance
|
691 |
-
converter = UnifiedAudioConverter(ConversationConfig())
|
692 |
-
|
693 |
-
|
694 |
-
async def synthesize(article_input, input_type: str = "URL", mode: str = "Local"):
|
695 |
-
"""Main synthesis function - handles URL, PDF, and Keyword inputs"""
|
696 |
-
try:
|
697 |
-
# Extract text based on input type
|
698 |
-
if input_type == "URL":
|
699 |
-
if not article_input or not isinstance(article_input, str):
|
700 |
-
return "Please provide a valid URL.", None
|
701 |
-
text = converter.fetch_text(article_input)
|
702 |
-
elif input_type == "PDF":
|
703 |
-
if not article_input:
|
704 |
-
return "Please upload a PDF file.", None
|
705 |
-
text = converter.extract_text_from_pdf(article_input)
|
706 |
-
else: # Keyword
|
707 |
-
if not article_input or not isinstance(article_input, str):
|
708 |
-
return "Please provide a keyword or topic.", None
|
709 |
-
text = search_and_compile_content(article_input, "English")
|
710 |
-
text = f"Keyword-based content:\n{text}"
|
711 |
-
|
712 |
-
# Limit text to max words
|
713 |
-
words = text.split()
|
714 |
-
if len(words) > converter.config.max_words:
|
715 |
-
text = " ".join(words[:converter.config.max_words])
|
716 |
-
|
717 |
-
# Extract conversation based on mode
|
718 |
-
if mode == "Local":
|
719 |
-
try:
|
720 |
-
conversation_json = converter.extract_conversation_local(text, "English")
|
721 |
-
except Exception as e:
|
722 |
-
print(f"Local mode failed: {e}, trying API fallback")
|
723 |
-
api_key = os.environ.get("TOGETHER_API_KEY")
|
724 |
-
if api_key:
|
725 |
-
converter.initialize_api_mode(api_key)
|
726 |
-
conversation_json = converter.extract_conversation_api(text, "English")
|
727 |
-
else:
|
728 |
-
raise RuntimeError("Local mode failed and no API key available for fallback")
|
729 |
-
else: # API mode
|
730 |
-
api_key = os.environ.get("TOGETHER_API_KEY")
|
731 |
-
if not api_key:
|
732 |
-
print("API key not found, falling back to local mode")
|
733 |
-
conversation_json = converter.extract_conversation_local(text, "English")
|
734 |
-
else:
|
735 |
-
try:
|
736 |
-
converter.initialize_api_mode(api_key)
|
737 |
-
conversation_json = converter.extract_conversation_api(text, "English")
|
738 |
-
except Exception as e:
|
739 |
-
print(f"API mode failed: {e}, falling back to local mode")
|
740 |
-
conversation_json = converter.extract_conversation_local(text, "English")
|
741 |
-
|
742 |
-
# Generate conversation text
|
743 |
-
conversation_text = "\n".join(
|
744 |
-
f"{turn.get('speaker', f'Speaker {i+1}')}: {turn['text']}"
|
745 |
-
for i, turn in enumerate(conversation_json["conversation"])
|
746 |
-
)
|
747 |
-
|
748 |
-
return conversation_text, None
|
749 |
-
|
750 |
except Exception as e:
|
751 |
-
|
752 |
-
|
753 |
-
|
754 |
-
async def regenerate_audio(
    conversation_text: str,
    ref_audio_path: str,
    exaggeration: float = 0.5,
    temperature: float = 0.8,
    seed_num: int = 0,
    cfg_weight: float = 0.5,
    chunk_size: int = 250
):
    """Synthesize audio for an (edited) conversation with Chatterbox TTS.

    Args:
        conversation_text: Conversation as ``Speaker: text`` lines.
        ref_audio_path: Reference audio prompt passed to the TTS model.
        exaggeration: Forwarded to the TTS generation call.
        temperature: Forwarded to the TTS generation call.
        seed_num: Random seed; ``0`` leaves the generators unseeded.
        cfg_weight: Forwarded to the TTS generation call.
        chunk_size: Maximum characters per TTS chunk for long turns.

    Returns:
        ``(status_message, wav_path_or_None)``. Failures are reported via
        the status message rather than raised.
    """
    # Guard against None as well as blank text.
    if not conversation_text or not conversation_text.strip():
        return "Please provide conversation text.", None

    if not CHATTERBOX_AVAILABLE:
        return "Chatterbox TTS not available. Please check the installation.", None

    try:
        conversation_json = converter.parse_conversation_text(conversation_text)

        if not conversation_json["conversation"]:
            return "No valid conversation found in the text.", None

        try:
            sr, audio = converter.generate_tts_audio_gpu(
                conversation_json,
                ref_audio_path,
                exaggeration,
                temperature,
                seed_num,
                cfg_weight,
                chunk_size
            )

            # Persist the waveform so the UI can serve it as a file.
            output_dir = converter._create_output_directory()
            output_file = os.path.join(output_dir, "podcast_audio.wav")
            sf.write(output_file, audio, sr)

            return "🎉 Audio generated successfully!", output_file
        except Exception as e:
            # Map the failure to a user-facing hint based on its message.
            error_msg = str(e)
            if "Chatterbox TTS not available" in error_msg:
                return "❌ Chatterbox TTS is not properly installed. Please check the requirements.", None
            elif "CUDA" in error_msg or "GPU" in error_msg:
                return f"❌ GPU error: {error_msg}. Please try reducing chunk size or use CPU.", None
            else:
                return f"❌ Audio generation error: {error_msg}", None

    except Exception as e:
        return f"❌ Error processing conversation: {str(e)}", None
|
805 |
-
|
806 |
-
|
807 |
-
def synthesize_sync(article_input, input_type: str = "URL", mode: str = "Local"):
    """Run the async ``synthesize`` coroutine to completion and return its result."""
    pending = synthesize(article_input, input_type, mode)
    return asyncio.run(pending)
|
810 |
-
|
811 |
-
|
812 |
-
def regenerate_audio_sync(conversation_text: str, ref_audio_path: str, exaggeration: float, temperature: float, seed_num: int, cfg_weight: float, chunk_size: int):
    """Run the async ``regenerate_audio`` coroutine to completion and return its result."""
    pending = regenerate_audio(
        conversation_text,
        ref_audio_path,
        exaggeration,
        temperature,
        seed_num,
        cfg_weight,
        chunk_size,
    )
    return asyncio.run(pending)
|
815 |
-
|
816 |
-
|
817 |
-
def toggle_input_visibility(input_type):
    """Return visibility updates for the URL, PDF and keyword inputs.

    Exactly one of the three updates is visible: the first for "URL", the
    second for "PDF", and the third for any other value (keyword mode).
    """
    if input_type == "URL":
        shown = 0
    elif input_type == "PDF":
        shown = 1
    else:  # Keyword
        shown = 2
    return tuple(gr.update(visible=(slot == shown)) for slot in range(3))
|
825 |
-
|
826 |
-
|
827 |
-
def update_char_count(text, chunk_size):
    """Build the text-length summary shown next to the conversation box.

    Args:
        text: Conversation text; ``None`` is treated as empty text.
        chunk_size: Maximum characters per chunk, passed to
            ``split_text_into_chunks`` as ``max_chars``.

    Returns:
        A human-readable string. Text of at most 300 characters is reported
        as a single generation; longer text is reported with its chunk count
        and a rough time estimate.
    """
    # Guard against a missing value so len() cannot raise TypeError.
    text = text or ""
    char_len = len(text)
    if char_len <= 300:
        return f"{char_len} characters (single generation)"

    chunks = split_text_into_chunks(text, max_chars=chunk_size)
    chunk_count = len(chunks)
    estimated_time = chunk_count * 3  # rough estimate: about 3 seconds per chunk
    return f"{char_len} characters, {chunk_count} chunks (estimated time: ~{estimated_time}s)"
|
837 |
-
|
838 |
-
|
839 |
-
# Model initialization (runs once at application startup).
if LLAMA_CPP_AVAILABLE:
    try:
        # Repo and file name come from the converter's config; the file is
        # placed under ./models relative to the working directory.
        model_path = hf_hub_download(
            repo_id=converter.config.local_model_repo,
            filename=converter.config.local_model_name,
            local_dir="./models"
        )
        print(f"Model downloaded to: {model_path}")
    except Exception as download_error:
        # Best effort: a failed download is reported but does not stop startup.
        print(f"Failed to download model at startup: {download_error}")
|
850 |
-
|
851 |
-
|
852 |
-
# Gradio Interface
|
853 |
-
with gr.Blocks(theme='soft', title="AI Podcast Generator", css="""
|
854 |
-
.container {max-width: 1200px; margin: auto; padding: 20px;}
|
855 |
-
.header-text {text-align: center; margin-bottom: 30px;}
|
856 |
-
.input-group {background: #f7f7f7; padding: 20px; border-radius: 10px; margin-bottom: 20px;}
|
857 |
-
.output-group {background: #f0f0f0; padding: 20px; border-radius: 10px;}
|
858 |
-
.status-box {background: #e8f4f8; padding: 15px; border-radius: 8px; margin-top: 10px;}
|
859 |
-
""") as demo:
|
860 |
-
with gr.Column(elem_classes="container"):
|
861 |
-
# 헤더
|
862 |
-
with gr.Row(elem_classes="header-text"):
|
863 |
-
gr.Markdown("""
|
864 |
-
# 🎙️ LIVE Podcast Generator with Chatterbox TTS
|
865 |
-
### Convert any article, blog, PDF document, or topic into an engaging professional podcast conversation!
|
866 |
-
""")
|
867 |
-
|
868 |
-
with gr.Row(elem_classes="discord-badge"):
|
869 |
-
gr.HTML("""
|
870 |
-
<p style="text-align: center;">
|
871 |
-
<a href="https://discord.gg/openfreeai" target="_blank" style="display: inline-block; margin-right: 10px;">
|
872 |
-
<img src="https://img.shields.io/static/v1?label=Discord&message=Openfree%20AI&color=%230000ff&labelColor=%23800080&logo=discord&logoColor=white&style=for-the-badge" alt="badge">
|
873 |
-
</a>
|
874 |
-
<a href="https://open.spotify.com/show/36GtIP7iqJxCwp7FfXmTYK?si=KsIsUJq7SJiiudPTaMsXAA" target="_blank" style="display: inline-block;">
|
875 |
-
<img src="https://img.shields.io/static/v1?label=Spotify&message=Podcast&color=%230000ff&labelColor=%23000080&logo=Spotify&logoColor=white&style=for-the-badge" alt="badge">
|
876 |
-
</a>
|
877 |
-
<a href="https://huggingface.co/spaces/openfree/AI-Podcast" target="_blank" style="display: inline-block;">
|
878 |
-
<img src="https://img.shields.io/static/v1?label=Huggingface&message=AI%20Podcast&color=%230000ff&labelColor=%23ffa500&logo=huggingface&logoColor=white&style=for-the-badge" alt="badge">
|
879 |
-
</a>
|
880 |
-
</p>
|
881 |
-
""")
|
882 |
-
|
883 |
-
# 상태 표시 섹션
|
884 |
-
with gr.Row():
|
885 |
-
with gr.Column(scale=1):
|
886 |
-
gr.Markdown(f"""
|
887 |
-
#### 🤖 System Status
|
888 |
-
- **LLM**: {converter.config.local_model_name.split('.')[0]}
|
889 |
-
- **Fallback**: {converter.config.api_model_name.split('/')[-1]}
|
890 |
-
- **Llama CPP**: {"✅ Ready" if LLAMA_CPP_AVAILABLE else "❌ Not Available"}
|
891 |
-
- **Chatterbox TTS**: {"✅ Ready" if CHATTERBOX_AVAILABLE else "❌ Not Available"}
|
892 |
-
- **Search**: {"✅ Brave API" if BRAVE_KEY else "❌ No API"}
|
893 |
-
""")
|
894 |
-
with gr.Column(scale=1):
|
895 |
-
gr.Markdown("""
|
896 |
-
#### 🎙️ Chatterbox TTS Features
|
897 |
-
- **High Quality**: Neural voice synthesis
|
898 |
-
- **Voice Cloning**: Upload your reference audio
|
899 |
-
- **Unlimited Length**: Automatic text chunking
|
900 |
-
- **Professional Style**: Expert podcast discussions
|
901 |
-
""")
|
902 |
-
|
903 |
-
# 메인 입력 섹션
|
904 |
-
with gr.Group(elem_classes="input-group"):
|
905 |
-
with gr.Row():
|
906 |
-
# 왼쪽: 입력 옵션들
|
907 |
-
with gr.Column(scale=2):
|
908 |
-
# 입력 타입 선택
|
909 |
-
input_type_selector = gr.Radio(
|
910 |
-
choices=["URL", "PDF", "Keyword"],
|
911 |
-
value="URL",
|
912 |
-
label="📥 Input Type - Choose your content source"
|
913 |
-
)
|
914 |
-
|
915 |
-
# URL 입력
|
916 |
-
url_input = gr.Textbox(
|
917 |
-
label="🔗 Article URL",
|
918 |
-
placeholder="Enter the article URL here...",
|
919 |
-
value="",
|
920 |
-
visible=True,
|
921 |
-
lines=2
|
922 |
-
)
|
923 |
-
|
924 |
-
# PDF 업로드
|
925 |
-
pdf_input = gr.File(
|
926 |
-
label="📄 Upload PDF",
|
927 |
-
file_types=[".pdf"],
|
928 |
-
visible=False
|
929 |
-
)
|
930 |
-
|
931 |
-
# 키워드 입력
|
932 |
-
keyword_input = gr.Textbox(
|
933 |
-
label="🔍 Topic/Keyword - System will search and compile latest information",
|
934 |
-
placeholder="Enter a topic (e.g., 'AI trends 2024', 'quantum computing')",
|
935 |
-
value="",
|
936 |
-
visible=False,
|
937 |
-
lines=2
|
938 |
-
)
|
939 |
-
|
940 |
-
# 오른쪽: 설정 옵션들
|
941 |
-
with gr.Column(scale=1):
|
942 |
-
# 처리 모드
|
943 |
-
mode_selector = gr.Radio(
|
944 |
-
choices=["Local", "API"],
|
945 |
-
value="Local",
|
946 |
-
label="⚙️ Processing Mode - Local: On-device | API: Cloud"
|
947 |
-
)
|
948 |
-
|
949 |
-
# 생성 버튼
|
950 |
-
with gr.Row():
|
951 |
-
convert_btn = gr.Button(
|
952 |
-
"🎯 Generate Professional Conversation",
|
953 |
-
variant="primary",
|
954 |
-
size="lg",
|
955 |
-
scale=1
|
956 |
-
)
|
957 |
-
|
958 |
-
# TTS 설정 섹션
|
959 |
-
with gr.Group(elem_classes="input-group"):
|
960 |
-
gr.Markdown("### 🎙️ Chatterbox TTS Settings")
|
961 |
-
|
962 |
-
with gr.Row():
|
963 |
-
with gr.Column(scale=2):
|
964 |
-
ref_audio = gr.Audio(
|
965 |
-
sources=["upload", "microphone"],
|
966 |
-
type="filepath",
|
967 |
-
label="Reference Audio File (Upload your voice sample for voice cloning)",
|
968 |
-
value="https://storage.googleapis.com/chatterbox-demo-samples/prompts/female_shadowheart4.flac"
|
969 |
-
)
|
970 |
-
|
971 |
-
with gr.Column(scale=1):
|
972 |
-
exaggeration = gr.Slider(
|
973 |
-
0.25, 2, step=.05,
|
974 |
-
label="Exaggeration (Neutral = 0.5)",
|
975 |
-
value=.5
|
976 |
-
)
|
977 |
-
cfg_weight = gr.Slider(
|
978 |
-
0.2, 1, step=.05,
|
979 |
-
label="CFG/Pace",
|
980 |
-
value=0.5
|
981 |
-
)
|
982 |
-
chunk_size = gr.Slider(
|
983 |
-
100, 300, step=50,
|
984 |
-
label="Chunk Size (characters) - Text chunking for long conversations",
|
985 |
-
value=250
|
986 |
-
)
|
987 |
-
|
988 |
-
with gr.Accordion("Advanced Options", open=False):
|
989 |
-
seed_num = gr.Number(value=0, label="Random seed (0 for random)")
|
990 |
-
temperature = gr.Slider(0.05, 5, step=.05, label="Temperature", value=.8)
|
991 |
-
|
992 |
-
# 출력 섹션
|
993 |
-
with gr.Group(elem_classes="output-group"):
|
994 |
-
with gr.Row():
|
995 |
-
# 왼쪽: 대화 텍스트
|
996 |
-
with gr.Column(scale=3):
|
997 |
-
conversation_output = gr.Textbox(
|
998 |
-
label="💬 Generated Professional Conversation (Editable - Format: 'Speaker Name: Text')",
|
999 |
-
lines=25,
|
1000 |
-
max_lines=50,
|
1001 |
-
interactive=True,
|
1002 |
-
placeholder="Professional podcast conversation will appear here..."
|
1003 |
-
)
|
1004 |
-
|
1005 |
-
# 텍스트 길이 표시
|
1006 |
-
char_count = gr.Textbox(
|
1007 |
-
label="Text Information",
|
1008 |
-
value="0 characters",
|
1009 |
-
interactive=False
|
1010 |
-
)
|
1011 |
-
|
1012 |
-
# 오디오 생성 버튼
|
1013 |
-
with gr.Row():
|
1014 |
-
generate_audio_btn = gr.Button(
|
1015 |
-
"🎙️ Generate Audio with Chatterbox TTS",
|
1016 |
-
variant="secondary",
|
1017 |
-
size="lg"
|
1018 |
-
)
|
1019 |
-
|
1020 |
-
# 오른쪽: 오디오 출력 및 상태
|
1021 |
-
with gr.Column(scale=2):
|
1022 |
-
audio_output = gr.Audio(
|
1023 |
-
label="🎧 Professional Podcast Audio",
|
1024 |
-
type="filepath",
|
1025 |
-
interactive=False
|
1026 |
-
)
|
1027 |
-
|
1028 |
-
status_output = gr.Textbox(
|
1029 |
-
label="📊 Status",
|
1030 |
-
interactive=False,
|
1031 |
-
lines=3,
|
1032 |
-
elem_classes="status-box"
|
1033 |
-
)
|
1034 |
-
|
1035 |
-
# 도움말
|
1036 |
-
gr.Markdown("""
|
1037 |
-
#### 💡 Quick Tips:
|
1038 |
-
- **URL**: Paste any article link
|
1039 |
-
- **PDF**: Upload documents directly
|
1040 |
-
- **Keyword**: Enter topics for AI research
|
1041 |
-
- **Voice Cloning**: Upload reference audio
|
1042 |
-
- Edit conversation before audio generation
|
1043 |
-
- Longer text automatically chunked
|
1044 |
-
""")
|
1045 |
-
|
1046 |
-
# 예제 섹션
|
1047 |
-
with gr.Accordion("📚 Examples", open=False):
|
1048 |
-
gr.Examples(
|
1049 |
-
examples=[
|
1050 |
-
["https://huggingface.co/blog/openfreeai/cycle-navigator", "URL", "Local"],
|
1051 |
-
["quantum computing breakthroughs", "Keyword", "Local"],
|
1052 |
-
["https://huggingface.co/papers/2505.14810", "URL", "Local"],
|
1053 |
-
["artificial intelligence ethics", "Keyword", "Local"],
|
1054 |
-
],
|
1055 |
-
inputs=[url_input, input_type_selector, mode_selector],
|
1056 |
-
outputs=[conversation_output, status_output],
|
1057 |
-
fn=synthesize_sync,
|
1058 |
-
cache_examples=False,
|
1059 |
-
)
|
1060 |
-
|
1061 |
-
# Input type change handler
|
1062 |
-
input_type_selector.change(
|
1063 |
-
fn=toggle_input_visibility,
|
1064 |
-
inputs=[input_type_selector],
|
1065 |
-
outputs=[url_input, pdf_input, keyword_input]
|
1066 |
-
)
|
1067 |
-
|
1068 |
-
# 텍스트 입력 시 문자 수 업데이트
|
1069 |
-
conversation_output.change(
|
1070 |
-
fn=update_char_count,
|
1071 |
-
inputs=[conversation_output, chunk_size],
|
1072 |
-
outputs=[char_count]
|
1073 |
-
)
|
1074 |
-
|
1075 |
-
chunk_size.change(
|
1076 |
-
fn=update_char_count,
|
1077 |
-
inputs=[conversation_output, chunk_size],
|
1078 |
-
outputs=[char_count]
|
1079 |
-
)
|
1080 |
-
|
1081 |
-
# 이벤트 연결
|
1082 |
-
def get_article_input(input_type, url_input, pdf_input, keyword_input):
    """Pick the widget value that corresponds to the selected input type.

    "URL" yields ``url_input`` and "PDF" yields ``pdf_input``; every other
    ``input_type`` falls back to ``keyword_input``.
    """
    if input_type == "URL":
        return url_input
    # Not a URL: either the uploaded PDF or, by default, the keyword text.
    return pdf_input if input_type == "PDF" else keyword_input
|
1090 |
-
|
1091 |
-
convert_btn.click(
|
1092 |
-
fn=lambda input_type, url_input, pdf_input, keyword_input, mode: synthesize_sync(
|
1093 |
-
get_article_input(input_type, url_input, pdf_input, keyword_input), input_type, mode
|
1094 |
-
),
|
1095 |
-
inputs=[input_type_selector, url_input, pdf_input, keyword_input, mode_selector],
|
1096 |
-
outputs=[conversation_output, status_output]
|
1097 |
-
)
|
1098 |
-
|
1099 |
-
generate_audio_btn.click(
|
1100 |
-
fn=regenerate_audio_sync,
|
1101 |
-
inputs=[conversation_output, ref_audio, exaggeration, temperature, seed_num, cfg_weight, chunk_size],
|
1102 |
-
outputs=[status_output, audio_output]
|
1103 |
-
)
|
1104 |
-
|
1105 |
|
1106 |
-
# Launch the app
|
1107 |
if __name__ == "__main__":
|
1108 |
-
|
1109 |
-
show_api=True,
|
1110 |
-
share=False,
|
1111 |
-
server_name="0.0.0.0",
|
1112 |
-
server_port=7860
|
1113 |
-
)
|
|
|
|
|
|
|
1 |
import os
|
2 |
+
import sys
|
3 |
+
import streamlit as st
|
4 |
+
from tempfile import NamedTemporaryFile
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
+
def main():
    """Load the application source from the ``MAIN_CODE`` secret and run it.

    The source text is read from the ``MAIN_CODE`` environment variable,
    written to a temporary ``.py`` file (whose path is used as the filename
    when compiling, so tracebacks point at a real file), and executed in this
    module's global namespace.

    If the variable is missing or empty, an error is shown via ``st.error``
    and nothing is executed. Any ``Exception`` raised while loading or running
    the code is reported with ``st.error`` plus the formatted traceback. The
    temporary file is removed on every exit path.
    """
    tmp_path = None
    try:
        # Get the code from secrets
        code = os.environ.get("MAIN_CODE")

        if not code:
            st.error("⚠️ The application code wasn't found in secrets. Please add the MAIN_CODE secret.")
            return

        # Create a temporary Python file. UTF-8 is forced so that non-ASCII
        # source text does not depend on the locale's default encoding.
        with NamedTemporaryFile(suffix='.py', delete=False, mode='w', encoding='utf-8') as tmp:
            tmp.write(code)
            tmp_path = tmp.name

        # Execute the code
        # NOTE(security): this runs whatever MAIN_CODE contains with the full
        # privileges of this process; the secret must come from a trusted source.
        exec(compile(code, tmp_path, 'exec'), globals())

    except Exception as e:
        st.error(f"⚠️ Error loading or executing the application: {str(e)}")
        import traceback
        st.code(traceback.format_exc())
    finally:
        # Clean up the temporary file. Done here, after the except handler,
        # so the file still exists while the traceback is being formatted and
        # is removed even when the executed code raises.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best effort: a leftover temp file must not mask the real outcome.
                pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
|
|
34 |
if __name__ == "__main__":
|
35 |
+
main()
|
|
|
|
|
|
|
|
|
|