chatbot add

- .gitignore +2 -0
- app/agent/custom_chatbot.py +39 -15
- app/utils/whisper_llm.py +16 -19
- requirements.txt +44 -35
- setup-dubsway-env.bat +81 -0
- start-server.bat +8 -0
- worker/daemon.py +26 -10
.gitignore
CHANGED
@@ -9,6 +9,8 @@ env/
 venv/
 myenv/
 .myenv/
+myenv31/
+.myenv31/
 *.env

 # Jupyter/IPython
app/agent/custom_chatbot.py
CHANGED
@@ -1,10 +1,11 @@
 import os
 import logging
 from fastapi import APIRouter, HTTPException
+from pydantic.v1 import BaseModel
 from dotenv import load_dotenv

 from langchain_groq import ChatGroq
+
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -15,15 +16,26 @@ from langchain.chains import create_retrieval_chain
 # Load environment variables
 load_dotenv()

+# Router and logger
 router = APIRouter()
 logger = logging.getLogger("custom_chatbot")
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

+# Validate GROQ API Key
 groq_api_key = os.getenv("GROQ_API_KEY")
+if not groq_api_key:
+    logger.error("GROQ_API_KEY is not set in the environment.")
+    raise RuntimeError("GROQ_API_KEY must be set in .env or environment variables.")
+
+# LLM Initialization
+try:
+    llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")
+    logger.info("ChatGroq LLM initialized successfully.")
+except Exception as e:
+    logger.exception("Failed to initialize ChatGroq LLM.")
+    raise
+
+# Prompt Template
 prompt_template = ChatPromptTemplate.from_template("""
 Answer the question based only on the provided context.
 <context>
@@ -33,27 +45,37 @@ Answer the question based only on the provided context.
 Question: {input}
 """)

+# Input schema
 class ChatRequest(BaseModel):
     query: str
     user_id: int

+# Load vector store for a given user
 def load_user_vector_store(user_id: int):
     user_path = f"vector_store/user_{user_id}"
     index_file = os.path.join(user_path, "index.faiss")

+    logger.info(f"Looking for vector store at {index_file}")
     if not os.path.exists(index_file):
+        msg = f"No vector store found for user {user_id}"
+        logger.warning(msg)
+        raise FileNotFoundError(msg)

+    try:
+        embeddings = OpenAIEmbeddings()
+        vector_store = FAISS.load_local(user_path, embeddings, allow_dangerous_deserialization=True)
+        logger.info(f"Vector store loaded for user {user_id}")
+        return vector_store
+    except Exception as e:
+        logger.exception(f"Failed to load vector store for user {user_id}")
+        raise

+# Main chatbot endpoint
 @router.post("/custom-chatbot")
 async def custom_chatbot(request: ChatRequest):
     query = request.query
     user_id = request.user_id
+    logger.info(f"Received query from user {user_id}: {query}")

     try:
         vector_store = load_user_vector_store(user_id)
@@ -63,15 +85,17 @@ async def custom_chatbot(request: ChatRequest):
         rag_chain = create_retrieval_chain(retriever, doc_chain)

         response = rag_chain.invoke({"input": query})
+        logger.info(f"Response generated for user {user_id}")
+
         return {
             "answer": response["answer"],
+            "sources": [doc.page_content for doc in response.get("context", [])],
         }

     except FileNotFoundError as e:
+        logger.warning(f"{e}")
         raise HTTPException(status_code=404, detail=str(e))

     except Exception as e:
+        logger.exception("Unexpected error in custom chatbot endpoint.")
         raise HTTPException(status_code=500, detail="Internal server error")
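Note: a minimal sketch of how the new /custom-chatbot endpoint could be exercised, assuming the router above is mounted without a prefix on app.main:app (the module path used by start-server.bat further down); the payload fields follow the ChatRequest schema, and user_id 1 is a placeholder:

from fastapi.testclient import TestClient

from app.main import app  # assumed app location, matching start-server.bat

client = TestClient(app)

# Payload mirrors ChatRequest: a free-form query plus the numeric user ID
resp = client.post("/custom-chatbot", json={"query": "What was the video about?", "user_id": 1})

print(resp.status_code)  # 404 is expected if no FAISS index exists for that user yet
print(resp.json())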
app/utils/whisper_llm.py
CHANGED
@@ -11,20 +11,19 @@ from langchain_openai import OpenAIEmbeddings
 from langchain_core.documents import Document
 from langchain_community.vectorstores import FAISS

+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+from app.models import User

 # Setup logger
 logger = logging.getLogger("app.utils.whisper_llm")
 logger.setLevel(logging.INFO)
 if not logger.handlers:
     handler = logging.StreamHandler()
     formatter = logging.Formatter("[%(asctime)s] %(levelname)s - %(message)s")
     handler.setFormatter(formatter)
     logger.addHandler(handler)

 # Whisper Model Initialization
 def get_whisper_model():
     if torch.cuda.is_available():
@@ -35,6 +34,7 @@ def get_whisper_model():
         device = "cpu"
         compute_type = "int8"
         logger.warning("GPU not available: Falling back to CPU with int8 compute")
+
     try:
         model = WhisperModel("base", device=device, compute_type=compute_type)
         logger.info(f"Loaded Faster-Whisper model on {device} with compute_type={compute_type}")
@@ -53,7 +53,6 @@ except Exception as e:
     logger.error(f"Failed to load summarization pipeline: {e}")
     raise

 # Chunked summarization
 def summarize_in_chunks(text, chunk_size=800, overlap=100):
     summaries = []
@@ -71,25 +70,20 @@ def summarize_in_chunks(text, chunk_size=800, overlap=100):
             logger.error(f"Chunk summarization failed: {e}")
     return " ".join(summaries)

+# Async user fetch using AsyncSession
+async def get_user(user_id: int, db: AsyncSession):
+    result = await db.execute(select(User).where(User.id == user_id))
+    return result.scalar_one_or_none()

+# Core analyzer function with per-user FAISS ingestion
+async def analyze(video_url: str, user_id: int, db: AsyncSession):
+    user = await get_user(user_id, db)
     if not user:
+        raise ValueError(f"User with ID {user_id} not found in database.")

     logger.info(f"Starting video analysis for user: {user.email} (ID: {user.id})")

+    # Step 1: Download video to temp file
     try:
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
             with requests.get(video_url, stream=True, timeout=60) as response:
@@ -102,6 +96,7 @@ def analyze(video_url: str, user_id: int):
         logger.error(f"Failed to download video: {e}")
         raise

+    # Step 2: Transcribe
     try:
         logger.info("Transcribing audio with Faster-Whisper...")
         segments, _ = whisper_model.transcribe(tmp_path)
@@ -111,6 +106,7 @@ def analyze(video_url: str, user_id: int):
         logger.error(f"Transcription failed: {e}")
         raise

+    # Step 3: Summarize
     try:
         logger.info("Summarizing transcript with Hugging Face model...")
         summary = summarize_in_chunks(text)
@@ -119,6 +115,7 @@ def analyze(video_url: str, user_id: int):
         logger.error(f"Summarization failed: {e}")
         raise

+    # Step 4: Save to FAISS store
     try:
         logger.info("Creating/updating FAISS vector store for user...")
         documents = [Document(page_content=summary)]
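Note: a minimal sketch of how the reworked async analyze could be driven on its own, assuming app.database.AsyncSessionLocal is the session factory (the same one imported by worker/daemon.py below); the video URL and user ID are placeholders:

import asyncio

from app.database import AsyncSessionLocal  # assumed session factory, as used by the worker
from app.utils import whisper_llm

async def main():
    # analyze now expects an active AsyncSession alongside the URL and user ID
    async with AsyncSessionLocal() as session:
        result = await whisper_llm.analyze(
            video_url="https://example.com/sample.mp4",  # placeholder URL
            user_id=1,                                   # placeholder user ID
            db=session,
        )
        print(result)

asyncio.run(main())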
requirements.txt
CHANGED
@@ -1,47 +1,56 @@
+# Core
 fastapi
 uvicorn
 boto3
 requests
 python-dotenv
+python-multipart
+pydantic[email]>=1.10,<2.0  # V1 for compatibility with many frameworks
+
+# DB
+asyncpg
+sqlalchemy>=2.0
+databases
+psycopg2-binary
+
+# Auth
 passlib[bcrypt]
 python-jose[cryptography]
+
+# LLM & RAG
+langchain==0.1.13
+langchain-openai==0.1.7
+langchain-community==0.0.38
+langchain-core==0.1.53
+langchain-groq
+langchainhub
 langserve
+langchain-objectbox
+
+# Embedding & vector DB
+sentence-transformers==2.2.2
 faiss-cpu
+chromadb
+
+# Tools & Transcription
+transformers
+whisper
+faster-whisper==1.0.1
+ctranslate2>=4.0,<5
+PyPDF2
+pypdf
+reportlab
+bs4
 beautifulsoup4
+
+# Optional
+sse-starlette
 wikipedia
 arxiv
+cassio
+streamlit
+
+# CUDA-enabled Torch (installed separately)
+# torch==2.2.2+cu121 and torchvision==0.17.2+cu121
+# Must be installed via pip with specific index:
+# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
setup-dubsway-env.bat
ADDED
@@ -0,0 +1,81 @@
+@echo off
+setlocal enabledelayedexpansion
+
+REM ---- CONFIGURATION ----
+set ENV_NAME=myenv
+set PYTHON_EXE=python REM Ensure python points to 3.10 or 3.11 in PATH
+set TORCH_INDEX=https://download.pytorch.org/whl/cu121
+
+echo.
+echo Creating virtual environment: %ENV_NAME%
+%PYTHON_EXE% -m venv %ENV_NAME%
+
+echo.
+echo Activating environment...
+call %ENV_NAME%\Scripts\activate.bat
+
+echo.
+echo Upgrading pip
+pip install --upgrade pip
+
+echo.
+echo Writing requirements.txt
+(
+echo fastapi
+echo uvicorn
+echo boto3
+echo requests
+echo python-dotenv
+echo python-multipart
+echo pydantic[email]>=1.10,<2.0
+echo asyncpg
+echo sqlalchemy>=2.0
+echo databases
+echo psycopg2-binary
+echo passlib[bcrypt]
+echo python-jose[cryptography]
+echo langchain==0.1.13
+echo langchain-openai==0.1.7
+echo langchain-community==0.0.38
+echo langchain-core==0.1.53
+echo langchain-groq==0.0.3
+echo langchainhub
+echo langserve
+echo langchain-objectbox
+echo sentence-transformers==2.2.2
+echo faiss-cpu
+echo chromadb
+echo transformers
+echo whisper
+echo faster-whisper==1.0.1
+echo ctranslate2==3.22.0
+echo PyPDF2
+echo pypdf
+echo reportlab
+echo bs4
+echo beautifulsoup4
+echo sse-starlette
+echo wikipedia
+echo arxiv
+echo cassio
+echo streamlit
+) > requirements.txt
+
+echo.
+echo Installing base packages...
+pip install -r requirements.txt
+
+echo.
+echo Installing PyTorch (CUDA 12.1 build)...
+pip install torch torchvision torchaudio --index-url %TORCH_INDEX%
+
+echo.
+echo Verifying important packages...
+python -c "import torch; print('Torch version:', torch.__version__)"
+python -c "from langchain_groq import ChatGroq; print('langchain_groq available')"
+python -c "from faster_whisper import WhisperModel; print('faster-whisper ready')"
+python -c "import faiss; print('faiss-cpu available')"
+
+echo.
+echo Setup complete! Environment '%ENV_NAME%' is ready to use.
+endlocal
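Note: inside the parenthesized echo block above, unescaped > and < are treated as redirection operators by cmd, so entries such as pydantic[email]>=1.10,<2.0 and sqlalchemy>=2.0 are unlikely to reach requirements.txt as written unless the operators are escaped (^> and ^<). The inline REM on the set PYTHON_EXE line is also worth a look, since cmd folds it into the variable's value rather than treating it as a comment. A hedged alternative is to write the file from Python, which sidesteps cmd escaping entirely; the package list below simply mirrors the echo block:

# Sketch: write the same requirements.txt from Python instead of cmd echo lines,
# avoiding the need to escape > and < inside the batch file.
packages = [
    "fastapi", "uvicorn", "boto3", "requests", "python-dotenv", "python-multipart",
    "pydantic[email]>=1.10,<2.0", "asyncpg", "sqlalchemy>=2.0", "databases",
    "psycopg2-binary", "passlib[bcrypt]", "python-jose[cryptography]",
    "langchain==0.1.13", "langchain-openai==0.1.7", "langchain-community==0.0.38",
    "langchain-core==0.1.53", "langchain-groq==0.0.3", "langchainhub", "langserve",
    "langchain-objectbox", "sentence-transformers==2.2.2", "faiss-cpu", "chromadb",
    "transformers", "whisper", "faster-whisper==1.0.1", "ctranslate2==3.22.0",
    "PyPDF2", "pypdf", "reportlab", "bs4", "beautifulsoup4", "sse-starlette",
    "wikipedia", "arxiv", "cassio", "streamlit",
]

with open("requirements.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(packages) + "\n")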
start-server.bat
ADDED
@@ -0,0 +1,8 @@
+@echo off
+echo Activating virtual environment...
+call .\myenv\Scripts\activate
+
+echo Starting FastAPI server...
+python -m uvicorn app.main:app --reload
+
+pause
worker/daemon.py
CHANGED
@@ -2,6 +2,7 @@ import asyncio
 import os
 import time
 from datetime import datetime
+import traceback

 from sqlalchemy.future import select
 from sqlalchemy.ext.asyncio import AsyncSession
@@ -10,7 +11,6 @@ from app.database import AsyncSessionLocal
 from app.models import VideoUpload
 from app.utils import whisper_llm, pdf, s3

 POLL_INTERVAL = 200  # seconds

@@ -26,33 +26,48 @@ async def process_pending_videos():
                 print(f"Processing video ID {video.id} for user {video.user_id}")

                 try:
+                    # New:
+                    transcription, summary = await whisper_llm.analyze(
+                        video_url=video.video_url,
+                        user_id=video.user_id,
+                        db=session  # passing the active AsyncSession
+                    )
+
                 except Exception as e:
+                    print(f"Whisper failed for video {video.id}: {e}")
+                    traceback.print_exc()
                     continue

                 try:
                     pdf_bytes = pdf.generate(transcription, summary)
                 except Exception as e:
+                    print(f"PDF generation failed for video {video.id}: {e}")
+                    traceback.print_exc()
                     continue

                 try:
                     pdf_key = f"pdfs/{video.id}.pdf"
                     pdf_url = s3.upload_pdf_bytes(pdf_bytes, pdf_key)
                 except Exception as e:
+                    print(f"Upload to S3 failed for video {video.id}: {e}")
+                    traceback.print_exc()
                     continue

+                try:
+                    video.status = "completed"
+                    video.pdf_url = pdf_url
+                    video.updated_at = datetime.utcnow()

+                    await session.commit()
+                    print(f"Completed video {video.id}")
+
+                except Exception as e:
+                    print(f"DB commit failed for video {video.id}: {e}")
+                    traceback.print_exc()

         except Exception as e:
             print(f"DB error: {e}")
+            traceback.print_exc()


 async def run_worker():
@@ -63,6 +78,7 @@ async def run_worker():
             await process_pending_videos()
         except Exception as e:
             print(f"Worker loop crashed: {e}")
+            traceback.print_exc()
         await asyncio.sleep(POLL_INTERVAL)

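Note: the hunks above do not show this module's entry point; presumably the worker is launched with something along these lines (a sketch, assuming run_worker is the intended top-level coroutine):

# Hypothetical entry point; the actual launch code sits outside the hunks shown above.
if __name__ == "__main__":
    asyncio.run(run_worker())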