app clean
- .gitignore +4 -0
- app/utils/online_vector_store.py +181 -0
- app/utils/whisper_llm.py +1 -1
- clean-for-deployment.bat +0 -69
- clean-repo-history.bat +0 -56
- deploy-to-hf.bat +0 -82
- fix-pydantic-deploy.bat +0 -37
- fix_agentic_errors.bat +0 -28
- fresh-deploy.bat +0 -78
- push-to-hf.bat +0 -49
- run_agentic.bat +0 -43
- run_lightweight_agentic.bat +0 -44
- setup_agentic_system.bat +0 -63
- test_agentic_system.py +0 -180
- worker/daemon.py +0 -210
.gitignore
CHANGED

@@ -5,6 +5,10 @@ __pycache__/
 *.pyd
 
 # Virtual environments
+aienv/
+aienv/
+.aienv/
+.aienv/
 env/
 venv/
 myenv/
app/utils/online_vector_store.py
ADDED

@@ -0,0 +1,181 @@
+import os
+import logging
+from typing import List, Optional
+from langchain_core.documents import Document
+from langchain_openai import OpenAIEmbeddings
+from langchain_pinecone import PineconeVectorStore
+from pinecone import Pinecone, ServerlessSpec
+
+# Setup logger
+logger = logging.getLogger("app.utils.online_vector_store")
+logger.setLevel(logging.INFO)
+if not logger.handlers:
+    handler = logging.StreamHandler()
+    formatter = logging.Formatter("[%(asctime)s] %(levelname)s - %(message)s")
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+
+class OnlineVectorStore:
+    def __init__(self):
+        self.pinecone_api_key = os.getenv("PINECONE_API_KEY")
+        self.pinecone_environment = os.getenv("PINECONE_ENVIRONMENT", "gcp-starter")
+        self.index_name = "dubsway-video-ai"
+
+        if not self.pinecone_api_key:
+            logger.warning("PINECONE_API_KEY not found. Using fallback local storage.")
+            self.use_pinecone = False
+        else:
+            self.use_pinecone = True
+            self._initialize_pinecone()
+
+    def _initialize_pinecone(self):
+        """Initialize Pinecone client and create index if needed."""
+        try:
+            pc = Pinecone(api_key=self.pinecone_api_key)
+
+            # Check if index exists
+            if self.index_name not in pc.list_indexes().names():
+                logger.info(f"Creating Pinecone index: {self.index_name}")
+                pc.create_index(
+                    name=self.index_name,
+                    dimension=1536,  # OpenAI embeddings dimension
+                    metric="cosine",
+                    spec=ServerlessSpec(
+                        cloud="aws",
+                        region="us-east-1"
+                    )
+                )
+                logger.info(f"Pinecone index {self.index_name} created successfully")
+            else:
+                logger.info(f"Using existing Pinecone index: {self.index_name}")
+
+        except Exception as e:
+            logger.error(f"Failed to initialize Pinecone: {e}")
+            self.use_pinecone = False
+
+    def add_documents(self, documents: List[Document], user_id: int) -> bool:
+        """Add documents to the vector store."""
+        try:
+            if not documents:
+                logger.warning("No documents to add")
+                return False
+
+            # Add user_id metadata to each document
+            for doc in documents:
+                if not hasattr(doc, 'metadata'):
+                    doc.metadata = {}
+                doc.metadata['user_id'] = user_id
+                doc.metadata['source'] = 'video_analysis'
+
+            if self.use_pinecone:
+                return self._add_to_pinecone(documents, user_id)
+            else:
+                logger.warning("Pinecone not available, skipping vector storage")
+                return False
+
+        except Exception as e:
+            logger.error(f"Failed to add documents to vector store: {e}")
+            return False
+
+    def _add_to_pinecone(self, documents: List[Document], user_id: int) -> bool:
+        """Add documents to Pinecone."""
+        try:
+            embeddings = OpenAIEmbeddings()
+
+            # Create Pinecone vector store
+            vector_store = PineconeVectorStore.from_documents(
+                documents=documents,
+                embedding=embeddings,
+                index_name=self.index_name,
+                namespace=f"user_{user_id}"
+            )
+
+            logger.info(f"Successfully added {len(documents)} documents to Pinecone for user {user_id}")
+            return True
+
+        except Exception as e:
+            logger.error(f"Failed to add documents to Pinecone: {e}")
+            return False
+
+    def search(self, query: str, user_id: int, k: int = 5) -> List[Document]:
+        """Search for similar documents."""
+        try:
+            if not self.use_pinecone:
+                logger.warning("Pinecone not available, returning empty results")
+                return []
+
+            embeddings = OpenAIEmbeddings()
+
+            # Create Pinecone vector store for searching
+            vector_store = PineconeVectorStore.from_existing_index(
+                index_name=self.index_name,
+                embedding=embeddings,
+                namespace=f"user_{user_id}"
+            )
+
+            # Search for similar documents
+            results = vector_store.similarity_search(
+                query=query,
+                k=k,
+                filter={"user_id": user_id}
+            )
+
+            logger.info(f"Found {len(results)} similar documents for user {user_id}")
+            return results
+
+        except Exception as e:
+            logger.error(f"Failed to search vector store: {e}")
+            return []
+
+    def get_user_documents(self, user_id: int, limit: int = 50) -> List[Document]:
+        """Get all documents for a specific user."""
+        try:
+            if not self.use_pinecone:
+                logger.warning("Pinecone not available, returning empty results")
+                return []
+
+            embeddings = OpenAIEmbeddings()
+
+            # Create Pinecone vector store for searching
+            vector_store = PineconeVectorStore.from_existing_index(
+                index_name=self.index_name,
+                embedding=embeddings,
+                namespace=f"user_{user_id}"
+            )
+
+            # Get all documents for the user
+            results = vector_store.similarity_search(
+                query="",  # Empty query to get all documents
+                k=limit,
+                filter={"user_id": user_id}
+            )
+
+            logger.info(f"Retrieved {len(results)} documents for user {user_id}")
+            return results
+
+        except Exception as e:
+            logger.error(f"Failed to get user documents: {e}")
+            return []
+
+    def delete_user_documents(self, user_id: int) -> bool:
+        """Delete all documents for a specific user."""
+        try:
+            if not self.use_pinecone:
+                logger.warning("Pinecone not available, skipping deletion")
+                return False
+
+            pc = Pinecone(api_key=self.pinecone_api_key)
+            index = pc.Index(self.index_name)
+
+            # Delete all vectors in the user's namespace
+            index.delete(namespace=f"user_{user_id}")
+
+            logger.info(f"Successfully deleted all documents for user {user_id}")
+            return True
+
+        except Exception as e:
+            logger.error(f"Failed to delete user documents: {e}")
+            return False
+
+# Global instance
+vector_store = OnlineVectorStore()
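Since this module is the commit's main addition, a minimal usage sketch may help reviewers. It assumes PINECONE_API_KEY and OPENAI_API_KEY are set in the Space's environment; the document payload and user id below are hypothetical:

    from langchain_core.documents import Document
    from app.utils.online_vector_store import vector_store  # module-level singleton defined above

    docs = [Document(page_content="transcript chunk about topic X")]  # hypothetical payload
    vector_store.add_documents(docs, user_id=42)       # embeds and upserts into namespace "user_42"
    hits = vector_store.search("topic X", user_id=42)  # cosine similarity search, k defaults to 5
    vector_store.delete_user_documents(user_id=42)     # drops everything in the user's namespace

Per-user isolation comes from the namespace plus the redundant user_id metadata filter, so a query can never return another user's chunks even if the namespace argument were omitted.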
app/utils/whisper_llm.py
CHANGED

@@ -114,7 +114,7 @@ def summarize_in_chunks(text, chunk_size=1024, overlap=200):
         )
         return final_result[0]['summary_text']
     except Exception as e:
-        logger.error(f"Final
+        logger.error(f"Final summarization failed: {e}")
         return combined_summary[:1500] + "..." if len(combined_summary) > 1500 else combined_summary
 
     return combined_summary
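The restored fallback line relies on Python's conditional-expression precedence: the concatenation binds inside the true branch, so the ellipsis is appended only when the text is actually truncated. A standalone sketch of the same guard (illustrative, not the repo's function):

    def truncate_summary(combined_summary: str, limit: int = 1500) -> str:
        # Parses as (combined_summary[:limit] + "...") if len(...) > limit else combined_summary
        return combined_summary[:limit] + "..." if len(combined_summary) > limit else combined_summary

    assert truncate_summary("x" * 2000).endswith("...")
    assert truncate_summary("short") == "short"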
clean-for-deployment.bat
DELETED

@@ -1,69 +0,0 @@
-@echo off
-echo 🧹 Cleaning repository for Hugging Face deployment...
-
-echo.
-echo 📋 Removing binary files and vector stores...
-
-REM Remove vector store directory
-if exist "vector_store" (
-    echo Removing vector_store directory...
-    rmdir /s /q "vector_store"
-    echo ✅ vector_store removed
-) else (
-    echo ℹ️ vector_store directory not found
-)
-
-REM Remove database files
-for %%f in (*.db *.sqlite *.sqlite3) do (
-    if exist "%%f" (
-        echo Removing %%f...
-        del "%%f"
-        echo ✅ %%f removed
-    )
-)
-
-REM Remove FAISS files
-for %%f in (*.faiss *.index *.bin) do (
-    if exist "%%f" (
-        echo Removing %%f...
-        del "%%f"
-        echo ✅ %%f removed
-    )
-)
-
-REM Remove log files
-for %%f in (*.log) do (
-    if exist "%%f" (
-        echo Removing %%f...
-        del "%%f"
-        echo ✅ %%f removed
-    )
-)
-
-echo.
-echo 🔄 Updating git...
-
-REM Remove tracked files that should be ignored
-git rm -r --cached vector_store/ 2>nul
-git rm --cached *.db 2>nul
-git rm --cached *.sqlite 2>nul
-git rm --cached *.sqlite3 2>nul
-git rm --cached *.faiss 2>nul
-git rm --cached *.log 2>nul
-
-echo.
-echo 📝 Committing changes...
-git add .
-git commit -m "Clean repository for Hugging Face deployment - remove binary files"
-
-echo.
-echo ✅ Repository cleaned! You can now push to Hugging Face:
-echo.
-echo    git push space develop
-echo.
-echo Or create a new branch:
-echo    git checkout -b main
-echo    git push space main
-echo.
-
-pause
clean-repo-history.bat
DELETED

@@ -1,56 +0,0 @@
-@echo off
-echo 🧹 Deep cleaning repository history...
-
-echo.
-echo ⚠️ WARNING: This will rewrite git history!
-echo This will remove all binary files from the entire git history.
-echo.
-set /p confirm="Are you sure you want to continue? (y/N): "
-
-if /i not "%confirm%"=="y" (
-    echo Cancelled.
-    pause
-    exit /b 0
-)
-
-echo.
-echo 🗑️ Removing binary files from git history...
-
-REM Remove vector store directory from entire history
-git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch vector_store/" --prune-empty --tag-name-filter cat -- --all
-
-REM Remove database files from entire history
-git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch *.db" --prune-empty --tag-name-filter cat -- --all
-git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch *.sqlite" --prune-empty --tag-name-filter cat -- --all
-git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch *.sqlite3" --prune-empty --tag-name-filter cat -- --all
-
-REM Remove FAISS files from entire history
-git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch *.faiss" --prune-empty --tag-name-filter cat -- --all
-git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch *.index" --prune-empty --tag-name-filter cat -- --all
-
-REM Remove log files from entire history
-git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch *.log" --prune-empty --tag-name-filter cat -- --all
-
-echo.
-echo 🧹 Cleaning up...
-git for-each-ref --format="delete %(refname)" refs/original | git update-ref --stdin
-git reflog expire --expire=now --all
-git gc --prune=now --aggressive
-
-echo.
-echo ✅ Repository history cleaned!
-echo.
-echo 📝 Now commit the current state...
-git add .
-git commit -m "Clean repository for Hugging Face deployment"
-
-echo.
-echo 🚀 Ready to push! Run:
-echo    git push --force space develop
-echo.
-echo Or create a new branch:
-echo    git checkout -b main
-echo    git push space main
-echo.
-
-pause
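The deleted script ran one git filter-branch pass per pattern. A hedged Python sketch of the same sweep as a single loop (filter-branch is deprecated upstream in favor of git-filter-repo, but this mirrors what the script actually did):

    import subprocess

    # Patterns the deleted script scrubbed from history, one filter-branch pass each.
    PATTERNS = ["vector_store/", "*.db", "*.sqlite", "*.sqlite3", "*.faiss", "*.index", "*.log"]

    for pattern in PATTERNS:
        subprocess.run(
            ["git", "filter-branch", "--force", "--index-filter",
             f"git rm -rf --cached --ignore-unmatch {pattern}",
             "--prune-empty", "--tag-name-filter", "cat", "--", "--all"],
            check=False,  # an unmatched pattern should not abort the sweep
        )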
deploy-to-hf.bat
DELETED

@@ -1,82 +0,0 @@
-@echo off
-echo 🚀 Preparing Dubsway Video AI for Hugging Face Deployment...
-
-echo.
-echo 📋 Checking prerequisites...
-
-REM Check if git is available
-git --version >nul 2>&1
-if errorlevel 1 (
-    echo ❌ Git is not installed or not in PATH
-    pause
-    exit /b 1
-)
-
-REM Check if we're in a git repository
-git status >nul 2>&1
-if errorlevel 1 (
-    echo ❌ Not in a git repository. Please run this from your project root.
-    pause
-    exit /b 1
-)
-
-echo ✅ Git repository found
-
-REM Check for required files
-if not exist ".huggingface.yaml" (
-    echo ❌ .huggingface.yaml not found
-    pause
-    exit /b 1
-)
-
-if not exist "Dockerfile" (
-    echo ❌ Dockerfile not found
-    pause
-    exit /b 1
-)
-
-if not exist "requirements-hf.txt" (
-    echo ❌ requirements-hf.txt not found
-    pause
-    exit /b 1
-)
-
-echo ✅ All required files found
-
-echo.
-echo 🔄 Checking git status...
-git status --porcelain
-
-echo.
-echo 📝 Current branch:
-git branch --show-current
-
-echo.
-echo 🚀 Ready to deploy! Follow these steps:
-echo.
-echo 1. Push your changes to GitHub:
-echo    git add .
-echo    git commit -m "Deploy to Hugging Face Spaces"
-echo    git push origin main
-echo.
-echo 2. Go to https://huggingface.co/spaces
-echo.
-echo 3. Create a new Space with these settings:
-echo    - SDK: Docker
-echo    - License: MIT
-echo    - Connect to your GitHub repository
-echo.
-echo 4. Set environment variables in your Space settings:
-echo    - GROQ_API_KEY
-echo    - DATABASE_URL=sqlite+aiosqlite:///./dubsway_hf.db
-echo    - SECRET_KEY
-echo    - AWS_ACCESS_KEY_ID (if using S3)
-echo    - AWS_SECRET_ACCESS_KEY (if using S3)
-echo    - S3_BUCKET_NAME (if using S3)
-echo.
-echo 5. Monitor deployment in your Space
-echo.
-echo 📖 See HUGGINGFACE_DEPLOYMENT.md for detailed instructions
-echo.
-
-pause
fix-pydantic-deploy.bat
DELETED

@@ -1,37 +0,0 @@
-@echo off
-echo 🔧 Fixing Pydantic imports for Hugging Face deployment...
-
-echo.
-echo 📝 Updating Pydantic imports...
-
-REM Fix auth.py
-echo Fixing app/auth.py...
-powershell -Command "(Get-Content 'app/auth.py') -replace 'from pydantic\.v1 import', 'from pydantic import' | Set-Content 'app/auth.py'"
-
-REM Fix custom_chatbot.py
-echo Fixing app/agent/custom_chatbot.py...
-powershell -Command "(Get-Content 'app/agent/custom_chatbot.py') -replace 'from pydantic\.v1 import', 'from pydantic import' | Set-Content 'app/agent/custom_chatbot.py'"
-
-REM Fix pdf_ingestion.py
-echo Fixing app/pdf_ingestion.py...
-powershell -Command "(Get-Content 'app/pdf_ingestion.py') -replace 'from pydantic\.v1 import', 'from pydantic import' | Set-Content 'app/pdf_ingestion.py'"
-
-echo.
-echo ✅ Pydantic imports fixed!
-
-echo.
-echo 📝 Committing changes...
-git add .
-git commit -m "Fix Pydantic imports for Hugging Face deployment"
-
-echo.
-echo 🚀 Pushing to Hugging Face...
-git push space develop
-
-echo.
-echo ✅ Deployment completed!
-echo.
-echo 📍 Your Space URL: https://huggingface.co/spaces/peace2024/DubswayAgenticAI
-echo.
-
-pause
fix_agentic_errors.bat
DELETED

@@ -1,28 +0,0 @@
-@echo off
-echo ========================================
-echo Fixing Agentic System Errors
-echo ========================================
-echo.
-
-REM Activate virtual environment
-echo Activating virtual environment...
-call myenv31\Scripts\activate.bat
-
-REM Install missing dependencies
-echo Installing missing dependencies...
-pip install timm
-
-echo.
-echo ========================================
-echo Errors Fixed!
-echo ========================================
-echo.
-echo The following issues have been resolved:
-echo ✅ Missing timm library - INSTALLED
-echo ✅ PDF generation function - FIXED
-echo ✅ Enhanced analysis should now work properly
-echo.
-echo You can now run the agentic system:
-echo    run_agentic.bat
-echo.
-pause
fresh-deploy.bat
DELETED

@@ -1,78 +0,0 @@
-@echo off
-echo 🚀 Creating fresh repository for Hugging Face deployment...
-
-echo.
-echo 📋 Creating backup of current files...
-if not exist "backup" mkdir backup
-xcopy /E /I /Y "app" "backup\app"
-xcopy /E /I /Y "worker" "backup\worker"
-copy "requirements-hf.txt" "backup\"
-copy "Dockerfile" "backup\"
-copy ".huggingface.yaml" "backup\"
-copy "env.example" "backup\"
-copy "*.md" "backup\"
-copy ".gitignore" "backup\"
-
-echo.
-echo 🧹 Creating clean deployment directory...
-if exist "deploy" rmdir /s /q "deploy"
-mkdir deploy
-cd deploy
-
-echo.
-echo 📝 Initializing new git repository...
-git init
-
-echo.
-echo 📋 Copying clean files...
-xcopy /E /I /Y "..\backup\app" "app\"
-xcopy /E /I /Y "..\backup\worker" "worker\"
-copy "..\backup\requirements-hf.txt" "."
-copy "..\backup\Dockerfile" "."
-copy "..\backup\.huggingface.yaml" "."
-copy "..\backup\env.example" "."
-copy "..\backup\.gitignore" "."
-copy "..\backup\*.md" "."
-
-echo.
-echo 🔗 Adding Hugging Face remote...
-git remote add space https://huggingface.co/spaces/peace2024/DubswayAgenticAI
-
-echo.
-echo 📝 Committing clean repository...
-git add .
-git commit -m "Initial clean deployment for Hugging Face Spaces"
-
-echo.
-echo 🚀 Pushing to Hugging Face...
-echo Choose your branch:
-echo 1. Push to develop branch
-echo 2. Push to main branch
-echo 3. Cancel
-echo.
-set /p choice="Enter choice (1-3): "
-
-if "%choice%"=="1" (
-    echo Pushing to develop branch...
-    git push space develop
-) else if "%choice%"=="2" (
-    echo Pushing to main branch...
-    git push space main
-) else (
-    echo Cancelled.
-    cd ..
-    pause
-    exit /b 0
-)
-
-echo.
-echo ✅ Fresh deployment completed!
-echo.
-echo 📍 Your Space URL will be:
-echo    https://huggingface.co/spaces/peace2024/DubswayAgenticAI
-echo.
-echo 🔍 Monitor the build logs in your Space settings.
-echo.
-
-cd ..
-pause
push-to-hf.bat
DELETED

@@ -1,49 +0,0 @@
-@echo off
-echo 🚀 Pushing to Hugging Face Spaces...
-
-echo.
-echo 🧹 Cleaning binary files...
-
-REM Remove vector store from git tracking
-git rm -r --cached vector_store/ 2>nul
-git rm --cached *.db 2>nul
-git rm --cached *.log 2>nul
-
-echo.
-echo 📝 Committing changes...
-git add .
-git commit -m "Clean repository for Hugging Face deployment"
-
-echo.
-echo 🔄 Pushing to Hugging Face...
-echo Choose your branch:
-echo 1. Push to develop branch
-echo 2. Create and push to main branch
-echo 3. Cancel
-echo.
-set /p choice="Enter choice (1-3): "
-
-if "%choice%"=="1" (
-    echo Pushing to develop branch...
-    git push space develop
-) else if "%choice%"=="2" (
-    echo Creating main branch...
-    git checkout -b main
-    echo Pushing to main branch...
-    git push space main
-) else (
-    echo Cancelled.
-    pause
-    exit /b 0
-)
-
-echo.
-echo ✅ Push completed!
-echo.
-echo 📍 Your Space URL will be:
-echo    https://huggingface.co/spaces/peace2024/DubswayAgenticAI
-echo.
-echo 🔍 Monitor the build logs in your Space settings.
-echo.
-
-pause
run_agentic.bat
DELETED

@@ -1,43 +0,0 @@
-@echo off
-echo ========================================
-echo Dubsway Video AI - Agentic System Runner
-echo ========================================
-echo.
-
-REM Activate virtual environment
-echo Activating virtual environment...
-call myenv31\Scripts\activate.bat
-
-REM Check for Groq API key
-if "%GROQ_API_KEY%"=="" (
-    echo.
-    echo ========================================
-    echo GROQ API KEY REQUIRED
-    echo ========================================
-    echo.
-    echo Please set your Groq API key:
-    echo 1. Get API key from: https://console.groq.com/
-    echo 2. Set environment variable: set GROQ_API_KEY=your_key_here
-    echo.
-    echo Then run this script again.
-    echo.
-    pause
-    exit /b 1
-)
-
-echo Groq API key found!
-echo.
-
-REM Run the agentic daemon
-echo Starting agentic video processing daemon...
-echo.
-echo The daemon will:
-echo - Process pending videos with enhanced analysis
-echo - Use Groq Llama3-8b-8192 for intelligent reasoning
-echo - Generate beautiful, comprehensive reports
-echo - Fall back to basic analysis if needed
-echo.
-echo Press Ctrl+C to stop the daemon
-echo.
-
-python -m worker.daemon
run_lightweight_agentic.bat
DELETED

@@ -1,44 +0,0 @@
-@echo off
-echo ========================================
-echo Dubsway Video AI - Lightweight Agentic System
-echo ========================================
-echo.
-
-REM Activate virtual environment
-echo Activating virtual environment...
-call myenv31\Scripts\activate.bat
-
-REM Check for Groq API key
-if "%GROQ_API_KEY%"=="" (
-    echo.
-    echo ========================================
-    echo GROQ API KEY REQUIRED
-    echo ========================================
-    echo.
-    echo Please set your Groq API key:
-    echo 1. Get API key from: https://console.groq.com/
-    echo 2. Set environment variable: set GROQ_API_KEY=your_key_here
-    echo.
-    echo Then run this script again.
-    echo.
-    pause
-    exit /b 1
-)
-
-echo Groq API key found!
-echo.
-
-REM Run the lightweight agentic daemon
-echo Starting lightweight agentic video processing daemon...
-echo.
-echo The lightweight daemon will:
-echo - Process videos with Groq Llama3-8b-8192 analysis
-echo - Skip heavy computer vision models (no hanging)
-echo - Provide intelligent text-based insights
-echo - Generate beautiful reports
-echo - Fall back to basic analysis if needed
-echo.
-echo Press Ctrl+C to stop the daemon
-echo.
-
-python -m worker.daemon
setup_agentic_system.bat
DELETED

@@ -1,63 +0,0 @@
-@echo off
-echo ========================================
-echo Dubsway Video AI - Agentic System Setup
-echo ========================================
-echo.
-
-REM Check if virtual environment exists
-if not exist "myenv31" (
-    echo Creating virtual environment...
-    python -m venv myenv31
-)
-
-REM Activate virtual environment
-echo Activating virtual environment...
-call myenv31\Scripts\activate.bat
-
-REM Install dependencies
-echo Installing dependencies...
-pip install -r requirements.txt
-
-REM Install Groq specifically
-echo Installing Groq integration...
-pip install langchain-groq
-
-REM Check for Groq API key
-echo.
-echo Checking for Groq API key...
-if "%GROQ_API_KEY%"=="" (
-    echo.
-    echo ========================================
-    echo GROQ API KEY REQUIRED
-    echo ========================================
-    echo.
-    echo To use the agentic system, you need a Groq API key:
-    echo 1. Visit: https://console.groq.com/
-    echo 2. Sign up and get your API key
-    echo 3. Set the environment variable:
-    echo    set GROQ_API_KEY=your_key_here
-    echo.
-    echo Or add it to your .env file:
-    echo    GROQ_API_KEY=your_key_here
-    echo.
-    pause
-) else (
-    echo Groq API key found!
-)
-
-REM Run test
-echo.
-echo Running system test...
-python test_agentic_system.py
-
-echo.
-echo ========================================
-echo Setup Complete!
-echo ========================================
-echo.
-echo To run the agentic system:
-echo 1. Make sure GROQ_API_KEY is set
-echo 2. Run: python -m worker.daemon
-echo 3. Or use: start-server.bat
-echo.
-pause
test_agentic_system.py
DELETED

@@ -1,180 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for the agentic video analysis system with Groq integration
-"""
-import asyncio
-import os
-import sys
-from pathlib import Path
-
-# Add project root to Python path
-sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-
-async def test_groq_integration():
-    """Test Groq integration and basic functionality"""
-    print("🧪 Testing Groq Integration for Agentic Video Analysis")
-    print("=" * 60)
-
-    # Check for Groq API key
-    groq_api_key = os.getenv("GROQ_API_KEY")
-    if not groq_api_key:
-        print("❌ GROQ_API_KEY environment variable not found!")
-        print("Please set your Groq API key:")
-        print("1. Get API key from: https://console.groq.com/")
-        print("2. Set environment variable: GROQ_API_KEY=your_key_here")
-        return False
-
-    print("✅ GROQ_API_KEY found")
-
-    try:
-        # Test Groq import
-        from langchain_groq import ChatGroq
-        print("✅ langchain-groq imported successfully")
-
-        # Test Groq connection
-        llm = ChatGroq(
-            groq_api_key=groq_api_key,
-            model_name="llama3-8b-8192",
-            temperature=0.1,
-            max_tokens=100
-        )
-
-        # Simple test
-        response = await llm.ainvoke("Say 'Hello from Groq!'")
-        print(f"✅ Groq test successful: {response.content}")
-
-    except ImportError as e:
-        print(f"❌ Failed to import langchain-groq: {e}")
-        print("Please install: pip install langchain-groq")
-        return False
-    except Exception as e:
-        print(f"❌ Groq test failed: {e}")
-        return False
-
-    return True
-
-async def test_enhanced_analysis():
-    """Test enhanced analysis components"""
-    print("\n🔍 Testing Enhanced Analysis Components")
-    print("=" * 60)
-
-    try:
-        # Test imports
-        from app.utils.enhanced_analysis import MultiModalAnalyzer
-        print("✅ Enhanced analysis imports successful")
-
-        # Test analyzer initialization
-        groq_api_key = os.getenv("GROQ_API_KEY")
-        analyzer = MultiModalAnalyzer(groq_api_key=groq_api_key)
-        print("✅ MultiModalAnalyzer initialized successfully")
-
-        # Test agent creation
-        if analyzer.agent:
-            print("✅ Agent created successfully")
-        else:
-            print("❌ Agent creation failed")
-            return False
-
-    except Exception as e:
-        print(f"❌ Enhanced analysis test failed: {e}")
-        return False
-
-    return True
-
-async def test_agentic_integration():
-    """Test agentic integration"""
-    print("\n🤖 Testing Agentic Integration")
-    print("=" * 60)
-
-    try:
-        from app.utils.agentic_integration import AgenticVideoProcessor, MCPToolManager
-        print("✅ Agentic integration imports successful")
-
-        # Test processor initialization
-        groq_api_key = os.getenv("GROQ_API_KEY")
-        processor = AgenticVideoProcessor(enable_enhanced_analysis=True, groq_api_key=groq_api_key)
-        print("✅ AgenticVideoProcessor initialized successfully")
-
-        # Test MCP tool manager
-        tool_manager = MCPToolManager(groq_api_key=groq_api_key)
-        print("✅ MCPToolManager initialized successfully")
-
-        # Test tool registration
-        if tool_manager.tools:
-            print(f"✅ {len(tool_manager.tools)} tools registered")
-        else:
-            print("❌ No tools registered")
-            return False
-
-    except Exception as e:
-        print(f"❌ Agentic integration test failed: {e}")
-        return False
-
-    return True
-
-async def test_dependencies():
-    """Test all required dependencies"""
-    print("\n📦 Testing Dependencies")
-    print("=" * 60)
-
-    dependencies = [
-        ("opencv-python", "cv2"),
-        ("pillow", "PIL"),
-        ("torch", "torch"),
-        ("transformers", "transformers"),
-        ("faster_whisper", "faster_whisper"),
-        ("langchain", "langchain"),
-        ("langchain_groq", "langchain_groq"),
-        ("duckduckgo-search", "duckduckgo_search"),
-        ("wikipedia-api", "wikipedia"),
-    ]
-
-    all_good = True
-    for package_name, import_name in dependencies:
-        try:
-            __import__(import_name)
-            print(f"✅ {package_name}")
-        except ImportError:
-            print(f"❌ {package_name} - missing")
-            all_good = False
-
-    return all_good
-
-async def main():
-    """Main test function"""
-    print("🚀 Dubsway Video AI - Agentic System Test")
-    print("=" * 60)
-
-    # Test dependencies first
-    deps_ok = await test_dependencies()
-    if not deps_ok:
-        print("\n❌ Some dependencies are missing. Please install them:")
-        print("pip install -r requirements.txt")
-        return False
-
-    # Test Groq integration
-    groq_ok = await test_groq_integration()
-    if not groq_ok:
-        return False
-
-    # Test enhanced analysis
-    enhanced_ok = await test_enhanced_analysis()
-    if not enhanced_ok:
-        return False
-
-    # Test agentic integration
-    agentic_ok = await test_agentic_integration()
-    if not agentic_ok:
-        return False
-
-    print("\n🎉 All tests passed! Your agentic system is ready to use.")
-    print("\n📋 Next steps:")
-    print("1. Update your worker/daemon.py to use agentic analysis")
-    print("2. Set GROQ_API_KEY environment variable")
-    print("3. Run your daemon with enhanced capabilities")
-
-    return True
-
-if __name__ == "__main__":
-    success = asyncio.run(main())
-    sys.exit(0 if success else 1)
worker/daemon.py
DELETED

@@ -1,210 +0,0 @@
-import asyncio
-import os
-import time
-import signal
-import sys
-from datetime import datetime
-import traceback
-import logging
-
-from sqlalchemy.future import select
-from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.exc import SQLAlchemyError
-
-from app.database import AsyncSessionLocal, init_db, close_db
-from app.models import VideoUpload
-from app.utils import whisper_llm, pdf, s3, lightweight_agentic
-
-# Setup logging with UTF-8 encoding for Windows compatibility
-logging.basicConfig(
-    level=logging.INFO,
-    format='[%(asctime)s] %(levelname)s - %(name)s - %(message)s',
-    handlers=[
-        logging.StreamHandler(sys.stdout),  # Use stdout for better encoding
-        logging.FileHandler('worker.log', encoding='utf-8')
-    ]
-)
-logger = logging.getLogger("worker.daemon")
-
-POLL_INTERVAL = 200  # seconds
-SHUTDOWN_EVENT = asyncio.Event()
-
-
-def signal_handler(signum, frame):
-    """Handle shutdown signals gracefully"""
-    logger.info(f"Received signal {signum}, initiating graceful shutdown...")
-    SHUTDOWN_EVENT.set()
-
-
-async def process_pending_videos():
-    """Process all pending video uploads"""
-    async with AsyncSessionLocal() as session:
-        try:
-            # Query for pending videos
-            result = await session.execute(
-                select(VideoUpload).where(VideoUpload.status == "pending")
-            )
-            pending_videos = result.scalars().all()
-
-            if not pending_videos:
-                logger.info("No pending videos found")
-                return
-
-            logger.info(f"Found {len(pending_videos)} pending videos to process")
-
-            for video in pending_videos:
-                if SHUTDOWN_EVENT.is_set():
-                    logger.info("Shutdown requested, stopping video processing")
-                    break
-
-                logger.info(f"Processing video ID {video.id} for user {video.user_id}")
-
-                try:
-                    # Update status to processing
-                    video.status = "processing"
-                    video.updated_at = datetime.utcnow()
-                    await session.commit()
-
-                    # Process with Lightweight Agentic Analysis (Groq + Llama3)
-                    try:
-                        transcription, summary = await lightweight_agentic.analyze_with_lightweight_agentic(
-                            video_url=video.video_url,
-                            user_id=video.user_id,
-                            db=session
-                        )
-                        logger.info(f"Lightweight agentic analysis completed for video {video.id}")
-                    except Exception as agentic_error:
-                        logger.warning(f"Lightweight agentic analysis failed, falling back to basic Whisper: {agentic_error}")
-                        transcription, summary = await whisper_llm.analyze(
-                            video_url=video.video_url,
-                            user_id=video.user_id,
-                            db=session
-                        )
-                        logger.info(f"Basic Whisper analysis completed for video {video.id}")
-
-                except Exception as e:
-                    logger.error(f"Whisper failed for video {video.id}: {e}")
-                    logger.debug(traceback.format_exc())
-
-                    # Update status to failed
-                    video.status = "failed"
-                    video.updated_at = datetime.utcnow()
-                    await session.commit()
-                    continue
-
-                try:
-                    # Generate PDF
-                    pdf_bytes = pdf.generate(transcription, summary)
-                    logger.info(f"PDF generation completed for video {video.id}")
-                except Exception as e:
-                    logger.error(f"PDF generation failed for video {video.id}: {e}")
-                    logger.debug(traceback.format_exc())
-
-                    video.status = "failed"
-                    video.updated_at = datetime.utcnow()
-                    await session.commit()
-                    continue
-
-                try:
-                    # Upload to S3
-                    pdf_key = f"pdfs/{video.id}.pdf"
-                    pdf_url = s3.upload_pdf_bytes(pdf_bytes, pdf_key)
-                    logger.info(f"S3 upload completed for video {video.id}")
-                except Exception as e:
-                    logger.error(f"Upload to S3 failed for video {video.id}: {e}")
-                    logger.debug(traceback.format_exc())
-
-                    video.status = "failed"
-                    video.updated_at = datetime.utcnow()
-                    await session.commit()
-                    continue
-
-                try:
-                    # Mark as completed
-                    video.status = "completed"
-                    video.pdf_url = pdf_url
-                    video.updated_at = datetime.utcnow()
-                    await session.commit()
-                    logger.info(f"Successfully completed video {video.id}")
-
-                except SQLAlchemyError as e:
-                    logger.error(f"DB commit failed for video {video.id}: {e}")
-                    logger.debug(traceback.format_exc())
-                    await session.rollback()
-
-        except SQLAlchemyError as e:
-            logger.error(f"Database error: {e}")
-            logger.debug(traceback.format_exc())
-        except Exception as e:
-            logger.error(f"Unexpected error in process_pending_videos: {e}")
-            logger.debug(traceback.format_exc())
-
-
-async def run_worker():
-    """Main worker loop"""
-    logger.info("Async worker daemon started...")
-
-    # Initialize database
-    try:
-        await init_db()
-        logger.info("Database initialized successfully")
-    except Exception as e:
-        logger.error(f"Failed to initialize database: {e}")
-        return
-
-    cycle_count = 0
-    while not SHUTDOWN_EVENT.is_set():
-        cycle_count += 1
-        logger.info(f"Worker cycle {cycle_count} - Checking for pending videos...")
-
-        try:
-            await process_pending_videos()
-        except Exception as e:
-            logger.error(f"Worker loop error: {e}")
-            logger.debug(traceback.format_exc())
-
-        # Wait for next cycle or shutdown
-        try:
-            await asyncio.wait_for(SHUTDOWN_EVENT.wait(), timeout=POLL_INTERVAL)
-        except asyncio.TimeoutError:
-            # Normal timeout, continue to next cycle
-            pass
-        except Exception as e:
-            logger.error(f"Error in worker wait: {e}")
-            break
-
-    logger.info("Worker loop stopped, cleaning up...")
-
-    # Cleanup
-    try:
-        await close_db()
-        logger.info("Database connections closed")
-    except Exception as e:
-        logger.error(f"Error during cleanup: {e}")
-
-
-async def main():
-    """Main entry point with signal handling"""
-    # Setup signal handlers
-    signal.signal(signal.SIGINT, signal_handler)
-    signal.signal(signal.SIGTERM, signal_handler)
-
-    try:
-        await run_worker()
-    except KeyboardInterrupt:
-        logger.info("Keyboard interrupt received")
-    except Exception as e:
-        logger.error(f"Fatal error in main: {e}")
-        logger.debug(traceback.format_exc())
-    finally:
-        logger.info("Worker daemon shutdown complete")
-
-
-if __name__ == "__main__":
-    try:
-        asyncio.run(main())
-    except KeyboardInterrupt:
-        logger.info("Worker daemon interrupted by user")
-    except Exception as e:
-        logger.error(f"Fatal error: {e}")
-        sys.exit(1)
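The removed daemon's core pattern, polling with a shutdown-aware sleep, is worth keeping in mind for whatever replaces it on Spaces. A minimal standalone sketch of that pattern (names and interval are illustrative, not from the repo; the POSIX-only add_signal_handler stands in for the old daemon's signal.signal calls):

    import asyncio
    import signal

    SHUTDOWN = asyncio.Event()

    async def poll_loop(poll_interval: float = 5.0):
        # Loop until a signal sets the event; wait_for doubles as an interruptible sleep.
        while not SHUTDOWN.is_set():
            print("checking for work...")  # stand-in for process_pending_videos()
            try:
                await asyncio.wait_for(SHUTDOWN.wait(), timeout=poll_interval)
            except asyncio.TimeoutError:
                pass  # normal timeout: start the next cycle immediately

    async def main():
        loop = asyncio.get_running_loop()
        for sig in (signal.SIGINT, signal.SIGTERM):
            loop.add_signal_handler(sig, SHUTDOWN.set)  # POSIX only; not available on Windows
        await poll_loop()

    asyncio.run(main())

Waiting on the event instead of calling asyncio.sleep means a shutdown signal interrupts the sleep at once rather than after up to POLL_INTERVAL seconds.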