peace2024 commited on
Commit
a408f4b
·
1 Parent(s): ed6b1d2
.gitignore CHANGED
@@ -5,6 +5,10 @@ __pycache__/
5
  *.pyd
6
 
7
  # Virtual environments
 
 
 
 
8
  env/
9
  venv/
10
  myenv/
 
5
  *.pyd
6
 
7
  # Virtual environments
8
+ aienv/
9
+ .aienv/
12
  env/
13
  venv/
14
  myenv/
app/utils/online_vector_store.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import os
from typing import List, Optional

from langchain_community.vectorstores import Pinecone as PineconeVectorStore
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from pinecone import Pinecone, ServerlessSpec
8
+
9
+ # Setup logger
10
+ logger = logging.getLogger("app.utils.online_vector_store")
11
+ logger.setLevel(logging.INFO)
12
+ if not logger.handlers:
13
+ handler = logging.StreamHandler()
14
+ formatter = logging.Formatter("[%(asctime)s] %(levelname)s - %(message)s")
15
+ handler.setFormatter(formatter)
16
+ logger.addHandler(handler)
17
+
18
+ class OnlineVectorStore:
19
+ def __init__(self):
20
+ self.pinecone_api_key = os.getenv("PINECONE_API_KEY")
21
+ self.pinecone_environment = os.getenv("PINECONE_ENVIRONMENT", "gcp-starter")
22
+ self.index_name = "dubsway-video-ai"
23
+
24
+ if not self.pinecone_api_key:
25
+ logger.warning("PINECONE_API_KEY not found. Using fallback local storage.")
26
+ self.use_pinecone = False
27
+ else:
28
+ self.use_pinecone = True
29
+ self._initialize_pinecone()
30
+
31
+ def _initialize_pinecone(self):
32
+ """Initialize Pinecone client and create index if needed."""
33
+ try:
34
+ pc = Pinecone(api_key=self.pinecone_api_key)
35
+
36
+ # Check if index exists
37
+ if self.index_name not in pc.list_indexes().names():
38
+ logger.info(f"Creating Pinecone index: {self.index_name}")
39
+ pc.create_index(
40
+ name=self.index_name,
41
+ dimension=1536, # OpenAI embeddings dimension
42
+ metric="cosine",
43
+ spec=ServerlessSpec(
44
+ cloud="aws",
45
+ region="us-east-1"
46
+ )
47
+ )
48
+ logger.info(f"Pinecone index {self.index_name} created successfully")
49
+ else:
50
+ logger.info(f"Using existing Pinecone index: {self.index_name}")
51
+
52
+ except Exception as e:
53
+ logger.error(f"Failed to initialize Pinecone: {e}")
54
+ self.use_pinecone = False
55
+
56
+ def add_documents(self, documents: List[Document], user_id: int) -> bool:
57
+ """Add documents to the vector store."""
58
+ try:
59
+ if not documents:
60
+ logger.warning("No documents to add")
61
+ return False
62
+
63
+ # Add user_id metadata to each document
64
+ for doc in documents:
65
+ if not hasattr(doc, 'metadata'):
66
+ doc.metadata = {}
67
+ doc.metadata['user_id'] = user_id
68
+ doc.metadata['source'] = 'video_analysis'
69
+
70
+ if self.use_pinecone:
71
+ return self._add_to_pinecone(documents, user_id)
72
+ else:
73
+ logger.warning("Pinecone not available, skipping vector storage")
74
+ return False
75
+
76
+ except Exception as e:
77
+ logger.error(f"Failed to add documents to vector store: {e}")
78
+ return False
79
+
80
+ def _add_to_pinecone(self, documents: List[Document], user_id: int) -> bool:
81
+ """Add documents to Pinecone."""
82
+ try:
83
+ embeddings = OpenAIEmbeddings()
84
+
85
+ # Create Pinecone vector store
86
+ vector_store = Pinecone.from_documents(
87
+ documents=documents,
88
+ embedding=embeddings,
89
+ index_name=self.index_name,
90
+ namespace=f"user_{user_id}"
91
+ )
92
+
93
+ logger.info(f"Successfully added {len(documents)} documents to Pinecone for user {user_id}")
94
+ return True
95
+
96
+ except Exception as e:
97
+ logger.error(f"Failed to add documents to Pinecone: {e}")
98
+ return False
99
+
100
+ def search(self, query: str, user_id: int, k: int = 5) -> List[Document]:
101
+ """Search for similar documents."""
102
+ try:
103
+ if not self.use_pinecone:
104
+ logger.warning("Pinecone not available, returning empty results")
105
+ return []
106
+
107
+ embeddings = OpenAIEmbeddings()
108
+
109
+ # Create Pinecone vector store for searching
110
+ vector_store = PineconeVectorStore.from_existing_index(
111
+ index_name=self.index_name,
112
+ embedding=embeddings,
113
+ namespace=f"user_{user_id}"
114
+ )
115
+
116
+ # Search for similar documents
117
+ results = vector_store.similarity_search(
118
+ query=query,
119
+ k=k,
120
+ filter={"user_id": user_id}
121
+ )
122
+
123
+ logger.info(f"Found {len(results)} similar documents for user {user_id}")
124
+ return results
125
+
126
+ except Exception as e:
127
+ logger.error(f"Failed to search vector store: {e}")
128
+ return []
129
+
130
+ def get_user_documents(self, user_id: int, limit: int = 50) -> List[Document]:
131
+ """Get all documents for a specific user."""
132
+ try:
133
+ if not self.use_pinecone:
134
+ logger.warning("Pinecone not available, returning empty results")
135
+ return []
136
+
137
+ embeddings = OpenAIEmbeddings()
138
+
139
+ # Create Pinecone vector store for searching
140
+ vector_store = PineconeVectorStore.from_existing_index(
141
+ index_name=self.index_name,
142
+ embedding=embeddings,
143
+ namespace=f"user_{user_id}"
144
+ )
145
+
146
+ # Get all documents for the user
147
+ results = vector_store.similarity_search(
148
+ query="", # Empty query to get all documents
149
+ k=limit,
150
+ filter={"user_id": user_id}
151
+ )
152
+
153
+ logger.info(f"Retrieved {len(results)} documents for user {user_id}")
154
+ return results
155
+
156
+ except Exception as e:
157
+ logger.error(f"Failed to get user documents: {e}")
158
+ return []
159
+
160
+ def delete_user_documents(self, user_id: int) -> bool:
161
+ """Delete all documents for a specific user."""
162
+ try:
163
+ if not self.use_pinecone:
164
+ logger.warning("Pinecone not available, skipping deletion")
165
+ return False
166
+
167
+ pc = Pinecone(api_key=self.pinecone_api_key)
168
+ index = pc.Index(self.index_name)
169
+
170
+ # Delete all vectors in the user's namespace
171
+ index.delete(namespace=f"user_{user_id}")
172
+
173
+ logger.info(f"Successfully deleted all documents for user {user_id}")
174
+ return True
175
+
176
+ except Exception as e:
177
+ logger.error(f"Failed to delete user documents: {e}")
178
+ return False
179
+
180
+ # Global instance
181
+ vector_store = OnlineVectorStore()
app/utils/whisper_llm.py CHANGED
@@ -114,7 +114,7 @@ def summarize_in_chunks(text, chunk_size=1024, overlap=200):
114
  )
115
  return final_result[0]['summary_text']
116
  except Exception as e:
117
- logger.error(f"Final summarization failed: {e}")
118
  return combined_summary[:1500] + "..." if len(combined_summary) > 1500 else combined_summary
119
 
120
  return combined_summary
 
114
  )
115
  return final_result[0]['summary_text']
116
  except Exception as e:
117
+ logger.error(f"Final summarization failed: {e}")
118
  return combined_summary[:1500] + "..." if len(combined_summary) > 1500 else combined_summary
119
 
120
  return combined_summary
clean-for-deployment.bat DELETED
@@ -1,69 +0,0 @@
1
- @echo off
2
- echo 🧹 Cleaning repository for Hugging Face deployment...
3
-
4
- echo.
5
- echo 📋 Removing binary files and vector stores...
6
-
7
- REM Remove vector store directory
8
- if exist "vector_store" (
9
- echo Removing vector_store directory...
10
- rmdir /s /q "vector_store"
11
- echo ✅ vector_store removed
12
- ) else (
13
- echo ℹ️ vector_store directory not found
14
- )
15
-
16
- REM Remove database files
17
- for %%f in (*.db *.sqlite *.sqlite3) do (
18
- if exist "%%f" (
19
- echo Removing %%f...
20
- del "%%f"
21
- echo ✅ %%f removed
22
- )
23
- )
24
-
25
- REM Remove FAISS files
26
- for %%f in (*.faiss *.index *.bin) do (
27
- if exist "%%f" (
28
- echo Removing %%f...
29
- del "%%f"
30
- echo ✅ %%f removed
31
- )
32
- )
33
-
34
- REM Remove log files
35
- for %%f in (*.log) do (
36
- if exist "%%f" (
37
- echo Removing %%f...
38
- del "%%f"
39
- echo ✅ %%f removed
40
- )
41
- )
42
-
43
- echo.
44
- echo 🔄 Updating git...
45
-
46
- REM Remove tracked files that should be ignored
47
- git rm -r --cached vector_store/ 2>nul
48
- git rm --cached *.db 2>nul
49
- git rm --cached *.sqlite 2>nul
50
- git rm --cached *.sqlite3 2>nul
51
- git rm --cached *.faiss 2>nul
52
- git rm --cached *.log 2>nul
53
-
54
- echo.
55
- echo 📝 Committing changes...
56
- git add .
57
- git commit -m "Clean repository for Hugging Face deployment - remove binary files"
58
-
59
- echo.
60
- echo ✅ Repository cleaned! You can now push to Hugging Face:
61
- echo.
62
- echo git push space develop
63
- echo.
64
- echo Or create a new branch:
65
- echo git checkout -b main
66
- echo git push space main
67
- echo.
68
-
69
- pause
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
clean-repo-history.bat DELETED
@@ -1,56 +0,0 @@
1
- @echo off
2
- echo 🧹 Deep cleaning repository history...
3
-
4
- echo.
5
- echo ⚠️ WARNING: This will rewrite git history!
6
- echo This will remove all binary files from the entire git history.
7
- echo.
8
- set /p confirm="Are you sure you want to continue? (y/N): "
9
-
10
- if /i not "%confirm%"=="y" (
11
- echo Cancelled.
12
- pause
13
- exit /b 0
14
- )
15
-
16
- echo.
17
- echo 🗑️ Removing binary files from git history...
18
-
19
- REM Remove vector store directory from entire history
20
- git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch vector_store/" --prune-empty --tag-name-filter cat -- --all
21
-
22
- REM Remove database files from entire history
23
- git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch *.db" --prune-empty --tag-name-filter cat -- --all
24
- git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch *.sqlite" --prune-empty --tag-name-filter cat -- --all
25
- git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch *.sqlite3" --prune-empty --tag-name-filter cat -- --all
26
-
27
- REM Remove FAISS files from entire history
28
- git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch *.faiss" --prune-empty --tag-name-filter cat -- --all
29
- git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch *.index" --prune-empty --tag-name-filter cat -- --all
30
-
31
- REM Remove log files from entire history
32
- git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch *.log" --prune-empty --tag-name-filter cat -- --all
33
-
34
- echo.
35
- echo 🧹 Cleaning up...
36
- git for-each-ref --format="delete %(refname)" refs/original | git update-ref --stdin
37
- git reflog expire --expire=now --all
38
- git gc --prune=now --aggressive
39
-
40
- echo.
41
- echo ✅ Repository history cleaned!
42
- echo.
43
- echo 📝 Now commit the current state...
44
- git add .
45
- git commit -m "Clean repository for Hugging Face deployment"
46
-
47
- echo.
48
- echo 🚀 Ready to push! Run:
49
- echo git push --force space develop
50
- echo.
51
- echo Or create a new branch:
52
- echo git checkout -b main
53
- echo git push space main
54
- echo.
55
-
56
- pause
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
deploy-to-hf.bat DELETED
@@ -1,82 +0,0 @@
1
- @echo off
2
- echo 🚀 Preparing Dubsway Video AI for Hugging Face Deployment...
3
-
4
- echo.
5
- echo 📋 Checking prerequisites...
6
-
7
- REM Check if git is available
8
- git --version >nul 2>&1
9
- if errorlevel 1 (
10
- echo ❌ Git is not installed or not in PATH
11
- pause
12
- exit /b 1
13
- )
14
-
15
- REM Check if we're in a git repository
16
- git status >nul 2>&1
17
- if errorlevel 1 (
18
- echo ❌ Not in a git repository. Please run this from your project root.
19
- pause
20
- exit /b 1
21
- )
22
-
23
- echo ✅ Git repository found
24
-
25
- REM Check for required files
26
- if not exist ".huggingface.yaml" (
27
- echo ❌ .huggingface.yaml not found
28
- pause
29
- exit /b 1
30
- )
31
-
32
- if not exist "Dockerfile" (
33
- echo ❌ Dockerfile not found
34
- pause
35
- exit /b 1
36
- )
37
-
38
- if not exist "requirements-hf.txt" (
39
- echo ❌ requirements-hf.txt not found
40
- pause
41
- exit /b 1
42
- )
43
-
44
- echo ✅ All required files found
45
-
46
- echo.
47
- echo 🔄 Checking git status...
48
- git status --porcelain
49
-
50
- echo.
51
- echo 📝 Current branch:
52
- git branch --show-current
53
-
54
- echo.
55
- echo 🚀 Ready to deploy! Follow these steps:
56
- echo.
57
- echo 1. Push your changes to GitHub:
58
- echo git add .
59
- echo git commit -m "Deploy to Hugging Face Spaces"
60
- echo git push origin main
61
- echo.
62
- echo 2. Go to https://huggingface.co/spaces
63
- echo.
64
- echo 3. Create a new Space with these settings:
65
- echo - SDK: Docker
66
- echo - License: MIT
67
- echo - Connect to your GitHub repository
68
- echo.
69
- echo 4. Set environment variables in your Space settings:
70
- echo - GROQ_API_KEY
71
- echo - DATABASE_URL=sqlite+aiosqlite:///./dubsway_hf.db
72
- echo - SECRET_KEY
73
- echo - AWS_ACCESS_KEY_ID (if using S3)
74
- echo - AWS_SECRET_ACCESS_KEY (if using S3)
75
- echo - S3_BUCKET_NAME (if using S3)
76
- echo.
77
- echo 5. Monitor deployment in your Space
78
- echo.
79
- echo 📖 See HUGGINGFACE_DEPLOYMENT.md for detailed instructions
80
- echo.
81
-
82
- pause
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fix-pydantic-deploy.bat DELETED
@@ -1,37 +0,0 @@
1
- @echo off
2
- echo 🔧 Fixing Pydantic imports for Hugging Face deployment...
3
-
4
- echo.
5
- echo 📝 Updating Pydantic imports...
6
-
7
- REM Fix auth.py
8
- echo Fixing app/auth.py...
9
- powershell -Command "(Get-Content 'app/auth.py') -replace 'from pydantic\.v1 import', 'from pydantic import' | Set-Content 'app/auth.py'"
10
-
11
- REM Fix custom_chatbot.py
12
- echo Fixing app/agent/custom_chatbot.py...
13
- powershell -Command "(Get-Content 'app/agent/custom_chatbot.py') -replace 'from pydantic\.v1 import', 'from pydantic import' | Set-Content 'app/agent/custom_chatbot.py'"
14
-
15
- REM Fix pdf_ingestion.py
16
- echo Fixing app/pdf_ingestion.py...
17
- powershell -Command "(Get-Content 'app/pdf_ingestion.py') -replace 'from pydantic\.v1 import', 'from pydantic import' | Set-Content 'app/pdf_ingestion.py'"
18
-
19
- echo.
20
- echo ✅ Pydantic imports fixed!
21
-
22
- echo.
23
- echo 📝 Committing changes...
24
- git add .
25
- git commit -m "Fix Pydantic imports for Hugging Face deployment"
26
-
27
- echo.
28
- echo 🚀 Pushing to Hugging Face...
29
- git push space develop
30
-
31
- echo.
32
- echo ✅ Deployment completed!
33
- echo.
34
- echo 📍 Your Space URL: https://huggingface.co/spaces/peace2024/DubswayAgenticAI
35
- echo.
36
-
37
- pause
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fix_agentic_errors.bat DELETED
@@ -1,28 +0,0 @@
1
- @echo off
2
- echo ========================================
3
- echo Fixing Agentic System Errors
4
- echo ========================================
5
- echo.
6
-
7
- REM Activate virtual environment
8
- echo Activating virtual environment...
9
- call myenv31\Scripts\activate.bat
10
-
11
- REM Install missing dependencies
12
- echo Installing missing dependencies...
13
- pip install timm
14
-
15
- echo.
16
- echo ========================================
17
- echo Errors Fixed!
18
- echo ========================================
19
- echo.
20
- echo The following issues have been resolved:
21
- echo ✅ Missing timm library - INSTALLED
22
- echo ✅ PDF generation function - FIXED
23
- echo ✅ Enhanced analysis should now work properly
24
- echo.
25
- echo You can now run the agentic system:
26
- echo run_agentic.bat
27
- echo.
28
- pause
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fresh-deploy.bat DELETED
@@ -1,78 +0,0 @@
1
- @echo off
2
- echo 🚀 Creating fresh repository for Hugging Face deployment...
3
-
4
- echo.
5
- echo 📋 Creating backup of current files...
6
- if not exist "backup" mkdir backup
7
- xcopy /E /I /Y "app" "backup\app"
8
- xcopy /E /I /Y "worker" "backup\worker"
9
- copy "requirements-hf.txt" "backup\"
10
- copy "Dockerfile" "backup\"
11
- copy ".huggingface.yaml" "backup\"
12
- copy "env.example" "backup\"
13
- copy "*.md" "backup\"
14
- copy ".gitignore" "backup\"
15
-
16
- echo.
17
- echo 🧹 Creating clean deployment directory...
18
- if exist "deploy" rmdir /s /q "deploy"
19
- mkdir deploy
20
- cd deploy
21
-
22
- echo.
23
- echo 📝 Initializing new git repository...
24
- git init
25
-
26
- echo.
27
- echo 📋 Copying clean files...
28
- xcopy /E /I /Y "..\backup\app" "app\"
29
- xcopy /E /I /Y "..\backup\worker" "worker\"
30
- copy "..\backup\requirements-hf.txt" "."
31
- copy "..\backup\Dockerfile" "."
32
- copy "..\backup\.huggingface.yaml" "."
33
- copy "..\backup\env.example" "."
34
- copy "..\backup\.gitignore" "."
35
- copy "..\backup\*.md" "."
36
-
37
- echo.
38
- echo 🔗 Adding Hugging Face remote...
39
- git remote add space https://huggingface.co/spaces/peace2024/DubswayAgenticAI
40
-
41
- echo.
42
- echo 📝 Committing clean repository...
43
- git add .
44
- git commit -m "Initial clean deployment for Hugging Face Spaces"
45
-
46
- echo.
47
- echo 🚀 Pushing to Hugging Face...
48
- echo Choose your branch:
49
- echo 1. Push to develop branch
50
- echo 2. Push to main branch
51
- echo 3. Cancel
52
- echo.
53
- set /p choice="Enter choice (1-3): "
54
-
55
- if "%choice%"=="1" (
56
- echo Pushing to develop branch...
57
- git push space develop
58
- ) else if "%choice%"=="2" (
59
- echo Pushing to main branch...
60
- git push space main
61
- ) else (
62
- echo Cancelled.
63
- cd ..
64
- pause
65
- exit /b 0
66
- )
67
-
68
- echo.
69
- echo ✅ Fresh deployment completed!
70
- echo.
71
- echo 📍 Your Space URL will be:
72
- echo https://huggingface.co/spaces/peace2024/DubswayAgenticAI
73
- echo.
74
- echo 🔍 Monitor the build logs in your Space settings.
75
- echo.
76
-
77
- cd ..
78
- pause
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
push-to-hf.bat DELETED
@@ -1,49 +0,0 @@
1
- @echo off
2
- echo 🚀 Pushing to Hugging Face Spaces...
3
-
4
- echo.
5
- echo 🧹 Cleaning binary files...
6
-
7
- REM Remove vector store from git tracking
8
- git rm -r --cached vector_store/ 2>nul
9
- git rm --cached *.db 2>nul
10
- git rm --cached *.log 2>nul
11
-
12
- echo.
13
- echo 📝 Committing changes...
14
- git add .
15
- git commit -m "Clean repository for Hugging Face deployment"
16
-
17
- echo.
18
- echo 🔄 Pushing to Hugging Face...
19
- echo Choose your branch:
20
- echo 1. Push to develop branch
21
- echo 2. Create and push to main branch
22
- echo 3. Cancel
23
- echo.
24
- set /p choice="Enter choice (1-3): "
25
-
26
- if "%choice%"=="1" (
27
- echo Pushing to develop branch...
28
- git push space develop
29
- ) else if "%choice%"=="2" (
30
- echo Creating main branch...
31
- git checkout -b main
32
- echo Pushing to main branch...
33
- git push space main
34
- ) else (
35
- echo Cancelled.
36
- pause
37
- exit /b 0
38
- )
39
-
40
- echo.
41
- echo ✅ Push completed!
42
- echo.
43
- echo 📍 Your Space URL will be:
44
- echo https://huggingface.co/spaces/peace2024/DubswayAgenticAI
45
- echo.
46
- echo 🔍 Monitor the build logs in your Space settings.
47
- echo.
48
-
49
- pause
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
run_agentic.bat DELETED
@@ -1,43 +0,0 @@
1
- @echo off
2
- echo ========================================
3
- echo Dubsway Video AI - Agentic System Runner
4
- echo ========================================
5
- echo.
6
-
7
- REM Activate virtual environment
8
- echo Activating virtual environment...
9
- call myenv31\Scripts\activate.bat
10
-
11
- REM Check for Groq API key
12
- if "%GROQ_API_KEY%"=="" (
13
- echo.
14
- echo ========================================
15
- echo GROQ API KEY REQUIRED
16
- echo ========================================
17
- echo.
18
- echo Please set your Groq API key:
19
- echo 1. Get API key from: https://console.groq.com/
20
- echo 2. Set environment variable: set GROQ_API_KEY=your_key_here
21
- echo.
22
- echo Then run this script again.
23
- echo.
24
- pause
25
- exit /b 1
26
- )
27
-
28
- echo Groq API key found!
29
- echo.
30
-
31
- REM Run the agentic daemon
32
- echo Starting agentic video processing daemon...
33
- echo.
34
- echo The daemon will:
35
- echo - Process pending videos with enhanced analysis
36
- echo - Use Groq Llama3-8b-8192 for intelligent reasoning
37
- echo - Generate beautiful, comprehensive reports
38
- echo - Fall back to basic analysis if needed
39
- echo.
40
- echo Press Ctrl+C to stop the daemon
41
- echo.
42
-
43
- python -m worker.daemon
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
run_lightweight_agentic.bat DELETED
@@ -1,44 +0,0 @@
1
- @echo off
2
- echo ========================================
3
- echo Dubsway Video AI - Lightweight Agentic System
4
- echo ========================================
5
- echo.
6
-
7
- REM Activate virtual environment
8
- echo Activating virtual environment...
9
- call myenv31\Scripts\activate.bat
10
-
11
- REM Check for Groq API key
12
- if "%GROQ_API_KEY%"=="" (
13
- echo.
14
- echo ========================================
15
- echo GROQ API KEY REQUIRED
16
- echo ========================================
17
- echo.
18
- echo Please set your Groq API key:
19
- echo 1. Get API key from: https://console.groq.com/
20
- echo 2. Set environment variable: set GROQ_API_KEY=your_key_here
21
- echo.
22
- echo Then run this script again.
23
- echo.
24
- pause
25
- exit /b 1
26
- )
27
-
28
- echo Groq API key found!
29
- echo.
30
-
31
- REM Run the lightweight agentic daemon
32
- echo Starting lightweight agentic video processing daemon...
33
- echo.
34
- echo The lightweight daemon will:
35
- echo - Process videos with Groq Llama3-8b-8192 analysis
36
- echo - Skip heavy computer vision models (no hanging)
37
- echo - Provide intelligent text-based insights
38
- echo - Generate beautiful reports
39
- echo - Fall back to basic analysis if needed
40
- echo.
41
- echo Press Ctrl+C to stop the daemon
42
- echo.
43
-
44
- python -m worker.daemon
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
setup_agentic_system.bat DELETED
@@ -1,63 +0,0 @@
1
- @echo off
2
- echo ========================================
3
- echo Dubsway Video AI - Agentic System Setup
4
- echo ========================================
5
- echo.
6
-
7
- REM Check if virtual environment exists
8
- if not exist "myenv31" (
9
- echo Creating virtual environment...
10
- python -m venv myenv31
11
- )
12
-
13
- REM Activate virtual environment
14
- echo Activating virtual environment...
15
- call myenv31\Scripts\activate.bat
16
-
17
- REM Install dependencies
18
- echo Installing dependencies...
19
- pip install -r requirements.txt
20
-
21
- REM Install Groq specifically
22
- echo Installing Groq integration...
23
- pip install langchain-groq
24
-
25
- REM Check for Groq API key
26
- echo.
27
- echo Checking for Groq API key...
28
- if "%GROQ_API_KEY%"=="" (
29
- echo.
30
- echo ========================================
31
- echo GROQ API KEY REQUIRED
32
- echo ========================================
33
- echo.
34
- echo To use the agentic system, you need a Groq API key:
35
- echo 1. Visit: https://console.groq.com/
36
- echo 2. Sign up and get your API key
37
- echo 3. Set the environment variable:
38
- echo set GROQ_API_KEY=your_key_here
39
- echo.
40
- echo Or add it to your .env file:
41
- echo GROQ_API_KEY=your_key_here
42
- echo.
43
- pause
44
- ) else (
45
- echo Groq API key found!
46
- )
47
-
48
- REM Run test
49
- echo.
50
- echo Running system test...
51
- python test_agentic_system.py
52
-
53
- echo.
54
- echo ========================================
55
- echo Setup Complete!
56
- echo ========================================
57
- echo.
58
- echo To run the agentic system:
59
- echo 1. Make sure GROQ_API_KEY is set
60
- echo 2. Run: python -m worker.daemon
61
- echo 3. Or use: start-server.bat
62
- echo.
63
- pause
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
test_agentic_system.py DELETED
@@ -1,180 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Test script for the agentic video analysis system with Groq integration
4
- """
5
- import asyncio
6
- import os
7
- import sys
8
- from pathlib import Path
9
-
10
- # Add project root to Python path
11
- sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
12
-
13
- async def test_groq_integration():
14
- """Test Groq integration and basic functionality"""
15
- print("🧪 Testing Groq Integration for Agentic Video Analysis")
16
- print("=" * 60)
17
-
18
- # Check for Groq API key
19
- groq_api_key = os.getenv("GROQ_API_KEY")
20
- if not groq_api_key:
21
- print("❌ GROQ_API_KEY environment variable not found!")
22
- print("Please set your Groq API key:")
23
- print("1. Get API key from: https://console.groq.com/")
24
- print("2. Set environment variable: GROQ_API_KEY=your_key_here")
25
- return False
26
-
27
- print("✅ GROQ_API_KEY found")
28
-
29
- try:
30
- # Test Groq import
31
- from langchain_groq import ChatGroq
32
- print("✅ langchain-groq imported successfully")
33
-
34
- # Test Groq connection
35
- llm = ChatGroq(
36
- groq_api_key=groq_api_key,
37
- model_name="llama3-8b-8192",
38
- temperature=0.1,
39
- max_tokens=100
40
- )
41
-
42
- # Simple test
43
- response = await llm.ainvoke("Say 'Hello from Groq!'")
44
- print(f"✅ Groq test successful: {response.content}")
45
-
46
- except ImportError as e:
47
- print(f"❌ Failed to import langchain-groq: {e}")
48
- print("Please install: pip install langchain-groq")
49
- return False
50
- except Exception as e:
51
- print(f"❌ Groq test failed: {e}")
52
- return False
53
-
54
- return True
55
-
56
- async def test_enhanced_analysis():
57
- """Test enhanced analysis components"""
58
- print("\n🔍 Testing Enhanced Analysis Components")
59
- print("=" * 60)
60
-
61
- try:
62
- # Test imports
63
- from app.utils.enhanced_analysis import MultiModalAnalyzer
64
- print("✅ Enhanced analysis imports successful")
65
-
66
- # Test analyzer initialization
67
- groq_api_key = os.getenv("GROQ_API_KEY")
68
- analyzer = MultiModalAnalyzer(groq_api_key=groq_api_key)
69
- print("✅ MultiModalAnalyzer initialized successfully")
70
-
71
- # Test agent creation
72
- if analyzer.agent:
73
- print("✅ Agent created successfully")
74
- else:
75
- print("❌ Agent creation failed")
76
- return False
77
-
78
- except Exception as e:
79
- print(f"❌ Enhanced analysis test failed: {e}")
80
- return False
81
-
82
- return True
83
-
84
- async def test_agentic_integration():
85
- """Test agentic integration"""
86
- print("\n🤖 Testing Agentic Integration")
87
- print("=" * 60)
88
-
89
- try:
90
- from app.utils.agentic_integration import AgenticVideoProcessor, MCPToolManager
91
- print("✅ Agentic integration imports successful")
92
-
93
- # Test processor initialization
94
- groq_api_key = os.getenv("GROQ_API_KEY")
95
- processor = AgenticVideoProcessor(enable_enhanced_analysis=True, groq_api_key=groq_api_key)
96
- print("✅ AgenticVideoProcessor initialized successfully")
97
-
98
- # Test MCP tool manager
99
- tool_manager = MCPToolManager(groq_api_key=groq_api_key)
100
- print("✅ MCPToolManager initialized successfully")
101
-
102
- # Test tool registration
103
- if tool_manager.tools:
104
- print(f"✅ {len(tool_manager.tools)} tools registered")
105
- else:
106
- print("❌ No tools registered")
107
- return False
108
-
109
- except Exception as e:
110
- print(f"❌ Agentic integration test failed: {e}")
111
- return False
112
-
113
- return True
114
-
115
- async def test_dependencies():
116
- """Test all required dependencies"""
117
- print("\n📦 Testing Dependencies")
118
- print("=" * 60)
119
-
120
- dependencies = [
121
- ("opencv-python", "cv2"),
122
- ("pillow", "PIL"),
123
- ("torch", "torch"),
124
- ("transformers", "transformers"),
125
- ("faster_whisper", "faster_whisper"),
126
- ("langchain", "langchain"),
127
- ("langchain_groq", "langchain_groq"),
128
- ("duckduckgo-search", "duckduckgo_search"),
129
- ("wikipedia-api", "wikipedia"),
130
- ]
131
-
132
- all_good = True
133
- for package_name, import_name in dependencies:
134
- try:
135
- __import__(import_name)
136
- print(f"✅ {package_name}")
137
- except ImportError:
138
- print(f"❌ {package_name} - missing")
139
- all_good = False
140
-
141
- return all_good
142
-
143
- async def main():
144
- """Main test function"""
145
- print("🚀 Dubsway Video AI - Agentic System Test")
146
- print("=" * 60)
147
-
148
- # Test dependencies first
149
- deps_ok = await test_dependencies()
150
- if not deps_ok:
151
- print("\n❌ Some dependencies are missing. Please install them:")
152
- print("pip install -r requirements.txt")
153
- return False
154
-
155
- # Test Groq integration
156
- groq_ok = await test_groq_integration()
157
- if not groq_ok:
158
- return False
159
-
160
- # Test enhanced analysis
161
- enhanced_ok = await test_enhanced_analysis()
162
- if not enhanced_ok:
163
- return False
164
-
165
- # Test agentic integration
166
- agentic_ok = await test_agentic_integration()
167
- if not agentic_ok:
168
- return False
169
-
170
- print("\n🎉 All tests passed! Your agentic system is ready to use.")
171
- print("\n📋 Next steps:")
172
- print("1. Update your worker/daemon.py to use agentic analysis")
173
- print("2. Set GROQ_API_KEY environment variable")
174
- print("3. Run your daemon with enhanced capabilities")
175
-
176
- return True
177
-
178
- if __name__ == "__main__":
179
- success = asyncio.run(main())
180
- sys.exit(0 if success else 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
worker/daemon.py DELETED
@@ -1,210 +0,0 @@
1
- import asyncio
2
- import os
3
- import time
4
- import signal
5
- import sys
6
- from datetime import datetime
7
- import traceback
8
- import logging
9
-
10
- from sqlalchemy.future import select
11
- from sqlalchemy.ext.asyncio import AsyncSession
12
- from sqlalchemy.exc import SQLAlchemyError
13
-
14
- from app.database import AsyncSessionLocal, init_db, close_db
15
- from app.models import VideoUpload
16
- from app.utils import whisper_llm, pdf, s3, lightweight_agentic
17
-
18
- # Setup logging with UTF-8 encoding for Windows compatibility
19
- logging.basicConfig(
20
- level=logging.INFO,
21
- format='[%(asctime)s] %(levelname)s - %(name)s - %(message)s',
22
- handlers=[
23
- logging.StreamHandler(sys.stdout), # Use stdout for better encoding
24
- logging.FileHandler('worker.log', encoding='utf-8')
25
- ]
26
- )
27
- logger = logging.getLogger("worker.daemon")
28
-
29
- POLL_INTERVAL = 200 # seconds
30
- SHUTDOWN_EVENT = asyncio.Event()
31
-
32
-
33
- def signal_handler(signum, frame):
34
- """Handle shutdown signals gracefully"""
35
- logger.info(f"Received signal {signum}, initiating graceful shutdown...")
36
- SHUTDOWN_EVENT.set()
37
-
38
-
39
- async def process_pending_videos():
40
- """Process all pending video uploads"""
41
- async with AsyncSessionLocal() as session:
42
- try:
43
- # Query for pending videos
44
- result = await session.execute(
45
- select(VideoUpload).where(VideoUpload.status == "pending")
46
- )
47
- pending_videos = result.scalars().all()
48
-
49
- if not pending_videos:
50
- logger.info("No pending videos found")
51
- return
52
-
53
- logger.info(f"Found {len(pending_videos)} pending videos to process")
54
-
55
- for video in pending_videos:
56
- if SHUTDOWN_EVENT.is_set():
57
- logger.info("Shutdown requested, stopping video processing")
58
- break
59
-
60
- logger.info(f"Processing video ID {video.id} for user {video.user_id}")
61
-
62
- try:
63
- # Update status to processing
64
- video.status = "processing"
65
- video.updated_at = datetime.utcnow()
66
- await session.commit()
67
-
68
- # Process with Lightweight Agentic Analysis (Groq + Llama3)
69
- try:
70
- transcription, summary = await lightweight_agentic.analyze_with_lightweight_agentic(
71
- video_url=video.video_url,
72
- user_id=video.user_id,
73
- db=session
74
- )
75
- logger.info(f"Lightweight agentic analysis completed for video {video.id}")
76
- except Exception as agentic_error:
77
- logger.warning(f"Lightweight agentic analysis failed, falling back to basic Whisper: {agentic_error}")
78
- transcription, summary = await whisper_llm.analyze(
79
- video_url=video.video_url,
80
- user_id=video.user_id,
81
- db=session
82
- )
83
- logger.info(f"Basic Whisper analysis completed for video {video.id}")
84
-
85
- except Exception as e:
86
- logger.error(f"Whisper failed for video {video.id}: {e}")
87
- logger.debug(traceback.format_exc())
88
-
89
- # Update status to failed
90
- video.status = "failed"
91
- video.updated_at = datetime.utcnow()
92
- await session.commit()
93
- continue
94
-
95
- try:
96
- # Generate PDF
97
- pdf_bytes = pdf.generate(transcription, summary)
98
- logger.info(f"PDF generation completed for video {video.id}")
99
- except Exception as e:
100
- logger.error(f"PDF generation failed for video {video.id}: {e}")
101
- logger.debug(traceback.format_exc())
102
-
103
- video.status = "failed"
104
- video.updated_at = datetime.utcnow()
105
- await session.commit()
106
- continue
107
-
108
- try:
109
- # Upload to S3
110
- pdf_key = f"pdfs/{video.id}.pdf"
111
- pdf_url = s3.upload_pdf_bytes(pdf_bytes, pdf_key)
112
- logger.info(f"S3 upload completed for video {video.id}")
113
- except Exception as e:
114
- logger.error(f"Upload to S3 failed for video {video.id}: {e}")
115
- logger.debug(traceback.format_exc())
116
-
117
- video.status = "failed"
118
- video.updated_at = datetime.utcnow()
119
- await session.commit()
120
- continue
121
-
122
- try:
123
- # Mark as completed
124
- video.status = "completed"
125
- video.pdf_url = pdf_url
126
- video.updated_at = datetime.utcnow()
127
- await session.commit()
128
- logger.info(f"Successfully completed video {video.id}")
129
-
130
- except SQLAlchemyError as e:
131
- logger.error(f"DB commit failed for video {video.id}: {e}")
132
- logger.debug(traceback.format_exc())
133
- await session.rollback()
134
-
135
- except SQLAlchemyError as e:
136
- logger.error(f"Database error: {e}")
137
- logger.debug(traceback.format_exc())
138
- except Exception as e:
139
- logger.error(f"Unexpected error in process_pending_videos: {e}")
140
- logger.debug(traceback.format_exc())
141
-
142
-
143
- async def run_worker():
144
- """Main worker loop"""
145
- logger.info("Async worker daemon started...")
146
-
147
- # Initialize database
148
- try:
149
- await init_db()
150
- logger.info("Database initialized successfully")
151
- except Exception as e:
152
- logger.error(f"Failed to initialize database: {e}")
153
- return
154
-
155
- cycle_count = 0
156
- while not SHUTDOWN_EVENT.is_set():
157
- cycle_count += 1
158
- logger.info(f"Worker cycle {cycle_count} - Checking for pending videos...")
159
-
160
- try:
161
- await process_pending_videos()
162
- except Exception as e:
163
- logger.error(f"Worker loop error: {e}")
164
- logger.debug(traceback.format_exc())
165
-
166
- # Wait for next cycle or shutdown
167
- try:
168
- await asyncio.wait_for(SHUTDOWN_EVENT.wait(), timeout=POLL_INTERVAL)
169
- except asyncio.TimeoutError:
170
- # Normal timeout, continue to next cycle
171
- pass
172
- except Exception as e:
173
- logger.error(f"Error in worker wait: {e}")
174
- break
175
-
176
- logger.info("Worker loop stopped, cleaning up...")
177
-
178
- # Cleanup
179
- try:
180
- await close_db()
181
- logger.info("Database connections closed")
182
- except Exception as e:
183
- logger.error(f"Error during cleanup: {e}")
184
-
185
-
186
- async def main():
187
- """Main entry point with signal handling"""
188
- # Setup signal handlers
189
- signal.signal(signal.SIGINT, signal_handler)
190
- signal.signal(signal.SIGTERM, signal_handler)
191
-
192
- try:
193
- await run_worker()
194
- except KeyboardInterrupt:
195
- logger.info("Keyboard interrupt received")
196
- except Exception as e:
197
- logger.error(f"Fatal error in main: {e}")
198
- logger.debug(traceback.format_exc())
199
- finally:
200
- logger.info("Worker daemon shutdown complete")
201
-
202
-
203
- if __name__ == "__main__":
204
- try:
205
- asyncio.run(main())
206
- except KeyboardInterrupt:
207
- logger.info("Worker daemon interrupted by user")
208
- except Exception as e:
209
- logger.error(f"Fatal error: {e}")
210
- sys.exit(1)