peace2024 commited on
Commit
a408f4b
·
1 Parent(s): ed6b1d2
.gitignore CHANGED
@@ -5,6 +5,10 @@ __pycache__/
5
  *.pyd
6
 
7
  # Virtual environments
 
 
 
 
8
  env/
9
  venv/
10
  myenv/
 
5
  *.pyd
6
 
7
  # Virtual environments
8
+ aienv/
9
+ .aienv/
12
  env/
13
  venv/
14
  myenv/
app/utils/online_vector_store.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import os
from typing import List, Optional

from langchain_community.vectorstores import Pinecone as PineconeVectorStore
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from pinecone import Pinecone, ServerlessSpec
8
+
9
+ # Setup logger
10
+ logger = logging.getLogger("app.utils.online_vector_store")
11
+ logger.setLevel(logging.INFO)
12
+ if not logger.handlers:
13
+ handler = logging.StreamHandler()
14
+ formatter = logging.Formatter("[%(asctime)s] %(levelname)s - %(message)s")
15
+ handler.setFormatter(formatter)
16
+ logger.addHandler(handler)
17
+
18
+ class OnlineVectorStore:
19
+ def __init__(self):
20
+ self.pinecone_api_key = os.getenv("PINECONE_API_KEY")
21
+ self.pinecone_environment = os.getenv("PINECONE_ENVIRONMENT", "gcp-starter")
22
+ self.index_name = "dubsway-video-ai"
23
+
24
+ if not self.pinecone_api_key:
25
+ logger.warning("PINECONE_API_KEY not found. Using fallback local storage.")
26
+ self.use_pinecone = False
27
+ else:
28
+ self.use_pinecone = True
29
+ self._initialize_pinecone()
30
+
31
+ def _initialize_pinecone(self):
32
+ """Initialize Pinecone client and create index if needed."""
33
+ try:
34
+ pc = Pinecone(api_key=self.pinecone_api_key)
35
+
36
+ # Check if index exists
37
+ if self.index_name not in pc.list_indexes().names():
38
+ logger.info(f"Creating Pinecone index: {self.index_name}")
39
+ pc.create_index(
40
+ name=self.index_name,
41
+ dimension=1536, # OpenAI embeddings dimension
42
+ metric="cosine",
43
+ spec=ServerlessSpec(
44
+ cloud="aws",
45
+ region="us-east-1"
46
+ )
47
+ )
48
+ logger.info(f"Pinecone index {self.index_name} created successfully")
49
+ else:
50
+ logger.info(f"Using existing Pinecone index: {self.index_name}")
51
+
52
+ except Exception as e:
53
+ logger.error(f"Failed to initialize Pinecone: {e}")
54
+ self.use_pinecone = False
55
+
56
+ def add_documents(self, documents: List[Document], user_id: int) -> bool:
57
+ """Add documents to the vector store."""
58
+ try:
59
+ if not documents:
60
+ logger.warning("No documents to add")
61
+ return False
62
+
63
+ # Add user_id metadata to each document
64
+ for doc in documents:
65
+ if not hasattr(doc, 'metadata'):
66
+ doc.metadata = {}
67
+ doc.metadata['user_id'] = user_id
68
+ doc.metadata['source'] = 'video_analysis'
69
+
70
+ if self.use_pinecone:
71
+ return self._add_to_pinecone(documents, user_id)
72
+ else:
73
+ logger.warning("Pinecone not available, skipping vector storage")
74
+ return False
75
+
76
+ except Exception as e:
77
+ logger.error(f"Failed to add documents to vector store: {e}")
78
+ return False
79
+
80
+ def _add_to_pinecone(self, documents: List[Document], user_id: int) -> bool:
81
+ """Add documents to Pinecone."""
82
+ try:
83
+ embeddings = OpenAIEmbeddings()
84
+
85
+ # Create Pinecone vector store
86
+ vector_store = Pinecone.from_documents(
87
+ documents=documents,
88
+ embedding=embeddings,
89
+ index_name=self.index_name,
90
+ namespace=f"user_{user_id}"
91
+ )
92
+
93
+ logger.info(f"Successfully added {len(documents)} documents to Pinecone for user {user_id}")
94
+ return True
95
+
96
+ except Exception as e:
97
+ logger.error(f"Failed to add documents to Pinecone: {e}")
98
+ return False
99
+
100
+ def search(self, query: str, user_id: int, k: int = 5) -> List[Document]:
101
+ """Search for similar documents."""
102
+ try:
103
+ if not self.use_pinecone:
104
+ logger.warning("Pinecone not available, returning empty results")
105
+ return []
106
+
107
+ embeddings = OpenAIEmbeddings()
108
+
109
+ # Create Pinecone vector store for searching
110
+ vector_store = PineconeVectorStore.from_existing_index(
111
+ index_name=self.index_name,
112
+ embedding=embeddings,
113
+ namespace=f"user_{user_id}"
114
+ )
115
+
116
+ # Search for similar documents
117
+ results = vector_store.similarity_search(
118
+ query=query,
119
+ k=k,
120
+ filter={"user_id": user_id}
121
+ )
122
+
123
+ logger.info(f"Found {len(results)} similar documents for user {user_id}")
124
+ return results
125
+
126
+ except Exception as e:
127
+ logger.error(f"Failed to search vector store: {e}")
128
+ return []
129
+
130
+ def get_user_documents(self, user_id: int, limit: int = 50) -> List[Document]:
131
+ """Get all documents for a specific user."""
132
+ try:
133
+ if not self.use_pinecone:
134
+ logger.warning("Pinecone not available, returning empty results")
135
+ return []
136
+
137
+ embeddings = OpenAIEmbeddings()
138
+
139
+ # Create Pinecone vector store for searching
140
+ vector_store = PineconeVectorStore.from_existing_index(
141
+ index_name=self.index_name,
142
+ embedding=embeddings,
143
+ namespace=f"user_{user_id}"
144
+ )
145
+
146
+ # Get all documents for the user
147
+ results = vector_store.similarity_search(
148
+ query="", # Empty query to get all documents
149
+ k=limit,
150
+ filter={"user_id": user_id}
151
+ )
152
+
153
+ logger.info(f"Retrieved {len(results)} documents for user {user_id}")
154
+ return results
155
+
156
+ except Exception as e:
157
+ logger.error(f"Failed to get user documents: {e}")
158
+ return []
159
+
160
+ def delete_user_documents(self, user_id: int) -> bool:
161
+ """Delete all documents for a specific user."""
162
+ try:
163
+ if not self.use_pinecone:
164
+ logger.warning("Pinecone not available, skipping deletion")
165
+ return False
166
+
167
+ pc = Pinecone(api_key=self.pinecone_api_key)
168
+ index = pc.Index(self.index_name)
169
+
170
+ # Delete all vectors in the user's namespace
171
+ index.delete(namespace=f"user_{user_id}")
172
+
173
+ logger.info(f"Successfully deleted all documents for user {user_id}")
174
+ return True
175
+
176
+ except Exception as e:
177
+ logger.error(f"Failed to delete user documents: {e}")
178
+ return False
179
+
180
+ # Global instance
181
+ vector_store = OnlineVectorStore()
app/utils/whisper_llm.py CHANGED
@@ -114,7 +114,7 @@ def summarize_in_chunks(text, chunk_size=1024, overlap=200):
114
  )
115
  return final_result[0]['summary_text']
116
  except Exception as e:
117
- logger.error(f"Final summarization failed: {e}")
118
  return combined_summary[:1500] + "..." if len(combined_summary) > 1500 else combined_summary
119
 
120
  return combined_summary
 
114
  )
115
  return final_result[0]['summary_text']
116
  except Exception as e:
117
+ logger.error(f"Final summarization failed: {e}")
118
  return combined_summary[:1500] + "..." if len(combined_summary) > 1500 else combined_summary
119
 
120
  return combined_summary
clean-for-deployment.bat DELETED
@@ -1,69 +0,0 @@
1
- @echo off
2
- echo 🧹 Cleaning repository for Hugging Face deployment...
3
-
4
- echo.
5
- echo 📋 Removing binary files and vector stores...
6
-
7
- REM Remove vector store directory
8
- if exist "vector_store" (
9
- echo Removing vector_store directory...
10
- rmdir /s /q "vector_store"
11
- echo ✅ vector_store removed
12
- ) else (
13
- echo ℹ️ vector_store directory not found
14
- )
15
-
16
- REM Remove database files
17
- for %%f in (*.db *.sqlite *.sqlite3) do (
18
- if exist "%%f" (
19
- echo Removing %%f...
20
- del "%%f"
21
- echo ✅ %%f removed
22
- )
23
- )
24
-
25
- REM Remove FAISS files
26
- for %%f in (*.faiss *.index *.bin) do (
27
- if exist "%%f" (
28
- echo Removing %%f...
29
- del "%%f"
30
- echo ✅ %%f removed
31
- )
32
- )
33
-
34
- REM Remove log files
35
- for %%f in (*.log) do (
36
- if exist "%%f" (
37
- echo Removing %%f...
38
- del "%%f"
39
- echo ✅ %%f removed
40
- )
41
- )
42
-
43
- echo.
44
- echo 🔄 Updating git...
45
-
46
- REM Remove tracked files that should be ignored
47
- git rm -r --cached vector_store/ 2>nul
48
- git rm --cached *.db 2>nul
49
- git rm --cached *.sqlite 2>nul
50
- git rm --cached *.sqlite3 2>nul
51
- git rm --cached *.faiss 2>nul
52
- git rm --cached *.log 2>nul
53
-
54
- echo.
55
- echo 📝 Committing changes...
56
- git add .
57
- git commit -m "Clean repository for Hugging Face deployment - remove binary files"
58
-
59
- echo.
60
- echo ✅ Repository cleaned! You can now push to Hugging Face:
61
- echo.
62
- echo git push space develop
63
- echo.
64
- echo Or create a new branch:
65
- echo git checkout -b main
66
- echo git push space main
67
- echo.
68
-
69
- pause
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
clean-repo-history.bat DELETED
@@ -1,56 +0,0 @@
1
- @echo off
2
- echo 🧹 Deep cleaning repository history...
3
-
4
- echo.
5
- echo ⚠️ WARNING: This will rewrite git history!
6
- echo This will remove all binary files from the entire git history.
7
- echo.
8
- set /p confirm="Are you sure you want to continue? (y/N): "
9
-
10
- if /i not "%confirm%"=="y" (
11
- echo Cancelled.
12
- pause
13
- exit /b 0
14
- )
15
-
16
- echo.
17
- echo 🗑️ Removing binary files from git history...
18
-
19
- REM Remove vector store directory from entire history
20
- git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch vector_store/" --prune-empty --tag-name-filter cat -- --all
21
-
22
- REM Remove database files from entire history
23
- git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch *.db" --prune-empty --tag-name-filter cat -- --all
24
- git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch *.sqlite" --prune-empty --tag-name-filter cat -- --all
25
- git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch *.sqlite3" --prune-empty --tag-name-filter cat -- --all
26
-
27
- REM Remove FAISS files from entire history
28
- git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch *.faiss" --prune-empty --tag-name-filter cat -- --all
29
- git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch *.index" --prune-empty --tag-name-filter cat -- --all
30
-
31
- REM Remove log files from entire history
32
- git filter-branch --force --index-filter "git rm -rf --cached --ignore-unmatch *.log" --prune-empty --tag-name-filter cat -- --all
33
-
34
- echo.
35
- echo 🧹 Cleaning up...
36
- git for-each-ref --format="delete %(refname)" refs/original | git update-ref --stdin
37
- git reflog expire --expire=now --all
38
- git gc --prune=now --aggressive
39
-
40
- echo.
41
- echo ✅ Repository history cleaned!
42
- echo.
43
- echo 📝 Now commit the current state...
44
- git add .
45
- git commit -m "Clean repository for Hugging Face deployment"
46
-
47
- echo.
48
- echo 🚀 Ready to push! Run:
49
- echo git push --force space develop
50
- echo.
51
- echo Or create a new branch:
52
- echo git checkout -b main
53
- echo git push space main
54
- echo.
55
-
56
- pause
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
deploy-to-hf.bat DELETED
@@ -1,82 +0,0 @@
1
- @echo off
2
- echo 🚀 Preparing Dubsway Video AI for Hugging Face Deployment...
3
-
4
- echo.
5
- echo 📋 Checking prerequisites...
6
-
7
- REM Check if git is available
8
- git --version >nul 2>&1
9
- if errorlevel 1 (
10
- echo ❌ Git is not installed or not in PATH
11
- pause
12
- exit /b 1
13
- )
14
-
15
- REM Check if we're in a git repository
16
- git status >nul 2>&1
17
- if errorlevel 1 (
18
- echo ❌ Not in a git repository. Please run this from your project root.
19
- pause
20
- exit /b 1
21
- )
22
-
23
- echo ✅ Git repository found
24
-
25
- REM Check for required files
26
- if not exist ".huggingface.yaml" (
27
- echo ❌ .huggingface.yaml not found
28
- pause
29
- exit /b 1
30
- )
31
-
32
- if not exist "Dockerfile" (
33
- echo ❌ Dockerfile not found
34
- pause
35
- exit /b 1
36
- )
37
-
38
- if not exist "requirements-hf.txt" (
39
- echo ❌ requirements-hf.txt not found
40
- pause
41
- exit /b 1
42
- )
43
-
44
- echo ✅ All required files found
45
-
46
- echo.
47
- echo 🔄 Checking git status...
48
- git status --porcelain
49
-
50
- echo.
51
- echo 📝 Current branch:
52
- git branch --show-current
53
-
54
- echo.
55
- echo 🚀 Ready to deploy! Follow these steps:
56
- echo.
57
- echo 1. Push your changes to GitHub:
58
- echo git add .
59
- echo git commit -m "Deploy to Hugging Face Spaces"
60
- echo git push origin main
61
- echo.
62
- echo 2. Go to https://huggingface.co/spaces
63
- echo.
64
- echo 3. Create a new Space with these settings:
65
- echo - SDK: Docker
66
- echo - License: MIT
67
- echo - Connect to your GitHub repository
68
- echo.
69
- echo 4. Set environment variables in your Space settings:
70
- echo - GROQ_API_KEY
71
- echo - DATABASE_URL=sqlite+aiosqlite:///./dubsway_hf.db
72
- echo - SECRET_KEY
73
- echo - AWS_ACCESS_KEY_ID (if using S3)
74
- echo - AWS_SECRET_ACCESS_KEY (if using S3)
75
- echo - S3_BUCKET_NAME (if using S3)
76
- echo.
77
- echo 5. Monitor deployment in your Space
78
- echo.
79
- echo 📖 See HUGGINGFACE_DEPLOYMENT.md for detailed instructions
80
- echo.
81
-
82
- pause
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fix-pydantic-deploy.bat DELETED
@@ -1,37 +0,0 @@
1
- @echo off
2
- echo 🔧 Fixing Pydantic imports for Hugging Face deployment...
3
-
4
- echo.
5
- echo 📝 Updating Pydantic imports...
6
-
7
- REM Fix auth.py
8
- echo Fixing app/auth.py...
9
- powershell -Command "(Get-Content 'app/auth.py') -replace 'from pydantic\.v1 import', 'from pydantic import' | Set-Content 'app/auth.py'"
10
-
11
- REM Fix custom_chatbot.py
12
- echo Fixing app/agent/custom_chatbot.py...
13
- powershell -Command "(Get-Content 'app/agent/custom_chatbot.py') -replace 'from pydantic\.v1 import', 'from pydantic import' | Set-Content 'app/agent/custom_chatbot.py'"
14
-
15
- REM Fix pdf_ingestion.py
16
- echo Fixing app/pdf_ingestion.py...
17
- powershell -Command "(Get-Content 'app/pdf_ingestion.py') -replace 'from pydantic\.v1 import', 'from pydantic import' | Set-Content 'app/pdf_ingestion.py'"
18
-
19
- echo.
20
- echo ✅ Pydantic imports fixed!
21
-
22
- echo.
23
- echo 📝 Committing changes...
24
- git add .
25
- git commit -m "Fix Pydantic imports for Hugging Face deployment"
26
-
27
- echo.
28
- echo 🚀 Pushing to Hugging Face...
29
- git push space develop
30
-
31
- echo.
32
- echo ✅ Deployment completed!
33
- echo.
34
- echo 📍 Your Space URL: https://huggingface.co/spaces/peace2024/DubswayAgenticAI
35
- echo.
36
-
37
- pause
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fix_agentic_errors.bat DELETED
@@ -1,28 +0,0 @@
1
- @echo off
2
- echo ========================================
3
- echo Fixing Agentic System Errors
4
- echo ========================================
5
- echo.
6
-
7
- REM Activate virtual environment
8
- echo Activating virtual environment...
9
- call myenv31\Scripts\activate.bat
10
-
11
- REM Install missing dependencies
12
- echo Installing missing dependencies...
13
- pip install timm
14
-
15
- echo.
16
- echo ========================================
17
- echo Errors Fixed!
18
- echo ========================================
19
- echo.
20
- echo The following issues have been resolved:
21
- echo ✅ Missing timm library - INSTALLED
22
- echo ✅ PDF generation function - FIXED
23
- echo ✅ Enhanced analysis should now work properly
24
- echo.
25
- echo You can now run the agentic system:
26
- echo run_agentic.bat
27
- echo.
28
- pause
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fresh-deploy.bat DELETED
@@ -1,78 +0,0 @@
1
- @echo off
2
- echo 🚀 Creating fresh repository for Hugging Face deployment...
3
-
4
- echo.
5
- echo 📋 Creating backup of current files...
6
- if not exist "backup" mkdir backup
7
- xcopy /E /I /Y "app" "backup\app"
8
- xcopy /E /I /Y "worker" "backup\worker"
9
- copy "requirements-hf.txt" "backup\"
10
- copy "Dockerfile" "backup\"
11
- copy ".huggingface.yaml" "backup\"
12
- copy "env.example" "backup\"
13
- copy "*.md" "backup\"
14
- copy ".gitignore" "backup\"
15
-
16
- echo.
17
- echo 🧹 Creating clean deployment directory...
18
- if exist "deploy" rmdir /s /q "deploy"
19
- mkdir deploy
20
- cd deploy
21
-
22
- echo.
23
- echo 📝 Initializing new git repository...
24
- git init
25
-
26
- echo.
27
- echo 📋 Copying clean files...
28
- xcopy /E /I /Y "..\backup\app" "app\"
29
- xcopy /E /I /Y "..\backup\worker" "worker\"
30
- copy "..\backup\requirements-hf.txt" "."
31
- copy "..\backup\Dockerfile" "."
32
- copy "..\backup\.huggingface.yaml" "."
33
- copy "..\backup\env.example" "."
34
- copy "..\backup\.gitignore" "."
35
- copy "..\backup\*.md" "."
36
-
37
- echo.
38
- echo 🔗 Adding Hugging Face remote...
39
- git remote add space https://huggingface.co/spaces/peace2024/DubswayAgenticAI
40
-
41
- echo.
42
- echo 📝 Committing clean repository...
43
- git add .
44
- git commit -m "Initial clean deployment for Hugging Face Spaces"
45
-
46
- echo.
47
- echo 🚀 Pushing to Hugging Face...
48
- echo Choose your branch:
49
- echo 1. Push to develop branch
50
- echo 2. Push to main branch
51
- echo 3. Cancel
52
- echo.
53
- set /p choice="Enter choice (1-3): "
54
-
55
- if "%choice%"=="1" (
56
- echo Pushing to develop branch...
57
- git push space develop
58
- ) else if "%choice%"=="2" (
59
- echo Pushing to main branch...
60
- git push space main
61
- ) else (
62
- echo Cancelled.
63
- cd ..
64
- pause
65
- exit /b 0
66
- )
67
-
68
- echo.
69
- echo ✅ Fresh deployment completed!
70
- echo.
71
- echo 📍 Your Space URL will be:
72
- echo https://huggingface.co/spaces/peace2024/DubswayAgenticAI
73
- echo.
74
- echo 🔍 Monitor the build logs in your Space settings.
75
- echo.
76
-
77
- cd ..
78
- pause
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
push-to-hf.bat DELETED
@@ -1,49 +0,0 @@
1
- @echo off
2
- echo 🚀 Pushing to Hugging Face Spaces...
3
-
4
- echo.
5
- echo 🧹 Cleaning binary files...
6
-
7
- REM Remove vector store from git tracking
8
- git rm -r --cached vector_store/ 2>nul
9
- git rm --cached *.db 2>nul
10
- git rm --cached *.log 2>nul
11
-
12
- echo.
13
- echo 📝 Committing changes...
14
- git add .
15
- git commit -m "Clean repository for Hugging Face deployment"
16
-
17
- echo.
18
- echo 🔄 Pushing to Hugging Face...
19
- echo Choose your branch:
20
- echo 1. Push to develop branch
21
- echo 2. Create and push to main branch
22
- echo 3. Cancel
23
- echo.
24
- set /p choice="Enter choice (1-3): "
25
-
26
- if "%choice%"=="1" (
27
- echo Pushing to develop branch...
28
- git push space develop
29
- ) else if "%choice%"=="2" (
30
- echo Creating main branch...
31
- git checkout -b main
32
- echo Pushing to main branch...
33
- git push space main
34
- ) else (
35
- echo Cancelled.
36
- pause
37
- exit /b 0
38
- )
39
-
40
- echo.
41
- echo ✅ Push completed!
42
- echo.
43
- echo 📍 Your Space URL will be:
44
- echo https://huggingface.co/spaces/peace2024/DubswayAgenticAI
45
- echo.
46
- echo 🔍 Monitor the build logs in your Space settings.
47
- echo.
48
-
49
- pause
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
run_agentic.bat DELETED
@@ -1,43 +0,0 @@
1
- @echo off
2
- echo ========================================
3
- echo Dubsway Video AI - Agentic System Runner
4
- echo ========================================
5
- echo.
6
-
7
- REM Activate virtual environment
8
- echo Activating virtual environment...
9
- call myenv31\Scripts\activate.bat
10
-
11
- REM Check for Groq API key
12
- if "%GROQ_API_KEY%"=="" (
13
- echo.
14
- echo ========================================
15
- echo GROQ API KEY REQUIRED
16
- echo ========================================
17
- echo.
18
- echo Please set your Groq API key:
19
- echo 1. Get API key from: https://console.groq.com/
20
- echo 2. Set environment variable: set GROQ_API_KEY=your_key_here
21
- echo.
22
- echo Then run this script again.
23
- echo.
24
- pause
25
- exit /b 1
26
- )
27
-
28
- echo Groq API key found!
29
- echo.
30
-
31
- REM Run the agentic daemon
32
- echo Starting agentic video processing daemon...
33
- echo.
34
- echo The daemon will:
35
- echo - Process pending videos with enhanced analysis
36
- echo - Use Groq Llama3-8b-8192 for intelligent reasoning
37
- echo - Generate beautiful, comprehensive reports
38
- echo - Fall back to basic analysis if needed
39
- echo.
40
- echo Press Ctrl+C to stop the daemon
41
- echo.
42
-
43
- python -m worker.daemon
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
run_lightweight_agentic.bat DELETED
@@ -1,44 +0,0 @@
1
- @echo off
2
- echo ========================================
3
- echo Dubsway Video AI - Lightweight Agentic System
4
- echo ========================================
5
- echo.
6
-
7
- REM Activate virtual environment
8
- echo Activating virtual environment...
9
- call myenv31\Scripts\activate.bat
10
-
11
- REM Check for Groq API key
12
- if "%GROQ_API_KEY%"=="" (
13
- echo.
14
- echo ========================================
15
- echo GROQ API KEY REQUIRED
16
- echo ========================================
17
- echo.
18
- echo Please set your Groq API key:
19
- echo 1. Get API key from: https://console.groq.com/
20
- echo 2. Set environment variable: set GROQ_API_KEY=your_key_here
21
- echo.
22
- echo Then run this script again.
23
- echo.
24
- pause
25
- exit /b 1
26
- )
27
-
28
- echo Groq API key found!
29
- echo.
30
-
31
- REM Run the lightweight agentic daemon
32
- echo Starting lightweight agentic video processing daemon...
33
- echo.
34
- echo The lightweight daemon will:
35
- echo - Process videos with Groq Llama3-8b-8192 analysis
36
- echo - Skip heavy computer vision models (no hanging)
37
- echo - Provide intelligent text-based insights
38
- echo - Generate beautiful reports
39
- echo - Fall back to basic analysis if needed
40
- echo.
41
- echo Press Ctrl+C to stop the daemon
42
- echo.
43
-
44
- python -m worker.daemon
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
setup_agentic_system.bat DELETED
@@ -1,63 +0,0 @@
1
- @echo off
2
- echo ========================================
3
- echo Dubsway Video AI - Agentic System Setup
4
- echo ========================================
5
- echo.
6
-
7
- REM Check if virtual environment exists
8
- if not exist "myenv31" (
9
- echo Creating virtual environment...
10
- python -m venv myenv31
11
- )
12
-
13
- REM Activate virtual environment
14
- echo Activating virtual environment...
15
- call myenv31\Scripts\activate.bat
16
-
17
- REM Install dependencies
18
- echo Installing dependencies...
19
- pip install -r requirements.txt
20
-
21
- REM Install Groq specifically
22
- echo Installing Groq integration...
23
- pip install langchain-groq
24
-
25
- REM Check for Groq API key
26
- echo.
27
- echo Checking for Groq API key...
28
- if "%GROQ_API_KEY%"=="" (
29
- echo.
30
- echo ========================================
31
- echo GROQ API KEY REQUIRED
32
- echo ========================================
33
- echo.
34
- echo To use the agentic system, you need a Groq API key:
35
- echo 1. Visit: https://console.groq.com/
36
- echo 2. Sign up and get your API key
37
- echo 3. Set the environment variable:
38
- echo set GROQ_API_KEY=your_key_here
39
- echo.
40
- echo Or add it to your .env file:
41
- echo GROQ_API_KEY=your_key_here
42
- echo.
43
- pause
44
- ) else (
45
- echo Groq API key found!
46
- )
47
-
48
- REM Run test
49
- echo.
50
- echo Running system test...
51
- python test_agentic_system.py
52
-
53
- echo.
54
- echo ========================================
55
- echo Setup Complete!
56
- echo ========================================
57
- echo.
58
- echo To run the agentic system:
59
- echo 1. Make sure GROQ_API_KEY is set
60
- echo 2. Run: python -m worker.daemon
61
- echo 3. Or use: start-server.bat
62
- echo.
63
- pause
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
test_agentic_system.py DELETED
@@ -1,180 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Test script for the agentic video analysis system with Groq integration
4
- """
5
- import asyncio
6
- import os
7
- import sys
8
- from pathlib import Path
9
-
10
- # Add project root to Python path
11
- sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
12
-
13
- async def test_groq_integration():
14
- """Test Groq integration and basic functionality"""
15
- print("🧪 Testing Groq Integration for Agentic Video Analysis")
16
- print("=" * 60)
17
-
18
- # Check for Groq API key
19
- groq_api_key = os.getenv("GROQ_API_KEY")
20
- if not groq_api_key:
21
- print("❌ GROQ_API_KEY environment variable not found!")
22
- print("Please set your Groq API key:")
23
- print("1. Get API key from: https://console.groq.com/")
24
- print("2. Set environment variable: GROQ_API_KEY=your_key_here")
25
- return False
26
-
27
- print("✅ GROQ_API_KEY found")
28
-
29
- try:
30
- # Test Groq import
31
- from langchain_groq import ChatGroq
32
- print("✅ langchain-groq imported successfully")
33
-
34
- # Test Groq connection
35
- llm = ChatGroq(
36
- groq_api_key=groq_api_key,
37
- model_name="llama3-8b-8192",
38
- temperature=0.1,
39
- max_tokens=100
40
- )
41
-
42
- # Simple test
43
- response = await llm.ainvoke("Say 'Hello from Groq!'")
44
- print(f"✅ Groq test successful: {response.content}")
45
-
46
- except ImportError as e:
47
- print(f"❌ Failed to import langchain-groq: {e}")
48
- print("Please install: pip install langchain-groq")
49
- return False
50
- except Exception as e:
51
- print(f"❌ Groq test failed: {e}")
52
- return False
53
-
54
- return True
55
-
56
- async def test_enhanced_analysis():
57
- """Test enhanced analysis components"""
58
- print("\n🔍 Testing Enhanced Analysis Components")
59
- print("=" * 60)
60
-
61
- try:
62
- # Test imports
63
- from app.utils.enhanced_analysis import MultiModalAnalyzer
64
- print("✅ Enhanced analysis imports successful")
65
-
66
- # Test analyzer initialization
67
- groq_api_key = os.getenv("GROQ_API_KEY")
68
- analyzer = MultiModalAnalyzer(groq_api_key=groq_api_key)
69
- print("✅ MultiModalAnalyzer initialized successfully")
70
-
71
- # Test agent creation
72
- if analyzer.agent:
73
- print("✅ Agent created successfully")
74
- else:
75
- print("❌ Agent creation failed")
76
- return False
77
-
78
- except Exception as e:
79
- print(f"❌ Enhanced analysis test failed: {e}")
80
- return False
81
-
82
- return True
83
-
84
- async def test_agentic_integration():
85
- """Test agentic integration"""
86
- print("\n🤖 Testing Agentic Integration")
87
- print("=" * 60)
88
-
89
- try:
90
- from app.utils.agentic_integration import AgenticVideoProcessor, MCPToolManager
91
- print("✅ Agentic integration imports successful")
92
-
93
- # Test processor initialization
94
- groq_api_key = os.getenv("GROQ_API_KEY")
95
- processor = AgenticVideoProcessor(enable_enhanced_analysis=True, groq_api_key=groq_api_key)
96
- print("✅ AgenticVideoProcessor initialized successfully")
97
-
98
- # Test MCP tool manager
99
- tool_manager = MCPToolManager(groq_api_key=groq_api_key)
100
- print("✅ MCPToolManager initialized successfully")
101
-
102
- # Test tool registration
103
- if tool_manager.tools:
104
- print(f"✅ {len(tool_manager.tools)} tools registered")
105
- else:
106
- print("❌ No tools registered")
107
- return False
108
-
109
- except Exception as e:
110
- print(f"❌ Agentic integration test failed: {e}")
111
- return False
112
-
113
- return True
114
-
115
- async def test_dependencies():
116
- """Test all required dependencies"""
117
- print("\n📦 Testing Dependencies")
118
- print("=" * 60)
119
-
120
- dependencies = [
121
- ("opencv-python", "cv2"),
122
- ("pillow", "PIL"),
123
- ("torch", "torch"),
124
- ("transformers", "transformers"),
125
- ("faster_whisper", "faster_whisper"),
126
- ("langchain", "langchain"),
127
- ("langchain_groq", "langchain_groq"),
128
- ("duckduckgo-search", "duckduckgo_search"),
129
- ("wikipedia-api", "wikipedia"),
130
- ]
131
-
132
- all_good = True
133
- for package_name, import_name in dependencies:
134
- try:
135
- __import__(import_name)
136
- print(f"✅ {package_name}")
137
- except ImportError:
138
- print(f"❌ {package_name} - missing")
139
- all_good = False
140
-
141
- return all_good
142
-
143
- async def main():
144
- """Main test function"""
145
- print("🚀 Dubsway Video AI - Agentic System Test")
146
- print("=" * 60)
147
-
148
- # Test dependencies first
149
- deps_ok = await test_dependencies()
150
- if not deps_ok:
151
- print("\n❌ Some dependencies are missing. Please install them:")
152
- print("pip install -r requirements.txt")
153
- return False
154
-
155
- # Test Groq integration
156
- groq_ok = await test_groq_integration()
157
- if not groq_ok:
158
- return False
159
-
160
- # Test enhanced analysis
161
- enhanced_ok = await test_enhanced_analysis()
162
- if not enhanced_ok:
163
- return False
164
-
165
- # Test agentic integration
166
- agentic_ok = await test_agentic_integration()
167
- if not agentic_ok:
168
- return False
169
-
170
- print("\n🎉 All tests passed! Your agentic system is ready to use.")
171
- print("\n📋 Next steps:")
172
- print("1. Update your worker/daemon.py to use agentic analysis")
173
- print("2. Set GROQ_API_KEY environment variable")
174
- print("3. Run your daemon with enhanced capabilities")
175
-
176
- return True
177
-
178
- if __name__ == "__main__":
179
- success = asyncio.run(main())
180
- sys.exit(0 if success else 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
worker/daemon.py DELETED
@@ -1,210 +0,0 @@
1
- import asyncio
2
- import os
3
- import time
4
- import signal
5
- import sys
6
- from datetime import datetime
7
- import traceback
8
- import logging
9
-
10
- from sqlalchemy.future import select
11
- from sqlalchemy.ext.asyncio import AsyncSession
12
- from sqlalchemy.exc import SQLAlchemyError
13
-
14
- from app.database import AsyncSessionLocal, init_db, close_db
15
- from app.models import VideoUpload
16
- from app.utils import whisper_llm, pdf, s3, lightweight_agentic
17
-
18
- # Setup logging with UTF-8 encoding for Windows compatibility
19
- logging.basicConfig(
20
- level=logging.INFO,
21
- format='[%(asctime)s] %(levelname)s - %(name)s - %(message)s',
22
- handlers=[
23
- logging.StreamHandler(sys.stdout), # Use stdout for better encoding
24
- logging.FileHandler('worker.log', encoding='utf-8')
25
- ]
26
- )
27
- logger = logging.getLogger("worker.daemon")
28
-
29
- POLL_INTERVAL = 200 # seconds
30
- SHUTDOWN_EVENT = asyncio.Event()
31
-
32
-
33
- def signal_handler(signum, frame):
34
- """Handle shutdown signals gracefully"""
35
- logger.info(f"Received signal {signum}, initiating graceful shutdown...")
36
- SHUTDOWN_EVENT.set()
37
-
38
-
39
- async def process_pending_videos():
40
- """Process all pending video uploads"""
41
- async with AsyncSessionLocal() as session:
42
- try:
43
- # Query for pending videos
44
- result = await session.execute(
45
- select(VideoUpload).where(VideoUpload.status == "pending")
46
- )
47
- pending_videos = result.scalars().all()
48
-
49
- if not pending_videos:
50
- logger.info("No pending videos found")
51
- return
52
-
53
- logger.info(f"Found {len(pending_videos)} pending videos to process")
54
-
55
- for video in pending_videos:
56
- if SHUTDOWN_EVENT.is_set():
57
- logger.info("Shutdown requested, stopping video processing")
58
- break
59
-
60
- logger.info(f"Processing video ID {video.id} for user {video.user_id}")
61
-
62
- try:
63
- # Update status to processing
64
- video.status = "processing"
65
- video.updated_at = datetime.utcnow()
66
- await session.commit()
67
-
68
- # Process with Lightweight Agentic Analysis (Groq + Llama3)
69
- try:
70
- transcription, summary = await lightweight_agentic.analyze_with_lightweight_agentic(
71
- video_url=video.video_url,
72
- user_id=video.user_id,
73
- db=session
74
- )
75
- logger.info(f"Lightweight agentic analysis completed for video {video.id}")
76
- except Exception as agentic_error:
77
- logger.warning(f"Lightweight agentic analysis failed, falling back to basic Whisper: {agentic_error}")
78
- transcription, summary = await whisper_llm.analyze(
79
- video_url=video.video_url,
80
- user_id=video.user_id,
81
- db=session
82
- )
83
- logger.info(f"Basic Whisper analysis completed for video {video.id}")
84
-
85
- except Exception as e:
86
- logger.error(f"Whisper failed for video {video.id}: {e}")
87
- logger.debug(traceback.format_exc())
88
-
89
- # Update status to failed
90
- video.status = "failed"
91
- video.updated_at = datetime.utcnow()
92
- await session.commit()
93
- continue
94
-
95
- try:
96
- # Generate PDF
97
- pdf_bytes = pdf.generate(transcription, summary)
98
- logger.info(f"PDF generation completed for video {video.id}")
99
- except Exception as e:
100
- logger.error(f"PDF generation failed for video {video.id}: {e}")
101
- logger.debug(traceback.format_exc())
102
-
103
- video.status = "failed"
104
- video.updated_at = datetime.utcnow()
105
- await session.commit()
106
- continue
107
-
108
- try:
109
- # Upload to S3
110
- pdf_key = f"pdfs/{video.id}.pdf"
111
- pdf_url = s3.upload_pdf_bytes(pdf_bytes, pdf_key)
112
- logger.info(f"S3 upload completed for video {video.id}")
113
- except Exception as e:
114
- logger.error(f"Upload to S3 failed for video {video.id}: {e}")
115
- logger.debug(traceback.format_exc())
116
-
117
- video.status = "failed"
118
- video.updated_at = datetime.utcnow()
119
- await session.commit()
120
- continue
121
-
122
- try:
123
- # Mark as completed
124
- video.status = "completed"
125
- video.pdf_url = pdf_url
126
- video.updated_at = datetime.utcnow()
127
- await session.commit()
128
- logger.info(f"Successfully completed video {video.id}")
129
-
130
- except SQLAlchemyError as e:
131
- logger.error(f"DB commit failed for video {video.id}: {e}")
132
- logger.debug(traceback.format_exc())
133
- await session.rollback()
134
-
135
- except SQLAlchemyError as e:
136
- logger.error(f"Database error: {e}")
137
- logger.debug(traceback.format_exc())
138
- except Exception as e:
139
- logger.error(f"Unexpected error in process_pending_videos: {e}")
140
- logger.debug(traceback.format_exc())
141
-
142
-
143
- async def run_worker():
144
- """Main worker loop"""
145
- logger.info("Async worker daemon started...")
146
-
147
- # Initialize database
148
- try:
149
- await init_db()
150
- logger.info("Database initialized successfully")
151
- except Exception as e:
152
- logger.error(f"Failed to initialize database: {e}")
153
- return
154
-
155
- cycle_count = 0
156
- while not SHUTDOWN_EVENT.is_set():
157
- cycle_count += 1
158
- logger.info(f"Worker cycle {cycle_count} - Checking for pending videos...")
159
-
160
- try:
161
- await process_pending_videos()
162
- except Exception as e:
163
- logger.error(f"Worker loop error: {e}")
164
- logger.debug(traceback.format_exc())
165
-
166
- # Wait for next cycle or shutdown
167
- try:
168
- await asyncio.wait_for(SHUTDOWN_EVENT.wait(), timeout=POLL_INTERVAL)
169
- except asyncio.TimeoutError:
170
- # Normal timeout, continue to next cycle
171
- pass
172
- except Exception as e:
173
- logger.error(f"Error in worker wait: {e}")
174
- break
175
-
176
- logger.info("Worker loop stopped, cleaning up...")
177
-
178
- # Cleanup
179
- try:
180
- await close_db()
181
- logger.info("Database connections closed")
182
- except Exception as e:
183
- logger.error(f"Error during cleanup: {e}")
184
-
185
-
186
- async def main():
187
- """Main entry point with signal handling"""
188
- # Setup signal handlers
189
- signal.signal(signal.SIGINT, signal_handler)
190
- signal.signal(signal.SIGTERM, signal_handler)
191
-
192
- try:
193
- await run_worker()
194
- except KeyboardInterrupt:
195
- logger.info("Keyboard interrupt received")
196
- except Exception as e:
197
- logger.error(f"Fatal error in main: {e}")
198
- logger.debug(traceback.format_exc())
199
- finally:
200
- logger.info("Worker daemon shutdown complete")
201
-
202
-
203
- if __name__ == "__main__":
204
- try:
205
- asyncio.run(main())
206
- except KeyboardInterrupt:
207
- logger.info("Worker daemon interrupted by user")
208
- except Exception as e:
209
- logger.error(f"Fatal error: {e}")
210
- sys.exit(1)