Upload 5 files

- .gitignore +1 -0
- README.md +0 -13
- app.py +158 -0
- config.py +36 -0
- requirements.txt +28 -0
.gitignore
ADDED
@@ -0,0 +1 @@
+.env
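
The single ignored entry keeps local secrets out of the repository: config.py (added below) loads the environment with python-dotenv and reads HF_TOKEN, plus an optional LLAVA_MODEL_PATH override. A local .env would therefore look something like this (values are placeholders):

HF_TOKEN=hf_your_token_here
LLAVA_MODEL_PATH=llava-hf/llava-1.5-7b-hf
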
README.md
CHANGED
@@ -1,13 +0,0 @@
----
-title: Chat To Video
-emoji: 📊
-colorFrom: indigo
-colorTo: blue
-sdk: streamlit
-sdk_version: 1.44.1
-app_file: app.py
-pinned: false
-short_description: create a basic app for user can interact with video
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py
ADDED
@@ -0,0 +1,158 @@
+"""Main Streamlit application for the video chat interface."""
+import streamlit as st
+import os
+from pathlib import Path
+import time
+
+from modules.video_processor import VideoProcessor
+from modules.embedding import EmbeddingGenerator
+from modules.indexing import VectorStore
+from modules.retrieval import RetrievalSystem
+from modules.llm import LLMProcessor
+
+# Initialize the session state
+if "chat_history" not in st.session_state:
+    st.session_state.chat_history = []
+
+if "video_id" not in st.session_state:
+    st.session_state.video_id = None
+
+if "video_title" not in st.session_state:
+    st.session_state.video_title = None
+
+if "video_processed" not in st.session_state:
+    st.session_state.video_processed = False
+
+# Initialize components
+@st.cache_resource
+def load_components():
+    video_processor = VideoProcessor()
+    embedding_generator = EmbeddingGenerator()
+    vector_store = VectorStore()
+    retrieval_system = RetrievalSystem(vector_store, embedding_generator)
+    llm_processor = LLMProcessor()
+
+    return {
+        "video_processor": video_processor,
+        "embedding_generator": embedding_generator,
+        "vector_store": vector_store,
+        "retrieval_system": retrieval_system,
+        "llm_processor": llm_processor
+    }
+
+components = load_components()
+
+# Application title
+st.title("Video Chat Application")
+
+# Sidebar with options
+st.sidebar.title("Video Options")
+
+# Video URL input
+video_url = st.sidebar.text_input("Enter video URL:")
+
+# Video processing options
+include_audio = st.sidebar.checkbox("Include audio", value=True)
+include_subtitles = st.sidebar.checkbox("Include subtitles", value=True)
+
+# Process video button
+if st.sidebar.button("Process Video"):
+    if video_url:
+        with st.spinner("Processing video... This may take a few minutes."):
+            try:
+                # Process the video
+                video_processor = components["video_processor"]
+                video_data = video_processor.process_video(
+                    url=video_url,
+                    include_audio=include_audio,
+                    include_subtitles=include_subtitles
+                )
+
+                # Generate embeddings
+                embedding_generator = components["embedding_generator"]
+                embeddings_data = embedding_generator.process_video_data(video_data)
+
+                # Index the video
+                vector_store = components["vector_store"]
+                index_result = vector_store.index_video(video_url, video_data, embeddings_data)
+
+                # Update session state
+                st.session_state.video_id = index_result["video_id"]
+                st.session_state.video_title = video_data["title"]
+                st.session_state.video_processed = True
+                st.session_state.video_data = video_data
+
+                st.sidebar.success(f"Video processed successfully: {video_data['title']}")
+            except Exception as e:
+                st.sidebar.error(f"Error processing video: {str(e)}")
+    else:
+        st.sidebar.error("Please enter a valid video URL")
+
+# Main chat interface
+st.subheader("Chat with the Video")
+
+# Display current video information
+if st.session_state.video_processed and st.session_state.video_title:
+    st.info(f"Current video: {st.session_state.video_title}")
+
+# Display chat history
+for message in st.session_state.chat_history:
+    if message["role"] == "user":
+        st.write(f"You: {message['content']}")
+    else:
+        st.write(f"AI: {message['content']}")
+
+# Chat input
+user_query = st.text_input("Ask a question about the video:")
+
+if st.button("Send") and user_query:
+    # Add user message to chat history
+    st.session_state.chat_history.append({
+        "role": "user",
+        "content": user_query
+    })
+
+    # Check if a video has been processed
+    if not st.session_state.video_processed:
+        response = "Please process a video first before asking questions."
+    else:
+        with st.spinner("Generating response..."):
+            try:
+                # Retrieve relevant context
+                retrieval_system = components["retrieval_system"]
+                context = retrieval_system.retrieve_context_for_query(
+                    query=user_query,
+                    video_id=st.session_state.video_id
+                )
+
+                # Get relevant frame paths if available
+                frame_paths = None
+                if "frames" in context and context["frames"]:
+                    frame_paths = [frame["path"] for frame in context["frames"] if "path" in frame]
+
+                # Generate response
+                llm_processor = components["llm_processor"]
+                response = llm_processor.generate_response(
+                    query=user_query,
+                    context=context,
+                    frames_paths=frame_paths
+                )
+            except Exception as e:
+                response = f"Error generating response: {str(e)}"
+
+    # Add assistant response to chat history
+    st.session_state.chat_history.append({
+        "role": "assistant",
+        "content": response
+    })
+
+    # Rerun to update the display
+    st.experimental_rerun()
+
+# Display current video frame if available
+if st.session_state.video_processed and "video_data" in st.session_state:
+    video_data = st.session_state.video_data
+    if "frame_paths" in video_data and video_data["frame_paths"]:
+        # Display the first frame
+        st.sidebar.subheader("Video Preview")
+        st.sidebar.image(str(video_data["frame_paths"][0]))
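
Two notes on this file as committed. First, it imports from a modules package (modules.video_processor, modules.embedding, modules.indexing, modules.retrieval, modules.llm) that is not among the five files in this commit, so the import block will raise ModuleNotFoundError at startup until that package is uploaded. As orientation, here is a minimal sketch of the interfaces app.py assumes; every name, parameter, and return key below is inferred from the call sites above, and all bodies are placeholders:

# Hypothetical stubs for the missing modules package; signatures are
# inferred from how app.py calls them, not from the actual source.

class VideoProcessor:
    def process_video(self, url, include_audio=True, include_subtitles=True) -> dict:
        # app.py expects keys such as "title" and "frame_paths" in the result.
        raise NotImplementedError

class EmbeddingGenerator:
    def process_video_data(self, video_data: dict):
        # Embed the extracted frames/transcript for indexing.
        raise NotImplementedError

class VectorStore:
    def index_video(self, url, video_data, embeddings_data) -> dict:
        # app.py expects at least {"video_id": ...} in the result.
        raise NotImplementedError

class RetrievalSystem:
    def __init__(self, vector_store, embedding_generator):
        self.vector_store = vector_store
        self.embedding_generator = embedding_generator

    def retrieve_context_for_query(self, query, video_id) -> dict:
        # May include "frames": [{"path": ...}, ...] used for multimodal prompting.
        raise NotImplementedError

class LLMProcessor:
    def generate_response(self, query, context, frames_paths=None) -> str:
        # Produce the chat answer from the query, retrieved context, and optional frames.
        raise NotImplementedError

Second, st.experimental_rerun() was deprecated and has since been removed from Streamlit; with requirements.txt pinning only streamlit>=1.28.0, a fresh build resolves to a release where the call raises AttributeError. The supported replacement is st.rerun().
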
config.py
ADDED
@@ -0,0 +1,36 @@
+"""Configuration settings for the video chat application."""
+import os
+from pathlib import Path
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# Base paths
+BASE_DIR = Path(__file__).resolve().parent
+DATA_DIR = BASE_DIR / "data"
+TEMP_DIR = DATA_DIR / "temp"
+
+# Create directories if they don't exist
+DATA_DIR.mkdir(exist_ok=True)
+TEMP_DIR.mkdir(exist_ok=True)
+
+# Model paths and configurations
+#BRIDGETOWER_MODEL = "BridgeTower/bridgetower-large"
+BRIDGETOWER_MODEL = "BridgeTower/bridgetower-large-itm-mlm"
+
+#LLAVA_MODEL = os.getenv("LLAVA_MODEL_PATH", "liuhaotian/llava-v1.5-7b")
+LLAVA_MODEL = os.getenv("LLAVA_MODEL_PATH", "llava-hf/llava-1.5-7b-hf")
+
+# LanceDB configuration
+LANCEDB_URI = str(DATA_DIR / "lancedb")
+# HuggingFace Token from environment
+HF_TOKEN = os.getenv("HF_TOKEN")
+
+# Video processing settings
+FRAME_EXTRACTION_RATE = 1  # Extract 1 frame per second
+MAX_FRAMES = 100  # Maximum number of frames to process
+
+# Retrieval settings
+TOP_K_RESULTS = 5  # Number of results to retrieve for each query
+
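
LANCEDB_URI stores the vector index on disk under data/lancedb. As orientation, a consumer such as modules/indexing.py might open it roughly like this (a minimal sketch against the lancedb Python client; the table name, vector width, and query vector are hypothetical):

import lancedb

from config import LANCEDB_URI, TOP_K_RESULTS

# Connect to (or create) the embedded database at the configured path.
db = lancedb.connect(LANCEDB_URI)

# "video_segments" is a made-up table name; the real schema would live in modules/indexing.py.
table = db.open_table("video_segments")

# Nearest-neighbour search for one query embedding, trimmed to the configured top-k.
query_vector = [0.0] * 512  # placeholder embedding width
results = table.search(query_vector).limit(TOP_K_RESULTS).to_list()
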
requirements.txt
ADDED
@@ -0,0 +1,28 @@
+# Core dependencies
+streamlit>=1.28.0
+python-dotenv>=1.0.0
+pydantic>=2.4.2
+
+
+# Video processing
+pytube>=15.0.0
+opencv-python>=4.8.0
+moviepy==1.0.3
+imageio-ffmpeg==0.4.8
+pillow>=10.0.0
+
+# Audio processing
+librosa>=0.10.1
+pydub>=0.25.1
+whispercpp>=0.0.17
+
+
+# Embedding and vector DB
+torch>=2.0.0
+transformers>=4.34.0
+sentence-transformers>=2.2.2
+lancedb>=0.3.0
+
+# LLM integration
+llama-cpp-python>=0.2.0
+accelerate>=0.23.0
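
On Spaces these dependencies are installed automatically at build time; locally, pip install -r requirements.txt followed by streamlit run app.py should reproduce the app, assuming the modules package noted under app.py is present.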