Yadav122 committed on
Commit
648bb55
·
verified ·
1 Parent(s): 867c809

Upload 5 files

Browse files
Files changed (5) hide show
  1. .gitignore +1 -0
  2. README.md +0 -13
  3. app.py +158 -0
  4. config.py +36 -0
  5. requirements.txt +28 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
README.md CHANGED
@@ -1,13 +0,0 @@
1
- ---
2
- title: Chat To Video
3
- emoji: 📊
4
- colorFrom: indigo
5
- colorTo: blue
6
- sdk: streamlit
7
- sdk_version: 1.44.1
8
- app_file: app.py
9
- pinned: false
10
- short_description: create a basic app for user can interact with video
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Main Streamlit application for the video chat interface."""
import streamlit as st

from modules.video_processor import VideoProcessor
from modules.embedding import EmbeddingGenerator
from modules.indexing import VectorStore
from modules.retrieval import RetrievalSystem
from modules.llm import LLMProcessor

# NOTE: the original file also imported os, pathlib.Path and time, none of
# which are referenced anywhere in this module — removed as dead imports.

# Session-state defaults. st.session_state persists across Streamlit reruns,
# so each key is only seeded on the very first run of a session.
_SESSION_DEFAULTS = {
    "chat_history": [],        # list of {"role": ..., "content": ...} turns
    "video_id": None,          # id returned by VectorStore.index_video
    "video_title": None,       # title of the currently indexed video
    "video_processed": False,  # True once a video has been fully indexed
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
26
# Initialize components
@st.cache_resource
def load_components():
    """Build the processing pipeline and cache it for the process lifetime.

    st.cache_resource ensures the (presumably heavyweight) model-backed
    components are constructed once, not on every Streamlit rerun.

    Returns:
        dict: the five pipeline components keyed by role name.
    """
    # Construction order matches first use: processor, embedder, store,
    # then the retriever that wires store+embedder together, then the LLM.
    processor = VideoProcessor()
    embedder = EmbeddingGenerator()
    store = VectorStore()
    return {
        "video_processor": processor,
        "embedding_generator": embedder,
        "vector_store": store,
        "retrieval_system": RetrievalSystem(store, embedder),
        "llm_processor": LLMProcessor(),
    }
42
+
43
components = load_components()

# Application title
st.title("Video Chat Application")

# Sidebar: video source and processing options. Using the sidebar as a
# context manager is equivalent to calling st.sidebar.* directly.
with st.sidebar:
    st.title("Video Options")

    # Video URL input
    video_url = st.text_input("Enter video URL:")

    # Video processing options
    include_audio = st.checkbox("Include audio", value=True)
    include_subtitles = st.checkbox("Include subtitles", value=True)
57
+
58
# Process video button: run the full ingest pipeline for the entered URL.
if st.sidebar.button("Process Video"):
    if not video_url:
        st.sidebar.error("Please enter a valid video URL")
    else:
        with st.spinner("Processing video... This may take a few minutes."):
            try:
                # Step 1: process the video source with the sidebar options.
                video_data = components["video_processor"].process_video(
                    url=video_url,
                    include_audio=include_audio,
                    include_subtitles=include_subtitles,
                )

                # Step 2: turn the processed video data into embeddings.
                embeddings_data = components["embedding_generator"].process_video_data(
                    video_data
                )

                # Step 3: index the embeddings in the vector store.
                index_result = components["vector_store"].index_video(
                    video_url, video_data, embeddings_data
                )

                # Remember the processed video so the chat section can use it.
                st.session_state.video_id = index_result["video_id"]
                st.session_state.video_title = video_data["title"]
                st.session_state.video_processed = True
                st.session_state.video_data = video_data

                st.sidebar.success(f"Video processed successfully: {video_data['title']}")
            except Exception as e:
                # Surface any pipeline failure in the sidebar instead of crashing.
                st.sidebar.error(f"Error processing video: {str(e)}")
90
+
91
# Main chat interface
st.subheader("Chat with the Video")

# Tell the user which video the chat is grounded in, once one is indexed.
if st.session_state.video_processed and st.session_state.video_title:
    st.info(f"Current video: {st.session_state.video_title}")

# Replay the conversation so far, labelling each turn by its author.
for msg in st.session_state.chat_history:
    if msg["role"] == "user":
        st.write(f"You: {msg['content']}")
    else:
        st.write(f"AI: {msg['content']}")

# Chat input
user_query = st.text_input("Ask a question about the video:")
107
+
108
if st.button("Send") and user_query:
    # Record the user's turn first so it survives the rerun below.
    st.session_state.chat_history.append({
        "role": "user",
        "content": user_query
    })

    # A question only makes sense once a video has been indexed.
    if not st.session_state.video_processed:
        response = "Please process a video first before asking questions."
    else:
        with st.spinner("Generating response..."):
            try:
                # Retrieve the context most relevant to the query for the
                # currently indexed video.
                retrieval_system = components["retrieval_system"]
                context = retrieval_system.retrieve_context_for_query(
                    query=user_query,
                    video_id=st.session_state.video_id
                )

                # Get relevant frame paths if available
                frame_paths = None
                if "frames" in context and context["frames"]:
                    frame_paths = [frame["path"] for frame in context["frames"] if "path" in frame]

                # Generate the assistant's answer from query + context (+frames).
                llm_processor = components["llm_processor"]
                response = llm_processor.generate_response(
                    query=user_query,
                    context=context,
                    frames_paths=frame_paths
                )
            except Exception as e:
                # Show the failure as the assistant's reply rather than crashing.
                response = f"Error generating response: {str(e)}"

    # Record the assistant's turn.
    st.session_state.chat_history.append({
        "role": "assistant",
        "content": response
    })

    # Rerun so the updated chat history is redrawn immediately.
    # FIX: st.experimental_rerun() was deprecated in Streamlit 1.27 and
    # removed in 1.37; with requirements pinning streamlit>=1.28 (Space SDK
    # 1.44.1) it raises AttributeError. st.rerun() is the replacement.
    st.rerun()
151
+
152
# Sidebar preview: show the first extracted frame of the processed video.
if st.session_state.video_processed and "video_data" in st.session_state:
    preview_frames = st.session_state.video_data.get("frame_paths")
    if preview_frames:
        st.sidebar.subheader("Video Preview")
        st.sidebar.image(str(preview_frames[0]))
config.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Configuration settings for the video chat application."""
import os
from pathlib import Path

from dotenv import load_dotenv

# Load environment variables from a local .env file (HF_TOKEN,
# LLAVA_MODEL_PATH, ...) before any os.getenv lookups below.
load_dotenv()

# Base paths, resolved relative to this file so the app works from any CWD.
BASE_DIR = Path(__file__).resolve().parent
DATA_DIR = BASE_DIR / "data"
TEMP_DIR = DATA_DIR / "temp"

# Create directories if they don't exist. parents=True makes this robust
# even if an intermediate directory is missing (e.g. on a fresh checkout).
DATA_DIR.mkdir(parents=True, exist_ok=True)
TEMP_DIR.mkdir(parents=True, exist_ok=True)

# Model paths and configurations (HuggingFace Hub repo ids).
# Commented-out superseded model ids removed.
BRIDGETOWER_MODEL = "BridgeTower/bridgetower-large-itm-mlm"
LLAVA_MODEL = os.getenv("LLAVA_MODEL_PATH", "llava-hf/llava-1.5-7b-hf")

# LanceDB configuration
LANCEDB_URI = str(DATA_DIR / "lancedb")

# HuggingFace Token from environment
HF_TOKEN = os.getenv("HF_TOKEN")

# Video processing settings
FRAME_EXTRACTION_RATE = 1  # Extract 1 frame per second
MAX_FRAMES = 100  # Maximum number of frames to process

# Retrieval settings
TOP_K_RESULTS = 5  # Number of results to retrieve for each query
+
requirements.txt ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core dependencies
2
+ streamlit>=1.28.0
3
+ python-dotenv>=1.0.0
4
+ pydantic>=2.4.2
5
+
6
+
7
+ # Video processing
8
+ pytube>=15.0.0
9
+ opencv-python>=4.8.0
10
+ moviepy==1.0.3
11
+ imageio-ffmpeg==0.4.8
12
+ pillow>=10.0.0
13
+
14
+ # Audio processing
15
+ librosa>=0.10.1
16
+ pydub>=0.25.1
17
+ whispercpp>=0.0.17
18
+
19
+
20
+ # Embedding and vector DB
21
+ torch>=2.0.0
22
+ transformers>=4.34.0
23
+ sentence-transformers>=2.2.2
24
+ lancedb>=0.3.0
25
+
26
+ # LLM integration
27
+ llama-cpp-python>=0.2.0
28
+ accelerate>=0.23.0