Upload 5 files

- .gitignore +1 -0
- README.md +0 -13
- app.py +158 -0
- config.py +36 -0
- requirements.txt +28 -0
.gitignore
ADDED
@@ -0,0 +1 @@
+.env
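
The single ignored entry keeps local secrets out of the repository: config.py (added below) loads the environment with python-dotenv and reads HF_TOKEN, plus an optional LLAVA_MODEL_PATH override. A local .env would therefore look something like this (values are placeholders):

HF_TOKEN=hf_your_token_here
LLAVA_MODEL_PATH=llava-hf/llava-1.5-7b-hf
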
README.md
CHANGED
@@ -1,13 +0,0 @@
----
-title: Chat To Video
-emoji: 📊
-colorFrom: indigo
-colorTo: blue
-sdk: streamlit
-sdk_version: 1.44.1
-app_file: app.py
-pinned: false
-short_description: create a basic app for user can interact with video
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py
ADDED
@@ -0,0 +1,158 @@
+"""Main Streamlit application for the video chat interface."""
+import streamlit as st
+import os
+from pathlib import Path
+import time
+
+from modules.video_processor import VideoProcessor
+from modules.embedding import EmbeddingGenerator
+from modules.indexing import VectorStore
+from modules.retrieval import RetrievalSystem
+from modules.llm import LLMProcessor
+
+# Initialize the session state
+if "chat_history" not in st.session_state:
+    st.session_state.chat_history = []
+
+if "video_id" not in st.session_state:
+    st.session_state.video_id = None
+
+if "video_title" not in st.session_state:
+    st.session_state.video_title = None
+
+if "video_processed" not in st.session_state:
+    st.session_state.video_processed = False
+
+# Initialize components
+@st.cache_resource
+def load_components():
+    video_processor = VideoProcessor()
+    embedding_generator = EmbeddingGenerator()
+    vector_store = VectorStore()
+    retrieval_system = RetrievalSystem(vector_store, embedding_generator)
+    llm_processor = LLMProcessor()
+
+    return {
+        "video_processor": video_processor,
+        "embedding_generator": embedding_generator,
+        "vector_store": vector_store,
+        "retrieval_system": retrieval_system,
+        "llm_processor": llm_processor
+    }
+
+components = load_components()
+
+# Application title
+st.title("Video Chat Application")
+
+# Sidebar with options
+st.sidebar.title("Video Options")
+
+# Video URL input
+video_url = st.sidebar.text_input("Enter video URL:")
+
+# Video processing options
+include_audio = st.sidebar.checkbox("Include audio", value=True)
+include_subtitles = st.sidebar.checkbox("Include subtitles", value=True)
+
+# Process video button
+if st.sidebar.button("Process Video"):
+    if video_url:
+        with st.spinner("Processing video... This may take a few minutes."):
+            try:
+                # Process the video
+                video_processor = components["video_processor"]
+                video_data = video_processor.process_video(
+                    url=video_url,
+                    include_audio=include_audio,
+                    include_subtitles=include_subtitles
+                )
+
+                # Generate embeddings
+                embedding_generator = components["embedding_generator"]
+                embeddings_data = embedding_generator.process_video_data(video_data)
+
+                # Index the video
+                vector_store = components["vector_store"]
+                index_result = vector_store.index_video(video_url, video_data, embeddings_data)
+
+                # Update session state
+                st.session_state.video_id = index_result["video_id"]
+                st.session_state.video_title = video_data["title"]
+                st.session_state.video_processed = True
+                st.session_state.video_data = video_data
+
+                st.sidebar.success(f"Video processed successfully: {video_data['title']}")
+            except Exception as e:
+                st.sidebar.error(f"Error processing video: {str(e)}")
+    else:
+        st.sidebar.error("Please enter a valid video URL")
+
+# Main chat interface
+st.subheader("Chat with the Video")
+
+# Display current video information
+if st.session_state.video_processed and st.session_state.video_title:
+    st.info(f"Current video: {st.session_state.video_title}")
+
+# Display chat history
+for message in st.session_state.chat_history:
+    if message["role"] == "user":
+        st.write(f"You: {message['content']}")
+    else:
+        st.write(f"AI: {message['content']}")
+
+# Chat input
+user_query = st.text_input("Ask a question about the video:")
+
+if st.button("Send") and user_query:
+    # Add user message to chat history
+    st.session_state.chat_history.append({
+        "role": "user",
+        "content": user_query
+    })
+
+    # Check if a video has been processed
+    if not st.session_state.video_processed:
+        response = "Please process a video first before asking questions."
+    else:
+        with st.spinner("Generating response..."):
+            try:
+                # Retrieve relevant context
+                retrieval_system = components["retrieval_system"]
+                context = retrieval_system.retrieve_context_for_query(
+                    query=user_query,
+                    video_id=st.session_state.video_id
+                )
+
+                # Get relevant frame paths if available
+                frame_paths = None
+                if "frames" in context and context["frames"]:
+                    frame_paths = [frame["path"] for frame in context["frames"] if "path" in frame]
+
+                # Generate response
+                llm_processor = components["llm_processor"]
+                response = llm_processor.generate_response(
+                    query=user_query,
+                    context=context,
+                    frames_paths=frame_paths
+                )
+            except Exception as e:
+                response = f"Error generating response: {str(e)}"
+
+    # Add assistant response to chat history
+    st.session_state.chat_history.append({
+        "role": "assistant",
+        "content": response
+    })
+
+    # Rerun to update the display
+    st.experimental_rerun()
+
+# Display current video frame if available
+if st.session_state.video_processed and "video_data" in st.session_state:
+    video_data = st.session_state.video_data
+    if "frame_paths" in video_data and video_data["frame_paths"]:
+        # Display the first frame
+        st.sidebar.subheader("Video Preview")
+        st.sidebar.image(str(video_data["frame_paths"][0]))
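
Two notes on this file as committed. First, it imports from a modules package (modules.video_processor, modules.embedding, modules.indexing, modules.retrieval, modules.llm) that is not among the five files in this commit, so the import block will raise ModuleNotFoundError at startup until that package is uploaded. As orientation, here is a minimal sketch of the interfaces app.py assumes; every name, parameter, and return key below is inferred from the call sites above, and all bodies are placeholders:

# Hypothetical stubs for the missing modules package; signatures are
# inferred from how app.py calls them, not from the actual source.

class VideoProcessor:
    def process_video(self, url, include_audio=True, include_subtitles=True) -> dict:
        # app.py expects keys such as "title" and "frame_paths" in the result.
        raise NotImplementedError

class EmbeddingGenerator:
    def process_video_data(self, video_data: dict):
        # Embed the extracted frames/transcript for indexing.
        raise NotImplementedError

class VectorStore:
    def index_video(self, url, video_data, embeddings_data) -> dict:
        # app.py expects at least {"video_id": ...} in the result.
        raise NotImplementedError

class RetrievalSystem:
    def __init__(self, vector_store, embedding_generator):
        self.vector_store = vector_store
        self.embedding_generator = embedding_generator

    def retrieve_context_for_query(self, query, video_id) -> dict:
        # May include "frames": [{"path": ...}, ...] used for multimodal prompting.
        raise NotImplementedError

class LLMProcessor:
    def generate_response(self, query, context, frames_paths=None) -> str:
        # Produce the chat answer from the query, retrieved context, and optional frames.
        raise NotImplementedError

Second, st.experimental_rerun() was deprecated and has since been removed from Streamlit; with requirements.txt pinning only streamlit>=1.28.0, a fresh build resolves to a release where the call raises AttributeError. The supported replacement is st.rerun().
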
config.py
ADDED
@@ -0,0 +1,36 @@
+"""Configuration settings for the video chat application."""
+import os
+from pathlib import Path
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# Base paths
+BASE_DIR = Path(__file__).resolve().parent
+DATA_DIR = BASE_DIR / "data"
+TEMP_DIR = DATA_DIR / "temp"
+
+# Create directories if they don't exist
+DATA_DIR.mkdir(exist_ok=True)
+TEMP_DIR.mkdir(exist_ok=True)
+
+# Model paths and configurations
+#BRIDGETOWER_MODEL = "BridgeTower/bridgetower-large"
+BRIDGETOWER_MODEL = "BridgeTower/bridgetower-large-itm-mlm"
+
+#LLAVA_MODEL = os.getenv("LLAVA_MODEL_PATH", "liuhaotian/llava-v1.5-7b")
+LLAVA_MODEL = os.getenv("LLAVA_MODEL_PATH", "llava-hf/llava-1.5-7b-hf")
+
+# LanceDB configuration
+LANCEDB_URI = str(DATA_DIR / "lancedb")
+# HuggingFace Token from environment
+HF_TOKEN = os.getenv("HF_TOKEN")
+
+# Video processing settings
+FRAME_EXTRACTION_RATE = 1  # Extract 1 frame per second
+MAX_FRAMES = 100  # Maximum number of frames to process
+
+# Retrieval settings
+TOP_K_RESULTS = 5  # Number of results to retrieve for each query
+
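
LANCEDB_URI stores the vector index on disk under data/lancedb. As orientation, a consumer such as modules/indexing.py might open it roughly like this (a minimal sketch against the lancedb Python client; the table name, vector width, and query vector are hypothetical):

import lancedb

from config import LANCEDB_URI, TOP_K_RESULTS

# Connect to (or create) the embedded database at the configured path.
db = lancedb.connect(LANCEDB_URI)

# "video_segments" is a made-up table name; the real schema would live in modules/indexing.py.
table = db.open_table("video_segments")

# Nearest-neighbour search for one query embedding, trimmed to the configured top-k.
query_vector = [0.0] * 512  # placeholder embedding width
results = table.search(query_vector).limit(TOP_K_RESULTS).to_list()
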
requirements.txt
ADDED
@@ -0,0 +1,28 @@
+# Core dependencies
+streamlit>=1.28.0
+python-dotenv>=1.0.0
+pydantic>=2.4.2
+
+
+# Video processing
+pytube>=15.0.0
+opencv-python>=4.8.0
+moviepy==1.0.3
+imageio-ffmpeg==0.4.8
+pillow>=10.0.0
+
+# Audio processing
+librosa>=0.10.1
+pydub>=0.25.1
+whispercpp>=0.0.17
+
+
+# Embedding and vector DB
+torch>=2.0.0
+transformers>=4.34.0
+sentence-transformers>=2.2.2
+lancedb>=0.3.0
+
+# LLM integration
+llama-cpp-python>=0.2.0
+accelerate>=0.23.0
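
On Spaces these dependencies are installed automatically at build time; locally, pip install -r requirements.txt followed by streamlit run app.py should reproduce the app, assuming the modules package noted under app.py is present.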