awacke1 committed on
Commit
b866d44
1 Parent(s): d907b5f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -46
app.py CHANGED
@@ -67,15 +67,21 @@ def process_audio(audio_input):
67
  )
68
  st.markdown(response.choices[0].message.content)
69
 
 
 
 
 
 
 
 
70
  def process_video(video_path, seconds_per_frame=2):
71
  base64Frames = []
72
  base_video_path, _ = os.path.splitext(video_path)
73
-
74
  video = cv2.VideoCapture(video_path)
75
  total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
76
  fps = video.get(cv2.CAP_PROP_FPS)
77
  frames_to_skip = int(fps * seconds_per_frame)
78
- curr_frame=0
79
 
80
  # Loop through the video and extract frames at specified sampling rate
81
  while curr_frame < total_frames - 1:
@@ -86,6 +92,7 @@ def process_video(video_path, seconds_per_frame=2):
86
  _, buffer = cv2.imencode(".jpg", frame)
87
  base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
88
  curr_frame += frames_to_skip
 
89
  video.release()
90
 
91
  # Extract audio from video
@@ -97,54 +104,34 @@ def process_video(video_path, seconds_per_frame=2):
97
 
98
  print(f"Extracted {len(base64Frames)} frames")
99
  print(f"Extracted audio to {audio_path}")
 
100
  return base64Frames, audio_path
101
 
102
- # Extract 1 frame per second. You can adjust the `seconds_per_frame` parameter to change the sampling rate
103
- base64Frames, audio_path = process_video(VIDEO_PATH, seconds_per_frame=1)
104
-
105
-
106
- ## Generate a summary with visual and audio
107
- def process_video(video_input):
108
- base64Frames, audio_path = process_video(video_input, seconds_per_frame=1)
109
- response = client.chat.completions.create(
110
- model=MODEL,
111
- messages=[
112
- {"role": "system", "content":"""You are generating a video summary. Create a summary of the provided video and its transcript. Respond in Markdown"""},
113
- {"role": "user", "content": [
114
- "These are the frames from the video.",
115
- *map(lambda x: {"type": "image_url",
116
- "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames),
117
- {"type": "text", "text": f"The audio transcription is: {transcription.text}"}
 
 
 
118
  ],
119
- }
120
- ],
121
- temperature=0,
122
- )
123
- st.markdown(response.choices[0].message.content)
124
 
125
- def process_video_frames(video_path, seconds_per_frame=2):
126
- base64Frames = []
127
- base_video_path, _ = os.path.splitext(video_path.name)
128
- video = cv2.VideoCapture(video_path.name)
129
- total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
130
- fps = video.get(cv2.CAP_PROP_FPS)
131
- frames_to_skip = int(fps * seconds_per_frame)
132
- curr_frame = 0
133
- while curr_frame < total_frames - 1:
134
- video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
135
- success, frame = video.read()
136
- if not success:
137
- break
138
- _, buffer = cv2.imencode(".jpg", frame)
139
- base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
140
- curr_frame += frames_to_skip
141
- video.release()
142
- audio_path = f"{base_video_path}.mp3"
143
- clip = VideoFileClip(video_path.name)
144
- clip.audio.write_audiofile(audio_path, bitrate="32k")
145
- clip.audio.close()
146
- clip.close()
147
- return base64Frames, audio_path
148
 
149
  def main():
150
  st.markdown("### OpenAI GPT-4o Model")
 
67
  )
68
  st.markdown(response.choices[0].message.content)
69
 
70
+
71
def save_video(video_file):
    """Write an uploaded video to the current working directory.

    Args:
        video_file: Upload object exposing a ``name`` attribute and a
            ``getbuffer()`` method that returns the file's bytes.

    Returns:
        The file name the video was written under, relative to the current
        working directory.

    Raises:
        ValueError: If the upload's name contains no usable file name.
    """
    # The name is supplied by the client, so drop any directory components
    # to keep the write inside the working directory.
    file_name = os.path.basename(video_file.name)
    if not file_name:
        raise ValueError(f"Invalid upload file name: {video_file.name!r}")

    with open(file_name, "wb") as f:
        f.write(video_file.getbuffer())
    return file_name
76
+
77
  def process_video(video_path, seconds_per_frame=2):
78
  base64Frames = []
79
  base_video_path, _ = os.path.splitext(video_path)
 
80
  video = cv2.VideoCapture(video_path)
81
  total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
82
  fps = video.get(cv2.CAP_PROP_FPS)
83
  frames_to_skip = int(fps * seconds_per_frame)
84
+ curr_frame = 0
85
 
86
  # Loop through the video and extract frames at specified sampling rate
87
  while curr_frame < total_frames - 1:
 
92
  _, buffer = cv2.imencode(".jpg", frame)
93
  base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
94
  curr_frame += frames_to_skip
95
+
96
  video.release()
97
 
98
  # Extract audio from video
 
104
 
105
  print(f"Extracted {len(base64Frames)} frames")
106
  print(f"Extracted audio to {audio_path}")
107
+
108
  return base64Frames, audio_path
109
 
110
def ProcessVideo(video_input):
    """Summarize an uploaded video and render the summary in the page.

    The upload is saved with ``save_video``, then ``process_video`` samples
    one frame per second and extracts the audio track. The audio is
    transcribed, and the frames plus transcript are sent to the chat model.
    The model's Markdown reply is displayed with ``st.markdown``.

    Args:
        video_input: The uploaded video object, or ``None`` when nothing has
            been uploaded, in which case the function does nothing.
    """
    if video_input is None:
        return

    # Save the uploaded video file, then process the saved copy.
    video_path = save_video(video_input)
    base64Frames, audio_path = process_video(video_path, seconds_per_frame=1)

    # The prompt below needs a transcript of the extracted audio.
    # NOTE(review): "whisper-1" is assumed here; confirm it matches the
    # transcription model used elsewhere in this file.
    with open(audio_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )

    # One low-detail image part per sampled frame.
    frame_parts = [
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpg;base64,{frame}", "detail": "low"},
        }
        for frame in base64Frames
    ]

    # Generate a summary with visual and audio
    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": """You are generating a video summary. Create a summary of the provided video and its transcript. Respond in Markdown"""},
            {"role": "user", "content": [
                "These are the frames from the video.",
                *frame_parts,
                {"type": "text", "text": f"The audio transcription is: {transcription.text}"},
            ]},
        ],
        temperature=0,
    )

    st.markdown(response.choices[0].message.content)
 
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
  def main():
137
  st.markdown("### OpenAI GPT-4o Model")