shigeru saito committed on
Commit
9a2372b
·
1 Parent(s): df52446

BGM生成対応

Browse files
Files changed (3) hide show
  1. app.py +180 -41
  2. schema.json +10 -0
  3. template.md +10 -2
app.py CHANGED
@@ -9,6 +9,7 @@ import requests
9
  import argparse
10
  import markdown2
11
  import uuid
 
12
  from pathlib import Path
13
 
14
  from dotenv import load_dotenv
@@ -16,6 +17,8 @@ from IPython.display import Image
16
  from moviepy.editor import VideoFileClip, concatenate_videoclips, ImageClip
17
  from moviepy.video.fx.all import fadein, fadeout
18
  from PIL import Image as PIL_Image
 
 
19
 
20
  from jinja2 import Template
21
 
@@ -28,28 +31,23 @@ openai.api_key = os.getenv('OPENAI_API_KEY')
28
  REPLICATE_API_TOKEN_LIST = os.getenv("REPLICATE_API_TOKEN_LIST").split(',')
29
  NUMBER_OF_SCENES = os.getenv("NUMBER_OF_SCENES")
30
 
31
- if ENV == "PRODUCTION":
32
- import replicate
33
- from replicate.client import Client
34
- else:
35
- # from stub import replicate
36
- pass
37
 
38
- class Video:
39
- def __init__(self, scene, index, client: Client):
 
40
  self.client = client
41
- self.scene = scene
42
- self.prompt = "masterpiece, awards, best quality, dramatic-lighting, "
43
- self.prompt = self.prompt + scene.get("visual_prompt_in_en")
44
- self.prompt = self.prompt + ", cinematic-angles-" + scene.get("cinematic_angles")
45
- self.nagative_prompt = "badhandv4, easynegative, ng_deepnegative_v1_75t, verybadimagenegative_v1.3, bad-artist, bad_prompt_version2-neg, nsfw, "
46
  self.index = index
 
 
 
 
 
47
  self.output_url = None
48
- self.video_id = uuid.uuid4()
49
- self.file_path = f"assets/thread_{index}_request_{self.video_id}_video.mp4"
50
-
51
- REPLICATE_MODEL_PATH = "lucataco/animate-diff"
52
- REPLICATE_MODEL_VERSION = "1531004ee4c98894ab11f8a4ce6206099e732c1da15121987a8eef54828f0663"
53
 
54
  def run_replicate(self, retries=0):
55
  try:
@@ -64,12 +62,7 @@ class Video:
64
  version = model.versions.get(self.REPLICATE_MODEL_VERSION)
65
  self.prediction = self.client.predictions.create(
66
  version=version,
67
- input={
68
- "motion_module": "mm_sd_v14",
69
- "prompt": self.prompt,
70
- "n_prompt": self.nagative_prompt,
71
- "seed": 0,
72
- },
73
  )
74
 
75
  self.prediction_id = self.prediction.id
@@ -89,13 +82,17 @@ class Video:
89
  else:
90
  self.output_url = None
91
 
 
92
  end_time = time.time()
93
  duration = end_time - start_time
94
 
95
  self.download_and_save(url=self.output_url, file_path=self.file_path)
96
  self.print_thread_info(start_time, end_time, duration)
97
  except replicate.exceptions.ReplicateError as e:
98
- if self.prediction and str(e) == "The requested resource could not be found.":
 
 
 
99
  predictions = self.client.predictions.list()
100
  self.prediction = next((p for p in predictions if p.id == self.prediction_id), None)
101
 
@@ -111,7 +108,10 @@ class Video:
111
  print(f"Thread {self.index} token: {self.client.api_token[:10]} prediction.output: {self.prediction.output}")
112
  else:
113
  self.output_url = None
 
 
114
 
 
115
  end_time = time.time()
116
  duration = end_time - start_time
117
 
@@ -119,11 +119,14 @@ class Video:
119
  self.print_thread_info(start_time, end_time, duration)
120
  else:
121
  print(f"Error in thread {self.index}: {e}")
 
122
  print("予期しないエラーが発生しました。スレッドを終了します。")
123
  # 予期しないエラーが発生した場合の追加処理
 
124
  except Exception as e:
125
  print(f"Error in thread {self.index}: {e}")
126
-
 
127
  def download_and_save(self, url, file_path):
128
  response = requests.get(url)
129
  with open(file_path, "wb") as f:
@@ -135,37 +138,121 @@ class Video:
135
  print(f"Thread {self.index} end time: {end_time}")
136
  print(f"Thread {self.index} duration: {duration}")
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  class ThreadController:
139
  def __init__(self, args):
 
140
  self.args = args
141
  scenes = args.get("scenes")
 
142
  self.videos = []
143
  self.threads = []
144
  self.lock = threading.Lock()
145
  self.replicate_client_list = {}
146
- for token in REPLICATE_API_TOKEN_LIST:
147
-
 
 
 
148
  client = Client()
149
  client.api_token = token
150
  self.replicate_client_list[token] = client
151
-
 
 
 
152
  for index, scene in enumerate(scenes):
153
- # token = REPLICATE_API_TOKEN_LIST[self.client.api_token_index]
154
- video = Video(scene, index, client)
155
  self.videos.append(video)
156
 
157
- # self.client.api_token_index = (self.client.api_token_index + 1) % len(REPLICATE_API_TOKEN_LIST)
158
-
159
 
160
  def run_threads(self):
161
- os.makedirs("assets", exist_ok=True)
162
 
163
- token = None
 
 
 
 
164
  for video in self.videos:
165
  if token is not None and video.client.api_token != token:
166
- # tokenが異なる場合、1秒待ってから次を実行
167
- print(f"Thread {video.index} token changed. Waiting 3 seconds.")
168
- time.sleep(5)
169
 
170
  thread = threading.Thread(target=video.run_replicate)
171
  self.threads.append(thread)
@@ -186,12 +273,54 @@ class ThreadController:
186
  print(f"Error: Video file {video.file_path} could not be found! Skipping this file.")
187
  # 他のログ出力方法も使用可能、例: loggingモジュール
188
 
 
 
189
  final_clip = concatenate_videoclips(clips)
 
 
 
 
 
190
 
191
- os.makedirs("videos", exist_ok=True)
192
- output_path = f"videos/final_concatenated_video_{uuid.uuid4()}.mp4"
193
 
194
- final_clip.write_videofile(output_path, codec='libx264', fps=24)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
 
196
  return output_path
197
 
@@ -358,3 +487,13 @@ if __name__ == "__main__":
358
  )
359
  iface.launch()
360
 
 
 
 
 
 
 
 
 
 
 
 
9
  import argparse
10
  import markdown2
11
  import uuid
12
+ import traceback
13
  from pathlib import Path
14
 
15
  from dotenv import load_dotenv
 
17
  from moviepy.editor import VideoFileClip, concatenate_videoclips, ImageClip
18
  from moviepy.video.fx.all import fadein, fadeout
19
  from PIL import Image as PIL_Image
20
+ from pydub import AudioSegment
21
+ from moviepy.editor import VideoFileClip, AudioFileClip
22
 
23
  from jinja2 import Template
24
 
 
31
  REPLICATE_API_TOKEN_LIST = os.getenv("REPLICATE_API_TOKEN_LIST").split(',')
32
  NUMBER_OF_SCENES = os.getenv("NUMBER_OF_SCENES")
33
 
34
+ import replicate
35
+ from replicate.client import Client
 
 
 
 
36
 
37
+ class Replicate:
38
+ def __init__(self, id, client: Client, args, index=None):
39
+ self.id = id
40
  self.client = client
41
+ self.args = args
 
 
 
 
42
  self.index = index
43
+ self.prompt = ""
44
+ self.file_path_format = ""
45
+ self.REPLICATE_MODEL_PATH = ""
46
+ self.REPLICATE_MODEL_VERSION = ""
47
+ self.input={}
48
  self.output_url = None
49
+ self.response = None
50
+ self.prediction_id = None
 
 
 
51
 
52
  def run_replicate(self, retries=0):
53
  try:
 
62
  version = model.versions.get(self.REPLICATE_MODEL_VERSION)
63
  self.prediction = self.client.predictions.create(
64
  version=version,
65
+ input=self.input
 
 
 
 
 
66
  )
67
 
68
  self.prediction_id = self.prediction.id
 
82
  else:
83
  self.output_url = None
84
 
85
+ self.file_path = self.file_path_format.format(id=self.id, class_name=self.__class__.__name__, index=self.index, prediction_id=self.prediction_id)
86
  end_time = time.time()
87
  duration = end_time - start_time
88
 
89
  self.download_and_save(url=self.output_url, file_path=self.file_path)
90
  self.print_thread_info(start_time, end_time, duration)
91
  except replicate.exceptions.ReplicateError as e:
92
+ print(f"Error fetching model or version: {e}")
93
+ print(f"Model Path: {self.REPLICATE_MODEL_PATH}")
94
+ print(f"Model Version: {self.REPLICATE_MODEL_VERSION}")
95
+ if self.prediction_id and str(e) == "The requested resource could not be found.":
96
  predictions = self.client.predictions.list()
97
  self.prediction = next((p for p in predictions if p.id == self.prediction_id), None)
98
 
 
108
  print(f"Thread {self.index} token: {self.client.api_token[:10]} prediction.output: {self.prediction.output}")
109
  else:
110
  self.output_url = None
111
+ print(f"Thread {self.index} token: {self.client.api_token[:10]} prediction.output: Error")
112
+ print(f"Thread {self.index} token: {self.client.api_token[:10]} prediction.output: {self.prediction.output}")
113
 
114
+ self.file_path = self.file_path_format.format(id=self.id, class_name=self.__class__.__name__, index=self.index, prediction_id=self.prediction_id)
115
  end_time = time.time()
116
  duration = end_time - start_time
117
 
 
119
  self.print_thread_info(start_time, end_time, duration)
120
  else:
121
  print(f"Error in thread {self.index}: {e}")
122
+ print(traceback.format_exc())
123
  print("予期しないエラーが発生しました。スレッドを終了します。")
124
  # 予期しないエラーが発生した場合の追加処理
125
+ raise e
126
  except Exception as e:
127
  print(f"Error in thread {self.index}: {e}")
128
+ print(traceback.format_exc())
129
+
130
  def download_and_save(self, url, file_path):
131
  response = requests.get(url)
132
  with open(file_path, "wb") as f:
 
138
  print(f"Thread {self.index} end time: {end_time}")
139
  print(f"Thread {self.index} duration: {duration}")
140
 
141
class Video(Replicate):
    """Replicate job that renders a single scene with the animate-diff model.

    The constructor turns one scene description into the model input
    (positive prompt, negative prompt, motion module, seed) and configures
    the output path template. The request itself is issued by the
    ``run_replicate`` implementation inherited from ``Replicate``.
    """

    def __init__(self, id, client: Client, args, scene, index=None):
        """Prepare the animate-diff request for one scene.

        Args:
            id: Identifier of the generation run; used as the ``assets/``
                sub-directory name in the output path.
            client: Replicate client (and therefore API token) used for
                this scene.
            args: The full generation arguments; stored by the base class.
            scene: Mapping describing the scene. ``visual_prompt_in_en`` and
                ``cinematic_angles`` are read from it.
            index: Position of the scene, used in log lines and file names.
        """
        super().__init__(id, client, args, index)
        self.REPLICATE_MODEL_PATH = "lucataco/animate-diff"
        self.REPLICATE_MODEL_VERSION = "1531004ee4c98894ab11f8a4ce6206099e732c1da15121987a8eef54828f0663"
        self.scene = scene

        # A scene may lack a field; treat it as empty text rather than
        # failing on ``str + None`` while the controller is still building
        # its list of jobs.
        visual_prompt = scene.get("visual_prompt_in_en") or ""
        cinematic_angles = scene.get("cinematic_angles") or ""

        self.prompt = "masterpiece, awards, best quality, dramatic-lighting, " + visual_prompt
        if cinematic_angles:
            self.prompt = self.prompt + ", cinematic-angles-" + cinematic_angles

        # The attribute name keeps its historical spelling ("nagative")
        # because code outside this class may read it.
        self.nagative_prompt = "badhandv4, easynegative, ng_deepnegative_v1_75t, verybadimagenegative_v1.3, bad-artist, bad_prompt_version2-neg, nsfw, "
        self.file_path_format = "assets/{id}/{class_name}_thread_{index}_request_{prediction_id}.mp4"
        self.file_path = None
        self.input = {
            "motion_module": "mm_sd_v14",
            "prompt": self.prompt,
            "n_prompt": self.nagative_prompt,
            # The original author annotated this value as "random";
            # NOTE(review): confirm against the model's input schema.
            "seed": 0,
        }

    def run_replicate(self, retries=0):
        """Run the prediction through the base class and refresh ``file_path``.

        Args:
            retries: Retry counter, forwarded unchanged to
                ``Replicate.run_replicate`` (it was previously dropped).

        Returns:
            Whatever the base implementation returns; it is also stored in
            ``self.response``.
        """
        self.response = super().run_replicate(retries=retries)
        # Recompute the path so it is populated even when the base method
        # left early, before assigning it.
        self.file_path = self.file_path_format.format(
            id=self.id,
            class_name=self.__class__.__name__,
            index=self.index,
            prediction_id=self.prediction_id,
        )
        return self.response
165
+
166
class Music(Replicate):
    """Replicate job that generates the background music with musicgen.

    Unlike the base class, the request is issued through the module-level
    ``replicate.run`` helper, so no prediction id is recorded and
    ``prediction_id`` stays at the value set by the base constructor.
    """

    def __init__(self, id, client: Client, args):
        """Prepare the musicgen request.

        Args:
            id: Identifier of the generation run; used as the ``assets/``
                sub-directory name in the output path.
            client: Replicate client whose API token is used for the call.
            args: Generation arguments; ``bgm_prompt_in_en`` is read from it.
        """
        super().__init__(id, client, args)
        self.REPLICATE_MODEL_PATH = "facebookresearch/musicgen"
        # This is the version hash the request has always been sent with.
        # NOTE(review): the constructor used to store a different hash
        # (f8578df9...) as a 1-tuple because of a stray trailing comma; that
        # value was never used by ``run_replicate``. Confirm which version
        # is intended.
        self.REPLICATE_MODEL_VERSION = "7a76a8258b23fae65c5a22debb8841d1d7e816b75c2f24218cd2bd8573787906"
        # Tolerate a missing BGM prompt instead of failing on ``str + None``.
        self.prompt = "innovative, exceptional, captivating, " + (args.get("bgm_prompt_in_en") or "")

        self.file_path_format = "assets/{id}/{class_name}_{index}_request_{prediction_id}.mp3"
        self.file_path = None
        # The duration is not part of ``args`` (the old lookup used an empty
        # key and always produced None); the owner assigns it after
        # construction, as ThreadController does.
        self.duration = None
        self.input = self._build_input()

    def _build_input(self):
        """Return the model input reflecting the current prompt and duration."""
        payload = {
            "model_version": "large",
            "prompt": self.prompt,
        }
        if self.duration is not None:
            # Left out when unset so that an explicit null is never sent;
            # presumably the model then applies its own default.
            payload["duration"] = self.duration
        payload["output_format"] = "mp3"
        payload["seed"] = -1  # random
        return payload

    def run_replicate(self, retries=0):
        """Generate the track, download it and return the model output.

        Args:
            retries: Accepted for signature compatibility with the base
                class; this implementation does not retry.

        Returns:
            The value returned by ``replicate.run``; it is also stored in
            ``self.output_url`` and ``self.response``.
        """
        start_time = time.time()
        print(f"Thread {self.index} token: {self.client.api_token[:10]}")

        # NOTE(review): this changes a process-wide environment variable
        # while other jobs run in parallel threads. Kept as-is because the
        # module-level ``replicate.run`` is what is being called here.
        os.environ['REPLICATE_API_TOKEN'] = self.client.api_token

        # Rebuild the input at call time so a duration assigned after
        # construction is honoured.
        self.input = self._build_input()
        output = replicate.run(
            f"{self.REPLICATE_MODEL_PATH}:{self.REPLICATE_MODEL_VERSION}",
            input=self.input,
        )
        print(output)
        self.output_url = output
        self.response = output

        self.file_path = self.file_path_format.format(
            id=self.id,
            class_name=self.__class__.__name__,
            index=self.index,
            prediction_id=self.prediction_id,
        )
        end_time = time.time()
        duration = end_time - start_time
        self.download_and_save(url=self.output_url, file_path=self.file_path)
        self.print_thread_info(start_time, end_time, duration)

        return self.response
214
+
215
  class ThreadController:
216
  def __init__(self, args):
217
+ self.id = uuid.uuid4()
218
  self.args = args
219
  scenes = args.get("scenes")
220
+ self.music = None
221
  self.videos = []
222
  self.threads = []
223
  self.lock = threading.Lock()
224
  self.replicate_client_list = {}
225
+ self.duration = int(2.1 * len(scenes) * len(REPLICATE_API_TOKEN_LIST)) # 2.1秒 * シーン数 * APIトークン数
226
+
227
+ os.makedirs(f"assets/{self.id}", exist_ok=True)
228
+
229
+ for token_index, token in enumerate(REPLICATE_API_TOKEN_LIST):
230
  client = Client()
231
  client.api_token = token
232
  self.replicate_client_list[token] = client
233
+ if token_index == 0:
234
+ self.music = Music(self.id, client, args)
235
+ self.music.duration = self.duration
236
+
237
  for index, scene in enumerate(scenes):
238
+ token = REPLICATE_API_TOKEN_LIST[token_index]
239
+ video = Video(self.id, client, args, scene, index)
240
  self.videos.append(video)
241
 
242
+ # client.api_token_index = (token_index + 1) % len(REPLICATE_API_TOKEN_LIST)
 
243
 
244
  def run_threads(self):
 
245
 
246
+ thread = threading.Thread(target=self.music.run_replicate)
247
+ self.threads.append(thread)
248
+ thread.start()
249
+ token = self.music.client.api_token
250
+
251
  for video in self.videos:
252
  if token is not None and video.client.api_token != token:
253
+ # tokenが異なる場合、4秒待ってから次を実行
254
+ print(f"Thread {video.index} token changed. Waiting 4 seconds.")
255
+ time.sleep(4)
256
 
257
  thread = threading.Thread(target=video.run_replicate)
258
  self.threads.append(thread)
 
273
  print(f"Error: Video file {video.file_path} could not be found! Skipping this file.")
274
  # 他のログ出力方法も使用可能、例: loggingモジュール
275
 
276
+ output_path = f"assets/{self.id}/concatenated_video_{self.id}.mp4"
277
+
278
  final_clip = concatenate_videoclips(clips)
279
+ final_clip.write_videofile(output_path, codec='libx264', fps=24)
280
+
281
+ # Load the video file using MoviePy
282
+ video_clip = VideoFileClip(output_path)
283
+ video_duration = video_clip.duration
284
 
285
+ # Re-loading the audio file using pydub
286
+ audio_segment = AudioSegment.from_mp3(self.music.file_path)
287
 
288
+ # Calculating the number of loops needed to match the video duration
289
+ num_loops = int(video_duration * 1000) // len(audio_segment) + 1
290
+
291
+ # Creating an audio segment that has the same duration as the video by looping the original audio
292
+ final_audio_segment = audio_segment * num_loops
293
+
294
+ # Trimming the final audio segment to match the video duration exactly
295
+ final_audio_segment = final_audio_segment[:int(video_duration * 1000)]
296
+
297
+ temp_audio_path = "/tmp/temp_audio.mp3"
298
+
299
+ # Saving the final audio as a temporary MP3 file
300
+ final_audio_segment.export(temp_audio_path, format="mp3")
301
+
302
+ # Loading the temporary audio file as a MoviePy AudioFileClip
303
+ final_audio_clip = AudioFileClip(temp_audio_path)
304
+
305
+ # Setting the audio to the video
306
+ final_video_clip = video_clip.set_audio(final_audio_clip)
307
+
308
+ # Path to save the final video with audio (different name to avoid confusion)
309
+ output_path_with_audio_fixed = "/tmp/final_video_with_audio_fixed.mp4"
310
+
311
+ # Saving the final video with audio
312
+ final_video_clip.write_videofile(output_path_with_audio_fixed, codec="libx264", audio_codec="aac")
313
+
314
+ # Path to the final video with audio (fixed version)
315
+ output_path_with_audio_fixed
316
+
317
+ os.makedirs(f"videos/{self.id}/", exist_ok=True)
318
+ output_path = f"videos/{self.id}/final_concatenated_video_{self.id}.mp4"
319
+
320
+ # final_clip.write_videofile(output_path, codec='libx264', fps=24)
321
+
322
+ import shutil
323
+ shutil.move(output_path_with_audio_fixed, output_path)
324
 
325
  return output_path
326
 
 
487
  )
488
  iface.launch()
489
 
490
+ # import replicate
491
+ # import os
492
+ # token = os.environ.get("REPLICATE_API_TOKEN")
493
+ # os.environ["REPLICATE_API_TOKEN"] = token
494
+ # print(f"token: {token}")
495
+ # output = replicate.run(
496
+ # "facebookresearch/musicgen:7a76a8258b23fae65c5a22debb8841d1d7e816b75c2f24218cd2bd8573787906",
497
+ # input={"model_version": "melody"}
498
+ # )
499
+ # print(output)
schema.json CHANGED
@@ -6,14 +6,20 @@
6
  "parameters": {
7
  "type": "object",
8
  "required": [
 
9
  "title",
10
  "story",
11
  "visual_style",
12
  "visual_prompt_in_en",
 
13
  "scene_count",
14
  "scenes"
15
  ],
16
  "properties": {
 
 
 
 
17
  "title": {
18
  "type": "string",
19
  "description": "映画のタイトル"
@@ -30,6 +36,10 @@
30
  "type": "string",
31
  "description": "映像に関連する簡単な説明"
32
  },
 
 
 
 
33
  "negative_visual_prompt_in_en": {
34
  "type": "string",
35
  "description": "映像に含めないでほしい視覚的説明"
 
6
  "parameters": {
7
  "type": "object",
8
  "required": [
9
+ "lang",
10
  "title",
11
  "story",
12
  "visual_style",
13
  "visual_prompt_in_en",
14
+ "bgm_prompt_in_en",
15
  "scene_count",
16
  "scenes"
17
  ],
18
  "properties": {
19
+ "lang": {
20
+ "type": "string",
21
+ "description": "The language of the user input in ISO 639-1 format."
22
+ },
23
  "title": {
24
  "type": "string",
25
  "description": "映画のタイトル"
 
36
  "type": "string",
37
  "description": "映像に関連する簡単な説明"
38
  },
39
+ "bgm_prompt_in_en": {
40
+ "type": "string",
41
+ "description": "シーンBGMの聴覚的表現を文学的に説明する"
42
+ },
43
  "negative_visual_prompt_in_en": {
44
  "type": "string",
45
  "description": "映像に含めないでほしい視覚的説明"
template.md CHANGED
@@ -1,4 +1,4 @@
1
- # Movie Title: {{ args.title }}
2
 
3
  ## Story
4
 
@@ -8,8 +8,16 @@
8
 
9
  {{ generation_time }}
10
 
11
- ## Prompts
 
 
 
 
12
 
13
  | Scene | visual_prompt_in_en | negative_visual_prompt_in_en | cinematic_angles |
14
  |----:|----|----|----|{% for item in args.scenes %}
15
  |{{ item.scene }}|{{ item.visual_prompt_in_en }}|{{ item.negative_visual_prompt_in_en }}|{{ item.cinematic_angles}}|{% endfor %}
 
 
 
 
 
1
+ # Story Title: {{ args.title }}
2
 
3
  ## Story
4
 
 
8
 
9
  {{ generation_time }}
10
 
11
+ ## BGM Prompt
12
+
13
+ {{ args.bgm_prompt_in_en }}
14
+
15
+ ## Visual Prompts
16
 
17
  | Scene | visual_prompt_in_en | negative_visual_prompt_in_en | cinematic_angles |
18
  |----:|----|----|----|{% for item in args.scenes %}
19
  |{{ item.scene }}|{{ item.visual_prompt_in_en }}|{{ item.negative_visual_prompt_in_en }}|{{ item.cinematic_angles}}|{% endfor %}
20
+
21
+ ## Language
22
+
23
+ {{ args.lang }}