shigeru saito committed
Commit 917851d
1 Parent(s): 2d789b8

first commit

Files changed (7)
  1. .env.example +3 -0
  2. .gitignore +1 -0
  3. app.py +319 -0
  4. requirements.txt +3 -0
  5. schema.json +70 -0
  6. stub/replicate.py +16 -0
  7. template.md +11 -0
.env.example ADDED
@@ -0,0 +1,3 @@
+ REPLICATE_API_TOKEN=
+ OPENAI_API_KEY=
+ ENV=PRODUCTION
.gitignore ADDED
@@ -0,0 +1 @@
+ .env
app.py ADDED
@@ -0,0 +1,319 @@
+ import gradio as gr
+ import openai
+ import sys
+ import os
+ import json
+ import threading
+ import time
+ import requests
+ import argparse
+ import markdown2
+
+ from dotenv import load_dotenv
+ from IPython.display import Image
+ from moviepy.editor import VideoFileClip, concatenate_videoclips, ImageClip
+ from moviepy.video.fx.all import fadein, fadeout
+ from PIL import Image as PIL_Image
+
+ from jinja2 import Template
+
+ load_dotenv()
+ openai.api_key = os.getenv('OPENAI_API_KEY')
+ REPLICATE_API_TOKEN = os.getenv("REPLICATE_API_TOKEN")
+ ENV = os.getenv("ENV")
+ MODEL = "gpt-3.5-turbo"
+ # MODEL = "gpt-4"
+
+ if ENV == "PRODUCTION":
+     import replicate
+ else:
+     from stub import replicate
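+ # Outside PRODUCTION, the local stub in stub/replicate.py stands in for the
+ # replicate client and returns canned clip URLs, so the app runs without an API token.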
+
+ class Video:
+     def __init__(self, scene, index):
+         self.scene = scene
+         self.prompt = "masterpiece, awards, best quality, dramatic-lighting, key-visual, "
+         self.prompt = self.prompt + scene.get("visual_prompt_in_en")
+         self.prompt = self.prompt + ", cinematic-angles-" + scene.get("cinematic_angles")
+         self.negative_prompt = "badhandv4, easynegative, ng_deepnegative_v1_75t, verybadimagenegative_v1.3, bad-artist, bad_prompt_version2-neg, "
+         self.index = index
+         self.output_url = None
+         self.file_path = f"assets/thread_{index}_video.mp4"
+
+     def run_replicate(self):
+         start_time = time.time()
+
+         self.output_url = replicate.run(
+             "lucataco/animate-diff:1531004ee4c98894ab11f8a4ce6206099e732c1da15121987a8eef54828f0663",
+             input={
+                 "motion_module": "mm_sd_v14",
+                 "prompt": self.prompt,
+                 "n_prompt": self.negative_prompt,
+             }
+         )
+
+         end_time = time.time()
+         duration = end_time - start_time
+
+         self.download_and_save(url=self.output_url, file_path=self.file_path)
+         self.print_thread_info(start_time, end_time, duration)
+
+     def download_and_save(self, url, file_path):
+         response = requests.get(url)
+         with open(file_path, "wb") as f:
+             f.write(response.content)
+
+     def print_thread_info(self, start_time, end_time, duration):
+         print(f"Thread {self.index} output_url: {self.output_url}")
+         print(f"Thread {self.index} start time: {start_time}")
+         print(f"Thread {self.index} end time: {end_time}")
+         print(f"Thread {self.index} duration: {duration}")
+
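+ # ThreadController runs one worker thread per scene; replicate.run blocks on a
+ # network call, so plain threads are enough to generate the clips concurrently.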
+ class ThreadController:
+     def __init__(self, args):
+         self.args = args
+         self.num_threads = len(args)
+         scenes = args.get("scenes")
+         # prompts = []
+         # if scenes:
+         #     for scene_data in scenes:
+         #         prompt = scene_data.get("visual_prompt_in_en")
+         #         prompt = prompt + ", " + scene_data.get("cinematic_angles")
+         #         prompt = prompt + ", " + scene_data.get("visual_prompt_in_en")
+         #         prompts.append(prompt)
+
+         self.videos = [Video(scene, index) for index, scene in enumerate(scenes)]
+         self.threads = []
+
+     def run_threads(self):
+         os.makedirs("assets", exist_ok=True)
+
+         for video in self.videos:
+             thread = threading.Thread(target=video.run_replicate)
+             self.threads.append(thread)
+             thread.start()
+
+         for thread in self.threads:
+             thread.join()
+
+     def merge_videos(self):
+         clips = []
+         for video in self.videos:
+             clips.append(VideoFileClip(video.file_path))
+
+         final_clip = concatenate_videoclips(clips)
+
+         os.makedirs("videos", exist_ok=True)
+         output_path = "videos/final_concatenated_video.mp4"
+
+         final_clip.write_videofile(output_path, codec='libx264', fps=24)
+
+         return output_path
+
+     def print_prompts(self):
+         for video in self.videos:
+             print(f"Thread {video.index} prompt: {video.prompt}")
+
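+ # Pipeline entry point: render every scene in parallel, then stitch the clips
+ # into a single MP4 and return its path.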
+ def main(args):
+     thread_controller = ThreadController(args)
+     thread_controller.run_threads()
+     merged_video_path = thread_controller.merge_videos()
+
+     thread_controller.print_prompts()
+
+     return merged_video_path
+
+ def load_prompts(file_path):
+     with open(file_path, "r") as f:
+         prompts = f.read().splitlines()
+     return prompts
+
+ def get_filetext(filename):
+     with open(filename, "r") as file:
+         filetext = file.read()
+     return filetext
+
+ def get_functions_from_schema(filename):
+     schema = get_filetext(filename)
+     schema_json = json.loads(schema)
+     functions = schema_json.get("functions")
+     return functions
+
+ functions = get_functions_from_schema('schema.json')
+
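+ # Thin wrapper around the ChatCompletion API; function_call={"name": "generate_video"}
+ # forces the model to reply with arguments that follow the schema.json definition.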
+ class OpenAI:
+
+     @classmethod
+     def chat_completion_with_function(cls, prompt, messages, functions):
+         print("prompt:" + prompt)
+
+         # Measure how long the generation takes
+         start = time.time()
+         # Call the ChatCompletion API
+         response = openai.ChatCompletion.create(
+             model=MODEL,
+             messages=messages,
+             functions=functions,
+             function_call={"name": "generate_video"}
+         )
+         print("gpt generation time: " + str(time.time() - start))
+
+         # Extract the message returned by the ChatCompletion API
+         message = response.choices[0].message
+         print("chat completion message: " + json.dumps(message, indent=2))
+
+         return message
+
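+ # NajiminoAI drives the whole flow: ask GPT for scene descriptions via function
+ # calling, generate one clip per scene, then render a markdown report of the run.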
+ class NajiminoAI:
+
+     def __init__(self, user_message):
+         self.user_message = user_message
+
+     def generate_markdown(self, args, generation_time):
+
+         # # lang=args.get("lang")
+         # title=args.get("title")
+         # description=args.get("description")
+         # visual_prompt_in_en=args.get("visual_prompt_in_en")
+         # scenes = args.get("scenes")
+
+         # prompt_for_visual_expression = \
+         #     visual_prompt_in_en
+
+         # print("prompt_for_visual_expression: "+prompt_for_visual_expression)
+
+         # prompts = []
+         # if scenes:
+         #     for scene_data in scenes:
+         #         prompt = scene_data.get("visual_prompt_in_en")
+         #         prompt = prompt + ", " + scene_data.get("cinematic_angles")
+         #         prompt = prompt + ", " + scene_data.get("visual_prompt_in_en")
+         #         prompts.append(prompt)
+         # print("scenes: " + json.dumps(scenes, indent=2))
+         # if scenes:
+         #     for scene_data in scenes:
+         #         scene = scene_data.get("scene")
+         #         cinematic_angles = scene_data.get("cinematic_angles")
+         #         visual_prompt_in_en = scene_data.get("visual_prompt_in_en")
+         #         print("scene: ", scene)
+         #         print("cinematic_angles: ", cinematic_angles)
+         #         print("visual_prompt_in_en: ", visual_prompt_in_en)
+
+         template_string = get_filetext(filename="template.md")
+
+         template = Template(template_string)
+         result = template.render(args=args, generation_time=generation_time)
+
+         print(result)
+
+         return result
+
+     @classmethod
+     def generate(cls, user_message):
+
+         najiminoai = NajiminoAI(user_message)
+
+         return najiminoai.create_video()
+
+     def create_video(self):
+         main_start_time = time.time()
+
+         user_message = self.user_message + " 4シーン"  # "4シーン" = "4 scenes": ask GPT to plan four scenes
+
+         messages = [
+             {"role": "user", "content": user_message}
+         ]
+
+         functions = get_functions_from_schema('schema.json')
+
+         message = OpenAI.chat_completion_with_function(prompt=user_message, messages=messages, functions=functions)
+
+         video_path = None
+         html = None
+         if message.get("function_call") is None:
+
+             print("message: " + json.dumps(message, indent=2))
+             return [video_path, html]
+
+         function_name = message["function_call"]["name"]
+
+         args = json.loads(message["function_call"]["arguments"])
+
+         print("args: " + json.dumps(args, indent=2))
+
+         # # lang=args.get("lang")
+         # title=args.get("title")
+         # description=args.get("description")
+         # visual_prompt_in_en=args.get("visual_prompt_in_en")
+         # scenes = args.get("scenes")
+
+         # prompt_for_visual_expression = \
+         #     visual_prompt_in_en
+
+         # print("prompt_for_visual_expression: "+prompt_for_visual_expression)
+
+         # prompts = []
+         # if scenes:
+         #     for scene_data in scenes:
+         #         prompt = scene_data.get("visual_prompt_in_en")
+         #         prompt = prompt + ", " + scene_data.get("cinematic_angles")
+         #         prompt = prompt + ", " + scene_data.get("visual_prompt_in_en")
+         #         prompts.append(prompt)
+
+         video_path = main(args)
+
+         main_end_time = time.time()
+         main_duration = main_end_time - main_start_time
+
+         print("Thread Main start time:", main_start_time)
+         print("Thread Main end time:", main_end_time)
+         print("Thread Main duration:", main_duration)
+         print("All threads finished.")
+
+         function_response = self.generate_markdown(args, main_duration)
+
+         html = (
+             "<div style='max-width:100%; overflow:auto'>"
+             + "<p>"
+             + markdown2.markdown(function_response, extras=["tables"])
+             + "</div>"
+         )
+         return [video_path, html]
+
+
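+ # Entry point: with --prompts_file the video is generated once from a fixed
+ # prompt; otherwise a Gradio UI is launched.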
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(description="Generate videos from text prompts")
+
+     parser.add_argument("--prompts_file", type=str, help="File containing prompts (one per line)")
+
+     args = parser.parse_args()
+
+     if args.prompts_file:
+         prompts = load_prompts(args.prompts_file)
+         # main(prompts)
+         NajiminoAI.generate("伝統工芸と最新技術の融合")
+     else:
+         # def create_video(prompt):
+         #     prompts = prompt.strip().split('\n')
+         #     output_path = main(prompts)
+         #     return output_path
+
+         iface = gr.Interface(
+             fn=NajiminoAI.generate,
+             # inputs=gr.Textbox(label=inputs_label),
+             outputs=[
+                 gr.Video(),
+                 "html"
+             ],
+             # title=title,
+             inputs=gr.Textbox(lines=2, placeholder="Enter your prompt"),
+             title="Video Generator",
+             description="Generate a video based on the text prompt you enter.",
+             examples=[
+                 ["伝統工芸と最新技術の融合"],
+                 ["子どもたちが笑ったり怒ったり泣いたり楽しんだりする"],
+                 ["光、闇、氷、水、雲、風、自然、渦、土"],
+             ],
+         )
+         iface.launch()
+
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ replicate
+ moviepy
+ openai
schema.json ADDED
@@ -0,0 +1,70 @@
+ {
+   "functions": [
+     {
+       "name": "generate_video",
+       "description": "Generate a video from the given text",
+       "parameters": {
+         "type": "object",
+         "required": [
+           "title",
+           "visual_style",
+           "visual_prompt_in_en",
+           "scene_count",
+           "scenes"
+         ],
+         "properties": {
+           "title": {
+             "type": "string",
+             "description": "Title of the video"
+           },
+           "visual_style": {
+             "type": "string",
+             "description": "Visual style of the video (e.g. 'anime')"
+           },
+           "visual_prompt_in_en": {
+             "type": "string",
+             "description": "Short visual description of the video"
+           },
+           "negative_visual_prompt_in_en": {
+             "type": "string",
+             "description": "Visual elements that should not appear in the video"
+           },
+           "scene_count": {
+             "type": "integer",
+             "description": "Number of scenes"
+           },
+           "scenes": {
+             "type": "array",
+             "description": "Details of each scene",
+             "items": {
+               "type": "object",
+               "required": [
+                 "scene",
+                 "visual_prompt_in_en",
+                 "cinematic_angles"
+               ],
+               "properties": {
+                 "scene": {
+                   "type": "integer",
+                   "description": "Scene number"
+                 },
+                 "visual_prompt_in_en": {
+                   "type": "string",
+                   "description": "Visual description of the scene"
+                 },
+                 "cinematic_angles": {
+                   "type": "string",
+                   "description": "Camera angles and movement (e.g. 'dolly-in', 'spinning-shot', 'dutch-angle')"
+                 },
+                 "negative_visual_prompt_in_en": {
+                   "type": "string",
+                   "description": "Visual elements that should not appear in the scene"
+                 }
+               }
+             }
+           }
+         }
+       }
+     }
+   ]
+ }
stub/replicate.py ADDED
@@ -0,0 +1,16 @@
+ # stub/replicate.py
+ def run(model_path, input):
+     print("Stub called for replicate.run with model_path and input")
+
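+     # Pick a canned clip by matching story-phase keywords in the prompt;
+     # unmatched prompts fall through to the default clip below.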
+     prompt = input["prompt"]
+     if "Introduction".lower() in prompt.lower():
+         url = "https://replicate.delivery/pbxt/sLBtHnGDMVK7AV5x24dl29lp9pQnbsfcuMbusXcJEl9kG8rIA/out.mp4"
+     elif "Development".lower() in prompt.lower():
+         url = "https://replicate.delivery/pbxt/QVgesepHS7pvZE9UWs2SPDMsEeCMrteelrmuegvvr7iITDerIA/out.mp4"
+     elif "Climax".lower() in prompt.lower():
+         url = "https://replicate.delivery/pbxt/H1bJ3dp0r95OM5XPXoK6gfABF9vOCsFT7gxH0I4ceg75N4XRA/out.mp4"
+     elif "Resolution".lower() in prompt.lower():
+         url = "https://replicate.delivery/pbxt/qNdKneAbNaRtdK6pZnoAO17JCJfD5neffTw193F1XXUkvBfVE/out.mp4"
+     else:
+         url = "https://replicate.delivery/pbxt/cgT0Aef4haodP04HybKaOrsHOQKkYcV8mpzGj7WHx3eFMuviA/out.mp4"
+     return url
template.md ADDED
@@ -0,0 +1,11 @@
+ # Movie Title: {{ args.title }}
+
+ ## Generation Time
+
+ {{ generation_time }}
+
+ ## Prompts
+
+ | Scene | visual_prompt_in_en | negative_visual_prompt_in_en | cinematic_angles |
+ |----:|----|----|----|{% for item in args.scenes %}
+ |{{ item.scene }}|{{ item.visual_prompt_in_en }}|{{ item.negative_visual_prompt_in_en }}|{{ item.cinematic_angles }}|{% endfor %}