Spaces:
Running
on
Zero
Running
on
Zero
'''
    Post-process the Llama (LLM) JSONL output into per-sample processed_text.txt files for the next step
'''
import os, shutil, sys | |
import json | |
import pandas as pd | |
import collections | |
def parse_keywords(outputs):
    """Extract the value part of every ``key: value`` line of an LLM output.

    Args:
        outputs: Raw LLM output text, one ``key: value`` entry per line.

    Returns:
        A ``(keywords, effective_length)`` tuple. ``keywords`` holds one entry
        per line of ``outputs`` (an empty string when the line has no value or
        no colon at all); ``effective_length`` is the number of non-empty
        entries.
    """
    keywords = []
    for content in outputs.split('\n'):
        # partition() never raises on a colon-less line (e.g. a trailing blank
        # line) and keeps the full value even if it contains further colons.
        _, _, value = content.partition(":")
        # Drop the single separator character that follows the colon
        # (presumably a space -- the producer's format is not visible here).
        keywords.append(value[1:])

    effective_length = sum(1 for keyword in keywords if keyword != "")
    return keywords, effective_length


if __name__ == "__main__":

    # Define important path
    json_path = "../SVD1/v1.jsonl"
    folder_path = "/home/kiteret/Desktop/StableVideoDiffusion/full_text_tmp/"

    # Lengths of the prefix / suffix stripped from every "file_path" entry.
    # NOTE(review): assumes each file_path starts with this exact prefix and
    # ends with "lang.txt" -- confirm against the producer of the jsonl file.
    key_start = len("/home/chfeng/llama3/full_text_tmp/")
    key_end = len("lang.txt")

    # Read the json file (one JSON document per line)
    with open(json_path, 'r') as json_file:
        json_lines = json_file.readlines()

    # Iterate all the json entries
    length_stats = collections.defaultdict(int)
    for json_line in json_lines:

        # A blank line (e.g. at the end of the file) is not a JSON document
        if not json_line.strip():
            continue
        json_info = json.loads(json_line)

        # Define the path to write
        sub_path = json_info["file_path"][key_start:-key_end]
        new_text_path = os.path.join(folder_path, sub_path, "processed_text.txt")

        # Always drop a stale result first, so skipped entries leave no file
        if os.path.exists(new_text_path):
            os.remove(new_text_path)

        # Sanity check for the case where input is missed
        if json_info["input"] == "":
            print("It is weird for the input is empty in the LLM process for ", sub_path)
            continue

        # The output is expected to begin with the "action:" entry
        outputs = json_info["output"]
        if not outputs.startswith("action:"):
            print("It is weird for no actions: keyword in the outputs for ", sub_path, " with prompt ", outputs)
            continue

        keywords, effective_length = parse_keywords(outputs)

        # The second entry is expected to be filled
        if len(keywords) > 1 and keywords[1] == "":
            print("It is abnormal for the this content to be empty ", sub_path, " with prompt ", outputs)

        if effective_length < 2:    # For those only 1 or zero, we won't consider them
            print("The prompt is too short for ", sub_path, " with prompt ", outputs)
        else:
            # Only write results that are kept; the context manager guarantees
            # the file is flushed and closed.
            with open(new_text_path, "w") as f:
                f.writelines(keyword + "\n" for keyword in keywords)

        length_stats[effective_length] += 1

    print("length_stats is ", length_stats)