File size: 2,335 Bytes
59b2a81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
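'''
    Post-process the llama outputs stored in a JSONL file: for every record, extract the
    value of each "key: value" line and write the values to a processed_text.txt file
    that is consumed by the next step
'''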
import os, shutil, sys
import json
import pandas as pd  
import collections


def extract_keywords(outputs):
    """Return the value of every non-blank "key: value" line of an LLM output.

    Args:
        outputs: The raw multi-line LLM output, one "key: value" pair per line.

    Returns:
        A list with one string per non-blank line, in order. For a line that
        contains a colon, the entry is the text between the first and the
        second colon with its first character dropped (the character right
        after the colon, a space in "key: value"). A non-blank line without
        any colon yields an empty string so that later lines keep their
        position. Blank lines (e.g. the one produced by a trailing newline)
        are skipped.
    """
    keywords = []
    for line in outputs.split('\n'):
        if not line.strip():
            # Blank line: nothing to extract, and indexing it would fail
            continue
        fields = line.split(":")
        keywords.append(fields[1][1:] if len(fields) > 1 else "")
    return keywords


if __name__ == "__main__":

    # Define important path
    json_path = "../SVD1/v1.jsonl"
    folder_path = "/home/kiteret/Desktop/StableVideoDiffusion/full_text_tmp/"

    # Each recorded file_path is sliced as <old prefix><sub_path>lang.txt;
    # these two lengths cut the prefix and the file name away.
    key_start = len("/home/chfeng/llama3/full_text_tmp/")
    key_end = len("lang.txt")

    # Maps "number of non-empty keywords" -> "number of records"
    length_stats = collections.defaultdict(int)

    # Read the json file, one record per line
    with open(json_path, 'r') as json_file:
        for json_line in json_file:
            if not json_line.strip():
                # Tolerate blank lines in the jsonl file
                continue
            json_info = json.loads(json_line)

            # Define the path to write; any stale result is dropped first so
            # that a skipped record never leaves an old file behind
            sub_path = json_info["file_path"][key_start:-key_end]
            new_text_path = os.path.join(folder_path, sub_path, "processed_text.txt")
            if os.path.exists(new_text_path):
                os.remove(new_text_path)

            # Sanity check for the case where input is missed
            if json_info["input"] == "":
                print("It is weird for the input is empty in the LLM process for ", sub_path)
                continue

            # The output must begin with the "action:" line
            outputs = json_info["output"]
            if not outputs.startswith("action:"):
                print("It is weird for no actions: keyword in the outputs for ", sub_path, " with prompt ", outputs)
                continue

            # Extract everything before touching the disk
            keywords = extract_keywords(outputs)
            effective_length = sum(1 for key_word in keywords if key_word != "")
            if len(keywords) > 1 and keywords[1] == "":
                print("It is abnormal for the this content to be empty ", sub_path, " with prompt ", outputs)

            if effective_length < 2:  # For those only 1 or zero, we won't consider them
                print("The prompt is too short for ", sub_path, " with prompt ", outputs)
            else:
                # Write one keyword per line; the with-block closes the file
                with open(new_text_path, "w") as f:
                    f.writelines(key_word + "\n" for key_word in keywords)

            length_stats[effective_length] += 1

    print("length_stats is ", length_stats)