This-and-That / scripts / process_llama.py
'''
Process the llama jsonl output and prepare the text files for the next step.
'''
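
# A rough sketch of the record format this script assumes; the values below are
# illustrative only, inferred from how the fields are used further down:
#
#   {"file_path": "/home/chfeng/llama3/full_text_tmp/<sub_path>lang.txt",
#    "input": "<the full-text prompt fed to the LLM>",
#    "output": "action: <...>\nobject: <...>\n..."}
#
# "output" is expected to start with "action:" and to hold one "<key>: <value>"
# entry per line; only the <value> parts are written to processed_text.txt.
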
import os
import json
import collections
if __name__ == "__main__":
    # Define the important paths
    json_path = "../SVD1/v1.jsonl"
    folder_path = "/home/kiteret/Desktop/StableVideoDiffusion/full_text_tmp/"

    # Read the jsonl file (one json record per line)
    with open(json_path, 'r') as json_file:
        json_list = list(json_file)
    # Iterate over all the json records
    length_stats = collections.defaultdict(int)
    for json_info in json_list:
        json_info = json.loads(json_info)

        # Define the path to write: strip the remote prefix and the trailing "lang.txt"
        key_start = len("/home/chfeng/llama3/full_text_tmp/")
        key_end = len("lang.txt")
        sub_path = json_info["file_path"][key_start:-key_end]
        new_text_path = os.path.join(folder_path, sub_path, "processed_text.txt")

        # Start from a clean output file
        if os.path.exists(new_text_path):
            os.remove(new_text_path)
        # Sanity check for the case where the input is missing
        if json_info["input"] == "":
            print("It is weird that the input is empty in the LLM process for ", sub_path)
            continue

        # Check the output content
        outputs = json_info["output"]
        if outputs.find("action:") != 0:
            print("It is weird that there is no 'action:' keyword in the output for ", sub_path, " with prompt ", outputs)
            continue
        # Prepare the output file for writing
        contents = outputs.split('\n')
        f = open(new_text_path, "a")

        # Iterate over the "key: value" lines and keep only the value part
        effective_length = 0
        for idx, content in enumerate(contents):
            # Take everything after the first ':' and drop the leading space
            key_word = content.split(":", 1)[1][1:]
            if key_word != "":
                effective_length += 1
            else:
                if idx == 1:
                    print("It is abnormal for this content to be empty ", sub_path, " with prompt ", outputs)
            f.write(key_word + "\n")
        f.close()
        # if effective_length == 2:
        #     print("short prompt case is ", sub_path, " with prompt ", outputs)

        # For those with only one or zero effective lines, we won't consider them
        if effective_length < 2:
            print("The prompt is too short for ", sub_path, " with prompt ", outputs)
            os.remove(new_text_path)

        length_stats[effective_length] += 1

    print("length_stats is ", length_stats)