Update commonsenseConstraint.py
Browse files- commonsenseConstraint.py +0 -188
commonsenseConstraint.py
CHANGED
|
@@ -545,191 +545,3 @@ def boolean_evaluation(query_data, tested_data):
|
|
| 545 |
print(return_info[key][1])
|
| 546 |
return False
|
| 547 |
return True
|
| 548 |
-
|
| 549 |
-
# if __name__ == '__main__':
|
| 550 |
-
# number_list = extract_numbers_from_filenames('/home/xj/toolAugEnv/code/toolConstraint/data/annotation/lrz')
|
| 551 |
-
# # json_data = json.load(open('/home/xj/toolAugEnv/code/toolConstraint/data/annotation/x/annotation_4.json'))
|
| 552 |
-
# query_data = load_line_json_data('/home/xj/toolAugEnv/code/toolConstraint/data/query/lrz.jsonl')
|
| 553 |
-
# for idx in number_list:
|
| 554 |
-
# json_data = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/lrz/annotation_{idx}.json'))
|
| 555 |
-
# print(str(idx), evaluation(query_data[idx-1], json_data))
|
| 556 |
-
# # json_data = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/turbo16k-turbo16k/plan_{idx}.json'))
|
| 557 |
-
# # query_data = load_line_json_data('/home/xj/toolAugEnv/code/toolConstraint/data/query/test.jsonl')[idx-1]
|
| 558 |
-
# # help me write all function name in this file, just the name
|
| 559 |
-
# #
|
| 560 |
-
# # list all function name in this file
|
| 561 |
-
# # ['is_reasonalbe_visiting_city', 'is_valiable_restaurants', 'is_valiable_attractions', 'is_valiable_transportation', 'is_valid_information_in_current_city', 'is_valid_information_in_sandbox']
|
| 562 |
-
# # print(is_valiable_restaurants(query_data, json_data))
|
| 563 |
-
|
| 564 |
-
# if __name__ == "__main__":
|
| 565 |
-
# user = 'zk'
|
| 566 |
-
# query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/query/{user}.jsonl')
|
| 567 |
-
# idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
|
| 568 |
-
# commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
|
| 569 |
-
# for idx in idx_number_list:
|
| 570 |
-
# print(idx)
|
| 571 |
-
# query_data = query_data_list[idx-1]
|
| 572 |
-
# generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/turbo16k-turbo16k/{user}/plan_{idx}.json'))
|
| 573 |
-
# # generated_plan = generated_plan[:-1]
|
| 574 |
-
# if generated_plan[-1]['gpt-3.5-turbo-16k-result'] != 'Plan Fail':
|
| 575 |
-
# info_box = evaluation(query_data, generated_plan[-1]['gpt-3.5-turbo-16k-result'])
|
| 576 |
-
# generated_plan[-1]['toolAug-commonsense'] = info_box
|
| 577 |
-
# else:
|
| 578 |
-
# generated_plan[-1]['toolAug-commonsense'] = None
|
| 579 |
-
# info_box = None
|
| 580 |
-
# commonsense_statistic[query_data['level']][query_data['days']].append(info_box)
|
| 581 |
-
# with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/turbo16k-turbo16k/{user}/plan_{idx}.json','w') as f:
|
| 582 |
-
# json.dump(generated_plan,f)
|
| 583 |
-
|
| 584 |
-
# with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/turbo16k-turbo16k/{user}/commonsense_statistic.json','w') as f:
|
| 585 |
-
# json.dump(commonsense_statistic,f)
|
| 586 |
-
|
| 587 |
-
# if __name__ == "__main__":
|
| 588 |
-
# user = 'all'
|
| 589 |
-
# model_type = ['chatgpt','gpt4','greedy_search'][2]
|
| 590 |
-
# query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/query/{user}.jsonl')
|
| 591 |
-
# # idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
|
| 592 |
-
# idx_number_list = [i for i in range(1,501)]
|
| 593 |
-
# commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
|
| 594 |
-
|
| 595 |
-
# for idx in idx_number_list:
|
| 596 |
-
# print(idx)
|
| 597 |
-
# query_data = query_data_list[idx-1]
|
| 598 |
-
# generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/pre2/{user}/plan_{idx}.json'))
|
| 599 |
-
# # generated_plan = generated_plan[:-1]
|
| 600 |
-
# if model_type == 'greedy_search':
|
| 601 |
-
# info_box = evaluation(query_data, generated_plan[-1][f'greedy_search_plan'])
|
| 602 |
-
# else:
|
| 603 |
-
# info_box = evaluation(query_data, generated_plan[-1][f'{model_type}_human_collected_info_results_parsed'])
|
| 604 |
-
# generated_plan[-1][f'{model_type}_with_human_collected_commonsense'] = info_box
|
| 605 |
-
# commonsense_statistic[query_data['level']][query_data['days']].append(info_box)
|
| 606 |
-
|
| 607 |
-
# with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/pre2/{user}/plan_{idx}.json','w') as f:
|
| 608 |
-
# json.dump(generated_plan,f)
|
| 609 |
-
|
| 610 |
-
# with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/pre2/{user}/{model_type}_with_human_collected_commonsense_statistic.json','w') as f:
|
| 611 |
-
# json.dump(commonsense_statistic,f)
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
# if __name__ == "__main__":
|
| 615 |
-
# user = 'all'
|
| 616 |
-
# query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/query/{user}.jsonl')
|
| 617 |
-
# idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
|
| 618 |
-
# hardConstraint_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
|
| 619 |
-
# not_satified = []
|
| 620 |
-
# for idx in tqdm(idx_number_list):
|
| 621 |
-
# # print(idx)
|
| 622 |
-
# query_data = query_data_list[idx-1]
|
| 623 |
-
# generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}/annotation_{idx}.json'))
|
| 624 |
-
|
| 625 |
-
# if not boolean_evaluation(query_data, generated_plan):
|
| 626 |
-
# not_satified.append(idx)
|
| 627 |
-
# print(idx)
|
| 628 |
-
# generated_plan = generated_plan[:-1]
|
| 629 |
-
# print(not_satified)
|
| 630 |
-
|
| 631 |
-
if __name__ == "__main__":
|
| 632 |
-
set_type = ["train",'dev','test'][0]
|
| 633 |
-
query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/final_data/{set_type}/query/query.jsonl')
|
| 634 |
-
# idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/final_data/{set_type}/plan')
|
| 635 |
-
commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
|
| 636 |
-
not_satified = []
|
| 637 |
-
# print( idx_number_list)
|
| 638 |
-
for idx in tqdm(range(1,len(query_data_list)+1)):
|
| 639 |
-
# print(idx)
|
| 640 |
-
query_data = query_data_list[idx-1]
|
| 641 |
-
generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/data/final_data/{set_type}/plan/plan_{idx}.json'))
|
| 642 |
-
try:
|
| 643 |
-
store_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{set_type}/plan_{idx}.json'))
|
| 644 |
-
except FileNotFoundError:
|
| 645 |
-
store_plan = [{}]
|
| 646 |
-
info_box = evaluation(query_data,generated_plan[1])
|
| 647 |
-
# if not boolean_evaluation(query_data, generated_plan[1]):
|
| 648 |
-
# not_satified.append(idx)
|
| 649 |
-
# print(idx)
|
| 650 |
-
# print(store_plan[-1])
|
| 651 |
-
store_plan[-1][f'human_anno_commonsense_constraint'] = info_box
|
| 652 |
-
with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{set_type}/plan_{idx}.json','w') as f:
|
| 653 |
-
json.dump(store_plan,f)
|
| 654 |
-
commonsense_statistic[query_data['level']][query_data['days']].append(info_box)
|
| 655 |
-
print(not_satified)
|
| 656 |
-
with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{set_type}/human_anno_commonsense_constraint.json','w') as f:
|
| 657 |
-
json.dump(commonsense_statistic,f)
|
| 658 |
-
|
| 659 |
-
# if __name__ == "__main__":
|
| 660 |
-
# user = 'all'
|
| 661 |
-
# model_type = ['chatgpt','gpt4'][1]
|
| 662 |
-
# query_data_list = load_line_json_data(f'/home/xj/toolAugEnv/code/toolConstraint/data/query/{user}.jsonl')
|
| 663 |
-
# # idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
|
| 664 |
-
# idx_number_list = [i for i in range(1,501)]
|
| 665 |
-
# commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
|
| 666 |
-
# cnt = 0
|
| 667 |
-
# for idx in idx_number_list:
|
| 668 |
-
# # print(idx)
|
| 669 |
-
# query_data = query_data_list[idx-1]
|
| 670 |
-
# generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/pre/{user}/plan_{idx}.json'))[-1]['gpt4_human_collected_info_results_parsed']
|
| 671 |
-
# # generated_plan = generated_plan[:-1]
|
| 672 |
-
|
| 673 |
-
# if not boolean_evaluation(query_data, generated_plan):
|
| 674 |
-
# cnt += 1
|
| 675 |
-
# print(idx)
|
| 676 |
-
# print(cnt)
|
| 677 |
-
|
| 678 |
-
# if __name__ == "__main__":
|
| 679 |
-
# parser = argparse.ArgumentParser(description="")
|
| 680 |
-
# # model_type = ['gpt-3.5-turbo-1106','gpt-4-1106-preview','greedy_search','mistral-7B-32K','gemini2','mixtral','gpt-3.5-turbo-11062'][-1]
|
| 681 |
-
# # method = ['direct','cot','react','reflexion','tool-use'][-1]
|
| 682 |
-
# # set_type = ['dev','test'][0]
|
| 683 |
-
# parser.add_argument("--model_type", type=str, default="gpt-3.5-turbo-1106")
|
| 684 |
-
# parser.add_argument("--method", type=str, default="direct")
|
| 685 |
-
# parser.add_argument("--set_type", type=str, default="dev")
|
| 686 |
-
# args = parser.parse_args()
|
| 687 |
-
# directory = f'/home/xj/toolAugEnv/code/toolConstraint/data/final_data/{args.set_type}'
|
| 688 |
-
# query_data_list = load_line_json_data(os.path.join(directory, 'query/query.jsonl'))
|
| 689 |
-
# # idx_number_list = extract_numbers_from_filenames(f'/home/xj/toolAugEnv/code/toolConstraint/data/annotation/{user}')
|
| 690 |
-
# idx_number_list = [i for i in range(1,len(query_data_list)+1)]
|
| 691 |
-
# commonsense_statistic= {level:{day:[] for day in [3,5,7]} for level in ['easy','medium','hard']}
|
| 692 |
-
# deliver_cnt = 0
|
| 693 |
-
# if args.method == 'tool-use':
|
| 694 |
-
# suffix = ''
|
| 695 |
-
# else:
|
| 696 |
-
# suffix = '_with_human_info'
|
| 697 |
-
# for idx in tqdm(idx_number_list):
|
| 698 |
-
# # print(idx)
|
| 699 |
-
# query_data = query_data_list[idx-1]
|
| 700 |
-
# generated_plan = json.load(open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{args.set_type}/plan_{idx}.json'))
|
| 701 |
-
# # generated_plan = generated_plan[:-1]
|
| 702 |
-
# if args.model_type == 'greedy_search':
|
| 703 |
-
# info_box = evaluation(query_data, generated_plan[-1][f'greedy_search_plan'])
|
| 704 |
-
# else:
|
| 705 |
-
# if args.method == 'tool-use':
|
| 706 |
-
# suffix2 = ''
|
| 707 |
-
# else:
|
| 708 |
-
# suffix2 = '_collected'
|
| 709 |
-
# if generated_plan[-1][f'{args.model_type}_{args.method}{suffix2}_info_results'] and generated_plan[-1][f'{args.model_type}_{args.method}{suffix2}_info_results']!='Max Token Length Exceeded.':
|
| 710 |
-
# try:
|
| 711 |
-
# info_box = evaluation(query_data, generated_plan[-1][f'{args.model_type}_{args.method}{suffix}_results_parsed'])
|
| 712 |
-
# except KeyError:
|
| 713 |
-
# info_box = None
|
| 714 |
-
# generated_plan[-1][f'{args.model_type}_{args.method}{suffix2}_info_results'] = ""
|
| 715 |
-
# except IndexError:
|
| 716 |
-
# info_box = None
|
| 717 |
-
# generated_plan[-1][f'{args.model_type}_{args.method}{suffix2}_info_results'] = ""
|
| 718 |
-
# else:
|
| 719 |
-
# info_box = None
|
| 720 |
-
# if info_box:
|
| 721 |
-
# deliver_cnt += 1
|
| 722 |
-
# generated_plan[-1][f'{args.model_type}_{args.method}{suffix}_commonsense_constraint'] = info_box
|
| 723 |
-
# commonsense_statistic[query_data['level']][query_data['days']].append(info_box)
|
| 724 |
-
|
| 725 |
-
# with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{args.set_type}/plan_{idx}.json','w') as f:
|
| 726 |
-
# json.dump(generated_plan,f)
|
| 727 |
-
|
| 728 |
-
# with open(f'/home/xj/toolAugEnv/code/toolConstraint/results/{args.set_type}/{args.model_type}_{args.method}{suffix}_commonsense_constraint.json','w') as f:
|
| 729 |
-
# json.dump(commonsense_statistic,f)
|
| 730 |
-
|
| 731 |
-
# if args.set_type == 'dev':
|
| 732 |
-
# print(f"Model:{args.model_type} Method:{args.method} Set: {args.set_type} \nDeliver Rate: {deliver_cnt/180}" )
|
| 733 |
-
# elif args.set_type == 'test':
|
| 734 |
-
# print(f"Model:{args.model_type} Method:{args.method} Set: {args.set_type} \nDeliver Rate: {deliver_cnt/1000}" )
|
| 735 |
-
|
|
|
|
| 545 |
print(return_info[key][1])
|
| 546 |
return False
|
| 547 |
return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|