""" for LLAMA parsing: takes a json of annotated minecraft games and converts to a turn format to be used format_(un/annotated)_jsonl.py. """ import os import json def is_nl(textstr): """ Determine if an element is an EDU or EEU. """ colors = ['orange', 'blue', 'green', 'yellow', 'red', 'purple'] ans = 0 words = textstr.split(' ') numerals = [n for n in words if n.isnumeric()] colors = [c for c in words if c in colors] if ('place' in words or 'pick' in words) and len(numerals) >= 3 and len(colors) > 0: ans = 1 return ans current_folder=os.getcwd() data_path = current_folder + '/.json' save_path = current_folder + '/.jsonl' with open(data_path, 'r') as j: jfile = json.load(j) games = jfile ##for each game, find turns, edus. ##feed one turn at a time, with each edu numbered, plus structure for that turn ##TEXT: ##STRUCTURE: ##NEXT TURN => #output structure turn_version = [] for game in games: new_game = {} new_game['id'] = game['id'] game_turns = [] edus = game['edus'] #the first edu is always the first turn. turn_no = 0 last_speaker = None new_turn = {} new_turn['turn'] = turn_no new_turn['speaker'] = edus[0]['speaker'] turn_edus = [] turn_edus.append(edus[0]['text']) for edu in edus[1:]: if edu['speaker'] == 'Architect': if edu['speaker'] == last_speaker: turn_edus.append(edu['text']) else: last_speaker = edu['speaker'] #finish and append last turn new_turn['edus'] = turn_edus game_turns.append(new_turn) turn_no += 1 #now start a new turn! new_turn = {} new_turn['turn'] = turn_no new_turn['speaker'] = last_speaker turn_edus = [] #a list of edus from that turn turn_edus.append(edu['text']) else: if is_nl(edu['text']): #then this is an action sequence and should be it's own turn last_speaker = None #need to do this so that builder actions turns are always their own turns #finish and append last turn new_turn['edus'] = turn_edus game_turns.append(new_turn) turn_no += 1 #now start a new turn! new_turn = {} new_turn['turn'] = turn_no new_turn['speaker'] = 'Builder' turn_edus = [] #a list of edus from that turn turn_edus.append(edu['text']) elif edu['speaker'] != last_speaker: last_speaker = edu['speaker'] #finish and append last turn new_turn['edus'] = turn_edus game_turns.append(new_turn) turn_no += 1 #now start a new turn! new_turn = {} new_turn['turn'] = turn_no new_turn['speaker'] = last_speaker turn_edus = [] #a list of edus from that turn turn_edus.append(edu['text']) else: turn_edus.append(edu['text']) #take care of last speaker turn in the game new_turn['edus'] = turn_edus game_turns.append(new_turn) #append new turns to the game dict new_game['turns'] = game_turns #add game dict to list of games turn_version.append(new_game) with open(save_path, 'w') as outfile: json.dump(turn_version, outfile) print('json saved for {} games'.format(len(turn_version)))