|
""" |
|
for LLAMA parsing: |
|
takes a json of annotated minecraft games and converts to |
|
a turn format to be used format_(un/annotated)_jsonl.py. |
|
""" |
|
import os |
|
import json |
|
|
|
|
|
def is_nl(textstr):
    """
    Flag texts that look like a block-move description.

    The text is split on single spaces. The result is 1 when the tokens
    contain the word 'place' or 'pick', at least three numeric tokens and
    at least one known block colour; otherwise it is 0. Matching is
    case-sensitive and token-exact.

    (The original note describes this as telling an EDU from an EEU;
    presumably a result of 1 marks the EEU/action case -- confirm.)
    """
    palette = ('orange', 'blue', 'green', 'yellow', 'red', 'purple')
    tokens = textstr.split(' ')

    has_move_verb = 'place' in tokens or 'pick' in tokens
    numeral_count = sum(1 for token in tokens if token.isnumeric())
    mentions_color = any(token in palette for token in tokens)

    if has_move_verb and numeral_count >= 3 and mentions_color:
        return 1
    return 0
|
|
|
def _split_into_turns(edus):
    """
    Group one game's EDUs into consecutive turns.

    Each turn is a dict with the keys 'turn' (0-based index), 'speaker'
    and 'edus' (list of EDU texts), in that order.

    A new turn is started when:
      * a non-Architect EDU is recognised by ``is_nl`` -- the turn is then
        attributed to 'Builder' and the remembered speaker is reset, so
        whatever follows always opens another turn;
      * the speaker differs from the remembered speaker.
    Otherwise the EDU text is appended to the turn in progress.

    Returns an empty list when ``edus`` is empty (the previous inline
    version raised IndexError on such a game).
    """
    if not edus:
        return []

    turns = []
    first = edus[0]
    current_turn = {'turn': 0, 'speaker': first['speaker']}
    current_texts = [first['text']]
    # NOTE(review): the remembered speaker starts as None rather than the
    # first EDU's speaker, so the first EDU always ends up alone in turn 0
    # even if the same speaker continues. Kept as-is to preserve the
    # existing output -- confirm whether this is intended.
    last_speaker = None

    for edu in edus[1:]:
        speaker = edu['speaker']
        text = edu['text']

        if speaker != 'Architect' and is_nl(text):
            turn_speaker = 'Builder'
            last_speaker = None
        elif speaker != last_speaker:
            turn_speaker = speaker
            last_speaker = speaker
        else:
            # Same speaker keeps talking: extend the turn in progress.
            current_texts.append(text)
            continue

        # Close the turn in progress and open a new one with this EDU.
        current_turn['edus'] = current_texts
        turns.append(current_turn)
        current_turn = {'turn': len(turns), 'speaker': turn_speaker}
        current_texts = [text]

    # Flush the last open turn.
    current_turn['edus'] = current_texts
    turns.append(current_turn)
    return turns


current_folder = os.getcwd()

data_path = current_folder + '/<orig_data>.json'
save_path = current_folder + '/<turns>.jsonl'

with open(data_path, 'r', encoding='utf-8') as j:
    games = json.load(j)

# One entry per game: {'id': ..., 'turns': [...]}.
turn_version = []
for game in games:
    new_game = {}
    new_game['id'] = game['id']
    new_game['turns'] = _split_into_turns(game['edus'])
    turn_version.append(new_game)

# Despite the .jsonl extension, the result is written as a single JSON
# array holding every game, not as one JSON document per line.
with open(save_path, 'w', encoding='utf-8') as outfile:
    json.dump(turn_version, outfile)

print('json saved for {} games'.format(len(turn_version)))
|
|
|
|