Other
English
minecraft
action prediction
File size: 3,640 Bytes
30b495e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
"""
For LLAMA parsing:
takes a JSON file of annotated Minecraft games and converts it to
a turn format to be used by format_(un/annotated)_jsonl.py.
"""
import os
import json


def is_nl(textstr):
    """
    Flag text that looks like a block-placement / block-removal description.

    The text is split on single spaces. The result is 1 when the tokens
    contain the word 'place' or 'pick', at least three numeric tokens, and
    at least one known color name; otherwise the result is 0.

    NOTE(review): despite the name, 1 appears to mean "action sequence"
    rather than "natural language" -- confirm against the callers.
    """
    known_colors = ('orange', 'blue', 'green', 'yellow', 'red', 'purple')
    tokens = textstr.split(' ')
    has_action_verb = 'place' in tokens or 'pick' in tokens
    numeric_count = sum(1 for tok in tokens if tok.isnumeric())
    mentions_color = any(tok in known_colors for tok in tokens)
    if has_action_verb and numeric_count >= 3 and mentions_color:
        return 1
    return 0

# Input and output files are resolved relative to the directory the script
# is launched from. '<orig_data>' and '<turns>' are placeholder file names.
current_folder = os.getcwd()

data_path = os.path.join(current_folder, '<orig_data>.json')
save_path = os.path.join(current_folder, '<turns>.jsonl')

# Read the input explicitly as UTF-8: without an encoding argument open()
# falls back to the locale's default, which can fail on non-ASCII text.
with open(data_path, 'r', encoding='utf-8') as j:
    jfile = json.load(j)
    games = jfile

## For each game, regroup its flat list of edus into numbered turns.
## Each output game is {'id': ..., 'turns': [...]}, and each turn is
## {'turn': <index>, 'speaker': <name>, 'edus': [<text>, ...]}.
turn_version = []
for game in games:
    game_id = game['id']
    edus = game['edus']
    game_turns = []
    # Speaker of the turn that may still be extended. It stays None after the
    # first edu, so the second edu always opens a new turn even when its
    # speaker matches the first one (the first edu is always its own turn).
    last_speaker = None
    for edu in edus:
        speaker = edu['speaker']
        text = edu['text']
        if not game_turns:
            # the first edu is always the first turn
            turn_speaker = speaker
        elif speaker != 'Architect' and is_nl(text):
            # A non-Architect edu that is_nl flags is an action sequence and
            # becomes its own 'Builder' turn. Resetting last_speaker makes
            # sure that whatever follows starts a fresh turn as well.
            turn_speaker = 'Builder'
            last_speaker = None
        elif speaker != last_speaker:
            # change of speaker: start a new turn for this speaker
            turn_speaker = speaker
            last_speaker = speaker
        else:
            # same speaker keeps talking: extend the turn in progress
            game_turns[-1]['edus'].append(text)
            continue
        game_turns.append({
            'turn': len(game_turns),
            'speaker': turn_speaker,
            'edus': [text],
        })
    # A game without any edus yields an empty list of turns instead of
    # failing on the lookup of its first edu.
    turn_version.append({'id': game_id, 'turns': game_turns})

# Write all converted games to save_path as one JSON document
# (a single json.dump call over the whole list).
with open(save_path, 'w') as outfile:
    json.dump(obj=turn_version, fp=outfile)

# Report how many games were converted.
game_count = len(turn_version)
print('json saved for {} games'.format(game_count))