File size: 504 Bytes
9c19cff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#!/bin/bash
#
# Tokenize each architecture's dataset with BPE and binarize it with
# fairseq-preprocess.
#
# For every architecture in ARCHS this script:
#   1. creates ${TOK}/<arch>,
#   2. runs the BPE step with ${DS}/<arch> as input and ${TOK}/<arch> as output,
#   3. copies the <split>.eqn files for every split in SPLITS next to that output,
#   4. runs fairseq-preprocess (source "asm", target "eqn") over the
#      train/valid/test prefixes, writing into ${TOK}/<arch>/.
#
# Paths are relative, so run this from the directory that contains
# dataset/ and remend.bpe.

# Abort on the first failing step so fairseq-preprocess never runs on the
# output of a failed tokenizer run or an incomplete copy.
set -euo pipefail

DS=dataset/
TOK=tokenized/
ARCHS=( arm32 aarch64 x64 )
SPLITS=( train valid test )

for arch in "${ARCHS[@]}"
do
    src_dir="${DS}/${arch}"
    out_dir="${TOK}/${arch}"

    mkdir -p -- "${out_dir}"

    # Train the BPE tokenizer
    # NOTE(review): this executes a file literally named "remend.bpe" from the
    # current directory; if remend.bpe is a Python module, `python3 -m` would
    # be needed instead — confirm against the repository layout.
    python3 remend.bpe -i "${src_dir}" -o "${out_dir}"

    # Collect the .eqn file of every split so the list of splits lives in one
    # place (SPLITS) instead of being repeated in a brace expansion.
    eqn_files=()
    for split in "${SPLITS[@]}"
    do
        eqn_files+=( "${src_dir}/${split}.eqn" )
    done
    cp -- "${eqn_files[@]}" "${out_dir}"

    # Presumably the BPE step leaves <split>.asm files in ${out_dir}, which
    # together with the copied .eqn files form the asm->eqn pairs — verify.
    fairseq-preprocess -s asm -t eqn \
        --trainpref "${out_dir}/train" \
        --testpref "${out_dir}/test" \
        --validpref "${out_dir}/valid" \
        --destdir "${out_dir}/"
done