File size: 504 Bytes
9c19cff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
#!/bin/bash
#
# For every architecture: train the BPE tokenizer on the dataset, copy the
# equation files next to the tokenizer output, and run fairseq-preprocess on
# the asm -> eqn pairs.
#
# Usage: run from the directory that contains dataset/ (no arguments).
# Reads:  dataset/<arch>/{train,valid,test}.eqn plus whatever the BPE step
#         consumes from dataset/<arch>.
# Writes: tokenized/<arch>/

# Abort on the first failing step so fairseq-preprocess never runs on
# missing or stale inputs left over from a failed BPE/copy step.
set -euo pipefail

readonly DS=dataset
readonly TOK=tokenized
readonly ARCHS=(arm32 aarch64 x64)
# Splits whose .eqn files are copied; the --trainpref/--validpref/--testpref
# flags below name the same three splits explicitly.
readonly SPLITS=(train valid test)

for arch in "${ARCHS[@]}"; do
  src_dir="${DS}/${arch}"
  out_dir="${TOK}/${arch}"

  mkdir -p -- "${out_dir}"

  # Train the BPE tokenizer
  # NOTE(review): this runs a file literally named "remend.bpe"; if remend is
  # a Python package this probably needs `python3 -m remend.bpe` -- confirm.
  python3 remend.bpe -i "${src_dir}" -o "${out_dir}"

  # The target-side (.eqn) files are copied unchanged so that each split
  # prefix in out_dir has both its source and target file for preprocessing.
  for split in "${SPLITS[@]}"; do
    cp -- "${src_dir}/${split}.eqn" "${out_dir}/"
  done

  fairseq-preprocess -s asm -t eqn \
    --trainpref "${out_dir}/train" \
    --testpref "${out_dir}/test" \
    --validpref "${out_dir}/valid" \
    --destdir "${out_dir}/"
done
|