# Merge the per-configuration inputs of every architecture into one "full"
# set of files, then run remend.deduplicate_split on that merged set.
#
# Input layout  : ${RAW}/<arch>/<dtype>/O<opt>/<impl>/<split>.<file>
# Merged output : ${DEST}/<arch>/full.<file>
#
# The deduplicate_split module is invoked with the merged prefix, the
# per-architecture output directory, "--split 0.025" and "--filter bigint".
# NOTE(review): the exact meaning of those two options is defined by the
# remend package, not by this script -- confirm there before changing them.
RAW=compiled/
DEST=dataset/
ARCHS=(arm32 aarch64 x64)
FILES=(asm eqn const.jsonl)
IMPLS=(cse_c dag_c cse_fortran dag_fortran)
SPLITS=(train valid test)
DTYPES=(float double)

for arch in "${ARCHS[@]}"; do
  mkdir -p -- "${DEST}/${arch}"

  # Clear anything already in the output directory, because the merge below
  # only ever appends. ${DEST:?} aborts the script when DEST is empty or
  # unset, so the glob can never expand to "/<arch>/*".
  rm -f -- "${DEST:?}/${arch}"/*

  # Each entry of FILES is walked in the same split/opt/impl/dtype order, so
  # all full.<file> outputs receive their pieces in matching order.
  for split in "${SPLITS[@]}"; do
    for file in "${FILES[@]}"; do
      for opt in 0 1 2; do
        for impl in "${IMPLS[@]}"; do
          for dtype in "${DTYPES[@]}"; do
            # A missing input is reported by cat on stderr and the merge
            # continues with the next configuration (best effort).
            cat "${RAW}/${arch}/${dtype}/O${opt}/${impl}/${split}.${file}" \
              >> "${DEST}/${arch}/full.${file}"
          done
        done
      done
    done
  done

  python3 -m remend.deduplicate_split \
    --inprefix "${DEST}/${arch}/full" \
    --outdir "${DEST}/${arch}" \
    --split 0.025 \
    --filter bigint
done