#!/usr/bin/env bash
#
# Assemble the per-architecture dataset from the raw compiled outputs.
#
# For each architecture in ARCHS:
#   1. create ${DEST}/<arch> if needed and delete the files directly inside it;
#   2. for each entry of FILES, append every existing
#        ${RAW}/<arch>/<dtype>/O<opt>/<impl>/<split>.<file>
#      to ${DEST}/<arch>/full.<file>;
#   3. run `python3 -m remend.deduplicate_split` on the merged files.
#
# All paths are relative, so run this from the directory that contains
# ${RAW}/ and ${DEST}/.
#
# Missing input files are reported on stderr and skipped. If creating an
# output directory or the deduplicate_split step fails for any architecture,
# the remaining architectures are still processed and the script exits 1.

# Catch typos in variable names. `set -e` is intentionally not enabled:
# absent inputs are tolerated and one architecture failing must not stop
# the others.
set -u

readonly RAW=compiled
readonly DEST=dataset
readonly ARCHS=( arm32 aarch64 x64 )
readonly FILES=( asm eqn const.jsonl )
readonly IMPLS=( cse_c dag_c cse_fortran dag_fortran )
readonly SPLITS=( train valid test )
readonly DTYPES=( float double )
readonly OPTS=( 0 1 2 )  # optimisation levels, used as O0, O1, O2 in paths

failures=0

for arch in "${ARCHS[@]}"; do
  out_dir="${DEST}/${arch}"

  if ! mkdir -p -- "${out_dir}"; then
    printf 'error: cannot create %s, skipping %s\n' "${out_dir}" "${arch}" >&2
    failures=$((failures + 1))
    continue
  fi

  # Start from a clean directory so the appends below never pile onto the
  # output of a previous run. `:?` aborts instead of expanding to "/*" if
  # the path were ever empty.
  rm -f -- "${out_dir:?}"/*

  # The nesting order (split, opt, impl, dtype) fixes the order in which
  # content is appended to each full.<file>; it is the same for every entry
  # of FILES.
  for split in "${SPLITS[@]}"; do
    for file in "${FILES[@]}"; do
      inputs=()
      for opt in "${OPTS[@]}"; do
        for impl in "${IMPLS[@]}"; do
          for dtype in "${DTYPES[@]}"; do
            src="${RAW}/${arch}/${dtype}/O${opt}/${impl}/${split}.${file}"
            if [[ -f "${src}" ]]; then
              inputs+=( "${src}" )
            else
              printf 'warning: missing input %s\n' "${src}" >&2
            fi
          done
        done
      done

      # One cat per (split, file) instead of one process per input file.
      if (( ${#inputs[@]} > 0 )); then
        cat -- "${inputs[@]}" >> "${out_dir}/full.${file}"
      fi
    done
  done

  if ! python3 -m remend.deduplicate_split \
    --inprefix "${out_dir}/full" \
    --outdir "${out_dir}" \
    --split 0.025 \
    --filter bigint; then
    printf 'error: remend.deduplicate_split failed for %s\n' "${arch}" >&2
    failures=$((failures + 1))
  fi
done

# Exit only on failure, so that a successful run falls through normally.
if (( failures > 0 )); then
  printf 'error: %d architecture step(s) failed\n' "${failures}" >&2
  exit 1
fi