# REMEND / combine.sh
# Uploaded by udiboy1209 in commit 9c19cff: "Add REMEND synthetic data and models :tada:"
# Combine the per-configuration files under ${RAW} into one set of
# "full.<file>" files per architecture under ${DEST}, then run
# remend.deduplicate_split on each architecture's combined files.
#
# Requires bash (arrays are used).
#
# Input layout : ${RAW}/<arch>/<dtype>/O<opt>/<impl>/<split>.<file>
# Output       : ${DEST}/<arch>/full.<file>, plus whatever
#                remend.deduplicate_split writes into ${DEST}/<arch>.
# Exit status  : 0 if every architecture was processed, 1 if creating an
#                output directory or the deduplicate/split step failed for
#                any architecture.
RAW=compiled/
DEST=dataset/
ARCHS=( arm32 aarch64 x64 )
FILES=( asm eqn const.jsonl )
IMPLS=( cse_c dag_c cse_fortran dag_fortran )
SPLITS=( train valid test )
DTYPES=( float double )
OPTS=( 0 1 2 )   # optimisation levels: O0, O1, O2

status=0

for arch in "${ARCHS[@]}"; do
  outdir="${DEST}/${arch}"

  if ! mkdir -p -- "${outdir}"; then
    printf 'error: cannot create %s\n' "${outdir}" >&2
    status=1
    continue
  fi

  # Clear files left over from a previous run. The ":?" guards abort the
  # script instead of expanding to "/*" should either variable be empty.
  rm -f -- "${DEST:?}/${arch:?}"/*

  for file in "${FILES[@]}"; do
    # The split/opt/impl/dtype order is identical for every file type, so
    # each full.<file> is concatenated from the same sequence of inputs.
    # NOTE(review): presumably the file types are line-aligned with each
    # other and rely on this ordering -- confirm before reordering loops.
    for split in "${SPLITS[@]}"; do
      for opt in "${OPTS[@]}"; do
        for impl in "${IMPLS[@]}"; do
          for dtype in "${DTYPES[@]}"; do
            # Best effort: a missing input is reported by cat on stderr and
            # the remaining inputs are still concatenated.
            cat -- "${RAW}/${arch}/${dtype}/O${opt}/${impl}/${split}.${file}"
          done
        done
      done
    done > "${outdir}/full.${file}"   # open the output once per file type
  done

  if ! python3 -m remend.deduplicate_split \
      --inprefix "${outdir}/full" \
      --outdir "${outdir}" \
      --split 0.025 \
      --filter bigint; then
    printf 'error: deduplicate_split failed for %s\n' "${arch}" >&2
    status=1
  fi
done

exit "${status}"