File size: 862 Bytes
9c19cff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/usr/bin/env bash
#
# Assemble the per-architecture "full" dataset files and hand them to the
# deduplicate/split step.
#
# For every architecture in ARCHS this script:
#   1. creates ${DEST}/<arch> and deletes whatever `${DEST}/<arch>/*` matches
#      (rm -f, non-recursive), so each run starts from fresh output files;
#   2. for every entry of FILES, concatenates
#        ${RAW}/<arch>/<dtype>/O<opt>/<impl>/<split>.<file>
#      into ${DEST}/<arch>/full.<file>, iterating split -> opt -> impl -> dtype
#      (dtype varies fastest);
#   3. runs `python3 -m remend.deduplicate_split` on the result.
#
# All paths are relative to the current working directory.
# A missing input file is not fatal: cat reports it on stderr and the
# remaining inputs are still concatenated. A failure of the Python step does
# not stop processing of the following architectures either.
#
# Requires bash (arrays are used).

RAW=compiled/
DEST=dataset/
ARCHS=( arm32 aarch64 x64 )
FILES=( asm eqn const.jsonl )
IMPLS=( cse_c dag_c cse_fortran dag_fortran )
SPLITS=( train valid test )
DTYPES=( float double )
OPTS=( 0 1 2 )   # optimisation levels, i.e. the O0 / O1 / O2 input directories

for arch in "${ARCHS[@]}"; do
    # Strip one trailing slash so the joined paths do not contain "//".
    src_dir="${RAW%/}/${arch}"
    out_dir="${DEST%/}/${arch}"

    mkdir -p -- "${out_dir}"
    rm -f -- "${out_dir}"/*

    for file in "${FILES[@]}"; do
        # Collect the inputs for this output in the required order first, so
        # the output is opened once and cat is started once instead of once
        # per input file.
        inputs=()
        for split in "${SPLITS[@]}"; do
            for opt in "${OPTS[@]}"; do
                for impl in "${IMPLS[@]}"; do
                    for dtype in "${DTYPES[@]}"; do
                        inputs+=( "${src_dir}/${dtype}/O${opt}/${impl}/${split}.${file}" )
                    done
                done
            done
        done

        # The directory was just emptied, so truncating here is equivalent
        # to appending to a file that does not exist yet.
        cat -- "${inputs[@]}" > "${out_dir}/full.${file}"
    done

    # Options are passed through unchanged; see remend.deduplicate_split for
    # their meaning.
    python3 -m remend.deduplicate_split \
        --inprefix "${out_dir}/full" \
        --outdir "${out_dir}" \
        --split 0.025 \
        --filter bigint
done