udiboy1209 committed on
Commit
9c19cff
·
1 Parent(s): 5ddcc19

Add REMEND synthetic data and models :tada:

Browse files
combine.sh ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
#
# combine.sh - merge the per-configuration outputs under compiled/ into one
# full.<kind> file per architecture, then run remend.deduplicate_split on the
# merged files.
#
# Usage: ./combine.sh    (no arguments; all paths are relative to the
#                         current working directory)
#
# Reads:  compiled/<arch>/<dtype>/O<opt>/<impl>/<split>.<kind>
# Writes: dataset/<arch>/full.<kind>, plus whatever
#         remend.deduplicate_split places in dataset/<arch>.

# Abort on the first failing command or unset variable. Every <kind> is
# concatenated in the same order, so a silently skipped input would leave the
# full.* files with differing content.
# NOTE(review): presumably the full.* files are line-aligned parallel files -
# confirm against remend.deduplicate_split.
set -euo pipefail

readonly RAW=compiled/
readonly DEST=dataset/
readonly ARCHS=(arm32 aarch64 x64)
readonly FILES=(asm eqn const.jsonl)
readonly IMPLS=(cse_c dag_c cse_fortran dag_fortran)
readonly SPLITS=(train valid test)
readonly DTYPES=(float double)
readonly OPT_LEVELS=(0 1 2)

for arch in "${ARCHS[@]}"; do
  out_dir="${DEST}/${arch}"
  mkdir -p -- "${out_dir}"

  # The loop below appends (>>), so output left over from a previous run has
  # to go first. rm without -r reports an error for any sub-directory; that
  # is deliberately not fatal for this script.
  rm -f -- "${out_dir:?}"/* || true

  # Loop nesting fixes the record order inside each full.<kind> file:
  # split, then optimisation level, then implementation, then data type.
  for split in "${SPLITS[@]}"; do
    for file in "${FILES[@]}"; do
      for opt in "${OPT_LEVELS[@]}"; do
        for impl in "${IMPLS[@]}"; do
          for dtype in "${DTYPES[@]}"; do
            cat -- "${RAW}/${arch}/${dtype}/O${opt}/${impl}/${split}.${file}" \
              >> "${out_dir}/full.${file}"
          done
        done
      done
    done
  done

  python3 -m remend.deduplicate_split \
    --inprefix "${out_dir}/full" \
    --outdir "${out_dir}" \
    --split 0.025 \
    --filter bigint
done
compile.sh ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
#
# compile.sh - run remend.compile_dataset for every architecture / data type
# pair, once per split (test, valid, train), writing into
# compiled/<arch>/<dtype>.
#
# Usage: ./compile.sh    (no arguments; expects ./prim_fwd/prim_fwd.<split>
#                         relative to the current working directory)

# Abort on the first failing command or unset variable so that a failed
# compile step is not silently followed by the next one.
set -euo pipefail

readonly ARCHS=(arm32 aarch64 x64)
readonly DTYPES=(float double)
readonly IMPLS=(cse_c dag_c cse_fortran dag_fortran)
readonly OPT_LEVELS=(0 1 2)

#######################################
# Run remend.compile_dataset for one split of one arch/dtype pair.
# Globals:   IMPLS, OPT_LEVELS (read)
# Arguments: $1 - architecture name
#            $2 - data type
#            $3 - split name; selects ./prim_fwd/prim_fwd.<split> and is
#                 also passed as --prefix
#            $4 - value forwarded to --pick
#                 (NOTE(review): presumably a sampling fraction - confirm
#                 against remend.compile_dataset)
#            $5 - output directory
# Outputs:   a progress banner on stdout, plus the tool's own output
# Returns:   exit status of the python invocation
#######################################
compile_split() {
  local arch=$1
  local dtype=$2
  local split=$3
  local pick=$4
  local out_dir=$5

  echo "### Running ${arch} ${dtype} ${split} ###"
  python3 -m remend.compile_dataset \
    --file "./prim_fwd/prim_fwd.${split}" --prefix "${split}" \
    --impl "${IMPLS[@]}" -O "${OPT_LEVELS[@]}" --pick "${pick}" \
    --outdir "${out_dir}" \
    --arch "${arch}" --dtype "${dtype}"
}

for arch in "${ARCHS[@]}"; do
  for dtype in "${DTYPES[@]}"; do
    out_dir="compiled/${arch}/${dtype}"
    mkdir -p -- "${out_dir}"

    # test and valid use --pick 1; train uses --pick 0.0005.
    compile_split "${arch}" "${dtype}" test 1 "${out_dir}"
    compile_split "${arch}" "${dtype}" valid 1 "${out_dir}"
    compile_split "${arch}" "${dtype}" train 0.0005 "${out_dir}"
  done
done
dataset.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b4baa436aa5b3e45245ee22f323ad853533389a626e4fb02b4065cfa8e75693
3
+ size 64566475
models/trained_aarch64_base/beam1.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23ff02892441961b89f90fd070b99a20c4dda410b72d33e454b77f21e34a4a23
3
+ size 2609062
models/trained_aarch64_base/beam5.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfd20966b755b0a344399216977a0a57f8a3f52b7e78da6656bbf0365ce72e93
3
+ size 2608053
models/trained_aarch64_base/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8476217234da977912018453eb0e413a56ce21799130703b7ba672895e3d7de
3
+ size 152687704
models/trained_aarch64_base/training.log ADDED
The diff for this file is too large to render. See raw diff
 
models/trained_aarch64_best_drop01/beam1.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6f5c0fa95ee34a5f167cfaea37362d5a8032889e0b18844f137e501aa6efcee
3
+ size 2603377
models/trained_aarch64_best_drop01/beam5.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec172f2c6b25c2525c3a5b18f43991850cc02360fc42b9837934b428134b74fe
3
+ size 2600951
models/trained_aarch64_best_drop01/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dae30c4670aa0e6b336cd695a01caa180ecc6bda419c0a22f671509f48db990a
3
+ size 143618328
models/trained_aarch64_best_drop01/training.log ADDED
The diff for this file is too large to render. See raw diff
 
models/trained_arm32_base/beam1.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:537779cd52f30aefab4800cc5fba9cc4d3fcb49a20711e53ddb8ecd30d7dc3e9
3
+ size 2324372
models/trained_arm32_base/beam5.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5f80acc6d02ded22d8c8f7ba7b57c5087695f4e867fbe48a6e91107a6c3f150
3
+ size 2321993
models/trained_arm32_base/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d13a4cb6271f8d146e3e04a7948017d60f674a5df17376a550e4129b14b2d352
3
+ size 152944672
models/trained_arm32_base/training.log ADDED
The diff for this file is too large to render. See raw diff
 
models/trained_arm32_best_L4/beam1.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:389510e251276d1e0f27e735bab86abfce4ad417a679f2e32d6de285d2cafeeb
3
+ size 2270911
models/trained_arm32_best_L4/beam5.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ad0bfd17e371e63c5372794454a5fea2d4f5db1203c3d4fe7dbdd1503a4b3f0
3
+ size 1164987
models/trained_arm32_best_L4/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77ec390a3265c1d6743d45219a1f1c63600721815cc6c51b73131af338a9209c
3
+ size 96298008
models/trained_arm32_best_L4/training.log ADDED
The diff for this file is too large to render. See raw diff
 
models/trained_x64_base/beam1.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e98663bfe7e89dd379004b29190aa4259cb5ccf1399b0fbcd56eddea1278e4ba
3
+ size 2747346
models/trained_x64_base/beam5.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b910eca1cd409aaa52220442095c8eb242d1abaf7373f4c15dbda9b95acdcbc4
3
+ size 2746375
models/trained_x64_base/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:494060e19f549ce2c9c5e810fafd4a68f781bc086243b6186d76c3d58fbcd248
3
+ size 143470872
models/trained_x64_base/training.log ADDED
The diff for this file is too large to render. See raw diff
 
models/trained_x64_best_decay005/beam1.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66c6d17b5111db175de0e015288bfc1765b622240c58c5413643343b3498c38b
3
+ size 2739095
models/trained_x64_best_decay005/beam5.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5670cd6fb4e36a06d08032a64448b01d5481a221be46e70ce3fefceaa74f7da
3
+ size 2738411
models/trained_x64_best_decay005/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43657cb25769821cc178e6d04267d1a4ebc67493aefcae737b1d741cd40edd08
3
+ size 143470872
models/trained_x64_best_decay005/training.log ADDED
The diff for this file is too large to render. See raw diff
 
tokenize.sh ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
#
# tokenize.sh - for each architecture: run remend.bpe on dataset/<arch>,
# copy the .eqn split files into tokenized/<arch>, then run
# fairseq-preprocess with asm as the source and eqn as the target language.
#
# Usage: ./tokenize.sh    (no arguments; all paths are relative to the
#                          current working directory)

# Abort on the first failing command or unset variable so that
# fairseq-preprocess never runs on the output of a failed earlier step.
set -euo pipefail

readonly DS=dataset/
readonly TOK=tokenized/
readonly ARCHS=(arm32 aarch64 x64)
readonly SPLITS=(train valid test)

for arch in "${ARCHS[@]}"; do
  src_dir="${DS}/${arch}"
  tok_dir="${TOK}/${arch}"
  mkdir -p -- "${tok_dir}"

  # Train the BPE tokenizer.
  # NOTE(review): invoked with -m on the assumption that remend.bpe is a
  # module of the remend package; a bare `python3 remend.bpe` would instead
  # look for a file of that name in the working directory - confirm.
  # Presumably this step writes the <split>.asm files into ${tok_dir} that
  # fairseq-preprocess picks up below - confirm.
  python3 -m remend.bpe -i "${src_dir}" -o "${tok_dir}"

  # Copy the target-side (.eqn) file of every split next to the BPE output.
  eqn_files=()
  for split in "${SPLITS[@]}"; do
    eqn_files+=("${src_dir}/${split}.eqn")
  done
  cp -- "${eqn_files[@]}" "${tok_dir}"

  fairseq-preprocess -s asm -t eqn \
    --trainpref "${tok_dir}/train" \
    --testpref "${tok_dir}/test" \
    --validpref "${tok_dir}/valid" \
    --destdir "${tok_dir}/"
done
tokenized.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d21c76e6d195fa68f9556036d7196f2230205d3b107bb8d1d46a1221b13a3524
3
+ size 121241317