udiboy1209 committed on Jul 4

Commit

9c19cff

1 Parent(s): 5ddcc19

Add REMEND synthetic data and models :tada:

Browse files

Files changed (29) hide show

combine.sh +32 -0
compile.sh +25 -0
dataset.zip +3 -0
models/trained_aarch64_base/beam1.zip +3 -0
models/trained_aarch64_base/beam5.zip +3 -0
models/trained_aarch64_base/checkpoint_best.pt +3 -0
models/trained_aarch64_base/training.log +0 -0
models/trained_aarch64_best_drop01/beam1.zip +3 -0
models/trained_aarch64_best_drop01/beam5.zip +3 -0
models/trained_aarch64_best_drop01/checkpoint_best.pt +3 -0
models/trained_aarch64_best_drop01/training.log +0 -0
models/trained_arm32_base/beam1.zip +3 -0
models/trained_arm32_base/beam5.zip +3 -0
models/trained_arm32_base/checkpoint_best.pt +3 -0
models/trained_arm32_base/training.log +0 -0
models/trained_arm32_best_L4/beam1.zip +3 -0
models/trained_arm32_best_L4/beam5.zip +3 -0
models/trained_arm32_best_L4/checkpoint_best.pt +3 -0
models/trained_arm32_best_L4/training.log +0 -0
models/trained_x64_base/beam1.zip +3 -0
models/trained_x64_base/beam5.zip +3 -0
models/trained_x64_base/checkpoint_best.pt +3 -0
models/trained_x64_base/training.log +0 -0
models/trained_x64_best_decay005/beam1.zip +3 -0
models/trained_x64_best_decay005/beam5.zip +3 -0
models/trained_x64_best_decay005/checkpoint_best.pt +3 -0
models/trained_x64_best_decay005/training.log +0 -0
tokenize.sh +19 -0
tokenized.zip +3 -0

combine.sh ADDED Viewed

	@@ -0,0 +1,32 @@

+RAW=compiled/
+DEST=dataset/
+ARCHS=( arm32 aarch64 x64 )
+FILES=( asm eqn const.jsonl )
+IMPLS=( cse_c dag_c cse_fortran dag_fortran )
+SPLITS=( train valid test )
+DTYPES=( float double )
+for arch in ${ARCHS[@]}
+do
+    mkdir -p ${DEST}/${arch}
+    rm -f ${DEST}/${arch}/*
+    for split in ${SPLITS[@]}
+    do
+        for file in ${FILES[@]}
+        do
+            for opt in $(seq 0 2)
+            do
+                for impl in ${IMPLS[@]}
+                do
+                    for dtype in ${DTYPES[@]}
+                    do
+                        cat ${RAW}/${arch}/${dtype}/O${opt}/${impl}/${split}.${file} >> ${DEST}/${arch}/full.${file}
+                    done
+                done
+            done
+        done
+    done
+    python3 -m remend.deduplicate_split --inprefix ${DEST}/${arch}/full --outdir ${DEST}/${arch} --split 0.025 --filter bigint
+done

compile.sh ADDED Viewed

	@@ -0,0 +1,25 @@

+#!/bin/bash
+ARCHS=( arm32 aarch64 x64 )
+DTYPES=( float double )
+for arch in ${ARCHS[@]}
+do
+    for dtype in ${DTYPES[@]}
+    do
+        DIR=compiled/${arch}/${dtype}
+        mkdir -p $DIR
+        echo "### Running ${arch} ${dtype} test ###"
+        python3 -m remend.compile_dataset --file ./prim_fwd/prim_fwd.test --prefix test \
+            --impl cse_c dag_c cse_fortran dag_fortran -O 0 1 2 --pick 1 --outdir $DIR \
+            --arch ${arch} --dtype ${dtype}
+        echo "### Running ${arch} ${dtype} valid ###"
+        python3 -m remend.compile_dataset --file ./prim_fwd/prim_fwd.valid --prefix valid \
+            --impl cse_c dag_c cse_fortran dag_fortran -O 0 1 2 --pick 1 --outdir $DIR \
+            --arch ${arch} --dtype ${dtype}
+        echo "### Running ${arch} ${dtype} train ###"
+        python3 -m remend.compile_dataset --file ./prim_fwd/prim_fwd.train --prefix train \
+            --impl cse_c dag_c cse_fortran dag_fortran -O 0 1 2 --pick 0.0005 --outdir $DIR \
+            --arch ${arch} --dtype ${dtype}
+    done
+done

dataset.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9b4baa436aa5b3e45245ee22f323ad853533389a626e4fb02b4065cfa8e75693
+size 64566475

models/trained_aarch64_base/beam1.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:23ff02892441961b89f90fd070b99a20c4dda410b72d33e454b77f21e34a4a23
+size 2609062

models/trained_aarch64_base/beam5.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cfd20966b755b0a344399216977a0a57f8a3f52b7e78da6656bbf0365ce72e93
+size 2608053

models/trained_aarch64_base/checkpoint_best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b8476217234da977912018453eb0e413a56ce21799130703b7ba672895e3d7de
+size 152687704

models/trained_aarch64_base/training.log ADDED Viewed

The diff for this file is too large to render. See raw diff

models/trained_aarch64_best_drop01/beam1.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a6f5c0fa95ee34a5f167cfaea37362d5a8032889e0b18844f137e501aa6efcee
+size 2603377

models/trained_aarch64_best_drop01/beam5.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ec172f2c6b25c2525c3a5b18f43991850cc02360fc42b9837934b428134b74fe
+size 2600951

models/trained_aarch64_best_drop01/checkpoint_best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dae30c4670aa0e6b336cd695a01caa180ecc6bda419c0a22f671509f48db990a
+size 143618328

models/trained_aarch64_best_drop01/training.log ADDED Viewed

The diff for this file is too large to render. See raw diff

models/trained_arm32_base/beam1.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:537779cd52f30aefab4800cc5fba9cc4d3fcb49a20711e53ddb8ecd30d7dc3e9
+size 2324372

models/trained_arm32_base/beam5.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d5f80acc6d02ded22d8c8f7ba7b57c5087695f4e867fbe48a6e91107a6c3f150
+size 2321993

models/trained_arm32_base/checkpoint_best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d13a4cb6271f8d146e3e04a7948017d60f674a5df17376a550e4129b14b2d352
+size 152944672

models/trained_arm32_base/training.log ADDED Viewed

The diff for this file is too large to render. See raw diff

models/trained_arm32_best_L4/beam1.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:389510e251276d1e0f27e735bab86abfce4ad417a679f2e32d6de285d2cafeeb
+size 2270911

models/trained_arm32_best_L4/beam5.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ad0bfd17e371e63c5372794454a5fea2d4f5db1203c3d4fe7dbdd1503a4b3f0
+size 1164987

models/trained_arm32_best_L4/checkpoint_best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:77ec390a3265c1d6743d45219a1f1c63600721815cc6c51b73131af338a9209c
+size 96298008

models/trained_arm32_best_L4/training.log ADDED Viewed

The diff for this file is too large to render. See raw diff

models/trained_x64_base/beam1.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e98663bfe7e89dd379004b29190aa4259cb5ccf1399b0fbcd56eddea1278e4ba
+size 2747346

models/trained_x64_base/beam5.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b910eca1cd409aaa52220442095c8eb242d1abaf7373f4c15dbda9b95acdcbc4
+size 2746375

models/trained_x64_base/checkpoint_best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:494060e19f549ce2c9c5e810fafd4a68f781bc086243b6186d76c3d58fbcd248
+size 143470872

models/trained_x64_base/training.log ADDED Viewed

The diff for this file is too large to render. See raw diff

models/trained_x64_best_decay005/beam1.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66c6d17b5111db175de0e015288bfc1765b622240c58c5413643343b3498c38b
+size 2739095

models/trained_x64_best_decay005/beam5.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a5670cd6fb4e36a06d08032a64448b01d5481a221be46e70ce3fefceaa74f7da
+size 2738411

models/trained_x64_best_decay005/checkpoint_best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:43657cb25769821cc178e6d04267d1a4ebc67493aefcae737b1d741cd40edd08
+size 143470872

models/trained_x64_best_decay005/training.log ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenize.sh ADDED Viewed

	@@ -0,0 +1,19 @@

+#!/bin/bash
+DS=dataset/
+TOK=tokenized/
+ARCHS=( arm32 aarch64 x64 )
+SPLITS=( train valid test )
+for arch in ${ARCHS[@]}
+do
+    mkdir -p ${TOK}/${arch}
+    # Train the BPE tokenizer
+    python3 remend.bpe -i ${DS}/${arch} -o ${TOK}/${arch}
+    cp ${DS}/${arch}/{train,valid,test}.eqn ${TOK}/${arch}
+    fairseq-preprocess -s asm -t eqn \
+        --trainpref ${TOK}/${arch}/train \
+        --testpref ${TOK}/${arch}/test \
+        --validpref ${TOK}/${arch}/valid \
+        --destdir ${TOK}/${arch}/
+done

tokenized.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d21c76e6d195fa68f9556036d7196f2230205d3b107bb8d1d46a1221b13a3524
+size 121241317