Commit 1c9f6e9
Parent(s): fe3f1e2

perf: replace metal with wgpu support and add http file for predict request

- model/src/inference.rs +1 -1
- model/src/lib.rs +1 -1
- requests/predict.http +10 -0
- server/src/api.rs +8 -2
- server/src/main.rs +1 -0
- trainer/src/main.rs +20 -14

model/src/inference.rs CHANGED
@@ -55,7 +55,7 @@ impl<B: Backend, D: TextClassificationDataset + 'static> SamModel<B, D> {
         println!("Loading weights ...");
         let record = CompactRecorder::new()
             .load(format!("{artifact_dir}/model").into(), &device)
-            .expect("Trained model weights
+            .expect("Trained model weights to be present");
 
         // Create model using loaded weights
         println!("Creating model ...");

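For reference, the weights loaded here are written by the trainer with the same recorder. A minimal sketch of the matching save call, assuming a trained model value and the same artifact_dir as above (names taken from this diff, not from the trainer source):

    use burn::module::Module;
    use burn::record::CompactRecorder;

    // Persist weights so the load above finds them at "{artifact_dir}/model".
    model
        .save_file(format!("{artifact_dir}/model"), &CompactRecorder::new())
        .expect("Trained model weights to be saved");
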
model/src/lib.rs CHANGED
@@ -1,3 +1,3 @@
 pub mod data;
 pub mod inference;
-pub mod model;
+pub mod model;

requests/predict.http ADDED
@@ -0,0 +1,10 @@
+### POST predict sentiment
+POST http://127.0.0.1:8000/predict
+Accept: application/json
+Content-Type: application/json
+
+{
+  "text": "I love programming!"
+}
+
+###

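The .http file targets REST-client plugins; the same request can also be sent from Rust. A minimal sketch, assuming the server from this repo is running locally and that reqwest (with the blocking and json features) and serde_json are available as dependencies:

    use serde_json::json;

    fn main() -> Result<(), reqwest::Error> {
        // Mirrors requests/predict.http: POST a JSON body to the predict route.
        let response = reqwest::blocking::Client::new()
            .post("http://127.0.0.1:8000/predict")
            .header("Accept", "application/json")
            .json(&json!({ "text": "I love programming!" }))
            .send()?;
        println!("{}", response.text()?);
        Ok(())
    }
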
server/src/api.rs CHANGED
@@ -1,5 +1,5 @@
 use crate::dtos::{SentimentRequest, SentimentResponse};
-use burn::backend::
+use burn::backend::Wgpu;
 use model::data::TweetSentimentDataset;
 use model::model::SamModel;
 use rocket::serde::json::Json;
@@ -7,7 +7,13 @@ use rocket::tokio::sync::Mutex;
 use rocket::{State, post};
 use std::sync::Arc;
 
-
+#[cfg(not(feature = "f16"))]
+#[allow(dead_code)]
+type ElemType = f32;
+#[cfg(feature = "f16")]
+type ElemType = burn::tensor::f16;
+
+pub type BackendImpl = Wgpu<ElemType, i32>;
 
 // ---- API Route ---- //
 #[post("/predict", format = "json", data = "<input>")]

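The new BackendImpl alias pins the whole server to one concrete backend type. Based on the imports in this file (Arc, the tokio Mutex, rocket State), the shared state presumably wraps the loaded model behind that alias; a hedged sketch, not the file's actual definition:

    // Hypothetical shape of AppState: one loaded model shared across handlers,
    // relying on the imports already shown in this diff.
    pub struct AppState {
        pub model: Arc<Mutex<SamModel<BackendImpl, TweetSentimentDataset>>>,
    }
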
server/src/main.rs CHANGED
@@ -1,4 +1,5 @@
 #![recursion_limit = "256"]
+#![allow(unexpected_cfgs)]
 
 use crate::api::{AppState, BackendImpl, predict};
 use model::data::TweetSentimentDataset;

trainer/src/main.rs CHANGED
@@ -11,18 +11,10 @@ fn main() {
     inference_runner::run();
 }
 
-#[cfg(not(any(feature = "f16", feature = "flex32")))]
-#[allow(unused)]
-type ElemType = f32;
-#[cfg(feature = "f16")]
-type ElemType = burn::tensor::f16;
-#[cfg(feature = "flex32")]
-type ElemType = burn::tensor::flex32;
-
 #[cfg(feature = "training")]
 mod training_runner {
-    use crate::
-    use burn::backend::{Autodiff,
+    use crate::training;
+    use burn::backend::{Autodiff, Wgpu};
     use burn::nn::transformer::TransformerEncoderConfig;
     use burn::optim::AdamConfig;
     use burn::optim::decay::WeightDecayConfig;
@@ -30,6 +22,14 @@ mod training_runner {
     use model::data::TweetSentimentDataset;
     use model::inference::ExperimentConfig;
 
+    #[cfg(not(any(feature = "f16", feature = "flex32")))]
+    #[allow(unused)]
+    pub type ElemType = f32;
+    #[cfg(feature = "f16")]
+    pub type ElemType = burn::tensor::f16;
+    #[cfg(feature = "flex32")]
+    pub type ElemType = burn::tensor::flex32;
+
     pub fn launch<B: AutodiffBackend>(devices: Vec<B::Device>) {
         let config = ExperimentConfig::new(
             TransformerEncoderConfig::new(256, 1024, 8, 4)
@@ -48,18 +48,24 @@ mod training_runner {
     }
 
     pub fn run() {
-        launch::<Autodiff<
+        launch::<Autodiff<Wgpu<ElemType, i32>>>(vec![Default::default()]);
     }
 }
 
 #[cfg(feature = "inference")]
 mod inference_runner {
-    use
-    use burn::backend::
+    use burn::backend::Wgpu;
+    use burn::backend::wgpu::WgpuDevice;
     use burn::prelude::Backend;
     use model::data::TweetSentimentDataset;
     use model::inference;
 
+    #[cfg(not(feature = "f16"))]
+    #[allow(dead_code)]
+    type ElemType = f32;
+    #[cfg(feature = "f16")]
+    type ElemType = burn::tensor::f16;
+
     pub fn launch<B: Backend>(device: B::Device) {
         inference::infer::<B, TweetSentimentDataset>(
             device,
@@ -80,6 +86,6 @@ mod inference_runner {
     }
 
     pub fn run() {
-        launch::<
+        launch::<Wgpu<ElemType, i32>>(WgpuDevice::default());
    }
 }
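
Net effect in the trainer: both runners now target the wgpu backend, with Autodiff layered on top only for training. A standalone sketch of that wiring under the default f32 element type (the f16/flex32 variants sit behind the Cargo features shown above); the crate-specific training and launch plumbing is omitted:

    #![allow(dead_code)] // aliases are illustrative; not all are used below

    use burn::backend::wgpu::WgpuDevice;
    use burn::backend::{Autodiff, Wgpu};

    type ElemType = f32;

    type InferenceBackend = Wgpu<ElemType, i32>; // what inference_runner::run selects
    type TrainingBackend = Autodiff<InferenceBackend>; // what training_runner::run selects

    fn main() {
        // WgpuDevice::default() lets wgpu choose the adapter (Metal on macOS,
        // Vulkan/DX12 elsewhere), which is what replaces the explicit metal backend.
        let device = WgpuDevice::default();
        println!("selected wgpu device: {device:?}");
    }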