Commit 17101fd
Parent: caf4734

perf: add class balancing and label smoothing to the loss function per batch

Files changed:
- model/src/data/batcher.rs +21 -0
- model/src/model.rs +4 -1
- trainer/src/training.rs +7 -3
model/src/data/batcher.rs

@@ -28,6 +28,7 @@ pub struct TextClassificationTrainingBatch<B: Backend> {
     pub tokens: Tensor<B, 2, Int>,    // Tokenized text
     pub labels: Tensor<B, 1, Int>,    // Labels of the text
     pub mask_pad: Tensor<B, 2, Bool>, // Padding mask for the tokenized text
+    pub class_weights: Vec<f32>,      // Class weights for handling class imbalance
 }
 
 /// Struct for inference batch in text classification task
@@ -50,6 +51,9 @@ impl<B: Backend> Batcher<B, TextClassificationItem, TextClassificationTrainingBatch<B>>
         let mut tokens_list = Vec::with_capacity(items.len());
         let mut labels_list = Vec::with_capacity(items.len());
 
+        // Compute class weights based on the training dataset
+        let class_weights = compute_class_weights(&items);
+
         // Tokenize text and create label tensor for each item
         for item in items {
             tokens_list.push(self.tokenizer.encode(&item.text));
@@ -72,10 +76,27 @@ impl<B: Backend> Batcher<B, TextClassificationItem, TextClassificationTrainingBatch<B>>
             tokens: mask.tensor,
            labels: Tensor::cat(labels_list, 0),
             mask_pad: mask.mask,
+            class_weights,
         }
     }
 }
 
+// Function to compute class weights based on the training dataset
+fn compute_class_weights(items: &[TextClassificationItem]) -> Vec<f32> {
+    let num_classes = items.iter().map(|item| item.label).max().unwrap_or(0) + 1;
+    let mut class_counts = vec![0; num_classes];
+
+    for item in items {
+        class_counts[item.label] += 1;
+    }
+
+    let total_count = class_counts.iter().sum::<usize>() as f32;
+    class_counts
+        .iter()
+        .map(|&count| total_count / count as f32)
+        .collect()
+}
+
 /// Implement Batcher trait for TextClassificationBatcher struct for inference
 impl<B: Backend> Batcher<B, String, TextClassificationInferenceBatch<B>>
     for TextClassificationBatcher
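For reference, the weighting above is plain inverse class frequency over the items the batcher receives: weight_c = total / count_c, so rarer classes get proportionally larger weights. A minimal standalone sketch of the same logic (the local Item struct is a stand-in for TextClassificationItem, which is assumed here to carry a usize label):

// Minimal sketch of the inverse-frequency weighting used by the batcher.
// `Item` is a stand-in for TextClassificationItem.
struct Item {
    label: usize,
}

fn compute_class_weights(items: &[Item]) -> Vec<f32> {
    let num_classes = items.iter().map(|item| item.label).max().unwrap_or(0) + 1;
    let mut class_counts = vec![0usize; num_classes];
    for item in items {
        class_counts[item.label] += 1;
    }
    let total_count = class_counts.iter().sum::<usize>() as f32;
    class_counts
        .iter()
        // weight_c = N / count_c; a count of zero yields f32::INFINITY,
        // which can happen when a class is absent from the current batch
        .map(|&count| total_count / count as f32)
        .collect()
}

fn main() {
    // A batch of 4 items: three of class 0, one of class 1
    let batch = [0, 0, 0, 1].map(|label| Item { label });
    // Majority class gets weight 4/3, minority class gets 4.0
    assert_eq!(compute_class_weights(&batch), vec![4.0 / 3.0, 4.0]);
    println!("weights ok");
}

One consequence of computing the weights per batch is that a class absent from the batch gets a zero count and therefore an infinite weight; whether that matters depends on how the loss consumes the vector, since an absent class also never appears as a target in that batch.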
model/src/model.rs

@@ -119,7 +119,10 @@ impl<B: Backend> TextClassificationModel<B> {
 
         // Compute the loss using Cross-Entropy
         let loss = CrossEntropyLossConfig::new()
-            .init(device)
+            .with_weights(Some(item.class_weights))
+            .with_smoothing(Some(0.1))
+            .with_logits(true)
+            .init(device)
             .forward(logits.clone(), labels.clone());
 
         // Return the output and loss
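Two of the new builder calls deserve a note. with_weights makes the loss scale each example by the weight of its true class, and with_smoothing(Some(0.1)) replaces the one-hot target with a softened distribution, commonly (1 - alpha) on the true class plus alpha / K spread across all K classes. A plain-Rust sketch of cross-entropy against such a smoothed target (not Burn's internal implementation; the exact smoothing formula Burn applies may differ):

// Cross-entropy with label smoothing for a single example.
fn smoothed_cross_entropy(logits: &[f64], target: usize, alpha: f64) -> f64 {
    let k = logits.len() as f64;

    // Numerically stable log-sum-exp for the softmax denominator
    let max = logits.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
    let log_sum = logits.iter().map(|&x| (x - max).exp()).sum::<f64>().ln() + max;

    logits
        .iter()
        .enumerate()
        .map(|(i, &x)| {
            // Smoothed target: (1 - alpha) on the true class, alpha / K everywhere
            let hard = if i == target { 1.0 - alpha } else { 0.0 };
            let t = hard + alpha / k;
            -t * (x - log_sum) // x - log_sum is the log-softmax of x
        })
        .sum()
}

fn main() {
    let logits = [2.0, 0.5, -1.0];
    // alpha = 0.0 reduces to ordinary cross-entropy on the hard label
    println!("hard:   {:.4}", smoothed_cross_entropy(&logits, 0, 0.0));
    println!("smooth: {:.4}", smoothed_cross_entropy(&logits, 0, 0.1));
}

with_logits(true) tells the loss that raw logits are being passed in, so the softmax is applied internally, which is what the log-softmax step in the sketch does.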
trainer/src/training.rs

@@ -41,6 +41,10 @@ pub fn train<B: AutodiffBackend, D: TextClassificationDataset + 'static>(
     // Initialize batcher
     let batcher = TextClassificationBatcher::new(tokenizer.clone(), config.max_seq_length);
 
+    // Create data samplers for training and testing datasets
+    let train_sampler = SamplerDataset::new(dataset_train, 50_000);
+    let test_sampler = SamplerDataset::new(dataset_test, 5_000);
+
     // Initialize model
     let model = TextClassificationModelConfig::new(
         config.transformer.clone(),
@@ -54,12 +58,12 @@ pub fn train<B: AutodiffBackend, D: TextClassificationDataset + 'static>(
     let train_loader = DataLoaderBuilder::new(batcher.clone())
         .batch_size(config.batch_size)
         .num_workers(1)
-        .build(SamplerDataset::new(dataset_train, 50_000));
+        .build(train_sampler);
     let valid_loader = DataLoaderBuilder::new(batcher)
         .batch_size(config.batch_size)
         .num_workers(1)
-        .build(SamplerDataset::new(dataset_test, 5_000));
-
+        .build(test_sampler);
+
     // Initialize optimizer
     let optimizer = config.optimizer.init();
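SamplerDataset wraps a dataset and presents a fixed-size virtual epoch, 50,000 training and 5,000 validation samples here, drawn at random from the underlying items. A rough standalone sketch of the idea, assuming the rand crate and sampling with replacement (Burn's actual implementation differs):

use rand::Rng;

// Rough sketch of a fixed-size, sample-with-replacement view over a dataset,
// in the spirit of SamplerDataset. Not Burn's code.
struct Sampler<'a, T> {
    items: &'a [T],
    size: usize, // virtual epoch length, e.g. 50_000
}

impl<'a, T> Sampler<'a, T> {
    fn len(&self) -> usize {
        self.size
    }

    // Every index in 0..size resolves to a randomly drawn underlying item
    fn get(&self, _index: usize) -> &T {
        let i = rand::thread_rng().gen_range(0..self.items.len());
        &self.items[i]
    }
}

fn main() {
    let data = ["a", "b", "c"];
    let sampler = Sampler { items: &data, size: 10 };
    let epoch: Vec<_> = (0..sampler.len()).map(|i| sampler.get(i)).collect();
    println!("{epoch:?}"); // 10 items drawn from 3, with replacement
}

Fixing the epoch size this way decouples epoch length from dataset size, which is the usual reason to reach for a sampler wrapper.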