AbstractPhil committed
Commit 1711144 · verified · 1 Parent(s): 0cba5a9

Update bert_handler.py

Files changed (1):
  1. bert_handler.py  +55 -17
bert_handler.py CHANGED
@@ -38,7 +38,11 @@ class BERTHandler:

     def __del__(self):
         """Destructor to ensure cleanup when object is deleted"""
-        self._cleanup_model()
+        try:
+            self._cleanup_model()
+        except Exception:
+            # Ignore cleanup errors during shutdown
+            pass

     def _cleanup_model(self):
         """
@@ -48,33 +52,57 @@ class BERTHandler:
         if hasattr(self, 'model') and self.model is not None:
             print("🧹 Cleaning up existing model from VRAM...")

+            # Check if torch is still available (can be None during shutdown)
+            try:
+                import torch as torch_module
+                if torch_module is None:
+                    return
+            except (ImportError, AttributeError):
+                return
+
             # Move model to CPU first to free GPU memory
-            if torch.cuda.is_available() and next(self.model.parameters(), None) is not None:
-                if next(self.model.parameters()).is_cuda:
-                    self.model = self.model.cpu()
+            try:
+                if torch_module.cuda.is_available() and next(self.model.parameters(), None) is not None:
+                    if next(self.model.parameters()).is_cuda:
+                        self.model = self.model.cpu()
+            except Exception:
+                # Continue cleanup even if moving to CPU fails
+                pass

             # Delete the model
-            del self.model
-            self.model = None
+            try:
+                del self.model
+                self.model = None
+            except Exception:
+                pass

             # Force garbage collection
-            gc.collect()
+            try:
+                gc.collect()
+            except Exception:
+                pass

             # Clear CUDA cache
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
-                torch.cuda.synchronize()  # Ensure all CUDA operations complete
+            try:
+                if torch_module.cuda.is_available():
+                    torch_module.cuda.empty_cache()
+                    torch_module.cuda.synchronize()  # Ensure all CUDA operations complete
+            except Exception:
+                pass

             print("✅ Model cleanup complete")

     def _print_vram_usage(self, prefix=""):
         """Print current VRAM usage for monitoring"""
-        if torch.cuda.is_available():
-            allocated = torch.cuda.memory_allocated() / 1e9
-            reserved = torch.cuda.memory_reserved() / 1e9
-            print(f"🎯 {prefix}VRAM: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved")
-        else:
-            print(f"🎯 {prefix}CUDA not available")
+        try:
+            if torch.cuda.is_available():
+                allocated = torch.cuda.memory_allocated() / 1e9
+                reserved = torch.cuda.memory_reserved() / 1e9
+                print(f"🎯 {prefix}VRAM: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved")
+            else:
+                print(f"🎯 {prefix}CUDA not available")
+        except Exception:
+            print(f"🎯 {prefix}VRAM: Could not read CUDA memory")

     def load_fresh_model(self, model_name="nomic-ai/nomic-bert-2048"):
         """Load fresh model and add special tokens with proper VRAM management"""
@@ -152,7 +180,17 @@ class BERTHandler:
         print(f" - Embedding size: {self.model.bert.embeddings.word_embeddings.weight.shape[0]}")
         print(f" - Tokenizer size: {len(self.tokenizer)}")

-        # DO NOT MODIFY ANYTHING - checkpoint is self-consistent
+        # Check for vocab size mismatch and warn (but don't auto-fix for checkpoints)
+        tokenizer_size = len(self.tokenizer)
+        model_vocab_size = self.model.config.vocab_size
+        embedding_size = self.model.bert.embeddings.word_embeddings.weight.shape[0]
+
+        if not (tokenizer_size == model_vocab_size == embedding_size):
+            print(f"⚠️ VOCAB SIZE MISMATCH DETECTED:")
+            print(f" - Tokenizer size: {tokenizer_size}")
+            print(f" - Model config vocab_size: {model_vocab_size}")
+            print(f" - Embedding size: {embedding_size}")
+            print(f" This might affect inference quality.")

         # Load training state
         self._load_training_state(checkpoint_path)
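The checkpoint hunk only warns about a tokenizer/config/embedding mismatch and deliberately leaves checkpoint weights untouched. For a freshly loaded (non-checkpoint) model, the usual remedy would be to resize the embedding table to the tokenizer. A hedged sketch using standard Hugging Face transformers calls, assuming the nomic-bert remote code exposes the usual get_input_embeddings/resize_token_embeddings hooks; this is not what the commit does:

# Hypothetical follow-up, NOT part of this commit (which only prints a warning):
# resize the input embeddings so tokenizer, config, and weights agree.
from transformers import AutoModelForMaskedLM, AutoTokenizer

name = "nomic-ai/nomic-bert-2048"
tokenizer = AutoTokenizer.from_pretrained(name, trust_remote_code=True)
model = AutoModelForMaskedLM.from_pretrained(name, trust_remote_code=True)

embedding_rows = model.get_input_embeddings().weight.shape[0]
if len(tokenizer) != embedding_rows:
    # Only safe when the new rows can be freshly initialized, i.e. not when
    # restoring a checkpoint whose weights assume the old embedding shape.
    model.resize_token_embeddings(len(tokenizer))  # also updates config.vocab_size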