WCNegentropy committed
Commit dfe6d16 · verified · 1 Parent(s): b9246a0

🚀 OS Launch: Clean documentation and refined licensing


This OS launch commit includes:

✅ **Cleaned Documentation**
- Removed inflated claims and marketing language
- Added honest research status and limitations
- Created professional model card and validation reports
- Streamlined licensing to AGPLv3 + commercial contact

✅ **Refined Codebase**
- Complete experimental bit-native transformer implementation
- 57 Python files comprising a comprehensive research framework
- Safety telemetry and monitoring systems
- Distributed training and development tools

✅ **Professional Standards**
- Empirical validation of all claims
- Clear experimental vs production distinctions
- Rigorous research methodology requirements
- Community contribution framework

Ready for serious research evaluation and academic investigation.

Files changed (1)
  1. markov_spline_training.py +438 -0
markov_spline_training.py ADDED
@@ -0,0 +1,438 @@
+#!/usr/bin/env python3
+"""
+MarkovSpline-Enhanced BitTransformerLM Training
+
+Integrates MarkovSpline data smoothing directly into the BitTransformerLM training pipeline
+for improved data preprocessing and gradient optimization.
+"""
+
+import os
+import sys
+import json
+import time
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import numpy as np
+from pathlib import Path
+from typing import Dict, List, Tuple, Optional, Any
+from torch.utils.data import DataLoader, Dataset
+
+# Add MarkovSpline to path
+sys.path.insert(0, '/data/MarkovSpline')
+from bitpipe_integration import MarkovSplineBitPipeModule, create_markov_spline_bitpipe_module
+
+# BitTransformerLM imports
+from bit_transformer.model import BitTransformerLM
+from bit_transformer.telemetry import TelemetrySynthesizer
+
+# Simple trainer base class
+class BitwiseTrainer:
+    """Simple base trainer for BitTransformerLM."""
+
+    def __init__(self, model, learning_rate=1e-3, max_grad_norm=1.0):
+        self.model = model
+        self.device = next(model.parameters()).device
+        self.optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
+        self.criterion = nn.CrossEntropyLoss()
+        self.max_grad_norm = max_grad_norm
+
+    def train_step(self, batch):
+        """Simple training step."""
+        self.optimizer.zero_grad()
+
+        outputs = self.model(batch['input_bits'])
+        # BitTransformerLM returns (logits, telemetry)
+        if isinstance(outputs, tuple):
+            logits, telemetry = outputs
+        else:
+            logits = outputs
+
+        loss = self.criterion(logits.reshape(-1, logits.size(-1)), batch['target_bits'].reshape(-1))
+
+        loss.backward()
+
+        if self.max_grad_norm > 0:
+            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.max_grad_norm)
+
+        self.optimizer.step()
+
+        return {'loss': loss.item()}
+
+
+class MarkovSplineEnhancedDataset(Dataset):
+    """Dataset wrapper that applies MarkovSpline preprocessing."""
+
+    def __init__(self,
+                 base_dataset: Dataset,
+                 markov_module: MarkovSplineBitPipeModule,
+                 smoothing_strength: float = 0.1,
+                 enable_smoothing: bool = True):
+
+        self.base_dataset = base_dataset
+        self.markov_module = markov_module
+        self.smoothing_strength = smoothing_strength
+        self.enable_smoothing = enable_smoothing
+
+        # Initialize data preprocessor
+        if enable_smoothing:
+            self.markov_module.initialize_application('data_preprocessor',
+                                                      smoothing_strength=smoothing_strength,
+                                                      preserve_features=True)
+
+    def __len__(self):
+        return len(self.base_dataset)
+
+    def __getitem__(self, idx):
+        # Get original data
+        data = self.base_dataset[idx]
+
+        if not self.enable_smoothing:
+            return data
+
+        # Apply MarkovSpline preprocessing to bit sequences
+        if isinstance(data, dict) and 'input_bits' in data:
+            try:
+                # Smooth input bits
+                result = self.markov_module.process_data(
+                    [data['input_bits']],
+                    'preprocess_training',
+                    binary_data=True
+                )
+
+                if result['success'] and result['processed_sequences']:
+                    data['input_bits'] = result['processed_sequences'][0]
+                    data['smoothing_applied'] = True
+                else:
+                    data['smoothing_applied'] = False
+
+            except Exception as e:
+                print(f"Warning: MarkovSpline preprocessing failed for sample {idx}: {e}")
+                data['smoothing_applied'] = False
+
+        return data
+
+
+class MarkovSplineEnhancedTrainer(BitwiseTrainer):
+    """Enhanced BitTransformerLM trainer with MarkovSpline integration."""
+
+    def __init__(self,
+                 model: BitTransformerLM,
+                 markov_config: Optional[Dict] = None,
+                 gradient_smoothing: bool = True,
+                 data_smoothing: bool = True,
+                 smoothing_strength: float = 0.1,
+                 **kwargs):
+
+        super().__init__(model, **kwargs)
+
+        # Initialize MarkovSpline module
+        self.markov_module = create_markov_spline_bitpipe_module(markov_config)
+        self.gradient_smoothing = gradient_smoothing
+        self.data_smoothing = data_smoothing
+        self.smoothing_strength = smoothing_strength
+
+        # Initialize gradient smoother if enabled
+        if gradient_smoothing:
+            self.markov_module.initialize_application('gradient_smoother',
+                                                      learning_rate=kwargs.get('learning_rate', 0.001),
+                                                      smoothing_strength=smoothing_strength,
+                                                      momentum_states=10)
+
+        # Tracking
+        self.smoothing_metrics = {}
+        self.gradient_smooth_history = []
+
+        print(f"🌊 MarkovSpline Enhanced Trainer initialized")
+        print(f"  - Gradient smoothing: {'✅' if gradient_smoothing else '❌'}")
+        print(f"  - Data smoothing: {'✅' if data_smoothing else '❌'}")
+        print(f"  - Smoothing strength: {smoothing_strength}")
+
+    def create_enhanced_dataloader(self,
+                                   dataset: Dataset,
+                                   batch_size: int = 8,
+                                   **kwargs) -> DataLoader:
+        """Create dataloader with MarkovSpline preprocessing."""
+
+        enhanced_dataset = MarkovSplineEnhancedDataset(
+            dataset,
+            self.markov_module,
+            self.smoothing_strength,
+            self.data_smoothing
+        )
+
+        return DataLoader(enhanced_dataset, batch_size=batch_size, **kwargs)
+
+    def apply_gradient_smoothing(self,
+                                 parameters: Dict[str, torch.Tensor],
+                                 gradients: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
+        """Apply MarkovSpline gradient smoothing."""
+
+        if not self.gradient_smoothing:
+            return parameters
+
+        try:
+            # Process through MarkovSpline gradient smoother
+            result = self.markov_module.process_data(
+                {
+                    'parameters': parameters,
+                    'gradients': gradients
+                },
+                'smooth_gradients'
+            )
+
+            if result['success']:
+                self.gradient_smooth_history.append(result['optimization_metrics'])
+                return result['smoothed_parameters']
+            else:
+                print(f"Warning: Gradient smoothing failed: {result.get('error', 'Unknown')}")
+                return parameters
+
+        except Exception as e:
+            print(f"Warning: Gradient smoothing error: {e}")
+            return parameters
+
+    def train_step(self, batch: Dict[str, torch.Tensor]) -> Dict[str, float]:
+        """Enhanced training step with MarkovSpline integration."""
+
+        # Standard forward pass
+        self.optimizer.zero_grad()
+
+        # Forward pass
+        outputs = self.model(batch['input_bits'])
+        # BitTransformerLM returns (logits, telemetry)
+        if isinstance(outputs, tuple):
+            logits, telemetry = outputs
+        else:
+            logits = outputs
+
+        loss = self.criterion(logits.reshape(-1, logits.size(-1)), batch['target_bits'].reshape(-1))
+
+        # Backward pass
+        loss.backward()
+
+        # Extract parameters and gradients for smoothing
+        if self.gradient_smoothing:
+            parameters = {}
+            gradients = {}
+
+            for name, param in self.model.named_parameters():
+                if param.grad is not None:
+                    parameters[name] = param.data.clone()
+                    gradients[name] = param.grad.data.clone()
+
+            # Apply MarkovSpline gradient smoothing
+            smoothed_params = self.apply_gradient_smoothing(parameters, gradients)
+
+            # Update model parameters with smoothed values
+            for name, param in self.model.named_parameters():
+                if name in smoothed_params:
+                    param.data = smoothed_params[name]
+
+        # Standard optimizer step
+        if self.max_grad_norm > 0:
+            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.max_grad_norm)
+
+        self.optimizer.step()
+
+        # Collect metrics
+        metrics = {
+            'loss': loss.item(),
+            'smoothing_applied': batch.get('smoothing_applied', torch.tensor(False)).float().mean().item()
+        }
+
+        if 'smoothing_applied' in batch:  # batch is a dict, so check membership rather than hasattr
+            metrics['data_smoothing_rate'] = batch['smoothing_applied'].float().mean().item()
+
+        return metrics
+
+    def train_epoch(self,
+                    train_loader: DataLoader,
+                    epoch: int) -> Dict[str, float]:
+        """Train one epoch with MarkovSpline enhancements."""
+
+        self.model.train()
+        epoch_metrics = {
+            'loss': 0.0,
+            'smoothing_applied': 0.0,
+            'data_smoothing_rate': 0.0,
+            'gradient_smoothing_success': 0.0
+        }
+
+        num_batches = 0
+
+        for batch_idx, batch in enumerate(train_loader):
+            # Move batch to device
+            for key in batch:
+                if isinstance(batch[key], torch.Tensor):
+                    batch[key] = batch[key].to(self.device)
+
+            # Training step with MarkovSpline integration
+            step_metrics = self.train_step(batch)
+
+            # Accumulate metrics
+            for key, value in step_metrics.items():
+                if key in epoch_metrics:
+                    epoch_metrics[key] += value
+
+            num_batches += 1
+
+            # Log progress
+            if batch_idx % 10 == 0:
+                print(f"  Batch {batch_idx:3d}: Loss={step_metrics['loss']:.4f}")
+
+        # Average metrics
+        for key in epoch_metrics:
+            epoch_metrics[key] /= num_batches
+
+        return epoch_metrics
+
+    def get_markov_spline_metrics(self) -> Dict[str, Any]:
+        """Get comprehensive MarkovSpline performance metrics."""
+
+        metrics = self.markov_module.get_performance_metrics()
+
+        # Add training-specific metrics
+        metrics['training_integration'] = {
+            'gradient_smoothing_enabled': self.gradient_smoothing,
+            'data_smoothing_enabled': self.data_smoothing,
+            'smoothing_strength': self.smoothing_strength,
+            'gradient_smooth_operations': len(self.gradient_smooth_history)
+        }
+
+        if self.gradient_smooth_history:
+            recent_gradient_metrics = self.gradient_smooth_history[-10:]  # Last 10 operations
+            metrics['recent_gradient_smoothing'] = {
+                'average_metrics': {
+                    key: np.mean([m.get(key, 0) for m in recent_gradient_metrics])
+                    for key in recent_gradient_metrics[0].keys()
+                } if recent_gradient_metrics else {}
+            }
+
+        return metrics
+
+    def save_enhanced_checkpoint(self,
+                                 checkpoint_path: str,
+                                 epoch: int,
+                                 metrics: Dict[str, float]):
+        """Save checkpoint with MarkovSpline state."""
+
+        # Standard checkpoint data
+        checkpoint = {
+            'epoch': epoch,
+            'model_state_dict': self.model.state_dict(),
+            'optimizer_state_dict': self.optimizer.state_dict(),
+            'metrics': metrics,
+            'config': self.model.get_config()
+        }
+
+        # Add MarkovSpline metrics
+        checkpoint['markov_spline_metrics'] = self.get_markov_spline_metrics()
+        checkpoint['markov_spline_config'] = {
+            'gradient_smoothing': self.gradient_smoothing,
+            'data_smoothing': self.data_smoothing,
+            'smoothing_strength': self.smoothing_strength
+        }
+
+        # Save MarkovSpline module state
+        markov_state_path = Path(checkpoint_path).parent / 'markov_spline_state'
+        self.markov_module.save_module_state(markov_state_path)
+
+        torch.save(checkpoint, checkpoint_path)
+        print(f"✅ Enhanced checkpoint saved: {checkpoint_path}")
+
+
+def create_markov_enhanced_training_config(base_config: Dict) -> Dict:
+    """Create training configuration with MarkovSpline enhancements."""
+
+    enhanced_config = base_config.copy()
+
+    # MarkovSpline specific settings
+    enhanced_config.update({
+        'markov_spline': {
+            'enabled': True,
+            'gradient_smoothing': True,
+            'data_smoothing': True,
+            'smoothing_strength': 0.1,
+            'num_states': 10,
+            'spline_type': 'cubic',
+            'adaptive_smoothing': True
+        },
+        'data_preprocessing': {
+            'smooth_training_data': True,
+            'preserve_features': True,
+            'preprocessing_strength': 0.15
+        },
+        'gradient_optimization': {
+            'smooth_gradients': True,
+            'momentum_states': 10,
+            'learning_rate_smoothing': 0.2
+        }
+    })
+
+    return enhanced_config
+
+
+def run_markov_enhanced_training(config_file: Optional[str] = None):
+    """Run BitTransformerLM training with MarkovSpline enhancements."""
+
+    # Load configuration
+    if config_file and os.path.exists(config_file):
+        with open(config_file, 'r') as f:
+            config = json.load(f)
+    else:
+        # Default enhanced configuration
+        config = create_markov_enhanced_training_config({
+            'model': {
+                'd_model': 128,
+                'nhead': 8,
+                'num_layers': 4,
+                'dim_feedforward': 512,
+                'max_seq_len': 512
+            },
+            'training': {
+                'batch_size': 8,
+                'learning_rate': 1e-4,
+                'epochs': 10,
+                'max_grad_norm': 1.0
+            }
+        })
+
+    print("🌊 Starting MarkovSpline-Enhanced BitTransformerLM Training")
+    print(f"📋 Configuration: {json.dumps(config, indent=2)}")
+
+    # Initialize model
+    model_config = config['model']
+    model = BitTransformerLM(**model_config)
+
+    # Initialize enhanced trainer (only optimizer-related settings are trainer kwargs)
+    trainer = MarkovSplineEnhancedTrainer(
+        model=model,
+        markov_config=config.get('markov_spline'),
+        gradient_smoothing=config['markov_spline']['gradient_smoothing'],
+        data_smoothing=config['markov_spline']['data_smoothing'],
+        smoothing_strength=config['markov_spline']['smoothing_strength'],
+        learning_rate=config['training']['learning_rate'], max_grad_norm=config['training']['max_grad_norm']
+    )
+
+    print("🚀 Enhanced training pipeline initialized successfully!")
+    return trainer, config
+
+
+if __name__ == '__main__':
+    import argparse
+
+    parser = argparse.ArgumentParser(description='MarkovSpline-Enhanced BitTransformerLM Training')
+    parser.add_argument('--config', '-c', help='Configuration file path')
+    parser.add_argument('--output-dir', '-o', default='./markov_enhanced_checkpoints',
+                        help='Output directory for checkpoints')
+
+    args = parser.parse_args()
+
+    # Create output directory
+    os.makedirs(args.output_dir, exist_ok=True)
+
+    # Run enhanced training
+    trainer, config = run_markov_enhanced_training(args.config)
+
+    print(f"📊 MarkovSpline metrics: {trainer.get_markov_spline_metrics()}")
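For quick evaluation, a minimal smoke-test sketch (not part of this commit) of how the added trainer might be driven from Python; it assumes `bit_transformer` and the MarkovSpline `bitpipe_integration` module are importable from the repository layout the script expects, and the `RandomBitDataset` below is a hypothetical toy dataset used only to exercise the pipeline:

```python
# Hypothetical usage sketch: wires the enhanced trainer to a toy random-bit dataset.
# Assumes markov_spline_training.py and its dependencies are on the Python path.
import torch
from torch.utils.data import Dataset

from markov_spline_training import run_markov_enhanced_training


class RandomBitDataset(Dataset):
    """Toy dataset of random bit sequences, used only to exercise the pipeline."""

    def __init__(self, num_samples: int = 64, seq_len: int = 128):
        self.bits = torch.randint(0, 2, (num_samples, seq_len))

    def __len__(self):
        return len(self.bits)

    def __getitem__(self, idx):
        seq = self.bits[idx]
        # Next-bit prediction: inputs are bits[:-1], targets are bits[1:].
        return {"input_bits": seq[:-1], "target_bits": seq[1:]}


# Build the trainer from the script's default enhanced configuration.
trainer, config = run_markov_enhanced_training()

# Wrap the toy dataset with MarkovSpline preprocessing and run one epoch.
loader = trainer.create_enhanced_dataloader(RandomBitDataset(), batch_size=4)
epoch_metrics = trainer.train_epoch(loader, epoch=0)
print(epoch_metrics)
```

Equivalently, the script's CLI entry point can be invoked directly, e.g. `python markov_spline_training.py --config config.json --output-dir ./markov_enhanced_checkpoints`.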