# WCNegentropy / BitTransformerLM

"""
Type definitions and type aliases for BitTransformerLM.

Provides standardized type hints and common type aliases used throughout the codebase.
"""
from __future__ import annotations

from pathlib import Path
from typing import Union, List, Dict, Tuple, Optional, Any, Callable, Protocol

import numpy as np
import torch

# Common tensor types
# TensorLike: a torch tensor, a NumPy array, or a flat list of floats/ints.
TensorLike = Union[torch.Tensor, np.ndarray, List[float], List[int]]
# DeviceType: a device given either as a string or as a torch.device.
DeviceType = Union[str, torch.device]
# DtypeType: a dtype given as a torch.dtype, a Python type object, or a string.
DtypeType = Union[torch.dtype, type, str]

# Bit sequence types
# NOTE: the 0/1 restriction is a naming convention only; the aliases below are
# plain List[int] / torch.Tensor and do not enforce it.
BitSequence = List[int]  # List of 0s and 1s
BitTensor = torch.Tensor  # Tensor containing 0s and 1s
BitBatch = Union[List[BitSequence], torch.Tensor]

# Model types
# ModelOutput: a single tensor or a tuple of tensors of any length.
ModelOutput = Union[torch.Tensor, Tuple[torch.Tensor, ...]]
# TelemetryDict: string keys mapped to a float, a list of floats, or a tensor.
TelemetryDict = Dict[str, Union[float, List[float], torch.Tensor]]
SafetyMetrics = Dict[str, float]

# File and path types
PathLike = Union[str, Path]
OptionalPath = Optional[PathLike]

# Training types
LossValue = Union[float, torch.Tensor]
OptimizerState = Dict[str, Any]
SchedulerState = Dict[str, Any]

# Configuration types (free-form string-keyed dictionaries)
ModelConfig = Dict[str, Any]
TrainingConfig = Dict[str, Any]
DatasetConfig = Dict[str, Any]

# HuggingFace types
HFRepoId = str
HFToken = Optional[str]  # None is an accepted value

# Function type protocols
class ModelForward(Protocol):
    """Protocol for model forward functions.

    Structural type for any callable that accepts a bit tensor, an optional
    attention mask, and arbitrary extra keyword arguments.

    Args:
        inputs: The input ``BitTensor``.
        attention_mask: Optional mask tensor; defaults to ``None``.
        **kwargs: Any additional keyword arguments the implementation accepts.

    Returns:
        A ``ModelOutput`` value.
    """

    def __call__(
        self,
        inputs: BitTensor,
        attention_mask: Optional[torch.Tensor] = None,
        **kwargs: Any,
    ) -> ModelOutput: ...
class LossFunction(Protocol):
    """Protocol for loss functions.

    Structural type for any callable that maps a predictions tensor and a
    targets tensor to a loss.

    Args:
        predictions: Tensor of predictions.
        targets: Tensor of targets.

    Returns:
        A ``LossValue`` value.
    """

    def __call__(
        self,
        predictions: torch.Tensor,
        targets: torch.Tensor,
    ) -> LossValue: ...
class MetricFunction(Protocol):
    """Protocol for metric computation functions.

    Structural type for any callable that maps a predictions tensor and a
    targets tensor to a single float.

    Args:
        predictions: Tensor of predictions.
        targets: Tensor of targets.

    Returns:
        The metric as a ``float``.
    """

    def __call__(
        self,
        predictions: torch.Tensor,
        targets: torch.Tensor,
    ) -> float: ...
# Compression types
CompressedData = torch.Tensor
CompressionRatio = float

# Safety and telemetry types
NegentropyScore = float  # K metric: 0 (random) to 1 (ordered)
ComplexityScore = float  # C metric: LZ complexity proxy
SymbiosisScore = float   # S metric: KL divergence alignment
SafetyThresholds = Dict[str, float]
# Callback that receives a TelemetryDict and returns nothing.
TelemetryCallback = Callable[[TelemetryDict], None]

# Distributed training types
WorldSize = int
ProcessRank = int
DistributedConfig = Dict[str, Any]

# Quantization types
QuantizationConfig = Dict[str, Any]
QuantizedModel = torch.nn.Module

# Common type aliases for cleaner signatures
# All of these are plain ``int``; the names only document intent.
BatchSize = int
SequenceLength = int
VocabSize = int
HiddenSize = int
NumHeads = int
NumLayers = int

# Attention types
AttentionWeights = torch.Tensor
AttentionMask = Optional[torch.Tensor]
ChunkSize = Optional[int]

# Generation types
GenerationConfig = Dict[str, Any]
GeneratedSequence = BitSequence
# Callback that receives a GeneratedSequence and returns nothing.
GenerationCallback = Callable[[GeneratedSequence], None]

# Diffusion types
# NOTE: NoiseSchedule is a plain ``str``; the listed values are not enforced.
NoiseSchedule = str  # 'linear', 'cosine', 'exponential'
DiffusionSteps = int
DiffusionConfig = Dict[str, Any]

# Error handling types
ErrorHandler = Callable[[Exception], None]  # takes the exception, returns nothing
RecoveryStrategy = Callable[[], Any]  # zero-argument callable

# Logging types
# NOTE: LogLevel is a plain ``str``; the listed values are not enforced.
LogLevel = str  # 'DEBUG', 'INFO', 'WARNING', 'ERROR'
LogMessage = str
Logger = Any  # To avoid circular import with logging module