"""
BitTransformerLM CLI Argument Standards

Unified command-line interface standards for all BitTransformerLM scripts.
This module provides standardized argument parsers and naming conventions.
"""

import argparse
import warnings
from typing import Optional, Callable


class BitTransformerCLI:
    """Standardized CLI argument parser for BitTransformerLM.

    Each ``add_*_args`` static method attaches one titled argument group to
    an existing parser. ``create_standard_parser`` builds a new parser from
    a chosen subset of those groups plus the common ``--verbose``,
    ``--debug`` and ``--seed`` flags.
    """

    @staticmethod
    def add_model_args(parser: argparse.ArgumentParser) -> None:
        """Add standard model configuration arguments.

        Args:
            parser: Parser that receives the 'Model Configuration' group.
        """
        model_group = parser.add_argument_group('Model Configuration')
        model_group.add_argument('--model-size',
                                 choices=['tiny', 'small', 'medium', 'large'],
                                 default='small', help='Model size preset')
        model_group.add_argument('--d-model', type=int, default=128,
                                 help='Model dimension')
        model_group.add_argument('--num-heads', type=int, default=8,
                                 help='Number of attention heads')
        model_group.add_argument('--num-layers', type=int, default=6,
                                 help='Number of transformer layers')
        model_group.add_argument('--dropout', type=float, default=0.1,
                                 help='Dropout rate')
        model_group.add_argument('--max-seq-len', type=int, default=512,
                                 help='Maximum sequence length')

    @staticmethod
    def add_training_args(parser: argparse.ArgumentParser) -> None:
        """Add standard training arguments.

        Args:
            parser: Parser that receives the 'Training Configuration' group.
        """
        train_group = parser.add_argument_group('Training Configuration')
        train_group.add_argument('--epochs', type=int, default=10,
                                 help='Number of training epochs')
        train_group.add_argument('--batch-size', type=int, default=16,
                                 help='Training batch size')
        train_group.add_argument('--learning-rate', type=float, default=1e-3,
                                 help='Learning rate')
        train_group.add_argument('--weight-decay', type=float, default=0.01,
                                 help='Weight decay')
        train_group.add_argument('--grad-clip', type=float, default=1.0,
                                 help='Gradient clipping threshold')
        train_group.add_argument('--warmup-steps', type=int, default=100,
                                 help='Number of warmup steps')

    @staticmethod
    def add_dataset_args(parser: argparse.ArgumentParser) -> None:
        """Add standard dataset arguments.

        Args:
            parser: Parser that receives the 'Dataset Configuration' group.
        """
        data_group = parser.add_argument_group('Dataset Configuration')
        data_group.add_argument('--dataset-name', type=str, default='synthetic',
                                help='Dataset name or path')
        data_group.add_argument('--dataset-size', type=int, default=10000,
                                help='Dataset size (number of samples)')
        data_group.add_argument('--seq-length', type=int, default=64,
                                help='Sequence length for training')
        data_group.add_argument('--validation-split', type=float, default=0.1,
                                help='Validation split ratio')

    @staticmethod
    def add_safety_args(parser: argparse.ArgumentParser) -> None:
        """Add safety and telemetry arguments.

        Args:
            parser: Parser that receives the 'Safety & Telemetry' group.
        """
        safety_group = parser.add_argument_group('Safety & Telemetry')
        safety_group.add_argument('--enable-safety-gates', action='store_true',
                                  help='Enable safety gates during inference')
        safety_group.add_argument('--min-negentropy', type=float, default=0.1,
                                  help='Minimum negentropy threshold')
        safety_group.add_argument('--max-complexity', type=float, default=0.9,
                                  help='Maximum LZ complexity threshold')
        safety_group.add_argument('--min-symbiosis', type=float, default=0.3,
                                  help='Minimum symbiosis score threshold')
        safety_group.add_argument('--telemetry-logging', action='store_true',
                                  help='Enable detailed telemetry logging')

    @staticmethod
    def add_optimization_args(parser: argparse.ArgumentParser) -> None:
        """Add optimization and performance arguments.

        Args:
            parser: Parser that receives the 'Optimization & Performance'
                group.
        """
        opt_group = parser.add_argument_group('Optimization & Performance')
        opt_group.add_argument('--use-amp', action='store_true',
                               help='Use automatic mixed precision')
        opt_group.add_argument('--gradient-checkpointing', action='store_true',
                               help='Use gradient checkpointing')
        opt_group.add_argument('--compile-model', action='store_true',
                               help='Use torch.compile for optimization')
        opt_group.add_argument('--chunk-size', type=int, default=None,
                               help='Chunk size for chunked attention')
        opt_group.add_argument('--num-workers', type=int, default=4,
                               help='Number of data loader workers')

    @staticmethod
    def add_distributed_args(parser: argparse.ArgumentParser) -> None:
        """Add distributed training arguments.

        Args:
            parser: Parser that receives the 'Distributed Training' group.
        """
        dist_group = parser.add_argument_group('Distributed Training')
        dist_group.add_argument('--distributed', action='store_true',
                                help='Enable distributed training')
        dist_group.add_argument('--world-size', type=int, default=1,
                                help='Number of distributed processes')
        dist_group.add_argument('--rank', type=int, default=0,
                                help='Process rank for distributed training')
        dist_group.add_argument('--backend', choices=['nccl', 'gloo'],
                                default='nccl', help='Distributed backend')

    @staticmethod
    def add_io_args(parser: argparse.ArgumentParser) -> None:
        """Add input/output arguments.

        Args:
            parser: Parser that receives the 'Input/Output' group.
        """
        io_group = parser.add_argument_group('Input/Output')
        io_group.add_argument('--input-path', type=str,
                              help='Input file or directory path')
        io_group.add_argument('--output-path', type=str, default='./output',
                              help='Output directory path')
        io_group.add_argument('--weights-path', type=str,
                              default='./weights/model.pt',
                              help='Model weights file path')
        io_group.add_argument('--checkpoint-dir', type=str,
                              default='./checkpoints',
                              help='Checkpoint directory path')
        io_group.add_argument('--log-level',
                              choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'],
                              default='INFO', help='Logging level')

    @staticmethod
    def add_huggingface_args(parser: argparse.ArgumentParser) -> None:
        """Add HuggingFace integration arguments.

        Args:
            parser: Parser that receives the 'HuggingFace Integration' group.
        """
        hf_group = parser.add_argument_group('HuggingFace Integration')
        hf_group.add_argument('--hf-repo', type=str,
                              help='HuggingFace repository ID')
        hf_group.add_argument('--hf-token', type=str,
                              help='HuggingFace access token')
        hf_group.add_argument('--private-repo', action='store_true',
                              help='Create private HuggingFace repository')
        hf_group.add_argument('--auto-upload', action='store_true',
                              help='Automatically upload to HuggingFace after training')

    @staticmethod
    def add_diffusion_args(parser: argparse.ArgumentParser) -> None:
        """Add diffusion mode arguments.

        Args:
            parser: Parser that receives the 'Diffusion Mode' group.
        """
        diff_group = parser.add_argument_group('Diffusion Mode')
        diff_group.add_argument('--diffusion-mode', action='store_true',
                                help='Enable diffusion training mode')
        diff_group.add_argument('--diffusion-steps', type=int, default=8,
                                help='Number of diffusion steps')
        diff_group.add_argument('--noise-schedule',
                                choices=['linear', 'cosine', 'exponential'],
                                default='linear', help='Noise schedule type')
        diff_group.add_argument('--diffusion-curriculum', action='store_true',
                                help='Use curriculum learning for diffusion')

    @classmethod
    def create_standard_parser(cls, description: str,
                               include_groups: Optional[list] = None) -> argparse.ArgumentParser:
        """Create a standardized argument parser with specified groups.

        Args:
            description: Parser description
            include_groups: List of group names to include. If None, the
                default set ['model', 'training', 'dataset', 'safety', 'io']
                is used (not every available group).
                Options: ['model', 'training', 'dataset', 'safety',
                          'optimization', 'distributed', 'io',
                          'huggingface', 'diffusion']
                A name listed more than once is added only once. Unknown
                names are skipped and reported with a ``UserWarning``.

        Returns:
            A parser using ``ArgumentDefaultsHelpFormatter`` that contains
            the requested groups plus ``--verbose``/``-v``, ``--debug`` and
            ``--seed``.
        """
        parser = argparse.ArgumentParser(
            description=description,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter
        )

        # Default groups to include if none specified
        if include_groups is None:
            include_groups = ['model', 'training', 'dataset', 'safety', 'io']

        # Map each public group name to the method that registers it
        group_methods = {
            'model': cls.add_model_args,
            'training': cls.add_training_args,
            'dataset': cls.add_dataset_args,
            'safety': cls.add_safety_args,
            'optimization': cls.add_optimization_args,
            'distributed': cls.add_distributed_args,
            'io': cls.add_io_args,
            'huggingface': cls.add_huggingface_args,
            'diffusion': cls.add_diffusion_args,
        }

        # dict.fromkeys drops repeats while keeping first-seen order; adding
        # the same group twice would make argparse fail with a
        # "conflicting option string" error.
        for group_name in dict.fromkeys(include_groups):
            add_group = group_methods.get(group_name)
            if add_group is None:
                # Still skipped (as before), but no longer silently: a typo
                # in a group name would otherwise drop options unnoticed.
                warnings.warn(
                    f"Unknown argument group {group_name!r} ignored; "
                    f"valid groups: {sorted(group_methods)}",
                    stacklevel=2,
                )
                continue
            add_group(parser)

        # Add common flags
        parser.add_argument('--verbose', '-v', action='store_true',
                            help='Enable verbose output')
        parser.add_argument('--debug', action='store_true',
                            help='Enable debug mode')
        parser.add_argument('--seed', type=int, default=42,
                            help='Random seed for reproducibility')

        return parser


# Pre-configured parsers for common use cases

def create_training_parser() -> argparse.ArgumentParser:
    """Create parser for training scripts.

    Returns:
        A standard parser with the model, training, dataset, safety,
        optimization, distributed, io and huggingface groups.
    """
    return BitTransformerCLI.create_standard_parser(
        "BitTransformerLM Training Script",
        ['model', 'training', 'dataset', 'safety', 'optimization',
         'distributed', 'io', 'huggingface']
    )


def create_inference_parser() -> argparse.ArgumentParser:
    """Create parser for inference scripts.

    Returns:
        A standard parser with the model, safety, io and diffusion groups.
    """
    return BitTransformerCLI.create_standard_parser(
        "BitTransformerLM Inference Script",
        ['model', 'safety', 'io', 'diffusion']
    )


def create_evaluation_parser() -> argparse.ArgumentParser:
    """Create parser for evaluation scripts.

    Returns:
        A standard parser with the model, dataset, safety and io groups.
    """
    return BitTransformerCLI.create_standard_parser(
        "BitTransformerLM Evaluation Script",
        ['model', 'dataset', 'safety', 'io']
    )


def create_workflow_parser() -> argparse.ArgumentParser:
    """Create parser for workflow/pipeline scripts.

    Returns:
        A standard parser with the model, training, dataset, safety,
        optimization, io, huggingface and diffusion groups.
    """
    return BitTransformerCLI.create_standard_parser(
        "BitTransformerLM Workflow Script",
        ['model', 'training', 'dataset', 'safety', 'optimization',
         'io', 'huggingface', 'diffusion']
    )