Final optimization: Update cli_standards.py with production-ready enhancements
Commit e2ef423 (verified)
| """ | |
| BitTransformerLM CLI Argument Standards | |
| Unified command-line interface standards for all BitTransformerLM scripts. | |
| This module provides standardized argument parsers and naming conventions. | |
| """ | |
| import argparse | |
| from typing import Optional, Callable | |


class BitTransformerCLI:
    """Standardized CLI argument parser for BitTransformerLM."""

    @staticmethod
    def add_model_args(parser: argparse.ArgumentParser) -> None:
        """Add standard model configuration arguments."""
        model_group = parser.add_argument_group('Model Configuration')
        model_group.add_argument('--model-size', choices=['tiny', 'small', 'medium', 'large'],
                                 default='small', help='Model size preset')
        model_group.add_argument('--d-model', type=int, default=128,
                                 help='Model dimension')
        model_group.add_argument('--num-heads', type=int, default=8,
                                 help='Number of attention heads')
        model_group.add_argument('--num-layers', type=int, default=6,
                                 help='Number of transformer layers')
        model_group.add_argument('--dropout', type=float, default=0.1,
                                 help='Dropout rate')
        model_group.add_argument('--max-seq-len', type=int, default=512,
                                 help='Maximum sequence length')

    @staticmethod
    def add_training_args(parser: argparse.ArgumentParser) -> None:
        """Add standard training arguments."""
        train_group = parser.add_argument_group('Training Configuration')
        train_group.add_argument('--epochs', type=int, default=10,
                                 help='Number of training epochs')
        train_group.add_argument('--batch-size', type=int, default=16,
                                 help='Training batch size')
        train_group.add_argument('--learning-rate', type=float, default=1e-3,
                                 help='Learning rate')
        train_group.add_argument('--weight-decay', type=float, default=0.01,
                                 help='Weight decay')
        train_group.add_argument('--grad-clip', type=float, default=1.0,
                                 help='Gradient clipping threshold')
        train_group.add_argument('--warmup-steps', type=int, default=100,
                                 help='Number of warmup steps')

    @staticmethod
    def add_dataset_args(parser: argparse.ArgumentParser) -> None:
        """Add standard dataset arguments."""
        data_group = parser.add_argument_group('Dataset Configuration')
        data_group.add_argument('--dataset-name', type=str, default='synthetic',
                                help='Dataset name or path')
        data_group.add_argument('--dataset-size', type=int, default=10000,
                                help='Dataset size (number of samples)')
        data_group.add_argument('--seq-length', type=int, default=64,
                                help='Sequence length for training')
        data_group.add_argument('--validation-split', type=float, default=0.1,
                                help='Validation split ratio')

    @staticmethod
    def add_safety_args(parser: argparse.ArgumentParser) -> None:
        """Add safety and telemetry arguments."""
        safety_group = parser.add_argument_group('Safety & Telemetry')
        safety_group.add_argument('--enable-safety-gates', action='store_true',
                                  help='Enable safety gates during inference')
        safety_group.add_argument('--min-negentropy', type=float, default=0.1,
                                  help='Minimum negentropy threshold')
        safety_group.add_argument('--max-complexity', type=float, default=0.9,
                                  help='Maximum LZ complexity threshold')
        safety_group.add_argument('--min-symbiosis', type=float, default=0.3,
                                  help='Minimum symbiosis score threshold')
        safety_group.add_argument('--telemetry-logging', action='store_true',
                                  help='Enable detailed telemetry logging')

    @staticmethod
    def add_optimization_args(parser: argparse.ArgumentParser) -> None:
        """Add optimization and performance arguments."""
        opt_group = parser.add_argument_group('Optimization & Performance')
        opt_group.add_argument('--use-amp', action='store_true',
                               help='Use automatic mixed precision')
        opt_group.add_argument('--gradient-checkpointing', action='store_true',
                               help='Use gradient checkpointing')
        opt_group.add_argument('--compile-model', action='store_true',
                               help='Use torch.compile for optimization')
        opt_group.add_argument('--chunk-size', type=int, default=None,
                               help='Chunk size for chunked attention')
        opt_group.add_argument('--num-workers', type=int, default=4,
                               help='Number of data loader workers')

    @staticmethod
    def add_distributed_args(parser: argparse.ArgumentParser) -> None:
        """Add distributed training arguments."""
        dist_group = parser.add_argument_group('Distributed Training')
        dist_group.add_argument('--distributed', action='store_true',
                                help='Enable distributed training')
        dist_group.add_argument('--world-size', type=int, default=1,
                                help='Number of distributed processes')
        dist_group.add_argument('--rank', type=int, default=0,
                                help='Process rank for distributed training')
        dist_group.add_argument('--backend', choices=['nccl', 'gloo'], default='nccl',
                                help='Distributed backend')

    @staticmethod
    def add_io_args(parser: argparse.ArgumentParser) -> None:
        """Add input/output arguments."""
        io_group = parser.add_argument_group('Input/Output')
        io_group.add_argument('--input-path', type=str,
                              help='Input file or directory path')
        io_group.add_argument('--output-path', type=str, default='./output',
                              help='Output directory path')
        io_group.add_argument('--weights-path', type=str, default='./weights/model.pt',
                              help='Model weights file path')
        io_group.add_argument('--checkpoint-dir', type=str, default='./checkpoints',
                              help='Checkpoint directory path')
        io_group.add_argument('--log-level', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'],
                              default='INFO', help='Logging level')

    @staticmethod
    def add_huggingface_args(parser: argparse.ArgumentParser) -> None:
        """Add HuggingFace integration arguments."""
        hf_group = parser.add_argument_group('HuggingFace Integration')
        hf_group.add_argument('--hf-repo', type=str,
                              help='HuggingFace repository ID')
        hf_group.add_argument('--hf-token', type=str,
                              help='HuggingFace access token')
        hf_group.add_argument('--private-repo', action='store_true',
                              help='Create private HuggingFace repository')
        hf_group.add_argument('--auto-upload', action='store_true',
                              help='Automatically upload to HuggingFace after training')

    @staticmethod
    def add_diffusion_args(parser: argparse.ArgumentParser) -> None:
        """Add diffusion mode arguments."""
        diff_group = parser.add_argument_group('Diffusion Mode')
        diff_group.add_argument('--diffusion-mode', action='store_true',
                                help='Enable diffusion training mode')
        diff_group.add_argument('--diffusion-steps', type=int, default=8,
                                help='Number of diffusion steps')
        diff_group.add_argument('--noise-schedule', choices=['linear', 'cosine', 'exponential'],
                                default='linear', help='Noise schedule type')
        diff_group.add_argument('--diffusion-curriculum', action='store_true',
                                help='Use curriculum learning for diffusion')

    @classmethod
    def create_standard_parser(cls,
                               description: str,
                               include_groups: Optional[list] = None) -> argparse.ArgumentParser:
        """Create a standardized argument parser with the specified groups.

        Args:
            description: Parser description.
            include_groups: List of group names to include. If None, a default
                subset is used. Options: ['model', 'training', 'dataset',
                'safety', 'optimization', 'distributed', 'io', 'huggingface',
                'diffusion'].
        """
        parser = argparse.ArgumentParser(
            description=description,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter
        )

        # Default groups to include if none specified
        if include_groups is None:
            include_groups = ['model', 'training', 'dataset', 'safety', 'io']

        # Add requested argument groups; unknown names are silently skipped
        group_methods = {
            'model': cls.add_model_args,
            'training': cls.add_training_args,
            'dataset': cls.add_dataset_args,
            'safety': cls.add_safety_args,
            'optimization': cls.add_optimization_args,
            'distributed': cls.add_distributed_args,
            'io': cls.add_io_args,
            'huggingface': cls.add_huggingface_args,
            'diffusion': cls.add_diffusion_args,
        }
        for group_name in include_groups:
            if group_name in group_methods:
                group_methods[group_name](parser)

        # Add common flags
        parser.add_argument('--verbose', '-v', action='store_true',
                            help='Enable verbose output')
        parser.add_argument('--debug', action='store_true',
                            help='Enable debug mode')
        parser.add_argument('--seed', type=int, default=42,
                            help='Random seed for reproducibility')
        return parser


# Pre-configured parsers for common use cases

def create_training_parser() -> argparse.ArgumentParser:
    """Create parser for training scripts."""
    return BitTransformerCLI.create_standard_parser(
        "BitTransformerLM Training Script",
        ['model', 'training', 'dataset', 'safety', 'optimization', 'distributed', 'io', 'huggingface']
    )


def create_inference_parser() -> argparse.ArgumentParser:
    """Create parser for inference scripts."""
    return BitTransformerCLI.create_standard_parser(
        "BitTransformerLM Inference Script",
        ['model', 'safety', 'io', 'diffusion']
    )


def create_evaluation_parser() -> argparse.ArgumentParser:
    """Create parser for evaluation scripts."""
    return BitTransformerCLI.create_standard_parser(
        "BitTransformerLM Evaluation Script",
        ['model', 'dataset', 'safety', 'io']
    )


def create_workflow_parser() -> argparse.ArgumentParser:
    """Create parser for workflow/pipeline scripts."""
    return BitTransformerCLI.create_standard_parser(
        "BitTransformerLM Workflow Script",
        ['model', 'training', 'dataset', 'safety', 'optimization', 'io', 'huggingface', 'diffusion']
    )
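
For reference, a minimal usage sketch of the module above. This assumes the file is importable as cli_standards and uses a hypothetical train.py as the caller; model construction and the training loop are out of scope here.

# Hypothetical train.py — an illustrative sketch, not part of this commit.
from cli_standards import BitTransformerCLI, create_training_parser


def main() -> None:
    # Pre-configured parser with all training-related argument groups.
    args = create_training_parser().parse_args()
    # argparse maps '--d-model' to args.d_model, '--learning-rate' to
    # args.learning_rate, and so on for every dashed flag.
    print(f"epochs={args.epochs}, d_model={args.d_model}, "
          f"lr={args.learning_rate}, seed={args.seed}")


if __name__ == '__main__':
    main()

# Scripts that need only a subset of groups can build a custom parser:
#     BitTransformerCLI.create_standard_parser(
#         "BitTransformerLM Export Script", include_groups=['model', 'io'])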