"""
Command-line interface for the Cloud Agents system.
"""

import click
import asyncio
import logging

from .coordinator import Coordinator
from .scaling import ScalingManager
from .config import settings

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _log_monitor_failure(task):
    """Log an error if the background scaling task ended with an exception.

    Intended as an ``asyncio.Task`` done-callback. Cancellation is treated as
    a normal shutdown and is not reported.
    """
    # Task.exception() raises CancelledError on a cancelled task, so check
    # for cancellation before asking for the exception.
    if task.cancelled():
        return
    exc = task.exception()
    if exc is not None:
        logger.error("Scaling monitor stopped unexpectedly: %s", exc)


# NOTE: click uses the docstrings of the functions below as --help text, so
# they are kept short; implementation notes live in comments instead.
@click.group()
def cli():
    """Cloud Agents CLI for distributed model training."""


@cli.command()
@click.option('--num-epochs', default=1, help='Number of training epochs')
@click.option('--steps-per-epoch', default=100, help='Steps per epoch')
def train(num_epochs, steps_per_epoch):
    """Start distributed training."""
    # Runs Coordinator.coordinate_training() to completion while
    # ScalingManager.monitor_and_scale() runs concurrently as a background
    # task. Any failure is logged and then re-raised to the caller.
    try:
        coordinator = Coordinator()
        scaling_manager = ScalingManager()

        async def run_training():
            # Keep a strong reference to the task: the event loop itself only
            # holds weak references, so a fire-and-forget task may be
            # garbage-collected before it finishes.
            monitor_task = asyncio.create_task(
                scaling_manager.monitor_and_scale()
            )
            # Surface a crash of the monitor right away instead of leaving it
            # unnoticed until the task object is finalised.
            monitor_task.add_done_callback(_log_monitor_failure)
            try:
                await coordinator.coordinate_training({
                    'num_epochs': num_epochs,
                    'steps_per_epoch': steps_per_epoch,
                })
            finally:
                # Training is over (successfully or not): stop the monitor.
                # asyncio.run() waits for cancelled tasks while shutting down.
                monitor_task.cancel()

        asyncio.run(run_training())
    except Exception as e:
        logger.error("Training failed: %s", e)
        raise


@cli.command()
def status():
    """Get cluster status."""
    # Prints the fields of the mapping returned by
    # ScalingManager.get_cluster_status(); failures are logged and re-raised.
    try:
        scaling_manager = ScalingManager()
        # Named cluster_status so it does not shadow this command function.
        cluster_status = scaling_manager.get_cluster_status()

        click.echo("Cluster Status:")
        click.echo(f"Total Agents: {cluster_status['total_agents']}")
        click.echo(f"Busy Agents: {cluster_status['busy_agents']}")
        click.echo(f"Idle Agents: {cluster_status['idle_agents']}")
        click.echo(f"Utilization: {cluster_status['utilization']:.2%}")
        click.echo(f"Can Scale Up: {cluster_status['can_scale_up']}")
        click.echo(f"Can Scale Down: {cluster_status['can_scale_down']}")
    except Exception as e:
        logger.error("Failed to get status: %s", e)
        raise


if __name__ == '__main__':
    cli()