{ "backend": { "name": "pytorch", "version": "2.1.2", "_target_": "optimum_benchmark.backends.pytorch.backend.PyTorchBackend", "model": "LLMNewbie/vic_critP_20pr", "task": "text-generation", "library": "transformers", "device": "cuda", "device_ids": "0", "seed": 42, "inter_op_num_threads": null, "intra_op_num_threads": null, "hub_kwargs": { "revision": "main", "force_download": false, "local_files_only": false, "trust_remote_code": false }, "no_weights": true, "device_map": null, "torch_dtype": null, "amp_autocast": false, "amp_dtype": null, "eval_mode": true, "to_bettertransformer": false, "low_cpu_mem_usage": null, "attn_implementation": null, "cache_implementation": null, "torch_compile": false, "torch_compile_config": {}, "quantization_scheme": null, "quantization_config": {}, "deepspeed_inference": false, "deepspeed_inference_config": {}, "peft_type": null, "peft_config": {} }, "launcher": { "name": "torchrun", "_target_": "optimum_benchmark.launchers.torchrun.launcher.TorchrunLauncher", "device_isolation": false, "min_nodes": 1, "max_nodes": 1, "nproc_per_node": 1, "role": "benchmark_worker", "monitor_interval": 30, "rdzv_id": "647201d3-1b63-4512-a68e-b5e519f515bf", "rdzv_backend": "c10d", "rdzv_endpoint": "localhost:0", "rdzv_configs": { "rank": 0, "timeout": 900 }, "max_restarts": 0, "start_method": "spawn", "log_dir": null, "redirects": "0", "tee": "0", "metrics_cfg": {}, "local_addr": null }, "benchmark": { "name": "inference", "_target_": "optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark", "duration": 10, "warmup_runs": 10, "input_shapes": { "batch_size": 2, "num_choices": 2, "sequence_length": 16 }, "new_tokens": null, "latency": true, "memory": true, "energy": false, "forward_kwargs": {}, "generate_kwargs": {}, "call_kwargs": {} }, "experiment_name": "api-launch", "task": null, "model": null, "device": null, "library": null, "environment": { "cpu": " Intel(R) Xeon(R) Gold 6448Y", "cpu_count": 16, "cpu_ram_mb": 237491.42528, "system": "Linux", "machine": "x86_64", "platform": "Linux-5.15.0-86-generic-x86_64-with-glibc2.35", "processor": "x86_64", "python_version": "3.10.12", "gpu": [ "NVIDIA H100 80GB HBM3" ], "gpu_count": 1, "gpu_vram_mb": 85520809984, "optimum_benchmark_version": "0.2.0", "optimum_benchmark_commit": "9141f5b68a70cab5d9d5698b5bcefafd0c07270f", "transformers_version": "4.39.1", "transformers_commit": null, "accelerate_version": "0.25.0", "accelerate_commit": null, "diffusers_version": "0.15.0", "diffusers_commit": null, "optimum_version": "1.17.1", "optimum_commit": null, "timm_version": null, "timm_commit": null, "peft_version": "0.9.0", "peft_commit": null } }