monai
medical
katielink committed on
Commit
13ef090
·
1 Parent(s): cba6bb5

fix the wrong GPU index issue of multi-node

Browse files
configs/metadata.json CHANGED
@@ -1,7 +1,8 @@
1
  {
2
  "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_20220324.json",
3
- "version": "0.4.0",
4
  "changelog": {
 
5
  "0.4.0": "remove error dollar symbol in readme",
6
  "0.3.9": "add cpu ram requirement in readme",
7
  "0.3.8": "add non-deterministic note",
@@ -18,7 +19,7 @@
18
  "0.1.0": "complete the model package",
19
  "0.0.1": "initialize the model package structure"
20
  },
21
- "monai_version": "1.2.0rc6",
22
  "pytorch_version": "1.13.1",
23
  "numpy_version": "1.22.2",
24
  "optional_packages_version": {
 
1
  {
2
  "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_20220324.json",
3
+ "version": "0.4.1",
4
  "changelog": {
5
+ "0.4.1": "fix the wrong GPU index issue of multi-node",
6
  "0.4.0": "remove error dollar symbol in readme",
7
  "0.3.9": "add cpu ram requirement in readme",
8
  "0.3.8": "add non-deterministic note",
 
19
  "0.1.0": "complete the model package",
20
  "0.0.1": "initialize the model package structure"
21
  },
22
+ "monai_version": "1.2.0",
23
  "pytorch_version": "1.13.1",
24
  "numpy_version": "1.22.2",
25
  "optional_packages_version": {
configs/multi_gpu_train.yaml CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- device: "$torch.device(f'cuda:{dist.get_rank()}')"
3
  network:
4
  _target_: torch.nn.parallel.DistributedDataParallel
5
  module: "$@network_def.to(@device)"
 
1
  ---
2
+ device: "$torch.device('cuda:' + os.environ['LOCAL_RANK'])"
3
  network:
4
  _target_: torch.nn.parallel.DistributedDataParallel
5
  module: "$@network_def.to(@device)"