From 7d0b2ad4c6429e38b235b4c012341823a2cc9843 Mon Sep 17 00:00:00 2001 From: Sayantan Date: Tue, 25 Jan 2022 14:22:16 +0530 Subject: [PATCH 1/3] Added Gloo as a backend option --- src/components/TabTraining.vue | 14 ++++++++++++-- src/metadata/metadata.json | 8 ++++++++ .../template-text-classification/README.md | 2 +- .../template-vision-classification/README.md | 2 +- src/templates/template-vision-dcgan/README.md | 2 +- .../template-vision-segmentation/README.md | 2 +- 6 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/components/TabTraining.vue b/src/components/TabTraining.vue index 2409b87b..34bde3d8 100644 --- a/src/components/TabTraining.vue +++ b/src/components/TabTraining.vue @@ -6,9 +6,16 @@ :label="deterministic.description" :saveKey="deterministic.name" /> -

Distributed Training (NCCL backend)

+

Distributed Training

+

Choose a Backend

+ Date: Tue, 25 Jan 2022 16:21:10 +0530 Subject: [PATCH 2/3] ran prettier and changed backend from nccl --- src/components/TabTraining.vue | 4 ++-- src/metadata/metadata.json | 7 ++----- src/templates/template-text-classification/README.md | 10 +++++----- src/templates/template-vision-classification/README.md | 10 +++++----- src/templates/template-vision-dcgan/README.md | 10 +++++----- src/templates/template-vision-segmentation/README.md | 8 ++++---- 6 files changed, 23 insertions(+), 26 deletions(-) diff --git a/src/components/TabTraining.vue b/src/components/TabTraining.vue index 34bde3d8..8c9e9d05 100644 --- a/src/components/TabTraining.vue +++ b/src/components/TabTraining.vue @@ -11,7 +11,7 @@

Choose a Backend

\ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ - --backend nccl + --backend #:::= backend :::# ``` #::: } else { :::# @@ -99,7 +99,7 @@ python main.py \ ```sh python main.py \ --nproc_per_node #:::= it.nproc_per_node :::# \ - --backend nccl + --backend #:::= backend :::# ``` #::: } :::# diff --git a/src/templates/template-vision-classification/README.md b/src/templates/template-vision-classification/README.md index ff298c56..c6e14c0e 100644 --- a/src/templates/template-vision-classification/README.md +++ b/src/templates/template-vision-classification/README.md @@ -44,7 +44,7 @@ python -m torch.distributed.launch \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ --use_env main.py \ - --backend nccl + --backend #:::= backend :::# ``` #::: } else { :::# @@ -55,7 +55,7 @@ python -m torch.distributed.launch \ python -m torch.distributed.launch \ --nproc_per_node #:::= it.nproc_per_node :::# \ --use_env main.py \ - --backend nccl + --backend #:::= backend :::# ``` #::: } :::# @@ -77,7 +77,7 @@ python main.py \ --node_rank 0 \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ - --backend nccl + --backend #:::= backend :::# ``` - Execute on worker nodes @@ -89,7 +89,7 @@ python main.py \ --node_rank \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ - --backend nccl + --backend #:::= backend :::# ``` #::: } else { :::# @@ -99,7 +99,7 @@ python main.py \ ```sh python main.py \ --nproc_per_node #:::= it.nproc_per_node :::# \ - --backend nccl + --backend #:::= backend :::# ``` #::: } :::# diff --git a/src/templates/template-vision-dcgan/README.md b/src/templates/template-vision-dcgan/README.md index 3a383559..d0768d44 100644 --- a/src/templates/template-vision-dcgan/README.md +++ b/src/templates/template-vision-dcgan/README.md @@ -44,7 +44,7 @@ python -m torch.distributed.launch \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ --use_env main.py \ - --backend nccl + --backend #:::= backend :::# ``` #::: } else { :::# @@ -55,7 +55,7 @@ python -m torch.distributed.launch \ python -m torch.distributed.launch \ --nproc_per_node #:::= it.nproc_per_node :::# \ --use_env main.py \ - --backend nccl + --backend #:::= backend :::# ``` #::: } :::# @@ -77,7 +77,7 @@ python main.py \ --node_rank 0 \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ - --backend nccl + --backend #:::= backend :::# ``` - Execute on worker nodes @@ -89,7 +89,7 @@ python main.py \ --node_rank \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ - --backend nccl + --backend #:::= backend :::# ``` #::: } else { :::# @@ -99,7 +99,7 @@ python main.py \ ```sh python main.py \ --nproc_per_node #:::= it.nproc_per_node :::# \ - --backend nccl + --backend #:::= backend :::# ``` #::: } :::# diff --git a/src/templates/template-vision-segmentation/README.md b/src/templates/template-vision-segmentation/README.md index 2f10af62..77948c20 100644 --- a/src/templates/template-vision-segmentation/README.md +++ b/src/templates/template-vision-segmentation/README.md @@ -44,7 +44,7 @@ python -m torch.distributed.launch \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ --use_env main.py \ - --backend nccl + --backend #:::= backend :::# ``` #::: } else { :::# @@ -77,7 +77,7 @@ python main.py \ --node_rank 0 \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ - --backend nccl + --backend #:::= backend :::# ``` - Execute on worker nodes @@ -89,7 +89,7 @@ python main.py \ --node_rank \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ - --backend nccl + --backend #:::= backend :::# ``` #::: } else { :::# @@ -99,7 +99,7 @@ python main.py \ ```sh python main.py \ --nproc_per_node #:::= it.nproc_per_node :::# \ - --backend nccl + --backend #:::= backend :::# ``` #::: } :::# From 9f0ab87ca79b26cc54f0f923be3ee33ca8aadb5d Mon Sep 17 00:00:00 2001 From: Sayantan Date: Wed, 26 Jan 2022 13:25:54 +0530 Subject: [PATCH 3/3] corrected code for adding Gloo as a backend --- src/templates/template-common/README.md | 20 +++++++++---------- .../template-text-classification/README.md | 20 +++++++++---------- .../template-vision-classification/README.md | 20 +++++++++---------- src/templates/template-vision-dcgan/README.md | 20 +++++++++---------- .../template-vision-segmentation/README.md | 20 +++++++++---------- 5 files changed, 50 insertions(+), 50 deletions(-) diff --git a/src/templates/template-common/README.md b/src/templates/template-common/README.md index fcb431fd..d3068011 100644 --- a/src/templates/template-common/README.md +++ b/src/templates/template-common/README.md @@ -9,26 +9,26 @@ ```sh python -m torch.distributed.launch \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank 0 \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ --use_env main.py \ - --backend nccl + --backend #:::= it.backend :::# ``` - Execute on worker nodes ```sh python -m torch.distributed.launch \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ --use_env main.py \ - --backend nccl + --backend #:::= it.backend :::# ``` #::: } else { :::# @@ -39,7 +39,7 @@ python -m torch.distributed.launch \ python -m torch.distributed.launch \ --nproc_per_node #:::= it.nproc_per_node :::# \ --use_env main.py \ - --backend nccl + --backend #:::= it.backend :::# ``` #::: } :::# @@ -56,24 +56,24 @@ python -m torch.distributed.launch \ ```sh python main.py \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank 0 \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ - --backend nccl + --backend #:::= it.backend :::# ``` - Execute on worker nodes ```sh python main.py \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ - --backend nccl + --backend #:::= it.backend :::# ``` #::: } else { :::# @@ -83,7 +83,7 @@ python main.py \ ```sh python main.py \ --nproc_per_node #:::= it.nproc_per_node :::# \ - --backend nccl + --backend #:::= it.backend :::# ``` #::: } :::# diff --git a/src/templates/template-text-classification/README.md b/src/templates/template-text-classification/README.md index bedfbf4a..17af6528 100644 --- a/src/templates/template-text-classification/README.md +++ b/src/templates/template-text-classification/README.md @@ -25,26 +25,26 @@ pip install -r requirements.txt --progress-bar off -U ```sh python -m torch.distributed.launch \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank 0 \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ --use_env main.py \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` - Execute on worker nodes ```sh python -m torch.distributed.launch \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ --use_env main.py \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` #::: } else { :::# @@ -55,7 +55,7 @@ python -m torch.distributed.launch \ python -m torch.distributed.launch \ --nproc_per_node #:::= it.nproc_per_node :::# \ --use_env main.py \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` #::: } :::# @@ -72,24 +72,24 @@ python -m torch.distributed.launch \ ```sh python main.py \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank 0 \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` - Execute on worker nodes ```sh python main.py \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` #::: } else { :::# @@ -99,7 +99,7 @@ python main.py \ ```sh python main.py \ --nproc_per_node #:::= it.nproc_per_node :::# \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` #::: } :::# diff --git a/src/templates/template-vision-classification/README.md b/src/templates/template-vision-classification/README.md index c6e14c0e..6f3fd02a 100644 --- a/src/templates/template-vision-classification/README.md +++ b/src/templates/template-vision-classification/README.md @@ -25,26 +25,26 @@ pip install -r requirements.txt --progress-bar off -U ```sh python -m torch.distributed.launch \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank 0 \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ --use_env main.py \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` - Execute on worker nodes ```sh python -m torch.distributed.launch \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ --use_env main.py \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` #::: } else { :::# @@ -55,7 +55,7 @@ python -m torch.distributed.launch \ python -m torch.distributed.launch \ --nproc_per_node #:::= it.nproc_per_node :::# \ --use_env main.py \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` #::: } :::# @@ -72,24 +72,24 @@ python -m torch.distributed.launch \ ```sh python main.py \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank 0 \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` - Execute on worker nodes ```sh python main.py \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` #::: } else { :::# @@ -99,7 +99,7 @@ python main.py \ ```sh python main.py \ --nproc_per_node #:::= it.nproc_per_node :::# \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` #::: } :::# diff --git a/src/templates/template-vision-dcgan/README.md b/src/templates/template-vision-dcgan/README.md index d0768d44..9ddf31f7 100644 --- a/src/templates/template-vision-dcgan/README.md +++ b/src/templates/template-vision-dcgan/README.md @@ -25,26 +25,26 @@ pip install -r requirements.txt --progress-bar off -U ```sh python -m torch.distributed.launch \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank 0 \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ --use_env main.py \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` - Execute on worker nodes ```sh python -m torch.distributed.launch \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ --use_env main.py \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` #::: } else { :::# @@ -55,7 +55,7 @@ python -m torch.distributed.launch \ python -m torch.distributed.launch \ --nproc_per_node #:::= it.nproc_per_node :::# \ --use_env main.py \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` #::: } :::# @@ -72,24 +72,24 @@ python -m torch.distributed.launch \ ```sh python main.py \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank 0 \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` - Execute on worker nodes ```sh python main.py \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` #::: } else { :::# @@ -99,7 +99,7 @@ python main.py \ ```sh python main.py \ --nproc_per_node #:::= it.nproc_per_node :::# \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` #::: } :::# diff --git a/src/templates/template-vision-segmentation/README.md b/src/templates/template-vision-segmentation/README.md index 77948c20..b649a059 100644 --- a/src/templates/template-vision-segmentation/README.md +++ b/src/templates/template-vision-segmentation/README.md @@ -25,26 +25,26 @@ pip install -r requirements.txt --progress-bar off -U ```sh python -m torch.distributed.launch \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank 0 \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ --use_env main.py \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` - Execute on worker nodes ```sh python -m torch.distributed.launch \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ --use_env main.py \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` #::: } else { :::# @@ -55,7 +55,7 @@ python -m torch.distributed.launch \ python -m torch.distributed.launch \ --nproc_per_node #:::= it.nproc_per_node :::# \ --use_env main.py \ - --backend nccl + --backend #:::= it.backend :::# ``` #::: } :::# @@ -72,24 +72,24 @@ python -m torch.distributed.launch \ ```sh python main.py \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank 0 \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` - Execute on worker nodes ```sh python main.py \ - --nproc_per_node #:::= nproc_per_node :::# \ + --nproc_per_node #:::= it.nproc_per_node :::# \ --nnodes #:::= it.nnodes :::# \ --node_rank \ --master_addr #:::= it.master_addr :::# \ --master_port #:::= it.master_port :::# \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` #::: } else { :::# @@ -99,7 +99,7 @@ python main.py \ ```sh python main.py \ --nproc_per_node #:::= it.nproc_per_node :::# \ - --backend #:::= backend :::# + --backend #:::= it.backend :::# ``` #::: } :::#