dmlc · trivialfis · Jun 2, 2022 · Jun 1, 2022
diff --git a/demo/nvflare/README.md b/demo/nvflare/README.md
@@ -3,6 +3,8 @@
 This directory contains a demo of Federated Learning using
 [NVFlare](https://nvidia.github.io/NVFlare/).
 
+## Training with CPU only
+
 To run the demo, first build XGBoost with the federated learning plugin enabled (see the
 [README](../../plugin/federated/README.md)).
 
@@ -53,3 +55,12 @@ Finally, shutdown everything from the admin CLI:
 shutdown client
 shutdown server
 ```
+
+## Training with GPUs
+
+To run the Federated Learning demo using GPUs, make sure your machine has at least 2 GPUs.
+Build XGBoost with the federated learning plugin enabled along with CUDA, but with NCCL
+turned off (see the [README](../../plugin/federated/README.md)).
+
+Modify `config/config_fed_client.json` and set `use_gpus` to `true`, then repeat the steps
+above.
diff --git a/demo/nvflare/config/config_fed_client.json b/demo/nvflare/config/config_fed_client.json
@@ -12,7 +12,8 @@
           "world_size": 2,
           "server_cert_path": "server-cert.pem",
           "client_key_path": "client-key.pem",
-          "client_cert_path": "client-cert.pem"
+          "client_cert_path": "client-cert.pem",
+          "use_gpus": false
         }
       }
     }

diff --git a/demo/nvflare/custom/trainer.py b/demo/nvflare/custom/trainer.py
@@ -16,7 +16,7 @@ class SupportedTasks(object):
 
 class XGBoostTrainer(Executor):
     def __init__(self, server_address: str, world_size: int, server_cert_path: str,
-                 client_key_path: str, client_cert_path: str):
+                 client_key_path: str, client_cert_path: str, use_gpus: bool):
         """Trainer for federated XGBoost.
 
         Args:
@@ -32,6 +32,7 @@ def __init__(self, server_address: str, world_size: int, server_cert_path: str,
         self._server_cert_path = server_cert_path
         self._client_key_path = client_key_path
         self._client_cert_path = client_cert_path
+        self._use_gpus = use_gpus
 
     def execute(self, task_name: str, shareable: Shareable, fl_ctx: FLContext,
                 abort_signal: Signal) -> Shareable:
@@ -66,6 +67,10 @@ def _do_training(self, fl_ctx: FLContext):
 
             # Specify parameters via map, definition are same as c++ version
             param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
+            if self._use_gpus:
+                self.log_info(fl_ctx, f'Training with GPU {rank}')
+                param['tree_method'] = 'gpu_hist'
+                param['gpu_id'] = rank
 
             # Specify validations set to watch performance
             watchlist = [(dtest, 'eval'), (dtrain, 'train')]

diff --git a/plugin/federated/README.md b/plugin/federated/README.md
@@ -20,7 +20,12 @@ Build the Plugin
 # Under xgboost source tree.
 mkdir build
 cd build
-cmake .. -GNinja -DPLUGIN_FEDERATED=ON
+# For now NCCL needs to be turned off.
+cmake .. -GNinja\
+ -DPLUGIN_FEDERATED=ON\
+ -DUSE_CUDA=ON\
+ -DBUILD_WITH_CUDA_CUB=ON\
+ -DUSE_NCCL=OFF
 ninja
 cd ../python-package
 pip install -e .  # or equivalently python setup.py develop
@@ -31,5 +36,6 @@ Test Federated XGBoost
 ```shell
 # Under xgboost source tree.
 cd tests/distributed
+# This tests both CPU training (`hist`) and GPU training (`gpu_hist`).
 ./runtests-federated.sh
 ```