From 790835be18775a79ad3416a68e0639b743f43879 Mon Sep 17 00:00:00 2001 From: Marc Romeyn Date: Fri, 30 Jun 2023 09:42:09 +0200 Subject: [PATCH 1/5] Initialize default metrics/loss inside ModelOutput instead --- merlin/models/torch/outputs/classification.py | 16 +++++++--------- merlin/models/torch/outputs/regression.py | 11 +++++++---- tests/unit/torch/outputs/test_classification.py | 4 ++-- tests/unit/torch/outputs/test_regression.py | 2 +- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/merlin/models/torch/outputs/classification.py b/merlin/models/torch/outputs/classification.py index e201953fd0..2ca36143f7 100644 --- a/merlin/models/torch/outputs/classification.py +++ b/merlin/models/torch/outputs/classification.py @@ -36,24 +36,22 @@ class BinaryOutput(ModelOutput): The metrics used for evaluation. Default includes Accuracy, AUROC, Precision, and Recall. """ + DEFAULT_LOSS_CLS = nn.BCEWithLogitsLoss + DEFAULT_METRICS_CLS = (Accuracy, AUROC, Precision, Recall) + def __init__( self, schema: Optional[ColumnSchema] = None, - loss: nn.Module = nn.BCEWithLogitsLoss(), - metrics: Sequence[Metric] = ( - Accuracy(task="binary"), - AUROC(task="binary"), - Precision(task="binary"), - Recall(task="binary"), - ), + loss: Optional[nn.Module] = None, + metrics: Sequence[Metric] = (), ): """Initializes a BinaryOutput object.""" super().__init__( nn.LazyLinear(1), nn.Sigmoid(), schema=schema, - loss=loss, - metrics=metrics, + loss=loss or self.DEFAULT_LOSS_CLS(), + metrics=metrics or [m(task="binary") for m in self.DEFAULT_METRICS_CLS], ) def setup_schema(self, target: Optional[Union[ColumnSchema, Schema]]): diff --git a/merlin/models/torch/outputs/regression.py b/merlin/models/torch/outputs/regression.py index 0f9f9ad318..e3b2f97b09 100644 --- a/merlin/models/torch/outputs/regression.py +++ b/merlin/models/torch/outputs/regression.py @@ -36,18 +36,21 @@ class RegressionOutput(ModelOutput): The metrics used for evaluation. Default is MeanSquaredError. 
""" + DEFAULT_LOSS_CLS = nn.MSELoss + DEFAULT_METRICS_CLS = (MeanSquaredError,) + def __init__( self, schema: Optional[ColumnSchema] = None, - loss: nn.Module = nn.MSELoss(), - metrics: Sequence[Metric] = (MeanSquaredError(),), + loss: Optional[nn.Module] = None, + metrics: Sequence[Metric] = (), ): """Initializes a RegressionOutput object.""" super().__init__( nn.LazyLinear(1), schema=schema, - loss=loss, - metrics=metrics, + loss=loss or self.DEFAULT_LOSS_CLS(), + metrics=metrics or [m() for m in self.DEFAULT_METRICS_CLS], ) def setup_schema(self, target: Optional[Union[ColumnSchema, Schema]]): diff --git a/tests/unit/torch/outputs/test_classification.py b/tests/unit/torch/outputs/test_classification.py index ea6643c740..755d465350 100644 --- a/tests/unit/torch/outputs/test_classification.py +++ b/tests/unit/torch/outputs/test_classification.py @@ -31,12 +31,12 @@ def test_init(self): assert isinstance(binary_output, mm.BinaryOutput) assert isinstance(binary_output.loss, nn.BCEWithLogitsLoss) - assert binary_output.metrics == ( + assert binary_output.metrics == [ Accuracy(task="binary"), AUROC(task="binary"), Precision(task="binary"), Recall(task="binary"), - ) + ] assert binary_output.output_schema == Schema() def test_identity(self): diff --git a/tests/unit/torch/outputs/test_regression.py b/tests/unit/torch/outputs/test_regression.py index 17541c5081..f8537bca51 100644 --- a/tests/unit/torch/outputs/test_regression.py +++ b/tests/unit/torch/outputs/test_regression.py @@ -30,7 +30,7 @@ def test_init(self): assert isinstance(reg_output, mm.RegressionOutput) assert isinstance(reg_output.loss, nn.MSELoss) - assert reg_output.metrics == (MeanSquaredError,) + assert reg_output.metrics == [MeanSquaredError()] assert reg_output.output_schema == Schema() def test_identity(self): From 3ac002b7a9fb820a951a829984269b7280461a70 Mon Sep 17 00:00:00 2001 From: Marc Romeyn Date: Fri, 30 Jun 2023 09:51:28 +0200 Subject: [PATCH 2/5] Trying to change py38 -> py39 in tox.ini to 
see if that fixes CI --- tox.ini | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tox.ini b/tox.ini index 2446672a8f..7bf3dfdfda 100644 --- a/tox.ini +++ b/tox.ini @@ -2,14 +2,14 @@ ; .github/workflows/cpu-ci.yml for the workflow definition. [tox] -envlist = py38-gpu,py38-multi-gpu +envlist = py39-gpu,py39-multi-gpu [testenv] commands = pip install --upgrade pip pip install -e .[all] -[testenv:py38-gpu] +[testenv:py39-gpu] ; Runs in: Github Actions ; Runs GPU-based tests. allowlist_externals = @@ -28,7 +28,7 @@ commands = python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/systems.git@{env:MERLIN_BRANCH:main} bash -c 'python -m pytest --cov-report term --cov merlin -m "{env:PYTEST_MARKERS}" -rxs tests/ || ([ $? = 5 ] && exit 0 || exit $?)' -[testenv:py38-multi-gpu] +[testenv:py39-multi-gpu] ; Runs in: Github Actions ; Runs GPU-based tests. allowlist_externals = @@ -50,7 +50,7 @@ commands = sh examples/usecases/multi-gpu/install_sparse_operation_kit.sh {envdir} bash -c 'horovodrun -np 2 sh examples/usecases/multi-gpu/hvd_wrapper.sh python -m pytest -m "horovod {env:EXTRA_PYTEST_MARKERS}" -rxs tests/unit || ([ $? = 5 ] && exit 0 || exit $?)' -[testenv:py38-horovod-cpu] +[testenv:py39-horovod-cpu] setenv = HOROVOD_WITH_MPI=1 HOROVOD_WITH_TENSORFLOW=1 @@ -66,7 +66,7 @@ commands = {envdir}/env/bin/python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/nvtabular.git@{env:MERLIN_BRANCH:main} {envdir}/env/bin/horovodrun -np 2 sh examples/usecases/multi-gpu/hvd_wrapper.sh pytest -m "horovod {env:EXTRA_PYTEST_MARKERS}" -rxs tests/unit -[testenv:py38-nvtabular-cpu] +[testenv:py39-nvtabular-cpu] passenv=GIT_COMMIT allowlist_externals = git deps = @@ -82,7 +82,7 @@ commands = python -m pip install . python -m pytest nvtabular-{env:GIT_COMMIT}/tests/unit -[testenv:py38-systems-cpu] +[testenv:py39-systems-cpu] passenv=GIT_COMMIT allowlist_externals = git deps = @@ -99,7 +99,7 @@ commands = python -m pip install . 
python -m pytest -m "not notebook" systems-{env:GIT_COMMIT}/tests/unit -[testenv:py38-transformers4rec-cpu] +[testenv:py39-transformers4rec-cpu] passenv=GIT_COMMIT allowlist_externals = git commands = From 3fd9bd88d4141ec96414144c83843dd47def468f Mon Sep 17 00:00:00 2001 From: Marc Romeyn Date: Fri, 30 Jun 2023 09:52:45 +0200 Subject: [PATCH 3/5] Trying to change py38 -> py39 in gpu.yml to see if that fixes CI --- .github/workflows/gpu.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gpu.yml b/.github/workflows/gpu.yml index 8999056146..a92b8fab45 100644 --- a/.github/workflows/gpu.yml +++ b/.github/workflows/gpu.yml @@ -34,7 +34,7 @@ jobs: if [[ "${{ github.ref }}" != 'refs/heads/main' ]]; then extra_pytest_markers="and changed" fi - cd ${{ github.workspace }}; PYTEST_MARKERS="unit and not (examples or integration or notebook) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e py38-gpu + cd ${{ github.workspace }}; PYTEST_MARKERS="unit and not (examples or integration or notebook) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e py39-gpu tests-examples: runs-on: 1GPU @@ -55,4 +55,4 @@ jobs: if [[ "${{ github.ref }}" != 'refs/heads/main' ]]; then extra_pytest_markers="and changed" fi - cd ${{ github.workspace }}; PYTEST_MARKERS="(examples or notebook) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e py38-gpu + cd ${{ github.workspace }}; PYTEST_MARKERS="(examples or notebook) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e py39-gpu From 918b6a5cdbbbd2a3162bbe520eb1fcc2f17eaecd Mon Sep 17 00:00:00 2001 From: Marc Romeyn Date: Fri, 30 Jun 2023 09:56:34 +0200 Subject: [PATCH 4/5] Trying to change py39 -> py310 in tox.ini and gpu.yml to see if that fixes CI --- .github/workflows/gpu.yml | 4 ++-- tox.ini | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff
--git a/.github/workflows/gpu.yml b/.github/workflows/gpu.yml index a92b8fab45..db4b63275e 100644 --- a/.github/workflows/gpu.yml +++ b/.github/workflows/gpu.yml @@ -34,7 +34,7 @@ jobs: if [[ "${{ github.ref }}" != 'refs/heads/main' ]]; then extra_pytest_markers="and changed" fi - cd ${{ github.workspace }}; PYTEST_MARKERS="unit and not (examples or integration or notebook) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e py39-gpu + cd ${{ github.workspace }}; PYTEST_MARKERS="unit and not (examples or integration or notebook) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e py310-gpu tests-examples: runs-on: 1GPU @@ -55,4 +55,4 @@ jobs: if [[ "${{ github.ref }}" != 'refs/heads/main' ]]; then extra_pytest_markers="and changed" fi - cd ${{ github.workspace }}; PYTEST_MARKERS="(examples or notebook) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e py39-gpu + cd ${{ github.workspace }}; PYTEST_MARKERS="(examples or notebook) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e py310-gpu diff --git a/tox.ini b/tox.ini index 7bf3dfdfda..ab14fe206c 100644 --- a/tox.ini +++ b/tox.ini @@ -2,14 +2,14 @@ ; .github/workflows/cpu-ci.yml for the workflow definition. [tox] -envlist = py39-gpu,py39-multi-gpu +envlist = py310-gpu,py310-multi-gpu [testenv] commands = pip install --upgrade pip pip install -e .[all] -[testenv:py39-gpu] +[testenv:py310-gpu] ; Runs in: Github Actions ; Runs GPU-based tests. allowlist_externals = @@ -28,7 +28,7 @@ commands = python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/systems.git@{env:MERLIN_BRANCH:main} bash -c 'python -m pytest --cov-report term --cov merlin -m "{env:PYTEST_MARKERS}" -rxs tests/ || ([ $? = 5 ] && exit 0 || exit $?)' -[testenv:py39-multi-gpu] +[testenv:py310-multi-gpu] ; Runs in: Github Actions ; Runs GPU-based tests. 
allowlist_externals = @@ -50,7 +50,7 @@ commands = sh examples/usecases/multi-gpu/install_sparse_operation_kit.sh {envdir} bash -c 'horovodrun -np 2 sh examples/usecases/multi-gpu/hvd_wrapper.sh python -m pytest -m "horovod {env:EXTRA_PYTEST_MARKERS}" -rxs tests/unit || ([ $? = 5 ] && exit 0 || exit $?)' -[testenv:py39-horovod-cpu] +[testenv:py310-horovod-cpu] setenv = HOROVOD_WITH_MPI=1 HOROVOD_WITH_TENSORFLOW=1 @@ -66,7 +66,7 @@ commands = {envdir}/env/bin/python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/nvtabular.git@{env:MERLIN_BRANCH:main} {envdir}/env/bin/horovodrun -np 2 sh examples/usecases/multi-gpu/hvd_wrapper.sh pytest -m "horovod {env:EXTRA_PYTEST_MARKERS}" -rxs tests/unit -[testenv:py39-nvtabular-cpu] +[testenv:py310-nvtabular-cpu] passenv=GIT_COMMIT allowlist_externals = git deps = @@ -82,7 +82,7 @@ commands = python -m pip install . python -m pytest nvtabular-{env:GIT_COMMIT}/tests/unit -[testenv:py39-systems-cpu] +[testenv:py310-systems-cpu] passenv=GIT_COMMIT allowlist_externals = git deps = @@ -99,7 +99,7 @@ commands = python -m pip install . 
python -m pytest -m "not notebook" systems-{env:GIT_COMMIT}/tests/unit -[testenv:py39-transformers4rec-cpu] +[testenv:py310-transformers4rec-cpu] passenv=GIT_COMMIT allowlist_externals = git commands = From ae1b5e4dd6cacbc240b9d45e6df3df9469003910 Mon Sep 17 00:00:00 2001 From: Marc Romeyn Date: Fri, 30 Jun 2023 09:58:07 +0200 Subject: [PATCH 5/5] Update LD_LIBRARY_PATH --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index ab14fe206c..67477b0fb9 100644 --- a/tox.ini +++ b/tox.ini @@ -40,7 +40,7 @@ passenv = setenv = TF_GPU_ALLOCATOR=cuda_malloc_async CPATH={env:CPATH}{:}{envdir}/hugectr/include - LD_LIBRARY_PATH=${envdir}/hugectr/include/lib{:}/usr/local/lib/python3.8/dist-packages/tensorflow{:}{env:LD_LIBRARY_PATH} + LD_LIBRARY_PATH=${envdir}/hugectr/include/lib{:}/usr/local/lib/python3.10/dist-packages/tensorflow{:}{env:LD_LIBRARY_PATH} LIBRARY_PATH=${envdir}/hugectr/lib{:}{env:LIBRARY_PATH} sitepackages=true commands =