From b4ccd4660296b730c8c04a3f5768411acce193a6 Mon Sep 17 00:00:00 2001 From: Patrick Bloebaum Date: Thu, 3 Oct 2024 23:32:08 -0700 Subject: [PATCH] Remove some dependencies Signed-off-by: Patrick Bloebaum --- .github/workflows/docs-ci.yml | 16 +++-- .github/workflows/docs.yml | 16 +++-- docs/source/dowhy.graph_learners.rst | 8 --- .../learning_causal_structure.rst | 11 +--- poetry.lock | 62 +------------------ pyproject.toml | 2 - .../test_econml_estimator.py | 33 +--------- 7 files changed, 17 insertions(+), 131 deletions(-) diff --git a/.github/workflows/docs-ci.yml b/.github/workflows/docs-ci.yml index 3134af6f98..99488854fc 100644 --- a/.github/workflows/docs-ci.yml +++ b/.github/workflows/docs-ci.yml @@ -26,16 +26,14 @@ jobs: version: "14.0" directory: ${{ runner.temp }}/llvm - - name: Some cleanup for more disk space - run: | - rm -rf /usr/share/dotnet - rm -rf "$AGENT_TOOLSDIRECTORY" - apt-get autoremove -y - apt-get clean - df -h - - name: Install Python Dependencies - run: poetry install -E plotting -E pydot -E pygraphviz -E econml --with docs + run: poetry install + + - name: Install Python Dependencies 2 + run: poetry install -E plotting -E pydot -E pygraphviz + + - name: Install Python Dependencies 3 + run: poetry install -E econml --with docs - run: git config --global --add safe.directory /__w/dowhy/dowhy diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 355ec52a78..9ab472bcf3 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -30,16 +30,14 @@ jobs: version: "14.0" directory: ${{ runner.temp }}/llvm - - name: Some cleanup for more disk space - run: | - rm -rf /usr/share/dotnet - rm -rf "$AGENT_TOOLSDIRECTORY" - apt-get autoremove -y - apt-get clean - df -h - - name: Install Python Dependencies - run: poetry install + + - name: Install Python Dependencies 2 + run: poetry install -E plotting -E pydot -E pygraphviz + + - name: 
Install Python Dependencies 3 + run: poetry install -E econml --with docs - run: git config --global --add safe.directory /__w/dowhy/dowhy diff --git a/docs/source/dowhy.graph_learners.rst b/docs/source/dowhy.graph_learners.rst index e919b8e409..c7a1ed240a 100644 --- a/docs/source/dowhy.graph_learners.rst +++ b/docs/source/dowhy.graph_learners.rst @@ -4,14 +4,6 @@ dowhy.graph\_learners package Submodules ---------- -dowhy.graph\_learners.cdt module --------------------------------- - -.. automodule:: dowhy.graph_learners.cdt - :members: - :undoc-members: - :show-inheritance: - dowhy.graph\_learners.ges module -------------------------------- diff --git a/docs/source/user_guide/modeling_causal_relations/learning_causal_structure.rst b/docs/source/user_guide/modeling_causal_relations/learning_causal_structure.rst index 06fc81f95f..abd19f8f07 100644 --- a/docs/source/user_guide/modeling_causal_relations/learning_causal_structure.rst +++ b/docs/source/user_guide/modeling_causal_relations/learning_causal_structure.rst @@ -5,16 +5,7 @@ Learning the causal graph is only necessary in case we cannot construct the grap In many cases, the true causal structure for a given dataset may not be known. We can utilize *graph discovery* algorithms to provide candidate causal graphs consistent with the dataset. Such graphs can then be inspected, edited and modified by a user to match their domain expertise or world knowledge. Note that these graphs should be not used directly because graph discovery from observational data is a provably impossible problem in the fully non-parametric setting. Given a dataset, there exist multiple graphs that would lead to the exact same joint distribution and thus are indistinguishable based on the dataset (such graphs constitute the *Markov equivalence class*). As a result, graph discovery algorithms make certain assumptions to learn a graph and do not guarantee the validity of a learned graph. 
-DoWhy does not implement graph discovery algorithms, but provides a simple way to input the learnt graph from a discovery algorithm. The only constraint is that DoWhy expects the algorithm to output a directed acyclic graph (DAG). In the future, we expect to support learning causal graphs directly through integration with the `causal-learn `_ and `dodiscover `_ packages in PyWhy. - -Graph discovery using CDT -------------------------- -Given a dataset as a pandas DataFrame, the following snippet learns the graph using LiNGAM algorithm and loads it in DoWhy. The algorithm implementation is in the Causal Discovery Toolbox (CDT) package which needs to be installed separately. - ->>> from cdt.causality.graph import LiNGAM ->>> causal_graph = LiNGAM().predict(dataset) - -For a full example using CDT, you can refer to the :doc:`../../example_notebooks/dowhy_causal_discovery_example`. +DoWhy does not implement graph discovery algorithms, but provides a simple way to input the learnt graph from a discovery algorithm. The only constraint is that DoWhy expects a directed acyclic graph (DAG) in the form of a networkx graph. For packages that implement different causal discovery algorithms, see the `causal-learn <https://github.com/py-why/causal-learn>`_ and `dodiscover <https://github.com/py-why/dodiscover>`_ packages in PyWhy. 
Graph discovery using dodiscover -------------------------------- diff --git a/poetry.lock b/poetry.lock index f2e2f7e2fa..874bc63e1e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -471,30 +471,6 @@ scipy = "*" statsmodels = "*" tqdm = "*" -[[package]] -name = "cdt" -version = "0.6.0" -description = "A Toolbox for causal graph inference" -optional = false -python-versions = "*" -files = [ - {file = "cdt-0.6.0-py3-none-any.whl", hash = "sha256:a90401b688597c3ccf23cc7b10c62e19df69dc9277095bab40a1343c6e89996b"}, - {file = "cdt-0.6.0.tar.gz", hash = "sha256:cecfb6390440311d0cb7e5446e97f9eb3e3b7d90d9b10d3b4cbbabc2d4c23dba"}, -] - -[package.dependencies] -GPUtil = "*" -joblib = "*" -networkx = "*" -numpy = "*" -pandas = "*" -requests = "*" -scikit-learn = "*" -scipy = "*" -skrebate = "*" -statsmodels = "*" -tqdm = "*" - [[package]] name = "certifi" version = "2024.8.30" @@ -1540,16 +1516,6 @@ test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask-expr", "dask[dataframe, test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"] tqdm = ["tqdm"] -[[package]] -name = "gputil" -version = "1.4.0" -description = "GPUtil is a Python module for getting the GPU status from NVIDA GPUs using nvidia-smi." 
-optional = false -python-versions = "*" -files = [ - {file = "GPUtil-1.4.0.tar.gz", hash = "sha256:099e52c65e512cdfa8c8763fca67f5a5c2afb63469602d5dcb4d296b3661efb9"}, -] - [[package]] name = "graphviz" version = "0.20.3" @@ -2205,17 +2171,6 @@ files = [ {file = "jupyterlab_widgets-3.0.13.tar.gz", hash = "sha256:a2966d385328c1942b683a8cd96b89b8dd82c8b8f81dda902bb2bc06d46f5bed"}, ] -[[package]] -name = "keras" -version = "2.15.0" -description = "Deep learning for humans." -optional = false -python-versions = ">=3.8" -files = [ - {file = "keras-2.15.0-py3-none-any.whl", hash = "sha256:2dcc6d2e30cf9c951064b63c1f4c404b966c59caf09e01f3549138ec8ee0dd1f"}, - {file = "keras-2.15.0.tar.gz", hash = "sha256:81871d298c064dc4ac6b58440fdae67bfcf47c8d7ad28580fab401834c06a575"}, -] - [[package]] name = "kiwisolver" version = "1.4.7" @@ -4728,21 +4683,6 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] -[[package]] -name = "skrebate" -version = "0.62" -description = "Relief-based feature selection algorithms" -optional = false -python-versions = "*" -files = [ - {file = "skrebate-0.62.tar.gz", hash = "sha256:b20dad4dc52f650e1f7960151314840f34251222cae0a78ac23d9f6d377ca558"}, -] - -[package.dependencies] -numpy = "*" -scikit-learn = "*" -scipy = "*" - [[package]] name = "slicer" version = "0.0.7" @@ -5722,4 +5662,4 @@ pygraphviz = ["pygraphviz"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.13" -content-hash = "14cd645747c244735975c83f5c157eaaefd16a4b1e85f37345508bce12ac09e4" +content-hash = "9373b33f0a10245f90f07c356a7374c47f7b0748dd1831a12779b2c37191d08f" diff --git a/pyproject.toml b/pyproject.toml index 97746c0d4d..654f3d1617 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -107,7 +107,6 @@ pytest-split = ">=0.8.0" nbformat = ">=5.4.0" jupyter = ">=1.0.0" flaky = ">=3.7.0" -keras = ">=2.9.0" xgboost = ">=2.0.2" mypy = ">=1.7.1" torch = [ @@ -140,7 +139,6 @@ pydata-sphinx-theme = 
">=0.9.0" ipykernel = ">=6.15.1" sphinx-copybutton = "0.5.0" seaborn = ">=0.12.1" -cdt =">=0.6.0" # # Versions defined for security reasons diff --git a/tests/causal_estimators/test_econml_estimator.py b/tests/causal_estimators/test_econml_estimator.py index 4de5b89b49..526ed039fc 100644 --- a/tests/causal_estimators/test_econml_estimator.py +++ b/tests/causal_estimators/test_econml_estimator.py @@ -144,7 +144,6 @@ def test_metalearners(self): assert pytest.approx(sl_cate_estimates_test_f[0], 0.01) != sl_cate_estimates_test_f[1] def test_iv_estimators(self): - keras = pytest.importorskip("keras") # Setup data data = datasets.linear_dataset( 10, @@ -168,37 +167,7 @@ def test_iv_estimators(self): # Test DeepIV dims_zx = len(model._instruments) + len(model._effect_modifiers) dims_tx = len(model._treatment) + len(model._effect_modifiers) - treatment_model = keras.Sequential( - [ - keras.layers.Dense(128, activation="relu", input_shape=(dims_zx,)), # sum of dims of Z and X - keras.layers.Dropout(0.17), - keras.layers.Dense(64, activation="relu"), - keras.layers.Dropout(0.17), - keras.layers.Dense(32, activation="relu"), - keras.layers.Dropout(0.17), - ] - ) - response_model = keras.Sequential( - [ - keras.layers.Dense(128, activation="relu", input_shape=(dims_tx,)), # sum of dims of T and X - keras.layers.Dropout(0.17), - keras.layers.Dense(64, activation="relu"), - keras.layers.Dropout(0.17), - keras.layers.Dense(32, activation="relu"), - keras.layers.Dropout(0.17), - keras.layers.Dense(1), - ] - ) - dmliv_estimate = model.estimate_effect( - identified_estimand, - method_name="iv.econml.iv.dml.DMLIV", - target_units=lambda df: df["X0"] > -1, - confidence_intervals=False, - method_params={ - "init_params": {"discrete_treatment": False, "discrete_instrument": False}, - "fit_params": {}, - }, - ) + # Test IntentToTreatDRIV data = datasets.linear_dataset( 10,