From b4ccd4660296b730c8c04a3f5768411acce193a6 Mon Sep 17 00:00:00 2001 From: Patrick Bloebaum Date: Thu, 3 Oct 2024 23:32:08 -0700 Subject: [PATCH] Remove some dependencies Signed-off-by: Patrick Bloebaum --- .github/workflows/docs-ci.yml | 16 +++-- .github/workflows/docs.yml | 16 +++-- docs/source/dowhy.graph_learners.rst | 8 --- .../learning_causal_structure.rst | 11 +--- poetry.lock | 62 +------------------ pyproject.toml | 2 - .../test_econml_estimator.py | 33 +--------- 7 files changed, 17 insertions(+), 131 deletions(-) diff --git a/.github/workflows/docs-ci.yml b/.github/workflows/docs-ci.yml index 3134af6f98..99488854fc 100644 --- a/.github/workflows/docs-ci.yml +++ b/.github/workflows/docs-ci.yml @@ -26,16 +26,14 @@ jobs: version: "14.0" directory: ${{ runner.temp }}/llvm - - name: Some cleanup for more disk space - run: | - rm -rf /usr/share/dotnet - rm -rf "$AGENT_TOOLSDIRECTORY" - apt-get autoremove -y - apt-get clean - df -h - - name: Install Python Dependencies - run: poetry install -E plotting -E pydot -E pygraphviz -E econml --with docs + run: poetry install + + - name: Install Python Dependencies 2 + run: poetry install -E plotting -E pydot -E pygraphviz + + - name: Install Python Dependencies 3 + run: poetry install -E econml --with docs - run: git config --global --add safe.directory /__w/dowhy/dowhy diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 355ec52a78..9ab472bcf3 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -30,16 +30,14 @@ jobs: version: "14.0" directory: ${{ runner.temp }}/llvm - - name: Some cleanup for more disk space - run: | - rm -rf /usr/share/dotnet - rm -rf "$AGENT_TOOLSDIRECTORY" - apt-get autoremove -y - apt-get clean - df -h - - name: Install Python Dependencies - run: poetry install + + - name: Install Python Dependencies 2 + run: poetry install -E plotting -E pydot -E pygraphviz + + - name: 
Install Python Dependencies 3 + run: poetry install -E econml --with docs - run: git config --global --add safe.directory /__w/dowhy/dowhy diff --git a/docs/source/dowhy.graph_learners.rst b/docs/source/dowhy.graph_learners.rst index e919b8e409..c7a1ed240a 100644 --- a/docs/source/dowhy.graph_learners.rst +++ b/docs/source/dowhy.graph_learners.rst @@ -4,14 +4,6 @@ dowhy.graph\_learners package Submodules ---------- -dowhy.graph\_learners.cdt module --------------------------------- - -.. automodule:: dowhy.graph_learners.cdt - :members: - :undoc-members: - :show-inheritance: - dowhy.graph\_learners.ges module -------------------------------- diff --git a/docs/source/user_guide/modeling_causal_relations/learning_causal_structure.rst b/docs/source/user_guide/modeling_causal_relations/learning_causal_structure.rst index 06fc81f95f..abd19f8f07 100644 --- a/docs/source/user_guide/modeling_causal_relations/learning_causal_structure.rst +++ b/docs/source/user_guide/modeling_causal_relations/learning_causal_structure.rst @@ -5,16 +5,7 @@ Learning the causal graph is only necessary in case we cannot construct the grap In many cases, the true causal structure for a given dataset may not be known. We can utilize *graph discovery* algorithms to provide candidate causal graphs consistent with the dataset. Such graphs can then be inspected, edited and modified by a user to match their domain expertise or world knowledge. Note that these graphs should be not used directly because graph discovery from observational data is a provably impossible problem in the fully non-parametric setting. Given a dataset, there exist multiple graphs that would lead to the exact same joint distribution and thus are indistinguishable based on the dataset (such graphs constitute the *Markov equivalence class*). As a result, graph discovery algorithms make certain assumptions to learn a graph and do not guarantee the validity of a learned graph. 
-DoWhy does not implement graph discovery algorithms, but provides a simple way to input the learnt graph from a discovery algorithm. The only constraint is that DoWhy expects the algorithm to output a directed acyclic graph (DAG). In the future, we expect to support learning causal graphs directly through integration with the `causal-learn `_ and `dodiscover `_ packages in PyWhy. - -Graph discovery using CDT -------------------------- -Given a dataset as a pandas DataFrame, the following snippet learns the graph using LiNGAM algorithm and loads it in DoWhy. The algorithm implementation is in the Causal Discovery Toolbox (CDT) package which needs to be installed separately. - ->>> from cdt.causality.graph import LiNGAM ->>> causal_graph = LiNGAM().predict(dataset) - -For a full example using CDT, you can refer to the :doc:`../../example_notebooks/dowhy_causal_discovery_example`. +DoWhy does not implement graph discovery algorithms, but provides a simple way to input the learnt graph from a discovery algorithm. The only constraint is that DoWhy expects a directed acyclic graph (DAG) in the form of a networkx graph. For packages that implement different causal discovery algorithms, see the `causal-learn <https://github.com/py-why/causal-learn>`_ and `dodiscover <https://github.com/py-why/dodiscover>`_ packages in PyWhy. 
Graph discovery using dodiscover -------------------------------- diff --git a/poetry.lock b/poetry.lock index f2e2f7e2fa..874bc63e1e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -471,30 +471,6 @@ scipy = "*" statsmodels = "*" tqdm = "*" -[[package]] -name = "cdt" -version = "0.6.0" -description = "A Toolbox for causal graph inference" -optional = false -python-versions = "*" -files = [ - {file = "cdt-0.6.0-py3-none-any.whl", hash = "sha256:a90401b688597c3ccf23cc7b10c62e19df69dc9277095bab40a1343c6e89996b"}, - {file = "cdt-0.6.0.tar.gz", hash = "sha256:cecfb6390440311d0cb7e5446e97f9eb3e3b7d90d9b10d3b4cbbabc2d4c23dba"}, -] - -[package.dependencies] -GPUtil = "*" -joblib = "*" -networkx = "*" -numpy = "*" -pandas = "*" -requests = "*" -scikit-learn = "*" -scipy = "*" -skrebate = "*" -statsmodels = "*" -tqdm = "*" - [[package]] name = "certifi" version = "2024.8.30" @@ -1540,16 +1516,6 @@ test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask-expr", "dask[dataframe, test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"] tqdm = ["tqdm"] -[[package]] -name = "gputil" -version = "1.4.0" -description = "GPUtil is a Python module for getting the GPU status from NVIDA GPUs using nvidia-smi." 
-optional = false -python-versions = "*" -files = [ - {file = "GPUtil-1.4.0.tar.gz", hash = "sha256:099e52c65e512cdfa8c8763fca67f5a5c2afb63469602d5dcb4d296b3661efb9"}, -] - [[package]] name = "graphviz" version = "0.20.3" @@ -2205,17 +2171,6 @@ files = [ {file = "jupyterlab_widgets-3.0.13.tar.gz", hash = "sha256:a2966d385328c1942b683a8cd96b89b8dd82c8b8f81dda902bb2bc06d46f5bed"}, ] -[[package]] -name = "keras" -version = "2.15.0" -description = "Deep learning for humans." -optional = false -python-versions = ">=3.8" -files = [ - {file = "keras-2.15.0-py3-none-any.whl", hash = "sha256:2dcc6d2e30cf9c951064b63c1f4c404b966c59caf09e01f3549138ec8ee0dd1f"}, - {file = "keras-2.15.0.tar.gz", hash = "sha256:81871d298c064dc4ac6b58440fdae67bfcf47c8d7ad28580fab401834c06a575"}, -] - [[package]] name = "kiwisolver" version = "1.4.7" @@ -4728,21 +4683,6 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] -[[package]] -name = "skrebate" -version = "0.62" -description = "Relief-based feature selection algorithms" -optional = false -python-versions = "*" -files = [ - {file = "skrebate-0.62.tar.gz", hash = "sha256:b20dad4dc52f650e1f7960151314840f34251222cae0a78ac23d9f6d377ca558"}, -] - -[package.dependencies] -numpy = "*" -scikit-learn = "*" -scipy = "*" - [[package]] name = "slicer" version = "0.0.7" @@ -5722,4 +5662,4 @@ pygraphviz = ["pygraphviz"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.13" -content-hash = "14cd645747c244735975c83f5c157eaaefd16a4b1e85f37345508bce12ac09e4" +content-hash = "9373b33f0a10245f90f07c356a7374c47f7b0748dd1831a12779b2c37191d08f" diff --git a/pyproject.toml b/pyproject.toml index 97746c0d4d..654f3d1617 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -107,7 +107,6 @@ pytest-split = ">=0.8.0" nbformat = ">=5.4.0" jupyter = ">=1.0.0" flaky = ">=3.7.0" -keras = ">=2.9.0" xgboost = ">=2.0.2" mypy = ">=1.7.1" torch = [ @@ -140,7 +139,6 @@ pydata-sphinx-theme = 
">=0.9.0" ipykernel = ">=6.15.1" sphinx-copybutton = "0.5.0" seaborn = ">=0.12.1" -cdt =">=0.6.0" # # Versions defined for security reasons diff --git a/tests/causal_estimators/test_econml_estimator.py b/tests/causal_estimators/test_econml_estimator.py index 4de5b89b49..526ed039fc 100644 --- a/tests/causal_estimators/test_econml_estimator.py +++ b/tests/causal_estimators/test_econml_estimator.py @@ -144,7 +144,6 @@ def test_metalearners(self): assert pytest.approx(sl_cate_estimates_test_f[0], 0.01) != sl_cate_estimates_test_f[1] def test_iv_estimators(self): - keras = pytest.importorskip("keras") # Setup data data = datasets.linear_dataset( 10, @@ -168,37 +167,7 @@ def test_iv_estimators(self): # Test DeepIV dims_zx = len(model._instruments) + len(model._effect_modifiers) dims_tx = len(model._treatment) + len(model._effect_modifiers) - treatment_model = keras.Sequential( - [ - keras.layers.Dense(128, activation="relu", input_shape=(dims_zx,)), # sum of dims of Z and X - keras.layers.Dropout(0.17), - keras.layers.Dense(64, activation="relu"), - keras.layers.Dropout(0.17), - keras.layers.Dense(32, activation="relu"), - keras.layers.Dropout(0.17), - ] - ) - response_model = keras.Sequential( - [ - keras.layers.Dense(128, activation="relu", input_shape=(dims_tx,)), # sum of dims of T and X - keras.layers.Dropout(0.17), - keras.layers.Dense(64, activation="relu"), - keras.layers.Dropout(0.17), - keras.layers.Dense(32, activation="relu"), - keras.layers.Dropout(0.17), - keras.layers.Dense(1), - ] - ) - dmliv_estimate = model.estimate_effect( - identified_estimand, - method_name="iv.econml.iv.dml.DMLIV", - target_units=lambda df: df["X0"] > -1, - confidence_intervals=False, - method_params={ - "init_params": {"discrete_treatment": False, "discrete_instrument": False}, - "fit_params": {}, - }, - ) + # Test IntentToTreatDRIV data = datasets.linear_dataset( 10,