Merge branch 'realese/v0.4.0'

maks-sh · Aug 16, 2021 · c9dd56a · c9dd56a · github-actions · Aug 16, 2021
2 parents e9de7ab + e93aeba
commit c9dd56a
Show file tree

Hide file tree

Showing 55 changed files with 21,142 additions and 481 deletions.
diff --git a/.coveragerc b/.coveragerc
@@ -0,0 +1,2 @@
+[run]
+omit = sklift/tests/*,*__init__.py*
diff --git a/.github/workflows/ci-test.yml b/.github/workflows/ci-test.yml
@@ -3,29 +3,67 @@ name: Python package
 on:
   push:
     branches: [ master ]
-  pull_request:
-
+  pull_request_target:
 
 jobs:
   test:
     name: Check tests
-    runs-on: ${{ matrix.operating-system }}
+    runs-on: ${{ matrix.os }}
+    env:
+      # fix the python version and the operating system for codecoverage commentator
+      USING_COVERAGE_PY: '3.8'
+      USING_COVERAGE_OS: 'ubuntu-latest'
+    outputs:
+      # fix the results of pytest for unix
+      output1: ${{ steps.pytest.outputs.exit_code }}
+
     strategy:
       matrix:
-        operating-system: [ubuntu-latest, windows-latest, macos-latest]
-        python-version: [3.6, 3.7, 3.8, 3.9]
+        os: ['ubuntu-latest', 'windows-latest', 'macos-latest']
+        python-version: ['3.6', '3.7', '3.8', '3.9']
+      # GitHub does not cancel all in-progress jobs if any matrix job fails
       fail-fast: false
 
     steps:
-    - uses: actions/checkout@v2
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies and lints
-      run: pip install pytest .[tests]
-    - name: Run PyTest
-      run: pytest
+      - uses: actions/checkout@v2
+      # Install python
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      # Update pip and install dependencies
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install . -r test_requirements.txt -r requirements.txt
+      # Pytest in windows
+      - name: Run PyTest windows
+        if: ${{ matrix.os == 'windows-latest' }}
+        run: |
+          pytest | tee pytest-coverage.txt
+      # Pytest in unix. Exit code of this run captures the exit status of tee and not of pytest
+      # So, use $PIPESTATUS that holds the exit status of each command in pipeline
+      - name: Run PyTest unix
+        if: ${{ matrix.os != 'windows-latest' }}
+        id: pytest
+        run: |
+          pytest | tee pytest-coverage.txt;
+          exit_code=${PIPESTATUS[0]};
+          echo "::set-output name=exit_code::$exit_code"
+      # Comment on the results of the test coverage
+      - name: Comment coverage
+        if: contains(env.USING_COVERAGE_PY, matrix.python-version) && contains(env.USING_COVERAGE_OS, matrix.os)
+        uses: MishaKav/pytest-coverage-comment@main
+        with:
+          pytest-coverage-path: ./pytest-coverage.txt
+          junitxml-path: ./pytest.xml
+      # On unix, fail the workflow explicitly if pytest exited with code 1
+      - name: Check fail of pytest unix
+        if: ${{ matrix.os != 'windows-latest' && steps.pytest.outputs.exit_code == 1 }}
+        uses: actions/github-script@v3
+        with:
+          script: |
+              core.setFailed('Some tests failed!')
 
   check_sphinx_build:
     name: Check Sphinx build for docs
@@ -34,15 +72,15 @@ jobs:
       matrix:
         python-version: [3.8]
     steps:
-    - name: Checkout
-      uses: actions/checkout@v2
-    - name: Set up Python
-      uses: actions/setup-python@v2
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Update pip
-      run: python -m pip install --upgrade pip
-    - name: Install dependencies
-      run: pip install -r docs/requirements.txt
-    - name: Run Sphinx
-      run: sphinx-build -b html docs /tmp/_docs_build
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Update pip and install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r docs/requirements.txt -r requirements.txt
+      - name: Run Sphinx
+        run: sphinx-build -W -b html docs /tmp/_docs_build
diff --git a/.gitignore b/.gitignore
@@ -62,6 +62,7 @@ coverage.xml
 *.cover
 .hypothesis/
 .pytest_cache/
+pytest.xml
 
 # Translations
 *.mo

diff --git a/.readthedocs.yml b/.readthedocs.yml
@@ -13,7 +13,8 @@ sphinx:
 #  configuration: mkdocs.yml
 
 # Optionally build your docs in additional formats such as PDF and ePub
-formats: all
+formats:
+  - htmlzip
 
 # Optionally set the version of Python and requirements required to build your docs
 python:

diff --git a/Readme.rst b/Readme.rst
@@ -54,19 +54,22 @@ Articles in russian on habr.com: `Part 1 <https://habr.com/ru/company/ru_mts/blo
 `Part 2 <https://habr.com/ru/company/ru_mts/blog/485976/>`__
 and `Part 3 <https://habr.com/ru/company/ru_mts/blog/538934/>`__.
 
-**Features**:
+Why sklift
+-------------
+
+- Comfortable and intuitive *scikit-learn*-like API;
 
-* Сomfortable and intuitive scikit-learn-like API;
+- More uplift metrics than you have ever seen in one place! Includes gems like *Area Under Uplift Curve* (AUUC) or *Area Under Qini Curve* (Qini coefficient) with ideal cases;
 
-* Applying any estimator compatible with scikit-learn (e.g. Xgboost, LightGBM, Catboost, etc.);
+- Supporting any estimator compatible with scikit-learn (e.g. Xgboost, LightGBM, Catboost, etc.);
 
-* All approaches can be used in sklearn.pipeline (see example (`EN <https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/pipeline_usage_EN.ipynb>`__ |Open In Colab3|_, `RU <https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/pipeline_usage_RU.ipynb>`__ |Open In Colab4|_));
+- All approaches can be used in the ``sklearn.pipeline``. See the example of usage on `the Tutorials page <https://www.uplift-modeling.com/en/latest/tutorials.html>`__;
 
-* Almost all implemented approaches solve classification and regression problem;
+- Also metrics are compatible with the classes from ``sklearn.model_selection``. See the example of usage on `the Tutorials page <https://www.uplift-modeling.com/en/latest/tutorials.html>`__;
 
-* More uplift metrics that you have ever seen in one place! Include brilliants like  *Area Under Uplift Curve* (AUUC) or *Area Under Qini Curve* (Qini coefficient) with ideal cases;
+- Almost all implemented approaches solve classification and regression problems;
 
-* Nice and useful viz for analyzing a performance model.
+- Nice and useful viz for analysing model performance.
 
 Installation
 -------------
@@ -112,24 +115,25 @@ Use the intuitive python API to train uplift models with `sklift.models  <https:
 .. code-block:: python
 
     # import approaches
-    from sklift.models import SoloModel, ClassTransformation, TwoModels
+    from sklift.models import SoloModel, ClassTransformation
     # import any estimator that adheres to scikit-learn conventions.
-    from catboost import CatBoostClassifier
-
+    from lightgbm import LGBMClassifier
 
     # define models
-    treatment_model = CatBoostClassifier(iterations=50, thread_count=3,
-                                         random_state=42, silent=True)
-    control_model = CatBoostClassifier(iterations=50, thread_count=3,
-                                       random_state=42, silent=True)
+    estimator = LGBMClassifier(n_estimators=10)
+
+    # define metamodel
+    slearner = SoloModel(estimator=estimator)
 
-    # define approach
-    tm = TwoModels(treatment_model, control_model, method='vanilla')
     # fit model
-    tm = tm.fit(X_train, y_train, treat_train)
+    slearner.fit(
+        X=X_tr,
+        y=y_tr,
+        treatment=trmnt_tr,
+    )
 
     # predict uplift
-    uplift_preds = tm.predict(X_val)
+    uplift_slearner = slearner.predict(X_val)
 
 **Evaluate your uplift model**
 
@@ -144,33 +148,48 @@ Uplift model evaluation metrics are available in `sklift.metrics  <https://www.u
 
 
     # Uplift@30%
-    tm_uplift_at_k = uplift_at_k(y_true=y_val, uplift=uplift_preds, treatment=treat_val,
-                                 strategy='overall', k=0.3)
+    uplift_at_k = uplift_at_k(y_true=y_val, uplift=uplift_slearner,
+                              treatment=trmnt_val,
+                              strategy='overall', k=0.3)
 
     # Area Under Qini Curve
-    tm_qini_auc = qini_auc_score(y_true=y_val, uplift=uplift_preds, treatment=treat_val)
+    qini_coef = qini_auc_score(y_true=y_val, uplift=uplift_slearner,
+                               treatment=trmnt_val)
 
     # Area Under Uplift Curve
-    tm_uplift_auc = uplift_auc_score(y_true=y_val, uplift=uplift_preds, treatment=treat_val)
+    uplift_auc = uplift_auc_score(y_true=y_val, uplift=uplift_slearner,
+                                  treatment=trmnt_val)
 
     # Weighted average uplift
-    tm_wau = weighted_average_uplift(y_true=y_val, uplift=uplift_preds,  treatment=treat_val)
+    wau = weighted_average_uplift(y_true=y_val, uplift=uplift_slearner,
+                                  treatment=trmnt_val)
 
 **Visualize the results**
 
 Visualize performance metrics with `sklift.viz  <https://www.uplift-modeling.com/en/latest/api/viz/index.html>`__.
 
 .. code-block:: python
 
-    # import vizualisation tools
     from sklift.viz import plot_qini_curve
+    import matplotlib.pyplot as plt
+
+    fig, ax = plt.subplots(1, 1)
+    ax.set_title('Qini curves')
+
+    plot_qini_curve(
+        y_test, uplift_slearner, trmnt_test,
+        perfect=True, name='Slearner', ax=ax
+    );
 
-    plot_qini_curve(y_true=y_val, uplift=uplift_preds, treatment=treat_val, negative_effect=True)
+    plot_qini_curve(
+        y_test, uplift_revert, trmnt_test,
+        perfect=False, name='Revert label', ax=ax
+    );
 
-.. image:: docs/_static/images/Readme_qini_curve.png
+.. image:: docs/_static/images/quick_start_qini.png
     :width: 514px
     :height: 400px
-    :alt: Example of model's qini curve, perfect qini curve and random qini curve
+    :alt: Example of several models' qini curves, perfect qini curve and random qini curve
 
 Development
 -----------

diff --git a/docs/404.rst b/docs/404.rst
@@ -0,0 +1,15 @@
+:orphan:
+
+*******************
+404 Page Not Found
+*******************
+
+.. image:: _static/images/sklift_404.png
+   :alt: 404 Page not found
+   :align: center
+   :width: 250 px
+   :height: 250 px
+
+Sorry, we couldn't find that page.
+
+Try using the search box or go to the `homepage <https://www.uplift-modeling.com/en/latest/index.html>`__.
diff --git a/docs/_static/images/Readme_qini_curve.png b/docs/_static/images/Readme_qini_curve.png
diff --git a/docs/_static/images/quick_start_qini.png b/docs/_static/images/quick_start_qini.png
diff --git a/docs/_static/images/quick_start_uplift.png b/docs/_static/images/quick_start_uplift.png
diff --git a/docs/_static/images/quick_start_wau.png b/docs/_static/images/quick_start_wau.png
diff --git a/docs/_static/images/sklift_404.png b/docs/_static/images/sklift_404.png
diff --git a/docs/api/datasets/fetch_megafon.rst b/docs/api/datasets/fetch_megafon.rst
@@ -0,0 +1,9 @@
+.. _Megafon:
+
+***************************************
+`sklift.datasets <./>`_.fetch_megafon
+***************************************
+
+.. autofunction:: sklift.datasets.datasets.fetch_megafon
+
+.. include:: ../../../sklift/datasets/descr/megafon.rst
diff --git a/docs/api/datasets/index.rst b/docs/api/datasets/index.rst
@@ -10,4 +10,5 @@
    ./fetch_lenta
    ./fetch_x5
    ./fetch_criteo
-   ./fetch_hillstrom
+   ./fetch_hillstrom
+   ./fetch_megafon
diff --git a/docs/api/metrics/average_squared_deviation.rst b/docs/api/metrics/average_squared_deviation.rst
@@ -0,0 +1,5 @@
+*************************************************
+`sklift.metrics <./>`_.average_squared_deviation
+*************************************************
+
+.. autofunction:: sklift.metrics.metrics.average_squared_deviation
diff --git a/docs/api/metrics/index.rst b/docs/api/metrics/index.rst
@@ -15,4 +15,6 @@
    ./weighted_average_uplift
    ./uplift_by_percentile
    ./response_rate_by_percentile
-   ./treatment_balance_curve
+   ./treatment_balance_curve
+   ./average_squared_deviation
+   ./make_uplift_scorer
diff --git a/docs/api/metrics/make_uplift_scorer.rst b/docs/api/metrics/make_uplift_scorer.rst
@@ -0,0 +1,5 @@
+**********************************************
+`sklift.metrics <./>`_.make_uplift_scorer
+**********************************************
+
+.. autofunction:: sklift.metrics.metrics.make_uplift_scorer
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -5,22 +5,46 @@
 * 🔥 something big that you couldn’t do before.
 * 💥 something that you couldn’t do before.
 * 📝 a miscellaneous minor improvement.
-* 🔨 something that previously didn’t work as documentated – or according to reasonable expectations – should now work.
+* 🔨 something that previously didn’t work as documented – or according to reasonable expectations – should now work.
 * ❗️ you will need to change your code to have the same effect in the future; or a feature will be removed in the future.
 
+## Version 0.4.0
+
+### [sklift.metrics](https://www.uplift-modeling.com/en/v0.4.0/api/metrics/index.html)
+
+* 🔥 Add [make_uplift_scorer](https://www.uplift-modeling.com/en/v0.4.0/api/metrics/make_uplift_scorer.html) function for interacting with the module ``sklearn.model_selection``  by [@wrapper228](https://github.com/wrapper228).
+* 🔥 Add new metric [average_squared_deviation](https://www.uplift-modeling.com/en/v0.4.0/api/metrics/average_squared_deviation.html) function by [@Mogby](https://github.com/Mogby).
+
+### [sklift.viz](https://www.uplift-modeling.com/en/v0.4.0/api/viz/index.html)
+
+* 🔥 Added the ability to draw multiple plots on the same graph with the [plot_uplift_curve](https://www.uplift-modeling.com/en/v0.4.0/api/viz/plot_uplift_curve.html) function and the [plot_qini_curve](https://www.uplift-modeling.com/en/v0.4.0/api/viz/plot_qini_curve.html) function by [@flashlight101](https://github.com/flashlight101).
+
+### [sklift.datasets](https://www.uplift-modeling.com/en/v0.4.0/api/datasets/index.html)
+
+* 💥 Add new dataset [fetch_megafon](https://www.uplift-modeling.com/en/v0.4.0/api/datasets/fetch_megafon.html) function by [@ezhdi](https://github.com/ezhdi).
+* 📝 Improve documentation of [sklift.datasets](https://www.uplift-modeling.com/en/v0.4.0/api/datasets/index.html) by [@flashlight101](https://github.com/flashlight101) and [@ezhdi](https://github.com/ezhdi).
+
+
+### Miscellaneous
+
+* 💥 Add new tutorial [Example of usage model from sklift.models in sklearn.model_selection](https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/uplift_model_selection_tutorial.ipynb) by [@wrapper228](https://github.com/wrapper228).
+* 💥 Increased test coverage from 30% to 82% by [@flashlight101](https://github.com/flashlight101) and [@Ksyula](https://github.com/Ksyula)
+* 📝 Add EDA of available datasets on [Tutorials](https://www.uplift-modeling.com/en/v0.4.0/tutorials.html) page by [@lyutov89](https://github.com/lyutov89), [@ezhdi](https://github.com/ezhdi), [@patpanda94](https://github.com/patpanda94) and [@Ksyula](https://github.com/Ksyula).
+* 📝 Improve ["RetailHero tutorial"](https://nbviewer.jupyter.org/github/maks-sh/scikit-uplift/blob/master/notebooks/RetailHero_EN.ipynb) by [@Ksyula](https://github.com/Ksyula).
+
 ## Version 0.3.2
 
-### [sklift.datasets](https://www.uplift-modeling.com/en/v0.3.1/api/datasets/index.html)
+### [sklift.datasets](https://www.uplift-modeling.com/en/v0.3.2/api/datasets/index.html)
 
-* 🔨 Fix bug in [fetch_x5](https://www.uplift-modeling.com/en/v0.3.1/api/datasets/fetch_x5.html) function by [@Muhamob](https://github.com/Muhamob).
+* 🔨 Fix bug in [fetch_x5](https://www.uplift-modeling.com/en/v0.3.2/api/datasets/fetch_x5.html) function by [@Muhamob](https://github.com/Muhamob).
 
-### [sklift.metrics](https://www.uplift-modeling.com/en/v0.3.1/api/index/metrics.html)
+### [sklift.metrics](https://www.uplift-modeling.com/en/v0.3.2/api/metrics/index.html)
 
-* 📝 Fix docstring in [uplift_by_percentile](https://www.uplift-modeling.com/en/v0.3.1/api/metrics/uplift_by_percentile.html) function by [@ElisovaIra](https://github.com/ElisovaIra).
+* 📝 Fix docstring in [uplift_by_percentile](https://www.uplift-modeling.com/en/v0.3.2/api/metrics/uplift_by_percentile.html) function by [@ElisovaIra](https://github.com/ElisovaIra).
 
-### [sklift.viz](https://www.uplift-modeling.com/en/v0.3.1/api/viz/index.html)
+### [sklift.viz](https://www.uplift-modeling.com/en/v0.3.2/api/viz/index.html)
 
-* 🔨 Fix bug in [plot_uplift_preds](https://www.uplift-modeling.com/en/v0.3.1/api/viz/plot_uplift_preds.html) function by [@bwbelljr](https://github.com/bwbelljr).
+* 🔨 Fix bug in [plot_uplift_preds](https://www.uplift-modeling.com/en/v0.3.2/api/viz/plot_uplift_preds.html) function by [@bwbelljr](https://github.com/bwbelljr).
 
 ### Miscellaneous
 

diff --git a/docs/conf.py b/docs/conf.py
@@ -50,12 +50,13 @@ def get_version():
     "sphinx.ext.viewcode",
     "sphinx.ext.mathjax",
     "sphinx.ext.napoleon",
-    "recommonmark",
+    "myst_parser",
     "sphinx.ext.intersphinx",
     "sphinxcontrib.bibtex"
 ]
 
 bibtex_bibfiles = ['refs.bib']
+bibtex_reference_style = 'author_year'
 
 master_doc = 'index'
 
@@ -88,11 +89,8 @@ def get_version():
 # Removing the view source link
 html_show_sourcelink = False
 
-# Add supporting *.md files by recommonmark extension
-source_suffix = ['.rst', '.md']
-
 html_theme_options = {
     'navigation_depth': 3,
 }
 
-trim_footnote_reference_space = True
+trim_footnote_reference_space = True
File	Stmts	Miss	Cover	Missing
sklift/datasets
datasets.py	147	36	76%	55, 80–85, 95, 109, 265–304, 384, 391, 397, 497
sklift/metrics
metrics.py	264	53	80%	48–68, 307, 363, 420, 434, 439, 442–449, 453–454, 461, 466, 519, 523, 527, 530, 590, 594, 598, 660, 664, 668, 672, 676, 679, 797–828
sklift/models
models.py	162	13	92%	72, 99, 107, 120, 148, 168, 248, 352, 356, 414, 434, 464, 482
sklift/utils
utils.py	4	0	100%
sklift/viz
base.py	182	32	82%	34, 107–108, 133–135, 321, 325, 329, 333, 337, 372, 380, 409–412, 438–463
TOTAL	759	134	82%