lvgig · adamsardar · Dec 29, 2023 · Oct 5, 2021 · Nov 3, 2021 · Nov 9, 2021
diff --git a/.ruff.toml b/.ruff.toml
@@ -3,11 +3,12 @@
 # McCabe complexity (`C901`) by default.
 select = ["E", "F", "W", "I", "UP", "ASYNC", "YTT", "A", "COM", "C4", "T10", "EM", 
 "FA", "ISC", "PIE", "PYI", "Q", "RSE", "RET", "SLOT", "SIM", "TID", "TCH", "INT", 
-"PD", "PGH", "PLC", "PLE", "PLW", "FLY", "NPY", "PERF", "B", "DTZ"]
+"PD", "PGH", "PLC", "PLE", "PLW", "FLY", "NPY", "PERF", "B", "DTZ", "ANN"]
+
 
 # ignore E501 - linelength limit (covered by black except in docstrings) 
 # and PD901 - use of df variable name
-ignore = ["E501", "PD901"]
+ignore = ["E501", "PD901", "ANN101"]
 
 # Allow autofix for all enabled rules (when `--fix`) is provided.
 fixable = ["ALL"]
@@ -33,4 +34,5 @@ target-version = "py38"
 
 # Ignore `E402` (import violations) in all `__init__.py` file.
 [per-file-ignores]
-"__init__.py" = ["E402", "F401"]
+"__init__.py" = ["E402", "F401"]
+"tests/*" = ["ANN"]
diff --git a/examples/Data-Science-Festival-Workshop/plotting.py b/examples/Data-Science-Festival-Workshop/plotting.py
@@ -1,16 +1,19 @@
-def one_way_summary_plot(df, column, response="y"):
+import pandas as pd
+
+
+def one_way_summary_plot(df: pd.DataFrame, column: str, response: str = "y") -> None:
     """Function to produce a rough one-way summary plot of a specific column.
 
     Specifically plot averge response (right y axis) and number of records (left
     y axis) by the selected column (x axis).
 
     """
-    agg = df.groupby(column).agg({column: ["count"], "y": ["mean"]})
+    agg = df.groupby(column).agg({column: ["count"], response: ["mean"]})
 
     ax = agg.plot.bar(y=(column, "count"), ylabel="count", figsize=(8, 5))
 
     agg.plot(
-        y=("y", "mean"),
+        y=(response, "mean"),
         style=":",
         marker=".",
         c="k",

diff --git a/setup.py b/setup.py
@@ -19,7 +19,7 @@
         raise RuntimeError(msg)
 
 
-def list_reqs(fname="requirements.txt"):
+def list_reqs(fname: str = "requirements.txt") -> list:
     with open(fname) as fd:
         return fd.read().splitlines()
 

diff --git a/tests/dates/test_DateDiffLeapYearTransformer.py b/tests/dates/test_DateDiffLeapYearTransformer.py
@@ -167,6 +167,25 @@ def test_inputs_set_to_attribute(self):
             msg="Attributes for DateDiffLeapYearTransformer set in init",
         )
 
+    def test_inputs_set_to_attribute_name_not_set(self):
+        """Test that new_column_name defaults to a name built from column_upper and column_lower when it is not passed."""
+        x = DateDiffLeapYearTransformer(
+            column_lower="dummy_1",
+            column_upper="dummy_2",
+            drop_cols=True,
+        )
+
+        ta.classes.test_object_attributes(
+            obj=x,
+            expected_attributes={
+                "column_lower": "dummy_1",
+                "column_upper": "dummy_2",
+                "columns": ["dummy_1", "dummy_2"],
+                "new_column_name": "dummy_2_dummy_1_datediff",
+            },
+            msg="Attributes for DateDiffLeapYearTransformer set in init",
+        )
+
 
 class TestTransform:
     """Tests for DateDiffLeapYearTransformer.transform()."""

diff --git a/tests/numeric/test_CutTransformer.py b/tests/numeric/test_CutTransformer.py
@@ -135,7 +135,7 @@ def test_pd_cut_call(self, mocker):
 
         expected_call_args = {
             0: {
-                "args": (d.create_df_9()["a"],),
+                "args": (d.create_df_9()["a"].to_numpy(),),
                 "kwargs": {"bins": 3, "right": False, "precision": 2},
             },
         }

diff --git a/tubular/base.py b/tubular/base.py
@@ -2,6 +2,8 @@
 from. These transformers contain key checks to be applied in all cases.
 """
 
+from __future__ import annotations
+
 import warnings
 
 import pandas as pd
@@ -46,11 +48,16 @@ class BaseTransformer(TransformerMixin, BaseEstimator):
 
     """
 
-    def classname(self):
+    def classname(self) -> str:
         """Method that returns the name of the current class when called."""
         return type(self).__name__
 
-    def __init__(self, columns=None, copy=True, verbose=False) -> None:
+    def __init__(
+        self,
+        columns: list[str] | str | None = None,
+        copy: bool = True,
+        verbose: bool = False,
+    ) -> None:
         self.version_ = __version__
 
         if not isinstance(verbose, bool):
@@ -92,7 +99,7 @@ def __init__(self, columns=None, copy=True, verbose=False) -> None:
 
         self.copy = copy
 
-    def fit(self, X, y=None):
+    def fit(self, X: pd.DataFrame, y: pd.Series | None = None) -> BaseTransformer:
         """Base transformer fit method, checks X and y types. Currently only pandas DataFrames are allowed for X
         and DataFrames or Series for y.
 
@@ -130,7 +137,7 @@ def fit(self, X, y=None):
 
         return self
 
-    def _combine_X_y(self, X, y):
+    def _combine_X_y(self, X: pd.DataFrame, y: pd.Series) -> pd.DataFrame:
         """Combine X and y by adding a new column with the values of y to a copy of X.
 
         The new column response column will be called `_temporary_response`.
@@ -171,7 +178,7 @@ def _combine_X_y(self, X, y):
 
         return X_y
 
-    def transform(self, X):
+    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         """Base transformer transform method; checks X type (pandas DataFrame only) and copies data if requested.
 
         Transform calls the columns_check method which will check columns in columns attribute are in X.
@@ -201,7 +208,7 @@ def transform(self, X):
 
         return X
 
-    def check_is_fitted(self, attribute):
+    def check_is_fitted(self, attribute: str) -> None:
         """Check if particular attributes are on the object. This is useful to do before running transform to avoid
         trying to transform data without first running the fit method.
 
@@ -215,7 +222,7 @@ def check_is_fitted(self, attribute):
         """
         check_is_fitted(self, attribute)
 
-    def columns_check(self, X):
+    def columns_check(self, X: pd.DataFrame) -> None:
         """Method to check that the columns attribute is set and all values are present in X.
 
         Parameters
@@ -240,7 +247,7 @@ def columns_check(self, X):
             if c not in X.columns.to_numpy():
                 raise ValueError(f"{self.classname()}: variable " + c + " is not in X")
 
-    def columns_set_or_check(self, X):
+    def columns_set_or_check(self, X: pd.DataFrame) -> None:
         """Function to check or set columns attribute.
 
         If the columns attribute is None then set it to all columns in X. Otherwise run the columns_check method.
@@ -262,7 +269,7 @@ def columns_set_or_check(self, X):
             self.columns_check(X)
 
     @staticmethod
-    def check_weights_column(X, weights_column):
+    def check_weights_column(X: pd.DataFrame, weights_column: str) -> None:
         """Helper method for validating weights column in dataframe.
 
         Args:
@@ -345,12 +352,12 @@ class DataFrameMethodTransformer(BaseTransformer):
 
     def __init__(
         self,
-        new_column_name,
-        pd_method_name,
-        columns,
-        pd_method_kwargs=None,
-        drop_original=False,
-        **kwargs,
+        new_column_name: list[str] | str,
+        pd_method_name: str,
+        columns: list[str] | str | None,
+        pd_method_kwargs: dict[str, object] | None = None,
+        drop_original: bool = False,
+        **kwargs: bool,
     ) -> None:
         super().__init__(columns=columns, **kwargs)
 
@@ -397,7 +404,7 @@ def __init__(
             msg = f'{self.classname()}: error accessing "{pd_method_name}" method on pd.DataFrame object - pd_method_name should be a pd.DataFrame method'
             raise AttributeError(msg) from err
 
-    def transform(self, X):
+    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         """Transform input pandas DataFrame (X) using the given pandas.DataFrame method and assign the output
         back to column or columns in X.
 

diff --git a/tubular/capping.py b/tubular/capping.py
@@ -1,5 +1,7 @@
 """This module contains a transformer that applies capping to numeric columns."""
 
+from __future__ import annotations
+
 import copy
 import warnings
 
@@ -61,10 +63,10 @@ class CappingTransformer(BaseTransformer):
 
     def __init__(
         self,
-        capping_values=None,
-        quantiles=None,
-        weights_column=None,
-        **kwargs,
+        capping_values: dict[str, list[int | float | None]] | None = None,
+        quantiles: dict[str, list[int | float]] | None = None,
+        weights_column: str | None = None,
+        **kwargs: bool,
     ) -> None:
         if capping_values is None and quantiles is None:
             msg = f"{self.classname()}: both capping_values and quantiles are None, either supply capping values in the capping_values argument or supply quantiles that can be learnt in the fit method"
@@ -100,7 +102,11 @@ def __init__(
         self.weights_column = weights_column
         self._replacement_values = copy.deepcopy(self.capping_values)
 
-    def check_capping_values_dict(self, capping_values_dict, dict_name):
+    def check_capping_values_dict(
+        self,
+        capping_values_dict: dict[str, list[int | float | None]],
+        dict_name: str,
+    ) -> None:
         """Performs checks on a dictionary passed to ."""
         if type(capping_values_dict) is not dict:
             msg = f"{self.classname()}: {dict_name} should be dict of columns and capping values"
@@ -139,7 +145,7 @@ def check_capping_values_dict(self, capping_values_dict, dict_name):
                 msg = f"{self.classname()}: both values are None for key {k}"
                 raise ValueError(msg)
 
-    def fit(self, X, y=None):
+    def fit(self, X: pd.DataFrame, y: None = None) -> CappingTransformer:
         """Learn capping values from input data X.
 
         Calculates the quantiles to cap at given the quantiles dictionary supplied
@@ -185,7 +191,12 @@ def fit(self, X, y=None):
 
         return self
 
-    def prepare_quantiles(self, values, quantiles, sample_weight=None):
+    def prepare_quantiles(
+        self,
+        values: pd.Series | np.ndarray,
+        quantiles: list[float],
+        sample_weight: pd.Series | np.ndarray | None = None,
+    ) -> list[int | float]:
         """Method to call the weighted_quantile method and prepare the outputs.
 
         If there are no None values in the supplied quantiles then the outputs from weighted_quantile
@@ -230,7 +241,12 @@ def prepare_quantiles(self, values, quantiles, sample_weight=None):
 
         return results
 
-    def weighted_quantile(self, values, quantiles, sample_weight=None):
+    def weighted_quantile(
+        self,
+        values: pd.Series | np.ndarray,
+        quantiles: list[float],
+        sample_weight: pd.Series | np.ndarray | None = None,
+    ) -> list[int | float]:
         """Method to calculate weighted quantiles.
 
         This method is adapted from the "Completely vectorized numpy solution" answer from user
@@ -328,7 +344,7 @@ def weighted_quantile(self, values, quantiles, sample_weight=None):
 
         return list(np.interp(quantiles, weighted_quantiles, values))
 
-    def transform(self, X):
+    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         """Apply capping to columns in X.
 
         If cap_value_max is set, any values above cap_value_max will be set to cap_value_max. If cap_value_min
@@ -440,10 +456,10 @@ class OutOfRangeNullTransformer(CappingTransformer):
 
     def __init__(
         self,
-        capping_values=None,
-        quantiles=None,
-        weights_column=None,
-        **kwargs,
+        capping_values: dict[str, list[int | float | None]] | None = None,
+        quantiles: dict[str, list[int | float]] | None = None,
+        weights_column: str | None = None,
+        **kwargs: bool,
     ) -> None:
         super().__init__(
             capping_values=capping_values,
@@ -454,7 +470,7 @@ def __init__(
 
         self.set_replacement_values()
 
-    def set_replacement_values(self):
+    def set_replacement_values(self) -> None:
         """Method to set the _replacement_values to have all null values.
 
         Keeps the existing keys in the _replacement_values dict and sets all values (except None) in the lists to np.NaN. Any None
@@ -468,7 +484,7 @@ def set_replacement_values(self):
 
             self._replacement_values[k] = null_replacements_list
 
-    def fit(self, X, y=None):
+    def fit(self, X: pd.DataFrame, y: None = None) -> OutOfRangeNullTransformer:
         """Learn capping values from input data X.
 
         Calculates the quantiles to cap at given the quantiles dictionary supplied

diff --git a/tubular/comparison.py b/tubular/comparison.py
@@ -1,4 +1,6 @@
-import pandas as pd
+from __future__ import annotations
+
+import pandas as pd  # noqa: TCH002
 
 from tubular.base import BaseTransformer
 
@@ -27,7 +29,7 @@ def __init__(
         columns: list,
         new_col_name: str,
         drop_original: bool = False,
-        **kwargs,
+        **kwargs: bool,
     ) -> None:
         super().__init__(columns=columns, **kwargs)