Skip to content

Commit

Permalink
FIX-modin-project#7238: Fix docstring inheritance for 'cached_property' and use it
Browse files Browse the repository at this point in the history

Signed-off-by: Anatoly Myachev <[email protected]>
  • Loading branch information
anmyachev committed May 8, 2024
1 parent bb43cd5 commit 4562e96
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 50 deletions.
76 changes: 30 additions & 46 deletions modin/core/io/column_stores/parquet_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from __future__ import annotations

import functools
import json
import os
import re
Expand Down Expand Up @@ -60,11 +61,6 @@ class ColumnStoreDataset:
dataset : ParquetDataset or ParquetFile
Underlying dataset implementation for PyArrow and fastparquet
respectively.
_row_groups_per_file : list
List that contains the number of row groups for each file in the
given parquet dataset.
_files : list
List that contains the full paths of the parquet files in the dataset.
"""

def __init__(self, path, storage_options): # noqa : PR01
Expand All @@ -73,8 +69,6 @@ def __init__(self, path, storage_options): # noqa : PR01
self._fs_path = None
self._fs = None
self.dataset = self._init_dataset()
self._row_groups_per_file = None
self._files = None

@property
def pandas_metadata(self):
Expand All @@ -91,14 +85,12 @@ def engine(self):
"""Return string representing what engine is being used."""
raise NotImplementedError

# TODO: make this cached_property after docstring inheritance is fixed.
@property
@functools.cached_property
def files(self):
"""Return the list of formatted file paths of the dataset."""
raise NotImplementedError

# TODO: make this cached_property after docstring inheritance is fixed.
@property
@functools.cached_property
def row_groups_per_file(self):
"""Return a list with the number of row groups per file."""
raise NotImplementedError
Expand Down Expand Up @@ -203,31 +195,27 @@ def columns(self):
def engine(self):
return "pyarrow"

@property
@functools.cached_property
def row_groups_per_file(self):
from pyarrow.parquet import ParquetFile

if self._row_groups_per_file is None:
row_groups_per_file = []
# Count up the total number of row groups across all files and
# keep track of row groups per file to use later.
for file in self.files:
with self.fs.open(file) as f:
row_groups = ParquetFile(f).num_row_groups
row_groups_per_file.append(row_groups)
self._row_groups_per_file = row_groups_per_file
return self._row_groups_per_file
row_groups_per_file = []
# Count up the total number of row groups across all files and
# keep track of row groups per file to use later.
for file in self.files:
with self.fs.open(file) as f:
row_groups = ParquetFile(f).num_row_groups
row_groups_per_file.append(row_groups)
return row_groups_per_file

@property
@functools.cached_property
def files(self):
if self._files is None:
try:
files = self.dataset.files
except AttributeError:
# compatibility at least with 3.0.0 <= pyarrow < 8.0.0
files = self.dataset._dataset.files
self._files = self._get_files(files)
return self._files
try:
files = self.dataset.files
except AttributeError:
# compatibility at least with 3.0.0 <= pyarrow < 8.0.0
files = self.dataset._dataset.files
return self._get_files(files)

def to_pandas_dataframe(
self,
Expand Down Expand Up @@ -261,26 +249,22 @@ def columns(self):
def engine(self):
return "fastparquet"

@property
@functools.cached_property
def row_groups_per_file(self):
from fastparquet import ParquetFile

if self._row_groups_per_file is None:
row_groups_per_file = []
# Count up the total number of row groups across all files and
# keep track of row groups per file to use later.
for file in self.files:
with self.fs.open(file) as f:
row_groups = ParquetFile(f).info["row_groups"]
row_groups_per_file.append(row_groups)
self._row_groups_per_file = row_groups_per_file
return self._row_groups_per_file
row_groups_per_file = []
# Count up the total number of row groups across all files and
# keep track of row groups per file to use later.
for file in self.files:
with self.fs.open(file) as f:
row_groups = ParquetFile(f).info["row_groups"]
row_groups_per_file.append(row_groups)
return row_groups_per_file

@property
@functools.cached_property
def files(self):
if self._files is None:
self._files = self._get_files(self._get_fastparquet_files())
return self._files
return self._get_files(self._get_fastparquet_files())

def to_pandas_dataframe(self, columns):
return self.dataset.to_pandas(columns=columns)
Expand Down
15 changes: 14 additions & 1 deletion modin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def _replace_doc(
parent_cls : class, optional
If `target_obj` is an attribute of a class, `parent_cls` should be that class.
This is used for generating the API URL as well as for handling special cases
like `target_obj` being a property.
like `target_obj` being a property or a cached_property.
attr_name : str, optional
Gives the name to `target_obj` if it's an attribute of `parent_cls`.
Needed to handle some special cases and in most cases could be determined automatically.
Expand All @@ -314,6 +314,8 @@ def _replace_doc(
if parent_cls and not attr_name:
if isinstance(target_obj, property):
attr_name = target_obj.fget.__name__ # type: ignore[union-attr]
elif isinstance(target_obj, functools.cached_property):
attr_name = target_obj.func.__name__
elif isinstance(target_obj, (staticmethod, classmethod)):
attr_name = target_obj.__func__.__name__
else:
Expand Down Expand Up @@ -355,6 +357,16 @@ def _replace_doc(
attr_name,
property(target_obj.fget, target_obj.fset, target_obj.fdel, doc),
)
elif parent_cls and isinstance(target_obj, functools.cached_property):
if overwrite:
target_obj.func.__doc_inherited__ = True # type: ignore[attr-defined]
assert attr_name is not None
target_obj.func.__doc__ = doc
setattr(
parent_cls,
attr_name,
functools.cached_property(target_obj.func),
)
else:
if overwrite:
target_obj.__doc_inherited__ = True # type: ignore[attr-defined]
Expand Down Expand Up @@ -421,6 +433,7 @@ def _documentable_obj(obj: object) -> bool:
return bool(
callable(obj)
or (isinstance(obj, property) and obj.fget)
or (isinstance(obj, functools.cached_property))
or (isinstance(obj, (staticmethod, classmethod)) and obj.__func__)
)

Expand Down
13 changes: 10 additions & 3 deletions scripts/doc_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,9 +367,16 @@ def validate_object(import_path: str) -> list:

errors = []
doc = construct_validator(import_path)
if getattr(doc.obj, "__doc_inherited__", False) or (
isinstance(doc.obj, property)
and getattr(doc.obj.fget, "__doc_inherited__", False)
if (
getattr(doc.obj, "__doc_inherited__", False)
or (
isinstance(doc.obj, property)
and getattr(doc.obj.fget, "__doc_inherited__", False)
)
or (
isinstance(doc.obj, functools.cached_property)
and getattr(doc.obj.func, "__doc_inherited__", False)
)
):
# do not check inherited docstrings
return errors
Expand Down

0 comments on commit 4562e96

Please sign in to comment.