Documented remaining Attributes of classes and functions (#1283)

Add documentation and type hints for the remaining attributes of classes and functions. --------- Co-authored-by: Lennart Purucker <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
openml · Oct 31, 2023 · d45cf37 · d45cf37
1 parent 5f5424a
commit d45cf37
Show file tree

Hide file tree

Showing 11 changed files with 514 additions and 91 deletions.
diff --git a/doc/progress.rst b/doc/progress.rst
@@ -10,6 +10,7 @@ next
 ~~~~~~
 
  * MAINT #1280: Use the server-provided ``parquet_url`` instead of ``minio_url`` to determine the location of the parquet file.
+ * ADD #716: add documentation for remaining attributes of classes and functions.
 
 0.14.1
 ~~~~~~

diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
@@ -2101,6 +2101,21 @@ def instantiate_model_from_hpo_class(
         return base_estimator
 
     def _extract_trace_data(self, model, rep_no, fold_no):
+        """Extracts data from a machine learning model's cross-validation results
+        and creates an ARFF (Attribute-Relation File Format) trace.
+
+        Parameters
+        ----------
+        model : Any
+            A fitted hyperparameter optimization model.
+        rep_no : int
+            The repetition number.
+        fold_no : int
+            The fold number.
+        Returns
+        -------
+        A list of ARFF trace content.
+        """
         arff_tracecontent = []
         for itt_no in range(0, len(model.cv_results_["mean_test_score"])):
             # we use the string values for True and False, as it is defined in

diff --git a/openml/flows/flow.py b/openml/flows/flow.py
@@ -523,6 +523,19 @@ def get_subflow(self, structure):
 
 
 def _copy_server_fields(source_flow, target_flow):
+    """Recursively copies the fields added by the server
+    from the `source_flow` to the `target_flow`.
+
+    Parameters
+    ----------
+    source_flow : OpenMLFlow
+        To copy the fields from.
+    target_flow : OpenMLFlow
+        To copy the fields to.
+    Returns
+    -------
+    None
+    """
     fields_added_by_the_server = ["flow_id", "uploader", "version", "upload_date"]
     for field in fields_added_by_the_server:
         setattr(target_flow, field, getattr(source_flow, field))
@@ -533,5 +546,19 @@ def _copy_server_fields(source_flow, target_flow):
 
 
 def _add_if_nonempty(dic, key, value):
+    """Adds a key-value pair to a dictionary if the value is not None.
+
+    Parameters
+    ----------
+    dic: dict
+        To add the key-value pair to.
+    key: hashable
+        To add to the dictionary.
+    value: Any
+        To add to the dictionary.
+    Returns
+    -------
+    None
+    """
     if value is not None:
         dic[key] = value
diff --git a/openml/flows/functions.py b/openml/flows/functions.py
@@ -337,6 +337,20 @@ def get_flow_id(
 
 
 def __list_flows(api_call: str, output_format: str = "dict") -> Union[Dict, pd.DataFrame]:
+    """Retrieve information about flows from OpenML API
+    and parse it to a dictionary or a Pandas DataFrame.
+
+    Parameters
+    ----------
+    api_call: str
+        The API call used to retrieve the information about flows.
+    output_format: str in {"dict", "dataframe"}
+        The output format.
+
+    Returns
+    -------
+        The flows information in the specified output format.
+    """
     xml_string = openml._api_calls._perform_api_call(api_call, "get")
     flows_dict = xmltodict.parse(xml_string, force_list=("oml:flow",))
 

diff --git a/openml/runs/functions.py b/openml/runs/functions.py
@@ -128,6 +128,19 @@ def run_model_on_task(
     flow = extension.model_to_flow(model)
 
     def get_task_and_type_conversion(task: Union[int, str, OpenMLTask]) -> OpenMLTask:
+        """Retrieve an OpenMLTask object from either an integer or string ID,
+        or directly from an OpenMLTask object.
+
+        Parameters
+        ----------
+        task : Union[int, str, OpenMLTask]
+            The task ID or the OpenMLTask object.
+
+        Returns
+        -------
+        OpenMLTask
+            The OpenMLTask object.
+        """
         if isinstance(task, (int, str)):
             return get_task(int(task))
         else:
@@ -451,6 +464,32 @@ def _run_task_get_arffcontent(
     "OrderedDict[str, OrderedDict]",
     "OrderedDict[str, OrderedDict]",
 ]:
+    """Runs the hyperparameter optimization on the given task
+    and returns the ARFF content.
+
+    Parameters
+    ----------
+    model : Any
+        The model that is to be evaluated.
+    task : OpenMLTask
+        The OpenMLTask to evaluate.
+    extension : Extension
+        The OpenML extension object.
+    add_local_measures : bool
+        Whether to compute additional local evaluation measures.
+    dataset_format : str
+        The format in which to download the dataset.
+    n_jobs : int
+        Number of jobs to run in parallel.
+        If None, use 1 core by default. If -1, use all available cores.
+
+    Returns
+    -------
+    Tuple[List[List], Optional[OpenMLRunTrace],
+        OrderedDict[str, OrderedDict], OrderedDict[str, OrderedDict]]
+        A tuple containing the ARFF content,
+        the OpenML run trace, the global and local evaluation measures.
+    """
     arff_datacontent = []  # type: List[List]
     traces = []  # type: List[OpenMLRunTrace]
     # stores fold-based evaluation measures. In case of a sample based task,
@@ -636,6 +675,36 @@ def _run_task_get_arffcontent_parallel_helper(
     Optional[OpenMLRunTrace],
     "OrderedDict[str, float]",
 ]:
+    """Helper function that runs a single model on a single task fold sample.
+
+    Parameters
+    ----------
+    extension : Extension
+        An OpenML extension instance.
+    fold_no : int
+        The fold number to be run.
+    model : Any
+        The model that is to be evaluated.
+    rep_no : int
+        Repetition number to be run.
+    sample_no : int
+        Sample number to be run.
+    task : OpenMLTask
+        The task object from OpenML.
+    dataset_format : str
+        The dataset format to be used.
+    configuration : Dict
+        Hyperparameters to configure the model.
+
+    Returns
+    -------
+    Tuple[np.ndarray, Optional[pd.DataFrame], np.ndarray, Optional[pd.DataFrame],
+           Optional[OpenMLRunTrace], OrderedDict[str, float]]
+        A tuple containing the predictions, probability estimates (if applicable),
+        actual target values, actual target value probabilities (if applicable),
+        the trace object of the OpenML run (if applicable),
+        and a dictionary of local measures for this particular fold.
+    """
     # Sets up the OpenML instantiated in the child process to match that of the parent's
     # if configuration=None, loads the default
     config._setup(configuration)