Skip to content

Commit

Permalink
Documented remaining Attributes of classes and functions (#1283)
Browse files Browse the repository at this point in the history
Add documentation and type hints for the remaining attributes of classes and functions.

---------

Co-authored-by: Lennart Purucker <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored Oct 31, 2023
1 parent 5f5424a commit d45cf37
Show file tree
Hide file tree
Showing 11 changed files with 514 additions and 91 deletions.
1 change: 1 addition & 0 deletions doc/progress.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ next
~~~~~~

* MAINT #1280: Use the server-provided ``parquet_url`` instead of ``minio_url`` to determine the location of the parquet file.
* ADD #716: add documentation for remaining attributes of classes and functions.

0.14.1
~~~~~~
Expand Down
15 changes: 15 additions & 0 deletions openml/extensions/sklearn/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -2101,6 +2101,21 @@ def instantiate_model_from_hpo_class(
return base_estimator

def _extract_trace_data(self, model, rep_no, fold_no):
"""Extracts data from a machine learning model's cross-validation results
and creates an ARFF (Attribute-Relation File Format) trace.
Parameters
----------
model : Any
A fitted hyperparameter optimization model.
rep_no : int
The repetition number.
fold_no : int
The fold number.
Returns
-------
A list of ARFF tracecontent.
"""
arff_tracecontent = []
for itt_no in range(0, len(model.cv_results_["mean_test_score"])):
# we use the string values for True and False, as it is defined in
Expand Down
27 changes: 27 additions & 0 deletions openml/flows/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,19 @@ def get_subflow(self, structure):


def _copy_server_fields(source_flow, target_flow):
"""Recursively copies the fields added by the server
from the `source_flow` to the `target_flow`.
Parameters
----------
source_flow : OpenMLFlow
To copy the fields from.
target_flow : OpenMLFlow
To copy the fields to.
Returns
-------
None
"""
fields_added_by_the_server = ["flow_id", "uploader", "version", "upload_date"]
for field in fields_added_by_the_server:
setattr(target_flow, field, getattr(source_flow, field))
Expand All @@ -533,5 +546,19 @@ def _copy_server_fields(source_flow, target_flow):


def _add_if_nonempty(dic, key, value):
"""Adds a key-value pair to a dictionary if the value is not None.
Parameters
----------
dic: dict
To add the key-value pair to.
key: hashable
To add to the dictionary.
value: Any
To add to the dictionary.
Returns
-------
None
"""
if value is not None:
dic[key] = value
14 changes: 14 additions & 0 deletions openml/flows/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,20 @@ def get_flow_id(


def __list_flows(api_call: str, output_format: str = "dict") -> Union[Dict, pd.DataFrame]:
"""Retrieve information about flows from OpenML API
and parse it to a dictionary or a Pandas DataFrame.
Parameters
----------
api_call: str
Retrieves the information about flows.
output_format: str in {"dict", "dataframe"}
The output format.
Returns
-------
The flows information in the specified output format.
"""
xml_string = openml._api_calls._perform_api_call(api_call, "get")
flows_dict = xmltodict.parse(xml_string, force_list=("oml:flow",))

Expand Down
69 changes: 69 additions & 0 deletions openml/runs/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,19 @@ def run_model_on_task(
flow = extension.model_to_flow(model)

def get_task_and_type_conversion(task: Union[int, str, OpenMLTask]) -> OpenMLTask:
"""Retrieve an OpenMLTask object from either an integer or string ID,
or directly from an OpenMLTask object.
Parameters
----------
task : Union[int, str, OpenMLTask]
The task ID or the OpenMLTask object.
Returns
-------
OpenMLTask
The OpenMLTask object.
"""
if isinstance(task, (int, str)):
return get_task(int(task))
else:
Expand Down Expand Up @@ -451,6 +464,32 @@ def _run_task_get_arffcontent(
"OrderedDict[str, OrderedDict]",
"OrderedDict[str, OrderedDict]",
]:
"""Runs the hyperparameter optimization on the given task
and returns the arfftrace content.
Parameters
----------
model : Any
The model that is to be evaluated.
task : OpenMLTask
The OpenMLTask to evaluate.
extension : Extension
The OpenML extension object.
add_local_measures : bool
Whether to compute additional local evaluation measures.
dataset_format : str
The format in which to download the dataset.
n_jobs : int
Number of jobs to run in parallel.
If None, use 1 core by default. If -1, use all available cores.
Returns
-------
Tuple[List[List], Optional[OpenMLRunTrace],
OrderedDict[str, OrderedDict], OrderedDict[str, OrderedDict]]
A tuple containing the arfftrace content,
the OpenML run trace, the global and local evaluation measures.
"""
arff_datacontent = [] # type: List[List]
traces = [] # type: List[OpenMLRunTrace]
# stores fold-based evaluation measures. In case of a sample based task,
Expand Down Expand Up @@ -636,6 +675,36 @@ def _run_task_get_arffcontent_parallel_helper(
Optional[OpenMLRunTrace],
"OrderedDict[str, float]",
]:
"""Helper function that runs a single model on a single task fold sample.
Parameters
----------
extension : Extension
An OpenML extension instance.
fold_no : int
The fold number to be run.
model : Any
The model that is to be evaluated.
rep_no : int
Repetition number to be run.
sample_no : int
Sample number to be run.
task : OpenMLTask
The task object from OpenML.
dataset_format : str
The dataset format to be used.
configuration : Dict
Hyperparameters to configure the model.
Returns
-------
Tuple[np.ndarray, Optional[pd.DataFrame], np.ndarray, Optional[pd.DataFrame],
Optional[OpenMLRunTrace], OrderedDict[str, float]]
A tuple containing the predictions, probability estimates (if applicable),
actual target values, actual target value probabilities (if applicable),
the trace object of the OpenML run (if applicable),
and a dictionary of local measures for this particular fold.
"""
# Sets up the OpenML instantiated in the child process to match that of the parent's
# if configuration=None, loads the default
config._setup(configuration)
Expand Down
Loading

0 comments on commit d45cf37

Please sign in to comment.