Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Documented remaining Attributes of classes and functions #1283

Merged
merged 49 commits into from
Oct 31, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
3d79e6e
Update extension.py
v-parmar Aug 16, 2023
2c4519e
Update task.py
v-parmar Aug 16, 2023
7ddbdf4
Update flow.py
v-parmar Aug 16, 2023
7d5a04f
Update functions.py
v-parmar Aug 16, 2023
58ef19d
Update functions.py
v-parmar Aug 16, 2023
63a20bb
Update trace.py
v-parmar Aug 16, 2023
0197de3
Update functions.py
v-parmar Aug 16, 2023
1385155
Update functions.py
v-parmar Aug 16, 2023
43ed152
Update functions.py
v-parmar Aug 16, 2023
6c5f9d4
Update split.py
v-parmar Aug 16, 2023
e4db820
Update task.py
v-parmar Aug 16, 2023
9948541
Update openml/flows/flow.py
v-parmar Aug 18, 2023
1f79793
Update openml/flows/flow.py
v-parmar Aug 18, 2023
bb0077c
Update openml/flows/functions.py
v-parmar Aug 18, 2023
e986733
Update openml/extensions/sklearn/extension.py
v-parmar Aug 18, 2023
426e5fb
Update openml/flows/functions.py
v-parmar Aug 18, 2023
581d47a
Update openml/runs/functions.py
v-parmar Aug 18, 2023
31d6e03
Update openml/runs/functions.py
v-parmar Aug 18, 2023
4dfe6fe
Update openml/runs/functions.py
v-parmar Aug 18, 2023
3c29b1d
Update openml/runs/trace.py
v-parmar Aug 18, 2023
bf07329
Update openml/runs/trace.py
v-parmar Aug 18, 2023
711303a
Update openml/setups/functions.py
v-parmar Aug 18, 2023
ad8aec2
Update openml/study/functions.py
v-parmar Aug 18, 2023
2150d64
Update openml/study/functions.py
v-parmar Aug 18, 2023
f9fee1e
Update openml/tasks/functions.py
v-parmar Aug 18, 2023
3776710
Update openml/tasks/split.py
v-parmar Aug 18, 2023
091335d
Update openml/tasks/split.py
v-parmar Aug 18, 2023
fb556ea
Merge branch 'openml:develop' into v-parmar-patch-2
v-parmar Aug 18, 2023
2f9b2ad
Update openml/tasks/task.py
v-parmar Aug 21, 2023
571951f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 21, 2023
125ea1d
Update openml/tasks/task.py
v-parmar Aug 21, 2023
7fa8359
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 21, 2023
08055e7
Update openml/extensions/sklearn/extension.py
v-parmar Sep 19, 2023
0d272b6
Update openml/flows/flow.py
v-parmar Sep 19, 2023
de5fa6c
Update openml/flows/functions.py
v-parmar Sep 19, 2023
3535387
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 19, 2023
4cc0abf
Update openml/runs/functions.py
v-parmar Sep 19, 2023
554215f
Update openml/runs/functions.py
v-parmar Sep 19, 2023
4e1765e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 19, 2023
f976555
Update openml/setups/functions.py
v-parmar Sep 19, 2023
95bfc69
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 19, 2023
c0eedfa
Update openml/study/functions.py
v-parmar Sep 19, 2023
af2eac7
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 19, 2023
ae8581f
Update openml/tasks/functions.py
v-parmar Sep 19, 2023
e3c475a
Update trace.py to fix mypy error
LennartPurucker Oct 30, 2023
51798e0
Update functions.py to fix mypy error
LennartPurucker Oct 30, 2023
3ab73e0
fix copy paste error
LennartPurucker Oct 30, 2023
ab6f202
Update functions.py of setup to fix mypy error
LennartPurucker Oct 30, 2023
049230d
Update progress.rst
v-parmar Oct 31, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions openml/extensions/sklearn/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -2101,6 +2101,20 @@ def instantiate_model_from_hpo_class(
return base_estimator

def _extract_trace_data(self, model, rep_no, fold_no):
"""Extracts data from a machine learning model's cross-validation results and creates an ARFF (Attribute-Relation File Format) trace.

Parameters
----------
model : Any
A fitted hyperparameter optimization model.
rep_no : int
The repetition number.
fold_no : int
The fold number.
Returns
-------
A list of ARFF trace content.
"""
v-parmar marked this conversation as resolved.
Show resolved Hide resolved
arff_tracecontent = []
for itt_no in range(0, len(model.cv_results_["mean_test_score"])):
# we use the string values for True and False, as it is defined in
Expand Down
26 changes: 26 additions & 0 deletions openml/flows/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,18 @@ def get_subflow(self, structure):


def _copy_server_fields(source_flow, target_flow):
""" Recursively copies the fields added by the server from the `source_flow` to the `target_flow`.
v-parmar marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
source_flow : OpenMLFlow
To copy the fields from.
target_flow : OpenMLFlow
To copy the fields to.
Returns
-------
None
"""
fields_added_by_the_server = ["flow_id", "uploader", "version", "upload_date"]
for field in fields_added_by_the_server:
setattr(target_flow, field, getattr(source_flow, field))
Expand All @@ -533,5 +545,19 @@ def _copy_server_fields(source_flow, target_flow):


def _add_if_nonempty(dic, key, value):
""" Adds a key-value pair to a dictionary if the value is not None.
v-parmar marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
dic: dict
To add the key-value pair to.
key: hashable
To add to the dictionary.
value: Any
To add to the dictionary.
Returns
-------
None
"""
if value is not None:
dic[key] = value
12 changes: 12 additions & 0 deletions openml/flows/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,18 @@ def get_flow_id(


def __list_flows(api_call: str, output_format: str = "dict") -> Union[Dict, pd.DataFrame]:
"""
Retrieve information about flows from OpenML API and parse it to a dictionary or a Pandas DataFrame.
Parameters
----------
api_call: str
v-parmar marked this conversation as resolved.
Show resolved Hide resolved
The API call that retrieves the information about flows.
output_format: str in {"dict", "dataframe"}
The output format.
Returns
-------
v-parmar marked this conversation as resolved.
Show resolved Hide resolved
The flows information in the specified output format.
"""
xml_string = openml._api_calls._perform_api_call(api_call, "get")
flows_dict = xmltodict.parse(xml_string, force_list=("oml:flow",))

Expand Down
60 changes: 60 additions & 0 deletions openml/runs/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,16 @@ def run_model_on_task(
flow = extension.model_to_flow(model)

def get_task_and_type_conversion(task: Union[int, str, OpenMLTask]) -> OpenMLTask:
""" Retrieve an OpenMLTask object from either an integer or string ID, or directly from an OpenMLTask object.
Parameters
----------
task : Union[int, str, OpenMLTask]
The task ID or the OpenMLTask object.
Returns
-------
OpenMLTask
The OpenMLTask object.
"""
v-parmar marked this conversation as resolved.
Show resolved Hide resolved
if isinstance(task, (int, str)):
return get_task(int(task))
else:
Expand Down Expand Up @@ -451,6 +461,27 @@ def _run_task_get_arffcontent(
"OrderedDict[str, OrderedDict]",
"OrderedDict[str, OrderedDict]",
]:
""" Runs the hyperparameter optimization on the given task and returns the arfftrace content.
v-parmar marked this conversation as resolved.
Show resolved Hide resolved
Parameters
----------
model : Any
The model that is to be evaluated.
task : OpenMLTask
The OpenMLTask to evaluate.
extension : Extension
The OpenML extension object.
add_local_measures : bool
Whether to compute additional local evaluation measures.
dataset_format : str
The format in which to download the dataset.
n_jobs : int
Number of jobs to run in parallel. If None, use 1 core by default. If -1, use all available cores.

Returns
-------
Tuple[List[List], Optional[OpenMLRunTrace], OrderedDict[str, OrderedDict], OrderedDict[str, OrderedDict]]
A tuple containing the arfftrace content, the OpenML run trace, the global and local evaluation measures.
"""
arff_datacontent = [] # type: List[List]
traces = [] # type: List[OpenMLRunTrace]
# stores fold-based evaluation measures. In case of a sample based task,
Expand Down Expand Up @@ -636,6 +667,35 @@ def _run_task_get_arffcontent_parallel_helper(
Optional[OpenMLRunTrace],
"OrderedDict[str, float]",
]:
""" Helper function that runs a single model on a single task fold sample.
v-parmar marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
extension : Extension
An OpenML extension instance.
fold_no : int
The fold number to be run.
model : Any
The model that is to be evaluated.
rep_no : int
Repetition number to be run.
sample_no : int
Sample number to be run.
task : OpenMLTask
The task object from OpenML.
dataset_format : str
The dataset format to be used.
configuration : Dict
Hyperparameters to configure the model.

Returns
-------
Tuple[np.ndarray, Optional[pd.DataFrame], np.ndarray, Optional[pd.DataFrame],
Optional[OpenMLRunTrace], OrderedDict[str, float]]
A tuple containing the predictions, probability estimates (if applicable),
actual target values, actual target value probabilities (if applicable),
the trace object of the OpenML run (if applicable), and a dictionary of local measures for this particular fold.
"""
# Sets up the OpenML instantiated in the child process to match that of the parent's
# if configuration=None, loads the default
config._setup(configuration)
Expand Down
50 changes: 49 additions & 1 deletion openml/runs/trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,17 @@ class OpenMLRunTrace(object):

"""

def __init__(self, run_id, trace_iterations):
def __init__(self, run_id: int, trace_iterations: List[List]):
"""
Object to hold the trace content of a run.
v-parmar marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
run_id : int
Id for which the trace content is to be stored.
trace_iterations : List[List]
The trace content obtained by running a flow on a task.
"""
self.run_id = run_id
self.trace_iterations = trace_iterations

Expand Down Expand Up @@ -228,6 +238,24 @@ def trace_from_arff(cls, arff_obj):

@classmethod
def _trace_from_arff_struct(cls, attributes, content, error_message):
""" Generate a trace dictionary from ARFF structure.
v-parmar marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
cls : type
The trace object to be created.
attributes : List[Tuple[str, str]]
Attribute descriptions.
content : List[List[Union[int, float, str]]]
List of instances.
error_message : str
Error message to raise if `setup_string` is in `attributes`.

Returns
-------
OrderedDict
A dictionary representing the trace.
"""
trace = OrderedDict()
attribute_idx = {att[0]: idx for idx, att in enumerate(attributes)}

Expand Down Expand Up @@ -345,6 +373,26 @@ def trace_from_xml(cls, xml):

@classmethod
def merge_traces(cls, traces: List["OpenMLRunTrace"]) -> "OpenMLRunTrace":
"""Merge multiple traces into a single trace.

Parameters
----------
cls : type
Type of the trace object to be created.
traces : List[OpenMLRunTrace]
List of traces to merge.

Returns
-------
OpenMLRunTrace
A trace object representing the merged traces.

Raises
------
ValueError
If the parameters in the iterations of the traces being merged are not equal.
If a key (repeat, fold, iteration) is encountered twice while merging the traces.
"""
merged_trace = (
OrderedDict()
) # type: OrderedDict[Tuple[int, int, int], OpenMLTraceIteration] # noqa E501
Expand Down
39 changes: 36 additions & 3 deletions openml/setups/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,24 @@ def setup_exists(flow) -> int:
return setup_id if setup_id > 0 else False


def _get_cached_setup(setup_id):
"""Load a run from the cache."""
def _get_cached_setup(setup_id: int):
"""Load a setup from the cache.

Parameters
----------
setup_id : int
ID of the setup to be loaded.

Returns
-------
OpenMLSetup
The loaded setup object.

Raises
------
OpenMLCacheException
If the setup file for the given setup ID is not cached.
"""
cache_dir = config.get_cache_directory()
setup_cache_dir = os.path.join(cache_dir, "setups", str(setup_id))
try:
Expand Down Expand Up @@ -271,7 +287,21 @@ def initialize_model(setup_id: int) -> Any:
return model


def _to_dict(flow_id, openml_parameter_settings):
def _to_dict(flow_id: int, openml_parameter_settings):
""" Convert a flow ID and a list of OpenML parameter settings to a dictionary representation that can be serialized to XML.
v-parmar marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
flow_id : int
ID of the flow.
openml_parameter_settings : List[OpenMLParameter]
A list of OpenML parameter settings.

Returns
-------
OrderedDict
A dictionary representation of the flow ID and parameter settings.
"""
# for convenience, this function (ab)uses the run object.
xml = OrderedDict()
xml["oml:run"] = OrderedDict()
Expand Down Expand Up @@ -319,6 +349,9 @@ def _create_setup_from_xml(result_dict, output_format="object"):


def _create_setup_parameter_from_xml(result_dict, output_format="object"):
"""
Create an OpenMLParameter object or a dictionary from an API xml result.
"""
if output_format == "object":
return OpenMLParameter(
input_id=int(result_dict["oml:id"]),
Expand Down
28 changes: 28 additions & 0 deletions openml/study/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,20 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
tags.append(current_tag)

def get_nested_ids_from_result_dict(key: str, subkey: str) -> Optional[List]:
""" Extracts a list of nested IDs from a result dictionary.
v-parmar marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
key : str
The key in the result dictionary that holds the nested OpenML IDs.
subkey : str
The subkey under `key` that contains the nested OpenML IDs.

Returns
-------
Optional[List]
A list of nested OpenML IDs, or None if the key is not present in the dictionary.
"""
if result_dict.get(key) is not None:
return [int(oml_id) for oml_id in result_dict[key][subkey]]
return None
Expand Down Expand Up @@ -591,6 +605,20 @@ def _list_studies(output_format="dict", **kwargs) -> Union[Dict, pd.DataFrame]:


def __list_studies(api_call, output_format="object") -> Union[Dict, pd.DataFrame]:
""" Retrieves the list of OpenML studies and returns it in a dictionary or a Pandas DataFrame.
v-parmar marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
api_call : str
The API call for retrieving the list of OpenML studies.
output_format : str in {"object", "dataframe"}
Format of the output, either 'object' for a dictionary or 'dataframe' for a Pandas DataFrame.

Returns
-------
Union[Dict, pd.DataFrame]
A dictionary or Pandas DataFrame of OpenML studies, depending on the value of 'output_format'.
"""
xml_string = openml._api_calls._perform_api_call(api_call, "get")
study_dict = xmltodict.parse(xml_string, force_list=("oml:study",))

Expand Down
22 changes: 22 additions & 0 deletions openml/tasks/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,28 @@ def _list_tasks(task_type=None, output_format="dict", **kwargs):


def __list_tasks(api_call, output_format="dict"):
""" Returns a dictionary or a Pandas DataFrame with information about OpenML tasks.
v-parmar marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
api_call : str
The API call specifying which tasks to return.
output_format : str in {"dict", "dataframe"}
Output format for the returned object.

Returns
-------
Union[Dict, pd.DataFrame]
A dictionary or a Pandas DataFrame with information about OpenML tasks.

Raises
------
ValueError
If the XML returned by the OpenML API does not contain 'oml:tasks', '@xmlns:oml', or has an incorrect value for
'@xmlns:oml'.
KeyError
If an invalid key is found in the XML for a task.
"""
xml_string = openml._api_calls._perform_api_call(api_call, "get")
tasks_dict = xmltodict.parse(xml_string, force_list=("oml:task", "oml:input"))
# Minimalistic check if the XML is useful
Expand Down
Loading
Loading