From 18f0c854b66b48d7ee3fc965ce20d77ac37e28f9 Mon Sep 17 00:00:00 2001 From: WinPlay02 Date: Fri, 2 Feb 2024 20:23:54 +0100 Subject: [PATCH 01/22] refactor: move memoization logic into new class --- src/safeds_runner/server/memoization_map.py | 43 +++++++++++++++++++ src/safeds_runner/server/pipeline_manager.py | 29 ++----------- .../safeds_runner/server/test_memoization.py | 13 +++--- 3 files changed, 53 insertions(+), 32 deletions(-) create mode 100644 src/safeds_runner/server/memoization_map.py diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py new file mode 100644 index 0000000..5fdc4e3 --- /dev/null +++ b/src/safeds_runner/server/memoization_map.py @@ -0,0 +1,43 @@ +from dataclasses import dataclass +from typing import Any, Callable + + +@dataclass(frozen=True) +class MemoizationStats: + last_access: int + computation_time: int + lookup_time: int + memory_size: int + + +class MemoizationMap: + def __init__(self, map_values: dict[tuple[str, tuple[Any], tuple[Any]], Any], + map_stats: dict[tuple[str, tuple[Any], tuple[Any]], MemoizationStats]): + self.map_values: dict[tuple[str, tuple[Any], tuple[Any]], Any] = map_values + self.map_stats: dict[tuple[str, tuple[Any], tuple[Any]], MemoizationStats] = map_stats + + def memoized_function_call(self, function_name: str, function_callable: Callable, parameters: list[Any], + hidden_parameters: list[Any]) -> Any: + key = (function_name, _convert_list_to_tuple(parameters), _convert_list_to_tuple(hidden_parameters)) + if key in self.map_values: + return self.map_values[key] + result = function_callable(*parameters) + self.map_values[key] = result + return result + + +def _convert_list_to_tuple(values: list) -> tuple: + """ + Recursively convert a mutable list of values to an immutable tuple containing the same values, to make the values hashable. 
+ + Parameters + ---------- + values : list + Values that should be converted to a tuple + + Returns + ------- + tuple + Converted list containing all the elements of the provided list + """ + return tuple(_convert_list_to_tuple(value) if isinstance(value, list) else value for value in values) diff --git a/src/safeds_runner/server/pipeline_manager.py b/src/safeds_runner/server/pipeline_manager.py index 2e223d4..a316fdb 100644 --- a/src/safeds_runner/server/pipeline_manager.py +++ b/src/safeds_runner/server/pipeline_manager.py @@ -15,6 +15,7 @@ import stack_data +from safeds_runner.server.memoization_map import MemoizationMap from safeds_runner.server.messages import ( Message, MessageDataProgram, @@ -27,8 +28,6 @@ ) from safeds_runner.server.module_manager import InMemoryFinder -MemoizationMap: typing.TypeAlias = dict[tuple[str, tuple[Any], tuple[Any]], Any] - class PipelineManager: """ @@ -59,7 +58,7 @@ def _messages_queue_thread(self) -> threading.Thread: @cached_property def _memoization_map(self) -> MemoizationMap: - return self._multiprocessing_manager.dict() # type: ignore[return-value] + return MemoizationMap(self._multiprocessing_manager.dict(), self._multiprocessing_manager.dict()) # type: ignore[return-value] def startup(self) -> None: """ @@ -334,29 +333,7 @@ def runner_memoized_function_call( if current_pipeline is None: return None # pragma: no cover memoization_map = current_pipeline.get_memoization_map() - key = (function_name, _convert_list_to_tuple(parameters), _convert_list_to_tuple(hidden_parameters)) - if key in memoization_map: - return memoization_map[key] - result = function_callable(*parameters) - memoization_map[key] = result - return result - - -def _convert_list_to_tuple(values: list) -> tuple: - """ - Recursively convert a mutable list of values to an immutable tuple containing the same values, to make the values hashable. 
- - Parameters - ---------- - values : list - Values that should be converted to a tuple - - Returns - ------- - tuple - Converted list containing all the elements of the provided list - """ - return tuple(_convert_list_to_tuple(value) if isinstance(value, list) else value for value in values) + return memoization_map.memoized_function_call(function_name, function_callable, parameters, hidden_parameters) def runner_filemtime(filename: str) -> int | None: diff --git a/tests/safeds_runner/server/test_memoization.py b/tests/safeds_runner/server/test_memoization.py index 33e8b7e..9efa716 100644 --- a/tests/safeds_runner/server/test_memoization.py +++ b/tests/safeds_runner/server/test_memoization.py @@ -5,7 +5,8 @@ from typing import Any import pytest -from safeds_runner.server import pipeline_manager +from safeds_runner.server import pipeline_manager, memoization_map +from safeds_runner.server.memoization_map import MemoizationMap from safeds_runner.server.messages import MessageDataProgram, ProgramMainInformation from safeds_runner.server.pipeline_manager import PipelineProcess @@ -29,13 +30,13 @@ def test_memoization_already_present_values( "", Queue(), {}, - {}, + MemoizationMap({}, {}), ) - pipeline_manager.current_pipeline.get_memoization_map()[ + pipeline_manager.current_pipeline.get_memoization_map().map_values[ ( function_name, - pipeline_manager._convert_list_to_tuple(params), - pipeline_manager._convert_list_to_tuple(hidden_params), + memoization_map._convert_list_to_tuple(params), + memoization_map._convert_list_to_tuple(hidden_params), ) ] = expected_result result = pipeline_manager.runner_memoized_function_call(function_name, lambda *_: None, params, hidden_params) @@ -62,7 +63,7 @@ def test_memoization_not_present_values( "", Queue(), {}, - {}, + MemoizationMap({}, {}), ) # Save value in map result = pipeline_manager.runner_memoized_function_call(function_name, function, params, hidden_params) From ab0213d5a829576b9be3bfa1cdf6f0b17d449a8a Mon Sep 17 
00:00:00 2001 From: WinPlay02 Date: Fri, 2 Feb 2024 20:48:30 +0100 Subject: [PATCH 02/22] feat: track more stats about memoized function calls --- src/safeds_runner/server/memoization_map.py | 32 +++++++++++++++++-- .../safeds_runner/server/test_memoization.py | 11 ++++++- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index 5fdc4e3..0987fe6 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -1,3 +1,5 @@ +import sys +import time from dataclasses import dataclass from typing import Any, Callable @@ -9,6 +11,9 @@ class MemoizationStats: lookup_time: int memory_size: int + def __str__(self): + return f"Last access: {self.last_access}, computation time: {self.computation_time}, lookup time: {self.lookup_time}, memory size: {self.memory_size}" + class MemoizationMap: def __init__(self, map_values: dict[tuple[str, tuple[Any], tuple[Any]], Any], @@ -19,10 +24,33 @@ def __init__(self, map_values: dict[tuple[str, tuple[Any], tuple[Any]], Any], def memoized_function_call(self, function_name: str, function_callable: Callable, parameters: list[Any], hidden_parameters: list[Any]) -> Any: key = (function_name, _convert_list_to_tuple(parameters), _convert_list_to_tuple(hidden_parameters)) - if key in self.map_values: - return self.map_values[key] + time_compare_start = time.perf_counter_ns() + try: + potential_value = self.map_values[key] + time_compare_end = time.perf_counter_ns() + # Use time_ns for absolute time points, as perf_counter_ns does not guarantee any fixed reference-point + time_last_access = time.time_ns() + time_compare = time_compare_end - time_compare_start + old_memoization_stats = self.map_stats[key] + memoization_stats = MemoizationStats(time_last_access, old_memoization_stats.computation_time, + time_compare, old_memoization_stats.memory_size) + self.map_stats[key] = memoization_stats + 
print(f"Updated memoization stats for {function_name}: {memoization_stats}") + return potential_value + except KeyError: + time_compare_end = time.perf_counter_ns() + time_compare = time_compare_end - time_compare_start + time_compute_start = time.perf_counter_ns() result = function_callable(*parameters) + time_compute_end = time.perf_counter_ns() + # Use time_ns for absolute time points, as perf_counter_ns does not guarantee any fixed reference-point + time_last_access = time.time_ns() + time_compute = time_compute_end - time_compute_start + value_memory = sys.getsizeof(result) self.map_values[key] = result + memoization_stats = MemoizationStats(time_last_access, time_compute, time_compare, value_memory) + print(f"New memoization stats for {function_name}: {memoization_stats}") + self.map_stats[key] = memoization_stats return result diff --git a/tests/safeds_runner/server/test_memoization.py b/tests/safeds_runner/server/test_memoization.py index 9efa716..3f7d874 100644 --- a/tests/safeds_runner/server/test_memoization.py +++ b/tests/safeds_runner/server/test_memoization.py @@ -1,4 +1,6 @@ +import sys import tempfile +import time import typing from datetime import UTC, datetime from queue import Queue @@ -6,7 +8,7 @@ import pytest from safeds_runner.server import pipeline_manager, memoization_map -from safeds_runner.server.memoization_map import MemoizationMap +from safeds_runner.server.memoization_map import MemoizationMap, MemoizationStats from safeds_runner.server.messages import MessageDataProgram, ProgramMainInformation from safeds_runner.server.pipeline_manager import PipelineProcess @@ -39,6 +41,13 @@ def test_memoization_already_present_values( memoization_map._convert_list_to_tuple(hidden_params), ) ] = expected_result + pipeline_manager.current_pipeline.get_memoization_map().map_stats[ + ( + function_name, + memoization_map._convert_list_to_tuple(params), + memoization_map._convert_list_to_tuple(hidden_params), + ) + ] = 
MemoizationStats(time.perf_counter_ns(), 0, 0, sys.getsizeof(expected_result)) result = pipeline_manager.runner_memoized_function_call(function_name, lambda *_: None, params, hidden_params) assert result == expected_result From 670e8e0858b6a0d418657c2bd4aa83220cae07bf Mon Sep 17 00:00:00 2001 From: WinPlay02 Date: Sun, 4 Feb 2024 02:30:50 +0100 Subject: [PATCH 03/22] feat: calculate memory usage better docs: add docstrings --- src/safeds_runner/server/memoization_map.py | 93 +++++++++++++++++++- src/safeds_runner/server/pipeline_manager.py | 2 +- 2 files changed, 93 insertions(+), 2 deletions(-) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index 0987fe6..04da42d 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -1,11 +1,30 @@ +"""Module that contains the memoization logic and stats.""" import sys import time from dataclasses import dataclass from typing import Any, Callable +from safeds.data.image.containers import Image +from safeds.data.tabular.containers import Table, Column, Row, TaggedTable, TimeSeries +from safeds.data.tabular.typing import Schema + @dataclass(frozen=True) class MemoizationStats: + """ + Statistics calculated for every memoization call. + + Parameters + ---------- + last_access + Absolute timestamp since the unix epoch of the last access to the memoized value in nanoseconds + computation_time + Duration the computation of the value took in nanoseconds + lookup_time + Duration the lookup of the value took in nanoseconds (key comparison + IPC) + memory_size + Amount of memory the memoized value takes up in bytes + """ last_access: int computation_time: int lookup_time: int @@ -16,13 +35,48 @@ def __str__(self): class MemoizationMap: + """ + The memoization map handles memoized function calls, looks up stored values, computes new values if needed and calculates and updates statistics. 
+ """ def __init__(self, map_values: dict[tuple[str, tuple[Any], tuple[Any]], Any], map_stats: dict[tuple[str, tuple[Any], tuple[Any]], MemoizationStats]): + """ + Create a new memoization map using a value store dictionary and a stats dictionary. + + Parameters + ---------- + map_values + Value store dictionary + map_stats + Stats dictionary + """ self.map_values: dict[tuple[str, tuple[Any], tuple[Any]], Any] = map_values self.map_stats: dict[tuple[str, tuple[Any], tuple[Any]], MemoizationStats] = map_stats def memoized_function_call(self, function_name: str, function_callable: Callable, parameters: list[Any], hidden_parameters: list[Any]) -> Any: + """ + Handles a memoized function call. + + Looks up the stored value, determined by function name, parameters and hidden parameters and returns it if found. + If no value is found, computes the value using the provided callable and stores it in the map. + Every call to this function will update the memoization stats. + + Parameters + ---------- + function_name + Fully qualified function name + function_callable + Function that is called and memoized if the result was not found in the memoization map + parameters + List of parameters passed to the function + hidden_parameters + List of hidden parameters for the function. This is used for memoizing some impure functions. 
+ + Returns + ------- + The result of the specified function, if any exists + """ key = (function_name, _convert_list_to_tuple(parameters), _convert_list_to_tuple(hidden_parameters)) time_compare_start = time.perf_counter_ns() try: @@ -46,7 +100,7 @@ def memoized_function_call(self, function_name: str, function_callable: Callable # Use time_ns for absolute time points, as perf_counter_ns does not guarantee any fixed reference-point time_last_access = time.time_ns() time_compute = time_compute_end - time_compute_start - value_memory = sys.getsizeof(result) + value_memory = _get_size_of_value(result) self.map_values[key] = result memoization_stats = MemoizationStats(time_last_access, time_compute, time_compare, value_memory) print(f"New memoization stats for {function_name}: {memoization_stats}") @@ -69,3 +123,40 @@ def _convert_list_to_tuple(values: list) -> tuple: Converted list containing all the elements of the provided list """ return tuple(_convert_list_to_tuple(value) if isinstance(value, list) else value for value in values) + + +def _get_size_of_value(value: Any) -> int: + """ + Recursively calculate the memory usage of a given value. + + Parameters + ---------- + value + Any value of which the memory usage should be calculated. 
+ + Returns + ------- + Size of the provided value in bytes + """ + size_immediate = sys.getsizeof(value) + if isinstance(value, dict): + return sum(map(_get_size_of_value, value.items())) + size_immediate + elif isinstance(value, list) or isinstance(value, tuple) or isinstance(value, set) or isinstance(value, frozenset): + return sum(map(_get_size_of_value, value)) + size_immediate + elif isinstance(value, Table): + return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + size_immediate + elif isinstance(value, Schema): + return _get_size_of_value(value._schema) + size_immediate + elif isinstance(value, Image): + return _get_size_of_value(value._image_tensor) + value._image_tensor.element_size() * value._image_tensor.nelement() + size_immediate + elif isinstance(value, Column): + return _get_size_of_value(value._data) + _get_size_of_value(value._name) + _get_size_of_value(value._type) + size_immediate + elif isinstance(value, Row): + return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + size_immediate + elif isinstance(value, TaggedTable): + return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + _get_size_of_value( + value._features) + _get_size_of_value(value._target) + size_immediate + elif isinstance(value, TimeSeries): + return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + _get_size_of_value(value._time) + _get_size_of_value(value._features) + _get_size_of_value(value._target) + size_immediate + else: + return size_immediate diff --git a/src/safeds_runner/server/pipeline_manager.py b/src/safeds_runner/server/pipeline_manager.py index a316fdb..1b4cd9a 100644 --- a/src/safeds_runner/server/pipeline_manager.py +++ b/src/safeds_runner/server/pipeline_manager.py @@ -58,7 +58,7 @@ def _messages_queue_thread(self) -> threading.Thread: @cached_property def _memoization_map(self) -> MemoizationMap: - return MemoizationMap(self._multiprocessing_manager.dict(), 
self._multiprocessing_manager.dict()) # type: ignore[return-value] + return MemoizationMap(self._multiprocessing_manager.dict(), self._multiprocessing_manager.dict()) # type: ignore[arg-type] def startup(self) -> None: """ From d0e25ff4a4d2d3d7ab9a561cd24859cf86c1a200 Mon Sep 17 00:00:00 2001 From: WinPlay02 Date: Sun, 4 Feb 2024 02:40:26 +0100 Subject: [PATCH 04/22] fix: comment out future functionality not yet present in master docs: fix documentation style: replace print with logging --- src/safeds_runner/server/memoization_map.py | 39 ++++++++++++++------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index 04da42d..178b524 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -1,11 +1,12 @@ """Module that contains the memoization logic and stats.""" +import logging import sys import time from dataclasses import dataclass from typing import Any, Callable from safeds.data.image.containers import Image -from safeds.data.tabular.containers import Table, Column, Row, TaggedTable, TimeSeries +from safeds.data.tabular.containers import Table, Column, Row, TaggedTable # , TimeSeries from safeds.data.tabular.typing import Schema @@ -30,14 +31,24 @@ class MemoizationStats: lookup_time: int memory_size: int - def __str__(self): + def __str__(self) -> str: + """ + Summarizes stats contained in this object. + + Returns + ------- + Summary of stats + """ return f"Last access: {self.last_access}, computation time: {self.computation_time}, lookup time: {self.lookup_time}, memory size: {self.memory_size}" class MemoizationMap: """ - The memoization map handles memoized function calls, looks up stored values, computes new values if needed and calculates and updates statistics. + The memoization map handles memoized function calls. 
+ + This contains looking up stored values, computing new values if needed and calculating and updating statistics. """ + def __init__(self, map_values: dict[tuple[str, tuple[Any], tuple[Any]], Any], map_stats: dict[tuple[str, tuple[Any], tuple[Any]], MemoizationStats]): """ @@ -56,7 +67,7 @@ def __init__(self, map_values: dict[tuple[str, tuple[Any], tuple[Any]], Any], def memoized_function_call(self, function_name: str, function_callable: Callable, parameters: list[Any], hidden_parameters: list[Any]) -> Any: """ - Handles a memoized function call. + Handle a memoized function call. Looks up the stored value, determined by function name, parameters and hidden parameters and returns it if found. If no value is found, computes the value using the provided callable and stores it in the map. @@ -89,11 +100,12 @@ def memoized_function_call(self, function_name: str, function_callable: Callable memoization_stats = MemoizationStats(time_last_access, old_memoization_stats.computation_time, time_compare, old_memoization_stats.memory_size) self.map_stats[key] = memoization_stats - print(f"Updated memoization stats for {function_name}: {memoization_stats}") + logging.info(f"Updated memoization stats for {function_name}: {memoization_stats}") return potential_value except KeyError: - time_compare_end = time.perf_counter_ns() - time_compare = time_compare_end - time_compare_start + pass + time_compare_end = time.perf_counter_ns() + time_compare = time_compare_end - time_compare_start time_compute_start = time.perf_counter_ns() result = function_callable(*parameters) time_compute_end = time.perf_counter_ns() @@ -103,7 +115,7 @@ def memoized_function_call(self, function_name: str, function_callable: Callable value_memory = _get_size_of_value(result) self.map_values[key] = result memoization_stats = MemoizationStats(time_last_access, time_compute, time_compare, value_memory) - print(f"New memoization stats for {function_name}: {memoization_stats}") + logging.info(f"New memoization 
stats for {function_name}: {memoization_stats}") self.map_stats[key] = memoization_stats return result @@ -148,15 +160,18 @@ def _get_size_of_value(value: Any) -> int: elif isinstance(value, Schema): return _get_size_of_value(value._schema) + size_immediate elif isinstance(value, Image): - return _get_size_of_value(value._image_tensor) + value._image_tensor.element_size() * value._image_tensor.nelement() + size_immediate + return _get_size_of_value( + value._image_tensor) + value._image_tensor.element_size() * value._image_tensor.nelement() + size_immediate elif isinstance(value, Column): - return _get_size_of_value(value._data) + _get_size_of_value(value._name) + _get_size_of_value(value._type) + size_immediate + return _get_size_of_value(value._data) + _get_size_of_value(value._name) + _get_size_of_value( + value._type) + size_immediate elif isinstance(value, Row): return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + size_immediate elif isinstance(value, TaggedTable): return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + _get_size_of_value( value._features) + _get_size_of_value(value._target) + size_immediate - elif isinstance(value, TimeSeries): - return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + _get_size_of_value(value._time) + _get_size_of_value(value._features) + _get_size_of_value(value._target) + size_immediate + # elif isinstance(value, TimeSeries): + # return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + _get_size_of_value( + # value._time) + _get_size_of_value(value._features) + _get_size_of_value(value._target) + size_immediate else: return size_immediate From 184d16ab08c865dd3ddb2cdfcaff9aec2c9e44fe Mon Sep 17 00:00:00 2001 From: WinPlay02 Date: Sun, 4 Feb 2024 02:52:55 +0100 Subject: [PATCH 05/22] refactor: move rest of code not sensitive to keyerror to else part of try expression style: remove f-strings from logging statements --- 
src/safeds_runner/server/memoization_map.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index 178b524..0ff0595 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -92,6 +92,9 @@ def memoized_function_call(self, function_name: str, function_callable: Callable time_compare_start = time.perf_counter_ns() try: potential_value = self.map_values[key] + except KeyError: + pass + else: time_compare_end = time.perf_counter_ns() # Use time_ns for absolute time points, as perf_counter_ns does not guarantee any fixed reference-point time_last_access = time.time_ns() @@ -100,10 +103,8 @@ def memoized_function_call(self, function_name: str, function_callable: Callable memoization_stats = MemoizationStats(time_last_access, old_memoization_stats.computation_time, time_compare, old_memoization_stats.memory_size) self.map_stats[key] = memoization_stats - logging.info(f"Updated memoization stats for {function_name}: {memoization_stats}") + logging.info("Updated memoization stats for %s: %s", function_name, memoization_stats) return potential_value - except KeyError: - pass time_compare_end = time.perf_counter_ns() time_compare = time_compare_end - time_compare_start time_compute_start = time.perf_counter_ns() @@ -115,7 +116,7 @@ def memoized_function_call(self, function_name: str, function_callable: Callable value_memory = _get_size_of_value(result) self.map_values[key] = result memoization_stats = MemoizationStats(time_last_access, time_compute, time_compare, value_memory) - logging.info(f"New memoization stats for {function_name}: {memoization_stats}") + logging.info("New memoization stats for %s: %s", function_name, memoization_stats) self.map_stats[key] = memoization_stats return result From 02dc525ddeb4a2eaf141ca380826b8761e7f1fb8 Mon Sep 17 00:00:00 2001 From: WinPlay02 Date: Sun, 4 Feb 2024 03:11:01 
+0100 Subject: [PATCH 06/22] test: add tests for memoization memory usage calculation fix: correct order of memoization memory usage calculation --- src/safeds_runner/server/memoization_map.py | 12 +++---- .../safeds_runner/server/test_memoization.py | 35 +++++++++++++++++++ 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index 0ff0595..007d6f5 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -156,6 +156,12 @@ def _get_size_of_value(value: Any) -> int: return sum(map(_get_size_of_value, value.items())) + size_immediate elif isinstance(value, list) or isinstance(value, tuple) or isinstance(value, set) or isinstance(value, frozenset): return sum(map(_get_size_of_value, value)) + size_immediate + # elif isinstance(value, TimeSeries): + # return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + _get_size_of_value( + # value._time) + _get_size_of_value(value._features) + _get_size_of_value(value._target) + size_immediate + elif isinstance(value, TaggedTable): + return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + _get_size_of_value( + value._features) + _get_size_of_value(value._target) + size_immediate elif isinstance(value, Table): return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + size_immediate elif isinstance(value, Schema): @@ -168,11 +174,5 @@ def _get_size_of_value(value: Any) -> int: value._type) + size_immediate elif isinstance(value, Row): return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + size_immediate - elif isinstance(value, TaggedTable): - return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + _get_size_of_value( - value._features) + _get_size_of_value(value._target) + size_immediate - # elif isinstance(value, TimeSeries): - # return _get_size_of_value(value._data) + 
_get_size_of_value(value._schema) + _get_size_of_value( - # value._time) + _get_size_of_value(value._features) + _get_size_of_value(value._target) + size_immediate else: return size_immediate diff --git a/tests/safeds_runner/server/test_memoization.py b/tests/safeds_runner/server/test_memoization.py index 3f7d874..e014611 100644 --- a/tests/safeds_runner/server/test_memoization.py +++ b/tests/safeds_runner/server/test_memoization.py @@ -5,8 +5,12 @@ from datetime import UTC, datetime from queue import Queue from typing import Any +import base64 import pytest +from safeds.data.image.containers import Image +from safeds.data.tabular.containers import Table + from safeds_runner.server import pipeline_manager, memoization_map from safeds_runner.server.memoization_map import MemoizationMap, MemoizationStats from safeds_runner.server.messages import MessageDataProgram, ProgramMainInformation @@ -91,3 +95,34 @@ def test_file_mtime_exists() -> None: def test_file_mtime_not_exists() -> None: file_mtime = pipeline_manager.runner_filemtime(f"file_not_exists.{datetime.now(tz=UTC).timestamp()}") assert file_mtime is None + + +@pytest.mark.parametrize( + argnames="value,expected_size", + argvalues=[ + (1, 28), + ({}, 64), + ({"a": "b"}, 340), + ([], 56), + ([1, 2, 3], 172), + ((), 40), + ((1, 2, 3), 148), + (set(), 216), + ({1, 2, 3}, 300), + (frozenset(), 216), + (frozenset({1, 2, 3}), 300), + (Table.from_dict({"a": [1, 2], "b": [3.2, 4.0]}), 816), + (Table.from_dict({"a": [1, 2], "b": [3.2, 4.0]}).schema, 564), + (Table.from_dict({"a": [1, 2], "b": [3.2, 4.0]}).get_column("a"), 342), + (Table.from_dict({"a": [1, 2], "b": [3.2, 4.0]}).get_row(0), 800), + (Table.from_dict({"a": [1, 2], "b": [3.2, 4.0]}).tag_columns("a", ["b"]), 1796), + (Image.from_bytes( + base64.b64decode( + "iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAYAAACp8Z5+AAAAD0lEQVQIW2NkQAOMpAsAAADuAAVDMQ2mAAAAAElFTkSuQmCC", + ), + ), 208) + ], + ids=["immediate", "dict_empty", "dict_values", "list_empty", "list_values", 
"tuple_empty", "tuple_values", "set_empty", "set_values", "frozenset_empty", "frozenset_values", "table", "schema", "column", "row", "tagged_table", "image"], +) +def test_memory_usage(value: Any, expected_size: int) -> None: + assert memoization_map._get_size_of_value(value) == expected_size From 61f5c4d7b5a5af0775753a4e25da00a2fec0ca78 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Sun, 4 Feb 2024 02:12:53 +0000 Subject: [PATCH 07/22] style: apply automated linter fixes --- src/safeds_runner/server/memoization_map.py | 58 ++++++++++++++----- .../safeds_runner/server/test_memoization.py | 56 +++++++++++------- 2 files changed, 78 insertions(+), 36 deletions(-) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index 007d6f5..4d230ed 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -1,12 +1,14 @@ """Module that contains the memoization logic and stats.""" + import logging import sys import time +from collections.abc import Callable from dataclasses import dataclass -from typing import Any, Callable +from typing import Any from safeds.data.image.containers import Image -from safeds.data.tabular.containers import Table, Column, Row, TaggedTable # , TimeSeries +from safeds.data.tabular.containers import Column, Row, Table, TaggedTable # , TimeSeries from safeds.data.tabular.typing import Schema @@ -26,6 +28,7 @@ class MemoizationStats: memory_size Amount of memory the memoized value takes up in bytes """ + last_access: int computation_time: int lookup_time: int @@ -39,7 +42,10 @@ def __str__(self) -> str: ------- Summary of stats """ - return f"Last access: {self.last_access}, computation time: {self.computation_time}, lookup time: {self.lookup_time}, memory size: {self.memory_size}" + return ( + f"Last access: {self.last_access}, computation time: {self.computation_time}, lookup time:" + f" 
{self.lookup_time}, memory size: {self.memory_size}" + ) class MemoizationMap: @@ -49,8 +55,11 @@ class MemoizationMap: This contains looking up stored values, computing new values if needed and calculating and updating statistics. """ - def __init__(self, map_values: dict[tuple[str, tuple[Any], tuple[Any]], Any], - map_stats: dict[tuple[str, tuple[Any], tuple[Any]], MemoizationStats]): + def __init__( + self, + map_values: dict[tuple[str, tuple[Any], tuple[Any]], Any], + map_stats: dict[tuple[str, tuple[Any], tuple[Any]], MemoizationStats], + ): """ Create a new memoization map using a value store dictionary and a stats dictionary. @@ -64,8 +73,9 @@ def __init__(self, map_values: dict[tuple[str, tuple[Any], tuple[Any]], Any], self.map_values: dict[tuple[str, tuple[Any], tuple[Any]], Any] = map_values self.map_stats: dict[tuple[str, tuple[Any], tuple[Any]], MemoizationStats] = map_stats - def memoized_function_call(self, function_name: str, function_callable: Callable, parameters: list[Any], - hidden_parameters: list[Any]) -> Any: + def memoized_function_call( + self, function_name: str, function_callable: Callable, parameters: list[Any], hidden_parameters: list[Any], + ) -> Any: """ Handle a memoized function call. 
@@ -100,8 +110,12 @@ def memoized_function_call(self, function_name: str, function_callable: Callable time_last_access = time.time_ns() time_compare = time_compare_end - time_compare_start old_memoization_stats = self.map_stats[key] - memoization_stats = MemoizationStats(time_last_access, old_memoization_stats.computation_time, - time_compare, old_memoization_stats.memory_size) + memoization_stats = MemoizationStats( + time_last_access, + old_memoization_stats.computation_time, + time_compare, + old_memoization_stats.memory_size, + ) self.map_stats[key] = memoization_stats logging.info("Updated memoization stats for %s: %s", function_name, memoization_stats) return potential_value @@ -154,24 +168,36 @@ def _get_size_of_value(value: Any) -> int: size_immediate = sys.getsizeof(value) if isinstance(value, dict): return sum(map(_get_size_of_value, value.items())) + size_immediate - elif isinstance(value, list) or isinstance(value, tuple) or isinstance(value, set) or isinstance(value, frozenset): + elif isinstance(value, frozenset | list | set | tuple): return sum(map(_get_size_of_value, value)) + size_immediate # elif isinstance(value, TimeSeries): # return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + _get_size_of_value( # value._time) + _get_size_of_value(value._features) + _get_size_of_value(value._target) + size_immediate elif isinstance(value, TaggedTable): - return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + _get_size_of_value( - value._features) + _get_size_of_value(value._target) + size_immediate + return ( + _get_size_of_value(value._data) + + _get_size_of_value(value._schema) + + _get_size_of_value(value._features) + + _get_size_of_value(value._target) + + size_immediate + ) elif isinstance(value, Table): return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + size_immediate elif isinstance(value, Schema): return _get_size_of_value(value._schema) + size_immediate elif isinstance(value, 
Image): - return _get_size_of_value( - value._image_tensor) + value._image_tensor.element_size() * value._image_tensor.nelement() + size_immediate + return ( + _get_size_of_value(value._image_tensor) + + value._image_tensor.element_size() * value._image_tensor.nelement() + + size_immediate + ) elif isinstance(value, Column): - return _get_size_of_value(value._data) + _get_size_of_value(value._name) + _get_size_of_value( - value._type) + size_immediate + return ( + _get_size_of_value(value._data) + + _get_size_of_value(value._name) + + _get_size_of_value(value._type) + + size_immediate + ) elif isinstance(value, Row): return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + size_immediate else: diff --git a/tests/safeds_runner/server/test_memoization.py b/tests/safeds_runner/server/test_memoization.py index e014611..796a013 100644 --- a/tests/safeds_runner/server/test_memoization.py +++ b/tests/safeds_runner/server/test_memoization.py @@ -1,3 +1,4 @@ +import base64 import sys import tempfile import time @@ -5,13 +6,11 @@ from datetime import UTC, datetime from queue import Queue from typing import Any -import base64 import pytest from safeds.data.image.containers import Image from safeds.data.tabular.containers import Table - -from safeds_runner.server import pipeline_manager, memoization_map +from safeds_runner.server import memoization_map, pipeline_manager from safeds_runner.server.memoization_map import MemoizationMap, MemoizationStats from safeds_runner.server.messages import MessageDataProgram, ProgramMainInformation from safeds_runner.server.pipeline_manager import PipelineProcess @@ -38,20 +37,16 @@ def test_memoization_already_present_values( {}, MemoizationMap({}, {}), ) - pipeline_manager.current_pipeline.get_memoization_map().map_values[ - ( - function_name, - memoization_map._convert_list_to_tuple(params), - memoization_map._convert_list_to_tuple(hidden_params), - ) - ] = expected_result - 
pipeline_manager.current_pipeline.get_memoization_map().map_stats[ - ( - function_name, - memoization_map._convert_list_to_tuple(params), - memoization_map._convert_list_to_tuple(hidden_params), - ) - ] = MemoizationStats(time.perf_counter_ns(), 0, 0, sys.getsizeof(expected_result)) + pipeline_manager.current_pipeline.get_memoization_map().map_values[( + function_name, + memoization_map._convert_list_to_tuple(params), + memoization_map._convert_list_to_tuple(hidden_params), + )] = expected_result + pipeline_manager.current_pipeline.get_memoization_map().map_stats[( + function_name, + memoization_map._convert_list_to_tuple(params), + memoization_map._convert_list_to_tuple(hidden_params), + )] = MemoizationStats(time.perf_counter_ns(), 0, 0, sys.getsizeof(expected_result)) result = pipeline_manager.runner_memoized_function_call(function_name, lambda *_: None, params, hidden_params) assert result == expected_result @@ -116,13 +111,34 @@ def test_file_mtime_not_exists() -> None: (Table.from_dict({"a": [1, 2], "b": [3.2, 4.0]}).get_column("a"), 342), (Table.from_dict({"a": [1, 2], "b": [3.2, 4.0]}).get_row(0), 800), (Table.from_dict({"a": [1, 2], "b": [3.2, 4.0]}).tag_columns("a", ["b"]), 1796), - (Image.from_bytes( + ( + Image.from_bytes( base64.b64decode( "iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAYAAACp8Z5+AAAAD0lEQVQIW2NkQAOMpAsAAADuAAVDMQ2mAAAAAElFTkSuQmCC", ), - ), 208) + ), + 208, + ), + ], + ids=[ + "immediate", + "dict_empty", + "dict_values", + "list_empty", + "list_values", + "tuple_empty", + "tuple_values", + "set_empty", + "set_values", + "frozenset_empty", + "frozenset_values", + "table", + "schema", + "column", + "row", + "tagged_table", + "image", ], - ids=["immediate", "dict_empty", "dict_values", "list_empty", "list_values", "tuple_empty", "tuple_values", "set_empty", "set_values", "frozenset_empty", "frozenset_values", "table", "schema", "column", "row", "tagged_table", "image"], ) def test_memory_usage(value: Any, expected_size: int) -> None: assert 
memoization_map._get_size_of_value(value) == expected_size From 9b2d08d450fddcf2d3c10eeedf43aa12b6a0be54 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Sun, 4 Feb 2024 02:14:20 +0000 Subject: [PATCH 08/22] style: apply automated linter fixes --- src/safeds_runner/server/memoization_map.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index 4d230ed..33f015d 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -74,7 +74,11 @@ def __init__( self.map_stats: dict[tuple[str, tuple[Any], tuple[Any]], MemoizationStats] = map_stats def memoized_function_call( - self, function_name: str, function_callable: Callable, parameters: list[Any], hidden_parameters: list[Any], + self, + function_name: str, + function_callable: Callable, + parameters: list[Any], + hidden_parameters: list[Any], ) -> Any: """ Handle a memoized function call. 
From d86aeb92808e759de5279edafd6a4fa54a35bdd1 Mon Sep 17 00:00:00 2001 From: WinPlay02 Date: Sun, 4 Feb 2024 03:30:06 +0100 Subject: [PATCH 09/22] style: disable lines that are covered but coverage is broken --- src/safeds_runner/server/memoization_map.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index 33f015d..56795f4 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -42,9 +42,9 @@ def __str__(self) -> str: ------- Summary of stats """ - return ( - f"Last access: {self.last_access}, computation time: {self.computation_time}, lookup time:" - f" {self.lookup_time}, memory size: {self.memory_size}" + return ( # pragma: no cover + f"Last access: {self.last_access}, computation time: {self.computation_time}, lookup time:" # pragma: no cover + f" {self.lookup_time}, memory size: {self.memory_size}" # pragma: no cover ) From b210b9ab6206d4fcd06cd47997ffb5bbeb7fb2dd Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Sun, 4 Feb 2024 02:32:13 +0000 Subject: [PATCH 10/22] style: apply automated linter fixes --- src/safeds_runner/server/memoization_map.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index 56795f4..29dd1ec 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -42,8 +42,8 @@ def __str__(self) -> str: ------- Summary of stats """ - return ( # pragma: no cover - f"Last access: {self.last_access}, computation time: {self.computation_time}, lookup time:" # pragma: no cover + return ( # pragma: no cover # pragma: no cover + f"Last access: {self.last_access}, computation time: {self.computation_time}, lookup time:" f" {self.lookup_time}, memory size: {self.memory_size}" 
# pragma: no cover ) From 506edbc4b9aed63722909841ebfa55f93ac0be6b Mon Sep 17 00:00:00 2001 From: WinPlay02 Date: Mon, 5 Feb 2024 16:38:30 +0100 Subject: [PATCH 11/22] style: remove unneeded coverage hints --- src/safeds_runner/server/memoization_map.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index 56795f4..5e98378 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -43,8 +43,8 @@ def __str__(self) -> str: Summary of stats """ return ( # pragma: no cover - f"Last access: {self.last_access}, computation time: {self.computation_time}, lookup time:" # pragma: no cover - f" {self.lookup_time}, memory size: {self.memory_size}" # pragma: no cover + f"Last access: {self.last_access}, computation time: {self.computation_time}, lookup time:" + f" {self.lookup_time}, memory size: {self.memory_size}" ) From 638963ad58887e76b3d49e6bd245db0a1777d5bd Mon Sep 17 00:00:00 2001 From: WinPlay02 Date: Tue, 6 Feb 2024 14:58:19 +0100 Subject: [PATCH 12/22] build: update to safe-ds 0.19 --- poetry.lock | 90 +++++++++++++++++++++++++------------------------- pyproject.toml | 2 +- 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/poetry.lock b/poetry.lock index 270625d..a85e59b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -472,13 +472,13 @@ woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] [[package]] name = "fsspec" -version = "2023.12.2" +version = "2024.2.0" description = "File-system specification" optional = false python-versions = ">=3.8" files = [ - {file = "fsspec-2023.12.2-py3-none-any.whl", hash = "sha256:d800d87f72189a745fa3d6b033b9dc4a34ad069f60ca60b943a63599f5501960"}, - {file = "fsspec-2023.12.2.tar.gz", hash = "sha256:8548d39e8810b59c38014934f6b31e57f40c1b20f911f4cc2b85389c7e9bf0cb"}, + {file = "fsspec-2024.2.0-py3-none-any.whl", hash = 
"sha256:817f969556fa5916bc682e02ca2045f96ff7f586d45110fcb76022063ad2c7d8"}, + {file = "fsspec-2024.2.0.tar.gz", hash = "sha256:b6ad1a679f760dda52b1168c859d01b7b80648ea6f7f7c7f5a8a91dc3f3ecb84"}, ] [package.extras] @@ -496,7 +496,7 @@ github = ["requests"] gs = ["gcsfs"] gui = ["panel"] hdfs = ["pyarrow (>=1)"] -http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"] +http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)"] libarchive = ["libarchive-c"] oci = ["ocifs"] s3 = ["s3fs"] @@ -1245,47 +1245,47 @@ test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] [[package]] name = "numpy" -version = "1.26.3" +version = "1.26.4" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.9" files = [ - {file = "numpy-1.26.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:806dd64230dbbfaca8a27faa64e2f414bf1c6622ab78cc4264f7f5f028fee3bf"}, - {file = "numpy-1.26.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02f98011ba4ab17f46f80f7f8f1c291ee7d855fcef0a5a98db80767a468c85cd"}, - {file = "numpy-1.26.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d45b3ec2faed4baca41c76617fcdcfa4f684ff7a151ce6fc78ad3b6e85af0a6"}, - {file = "numpy-1.26.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdd2b45bf079d9ad90377048e2747a0c82351989a2165821f0c96831b4a2a54b"}, - {file = "numpy-1.26.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:211ddd1e94817ed2d175b60b6374120244a4dd2287f4ece45d49228b4d529178"}, - {file = "numpy-1.26.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b1240f767f69d7c4c8a29adde2310b871153df9b26b5cb2b54a561ac85146485"}, - {file = "numpy-1.26.3-cp310-cp310-win32.whl", hash = "sha256:21a9484e75ad018974a2fdaa216524d64ed4212e418e0a551a2d83403b0531d3"}, - {file = "numpy-1.26.3-cp310-cp310-win_amd64.whl", hash = "sha256:9e1591f6ae98bcfac2a4bbf9221c0b92ab49762228f38287f6eeb5f3f55905ce"}, - {file = "numpy-1.26.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:b831295e5472954104ecb46cd98c08b98b49c69fdb7040483aff799a755a7374"}, - {file = "numpy-1.26.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9e87562b91f68dd8b1c39149d0323b42e0082db7ddb8e934ab4c292094d575d6"}, - {file = "numpy-1.26.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c66d6fec467e8c0f975818c1796d25c53521124b7cfb760114be0abad53a0a2"}, - {file = "numpy-1.26.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f25e2811a9c932e43943a2615e65fc487a0b6b49218899e62e426e7f0a57eeda"}, - {file = "numpy-1.26.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:af36e0aa45e25c9f57bf684b1175e59ea05d9a7d3e8e87b7ae1a1da246f2767e"}, - {file = "numpy-1.26.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:51c7f1b344f302067b02e0f5b5d2daa9ed4a721cf49f070280ac202738ea7f00"}, - {file = "numpy-1.26.3-cp311-cp311-win32.whl", hash = "sha256:7ca4f24341df071877849eb2034948459ce3a07915c2734f1abb4018d9c49d7b"}, - {file = "numpy-1.26.3-cp311-cp311-win_amd64.whl", hash = "sha256:39763aee6dfdd4878032361b30b2b12593fb445ddb66bbac802e2113eb8a6ac4"}, - {file = "numpy-1.26.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a7081fd19a6d573e1a05e600c82a1c421011db7935ed0d5c483e9dd96b99cf13"}, - {file = "numpy-1.26.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12c70ac274b32bc00c7f61b515126c9205323703abb99cd41836e8125ea0043e"}, - {file = "numpy-1.26.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f784e13e598e9594750b2ef6729bcd5a47f6cfe4a12cca13def35e06d8163e3"}, - {file = "numpy-1.26.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f24750ef94d56ce6e33e4019a8a4d68cfdb1ef661a52cdaee628a56d2437419"}, - {file = "numpy-1.26.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:77810ef29e0fb1d289d225cabb9ee6cf4d11978a00bb99f7f8ec2132a84e0166"}, - {file = "numpy-1.26.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:8ed07a90f5450d99dad60d3799f9c03c6566709bd53b497eb9ccad9a55867f36"}, - {file = "numpy-1.26.3-cp312-cp312-win32.whl", hash = "sha256:f73497e8c38295aaa4741bdfa4fda1a5aedda5473074369eca10626835445511"}, - {file = "numpy-1.26.3-cp312-cp312-win_amd64.whl", hash = "sha256:da4b0c6c699a0ad73c810736303f7fbae483bcb012e38d7eb06a5e3b432c981b"}, - {file = "numpy-1.26.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1666f634cb3c80ccbd77ec97bc17337718f56d6658acf5d3b906ca03e90ce87f"}, - {file = "numpy-1.26.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:18c3319a7d39b2c6a9e3bb75aab2304ab79a811ac0168a671a62e6346c29b03f"}, - {file = "numpy-1.26.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b7e807d6888da0db6e7e75838444d62495e2b588b99e90dd80c3459594e857b"}, - {file = "numpy-1.26.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4d362e17bcb0011738c2d83e0a65ea8ce627057b2fdda37678f4374a382a137"}, - {file = "numpy-1.26.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b8c275f0ae90069496068c714387b4a0eba5d531aace269559ff2b43655edd58"}, - {file = "numpy-1.26.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cc0743f0302b94f397a4a65a660d4cd24267439eb16493fb3caad2e4389bccbb"}, - {file = "numpy-1.26.3-cp39-cp39-win32.whl", hash = "sha256:9bc6d1a7f8cedd519c4b7b1156d98e051b726bf160715b769106661d567b3f03"}, - {file = "numpy-1.26.3-cp39-cp39-win_amd64.whl", hash = "sha256:867e3644e208c8922a3be26fc6bbf112a035f50f0a86497f98f228c50c607bb2"}, - {file = "numpy-1.26.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3c67423b3703f8fbd90f5adaa37f85b5794d3366948efe9a5190a5f3a83fc34e"}, - {file = "numpy-1.26.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46f47ee566d98849323f01b349d58f2557f02167ee301e5e28809a8c0e27a2d0"}, - {file = "numpy-1.26.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a8474703bffc65ca15853d5fd4d06b18138ae90c17c8d12169968e998e448bb5"}, - {file = 
"numpy-1.26.3.tar.gz", hash = "sha256:697df43e2b6310ecc9d95f05d5ef20eacc09c7c4ecc9da3f235d39e71b7da1e4"}, + {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, + {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2"}, + {file = "numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07"}, + {file = "numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = 
"sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a"}, + {file = "numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20"}, + {file = "numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0"}, + {file = "numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110"}, + {file = "numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be"}, + {file = 
"numpy-1.26.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c"}, + {file = "numpy-1.26.4-cp39-cp39-win32.whl", hash = "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6"}, + {file = "numpy-1.26.4-cp39-cp39-win_amd64.whl", hash = "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"}, + {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, ] [[package]] @@ -2208,13 +2208,13 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "safe-ds" -version = "0.18.0" +version = "0.19.0" description = "A user-friendly library for Data Science in Python." 
optional = false python-versions = ">=3.11,<3.13" files = [ - {file = "safe_ds-0.18.0-py3-none-any.whl", hash = "sha256:38a34817287dbc2c0d3818adc0ca23a9fdd304f57c41126d925522dad6f89c55"}, - {file = "safe_ds-0.18.0.tar.gz", hash = "sha256:506f00d1c77a131ed9dd905450be12a1b6fffc3ecb4a0828a295809b16a2e7c0"}, + {file = "safe_ds-0.19.0-py3-none-any.whl", hash = "sha256:a4d210b93f0d1d9e22e23ae90bdec11f102f81dd92acce7d6a817f7f653d1092"}, + {file = "safe_ds-0.19.0.tar.gz", hash = "sha256:f6f4917bdfb5b801f0c428e557241d96f132fd2ca387d66e57f9f2f54ab7ed26"}, ] [package.dependencies] @@ -2754,4 +2754,4 @@ h11 = ">=0.9.0,<1" [metadata] lock-version = "2.0" python-versions = "^3.11,<3.13" -content-hash = "c370f24e30bdf74feb5a8c034ee52a9c86858d472826504c5db0282597862190" +content-hash = "febafee8ad3da24e2a3bcd39014b9f8283fcdfbe0108588430338aca62f02e69" diff --git a/pyproject.toml b/pyproject.toml index 3c94a90..4553a37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ safe-ds-runner = "safeds_runner.main:main" [tool.poetry.dependencies] python = "^3.11,<3.13" -safe-ds = ">=0.18,<0.19" +safe-ds = ">=0.19,<0.20" hypercorn = "^0.16.0" quart = "^0.19.4" From f4522826ab544cf6c2982c23a3fbee3fd30670fd Mon Sep 17 00:00:00 2001 From: WinPlay02 Date: Tue, 6 Feb 2024 15:03:15 +0100 Subject: [PATCH 13/22] feat: update memoization map memory usage calculation test: update memory usage test to relative comparison, as the size is not constant across python versions --- src/safeds_runner/server/memoization_map.py | 36 +-------------- .../safeds_runner/server/test_memoization.py | 46 +++++-------------- 2 files changed, 13 insertions(+), 69 deletions(-) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index 5e98378..6f38110 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -7,10 +7,6 @@ from dataclasses import dataclass from typing import Any -from 
safeds.data.image.containers import Image -from safeds.data.tabular.containers import Column, Row, Table, TaggedTable # , TimeSeries -from safeds.data.tabular.typing import Schema - @dataclass(frozen=True) class MemoizationStats: @@ -171,38 +167,8 @@ def _get_size_of_value(value: Any) -> int: """ size_immediate = sys.getsizeof(value) if isinstance(value, dict): - return sum(map(_get_size_of_value, value.items())) + size_immediate + return sum(map(_get_size_of_value, value.keys())) + sum(map(_get_size_of_value, value.values())) + size_immediate elif isinstance(value, frozenset | list | set | tuple): return sum(map(_get_size_of_value, value)) + size_immediate - # elif isinstance(value, TimeSeries): - # return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + _get_size_of_value( - # value._time) + _get_size_of_value(value._features) + _get_size_of_value(value._target) + size_immediate - elif isinstance(value, TaggedTable): - return ( - _get_size_of_value(value._data) - + _get_size_of_value(value._schema) - + _get_size_of_value(value._features) - + _get_size_of_value(value._target) - + size_immediate - ) - elif isinstance(value, Table): - return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + size_immediate - elif isinstance(value, Schema): - return _get_size_of_value(value._schema) + size_immediate - elif isinstance(value, Image): - return ( - _get_size_of_value(value._image_tensor) - + value._image_tensor.element_size() * value._image_tensor.nelement() - + size_immediate - ) - elif isinstance(value, Column): - return ( - _get_size_of_value(value._data) - + _get_size_of_value(value._name) - + _get_size_of_value(value._type) - + size_immediate - ) - elif isinstance(value, Row): - return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + size_immediate else: return size_immediate diff --git a/tests/safeds_runner/server/test_memoization.py b/tests/safeds_runner/server/test_memoization.py index 796a013..fda9fab 
100644 --- a/tests/safeds_runner/server/test_memoization.py +++ b/tests/safeds_runner/server/test_memoization.py @@ -1,4 +1,3 @@ -import base64 import sys import tempfile import time @@ -8,8 +7,6 @@ from typing import Any import pytest -from safeds.data.image.containers import Image -from safeds.data.tabular.containers import Table from safeds_runner.server import memoization_map, pipeline_manager from safeds_runner.server.memoization_map import MemoizationMap, MemoizationStats from safeds_runner.server.messages import MessageDataProgram, ProgramMainInformation @@ -95,30 +92,17 @@ def test_file_mtime_not_exists() -> None: @pytest.mark.parametrize( argnames="value,expected_size", argvalues=[ - (1, 28), - ({}, 64), - ({"a": "b"}, 340), - ([], 56), - ([1, 2, 3], 172), - ((), 40), - ((1, 2, 3), 148), - (set(), 216), - ({1, 2, 3}, 300), - (frozenset(), 216), - (frozenset({1, 2, 3}), 300), - (Table.from_dict({"a": [1, 2], "b": [3.2, 4.0]}), 816), - (Table.from_dict({"a": [1, 2], "b": [3.2, 4.0]}).schema, 564), - (Table.from_dict({"a": [1, 2], "b": [3.2, 4.0]}).get_column("a"), 342), - (Table.from_dict({"a": [1, 2], "b": [3.2, 4.0]}).get_row(0), 800), - (Table.from_dict({"a": [1, 2], "b": [3.2, 4.0]}).tag_columns("a", ["b"]), 1796), - ( - Image.from_bytes( - base64.b64decode( - "iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAYAAACp8Z5+AAAAD0lEQVQIW2NkQAOMpAsAAADuAAVDMQ2mAAAAAElFTkSuQmCC", - ), - ), - 208, - ), + (1, 0), + ({}, 0), + ({"a": "b"}, sys.getsizeof({})), + ([], 0), + ([1, 2, 3], sys.getsizeof([])), + ((), 0), + ((1, 2, 3), sys.getsizeof(())), + (set(), 0), + ({1, 2, 3}, sys.getsizeof(set())), + (frozenset(), 0), + (frozenset({1, 2, 3}), sys.getsizeof(frozenset())), ], ids=[ "immediate", @@ -132,13 +116,7 @@ def test_file_mtime_not_exists() -> None: "set_values", "frozenset_empty", "frozenset_values", - "table", - "schema", - "column", - "row", - "tagged_table", - "image", ], ) def test_memory_usage(value: Any, expected_size: int) -> None: - assert 
memoization_map._get_size_of_value(value) == expected_size + assert memoization_map._get_size_of_value(value) > expected_size From a1adeb162d59416c85ca04a88fc41c4e98c273e4 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Tue, 6 Feb 2024 14:05:20 +0000 Subject: [PATCH 14/22] style: apply automated linter fixes --- src/safeds_runner/server/memoization_map.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index 6f38110..407c15f 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -167,7 +167,9 @@ def _get_size_of_value(value: Any) -> int: """ size_immediate = sys.getsizeof(value) if isinstance(value, dict): - return sum(map(_get_size_of_value, value.keys())) + sum(map(_get_size_of_value, value.values())) + size_immediate + return ( + sum(map(_get_size_of_value, value.keys())) + sum(map(_get_size_of_value, value.values())) + size_immediate + ) elif isinstance(value, frozenset | list | set | tuple): return sum(map(_get_size_of_value, value)) + size_immediate else: From 43dcb93bfc364a30510db4be46e3a1141d29ebde Mon Sep 17 00:00:00 2001 From: WinPlay02 Date: Tue, 6 Feb 2024 16:55:24 +0100 Subject: [PATCH 15/22] refactor: split memoization function into multiple parts feat: track multiple stats per function instead of replacing them test: import directly, if possible --- src/safeds_runner/server/memoization_map.py | 159 ++++++++++++++---- .../safeds_runner/server/test_memoization.py | 30 ++-- 2 files changed, 144 insertions(+), 45 deletions(-) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index 6f38110..f971a62 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -1,11 +1,14 @@ """Module that contains the memoization logic and stats.""" 
+import dataclasses import logging import sys import time from collections.abc import Callable from dataclasses import dataclass -from typing import Any +from typing import Any, TypeAlias + +MemoizationKey: TypeAlias = tuple[str, tuple[Any], tuple[Any]] @dataclass(frozen=True) @@ -17,18 +20,18 @@ class MemoizationStats: ---------- last_access Absolute timestamp since the unix epoch of the last access to the memoized value in nanoseconds - computation_time - Duration the computation of the value took in nanoseconds lookup_time Duration the lookup of the value took in nanoseconds (key comparison + IPC) + computation_time + Duration the computation of the value took in nanoseconds memory_size Amount of memory the memoized value takes up in bytes """ - last_access: int - computation_time: int - lookup_time: int - memory_size: int + last_access: list[int] = dataclasses.field(default_factory=list) + lookup_time: list[int] = dataclasses.field(default_factory=list) + computation_time: list[int] = dataclasses.field(default_factory=list) + memory_size: list[int] = dataclasses.field(default_factory=list) def __str__(self) -> str: """ @@ -44,6 +47,26 @@ def __str__(self) -> str: ) +def _create_memoization_key(function_name: str, parameters: list[Any], hidden_parameters: list[Any]) -> MemoizationKey: + """ + Convert values provided to a memoized function call to a memoization key. + + Parameters + ---------- + function_name + Fully qualified function name + parameters + List of parameters passed to the function + hidden_parameters + List of parameters not passed to the function + + Returns + ------- + A memoization key, which contains the lists converted to tuples + """ + return function_name, _convert_list_to_tuple(parameters), _convert_list_to_tuple(hidden_parameters) + + class MemoizationMap: """ The memoization map handles memoized function calls. 
@@ -53,8 +76,8 @@ class MemoizationMap: def __init__( self, - map_values: dict[tuple[str, tuple[Any], tuple[Any]], Any], - map_stats: dict[tuple[str, tuple[Any], tuple[Any]], MemoizationStats], + map_values: dict[MemoizationKey, Any], + map_stats: dict[str, MemoizationStats], ): """ Create a new memoization map using a value store dictionary and a stats dictionary. @@ -66,8 +89,8 @@ def __init__( map_stats Stats dictionary """ - self.map_values: dict[tuple[str, tuple[Any], tuple[Any]], Any] = map_values - self.map_stats: dict[tuple[str, tuple[Any], tuple[Any]], MemoizationStats] = map_stats + self._map_values: dict[MemoizationKey, Any] = map_values + self._map_stats: dict[str, MemoizationStats] = map_stats def memoized_function_call( self, @@ -98,42 +121,119 @@ def memoized_function_call( ------- The result of the specified function, if any exists """ - key = (function_name, _convert_list_to_tuple(parameters), _convert_list_to_tuple(hidden_parameters)) time_compare_start = time.perf_counter_ns() + key = _create_memoization_key(function_name, parameters, hidden_parameters) + potential_value = self._lookup_value(key, time_compare_start) + if potential_value is not None: + return potential_value + return self._memoize_new_value(key, function_callable, time_compare_start) + + def _lookup_value(self, key: MemoizationKey, time_compare_start: int) -> Any | None: + """ + Lookup a potentially existing value from the memoization cache. + + Parameters + ---------- + key + Memoization Key + time_compare_start + Point in time where the comparison time started + + Returns + ------- + The value corresponding to the provided memoization key, if any exists. 
+ """ try: - potential_value = self.map_values[key] + potential_value = self._map_values[key] except KeyError: - pass + return None else: time_compare_end = time.perf_counter_ns() # Use time_ns for absolute time points, as perf_counter_ns does not guarantee any fixed reference-point time_last_access = time.time_ns() time_compare = time_compare_end - time_compare_start - old_memoization_stats = self.map_stats[key] - memoization_stats = MemoizationStats( - time_last_access, - old_memoization_stats.computation_time, - time_compare, - old_memoization_stats.memory_size, - ) - self.map_stats[key] = memoization_stats - logging.info("Updated memoization stats for %s: %s", function_name, memoization_stats) + self._update_stats_on_hit(key[0], time_last_access, time_compare) + logging.info("Updated memoization stats for %s: (last_access=%s, time_compare=%s)", key[0], + time_last_access, time_compare) return potential_value + + def _memoize_new_value(self, key: MemoizationKey, function_callable: Callable, time_compare_start: int) -> Any: + """ + Memoize a new function call and return computed the result. 
+ + Parameters + ---------- + key + Memoization Key + function_callable + Function that will be called + time_compare_start + Point in time where the comparison time started + + Returns + ------- + The newly computed value corresponding to the provided memoization key + """ time_compare_end = time.perf_counter_ns() time_compare = time_compare_end - time_compare_start time_compute_start = time.perf_counter_ns() - result = function_callable(*parameters) + result = function_callable(*key[1]) time_compute_end = time.perf_counter_ns() # Use time_ns for absolute time points, as perf_counter_ns does not guarantee any fixed reference-point time_last_access = time.time_ns() time_compute = time_compute_end - time_compute_start value_memory = _get_size_of_value(result) - self.map_values[key] = result - memoization_stats = MemoizationStats(time_last_access, time_compute, time_compare, value_memory) - logging.info("New memoization stats for %s: %s", function_name, memoization_stats) - self.map_stats[key] = memoization_stats + self._map_values[key] = result + self._update_stats_on_miss(key[0], time_last_access, time_compare, time_compute, value_memory) + logging.info("New memoization stats for %s: (last_access=%s, time_compare=%s, time_compute=%s, memory=%s)", + key[0], time_last_access, time_compare, time_compute, value_memory) return result + def _update_stats_on_hit(self, function_name: str, last_access: int, time_compare: int) -> None: + """ + Update the memoization stats on a cache hit. 
+ + Parameters + ---------- + function_name + Fully qualified function name + last_access + Timestamp where this value was last accessed + time_compare + Duration the comparison took + """ + old_memoization_stats = self._map_stats[function_name] + old_memoization_stats.last_access.append(last_access) + old_memoization_stats.lookup_time.append(time_compare) + self._map_stats[function_name] = old_memoization_stats + + def _update_stats_on_miss(self, function_name: str, last_access: int, time_compare: int, time_computation: int, + memory_size: int) -> None: + """ + Update the memoization stats on a cache miss. + + Parameters + ---------- + function_name + Fully qualified function name + last_access + Timestamp where this value was last accessed + time_compare + Duration the comparison took + time_computation + Duration the computation of the new value took + memory_size + Memory the newly computed value takes up + """ + old_memoization_stats = self._map_stats.get(function_name) + if old_memoization_stats is None: + old_memoization_stats = MemoizationStats() + old_memoization_stats.last_access.append(last_access) + old_memoization_stats.lookup_time.append(time_compare) + old_memoization_stats.computation_time.append(time_computation) + old_memoization_stats.memory_size.append(memory_size) + self._map_stats[function_name] = old_memoization_stats + def _convert_list_to_tuple(values: list) -> tuple: """ @@ -167,7 +267,8 @@ def _get_size_of_value(value: Any) -> int: """ size_immediate = sys.getsizeof(value) if isinstance(value, dict): - return sum(map(_get_size_of_value, value.keys())) + sum(map(_get_size_of_value, value.values())) + size_immediate + return sum(map(_get_size_of_value, value.keys())) + sum( + map(_get_size_of_value, value.values())) + size_immediate elif isinstance(value, frozenset | list | set | tuple): return sum(map(_get_size_of_value, value)) + size_immediate else: diff --git a/tests/safeds_runner/server/test_memoization.py 
b/tests/safeds_runner/server/test_memoization.py index fda9fab..9a084ee 100644 --- a/tests/safeds_runner/server/test_memoization.py +++ b/tests/safeds_runner/server/test_memoization.py @@ -7,10 +7,12 @@ from typing import Any import pytest -from safeds_runner.server import memoization_map, pipeline_manager -from safeds_runner.server.memoization_map import MemoizationMap, MemoizationStats + +from safeds_runner.server import pipeline_manager +from safeds_runner.server.memoization_map import MemoizationMap, MemoizationStats, _get_size_of_value, \ + _convert_list_to_tuple from safeds_runner.server.messages import MessageDataProgram, ProgramMainInformation -from safeds_runner.server.pipeline_manager import PipelineProcess +from safeds_runner.server.pipeline_manager import PipelineProcess, runner_filemtime, runner_memoized_function_call @pytest.mark.parametrize( @@ -34,16 +36,12 @@ def test_memoization_already_present_values( {}, MemoizationMap({}, {}), ) - pipeline_manager.current_pipeline.get_memoization_map().map_values[( + pipeline_manager.current_pipeline.get_memoization_map()._map_values[( function_name, - memoization_map._convert_list_to_tuple(params), - memoization_map._convert_list_to_tuple(hidden_params), + _convert_list_to_tuple(params), + _convert_list_to_tuple(hidden_params), )] = expected_result - pipeline_manager.current_pipeline.get_memoization_map().map_stats[( - function_name, - memoization_map._convert_list_to_tuple(params), - memoization_map._convert_list_to_tuple(hidden_params), - )] = MemoizationStats(time.perf_counter_ns(), 0, 0, sys.getsizeof(expected_result)) + pipeline_manager.current_pipeline.get_memoization_map()._map_stats[function_name] = MemoizationStats([time.perf_counter_ns()], [], [], [sys.getsizeof(expected_result)]) result = pipeline_manager.runner_memoized_function_call(function_name, lambda *_: None, params, hidden_params) assert result == expected_result @@ -71,21 +69,21 @@ def test_memoization_not_present_values( MemoizationMap({}, 
{}), ) # Save value in map - result = pipeline_manager.runner_memoized_function_call(function_name, function, params, hidden_params) + result = runner_memoized_function_call(function_name, function, params, hidden_params) assert result == expected_result # Test if value is actually saved by calling another function that does not return the expected result - result2 = pipeline_manager.runner_memoized_function_call(function_name, lambda *_: None, params, hidden_params) + result2 = runner_memoized_function_call(function_name, lambda *_: None, params, hidden_params) assert result2 == expected_result def test_file_mtime_exists() -> None: with tempfile.NamedTemporaryFile() as file: - file_mtime = pipeline_manager.runner_filemtime(file.name) + file_mtime = runner_filemtime(file.name) assert file_mtime is not None def test_file_mtime_not_exists() -> None: - file_mtime = pipeline_manager.runner_filemtime(f"file_not_exists.{datetime.now(tz=UTC).timestamp()}") + file_mtime = runner_filemtime(f"file_not_exists.{datetime.now(tz=UTC).timestamp()}") assert file_mtime is None @@ -119,4 +117,4 @@ def test_file_mtime_not_exists() -> None: ], ) def test_memory_usage(value: Any, expected_size: int) -> None: - assert memoization_map._get_size_of_value(value) > expected_size + assert _get_size_of_value(value) > expected_size From 9ec25db93e1324b6733b2ace456dba9de281b17c Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Tue, 6 Feb 2024 15:59:38 +0000 Subject: [PATCH 16/22] style: apply automated linter fixes --- src/safeds_runner/server/memoization_map.py | 23 ++++++++++++++----- .../safeds_runner/server/test_memoization.py | 13 +++++++---- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index 6ed7574..919b37c 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -153,8 
+153,12 @@ def _lookup_value(self, key: MemoizationKey, time_compare_start: int) -> Any | N time_last_access = time.time_ns() time_compare = time_compare_end - time_compare_start self._update_stats_on_hit(key[0], time_last_access, time_compare) - logging.info("Updated memoization stats for %s: (last_access=%s, time_compare=%s)", key[0], - time_last_access, time_compare) + logging.info( + "Updated memoization stats for %s: (last_access=%s, time_compare=%s)", + key[0], + time_last_access, + time_compare, + ) return potential_value def _memoize_new_value(self, key: MemoizationKey, function_callable: Callable, time_compare_start: int) -> Any: @@ -185,8 +189,14 @@ def _memoize_new_value(self, key: MemoizationKey, function_callable: Callable, t value_memory = _get_size_of_value(result) self._map_values[key] = result self._update_stats_on_miss(key[0], time_last_access, time_compare, time_compute, value_memory) - logging.info("New memoization stats for %s: (last_access=%s, time_compare=%s, time_compute=%s, memory=%s)", - key[0], time_last_access, time_compare, time_compute, value_memory) + logging.info( + "New memoization stats for %s: (last_access=%s, time_compare=%s, time_compute=%s, memory=%s)", + key[0], + time_last_access, + time_compare, + time_compute, + value_memory, + ) return result def _update_stats_on_hit(self, function_name: str, last_access: int, time_compare: int) -> None: @@ -207,8 +217,9 @@ def _update_stats_on_hit(self, function_name: str, last_access: int, time_compar old_memoization_stats.lookup_time.append(time_compare) self._map_stats[function_name] = old_memoization_stats - def _update_stats_on_miss(self, function_name: str, last_access: int, time_compare: int, time_computation: int, - memory_size: int) -> None: + def _update_stats_on_miss( + self, function_name: str, last_access: int, time_compare: int, time_computation: int, memory_size: int, + ) -> None: """ Update the memoization stats on a cache miss. 
diff --git a/tests/safeds_runner/server/test_memoization.py b/tests/safeds_runner/server/test_memoization.py index 9a084ee..b5bb4a9 100644 --- a/tests/safeds_runner/server/test_memoization.py +++ b/tests/safeds_runner/server/test_memoization.py @@ -7,10 +7,13 @@ from typing import Any import pytest - from safeds_runner.server import pipeline_manager -from safeds_runner.server.memoization_map import MemoizationMap, MemoizationStats, _get_size_of_value, \ - _convert_list_to_tuple +from safeds_runner.server.memoization_map import ( + MemoizationMap, + MemoizationStats, + _convert_list_to_tuple, + _get_size_of_value, +) from safeds_runner.server.messages import MessageDataProgram, ProgramMainInformation from safeds_runner.server.pipeline_manager import PipelineProcess, runner_filemtime, runner_memoized_function_call @@ -41,7 +44,9 @@ def test_memoization_already_present_values( _convert_list_to_tuple(params), _convert_list_to_tuple(hidden_params), )] = expected_result - pipeline_manager.current_pipeline.get_memoization_map()._map_stats[function_name] = MemoizationStats([time.perf_counter_ns()], [], [], [sys.getsizeof(expected_result)]) + pipeline_manager.current_pipeline.get_memoization_map()._map_stats[function_name] = MemoizationStats( + [time.perf_counter_ns()], [], [], [sys.getsizeof(expected_result)], + ) result = pipeline_manager.runner_memoized_function_call(function_name, lambda *_: None, params, hidden_params) assert result == expected_result From c18eccb6e5543d3ae600b9edcc0f83e6b77f5f10 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Tue, 6 Feb 2024 16:01:08 +0000 Subject: [PATCH 17/22] style: apply automated linter fixes --- src/safeds_runner/server/memoization_map.py | 7 ++++++- tests/safeds_runner/server/test_memoization.py | 5 ++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index 
919b37c..694f172 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -218,7 +218,12 @@ def _update_stats_on_hit(self, function_name: str, last_access: int, time_compar self._map_stats[function_name] = old_memoization_stats def _update_stats_on_miss( - self, function_name: str, last_access: int, time_compare: int, time_computation: int, memory_size: int, + self, + function_name: str, + last_access: int, + time_compare: int, + time_computation: int, + memory_size: int, ) -> None: """ Update the memoization stats on a cache miss. diff --git a/tests/safeds_runner/server/test_memoization.py b/tests/safeds_runner/server/test_memoization.py index b5bb4a9..27506a9 100644 --- a/tests/safeds_runner/server/test_memoization.py +++ b/tests/safeds_runner/server/test_memoization.py @@ -45,7 +45,10 @@ def test_memoization_already_present_values( _convert_list_to_tuple(hidden_params), )] = expected_result pipeline_manager.current_pipeline.get_memoization_map()._map_stats[function_name] = MemoizationStats( - [time.perf_counter_ns()], [], [], [sys.getsizeof(expected_result)], + [time.perf_counter_ns()], + [], + [], + [sys.getsizeof(expected_result)], ) result = pipeline_manager.runner_memoized_function_call(function_name, lambda *_: None, params, hidden_params) assert result == expected_result From 48cf96284a52d98ba76cc2a6c66d22644ed1f52f Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 6 Feb 2024 17:34:51 +0100 Subject: [PATCH 18/22] refactor: pluralize field names --- src/safeds_runner/server/memoization_map.py | 32 ++++++++++----------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index 694f172..774c419 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -18,20 +18,20 @@ class MemoizationStats: Parameters ---------- - last_access + access_timestamps 
Absolute timestamp since the unix epoch of the last access to the memoized value in nanoseconds - lookup_time + lookup_times Duration the lookup of the value took in nanoseconds (key comparison + IPC) - computation_time + computation_times Duration the computation of the value took in nanoseconds - memory_size + memory_sizes Amount of memory the memoized value takes up in bytes """ - last_access: list[int] = dataclasses.field(default_factory=list) - lookup_time: list[int] = dataclasses.field(default_factory=list) - computation_time: list[int] = dataclasses.field(default_factory=list) - memory_size: list[int] = dataclasses.field(default_factory=list) + access_timestamps: list[int] = dataclasses.field(default_factory=list) + lookup_times: list[int] = dataclasses.field(default_factory=list) + computation_times: list[int] = dataclasses.field(default_factory=list) + memory_sizes: list[int] = dataclasses.field(default_factory=list) def __str__(self) -> str: """ @@ -42,8 +42,8 @@ def __str__(self) -> str: Summary of stats """ return ( # pragma: no cover - f"Last access: {self.last_access}, computation time: {self.computation_time}, lookup time:" - f" {self.lookup_time}, memory size: {self.memory_size}" + f"Last access: {self.access_timestamps}, computation time: {self.computation_times}, lookup time:" + f" {self.lookup_times}, memory size: {self.memory_sizes}" ) @@ -213,8 +213,8 @@ def _update_stats_on_hit(self, function_name: str, last_access: int, time_compar Duration the comparison took """ old_memoization_stats = self._map_stats[function_name] - old_memoization_stats.last_access.append(last_access) - old_memoization_stats.lookup_time.append(time_compare) + old_memoization_stats.access_timestamps.append(last_access) + old_memoization_stats.lookup_times.append(time_compare) self._map_stats[function_name] = old_memoization_stats def _update_stats_on_miss( @@ -244,10 +244,10 @@ def _update_stats_on_miss( old_memoization_stats = self._map_stats.get(function_name) if 
old_memoization_stats is None: old_memoization_stats = MemoizationStats() - old_memoization_stats.last_access.append(last_access) - old_memoization_stats.lookup_time.append(time_compare) - old_memoization_stats.computation_time.append(time_computation) - old_memoization_stats.memory_size.append(memory_size) + old_memoization_stats.access_timestamps.append(last_access) + old_memoization_stats.lookup_times.append(time_compare) + old_memoization_stats.computation_times.append(time_computation) + old_memoization_stats.memory_sizes.append(memory_size) self._map_stats[function_name] = old_memoization_stats From 8044357b2a3d43c138d038fa0cbaf144cffe5f8a Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 6 Feb 2024 17:39:25 +0100 Subject: [PATCH 19/22] refactor: make `_create_memoization_key` a method of `MemoizationMap` --- src/safeds_runner/server/memoization_map.py | 45 ++++++++++++--------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index 774c419..8a65350 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -47,26 +47,6 @@ def __str__(self) -> str: ) -def _create_memoization_key(function_name: str, parameters: list[Any], hidden_parameters: list[Any]) -> MemoizationKey: - """ - Convert values provided to a memoized function call to a memoization key. - - Parameters - ---------- - function_name - Fully qualified function name - parameters - List of parameters passed to the function - hidden_parameters - List of parameters not passed to the function - - Returns - ------- - A memoization key, which contains the lists converted to tuples - """ - return function_name, _convert_list_to_tuple(parameters), _convert_list_to_tuple(hidden_parameters) - - class MemoizationMap: """ The memoization map handles memoized function calls. 
@@ -129,6 +109,31 @@ def memoized_function_call( return self._memoize_new_value(key, function_callable, time_compare_start) def _lookup_value(self, key: MemoizationKey, time_compare_start: int) -> Any | None: + + def _create_memoization_key( + self, + function_name: str, + parameters: list[Any], + hidden_parameters: list[Any], + ) -> MemoizationKey: + """ + Convert values provided to a memoized function call to a memoization key. + + Parameters + ---------- + function_name + Fully qualified function name + parameters + List of parameters passed to the function + hidden_parameters + List of parameters not passed to the function + + Returns + ------- + A memoization key, which contains the lists converted to tuples + """ + return function_name, _convert_list_to_tuple(parameters), _convert_list_to_tuple(hidden_parameters) + """ Lookup a potentially existing value from the memoization cache. From 880cd5643bde11e53a3fafa08e9938132faebfcd Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 6 Feb 2024 17:52:29 +0100 Subject: [PATCH 20/22] refactor: add methods `update_on_hit` and `update_on_miss` to `MemoizationStats` --- src/safeds_runner/server/memoization_map.py | 84 ++++++++++++++------- 1 file changed, 57 insertions(+), 27 deletions(-) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index 8a65350..fba1e17 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -33,6 +33,40 @@ class MemoizationStats: computation_times: list[int] = dataclasses.field(default_factory=list) memory_sizes: list[int] = dataclasses.field(default_factory=list) + def update_on_hit(self, access_timestamp: int, lookup_time: int) -> None: + """ + Update the memoization stats on a cache hit. 
+ + Parameters + ---------- + access_timestamp + Timestamp when this value was last accessed + lookup_time + Duration the comparison took in nanoseconds + """ + self.access_timestamps.append(access_timestamp) + self.lookup_times.append(lookup_time) + + def update_on_miss(self, access_timestamp: int, lookup_time: int, computation_time: int, memory_size: int) -> None: + """ + Update the memoization stats on a cache miss. + + Parameters + ---------- + access_timestamp + Timestamp when this value was last accessed + lookup_time + Duration the comparison took in nanoseconds + computation_time + Duration the computation of the new value took in nanoseconds + memory_size + Memory the newly computed value takes up in bytes + """ + self.access_timestamps.append(access_timestamp) + self.lookup_times.append(lookup_time) + self.computation_times.append(computation_time) + self.memory_sizes.append(memory_size) + def __str__(self) -> str: """ Summarizes stats contained in this object. @@ -204,7 +238,7 @@ def _memoize_new_value(self, key: MemoizationKey, function_callable: Callable, t ) return result - def _update_stats_on_hit(self, function_name: str, last_access: int, time_compare: int) -> None: + def _update_stats_on_hit(self, function_name: str, access_timestamp: int, lookup_time: int) -> None: """ Update the memoization stats on a cache hit. 
@@ -212,22 +246,20 @@ def _update_stats_on_hit(self, function_name: str, last_access: int, time_compar ---------- function_name Fully qualified function name - last_access - Timestamp where this value was last accessed - time_compare - Duration the comparison took + access_timestamp + Timestamp when this value was last accessed + lookup_time + Duration the comparison took in nanoseconds """ - old_memoization_stats = self._map_stats[function_name] - old_memoization_stats.access_timestamps.append(last_access) - old_memoization_stats.lookup_times.append(time_compare) - self._map_stats[function_name] = old_memoization_stats + stats = self._map_stats[function_name] + stats.update_on_hit(access_timestamp, lookup_time) def _update_stats_on_miss( self, function_name: str, - last_access: int, - time_compare: int, - time_computation: int, + access_timestamp: int, + lookup_time: int, + computation_time: int, memory_size: int, ) -> None: """ @@ -237,23 +269,21 @@ def _update_stats_on_miss( ---------- function_name Fully qualified function name - last_access - Timestamp where this value was last accessed - time_compare - Duration the comparison took - time_computation - Duration the computation of the new value took + access_timestamp + Timestamp when this value was last accessed + lookup_time + Duration the comparison took in nanoseconds + computation_time + Duration the computation of the new value took in nanoseconds memory_size - Memory the newly computed value takes up + Memory the newly computed value takes up in bytes """ - old_memoization_stats = self._map_stats.get(function_name) - if old_memoization_stats is None: - old_memoization_stats = MemoizationStats() - old_memoization_stats.access_timestamps.append(last_access) - old_memoization_stats.lookup_times.append(time_compare) - old_memoization_stats.computation_times.append(time_computation) - old_memoization_stats.memory_sizes.append(memory_size) - self._map_stats[function_name] = old_memoization_stats + stats = 
self._map_stats.get(function_name) + if stats is None: + stats = MemoizationStats() + + stats.update_on_miss(access_timestamp, lookup_time, computation_time, memory_size) + self._map_stats[function_name] = stats def _convert_list_to_tuple(values: list) -> tuple: From 368fc50356d370dd5beb94c2c48a2f92a3020831 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 6 Feb 2024 18:29:45 +0100 Subject: [PATCH 21/22] refactor: restructure logic to track stats --- src/safeds_runner/server/memoization_map.py | 96 ++++++++++----------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index fba1e17..9810bb4 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -135,14 +135,43 @@ def memoized_function_call( ------- The result of the specified function, if any exists """ - time_compare_start = time.perf_counter_ns() - key = _create_memoization_key(function_name, parameters, hidden_parameters) - potential_value = self._lookup_value(key, time_compare_start) - if potential_value is not None: - return potential_value - return self._memoize_new_value(key, function_callable, time_compare_start) + access_timestamp = time.time_ns() + + # Lookup memoized value + lookup_time_start = time.perf_counter_ns() + key = self._create_memoization_key(function_name, parameters, hidden_parameters) + memoized_value = self._lookup_value(key) + lookup_time = time.perf_counter_ns() - lookup_time_start + + # Hit + if memoized_value is not None: + self._update_stats_on_hit(function_name, access_timestamp, lookup_time) + return memoized_value + + # Miss + computation_time_start = time.perf_counter_ns() + computed_value = self._compute_and_memoize_value(key, function_callable, parameters) + computation_time = time.perf_counter_ns() - computation_time_start + memory_size = _get_size_of_value(computed_value) + + self._update_stats_on_miss( + 
function_name, + access_timestamp, + lookup_time, + computation_time, + memory_size, + ) + + logging.info( + "New memoization stats for %s: (access_timestamp=%s, lookup_time=%s, computation_time=%s, memory_size=%s)", + key[0], + access_timestamp, + lookup_time, + computation_time, + memory_size, + ) - def _lookup_value(self, key: MemoizationKey, time_compare_start: int) -> Any | None: + return computed_value def _create_memoization_key( self, @@ -168,6 +197,7 @@ def _create_memoization_key( """ return function_name, _convert_list_to_tuple(parameters), _convert_list_to_tuple(hidden_parameters) + def _lookup_value(self, key: MemoizationKey) -> Any | None: """ Lookup a potentially existing value from the memoization cache. @@ -175,32 +205,19 @@ def _create_memoization_key( ---------- key Memoization Key - time_compare_start - Point in time where the comparison time started Returns ------- The value corresponding to the provided memoization key, if any exists. """ - try: - potential_value = self._map_values[key] - except KeyError: - return None - else: - time_compare_end = time.perf_counter_ns() - # Use time_ns for absolute time points, as perf_counter_ns does not guarantee any fixed reference-point - time_last_access = time.time_ns() - time_compare = time_compare_end - time_compare_start - self._update_stats_on_hit(key[0], time_last_access, time_compare) - logging.info( - "Updated memoization stats for %s: (last_access=%s, time_compare=%s)", - key[0], - time_last_access, - time_compare, - ) - return potential_value - - def _memoize_new_value(self, key: MemoizationKey, function_callable: Callable, time_compare_start: int) -> Any: + return self._map_values.get(key) + + def _compute_and_memoize_value( + self, + key: MemoizationKey, + function_callable: Callable, + parameters: list[Any], + ) -> Any: """ Memoize a new function call and return computed the result. 
@@ -210,32 +227,15 @@ def _memoize_new_value(self, key: MemoizationKey, function_callable: Callable, t Memoization Key function_callable Function that will be called - time_compare_start - Point in time where the comparison time started + parameters + List of parameters passed to the function Returns ------- The newly computed value corresponding to the provided memoization key """ - time_compare_end = time.perf_counter_ns() - time_compare = time_compare_end - time_compare_start - time_compute_start = time.perf_counter_ns() - result = function_callable(*key[1]) - time_compute_end = time.perf_counter_ns() - # Use time_ns for absolute time points, as perf_counter_ns does not guarantee any fixed reference-point - time_last_access = time.time_ns() - time_compute = time_compute_end - time_compute_start - value_memory = _get_size_of_value(result) + result = function_callable(*parameters) self._map_values[key] = result - self._update_stats_on_miss(key[0], time_last_access, time_compare, time_compute, value_memory) - logging.info( - "New memoization stats for %s: (last_access=%s, time_compare=%s, time_compute=%s, memory=%s)", - key[0], - time_last_access, - time_compare, - time_compute, - value_memory, - ) return result def _update_stats_on_hit(self, function_name: str, access_timestamp: int, lookup_time: int) -> None: From b501fcef590d6e0b38a766a42495ef56d6976b7c Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 6 Feb 2024 18:46:15 +0100 Subject: [PATCH 22/22] fix: add explicit assignment to `self._map_stats` back --- src/safeds_runner/server/memoization_map.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/safeds_runner/server/memoization_map.py b/src/safeds_runner/server/memoization_map.py index 9810bb4..801c519 100644 --- a/src/safeds_runner/server/memoization_map.py +++ b/src/safeds_runner/server/memoization_map.py @@ -254,6 +254,10 @@ def _update_stats_on_hit(self, function_name: str, access_timestamp: int, lookup stats = 
self._map_stats[function_name] stats.update_on_hit(access_timestamp, lookup_time) + # This assignment is required for multiprocessing, see + # https://docs.python.org/3.11/library/multiprocessing.html#proxy-objects + self._map_stats[function_name] = stats + def _update_stats_on_miss( self, function_name: str,