Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: track memoization stats #51

Merged
merged 27 commits into from
Feb 6, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
18f0c85
refactor: move memoization logic into new class
WinPlay02 Feb 2, 2024
ab0213d
feat: track more stats about memoized function calls
WinPlay02 Feb 2, 2024
6c328fb
Merge branch 'main' into memoization-expansion
WinPlay02 Feb 3, 2024
670e8e0
feat: calculate memory usage better
WinPlay02 Feb 4, 2024
d0e25ff
fix: comment out future functionality not yet preset in master
WinPlay02 Feb 4, 2024
184d16a
refactor: move rest of code not sensitive to keyerror to else part of…
WinPlay02 Feb 4, 2024
02dc525
test: add tests for memoization memory usage calculation
WinPlay02 Feb 4, 2024
61f5c4d
style: apply automated linter fixes
megalinter-bot Feb 4, 2024
9b2d08d
style: apply automated linter fixes
megalinter-bot Feb 4, 2024
d86aeb9
style: disable lines that are covered but coverage is broken
WinPlay02 Feb 4, 2024
b210b9a
style: apply automated linter fixes
megalinter-bot Feb 4, 2024
2651b1a
Merge branch 'main' into memoization-expansion
WinPlay02 Feb 4, 2024
506edbc
style: remove unneeded coverage hints
WinPlay02 Feb 5, 2024
bb53033
Merge branch 'main' of https://github.com/Safe-DS/Runner into memoiza…
WinPlay02 Feb 6, 2024
825de81
Merge branch 'memoization-expansion' of https://github.com/Safe-DS/Ru…
WinPlay02 Feb 6, 2024
638963a
build: update to safe-ds 0.19
WinPlay02 Feb 6, 2024
f452282
feat: update memoization map memory usage calculation
WinPlay02 Feb 6, 2024
a1adeb1
style: apply automated linter fixes
megalinter-bot Feb 6, 2024
43dcb93
refactor: split memoization function into multiple parts
WinPlay02 Feb 6, 2024
7b4ebdd
Merge branch 'memoization-expansion' of https://github.com/Safe-DS/Ru…
WinPlay02 Feb 6, 2024
9ec25db
style: apply automated linter fixes
megalinter-bot Feb 6, 2024
c18eccb
style: apply automated linter fixes
megalinter-bot Feb 6, 2024
48cf962
refactor: pluralize field names
lars-reimann Feb 6, 2024
8044357
refactor: make `_create_memoization_key` a method of `MemoizationMap`
lars-reimann Feb 6, 2024
880cd56
refactor: add methods `update_on_hit` and `update_on_miss` to `Memoiz…
lars-reimann Feb 6, 2024
368fc50
refactor: restructure logic to track stats
lars-reimann Feb 6, 2024
b501fce
fix: add explicit assignment to `self._map_stats` back
lars-reimann Feb 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
177 changes: 177 additions & 0 deletions src/safeds_runner/server/memoization_map.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
"""Module that contains the memoization logic and stats."""
import logging
import sys
import time
from dataclasses import dataclass
from typing import Any, Callable

from safeds.data.image.containers import Image
from safeds.data.tabular.containers import Table, Column, Row, TaggedTable # , TimeSeries
from safeds.data.tabular.typing import Schema


@dataclass(frozen=True)
class MemoizationStats:
    """
    Immutable record of the statistics kept for one memoized value.

    Parameters
    ----------
    last_access
        Absolute timestamp of the last access to the memoized value, in nanoseconds since the unix epoch
    computation_time
        Time the computation of the value took, in nanoseconds
    lookup_time
        Time the lookup of the value took, in nanoseconds (key comparison + IPC)
    memory_size
        Amount of memory the memoized value takes up, in bytes
    """

    last_access: int
    computation_time: int
    lookup_time: int
    memory_size: int

    def __str__(self) -> str:
        """
        Render all stats of this object as a single human-readable line.

        Returns
        -------
        Summary of stats
        """
        parts = (
            f"Last access: {self.last_access}",
            f"computation time: {self.computation_time}",
            f"lookup time: {self.lookup_time}",
            f"memory size: {self.memory_size}",
        )
        return ", ".join(parts)


class MemoizationMap:
    """
    The memoization map handles memoized function calls.

    This contains looking up stored values, computing new values if needed and calculating and updating statistics.
    Values and stats live in two separate dictionaries that share the same keys.
    """

    def __init__(
        self,
        map_values: dict[tuple[str, tuple[Any, ...], tuple[Any, ...]], Any],
        map_stats: dict[tuple[str, tuple[Any, ...], tuple[Any, ...]], MemoizationStats],
    ):
        """
        Create a new memoization map using a value store dictionary and a stats dictionary.

        Parameters
        ----------
        map_values
            Value store dictionary
        map_stats
            Stats dictionary
        """
        self.map_values: dict[tuple[str, tuple[Any, ...], tuple[Any, ...]], Any] = map_values
        self.map_stats: dict[tuple[str, tuple[Any, ...], tuple[Any, ...]], MemoizationStats] = map_stats

    def memoized_function_call(
        self,
        function_name: str,
        function_callable: Callable,
        parameters: list[Any],
        hidden_parameters: list[Any],
    ) -> Any:
        """
        Handle a memoized function call.

        Looks up the stored value, determined by function name, parameters and hidden parameters and returns it if found.
        If no value is found, computes the value using the provided callable and stores it in the map.
        Every call to this function will update the memoization stats.

        A stored value whose stats entry is missing is still treated as a hit: the value is returned without calling
        the function again and a fresh stats entry is created for it.

        Parameters
        ----------
        function_name
            Fully qualified function name
        function_callable
            Function that is called and memoized if the result was not found in the memoization map
        parameters
            List of parameters passed to the function
        hidden_parameters
            List of hidden parameters for the function. This is used for memoizing some impure functions.

        Returns
        -------
        The result of the specified function, if any exists
        """
        key = (function_name, _convert_list_to_tuple(parameters), _convert_list_to_tuple(hidden_parameters))
        time_compare_start = time.perf_counter_ns()
        # Only the value lookup is guarded: a KeyError raised anywhere else must not be mistaken for a cache miss.
        try:
            potential_value = self.map_values[key]
        except KeyError:
            value_found = False
        else:
            value_found = True
        time_compare = time.perf_counter_ns() - time_compare_start

        if value_found:
            # Use time_ns for absolute time points, as perf_counter_ns does not guarantee any fixed reference-point
            time_last_access = time.time_ns()
            old_memoization_stats = self.map_stats.get(key)
            if old_memoization_stats is None:
                # Value and stats are written separately, so the stats entry may be absent.
                # The original computation time is unknown here and is recorded as 0.
                memoization_stats = MemoizationStats(
                    time_last_access,
                    0,
                    time_compare,
                    _get_size_of_value(potential_value),
                )
            else:
                memoization_stats = MemoizationStats(
                    time_last_access,
                    old_memoization_stats.computation_time,
                    time_compare,
                    old_memoization_stats.memory_size,
                )
            self.map_stats[key] = memoization_stats
            logging.info("Updated memoization stats for %s: %s", function_name, memoization_stats)
            return potential_value

        # Cache miss: compute the value, measure it and store both the value and its stats.
        time_compute_start = time.perf_counter_ns()
        result = function_callable(*parameters)
        time_compute = time.perf_counter_ns() - time_compute_start
        # Use time_ns for absolute time points, as perf_counter_ns does not guarantee any fixed reference-point
        time_last_access = time.time_ns()
        value_memory = _get_size_of_value(result)
        self.map_values[key] = result
        memoization_stats = MemoizationStats(time_last_access, time_compute, time_compare, value_memory)
        logging.info("New memoization stats for %s: %s", function_name, memoization_stats)
        self.map_stats[key] = memoization_stats
        return result


def _convert_list_to_tuple(values: list) -> tuple:
"""
Recursively convert a mutable list of values to an immutable tuple containing the same values, to make the values hashable.

Parameters
----------
values : list
Values that should be converted to a tuple

Returns
-------
tuple
Converted list containing all the elements of the provided list
"""
return tuple(_convert_list_to_tuple(value) if isinstance(value, list) else value for value in values)


def _get_size_of_value(value: Any) -> int:
"""
Recursively calculate the memory usage of a given value.

Parameters
----------
value
Any value of which the memory usage should be calculated.

Returns
-------
Size of the provided value in bytes
"""
size_immediate = sys.getsizeof(value)
if isinstance(value, dict):
return sum(map(_get_size_of_value, value.items())) + size_immediate

Check warning on line 155 in src/safeds_runner/server/memoization_map.py

View check run for this annotation

Codecov / codecov/patch

src/safeds_runner/server/memoization_map.py#L155

Added line #L155 was not covered by tests
elif isinstance(value, list) or isinstance(value, tuple) or isinstance(value, set) or isinstance(value, frozenset):
return sum(map(_get_size_of_value, value)) + size_immediate

Check warning on line 157 in src/safeds_runner/server/memoization_map.py

View check run for this annotation

Codecov / codecov/patch

src/safeds_runner/server/memoization_map.py#L157

Added line #L157 was not covered by tests
elif isinstance(value, Table):
return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + size_immediate

Check warning on line 159 in src/safeds_runner/server/memoization_map.py

View check run for this annotation

Codecov / codecov/patch

src/safeds_runner/server/memoization_map.py#L159

Added line #L159 was not covered by tests
elif isinstance(value, Schema):
return _get_size_of_value(value._schema) + size_immediate

Check warning on line 161 in src/safeds_runner/server/memoization_map.py

View check run for this annotation

Codecov / codecov/patch

src/safeds_runner/server/memoization_map.py#L161

Added line #L161 was not covered by tests
elif isinstance(value, Image):
return _get_size_of_value(

Check warning on line 163 in src/safeds_runner/server/memoization_map.py

View check run for this annotation

Codecov / codecov/patch

src/safeds_runner/server/memoization_map.py#L163

Added line #L163 was not covered by tests
value._image_tensor) + value._image_tensor.element_size() * value._image_tensor.nelement() + size_immediate
elif isinstance(value, Column):
return _get_size_of_value(value._data) + _get_size_of_value(value._name) + _get_size_of_value(

Check warning on line 166 in src/safeds_runner/server/memoization_map.py

View check run for this annotation

Codecov / codecov/patch

src/safeds_runner/server/memoization_map.py#L166

Added line #L166 was not covered by tests
value._type) + size_immediate
elif isinstance(value, Row):
return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + size_immediate

Check warning on line 169 in src/safeds_runner/server/memoization_map.py

View check run for this annotation

Codecov / codecov/patch

src/safeds_runner/server/memoization_map.py#L169

Added line #L169 was not covered by tests
elif isinstance(value, TaggedTable):
return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + _get_size_of_value(

Check warning on line 171 in src/safeds_runner/server/memoization_map.py

View check run for this annotation

Codecov / codecov/patch

src/safeds_runner/server/memoization_map.py#L171

Added line #L171 was not covered by tests
value._features) + _get_size_of_value(value._target) + size_immediate
# elif isinstance(value, TimeSeries):
# return _get_size_of_value(value._data) + _get_size_of_value(value._schema) + _get_size_of_value(
# value._time) + _get_size_of_value(value._features) + _get_size_of_value(value._target) + size_immediate
WinPlay02 marked this conversation as resolved.
Show resolved Hide resolved
else:
return size_immediate
29 changes: 3 additions & 26 deletions src/safeds_runner/server/pipeline_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import stack_data

from safeds_runner.server.memoization_map import MemoizationMap
from safeds_runner.server.messages import (
Message,
MessageDataProgram,
Expand All @@ -27,8 +28,6 @@
)
from safeds_runner.server.module_manager import InMemoryFinder

MemoizationMap: typing.TypeAlias = dict[tuple[str, tuple[Any], tuple[Any]], Any]


class PipelineManager:
"""
Expand Down Expand Up @@ -59,7 +58,7 @@ def _messages_queue_thread(self) -> threading.Thread:

@cached_property
def _memoization_map(self) -> MemoizationMap:
return self._multiprocessing_manager.dict() # type: ignore[return-value]
return MemoizationMap(self._multiprocessing_manager.dict(), self._multiprocessing_manager.dict()) # type: ignore[arg-type]

def startup(self) -> None:
"""
Expand Down Expand Up @@ -334,29 +333,7 @@ def runner_memoized_function_call(
if current_pipeline is None:
return None # pragma: no cover
memoization_map = current_pipeline.get_memoization_map()
key = (function_name, _convert_list_to_tuple(parameters), _convert_list_to_tuple(hidden_parameters))
if key in memoization_map:
return memoization_map[key]
result = function_callable(*parameters)
memoization_map[key] = result
return result


def _convert_list_to_tuple(values: list) -> tuple:
"""
Recursively convert a mutable list of values to an immutable tuple containing the same values, to make the values hashable.

Parameters
----------
values : list
Values that should be converted to a tuple

Returns
-------
tuple
Converted list containing all the elements of the provided list
"""
return tuple(_convert_list_to_tuple(value) if isinstance(value, list) else value for value in values)
return memoization_map.memoized_function_call(function_name, function_callable, parameters, hidden_parameters)


def runner_filemtime(filename: str) -> int | None:
Expand Down
22 changes: 16 additions & 6 deletions tests/safeds_runner/server/test_memoization.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import sys
import tempfile
import time
import typing
from datetime import UTC, datetime
from queue import Queue
from typing import Any

import pytest
from safeds_runner.server import pipeline_manager
from safeds_runner.server import pipeline_manager, memoization_map
from safeds_runner.server.memoization_map import MemoizationMap, MemoizationStats
from safeds_runner.server.messages import MessageDataProgram, ProgramMainInformation
from safeds_runner.server.pipeline_manager import PipelineProcess

Expand All @@ -29,15 +32,22 @@ def test_memoization_already_present_values(
"",
Queue(),
{},
{},
MemoizationMap({}, {}),
)
pipeline_manager.current_pipeline.get_memoization_map()[
pipeline_manager.current_pipeline.get_memoization_map().map_values[
(
function_name,
pipeline_manager._convert_list_to_tuple(params),
pipeline_manager._convert_list_to_tuple(hidden_params),
memoization_map._convert_list_to_tuple(params),
memoization_map._convert_list_to_tuple(hidden_params),
)
] = expected_result
pipeline_manager.current_pipeline.get_memoization_map().map_stats[
(
function_name,
memoization_map._convert_list_to_tuple(params),
memoization_map._convert_list_to_tuple(hidden_params),
)
] = MemoizationStats(time.perf_counter_ns(), 0, 0, sys.getsizeof(expected_result))
result = pipeline_manager.runner_memoized_function_call(function_name, lambda *_: None, params, hidden_params)
assert result == expected_result

Expand All @@ -62,7 +72,7 @@ def test_memoization_not_present_values(
"",
Queue(),
{},
{},
MemoizationMap({}, {}),
)
# Save value in map
result = pipeline_manager.runner_memoized_function_call(function_name, function, params, hidden_params)
Expand Down