Skip to content

Commit

Permalink
Rewrite test_delete_versions_speed to use mocks (#281)
Browse files Browse the repository at this point in the history
  • Loading branch information
peytondmurray authored Oct 19, 2023
1 parent d382673 commit f55838d
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 43 deletions.
9 changes: 7 additions & 2 deletions versioned_hdf5/replay.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations
import gc
from typing import List, Iterable, Union, Dict, Any, Optional
from typing import List, Iterable, Union, Dict, Any, Optional, Set
from h5py import (
VirtualLayout,
h5s,
Expand Down Expand Up @@ -559,7 +559,7 @@ def delete_versions(
continue
prev_version = versions[version_name].attrs['prev_version']
while prev_version in versions_to_delete_set:
prev_version = versions[prev_version].attrs['prev_version']
prev_version = _get_parent(versions, prev_version)
versions[version_name].attrs['prev_version'] = prev_version

# delete the version groups to delete
Expand All @@ -573,6 +573,11 @@ def delete_versions(
# for a discussion about this.
gc.collect()


def _get_parent(versions, version_name):
return versions[version_name].attrs['prev_version']


# Backwards compatibility
delete_version = delete_versions

Expand Down
60 changes: 19 additions & 41 deletions versioned_hdf5/tests/test_replay.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
import linecache
import pathlib
import shutil
import subprocess
import sys

import h5py
import numpy as np
import pytest
from unittest import mock

from versioned_hdf5 import VersionedHDF5File
from versioned_hdf5.hashtable import Hashtable
from versioned_hdf5.replay import (_recreate_hashtable,
_recreate_raw_data,
_recreate_virtual_dataset,
delete_version, delete_versions,
modify_metadata)
modify_metadata,
_get_parent)


def setup_vfile(file):
Expand Down Expand Up @@ -884,55 +884,33 @@ def test_delete_string_dataset(filepath):


def test_delete_versions_speed(vfile):
"""Test that delete_versions only needs linear time to find the previous version
for the versions that are being kept.
"""
with vfile.stage_version('r0') as sv:
sv.create_dataset('values', data=np.zeros(100), fillvalue=0,
chunks=(300,), maxshape=(None,), compression='lzf')

for i in range(1, 1000):
for i in range(1, 100):
with vfile.stage_version(f'r{i}') as sv:
sv['values'][:] = np.arange(i, i + 100)

# keep only every 10th version
versions_to_delete = []
versions = sorted([(v, vfile._versions[v].attrs['timestamp']) for v in vfile._versions],
key=lambda t: t[1])
versions = sorted(
[(v, vfile._versions[v].attrs['timestamp']) for v in vfile._versions],
key=lambda t: t[1]
)
for i, v in enumerate(versions):
if i % 10 != 0:
versions_to_delete.append(v[0])

# The line counts for determining the previous version
line_counts = 0

def trace_prev_version_line_calls(frame, event, arg):
nonlocal line_counts
if event == 'line':
if frame.f_code.co_name == 'delete_versions':
line_no = frame.f_lineno
if line_no == 562:
# count executions of this line, check that it's actually the correct line
expected_line = "prev_version = versions[prev_version].attrs['prev_version']"
filename = frame.f_code.co_filename
line = linecache.getline(filename, line_no).strip()
assert line == expected_line
line_counts += 1
return trace_prev_version_line_calls

# Set the trace function to count number of times a line is executed
old_tracer = sys.gettrace()
sys.settrace(trace_prev_version_line_calls)

try:
# delete_versions
with mock.patch(
'versioned_hdf5.replay._get_parent', wraps=_get_parent
) as mock_get_parent:
delete_versions(vfile, versions_to_delete)
finally:
# restore old tracer function (or None)
sys.settrace(old_tracer)

# We have 1000 versions and keep only every tenth. This means that for each version
# we should go back its modulo by 10 steps. That's
# 100 * 0 + 100 * 1 + 100 * 2 + ... + 100 * 9 == 4500
# executions of this line. But since the line contains multiple substeps
# it's "executed" multiple times. Long story short, empirically
# we end up with 8619 executions on Python 3.10, but the number
# varies between Python versions
assert 8600 <= line_counts <= 8650

# There are 90 versions to delete, and 10 to keep. Each of the 10 we are
# keeping has to go up 9 versions from its current previous version, for
# a total of 90 calls.
assert mock_get_parent.call_count == 90

0 comments on commit f55838d

Please sign in to comment.