From 18212ac4ca12702db8b76afd11a73959bce12a01 Mon Sep 17 00:00:00 2001
From: hernot
Date: Sun, 20 Dec 2020 21:28:41 +0100
Subject: [PATCH] Python tox based compression safety testing

Compression keyword safety tests:
=================================
In issue #140 it was reported that some loaders crash when 'compression',
'chunks' and related h5py keyword arguments are specified. When pytest is
run a second time with the custom --enable-compression option, all tests
are rerun with kwargs={'compression': 'gzip', 'compression_opts': 6}.

All compression-sensitive tests, especially all 'test_XX_*.py::*' unit test
functions, must include the 'compression_kwargs' parameter in their
signature to receive the keyword arguments that are to be passed to every
'create_fcn' function defined by a loader module. If a test function fails
to pass 'compression_kwargs' on as keyword arguments ('**kwargs') to
'hickle.dump', 'hickle._dump', any dump method listed in the
'class_register' table of a loader module, or any dump method registered
directly through 'LoaderManager.register_class', an AssertionError is
raised naming the test function, the line in which the affected function is
called, and any function it calls in turn.

Tests which either exercise compression-related issues explicitly or do not
call any of the dump functions may be marked with the
'pytest.mark.no_compression' marker to explicitly exclude them from
compression testing.

Tox virtual env manager support:
================================
Adds support for the virtualenv manager tox. Tox simplifies local testing
of compatibility with multiple Python versions before pushing to GitHub and
creating a pull request. Travis and Appveyor integration still has to be
tested and verified.
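
For illustration, a minimal sketch (not part of this change set) of what a
compliant test looks like; the test name, the dumped object and the file
name are made up, only the 'compression_kwargs' fixture, the '**kwargs'
forwarding and the 'no_compression' marker matter to the hardening run:

    import pytest
    import hickle

    def test_dump_roundtrip(tmp_path, compression_kwargs):
        # compression_kwargs is {} in a normal run and
        # {'compression': 'gzip', 'compression_opts': 6} when pytest
        # is invoked with --enable-compression
        filename = str(tmp_path / "data.hkl")
        hickle.dump([1, 2, 3], filename, **compression_kwargs)
        assert hickle.load(filename) == [1, 2, 3]

    @pytest.mark.no_compression
    def test_unrelated_behaviour():
        # explicitly excluded from compression keyword monitoring
        assert 1 + 1 == 2

The hardened pass is started via 'pytest --enable-compression', optionally
followed by a compression level 0-9 (default 6); without that option the
fixture yields an empty dict and the tests behave as before.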
--- .appveyor.yml | 20 +- .travis.yml | 8 +- MANIFEST.in | 8 +- conftest.py | 457 ++++++++++++++++++++++++++ hickle/loaders/load_astropy.py | 7 +- hickle/loaders/load_numpy.py | 2 +- hickle/loaders/load_pandas.py | 1 + hickle/tests/test_02_hickle_lookup.py | 57 ++-- hickle/tests/test_03_load_builtins.py | 100 +++--- hickle/tests/test_04_load_numpy.py | 63 ++-- hickle/tests/test_05_load_scipy.py | 18 +- hickle/tests/test_06_load_astropy.py | 110 ++++--- hickle/tests/test_99_hickle_core.py | 152 +++++---- hickle/tests/test_hickle.py | 176 +++++----- hickle/tests/test_legacy_load.py | 1 + requirements_h5py.txt | 4 + requirements_test.txt | 1 - setup.cfg | 1 + tox.ini | 51 +++ 19 files changed, 941 insertions(+), 296 deletions(-) create mode 100644 conftest.py create mode 100644 requirements_h5py.txt create mode 100644 tox.ini diff --git a/.appveyor.yml b/.appveyor.yml index 3cf76822..a5621af1 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -3,34 +3,42 @@ environment: - PYTHON: "C:\\Python35" PYTHON_VERSION: "3.5.x" PYTHON_ARCH: "32" + TOX_APPVEYOR_X64: 0 - PYTHON: "C:\\Python35-x64" PYTHON_VERSION: "3.5.x" PYTHON_ARCH: "64" + TOX_APPVEYOR_X64: 1 - PYTHON: "C:\\Python36" PYTHON_VERSION: "3.6.x" PYTHON_ARCH: "32" + TOX_APPVEYOR_X64: 0 - PYTHON: "C:\\Python36-x64" PYTHON_VERSION: "3.6.x" PYTHON_ARCH: "64" + TOX_APPVEYOR_X64: 1 - PYTHON: "C:\\Python37" PYTHON_VERSION: "3.7.x" PYTHON_ARCH: "32" + TOX_APPVEYOR_X64: 0 - PYTHON: "C:\\Python37-x64" PYTHON_VERSION: "3.7.x" PYTHON_ARCH: "64" + TOX_APPVEYOR_X64: 1 - PYTHON: "C:\\Python38" PYTHON_VERSION: "3.8.x" PYTHON_ARCH: "32" + TOX_APPVEYOR_X64: 0 - PYTHON: "C:\\Python38-x64" PYTHON_VERSION: "3.8.x" PYTHON_ARCH: "64" + TOX_APPVEYOR_X64: 1 install: # Prepend newly installed Python to the PATH of this build (this cannot be @@ -40,15 +48,17 @@ install: # Upgrade pip - "python -m pip install --user --upgrade pip setuptools wheel" + - "python -m pip install tox-appveyor" # Install testing requirements - - "pip install -r requirements_test.txt" + #- "pip install -r requirements_test.txt" build: false test_script: - "check-manifest" - - "python setup.py sdist bdist_wheel" - - "twine check dist/*" - - "pip install ." - - "pytest" + - "python -m tox" + #- "python setup.py sdist bdist_wheel" + #- "twine check dist/*" + #- "pip install ." + #- "pytest" diff --git a/.travis.yml b/.travis.yml index 42323a8a..5b8913c5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,14 +12,12 @@ install: - sudo apt-get update -qq - sudo apt-get install -qq libhdf5-serial-dev - python -m pip install --upgrade pip setuptools wheel - - pip install -r requirements_test.txt + - pip install tox-travis + #- pip install -r requirements_test.txt script: - check-manifest - - python setup.py sdist bdist_wheel - - twine check dist/* - - pip install . 
- - pytest + - tox # Run code coverage after_success: codecov diff --git a/MANIFEST.in b/MANIFEST.in index 01c0ebf8..0ca2e0c2 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -11,4 +11,10 @@ exclude .nojekyll exclude .pylintrc exclude paper* recursive-exclude * __pycache__ -recursive-exclude * *.py[co] +recursive-exclude * *.py[co] *.bk +recursive-exclude * old .old +recursive-exclude * *.tox +exclude hickle/tests/classes +recursive-exclude hickle/tests/classes * +exclude hickle/tests/dev_check +recursive-exclude hickle/tests/dev_check * diff --git a/conftest.py b/conftest.py new file mode 100644 index 00000000..1e536f9e --- /dev/null +++ b/conftest.py @@ -0,0 +1,457 @@ +import pytest +import sys +import types +import functools as ft +import threading +import os +import os.path +import importlib +import collections +import ctypes + +# list of function names which shall not be +# traced when compression keyword hardening +# test run is executed +non_core_loader_functions = { + 'type_legacy_mro', + 'load_pickled_data', + 'load_compact_dataset', + 'register_compact_expand', + '_moc_numpy_array_object_lambda', + 'fix_lambda_obj_type', + 'LoaderManager.load_loader', + 'CompactContainer.convert', + 'NoContainer.convert', + '_DictItemContainer.convert', + 'ExpandReferenceContainer.convert', + 'CompactContainer.filter', + 'ExpandReferenceContainer.filter', + 'ReferenceManager.resolve_type', + 'CompactContainer._append' +} + +def pytest_addoption(parser): + """ + adds enable_compression keywort to pytest commandline options + for enabling h5py compression keyword hardening testing of + dump functions of hikcle.loaders and hickle core loaders + """ + parser.addoption( + "--enable-compression", + action='store', + nargs='?', + const=6, + type=int, + choices=range(0,10), + help="run all tests with bzip compression enabled. Optionally specify compression level 0-9 (default 6)", + dest="enable_compression" + ) + +def _get_trace_function(trace_function): + """ + try to get hold of FunctionType object of passed in Method, Function or callable + """ + while not isinstance(trace_function,(types.FunctionType,types.LambdaType,types.BuiltinFunctionType)): + if isinstance(trace_function,(types.MethodType,types.BuiltinMethodType)): + trace_function = getattr(trace_function,'__func__') + continue + if isinstance(trace_function,ft.partial): + trace_function = trace_function.func + continue + return ( + getattr(trace_function,'__call__',trace_function) + if callable(trace_function) and not isinstance(trace_function,type) else + None + ) + return trace_function + +# keyword arguments to yield from compression_kwargs fixture below +# may in future become a list of dictionaries to be yieled for +# running same test with different sets of compression keywors +# (implizit parametrization of tests) +_compression_args = dict( + compression='gzip', + compression_opts=6 +) +_test_compression = None + +def pytest_configure(config): + """ + make no_compression mark available from pytest.mark. + if not yet activated enable profiling of dump methods and functions + and set compression_level selected on commandline if explicitly + specified. 
+ """ + global _test_compression + + config.addinivalue_line( + "markers","no_compression: do not enforce h5py comression hardening testing" + ) + if _test_compression is not None: + return + compression_level = config.getoption("enable_compression",default=-1) + if compression_level is None or compression_level < 0: + return + _compression_args['compression_opts'] = compression_level + _test_compression = True + +# local handle of no_compression mark +no_compression = pytest.mark.no_compression + +@pytest.fixture#(scope='session') +def compression_kwargs(request): + """ + fixture providing the compressoin related keyword arguments + to be passed to any test not marked with no_compression mark + and expecting compression_kwargs as one of its parameters + """ + global _test_compression + yield ( _compression_args if _test_compression else {} ) + +# list of distinct copyies of LoaderManager.register_class function +# keys are either "::LoaderManager.register_class" or +# copy of code object executed when LoaderManager.register_class method +# is called +_trace_register_class = {} + +# list of dump_functions to be traced with respect to beeing +# passed the compression related keywords provided throug compression_kwargs +# fixture above. In case a call to any of theses does not include at least these +# keywords an AssertionError Exception is raised. +_trace_functions = collections.OrderedDict() + +# profiling function to be called after execution of _trace_loader_funcs +# below +_trace_profile_call = None + +# index of dump_function argument in argument list of LoaderManager.register_class +# method. +_trace_function_argument_default = -1 +def _chain_profile_call(frame,event,arg): + global _trace_profile_call + if _trace_profile_call: + next_call = _trace_profile_call(frame,event,arg) + if next_call: + _trace_profile_call = next_call + +# argument names which correspond to argument beeing passed dump_function +# object +_trace_function_arg_names = {'dump_function'} + + +# the pytest session tracing of proper handling of compression related +# keywords is activated for +_traced_session = None + +def pytest_sessionstart(session): + """ + pytest hook called at start of session. + - collects all functions exported by hickle.lookup module (for now) and + records inserts "::" strings into + _trace_functions list for any not listed in above non_core_loader_functions + - collects all dump_functions listed in class_register tables of all + hickle.loaders.load_*.py modules. 
+ """ + global _test_compression,_traced_session,_trace_register_class,_trace_functions,_trace_profile_call + if _test_compression is None: + pytest_configure(session.config) + if not _test_compression: + return None + # extract all loader function from hickle.lookup + lookup_module = sys.modules.get('hickle.lookup',None) + if not isinstance(lookup_module,types.ModuleType): + lookup_module_spec = importlib.util.find_spec("hickle.lookup") + + lookup_module = importlib.util.module_from_spec(lookup_module_spec) + lookup_module_spec.loader.exec_module(lookup_module) + register_class = lookup_module.LoaderManager.register_class + register_class_code = register_class.__func__.__code__ + trace_function_argument = register_class_code.co_argcount + register_class_code.co_kwonlyargcount + for argid,trace_function in ( (count,varname) for count,varname in enumerate(register_class_code.co_varnames[:(register_class_code.co_argcount + register_class_code.co_kwonlyargcount)]) if varname in _trace_function_arg_names ): + trace_function_argument = argid + break + if trace_function_argument < 0: + return None + _trace_function_argument_default = trace_function_argument + qualname = getattr(register_class,'__qualname__',register_class.__name__) + code_name = qualname if qualname.rsplit('.',1) == register_class_code.co_name else register_class_code.co_name + _trace_register_class.update({"{}::{}".format(register_class_code.co_filename,code_name):trace_function_argument}) + for loader_func_name,loader_func in ( + (func_name,func) + for name, item in lookup_module.__dict__.items() + if isinstance(item,(types.FunctionType,type)) + for func_name,func in ( + ((name,item),) + if isinstance(item,types.FunctionType) else + ( + ( meth_name,meth) + for meth_name,meth in item.__dict__.items() + if isinstance(meth,types.FunctionType) + ) + ) + if func_name[:2] != '__' and func_name[-2:] != '__' + ): + loader_func = _get_trace_function(loader_func) + if loader_func is not None and loader_func.__module__ == lookup_module.__name__: + code = loader_func.__code__ + qualname = getattr(loader_func,'__qualname__',loader_func.__name__) + if qualname not in non_core_loader_functions: + code_name = qualname if qualname.rsplit('.',1) == code.co_name else code.co_name + _trace_functions["{}::{}".format(code.co_filename,code_name)] = (loader_func.__module__,qualname) + # extract all dump functions from any known loader module + hickle_loaders_path = os.path.join(os.path.dirname(lookup_module.__file__),'loaders') + for loader in os.scandir(hickle_loaders_path): + if not loader.is_file() or not loader.name.startswith('load_'): + continue + loader_module_name = "hickle.loaders.{}".format(loader.name.rsplit('.',1)[0]) + loader_module = sys.modules.get(loader_module_name,None) + if loader_module is None: + + loader_module_spec = importlib.util.find_spec("hickle.loaders.{}".format(loader.name.rsplit('.',1)[0])) + if loader_module_spec is None: + continue + loader_module = importlib.util.module_from_spec(loader_module_spec) + try: + loader_module_spec.loader.exec_module(loader_module) + except ModuleNotFoundError: + continue + except ImportError: + if sys.version_info[0] > 3 or sys.version_info[1] > 5: + raise + continue + class_register_table = getattr(loader_module,'class_register',()) + # trace function has cls/self + for dump_function in ( entry[trace_function_argument-1] for entry in class_register_table ): + dump_function = _get_trace_function(dump_function) + if dump_function is not None: + code = dump_function.__code__ + qualname = 
getattr(dump_function,'__qualname__',dump_function.__name__) + code_name = qualname if qualname.rsplit('.',1) == code.co_name else code.co_name + _trace_functions["{}::{}".format(code.co_filename,code_name)] = (dump_function.__module__,qualname) + # activate compression related profiling + _trace_profile_call = sys.getprofile() + _traced_session = session + sys.setprofile(_trace_loader_funcs) + return None + +# List of test functions which are marked by no_compression mark +_never_trace_compression = set() + +def traceback_from_frame(frame,stopafter): + """ + helper function used in Python >= 3.7 to beautify traceback + of AssertionError exceptoin thrown by _trace_loader_funcs + """ + tb = types.TracebackType(None,frame,frame.f_lasti,frame.f_lineno) + while frame.f_back is not stopafter.f_back: + frame = frame.f_back + tb = types.TracebackType(tb,frame,frame.f_lasti,frame.f_lineno) + return tb + + +def pytest_collection_finish(session): + """ + collect all test functions for which comression related keyword monitoring + shall be disabled. + """ + if not sys.getprofile() == _trace_loader_funcs: + return + + listed = set() + listemodules = set() + for item in session.items: + func = item.getparent(pytest.Function) + if func not in listed: + listed.add(func) + for marker in func.iter_markers(no_compression.name): + never_trace_code = func.function.__code__ + qualname = getattr(func.function,'__qualname__',func.function.__name__) + code_name = qualname if qualname.rsplit('.',1) == never_trace_code.co_name else never_trace_code.co_name + _never_trace_compression.add("{}::{}".format(never_trace_code.co_filename,code_name)) + break + + +def _trace_loader_funcs(frame,event,arg,nochain=False): + """ + does the actuatual profiling with respect to proper passing compression keywords + to dump_functions + """ + global _chain_profile_call, _trace_functions,_never_trace_compression,_trace_register_class,_trace_function_argument_default + try: + if event not in {'call','c_call'}: + return _trace_loader_funcs + # check if LoaderManager.register_class has been called + # if get position of dump_function argument and extract + # code object for dump_function to be registered if not None + code_block = frame.f_code + trace_function_argument = _trace_register_class.get(code_block,None) + if trace_function_argument is not None: + trace_function = frame.f_locals.get(code_block.co_varnames[trace_function_argument],None) + load_function = frame.f_locals.get(code_block.co_varnames[trace_function_argument+1],None) + if load_function is not None: + load_function = _get_trace_function(load_function) + _trace_functions.pop("{}::{}".format(load_function.__code__.co_filename,load_function.__code__.co_name),None) + if trace_function is None: + return _trace_loader_funcs + trace_function = _get_trace_function(trace_function) + if trace_function is None: + return _trace_loader_funcs + trace_function_code = getattr(trace_function,'__code__',None) + if trace_function_code is not None: + # store code object corresponding to dump_function in _trace_functions list + # if not yet present there. 
+ qualname = getattr(trace_function,'__qualname__',trace_function.__name__) + code_name = qualname if qualname.rsplit('.',1) == trace_function_code.co_name else trace_function_code.co_name + trace_function_code_name = "{}::{}".format(trace_function_code.co_filename,code_name) + if ( + trace_function_code_name not in _trace_register_class and + ( + trace_function_code_name not in _trace_functions or + trace_function_code not in _trace_functions + ) + ): + trace_function_spec = (trace_function.__module__,qualname) + _trace_functions[trace_function_code] = trace_function_spec + _trace_functions[trace_function_code_name] = trace_function_spec + return _trace_loader_funcs + # estimate qualname from local variable stored in frame.f_local corresponding + # to frame.f_code.co_varnames[0] if any. + object_self_name = frame.f_code.co_varnames[:1] + if object_self_name: + self = frame.f_locals.get(object_self_name[0],None) + module = getattr(self,'__module__','') + if isinstance(module,str) and module.split('.',1)[0] == 'hickle' and isinstance(getattr(self,'__name__',None),str): + method = getattr(self,frame.f_code.co_name,None) + if method is not None and getattr(method,'__code__',None) == frame.f_code: + code_name = "{}::{}.{}".format( + frame.f_code.co_filename, + getattr(self,'__qualname__',self.__name__), + frame.f_code.co_name + ) + else: + code_name = "{}::{}".format(frame.f_code.co_filename,frame.f_code.co_name) + else: + code_name = "{}::{}".format(frame.f_code.co_filename,frame.f_code.co_name) + else: + code_name = "{}::{}".format(frame.f_code.co_filename,frame.f_code.co_name) + # check if frame could encode a clall to a new incarnation of LoaderManager.register_class + # method. Add its code object to the list of known incarnations and rerun above code + if code_block.co_name == 'register_class': + trace_function_argument = _trace_register_class.get(code_name,None) + if trace_function_argument is not None: + _trace_register_class[code_block] = trace_function_argument + return _trace_loader_funcs(frame,event,arg,True) + if ( + code_block.co_filename.rsplit('/',2) == ['hickle','lookup.py'] and + code_block.co_varnames > trace_function_argument and + code_block.co_varnames[_trace_function_argument_default] in _trace_function_arg_names + ): + _trace_register_class[code_name] = _trace_function_argument_default + _trace_register_class[code_block] = _trace_function_argument_default + return _trace_loader_funcs(frame,event,arg,True) + + # frame encodes a call to any other function or method. 
+ # If the function or method is listed in _trace_functions list check + # if it received the appropriate set of compresson related keywords + function_object_spec = _trace_functions.get(frame.f_code,None) + if function_object_spec is None: + function_object_spec = _trace_functions.get(code_name,None) + if function_object_spec is None: + return _trace_loader_funcs + _trace_functions[frame.f_code] = function_object_spec + baseargs = ( + (arg,frame.f_locals[arg]) + for arg in frame.f_code.co_varnames[:(frame.f_code.co_argcount + frame.f_code.co_kwonlyargcount)] + ) + kwargs = frame.f_locals.get('kwargs',None) + if kwargs is not None: + fullargs = ( (name,arg) for arglist in (kwargs.items(),baseargs) for name,arg in arglist ) + else: + fullargs = baseargs + seen_compression_args = set() + for arg,value in fullargs: + if arg in seen_compression_args: + continue + if _compression_args.get(arg,None) is not None: + seen_compression_args.add(arg) + if len(seen_compression_args) == len(_compression_args): + return _trace_loader_funcs + # keywords not passed or filtered prematurely. + # walk the stack until reaching executed test function. + # if test function is not marked with no_compression raise + # AssertionError stating that dump_function did not + # receive expected compression keywords defined above + # For Python <= 3.6 collect all functions called between current + # frame and frame of executed test function. For Python > 3.6 use + # above traceback_from_frame function to build traceack showing appropriate + # callstack and context excluding this function to ensure AssertionError + # exception appears thrown on behlaf of function triggering call encoded by + # passed frame + function_object_spec = _trace_functions[frame.f_code] + if _traced_session is not None: + test_list = { + "{}::{}".format( + item.function.__code__.co_filename, + getattr(item.function,'__qualname__', + item.function.__name__) + ):item + for item in _traced_session.items + } + collect_call_tree = [] + next_frame = frame + while next_frame is not None: + object_self_name = frame.f_code.co_varnames[:1] + if object_self_name: + self = frame.f_locals.get(object_self_name[0]) + module = getattr(self,'__module__','') + if ( + isinstance(module,str) and + module.split('.',1)[0] == 'hickle' and + isinstance(getattr(self,'__name__',None),str) + ): + method = getattr(self,frame.f_code.co_name,None) + if method is not None and getattr(method,'__code__',None) == frame.f_code: + frame_name = "{}::{}".format( + next_frame.f_code.co_filename, + getattr(method,'__qualname__',method.__name__) + ) + else: + frame_name = "{}::{}".format(next_frame.f_code.co_filename,next_frame.f_code.co_name) + else: + frame_name = "{}::{}".format(next_frame.f_code.co_filename,next_frame.f_code.co_name) + else: + frame_name = "{}::{}".format(next_frame.f_code.co_filename,next_frame.f_code.co_name) + if frame_name in _never_trace_compression: + return _trace_loader_funcs + in_test = test_list.get(frame_name,None) + collect_call_tree.append((next_frame.f_code.co_filename,frame_name,next_frame.f_lineno)) + if in_test is not None: + try: + tb = traceback_from_frame(frame,next_frame) + except TypeError: + pass + else: + raise AssertionError( + "'{}': compression_kwargs lost in call".format("::".join(function_object_spec)) + ).with_traceback(tb) + raise AssertionError( + "'{}': compression_kwargs lost in call:\n\t{}\n".format( + "::".join(function_object_spec), + "\n\t".join("{}::{} ({})".format(*call) for call in collect_call_tree[:0:-1]) + ) + ) + next_frame 
= next_frame.f_back + except AssertionError as ae: + # check that first entry in traceback does not refer to this function + if ae.__traceback__.tb_frame.f_code == _trace_loader_funcs.__code__: + ae.__traceback__ = ae.__traceback__.tb_next + raise + #except Exception as e: + # import traceback;traceback.print_exc() + # import pdb;pdb.set_trace() + finally: + if not nochain: + _chain_profile_call(frame,event,arg) + +def pytest_sessionfinish(session): + sys.setprofile(_trace_profile_call) diff --git a/hickle/loaders/load_astropy.py b/hickle/loaders/load_astropy.py index e5c59204..a9243919 100644 --- a/hickle/loaders/load_astropy.py +++ b/hickle/loaders/load_astropy.py @@ -8,6 +8,7 @@ import numpy as np # hickle imports +from hickle.helpers import no_compression # %% FUNCTION DEFINITIONS @@ -25,7 +26,7 @@ def create_astropy_quantity(py_obj, h_group, name, **kwargs): """ d = h_group.create_dataset(name, data=py_obj.value, dtype='float64', - **kwargs) + **no_compression(kwargs)) d.attrs['unit'] = py_obj.unit.to_string().encode('ascii') return d,() @@ -44,7 +45,7 @@ def create_astropy_angle(py_obj, h_group, name, **kwargs): """ d = h_group.create_dataset(name, data=py_obj.value, dtype='float64', - **kwargs) + **no_compression(kwargs)) d.attrs['unit'] = py_obj.unit.to_string().encode('ascii') return d,() @@ -118,7 +119,7 @@ def create_astropy_constant(py_obj, h_group, name, **kwargs): """ d = h_group.create_dataset(name, data=py_obj.value, dtype='float64', - **kwargs) + **no_compression(kwargs)) d.attrs["unit"] = py_obj.unit.to_string().encode('ascii') d.attrs["abbrev"] = py_obj.abbrev.encode('ascii') d.attrs["name"] = py_obj.name.encode('ascii') diff --git a/hickle/loaders/load_numpy.py b/hickle/loaders/load_numpy.py index c0733506..fb06fa02 100644 --- a/hickle/loaders/load_numpy.py +++ b/hickle/loaders/load_numpy.py @@ -94,7 +94,7 @@ def create_np_array_dataset(py_obj, h_group, name, **kwargs): h_node = h_group.create_group(name) sub_items = ("data",py_obj,{},kwargs), else: - h_node = h_group.create_dataset(name, data=py_obj, **kwargs) + h_node = h_group.create_dataset(name, data=py_obj, **( no_compression(kwargs) if "bytes" in dtype.name else kwargs )) sub_items = () h_node.attrs['np_dtype'] = dtype.str.encode('ascii') return h_node,sub_items diff --git a/hickle/loaders/load_pandas.py b/hickle/loaders/load_pandas.py index b1a9edf4..4cda663b 100644 --- a/hickle/loaders/load_pandas.py +++ b/hickle/loaders/load_pandas.py @@ -1,5 +1,6 @@ import pandas as pd +print("pandas",pd.__version__) # TODO: populate with classes to load class_register = [] exclude_register = [] diff --git a/hickle/tests/test_02_hickle_lookup.py b/hickle/tests/test_02_hickle_lookup.py index 54f78d9f..dd5b277c 100644 --- a/hickle/tests/test_02_hickle_lookup.py +++ b/hickle/tests/test_02_hickle_lookup.py @@ -27,7 +27,7 @@ from hickle.helpers import PyContainer,not_dumpable from hickle.loaders import optional_loaders, attribute_prefix import hickle.lookup as lookup - + # Set current working directory to the temporary directory local.get_temproot().chdir() @@ -542,7 +542,7 @@ def test_type_legacy_mro(): assert lookup.type_legacy_mro(function_to_dump) == (function_to_dump,) -def test_create_pickled_dataset(h5_data): +def test_create_pickled_dataset(h5_data,compression_kwargs): """ tests create_pickled_dataset, load_pickled_data function and PickledContainer """ @@ -552,7 +552,7 @@ def test_create_pickled_dataset(h5_data): py_object = ClassToDump('hello',1) data_set_name = "greetings" with 
pytest.warns(lookup.SerializedWarning,match = r".*type\s+not\s+understood,\s+data\s+is\s+serialized:.*") as warner: - h5_node,subitems = lookup.create_pickled_dataset(py_object, h5_data,data_set_name) + h5_node,subitems = lookup.create_pickled_dataset(py_object, h5_data,data_set_name,**compression_kwargs) assert isinstance(h5_node,h5py.Dataset) and not subitems and iter(subitems) assert bytes(h5_node[()]) == pickle.dumps(py_object) and h5_node.name.rsplit('/',1)[-1] == data_set_name assert lookup.load_pickled_data(h5_node,b'pickle',object) == py_object @@ -569,6 +569,7 @@ def test__DictItemContainer(): assert container.convert() is my_bike_lock +#@pytest.mark.no_compression def test__moc_numpy_array_object_lambda(): """ test the _moc_numpy_array_object_lambda function @@ -583,6 +584,7 @@ def test__moc_numpy_array_object_lambda(): data = ['hello','world'] assert lookup._moc_numpy_array_object_lambda(data) == data[0] +#@pytest.mark.no_compression def test_fix_lambda_obj_type(): """ test _moc_numpy_array_object_lambda function it self. When invokded @@ -783,26 +785,27 @@ def test_ReferenceManager_context(h5_data): assert memo._overlay is None read_only_handle.close() -def test_ReferenceManager_store_type(h5_data): +def test_ReferenceManager_store_type(h5_data,compression_kwargs): """ test ReferenceManager.store_type method which sets 'type' attribute reference to appropriate py_obj_type entry within 'hickle_types_table' """ h_node = h5_data.create_group('some_list') with lookup.ReferenceManager.create_manager(h5_data) as memo: - memo.store_type(h_node,object,None) + memo.store_type(h_node,object,None,**compression_kwargs) assert len(memo._py_obj_type_table) == 0 and not memo._py_obj_type_link and not memo._base_type_link with pytest.raises(lookup.LookupError): - memo.store_type(h_node,list,None) + memo.store_type(h_node,list,None,**compression_kwargs) with pytest.raises(ValueError): - memo.store_type(h_node,list,b'') - memo.store_type(h_node,list,b'list') + memo.store_type(h_node,list,b'',**compression_kwargs) + memo.store_type(h_node,list,b'list',**compression_kwargs) assert isinstance(h_node.attrs['type'],h5py.Reference) type_table_entry = h5_data.file[h_node.attrs['type']] assert pickle.loads(type_table_entry[()]) is list assert isinstance(type_table_entry.attrs['base_type'],h5py.Reference) assert h5_data.file[type_table_entry.attrs['base_type']].name.rsplit('/',1)[-1].encode('ascii') == b'list' +@pytest.mark.no_compression def test_ReferenceManager_get_manager(h5_data): h_node = h5_data.create_group('some_list') item_data = np.array(memoryview(b'hallo welt lore grueszet dich ipsum aus der lore von ipsum gelort in ipsum'),copy=False) @@ -821,6 +824,7 @@ def test_ReferenceManager_get_manager(h5_data): with pytest.raises(lookup.ReferenceError): manager = lookup.ReferenceManager.get_manager(h_item) +@pytest.mark.no_compression def test_ReferenceManager_resolve_type(h5_data): """ test ReferenceManager.reslove_type method which tries to resolve @@ -902,7 +906,7 @@ def test_ExpandReferenceContainer(h5_data): content = np.array(subitem[()]) sub_container.append(name,content,subitem.attrs) -def test_create_compact_dataset(h5_data): +def test_create_compact_dataset(h5_data,compression_kwargs): """ test create_compact_dataset, load_compact_dataset function and CompactContainer object @@ -914,11 +918,11 @@ def test_create_compact_dataset(h5_data): data_set_name = ("some_object","some_object_compact","some_object_dataset","some_object_compact_off") with pytest.warns(lookup.SerializedWarning,match = 
r".*type\s+not\s+understood,\s+data\s+is\s+serialized:.*") as warner: - h5_node,subitems = lookup.create_compact_dataset(py_obj,h5_data,data_set_name[0]) + h5_node,subitems = lookup.create_compact_dataset(py_obj,h5_data,data_set_name[0],**compression_kwargs) assert isinstance(h5_node,h5py.Dataset) and not subitems and iter(subitems) assert bytes(h5_node[()]) == pickle.dumps(py_obj) and h5_node.name.rsplit('/',1)[-1] == data_set_name[0] assert lookup.load_pickled_data(h5_node,b'pickle',object) == py_obj - h5_node_off,subitems = lookup.create_compact_dataset(py_obj_compact_off,h5_data,data_set_name[3]) + h5_node_off,subitems = lookup.create_compact_dataset(py_obj_compact_off,h5_data,data_set_name[3],**compression_kwargs) assert isinstance(h5_node_off,h5py.Dataset) and not subitems and iter(subitems) assert bytes(h5_node_off[()]) == pickle.dumps(py_obj_compact_off) and h5_node_off.name.rsplit('/',1)[-1] == data_set_name[3] assert lookup.load_pickled_data(h5_node_off,b'pickle',object) == py_obj_compact_off @@ -955,13 +959,13 @@ def convert(self): with lookup.ReferenceManager.create_manager(h5_data) as memo: with lookup.LoaderManager.create_manager(h5_data) as loader: - h5_node_compact_set,subitems = lookup.create_compact_dataset(py_obj_compact_set,h5_data,data_set_name[2]) - memo.store_type(h5_node_compact_set,ClassToDumpCompactDataset,b'!compact') + h5_node_compact_set,subitems = lookup.create_compact_dataset(py_obj_compact_set,h5_data,data_set_name[2],**compression_kwargs) + memo.store_type(h5_node_compact_set,ClassToDumpCompactDataset,b'!compact',**compression_kwargs) assert isinstance(h5_node_compact_set,h5py.Dataset) and not subitems and iter(subitems) assert h5_node_compact_set.name.rsplit('/',1)[-1] == data_set_name[2] assert lookup.load_compact_dataset(h5_node_compact_set,b'!compact!',ClassToDumpCompactDataset) == py_obj_compact_set - h5_node_compact,subitems = lookup.create_compact_dataset(py_obj_compact,h5_data,data_set_name[1]) - memo.store_type(h5_node_compact,ClassToDumpCompact,b'!compact') + h5_node_compact,subitems = lookup.create_compact_dataset(py_obj_compact,h5_data,data_set_name[1],**compression_kwargs) + memo.store_type(h5_node_compact,ClassToDumpCompact,b'!compact',**compression_kwargs) assert isinstance(h5_node_compact,h5py.Group) and subitems and iter(subitems) assert h5_node_compact.name.rsplit('/',1)[-1] == data_set_name[1] expand_container = lookup.CompactContainer(h5_node_compact.attrs,b'!compact!',ClassToDumpCompact) @@ -1077,6 +1081,7 @@ def convert(self): lookup.LoaderManager.__hkl_container__[None].update(backup__hkl_container__None) +@pytest.mark.no_compression def test_register_compact_expand(): """ test register_compact_expand function @@ -1110,6 +1115,8 @@ def test_register_compact_expand(): if __name__ == "__main__": from _pytest.monkeypatch import monkeypatch from _pytest.fixtures import FixtureRequest + from hickle.tests.conftest import compression_kwargs + for table in loader_table(): test_LoaderManager_register_class(table) for table in loader_table(): @@ -1134,8 +1141,11 @@ def test_register_compact_expand(): ): test_LoaderManager_load_loader(table,h5_root,monkey) test_type_legacy_mro() - for h5_root in h5_data(FixtureRequest(test_create_pickled_dataset)): - test_create_pickled_dataset(h5_root) + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_create_pickled_dataset),) + ): + test_create_pickled_dataset(h5_root,keywords) test__DictItemContainer() test__moc_numpy_array_object_lambda() 
test_fix_lambda_obj_type() @@ -1152,14 +1162,19 @@ def test_register_compact_expand(): test_ReferenceManager_context(h5_root) for h5_root in h5_data(FixtureRequest(test_ReferenceManager_get_manager)): test_ReferenceManager_get_manager(h5_root) - for h5_root in h5_data(FixtureRequest(test_ReferenceManager_store_type)): - test_ReferenceManager_store_type(h5_root) + for h5_root,compression_kwargs in ( + h5_data(FixtureRequest(test_ReferenceManager_store_type)) + ): + test_ReferenceManager_store_type(h5_root,compression_kwargs) for h5_root in h5_data(FixtureRequest(test_ReferenceManager_resolve_type)): test_ReferenceManager_resolve_type(h5_root) for h5_root in h5_data(FixtureRequest(test_ExpandReferenceContainer)): test_ExpandReferenceContainer(h5_root) - for h5_root in h5_data(FixtureRequest(test_create_compact_dataset)): - test_create_compact_dataset(h5_root) + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_create_compact_dataset),) + ): + test_create_compact_dataset(h5_root,keywords) test_register_compact_expand() diff --git a/hickle/tests/test_03_load_builtins.py b/hickle/tests/test_03_load_builtins.py index 1e7a7c89..696c2f6c 100644 --- a/hickle/tests/test_03_load_builtins.py +++ b/hickle/tests/test_03_load_builtins.py @@ -48,28 +48,28 @@ def h5_data(request): # %% FUNCTION DEFINITIONS -def test_scalar_dataset(h5_data): +def test_scalar_dataset(h5_data,compression_kwargs): """ tests creation and loading of datasets for scalar values """ # check that scalar value is properly handled floatvalue = 5.2 - h_dataset,subitems= load_builtins.create_scalar_dataset(floatvalue,h5_data,"floatvalue") + h_dataset,subitems= load_builtins.create_scalar_dataset(floatvalue,h5_data,"floatvalue",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and h_dataset[()] == floatvalue assert not [ item for item in subitems ] assert load_builtins.load_scalar_dataset(h_dataset,b'float',float) == floatvalue # check that intger value less thatn 64 bit is stored as int intvalue = 11 - h_dataset,subitems = load_builtins.create_scalar_dataset(intvalue,h5_data,"intvalue") + h_dataset,subitems = load_builtins.create_scalar_dataset(intvalue,h5_data,"intvalue",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and h_dataset[()] == intvalue assert not [ item for item in subitems ] assert load_builtins.load_scalar_dataset(h_dataset,b'int',int) == intvalue # check that integer larger than 64 bit is stored as ascii byte string non_mappable_int = int(2**65) - h_dataset,subitems = load_builtins.create_scalar_dataset(non_mappable_int,h5_data,"non_mappable_int") + h_dataset,subitems = load_builtins.create_scalar_dataset(non_mappable_int,h5_data,"non_mappable_int",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) assert bytearray(h_dataset[()]) == str(non_mappable_int).encode('utf8') assert not [ item for item in subitems ] @@ -77,31 +77,31 @@ def test_scalar_dataset(h5_data): # check that integer larger than 64 bit is stored as ascii byte string non_mappable_neg_int = -int(-2**63-1) - h_dataset,subitems = load_builtins.create_scalar_dataset(non_mappable_neg_int,h5_data,"non_mappable_neg_int") + h_dataset,subitems = load_builtins.create_scalar_dataset(non_mappable_neg_int,h5_data,"non_mappable_neg_int",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) assert bytearray(h_dataset[()]) == str(non_mappable_neg_int).encode('utf8') assert not [ item for item in subitems ] assert 
load_builtins.load_scalar_dataset(h_dataset,b'int',int) == non_mappable_neg_int -def test_non_dataset(h5_data): +def test_non_dataset(h5_data,compression_kwargs): """ that None value is properly stored """ - h_dataset,subitems = load_builtins.create_none_dataset(None,h5_data,"None_value") + h_dataset,subitems = load_builtins.create_none_dataset(None,h5_data,"None_value",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and h_dataset.shape is None and h_dataset.dtype == 'V1' assert not [ item for item in subitems ] assert load_builtins.load_none_dataset(h_dataset,b'None',None.__class__) is None -def test_listlike_dataset(h5_data): +def test_listlike_dataset(h5_data,compression_kwargs): """ test storing and loading of list like data """ # check that empty tuple is stored properly empty_tuple = () - h_dataset,subitems = load_builtins.create_listlike_dataset(empty_tuple, h5_data, "empty_tuple") + h_dataset,subitems = load_builtins.create_listlike_dataset(empty_tuple, h5_data, "empty_tuple",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and h_dataset.size is None assert not subitems and iter(subitems) assert load_builtins.load_list_dataset(h_dataset,b'tuple',tuple) == empty_tuple @@ -109,7 +109,7 @@ def test_listlike_dataset(h5_data): # check that string data is stored properly stored as array of bytes # which supports compression stringdata = "string_data" - h_dataset,subitems = load_builtins.create_listlike_dataset(stringdata, h5_data, "string_data") + h_dataset,subitems = load_builtins.create_listlike_dataset(stringdata, h5_data, "string_data",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and not [ item for item in subitems ] assert bytearray(h_dataset[()]).decode("utf8") == stringdata assert h_dataset.attrs["str_type"].decode("ascii") == 'str' @@ -118,7 +118,7 @@ def test_listlike_dataset(h5_data): # check that byte string is proprly stored as array of bytes which # supports compression bytesdata = b'bytes_data' - h_dataset,subitems = load_builtins.create_listlike_dataset(bytesdata, h5_data, "bytes_data") + h_dataset,subitems = load_builtins.create_listlike_dataset(bytesdata, h5_data, "bytes_data",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and not [ item for item in subitems ] assert bytes(h_dataset[()]) == bytesdata assert h_dataset.attrs["str_type"].decode("ascii") == 'bytes' @@ -132,7 +132,7 @@ def test_listlike_dataset(h5_data): # check that list of single type is stored as dataset of same type homogenous_list = [ 1, 2, 3, 4, 5, 6] - h_dataset,subitems = load_builtins.create_listlike_dataset(homogenous_list,h5_data,"homogenous_list") + h_dataset,subitems = load_builtins.create_listlike_dataset(homogenous_list,h5_data,"homogenous_list",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and not [ item for item in subitems ] assert h_dataset[()].tolist() == homogenous_list and h_dataset.dtype == int assert load_builtins.load_list_dataset(h_dataset,b'list',list) == homogenous_list @@ -140,7 +140,7 @@ def test_listlike_dataset(h5_data): # check that list of different scalar types for which a least common type exists # is stored using a dataset mixed_dtype_list = [ 1, 2.5, 3.8, 4, 5, 6] - h_dataset,subitems = load_builtins.create_listlike_dataset(mixed_dtype_list,h5_data,"mixed_dtype_list") + h_dataset,subitems = load_builtins.create_listlike_dataset(mixed_dtype_list,h5_data,"mixed_dtype_list",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and not [ item for item in subitems ] assert h_dataset[()].tolist() 
== mixed_dtype_list and h_dataset.dtype == float assert load_builtins.load_list_dataset(h_dataset,b'list',list) == mixed_dtype_list @@ -149,7 +149,7 @@ def test_listlike_dataset(h5_data): # further check that for groups representing list the index of items is either # provided via item_index attribute or can be read from name of item not_so_homogenous_list = [ 1, 2, 3, [4],5 ,6 ] - h_dataset,subitems = load_builtins.create_listlike_dataset(not_so_homogenous_list,h5_data,"not_so_homogenous_list") + h_dataset,subitems = load_builtins.create_listlike_dataset(not_so_homogenous_list,h5_data,"not_so_homogenous_list",**compression_kwargs) assert isinstance(h_dataset,h5.Group) item_name = "data{:d}" index = -1 @@ -158,9 +158,9 @@ def test_listlike_dataset(h5_data): index_from_string = load_builtins.ListLikeContainer(h_dataset.attrs,b'list',list) for index,(name,item,attrs,kwargs) in enumerate(iter(subitems1)): assert item_name.format(index) == name and item == not_so_homogenous_list[index] - assert attrs == {"item_index":index} and kwargs == {} + assert attrs == {"item_index":index} and kwargs == compression_kwargs if isinstance(item,list): - item_dataset,_ = load_builtins.create_listlike_dataset(item,h_dataset,name) + item_dataset,_ = load_builtins.create_listlike_dataset(item,h_dataset,name,**compression_kwargs) else: item_dataset = h_dataset.create_dataset(name,data = item) item_dataset.attrs.update(attrs) @@ -177,7 +177,7 @@ def test_listlike_dataset(h5_data): no_num_items_container = load_builtins.ListLikeContainer(no_num_items,b'list',list) for index,(name,item,attrs,kwargs) in enumerate(iter(subitems2)): assert item_name.format(index) == name and item == not_so_homogenous_list[index] - assert attrs == {"item_index":index} and kwargs == {} + assert attrs == {"item_index":index} and kwargs == compression_kwargs item_dataset = h_dataset.get(name,None) no_num_items_container.append(name,item,{}) assert index + 1 == len(not_so_homogenous_list) @@ -189,7 +189,7 @@ def test_listlike_dataset(h5_data): # from the taile of its name. 
Also check that ListLikeContainer.append # raises exceptoin in case value for item_index already has been loaded object_list = [ [4, 5 ] ,6, [ 1, 2, 3 ] ] - h_dataset,subitems = load_builtins.create_listlike_dataset(object_list,h5_data,"object_list") + h_dataset,subitems = load_builtins.create_listlike_dataset(object_list,h5_data,"object_list",**compression_kwargs) assert isinstance(h_dataset,h5.Group) item_name = "data{:d}" wrong_item_name = item_name + "_ni" @@ -198,9 +198,9 @@ def test_listlike_dataset(h5_data): index_from_string = load_builtins.ListLikeContainer(h_dataset.attrs,b'list',list) for index,(name,item,attrs,kwargs) in enumerate(iter(subitems)): assert item_name.format(index) == name and item == object_list[index] - assert attrs == {"item_index":index} and kwargs == {} + assert attrs == {"item_index":index} and kwargs == compression_kwargs if isinstance(item,list): - item_dataset,_ = load_builtins.create_listlike_dataset(item,h_dataset,name) + item_dataset,_ = load_builtins.create_listlike_dataset(item,h_dataset,name,**compression_kwargs) else: item_dataset = h_dataset.create_dataset(name,data = item) item_dataset.attrs.update(attrs) @@ -224,7 +224,7 @@ def test_listlike_dataset(h5_data): # assert that list of strings where first string has lenght 1 is properly mapped # to group string_list = test_set = ['I','confess','appriciate','hickle','times'] - h_dataset,subitems = load_builtins.create_listlike_dataset(string_list,h5_data,"string_list") + h_dataset,subitems = load_builtins.create_listlike_dataset(string_list,h5_data,"string_list",**compression_kwargs) assert isinstance(h_dataset,h5.Group) item_name = "data{:d}" index = -1 @@ -232,7 +232,7 @@ def test_listlike_dataset(h5_data): index_from_string = load_builtins.ListLikeContainer(h_dataset.attrs,b'list',list) for index,(name,item,attrs,kwargs) in enumerate(iter(subitems)): assert item_name.format(index) == name and item == string_list[index] - assert attrs == {"item_index":index} and kwargs == {} + assert attrs == {"item_index":index} and kwargs == compression_kwargs item_dataset = h_dataset.create_dataset(name,data = item) item_dataset.attrs.update(attrs) loaded_list.append(name,item,item_dataset.attrs) @@ -244,7 +244,7 @@ def test_listlike_dataset(h5_data): # assert that list which contains numeric values and strings is properly mapped # to group mixed_string_list = test_set = [12,2.8,'I','confess','appriciate','hickle','times'] - h_dataset,subitems = load_builtins.create_listlike_dataset(mixed_string_list,h5_data,"mixed_string_list") + h_dataset,subitems = load_builtins.create_listlike_dataset(mixed_string_list,h5_data,"mixed_string_list",**compression_kwargs) assert isinstance(h_dataset,h5.Group) item_name = "data{:d}" index = -1 @@ -252,7 +252,7 @@ def test_listlike_dataset(h5_data): index_from_string = load_builtins.ListLikeContainer(h_dataset.attrs,b'list',list) for index,(name,item,attrs,kwargs) in enumerate(iter(subitems)): assert item_name.format(index) == name and item == mixed_string_list[index] - assert attrs == {"item_index":index} and kwargs == {} + assert attrs == {"item_index":index} and kwargs == compression_kwargs item_dataset = h_dataset.create_dataset(name,data = item) item_dataset.attrs.update(attrs) loaded_list.append(name,item,item_dataset.attrs) @@ -262,14 +262,14 @@ def test_listlike_dataset(h5_data): assert index_from_string.convert() == mixed_string_list -def test_set_container(h5_data): +def test_set_container(h5_data,compression_kwargs): """ tests storing and loading of set """ # check that 
set of strings is store as group test_set = {'I','confess','appriciate','hickle','times'} - h_setdataset,subitems = load_builtins.create_setlike_dataset(test_set,h5_data,"test_set") + h_setdataset,subitems = load_builtins.create_setlike_dataset(test_set,h5_data,"test_set",**compression_kwargs) set_container = load_builtins.SetLikeContainer(h_setdataset.attrs,b'set',set) for name,item,attrs,kwargs in subitems: set_container.append(name,item,attrs) @@ -277,27 +277,27 @@ def test_set_container(h5_data): # check that set of single bytes is stored as single dataset test_set_2 = set(b"hello world") - h_setdataset,subitems = load_builtins.create_setlike_dataset(test_set_2,h5_data,"test_set_2") + h_setdataset,subitems = load_builtins.create_setlike_dataset(test_set_2,h5_data,"test_set_2",**compression_kwargs) assert isinstance(h_setdataset,h5.Dataset) and set(h_setdataset[()]) == test_set_2 assert not subitems and iter(subitems) assert load_builtins.load_list_dataset(h_setdataset,b'set',set) == test_set_2 # check that set containing byte strings is stored as group test_set_3 = set((item.encode("utf8") for item in test_set)) - h_setdataset,subitems = load_builtins.create_setlike_dataset(test_set_3,h5_data,"test_set_3") + h_setdataset,subitems = load_builtins.create_setlike_dataset(test_set_3,h5_data,"test_set_3",**compression_kwargs) set_container = load_builtins.SetLikeContainer(h_setdataset.attrs,b'set',set) for name,item,attrs,kwargs in subitems: set_container.append(name,item,attrs) assert set_container.convert() == test_set_3 # check that empty set is represented by emtpy dataset - h_setdataset,subitems = load_builtins.create_setlike_dataset(set(),h5_data,"empty_set") + h_setdataset,subitems = load_builtins.create_setlike_dataset(set(),h5_data,"empty_set",**compression_kwargs) assert isinstance(h_setdataset,h5.Dataset) and h_setdataset.size == 0 assert not subitems and iter(subitems) assert load_builtins.load_list_dataset(h_setdataset,b'set',set) == set() -def test_dictlike_dataset(h5_data): +def test_dictlike_dataset(h5_data,compression_kwargs): """ test storing and loading of dict """ @@ -321,7 +321,7 @@ class KeyClass(): # check that string and byte string keys are mapped to dataset or group name # check that scalar dict keys are converted to their string representation # check that for all other keys a key value pair is created - h_datagroup,subitems = load_builtins.create_dictlike_dataset(allkeys_dict,h5_data,"allkeys_dict") + h_datagroup,subitems = load_builtins.create_dictlike_dataset(allkeys_dict,h5_data,"allkeys_dict",**compression_kwargs) assert isinstance(h_datagroup,h5.Group) invalid_key = b'' last_entry = -1 @@ -380,7 +380,7 @@ class KeyClass(): # check that order of OrderedDict dict keys is not altered on loading data from # hickle file - h_datagroup,subitems = load_builtins.create_dictlike_dataset(ordered_dict,h5_data,"ordered_dict") + h_datagroup,subitems = load_builtins.create_dictlike_dataset(ordered_dict,h5_data,"ordered_dict",**compression_kwargs) assert isinstance(h_datagroup,h5.Group) last_entry = -1 load_ordered_dict = load_builtins.DictLikeContainer(h_datagroup.attrs,b'dict',collections.OrderedDict) @@ -408,16 +408,32 @@ class KeyClass(): # %% MAIN SCRIPT if __name__ == "__main__": from _pytest.fixtures import FixtureRequest - for h5_root in h5_data(FixtureRequest(test_scalar_dataset)): - test_scalar_dataset(h5_root) - for h5_root in h5_data(FixtureRequest(test_non_dataset)): - test_non_dataset(h5_root) - for h5_root in h5_data(FixtureRequest(test_listlike_dataset)): - 
test_listlike_dataset(h5_root) - for h5_root in h5_data(FixtureRequest(test_set_container)): - test_set_container(h5_root) - for h5_root in h5_data(FixtureRequest(test_dictlike_dataset)): - test_dictlike_dataset(h5_root) + from hickle.tests.conftest import compression_kwargs + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_scalar_dataset),) + ): + test_scalar_dataset(h5_root,keywords) + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_non_dataset),) + ): + test_non_dataset(h5_root,keywords) + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_listlike_dataset),) + ): + test_listlike_dataset(h5_root,keywords) + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_set_container),) + ): + test_set_container(h5_root,keywords) + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_dictlike_dataset),) + ): + test_dictlike_dataset(h5_root,keywords) diff --git a/hickle/tests/test_04_load_numpy.py b/hickle/tests/test_04_load_numpy.py index 7bfe086f..7396dd9b 100644 --- a/hickle/tests/test_04_load_numpy.py +++ b/hickle/tests/test_04_load_numpy.py @@ -57,7 +57,7 @@ def h5_data(request): # %% FUNCTION DEFINITIONS -def test_create_np_scalar(h5_data): +def test_create_np_scalar(h5_data,compression_kwargs): """ tests proper storage and loading of numpy scalars """ @@ -65,7 +65,7 @@ def test_create_np_scalar(h5_data): # check that scalar dataset is created for nupy scalar scalar_data = np.float64(np.pi) dtype = scalar_data.dtype - h_dataset,subitems = load_numpy.create_np_scalar_dataset(scalar_data,h5_data,"scalar_data") + h_dataset,subitems = load_numpy.create_np_scalar_dataset(scalar_data,h5_data,"scalar_data",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and iter(subitems) and not subitems assert h_dataset.attrs['np_dtype'] == dtype.str.encode('ascii') assert h_dataset[()] == scalar_data @@ -74,22 +74,22 @@ def test_create_np_scalar(h5_data): # check that numpy.bool_ scarlar is properly stored and reloaded scalar_data = np.bool_(True) dtype = scalar_data.dtype - h_dataset,subitems = load_numpy.create_np_scalar_dataset(scalar_data,h5_data,"generic_data") + h_dataset,subitems = load_numpy.create_np_scalar_dataset(scalar_data,h5_data,"generic_data",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and iter(subitems) and not subitems assert h_dataset.attrs['np_dtype'] == dtype.str.encode('ascii') and h_dataset[()] == scalar_data assert load_numpy.load_np_scalar_dataset(h_dataset,b'np_scalar',scalar_data.__class__) == scalar_data -def test_create_np_dtype(h5_data): +def test_create_np_dtype(h5_data,compression_kwargs): """ test proper creation and loading of dataset representing numpy dtype """ dtype = np.dtype(np.int16) - h_dataset,subitems = load_numpy.create_np_dtype(dtype, h5_data,"dtype_string") + h_dataset,subitems = load_numpy.create_np_dtype(dtype, h5_data,"dtype_string",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and iter(subitems) and not subitems assert bytes(h_dataset[()]).decode('ascii') == dtype.str assert load_numpy.load_np_dtype_dataset(h_dataset,'np_dtype',np.dtype) == dtype -def test_create_np_ndarray(h5_data): +def test_create_np_ndarray(h5_data,compression_kwargs): """ test proper creatoin and loading of numpy ndarray """ @@ 
-97,7 +97,7 @@ def test_create_np_ndarray(h5_data): # check that numpy array representing python utf8 string is properly # stored as bytearray dataset and reloaded from np_array_data = np.array("im python string") - h_dataset,subitems = load_numpy.create_np_array_dataset(np_array_data,h5_data,"numpy_string_array") + h_dataset,subitems = load_numpy.create_np_array_dataset(np_array_data,h5_data,"numpy_string_array",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and iter(subitems) and not subitems assert bytes(h_dataset[()]) == np_array_data.tolist().encode("utf8") assert h_dataset.attrs["np_dtype"] == np_array_data.dtype.str.encode("ascii") @@ -106,7 +106,7 @@ def test_create_np_ndarray(h5_data): # chekc that numpy array representing python bytes string is properly # stored as bytearray dataset and reloaded from np_array_data = np.array(b"im python bytes") - h_dataset,subitems = load_numpy.create_np_array_dataset(np_array_data,h5_data,"numpy_bytes_array") + h_dataset,subitems = load_numpy.create_np_array_dataset(np_array_data,h5_data,"numpy_bytes_array",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and iter(subitems) and not subitems assert h_dataset[()] == np_array_data.tolist() assert h_dataset.attrs["np_dtype"] == np_array_data.dtype.str.encode("ascii") @@ -115,8 +115,12 @@ def test_create_np_ndarray(h5_data): # check that numpy array with dtype object representing list of various kinds # of objects is converted to list before storing and reloaded proprly from this # list representation - np_array_data = np.array([[NESTED_DICT], ('What is this?',), {1, 2, 3, 7, 1}]) - h_dataset,subitems = load_numpy.create_np_array_dataset(np_array_data,h5_data,"numpy_list_object_array") + + # NOTE: simplified as mixing items of varying length receives + # VisibleDeprecationWarning from newer numpy versions + #np_array_data = np.array([[NESTED_DICT], ('What is this?',), {1, 2, 3, 7, 1}]) + np_array_data = np.array([NESTED_DICT])#, ('What is this?',), {1, 2, 3, 7, 1}]) + h_dataset,subitems = load_numpy.create_np_array_dataset(np_array_data,h5_data,"numpy_list_object_array",**compression_kwargs) ndarray_container = load_numpy.NDArrayLikeContainer(h_dataset.attrs,b'ndarray',np_array_data.__class__) assert isinstance(h_dataset,h5.Group) and iter(subitems) assert h_dataset.attrs["np_dtype"] == np_array_data.dtype.str.encode("ascii") @@ -130,7 +134,7 @@ def test_create_np_ndarray(h5_data): # is properly converted to list of strings and restored from its list # representation np_array_data = np.array(["1313e", "was", "maybe?", "here"]) - h_dataset,subitems = load_numpy.create_np_array_dataset(np_array_data,h5_data,"numpy_list_of_strings_array") + h_dataset,subitems = load_numpy.create_np_array_dataset(np_array_data,h5_data,"numpy_list_of_strings_array",**compression_kwargs) ndarray_container = load_numpy.NDArrayLikeContainer(h_dataset.attrs,b'ndarray',np_array_data.__class__) assert isinstance(h_dataset,h5.Group) and iter(subitems) assert h_dataset.attrs["np_dtype"] == np_array_data.dtype.str.encode("ascii") @@ -144,7 +148,7 @@ def test_create_np_ndarray(h5_data): # by ndarray.tolist method is properly stored according to type of object and # restored from this representation accordingly np_array_data = np.array(NESTED_DICT) - h_dataset,subitems = load_numpy.create_np_array_dataset(np_array_data,h5_data,"numpy_object_array") + h_dataset,subitems = load_numpy.create_np_array_dataset(np_array_data,h5_data,"numpy_object_array",**compression_kwargs) ndarray_container = 
load_numpy.NDArrayLikeContainer(h_dataset.attrs,b'ndarray',np_array_data.__class__) ndarray_pickle_container = load_numpy.NDArrayLikeContainer(h_dataset.attrs,b'ndarray',np_array_data.__class__) assert isinstance(h_dataset,h5.Group) and iter(subitems) @@ -174,7 +178,7 @@ def test_create_np_ndarray(h5_data): # just PendingDeprecationWarning with pytest.warns(PendingDeprecationWarning): np_array_data = np.matrix([[1, 2], [3, 4]]) - h_dataset,subitems = load_numpy.create_np_array_dataset(np_array_data,h5_data,"numpy_matrix") + h_dataset,subitems = load_numpy.create_np_array_dataset(np_array_data,h5_data,"numpy_matrix",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and iter(subitems) and not subitems assert np.all(h_dataset[()] == np_array_data) assert h_dataset.attrs["np_dtype"] == np_array_data.dtype.str.encode("ascii") @@ -183,14 +187,14 @@ def test_create_np_ndarray(h5_data): assert isinstance(np_loaded_array_data,np.matrix) assert np_loaded_array_data.shape == np_array_data.shape -def test_create_np_masked_array(h5_data): +def test_create_np_masked_array(h5_data,compression_kwargs): """ test proper creation and loading of numpy.masked arrays """ # check that simple masked array is properly stored and loaded masked_array = np.ma.array([1, 2, 3, 4], dtype='float32', mask=[0, 1, 0, 0]) - h_datagroup,subitems = load_numpy.create_np_masked_array_dataset(masked_array, h5_data, "masked_array") + h_datagroup,subitems = load_numpy.create_np_masked_array_dataset(masked_array, h5_data, "masked_array",**compression_kwargs) masked_array_container = load_numpy.NDMaskedArrayContainer(h_datagroup.attrs,b'ndarray_masked',np.ma.array) assert isinstance(h_datagroup,h5.Group) and iter(subitems) assert h_datagroup.attrs["np_dtype"] == masked_array.dtype.str.encode("ascii") @@ -222,13 +226,26 @@ def test_create_np_masked_array(h5_data): # %% MAIN SCRIPT if __name__ == "__main__": from _pytest.fixtures import FixtureRequest - for h5_root in h5_data(FixtureRequest(test_create_np_scalar)): - test_create_np_scalar(h5_root) - for h5_root in h5_data(FixtureRequest(test_create_np_dtype)): - test_create_np_dtype(h5_root) - for h5_root in h5_data(FixtureRequest(test_create_np_ndarray)): - test_create_np_ndarray(h5_root) - for h5_root in h5_data(FixtureRequest(test_create_np_masked_array)): - test_create_np_masked_array(h5_root) + from hickle.tests.conftest import compression_kwargs + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_create_np_scalar),) + ): + test_create_np_scalar(h5_root,keywords) + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_create_np_dtype),) + ): + test_create_np_dtype(h5_root,keywords) + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_create_np_ndarray),) + ): + test_create_np_ndarray(h5_root,keywords) + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_create_np_masked_array),) + ): + test_create_np_masked_array(h5_root,keywords) diff --git a/hickle/tests/test_05_load_scipy.py b/hickle/tests/test_05_load_scipy.py index 5d13f943..536e60e9 100644 --- a/hickle/tests/test_05_load_scipy.py +++ b/hickle/tests/test_05_load_scipy.py @@ -37,7 +37,7 @@ def h5_data(request): # %% FUNCTION DEFINITIONS -def test_create_sparse_dataset(h5_data): +def test_create_sparse_dataset(h5_data,compression_kwargs): """ test creation and 
loading of sparse matrix """ @@ -55,7 +55,7 @@ def test_create_sparse_dataset(h5_data): sm3 = bsr_matrix((data, indices, indptr), shape=(6, 6)) # check that csr type matrix is properly stored and loaded - h_datagroup,subitems = load_scipy.create_sparse_dataset(sm1,h5_data,"csr_matrix") + h_datagroup,subitems = load_scipy.create_sparse_dataset(sm1,h5_data,"csr_matrix",**compression_kwargs) assert isinstance(h_datagroup,h5.Group) and iter(subitems) seen_items = dict((key,False) for key in ("data",'indices','indptr','shape')) sparse_container = load_scipy.SparseMatrixContainer(h_datagroup.attrs,b'csr_matrix',csr_matrix) @@ -67,7 +67,7 @@ def test_create_sparse_dataset(h5_data): assert np.all(reloaded.data == sm1.data) and reloaded.dtype == sm1.dtype and reloaded.shape == sm1.shape # check that csc type matrix is properly stored and loaded - h_datagroup,subitems = load_scipy.create_sparse_dataset(sm2,h5_data,"csc_matrix") + h_datagroup,subitems = load_scipy.create_sparse_dataset(sm2,h5_data,"csc_matrix",**compression_kwargs) assert isinstance(h_datagroup,h5.Group) and iter(subitems) seen_items = dict((key,False) for key in ("data",'indices','indptr','shape')) sparse_container = load_scipy.SparseMatrixContainer(h_datagroup.attrs,b'csc_matrix',csc_matrix) @@ -79,7 +79,7 @@ def test_create_sparse_dataset(h5_data): assert np.all(reloaded.data == sm2.data) and reloaded.dtype == sm2.dtype and reloaded.shape == sm2.shape # check that bsr type matrix is properly stored and loaded - h_datagroup,subitems = load_scipy.create_sparse_dataset(sm3,h5_data,"bsr_matrix") + h_datagroup,subitems = load_scipy.create_sparse_dataset(sm3,h5_data,"bsr_matrix",**compression_kwargs) assert isinstance(h_datagroup,h5.Group) and iter(subitems) seen_items = dict((key,False) for key in ("data",'indices','indptr','shape')) sparse_container = load_scipy.SparseMatrixContainer(h_datagroup.attrs,b'bsr_matrix',bsr_matrix) @@ -91,7 +91,7 @@ def test_create_sparse_dataset(h5_data): assert np.all(reloaded.data == sm3.data) and reloaded.dtype == sm3.dtype and reloaded.shape == sm3.shape # mimic hickle version 4.0.0 format to represent crs type matrix - h_datagroup,subitems = load_scipy.create_sparse_dataset(sm1,h5_data,"csr_matrix_filtered") + h_datagroup,subitems = load_scipy.create_sparse_dataset(sm1,h5_data,"csr_matrix_filtered",**compression_kwargs) sparse_container = load_scipy.SparseMatrixContainer(h_datagroup.attrs,b'csr_matrix',load_scipy.return_first) for name,item,attrs,kwargs in subitems: h_dataset = h_datagroup.create_dataset(name,data=item) @@ -126,5 +126,9 @@ def test_create_sparse_dataset(h5_data): # %% MAIN SCRIPT if __name__ == "__main__": from _pytest.fixtures import FixtureRequest - for h5_root in h5_data(FixtureRequest(test_create_sparse_dataset)): - test_create_sparse_dataset(h5_root) + from hickle.tests.conftest import compression_kwargs + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_create_sparse_dataset),) + ): + test_create_sparse_dataset(h5_root,keywords) diff --git a/hickle/tests/test_06_load_astropy.py b/hickle/tests/test_06_load_astropy.py index be7c5e66..a9304880 100644 --- a/hickle/tests/test_06_load_astropy.py +++ b/hickle/tests/test_06_load_astropy.py @@ -41,33 +41,33 @@ def h5_data(request): dummy_file.close() # %% FUNCTION DEFINITIONS -def test_create_astropy_quantity(h5_data): +def test_create_astropy_quantity(h5_data,compression_kwargs): """ test proper storage and loading of astorpy quantities """ for index,uu in 
enumerate(['m^3', 'm^3 / s', 'kg/pc']): a = Quantity(7, unit=uu) - h_dataset,subitems = load_astropy.create_astropy_quantity(a,h5_data,"quantity{}".format(index)) + h_dataset,subitems = load_astropy.create_astropy_quantity(a,h5_data,"quantity{}".format(index),**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems) assert h_dataset.attrs['unit'] == a.unit.to_string().encode("ascii") and h_dataset[()] == a.value reloaded = load_astropy.load_astropy_quantity_dataset(h_dataset,b'astropy_quantity',Quantity) assert reloaded == a and reloaded.unit == a.unit a *= a - h_dataset,subitems = load_astropy.create_astropy_quantity(a,h5_data,"quantity_sqr{}".format(index)) + h_dataset,subitems = load_astropy.create_astropy_quantity(a,h5_data,"quantity_sqr{}".format(index),**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems) assert h_dataset.attrs['unit'] == a.unit.to_string().encode("ascii") and h_dataset[()] == a.value reloaded = load_astropy.load_astropy_quantity_dataset(h_dataset,b'astropy_quantity',Quantity) assert reloaded == a and reloaded.unit == a.unit -def test_create_astropy_constant(h5_data): +def test_create_astropy_constant(h5_data,compression_kwargs): """ test proper storage and loading of astropy constants """ - h_dataset,subitems = load_astropy.create_astropy_constant(apc.G,h5_data,"apc_G") + h_dataset,subitems = load_astropy.create_astropy_constant(apc.G,h5_data,"apc_G",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems) assert h_dataset.attrs["unit"] == apc.G.unit.to_string().encode('ascii') assert h_dataset.attrs["abbrev"] == apc.G.abbrev.encode('ascii') @@ -77,7 +77,7 @@ def test_create_astropy_constant(h5_data): reloaded = load_astropy.load_astropy_constant_dataset(h_dataset,b'astropy_constant',apc.G.__class__) assert reloaded == apc.G and reloaded.dtype == apc.G.dtype - h_dataset,subitems = load_astropy.create_astropy_constant(apc.cgs.e,h5_data,"apc_cgs_e") + h_dataset,subitems = load_astropy.create_astropy_constant(apc.cgs.e,h5_data,"apc_cgs_e",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems) assert h_dataset.attrs["unit"] == apc.cgs.e.unit.to_string().encode('ascii') assert h_dataset.attrs["abbrev"] == apc.cgs.e.abbrev.encode('ascii') @@ -89,13 +89,13 @@ def test_create_astropy_constant(h5_data): assert reloaded == apc.cgs.e and reloaded.dtype == apc.cgs.e.dtype -def test_astropy_table(h5_data): +def test_astropy_table(h5_data,compression_kwargs): """ test proper storage and loading of astropy table """ t = Table([[1, 2], [3, 4]], names=('a', 'b'), meta={'name': 'test_thing'}) - h_dataset,subitems = load_astropy.create_astropy_table(t,h5_data,"astropy_table") + h_dataset,subitems = load_astropy.create_astropy_table(t,h5_data,"astropy_table",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems) assert np.all(h_dataset.attrs['colnames'] == [ cname.encode('ascii') for cname in t.colnames]) for metakey,metavalue in t.meta.items(): @@ -107,19 +107,19 @@ def test_astropy_table(h5_data): assert np.allclose(t['b'].astype('float32'),reloaded['b'].astype('float32')) -def test_astropy_quantity_array(h5_data): +def test_astropy_quantity_array(h5_data,compression_kwargs): """ tet proper storage and loading of array of astropy quantities """ a = Quantity([1, 2, 3], unit='m') - h_dataset,subitems = load_astropy.create_astropy_quantity(a,h5_data,"quantity_array") + 
h_dataset,subitems = load_astropy.create_astropy_quantity(a,h5_data,"quantity_array",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems) assert h_dataset.attrs['unit'] == a.unit.to_string().encode("ascii") and np.all(h_dataset[()] == a.value) reloaded = load_astropy.load_astropy_quantity_dataset(h_dataset,b'astropy_quantity',Quantity) assert np.all(reloaded == a) and reloaded.unit == a.unit -def test_astropy_time_array(h5_data): +def test_astropy_time_array(h5_data,compression_kwargs): """ test proper storage and loading of astropy time representations """ @@ -127,7 +127,7 @@ def test_astropy_time_array(h5_data): times = ['1999-01-01T00:00:00.123456789', '2010-01-01T00:00:00'] t1 = Time(times, format='isot', scale='utc') - h_dataset,subitems = load_astropy.create_astropy_time(t1,h5_data,'time1') + h_dataset,subitems = load_astropy.create_astropy_time(t1,h5_data,'time1',**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems) assert h_dataset.attrs['format'] == str(t1.format).encode('ascii') assert h_dataset.attrs['scale'] == str(t1.scale).encode('ascii') @@ -149,7 +149,7 @@ def test_astropy_time_array(h5_data): times = [58264, 58265, 58266] t1 = Time(times, format='mjd', scale='utc') - h_dataset,subitems = load_astropy.create_astropy_time(t1,h5_data,'time2') + h_dataset,subitems = load_astropy.create_astropy_time(t1,h5_data,'time2',**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems) assert h_dataset.attrs['format'] == str(t1.format).encode('ascii') assert h_dataset.attrs['scale'] == str(t1.scale).encode('ascii') @@ -162,14 +162,14 @@ def test_astropy_time_array(h5_data): assert reloaded.value[index] == t1.value[index] -def test_astropy_angle(h5_data): +def test_astropy_angle(h5_data,compression_kwargs): """ test proper storage of astropy angles """ for index,uu in enumerate(['radian', 'degree']): a = Angle(1.02, unit=uu) - h_dataset,subitems = load_astropy.create_astropy_angle(a,h5_data,"angle_{}".format(uu)) + h_dataset,subitems = load_astropy.create_astropy_angle(a,h5_data,"angle_{}".format(uu),**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems) assert h_dataset.attrs['unit'] == a.unit.to_string().encode('ascii') assert h_dataset[()] == a.value @@ -177,19 +177,19 @@ def test_astropy_angle(h5_data): assert reloaded == a and reloaded.unit == a.unit -def test_astropy_angle_array(h5_data): +def test_astropy_angle_array(h5_data,compression_kwargs): """ test proper storage and loading of arrays of astropy angles """ a = Angle([1, 2, 3], unit='degree') - h_dataset,subitems = load_astropy.create_astropy_angle(a,h5_data,"angle_array") + h_dataset,subitems = load_astropy.create_astropy_angle(a,h5_data,"angle_array",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems) assert h_dataset.attrs['unit'] == a.unit.to_string().encode('ascii') assert np.allclose(h_dataset[()] , a.value ) reloaded = load_astropy.load_astropy_angle_dataset(h_dataset,b'astropy_angle',a.__class__) assert np.all(reloaded == a) and reloaded.unit == a.unit -def test_astropy_skycoord(h5_data): +def test_astropy_skycoord(h5_data,compression_kwargs): """ test proper storage and loading of astropy sky coordinates """ @@ -197,7 +197,7 @@ def test_astropy_skycoord(h5_data): ra = Angle('1d20m', unit='degree') dec = Angle('33d0m0s', unit='degree') radec = SkyCoord(ra, dec) - h_dataset,subitems = 
load_astropy.create_astropy_skycoord(radec,h5_data,"astropy_skycoord_1") + h_dataset,subitems = load_astropy.create_astropy_skycoord(radec,h5_data,"astropy_skycoord_1",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems) assert h_dataset[()][...,0] == radec.data.lon.value assert h_dataset[()][...,1] == radec.data.lat.value @@ -210,7 +210,7 @@ def test_astropy_skycoord(h5_data): ra = Angle('1d20m', unit='hourangle') dec = Angle('33d0m0s', unit='degree') radec = SkyCoord(ra, dec) - h_dataset,subitems = load_astropy.create_astropy_skycoord(radec,h5_data,"astropy_skycoord_2") + h_dataset,subitems = load_astropy.create_astropy_skycoord(radec,h5_data,"astropy_skycoord_2",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems) assert h_dataset[()][...,0] == radec.data.lon.value assert h_dataset[()][...,1] == radec.data.lat.value @@ -220,7 +220,7 @@ def test_astropy_skycoord(h5_data): assert reloaded.ra.value == radec.ra.value assert reloaded.dec.value == radec.dec.value -def test_astropy_skycoord_array(h5_data): +def test_astropy_skycoord_array(h5_data,compression_kwargs): """ test proper storage and loading of astropy sky coordinates """ @@ -228,7 +228,7 @@ def test_astropy_skycoord_array(h5_data): ra = Angle(['1d20m', '0d21m'], unit='degree') dec = Angle(['33d0m0s', '-33d01m'], unit='degree') radec = SkyCoord(ra, dec) - h_dataset,subitems = load_astropy.create_astropy_skycoord(radec,h5_data,"astropy_skycoord_1") + h_dataset,subitems = load_astropy.create_astropy_skycoord(radec,h5_data,"astropy_skycoord_1",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems) assert np.allclose(h_dataset[()][...,0],radec.data.lon.value) assert np.allclose(h_dataset[()][...,1],radec.data.lat.value) @@ -241,7 +241,7 @@ def test_astropy_skycoord_array(h5_data): ra = Angle([['1d20m', '0d21m'], ['1d20m', '0d21m']], unit='hourangle') dec = Angle([['33d0m0s', '33d01m'], ['33d0m0s', '33d01m']], unit='degree') radec = SkyCoord(ra, dec) - h_dataset,subitems = load_astropy.create_astropy_skycoord(radec,h5_data,"astropy_skycoord_2") + h_dataset,subitems = load_astropy.create_astropy_skycoord(radec,h5_data,"astropy_skycoord_2",**compression_kwargs) assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems) assert np.allclose(h_dataset[()][...,0],radec.data.lon.value) assert np.allclose(h_dataset[()][...,1],radec.data.lat.value) @@ -256,21 +256,49 @@ def test_astropy_skycoord_array(h5_data): # %% MAIN SCRIPT if __name__ == "__main__": from _pytest.fixtures import FixtureRequest - for h5_root in h5_data(FixtureRequest(test_create_astropy_quantity)): - test_create_astropy_quantity(h5_root) - for h5_root in h5_data(FixtureRequest(test_create_astropy_constant)): - test_create_astropy_constant(h5_root) - for h5_root in h5_data(FixtureRequest(test_astropy_table)): - test_astropy_table(h5_root) - for h5_root in h5_data(FixtureRequest(test_astropy_quantity_array)): - test_astropy_quantity_array(h5_root) - for h5_root in h5_data(FixtureRequest(test_astropy_time_array)): - test_astropy_time_array(h5_root) - for h5_root in h5_data(FixtureRequest(test_astropy_angle)): - test_astropy_angle(h5_root) - for h5_root in h5_data(FixtureRequest(test_astropy_angle_array)): - test_astropy_angle_array(h5_root) - for h5_root in h5_data(FixtureRequest(test_astropy_skycoord)): - test_astropy_skycoord(h5_root) - for h5_root in h5_data(FixtureRequest(test_astropy_skycoord_array)): - 
test_astropy_skycoord_array(h5_root) + from hickle.tests.conftest import compression_kwargs + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_create_astropy_quantity),) + ): + test_create_astropy_quantity(h5_root,keywords) + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_create_astropy_constant),) + ): + test_create_astropy_constant(h5_root,keywords) + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_astropy_table),) + ): + test_astropy_table(h5_root,keywords) + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_astropy_quantity_array),) + ): + test_astropy_quantity_array(h5_root,keywords) + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_astropy_time_array),) + ): + test_astropy_time_array(h5_root,keywords) + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_astropy_angle),) + ): + test_astropy_angle(h5_root,keywords) + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_astropy_angle_array),) + ): + test_astropy_angle_array(h5_root,keywords) + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_astropy_skycoord),) + ): + test_astropy_skycoord(h5_root,keywords) + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_astropy_skycoord_array),) + ): + test_astropy_skycoord_array(h5_root,keywords) diff --git a/hickle/tests/test_99_hickle_core.py b/hickle/tests/test_99_hickle_core.py index 8fd9ede0..366b572d 100644 --- a/hickle/tests/test_99_hickle_core.py +++ b/hickle/tests/test_99_hickle_core.py @@ -54,7 +54,7 @@ def test_file_name(request): # %% FUNCTION DEFINITIONS -def test_recursive_dump(h5_data): +def test_recursive_dump(h5_data,compression_kwargs): """ test _dump function and that it properly calls itself recursively """ @@ -64,7 +64,7 @@ def test_recursive_dump(h5_data): data = simple_list = [1,2,3,4] with lookup.ReferenceManager.create_manager(h5_data) as memo: with lookup.LoaderManager.create_manager(h5_data) as loader: - hickle._dump(data, h5_data, "simple_list",memo,loader) + hickle._dump(data, h5_data, "simple_list",memo,loader,**compression_kwargs) dumped_data = h5_data["simple_list"] assert memo.resolve_type(dumped_data) == (data.__class__,b'list',False) assert np.all(dumped_data[()] == simple_list) @@ -76,7 +76,7 @@ def test_recursive_dump(h5_data): '12':12, (1,2,3):'hallo' } - hickle._dump(data, h5_data, "some_dict",memo,loader) + hickle._dump(data, h5_data, "some_dict",memo,loader,**compression_kwargs) dumped_data = h5_data["some_dict"] assert memo.resolve_type(dumped_data) == (data.__class__,b'dict',True) @@ -118,13 +118,13 @@ def fail_create_dict(py_obj,h_group,name,**kwargs): loader.types_dict.maps.insert(0,{dict:(fail_create_dict,*loader.types_dict[dict][1:])}) memo_backup = memo.pop(id(data),None) with pytest.warns(lookup.SerializedWarning): - hickle._dump(data, h5_data, "pickled_dict",memo,loader) + hickle._dump(data, h5_data, "pickled_dict",memo,loader,**compression_kwargs) dumped_data = h5_data["pickled_dict"] assert bytes(dumped_data[()]) == pickle.dumps(data) loader.types_dict.maps.pop(0) 
memo[id(data)] = memo_backup -def test_recursive_load(h5_data): +def test_recursive_load(h5_data,compression_kwargs): """ test _load function and that it properly calls itself recursively """ @@ -135,7 +135,7 @@ def test_recursive_load(h5_data): data_name = "the_answer" with lookup.ReferenceManager.create_manager(h5_data) as memo: with lookup.LoaderManager.create_manager(h5_data) as loader: - hickle._dump(data, h5_data, data_name,memo,loader) + hickle._dump(data, h5_data, data_name,memo,loader,**compression_kwargs) py_container = hickle.RootContainer(h5_data.attrs,b'hickle_root',hickle.RootContainer) hickle._load(py_container, data_name, h5_data[data_name],memo,loader) assert py_container.convert() == data @@ -143,7 +143,7 @@ def test_recursive_load(h5_data): # check that dict object is properly restored on load from corresponding group data = {'question':None,'answer':42} data_name = "not_formulated" - hickle._dump(data, h5_data, data_name,memo,loader) + hickle._dump(data, h5_data, data_name,memo,loader,**compression_kwargs) py_container = hickle.RootContainer(h5_data.attrs,b'hickle_root',hickle.RootContainer) hickle._load(py_container, data_name, h5_data[data_name],memo,loader) assert py_container.convert() == data @@ -158,7 +158,7 @@ def fail_create_dict(py_obj,h_group,name,**kwargs): data_name = "pickled_dict" memo_backup = memo.pop(id(data),None) with pytest.warns(lookup.SerializedWarning): - hickle._dump(data, h5_data, data_name,memo,loader) + hickle._dump(data, h5_data, data_name,memo,loader,**compression_kwargs) hickle._load(py_container, data_name, h5_data[data_name],memo,loader) assert py_container.convert() == data loader.types_dict.maps.pop(0) @@ -166,59 +166,59 @@ def fail_create_dict(py_obj,h_group,name,**kwargs): # %% ISSUE RELATED TESTS -def test_invalid_file(): +def test_invalid_file(compression_kwargs): """ Test if trying to use a non-file object fails. 
""" with pytest.raises(hickle.FileError): - dump('test', ()) + dump('test', (),**compression_kwargs) -def test_binary_file(test_file_name): +def test_binary_file(test_file_name,compression_kwargs): """ Test if using a binary file works https://github.com/telegraphic/hickle/issues/123""" filename = test_file_name.replace(".hkl",".hdf5") with open(filename, "w") as f: - with pytest.raises(helpers.FileError): - hickle.dump(None, f) + with pytest.raises(hickle.FileError): + hickle.dump(None, f,**compression_kwargs) with open(filename, "w+") as f: - with pytest.raises(helpers.FileError): - hickle.dump(None, f) + with pytest.raises(hickle.FileError): + hickle.dump(None, f,**compression_kwargs) with open(filename, "wb") as f: - with pytest.raises(helpers.FileError): - hickle.dump(None, f) + with pytest.raises(hickle.FileError): + hickle.dump(None, f,**compression_kwargs) with open(filename, "w+b") as f: - hickle.dump(None, f) + hickle.dump(None, f,**compression_kwargs) -def test_file_open_close(test_file_name,h5_data): +def test_file_open_close(test_file_name,h5_data,compression_kwargs): """ https://github.com/telegraphic/hickle/issues/20 """ import h5py f = h5py.File(test_file_name.replace(".hkl",".hdf"), 'w') a = np.arange(5) - dump(a, test_file_name) - dump(a, test_file_name) + dump(a, test_file_name,**compression_kwargs) + dump(a, test_file_name,**compression_kwargs) - dump(a, f, mode='w') + dump(a, f, mode='w',**compression_kwargs) f.close() with pytest.raises(hickle.ClosedFileError): - dump(a, f, mode='w') + dump(a, f, mode='w',**compression_kwargs) h5_data.create_dataset('nothing',data=[]) with pytest.raises(ValueError,match = r"Unable\s+to\s+create\s+group\s+\(name\s+already\s+exists\)"): - dump(a,h5_data.file,path="/root_group") + dump(a,h5_data.file,path="/root_group",**compression_kwargs) -def test_hdf5_group(test_file_name): +def test_hdf5_group(test_file_name,compression_kwargs): import h5py hdf5_filename = test_file_name.replace(".hkl",".hdf5") file = h5py.File(hdf5_filename, 'w') group = file.create_group('test_group') a = np.arange(5) - dump(a, group) + dump(a, group,**compression_kwargs) file.close() a_hkl = load(hdf5_filename, path='/test_group') @@ -228,7 +228,7 @@ def test_hdf5_group(test_file_name): group = file.create_group('test_group2') b = np.arange(8) - dump(b, group, path='deeper/and_deeper') + dump(b, group, path='deeper/and_deeper',**compression_kwargs) file.close() with pytest.raises(ValueError): @@ -243,7 +243,7 @@ def test_hdf5_group(test_file_name): -def test_with_open_file(test_file_name): +def test_with_open_file(test_file_name,compression_kwargs): """ Testing dumping and loading to an open file @@ -255,10 +255,10 @@ def test_with_open_file(test_file_name): arr = np.array([1]) with h5py.File(test_file_name, 'w') as file: - dump(lst, file, path='/lst') - dump(tpl, file, path='/tpl') - dump(dct, file, path='/dct') - dump(arr, file, path='/arr') + dump(lst, file, path='/lst',**compression_kwargs) + dump(tpl, file, path='/tpl',**compression_kwargs) + dump(dct, file, path='/dct',**compression_kwargs) + dump(arr, file, path='/arr',**compression_kwargs) with h5py.File(test_file_name, 'r') as file: assert load(file, '/lst') == lst @@ -267,7 +267,7 @@ def test_with_open_file(test_file_name): assert load(file, '/arr') == arr -def test_load(test_file_name): +def test_load(test_file_name,compression_kwargs): a = set([1, 2, 3, 4]) b = set([5, 6, 7, 8]) c = set([9, 10, 11, 12]) @@ -277,7 +277,7 @@ def test_load(test_file_name): print("Original:") pprint(z) - dump(z, 
test_file_name, mode='w') + dump(z, test_file_name, mode='w',**compression_kwargs) print("\nReconstructed:") z = load(test_file_name) @@ -286,7 +286,7 @@ def test_load(test_file_name): -def test_multi_hickle(test_file_name): +def test_multi_hickle(test_file_name,compression_kwargs): """ Dumping to and loading from the same file several times https://github.com/telegraphic/hickle/issues/20""" @@ -295,10 +295,10 @@ def test_multi_hickle(test_file_name): if os.path.exists(test_file_name): os.remove(test_file_name) - dump(a, test_file_name, path="/test", mode="w") - dump(a, test_file_name, path="/test2", mode="r+") - dump(a, test_file_name, path="/test3", mode="r+") - dump(a, test_file_name, path="/test4", mode="r+") + dump(a, test_file_name, path="/test", mode="w",**compression_kwargs) + dump(a, test_file_name, path="/test2", mode="r+",**compression_kwargs) + dump(a, test_file_name, path="/test3", mode="r+",**compression_kwargs) + dump(a, test_file_name, path="/test4", mode="r+",**compression_kwargs) load(test_file_name, path="/test") load(test_file_name, path="/test2") @@ -306,7 +306,7 @@ def test_multi_hickle(test_file_name): load(test_file_name, path="/test4") -def test_improper_attrs(test_file_name): +def test_improper_attrs(test_file_name,compression_kwargs): """ test for proper reporting missing mandatory attributes for the various supported file versions @@ -315,7 +315,7 @@ def test_improper_attrs(test_file_name): # check that missing attributes which disallow to identify # hickle version are reported data = "my name? Ha I'm Nobody" - dump(data,test_file_name) + dump(data,test_file_name,**compression_kwargs) manipulated = h5py.File(test_file_name,"r+") root_group = manipulated.get('/') root_group.attrs["VERSION"] = root_group.attrs["HICKLE_VERSION"] @@ -333,33 +333,53 @@ def test_improper_attrs(test_file_name): if __name__ == '__main__': """ Some tests and examples """ from _pytest.fixtures import FixtureRequest + from hickle.tests.conftest import compression_kwargs - for h5_root,filename in ( - ( h5_data(request),test_file_name(request) ) - for request in (FixtureRequest(test_file_opener),) + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_recursive_dump),) ): - test_file_opener(h5_root,filename) - for h5_root in h5_data(FixtureRequest(test_recursive_dump)): - test_recursive_dump(h5_root) - for h5_root in h5_data(FixtureRequest(test_recursive_load)): - test_recursive_load(h5_root) - test_invalid_file() - for filename in test_file_name(FixtureRequest(test_binary_file)): - test_binary_file(filename) - for h5_root,filename in ( - ( h5_data(request),test_file_name(request) ) + test_recursive_dump(h5_root,keywords) + for h5_root,keywords in ( + ( h5_data(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_recursive_load),) + ): + test_recursive_load(h5_root,keywords) + for keywords in compression_kwargs(FixtureRequest(test_recursive_dump)): + test_invalid_file(keywords) + for filename,keywords in ( + ( test_file_name(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_binary_file),) + ): + test_binary_file(filename,keywords) + for h5_root,filename,keywords in ( + ( h5_data(request),test_file_name(request),compression_kwargs(request) ) for request in (FixtureRequest(test_file_open_close),) ): - test_file_open_close(h5_root,filename) - for filename in test_file_name(FixtureRequest(test_hdf5_group)): - test_hdf5_group(filename) - for filename in 
test_file_name(FixtureRequest(test_with_open_file)): - test_with_open_file(filename) - - for filename in test_file_name(FixtureRequest(test_load)): - test_load(filename) - for filename in test_file_name(FixtureRequest(test_multi_hickle)): - test_multi_hickle(filename) - for filename in test_file_name(FixtureRequest(test_improper_attrs)): - test_improper_attrs(filename) + test_file_open_close(h5_root,filename,keywords) + for filename,keywords in ( + ( test_file_name(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_hdf5_group),) + ): + test_hdf5_group(filename,keywords) + for filename,keywords in ( + ( test_file_name(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_with_open_file),) + ): + test_with_open_file(filename,keywords) + for filename,keywords in ( + ( test_file_name(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_load),) + ): + test_load(filename,keywords) + for filename,keywords in ( + ( test_file_name(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_multi_hickle),) + ): + test_multi_hickle(filename,keywords) + for filename,keywords in ( + ( test_file_name(request),compression_kwargs(request) ) + for request in (FixtureRequest(test_improper_attrs),) + ): + test_improper_attrs(filename,keywords) diff --git a/hickle/tests/test_hickle.py b/hickle/tests/test_hickle.py index 20827902..946e5115 100644 --- a/hickle/tests/test_hickle.py +++ b/hickle/tests/test_hickle.py @@ -137,7 +137,7 @@ def test_invalid_file(): dump('test', ()) -def test_state_obj(monkeypatch,test_file_name): +def test_state_obj(monkeypatch,test_file_name,compression_kwargs): """ Dumping and loading a class object with pickle states https://github.com/telegraphic/hickle/issues/125""" @@ -148,63 +148,63 @@ def test_state_obj(monkeypatch,test_file_name): mode = 'w' obj = with_state() with pytest.warns(lookup.SerializedWarning): - dump(obj, test_file_name, mode) + dump(obj, test_file_name, mode,**compression_kwargs) monkey.setattr(pickle,'loads',hide_from_hickle) obj_hkl = load(test_file_name) assert isinstance(obj,obj_hkl.__class__) or isinstance(obj_hkl,obj.__class__) assert np.allclose(obj[1], obj_hkl[1]) -def test_local_func(test_file_name): +def test_local_func(test_file_name,compression_kwargs): """ Dumping and loading a local function https://github.com/telegraphic/hickle/issues/119""" mode = 'w' with pytest.warns(lookup.SerializedWarning): - dump(func, test_file_name, mode) + dump(func, test_file_name, mode,**compression_kwargs) func_hkl = load(test_file_name) assert isinstance(func,func_hkl.__class__) or isinstance(func_hkl,func.__class__) assert func(1, 2) == func_hkl(1, 2) -def test_non_empty_group(test_file_name): +def test_non_empty_group(test_file_name,compression_kwargs): """ Test if attempting to dump to a group with data fails """ - hickle.dump(None, test_file_name) + hickle.dump(None, test_file_name,**compression_kwargs) with pytest.raises(ValueError): - dump(None, test_file_name, 'r+') + dump(None, test_file_name, 'r+',**compression_kwargs) -def test_string(test_file_name): +def test_string(test_file_name,compression_kwargs): """ Dumping and loading a string """ mode = 'w' string_obj = "The quick brown fox jumps over the lazy dog" - dump(string_obj, test_file_name, mode) + dump(string_obj, test_file_name, mode,**compression_kwargs) string_hkl = load(test_file_name) assert isinstance(string_hkl, str) assert string_obj == string_hkl -def test_65bit_int(test_file_name): +def 
test_65bit_int(test_file_name,compression_kwargs): """ Dumping and loading an integer with arbitrary precision https://github.com/telegraphic/hickle/issues/113""" i = 2**65-1 - dump(i, test_file_name) + dump(i, test_file_name,**compression_kwargs) i_hkl = load(test_file_name) assert i == i_hkl j = -2**63-1 - dump(j, test_file_name) + dump(j, test_file_name,**compression_kwargs) j_hkl = load(test_file_name) assert j == j_hkl -def test_list(test_file_name): +def test_list(test_file_name,compression_kwargs): """ Dumping and loading a list """ filename, mode = 'test_list.h5', 'w' list_obj = [1, 2, 3, 4, 5] - dump(list_obj, test_file_name, mode=mode) + dump(list_obj, test_file_name, mode=mode,**compression_kwargs) list_hkl = load(test_file_name) try: assert isinstance(list_hkl, list) @@ -220,11 +220,11 @@ def test_list(test_file_name): raise -def test_set(test_file_name) : +def test_set(test_file_name,compression_kwargs) : """ Dumping and loading a list """ mode = 'w' list_obj = set([1, 0, 3, 4.5, 11.2]) - dump(list_obj, test_file_name, mode) + dump(list_obj, test_file_name, mode,**compression_kwargs) list_hkl = load(test_file_name) try: assert isinstance(list_hkl, set) @@ -235,14 +235,14 @@ def test_set(test_file_name) : raise -def test_numpy(test_file_name): +def test_numpy(test_file_name,compression_kwargs): """ Dumping and loading numpy array """ mode = 'w' dtypes = ['float32', 'float64', 'complex64', 'complex128'] for dt in dtypes: array_obj = np.ones(8, dtype=dt) - dump(array_obj, test_file_name, mode) + dump(array_obj, test_file_name, mode,**compression_kwargs) array_hkl = load(test_file_name) try: assert array_hkl.dtype == array_obj.dtype @@ -253,12 +253,12 @@ def test_numpy(test_file_name): raise -def test_masked(test_file_name): +def test_masked(test_file_name,compression_kwargs): """ Test masked numpy array """ mode = 'w' a = np.ma.array([1, 2, 3, 4], dtype='float32', mask=[0, 1, 0, 0]) - dump(a, test_file_name, mode) + dump(a, test_file_name, mode,**compression_kwargs) a_hkl = load(test_file_name) try: @@ -270,45 +270,48 @@ def test_masked(test_file_name): raise -def test_object_numpy(test_file_name): +def test_object_numpy(test_file_name,compression_kwargs): """ Dumping and loading a NumPy array containing non-NumPy objects. https://github.com/telegraphic/hickle/issues/90""" - arr = np.array([[NESTED_DICT], ('What is this?',), {1, 2, 3, 7, 1}]) - dump(arr, test_file_name) + # VisibleDeprecationWarning from newer numpy versions + #np_array_data = np.array([[NESTED_DICT], ('What is this?',), {1, 2, 3, 7, 1}]) + arr = np.array([NESTED_DICT])#, ('What is this?',), {1, 2, 3, 7, 1}]) + dump(arr, test_file_name,**compression_kwargs) arr_hkl = load(test_file_name) assert np.all(arr == arr_hkl) arr2 = np.array(NESTED_DICT) - dump(arr2, test_file_name) + dump(arr2, test_file_name,**compression_kwargs) arr_hkl2 = load(test_file_name) assert np.all(arr2 == arr_hkl2) -def test_string_numpy(test_file_name): +def test_string_numpy(test_file_name,compression_kwargs): """ Dumping and loading NumPy arrays containing Python 3 strings. """ arr = np.array(["1313e", "was", "maybe?", "here"]) - dump(arr, test_file_name) + dump(arr, test_file_name,**compression_kwargs) arr_hkl = load(test_file_name) assert np.all(arr == arr_hkl) -def test_list_object_numpy(test_file_name): +def test_list_object_numpy(test_file_name,compression_kwargs): """ Dumping and loading a list of NumPy arrays with objects. 
https://github.com/telegraphic/hickle/issues/90""" - lst = [np.array(NESTED_DICT), np.array([('What is this?',), - {1, 2, 3, 7, 1}])] - dump(lst, test_file_name) + # VisibleDeprecationWarning from newer numpy versions + lst = [np.array(NESTED_DICT)]#, np.array([('What is this?',), + # {1, 2, 3, 7, 1}])] + dump(lst, test_file_name,**compression_kwargs) lst_hkl = load(test_file_name) assert np.all(lst[0] == lst_hkl[0]) - assert np.all(lst[1] == lst_hkl[1]) + #assert np.all(lst[1] == lst_hkl[1]) -def test_dict(test_file_name): +def test_dict(test_file_name,compression_kwargs): """ Test dictionary dumping and loading """ mode = 'w' @@ -321,7 +324,7 @@ def test_dict(test_file_name): 'narr': np.array([1, 2, 3]), } - dump(dd, test_file_name, mode) + dump(dd, test_file_name, mode,**compression_kwargs) dd_hkl = load(test_file_name) for k in dd.keys(): @@ -341,14 +344,14 @@ def test_dict(test_file_name): raise -def test_odict(test_file_name): +def test_odict(test_file_name,compression_kwargs): """ Test ordered dictionary dumping and loading https://github.com/telegraphic/hickle/issues/65""" mode = 'w' od = odict(((3, [3, 0.1]), (7, [5, 0.1]), (5, [3, 0.1]))) - dump(od, test_file_name, mode) + dump(od, test_file_name, mode,**compression_kwargs) od_hkl = load(test_file_name) assert od.keys() == od_hkl.keys() @@ -357,16 +360,23 @@ def test_odict(test_file_name): assert od_item == od_hkl_item -def test_empty_dict(test_file_name): +def test_empty_dict(test_file_name,compression_kwargs): """ Test empty dictionary dumping and loading https://github.com/telegraphic/hickle/issues/91""" mode = 'w' - dump({}, test_file_name, mode) + dump({}, test_file_name, mode,**compression_kwargs) assert load(test_file_name) == {} + +# TODO consider converting to parameterized test +# or enable implicit parameterizing of all tests +# though compression_kwargs fixture providing +# various combinations of compression and chunking +# related keywords +@pytest.mark.no_compression def test_compression(test_file_name): """ Test compression on datasets""" @@ -390,7 +400,7 @@ def test_compression(test_file_name): raise -def test_dict_int_key(test_file_name): +def test_dict_int_key(test_file_name,compression_kwargs): """ Test for dictionaries with integer keys """ mode = 'w' @@ -399,17 +409,17 @@ def test_dict_int_key(test_file_name): 1: "test2" } - dump(dd, test_file_name, mode) + dump(dd, test_file_name, mode,**compression_kwargs) load(test_file_name) -def test_dict_nested(test_file_name): +def test_dict_nested(test_file_name,compression_kwargs): """ Test for dictionaries with integer keys """ mode = 'w' dd = NESTED_DICT - dump(dd, test_file_name, mode) + dump(dd, test_file_name, mode,**compression_kwargs) dd_hkl = load(test_file_name) ll_hkl = dd_hkl["level1_3"]["level2_1"]["level3_1"] @@ -417,7 +427,7 @@ def test_dict_nested(test_file_name): assert ll == ll_hkl -def test_masked_dict(test_file_name): +def test_masked_dict(test_file_name,compression_kwargs): """ Test dictionaries with masked arrays """ filename, mode = 'test.h5', 'w' @@ -427,7 +437,7 @@ def test_masked_dict(test_file_name): "data2": np.array([1, 2, 3, 4, 5]) } - dump(dd, test_file_name, mode) + dump(dd, test_file_name, mode,**compression_kwargs) dd_hkl = load(test_file_name) for k in dd.keys(): @@ -451,7 +461,7 @@ def test_masked_dict(test_file_name): raise -def test_np_float(test_file_name): +def test_np_float(test_file_name,compression_kwargs): """ Test for singular np dtypes """ mode = 'w' @@ -463,7 +473,7 @@ def test_np_float(test_file_name): for dt in 
dtype_list: dd = dt(1) - dump(dd, test_file_name, mode) + dump(dd, test_file_name, mode,**compression_kwargs) dd_hkl = load(test_file_name) assert dd == dd_hkl assert dd.dtype == dd_hkl.dtype @@ -471,7 +481,7 @@ def test_np_float(test_file_name): dd = {} for dt in dtype_list: dd[str(dt)] = dt(1.0) - dump(dd, test_file_name, mode) + dump(dd, test_file_name, mode,**compression_kwargs) dd_hkl = load(test_file_name) print(dd) @@ -479,6 +489,12 @@ def test_np_float(test_file_name): assert dd[str(dt)] == dd_hkl[str(dt)] +# TODO consider converting to parameterized test +# or enable implicit parameterizing of all tests +# though compression_kwargs fixture providing +# various combinations of compression and chunking +# related keywords +@pytest.mark.no_compression def test_comp_kwargs(test_file_name): """ Test compression with some kwargs for shuffle and chunking """ @@ -508,7 +524,7 @@ def test_comp_kwargs(test_file_name): load(test_file_name) -def test_list_numpy(test_file_name): +def test_list_numpy(test_file_name,compression_kwargs): """ Test converting a list of numpy arrays """ mode = 'w' @@ -517,7 +533,7 @@ def test_list_numpy(test_file_name): b = np.zeros(1000) c = [a, b] - dump(c, test_file_name, mode) + dump(c, test_file_name, mode,**compression_kwargs) dd_hkl = load(test_file_name) print(dd_hkl) @@ -526,7 +542,7 @@ def test_list_numpy(test_file_name): assert isinstance(dd_hkl[0], np.ndarray) -def test_tuple_numpy(test_file_name): +def test_tuple_numpy(test_file_name,compression_kwargs): """ Test converting a list of numpy arrays """ mode = 'w' @@ -535,7 +551,7 @@ def test_tuple_numpy(test_file_name): b = np.zeros(1000) c = (a, b, a) - dump(c, test_file_name, mode) + dump(c, test_file_name, mode,**compression_kwargs) dd_hkl = load(test_file_name) print(dd_hkl) @@ -544,23 +560,23 @@ def test_tuple_numpy(test_file_name): assert isinstance(dd_hkl[0], np.ndarray) -def test_numpy_dtype(test_file_name): +def test_numpy_dtype(test_file_name,compression_kwargs): """ Dumping and loading a NumPy dtype """ dtype = np.dtype('float16') - dump(dtype, test_file_name) + dump(dtype, test_file_name,**compression_kwargs) dtype_hkl = load(test_file_name) assert dtype == dtype_hkl -def test_none(test_file_name): +def test_none(test_file_name,compression_kwargs): """ Test None type hickling """ mode = 'w' a = None - dump(a, test_file_name, mode) + dump(a, test_file_name, mode,**compression_kwargs) dd_hkl = load(test_file_name) print(a) print(dd_hkl) @@ -568,10 +584,10 @@ def test_none(test_file_name): assert isinstance(dd_hkl, type(None)) -def test_list_order(test_file_name): +def test_list_order(test_file_name,compression_kwargs): """ https://github.com/telegraphic/hickle/issues/26 """ d = [np.arange(n + 1) for n in range(20)] - dump(d, test_file_name) + dump(d, test_file_name,**compression_kwargs) d_hkl = load(test_file_name) try: @@ -584,12 +600,12 @@ def test_list_order(test_file_name): raise -def test_embedded_array(test_file_name): +def test_embedded_array(test_file_name,compression_kwargs): """ See https://github.com/telegraphic/hickle/issues/24 """ d_orig = [[np.array([10., 20.]), np.array([10, 20, 30])], [np.array([10, 2]), np.array([1.])]] - dump(d_orig, test_file_name) + dump(d_orig, test_file_name,**compression_kwargs) d_hkl = load(test_file_name) for ii, xx in enumerate(d_orig): @@ -620,76 +636,76 @@ def generate_nested(): z = {'a': a, 'b': b, 'c': c, 'd': d, 'z': z} return z -def test_dump_nested(test_file_name): +def test_dump_nested(test_file_name,compression_kwargs): """ Dump a complicated 
nested object to HDF5 """ z = generate_nested() - dump(z, test_file_name, mode='w') + dump(z, test_file_name, mode='w',**compression_kwargs) -def test_ndarray(test_file_name): +def test_ndarray(test_file_name,compression_kwargs): a = np.array([1, 2, 3]) b = np.array([2, 3, 4]) z = (a, b) print("Original:") pprint(z) - dump(z, test_file_name, mode='w') + dump(z, test_file_name, mode='w',**compression_kwargs) print("\nReconstructed:") z = load(test_file_name) pprint(z) -def test_ndarray_masked(test_file_name): +def test_ndarray_masked(test_file_name,compression_kwargs): a = np.ma.array([1, 2, 3]) b = np.ma.array([2, 3, 4], mask=[True, False, True]) z = (a, b) print("Original:") pprint(z) - dump(z, test_file_name, mode='w') + dump(z, test_file_name, mode='w',**compression_kwargs) print("\nReconstructed:") z = load(test_file_name) pprint(z) -def test_simple_dict(test_file_name): +def test_simple_dict(test_file_name,compression_kwargs): a = {'key1': 1, 'key2': 2} - dump(a, test_file_name) + dump(a, test_file_name,**compression_kwargs) z = load(test_file_name) pprint(a) pprint(z) -def test_complex_dict(test_file_name): +def test_complex_dict(test_file_name,compression_kwargs): a = {'akey': 1, 'akey2': 2} c = {'ckey': "hello", "ckey2": "hi there"} z = {'zkey1': a, 'zkey2': a, 'zkey3': c} print("Original:") pprint(z) - dump(z, test_file_name, mode='w') + dump(z, test_file_name, mode='w',**compression_kwargs) print("\nReconstructed:") z = load(test_file_name) pprint(z) -def test_complex(test_file_name): +def test_complex(test_file_name,compression_kwargs): """ Test complex value dtype is handled correctly https://github.com/telegraphic/hickle/issues/29 """ data = {"A": 1.5, "B": 1.5 + 1j, "C": np.linspace(0, 1, 4) + 2j} - dump(data, test_file_name) + dump(data, test_file_name,**compression_kwargs) data2 = load(test_file_name) for key in data.keys(): assert isinstance(data[key], data2[key].__class__) -def test_nonstring_keys(test_file_name): +def test_nonstring_keys(test_file_name,compression_kwargs): """ Test that keys are reconstructed back to their original datatypes https://github.com/telegraphic/hickle/issues/36 """ @@ -710,7 +726,7 @@ def test_nonstring_keys(test_file_name): } print(data) - dump(data, test_file_name) + dump(data, test_file_name,**compression_kwargs) data2 = load(test_file_name) print(data2) @@ -719,7 +735,7 @@ def test_nonstring_keys(test_file_name): print(data2) - +@pytest.mark.no_compression def test_scalar_compression(test_file_name): """ Test bug where compression causes a crash on scalar datasets @@ -736,12 +752,12 @@ def test_scalar_compression(test_file_name): assert isinstance(data[key], data2[key].__class__) -def test_bytes(test_file_name): +def test_bytes(test_file_name,compression_kwargs): """ Dumping and loading a string. 
PYTHON3 ONLY """ mode = 'w' string_obj = b"The quick brown fox jumps over the lazy dog" - dump(string_obj, test_file_name, mode) + dump(string_obj, test_file_name, mode,**compression_kwargs) string_hkl = load(test_file_name) print(type(string_obj)) print(type(string_hkl)) @@ -749,26 +765,26 @@ def test_bytes(test_file_name): assert string_obj == string_hkl -def test_np_scalar(test_file_name): +def test_np_scalar(test_file_name,compression_kwargs): """ Numpy scalar datatype https://github.com/telegraphic/hickle/issues/50 """ r0 = {'test': np.float64(10.)} - dump(r0, test_file_name) + dump(r0, test_file_name,**compression_kwargs) r = load(test_file_name) print(r) assert isinstance(r0['test'], r['test'].__class__) -def test_slash_dict_keys(test_file_name): +def test_slash_dict_keys(test_file_name,compression_kwargs): """ Support for having slashes in dict keys https://github.com/telegraphic/hickle/issues/124""" dct = {'a/b': [1, '2'], 1.4: 3} - dump(dct, test_file_name, 'w') + dump(dct, test_file_name, 'w',**compression_kwargs) dct_hkl = load(test_file_name) assert isinstance(dct_hkl, dict) @@ -778,7 +794,7 @@ def test_slash_dict_keys(test_file_name): # Check that having backslashes in dict keys will serialize the dict dct2 = {'a\\b': [1, '2'], 1.4: 3} with pytest.warns(None) as not_expected: - dump(dct2, test_file_name) + dump(dct2, test_file_name,**compression_kwargs) assert not not_expected diff --git a/hickle/tests/test_legacy_load.py b/hickle/tests/test_legacy_load.py index e185ed4e..5a1c8391 100644 --- a/hickle/tests/test_legacy_load.py +++ b/hickle/tests/test_legacy_load.py @@ -40,6 +40,7 @@ def test_legacy_load(): print(item.attrs.items()) raise +@pytest.mark.no_compression def test_4_0_0_load(): """ test that files created by hickle 4.0.x can be loaded by diff --git a/requirements_h5py.txt b/requirements_h5py.txt new file mode 100644 index 00000000..5dbe831f --- /dev/null +++ b/requirements_h5py.txt @@ -0,0 +1,4 @@ +dill>=0.3.0 +h5py>=2.8.0,<3 +numpy>=1.8 +six>=1.11.0 diff --git a/requirements_test.txt b/requirements_test.txt index 36f1b43b..55c4c4d0 100644 --- a/requirements_test.txt +++ b/requirements_test.txt @@ -6,4 +6,3 @@ scipy>=1.0.0 pandas>=0.24.0 check-manifest twine>=1.13.0 -h5py<3 diff --git a/setup.cfg b/setup.cfg index d1f49f6a..580980b5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,3 +13,4 @@ omit= hickle/tests/* hickle/*/tests/* hickle/legacy_v3/* + hickle/loaders/load_pandas.py diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..4dc11541 --- /dev/null +++ b/tox.ini @@ -0,0 +1,51 @@ +[tox] +envlist = py{35,36,37,38}, py{35,36,37,38}-compress +skip_missing_interpreters=true + + +[testenv] +passenv = HOME USER +deps = + -rrequirements.txt + h5py3: -rrequirements_h5py_3.txt + -rrequirements_test.txt + + +# {posargs} allows to pass any pytest related cli arguments +# to tox after -- argument separator. +commands = + pip install --upgrade pip + py{35,36,37,38}-!compress: pytest --cov-report=term-missing {posargs} + compress: pytest --enable-compression --cov-report=term-missing {posargs} + +[testenv:h5py3] +# special environment for testing and debugging h5py >= 3.0 support +# related issues. Manually calls python setup.py develop instead of +# python setup.py install which would also be possible below. 
+# system commands like mv, ln etc. must be explicitly allowed to be +# called from within the virtual environment +skipdist=True +allowlist_externals= + mv + ln + cp + rm + +# change h5py version requirements to >= 3.0 +commands_pre= + mv requirements.txt requirements_mv.txt + ln -s requirements_h5py_3.txt requirements.txt +commands = + python setup.py develop + pytest --cov-report=term-missing {posargs} + +# switch back to initial state again +commands_post= + rm requirements.txt + cp requirements_mv.txt requirements.txt + +[pytest] +# options passed to pytest in all cases, as well as any desired +# pytest configuration values +addopts = --cov=./hickle
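The loader and core tests above all take a compression_kwargs fixture, their __main__ blocks import it from hickle.tests.conftest, and the 'compress' tox environments run pytest with --enable-compression, but the fixture itself is defined in the new conftest.py outside this part of the patch. A minimal, hypothetical sketch of how such a fixture, command line option and no_compression marker could be wired together follows; only the names compression_kwargs, no_compression and --enable-compression are taken from the hunks above, the gzip keyword values are an assumption, and the sketch leaves out any additional checking the real conftest.py may perform.

# conftest.py sketch, not the implementation shipped with this patch
import pytest

def pytest_addoption(parser):
    # switch used by the tox 'compress' environments
    parser.addoption(
        "--enable-compression", action="store_true", default=False,
        help="rerun tests with h5py compression keywords enabled")

def pytest_configure(config):
    # register the marker used to opt tests out of compression testing
    config.addinivalue_line(
        "markers",
        "no_compression: exclude test from compression keyword testing")

@pytest.fixture
def compression_kwargs(request):
    # plain runs and tests marked no_compression receive an empty dict;
    # compression runs receive keywords to forward to h5py create calls
    if request.node.get_closest_marker("no_compression"):
        return {}
    if request.config.getoption("--enable-compression"):
        return {"compression": "gzip", "compression_opts": 6}
    return {}

With a layout like this an ordinary pytest run keeps the keywords empty, while tox -e py38-compress (or a direct pytest --enable-compression) repeats every unmarked test with the keywords forwarded via **compression_kwargs as in the hunks above; further pytest arguments can still be appended after the tox -- separator.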
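Tests that choose their own h5py keywords, such as test_compression, test_comp_kwargs and test_scalar_compression above, carry the pytest.mark.no_compression marker so the injected keywords cannot collide with the explicit ones; tests that never call a dump function can be marked the same way. A short, hypothetical example of that pattern, reusing the test_file_name fixture already used throughout test_hickle.py:

import pytest
import numpy as np
import hickle

@pytest.mark.no_compression
def test_explicit_gzip(test_file_name):
    # this test supplies its own compression keywords, so it must not
    # additionally receive the compression_kwargs fixture values
    data = np.arange(1000, dtype=np.float64)
    hickle.dump(data, test_file_name, mode='w',
                compression='gzip', compression_opts=9)
    assert np.all(hickle.load(test_file_name) == data)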
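The dump based tests above only assert that data round-trips; none of them inspect whether the forwarded keywords actually reach the datasets written by h5py. If such a check were wanted it could look roughly like the following sketch, which is not part of this patch; the helper name is made up, and since compression only appears on chunked datasets, scalar datasets are skipped.

import h5py
import numpy as np
import hickle

def assert_gzip_applied(filename):
    # dump with explicit gzip keywords, then walk the resulting file and
    # check that every chunked dataset reports the gzip filter
    hickle.dump(np.arange(1000), filename, mode='w',
                compression='gzip', compression_opts=6)
    with h5py.File(filename, 'r') as f:
        def visit(name, item):
            if isinstance(item, h5py.Dataset) and item.chunks is not None:
                assert item.compression == 'gzip'
        f.visititems(visit)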