@pytest.fixture
def load_scan_gwy(loading_config: dict) -> LoadScans:
    """Build a LoadScans object covering the .gwy test resources.

    Two files are queued: ``file.gwy`` and ``file_landscape.gwy``, both read from RESOURCES.
    """
    gwy_paths = [RESOURCES / name for name in ("file.gwy", "file_landscape.gwy")]
    return LoadScans(gwy_paths, channel="dummy_channel")
def test_scale() -> None:
    """Test unit conversion and user-added factors of the Scale class."""
    test_config = read_yaml(RESOURCES / "test_scale_config.yaml")
    if "scale" not in test_config["loading"]:
        raise KeyError("Scale is not defined in test_scale_config.yaml")
    scale = Scale(test_config["loading"]["scale"])

    # Identity check (not `== True`) so a merely truthy return value would fail the test.
    assert scale.is_available("nm", "m") is True

    # Factors read from the configuration file.
    assert scale.get_factor("nm", "um") == 1000
    assert scale.in_nm(1.0, "um") == 1000
    assert scale.in_nm(1.0, "mm") == 1000000

    # Image-specific factors added at runtime: a 220 x 170 pixel image spanning 1000 nm on each axis.
    scale.add_factor("nm", "pixel_x_in_nm", 1000 / 220)
    scale.add_factor("nm", "pixel_y_in_nm", 1000 / 170)
    assert abs(scale.in_nm(220, "pixel_x_in_nm") - 1000) < 1e-10
    assert abs(scale.in_nm(170, "pixel_y_in_nm") - 1000) < 1e-10
test_gwy_read_component(load_scan_dummy: LoadScans) -> None: ("load_scan_spm", 1, (1024, 1024), 30695369.188316286, "minicircle", 0.4940029296875), ("load_scan_ibw", 1, (512, 512), -218091520.0, "minicircle2", 1.5625), ("load_scan_jpk", 1, (256, 256), 286598232.9308627, "file", 1.2770176335964876), - ("load_scan_gwy", 1, (512, 512), 33836850.232917726, "file", 0.8468632812499975), + ("load_scan_gwy", 2, (512, 512), 33836850.232917726, "file", 0.8468632812499975), ("load_scan_topostats", 1, (1024, 1024), 182067.12616107278, "file", 0.4940029296875), ], ) @@ -456,6 +485,14 @@ def test_load_scan_get_data( assert isinstance(scan.img_dict[filename]["pixel_to_nm_scaling"], float) assert scan.img_dict[filename]["pixel_to_nm_scaling"] == pixel_to_nm_scaling + # Scale object holds conversion factors of image + # Not all file format doesn't have it yet. + if "scale" in scan.img_dict[filename]: + scale = scan.img_dict[filename]["scale"] + assert isinstance(scale.get_factor("nm", "px_to_nm_x"), float) + assert scale.get_factor("nm", "px_to_nm_x") == pixel_to_nm_scaling + assert isinstance(scale.get_factor("nm", "px_to_nm_y"), float) + @pytest.mark.parametrize( "x, y, log_msg", diff --git a/topostats/default_config.yaml b/topostats/default_config.yaml index 8038a57333..994bc446fc 100644 --- a/topostats/default_config.yaml +++ b/topostats/default_config.yaml @@ -5,6 +5,9 @@ cores: 2 # Number of CPU cores to utilise for processing multiple files simultan file_ext: .spm # File extension of the data files. loading: channel: Height # Channel to pull data from in the data files. 
class Scale:
    """Hold scaling factors and convert values by multiplying.

    Factors are kept in a nested mapping ``{unit_to: {unit_from: factor}}`` and applied as
    ``value_in_unit_to = value_in_unit_from * factor``. The mapping can also hold conversion
    factors for an image, such as "pixel_x_in_nm".
    """

    def __init__(self, config_dict: dict):
        """Instantiate scaling factors from a configuration mapping.

        Parameters
        ----------
        config_dict : dict
            Nested mapping ``{unit_to: {unit_from: factor}}``, e.g. ``config["loading"]["scale"]``.
            ``None`` (or an empty section) is treated as "no factors defined".
        """
        # Copy both levels so that add_factor() never mutates the caller's configuration.
        self._factors = {unit_to: dict(factors or {}) for unit_to, factors in (config_dict or {}).items()}

    def in_nm(self, value_from: float, unit_from: str) -> float:
        """Return ``value_from`` (given in ``unit_from``) converted to nanometres."""
        return self.get_value("nm", value_from, unit_from)

    def get_value(self, unit_to: str, value_from: float, unit_from: str) -> float:
        """Return ``value_from`` (given in ``unit_from``) converted to ``unit_to``.

        Raises
        ------
        KeyError
            If no factor is stored for the ``unit_to`` / ``unit_from`` pair.
        """
        return value_from * self.get_factor(unit_to, unit_from)

    def get_factor(self, unit_to: str, unit_from: str) -> float:
        """Return the conversion factor from ``unit_from`` to ``unit_to``.

        Raises
        ------
        KeyError
            If no factor is stored for the ``unit_to`` / ``unit_from`` pair.
        """
        # float() because a factor taken from configuration may not be a float yet.
        return float(self._factors[unit_to][unit_from])

    def add_factor(self, unit_to: str, unit_from: str, factor: float) -> None:
        """Add (or overwrite) the factor converting ``unit_from`` to ``unit_to``."""
        self._factors.setdefault(unit_to, {})[unit_from] = float(factor)

    def is_available(self, unit_to: str, unit_from: str) -> bool:
        """Return True if a factor converting ``unit_from`` to ``unit_to`` is stored."""
        return unit_from in self._factors.get(unit_to, {})

    def __str__(self) -> str:
        """Return all stored factors as a comma separated string.

        NOTE(review): each entry is rendered as ``1(unit_to)=factor(unit_from)``, which reads as the
        inverse of how factors are applied (``unit_to = unit_from * factor``). The format is kept
        unchanged so existing log output does not change; confirm whether it should be swapped.
        """
        return ",".join(
            f"1({to_key})={factor}({from_key})"
            for to_key, factors in self._factors.items()
            for from_key, factor in factors.items()
        )
image_data_dict["/0/data"]["si_unit_xy"]["unitstr"] - px_to_nm = image_data_dict["/0/data"]["xreal"] * 1e9 / image.shape[1] - elif "/1/data" in image_data_dict: - image = image_data_dict["/1/data"]["data"] - px_to_nm = image_data_dict["/1/data"]["xreal"] * 1e9 / image.shape[1] - units = image_data_dict["/1/data"]["si_unit_xy"]["unitstr"] - else: - raise KeyError( - "Data location not defined in the .gwy file. Please locate it and add to the load_gwy() function." - ) - - # Convert image heights to nanometresQ - if units == "m": - image = image * 1e9 - else: - raise ValueError( - f"Units '{units}' have not been added for .gwy files. Please add \ - an SI to nanometre conversion factor for these units in _gwy_read_component in \ - io.py." - ) + image = None + has_image_found = False + units = "" + # How can we get current configuration["loading"] ? + # default_config.yaml is used to get conf["loading"]["scale"] + default_config = pkg_resources.open_text(__package__, "default_config.yaml").read() + config = yaml.safe_load(default_config) + self.scale = Scale(config["loading"]["scale"]) + LOGGER.info(self.scale) + + reg_gwy_idx = r"\/(\d+)\/data$" + for component in image_data_dict.keys(): # component is like '/0/data', /4/data/title' + match = re.match(reg_gwy_idx, component) + if match == None: # not data field + continue + LOGGER.debug(f"DataField exists in the container at {match[1]}") + channel_dict = image_data_dict[component] + # check if this data contains z-height values + for key in channel_dict.keys(): + if key != "si_unit_z": + continue + units = channel_dict[key]["unitstr"] + if units[len(units) - 1] != "m": # units doesn't end with m + continue + if not has_image_found: + image = channel_dict["data"] + LOGGER.info(f"\t({self.filename}) has topography image with z-height data({units}).") + if self.scale.is_available("nm", units): # m, um, mm conversion + scale = self.scale.get_factor("nm", units) + image = image * scale + else: + raise ValueError( + f"Units 
'{units}' have not been added in configuration file. \ + an SI to nanometre conversion factor for these units default_config.yaml." + ) + + m2nm = self.scale.get_factor("nm", "m") + px_to_nm = image_data_dict[component]["xreal"] * m2nm / float(image.shape[1]) + # scale instance holds the scaling factors for image data, then will be copied to img_dict + self.scale.add_factor( + "nm", "px_to_nm", image_data_dict[component]["xreal"] * m2nm / image.shape[1] + ) + self.scale.add_factor( + "nm", "px_to_nm_x", image_data_dict[component]["xreal"] * m2nm / image.shape[1] + ) + self.scale.add_factor( + "nm", "px_to_nm_y", image_data_dict[component]["yreal"] * m2nm / image.shape[0] + ) + has_image_found = True + + if not has_image_found: + raise KeyError( + "Data location not defined in the .gwy file. Please locate it and add to the load_gwy() function." + ) except FileNotFoundError: LOGGER.info(f"[{self.filename}] File not found : {self.img_path}") @@ -938,6 +1015,11 @@ def add_to_dict(self) -> None: "image_flattened": None, "grain_masks": self.grain_masks, } + # Copy scale instance to img_dict, only gwy loader has the attribute now, + # attribute of img_dict is checked. + if hasattr(self, "scale"): + LOGGER.info("Scaling factors are stored in img_dict[filename][scale] as Scale objct.") + self.img_dict[self.filename]["scale"] = self.scale def save_topostats_file(output_dir: Path, filename: str, topostats_object: dict) -> None: