Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conversion factors from um to nanometre added in load_gwy in io.py #659

Closed
wants to merge 11 commits into from
4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,9 +370,9 @@ def load_scan_jpk() -> LoadScans:


@pytest.fixture
def load_scan_gwy() -> LoadScans:
def load_scan_gwy(loading_config: dict) -> LoadScans:
"""Instantiate a LoadScans object from a .gwy file."""
scan_loader = LoadScans([RESOURCES / "file.gwy"], channel="dummy_channel")
scan_loader = LoadScans([RESOURCES / "file.gwy", RESOURCES / "file_landscape.gwy"], channel="dummy_channel")
return scan_loader


Expand Down
Binary file added tests/resources/file_landscape.gwy
Binary file not shown.
Binary file added tests/resources/file_square.gwy
Binary file not shown.
5 changes: 5 additions & 0 deletions tests/resources/test_scale_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Test for unit scaling
loading:
scale:
nm:
{m: 1e9, mm: 1e6, um: 1e3}
39 changes: 38 additions & 1 deletion tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
get_out_path,
path_to_str,
save_folder_grainstats,
Scale,
LoadScans,
save_pkl,
load_pkl,
Expand Down Expand Up @@ -359,11 +360,24 @@ def test_load_scan_gwy(load_scan_gwy: LoadScans) -> None:
load_scan_gwy.img_path = load_scan_gwy.img_paths[0]
load_scan_gwy.filename = load_scan_gwy.img_paths[0].stem
image, px_to_nm_scaling = load_scan_gwy.load_gwy()
assert len(load_scan_gwy.img_paths) == 2
assert isinstance(image, np.ndarray)
assert image.shape == (512, 512)
assert image.sum() == 33836850.232917726
assert isinstance(px_to_nm_scaling, float)
assert px_to_nm_scaling == 0.8468632812499975
# Test loading landscape .gwy file.
load_scan_gwy.img_path = load_scan_gwy.img_paths[1]
load_scan_gwy.filename = load_scan_gwy.img_paths[1].stem
image, px_to_nm_scaling = load_scan_gwy.load_gwy()
expected_sum = 1.9190233924574975e2 # calculated in Gwyddion + numpy 1.9.2
assert abs(image.sum() - expected_sum) < 1e-10
assert isinstance(image, np.ndarray)
assert image.shape == (170, 220) # (height, width)
assert isinstance(px_to_nm_scaling, float)
# conventional parameter, px_to_nm_scaling
expected_scaling_x = 1000.0 / 220.0
assert abs(px_to_nm_scaling - expected_scaling_x) < 1e-10


def test_load_scan_topostats(load_scan_topostats: LoadScans) -> None:
Expand Down Expand Up @@ -409,6 +423,21 @@ def test_gwy_read_component(load_scan_dummy: LoadScans) -> None:
assert list(test_dict.values()) == [{"test nested component": 3}]


def test_scale() -> None:
    """Ensure Scale loads factors from YAML, converts values and accepts new factors."""
    test_config = read_yaml(RESOURCES / "test_scale_config.yaml")
    if "scale" not in test_config["loading"]:
        raise KeyError("Scale is not defined in test_scale_config.yaml")
    scale = Scale(test_config["loading"]["scale"])
    # Factors defined in the YAML configuration.
    assert scale.is_available("nm", "m") is True
    assert scale.get_factor("nm", "um") == 1000
    assert scale.in_nm(1.0, "um") == 1000
    assert scale.in_nm(1.0, "mm") == 1000000
    # Factors can also be registered at runtime (e.g. per-image pixel scalings).
    scale.add_factor("nm", "pixel_x_in_nm", 1000 / 220)
    scale.add_factor("nm", "pixel_y_in_nm", 1000 / 170)
    assert abs(scale.in_nm(220, "pixel_x_in_nm") - 1000) < 1e-10
    assert abs(scale.in_nm(170, "pixel_y_in_nm") - 1000) < 1e-10


# FIXME : Get this test working
# @pytest.mark.parametrize(
# "unit, x, y, expected",
Expand All @@ -431,7 +460,7 @@ def test_gwy_read_component(load_scan_dummy: LoadScans) -> None:
("load_scan_spm", 1, (1024, 1024), 30695369.188316286, "minicircle", 0.4940029296875),
("load_scan_ibw", 1, (512, 512), -218091520.0, "minicircle2", 1.5625),
("load_scan_jpk", 1, (256, 256), 286598232.9308627, "file", 1.2770176335964876),
("load_scan_gwy", 1, (512, 512), 33836850.232917726, "file", 0.8468632812499975),
("load_scan_gwy", 2, (512, 512), 33836850.232917726, "file", 0.8468632812499975),
("load_scan_topostats", 1, (1024, 1024), 182067.12616107278, "file", 0.4940029296875),
],
)
Expand All @@ -456,6 +485,14 @@ def test_load_scan_get_data(
assert isinstance(scan.img_dict[filename]["pixel_to_nm_scaling"], float)
assert scan.img_dict[filename]["pixel_to_nm_scaling"] == pixel_to_nm_scaling

# Scale object holds the conversion factors of the image.
# Not all file formats provide it yet.
if "scale" in scan.img_dict[filename]:
scale = scan.img_dict[filename]["scale"]
assert isinstance(scale.get_factor("nm", "px_to_nm_x"), float)
assert scale.get_factor("nm", "px_to_nm_x") == pixel_to_nm_scaling
assert isinstance(scale.get_factor("nm", "px_to_nm_y"), float)


@pytest.mark.parametrize(
"x, y, log_msg",
Expand Down
3 changes: 3 additions & 0 deletions topostats/default_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ cores: 2 # Number of CPU cores to utilise for processing multiple files simultan
file_ext: .spm # File extension of the data files.
loading:
channel: Height # Channel to pull data from in the data files.
scale:
nm:
{m: 1e9, mm: 1e6, um: 1e3} # factors to convert to nm
filter:
run: true # Options : true, false
row_alignment_quantile: 0.5 # below values may improve flattening of larger features
Expand Down
136 changes: 93 additions & 43 deletions topostats/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pathlib import Path
import pickle as pkl
from typing import Any, Dict, List, Union
import regex
import re

import numpy as np
import pandas as pd
Expand All @@ -17,6 +17,8 @@
import h5py
from ruamel.yaml import YAML, YAMLError
from ruamel.yaml.main import round_trip_load as yaml_load, round_trip_dump as yaml_dump
import importlib.resources as pkg_resources
import yaml

from topostats.logs.logs import LOGGER_NAME

Expand Down Expand Up @@ -450,6 +452,49 @@ def convert_basename_to_relative_paths(df: pd.DataFrame):
return df


class Scale:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been thinking about this and am wondering if making this a class is perhaps over-complicating things?

Scaling is a simple multiplication really, its just a case of selecting the correct scaling factor based on the units used in the scans and what is desired.

We can have sensible defaults defined in the configuration which also allows users to extend the possible scalings and could also be setup to allow the selection of the desired scaling using a simple function to select the correct options from the dictionary.

I realise this is a fairly poorly formed idea and lacks code but what do you think @iobataya ?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @ns-rse ,
Since the loading dict is available in LoadScans by receiving it in its constructor now, the config dict will cover the factors without the Scale class. I agree that Scale class is too much and not necessary.

According to that direction, I'm planning to make change next as follows.

  • to make LoadScans receives loading_config dict.
  • to make load_gwy get the factors as dict from the configuration YAML
  • to eliminate Scale class.
  • to handle xres/yres for gwy correctly (square, landscape)
  • to copy the scaling factors (units, px_in_nm, px_in_nm_x, px_in_nm_y) in img_dict of image in add_dict method

I'll apply codes and test codes in my forked repository to check at first in this month. I'll try run tests with cov module of pytest.

I'd appreciate your ideas and advice.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @iobataya

This all sounds eminently reasonable and bulks out my above comments with more solid ideas, thank you.

I appreciate this is somewhat divergent to your original PR and am grateful of you taking the time to work through these things.

A couple of things that might be useful.

Merge conflicts

As we've had a couple of merges recently there are now merge conflicts, I'll resolve these on GitHub but you will need to update your local branch with git pull to pull those changes down.

Development tools

You may want to install some of the development tools we have listed as optional dependencies and make sure pre-commit is installed and running.

Dev tools can be installed if you do the following in your virtual environment (assuming you are using one, if not this might be a useful resource Conda environments for effective and reproducible research)...

cd ~/path/to/TopoStats
pip install -e .[dev]

This will pull in pytest-cov and tell you what the coverage change is although don't worry too much we have GitHub working with CodeCov to report these things.

However, we are increasingly trying to adhere to PEP8 code style and NumpyDocstyle for docstrings and have pre-commit hooks in place which when enabled will check changes are compliant prior to allowing permits (we also use pre-commit.ci to check pull requests but its a faster feedback loop to enable pre-commit locally).

Once you've installed the dev requirements you can in your VirtualEnv install the pre-commit hooks with pre-commit install and then when you make commits the checks will run and tell you if there are any problems (some will be automatically corrected if possible). I've a blog post I've written on pre-commit that you may find useful pre-commit: Protecting your future self if you want to find out more.

Please do let me know if you've any problems, more than happy to help and could even have a chat over video if you think that would be useful.

@ns-rse

Copy link
Author

@iobataya iobataya Sep 28, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hello @ns-rse,

I appreciate your info about git pull and pip install -e .. Especially, pip install -e . saved tons of time. And pytest-cov works fine to show coverage of codes. The test_io.py covers 84%.

I'm happy to report that TopoStats in my branch can now load .gwy images of any size (square and non-square) with arbitrary pixel/line aspect ratios. This lets me proceed with my project. I'll continue to improve the code and format it to the expected style.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Brilliant, that is great to hear @iobataya 👍

You'll notice below that there are a couple of merge conflicts that have arisen. This arises when changes have been made to files after you forked the repository and Git doesn't know which should be kept.

They need manually resolving, probably the easiest is to do that here on GitHub using the links/buttons below. You then choose which bit of code should be retained and a commit will be made to your branch here on GitHub which you then git pull down to your local copy.

Let me know if you've any problems or would like help with this.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ns-rse Sorry for my silence for a while. I prepared for domestic conference commencing tomorrow. My program using TopoStats is public now (https://github.com/iobataya/line_detection). Once the conference and other events are over and I take a break, I will start organizing the codes here.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the update @iobataya I hope the conference went well.

"""Hold scaling factors and convert value by multiplying.
It can hold conversion factors for image like "pixel_x_in_nm"
"""

def __init__(self, config_dict):
"""Instantiate scaling factors using configuration.yaml.
default_config["loading"]["scale"] should have the dict.
"""
self._factors = config_dict

def in_nm(self, value_from, unit_from) -> float:
"""Return value in nanometre from value and its unit"""
return self.get_value("nm", value_from, unit_from)

def get_value(self, unit_to, value_from, unit_from) -> float:
return value_from * self.get_factor(unit_to, unit_from)

def get_factor(self, unit_to, unit_from) -> float:
"""Conversion factor from a unit to another unit"""
return float(self._factors[unit_to][unit_from])

def add_factor(self, unit_to, unit_from, factor):
"""Add a factor with the arguments."""
if not unit_to in self._factors:
self._factors[unit_to] = {}
if not unit_from in self._factors[unit_to]:
self._factors[unit_to][unit_from] = {}
self._factors[unit_to][unit_from] = float(factor)

def is_available(self, unit_to, unit_from) -> bool:
if unit_to not in self._factors:
return False
return unit_from in self._factors[unit_to]

def __str__(self) -> str:
s = ""
for to_key in self._factors.keys():
for from_key in self._factors[to_key].keys():
s += f"1({to_key})={self._factors[to_key][from_key]}({from_key}),"
return s[:-1]


# pylint: disable=too-many-instance-attributes
class LoadScans:
"""Load the image and image parameters from a file path."""
Expand Down Expand Up @@ -840,49 +885,52 @@ def load_gwy(self) -> tuple:
image = None
has_image_found = False
units = ""

re = r"\/(\d+)\/data$"
components = list(image_data_dict.keys())
conv_factors = {"m": 1e9, "um": 1e6, "mm": 1e3}
for component in components:
match = regex.match(re, component)
if match == None:
# How can we get current configuration["loading"] ?
# default_config.yaml is used to get conf["loading"]["scale"]
default_config = pkg_resources.open_text(__package__, "default_config.yaml").read()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than loading the config again we have structured classes to take arguments and use the same argument name in the YAML configuration and the parameters are passed in using Pythons **kwargs functionality as the names match.

Thus to get the loading dictionary we need to add an argument to the __init__ method of the LoadScans and on line 123 of topostats/run_topostats.py this is loaded automatically and instead the line below can be modified to extract the scale.

config = yaml.safe_load(default_config)
self.scale = Scale(config["loading"]["scale"])
Copy link
Collaborator

@ns-rse ns-rse Sep 18, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the above suggestion we can move this to the __init__ method (as PyLint will complain about self. being defined outside of __init__.

The values will be in the argument scale so something like...

    def __init__(
        self,
        img_paths: list,
        channel: str,
        scale: dict,
    ):
    ...
    self.scale = scale

...would suffice and the value will cascade through.

Would need to add details of the additional parameter to the docstring too.

LOGGER.info(self.scale)

reg_gwy_idx = r"\/(\d+)\/data$"
for component in image_data_dict.keys(): # component is like '/0/data', /4/data/title'
match = re.match(reg_gwy_idx, component)
if match == None: # not data field
continue
idx = int(match[1])
LOGGER.info(f"Channel found at {idx}")
LOGGER.debug(f"DataField exists in the container at {match[1]}")
channel_dict = image_data_dict[component]
LOGGER.info(f"Guessing if this chchannel is height")
# check if this data contains z-height values
for key in channel_dict.keys():
if key == "si_unit_z":
u = channel_dict[key]["unitstr"]
if u[len(u) - 1] == "m": # True if m,um,mm or *m
LOGGER.info(f"\t{key} : {channel_dict[key]}, maybe topography.")
if not has_image_found:
image = image_data_dict[component]["data"]
units = image_data_dict[component][key]["unitstr"]
LOGGER.info(f"\tUnit for Z of this topography is {units}")
if units in conv_factors: # m, um, mm conversion
factor = conv_factors[units]
image = image * factor
else:
raise ValueError(
f"Units '{units}' have not been added for .gwy files. Please add \
an SI to nanometre conversion factor for these units in _gwy_read_component in \
io.py."
)
px_to_nm = image_data_dict[component]["xreal"] * 1e9 / image.shape[1]
# TODO: xy units and z units should be separately considered.
# added parameters for xy conversion support for non-square image
self.px_to_nm_x = image_data_dict[component]["xreal"] * 1e9 / image.shape[1]
self.px_to_nm_y = image_data_dict[component]["yreal"] * 1e9 / image.shape[0]
has_image_found = True
else:
LOGGER.info(f"\t{key} : {channel_dict[key]}, maybe topography, but not used.")
if key != "si_unit_z":
continue
units = channel_dict[key]["unitstr"]
if units[len(units) - 1] != "m": # units doesn't end with m
continue
if not has_image_found:
image = channel_dict["data"]
LOGGER.info(f"\t({self.filename}) has topography image with z-height data({units}).")
if self.scale.is_available("nm", units): # m, um, mm conversion
scale = self.scale.get_factor("nm", units)
image = image * scale
else:
LOGGER.info(f"\t{key} : {channel_dict[key]}, not topography.")
else:
if not key == "data":
LOGGER.info(f"\t{key} : {channel_dict[key]}")
raise ValueError(
    f"Units '{units}' have not been added to the configuration file. Please add \
an SI to nanometre conversion factor for these units in default_config.yaml."
)

m2nm = self.scale.get_factor("nm", "m")
px_to_nm = image_data_dict[component]["xreal"] * m2nm / float(image.shape[1])
# scale instance holds the scaling factors for image data, then will be copied to img_dict
self.scale.add_factor(
"nm", "px_to_nm", image_data_dict[component]["xreal"] * m2nm / image.shape[1]
)
self.scale.add_factor(
"nm", "px_to_nm_x", image_data_dict[component]["xreal"] * m2nm / image.shape[1]
)
self.scale.add_factor(
"nm", "px_to_nm_y", image_data_dict[component]["yreal"] * m2nm / image.shape[0]
)
has_image_found = True

if not has_image_found:
raise KeyError(
Expand Down Expand Up @@ -967,9 +1015,11 @@ def add_to_dict(self) -> None:
"image_flattened": None,
"grain_masks": self.grain_masks,
}
if hasattr(self, "pixel_to_nm_scaling_x"):
self.img_dict["pixel_to_nm_scaling_x"] = self.pixel_to_nm_scaling_x
self.img_dict["pixel_to_nm_scaling_y"] = self.pixel_to_nm_scaling_y
# Copy the Scale instance into img_dict; only the .gwy loader sets this
# attribute at present, so check for it before copying.
if hasattr(self, "scale"):
LOGGER.info("Scaling factors are stored in img_dict[filename][scale] as Scale objct.")
self.img_dict[self.filename]["scale"] = self.scale


def save_topostats_file(output_dir: Path, filename: str, topostats_object: dict) -> None:
Expand Down
Loading