Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conversion factors from um to nanometre added in load_gwy in io.py #659

Closed
wants to merge 11 commits into from
4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,9 +370,9 @@ def load_scan_jpk() -> LoadScans:


@pytest.fixture
def load_scan_gwy() -> LoadScans:
def load_scan_gwy(loading_config: dict) -> LoadScans:
"""Instantiate a LoadScans object from a .gwy file."""
scan_loader = LoadScans([RESOURCES / "file.gwy"], channel="dummy_channel")
scan_loader = LoadScans([RESOURCES / "file.gwy", RESOURCES / "file_landscape.gwy"], channel="dummy_channel")
return scan_loader


Expand Down
Binary file added tests/resources/file_landscape.gwy
Binary file not shown.
Binary file added tests/resources/file_square.gwy
Binary file not shown.
5 changes: 5 additions & 0 deletions tests/resources/test_scale_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Test for unit scaling
loading:
scale:
nm:
{m: 1e9, mm: 1e6, um: 1e3}
39 changes: 38 additions & 1 deletion tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
get_out_path,
path_to_str,
save_folder_grainstats,
Scale,
LoadScans,
save_pkl,
load_pkl,
Expand Down Expand Up @@ -359,11 +360,24 @@ def test_load_scan_gwy(load_scan_gwy: LoadScans) -> None:
load_scan_gwy.img_path = load_scan_gwy.img_paths[0]
load_scan_gwy.filename = load_scan_gwy.img_paths[0].stem
image, px_to_nm_scaling = load_scan_gwy.load_gwy()
assert len(load_scan_gwy.img_paths) == 2
assert isinstance(image, np.ndarray)
assert image.shape == (512, 512)
assert image.sum() == 33836850.232917726
assert isinstance(px_to_nm_scaling, float)
assert px_to_nm_scaling == 0.8468632812499975
# Test loading landscape .gwy file.
load_scan_gwy.img_path = load_scan_gwy.img_paths[1]
load_scan_gwy.filename = load_scan_gwy.img_paths[1].stem
image, px_to_nm_scaling = load_scan_gwy.load_gwy()
expected_sum = 1.9190233924574975e2 # calculated in Gwyddion + numpy 1.9.2
assert abs(image.sum() - expected_sum) < 1e-10
assert isinstance(image, np.ndarray)
assert image.shape == (170, 220) # (height, width)
assert isinstance(px_to_nm_scaling, float)
# conventional parameter, px_to_nm_scaling
expected_scaling_x = 1000.0 / 220.0
assert abs(px_to_nm_scaling - expected_scaling_x) < 1e-10


def test_load_scan_topostats(load_scan_topostats: LoadScans) -> None:
Expand Down Expand Up @@ -409,6 +423,21 @@ def test_gwy_read_component(load_scan_dummy: LoadScans) -> None:
assert list(test_dict.values()) == [{"test nested component": 3}]


def test_scale() -> None:
    """Ensure Scale loads factors from YAML, converts values and accepts new factors."""
    test_config = read_yaml(RESOURCES / "test_scale_config.yaml")
    if "scale" not in test_config["loading"]:
        raise KeyError("Scale is not defined in test_scale_config.yaml")
    scale = Scale(test_config["loading"]["scale"])
    # Factors defined in the YAML configuration.
    assert scale.is_available("nm", "m") is True
    assert scale.get_factor("nm", "um") == 1000
    assert scale.in_nm(1.0, "um") == 1000
    assert scale.in_nm(1.0, "mm") == 1000000
    # Factors can also be registered at runtime (e.g. per-image pixel scalings).
    scale.add_factor("nm", "pixel_x_in_nm", 1000 / 220)
    scale.add_factor("nm", "pixel_y_in_nm", 1000 / 170)
    assert abs(scale.in_nm(220, "pixel_x_in_nm") - 1000) < 1e-10
    assert abs(scale.in_nm(170, "pixel_y_in_nm") - 1000) < 1e-10


# FIXME : Get this test working
# @pytest.mark.parametrize(
# "unit, x, y, expected",
Expand All @@ -431,7 +460,7 @@ def test_gwy_read_component(load_scan_dummy: LoadScans) -> None:
("load_scan_spm", 1, (1024, 1024), 30695369.188316286, "minicircle", 0.4940029296875),
("load_scan_ibw", 1, (512, 512), -218091520.0, "minicircle2", 1.5625),
("load_scan_jpk", 1, (256, 256), 286598232.9308627, "file", 1.2770176335964876),
("load_scan_gwy", 1, (512, 512), 33836850.232917726, "file", 0.8468632812499975),
("load_scan_gwy", 2, (512, 512), 33836850.232917726, "file", 0.8468632812499975),
("load_scan_topostats", 1, (1024, 1024), 182067.12616107278, "file", 0.4940029296875),
],
)
Expand All @@ -456,6 +485,14 @@ def test_load_scan_get_data(
assert isinstance(scan.img_dict[filename]["pixel_to_nm_scaling"], float)
assert scan.img_dict[filename]["pixel_to_nm_scaling"] == pixel_to_nm_scaling

# Scale object holds the conversion factors of the image.
# Not all file formats provide it yet.
if "scale" in scan.img_dict[filename]:
scale = scan.img_dict[filename]["scale"]
assert isinstance(scale.get_factor("nm", "px_to_nm_x"), float)
assert scale.get_factor("nm", "px_to_nm_x") == pixel_to_nm_scaling
assert isinstance(scale.get_factor("nm", "px_to_nm_y"), float)


@pytest.mark.parametrize(
"x, y, log_msg",
Expand Down
3 changes: 3 additions & 0 deletions topostats/default_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ cores: 2 # Number of CPU cores to utilise for processing multiple files simultan
file_ext: .spm # File extension of the data files.
loading:
channel: Height # Channel to pull data from in the data files.
scale:
nm:
{m: 1e9, mm: 1e6, um: 1e3} # factors to convert to nm
filter:
run: true # Options : true, false
row_alignment_quantile: 0.5 # below values may improve flattening of larger features
Expand Down
136 changes: 93 additions & 43 deletions topostats/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pathlib import Path
import pickle as pkl
from typing import Any, Dict, List, Union
import regex
import re

import numpy as np
import pandas as pd
Expand All @@ -17,6 +17,8 @@
import h5py
from ruamel.yaml import YAML, YAMLError
from ruamel.yaml.main import round_trip_load as yaml_load, round_trip_dump as yaml_dump
import importlib.resources as pkg_resources
import yaml

from topostats.logs.logs import LOGGER_NAME

Expand Down Expand Up @@ -450,6 +452,49 @@ def convert_basename_to_relative_paths(df: pd.DataFrame):
return df


class Scale:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been thinking about this and am wondering if making this a class is perhaps over-complicating things?

Scaling is a simple multiplication really, its just a case of selecting the correct scaling factor based on the units used in the scans and what is desired.

We can have sensible defaults defined in the configuration which also allows users to extend the possible scalings and could also be setup to allow the selection of the desired scaling using a simple function to select the correct options from the dictionary.

I realise this is a fairly poorly formed idea and lacks code but what do you think @iobataya ?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @ns-rse ,
Since the loading dict is available in LoadScans by receiving it in its constructor now, the config dict will cover the factors without the Scale class. I agree that Scale class is too much and not necessary.

According to that direction, I'm planning to make change next as follows.

  • to make LoadScans receives loading_config dict.
  • to make load_gwy get the factors as dict from the configuration YAML
  • to eliminate Scale class.
  • to handle xres/yres for gwy correctly (square, landscape)
  • to copy the scaling factors (units, px_in_nm, px_in_nm_x, px_in_nm_y) in img_dict of image in add_dict method

I'll apply codes and test codes in my forked repository to check at first in this month. I'll try run tests with cov module of pytest.

I'd appreciate your ideas and advice.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @iobataya

This all sounds eminently reasonable and bulks out my above comments with more solid ideas, thank you.

I appreciate this is somewhat divergent to your original PR and am grateful of you taking the time to work through these things.

A couple of things that might be useful.

Merge conflicts

As we've had a couple of merges recently there are now merge conflicts, I'll resolve these on GitHub but you will need to update your local branch with git pull to pull those changes down.

Development tools

You may want to install some of the development tools we have listed as optional dependencies and make sure pre-commit is installed and running.

Dev tools can be installed if you do the following in your virtual environment (assuming you are using one, if not this might be a useful resource Conda environments for effective and reproducible research)...

cd ~/path/to/TopoStats
pip install -e .[dev]

This will pull in pytest-cov and tell you what the coverage change is although don't worry too much we have GitHub working with CodeCov to report these things.

However, we are increasingly trying to adhere to PEP8 code style and NumpyDocstyle for docstrings and have pre-commit hooks in place which when enabled will check changes are compliant prior to allowing permits (we also use pre-commit.ci to check pull requests but its a faster feedback loop to enable pre-commit locally).

Once you've installed the dev requirements you can in your VirtualEnv install the pre-commit hooks with pre-commit install and then when you make commits the checks will run and tell you if there are any problems (some will be automatically corrected if possible). I've a blog post I've written on pre-commit that you may find useful pre-commit: Protecting your future self if you want to find out more.

Please do let me know if you've any problems, more than happy to help and could even have a chat over video if you think that would be useful.

@ns-rse

Copy link
Author

@iobataya iobataya Sep 28, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hello @ns-rse,

I appreciate your info about git pull and pip install -e .. Especially, pip install -e . saved tons of time. And pytest-cov works fine to show coverage of codes. The test_io.py covers 84%.

I'm happy to report that TopoStats in my branch can now load .gwy images of any size (square and non-square) with arbitrary pixel/line aspect ratios. This lets me proceed with my project. I'll continue to improve the code and format it to the expected style.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Brilliant, that is great to hear @iobataya 👍

You'll notice below that there are a couple of merge conflicts that have arisen. This arises when changes have been made to files after you forked the repository and Git doesn't know which should be kept.

They need manually resolving, probably the easiest is to do that here on GitHub using the links/buttons below. You then choose which bit of code should be retained and a commit will be made to your branch here on GitHub which you then git pull down to your local copy.

Let me know if you've any problems or would like help with this.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ns-rse Sorry for my silence for a while. I prepared for domestic conference commencing tomorrow. My program using TopoStats is public now (https://github.com/iobataya/line_detection). Once the conference and other events are over and I take a break, I will start organizing the codes here.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the update @iobataya I hope the conference went well.

"""Hold scaling factors and convert value by multiplying.
It can hold conversion factors for image like "pixel_x_in_nm"
"""

def __init__(self, config_dict):
"""Instantiate scaling factors using configuration.yaml.
default_config["loading"]["scale"] should have the dict.
"""
self._factors = config_dict

def in_nm(self, value_from, unit_from) -> float:
"""Return value in nanometre from value and its unit"""
return self.get_value("nm", value_from, unit_from)

def get_value(self, unit_to, value_from, unit_from) -> float:
return value_from * self.get_factor(unit_to, unit_from)

def get_factor(self, unit_to, unit_from) -> float:
"""Conversion factor from a unit to another unit"""
return float(self._factors[unit_to][unit_from])

def add_factor(self, unit_to, unit_from, factor):
"""Add a factor with the arguments."""
if not unit_to in self._factors:
self._factors[unit_to] = {}
if not unit_from in self._factors[unit_to]:
self._factors[unit_to][unit_from] = {}
self._factors[unit_to][unit_from] = float(factor)

def is_available(self, unit_to, unit_from) -> bool:
if unit_to not in self._factors:
return False
return unit_from in self._factors[unit_to]

def __str__(self) -> str:
s = ""
for to_key in self._factors.keys():
for from_key in self._factors[to_key].keys():
s += f"1({to_key})={self._factors[to_key][from_key]}({from_key}),"
return s[:-1]


# pylint: disable=too-many-instance-attributes
class LoadScans:
"""Load the image and image parameters from a file path."""
Expand Down Expand Up @@ -840,49 +885,52 @@ def load_gwy(self) -> tuple:
image = None
has_image_found = False
units = ""

re = r"\/(\d+)\/data$"
components = list(image_data_dict.keys())
conv_factors = {"m": 1e9, "um": 1e6, "mm": 1e3}
for component in components:
match = regex.match(re, component)
if match == None:
# How can we get current configuration["loading"] ?
# default_config.yaml is used to get conf["loading"]["scale"]
default_config = pkg_resources.open_text(__package__, "default_config.yaml").read()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than loading the config again we have structured classes to take arguments and use the same argument name in the YAML configuration and the parameters are passed in using Pythons **kwargs functionality as the names match.

Thus to get the loading dictionary we need to add an argument to the __init__ method of the LoadScans and on line 123 of topostats/run_topostats.py this is loaded automatically and instead the line below can be modified to extract the scale.

config = yaml.safe_load(default_config)
self.scale = Scale(config["loading"]["scale"])
Copy link
Collaborator

@ns-rse ns-rse Sep 18, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the above suggestion we can move this to the __init__ method (as PyLint will complain about self. being defined outside of __init__.

The values will be in the argument scale so something like...

    def __init__(
        self,
        img_paths: list,
        channel: str,
        scale: dict,
    ):
    ...
    self.scale = scale

...would suffice and the value will cascade through.

Would need to add details of the additional parameter to the docstring too.

LOGGER.info(self.scale)

reg_gwy_idx = r"\/(\d+)\/data$"
for component in image_data_dict.keys(): # component is like '/0/data', /4/data/title'
match = re.match(reg_gwy_idx, component)
if match == None: # not data field
continue
idx = int(match[1])
LOGGER.info(f"Channel found at {idx}")
LOGGER.debug(f"DataField exists in the container at {match[1]}")
channel_dict = image_data_dict[component]
LOGGER.info(f"Guessing if this chchannel is height")
# check if this data contains z-height values
for key in channel_dict.keys():
if key == "si_unit_z":
u = channel_dict[key]["unitstr"]
if u[len(u) - 1] == "m": # True if m,um,mm or *m
LOGGER.info(f"\t{key} : {channel_dict[key]}, maybe topography.")
if not has_image_found:
image = image_data_dict[component]["data"]
units = image_data_dict[component][key]["unitstr"]
LOGGER.info(f"\tUnit for Z of this topography is {units}")
if units in conv_factors: # m, um, mm conversion
factor = conv_factors[units]
image = image * factor
else:
raise ValueError(
f"Units '{units}' have not been added for .gwy files. Please add \
an SI to nanometre conversion factor for these units in _gwy_read_component in \
io.py."
)
px_to_nm = image_data_dict[component]["xreal"] * 1e9 / image.shape[1]
# TODO: xy units and z units should be separately considered.
# added parameters for xy conversion support for non-square image
self.px_to_nm_x = image_data_dict[component]["xreal"] * 1e9 / image.shape[1]
self.px_to_nm_y = image_data_dict[component]["yreal"] * 1e9 / image.shape[0]
has_image_found = True
else:
LOGGER.info(f"\t{key} : {channel_dict[key]}, maybe topography, but not used.")
if key != "si_unit_z":
continue
units = channel_dict[key]["unitstr"]
if units[len(units) - 1] != "m": # units doesn't end with m
continue
if not has_image_found:
image = channel_dict["data"]
LOGGER.info(f"\t({self.filename}) has topography image with z-height data({units}).")
if self.scale.is_available("nm", units): # m, um, mm conversion
scale = self.scale.get_factor("nm", units)
image = image * scale
else:
LOGGER.info(f"\t{key} : {channel_dict[key]}, not topography.")
else:
if not key == "data":
LOGGER.info(f"\t{key} : {channel_dict[key]}")
raise ValueError(
    f"Units '{units}' have not been added to the configuration file. Please add \
an SI to nanometre conversion factor for these units in default_config.yaml."
)

m2nm = self.scale.get_factor("nm", "m")
px_to_nm = image_data_dict[component]["xreal"] * m2nm / float(image.shape[1])
# scale instance holds the scaling factors for image data, then will be copied to img_dict
self.scale.add_factor(
"nm", "px_to_nm", image_data_dict[component]["xreal"] * m2nm / image.shape[1]
)
self.scale.add_factor(
"nm", "px_to_nm_x", image_data_dict[component]["xreal"] * m2nm / image.shape[1]
)
self.scale.add_factor(
"nm", "px_to_nm_y", image_data_dict[component]["yreal"] * m2nm / image.shape[0]
)
has_image_found = True

if not has_image_found:
raise KeyError(
Expand Down Expand Up @@ -967,9 +1015,11 @@ def add_to_dict(self) -> None:
"image_flattened": None,
"grain_masks": self.grain_masks,
}
if hasattr(self, "pixel_to_nm_scaling_x"):
self.img_dict["pixel_to_nm_scaling_x"] = self.pixel_to_nm_scaling_x
self.img_dict["pixel_to_nm_scaling_y"] = self.pixel_to_nm_scaling_y
# Copy the Scale instance into img_dict; only the .gwy loader sets this
# attribute at present, so check for it before copying.
if hasattr(self, "scale"):
LOGGER.info("Scaling factors are stored in img_dict[filename][scale] as Scale objct.")
self.img_dict[self.filename]["scale"] = self.scale


def save_topostats_file(output_dir: Path, filename: str, topostats_object: dict) -> None:
Expand Down
Loading