-
Notifications
You must be signed in to change notification settings - Fork 10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversion factors from um to nanometre added in load_gwy in io.py #659
Changes from 5 commits
e51f9c4
ecbc120
a68de1c
bbd301b
0b7ef85
4eee70a
5b8a5cc
de1ac76
07fec64
9ac1a92
d8103c8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Test for unit scaling | ||
loading: | ||
scale: | ||
nm: | ||
{m: 1e9, mm: 1e6, um: 1e3} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,7 @@ | |
from pathlib import Path | ||
import pickle as pkl | ||
from typing import Any, Dict, List, Union | ||
import regex | ||
import re | ||
|
||
import numpy as np | ||
import pandas as pd | ||
|
@@ -17,6 +17,8 @@ | |
import h5py | ||
from ruamel.yaml import YAML, YAMLError | ||
from ruamel.yaml.main import round_trip_load as yaml_load, round_trip_dump as yaml_dump | ||
import importlib.resources as pkg_resources | ||
import yaml | ||
|
||
from topostats.logs.logs import LOGGER_NAME | ||
|
||
|
@@ -450,6 +452,49 @@ def convert_basename_to_relative_paths(df: pd.DataFrame): | |
return df | ||
|
||
|
||
class Scale:
    """Hold unit-conversion factors and convert values by multiplication.

    Factors are stored as a nested mapping ``{unit_to: {unit_from: factor}}``,
    e.g. ``{"nm": {"m": 1e9, "um": 1e3}}``. Besides plain units it can also
    hold image-specific factors such as ``"px_to_nm_x"``.
    """

    def __init__(self, config_dict):
        """Instantiate scaling factors from a configuration mapping.

        Parameters
        ----------
        config_dict : dict
            Nested mapping of conversion factors, typically taken from
            ``config["loading"]["scale"]`` of the YAML configuration.
            The mapping is kept by reference, not copied.
        """
        self._factors = config_dict

    def in_nm(self, value_from, unit_from) -> float:
        """Return ``value_from`` (given in ``unit_from``) converted to nanometres."""
        return self.get_value("nm", value_from, unit_from)

    def get_value(self, unit_to, value_from, unit_from) -> float:
        """Return ``value_from`` converted from ``unit_from`` to ``unit_to``."""
        return value_from * self.get_factor(unit_to, unit_from)

    def get_factor(self, unit_to, unit_from) -> float:
        """Return the conversion factor from ``unit_from`` to ``unit_to``.

        Raises
        ------
        KeyError
            If no factor is registered for this unit pair.
        """
        return float(self._factors[unit_to][unit_from])

    def add_factor(self, unit_to, unit_from, factor):
        """Register (or overwrite) the conversion factor for a unit pair."""
        # setdefault creates the inner mapping on first use; the value is
        # stored as float so get_factor stays type-consistent.
        self._factors.setdefault(unit_to, {})[unit_from] = float(factor)

    def is_available(self, unit_to, unit_from) -> bool:
        """Return True when a factor for the unit pair is registered."""
        return unit_to in self._factors and unit_from in self._factors[unit_to]

    def __str__(self) -> str:
        """Return all factors formatted as '1(to)=factor(from)' joined by commas."""
        return ",".join(
            f"1({to_key})={factor}({from_key})"
            for to_key, factors in self._factors.items()
            for from_key, factor in factors.items()
        )
|
||
|
||
# pylint: disable=too-many-instance-attributes | ||
class LoadScans: | ||
"""Load the image and image parameters from a file path.""" | ||
|
@@ -840,49 +885,52 @@ def load_gwy(self) -> tuple: | |
image = None | ||
has_image_found = False | ||
units = "" | ||
|
||
re = r"\/(\d+)\/data$" | ||
components = list(image_data_dict.keys()) | ||
conv_factors = {"m": 1e9, "um": 1e6, "mm": 1e3} | ||
for component in components: | ||
match = regex.match(re, component) | ||
if match == None: | ||
# How can we get current configuration["loading"] ? | ||
# default_config.yaml is used to get conf["loading"]["scale"] | ||
default_config = pkg_resources.open_text(__package__, "default_config.yaml").read() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rather than loading the config again we have structured classes to take arguments and use the same argument name in the YAML configuration and the parameters are passed in using Pythons Thus to get the |
||
config = yaml.safe_load(default_config) | ||
self.scale = Scale(config["loading"]["scale"]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With the above suggestion we can move this to the The values will be in the argument def __init__(
self,
img_paths: list,
channel: str,
scale: dict,
):
...
self.scale = scale ...would suffice and the value will cascade through. Would need to add details of the additional parameter to the docstring too. |
||
LOGGER.info(self.scale) | ||
|
||
reg_gwy_idx = r"\/(\d+)\/data$" | ||
for component in image_data_dict.keys(): # component is like '/0/data', /4/data/title' | ||
match = re.match(reg_gwy_idx, component) | ||
if match == None: # not data field | ||
continue | ||
idx = int(match[1]) | ||
LOGGER.info(f"Channel found at {idx}") | ||
LOGGER.debug(f"DataField exists in the container at {match[1]}") | ||
channel_dict = image_data_dict[component] | ||
LOGGER.info(f"Guessing if this chchannel is height") | ||
# check if this data contains z-height values | ||
for key in channel_dict.keys(): | ||
if key == "si_unit_z": | ||
u = channel_dict[key]["unitstr"] | ||
if u[len(u) - 1] == "m": # True if m,um,mm or *m | ||
LOGGER.info(f"\t{key} : {channel_dict[key]}, maybe topography.") | ||
if not has_image_found: | ||
image = image_data_dict[component]["data"] | ||
units = image_data_dict[component][key]["unitstr"] | ||
LOGGER.info(f"\tUnit for Z of this topography is {units}") | ||
if units in conv_factors: # m, um, mm conversion | ||
factor = conv_factors[units] | ||
image = image * factor | ||
else: | ||
raise ValueError( | ||
f"Units '{units}' have not been added for .gwy files. Please add \ | ||
an SI to nanometre conversion factor for these units in _gwy_read_component in \ | ||
io.py." | ||
) | ||
px_to_nm = image_data_dict[component]["xreal"] * 1e9 / image.shape[1] | ||
# TODO: xy units and z units should be separately considered. | ||
# added parameters for xy conversion support for non-square image | ||
self.px_to_nm_x = image_data_dict[component]["xreal"] * 1e9 / image.shape[1] | ||
self.px_to_nm_y = image_data_dict[component]["yreal"] * 1e9 / image.shape[0] | ||
has_image_found = True | ||
else: | ||
LOGGER.info(f"\t{key} : {channel_dict[key]}, maybe topography, but not used.") | ||
if key != "si_unit_z": | ||
continue | ||
units = channel_dict[key]["unitstr"] | ||
if units[len(units) - 1] != "m": # units doesn't end with m | ||
continue | ||
if not has_image_found: | ||
image = channel_dict["data"] | ||
LOGGER.info(f"\t({self.filename}) has topography image with z-height data({units}).") | ||
if self.scale.is_available("nm", units): # m, um, mm conversion | ||
scale = self.scale.get_factor("nm", units) | ||
image = image * scale | ||
else: | ||
LOGGER.info(f"\t{key} : {channel_dict[key]}, not topography.") | ||
else: | ||
if not key == "data": | ||
LOGGER.info(f"\t{key} : {channel_dict[key]}") | ||
raise ValueError( | ||
f"Units '{units}' have not been added in configuration file. \ | ||
an SI to nanometre conversion factor for these units default_config.yaml." | ||
) | ||
|
||
m2nm = self.scale.get_factor("nm", "m") | ||
px_to_nm = image_data_dict[component]["xreal"] * m2nm / float(image.shape[1]) | ||
# scale instance holds the scaling factors for image data, then will be copied to img_dict | ||
self.scale.add_factor( | ||
"nm", "px_to_nm", image_data_dict[component]["xreal"] * m2nm / image.shape[1] | ||
) | ||
self.scale.add_factor( | ||
"nm", "px_to_nm_x", image_data_dict[component]["xreal"] * m2nm / image.shape[1] | ||
) | ||
self.scale.add_factor( | ||
"nm", "px_to_nm_y", image_data_dict[component]["yreal"] * m2nm / image.shape[0] | ||
) | ||
has_image_found = True | ||
|
||
if not has_image_found: | ||
raise KeyError( | ||
|
@@ -967,9 +1015,11 @@ def add_to_dict(self) -> None: | |
"image_flattened": None, | ||
"grain_masks": self.grain_masks, | ||
} | ||
if hasattr(self, "pixel_to_nm_scaling_x"): | ||
self.img_dict["pixel_to_nm_scaling_x"] = self.pixel_to_nm_scaling_x | ||
self.img_dict["pixel_to_nm_scaling_y"] = self.pixel_to_nm_scaling_y | ||
# Copy scale instance to img_dict, only gwy loader has the attribute now, | ||
# attribute of img_dict is checked. | ||
if hasattr(self, "scale"): | ||
LOGGER.info("Scaling factors are stored in img_dict[filename][scale] as Scale objct.") | ||
self.img_dict[self.filename]["scale"] = self.scale | ||
|
||
|
||
def save_topostats_file(output_dir: Path, filename: str, topostats_object: dict) -> None: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've been thinking about this and am wondering if making this a class is perhaps over-complicating things?
Scaling is a simple multiplication really; it's just a case of selecting the correct scaling factor based on the units used in the scans and what is desired.
We can have sensible defaults defined in the configuration which also allows users to extend the possible scalings and could also be setup to allow the selection of the desired scaling using a simple function to select the correct options from the dictionary.
I realise this is a fairly poorly formed idea and lacks code but what do you think @iobataya ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi @ns-rse ,
Since the loading dict is available in
LoadScans
by receiving it in its constructor now, the config dict will cover the factors without the Scale class. I agree that the Scale class is too much and not necessary. According to that direction, I'm planning to make the next changes as follows.
add_dict
method. I'll apply the code and test code in my forked repository to check first, within this month. I'll try running tests with the cov module of pytest.
I'd appreciate your ideas and advice.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi @iobataya
This all sounds eminently reasonable and bulks out my above comments with more solid ideas, thank you.
I appreciate this is somewhat divergent to your original PR and am grateful of you taking the time to work through these things.
A couple of things that might be useful.
Merge conflicts
As we've had a couple of merges recently there are now merge conflicts, I'll resolve these on GitHub but you will need to update your local branch with
git pull
to pull those changes down.

Development tools
You may want to install some of the development tools we have listed as optional dependencies and make sure
pre-commit
is installed and running.

Dev tools can be installed if you do the following in your virtual environment (assuming you are using one; if not, this might be a useful resource: Conda environments for effective and reproducible research)...
This will pull in
pytest-cov
and tell you what the coverage change is, although don't worry too much — we have GitHub working with CodeCov to report these things.

However, we are increasingly trying to adhere to PEP8 code style and NumpyDoc style for docstrings, and have pre-commit hooks in place which, when enabled, will check changes are compliant prior to allowing commits (we also use pre-commit.ci to check pull requests, but it's a faster feedback loop to enable
pre-commit
locally).

Once you've installed the
dev
requirements you can, in your VirtualEnv, install the pre-commit
hooks with `pre-commit install`
and then when you make commits the checks will run and tell you if there are any problems (some will be automatically corrected if possible). I've written a blog post on pre-commit that you may find useful — pre-commit: Protecting your future self — if you want to find out more.

Please do let me know if you've any problems; more than happy to help, and we could even have a chat over video if you think that would be useful.
@ns-rse
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hello @ns-rse,
I appreciate your info about `git pull` and `pip install -e .`. Especially, `pip install -e .` saved tons of time. And `pytest-cov` works fine to show coverage of the code. The test_io.py covers 84%.

I'm happy that the TopoStats in my branch can now load .gwy images of any size (square and non-square) with arbitrary pixel/line aspect ratios. This lets me proceed with my project. I'll continue to improve the code and format it to the expected style.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Brilliant, that is great to hear @iobataya 👍
You'll notice below that there are a couple of merge conflicts that have arisen. This arises when changes have been made to files after you forked the repository and Git doesn't know which should be kept.
They need manually resolving, probably the easiest is to do that here on GitHub using the links/buttons below. You then choose which bit of code should be retained and a commit will be made to your branch here on GitHub which you then
git pull
down to your local copy.

Let me know if you've any problems or would like help with this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@ns-rse Sorry for my silence for a while. I have been preparing for a domestic conference commencing tomorrow. My program using TopoStats is public now (https://github.com/iobataya/line_detection). Once the conference and other events are over and I take a break, I will start organizing the code here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the update @iobataya I hope the conference went well.