Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Nion directory #25

Merged
merged 11 commits into from
Jun 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions pynxtools_em/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from pynxtools_em.subparsers.convention_reader import NxEmConventionParser
from pynxtools_em.subparsers.nxs_imgs import NxEmImagesSubParser
from pynxtools_em.subparsers.nxs_mtex import NxEmNxsMTexSubParser
from pynxtools_em.subparsers.nxs_nion import ZipNionProjectSubParser
from pynxtools_em.subparsers.nxs_nion import NionProjectSubParser
from pynxtools_em.subparsers.nxs_pyxem import NxEmNxsPyxemSubParser
from pynxtools_em.subparsers.oasis_config_reader import (
NxEmNomadOasisConfigurationParser,
Expand Down Expand Up @@ -113,7 +113,7 @@ def read(
nxs_pyxem = NxEmNxsPyxemSubParser(entry_id, case.dat[0], verbose=False)
nxs_pyxem.parse(template)

nxs_nion = ZipNionProjectSubParser(entry_id, case.dat[0], verbose=False)
nxs_nion = NionProjectSubParser(entry_id, case.dat[0], verbose=False)
nxs_nion.parse(template)

# zip_parser = NxEmOmZipEbsdParser(case.dat[0], entry_id)
Expand Down
214 changes: 133 additions & 81 deletions pynxtools_em/subparsers/nxs_nion.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

"""Parse Nion-specific content in a file containing a zip-compressed nionswift project."""

import glob
import json
import mmap
from typing import Dict
Expand All @@ -44,8 +45,8 @@
from pynxtools_em.utils.nion_utils import uuid_to_file_name


class ZipNionProjectSubParser:
"""Parse zip-compressed archive of a nionswift project with its content."""
class NionProjectSubParser:
"""Parse (zip-compressed archive of a) nionswift project with its content."""

def __init__(
self, entry_id: int = 1, input_file_path: str = "", verbose: bool = True
Expand All @@ -71,7 +72,8 @@ def __init__(
self.configure()
self.supported = False
self.verbose = verbose
self.check_if_zipped_nionswift_project_file()
self.is_zipped = False
self.check_if_nionswift_project()

def configure(self):
self.tmp["cfg"]: Dict = {}
Expand All @@ -81,49 +83,94 @@ def configure(self):
self.tmp["cfg"]["spectrum_id"] = 1
self.tmp["flat_dict_meta"] = fd.FlatDict({})

def check_if_zipped_nionswift_project_file(self):
def check_if_nionswift_project(self):
"""Inspect the content of the compressed project file to check if supported."""
if not self.file_path.endswith(".zip.nion"):
if self.file_path.endswith(".zip.nion"):
self.is_zipped = True
elif self.file_path.endswith(".nsproj"):
self.is_zipped = False
else:
print(
f"Parser ZipNionProject finds no content in {self.file_path} that it supports"
f"Parser NionProject finds no content in {self.file_path} that it supports"
)
return

with open(self.file_path, "rb", 0) as fp:
s = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
magic = s.read(8)
if self.verbose:
fp.seek(0, 2)
eof_byte_offset = fp.tell()
print(
f"Expecting zip-compressed file: ___{self.file_path}___{magic}___{get_sha256_of_file_content(fp)}___{eof_byte_offset}___"
)
"""
if magic != b'PK\x03\x04': # https://en.wikipedia.org/wiki/List_of_file_signatures
print(f"Test 1 failed, {self.file_path} is not a ZIP archive !")
return False
"""
# analyze information content in the archive an granularization
with ZipFile(self.file_path) as zip_file_hdl:
for file in zip_file_hdl.namelist():
if (
file.endswith(".h5")
or file.endswith(".hdf")
or file.endswith(".hdf5")
):
with zip_file_hdl.open(file) as fp:
if self.is_zipped:
with open(self.file_path, "rb", 0) as fp:
s = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
magic = s.read(8)
if self.verbose:
fp.seek(0, 2)
eof_byte_offset = fp.tell()
print(
f"Expecting zip-compressed file: ___{self.file_path}___{magic}___{get_sha256_of_file_content(fp)}___{eof_byte_offset}___"
)
"""
if magic != b'PK\x03\x04': # https://en.wikipedia.org/wiki/List_of_file_signatures
print(f"Test 1 failed, {self.file_path} is not a ZIP archive !")
return False
"""
# analyze information content of the project and its granularization
with ZipFile(self.file_path) as zip_file_hdl:
for file in zip_file_hdl.namelist():
if file.endswith((".h5", ".hdf", ".hdf5")):
with zip_file_hdl.open(file) as fp:
magic = fp.read(8)
if self.verbose:
fp.seek(0, 2)
eof_byte_offset = fp.tell()
print(
f"Expecting hfive: ___{file}___{magic}___{get_sha256_of_file_content(fp)}___{eof_byte_offset}___"
)
key = file[file.rfind("/") + 1 :].replace(".h5", "")
if key not in self.hfive_file_dict:
self.hfive_file_dict[key] = file
elif file.endswith(".ndata"):
with zip_file_hdl.open(file) as fp:
magic = fp.read(8)
if self.verbose:
fp.seek(0, 2)
eof_byte_offset = fp.tell()
print(
f"Expecting ndata: ___{file}___{magic}___{get_sha256_of_file_content(fp)}___{eof_byte_offset}___"
)
key = file[file.rfind("/") + 1 :].replace(".ndata", "")
if key not in self.ndata_file_dict:
self.ndata_file_dict[key] = file
elif file.endswith(".nsproj"):
with zip_file_hdl.open(file) as fp:
magic = fp.read(8)
if self.verbose:
fp.seek(0, 2)
eof_byte_offset = fp.tell()
print(
f"Expecting nsproj: ___{file}___{magic}___{get_sha256_of_file_content(fp)}___{eof_byte_offset}___"
)
key = file[file.rfind("/") + 1 :].replace(".nsproj", "")
if key not in self.proj_file_dict:
self.proj_file_dict[key] = file
else:
continue
else:
nsproj_data_path = f"{self.file_path[0:self.file_path.rfind('.')]} Data"
print(f"nsproj_data_path __{nsproj_data_path}__")
for file in glob.glob(f"{nsproj_data_path}/**/*", recursive=True):
print(f"----->>>> {file}")
if file.endswith((".h5", ".hdf", ".hdf5")):
with open(file, "rb") as fp:
magic = fp.read(8)
if self.verbose:
fp.seek(0, 2)
eof_byte_offset = fp.tell()
# get_sha256_of_file_content(fp)
print(
f"Expecting hfive: ___{file}___{magic}___{get_sha256_of_file_content(fp)}___{eof_byte_offset}___"
)
key = file[file.rfind("/") + 1 :].replace(".h5", "")
if key not in self.hfive_file_dict:
self.hfive_file_dict[key] = file
elif file.endswith(".ndata"):
with zip_file_hdl.open(file) as fp:
with open(file, "rb") as fp:
magic = fp.read(8)
if self.verbose:
fp.seek(0, 2)
Expand All @@ -134,26 +181,13 @@ def check_if_zipped_nionswift_project_file(self):
key = file[file.rfind("/") + 1 :].replace(".ndata", "")
if key not in self.ndata_file_dict:
self.ndata_file_dict[key] = file
elif file.endswith(".nsproj"):
with zip_file_hdl.open(file) as fp:
magic = fp.read(8)
if self.verbose:
fp.seek(0, 2)
eof_byte_offset = fp.tell()
print(
f"Expecting nsproj: ___{file}___{magic}___{get_sha256_of_file_content(fp)}___{eof_byte_offset}___"
)
key = file[file.rfind("/") + 1 :].replace(".nsproj", "")
if key not in self.proj_file_dict:
self.proj_file_dict[key] = file
else:
continue

if not self.ndata_file_dict.keys().isdisjoint(self.hfive_file_dict.keys()):
print(
"Test 2 failed, UUID keys of *.ndata and *.h5 files in project are not disjoint!"
)
return
if len(self.proj_file_dict.keys()) != 1:
if self.is_zipped and len(self.proj_file_dict.keys()) != 1:
print(
"Test 3 failed, he project contains either no or more than one nsproj file!"
)
Expand Down Expand Up @@ -255,7 +289,6 @@ def process_ndata(self, file_hdl, full_path, template):
del flat_metadata_dict
del data_arr
del nx_concept_name
return template
"""
return template

Expand Down Expand Up @@ -312,20 +345,26 @@ def process_hfive(self, file_hdl, full_path, template: dict):
def parse_project_file(self, template: dict) -> dict:
"""Parse lazily from compressed NionSwift project (nsproj + directory)."""
nionswift_proj_mdata = {}
with ZipFile(self.file_path) as zip_file_hdl:
for pkey, proj_file_name in self.proj_file_dict.items():
with zip_file_hdl.open(proj_file_name) as file_hdl:
nionswift_proj_mdata = fd.FlatDict(
yaml.safe_load(file_hdl), delimiter="/"
)
# TODO::inspection phase, maybe with yaml to file?
if self.verbose:
print(f"Flattened content of {proj_file_name}")
for (
key,
value,
) in nionswift_proj_mdata.items(): # ["display_items"]:
print(f"nsprj, flat: ___{key}___{value}___")
if self.is_zipped:
with ZipFile(self.file_path) as zip_file_hdl:
for pkey, proj_file_name in self.proj_file_dict.items():
with zip_file_hdl.open(proj_file_name) as file_hdl:
nionswift_proj_mdata = fd.FlatDict(
yaml.safe_load(file_hdl), delimiter="/"
)
else:
with open(self.file_path) as file_hdl:
nionswift_proj_mdata = fd.FlatDict(
yaml.safe_load(file_hdl), delimiter="/"
)
# TODO::inspection phase, maybe with yaml to file?
if self.verbose:
if self.is_zipped:
print(f"Flattened content of {proj_file_name}")
else:
print(f"Flattened content of {self.file_path}")
for key, value in nionswift_proj_mdata.items(): # ["display_items"]:
print(f"nsprj, flat: ___{key}___{value}___")
if nionswift_proj_mdata == {}:
return template

Expand All @@ -340,31 +379,41 @@ def parse_project_file(self, template: dict) -> dict:
)
# file_name without the mime type
if key in self.ndata_file_dict.keys():
print(
f"Key {key} is *.ndata maps to {self.ndata_file_dict[key]}"
)
with ZipFile(self.file_path) as zip_file_hdl:
print(f"Parsing {self.ndata_file_dict[key]}...")
with zip_file_hdl.open(
self.ndata_file_dict[key]
) as file_hdl:
this_file = self.ndata_file_dict[key]
print(f"Key {key} is *.ndata maps to {this_file}")
print(f"Parsing {this_file}...")
if self.is_zipped:
with ZipFile(self.file_path) as zip_file_hdl:
with zip_file_hdl.open(this_file) as file_hdl:
self.process_ndata(
file_hdl,
this_file,
template,
)
else:
with open(this_file, "rb") as file_hdl:
self.process_ndata(
file_hdl,
self.ndata_file_dict[key],
this_file,
template,
)
elif key in self.hfive_file_dict.keys():
print(
f"Key {key} is *.h5 maps to {self.hfive_file_dict[key]}"
)
with ZipFile(self.file_path) as zip_file_hdl:
print(f"Parsing {self.hfive_file_dict[key]}...")
with zip_file_hdl.open(
self.hfive_file_dict[key]
) as file_hdl:
this_file = self.hfive_file_dict[key]
print(f"Key {key} is *.h5 maps to {this_file}")
print(f"Parsing {this_file}...")
if self.is_zipped:
with ZipFile(self.file_path) as zip_file_hdl:
with zip_file_hdl.open(this_file) as file_hdl:
self.process_hfive(
file_hdl,
this_file,
template,
)
else:
with open(this_file, "rb") as file_hdl:
self.process_hfive(
file_hdl,
self.hfive_file_dict[key],
this_file,
template,
)
else:
Expand All @@ -374,8 +423,11 @@ def parse_project_file(self, template: dict) -> dict:
def parse(self, template: dict) -> dict:
"""Parse NOMAD OASIS relevant data and metadata from swift project."""
if self.supported:
print(
"Parsing in-place from zip-compressed nionswift project (nsproj + directory)..."
)
if self.is_zipped:
print(
"Parsing in-place zip-compressed nionswift project (nsproj + data)..."
)
else:
print("Parsing in-place nionswift project (nsproj + data)...")
self.parse_project_file(template)
return template
3 changes: 2 additions & 1 deletion pynxtools_em/utils/io_case_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
".tif",
".zip.axon",
".zip.nion",
".nsproj",
".edaxh5",
".h5",
".hdf5",
Expand Down Expand Up @@ -85,7 +86,7 @@ def check_validity_of_file_combinations(self):
else:
continue
# print(f"{dat_input}, {other_input}")
if 1 <= other_input <= 3:
if 0 <= other_input <= 3:
self.is_valid = True
self.dat: List[str] = []
for suffix in VALID_FILE_NAME_SUFFIX_DATA:
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ dev = [
[project.entry-points."pynxtools.reader"]
em = "pynxtools_em.reader:EMReader"

[tool.setuptools.packages.find]
exclude = ["dev/*"]

[tool.setuptools_scm]
version_scheme = "no-guess-dev"
local_scheme = "node-and-date"
Expand Down
Loading