Skip to content

Commit

Permalink
Add initial md_to_h5md script
Browse files Browse the repository at this point in the history
  • Loading branch information
oerc0122 committed Sep 10, 2024
1 parent f2561cb commit 784772b
Show file tree
Hide file tree
Showing 10 changed files with 406 additions and 0 deletions.
216 changes: 216 additions & 0 deletions castep_outputs_tools/md_to_h5md.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
"""
Convert CASTEP .md/geom format to h5md output.
References
----------
.. [1] https://www.nongnu.org/h5md/
"""
from __future__ import annotations

import argparse
import io
from functools import singledispatch
from pathlib import Path
from typing import TextIO

import h5py
from castep_outputs.parsers import parse_md_geom_file as parser

from castep_outputs_tools import __version__


def _convert_frame(out_file: h5py.File, frame: dict, frame_id: int):
"""
Convert a single frame and fill the data blocks.
Parameters
----------
frame : dict
Incoming read frame.
frame_id : int
Index of current frame.
out_file : h5py.File
File to write.
"""
part = out_file["particles"]
obs = out_file["observables"]

part["box/edges/time"][frame_id] = frame["time"]
part["box/edges/value"][frame_id] = frame["h"]

atom_props = [val for key, val in frame.items() if isinstance(key, tuple)]

for i, elem in enumerate(atom_props):
part["position/value"][frame_id, i] = elem["R"]
part["velocity/value"][frame_id, i] = elem["V"]
part["force/value"][frame_id, i] = elem["F"]

for i, prop in enumerate(("hamiltonian_energy", "potential_energy", "kinetic_energy")):
obs[f"{prop}/value"][frame_id] = frame["E"][0][i]

obs["temperature/value"][frame_id] = frame["T"]
obs["pressure/value"][frame_id] = frame["P"]
obs["stress/value"][frame_id] = frame["S"]
obs["lattice_velocity/value"][frame_id] = frame["hv"]

def _create_header_info(out_file: h5py.File, **metadata):
    """
    Write the top-level ``h5md`` group describing the file's provenance.

    Parameters
    ----------
    out_file : h5py.File
        File to write.
    **metadata : dict
        Optional ``"author"`` and ``"email"`` entries; each falls back to
        ``"Unknown"`` when absent. Other keys are ignored.
    """
    author_name = metadata.get("author", "Unknown")
    author_email = metadata.get("email", "Unknown")

    header = out_file.create_group("h5md")
    header.attrs["version"] = (1, 1)

    # Who produced the data.
    author = header.create_group("author")
    author.attrs["name"] = author_name
    author.attrs["email"] = author_email

    # Which program wrote the file.
    creator = header.create_group("creator")
    creator.attrs["name"] = "castep outputs"
    creator.attrs["version"] = __version__

def _create_groups(out_file: h5py.File, n_steps: int, species: set[str], atoms: list[str]):
    """
    Create empty groups for filling with data.

    Builds the ``particles`` and ``observables`` groups, the per-atom
    ``species`` dataset, the ``box`` description, and one
    ``step``/``time``/``value`` group per stored quantity.

    Parameters
    ----------
    out_file : h5py.File
        File to write.
    n_steps : int
        Number of steps in md file.
    species : set[str]
        Species in file.
    atoms : list[str]
        Complete list of atoms in file.

    Notes
    -----
    Species are numbered in sorted order so that the integer codes stored
    in ``particles/species`` are reproducible from run to run.
    """
    n_atoms = len(atoms)

    part = out_file.create_group("particles")
    obs = out_file.create_group("observables")

    # Iteration order of a set of strings varies between interpreter runs
    # (string hashes are randomised), so number the species in sorted order
    # to get the same enum for the same input every time.
    species_ids = {spec: ind for ind, spec in enumerate(sorted(species))}
    spec_enum = h5py.enum_dtype(species_ids)
    atom_ind = [species_ids[atm] for atm in atoms]

    part.create_dataset("species", (n_atoms,), dtype=spec_enum, data=atom_ind)

    box = part.create_group("box")
    box.attrs["dimension"] = 3
    box.attrs["boundary"] = "periodic"
    edge = box.create_group("edges")
    edge["step"] = list(range(1, n_steps + 1))
    edge.create_dataset("time", (n_steps,), dtype=float)
    edge.create_dataset("value", (n_steps, 3, 3), dtype=float)

    # Every other quantity links to the box's "step" and "time" datasets
    # rather than storing its own copy.
    step = edge["step"]
    time = edge["time"]

    # (parent group, quantity names, shape of the "value" dataset)
    layouts = (
        (part, ("position", "velocity", "force"), (n_steps, n_atoms, 3)),
        (
            obs,
            ("hamiltonian_energy", "potential_energy",
             "kinetic_energy", "pressure", "temperature"),
            (n_steps,),
        ),
        (obs, ("lattice_velocity", "stress"), (n_steps, 3, 3)),
    )
    for parent, props, shape in layouts:
        for prop in props:
            grp = parent.create_group(prop)
            grp["step"] = step
            grp["time"] = time
            grp.create_dataset("value", shape, dtype=float)


def md_to_h5md(md_geom_file: TextIO, out_path: Path | str, **metadata) -> None:
    """
    Convert an MD file to h5md format [1]_.

    The whole input is parsed first; the atom list is taken from the first
    frame and used to size the datasets for every frame.

    Parameters
    ----------
    md_geom_file : TextIO
        File to parse.
    out_path : Path or str
        File to write.
    **metadata : dict
        Username and email of author.
    """
    frames = parser(md_geom_file)

    # Per-atom entries are keyed by tuples whose first element is the species.
    first_frame = frames[0]
    atoms = [label[0] for label in first_frame if isinstance(label, tuple)]
    species = set(atoms)
    n_steps = len(frames)

    with h5py.File(out_path, "w") as out_file:
        _create_header_info(out_file, **metadata)
        _create_groups(out_file, n_steps, species, atoms)

        for frame_id, frame in enumerate(frames):
            _convert_frame(out_file, frame, frame_id)

@singledispatch
def main(source, output, **metadata):
    """
    Convert an MD file to h5md format [1]_.

    Dispatches on the type of `source`: a `str` is converted to a `Path`,
    a `Path` is opened for reading, and an open text stream is parsed
    directly.

    Parameters
    ----------
    source : str or Path or TextIO
        File to parse.
    output : str or Path
        File to write.
    **metadata : dict
        Passed through to `md_to_h5md` (author name and email).

    Raises
    ------
    NotImplementedError
        Invalid types passed.
    """
    raise NotImplementedError(f"Unable to convert {type(source).__name__} to h5md")


@main.register(str)
def _(source, output: Path | str, **metadata):
    main(Path(source), output, **metadata)


@main.register(Path)
def _(source, output: Path | str, **metadata):
    with source.open("r") as in_file:
        md_to_h5md(in_file, output, **metadata)


# Real text streams (the result of open(), io.StringIO, ...) derive from
# io.TextIOBase, not from typing.TextIO, so registering only typing.TextIO
# never matches an actual file handle. Both are registered so that existing
# typing.TextIO subclasses keep working.
@main.register(io.TextIOBase)
@main.register(TextIO)
def _(source, output: Path | str, **metadata):
    md_to_h5md(source, output, **metadata)


def cli():
    """
    Run md_to_h5md through command line.

    Parses the process arguments and forwards them to `main`.

    Examples
    --------
    .. code-block:: sh

       md_to_h5md -o my_file.h5md my_input.md
       md_to_h5md --author "Jacob Wilkins" --email "user@example.com" -o my_file.h5md my_input.md
    """
    cli_parser = argparse.ArgumentParser(
        prog="md_to_h5md",
        description="Convert a castep .md file to .h5md format.",
        epilog="See https://www.nongnu.org/h5md/ for more info on h5md.",
    )

    # Input and output locations.
    cli_parser.add_argument("source", help=".md file to parse", type=Path)
    cli_parser.add_argument("-o", "--output", required=True, help="File to write output.")

    # Optional metadata stored in the output file's header.
    cli_parser.add_argument("-a", "--author", default="Unknown", type=str,
                            help="Author for metadata.")
    cli_parser.add_argument("-e", "--email", default="Unknown", type=str,
                            help="Email for metadata.")

    cli_parser.add_argument("-V", "--version", action="version",
                            version=f"%(prog)s v{__version__}")

    options = cli_parser.parse_args()

    main(options.source, options.output, author=options.author, email=options.email)


# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    cli()
21 changes: 21 additions & 0 deletions docs/source/api/castep_outputs_tools.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
castep\_outputs\_tools package
==============================

Submodules
----------

castep\_outputs\_tools.md\_to\_h5md module
------------------------------------------

.. automodule:: castep_outputs_tools.md_to_h5md
:members:
:undoc-members:
:show-inheritance:

Module contents
---------------

.. automodule:: castep_outputs_tools
:members:
:undoc-members:
:show-inheritance:
7 changes: 7 additions & 0 deletions docs/source/api/modules.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
castep_outputs_tools
====================

.. toctree::
:maxdepth: 1

castep_outputs_tools
5 changes: 5 additions & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@
napoleon_use_param = False
napoleon_use_admonition_for_notes = True

intersphinx_mapping = {
"python": ("https://docs.python.org/3/", None),
"h5py": ("https://docs.h5py.org/en/stable/", None),
}

numpydoc_validation_checks = {"all", "ES01", "EX01", "SA01"}

# -- Options for HTML output -------------------------------------------------
Expand Down
2 changes: 2 additions & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@ documentation for details.
:maxdepth: 2
:caption: Contents:

tools/tools
api/modules
53 changes: 53 additions & 0 deletions docs/source/tools/md_to_h5md.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
``md_to_h5md``
==============

Tool for converting castep .md files to h5md format [1]_.

Installation
------------

To install `md_to_h5md` and its dependencies, use:

.. code-block::
pip install "castep_outputs_tools[md_to_h5md]"
This adds a script which can be run from the command line:

.. code-block::
> md_to_h5md -h
usage: md_to_h5md [-h] -o OUTPUT [-a AUTHOR] [-e EMAIL] [-V] source
Convert a castep .md file to .h5md format.
positional arguments:
source .md file to parse
options:
-h, --help show this help message and exit
-o OUTPUT, --output OUTPUT
File to write output.
-a AUTHOR, --author AUTHOR
Author for metadata.
-e EMAIL, --email EMAIL
Email for metadata.
-V, --version show program's version number and exit
See https://www.nongnu.org/h5md/ for more info on h5md.
Limitations
-----------

h5md cannot handle variable atom count calculations, and so
attempting to convert a μVT calculation may fail.

Dependencies
------------

`h5py <https://www.h5py.org/>`__


.. [1] https://www.nongnu.org/h5md/
7 changes: 7 additions & 0 deletions docs/source/tools/tools.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Available Tools
===============

.. toctree::
:maxdepth: 1

md_to_h5md
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,10 @@ dependencies=["castep_outputs"]
[project.optional-dependencies]
docs = ["sphinx>=0.13.1", "sphinx-book-theme>=0.3.3", "sphinx-argparse>=0.4.0"]
lint = ["ruff"]
md_to_h5md = ["h5py"]

[project.scripts]
md_to_h5md = "castep_outputs_tools.md_to_h5md:cli"

[project.urls]
Homepage="https://github.com/oerc0122/castep_outputs"
Expand Down
Loading

0 comments on commit 784772b

Please sign in to comment.