From 695f7e1b97bead99ffe5c7f7dc8c8f30959c240b Mon Sep 17 00:00:00 2001
From: Josh Borrow
Date: Wed, 18 Sep 2024 16:58:15 +0200
Subject: [PATCH 1/7] Added refactoring

---
 swiftsimio/masks.py                           |    5 +-
 .../metadata/metadata/metadata_fields.py      |    2 +
 swiftsimio/metadata/objects.py                | 1247 +++++++++++++++++
 swiftsimio/reader.py                          | 1125 +-------------
 4 files changed, 1255 insertions(+), 1124 deletions(-)
 create mode 100644 swiftsimio/metadata/objects.py

diff --git a/swiftsimio/masks.py b/swiftsimio/masks.py
index dde09d3f..1c65f6cf 100644
--- a/swiftsimio/masks.py
+++ b/swiftsimio/masks.py
@@ -50,9 +50,8 @@ def __init__(self, metadata: SWIFTMetadata, spatial_only=True):
         self.units = metadata.units
         self.spatial_only = spatial_only
 
-        if self.metadata.filetype == "FOF":
-            # No virtual snapshots or cells metadata for fof currently
-            raise NotImplementedError("Masking not supported for FOF filetype")
+        if not self.metadata.masking_valid:
+            raise NotImplementedError(f"Masking not supported for {self.metadata.output_type} filetype")
 
         if self.metadata.partial_snapshot:
             raise InvalidSnapshot(
diff --git a/swiftsimio/metadata/metadata/metadata_fields.py b/swiftsimio/metadata/metadata/metadata_fields.py
index 2e2b2f99..182a3907 100644
--- a/swiftsimio/metadata/metadata/metadata_fields.py
+++ b/swiftsimio/metadata/metadata/metadata_fields.py
@@ -23,6 +23,8 @@
 header_unpack_arrays = {
     "BoxSize": "boxsize",
     "NumPart_ThisFile": "num_part",
+    "NumGroup_ThisFile": "num_group",
+    "NumSubhalos_ThisFile": "num_subhalo",
     "CanHaveTypes": "has_type",
     "NumFilesPerSnapshot": "num_files_per_snapshot",
     "OutputType": "output_type",
diff --git a/swiftsimio/metadata/objects.py b/swiftsimio/metadata/objects.py
new file mode 100644
index 00000000..cdccc478
--- /dev/null
+++ b/swiftsimio/metadata/objects.py
@@ -0,0 +1,1247 @@
+import numpy as np
+import unyt
+
+import h5py
+from swiftsimio.conversions import swift_cosmology_to_astropy
+from swiftsimio.objects import cosmo_factor, a
+from swiftsimio import metadata
+from abc import ABC, abstractmethod
+
+import re
+import warnings
+
+from datetime import datetime
+from pathlib import Path
+
+from typing import List, Optional
+
+
+class MassTable(object):
+    """
+    Extracts a mass table to local variables based on the
+    particle type names.
+    """
+
+    def __init__(self, base_mass_table: np.array, mass_units: unyt.unyt_quantity):
+        """
+        Parameters
+        ----------
+
+        base_mass_table : np.array
+            Mass table of the same length as the number of particle types.
+
+        mass_units : unyt_quantity
+            Base mass units for the simulation.
+        """
+
+        # TODO: Extract these names from the files themselves if possible.
+
+        for index, name in metadata.particle_types.particle_name_underscores.items():
+            try:
+                setattr(
+                    self,
+                    name,
+                    unyt.unyt_quantity(base_mass_table[index], units=mass_units),
+                )
+            except IndexError:
+                # Backwards compatible.
+                setattr(self, name, None)
+
+        return
+
+    def __str__(self):
+        return f"Mass table for {' '.join(metadata.particle_types.particle_name_underscores.values())}"
+
+    def __repr__(self):
+        return self.__str__()
+
+
+class MappingTable(object):
+    """
+    A mapping table from one named column instance to the other.
+    Initially designed for the mapping between dust and elements.
+    """
+
+    def __init__(
+        self,
+        data: np.ndarray,
+        named_columns_x: List[str],
+        named_columns_y: List[str],
+        named_columns_x_name: str,
+        named_columns_y_name: str,
+    ):
+        """
+        Parameters
+        ----------
+
+        data: np.ndarray
+            The data array providing the mapping between the named
+            columns. 
Should be of size N x M, where N is the number + of elements in ``named_columns_x`` and M the number + of elements in ``named_columns_y``. + + named_columns_x: List[str] + The names of the columns in the first axis. + + named_columns_y: List[str] + The names of the columns in the second axis. + + named_columns_x_name: str + The name of the first mapping. + + named_columns_y_name: str + The name of the second mapping. + """ + + self.data = data + self.named_columns_x = named_columns_x + self.named_columns_y = named_columns_y + self.named_columns_x_name = named_columns_x_name + self.named_columns_y_name = named_columns_y_name + + for x, name_x in enumerate(named_columns_x): + for y, name_y in enumerate(named_columns_y): + setattr(self, f"{name_x.lower()}_to_{name_y.lower()}", data[x][y]) + + return + + def __str__(self): + return ( + f"Mapping table from {self.named_columns_x_name} to " + f"{self.named_columns_y_name}, containing {len(self.data)} " + f"by {len(self.data[0])} elements." + ) + + def __repr__(self): + return f"{self.__str__()}. Raw data: " "\n" f"{self.data}." + + + +class SWIFTGroupMetadata(object): + """ + Object that contains the metadata for one hdf5 group. + + This, for instance, could be part type 0, or 'gas'. This will load in + the names of all datasets, their units, possible named fields, + and their cosmology, and present them for use in the actual i/o routines. + + Methods + ------- + load_metadata(self): + Loads the required metadata. + load_field_names(self): + Loads in the field names. + load_field_units(self): + Loads in the units from each dataset. + load_field_descriptions(self): + Loads in descriptions of the fields for each dataset. + load_field_compressions(self): + Loads in compressions of the fields for each dataset. + load_cosmology(self): + Loads in the field cosmologies. + load_physical(self): + Loads in whether the field is saved as comoving or physical. + load_valid_transforms(self): + Loads in whether the field can be converted to comoving. + load_named_columns(self): + Loads the named column data for relevant fields. + """ + + def __init__( + self, group: str, group_name: str, metadata: "SWIFTMetadata", scale_factor: float + ): + """ + Constructor for SWIFTGroupMetadata class + + Parameters + ---------- + group: str + the name of the group in the hdf5 file + group_name : str + the corresponding group name for swiftsimio + metadata : SWIFTMetadata + the snapshot metadata + scale_factor : float + the snapshot scale factor + """ + self.group = group + self.group_name = group_name + self.metadata = metadata + self.units = metadata.units + self.scale_factor = scale_factor + + self.filename = metadata.filename + + self.load_metadata() + + return + + def __str__(self): + return f"Metadata class for {self.group} ({self.group_name})" + + def __repr__(self): + return self.__str__() + + def load_metadata(self): + """ + Loads the required metadata. + + This includes loading the field names, units and descriptions, as well as the + cosmology metadata and any custom named columns + """ + + self.load_field_names() + self.load_field_units() + self.load_field_descriptions() + self.load_field_compressions() + self.load_cosmology() + self.load_physical() + self.load_valid_transforms() + self.load_named_columns() + + def load_field_names(self): + """ + Loads in only the field names. 
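+
+        Dataset names are converted from SWIFT's CamelCase convention to
+        snake_case attribute names; for example, a dataset called
+        ``InternalEnergies`` is exposed as ``internal_energies``.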
+ """ + + # regular expression for camel case to snake case + # https://stackoverflow.com/a/1176023 + def convert(name): + return re.sub("([a-z0-9])([A-Z])", r"\1_\2", name).lower() + + # Skip fields which are groups themselves + self.field_paths = [] + self.field_names = [] + for item in self.metadata.handle[f"{self.group}"].keys(): + # Skip fields which are groups themselves + if f"{self.group}/{item}" not in self.metadata.present_groups: + self.field_paths.append(f"{self.group}/{item}") + self.field_names.append(convert(item)) + + return + + def load_field_units(self): + """ + Loads in the units from each dataset. + """ + + unit_dict = { + "I": self.units.current, + "L": self.units.length, + "M": self.units.mass, + "T": self.units.temperature, + "t": self.units.time, + } + + def get_units(unit_attribute): + units = 1.0 + + for exponent, unit in unit_dict.items(): + # We store the 'unit exponent' in the SWIFT metadata. This corresponds + # to the power we need to raise each unit to, to return the correct units + try: + # Need to check if the exponent is 0 manually because of float precision + unit_exponent = unit_attribute[f"U_{exponent} exponent"][0] + if unit_exponent != 0.0: + units *= unit ** unit_exponent + except KeyError: + # Can't load that data! + # We should probably warn the user here... + pass + + # Deal with case where we _really_ have a dimensionless quantity. Comparing with + # 1.0 doesn't work, beacause in these cases unyt reverts to a floating point + # comparison. + try: + units.units + except AttributeError: + units = None + + return units + + self.field_units = [ + get_units(self.metadata.handle[x].attrs) for x in self.field_paths + ] + + return + + def load_field_descriptions(self): + """ + Loads in the text descriptions of the fields for each dataset. + For SOAP filetypes a description of the mask is included. + """ + + def get_desc(dataset): + try: + description = dataset.attrs["Description"].decode("utf-8") + except AttributeError: + # Description is saved as a string not bytes + description = dataset.attrs["Description"] + except KeyError: + # Can't load description! + description = "No description available" + + if self.metadata.filetype != "SOAP": + return description + + try: + is_masked = dataset.attrs["Masked"] + except KeyError: + is_masked = False + if not is_masked: + return description + " Not masked." + + # TODO: Update for https://github.com/SWIFTSIM/SOAP/pull/81 + # TODO: Also need to add metadata for each halo type for that PR + # If is_masked is true then these other attributes should exist + mask_datasets = dataset.attrs["Mask Datasets"] + mask_threshold = dataset.attrs["Mask Threshold"] + if len(mask_datasets) == 1: + mask_str = f" Only computed for objects with {mask_datasets[0]} >= {mask_threshold}." + else: + mask_str = f' Only computed for objects where {" + ".join(mask_datasets)} >= {mask_threshold}.' + return description + mask_str + + self.field_descriptions = [ + get_desc(self.metadata.handle[x]) for x in self.field_paths + ] + + return + + def load_field_compressions(self): + """ + Loads in the string describing the compression filters of the fields for each dataset. 
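+
+        The string is the name of the lossy filter recorded by SWIFT (for
+        example ``FMantissa9``), "Not compressed." for uncompressed SOAP
+        datasets, or a placeholder when no compression metadata is available.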
+ """ + + def get_comp(dataset): + try: + # SOAP catalogues can be compressed/uncompressed + is_compressed = dataset.attrs["Is Compressed"] + except KeyError: + is_compressed = True + try: + comp = dataset.attrs["Lossy compression filter"].decode("utf-8") + except AttributeError: + # Compression is saved as str not bytes + comp = dataset.attrs["Lossy compression filter"] + except KeyError: + # Can't load compression string! + comp = "No compression info available" + + return comp if is_compressed else "Not compressed." + + self.field_compressions = [ + get_comp(self.metadata.handle[x]) for x in self.field_paths + ] + + return + + def load_cosmology(self): + """ + Loads in the field cosmologies. + """ + + current_scale_factor = self.scale_factor + + def get_cosmo(dataset): + try: + cosmo_exponent = dataset.attrs["a-scale exponent"][0] + except: + # Can't load, 'graceful' fallback. + cosmo_exponent = 0.0 + + a_factor_this_dataset = a ** cosmo_exponent + + return cosmo_factor(a_factor_this_dataset, current_scale_factor) + + self.field_cosmologies = [ + get_cosmo(self.metadata.handle[x]) for x in self.field_paths + ] + + return + + def load_physical(self): + """ + Loads in whether the field is saved as comoving or physical. + """ + + def get_physical(dataset): + try: + physical = dataset.attrs["Value stored as physical"][0] == 1 + except: + physical = False + return physical + + self.field_physicals = [ + get_physical(self.metadata.handle[x]) for x in self.field_paths + ] + + return + + def load_valid_transforms(self): + """ + Loads in whether the field can be converted to comoving. + """ + + def get_valid_transform(dataset): + try: + valid_transform = ( + dataset.attrs["Property can be converted to comoving"][0] == 1 + ) + except: + valid_transform = True + return valid_transform + + self.field_valid_transforms = [ + get_valid_transform(self.metadata.handle[x]) for x in self.field_paths + ] + + return + + def load_named_columns(self): + """ + Loads the named column data for relevant fields. + """ + + named_columns = {} + + for field in self.field_paths: + property_name = field.split("/")[-1] + + if property_name in self.metadata.named_columns.keys(): + field_names = self.metadata.named_columns[property_name] + + # Now need to make a decision on capitalisation. If we have a set of + # words with only one capital in them, then it's likely that they are + # element names or something similar, so they should be lower case. + # If on average we have many more capitals, then they are likely to be + # ionized fractions (e.g. HeII) and so we want to leave them with their + # original capitalisation. + + num_capitals = lambda x: sum(1 for c in x if c.isupper()) + mean_num_capitals = sum(map(num_capitals, field_names)) / len( + field_names + ) + + if mean_num_capitals < 1.01: + # Decapitalise them as they are likely individual element names + formatted_field_names = [x.lower() for x in field_names] + else: + formatted_field_names = field_names + + named_columns[field] = formatted_field_names + else: + named_columns[field] = None + + self.named_columns = named_columns + + return + + +class SWIFTUnits(object): + """ + Generates a unyt system that can then be used with the SWIFT data. + + These give the unit mass, length, time, current, and temperature as + unyt unit variables in simulation units. I.e. you can take any value + that you get out of the code and multiply it by the appropriate values + to get it 'unyt-ified' with the correct units. 
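+
+    A minimal usage sketch (assuming a SWIFT snapshot file called
+    ``snapshot.hdf5`` exists)::
+
+        from pathlib import Path
+
+        units = SWIFTUnits(Path("snapshot.hdf5"))
+        # Base units compose into derived simulation units as unyt quantities
+        velocity_unit = units.length / units.time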
+ + Attributes + ---------- + mass : float + unit for mass used + length : float + unit for length used + time : float + unit for time used + current : float + unit for current used + temperature : float + unit for temperature used + + """ + + def __init__(self, filename: Path, handle: Optional[h5py.File] = None): + """ + SWIFTUnits constructor + + Sets filename for file to read units from and gets unit dictionary + + Parameters + ---------- + + filename : Path + Name of file to read units from + + handle: h5py.File, optional + The h5py file handle, optional. Will open a new handle with the + filename if required. + + """ + self.filename = filename + self._handle = handle + + self.get_unit_dictionary() + + return + + @property + def handle(self): + """ + Property that gets the file handle, which can be shared + with other objects for efficiency reasons. + """ + if isinstance(self._handle, h5py.File): + # Can be open or closed, let's test. + try: + file = self._handle.file + + return self._handle + except ValueError: + # This will be the case if there is no active file handle + pass + + self._handle = h5py.File(self.filename, "r") + + return self._handle + + def get_unit_dictionary(self): + """ + Store unit data and metadata + + Length 1 arrays are used to store the unit data. This dictionary + also contains the metadata information that connects the unyt + objects to the names that are stored in the SWIFT snapshots. + """ + + self.units = { + name: unyt.unyt_quantity( + value[0], units=metadata.unit_types.unit_names_to_unyt[name] + ) + for name, value in self.handle["Units"].attrs.items() + } + + # We now unpack this into variables. + self.mass = metadata.unit_types.find_nearest_base_unit( + self.units["Unit mass in cgs (U_M)"], "mass" + ) + self.length = metadata.unit_types.find_nearest_base_unit( + self.units["Unit length in cgs (U_L)"], "length" + ) + self.time = metadata.unit_types.find_nearest_base_unit( + self.units["Unit time in cgs (U_t)"], "time" + ) + self.current = metadata.unit_types.find_nearest_base_unit( + self.units["Unit current in cgs (U_I)"], "current" + ) + self.temperature = metadata.unit_types.find_nearest_base_unit( + self.units["Unit temperature in cgs (U_T)"], "temperature" + ) + + def __del__(self): + if isinstance(self._handle, h5py.File): + self._handle.close() + + + +def metadata_discriminator(filename: str, units: SWIFTUnits) -> "SWIFTMetadata": + """ + Discriminates between the different types of metadata objects read from SWIFT-compatible + files. + + Parameters + ---------- + + filename : str + Name of the file to read metadata from + + units : SWIFTUnits + The units object associated with the file + + + Returns + ------- + + SWIFTMetadata + The appropriate metadata object for the file type + """ + # Old snapshots did not have this attribute, so we need to default to FullVolume + file_type = units.handle["Header"].attrs.get("OutputType", "FullVolume") + + if isinstance(file_type, bytes): + file_type = file_type.decode("utf-8") + + if file_type in ["FullVolume"]: + return SWIFTSnapshotMetadata(filename, units) + elif file_type in ["SOAP"]: + return SWIFTSOAPMetadata(filename, units) + elif file_type in ["FOF"]: + return SWIFTFOFMetadata(filename, units) + else: + raise ValueError(f"File type {file_type} not recognised.") + + + +class SWIFTMetadata(ABC): + """ + An abstract base class for all SWIFT-related file metadata. 
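+
+    Concrete subclasses (``SWIFTSnapshotMetadata``, ``SWIFTFOFMetadata``, and
+    ``SWIFTSOAPMetadata`` below) must implement ``present_groups`` and
+    ``present_group_names``; the shared machinery defined here handles header
+    parsing, group loading, and cosmology extraction.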
+ """ + + filename: str + units: SWIFTUnits + header: dict + masking_valid: bool = False + + @abstractmethod + def __init__(self, filename, units: SWIFTUnits): + raise NotImplementedError + + @property + def handle(self): + # Handle, which is shared with units. Units handles + # file opening and closing. + return self.units.handle + + def load_groups(self): + """ + Loads the groups and metadata into objects: + + metadata._properties + + This contains eight arrays, + + metadata._properties.field_names + metadata._properties.field_paths + metadata._properties.field_units + metadata._properties.field_cosmologies + metadata._properties.field_descriptions + metadata._properties.field_compressions + metadata._properties.field_physicals + metadata._properties.field_valid_transforms + + As well as some more information about the group. + """ + + for group, name in zip(self.present_groups, self.present_group_names): + filetype_metadata = SWIFTGroupMetadata( + group=group, + group_name=name, + metadata=self, + scale_factor=self.scale_factor, + ) + setattr(self, f"{name}_properties", filetype_metadata) + + return + + def get_metadata(self): + """ + Loads the metadata as specified in metadata.metadata_fields. + """ + + for field, name in metadata.metadata_fields.metadata_fields_to_read.items(): + try: + setattr(self, name, dict(self.handle[field].attrs)) + except KeyError: + setattr(self, name, None) + + return + + def postprocess_header(self): + """ + Some minor postprocessing on the header to local variables. + """ + + # These are just read straight in to variables + header_unpack_arrays_units = metadata.metadata_fields.generate_units_header_unpack_arrays( + m=self.units.mass, + l=self.units.length, + t=self.units.time, + I=self.units.current, + T=self.units.temperature, + ) + + for field, name in metadata.metadata_fields.header_unpack_arrays.items(): + try: + if name in header_unpack_arrays_units.keys(): + setattr( + self, + name, + unyt.unyt_array( + self.header[field], units=header_unpack_arrays_units[name] + ), + ) + # This is required or we automatically get everything in CGS! + getattr(self, name).convert_to_units( + header_unpack_arrays_units[name] + ) + else: + # Must not have any units! Oh well. + setattr(self, name, self.header[field]) + except KeyError: + # Must not be present, just skip it + continue + + # Now unpack the 'mass table' type items: + for field, name in metadata.metadata_fields.header_unpack_mass_tables.items(): + try: + setattr( + self, + name, + MassTable( + base_mass_table=self.header[field], mass_units=self.units.mass + ), + ) + except KeyError: + setattr( + self, + name, + MassTable( + base_mass_table=np.zeros( + len(metadata.particle_types.particle_name_underscores) + ), + mass_units=self.units.mass, + ), + ) + + # These must be unpacked as 'real' strings (i.e. 
converted to utf-8) + + for field, name in metadata.metadata_fields.header_unpack_string.items(): + try: + # Deal with h5py's quirkiness that fixed-sized and variable-sized + # strings are read as strings or bytes + # See: https://github.com/h5py/h5py/issues/2172 + raw = self.header[field] + try: + string = raw.decode("utf-8") + except AttributeError: + string = raw + setattr(self, name, string) + except KeyError: + # Must not be present, just skip it + setattr(self, name, "") + + # These must be unpacked as they are stored as length-1 arrays + + header_unpack_float_units = metadata.metadata_fields.generate_units_header_unpack_single_float( + m=self.units.mass, + l=self.units.length, + t=self.units.time, + I=self.units.current, + T=self.units.temperature, + ) + + for field, names in metadata.metadata_fields.header_unpack_single_float.items(): + try: + if isinstance(names, list): + # Sometimes we store a list in case we have multiple names, for example + # Redshift -> metadata.redshift AND metadata.z. Can't just do the iteration + # because we may loop over the letters in the string. + for variable in names: + if variable in header_unpack_float_units.keys(): + # We have an associated unit! + unit = header_unpack_float_units[variable] + setattr( + self, + variable, + unyt.unyt_quantity(self.header[field][0], units=unit), + ) + else: + # No unit + setattr(self, variable, self.header[field][0]) + else: + # We can just check for the unit and set the attribute + variable = names + if variable in header_unpack_float_units.keys(): + # We have an associated unit! + unit = header_unpack_float_units[variable] + setattr( + self, + variable, + unyt.unyt_quantity(self.header[field][0], units=unit), + ) + else: + # No unit + setattr(self, variable, self.header[field][0]) + except KeyError: + # Must not be present, just skip it + continue + + # These are special cases, sorry! + # Date and time of snapshot dump + try: + try: + # Try and decode bytes, otherwise save raw string + snapshot_date = self.header.get( + "SnapshotDate", self.header.get("Snapshot date", b"") + ).decode("utf-8") + except AttributeError: + snapshot_date = self.header.get( + "SnapshotDate", self.header.get("Snapshot date", "") + ) + try: + self.snapshot_date = datetime.strptime( + snapshot_date, "%H:%M:%S %Y-%m-%d %Z" + ) + except ValueError: + # Backwards compatibility; this was used previously due to simplicity + # but is not portable between regions. So if you ran a simulation on + # a British (en_GB) machine, and then tried to read on a Dutch + # machine (nl_NL), this would _not_ work because %c is different. + try: + self.snapshot_date = datetime.strptime(snapshot_date, "%c\n") + except ValueError: + # Oh dear this has gone _very_wrong. Let's just keep it as a string. 
+ self.snapshot_date = snapshot_date + except KeyError: + # Old file + pass + + # get photon group edges RT dataset from the SubgridScheme group + try: + self.photon_group_edges = ( + self.handle["SubgridScheme/PhotonGroupEdges"][:] / self.units.time + ) + except KeyError: + self.photon_group_edges = None + + # get reduced speed of light RT dataset from the SubgridScheme group + try: + self.reduced_lightspeed = ( + self.handle["SubgridScheme/ReducedLightspeed"][0] + * self.units.length + / self.units.time + ) + except KeyError: + self.reduced_lightspeed = None + + # Store these separately as self.n_gas = number of gas particles for example + for (part_number, (_, part_name)) in enumerate( + metadata.particle_types.particle_name_underscores.items() + ): + try: + setattr(self, f"n_{part_name}", self.num_part[part_number]) + except IndexError: + # Backwards compatibility; mass/number table can change size. + setattr(self, f"n_{part_name}", 0) + + # Need to unpack the gas gamma for cosmology + try: + self.gas_gamma = self.hydro_scheme["Adiabatic index"] + except (KeyError, TypeError): + # We can set a default and print a message whenever we require this value + self.gas_gamma = None + + try: + self.a = self.scale_factor + except AttributeError: + # These must always be present for the initialisation of cosmology properties + self.a = 1.0 + self.scale_factor = 1.0 + + return + + + def extract_cosmology(self): + """ + Creates an astropy.cosmology object from the internal cosmology system. + + This will be saved as ``self.cosmology``. + """ + + if self.cosmology_raw is not None: + cosmo = self.cosmology_raw + else: + cosmo = {"Cosmological run": 0} + + if cosmo.get("Cosmological run", 0): + self.cosmology = swift_cosmology_to_astropy(cosmo, units=self.units) + else: + self.cosmology = None + + return + + @property + @abstractmethod + def present_groups(self): + raise NotImplementedError + + @property + @abstractmethod + def present_group_names(self): + raise NotImplementedError + + @property + def partial_snapshot(self) -> bool: + return False + +class SWIFTSnapshotMetadata(SWIFTMetadata): + """ + Loads all metadata (apart from Units, those are handled by SWIFTUnits) + into dictionaries. + + This also does some extra parsing on some well-used metadata. + """ + + # Name of the file that has been read from + filename: str + # Unit instance associated with this file + units: SWIFTUnits + # Header dictionary, metadata about snapshot. + header: dict + masking_valid: bool = True + + def __init__(self, filename, units: SWIFTUnits): + """ + Constructor for SWIFTMetadata object + + Parameters + ---------- + + filename : str + name of file to read from + + units : SWIFTUnits + the units being used + """ + self.filename = filename + self.units = units + + self.get_metadata() + self.get_named_column_metadata() + self.get_mapping_metadata() + + self.postprocess_header() + + self.load_groups() + self.extract_cosmology() + + # After we've loaded all this metadata, we can safely release the file handle. + self.handle.close() + + return + + def get_named_column_metadata(self): + """ + Loads the custom named column metadata (if it exists) from + SubgridScheme/NamedColumns. 
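+
+        The result is stored as ``self.named_columns``, a dictionary mapping
+        each dataset name to its list of column names, or an empty dictionary
+        if the file carries no named columns.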
+ """ + + try: + data = self.handle["SubgridScheme/NamedColumns"] + + self.named_columns = { + k: [x.decode("utf-8") for x in data[k][:]] for k in data.keys() + } + except KeyError: + self.named_columns = {} + + return + + def get_mapping_metadata(self): + """ + Gets the mappings based on the named columns (must have already been read), + from the form: + + SubgridScheme/{X}To{Y}Mapping. + + Includes a hack of `Dust` -> `Grains` that will be deprecated. + """ + + try: + possible_keys = self.handle["SubgridScheme"].keys() + + available_keys = [key for key in possible_keys if key.endswith("Mapping")] + available_data = [ + self.handle[f"SubgridScheme/{key}"][:] for key in available_keys + ] + except KeyError: + available_keys = [] + available_data = [] + + # Keys have form {X}To{Y}Mapping + + # regular expression for camel case to snake case + # https://stackoverflow.com/a/1176023 + def convert(name): + return re.sub("([a-z0-9])([A-Z])", r"\1_\2", name).lower() + + regex = r"([a-zA-Z]*)To([a-zA-Z]*)Mapping" + compiled = re.compile(regex) + + for key, data in zip(available_keys, available_data): + match = compiled.match(key) + snake_case = convert(key) + + if match: + x = match.group(1) + y = match.group(2) + + if x == "Grain": + warnings.warn( + "Use of the GrainToElementMapping is deprecated, please use a newer " + "version of SWIFT to run this simulation.", + DeprecationWarning, + ) + + x = "Dust" + + named_column_name_x = [ + key for key in self.named_columns.keys() if key.startswith(x) + ][0] + named_column_name_y = [ + key for key in self.named_columns.keys() if key.startswith(y) + ][0] + + setattr( + self, + snake_case, + MappingTable( + data=data, + named_columns_x=self.named_columns[named_column_name_x], + named_columns_y=self.named_columns[named_column_name_y], + named_columns_x_name=named_column_name_x, + named_columns_y_name=named_column_name_y, + ), + ) + + return + + + @property + def present_groups(self): + """ + The groups containing datasets that are present in the file. + """ + types = np.where(np.array(getattr(self, "has_type", self.num_part)) != 0)[0] + return [f"PartType{i}" for i in types] + + @property + def present_group_names(self): + """ + The names of the groups that we want to expose. 
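+
+        These are the swiftsimio underscore names, so a file containing
+        ``PartType0`` and ``PartType1`` exposes ``gas`` and ``dark_matter``.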
+ """ + + return [ + metadata.particle_types.particle_name_underscores[x] + for x in self.present_groups + ] + + @property + def code_info(self) -> str: + """ + Gets a nicely printed set of code information with: + + Name (Git Branch) + Git Revision + Git Date + """ + + def get_string(x): + return self.code[x].decode("utf-8") + + output = ( + f"{get_string('Code')} ({get_string('Git Branch')})\n" + f"{get_string('Git Revision')}\n" + f"{get_string('Git Date')}" + ) + + return output + + @property + def compiler_info(self) -> str: + """ + Gets information about the compiler and formats it as: + + Compiler Name (Compiler Version) + MPI library + """ + + def get_string(x): + return self.code[x].decode("utf-8") + + output = ( + f"{get_string('Compiler Name')} ({get_string('Compiler Version')})\n" + f"{get_string('MPI library')}" + ) + + return output + + @property + def library_info(self) -> str: + """ + Gets information about the libraries used and formats it as: + + FFTW vFFTW library version + GSL vGSL library version + HDF5 vHDF5 library version + """ + + def get_string(x): + return self.code[f"{x} library version"].decode("utf-8") + + output = ( + f"FFTW v{get_string('FFTW')}\n" + f"GSL v{get_string('GSL')}\n" + f"HDF5 v{get_string('HDF5')}" + ) + + return output + + @property + def hydro_info(self) -> str: + r""" + Gets information about the hydro scheme and formats it as: + + Scheme + Kernel function in DimensionD + $\eta$ = Kernel eta (Kernel target N_ngb $N_{ngb}$) + $C_{\rm CFL}$ = CFL parameter + """ + + def get_float(x): + return "{:4.2f}".format(self.hydro_scheme[x][0]) + + def get_int(x): + return int(self.hydro_scheme[x][0]) + + def get_string(x): + return self.hydro_scheme[x].decode("utf-8") + + output = ( + f"{get_string('Scheme')}\n" + f"{get_string('Kernel function')} in {get_int('Dimension')}D\n" + rf"$\eta$ = {get_float('Kernel eta')} " + rf"({get_float('Kernel target N_ngb')} $N_{{ngb}}$)" + "\n" + rf"$C_{{\rm CFL}}$ = {get_float('CFL parameter')}" + ) + + return output + + @property + def viscosity_info(self) -> str: + r""" + Gets information about the viscosity scheme and formats it as: + + Viscosity Model + $\alpha_{V, 0}$ = Alpha viscosity, $\ell_V$ = Viscosity decay length [internal units], $\beta_V$ = Beta viscosity + Alpha viscosity (min) < $\alpha_V$ < Alpha viscosity (max) + """ + + def get_float(x): + return "{:4.2f}".format(self.hydro_scheme[x][0]) + + def get_string(x): + return self.hydro_scheme[x].decode("utf-8") + + output = ( + f"{get_string('Viscosity Model')}\n" + rf"$\alpha_{{V, 0}}$ = {get_float('Alpha viscosity')}, " + rf"$\ell_V$ = {get_float('Viscosity decay length [internal units]')}, " + rf"$\beta_V$ = {get_float('Beta viscosity')}" + "\n" + rf"{get_float('Alpha viscosity (min)')} < $\alpha_V$ < {get_float('Alpha viscosity (max)')}" + ) + + return output + + @property + def diffusion_info(self) -> str: + """ + Gets information about the diffusion scheme and formats it as: + + $\alpha_{D, 0}$ = Diffusion alpha, $\beta_D$ = Diffusion beta + Diffusion alpha (min) < $\alpha_D$ < Diffusion alpha (max) + """ + + def get_float(x): + return "{:4.2f}".format(self.hydro_scheme[x][0]) + + output = ( + rf"$\alpha_{{D, 0}}$ = {get_float('Diffusion alpha')}, " + rf"$\beta_D$ = {get_float('Diffusion beta')}" + "\n" + rf"${get_float('Diffusion alpha (min)')} < " + rf"\alpha_D < {get_float('Diffusion alpha (max)')}$" + ) + + return output + + @property + def partial_snapshot(self) -> bool: + """ + Whether or not this snapshot is partial (e.g. 
a "x.0.hdf5" file), or
+        a file describing an entire snapshot.
+        """
+
+        # Partial snapshots have num_files_per_snapshot set to more than 1. Virtual
+        # snapshots collating multiple sub-snapshots together have num_files_per_snapshot = 1.
+
+        return self.num_files_per_snapshot > 1
+
+
+class SWIFTFOFMetadata(SWIFTMetadata):
+    masking_valid: bool = False
+
+    def __init__(self, filename: str, units: SWIFTUnits):
+        self.filename = filename
+        self.units = units
+
+        self.get_metadata()
+        self.postprocess_header()
+
+        self.load_groups()
+
+        # After we've loaded all this metadata, we can safely release the file handle.
+        self.handle.close()
+
+        return
+
+    @property
+    def present_groups(self):
+        """
+        The groups containing datasets that are present in the file.
+        """
+        return ["Groups"]
+
+    @property
+    def present_group_names(self):
+        """
+        The names of the groups that we want to expose.
+        """
+        return ["fof_groups"]
+
+
+class SWIFTSOAPMetadata(SWIFTMetadata):
+    masking_valid: bool = True
+
+    def __init__(self, filename: str, units: SWIFTUnits):
+        self.filename = filename
+        self.units = units
+
+        self.get_metadata()
+        self.postprocess_header()
+
+        self.load_groups()
+
+        # After we've loaded all this metadata, we can safely release the file handle.
+        self.handle.close()
+
+        return
+
+    @property
+    def present_groups(self):
+        """
+        The groups containing datasets that are present in the file.
+        """
+        return self.subhalo_types
+
+    @property
+    def present_group_names(self):
+        """
+        The names of the groups that we want to expose.
+        """
+        return [
+            metadata.soap_types.get_soap_name_underscore(x)
+            for x in self.present_groups
+        ]
\ No newline at end of file
diff --git a/swiftsimio/reader.py b/swiftsimio/reader.py
index abaa5345..2fee88df 100644
--- a/swiftsimio/reader.py
+++ b/swiftsimio/reader.py
@@ -11,10 +11,11 @@
 + SWIFTDataset, a container class for all of the above.
 """
 
-from swiftsimio import metadata
+
 from swiftsimio.accelerated import read_ranges_from_file
 from swiftsimio.objects import cosmo_array, cosmo_factor, a
-from swiftsimio.conversions import swift_cosmology_to_astropy
+
+from swiftsimio.metadata.objects import metadata_discriminator, SWIFTUnits, SWIFTGroupMetadata, SWIFTMetadata
 
 import re
 import h5py
@@ -28,1124 +29,6 @@
 from typing import Union, Callable, List, Optional
 
 
-class MassTable(object):
-    """
-    Extracts a mass table to local variables based on the
-    particle type names.
-    """
-
-    def __init__(self, base_mass_table: np.array, mass_units: unyt.unyt_quantity):
-        """
-        Parameters
-        ----------
-
-        base_mass_table : np.array
-            Mass table of the same length as the number of particle types.
-
-        mass_units : unyt_quantity
-            Base mass units for the simulation.
-        """
-
-        for index, name in metadata.particle_types.particle_name_underscores.items():
-            try:
-                setattr(
-                    self,
-                    name,
-                    unyt.unyt_quantity(base_mass_table[index], units=mass_units),
-                )
-            except IndexError:
-                # Backwards compatible.
-                setattr(self, name, None)
-
-        return
-
-    def __str__(self):
-        return f"Mass table for {' '.join(metadata.particle_types.particle_name_underscores.values())}"
-
-    def __repr__(self):
-        return self.__str__()
-
-
-class MappingTable(object):
-    """
-    A mapping table from one named column instance to the other.
-    Initially designed for the mapping between dust and elements.
- """ - - def __init__( - self, - data: np.ndarray, - named_columns_x: List[str], - named_columns_y: List[str], - named_columns_x_name: str, - named_columns_y_name: str, - ): - """ - Parameters - ---------- - - data: np.ndarray - The data array providing the mapping between the named - columns. Should be of size N x M, where N is the number - of elements in ``named_columns_x`` and M the number - of elements in ``named_columns_y``. - - named_columns_x: List[str] - The names of the columns in the first axis. - - named_columns_y: List[str] - The names of the columns in the second axis. - - named_columns_x_name: str - The name of the first mapping. - - named_columns_y_name: str - The name of the second mapping. - """ - - self.data = data - self.named_columns_x = named_columns_x - self.named_columns_y = named_columns_y - self.named_columns_x_name = named_columns_x_name - self.named_columns_y_name = named_columns_y_name - - for x, name_x in enumerate(named_columns_x): - for y, name_y in enumerate(named_columns_y): - setattr(self, f"{name_x.lower()}_to_{name_y.lower()}", data[x][y]) - - return - - def __str__(self): - return ( - f"Mapping table from {self.named_columns_x_name} to " - f"{self.named_columns_y_name}, containing {len(self.data)} " - f"by {len(self.data[0])} elements." - ) - - def __repr__(self): - return f"{self.__str__()}. Raw data: " "\n" f"{self.data}." - - -class SWIFTUnits(object): - """ - Generates a unyt system that can then be used with the SWIFT data. - - These give the unit mass, length, time, current, and temperature as - unyt unit variables in simulation units. I.e. you can take any value - that you get out of the code and multiply it by the appropriate values - to get it 'unyt-ified' with the correct units. - - Attributes - ---------- - mass : float - unit for mass used - length : float - unit for length used - time : float - unit for time used - current : float - unit for current used - temperature : float - unit for temperature used - - """ - - def __init__(self, filename: Path, handle: Optional[h5py.File] = None): - """ - SWIFTUnits constructor - - Sets filename for file to read units from and gets unit dictionary - - Parameters - ---------- - - filename : Path - Name of file to read units from - - handle: h5py.File, optional - The h5py file handle, optional. Will open a new handle with the - filename if required. - - """ - self.filename = filename - self._handle = handle - - self.get_unit_dictionary() - - return - - @property - def handle(self): - """ - Property that gets the file handle, which can be shared - with other objects for efficiency reasons. - """ - if isinstance(self._handle, h5py.File): - # Can be open or closed, let's test. - try: - file = self._handle.file - - return self._handle - except ValueError: - # This will be the case if there is no active file handle - pass - - self._handle = h5py.File(self.filename, "r") - - return self._handle - - def get_unit_dictionary(self): - """ - Store unit data and metadata - - Length 1 arrays are used to store the unit data. This dictionary - also contains the metadata information that connects the unyt - objects to the names that are stored in the SWIFT snapshots. - """ - - self.units = { - name: unyt.unyt_quantity( - value[0], units=metadata.unit_types.unit_names_to_unyt[name] - ) - for name, value in self.handle["Units"].attrs.items() - } - - # We now unpack this into variables. 
- self.mass = metadata.unit_types.find_nearest_base_unit( - self.units["Unit mass in cgs (U_M)"], "mass" - ) - self.length = metadata.unit_types.find_nearest_base_unit( - self.units["Unit length in cgs (U_L)"], "length" - ) - self.time = metadata.unit_types.find_nearest_base_unit( - self.units["Unit time in cgs (U_t)"], "time" - ) - self.current = metadata.unit_types.find_nearest_base_unit( - self.units["Unit current in cgs (U_I)"], "current" - ) - self.temperature = metadata.unit_types.find_nearest_base_unit( - self.units["Unit temperature in cgs (U_T)"], "temperature" - ) - - def __del__(self): - if isinstance(self._handle, h5py.File): - self._handle.close() - - -class SWIFTMetadata(object): - """ - Loads all metadata (apart from Units, those are handled by SWIFTUnits) - into dictionaries. - - This also does some extra parsing on some well-used metadata. - """ - - # Name of the file that has been read from - filename: str - # Unit instance associated with this file - units: SWIFTUnits - # Header dictionary, metadata about snapshot. - header: dict - - def __init__(self, filename, units: SWIFTUnits): - """ - Constructor for SWIFTMetadata object - - Parameters - ---------- - - filename : str - name of file to read from - - units : SWIFTUnits - the units being used - """ - self.filename = filename - self.units = units - - self.get_metadata() - self.get_named_column_metadata() - self.get_mapping_metadata() - - self.postprocess_header() - - self.get_filetype() - - self.load_groups() - self.extract_cosmology() - - # After we've loaded all this metadata, we can safely release the file handle. - self.handle.close() - - return - - @property - def handle(self): - # Handle, which is shared with units. Units handles - # file opening and closing. - return self.units.handle - - def get_metadata(self): - """ - Loads the metadata as specified in metadata.metadata_fields. - """ - - for field, name in metadata.metadata_fields.metadata_fields_to_read.items(): - try: - setattr(self, name, dict(self.handle[field].attrs)) - except KeyError: - setattr(self, name, None) - - return - - def get_named_column_metadata(self): - """ - Loads the custom named column metadata (if it exists) from - SubgridScheme/NamedColumns. - """ - - try: - data = self.handle["SubgridScheme/NamedColumns"] - - self.named_columns = { - k: [x.decode("utf-8") for x in data[k][:]] for k in data.keys() - } - except KeyError: - self.named_columns = {} - - return - - def get_mapping_metadata(self): - """ - Gets the mappings based on the named columns (must have already been read), - from the form: - - SubgridScheme/{X}To{Y}Mapping. - - Includes a hack of `Dust` -> `Grains` that will be deprecated. 
- """ - - try: - possible_keys = self.handle["SubgridScheme"].keys() - - available_keys = [key for key in possible_keys if key.endswith("Mapping")] - available_data = [ - self.handle[f"SubgridScheme/{key}"][:] for key in available_keys - ] - except KeyError: - available_keys = [] - available_data = [] - - # Keys have form {X}To{Y}Mapping - - # regular expression for camel case to snake case - # https://stackoverflow.com/a/1176023 - def convert(name): - return re.sub("([a-z0-9])([A-Z])", r"\1_\2", name).lower() - - regex = r"([a-zA-Z]*)To([a-zA-Z]*)Mapping" - compiled = re.compile(regex) - - for key, data in zip(available_keys, available_data): - match = compiled.match(key) - snake_case = convert(key) - - if match: - x = match.group(1) - y = match.group(2) - - if x == "Grain": - warnings.warn( - "Use of the GrainToElementMapping is deprecated, please use a newer " - "version of SWIFT to run this simulation.", - DeprecationWarning, - ) - - x = "Dust" - - named_column_name_x = [ - key for key in self.named_columns.keys() if key.startswith(x) - ][0] - named_column_name_y = [ - key for key in self.named_columns.keys() if key.startswith(y) - ][0] - - setattr( - self, - snake_case, - MappingTable( - data=data, - named_columns_x=self.named_columns[named_column_name_x], - named_columns_y=self.named_columns[named_column_name_y], - named_columns_x_name=named_column_name_x, - named_columns_y_name=named_column_name_y, - ), - ) - - return - - def postprocess_header(self): - """ - Some minor postprocessing on the header to local variables. - """ - - # These are just read straight in to variables - header_unpack_arrays_units = metadata.metadata_fields.generate_units_header_unpack_arrays( - m=self.units.mass, - l=self.units.length, - t=self.units.time, - I=self.units.current, - T=self.units.temperature, - ) - - for field, name in metadata.metadata_fields.header_unpack_arrays.items(): - try: - if name in header_unpack_arrays_units.keys(): - setattr( - self, - name, - unyt.unyt_array( - self.header[field], units=header_unpack_arrays_units[name] - ), - ) - # This is required or we automatically get everything in CGS! - getattr(self, name).convert_to_units( - header_unpack_arrays_units[name] - ) - else: - # Must not have any units! Oh well. - setattr(self, name, self.header[field]) - except KeyError: - # Must not be present, just skip it - continue - - # Now unpack the 'mass table' type items: - for field, name in metadata.metadata_fields.header_unpack_mass_tables.items(): - try: - setattr( - self, - name, - MassTable( - base_mass_table=self.header[field], mass_units=self.units.mass - ), - ) - except KeyError: - setattr( - self, - name, - MassTable( - base_mass_table=np.zeros( - len(metadata.particle_types.particle_name_underscores) - ), - mass_units=self.units.mass, - ), - ) - - # These must be unpacked as 'real' strings (i.e. 
converted to utf-8) - - for field, name in metadata.metadata_fields.header_unpack_string.items(): - try: - # Deal with h5py's quirkiness that fixed-sized and variable-sized - # strings are read as strings or bytes - # See: https://github.com/h5py/h5py/issues/2172 - raw = self.header[field] - try: - string = raw.decode("utf-8") - except AttributeError: - string = raw - setattr(self, name, string) - except KeyError: - # Must not be present, just skip it - setattr(self, name, "") - - # These must be unpacked as they are stored as length-1 arrays - - header_unpack_float_units = metadata.metadata_fields.generate_units_header_unpack_single_float( - m=self.units.mass, - l=self.units.length, - t=self.units.time, - I=self.units.current, - T=self.units.temperature, - ) - - for field, names in metadata.metadata_fields.header_unpack_single_float.items(): - try: - if isinstance(names, list): - # Sometimes we store a list in case we have multiple names, for example - # Redshift -> metadata.redshift AND metadata.z. Can't just do the iteration - # because we may loop over the letters in the string. - for variable in names: - if variable in header_unpack_float_units.keys(): - # We have an associated unit! - unit = header_unpack_float_units[variable] - setattr( - self, - variable, - unyt.unyt_quantity(self.header[field][0], units=unit), - ) - else: - # No unit - setattr(self, variable, self.header[field][0]) - else: - # We can just check for the unit and set the attribute - variable = names - if variable in header_unpack_float_units.keys(): - # We have an associated unit! - unit = header_unpack_float_units[variable] - setattr( - self, - variable, - unyt.unyt_quantity(self.header[field][0], units=unit), - ) - else: - # No unit - setattr(self, variable, self.header[field][0]) - except KeyError: - # Must not be present, just skip it - continue - - # These are special cases, sorry! - # Date and time of snapshot dump - try: - try: - # Try and decode bytes, otherwise save raw string - snapshot_date = self.header.get( - "SnapshotDate", self.header.get("Snapshot date", b"") - ).decode("utf-8") - except AttributeError: - snapshot_date = self.header.get( - "SnapshotDate", self.header.get("Snapshot date", "") - ) - try: - self.snapshot_date = datetime.strptime( - snapshot_date, "%H:%M:%S %Y-%m-%d %Z" - ) - except ValueError: - # Backwards compatibility; this was used previously due to simplicity - # but is not portable between regions. So if you ran a simulation on - # a British (en_GB) machine, and then tried to read on a Dutch - # machine (nl_NL), this would _not_ work because %c is different. - try: - self.snapshot_date = datetime.strptime(snapshot_date, "%c\n") - except ValueError: - # Oh dear this has gone _very_wrong. Let's just keep it as a string. 
- self.snapshot_date = snapshot_date - except KeyError: - # Old file - pass - - # get photon group edges RT dataset from the SubgridScheme group - try: - self.photon_group_edges = ( - self.handle["SubgridScheme/PhotonGroupEdges"][:] / self.units.time - ) - except KeyError: - self.photon_group_edges = None - - # get reduced speed of light RT dataset from the SubgridScheme group - try: - self.reduced_lightspeed = ( - self.handle["SubgridScheme/ReducedLightspeed"][0] - * self.units.length - / self.units.time - ) - except KeyError: - self.reduced_lightspeed = None - - # Store these separately as self.n_gas = number of gas particles for example - for (part_number, (_, part_name)) in enumerate( - metadata.particle_types.particle_name_underscores.items() - ): - try: - setattr(self, f"n_{part_name}", self.num_part[part_number]) - except IndexError: - # Backwards compatibility; mass/number table can change size. - setattr(self, f"n_{part_name}", 0) - - # Need to unpack the gas gamma for cosmology - try: - self.gas_gamma = self.hydro_scheme["Adiabatic index"] - except (KeyError, TypeError): - # We can set a default and print a message whenever we require this value - self.gas_gamma = None - - try: - self.a = self.scale_factor - except AttributeError: - # These must always be present for the initialisation of cosmology properties - self.a = 1.0 - self.scale_factor = 1.0 - - return - - def get_filetype(self): - """ - Determine what type of file we are reading - """ - if self.output_type == "FOF": - self.filetype = "FOF" - elif self.output_type == "SOAP": - self.filetype = "SOAP" - else: - # Default to snapshot - self.filetype = "snapshot" - - def load_groups(self): - """ - Loads the groups and metadata into objects: - - metadata._properties - - This contains eight arrays, - - metadata._properties.field_names - metadata._properties.field_paths - metadata._properties.field_units - metadata._properties.field_cosmologies - metadata._properties.field_descriptions - metadata._properties.field_compressions - metadata._properties.field_physicals - metadata._properties.field_valid_transforms - - As well as some more information about the group. - """ - - for group, name in zip(self.present_groups, self.present_group_names): - filetype_metadata = SWIFTGroupMetadata( - group=group, - group_name=name, - metadata=self, - scale_factor=self.scale_factor, - ) - setattr(self, f"{name}_properties", filetype_metadata) - - return - - def extract_cosmology(self): - """ - Creates an astropy.cosmology object from the internal cosmology system. - - This will be saved as ``self.cosmology``. - """ - - if self.cosmology_raw is not None: - cosmo = self.cosmology_raw - else: - cosmo = {"Cosmological run": 0} - - if cosmo.get("Cosmological run", 0): - self.cosmology = swift_cosmology_to_astropy(cosmo, units=self.units) - else: - self.cosmology = None - - return - - @property - def present_groups(self): - """ - The groups containing datasets that are present in the file. - """ - if self.filetype == "snapshot": - types = np.where(np.array(getattr(self, "has_type", self.num_part)) != 0)[0] - return [f"PartType{i}" for i in types] - elif self.filetype == "FOF": - return ["Groups"] - elif self.filetype == "SOAP": - return self.subhalo_types - - @property - def present_group_names(self): - """ - The names of the groups that we want to expose. 
- """ - - if self.filetype == "snapshot": - return [ - metadata.particle_types.particle_name_underscores[x] - for x in self.present_groups - ] - elif self.filetype == "FOF": - return ["fof_groups"] - elif self.filetype == "SOAP": - return [ - metadata.soap_types.get_soap_name_underscore(x) - for x in self.present_groups - ] - - @property - def code_info(self) -> str: - """ - Gets a nicely printed set of code information with: - - Name (Git Branch) - Git Revision - Git Date - """ - - def get_string(x): - return self.code[x].decode("utf-8") - - output = ( - f"{get_string('Code')} ({get_string('Git Branch')})\n" - f"{get_string('Git Revision')}\n" - f"{get_string('Git Date')}" - ) - - return output - - @property - def compiler_info(self) -> str: - """ - Gets information about the compiler and formats it as: - - Compiler Name (Compiler Version) - MPI library - """ - - def get_string(x): - return self.code[x].decode("utf-8") - - output = ( - f"{get_string('Compiler Name')} ({get_string('Compiler Version')})\n" - f"{get_string('MPI library')}" - ) - - return output - - @property - def library_info(self) -> str: - """ - Gets information about the libraries used and formats it as: - - FFTW vFFTW library version - GSL vGSL library version - HDF5 vHDF5 library version - """ - - def get_string(x): - return self.code[f"{x} library version"].decode("utf-8") - - output = ( - f"FFTW v{get_string('FFTW')}\n" - f"GSL v{get_string('GSL')}\n" - f"HDF5 v{get_string('HDF5')}" - ) - - return output - - @property - def hydro_info(self) -> str: - r""" - Gets information about the hydro scheme and formats it as: - - Scheme - Kernel function in DimensionD - $\eta$ = Kernel eta (Kernel target N_ngb $N_{ngb}$) - $C_{\rm CFL}$ = CFL parameter - """ - - def get_float(x): - return "{:4.2f}".format(self.hydro_scheme[x][0]) - - def get_int(x): - return int(self.hydro_scheme[x][0]) - - def get_string(x): - return self.hydro_scheme[x].decode("utf-8") - - output = ( - f"{get_string('Scheme')}\n" - f"{get_string('Kernel function')} in {get_int('Dimension')}D\n" - rf"$\eta$ = {get_float('Kernel eta')} " - rf"({get_float('Kernel target N_ngb')} $N_{{ngb}}$)" - "\n" - rf"$C_{{\rm CFL}}$ = {get_float('CFL parameter')}" - ) - - return output - - @property - def viscosity_info(self) -> str: - r""" - Gets information about the viscosity scheme and formats it as: - - Viscosity Model - $\alpha_{V, 0}$ = Alpha viscosity, $\ell_V$ = Viscosity decay length [internal units], $\beta_V$ = Beta viscosity - Alpha viscosity (min) < $\alpha_V$ < Alpha viscosity (max) - """ - - def get_float(x): - return "{:4.2f}".format(self.hydro_scheme[x][0]) - - def get_string(x): - return self.hydro_scheme[x].decode("utf-8") - - output = ( - f"{get_string('Viscosity Model')}\n" - rf"$\alpha_{{V, 0}}$ = {get_float('Alpha viscosity')}, " - rf"$\ell_V$ = {get_float('Viscosity decay length [internal units]')}, " - rf"$\beta_V$ = {get_float('Beta viscosity')}" - "\n" - rf"{get_float('Alpha viscosity (min)')} < $\alpha_V$ < {get_float('Alpha viscosity (max)')}" - ) - - return output - - @property - def diffusion_info(self) -> str: - """ - Gets information about the diffusion scheme and formats it as: - - $\alpha_{D, 0}$ = Diffusion alpha, $\beta_D$ = Diffusion beta - Diffusion alpha (min) < $\alpha_D$ < Diffusion alpha (max) - """ - - def get_float(x): - return "{:4.2f}".format(self.hydro_scheme[x][0]) - - output = ( - rf"$\alpha_{{D, 0}}$ = {get_float('Diffusion alpha')}, " - rf"$\beta_D$ = {get_float('Diffusion beta')}" - "\n" - rf"${get_float('Diffusion alpha 
(min)')} < " - rf"\alpha_D < {get_float('Diffusion alpha (max)')}$" - ) - - return output - - @property - def partial_snapshot(self) -> bool: - """ - Whether or not this snapshot is partial (e.g. a "x.0.hdf5" file), or - a file describing an entire snapshot. - """ - - # Partial snapshots have num_files_per_snapshot set to 1. Virtual snapshots - # collating multiple sub-snapshots together have num_files_per_snapshot = 1. - - return self.num_files_per_snapshot > 1 - - -class SWIFTGroupMetadata(object): - """ - Object that contains the metadata for one hdf5 group. - - This, for instance, could be part type 0, or 'gas'. This will load in - the names of all datasets, their units, possible named fields, - and their cosmology, and present them for use in the actual i/o routines. - - Methods - ------- - load_metadata(self): - Loads the required metadata. - load_field_names(self): - Loads in the field names. - load_field_units(self): - Loads in the units from each dataset. - load_field_descriptions(self): - Loads in descriptions of the fields for each dataset. - load_field_compressions(self): - Loads in compressions of the fields for each dataset. - load_cosmology(self): - Loads in the field cosmologies. - load_physical(self): - Loads in whether the field is saved as comoving or physical. - load_valid_transforms(self): - Loads in whether the field can be converted to comoving. - load_named_columns(self): - Loads the named column data for relevant fields. - """ - - def __init__( - self, group: str, group_name: str, metadata: SWIFTMetadata, scale_factor: float - ): - """ - Constructor for SWIFTGroupMetadata class - - Parameters - ---------- - group: str - the name of the group in the hdf5 file - group_name : str - the corresponding group name for swiftsimio - metadata : SWIFTMetadata - the snapshot metadata - scale_factor : float - the snapshot scale factor - """ - self.group = group - self.group_name = group_name - self.metadata = metadata - self.units = metadata.units - self.scale_factor = scale_factor - - self.filename = metadata.filename - - self.load_metadata() - - return - - def __str__(self): - return f"Metadata class for {self.group} ({self.group_name})" - - def __repr__(self): - return self.__str__() - - def load_metadata(self): - """ - Loads the required metadata. - - This includes loading the field names, units and descriptions, as well as the - cosmology metadata and any custom named columns - """ - - self.load_field_names() - self.load_field_units() - self.load_field_descriptions() - self.load_field_compressions() - self.load_cosmology() - self.load_physical() - self.load_valid_transforms() - self.load_named_columns() - - def load_field_names(self): - """ - Loads in only the field names. - """ - - # regular expression for camel case to snake case - # https://stackoverflow.com/a/1176023 - def convert(name): - return re.sub("([a-z0-9])([A-Z])", r"\1_\2", name).lower() - - # Skip fields which are groups themselves - self.field_paths = [] - self.field_names = [] - for item in self.metadata.handle[f"{self.group}"].keys(): - # Skip fields which are groups themselves - if f"{self.group}/{item}" not in self.metadata.present_groups: - self.field_paths.append(f"{self.group}/{item}") - self.field_names.append(convert(item)) - - return - - def load_field_units(self): - """ - Loads in the units from each dataset. 
- """ - - unit_dict = { - "I": self.units.current, - "L": self.units.length, - "M": self.units.mass, - "T": self.units.temperature, - "t": self.units.time, - } - - def get_units(unit_attribute): - units = 1.0 - - for exponent, unit in unit_dict.items(): - # We store the 'unit exponent' in the SWIFT metadata. This corresponds - # to the power we need to raise each unit to, to return the correct units - try: - # Need to check if the exponent is 0 manually because of float precision - unit_exponent = unit_attribute[f"U_{exponent} exponent"][0] - if unit_exponent != 0.0: - units *= unit ** unit_exponent - except KeyError: - # Can't load that data! - # We should probably warn the user here... - pass - - # Deal with case where we _really_ have a dimensionless quantity. Comparing with - # 1.0 doesn't work, beacause in these cases unyt reverts to a floating point - # comparison. - try: - units.units - except AttributeError: - units = None - - return units - - self.field_units = [ - get_units(self.metadata.handle[x].attrs) for x in self.field_paths - ] - - return - - def load_field_descriptions(self): - """ - Loads in the text descriptions of the fields for each dataset. - For SOAP filetypes a description of the mask is included. - """ - - def get_desc(dataset): - try: - description = dataset.attrs["Description"].decode("utf-8") - except AttributeError: - # Description is saved as a string not bytes - description = dataset.attrs["Description"] - except KeyError: - # Can't load description! - description = "No description available" - - if self.metadata.filetype != "SOAP": - return description - - try: - is_masked = dataset.attrs["Masked"] - except KeyError: - is_masked = False - if not is_masked: - return description + " Not masked." - - # TODO: Update for https://github.com/SWIFTSIM/SOAP/pull/81 - # TODO: Also need to add metadata for each halo type for that PR - # If is_masked is true then these other attributes should exist - mask_datasets = dataset.attrs["Mask Datasets"] - mask_threshold = dataset.attrs["Mask Threshold"] - if len(mask_datasets) == 1: - mask_str = f" Only computed for objects with {mask_datasets[0]} >= {mask_threshold}." - else: - mask_str = f' Only computed for objects where {" + ".join(mask_datasets)} >= {mask_threshold}.' - return description + mask_str - - self.field_descriptions = [ - get_desc(self.metadata.handle[x]) for x in self.field_paths - ] - - return - - def load_field_compressions(self): - """ - Loads in the string describing the compression filters of the fields for each dataset. - """ - - def get_comp(dataset): - try: - # SOAP catalogues can be compressed/uncompressed - is_compressed = dataset.attrs["Is Compressed"] - except KeyError: - is_compressed = True - try: - comp = dataset.attrs["Lossy compression filter"].decode("utf-8") - except AttributeError: - # Compression is saved as str not bytes - comp = dataset.attrs["Lossy compression filter"] - except KeyError: - # Can't load compression string! - comp = "No compression info available" - - return comp if is_compressed else "Not compressed." - - self.field_compressions = [ - get_comp(self.metadata.handle[x]) for x in self.field_paths - ] - - return - - def load_cosmology(self): - """ - Loads in the field cosmologies. - """ - - current_scale_factor = self.scale_factor - - def get_cosmo(dataset): - try: - cosmo_exponent = dataset.attrs["a-scale exponent"][0] - except: - # Can't load, 'graceful' fallback. 
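
The exponent scheme implemented by get_units above deserves a concrete illustration: each dataset stores one attribute per base unit ("U_I exponent", "U_L exponent", "U_M exponent", "U_T exponent", "U_t exponent"), and the unit expression is the product of the base units raised to those powers. A minimal sketch, with made-up exponent values standing in for the dataset attributes:

    import unyt

    # Base units as a snapshot might define them (cgs-like, illustrative).
    base_units = {"I": unyt.A, "L": unyt.cm, "M": unyt.g, "T": unyt.K, "t": unyt.s}
    # A velocity-like dataset would store U_L exponent = 1, U_t exponent = -1.
    exponents = {"I": 0.0, "L": 1.0, "M": 0.0, "T": 0.0, "t": -1.0}

    units = 1.0
    for symbol, base in base_units.items():
        # Skip zero exponents explicitly, exactly as the reader does, to
        # avoid float-precision noise creeping into the unit expression.
        if exponents[symbol] != 0.0:
            units = units * base ** exponents[symbol]

    print(units)  # -> 1.0 cm/s; stays a plain 1.0 if every exponent is zero
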
- cosmo_exponent = 0.0 - - a_factor_this_dataset = a ** cosmo_exponent - - return cosmo_factor(a_factor_this_dataset, current_scale_factor) - - self.field_cosmologies = [ - get_cosmo(self.metadata.handle[x]) for x in self.field_paths - ] - - return - - def load_physical(self): - """ - Loads in whether the field is saved as comoving or physical. - """ - - def get_physical(dataset): - try: - physical = dataset.attrs["Value stored as physical"][0] == 1 - except: - physical = False - return physical - - self.field_physicals = [ - get_physical(self.metadata.handle[x]) for x in self.field_paths - ] - - return - - def load_valid_transforms(self): - """ - Loads in whether the field can be converted to comoving. - """ - - def get_valid_transform(dataset): - try: - valid_transform = ( - dataset.attrs["Property can be converted to comoving"][0] == 1 - ) - except: - valid_transform = True - return valid_transform - - self.field_valid_transforms = [ - get_valid_transform(self.metadata.handle[x]) for x in self.field_paths - ] - - return - - def load_named_columns(self): - """ - Loads the named column data for relevant fields. - """ - - named_columns = {} - - for field in self.field_paths: - property_name = field.split("/")[-1] - - if property_name in self.metadata.named_columns.keys(): - field_names = self.metadata.named_columns[property_name] - - # Now need to make a decision on capitalisation. If we have a set of - # words with only one capital in them, then it's likely that they are - # element names or something similar, so they should be lower case. - # If on average we have many more capitals, then they are likely to be - # ionized fractions (e.g. HeII) and so we want to leave them with their - # original capitalisation. - - num_capitals = lambda x: sum(1 for c in x if c.isupper()) - mean_num_capitals = sum(map(num_capitals, field_names)) / len( - field_names - ) - - if mean_num_capitals < 1.01: - # Decapitalise them as they are likely individual element names - formatted_field_names = [x.lower() for x in field_names] - else: - formatted_field_names = field_names - - named_columns[field] = formatted_field_names - else: - named_columns[field] = None - - self.named_columns = named_columns - - return - def generate_getter( filename, @@ -1722,7 +605,7 @@ def get_metadata(self): this function again if you mess things up. 
""" - self.metadata = SWIFTMetadata(self.filename, self.units) + self.metadata = metadata_discriminator(self.filename, self.units) return From 6b26291641e3300005039513e2887968fc18f467 Mon Sep 17 00:00:00 2001 From: Josh Borrow Date: Wed, 18 Sep 2024 17:03:58 +0200 Subject: [PATCH 2/7] Added test loading a SOAP catalogue --- tests/test_soap.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 tests/test_soap.py diff --git a/tests/test_soap.py b/tests/test_soap.py new file mode 100644 index 00000000..f835474b --- /dev/null +++ b/tests/test_soap.py @@ -0,0 +1,13 @@ +""" +Tests that we can open SOAP files +""" + +from tests.helper import requires + +from swiftsimio import load + +@requires("soap_example.hdf5") +def test_soap_can_load(filename): + data = load(filename) + + return From 988dc284693b26537bacb5481c6244cf8fc9c01f Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 18 Sep 2024 16:15:32 +0100 Subject: [PATCH 3/7] Nice names --- swiftsimio/metadata/objects.py | 24 +++++++++++++----------- swiftsimio/reader.py | 8 +------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/swiftsimio/metadata/objects.py b/swiftsimio/metadata/objects.py index cdccc478..4624c01b 100644 --- a/swiftsimio/metadata/objects.py +++ b/swiftsimio/metadata/objects.py @@ -5,6 +5,7 @@ import h5py from swiftsimio.conversions import swift_cosmology_to_astropy from swiftsimio import metadata +from swiftsimio.objects import cosmo_array, cosmo_factor, a from abc import ABC, abstractmethod import re @@ -280,19 +281,10 @@ def get_desc(dataset): # Can't load description! description = "No description available" - if self.metadata.filetype != "SOAP": - return description - - try: - is_masked = dataset.attrs["Masked"] - except KeyError: - is_masked = False + is_masked = dataset.attrs.get("Masked", False) if not is_masked: return description + " Not masked." 
- # TODO: Update for https://github.com/SWIFTSIM/SOAP/pull/81 - # TODO: Also need to add metadata for each halo type for that PR - # If is_masked is true then these other attributes should exist mask_datasets = dataset.attrs["Mask Datasets"] mask_threshold = dataset.attrs["Mask Threshold"] if len(mask_datasets) == 1: @@ -1179,6 +1171,9 @@ def partial_snapshot(self) -> bool: return self.num_files_per_snapshot > 1 + @staticmethod + def get_nice_name(group): + return metadata.particle_types.particle_name_class[group] class SWIFTFOFMetadata(SWIFTMetadata): masking_valid: bool = False @@ -1211,6 +1206,9 @@ def present_group_names(self): """ return ["fof_groups"] + @staticmethod + def get_nice_name(group): + return "FOFGroups" class SWIFTSOAPMetadata(SWIFTMetadata): masking_valid: bool = True @@ -1244,4 +1242,8 @@ def present_group_names(self): return [ metadata.soap_types.get_soap_name_underscore(x) for x in self.present_groups - ] \ No newline at end of file + ] + + @staticmethod + def get_nice_name(group): + return metadata.soap_types.get_soap_name_nice(group) diff --git a/swiftsimio/reader.py b/swiftsimio/reader.py index 2fee88df..6a9f0654 100644 --- a/swiftsimio/reader.py +++ b/swiftsimio/reader.py @@ -395,13 +395,7 @@ def generate_datasets(group_metadata: SWIFTGroupMetadata, mask): group = group_metadata.group group_name = group_metadata.group_name - # Set nice name for group - if group_metadata.metadata.filetype == "snapshot": - group_nice_name = metadata.particle_types.particle_name_class[group] - elif group_metadata.metadata.filetype == "FOF": - group_nice_name = "FOFGroups" - elif group_metadata.metadata.filetype == "SOAP": - group_nice_name = metadata.soap_types.get_soap_name_nice(group) + group_nice_name = group_metadata.metadata.get_nice_name(group) # Mask is an object that contains all masks for all possible datasets. if mask is not None: From 276f4fed3de98f02392063199aa3683efefd9607 Mon Sep 17 00:00:00 2001 From: Josh Borrow Date: Wed, 18 Sep 2024 17:12:02 +0200 Subject: [PATCH 4/7] Packaging changes --- pyproject.toml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 52dbbecd..8407d060 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,28 +12,27 @@ packages = [ "swiftsimio.metadata.particle", "swiftsimio.metadata.unit", "swiftsimio.metadata.writer", + "swiftsimio.metadata.soap", "swiftsimio.visualisation", "swiftsimio.visualisation.projection_backends", "swiftsimio.visualisation.slice_backends", "swiftsimio.visualisation.tools", "swiftsimio.visualisation.smoothing_length", - "swiftsimio.metadata.soap" ] [project] name = "swiftsimio" -version="8.0.1" +version="9.0.0" authors = [ { name="Josh Borrow", email="josh@joshborrow.com" }, ] -description="SWIFTsim (swift.dur.ac.uk) i/o routines for python." +description="SWIFTsim (swiftsim.com) i/o routines for python." 
readme = "README.md" -requires-python = ">3.8.0" +requires-python = ">3.10.0" classifiers = [ - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)", "Operating System :: OS Independent", ] From 828d649e04d14338c52ca69dd93bbc59e1644788 Mon Sep 17 00:00:00 2001 From: Josh Borrow Date: Wed, 18 Sep 2024 17:19:31 +0200 Subject: [PATCH 5/7] Added optional named column metadata --- swiftsimio/metadata/objects.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/swiftsimio/metadata/objects.py b/swiftsimio/metadata/objects.py index 4624c01b..14928bfd 100644 --- a/swiftsimio/metadata/objects.py +++ b/swiftsimio/metadata/objects.py @@ -399,7 +399,10 @@ def load_named_columns(self): for field in self.field_paths: property_name = field.split("/")[-1] - if property_name in self.metadata.named_columns.keys(): + # Not all datasets have named columns + named_columns_metadata = getattr(self.metadata, "named_columns", {}) + + if property_name in named_columns_metadata.keys(): field_names = self.metadata.named_columns[property_name] # Now need to make a decision on capitalisation. If we have a set of From aca50486b32ffb3c61cb72b1a5c855713d9e2d3a Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Wed, 18 Sep 2024 16:29:05 +0100 Subject: [PATCH 6/7] Add shared cell counts --- swiftsimio/__init__.py | 2 +- swiftsimio/masks.py | 8 ++++---- swiftsimio/metadata/objects.py | 2 ++ 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/swiftsimio/__init__.py b/swiftsimio/__init__.py index ba19d453..4df92f4d 100644 --- a/swiftsimio/__init__.py +++ b/swiftsimio/__init__.py @@ -75,7 +75,7 @@ def mask(filename, spatial_only=True) -> SWIFTMask: """ units = SWIFTUnits(filename) - metadata = SWIFTMetadata(filename, units) + metadata = metadata_discriminator(filename, units) return SWIFTMask(metadata=metadata, spatial_only=spatial_only) diff --git a/swiftsimio/masks.py b/swiftsimio/masks.py index 1c65f6cf..e403a4c2 100644 --- a/swiftsimio/masks.py +++ b/swiftsimio/masks.py @@ -110,12 +110,12 @@ def _unpack_cell_metadata(self): for group, group_name in zip( self.metadata.present_groups, self.metadata.present_group_names ): - if self.metadata.filetype == "SOAP": - counts = count_handle["Subhalos"][:] - offsets = offset_handle["Subhalos"][:] - elif self.metadata.filetype == "snapshot": + if self.metadata.shared_cell_counts is None: counts = count_handle[group][:] offsets = offset_handle[group][:] + else: + counts = count_handle[self.metadata.shared_cell_counts][:] + offsets = offset_handle[self.metadata.shared_cell_counts][:] # When using MPI, we cannot assume that these are sorted. 
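
The counts/offsets branch above is the substance of this commit: SOAP catalogues store a single top-level set of cell counts and offsets (keyed "Subhalos") shared by every halo type, whereas snapshots store one set per particle group. Schematically, and assuming h5py-style group handles, the dispatch reduces to the sketch below; the MPI sort handling that follows it is unchanged.

    def cell_counts_and_offsets(metadata, count_handle, offset_handle, group):
        # shared_cell_counts is None for snapshots (per-group arrays) and a
        # key such as "Subhalos" for SOAP (one array shared by all types).
        key = metadata.shared_cell_counts or group
        return count_handle[key][:], offset_handle[key][:]
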
if sort is None: diff --git a/swiftsimio/metadata/objects.py b/swiftsimio/metadata/objects.py index 14928bfd..95dad3da 100644 --- a/swiftsimio/metadata/objects.py +++ b/swiftsimio/metadata/objects.py @@ -586,6 +586,7 @@ class SWIFTMetadata(ABC): units: SWIFTUnits header: dict masking_valid: bool = False + shared_cell_counts: str|None = None @abstractmethod def __init__(self, filename, units: SWIFTUnits): @@ -1215,6 +1216,7 @@ def get_nice_name(group): class SWIFTSOAPMetadata(SWIFTMetadata): masking_valid: bool = True + shared_cell_counts: str|None = "Subhalos" def __init__(self, filename: str, units: SWIFTUnits): self.filename = filename From e6b4a622b928245bdb5052b49072c065ad30be41 Mon Sep 17 00:00:00 2001 From: Josh Borrow Date: Thu, 19 Sep 2024 08:56:02 +0200 Subject: [PATCH 7/7] Formatting and cleanup --- swiftsimio/masks.py | 4 +- swiftsimio/metadata/objects.py | 875 +++++++++++++++++---------------- swiftsimio/reader.py | 8 +- tests/test_soap.py | 1 + 4 files changed, 471 insertions(+), 417 deletions(-) diff --git a/swiftsimio/masks.py b/swiftsimio/masks.py index e403a4c2..b3f406ca 100644 --- a/swiftsimio/masks.py +++ b/swiftsimio/masks.py @@ -51,7 +51,9 @@ def __init__(self, metadata: SWIFTMetadata, spatial_only=True): self.spatial_only = spatial_only if not self.metadata.masking_valid: - raise NotImplementedError(f"Masking not supported for {self.metadata.output_type} filetype") + raise NotImplementedError( + f"Masking not supported for {self.metadata.output_type} filetype" + ) if self.metadata.partial_snapshot: raise InvalidSnapshot( diff --git a/swiftsimio/metadata/objects.py b/swiftsimio/metadata/objects.py index 95dad3da..e3bd3b00 100644 --- a/swiftsimio/metadata/objects.py +++ b/swiftsimio/metadata/objects.py @@ -1,3 +1,11 @@ +""" +Objects describing the metadata in SWIFTsimIO files. There is a main +abstract class, ``SWIFTMetadata``, that contains the required base +methods to correctly represent the internal representation of an +HDF5 file to what SWIFTsimIO expects to be able to unpack into the +object notation (e.g. PartType0/Coordinates -> gas.coordinates). +""" + import numpy as np import unyt @@ -16,6 +24,334 @@ from typing import List, Optional + +class SWIFTMetadata(ABC): + """ + An abstract base class for all SWIFT-related file metadata. + """ + + # Underlying path to the file that this metadata is associated with. + filename: str + # The units object associated with this file. All SWIFT metadata objects + # must use this units system. + units: "SWIFTUnits" + # The header dictionary which will later be unpackaged according to the + # metadata fields. + header: dict + # Whether this type of file can be masked or not (this is a fixed parameter + # that should probably not be changed at run-time). + masking_valid: bool = False + # Whether this file uses shared metadata cell counts for all particle types + # (as is the case in SOAP) or whether each type (e.g. Gas, Dark Matter, etc.) + # has its own top-level cell grid counts. + shared_cell_counts: str | None = None + + @abstractmethod + def __init__(self, filename, units: "SWIFTUnits"): + raise NotImplementedError + + @property + def handle(self): + # Handle, which is shared with units. Units handles + # file opening and closing. 
+ return self.units.handle + + def load_groups(self): + """ + Loads the groups and metadata into objects: + + metadata._properties + + This contains eight arrays, + + metadata._properties.field_names + metadata._properties.field_paths + metadata._properties.field_units + metadata._properties.field_cosmologies + metadata._properties.field_descriptions + metadata._properties.field_compressions + metadata._properties.field_physicals + metadata._properties.field_valid_transforms + + As well as some more information about the group. + """ + + for group, name in zip(self.present_groups, self.present_group_names): + filetype_metadata = SWIFTGroupMetadata( + group=group, + group_name=name, + metadata=self, + scale_factor=self.scale_factor, + ) + setattr(self, f"{name}_properties", filetype_metadata) + + return + + def get_metadata(self): + """ + Loads the metadata as specified in metadata.metadata_fields. + """ + + for field, name in metadata.metadata_fields.metadata_fields_to_read.items(): + try: + setattr(self, name, dict(self.handle[field].attrs)) + except KeyError: + setattr(self, name, None) + + return + + def postprocess_header(self): + """ + Some minor postprocessing on the header to local variables. + """ + + # These are just read straight in to variables + header_unpack_arrays_units = metadata.metadata_fields.generate_units_header_unpack_arrays( + m=self.units.mass, + l=self.units.length, + t=self.units.time, + I=self.units.current, + T=self.units.temperature, + ) + + for field, name in metadata.metadata_fields.header_unpack_arrays.items(): + try: + if name in header_unpack_arrays_units.keys(): + setattr( + self, + name, + unyt.unyt_array( + self.header[field], units=header_unpack_arrays_units[name] + ), + ) + # This is required or we automatically get everything in CGS! + getattr(self, name).convert_to_units( + header_unpack_arrays_units[name] + ) + else: + # Must not have any units! Oh well. + setattr(self, name, self.header[field]) + except KeyError: + # Must not be present, just skip it + continue + + # Now unpack the 'mass table' type items: + for field, name in metadata.metadata_fields.header_unpack_mass_tables.items(): + try: + setattr( + self, + name, + MassTable( + base_mass_table=self.header[field], mass_units=self.units.mass + ), + ) + except KeyError: + setattr( + self, + name, + MassTable( + base_mass_table=np.zeros( + len(metadata.particle_types.particle_name_underscores) + ), + mass_units=self.units.mass, + ), + ) + + # These must be unpacked as 'real' strings (i.e. converted to utf-8) + + for field, name in metadata.metadata_fields.header_unpack_string.items(): + try: + # Deal with h5py's quirkiness that fixed-sized and variable-sized + # strings are read as strings or bytes + # See: https://github.com/h5py/h5py/issues/2172 + raw = self.header[field] + try: + string = raw.decode("utf-8") + except AttributeError: + string = raw + setattr(self, name, string) + except KeyError: + # Must not be present, just skip it + setattr(self, name, "") + + # These must be unpacked as they are stored as length-1 arrays + + header_unpack_float_units = metadata.metadata_fields.generate_units_header_unpack_single_float( + m=self.units.mass, + l=self.units.length, + t=self.units.time, + I=self.units.current, + T=self.units.temperature, + ) + + for field, names in metadata.metadata_fields.header_unpack_single_float.items(): + try: + if isinstance(names, list): + # Sometimes we store a list in case we have multiple names, for example + # Redshift -> metadata.redshift AND metadata.z. 
Can't just do the iteration + # because we may loop over the letters in the string. + for variable in names: + if variable in header_unpack_float_units.keys(): + # We have an associated unit! + unit = header_unpack_float_units[variable] + setattr( + self, + variable, + unyt.unyt_quantity(self.header[field][0], units=unit), + ) + else: + # No unit + setattr(self, variable, self.header[field][0]) + else: + # We can just check for the unit and set the attribute + variable = names + if variable in header_unpack_float_units.keys(): + # We have an associated unit! + unit = header_unpack_float_units[variable] + setattr( + self, + variable, + unyt.unyt_quantity(self.header[field][0], units=unit), + ) + else: + # No unit + setattr(self, variable, self.header[field][0]) + except KeyError: + # Must not be present, just skip it + continue + + # These are special cases, sorry! + # Date and time of snapshot dump + try: + try: + # Try and decode bytes, otherwise save raw string + snapshot_date = self.header.get( + "SnapshotDate", self.header.get("Snapshot date", b"") + ).decode("utf-8") + except AttributeError: + snapshot_date = self.header.get( + "SnapshotDate", self.header.get("Snapshot date", "") + ) + try: + self.snapshot_date = datetime.strptime( + snapshot_date, "%H:%M:%S %Y-%m-%d %Z" + ) + except ValueError: + # Backwards compatibility; this was used previously due to simplicity + # but is not portable between regions. So if you ran a simulation on + # a British (en_GB) machine, and then tried to read on a Dutch + # machine (nl_NL), this would _not_ work because %c is different. + try: + self.snapshot_date = datetime.strptime(snapshot_date, "%c\n") + except ValueError: + # Oh dear this has gone _very_wrong. Let's just keep it as a string. + self.snapshot_date = snapshot_date + except KeyError: + # Old file + pass + + # get photon group edges RT dataset from the SubgridScheme group + try: + self.photon_group_edges = ( + self.handle["SubgridScheme/PhotonGroupEdges"][:] / self.units.time + ) + except KeyError: + self.photon_group_edges = None + + # get reduced speed of light RT dataset from the SubgridScheme group + try: + self.reduced_lightspeed = ( + self.handle["SubgridScheme/ReducedLightspeed"][0] + * self.units.length + / self.units.time + ) + except KeyError: + self.reduced_lightspeed = None + + # Store these separately as self.n_gas = number of gas particles for example + for (part_number, (_, part_name)) in enumerate( + metadata.particle_types.particle_name_underscores.items() + ): + try: + setattr(self, f"n_{part_name}", self.num_part[part_number]) + except IndexError: + # Backwards compatibility; mass/number table can change size. + setattr(self, f"n_{part_name}", 0) + + # Need to unpack the gas gamma for cosmology + try: + self.gas_gamma = self.hydro_scheme["Adiabatic index"] + except (KeyError, TypeError): + # We can set a default and print a message whenever we require this value + self.gas_gamma = None + + try: + self.a = self.scale_factor + except AttributeError: + # These must always be present for the initialisation of cosmology properties + self.a = 1.0 + self.scale_factor = 1.0 + + return + + def extract_cosmology(self): + """ + Creates an astropy.cosmology object from the internal cosmology system. + + This will be saved as ``self.cosmology``. 
+ """ + + if self.cosmology_raw is not None: + cosmo = self.cosmology_raw + else: + cosmo = {"Cosmological run": 0} + + if cosmo.get("Cosmological run", 0): + self.cosmology = swift_cosmology_to_astropy(cosmo, units=self.units) + else: + self.cosmology = None + + return + + @property + @abstractmethod + def present_groups(self) -> list[str]: + """ + A property giving the present particle groups in the file to be unpackaged + into top-level properties. For instance, in a regular snapshot, this would be + ["PartType0", "PartType1", "PartType4", ...]. In SOAP, this would be + ["SO/200_crit", "SO/200_mean", ...], i.e. one per aperture. + """ + raise NotImplementedError + + @property + @abstractmethod + def present_group_names(self) -> list[str]: + """ + A property giving the mapping for the names in ``present_groups`` to what the + objects are called on the SWIFTsimIO objects. For instance, in a regular snapshot, + this would be ["gas", "dark_matter", "stars", ...]. In SOAP, this would be + ["spherical_overdensity_200_crit", ...]. + """ + raise NotImplementedError + + @property + def partial_snapshot(self) -> bool: + """ + A property defining whether this is a partial snapshot (e.g. a `.0.hdf5` file) or + a full/virtual snapsoht covering all particles. This must be computed at run-time. + """ + return False + + @staticmethod + @abstractmethod + def get_nice_name(group: str) -> str: + """ + Converts the group name to a 'nice name' (i.e. for printing) for the SWIFTsimIO objects. + """ + raise NotImplementedError + + class MassTable(object): """ Extracts a mass table to local variables based on the @@ -114,7 +450,6 @@ def __str__(self): def __repr__(self): return f"{self.__str__()}. Raw data: " "\n" f"{self.data}." - class SWIFTGroupMetadata(object): @@ -148,7 +483,11 @@ class SWIFTGroupMetadata(object): """ def __init__( - self, group: str, group_name: str, metadata: "SWIFTMetadata", scale_factor: float + self, + group: str, + group_name: str, + metadata: "SWIFTMetadata", + scale_factor: float, ): """ Constructor for SWIFTGroupMetadata class @@ -460,430 +799,128 @@ def __init__(self, filename: Path, handle: Optional[h5py.File] = None): """ SWIFTUnits constructor - Sets filename for file to read units from and gets unit dictionary - - Parameters - ---------- - - filename : Path - Name of file to read units from - - handle: h5py.File, optional - The h5py file handle, optional. Will open a new handle with the - filename if required. - - """ - self.filename = filename - self._handle = handle - - self.get_unit_dictionary() - - return - - @property - def handle(self): - """ - Property that gets the file handle, which can be shared - with other objects for efficiency reasons. - """ - if isinstance(self._handle, h5py.File): - # Can be open or closed, let's test. - try: - file = self._handle.file - - return self._handle - except ValueError: - # This will be the case if there is no active file handle - pass - - self._handle = h5py.File(self.filename, "r") - - return self._handle - - def get_unit_dictionary(self): - """ - Store unit data and metadata - - Length 1 arrays are used to store the unit data. This dictionary - also contains the metadata information that connects the unyt - objects to the names that are stored in the SWIFT snapshots. - """ - - self.units = { - name: unyt.unyt_quantity( - value[0], units=metadata.unit_types.unit_names_to_unyt[name] - ) - for name, value in self.handle["Units"].attrs.items() - } - - # We now unpack this into variables. 
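
The abstract interface laid out above (present_groups, present_group_names, get_nice_name, plus the masking_valid and shared_cell_counts class attributes) is everything a new output type has to provide. A minimal, hypothetical subclass mirroring the initialisation order used by the concrete readers further down; this is a sketch of the contract, not part of the patch:

    class SWIFTExampleMetadata(SWIFTMetadata):
        # No cells metadata in this hypothetical output type, so no masking.
        masking_valid: bool = False

        def __init__(self, filename, units):
            self.filename = filename
            self.units = units

            self.get_metadata()
            self.postprocess_header()
            self.load_groups()

            self.handle.close()

        @property
        def present_groups(self):
            # HDF5 group names as they appear in the file.
            return ["Examples"]

        @property
        def present_group_names(self):
            # Attribute names exposed on the loaded SWIFTsimIO object.
            return ["examples"]

        @staticmethod
        def get_nice_name(group):
            return "Examples"
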
- self.mass = metadata.unit_types.find_nearest_base_unit( - self.units["Unit mass in cgs (U_M)"], "mass" - ) - self.length = metadata.unit_types.find_nearest_base_unit( - self.units["Unit length in cgs (U_L)"], "length" - ) - self.time = metadata.unit_types.find_nearest_base_unit( - self.units["Unit time in cgs (U_t)"], "time" - ) - self.current = metadata.unit_types.find_nearest_base_unit( - self.units["Unit current in cgs (U_I)"], "current" - ) - self.temperature = metadata.unit_types.find_nearest_base_unit( - self.units["Unit temperature in cgs (U_T)"], "temperature" - ) - - def __del__(self): - if isinstance(self._handle, h5py.File): - self._handle.close() - - - -def metadata_discriminator(filename: str, units: SWIFTUnits) -> "SWIFTMetadata": - """ - Discriminates between the different types of metadata objects read from SWIFT-compatible - files. - - Parameters - ---------- - - filename : str - Name of the file to read metadata from - - units : SWIFTUnits - The units object associated with the file - - - Returns - ------- - - SWIFTMetadata - The appropriate metadata object for the file type - """ - # Old snapshots did not have this attribute, so we need to default to FullVolume - file_type = units.handle["Header"].attrs.get("OutputType", "FullVolume") - - if isinstance(file_type, bytes): - file_type = file_type.decode("utf-8") - - if file_type in ["FullVolume"]: - return SWIFTSnapshotMetadata(filename, units) - elif file_type in ["SOAP"]: - return SWIFTSOAPMetadata(filename, units) - elif file_type in ["FOF"]: - return SWIFTFOFMetadata(filename, units) - else: - raise ValueError(f"File type {file_type} not recognised.") - - - -class SWIFTMetadata(ABC): - """ - An abstract base class for all SWIFT-related file metadata. - """ - - filename: str - units: SWIFTUnits - header: dict - masking_valid: bool = False - shared_cell_counts: str|None = None - - @abstractmethod - def __init__(self, filename, units: SWIFTUnits): - raise NotImplementedError - - @property - def handle(self): - # Handle, which is shared with units. Units handles - # file opening and closing. - return self.units.handle - - def load_groups(self): - """ - Loads the groups and metadata into objects: - - metadata._properties - - This contains eight arrays, - - metadata._properties.field_names - metadata._properties.field_paths - metadata._properties.field_units - metadata._properties.field_cosmologies - metadata._properties.field_descriptions - metadata._properties.field_compressions - metadata._properties.field_physicals - metadata._properties.field_valid_transforms - - As well as some more information about the group. - """ - - for group, name in zip(self.present_groups, self.present_group_names): - filetype_metadata = SWIFTGroupMetadata( - group=group, - group_name=name, - metadata=self, - scale_factor=self.scale_factor, - ) - setattr(self, f"{name}_properties", filetype_metadata) - - return - - def get_metadata(self): - """ - Loads the metadata as specified in metadata.metadata_fields. - """ - - for field, name in metadata.metadata_fields.metadata_fields_to_read.items(): - try: - setattr(self, name, dict(self.handle[field].attrs)) - except KeyError: - setattr(self, name, None) - - return - - def postprocess_header(self): - """ - Some minor postprocessing on the header to local variables. 
- """ - - # These are just read straight in to variables - header_unpack_arrays_units = metadata.metadata_fields.generate_units_header_unpack_arrays( - m=self.units.mass, - l=self.units.length, - t=self.units.time, - I=self.units.current, - T=self.units.temperature, - ) - - for field, name in metadata.metadata_fields.header_unpack_arrays.items(): - try: - if name in header_unpack_arrays_units.keys(): - setattr( - self, - name, - unyt.unyt_array( - self.header[field], units=header_unpack_arrays_units[name] - ), - ) - # This is required or we automatically get everything in CGS! - getattr(self, name).convert_to_units( - header_unpack_arrays_units[name] - ) - else: - # Must not have any units! Oh well. - setattr(self, name, self.header[field]) - except KeyError: - # Must not be present, just skip it - continue - - # Now unpack the 'mass table' type items: - for field, name in metadata.metadata_fields.header_unpack_mass_tables.items(): - try: - setattr( - self, - name, - MassTable( - base_mass_table=self.header[field], mass_units=self.units.mass - ), - ) - except KeyError: - setattr( - self, - name, - MassTable( - base_mass_table=np.zeros( - len(metadata.particle_types.particle_name_underscores) - ), - mass_units=self.units.mass, - ), - ) - - # These must be unpacked as 'real' strings (i.e. converted to utf-8) - - for field, name in metadata.metadata_fields.header_unpack_string.items(): - try: - # Deal with h5py's quirkiness that fixed-sized and variable-sized - # strings are read as strings or bytes - # See: https://github.com/h5py/h5py/issues/2172 - raw = self.header[field] - try: - string = raw.decode("utf-8") - except AttributeError: - string = raw - setattr(self, name, string) - except KeyError: - # Must not be present, just skip it - setattr(self, name, "") - - # These must be unpacked as they are stored as length-1 arrays + Sets filename for file to read units from and gets unit dictionary - header_unpack_float_units = metadata.metadata_fields.generate_units_header_unpack_single_float( - m=self.units.mass, - l=self.units.length, - t=self.units.time, - I=self.units.current, - T=self.units.temperature, - ) + Parameters + ---------- - for field, names in metadata.metadata_fields.header_unpack_single_float.items(): - try: - if isinstance(names, list): - # Sometimes we store a list in case we have multiple names, for example - # Redshift -> metadata.redshift AND metadata.z. Can't just do the iteration - # because we may loop over the letters in the string. - for variable in names: - if variable in header_unpack_float_units.keys(): - # We have an associated unit! - unit = header_unpack_float_units[variable] - setattr( - self, - variable, - unyt.unyt_quantity(self.header[field][0], units=unit), - ) - else: - # No unit - setattr(self, variable, self.header[field][0]) - else: - # We can just check for the unit and set the attribute - variable = names - if variable in header_unpack_float_units.keys(): - # We have an associated unit! - unit = header_unpack_float_units[variable] - setattr( - self, - variable, - unyt.unyt_quantity(self.header[field][0], units=unit), - ) - else: - # No unit - setattr(self, variable, self.header[field][0]) - except KeyError: - # Must not be present, just skip it - continue + filename : Path + Name of file to read units from - # These are special cases, sorry! 
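
Scalar header unpacking above follows a single pattern: SWIFT writes scalars as length-1 arrays, one field may fan out to several attribute names (Redshift becomes both metadata.redshift and metadata.z), and a unit is attached whenever the target name has one registered. A reduced sketch of that pattern, with illustrative field and unit tables:

    import unyt

    header = {"Redshift": [2.0], "Time": [0.25]}  # attrs as read from HDF5
    name_map = {"Redshift": ["redshift", "z"], "Time": "time"}
    unit_map = {"time": unyt.s}  # only some target names carry units

    unpacked = {}
    for field, names in name_map.items():
        # Normalise the one-or-many mapping to a list so a bare string is
        # not iterated letter by letter, then unpack the length-1 array.
        for variable in names if isinstance(names, list) else [names]:
            value = header[field][0]
            if variable in unit_map:
                value = unyt.unyt_quantity(value, units=unit_map[variable])
            unpacked[variable] = value

    print(unpacked["z"], unpacked["time"])  # -> 2.0 0.25 s
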
- # Date and time of snapshot dump - try: - try: - # Try and decode bytes, otherwise save raw string - snapshot_date = self.header.get( - "SnapshotDate", self.header.get("Snapshot date", b"") - ).decode("utf-8") - except AttributeError: - snapshot_date = self.header.get( - "SnapshotDate", self.header.get("Snapshot date", "") - ) - try: - self.snapshot_date = datetime.strptime( - snapshot_date, "%H:%M:%S %Y-%m-%d %Z" - ) - except ValueError: - # Backwards compatibility; this was used previously due to simplicity - # but is not portable between regions. So if you ran a simulation on - # a British (en_GB) machine, and then tried to read on a Dutch - # machine (nl_NL), this would _not_ work because %c is different. - try: - self.snapshot_date = datetime.strptime(snapshot_date, "%c\n") - except ValueError: - # Oh dear this has gone _very_wrong. Let's just keep it as a string. - self.snapshot_date = snapshot_date - except KeyError: - # Old file - pass + handle: h5py.File, optional + The h5py file handle, optional. Will open a new handle with the + filename if required. - # get photon group edges RT dataset from the SubgridScheme group - try: - self.photon_group_edges = ( - self.handle["SubgridScheme/PhotonGroupEdges"][:] / self.units.time - ) - except KeyError: - self.photon_group_edges = None + """ + self.filename = filename + self._handle = handle - # get reduced speed of light RT dataset from the SubgridScheme group - try: - self.reduced_lightspeed = ( - self.handle["SubgridScheme/ReducedLightspeed"][0] - * self.units.length - / self.units.time - ) - except KeyError: - self.reduced_lightspeed = None + self.get_unit_dictionary() - # Store these separately as self.n_gas = number of gas particles for example - for (part_number, (_, part_name)) in enumerate( - metadata.particle_types.particle_name_underscores.items() - ): + return + + @property + def handle(self): + """ + Property that gets the file handle, which can be shared + with other objects for efficiency reasons. + """ + if isinstance(self._handle, h5py.File): + # Can be open or closed, let's test. try: - setattr(self, f"n_{part_name}", self.num_part[part_number]) - except IndexError: - # Backwards compatibility; mass/number table can change size. - setattr(self, f"n_{part_name}", 0) + file = self._handle.file - # Need to unpack the gas gamma for cosmology - try: - self.gas_gamma = self.hydro_scheme["Adiabatic index"] - except (KeyError, TypeError): - # We can set a default and print a message whenever we require this value - self.gas_gamma = None + return self._handle + except ValueError: + # This will be the case if there is no active file handle + pass - try: - self.a = self.scale_factor - except AttributeError: - # These must always be present for the initialisation of cosmology properties - self.a = 1.0 - self.scale_factor = 1.0 + self._handle = h5py.File(self.filename, "r") - return - + return self._handle - def extract_cosmology(self): + def get_unit_dictionary(self): """ - Creates an astropy.cosmology object from the internal cosmology system. + Store unit data and metadata - This will be saved as ``self.cosmology``. + Length 1 arrays are used to store the unit data. This dictionary + also contains the metadata information that connects the unyt + objects to the names that are stored in the SWIFT snapshots. 
""" - if self.cosmology_raw is not None: - cosmo = self.cosmology_raw - else: - cosmo = {"Cosmological run": 0} + self.units = { + name: unyt.unyt_quantity( + value[0], units=metadata.unit_types.unit_names_to_unyt[name] + ) + for name, value in self.handle["Units"].attrs.items() + } - if cosmo.get("Cosmological run", 0): - self.cosmology = swift_cosmology_to_astropy(cosmo, units=self.units) - else: - self.cosmology = None + # We now unpack this into variables. + self.mass = metadata.unit_types.find_nearest_base_unit( + self.units["Unit mass in cgs (U_M)"], "mass" + ) + self.length = metadata.unit_types.find_nearest_base_unit( + self.units["Unit length in cgs (U_L)"], "length" + ) + self.time = metadata.unit_types.find_nearest_base_unit( + self.units["Unit time in cgs (U_t)"], "time" + ) + self.current = metadata.unit_types.find_nearest_base_unit( + self.units["Unit current in cgs (U_I)"], "current" + ) + self.temperature = metadata.unit_types.find_nearest_base_unit( + self.units["Unit temperature in cgs (U_T)"], "temperature" + ) + + def __del__(self): + if isinstance(self._handle, h5py.File): + self._handle.close() + + +def metadata_discriminator(filename: str, units: SWIFTUnits) -> "SWIFTMetadata": + """ + Discriminates between the different types of metadata objects read from SWIFT-compatible + files. + + Parameters + ---------- + + filename : str + Name of the file to read metadata from + + units : SWIFTUnits + The units object associated with the file - return - - @property - @abstractmethod - def present_groups(self): - raise NotImplementedError - - @property - @abstractmethod - def present_group_names(self): - raise NotImplementedError - @property - def partial_snapshot(self) -> bool: - return False + Returns + ------- -class SWIFTSnapshotMetadata(SWIFTMetadata): + SWIFTMetadata + The appropriate metadata object for the file type """ - Loads all metadata (apart from Units, those are handled by SWIFTUnits) - into dictionaries. + # Old snapshots did not have this attribute, so we need to default to FullVolume + file_type = units.handle["Header"].attrs.get("OutputType", "FullVolume") + + if isinstance(file_type, bytes): + file_type = file_type.decode("utf-8") + + if file_type in ["FullVolume"]: + return SWIFTSnapshotMetadata(filename, units) + elif file_type in ["SOAP"]: + return SWIFTSOAPMetadata(filename, units) + elif file_type in ["FOF"]: + return SWIFTFOFMetadata(filename, units) + else: + raise ValueError(f"File type {file_type} not recognised.") + - This also does some extra parsing on some well-used metadata. +class SWIFTSnapshotMetadata(SWIFTMetadata): + """ + SWIFT Metadata for a snapshot-style file containing particle + information. For more documentation, see the main :cls:`SWIFTMetadata` + class. """ - # Name of the file that has been read from - filename: str - # Unit instance associated with this file - units: SWIFTUnits - # Header dictionary, metadata about snapshot. - header: dict masking_valid: bool = True def __init__(self, filename, units: SWIFTUnits): @@ -1002,7 +1039,6 @@ def convert(name): return - @property def present_groups(self): """ @@ -1174,13 +1210,18 @@ def partial_snapshot(self) -> bool: # collating multiple sub-snapshots together have num_files_per_snapshot = 1. return self.num_files_per_snapshot > 1 - + @staticmethod def get_nice_name(group): return metadata.particle_types.particle_name_class[group] + class SWIFTFOFMetadata(SWIFTMetadata): - masking_valid: bool = False + """ + SWIFT Metadata for a snapshot-style file containing particle + information. 
For more documentation, see the main :cls:`SWIFTMetadata` + class. + """ def __init__(self, filename: str, units: SWIFTUnits): self.filename = filename @@ -1195,28 +1236,35 @@ def __init__(self, filename: str, units: SWIFTUnits): self.handle.close() return - + @property def present_groups(self): """ The groups containing datasets that are present in the file. """ return ["Groups"] - + @property def present_group_names(self): """ The names of the groups that we want to expose. """ return ["fof_groups"] - + @staticmethod def get_nice_name(group): return "FOFGroups" + class SWIFTSOAPMetadata(SWIFTMetadata): + """ + SWIFT Metadata for a snapshot-style file containing particle + information. For more documentation, see the main :cls:`SWIFTMetadata` + class. + """ + masking_valid: bool = True - shared_cell_counts: str|None = "Subhalos" + shared_cell_counts: str = "Subhalos" def __init__(self, filename: str, units: SWIFTUnits): self.filename = filename @@ -1231,22 +1279,21 @@ def __init__(self, filename: str, units: SWIFTUnits): self.handle.close() return - + @property def present_groups(self): """ The groups containing datasets that are present in the file. """ return self.subhalo_types - + @property def present_group_names(self): """ The names of the groups that we want to expose. """ return [ - metadata.soap_types.get_soap_name_underscore(x) - for x in self.present_groups + metadata.soap_types.get_soap_name_underscore(x) for x in self.present_groups ] @staticmethod diff --git a/swiftsimio/reader.py b/swiftsimio/reader.py index 6a9f0654..cc06f571 100644 --- a/swiftsimio/reader.py +++ b/swiftsimio/reader.py @@ -15,7 +15,12 @@ from swiftsimio.accelerated import read_ranges_from_file from swiftsimio.objects import cosmo_array, cosmo_factor, a -from swiftsimio.metadata.objects import metadata_discriminator, SWIFTUnits, SWIFTGroupMetadata, SWIFTMetadata +from swiftsimio.metadata.objects import ( + metadata_discriminator, + SWIFTUnits, + SWIFTGroupMetadata, + SWIFTMetadata, +) import re import h5py @@ -29,7 +34,6 @@ from typing import Union, Callable, List, Optional - def generate_getter( filename, name: str, diff --git a/tests/test_soap.py b/tests/test_soap.py index f835474b..0a6a4b0d 100644 --- a/tests/test_soap.py +++ b/tests/test_soap.py @@ -6,6 +6,7 @@ from swiftsimio import load + @requires("soap_example.hdf5") def test_soap_can_load(filename): data = load(filename)
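
As a closing illustration of what the series achieves end to end: load() now routes through metadata_discriminator, so snapshot, SOAP, and FOF files all come back with the appropriate metadata class, group names, and nice names, with no filetype branching left in the reader. A usage sketch in the spirit of the new test (the FOF filename is hypothetical):

    from swiftsimio import load

    # SOAP catalogue: groups appear under underscore names such as
    # halos.spherical_overdensity_200_crit.
    halos = load("soap_example.hdf5")
    print(type(halos.metadata).__name__)  # -> SWIFTSOAPMetadata

    # FOF catalogue: the single "Groups" HDF5 group appears as fof.fof_groups.
    fof = load("fof_example.hdf5")
    print(type(fof.metadata).__name__)  # -> SWIFTFOFMetadata
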