unit table descriptions for phy and kilosort (#1053)

catalystneuro · Sep 10, 2024 · 0aa9087 · 0aa9087
1 parent ab37a4e
commit 0aa9087
Show file tree

Hide file tree

Showing 4 changed files with 156 additions and 0 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -16,6 +16,7 @@
 ### Improvements
 * Using ruff to enforce existence of public classes' docstrings [PR #1034](https://github.com/catalystneuro/neuroconv/pull/1034)
 * Separated tests that use external data by modality [PR #1049](https://github.com/catalystneuro/neuroconv/pull/1049)
+* Added Unit Table descriptions for phy and kilosort: [PR #1053](https://github.com/catalystneuro/neuroconv/pull/1053)
 * Using ruff to enforce existence of public functions' docstrings [PR #1062](https://github.com/catalystneuro/neuroconv/pull/1062)
 * Improved device metadata of `IntanRecordingInterface` by adding the type of controller used [PR #1059](https://github.com/catalystneuro/neuroconv/pull/1059)
 

diff --git a/src/neuroconv/datainterfaces/ecephys/kilosort/kilosortdatainterface.py b/src/neuroconv/datainterfaces/ecephys/kilosort/kilosortdatainterface.py
@@ -36,3 +36,30 @@ def __init__(
         verbose: bool, default: True
         """
         super().__init__(folder_path=folder_path, keep_good_only=keep_good_only, verbose=verbose)
+
+    def get_metadata(self):
+        """
+        Return the parent metadata with descriptions attached to the known unit properties.
+
+        The entry ``metadata["Ecephys"]["UnitProperties"]`` is set to a list of
+        ``{"name": ..., "description": ...}`` items, replacing whatever was stored under that key.
+
+        Returns
+        -------
+        metadata
+            The metadata object obtained from the parent class, with the unit property descriptions set.
+        """
+        # More info on these fields in the Kilosort save_to_phy() docstring: https://github.com/MouseLand/Kilosort/blob/main/kilosort/io.py
+        # or in the phy documentation: https://github.com/cortex-lab/phy/blob/master/phy/apps/base.py
+        unit_property_descriptions = (
+            ("n_spikes", "Number of spikes recorded from each unit."),
+            ("fr", "Average firing rate of each unit."),
+            ("depth", "Estimated depth of each unit in micrometers."),
+            ("Amplitude", "Per-template amplitudes, computed as the L2 norm of the template."),
+            (
+                "ContamPct",
+                "Contamination rate for each template, computed as fraction of refractory period violations relative to expectation based on a Poisson process.",
+            ),
+            (
+                "KSLabel",
+                "Label indicating whether each template is 'mua' (multi-unit activity) or 'good' (refractory).",
+            ),
+            ("original_cluster_id", "Original cluster ID assigned by Kilosort."),
+            (
+                "amp",
+                "For every template, the maximum amplitude of the template waveforms across all channels.",
+            ),
+            ("ch", "The channel label of the best channel, as defined by the user."),
+            ("sh", "The shank label of the best channel."),
+        )
+
+        metadata = super().get_metadata()
+        metadata["Ecephys"]["UnitProperties"] = [
+            {"name": property_name, "description": property_description}
+            for property_name, property_description in unit_property_descriptions
+        ]
+        return metadata
diff --git a/src/neuroconv/datainterfaces/ecephys/phy/phydatainterface.py b/src/neuroconv/datainterfaces/ecephys/phy/phydatainterface.py
@@ -43,3 +43,30 @@ def __init__(
         verbose : bool, default: True
         """
         super().__init__(folder_path=folder_path, exclude_cluster_groups=exclude_cluster_groups, verbose=verbose)
+
+    def get_metadata(self):
+        """
+        Return the parent metadata with descriptions attached to the known unit properties.
+
+        The entry ``metadata["Ecephys"]["UnitProperties"]`` is set to a list of
+        ``{"name": ..., "description": ...}`` items, replacing whatever was stored under that key.
+
+        Returns
+        -------
+        metadata
+            The metadata object obtained from the parent class, with the unit property descriptions set.
+        """
+        # More info on these fields in the Kilosort save_to_phy() docstring: https://github.com/MouseLand/Kilosort/blob/main/kilosort/io.py
+        # or in the phy documentation: https://github.com/cortex-lab/phy/blob/master/phy/apps/base.py
+        unit_property_descriptions = (
+            ("n_spikes", "Number of spikes recorded from each unit."),
+            ("fr", "Average firing rate of each unit."),
+            ("depth", "Estimated depth of each unit in micrometers."),
+            ("Amplitude", "Per-template amplitudes, computed as the L2 norm of the template."),
+            (
+                "ContamPct",
+                "Contamination rate for each template, computed as fraction of refractory period violations relative to expectation based on a Poisson process.",
+            ),
+            (
+                "KSLabel",
+                "Label indicating whether each template is 'mua' (multi-unit activity) or 'good' (refractory).",
+            ),
+            ("original_cluster_id", "Original cluster ID assigned by Kilosort."),
+            (
+                "amp",
+                "For every template, the maximum amplitude of the template waveforms across all channels.",
+            ),
+            ("ch", "The channel label of the best channel, as defined by the user."),
+            ("sh", "The shank label of the best channel."),
+        )
+
+        metadata = super().get_metadata()
+        metadata["Ecephys"]["UnitProperties"] = [
+            {"name": property_name, "description": property_description}
+            for property_name, property_description in unit_property_descriptions
+        ]
+        return metadata
diff --git a/tests/test_on_data/ecephys/test_sorting_interfaces.py b/tests/test_on_data/ecephys/test_sorting_interfaces.py
@@ -7,6 +7,7 @@
     BlackrockRecordingInterface,
     BlackrockSortingInterface,
     CellExplorerSortingInterface,
+    KiloSortSortingInterface,
     NeuralynxSortingInterface,
     NeuroScopeSortingInterface,
     PhySortingInterface,
@@ -193,6 +194,106 @@ class TestPhySortingInterface(SortingExtractorInterfaceTestMixin):
     interface_kwargs = dict(folder_path=str(DATA_PATH / "phy" / "phy_example_0"))
     save_directory = OUTPUT_PATH
 
+    def check_extracted_metadata(self, metadata: dict):
+        """Check that the metadata lists every unit property, in order, with its expected description."""
+        expected_descriptions = (
+            ("n_spikes", "Number of spikes recorded from each unit."),
+            ("fr", "Average firing rate of each unit."),
+            ("depth", "Estimated depth of each unit in micrometers."),
+            ("Amplitude", "Per-template amplitudes, computed as the L2 norm of the template."),
+            (
+                "ContamPct",
+                "Contamination rate for each template, computed as fraction of refractory period violations relative to expectation based on a Poisson process.",
+            ),
+            (
+                "KSLabel",
+                "Label indicating whether each template is 'mua' (multi-unit activity) or 'good' (refractory).",
+            ),
+            ("original_cluster_id", "Original cluster ID assigned by Kilosort."),
+            (
+                "amp",
+                "For every template, the maximum amplitude of the template waveforms across all channels.",
+            ),
+            ("ch", "The channel label of the best channel, as defined by the user."),
+            ("sh", "The shank label of the best channel."),
+        )
+        expected_unit_properties = [
+            {"name": property_name, "description": property_description}
+            for property_name, property_description in expected_descriptions
+        ]
+        assert metadata["Ecephys"]["UnitProperties"] == expected_unit_properties
+
+    def check_units_table_propagation(self):
+        """
+        Check that unit property descriptions from the metadata end up on the units table columns.
+
+        An NWB file is created from the interface metadata (a ``session_start_time`` is filled in
+        with the current local time when the metadata has none) and the description of each
+        expected units column is compared with the expected text.
+        """
+        # example data does not contain n_spikes, fr, depth, amp, ch, and sh
+        expected_column_descriptions = {
+            "Amplitude": "Per-template amplitudes, computed as the L2 norm of the template.",
+            "ContamPct": "Contamination rate for each template, computed as fraction of refractory period violations relative to expectation based on a Poisson process.",
+            "KSLabel": "Label indicating whether each template is 'mua' (multi-unit activity) or 'good' (refractory).",
+            "original_cluster_id": "Original cluster ID assigned by Kilosort.",
+        }
+
+        metadata = self.interface.get_metadata()
+        nwbfile_metadata = metadata["NWBFile"]
+        if "session_start_time" not in nwbfile_metadata:
+            nwbfile_metadata.update(session_start_time=datetime.now().astimezone())
+        nwbfile = self.interface.create_nwbfile(metadata=metadata, **self.conversion_options)
+
+        units_table = nwbfile.units
+        for column_name, expected_description in expected_column_descriptions.items():
+            assert units_table[column_name].description == expected_description
+
+    def run_custom_checks(self):
+        """Run the extra check of this test class: propagation of unit property descriptions to the units table."""
+        self.check_units_table_propagation()
+
+
+class TestKilosortSortingInterface(SortingExtractorInterfaceTestMixin):
+    """Tests for KiloSortSortingInterface, run against the ``phy/phy_example_0`` example folder."""
+
+    data_interface_cls = KiloSortSortingInterface
+    interface_kwargs = {"folder_path": str(DATA_PATH / "phy" / "phy_example_0")}
+    save_directory = OUTPUT_PATH
+
+    def check_extracted_metadata(self, metadata: dict):
+        """Check that the metadata lists every unit property, in order, with its expected description."""
+        expected_descriptions = (
+            ("n_spikes", "Number of spikes recorded from each unit."),
+            ("fr", "Average firing rate of each unit."),
+            ("depth", "Estimated depth of each unit in micrometers."),
+            ("Amplitude", "Per-template amplitudes, computed as the L2 norm of the template."),
+            (
+                "ContamPct",
+                "Contamination rate for each template, computed as fraction of refractory period violations relative to expectation based on a Poisson process.",
+            ),
+            (
+                "KSLabel",
+                "Label indicating whether each template is 'mua' (multi-unit activity) or 'good' (refractory).",
+            ),
+            ("original_cluster_id", "Original cluster ID assigned by Kilosort."),
+            (
+                "amp",
+                "For every template, the maximum amplitude of the template waveforms across all channels.",
+            ),
+            ("ch", "The channel label of the best channel, as defined by the user."),
+            ("sh", "The shank label of the best channel."),
+        )
+        expected_unit_properties = [
+            {"name": property_name, "description": property_description}
+            for property_name, property_description in expected_descriptions
+        ]
+        assert metadata["Ecephys"]["UnitProperties"] == expected_unit_properties
+
+    def check_units_table_propagation(self):
+        """
+        Check that unit property descriptions from the metadata end up on the units table columns.
+
+        An NWB file is created from the interface metadata (a ``session_start_time`` is filled in
+        with the current local time when the metadata has none) and the description of each
+        expected units column is compared with the expected text.
+        """
+        # example data does not contain n_spikes, fr, depth, amp, ch, and sh
+        expected_column_descriptions = {
+            "Amplitude": "Per-template amplitudes, computed as the L2 norm of the template.",
+            "ContamPct": "Contamination rate for each template, computed as fraction of refractory period violations relative to expectation based on a Poisson process.",
+            "KSLabel": "Label indicating whether each template is 'mua' (multi-unit activity) or 'good' (refractory).",
+            "original_cluster_id": "Original cluster ID assigned by Kilosort.",
+        }
+
+        metadata = self.interface.get_metadata()
+        nwbfile_metadata = metadata["NWBFile"]
+        if "session_start_time" not in nwbfile_metadata:
+            nwbfile_metadata.update(session_start_time=datetime.now().astimezone())
+        nwbfile = self.interface.create_nwbfile(metadata=metadata, **self.conversion_options)
+
+        units_table = nwbfile.units
+        for column_name, expected_description in expected_column_descriptions.items():
+            assert units_table[column_name].description == expected_description
+
+    def run_custom_checks(self):
+        """Run the extra check of this test class: propagation of unit property descriptions to the units table."""
+        self.check_units_table_propagation()
+
 
 class TestPlexonSortingInterface(SortingExtractorInterfaceTestMixin):
     data_interface_cls = PlexonSortingInterface