Key length solve: Reduce varchars (#664)

* Contrib doc edits * #531 * #532: PR Template * Fix numbering * Typo * #658 config. Start to phase out config dir * Update changelog * Add docstrings. Separate sql content from funcs * #630 - Reduce varchar approach * More varchar edits * Fix typo. Note all Reduce Varchar changes * Fix linting * Blackify * Update changelog, citation * typo * Remove unwanted import
LorenFrankLab · Nov 8, 2023 · 3ce16f6 · 3ce16f6
1 parent 1e66ad4
commit 3ce16f6
Show file tree

Hide file tree

Showing 17 changed files with 86 additions and 41 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,11 +1,12 @@
 # Change Log
 
-## [Unreleased]
+## [0.4.3] (November 7, 2023)
 
 - Migrate `config` helper scripts to Spyglass codebase. #662
 - Revise contribution guidelines. #655
 - Minor bug fixes. #656, #657, #659, #651, #671
 - Add setup instruction specificity.
+- Reduce primary key varchar allocation aross may tables. #664
 
 ## [0.4.2] (October 10, 2023)
 

diff --git a/CITATION.cff b/CITATION.cff
@@ -10,6 +10,6 @@ authors:
 - family-names: "Frank"
   given-names: "Loren"
 title: "spyglass"
-version: 0.4.2
-date-released: 2023-10-10
+version: 0.4.3
+date-released: 2023-11-07
 url: "https://github.com/LorenFrankLab/spyglass"
diff --git a/src/spyglass/common/common_interval.py b/src/spyglass/common/common_interval.py
@@ -18,11 +18,13 @@ class IntervalList(dj.Manual):
     definition = """
     # Time intervals used for analysis
     -> Session
-    interval_list_name: varchar(200)  # descriptive name of this interval list
+    interval_list_name: varchar(170)  # descriptive name of this interval list
     ---
     valid_times: longblob  # numpy array with start/end times for each interval
     """
 
+    # See #630, #664. Excessive key length.
+
     @classmethod
     def insert_from_nwbfile(cls, nwbf, *, nwb_file_name):
         """Add each entry in the NWB file epochs table to the IntervalList.

diff --git a/src/spyglass/common/common_nwbfile.py b/src/spyglass/common/common_nwbfile.py
@@ -42,14 +42,16 @@
 class Nwbfile(dj.Manual):
     definition = """
     # Table for holding the NWB files.
-    nwb_file_name: varchar(255)   # name of the NWB file
+    nwb_file_name: varchar(64)   # name of the NWB file
     ---
     nwb_file_abs_path: filepath@raw
     INDEX (nwb_file_abs_path)
     """
     # NOTE the INDEX above is implicit from filepath@... above but needs to be explicit
     # so that alter() can work
 
+    # NOTE: See #630, #664. Excessive key length.
+
     @classmethod
     def insert_from_relative_file_name(cls, nwb_file_name):
         """Insert a new session from an existing NWB file.
@@ -149,7 +151,7 @@ def cleanup(delete_files=False):
 class AnalysisNwbfile(dj.Manual):
     definition = """
     # Table for holding the NWB files that contain results of analysis, such as spike sorting.
-    analysis_file_name: varchar(255)               # name of the file
+    analysis_file_name: varchar(64)               # name of the file
     ---
     -> Nwbfile                                     # name of the parent NWB file. Used for naming and metadata copy
     analysis_file_abs_path: filepath@analysis      # the full path to the file
@@ -161,6 +163,8 @@ class AnalysisNwbfile(dj.Manual):
     # NOTE the INDEX above is implicit from filepath@... above but needs to be explicit
     # so that alter() can work
 
+    # See #630, #664. Excessive key length.
+
     def create(self, nwb_file_name):
         """Open the NWB file, create a copy, write the copy to disk and return the name of the new file.
 

diff --git a/src/spyglass/common/common_position.py b/src/spyglass/common/common_position.py
@@ -39,7 +39,7 @@ class PositionInfoParameters(dj.Lookup):
     """
 
     definition = """
-    position_info_param_name : varchar(80) # name for this set of parameters
+    position_info_param_name : varchar(32) # name for this set of parameters
     ---
     max_separation = 9.0  : float   # max distance (in cm) between head LEDs
     max_speed = 300.0     : float   # max speed (in cm / s) of animal
@@ -53,6 +53,8 @@ class PositionInfoParameters(dj.Lookup):
         # pandas.DataFrame.interpolation for list of methods
     """
 
+    # See #630, #664. Excessive key length.
+
 
 @schema
 class IntervalPositionInfoSelection(dj.Lookup):

diff --git a/src/spyglass/decoding/__init__.py b/src/spyglass/decoding/__init__.py
@@ -3,7 +3,6 @@
     ClusterlessClassifierParameters,
     MarkParameters,
     MultiunitFiringRate,
-    MultiunitHighSynchronyEvents,
     MultiunitHighSynchronyEventsParameters,
     UnitMarkParameters,
     UnitMarks,

diff --git a/src/spyglass/decoding/clusterless.py b/src/spyglass/decoding/clusterless.py
@@ -32,18 +32,18 @@
 from replay_trajectory_classification.initial_conditions import (
     UniformInitialConditions,
 )
-
 from ripple_detection import (
     get_multiunit_population_firing_rate,
     multiunit_HSE_detector,
 )
-from spyglass.common.common_interval import IntervalList
-from spyglass.common.common_nwbfile import AnalysisNwbfile
-from spyglass.common.common_position import IntervalPositionInfo
-from spyglass.utils.dj_helper_fn import fetch_nwb
+from tqdm.auto import tqdm
+
 from spyglass.common.common_behav import (
     convert_epoch_interval_name_to_position_interval_name,
 )
+from spyglass.common.common_interval import IntervalList
+from spyglass.common.common_nwbfile import AnalysisNwbfile
+from spyglass.common.common_position import IntervalPositionInfo
 from spyglass.decoding.core import (
     convert_valid_times_to_slice,
     get_valid_ephys_position_times_by_epoch,
@@ -61,7 +61,7 @@
     SpikeSorting,
     SpikeSortingSelection,
 )
-from tqdm.auto import tqdm
+from spyglass.utils.dj_helper_fn import fetch_nwb
 
 schema = dj.schema("decoding_clusterless")
 
@@ -72,13 +72,15 @@ class MarkParameters(dj.Manual):
     time."""
 
     definition = """
-    mark_param_name : varchar(80) # a name for this set of parameters
+    mark_param_name : varchar(32) # a name for this set of parameters
     ---
     # the type of mark. Currently only 'amplitude' is supported
     mark_type = 'amplitude':  varchar(40)
     mark_param_dict:    BLOB    # dictionary of parameters for the mark extraction function
     """
 
+    # NOTE: See #630, #664. Excessive key length.
+
     def insert_default(self):
         """Insert the default parameter set
 
@@ -665,18 +667,20 @@ def insert_default(self):
         )
 
 
-@schema
+"""
+NOTE: Table decommissioned. See #630, #664. Excessive key length.
+
 class MultiunitHighSynchronyEvents(dj.Computed):
-    """Finds times of high mulitunit activity during immobility."""
+    "Finds times of high mulitunit activity during immobility."
 
-    definition = """
+    definition = "
     -> MultiunitHighSynchronyEventsParameters
     -> UnitMarksIndicator
     -> IntervalPositionInfo
     ---
     -> AnalysisNwbfile
     multiunit_hse_times_object_id: varchar(40)
-    """
+    "
 
     def make(self, key):
         marks = (UnitMarksIndicator & key).fetch_xarray()
@@ -712,6 +716,7 @@ def make(self, key):
         )
 
         self.insert1(key)
+"""
 
 
 def get_decoding_data_for_epoch(

diff --git a/src/spyglass/figurl_views/SpikeSortingView.py b/src/spyglass/figurl_views/SpikeSortingView.py
@@ -5,7 +5,7 @@
     SpikeSortingView as SortingViewSpikeSortingView,
 )
 
-from ..common.common_spikesorting import SpikeSorting, SpikeSortingRecording
+from ..spikesorting import SpikeSorting, SpikeSortingRecording
 from .prepare_spikesortingview_data import prepare_spikesortingview_data
 
 schema = dj.schema("figurl_view_spike_sorting_recording")

diff --git a/src/spyglass/lfp/v1/lfp_artifact.py b/src/spyglass/lfp/v1/lfp_artifact.py
@@ -1,13 +1,13 @@
 import datajoint as dj
+import numpy as np
 
+from spyglass.common import get_electrode_indices
 from spyglass.common.common_interval import IntervalList
 from spyglass.lfp.v1.lfp import LFPV1
 from spyglass.lfp.v1.lfp_artifact_difference_detection import (
     difference_artifact_detector,
 )
 from spyglass.lfp.v1.lfp_artifact_MAD_detection import mad_artifact_detector
-import numpy as np
-from spyglass.common import get_electrode_indices
 
 schema = dj.schema("lfp_v1")
 
@@ -182,7 +182,6 @@ def make(self, key):
                         key["target_interval_list_name"],
                         "LFP",
                         key["artifact_params_name"],
-                        "artifact_removed_valid_times",
                     ]
                 ),
             )
@@ -204,9 +203,12 @@ class LFPArtifactRemovedIntervalList(dj.Manual):
     definition = """
     # Stores intervals without detected artifacts. Entries can come from either
     # ArtifactDetection() or alternative artifact removal analyses.
-    artifact_removed_interval_list_name: varchar(200)
+    artifact_removed_interval_list_name: varchar(128)
     ---
     -> LFPArtifactDetectionSelection
     artifact_removed_valid_times: longblob
     artifact_times: longblob # np.array of artifact intervals
     """
+
+    # See #630, #664. Excessive key length.
+    # 200 enties in database over 128. If string removed as above, all fit.
diff --git a/src/spyglass/lock/file_lock.py b/src/spyglass/lock/file_lock.py
@@ -4,6 +4,8 @@
 
 schema = dj.schema("file_lock")
 
+from ..common.common_nwbfile import AnalysisNwbfile, Nwbfile
+
 
 @schema
 class NwbfileLock(dj.Manual):

diff --git a/src/spyglass/position/position_merge.py b/src/spyglass/position/position_merge.py
@@ -88,13 +88,15 @@ def fetch1_dataframe(self):
 class PositionVideoSelection(dj.Manual):
     definition = """
     nwb_file_name           : varchar(255)                 # name of the NWB file
-    interval_list_name      : varchar(200)                 # descriptive name of this interval list
+    interval_list_name      : varchar(170)                 # descriptive name of this interval list
     plot_id                 : int
     plot                    : varchar(40) # Which position info to overlay on video file
     ---
     output_dir              : varchar(255)                 # directory where to save output video
     """
 
+    # NOTE: See #630, #664. Excessive key length.
+
     def insert1(self, key, **kwargs):
         key["plot_id"] = self.get_plotid(key)
         super().insert1(key, **kwargs)

diff --git a/src/spyglass/position_linearization/v1/__init__.py b/src/spyglass/position_linearization/v1/__init__.py
@@ -1,7 +1,7 @@
+from spyglass.common.common_position import NodePicker
 from spyglass.position_linearization.v1.linearization import (
     LinearizationParameters,
     LinearizationSelection,
     LinearizedPositionV1,
-    NodePicker,
     TrackGraph,
 )
diff --git a/src/spyglass/spikesorting/spikesorting_artifact.py b/src/spyglass/spikesorting/spikesorting_artifact.py
@@ -140,13 +140,15 @@ class ArtifactRemovedIntervalList(dj.Manual):
     definition = """
     # Stores intervals without detected artifacts.
     # Note that entries can come from either ArtifactDetection() or alternative artifact removal analyses.
-    artifact_removed_interval_list_name: varchar(200)
+    artifact_removed_interval_list_name: varchar(170)
     ---
     -> ArtifactDetectionSelection
     artifact_removed_valid_times: longblob
     artifact_times: longblob # np array of artifact intervals
     """
 
+    # See #630, #664. Excessive key length.
+
 
 def _get_artifact_times(
     recording: si.BaseRecording,

diff --git a/src/spyglass/spikesorting/spikesorting_curation.py b/src/spyglass/spikesorting/spikesorting_curation.py
@@ -385,10 +385,13 @@ def _get_waveform_extractor_name(self, key):
 class MetricParameters(dj.Manual):
     definition = """
     # Parameters for computing quality metrics of sorted units
-    metric_params_name: varchar(200)
+    metric_params_name: varchar(64)
     ---
     metric_params: blob
     """
+
+    # NOTE: See #630, #664. Excessive key length.
+
     metric_default_params = {
         "snr": {
             "peak_sign": "neg",
@@ -645,12 +648,14 @@ def _get_num_spikes(
 @schema
 class AutomaticCurationParameters(dj.Manual):
     definition = """
-    auto_curation_params_name: varchar(200)   # name of this parameter set
+    auto_curation_params_name: varchar(36)   # name of this parameter set
     ---
     merge_params: blob   # dictionary of params to merge units
     label_params: blob   # dictionary params to label units
     """
 
+    # NOTE: No existing entries impacted by this change
+
     def insert1(self, key, **kwargs):
         # validate the labels and then insert
         # TODO: add validation for merge_params

diff --git a/src/spyglass/spikesorting/spikesorting_recording.py b/src/spyglass/spikesorting/spikesorting_recording.py
@@ -20,8 +20,8 @@
 from ..common.common_lab import LabTeam  # noqa: F401
 from ..common.common_nwbfile import Nwbfile
 from ..common.common_session import Session  # noqa: F401
-from ..utils.dj_helper_fn import dj_replace
 from ..settings import recording_dir
+from ..utils.dj_helper_fn import dj_replace
 
 schema = dj.schema("spikesorting_recording")
 
@@ -321,19 +321,23 @@ def get_geometry(self, sort_group_id, nwb_file_name):
 class SortInterval(dj.Manual):
     definition = """
     -> Session
-    sort_interval_name: varchar(200) # name for this interval
+    sort_interval_name: varchar(64) # name for this interval 
     ---
     sort_interval: longblob # 1D numpy array with start and end time for a single interval to be used for spike sorting
     """
 
+    # NOTE: See #630, #664. Excessive key length.
+
 
 @schema
 class SpikeSortingPreprocessingParameters(dj.Manual):
     definition = """
-    preproc_params_name: varchar(200)
+    preproc_params_name: varchar(32)
     ---
     preproc_params: blob
     """
+    # NOTE: Reduced key less than 2 existing entries
+    # All existing entries are below 48
 
     def insert_default(self):
         # set up the default filter parameters

diff --git a/src/spyglass/spikesorting/spikesorting_sorting.py b/src/spyglass/spikesorting/spikesorting_sorting.py
@@ -14,7 +14,7 @@
 
 from ..common.common_lab import LabMember, LabTeam
 from ..common.common_nwbfile import AnalysisNwbfile
-from ..settings import temp_dir, sorting_dir
+from ..settings import sorting_dir, temp_dir
 from .spikesorting_artifact import ArtifactRemovedIntervalList
 from .spikesorting_recording import (
     SpikeSortingRecording,
@@ -27,12 +27,14 @@
 @schema
 class SpikeSorterParameters(dj.Manual):
     definition = """
-    sorter: varchar(200)
-    sorter_params_name: varchar(200)
+    sorter: varchar(32)
+    sorter_params_name: varchar(64)
     ---
     sorter_params: blob
     """
 
+    # NOTE: See #630, #664. Excessive key length.
+
     def insert_default(self):
         """Default params from spike sorters available via spikeinterface"""
         sorters = sis.available_sorters()
@@ -236,10 +238,11 @@ def make(self, key: dict):
         self.insert1(key)
 
     def delete(self):
-        """Extends the delete method of base class to implement permission checking.
-        Note that this is NOT a security feature, as anyone that has access to source code
-        can disable it; it just makes it less likely to accidentally delete entries.
-        """
+        """Extends the delete method of base class to implement permission
+        checking. Note that this is NOT a security feature, as anyone that has
+        access to source code can disable it; it just makes it less likely to
+        accidentally delete entries."""
+
         current_user_name = dj.config["database.user"]
         entries = self.fetch()
         permission_bool = np.zeros((len(entries),))