Remove unused tables #976

Closed
3 tasks done
CBroz1 opened this issue May 16, 2024 · 7 comments
@CBroz1
Member

CBroz1 commented May 16, 2024

Process

Using a SQL command, I fetched tables with no rows as recorded in the information schema. Some weren't actually empty (for InnoDB, information_schema.tables.table_rows is only an estimate), so I narrowed down the list by fetching row counts with dj, and then looked for which items weren't represented in the package.

Script
import inspect
import pkgutil
import sys

import datajoint as dj
from datajoint.user_tables import TableMeta
from datajoint.utils import from_camel_case
from tqdm import tqdm

import spyglass
from spyglass.utils.database_settings import SHARED_MODULES


def empty_in_shared_module(full_table_name):
    if full_table_name.split(".")[0].split("_")[0] not in SHARED_MODULES:
        return False  # private prefix
    if full_table_name.split(".")[1][0] == "~":  # hidden table
        return False
    return len(dj.FreeTable(dj.conn(), full_table_name)) == 0


class_cache = dict()  # Cache results to avoid re-importing modules
sql_tbls = dict()  # Cache for schema.list_tables()


def load_cache():
    for importer, modname, ispkg in tqdm(
        pkgutil.walk_packages(spyglass.__path__),
        desc="Loading cache",
        total=300,
    ):
        module = importer.find_module(modname).load_module(modname)
        if not hasattr(module, "schema"):
            continue
        database = module.schema.database
        if not class_cache.get(database):
            class_cache[database] = set()
            sql_tbls[database] = set()
        schema_list = module.schema.list_tables()
        sql_tbls[database].update(schema_list)
        for attr_name in getattr(module, "__dict__", []):
            if not isinstance(getattr(module, attr_name), TableMeta):
                continue
            if attr_name.startswith("_"):
                continue
            snake_case_name = from_camel_case(attr_name)
            if snake_case_name in schema_list:
                class_cache[database].add(snake_case_name)


def class_exists_in_package(full_table_name):
    if not class_cache:
        load_cache()
    schema_name, table_name = full_table_name.split(".")
    schema_tables = class_cache.get(schema_name, [])
    return table_name in schema_tables


my_query = """
select table_schema as database_name,
       table_name
   from information_schema.tables
where table_type = 'BASE TABLE'
      and table_rows = 0
      and table_schema not in('information_schema', 'sys',
                              'performance_schema', 'mysql')
order by table_schema,
         table_name;
"""

mysql_empty = [".".join(t) for t in dj.conn().query(my_query).fetchall()]
no_content = [name for name in mysql_empty if empty_in_shared_module(name)]
no_class = [t for t in mysql_empty if not class_exists_in_package(t)]
neither = list(set(no_content).intersection(set(no_class)))
no_content_has_class = list(set(no_content).difference(set(no_class)))
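
A note on the names in the lists below: DataJoint derives table names from class names with from_camel_case, which splits every capital into its own token, so classes like LFPSelection or DLCPosVideo become l_f_p_selection and d_l_c_pos_video. Tier prefixes also appear verbatim ('#' lookup, '_' imported, '__' computed), and 'master__part' names are part tables. A quick check of the helpers used above:

from datajoint.utils import from_camel_case, to_camel_case

from_camel_case("LFPSelection")        # -> 'l_f_p_selection'
from_camel_case("DLCModelEvaluation")  # -> 'd_l_c_model_evaluation'
to_camel_case("d_l_c_pos_video")       # -> 'DLCPosVideo'
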
Resulting Lists

First pass with an older version of the script above; will update soon.

no_content = [
    "common_behav._head_dir",
    "common_behav._lin_pos",
    "common_behav._speed",
    "common_behav.merged_position",
    "common_behav.merged_position__method_two_position",
    "common_behav.merged_position__raw_position",
    "common_behav.method_two_position",
    "common_curation.__automatic_curation_sorting",
    "common_curation.__curated_spike_sorting",
    "common_curation.__curated_spike_sorting__unit",
    "common_curation.__selected_units",
    "common_curation.automatic_curation_parameters",
    "common_curation.automatic_curation_selection",
    "common_curation.curated_spike_sorting_selection",
    "common_curation.selected_units_parameters",
    "common_ephys.electrode_brain_region",
    "common_lab.__analysis_nwbfile_kachery",
    "common_lab.__nwbfile_kachery",
    "common_metrics.__quality_metrics",
    "common_metrics.metric_selection",
    "common_nwbfile.__analysis_nwbfile_kachery",
    "common_nwbfile.__nwbfile_kachery",
    "common_nwbfile.analysis_nwbfile_kachery",
    "common_nwbfile.analysis_nwbfile_kachery_selection",
    "common_nwbfile.nwbfile_kachery",
    "common_nwbfile.nwbfile_kachery_selection",
    "common_position.__position_video",
    "common_ripple.l_f_p_selection",
    "common_ripple.l_f_p_selection__l_f_p_electrode",
    "common_ripple.ripple_artifact_detection_selection",
    "common_session._experimenter_list",
    "common_session._experimenter_list__experimenter",
    "common_session.session_group_session",
    "common_sortingview.__sortingview_workspace__sortings",
    "common_spikesorting.__automatic_curation",
    "common_spikesorting.__curated_spike_sorting",
    "common_spikesorting.__curated_spike_sorting__unit",
    "common_spikesorting.__modify_sorting",
    "common_spikesorting.__spike_sorting",
    "common_spikesorting.__spike_sorting_workspace",
    "common_spikesorting.automatic_curation_selection",
    "common_spikesorting.curated_spike_sorting_selection",
    "common_spikesorting.modify_sorting_parameters",
    "common_spikesorting.modify_sorting_selection",
    "common_spikesorting.modify_sorting_selection__sortings_i_ds",
    "common_spikesorting.sorting",
    "common_spikesorting.sorting_i_d",
    "common_spikesorting.sorting_list",
    "common_spikesorting.sortings",
    "common_spikesorting.spike_sorting_artifact_parameters",
    "common_spikesorting.spike_sorting_filter_parameters",
    "common_task.apparatus",
    "common_usage.insert_error",
    "common_waveforms.__waveforms",
    "common_waveforms.waveform_selection",
    "decoding._decode_spikes_selection",
    "decoding._decode_spikes_selection__decode_spikes",
    "decoding.decode_spikes_selection",
    "decoding.decode_spikes_selection__decode_spikes",
    "decoding_clusterless.__multiunit_firing_rate",
    "decoding_clusterless.__multiunit_high_synchrony_events",
    "decoding_clusterless.mark_indicator_parameters",
    "decoding_sortedspikes.__my_sorted_spikes_indicator",
    "decoding_sortedspikes.__sorted_spikes_results",
    "lfp_imported._imported_l_f_p",
    "lfp_merge.l_f_p_output__imported_l_f_p",
    "lfp_v1.__l_f_p_band_artifact_detection",
    "lfp_v1._imported_l_f_p_v1",
    "lfp_v1.l_f_p_band_artifact_detection_parameters",
    "lfp_v1.l_f_p_band_artifact_detection_selection",
    "lfp_v1.l_f_p_band_artifact_removed_interval_list",
    "position_dlc_model.__d_l_c_model_evaluation",
    "position_dlc_selection.__d_l_c_pos_video",
    "position_linearization_merge.linearized_position_output__linearized_position_v0",
    "position_merge.__position_video",
    "position_merge.position_video_selection",
    "position_position.__position_video",
    "position_position.final_position__common_pos",
    "position_position.final_position__trodes_pos_v1",
    "position_position.position_video_selection",
    "position_trodes_position.__trodes_pos_v1",
    "position_v1_dlc_model.__d_l_c_model_evaluation",
    "position_v1_dlc_selection.__d_l_c_pos_video",
    "position_v1_trodes_position.__trodes_pos_video",
    "spikesorting_artifact.artifact_detection_parameter",
    "spikesorting_curation.__units",
    "spikesorting_curation.unit_inclusion_parameters",
    "spikesorting_group_v1.sorted_spikes_group__sort_group",
    "spikesorting_merge.unit_inclusion",
    "spikesorting_recording.__sort_group_targeted_location",
    "spikesorting_recording.electrode_brain_location",
    "spikesorting_recording.spike_sorting_preprocessing_parameter",
    "spikesorting_sorting.spike_sorter_parameter",
    "spikesorting_v1_unit_inclusion.imported_unit_inclusion_v1",
]

no_class = [
    "common_behav._head_dir",
    "common_behav._lin_pos",
    "common_behav._speed",
    "common_behav.merged_position",
    "common_behav.merged_position__method_two_position",
    "common_behav.merged_position__raw_position",
    "common_behav.method_two_position",
    "common_curation.__automatic_curation_sorting",
    "common_curation.__curated_spike_sorting__unit",
    "common_curation.__selected_units",
    "common_curation.selected_units_parameters",
    "common_lab.__nwbfile_kachery",
    "common_nwbfile.__nwbfile_kachery",
    "common_nwbfile.nwbfile_kachery",
    "common_nwbfile.nwbfile_kachery_selection",
    "common_ripple.l_f_p_selection__l_f_p_electrode",
    "common_ripple.ripple_artifact_detection_selection",
    "common_session._experimenter_list",
    "common_session._experimenter_list__experimenter",
    "common_sortingview.__sortingview_workspace__sortings",
    "common_spikesorting.__curated_spike_sorting__unit",
    "common_spikesorting.__modify_sorting",
    "common_spikesorting.__spike_sorting_workspace",
    "common_spikesorting.modify_sorting_parameters",
    "common_spikesorting.modify_sorting_selection",
    "common_spikesorting.modify_sorting_selection__sortings_i_ds",
    "common_spikesorting.sorting",
    "common_spikesorting.sorting_i_d",
    "common_spikesorting.sorting_list",
    "common_spikesorting.sortings",
    "common_spikesorting.spike_sorting_artifact_parameters",
    "common_spikesorting.spike_sorting_filter_parameters",
    "common_task.apparatus",
    "decoding._decode_spikes_selection",
    "decoding._decode_spikes_selection__decode_spikes",
    "decoding.decode_spikes_selection",
    "decoding.decode_spikes_selection__decode_spikes",
    "decoding_clusterless.__multiunit_firing_rate",
    "decoding_clusterless.__multiunit_high_synchrony_events",
    "decoding_clusterless.mark_indicator_parameters",
    "decoding_sortedspikes.__my_sorted_spikes_indicator",
    "decoding_sortedspikes.__sorted_spikes_results",
    "lfp_merge.l_f_p_output__imported_l_f_p",
    "lfp_v1.__l_f_p_band_artifact_detection",
    "lfp_v1._imported_l_f_p_v1",
    "lfp_v1.l_f_p_band_artifact_detection_parameters",
    "lfp_v1.l_f_p_band_artifact_detection_selection",
    "lfp_v1.l_f_p_band_artifact_removed_interval_list",
    "position_linearization_merge.linearized_position_output__linearized_position_v0",
    "position_position.final_position__common_pos",
    "position_position.final_position__trodes_pos_v1",
    "spikesorting_artifact.artifact_detection_parameter",
    "spikesorting_curation.__units",
    "spikesorting_group_v1.sorted_spikes_group__sort_group",
    "spikesorting_merge.unit_inclusion",
    "spikesorting_recording.__sort_group_targeted_location",
    "spikesorting_recording.electrode_brain_location",
    "spikesorting_recording.spike_sorting_preprocessing_parameter",
    "spikesorting_sorting.spike_sorter_parameter",
    "spikesorting_v1_unit_inclusion.imported_unit_inclusion_v1",
]

neither = [
    "common_spikesorting.sortings",
    "common_behav._head_dir",
    "decoding.decode_spikes_selection__decode_spikes",
    "common_curation.curated_spike_sorting_selection",
    "position_position.final_position__common_pos",
    "position_merge.__position_video",
    "lfp_v1.l_f_p_band_artifact_detection_selection",
    "spikesorting_group_v1.sorted_spikes_group__sort_group",
    "spikesorting_artifact.artifact_detection_parameter",
    "common_session._experimenter_list",
    "common_behav.merged_position__method_two_position",
    "decoding._decode_spikes_selection__decode_spikes",
    "common_nwbfile.analysis_nwbfile_kachery_selection",
    "common_spikesorting.__spike_sorting_workspace",
    "decoding.decode_spikes_selection",
    "spikesorting_curation.__units",
    "common_position.__position_video",
    "common_waveforms.__waveforms",
    "decoding_sortedspikes.__sorted_spikes_results",
    "common_spikesorting.sorting",
    "common_nwbfile.nwbfile_kachery_selection",
    "position_position.__position_video",
    "common_curation.__automatic_curation_sorting",
    "common_curation.automatic_curation_parameters",
    "spikesorting_recording.spike_sorting_preprocessing_parameter",
    "position_v1_dlc_selection.__d_l_c_pos_video",
    "common_spikesorting.__curated_spike_sorting",
    "common_spikesorting.sorting_i_d",
    "position_v1_dlc_model.__d_l_c_model_evaluation",
    "common_metrics.metric_selection",
    "common_nwbfile.nwbfile_kachery",
    "position_dlc_selection.__d_l_c_pos_video",
    "common_curation.automatic_curation_selection",
    "spikesorting_sorting.spike_sorter_parameter",
    "common_ripple.ripple_artifact_detection_selection",
    "common_spikesorting.__curated_spike_sorting__unit",
    "common_behav.merged_position__raw_position",
    "common_spikesorting.__modify_sorting",
    "common_spikesorting.__spike_sorting",
    "common_curation.selected_units_parameters",
    "common_nwbfile.__nwbfile_kachery",
    "lfp_v1.l_f_p_band_artifact_removed_interval_list",
    "common_behav.merged_position",
    "position_position.position_video_selection",
    "lfp_v1.l_f_p_band_artifact_detection_parameters",
    "common_spikesorting.sorting_list",
    "spikesorting_v1_unit_inclusion.imported_unit_inclusion_v1",
    "common_curation.__curated_spike_sorting",
    "decoding_clusterless.mark_indicator_parameters",
    "common_curation.__selected_units",
    "position_dlc_model.__d_l_c_model_evaluation",
    "spikesorting_recording.__sort_group_targeted_location",
    "common_spikesorting.modify_sorting_selection__sortings_i_ds",
    "common_metrics.__quality_metrics",
    "lfp_v1._imported_l_f_p_v1",
    "position_position.final_position__trodes_pos_v1",
    "position_linearization_merge.linearized_position_output__linearized_position_v0",
    "common_ripple.l_f_p_selection__l_f_p_electrode",
    "decoding_clusterless.__multiunit_firing_rate",
    "spikesorting_recording.electrode_brain_location",
    "decoding_clusterless.__multiunit_high_synchrony_events",
    "common_spikesorting.spike_sorting_artifact_parameters",
    "common_sortingview.__sortingview_workspace__sortings",
    "lfp_v1.__l_f_p_band_artifact_detection",
    "common_behav._lin_pos",
    "position_trodes_position.__trodes_pos_v1",
    "common_ripple.l_f_p_selection",
    "common_curation.__curated_spike_sorting__unit",
    "common_waveforms.waveform_selection",
    "common_behav.method_two_position",
    "common_spikesorting.__automatic_curation",
    "common_task.apparatus",
    "common_spikesorting.automatic_curation_selection",
    "position_v1_trodes_position.__trodes_pos_video",
    "spikesorting_merge.unit_inclusion",
    "common_lab.__analysis_nwbfile_kachery",
    "common_behav._speed",
    "common_spikesorting.spike_sorting_filter_parameters",
    "common_nwbfile.__analysis_nwbfile_kachery",
    "lfp_imported._imported_l_f_p",
    "common_lab.__nwbfile_kachery",
    "decoding_sortedspikes.__my_sorted_spikes_indicator",
    "decoding._decode_spikes_selection",
    "common_nwbfile.analysis_nwbfile_kachery",
    "lfp_merge.l_f_p_output__imported_l_f_p",
    "common_session._experimenter_list__experimenter",
    "common_spikesorting.curated_spike_sorting_selection",
    "common_spikesorting.modify_sorting_parameters",
    "common_spikesorting.modify_sorting_selection",
]

no_content_has_class = [
    "spikesorting_curation.unit_inclusion_parameters",
    "common_session.session_group_session",
    "common_ephys.electrode_brain_region",
    "common_usage.insert_error",
    "position_merge.position_video_selection",
]

Result

There are ....

  1. Schemas that never had, or no longer have, a corresponding Spyglass schema
    • common_analytic_signal: last used by Xulu 05/23
    • common_artifact: last used by Alison 11/22
    • common_backup: last used by Kyu 02/22
    • common_curation: last used by Eric 02/22
    • common_metrics: last used by Jen 03/22
    • common_sortingview: last used by Alison 11/22
    • common_waveform: last used by Jen 03/22
    • decoding: last used by Eric 05/23
    • position_position: last used by Daniel 04/23
  2. Tables on used schemas that do not have a Spyglass class (e.g., common_behav._head_dir). See 'neither' in the lists above.
  3. Tables on used schemas that have a Spyglass class, declared some time ago, that remain unused
    • spikesorting_curation.unit_inclusion_parameters: declared 07/22
    • common_session.session_group_session: declared 03/22
    • common_ephys.electrode_brain_region: declared 05/22
    • position_merge.position_video_selection: declared 04/23
    • common_usage.insert_error: declared 02/24 - worth preserving for outside-lab insert errors?

Proposed

  1. Tables and schemas on shared prefixes without a Python representation are scheduled for deletion after a team-wide two-week warning.
  2. Empty tables with Spyglass classes that have existed for more than three months are marked for deprecation in version 0.6.0. Inserts into these tables can be overridden to temporarily raise a deprecation warning and then log to common_usage to let us know they're being used (see the sketch below).
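
A minimal sketch of the second item, assuming a small mixin added to the deprecated classes (the names DeprecationWarnMixin and UsageLog are placeholders, not existing Spyglass objects):

import warnings

import datajoint as dj


class DeprecationWarnMixin:
    """Warn (and optionally log) whenever a deprecated table receives an insert."""

    def insert(self, rows, **kwargs):
        warnings.warn(
            f"{self.__class__.__name__} is deprecated and scheduled for removal; "
            "please open an issue if you rely on it.",
            DeprecationWarning,
            stacklevel=2,
        )
        # Hypothetical common_usage logging; the actual table/columns are not settled here
        # UsageLog().insert1(dict(table=self.full_table_name,
        #                         dj_user=dj.config["database.user"]))
        return super().insert(rows, **kwargs)


# Usage: list the mixin before the DataJoint tier, e.g.
# class PositionVideoSelection(DeprecationWarnMixin, dj.Manual): ...
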

Steps

  • Remove unused classes from the package
  • Drop empty tables in the database
  • Drop tables with no corresponding class
@CBroz1 CBroz1 added the Database label (Issues with Frank Lab database, not Spyglass code) May 16, 2024
@edeno
Collaborator

edeno commented May 17, 2024

All these would be good to clean up as far as I know.

@CBroz1 CBroz1 self-assigned this May 18, 2024
@CBroz1 CBroz1 mentioned this issue May 21, 2024
6 tasks
@CBroz1 CBroz1 changed the title from "Deprecate unused tables" to "Remove unused tables" Jun 10, 2024
CBroz1 added a commit to CBroz1/spyglass that referenced this issue Jun 10, 2024
edeno pushed a commit that referenced this issue Jun 11, 2024
* #976

* Remove notebook reference
edeno pushed a commit that referenced this issue Jun 18, 2024
@CBroz1
Member Author

CBroz1 commented Jul 9, 2024

At @samuelbray32's suggestion, I wrote a script to look at tables on shared prefixes that have contents but no Spyglass class. They are listed at the end of the script below.

Script
import importlib
import inspect
import pkgutil

import datajoint as dj
from datajoint.user_tables import TableMeta
from tqdm import tqdm

from spyglass.utils.database_settings import SHARED_MODULES


class SearchPkg:
    def __init__(self, package_name="spyglass"):
        self.package = importlib.import_module(package_name)
        self.class_cache = set()
        self.load_cache()

    def find_subclasses(self, module, parent_class):
        for name, obj in inspect.getmembers(parent_class, inspect.isclass):
            if isinstance(obj, TableMeta) and obj.__module__ == module.__name__:
                name = obj.full_table_name.replace("`", "")
                if name in self.class_cache:
                    continue
                print(f"Found class: {name}")
                self.class_cache.add(obj.full_table_name.replace("`", ""))
                self.find_subclasses(module, obj)

    def load_cache(self, package_name="spyglass"):
        package = self.package
        for loader, module_name, is_pkg in tqdm(
            pkgutil.walk_packages(package.__path__, package.__name__ + "."),
            desc="Loading cache",
            total=64,
        ):
            module = importlib.import_module(module_name)

            if not getattr(module, "schema", None):
                continue

            print(f"Checking module: {module_name}")
            self.find_subclasses(module, module)


my_query = """
select table_schema as database_name, table_name from information_schema.tables
where table_type = 'BASE TABLE'
and table_schema not in('information_schema', 'sys', 'performance_schema', 'mysql')
order by table_schema, table_name;
"""

mysql_tbls = [
    ".".join(t)
    for t in dj.conn().query(my_query).fetchall()
    if t[0].split("_")[0] in SHARED_MODULES and t[1][0] != "~"
]
class_cache = SearchPkg().class_cache
no_class = set(mysql_tbls).difference(class_cache)

len_cache = dict()
for t in no_class:
    len_cache[t] = len(dj.FreeTable(dj.conn(), t))
non_empty = {k: v for k, v in len_cache.items() if v > 0}

non_empty = {
    "common_analytic_signal.__analytic_signal": 1,
    "common_analytic_signal.analytic_signal_parameters": 1,
    "common_analytic_signal.analytic_signal_selection": 1,
    "common_artifact.__artifact_detection": 1,
    "common_artifact.artifact_detection_parameters": 3,
    "common_artifact.artifact_detection_selection": 1,
    "common_artifact.artifact_removed_interval_list": 1,
    "common_backup.curated_spike_sorting_back_up": 413,
    "common_backup.spike_sorting_back_up": 2810,
    "common_filter.fir_filter": 3,
    "common_interval.sort_interval": 52,
    "common_lab.analysis_nwbfile": 5,
    "common_lab.nwbfile": 4,
    "common_metrics.metric_parameters": 1,
    "common_nwbfile.kachery_channel": 1,
    "common_ripple.ripple_artifact_detection_parameters": 2,
    "common_ripple.ripple_artifact_removed_interval_list": 2,
    "common_sortingview.__sortingview_workspace": 1,
    "common_spikesorting.__spike_sorting_recording": 1,
    "common_spikesorting.automatic_curation_parameters": 3,
    "common_spikesorting.sort_group": 1665,
    "common_spikesorting.sort_group__sort_group_electrode": 7427,
    "common_spikesorting.spike_sorter": 16,
    "common_spikesorting.spike_sorter_parameters": 20,
    "common_spikesorting.spike_sorting_artifact_detection_parameters": 1,
    "common_spikesorting.spike_sorting_metric_parameters": 6,
    "common_spikesorting.spike_sorting_metrics": 3,
    "common_spikesorting.spike_sorting_preprocessing_parameters": 2,
    "common_spikesorting.spike_sorting_recording_selection": 1,
    "common_spikesorting.spike_sorting_selection": 1,
    "common_spikesorting.spike_sorting_waveform_parameters": 1,
    "common_spikesorting.unit_inclusion_parameters": 2,
    "common_temp.temp": 2,
    "common_waveforms.waveform_parameters": 1,
    "decoding_clusterless.classifier_parameters": 3,
    "decoding_clusterless.multiunit_high_synchrony_events_parameters": 1,
    "decoding_clusterless.sorted_spikes_classifier_parameters": 4,
    "decoding_sortedspikes.#my_sorted_spikes_indicator_selection": 13,
    "lfp_v1.__l_f_p": 81,
    "lfp_v1.__l_f_p_band": 4,
    "lfp_v1.l_f_p_band_selection": 5,
    "lfp_v1.l_f_p_band_selection__l_f_p_band_electrode": 76,
    "lfp_v1.l_f_p_electrode_group": 65,
    "lfp_v1.l_f_p_electrode_group__l_f_p_electrode": 3248,
    "lfp_v1.l_f_p_output": 80,
    "lfp_v1.l_f_p_output__l_f_p": 80,
    "position_dlc_centroid.__d_l_c_centroid": 52,
    "position_dlc_centroid.d_l_c_centroid_params": 6,
    "position_dlc_centroid.d_l_c_centroid_selection": 60,
    "position_dlc_cohort.__d_l_c_smooth_interp_cohort": 52,
    "position_dlc_cohort.__d_l_c_smooth_interp_cohort__body_part": 206,
    "position_dlc_cohort.d_l_c_smooth_interp_cohort_selection": 52,
    "position_dlc_model.__d_l_c_model": 3,
    "position_dlc_model.__d_l_c_model__body_part": 15,
    "position_dlc_model.d_l_c_model_input": 3,
    "position_dlc_model.d_l_c_model_params": 1,
    "position_dlc_model.d_l_c_model_selection": 4,
    "position_dlc_model.d_l_c_model_source": 5,
    "position_dlc_model.d_l_c_model_source__from_import": 3,
    "position_dlc_model.d_l_c_model_source__from_upstream": 2,
    "position_dlc_orient.__d_l_c_orientation": 52,
    "position_dlc_orient.d_l_c_orientation_params": 2,
    "position_dlc_orient.d_l_c_orientation_selection": 60,
    "position_dlc_pose_estimation.__d_l_c_pose_estimation": 64,
    "position_dlc_pose_estimation.__d_l_c_pose_estimation__body_part": 320,
    "position_dlc_pose_estimation.d_l_c_pose_estimation_selection": 307,
    "position_dlc_position.__d_l_c_smooth_interp": 206,
    "position_dlc_position.d_l_c_smooth_interp_params": 5,
    "position_dlc_position.d_l_c_smooth_interp_selection": 242,
    "position_dlc_project.body_part": 7,
    "position_dlc_project.d_l_c_project": 8,
    "position_dlc_project.d_l_c_project__body_part": 37,
    "position_dlc_project.d_l_c_project__file": 55,
    "position_dlc_selection.__d_l_c_pos_v1": 36,
    "position_dlc_selection.d_l_c_pos_selection": 52,
    "position_dlc_selection.d_l_c_pos_video_params": 10,
    "position_dlc_selection.d_l_c_pos_video_selection": 26,
    "position_dlc_training.#d_l_c_model_training_params": 2,
    "position_dlc_training.__d_l_c_model_training": 2,
    "position_dlc_training.d_l_c_model_training_selection": 3,
    "position_merge.pose_output": 1,
    "position_merge.pose_output__d_l_c_pose_estimation": 1,
    "position_position.final_position": 35,
    "position_position.final_position__d_l_c_pos_v1": 26,
    "waveform_features.#waveform_features_params": 4,
    "waveform_features.unit_waveform_features_selection": 327,
}

These seem to primarily be first drafts of tables that were later renamed. Unless there are objections, I plan to drop these tables as well.
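
For reference, since these have no Spyglass class to drop through, removal would go through FreeTable, along these lines (a sketch only; common_temp.temp is just one name from the dict above):

import datajoint as dj

# Address the table directly by its schema.table name; no class required
ft = dj.FreeTable(dj.conn(), "common_temp.temp")
ft.drop()  # with safemode on, DataJoint lists dependents and asks for confirmation
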

@samuelbray32
Collaborator

Would these tables exist on other labs' databases? Just checking if the drop calls should be in the release notes.

@CBroz1
Member Author

CBroz1 commented Jul 11, 2024

Only those we removed in #1003. The rest are remnants of bad drafting practices. Ideally, tables with no Spyglass class would not be declared on shared schemas, which suggests that we should revoke the CREATE privilege for the dj_user role (sketched below).
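
For the record, that revoke would be a one-time admin statement along these lines (a sketch with assumptions: the per-prefix grant pattern and running it from an admin connection; check SHOW GRANTS for the real layout first):

import datajoint as dj

# Run as a database admin; the `common\_%` pattern is an assumed example of how
# per-prefix privileges might have been granted to the dj_user role.
print(dj.conn().query(r"SHOW GRANTS FOR 'dj_user';").fetchall())
dj.conn().query(r"REVOKE CREATE ON `common\_%`.* FROM 'dj_user';")
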

@CBroz1
Member Author

CBroz1 commented Aug 8, 2024

This task is 95% there. Here are the remaining empty tables that are part of the package:

empty_tables = [
    # PARTS - cannot drop
    "lfp_imported._imported_l_f_p",
    "lfp_merge.l_f_p_output__imported_l_f_p",
    "position_linearization_merge.linearized_position_output__linearized_position_v0",
    "spikesorting_group_v1.sorted_spikes_group__sort_group",
    # Empty is informative
    "common_usage.activity_log",
    # See #1025
    "common_session.session_group_session",
    # Can drop?
    "common_position.__position_video",
    "position_v1_dlc_model.__d_l_c_model_evaluation",
]

Can we deprecate the last 4, @edeno? This includes ...

  • Remove SessionGroup? #1025 - SessionGroup
  • common_position.PositionVideo - unused in favor of the DLC equivalent?
  • position.v1.position_dlc_model.DLCModelEvaluation - completely unused feature

@CBroz1
Member Author

CBroz1 commented Aug 9, 2024

These tables have been dropped in the short term, but those whose classes were removed from the package will be redeclared by anyone running a version that retains the deprecated classes.

Script for deleting empty tables
import os

import datajoint as dj
import pandas as pd
from datajoint.errors import DataJointError
from datajoint.schemas import VirtualModule
from datajoint.utils import to_camel_case
from networkx import NetworkXError
from pymysql import IntegrityError, OperationalError

from spyglass.utils.database_settings import SHARED_MODULES


class TableData:
    def __init__(self, tables=None):
        self.processed = False
        self.tables = (
            list(tables.keys()) if isinstance(tables, dict) else tables
        ) or self.fetch_empty_tables()
        self._by_schemas = None
        self._vmods = None

    def fetch_empty_tables(self):
        query = (
            "SELECT table_schema AS database_name, table_name"
            + "FROM information_schema.tables WHERE table_rows < 1;"
        )
        empty = dj.conn().query(query).fetchall()
        in_spy = []
        for db, tbl in empty:
            if db.split("_")[0] not in SHARED_MODULES or tbl[0] == "~":
                continue
            full_name = f"{db}.{tbl}"
            ft = dj.FreeTable(dj.conn(), full_name)
            rows = len(ft)
            if rows > 0:
                print(f"{rows:03}: {full_name}")
                continue
            in_spy.append(f"{db}.{tbl}")
        return in_spy

    @property
    def by_schemas(self):
        if self._by_schemas is None:
            self._by_schemas = {}
            for table in self.tables:
                schema, table_name = table.split(".")
                if schema not in self._by_schemas:
                    self._by_schemas[schema] = []
                self._by_schemas[schema].append(to_camel_case(table_name))
        return self._by_schemas

    @property
    def vmods(self):
        if self._vmods is None:
            self._vmods = {}
            for schema in self.by_schemas:
                self._vmods[schema] = VirtualModule("vmod", schema)
        return self._vmods

    def drop(self, table):
        schema, table_name = table.split(".")
        class_obj = getattr(self.vmods[schema], to_camel_case(table_name))
        class_obj.drop()
        self.tables.remove(table)

    def drop_all(self):
        for table in self.tables:
            try:
                self.drop(table)
            except KeyboardInterrupt:
                print("Interrupted")
            except (AttributeError, DataJointError, NetworkXError) as e:
                print(f"Error: {e}")


if __name__ == "__main__":
    data = TableData()  # defaults to fetching empty tables; pass a list/dict to override
    data.drop_all()

This could only really be addressed by revoking table declaration privileges for average users, which would result in error messages that could be fixed by deleting the table from their outdated packages. To be discussed as a group.

@CBroz1
Member Author

CBroz1 commented Sep 3, 2024

This has been resolved on our production server.

@CBroz1 CBroz1 closed this as completed Sep 3, 2024