Remove unused tables #976

Closed
3 tasks done
CBroz1 opened this issue May 16, 2024 · 7 comments
@CBroz1
Member

CBroz1 commented May 16, 2024

Process

Using a SQL command, I fetched tables with no rows as recorded in the information schema. Some weren't actually empty (for InnoDB, information_schema.tables.table_rows is only an estimate), so I narrowed down the list by fetching row counts with dj, and then looked for which items weren't represented in the package.

Script
import inspect
import pkgutil
import sys

import datajoint as dj
from datajoint.user_tables import TableMeta
from datajoint.utils import from_camel_case
from tqdm import tqdm

import spyglass
from spyglass.utils.database_settings import SHARED_MODULES


def empty_in_shared_module(full_table_name):
    if full_table_name.split(".")[0].split("_")[0] not in SHARED_MODULES:
        return False  # private prefix
    if full_table_name.split(".")[1][0] == "~":  # hidden table
        return False
    return len(dj.FreeTable(dj.conn(), full_table_name)) == 0


class_cache = dict()  # Cache results to avoid re-importing modules
sql_tbls = dict()  # Cache for schema.list_tables()


def load_cache():
    for importer, modname, ispkg in tqdm(
        pkgutil.walk_packages(spyglass.__path__),
        desc="Loading cache",
        total=300,
    ):
        module = importer.find_module(modname).load_module(modname)
        if not hasattr(module, "schema"):
            continue
        database = module.schema.database
        if not class_cache.get(database):
            class_cache[database] = set()
            sql_tbls[database] = set()
        schema_list = module.schema.list_tables()
        sql_tbls[database].update(schema_list)
        for attr_name in getattr(module, "__dict__", []):
            if not isinstance(getattr(module, attr_name), TableMeta):
                continue
            if attr_name.startswith("_"):
                continue
            snake_case_name = from_camel_case(attr_name)
            if snake_case_name in schema_list:
                class_cache[database].add(snake_case_name)


def class_exists_in_package(full_table_name):
    if not class_cache:
        load_cache()
    schema_name, table_name = full_table_name.split(".")
    schema_tables = class_cache.get(schema_name, [])
    return table_name in schema_tables


my_query = """
select table_schema as database_name,
       table_name
   from information_schema.tables
where table_type = 'BASE TABLE'
      and table_rows = 0
      and table_schema not in('information_schema', 'sys',
                              'performance_schema', 'mysql')
order by table_schema,
         table_name;
"""

mysql_empty = [".".join(t) for t in dj.conn().query(my_query).fetchall()]
no_content = [name for name in mysql_empty if empty_in_shared_module(name)]
no_class = [t for t in mysql_empty if not class_exists_in_package(t)]
neither = list(set(no_content).intersection(set(no_class)))
no_content_has_class = list(set(no_content).difference(set(no_class)))
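
A note on the names in the lists below: DataJoint derives table names from class names with from_camel_case, which splits every capital into its own token, so classes like LFPSelection or DLCPosVideo become l_f_p_selection and d_l_c_pos_video. Tier prefixes also appear verbatim ('#' lookup, '_' imported, '__' computed), and 'master__part' names are part tables. A quick check of the helpers used above:

from datajoint.utils import from_camel_case, to_camel_case

from_camel_case("LFPSelection")        # -> 'l_f_p_selection'
from_camel_case("DLCModelEvaluation")  # -> 'd_l_c_model_evaluation'
to_camel_case("d_l_c_pos_video")       # -> 'DLCPosVideo'
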
Resulting Lists

First pass with an older version of the script above; will update soon.

no_content = [
    "common_behav._head_dir",
    "common_behav._lin_pos",
    "common_behav._speed",
    "common_behav.merged_position",
    "common_behav.merged_position__method_two_position",
    "common_behav.merged_position__raw_position",
    "common_behav.method_two_position",
    "common_curation.__automatic_curation_sorting",
    "common_curation.__curated_spike_sorting",
    "common_curation.__curated_spike_sorting__unit",
    "common_curation.__selected_units",
    "common_curation.automatic_curation_parameters",
    "common_curation.automatic_curation_selection",
    "common_curation.curated_spike_sorting_selection",
    "common_curation.selected_units_parameters",
    "common_ephys.electrode_brain_region",
    "common_lab.__analysis_nwbfile_kachery",
    "common_lab.__nwbfile_kachery",
    "common_metrics.__quality_metrics",
    "common_metrics.metric_selection",
    "common_nwbfile.__analysis_nwbfile_kachery",
    "common_nwbfile.__nwbfile_kachery",
    "common_nwbfile.analysis_nwbfile_kachery",
    "common_nwbfile.analysis_nwbfile_kachery_selection",
    "common_nwbfile.nwbfile_kachery",
    "common_nwbfile.nwbfile_kachery_selection",
    "common_position.__position_video",
    "common_ripple.l_f_p_selection",
    "common_ripple.l_f_p_selection__l_f_p_electrode",
    "common_ripple.ripple_artifact_detection_selection",
    "common_session._experimenter_list",
    "common_session._experimenter_list__experimenter",
    "common_session.session_group_session",
    "common_sortingview.__sortingview_workspace__sortings",
    "common_spikesorting.__automatic_curation",
    "common_spikesorting.__curated_spike_sorting",
    "common_spikesorting.__curated_spike_sorting__unit",
    "common_spikesorting.__modify_sorting",
    "common_spikesorting.__spike_sorting",
    "common_spikesorting.__spike_sorting_workspace",
    "common_spikesorting.automatic_curation_selection",
    "common_spikesorting.curated_spike_sorting_selection",
    "common_spikesorting.modify_sorting_parameters",
    "common_spikesorting.modify_sorting_selection",
    "common_spikesorting.modify_sorting_selection__sortings_i_ds",
    "common_spikesorting.sorting",
    "common_spikesorting.sorting_i_d",
    "common_spikesorting.sorting_list",
    "common_spikesorting.sortings",
    "common_spikesorting.spike_sorting_artifact_parameters",
    "common_spikesorting.spike_sorting_filter_parameters",
    "common_task.apparatus",
    "common_usage.insert_error",
    "common_waveforms.__waveforms",
    "common_waveforms.waveform_selection",
    "decoding._decode_spikes_selection",
    "decoding._decode_spikes_selection__decode_spikes",
    "decoding.decode_spikes_selection",
    "decoding.decode_spikes_selection__decode_spikes",
    "decoding_clusterless.__multiunit_firing_rate",
    "decoding_clusterless.__multiunit_high_synchrony_events",
    "decoding_clusterless.mark_indicator_parameters",
    "decoding_sortedspikes.__my_sorted_spikes_indicator",
    "decoding_sortedspikes.__sorted_spikes_results",
    "lfp_imported._imported_l_f_p",
    "lfp_merge.l_f_p_output__imported_l_f_p",
    "lfp_v1.__l_f_p_band_artifact_detection",
    "lfp_v1._imported_l_f_p_v1",
    "lfp_v1.l_f_p_band_artifact_detection_parameters",
    "lfp_v1.l_f_p_band_artifact_detection_selection",
    "lfp_v1.l_f_p_band_artifact_removed_interval_list",
    "position_dlc_model.__d_l_c_model_evaluation",
    "position_dlc_selection.__d_l_c_pos_video",
    "position_linearization_merge.linearized_position_output__linearized_position_v0",
    "position_merge.__position_video",
    "position_merge.position_video_selection",
    "position_position.__position_video",
    "position_position.final_position__common_pos",
    "position_position.final_position__trodes_pos_v1",
    "position_position.position_video_selection",
    "position_trodes_position.__trodes_pos_v1",
    "position_v1_dlc_model.__d_l_c_model_evaluation",
    "position_v1_dlc_selection.__d_l_c_pos_video",
    "position_v1_trodes_position.__trodes_pos_video",
    "spikesorting_artifact.artifact_detection_parameter",
    "spikesorting_curation.__units",
    "spikesorting_curation.unit_inclusion_parameters",
    "spikesorting_group_v1.sorted_spikes_group__sort_group",
    "spikesorting_merge.unit_inclusion",
    "spikesorting_recording.__sort_group_targeted_location",
    "spikesorting_recording.electrode_brain_location",
    "spikesorting_recording.spike_sorting_preprocessing_parameter",
    "spikesorting_sorting.spike_sorter_parameter",
    "spikesorting_v1_unit_inclusion.imported_unit_inclusion_v1",
]

no_class = [
    "common_behav._head_dir",
    "common_behav._lin_pos",
    "common_behav._speed",
    "common_behav.merged_position",
    "common_behav.merged_position__method_two_position",
    "common_behav.merged_position__raw_position",
    "common_behav.method_two_position",
    "common_curation.__automatic_curation_sorting",
    "common_curation.__curated_spike_sorting__unit",
    "common_curation.__selected_units",
    "common_curation.selected_units_parameters",
    "common_lab.__nwbfile_kachery",
    "common_nwbfile.__nwbfile_kachery",
    "common_nwbfile.nwbfile_kachery",
    "common_nwbfile.nwbfile_kachery_selection",
    "common_ripple.l_f_p_selection__l_f_p_electrode",
    "common_ripple.ripple_artifact_detection_selection",
    "common_session._experimenter_list",
    "common_session._experimenter_list__experimenter",
    "common_sortingview.__sortingview_workspace__sortings",
    "common_spikesorting.__curated_spike_sorting__unit",
    "common_spikesorting.__modify_sorting",
    "common_spikesorting.__spike_sorting_workspace",
    "common_spikesorting.modify_sorting_parameters",
    "common_spikesorting.modify_sorting_selection",
    "common_spikesorting.modify_sorting_selection__sortings_i_ds",
    "common_spikesorting.sorting",
    "common_spikesorting.sorting_i_d",
    "common_spikesorting.sorting_list",
    "common_spikesorting.sortings",
    "common_spikesorting.spike_sorting_artifact_parameters",
    "common_spikesorting.spike_sorting_filter_parameters",
    "common_task.apparatus",
    "decoding._decode_spikes_selection",
    "decoding._decode_spikes_selection__decode_spikes",
    "decoding.decode_spikes_selection",
    "decoding.decode_spikes_selection__decode_spikes",
    "decoding_clusterless.__multiunit_firing_rate",
    "decoding_clusterless.__multiunit_high_synchrony_events",
    "decoding_clusterless.mark_indicator_parameters",
    "decoding_sortedspikes.__my_sorted_spikes_indicator",
    "decoding_sortedspikes.__sorted_spikes_results",
    "lfp_merge.l_f_p_output__imported_l_f_p",
    "lfp_v1.__l_f_p_band_artifact_detection",
    "lfp_v1._imported_l_f_p_v1",
    "lfp_v1.l_f_p_band_artifact_detection_parameters",
    "lfp_v1.l_f_p_band_artifact_detection_selection",
    "lfp_v1.l_f_p_band_artifact_removed_interval_list",
    "position_linearization_merge.linearized_position_output__linearized_position_v0",
    "position_position.final_position__common_pos",
    "position_position.final_position__trodes_pos_v1",
    "spikesorting_artifact.artifact_detection_parameter",
    "spikesorting_curation.__units",
    "spikesorting_group_v1.sorted_spikes_group__sort_group",
    "spikesorting_merge.unit_inclusion",
    "spikesorting_recording.__sort_group_targeted_location",
    "spikesorting_recording.electrode_brain_location",
    "spikesorting_recording.spike_sorting_preprocessing_parameter",
    "spikesorting_sorting.spike_sorter_parameter",
    "spikesorting_v1_unit_inclusion.imported_unit_inclusion_v1",
]

neither = [
    "common_spikesorting.sortings",
    "common_behav._head_dir",
    "decoding.decode_spikes_selection__decode_spikes",
    "common_curation.curated_spike_sorting_selection",
    "position_position.final_position__common_pos",
    "position_merge.__position_video",
    "lfp_v1.l_f_p_band_artifact_detection_selection",
    "spikesorting_group_v1.sorted_spikes_group__sort_group",
    "spikesorting_artifact.artifact_detection_parameter",
    "common_session._experimenter_list",
    "common_behav.merged_position__method_two_position",
    "decoding._decode_spikes_selection__decode_spikes",
    "common_nwbfile.analysis_nwbfile_kachery_selection",
    "common_spikesorting.__spike_sorting_workspace",
    "decoding.decode_spikes_selection",
    "spikesorting_curation.__units",
    "common_position.__position_video",
    "common_waveforms.__waveforms",
    "decoding_sortedspikes.__sorted_spikes_results",
    "common_spikesorting.sorting",
    "common_nwbfile.nwbfile_kachery_selection",
    "position_position.__position_video",
    "common_curation.__automatic_curation_sorting",
    "common_curation.automatic_curation_parameters",
    "spikesorting_recording.spike_sorting_preprocessing_parameter",
    "position_v1_dlc_selection.__d_l_c_pos_video",
    "common_spikesorting.__curated_spike_sorting",
    "common_spikesorting.sorting_i_d",
    "position_v1_dlc_model.__d_l_c_model_evaluation",
    "common_metrics.metric_selection",
    "common_nwbfile.nwbfile_kachery",
    "position_dlc_selection.__d_l_c_pos_video",
    "common_curation.automatic_curation_selection",
    "spikesorting_sorting.spike_sorter_parameter",
    "common_ripple.ripple_artifact_detection_selection",
    "common_spikesorting.__curated_spike_sorting__unit",
    "common_behav.merged_position__raw_position",
    "common_spikesorting.__modify_sorting",
    "common_spikesorting.__spike_sorting",
    "common_curation.selected_units_parameters",
    "common_nwbfile.__nwbfile_kachery",
    "lfp_v1.l_f_p_band_artifact_removed_interval_list",
    "common_behav.merged_position",
    "position_position.position_video_selection",
    "lfp_v1.l_f_p_band_artifact_detection_parameters",
    "common_spikesorting.sorting_list",
    "spikesorting_v1_unit_inclusion.imported_unit_inclusion_v1",
    "common_curation.__curated_spike_sorting",
    "decoding_clusterless.mark_indicator_parameters",
    "common_curation.__selected_units",
    "position_dlc_model.__d_l_c_model_evaluation",
    "spikesorting_recording.__sort_group_targeted_location",
    "common_spikesorting.modify_sorting_selection__sortings_i_ds",
    "common_metrics.__quality_metrics",
    "lfp_v1._imported_l_f_p_v1",
    "position_position.final_position__trodes_pos_v1",
    "position_linearization_merge.linearized_position_output__linearized_position_v0",
    "common_ripple.l_f_p_selection__l_f_p_electrode",
    "decoding_clusterless.__multiunit_firing_rate",
    "spikesorting_recording.electrode_brain_location",
    "decoding_clusterless.__multiunit_high_synchrony_events",
    "common_spikesorting.spike_sorting_artifact_parameters",
    "common_sortingview.__sortingview_workspace__sortings",
    "lfp_v1.__l_f_p_band_artifact_detection",
    "common_behav._lin_pos",
    "position_trodes_position.__trodes_pos_v1",
    "common_ripple.l_f_p_selection",
    "common_curation.__curated_spike_sorting__unit",
    "common_waveforms.waveform_selection",
    "common_behav.method_two_position",
    "common_spikesorting.__automatic_curation",
    "common_task.apparatus",
    "common_spikesorting.automatic_curation_selection",
    "position_v1_trodes_position.__trodes_pos_video",
    "spikesorting_merge.unit_inclusion",
    "common_lab.__analysis_nwbfile_kachery",
    "common_behav._speed",
    "common_spikesorting.spike_sorting_filter_parameters",
    "common_nwbfile.__analysis_nwbfile_kachery",
    "lfp_imported._imported_l_f_p",
    "common_lab.__nwbfile_kachery",
    "decoding_sortedspikes.__my_sorted_spikes_indicator",
    "decoding._decode_spikes_selection",
    "common_nwbfile.analysis_nwbfile_kachery",
    "lfp_merge.l_f_p_output__imported_l_f_p",
    "common_session._experimenter_list__experimenter",
    "common_spikesorting.curated_spike_sorting_selection",
    "common_spikesorting.modify_sorting_parameters",
    "common_spikesorting.modify_sorting_selection",
]

no_content_has_class = [
    "spikesorting_curation.unit_inclusion_parameters",
    "common_session.session_group_session",
    "common_ephys.electrode_brain_region",
    "common_usage.insert_error",
    "position_merge.position_video_selection",
]

Result

There are ....

  1. Schemas that never had, or no longer have, a corresponding Spyglass schema
    • common_analytic_signal: last used by Xulu 05/23
    • common_artifact: last used by Alison 11/22
    • common_backup: last used by Kyu 02/22
    • common_curation: last used by Eric 02/22
    • common_metrics: last used by Jen 03/22
    • common_sortingview: last used by Alison 11/22
    • common_waveform: last used by Jen 03/22
    • decoding: last used by Eric 05/23
    • position_position: last used by Daniel 04/23
  2. Tables on used schemas that do not have a Spyglass class (e.g., common_behav._head_dir). See 'neither' in the lists above.
  3. Tables on used schemas that have a Spyglass class, declared some time ago, that remain unused
    • spikesorting_curation.unit_inclusion_parameters: declared 07/22
    • common_session.session_group_session: declared 03/22
    • common_ephys.electrode_brain_region: declared 05/22
    • position_merge.position_video_selection: declared 04/23
    • common_usage.insert_error: declared 02/24 - worth preserving for outside-lab insert errors?

Proposed

  1. Tables and schemas on shared prefixes without a Python representation are scheduled for deletion after a team-wide two-week warning.
  2. Empty tables with Spyglass classes that have existed for more than three months are marked for deprecation in version 0.6.0. Inserts into these tables can be overridden to temporarily raise a deprecation warning and then log to common_usage to let us know they're being used (see the sketch below).
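
A minimal sketch of the second item, assuming a small mixin added to the deprecated classes (the names DeprecationWarnMixin and UsageLog are placeholders, not existing Spyglass objects):

import warnings

import datajoint as dj


class DeprecationWarnMixin:
    """Warn (and optionally log) whenever a deprecated table receives an insert."""

    def insert(self, rows, **kwargs):
        warnings.warn(
            f"{self.__class__.__name__} is deprecated and scheduled for removal; "
            "please open an issue if you rely on it.",
            DeprecationWarning,
            stacklevel=2,
        )
        # Hypothetical common_usage logging; the actual table/columns are not settled here
        # UsageLog().insert1(dict(table=self.full_table_name,
        #                         dj_user=dj.config["database.user"]))
        return super().insert(rows, **kwargs)


# Usage: list the mixin before the DataJoint tier, e.g.
# class PositionVideoSelection(DeprecationWarnMixin, dj.Manual): ...
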

Steps

  • Remove unused classes from the package
  • Drop empty tables in the database
  • Drop tables with no corresponding class
@CBroz1 CBroz1 added the Database label (Issues with Frank Lab database, not Spyglass code) May 16, 2024
@edeno
Collaborator

edeno commented May 17, 2024

All these would be good to clean up as far as I know.

@CBroz1 CBroz1 self-assigned this May 18, 2024
@CBroz1 CBroz1 mentioned this issue May 21, 2024
6 tasks
@CBroz1 CBroz1 changed the title from "Deprecate unused tables" to "Remove unused tables" Jun 10, 2024
CBroz1 added a commit to CBroz1/spyglass that referenced this issue Jun 10, 2024
edeno pushed a commit that referenced this issue Jun 11, 2024
* #976

* Remove notebook reference
edeno pushed a commit that referenced this issue Jun 18, 2024
@CBroz1
Member Author

CBroz1 commented Jul 9, 2024

At @samuelbray32's suggestion, I wrote a script to look at tables on shared prefixes that have contents but no Spyglass class. They are listed at the end of the script below.

Script
import importlib
import inspect
import pkgutil

import datajoint as dj
from datajoint.user_tables import TableMeta
from tqdm import tqdm

from spyglass.utils.database_settings import SHARED_MODULES


class SearchPkg:
    def __init__(self, package_name="spyglass"):
        self.package = importlib.import_module(package_name)
        self.class_cache = set()
        self.load_cache()

    def find_subclasses(self, module, parent_class):
        for name, obj in inspect.getmembers(parent_class, inspect.isclass):
            if isinstance(obj, TableMeta) and obj.__module__ == module.__name__:
                name = obj.full_table_name.replace("`", "")
                if name in self.class_cache:
                    continue
                print(f"Found class: {name}")
                self.class_cache.add(obj.full_table_name.replace("`", ""))
                self.find_subclasses(module, obj)

    def load_cache(self, package_name="spyglass"):
        package = self.package
        for loader, module_name, is_pkg in tqdm(
            pkgutil.walk_packages(package.__path__, package.__name__ + "."),
            desc="Loading cache",
            total=64,
        ):
            module = importlib.import_module(module_name)

            if not getattr(module, "schema", None):
                continue

            print(f"Checking module: {module_name}")
            self.find_subclasses(module, module)


my_query = """
select table_schema as database_name, table_name from information_schema.tables
where table_type = 'BASE TABLE'
and table_schema not in('information_schema', 'sys', 'performance_schema', 'mysql')
order by table_schema, table_name;
"""

mysql_tbls = [
    ".".join(t)
    for t in dj.conn().query(my_query).fetchall()
    if t[0].split("_")[0] in SHARED_MODULES and t[1][0] != "~"
]
class_cache = SearchPkg().class_cache
no_class = set(mysql_tbls).difference(class_cache)

len_cache = dict()
for t in no_class:
    len_cache[t] = len(dj.FreeTable(dj.conn(), t))
non_empty = {k: v for k, v in len_cache.items() if v > 0}

non_empty = {
    "common_analytic_signal.__analytic_signal": 1,
    "common_analytic_signal.analytic_signal_parameters": 1,
    "common_analytic_signal.analytic_signal_selection": 1,
    "common_artifact.__artifact_detection": 1,
    "common_artifact.artifact_detection_parameters": 3,
    "common_artifact.artifact_detection_selection": 1,
    "common_artifact.artifact_removed_interval_list": 1,
    "common_backup.curated_spike_sorting_back_up": 413,
    "common_backup.spike_sorting_back_up": 2810,
    "common_filter.fir_filter": 3,
    "common_interval.sort_interval": 52,
    "common_lab.analysis_nwbfile": 5,
    "common_lab.nwbfile": 4,
    "common_metrics.metric_parameters": 1,
    "common_nwbfile.kachery_channel": 1,
    "common_ripple.ripple_artifact_detection_parameters": 2,
    "common_ripple.ripple_artifact_removed_interval_list": 2,
    "common_sortingview.__sortingview_workspace": 1,
    "common_spikesorting.__spike_sorting_recording": 1,
    "common_spikesorting.automatic_curation_parameters": 3,
    "common_spikesorting.sort_group": 1665,
    "common_spikesorting.sort_group__sort_group_electrode": 7427,
    "common_spikesorting.spike_sorter": 16,
    "common_spikesorting.spike_sorter_parameters": 20,
    "common_spikesorting.spike_sorting_artifact_detection_parameters": 1,
    "common_spikesorting.spike_sorting_metric_parameters": 6,
    "common_spikesorting.spike_sorting_metrics": 3,
    "common_spikesorting.spike_sorting_preprocessing_parameters": 2,
    "common_spikesorting.spike_sorting_recording_selection": 1,
    "common_spikesorting.spike_sorting_selection": 1,
    "common_spikesorting.spike_sorting_waveform_parameters": 1,
    "common_spikesorting.unit_inclusion_parameters": 2,
    "common_temp.temp": 2,
    "common_waveforms.waveform_parameters": 1,
    "decoding_clusterless.classifier_parameters": 3,
    "decoding_clusterless.multiunit_high_synchrony_events_parameters": 1,
    "decoding_clusterless.sorted_spikes_classifier_parameters": 4,
    "decoding_sortedspikes.#my_sorted_spikes_indicator_selection": 13,
    "lfp_v1.__l_f_p": 81,
    "lfp_v1.__l_f_p_band": 4,
    "lfp_v1.l_f_p_band_selection": 5,
    "lfp_v1.l_f_p_band_selection__l_f_p_band_electrode": 76,
    "lfp_v1.l_f_p_electrode_group": 65,
    "lfp_v1.l_f_p_electrode_group__l_f_p_electrode": 3248,
    "lfp_v1.l_f_p_output": 80,
    "lfp_v1.l_f_p_output__l_f_p": 80,
    "position_dlc_centroid.__d_l_c_centroid": 52,
    "position_dlc_centroid.d_l_c_centroid_params": 6,
    "position_dlc_centroid.d_l_c_centroid_selection": 60,
    "position_dlc_cohort.__d_l_c_smooth_interp_cohort": 52,
    "position_dlc_cohort.__d_l_c_smooth_interp_cohort__body_part": 206,
    "position_dlc_cohort.d_l_c_smooth_interp_cohort_selection": 52,
    "position_dlc_model.__d_l_c_model": 3,
    "position_dlc_model.__d_l_c_model__body_part": 15,
    "position_dlc_model.d_l_c_model_input": 3,
    "position_dlc_model.d_l_c_model_params": 1,
    "position_dlc_model.d_l_c_model_selection": 4,
    "position_dlc_model.d_l_c_model_source": 5,
    "position_dlc_model.d_l_c_model_source__from_import": 3,
    "position_dlc_model.d_l_c_model_source__from_upstream": 2,
    "position_dlc_orient.__d_l_c_orientation": 52,
    "position_dlc_orient.d_l_c_orientation_params": 2,
    "position_dlc_orient.d_l_c_orientation_selection": 60,
    "position_dlc_pose_estimation.__d_l_c_pose_estimation": 64,
    "position_dlc_pose_estimation.__d_l_c_pose_estimation__body_part": 320,
    "position_dlc_pose_estimation.d_l_c_pose_estimation_selection": 307,
    "position_dlc_position.__d_l_c_smooth_interp": 206,
    "position_dlc_position.d_l_c_smooth_interp_params": 5,
    "position_dlc_position.d_l_c_smooth_interp_selection": 242,
    "position_dlc_project.body_part": 7,
    "position_dlc_project.d_l_c_project": 8,
    "position_dlc_project.d_l_c_project__body_part": 37,
    "position_dlc_project.d_l_c_project__file": 55,
    "position_dlc_selection.__d_l_c_pos_v1": 36,
    "position_dlc_selection.d_l_c_pos_selection": 52,
    "position_dlc_selection.d_l_c_pos_video_params": 10,
    "position_dlc_selection.d_l_c_pos_video_selection": 26,
    "position_dlc_training.#d_l_c_model_training_params": 2,
    "position_dlc_training.__d_l_c_model_training": 2,
    "position_dlc_training.d_l_c_model_training_selection": 3,
    "position_merge.pose_output": 1,
    "position_merge.pose_output__d_l_c_pose_estimation": 1,
    "position_position.final_position": 35,
    "position_position.final_position__d_l_c_pos_v1": 26,
    "waveform_features.#waveform_features_params": 4,
    "waveform_features.unit_waveform_features_selection": 327,
}

These seem to primarily be first drafts of tables that were later renamed. Unless there are objections, I plan to drop these tables as well.
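
For reference, since these have no Spyglass class to drop through, removal would go through FreeTable, along these lines (a sketch only; common_temp.temp is just one name from the dict above):

import datajoint as dj

# Address the table directly by its schema.table name; no class required
ft = dj.FreeTable(dj.conn(), "common_temp.temp")
ft.drop()  # with safemode on, DataJoint lists dependents and asks for confirmation
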

@samuelbray32
Collaborator

Would these tables exist on other labs' databases? Just checking if the drop calls should be in the release notes.

@CBroz1
Member Author

CBroz1 commented Jul 11, 2024

Only those we removed in #1003. The rest are remnants of bad drafting practices. Ideally, tables with no Spyglass class would not be declared on shared schemas, which suggests that we should revoke the CREATE privilege for the dj_user role (sketched below).
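
For the record, that revoke would be a one-time admin statement along these lines (a sketch with assumptions: the per-prefix grant pattern and running it from an admin connection; check SHOW GRANTS for the real layout first):

import datajoint as dj

# Run as a database admin; the `common\_%` pattern is an assumed example of how
# per-prefix privileges might have been granted to the dj_user role.
print(dj.conn().query(r"SHOW GRANTS FOR 'dj_user';").fetchall())
dj.conn().query(r"REVOKE CREATE ON `common\_%`.* FROM 'dj_user';")
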

@CBroz1
Member Author

CBroz1 commented Aug 8, 2024

This task is 95% there. Here are the remaining empty tables that are part of the package:

empty_tables = [
    # PARTS - cannot drop
    "lfp_imported._imported_l_f_p",
    "lfp_merge.l_f_p_output__imported_l_f_p",
    "position_linearization_merge.linearized_position_output__linearized_position_v0",
    "spikesorting_group_v1.sorted_spikes_group__sort_group",
    # Empty is informative
    "common_usage.activity_log",
    # See #1025
    "common_session.session_group_session",
    # Can drop?
    "common_position.__position_video",
    "position_v1_dlc_model.__d_l_c_model_evaluation",
]

Can we deprecate the last 4, @edeno? This includes ...

  • Remove SessionGroup? #1025 - SessionGroup
  • common_position.PositionVideo - unused in favor of the DLC equivalent?
  • position.v1.position_dlc_model.DLCModelEvaluation - completely unused feature

@CBroz1
Member Author

CBroz1 commented Aug 9, 2024

These tables have been dropped in the short term, but those whose classes were removed from the package will be redeclared by anyone running a version that retains the deprecated classes.

Script for deleting empty tables
import os

import datajoint as dj
import pandas as pd
from datajoint.errors import DataJointError
from datajoint.schemas import VirtualModule
from datajoint.utils import to_camel_case
from networkx import NetworkXError
from pymysql import IntegrityError, OperationalError

from spyglass.utils.database_settings import SHARED_MODULES


class TableData:
    def __init__(self, tables=None):
        self.processed = False
        self.tables = (
            list(tables.keys()) if isinstance(tables, dict) else tables
        ) or self.fetch_empty_tables()
        self._by_schemas = None
        self._vmods = None

    def fetch_empty_tables(self):
        query = (
            "SELECT table_schema AS database_name, table_name"
            + "FROM information_schema.tables WHERE table_rows < 1;"
        )
        empty = dj.conn().query(query).fetchall()
        in_spy = []
        for db, tbl in empty:
            if db.split("_")[0] not in SHARED_MODULES or tbl[0] == "~":
                continue
            full_name = f"{db}.{tbl}"
            ft = dj.FreeTable(dj.conn(), full_name)
            rows = len(ft)
            if rows > 0:
                print(f"{rows:03}: {full_name}")
                continue
            in_spy.append(f"{db}.{tbl}")
        return in_spy

    @property
    def by_schemas(self):
        if self._by_schemas is None:
            self._by_schemas = {}
            for table in self.tables:
                schema, table_name = table.split(".")
                if schema not in self._by_schemas:
                    self._by_schemas[schema] = []
                self._by_schemas[schema].append(to_camel_case(table_name))
        return self._by_schemas

    @property
    def vmods(self):
        if self._vmods is None:
            self._vmods = {}
            for schema in self.by_schemas:
                self._vmods[schema] = VirtualModule("vmod", schema)
        return self._vmods

    def drop(self, table):
        schema, table_name = table.split(".")
        class_obj = getattr(self.vmods[schema], to_camel_case(table_name))
        class_obj.drop()
        self.tables.remove(table)

    def drop_all(self):
        for table in self.tables:
            try:
                self.drop(table)
            except KeyboardInterrupt:
                print("Interrupted")
            except (AttributeError, DataJointError, NetworkXError) as e:
                print(f"Error: {e}")


if __name__ == "__main__":
    data = TableData()  # defaults to fetching empty tables; pass a list/dict to override
    data.drop_all()

This could only really be addressed by revoking table declaration privileges for average users, which would result in error messages that could be fixed by deleting the table from their outdated packages. To be discussed as a group.

@CBroz1
Member Author

CBroz1 commented Sep 3, 2024

This has been resolved on our production server.

@CBroz1 CBroz1 closed this as completed Sep 3, 2024