From 2f5ef69ad7caa2b5910feed2541d723e7ae57cf3 Mon Sep 17 00:00:00 2001 From: Fabio Buso Date: Sun, 20 Nov 2022 01:53:28 +0100 Subject: [PATCH] Updates to the migration script --- .../default/sql/ddl/3.1.0__initial_tables.sql | 5 +- files/default/sql/ddl/updates/3.1.0.sql | 82 ++++++++++++++----- 2 files changed, 63 insertions(+), 24 deletions(-) diff --git a/files/default/sql/ddl/3.1.0__initial_tables.sql b/files/default/sql/ddl/3.1.0__initial_tables.sql index 427bebb66..0fff71a54 100644 --- a/files/default/sql/ddl/3.1.0__initial_tables.sql +++ b/files/default/sql/ddl/3.1.0__initial_tables.sql @@ -196,17 +196,16 @@ CREATE TABLE `dataset_request` ( `id` int(11) NOT NULL AUTO_INCREMENT, `dataset` int(11) NOT NULL, `projectId` int(11) NOT NULL, - `user_email` varchar(150) COLLATE latin1_general_cs NOT NULL, + `uid` INT(11) NOT NULL, `requested` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, `message` varchar(3000) COLLATE latin1_general_cs DEFAULT NULL, `message_id` int(11) DEFAULT NULL, PRIMARY KEY (`id`), UNIQUE KEY `index2` (`dataset`,`projectId`), - KEY `projectId` (`projectId`,`user_email`), KEY `message_id` (`message_id`), CONSTRAINT `FK_429_449` FOREIGN KEY (`dataset`) REFERENCES `dataset` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION, CONSTRAINT `FK_438_452` FOREIGN KEY (`message_id`) REFERENCES `message` (`id`) ON DELETE NO ACTION ON UPDATE NO ACTION, - CONSTRAINT `FK_302_451` FOREIGN KEY (`projectId`,`user_email`) REFERENCES `project_team` (`project_id`,`team_member`) ON DELETE CASCADE ON UPDATE NO ACTION + CONSTRAINT `project_team_fk_ds` FOREIGN KEY (`projectId`,`uid`) REFERENCES `project_team` (`project_id`,`uid`) ON DELETE CASCADE ON UPDATE NO ACTION ) ENGINE=ndbcluster AUTO_INCREMENT=3 DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs; /*!40101 SET character_set_client = @saved_cs_client */; diff --git a/files/default/sql/ddl/updates/3.1.0.sql b/files/default/sql/ddl/updates/3.1.0.sql index ffc842b3c..8dce3f2a2 100644 --- a/files/default/sql/ddl/updates/3.1.0.sql +++ b/files/default/sql/ddl/updates/3.1.0.sql @@ -163,12 +163,14 @@ ALTER TABLE `feature_store_kafka_connector` -- CHANGES HWORKS-262 DROP TABLE `hopsworks`.`message_to_user`; +-- to avoid repeating the same code 100s of time we create a procedure here DROP PROCEDURE IF EXISTS REPLACE_EMAIL_FK; DELIMITER // CREATE PROCEDURE REPLACE_EMAIL_FK(IN table_name VARCHAR(100), IN old_column_name VARCHAR(100), IN new_column_name VARCHAR(100), + IN old_fk_ref_table VARCHAR(100), IN index_name VARCHAR(100), IN fk_name VARCHAR(100)) BEGIN -- add the new column @@ -176,14 +178,6 @@ BEGIN PREPARE stmt1 FROM @s; EXECUTE stmt1; DEALLOCATE PREPARE stmt1; - - -- -- add fk constraint - SET @s := concat('ALTER TABLE hopsworks.', table_name, ' ADD CONSTRAINT `', fk_name - , '` FOREIGN KEY (`', new_column_name - ,'`) REFERENCES `hopsworks`.`users` (`uid`) ON DELETE NO ACTION ON UPDATE NO ACTION'); - PREPARE stmt1 FROM @s; - EXECUTE stmt1; - DEALLOCATE PREPARE stmt1; -- update the uid values based on the emails SET SQL_SAFE_UPDATES = 0; @@ -194,23 +188,44 @@ BEGIN DEALLOCATE PREPARE stmt1; SET SQL_SAFE_UPDATES = 1; + -- now that the column has been populated, modify it to be not null + -- this is needed to be able to build some unique indices/primary keys for some tables + SET @s := concat('ALTER TABLE hopsworks.', table_name, ' MODIFY COLUMN `', new_column_name ,'` INT(11) NOT NULL'); + PREPARE stmt1 FROM @s; + EXECUTE stmt1; + DEALLOCATE PREPARE stmt1; + + -- add fk constraint to the users table + SET @s := concat('ALTER TABLE hopsworks.', table_name, ' ADD CONSTRAINT `', fk_name + , '` FOREIGN KEY (`', new_column_name + ,'`) REFERENCES `hopsworks`.`users` (`uid`) ON DELETE NO ACTION ON UPDATE NO ACTION'); + PREPARE stmt1 FROM @s; + EXECUTE stmt1; + DEALLOCATE PREPARE stmt1; + -- drop the existing foreign key SET @fk_name = (SELECT k.CONSTRAINT_NAME FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE k - WHERE k.TABLE_SCHEMA = "hopsworks" AND k.TABLE_NAME = table_name AND k.COLUMN_NAME = old_column_name AND k.REFERENCED_TABLE_NAME="users"); + WHERE k.TABLE_SCHEMA = "hopsworks" AND k.TABLE_NAME = table_name AND k.COLUMN_NAME = old_column_name AND k.REFERENCED_TABLE_NAME=old_fk_ref_table); SET @s := concat('ALTER TABLE hopsworks.', table_name , ' DROP FOREIGN KEY `', @fk_name, '`'); PREPARE stmt1 FROM @s; EXECUTE stmt1; DEALLOCATE PREPARE stmt1; - -- -- drop the index created by the foreign key + -- drop the index created by the foreign key SET @s := concat('ALTER TABLE hopsworks.', table_name, ' DROP KEY `', index_name, '`'); PREPARE stmt1 FROM @s; EXECUTE stmt1; DEALLOCATE PREPARE stmt1; - -- -- drop the original column + IF table_name = "jobs" THEN + -- need to do it at this stage. earlier and the index is used by the old fk + -- later and we can't drop the column as there is an index assigned to it + ALTER TABLE `hopsworks`.`jobs` DROP KEY `creator_project_idx`; + END IF; + + -- drop the original column SET @s := concat('ALTER TABLE hopsworks.', table_name, ' DROP COLUMN `', old_column_name, '`'); PREPARE stmt1 FROM @s; EXECUTE stmt1; @@ -220,17 +235,42 @@ END // DELIMITER ; -CALL REPLACE_EMAIL_FK('executions', 'user', 'uid', 'user', 'user_fk_executions'); -CALL REPLACE_EMAIL_FK('topic_acls', 'username', 'uid', 'username', 'user_fk_kafka_acls'); -CALL REPLACE_EMAIL_FK('jupyter_settings', 'team_member', 'uid', 'team_member', 'user_fk_jp_settings'); -CALL REPLACE_EMAIL_FK('message', 'user_from', 'uid_from', 'user_from', 'user_fk_msg_from'); -CALL REPLACE_EMAIL_FK('message', 'user_to', 'uid_to', 'user_to', 'user_fk_msg_to'); -CALL REPLACE_EMAIL_FK('project', 'username', 'uid', 'user_idx', 'user_fk_project'); +-- welcome to the jungle. -ALTER TABLE `hopsworks`.`project_team` DROP PRIMARY KEY; -CALL REPLACE_EMAIL_FK('project_team', 'team_member', 'uid', 'team_member', 'user_fk_team'); -ALTER TABLE `hopsworks`.`project_team` ADD PRIMARY KEY (`project_id`,`uid`); +CALL REPLACE_EMAIL_FK('jobs', 'creator', 'uid', 'users', 'creator', 'user_fk_executions'); +-- add the index back, but with the UID column +ALTER TABLE `hopsworks`.`jobs` ADD KEY `uid_project_idx`(`uid`, `project_id`); + +CALL REPLACE_EMAIL_FK('executions', 'user', 'uid', 'users', 'user', 'user_fk_executions'); -CALL REPLACE_EMAIL_FK('rstudio_settings', 'team_member', 'uid', 'team_member', 'user_fk_jp_settings'); + +-- dataset_request table has a FK to the project_team table that references project_id and email +-- we need to clean up the email from the dataset_request first then migrate the project_team +-- table and then add back the FK to the dataset_request table pointing to the new uid column +CALL REPLACE_EMAIL_FK('dataset_request', 'user_email', 'uid', 'project_team', 'projectId', 'project_team_fk'); + +-- the primary key for the project team table should be re-created to use uid instead of email +ALTER TABLE `hopsworks`.`project_team` DROP PRIMARY KEY; +-- migrate the column +CALL REPLACE_EMAIL_FK('project_team', 'team_member', 'uid', 'users', 'team_member', 'user_fk_team'); +-- add back the primary key using the uid column +ALTER TABLE `hopsworks`.`project_team` ADD PRIMARY KEY(`project_id`, `uid`); + +-- drop the foreign key created by the procedure above for dataset_request +-- and the proper one. This is done here to avoid having too much complexity +-- on the stored procedure +ALTER TABLE `hopsworks`.`dataset_request` DROP FOREIGN KEY project_team_fk; +ALTER TABLE `hopsworks`.`dataset_request` DROP KEY project_team_fk; +ALTER TABLE `hopsworks`.`dataset_request` + ADD CONSTRAINT `project_team_fk_ds` FOREIGN KEY (`projectId`,`uid`) + REFERENCES `project_team` (`project_id`,`uid`) ON DELETE CASCADE ON UPDATE NO ACTION; + +-- These are the easy ones that should not get tangled +CALL REPLACE_EMAIL_FK('project', 'username', 'uid', 'users', 'user_idx', 'user_fk_project'); +CALL REPLACE_EMAIL_FK('topic_acls', 'username', 'uid', 'users', 'username', 'user_fk_kafka_acls'); +CALL REPLACE_EMAIL_FK('jupyter_settings', 'team_member', 'uid', 'users', 'team_member', 'user_fk_jp_settings'); +CALL REPLACE_EMAIL_FK('rstudio_settings', 'team_member', 'uid', 'users', 'team_member', 'user_fk_jp_settings'); +CALL REPLACE_EMAIL_FK('message', 'user_from', 'uid_from', 'users', 'user_from', 'user_fk_msg_from'); +CALL REPLACE_EMAIL_FK('message', 'user_to', 'uid_to', 'users', 'user_to', 'user_fk_msg_to'); -- END CHANGES HWORKS-262 \ No newline at end of file