diff --git a/doc/releasenotes/16_0_0_release_notes.md b/doc/releasenotes/16_0_0_release_notes.md index c66877e8ca2..55b30b03cfc 100644 --- a/doc/releasenotes/16_0_0_release_notes.md +++ b/doc/releasenotes/16_0_0_release_notes.md @@ -1,11 +1,32 @@ # Release of Vitess v16.0.0 + ## Major Changes ### Online DDL +#### 'mysql' strategy + Introducing a new DDL strategy: `mysql`. This strategy is a hybrid between `direct` (which is completely non-Online) and the various online strategies. A migration submitted with `mysql` strategy is _managed_. That is, it gets a migration UUID. The scheduler queues it, reviews it, runs it. The user may cancel or retry it, much like all Online DDL migrations. The difference is that when the scheduler runs the migration via normal MySQL `CREATE/ALTER/DROP TABLE` statements. The user may add `ALGORITHM=INPLACE` or `ALGORITHM=INSTANT` as they please, and the scheduler will attempt to run those as is. Some migrations will be completely blocking. See the [MySQL documentation](https://dev.mysql.com/doc/refman/8.0/en/innodb-online-ddl-operations.html). In particular, consider that for non-`INSTANT` DDLs, replicas will accumulate substantial lag. +#### '--in-order-completion' strategy flag + +A migration that runs with this DDL strategy flag may only complete if no prior migrations are still pending (pending means either `queued`, `ready` or `running` states). + +`--in-order-completion` considers the order by which migrations were submitted. For example: + +- In two sequential `ApplySchema` commands, the first is considered to be "earlier" and the second is "later". +- In a single `ApplySchema` command, and with multiple queries in `--sql` command line argument, the order of migrations is the same as the order of SQL statements. + +Internally, that order is implied by the `id` column of `_vt.schema_migrations` table. + +Note that `--in-order-completion` still allows concurrency. In fact, it is designed to work with concurrent migrations. The idea is that many migrations may run concurrently, but the way they finally `complete` is in-order. + +This lets the user submit multiple migrations which may have some dependencies (for example, introduce two views, one of which reads from the other). As long as the migrations are submitted in a valid order, the user can then expect Vitess to complete the migrations successfully (and in that order). + +This strategy flag applies to any `CREATE|DROP TABLE|VIEW` statements, and to `ALTER TABLE` with `vitess|online` strategy. + +It _does not_ apply to `ALTER TABLE` in `gh-ost`, `pt-osc`, `mysql` and `direct` strategies. diff --git a/go/test/endtoend/onlineddl/scheduler/onlineddl_scheduler_test.go b/go/test/endtoend/onlineddl/scheduler/onlineddl_scheduler_test.go index 4964c508c71..663e7352b6d 100644 --- a/go/test/endtoend/onlineddl/scheduler/onlineddl_scheduler_test.go +++ b/go/test/endtoend/onlineddl/scheduler/onlineddl_scheduler_test.go @@ -131,7 +131,7 @@ func parseTableName(t *testing.T, sql string) (tableName string) { if err != nil && errors.Is(err, io.EOF) { break } - require.NoError(t, err) + require.NoErrorf(t, err, "parsing sql: [%v]", sql) ddlStmt, ok := stmt.(sqlparser.DDLStatement) require.True(t, ok) tableName = ddlStmt.GetTable().Name.String() @@ -316,6 +316,12 @@ func testScheduler(t *testing.T) { dropT4Statement = ` DROP TABLE IF EXISTS t4_test ` + alterExtraColumn = ` + ALTER TABLE t1_test ADD COLUMN extra_column int NOT NULL DEFAULT 0 + ` + createViewDependsOnExtraColumn = ` + CREATE VIEW t1_test_view AS SELECT id, extra_column FROM t1_test + ` ) testReadTimestamp := func(t *testing.T, uuid string, timestampColumn string) (timestamp string) { @@ -489,6 +495,7 @@ func testScheduler(t *testing.T) { }) testTableSequentialTimes(t, t1uuid, t2uuid) }) + t.Run("ALTER both tables, elligible for concurrenct", func(t *testing.T) { // ALTER TABLE is allowed to run concurrently when no other ALTER is busy with copy state. Our tables are tiny so we expect to find both migrations running t1uuid = testOnlineDDLStatement(t, createParams(trivialAlterT1Statement, ddlStrategy+" --allow-concurrent --postpone-completion", "vtgate", "", "", true)) // skip wait @@ -856,6 +863,89 @@ func testScheduler(t *testing.T) { onlineddl.CheckMigrationStatus(t, &vtParams, shards, t1uuid, schema.OnlineDDLStatusComplete) }) }) + // in-order-completion + t.Run("in-order-completion: multiple drops for nonexistent tables and views", func(t *testing.T) { + u, err := schema.CreateOnlineDDLUUID() + require.NoError(t, err) + + sqls := []string{ + fmt.Sprintf("drop table if exists t4_%s", u), + fmt.Sprintf("drop view if exists t1_%s", u), + fmt.Sprintf("drop table if exists t2_%s", u), + fmt.Sprintf("drop view if exists t3_%s", u), + } + sql := strings.Join(sqls, ";") + var vuuids []string + t.Run("drop multiple tables and views, in-order-completion", func(t *testing.T) { + uuidList := testOnlineDDLStatement(t, createParams(sql, ddlStrategy+" --allow-concurrent --in-order-completion", "vtctl", "", "", true)) // skip wait + vuuids = strings.Split(uuidList, "\n") + assert.Equal(t, 4, len(vuuids)) + for _, uuid := range vuuids { + status := onlineddl.WaitForMigrationStatus(t, &vtParams, shards, uuid, normalWaitTime, schema.OnlineDDLStatusComplete, schema.OnlineDDLStatusFailed) + fmt.Printf("# Migration status (for debug purposes): <%s>\n", status) + onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusComplete) + } + }) + require.Equal(t, 4, len(vuuids)) + for i := range vuuids { + if i > 0 { + testTableCompletionTimes(t, vuuids[i-1], vuuids[i]) + } + } + }) + t.Run("in-order-completion: two new views, one depends on the other", func(t *testing.T) { + u, err := schema.CreateOnlineDDLUUID() + require.NoError(t, err) + v2name := fmt.Sprintf("v2_%s", u) + createv2 := fmt.Sprintf("create view %s as select id from t1_test", v2name) + v1name := fmt.Sprintf("v1_%s", u) + createv1 := fmt.Sprintf("create view %s as select id from %s", v1name, v2name) + + sql := fmt.Sprintf("%s; %s;", createv2, createv1) + var vuuids []string + t.Run("create two views, expect both complete", func(t *testing.T) { + uuidList := testOnlineDDLStatement(t, createParams(sql, ddlStrategy+" --allow-concurrent --in-order-completion", "vtctl", "", "", true)) // skip wait + vuuids = strings.Split(uuidList, "\n") + assert.Equal(t, 2, len(vuuids)) + for _, uuid := range vuuids { + status := onlineddl.WaitForMigrationStatus(t, &vtParams, shards, uuid, normalWaitTime, schema.OnlineDDLStatusComplete, schema.OnlineDDLStatusFailed) + fmt.Printf("# Migration status (for debug purposes): <%s>\n", status) + onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusComplete) + } + }) + require.Equal(t, 2, len(vuuids)) + testTableCompletionTimes(t, vuuids[0], vuuids[1]) + }) + t.Run("in-order-completion: new table column, new view depends on said column", func(t *testing.T) { + // The VIEW creation can only succeed when the ALTER has completed and the table has the new column + t1uuid = testOnlineDDLStatement(t, createParams(alterExtraColumn, ddlStrategy+" --allow-concurrent --postpone-completion --in-order-completion", "vtctl", "", "", true)) // skip wait + v1uuid := testOnlineDDLStatement(t, createParams(createViewDependsOnExtraColumn, ddlStrategy+" --allow-concurrent --postpone-completion --in-order-completion", "vtctl", "", "", true)) // skip wait + + testAllowConcurrent(t, "t1", t1uuid, 1) + testAllowConcurrent(t, "v1", v1uuid, 1) + t.Run("expect table running, expect view ready", func(t *testing.T) { + onlineddl.WaitForMigrationStatus(t, &vtParams, shards, t1uuid, normalWaitTime, schema.OnlineDDLStatusRunning) + onlineddl.WaitForMigrationStatus(t, &vtParams, shards, v1uuid, normalWaitTime, schema.OnlineDDLStatusQueued, schema.OnlineDDLStatusReady) + time.Sleep(ensureStateNotChangedTime) + // nothing should change + onlineddl.WaitForMigrationStatus(t, &vtParams, shards, t1uuid, normalWaitTime, schema.OnlineDDLStatusRunning) + onlineddl.WaitForMigrationStatus(t, &vtParams, shards, v1uuid, normalWaitTime, schema.OnlineDDLStatusQueued, schema.OnlineDDLStatusReady) + }) + t.Run("complete both", func(t *testing.T) { + onlineddl.CheckCompleteAllMigrations(t, &vtParams, len(shards)*2) + }) + t.Run("expect table success", func(t *testing.T) { + status := onlineddl.WaitForMigrationStatus(t, &vtParams, shards, t1uuid, normalWaitTime, schema.OnlineDDLStatusComplete, schema.OnlineDDLStatusFailed) + fmt.Printf("# Migration status (for debug purposes): <%s>\n", status) + onlineddl.CheckMigrationStatus(t, &vtParams, shards, t1uuid, schema.OnlineDDLStatusComplete) + }) + t.Run("expect view success", func(t *testing.T) { + status := onlineddl.WaitForMigrationStatus(t, &vtParams, shards, v1uuid, normalWaitTime, schema.OnlineDDLStatusComplete, schema.OnlineDDLStatusFailed) + fmt.Printf("# Migration status (for debug purposes): <%s>\n", status) + onlineddl.CheckMigrationStatus(t, &vtParams, shards, v1uuid, schema.OnlineDDLStatusComplete) + }) + testTableCompletionTimes(t, t1uuid, v1uuid) + }) } func testSingleton(t *testing.T) { diff --git a/go/vt/schema/ddl_strategy.go b/go/vt/schema/ddl_strategy.go index 247a0c8d364..d56b8004ab8 100644 --- a/go/vt/schema/ddl_strategy.go +++ b/go/vt/schema/ddl_strategy.go @@ -35,6 +35,7 @@ const ( allowZeroInDateFlag = "allow-zero-in-date" postponeLaunchFlag = "postpone-launch" postponeCompletionFlag = "postpone-completion" + inOrderCompletionFlag = "in-order-completion" allowConcurrentFlag = "allow-concurrent" preferInstantDDL = "prefer-instant-ddl" fastRangeRotationFlag = "fast-range-rotation" @@ -156,6 +157,11 @@ func (setting *DDLStrategySetting) IsPostponeCompletion() bool { return setting.hasFlag(postponeCompletionFlag) } +// IsInOrderCompletion checks if strategy options include --in-order-completion +func (setting *DDLStrategySetting) IsInOrderCompletion() bool { + return setting.hasFlag(inOrderCompletionFlag) +} + // IsAllowConcurrent checks if strategy options include --allow-concurrent func (setting *DDLStrategySetting) IsAllowConcurrent() bool { return setting.hasFlag(allowConcurrentFlag) @@ -194,6 +200,7 @@ func (setting *DDLStrategySetting) RuntimeOptions() []string { case isFlag(opt, allowZeroInDateFlag): case isFlag(opt, postponeLaunchFlag): case isFlag(opt, postponeCompletionFlag): + case isFlag(opt, inOrderCompletionFlag): case isFlag(opt, allowConcurrentFlag): case isFlag(opt, preferInstantDDL): case isFlag(opt, fastRangeRotationFlag): diff --git a/go/vt/schema/ddl_strategy_test.go b/go/vt/schema/ddl_strategy_test.go index 7aab517d846..610cb8b9ed3 100644 --- a/go/vt/schema/ddl_strategy_test.go +++ b/go/vt/schema/ddl_strategy_test.go @@ -47,6 +47,7 @@ func TestParseDDLStrategy(t *testing.T) { isSingleton bool isPostponeLaunch bool isPostponeCompletion bool + isInOrderCompletion bool isAllowConcurrent bool fastOverRevertible bool fastRangeRotation bool @@ -123,6 +124,13 @@ func TestParseDDLStrategy(t *testing.T) { runtimeOptions: "", isPostponeCompletion: true, }, + { + strategyVariable: "online --in-order-completion", + strategy: DDLStrategyOnline, + options: "--in-order-completion", + runtimeOptions: "", + isInOrderCompletion: true, + }, { strategyVariable: "online -allow-concurrent", strategy: DDLStrategyOnline, diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 483aebb9ec1..54a38899ece 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -3267,22 +3267,36 @@ func (e *Executor) runNextMigration(ctx context.Context) error { // - a migration is 'ready' but is not set to run _concurrently_, and there's a running migration that is also non-concurrent // - a migration is 'ready' but there's another migration 'running' on the exact same table getNonConflictingMigration := func() (*schema.OnlineDDL, error) { + pendingMigrationsUUIDs, err := e.readPendingMigrationsUUIDs(ctx) + if err != nil { + return nil, err + } r, err := e.execQuery(ctx, sqlSelectReadyMigrations) if err != nil { return nil, err } for _, row := range r.Named().Rows { uuid := row["migration_uuid"].ToString() - onlineDDL, _, err := e.readMigration(ctx, uuid) + onlineDDL, migrationRow, err := e.readMigration(ctx, uuid) if err != nil { return nil, err } - if conflictFound, _ := e.isAnyConflictingMigrationRunning(onlineDDL); !conflictFound { - if e.countOwnedRunningMigrations() < maxConcurrentOnlineDDLs { - // This migration seems good to go - return onlineDDL, err + isImmediateOperation := migrationRow.AsBool("is_immediate_operation", false) + + if conflictFound, _ := e.isAnyConflictingMigrationRunning(onlineDDL); conflictFound { + continue // this migration conflicts with a running one + } + if e.countOwnedRunningMigrations() >= maxConcurrentOnlineDDLs { + continue // too many running migrations + } + if isImmediateOperation && onlineDDL.StrategySetting().IsInOrderCompletion() { + // This migration is immediate: if we run it now, it will complete within a second or two at most. + if len(pendingMigrationsUUIDs) > 0 && pendingMigrationsUUIDs[0] != onlineDDL.UUID { + continue } } + // This migration seems good to go + return onlineDDL, err } // no non-conflicting migration found... // Either all ready migrations are conflicting, or there are no ready migrations... @@ -3518,6 +3532,10 @@ func (e *Executor) reviewRunningMigrations(ctx context.Context) (countRunnning i if err != nil { return countRunnning, cancellable, err } + pendingMigrationsUUIDs, err := e.readPendingMigrationsUUIDs(ctx) + if err != nil { + return countRunnning, cancellable, err + } uuidsFoundRunning := map[string]bool{} for _, row := range r.Named().Rows { uuid := row["migration_uuid"].ToString() @@ -3606,6 +3624,12 @@ func (e *Executor) reviewRunningMigrations(ctx context.Context) (countRunnning i // override. Even if migration is ready, we do not complete it. isReady = false } + if isReady && onlineDDL.StrategySetting().IsInOrderCompletion() { + if len(pendingMigrationsUUIDs) > 0 && pendingMigrationsUUIDs[0] != onlineDDL.UUID { + // wait for earlier pending migrations to complete + isReady = false + } + } if isReady { if err := e.cutOverVReplMigration(ctx, s); err != nil { _ = e.updateMigrationMessage(ctx, uuid, err.Error()) @@ -3676,12 +3700,8 @@ func (e *Executor) reviewRunningMigrations(ctx context.Context) (countRunnning i } { // now, let's look at UUIDs we own and _think_ should be running, and see which of tham _isn't_ actually running or pending... - pendingUUIDS, err := e.readPendingMigrationsUUIDs(ctx) - if err != nil { - return countRunnning, cancellable, err - } uuidsFoundPending := map[string]bool{} - for _, uuid := range pendingUUIDS { + for _, uuid := range pendingMigrationsUUIDs { uuidsFoundPending[uuid] = true } diff --git a/go/vt/vttablet/onlineddl/schema.go b/go/vt/vttablet/onlineddl/schema.go index 92c72bc94de..33ae3ab48df 100644 --- a/go/vt/vttablet/onlineddl/schema.go +++ b/go/vt/vttablet/onlineddl/schema.go @@ -85,6 +85,7 @@ const ( alterSchemaMigrationsCutoverAttempts = "ALTER TABLE _vt.schema_migrations add column cutover_attempts int unsigned NOT NULL DEFAULT 0" alterSchemaMigrationsTableImmediateOperation = "ALTER TABLE _vt.schema_migrations add column is_immediate_operation tinyint unsigned NOT NULL DEFAULT 0" alterSchemaMigrationsReviewedTimestamp = "ALTER TABLE _vt.schema_migrations add column reviewed_timestamp timestamp NULL DEFAULT NULL" + alterSchemaMigrationsCompletedTimestampResolution = "ALTER TABLE _vt.schema_migrations modify completed_timestamp timestamp(6) NULL DEFAULT NULL" sqlInsertMigration = `INSERT IGNORE INTO _vt.schema_migrations ( migration_uuid, @@ -107,7 +108,7 @@ const ( reverted_uuid, is_view ) VALUES ( - %a, %a, %a, %a, %a, %a, %a, %a, %a, NOW(), %a, %a, %a, %a, %a, %a, %a, %a, %a + %a, %a, %a, %a, %a, %a, %a, %a, %a, NOW(6), %a, %a, %a, %a, %a, %a, %a, %a, %a )` sqlSelectQueuedMigrations = `SELECT @@ -122,6 +123,7 @@ const ( WHERE migration_status='queued' AND reviewed_timestamp IS NOT NULL + ORDER BY id ` sqlUpdateMySQLTable = `UPDATE _vt.schema_migrations SET mysql_table=%a @@ -179,13 +181,13 @@ const ( migration_uuid=%a ` sqlUpdateMigrationStartedTimestamp = `UPDATE _vt.schema_migrations SET - started_timestamp =IFNULL(started_timestamp, NOW()), - liveness_timestamp=IFNULL(liveness_timestamp, NOW()) + started_timestamp =IFNULL(started_timestamp, NOW(6)), + liveness_timestamp=IFNULL(liveness_timestamp, NOW(6)) WHERE migration_uuid=%a ` sqlUpdateMigrationTimestamp = `UPDATE _vt.schema_migrations - SET %s=NOW() + SET %s=NOW(6) WHERE migration_uuid=%a ` @@ -390,6 +392,7 @@ const ( FROM _vt.schema_migrations WHERE migration_status IN ('queued', 'ready', 'running') + ORDER BY id ` sqlSelectQueuedUnreviewedMigrations = `SELECT migration_uuid @@ -397,6 +400,7 @@ const ( WHERE migration_status='queued' AND reviewed_timestamp IS NULL + ORDER BY id ` sqlSelectUncollectedArtifacts = `SELECT migration_uuid, @@ -413,7 +417,7 @@ const ( ` sqlFixCompletedTimestamp = `UPDATE _vt.schema_migrations SET - completed_timestamp=NOW() + completed_timestamp=NOW(6) WHERE migration_status='failed' AND cleanup_timestamp IS NULL @@ -456,7 +460,9 @@ const ( cancelled_timestamp, component_throttled, postpone_launch, - postpone_completion + postpone_completion, + is_immediate_operation, + reviewed_timestamp FROM _vt.schema_migrations WHERE migration_uuid=%a @@ -681,4 +687,5 @@ var ApplyDDL = []string{ alterSchemaMigrationsCutoverAttempts, alterSchemaMigrationsTableImmediateOperation, alterSchemaMigrationsReviewedTimestamp, + alterSchemaMigrationsCompletedTimestampResolution, }