diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt index 80c3d3353ff2..13f15d7ca395 100644 --- a/docs/generated/settings/settings-for-tenants.txt +++ b/docs/generated/settings/settings-for-tenants.txt @@ -170,4 +170,4 @@ trace.debug.enable boolean false if set, traces for recent requests can be seen trace.jaeger.agent string the address of a Jaeger agent to receive traces using the Jaeger UDP Thrift protocol, as :. If no port is specified, 6381 will be used. trace.opentelemetry.collector string address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as :. If no port is specified, 4317 will be used. trace.zipkin.collector string the address of a Zipkin instance to receive traces, as :. If no port is specified, 9411 will be used. -version version 21.2-36 set the active cluster version in the format '.' +version version 21.2-42 set the active cluster version in the format '.' diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html index c000c4280f56..a670872ebe17 100644 --- a/docs/generated/settings/settings.html +++ b/docs/generated/settings/settings.html @@ -177,6 +177,6 @@ trace.jaeger.agentstringthe address of a Jaeger agent to receive traces using the Jaeger UDP Thrift protocol, as :. If no port is specified, 6381 will be used. trace.opentelemetry.collectorstringaddress of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as :. If no port is specified, 4317 will be used. trace.zipkin.collectorstringthe address of a Zipkin instance to receive traces, as :. If no port is specified, 9411 will be used. -versionversion21.2-36set the active cluster version in the format '.' +versionversion21.2-42set the active cluster version in the format '.' diff --git a/pkg/base/test_server_args.go b/pkg/base/test_server_args.go index 7fad3f90d74a..8cebd7438b99 100644 --- a/pkg/base/test_server_args.go +++ b/pkg/base/test_server_args.go @@ -139,9 +139,14 @@ type TestServerArgs struct { // If set, a TraceDir is initialized at the provided path. TraceDir string - // If set, the span configs infrastructure will be enabled. This is - // equivalent to setting COCKROACH_EXPERIMENTAL_SPAN_CONFIGS. - EnableSpanConfigs bool + // DisableSpanConfigs disables the use of the span configs infrastructure + // (in favor of the gossiped system config span). It's equivalent to setting + // COCKROACH_DISABLE_SPAN_CONFIGS, and is only intended for tests written + // with the system config span in mind. + // + // TODO(irfansharif): Remove all uses of this when we rip out the system + // config span. 
+ DisableSpanConfigs bool } // TestClusterArgs contains the parameters one can set when creating a test diff --git a/pkg/ccl/backupccl/BUILD.bazel b/pkg/ccl/backupccl/BUILD.bazel index 511c1af1dda9..c606729ffe13 100644 --- a/pkg/ccl/backupccl/BUILD.bazel +++ b/pkg/ccl/backupccl/BUILD.bazel @@ -199,6 +199,7 @@ go_test( "//pkg/security/securitytest", "//pkg/server", "//pkg/settings/cluster", + "//pkg/spanconfig", "//pkg/sql", "//pkg/sql/catalog", "//pkg/sql/catalog/catalogkeys", diff --git a/pkg/ccl/backupccl/backup_test.go b/pkg/ccl/backupccl/backup_test.go index 17ab58ef81c8..3b63be137c98 100644 --- a/pkg/ccl/backupccl/backup_test.go +++ b/pkg/ccl/backupccl/backup_test.go @@ -1933,7 +1933,12 @@ func TestBackupRestoreControlJob(t *testing.T) { // force every call to update defer jobs.TestingSetProgressThresholds()() - serverArgs := base.TestServerArgs{Knobs: base.TestingKnobs{JobsTestingKnobs: jobs.NewTestingKnobsWithShortIntervals()}} + serverArgs := base.TestServerArgs{ + DisableSpanConfigs: true, + Knobs: base.TestingKnobs{ + JobsTestingKnobs: jobs.NewTestingKnobsWithShortIntervals(), + }, + } // Disable external processing of mutations so that the final check of // crdb_internal.tables is guaranteed to not be cleaned up. Although this // was never observed by a stress test, it is here for safety. diff --git a/pkg/ccl/backupccl/full_cluster_backup_restore_test.go b/pkg/ccl/backupccl/full_cluster_backup_restore_test.go index b3429b5ec9f3..5791a2b1be58 100644 --- a/pkg/ccl/backupccl/full_cluster_backup_restore_test.go +++ b/pkg/ccl/backupccl/full_cluster_backup_restore_test.go @@ -23,6 +23,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/security" + "github.com/cockroachdb/cockroach/pkg/spanconfig" "github.com/cockroachdb/cockroach/pkg/sql" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catalogkv" "github.com/cockroachdb/cockroach/pkg/sql/catalog/systemschema" @@ -45,9 +46,20 @@ func TestFullClusterBackup(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) + params := base.TestClusterArgs{ + ServerArgs: base.TestServerArgs{ + Knobs: base.TestingKnobs{ + SpanConfig: &spanconfig.TestingKnobs{ + // We compare job progress before and after a restore. Disable + // the automatic jobs checkpointing which could possibly mutate + // the progress data during the backup/restore process. 
+ JobDisablePersistingCheckpoints: true, + }, + }, + }} const numAccounts = 10 - tcBackup, sqlDB, tempDir, cleanupFn := BackupRestoreTestSetup(t, singleNode, numAccounts, InitManualReplication) - tcRestore, sqlDBRestore, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode, tempDir, InitManualReplication, base.TestClusterArgs{}) + tcBackup, sqlDB, tempDir, cleanupFn := backupRestoreTestSetupWithParams(t, singleNode, numAccounts, InitManualReplication, params) + tcRestore, sqlDBRestore, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode, tempDir, InitManualReplication, params) defer cleanupFn() defer cleanupEmptyCluster() diff --git a/pkg/ccl/importccl/exportcsv_test.go b/pkg/ccl/importccl/exportcsv_test.go index 6809ec2c525b..6ecd664a2564 100644 --- a/pkg/ccl/importccl/exportcsv_test.go +++ b/pkg/ccl/importccl/exportcsv_test.go @@ -50,7 +50,13 @@ func setupExportableBank(t *testing.T, nodes, rows int) (*sqlutils.SQLRunner, st dir, cleanupDir := testutils.TempDir(t) tc := testcluster.StartTestCluster(t, nodes, - base.TestClusterArgs{ServerArgs: base.TestServerArgs{ExternalIODir: dir, UseDatabase: "test"}}, + base.TestClusterArgs{ + ServerArgs: base.TestServerArgs{ + ExternalIODir: dir, + UseDatabase: "test", + DisableSpanConfigs: true, + }, + }, ) conn := tc.Conns[0] db := sqlutils.MakeSQLRunner(conn) diff --git a/pkg/ccl/migrationccl/migrationsccl/seed_tenant_span_configs_external_test.go b/pkg/ccl/migrationccl/migrationsccl/seed_tenant_span_configs_external_test.go index 1e38a6e3ceb7..1823731faec1 100644 --- a/pkg/ccl/migrationccl/migrationsccl/seed_tenant_span_configs_external_test.go +++ b/pkg/ccl/migrationccl/migrationsccl/seed_tenant_span_configs_external_test.go @@ -36,7 +36,6 @@ func TestPreSeedSpanConfigsWrittenWhenActive(t *testing.T) { ctx := context.Background() tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ ServerArgs: base.TestServerArgs{ - EnableSpanConfigs: true, // we use spanconfig.KVAccessor to check if its contents are as we'd expect Knobs: base.TestingKnobs{ Server: &server.TestingKnobs{ DisableAutomaticVersionUpgrade: 1, @@ -89,7 +88,6 @@ func TestSeedTenantSpanConfigs(t *testing.T) { ctx := context.Background() tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ ServerArgs: base.TestServerArgs{ - EnableSpanConfigs: true, // we use spanconfig.KVAccessor to check if its contents are as we'd expect Knobs: base.TestingKnobs{ Server: &server.TestingKnobs{ DisableAutomaticVersionUpgrade: 1, @@ -157,7 +155,6 @@ func TestSeedTenantSpanConfigsWithExistingEntry(t *testing.T) { ctx := context.Background() tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ ServerArgs: base.TestServerArgs{ - EnableSpanConfigs: true, // we use spanconfig.KVAccessor to check if its contents are as we'd expect Knobs: base.TestingKnobs{ Server: &server.TestingKnobs{ DisableAutomaticVersionUpgrade: 1, diff --git a/pkg/ccl/serverccl/statusccl/BUILD.bazel b/pkg/ccl/serverccl/statusccl/BUILD.bazel index ffc142050c38..afc78d0bb431 100644 --- a/pkg/ccl/serverccl/statusccl/BUILD.bazel +++ b/pkg/ccl/serverccl/statusccl/BUILD.bazel @@ -40,6 +40,7 @@ go_test( "//pkg/security/securitytest", "//pkg/server", "//pkg/server/serverpb", + "//pkg/spanconfig", "//pkg/sql/catalog/catconstants", "//pkg/sql/catalog/descpb", "//pkg/sql/sem/tree", diff --git a/pkg/ccl/serverccl/statusccl/tenant_status_test.go b/pkg/ccl/serverccl/statusccl/tenant_status_test.go index 1d07a26c0b71..9eb14676c565 100644 --- a/pkg/ccl/serverccl/statusccl/tenant_status_test.go +++ 
b/pkg/ccl/serverccl/statusccl/tenant_status_test.go @@ -25,6 +25,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/security" "github.com/cockroachdb/cockroach/pkg/server/serverpb" + "github.com/cockroachdb/cockroach/pkg/spanconfig" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catconstants" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" @@ -91,6 +92,9 @@ func TestTenantCannotSeeNonTenantStats(t *testing.T) { ctx := context.Background() serverParams, _ := tests.CreateTestServerParams() + serverParams.Knobs.SpanConfig = &spanconfig.TestingKnobs{ + ManagerDisableJobCreation: true, // TODO(irfansharif): #74919. + } testCluster := serverutils.StartNewTestCluster(t, 3 /* numNodes */, base.TestClusterArgs{ ServerArgs: serverParams, }) diff --git a/pkg/ccl/spanconfigccl/spanconfigcomparedccl/datadriven_test.go b/pkg/ccl/spanconfigccl/spanconfigcomparedccl/datadriven_test.go index 305f1f3b40fe..f05e6b4c7bbb 100644 --- a/pkg/ccl/spanconfigccl/spanconfigcomparedccl/datadriven_test.go +++ b/pkg/ccl/spanconfigccl/spanconfigcomparedccl/datadriven_test.go @@ -95,7 +95,6 @@ func TestDataDriven(t *testing.T) { } tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ ServerArgs: base.TestServerArgs{ - EnableSpanConfigs: true, Knobs: base.TestingKnobs{ JobsTestingKnobs: jobs.NewTestingKnobsWithShortIntervals(), // speeds up test SpanConfig: scKnobs, diff --git a/pkg/ccl/spanconfigccl/spanconfigreconcilerccl/datadriven_test.go b/pkg/ccl/spanconfigccl/spanconfigreconcilerccl/datadriven_test.go index 90bed1d76335..10632465338d 100644 --- a/pkg/ccl/spanconfigccl/spanconfigreconcilerccl/datadriven_test.go +++ b/pkg/ccl/spanconfigccl/spanconfigreconcilerccl/datadriven_test.go @@ -88,7 +88,6 @@ func TestDataDriven(t *testing.T) { } tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ ServerArgs: base.TestServerArgs{ - EnableSpanConfigs: true, Knobs: base.TestingKnobs{ JobsTestingKnobs: jobs.NewTestingKnobsWithShortIntervals(), // speeds up test SpanConfig: scKnobs, diff --git a/pkg/ccl/spanconfigccl/spanconfigsqltranslatorccl/datadriven_test.go b/pkg/ccl/spanconfigccl/spanconfigsqltranslatorccl/datadriven_test.go index 4ac3898895e9..705f8bf9affa 100644 --- a/pkg/ccl/spanconfigccl/spanconfigsqltranslatorccl/datadriven_test.go +++ b/pkg/ccl/spanconfigccl/spanconfigsqltranslatorccl/datadriven_test.go @@ -80,7 +80,6 @@ func TestDataDriven(t *testing.T) { datadriven.Walk(t, testutils.TestDataPath(t), func(t *testing.T, path string) { tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ ServerArgs: base.TestServerArgs{ - EnableSpanConfigs: true, Knobs: base.TestingKnobs{ SpanConfig: scKnobs, }, diff --git a/pkg/cli/testdata/doctor/test_examine_cluster b/pkg/cli/testdata/doctor/test_examine_cluster index 193f57256243..9acea0492aed 100644 --- a/pkg/cli/testdata/doctor/test_examine_cluster +++ b/pkg/cli/testdata/doctor/test_examine_cluster @@ -3,5 +3,5 @@ debug doctor examine cluster debug doctor examine cluster Examining 43 descriptors and 42 namespace entries... ParentID 50, ParentSchemaID 51: relation "foo" (55): expected matching namespace entry, found none -Examining 3 jobs... +Examining 4 jobs... 
ERROR: validation failed diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go index f80353e7e6b8..91d7647ccf7b 100644 --- a/pkg/clusterversion/cockroach_versions.go +++ b/pkg/clusterversion/cockroach_versions.go @@ -229,6 +229,16 @@ const ( // system.protected_ts_records table that describes what is protected by the // record. AlterSystemProtectedTimestampAddColumn + // EnsureSpanConfigReconciliation ensures that the host tenant has run its + // reconciliation process at least once. + EnsureSpanConfigReconciliation + // EnsureSpanConfigSubscription ensures that all KV nodes are subscribed to + // the global span configuration state, observing the entries installed as + // in EnsureSpanConfigReconciliation. + EnsureSpanConfigSubscription + // EnableSpanConfigStore enables the use of the span configs infrastructure + // in KV. + EnableSpanConfigStore // ************************************************* // Step (1): Add new versions here. @@ -337,6 +347,18 @@ var versionsSingleton = keyedVersions{ Key: AlterSystemProtectedTimestampAddColumn, Version: roachpb.Version{Major: 21, Minor: 2, Internal: 36}, }, + { + Key: EnsureSpanConfigReconciliation, + Version: roachpb.Version{Major: 21, Minor: 2, Internal: 38}, + }, + { + Key: EnsureSpanConfigSubscription, + Version: roachpb.Version{Major: 21, Minor: 2, Internal: 40}, + }, + { + Key: EnableSpanConfigStore, + Version: roachpb.Version{Major: 21, Minor: 2, Internal: 42}, + }, // ************************************************* // Step (2): Add new versions here. diff --git a/pkg/clusterversion/key_string.go b/pkg/clusterversion/key_string.go index abd04ad7fb6d..881b8600e993 100644 --- a/pkg/clusterversion/key_string.go +++ b/pkg/clusterversion/key_string.go @@ -27,11 +27,14 @@ func _() { _ = x[SeedTenantSpanConfigs-16] _ = x[PublicSchemasWithDescriptors-17] _ = x[AlterSystemProtectedTimestampAddColumn-18] + _ = x[EnsureSpanConfigReconciliation-19] + _ = x[EnsureSpanConfigSubscription-20] + _ = x[EnableSpanConfigStore-21] } -const _Key_name = "V21_2Start22_1TargetBytesAvoidExcessAvoidDrainingNamesDrainingNamesMigrationTraceIDDoesntImplyStructuredRecordingAlterSystemTableStatisticsAddAvgSizeColAlterSystemStmtDiagReqsMVCCAddSSTableInsertPublicSchemaNamespaceEntryOnRestoreUnsplitRangesInAsyncGCJobsValidateGrantOptionPebbleFormatBlockPropertyCollectorProbeRequestSelectRPCsTakeTracingInfoInbandPreSeedTenantSpanConfigsSeedTenantSpanConfigsPublicSchemasWithDescriptorsAlterSystemProtectedTimestampAddColumn" +const _Key_name = "V21_2Start22_1TargetBytesAvoidExcessAvoidDrainingNamesDrainingNamesMigrationTraceIDDoesntImplyStructuredRecordingAlterSystemTableStatisticsAddAvgSizeColAlterSystemStmtDiagReqsMVCCAddSSTableInsertPublicSchemaNamespaceEntryOnRestoreUnsplitRangesInAsyncGCJobsValidateGrantOptionPebbleFormatBlockPropertyCollectorProbeRequestSelectRPCsTakeTracingInfoInbandPreSeedTenantSpanConfigsSeedTenantSpanConfigsPublicSchemasWithDescriptorsAlterSystemProtectedTimestampAddColumnEnsureSpanConfigReconciliationEnsureSpanConfigSubscriptionEnableSpanConfigStore" -var _Key_index = [...]uint16{0, 5, 14, 36, 54, 76, 113, 152, 175, 189, 230, 256, 275, 309, 321, 352, 376, 397, 425, 463} +var _Key_index = [...]uint16{0, 5, 14, 36, 54, 76, 113, 152, 175, 189, 230, 256, 275, 309, 321, 352, 376, 397, 425, 463, 493, 521, 542} func (i Key) String() string { if i < 0 || i >= Key(len(_Key_index)-1) { diff --git a/pkg/jobs/jobspb/jobs.proto b/pkg/jobs/jobspb/jobs.proto index 2c981b53bfd8..9c7d440093dd 100644 --- 
a/pkg/jobs/jobspb/jobs.proto +++ b/pkg/jobs/jobspb/jobs.proto @@ -533,6 +533,7 @@ message AutoSpanConfigReconciliationDetails { // AutoSpanConfigReconciliationProgress is the persisted progress for the span // config reconciliation job. message AutoSpanConfigReconciliationProgress { + util.hlc.Timestamp checkpoint = 1 [(gogoproto.nullable) = false]; } message ResumeSpanList { diff --git a/pkg/keys/constants.go b/pkg/keys/constants.go index ae537cc96196..b9605c9c18dc 100644 --- a/pkg/keys/constants.go +++ b/pkg/keys/constants.go @@ -287,6 +287,7 @@ var ( // ScratchRangeMin is a key used in tests to write arbitrary data without // overlapping with meta, system or tenant ranges. ScratchRangeMin = TableDataMax + ScratchRangeMax = TenantPrefix // // SystemConfigSplitKey is the key to split at immediately prior to the // system config span. NB: Split keys need to be valid column keys. diff --git a/pkg/kv/kvserver/client_raft_test.go b/pkg/kv/kvserver/client_raft_test.go index f4055d06dbc4..ca1e980c409b 100644 --- a/pkg/kv/kvserver/client_raft_test.go +++ b/pkg/kv/kvserver/client_raft_test.go @@ -38,6 +38,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/server" "github.com/cockroachdb/cockroach/pkg/server/serverpb" "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/spanconfig" "github.com/cockroachdb/cockroach/pkg/storage" "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/skip" @@ -3134,6 +3135,13 @@ func TestDecommission(t *testing.T) { ctx := context.Background() tc := testcluster.StartTestCluster(t, 5, base.TestClusterArgs{ ReplicationMode: base.ReplicationAuto, + ServerArgs: base.TestServerArgs{ + Knobs: base.TestingKnobs{ + SpanConfig: &spanconfig.TestingKnobs{ + ConfigureScratchRange: true, + }, + }, + }, }) defer tc.Stopper().Stop(ctx) diff --git a/pkg/kv/kvserver/client_replica_test.go b/pkg/kv/kvserver/client_replica_test.go index f83d0201d0da..e22daa3a04dc 100644 --- a/pkg/kv/kvserver/client_replica_test.go +++ b/pkg/kv/kvserver/client_replica_test.go @@ -2073,6 +2073,9 @@ func TestSystemZoneConfigs(t *testing.T) { DisableLoadBasedSplitting: true, }, }, + // This test was written for the gossip-backed SystemConfigSpan + // infrastructure. + DisableSpanConfigs: true, // Scan like a bat out of hell to ensure replication and replica GC // happen in a timely manner. ScanInterval: 50 * time.Millisecond, diff --git a/pkg/kv/kvserver/client_spanconfigs_test.go b/pkg/kv/kvserver/client_spanconfigs_test.go index ce06c66cb436..7d7a91adbbac 100644 --- a/pkg/kv/kvserver/client_spanconfigs_test.go +++ b/pkg/kv/kvserver/client_spanconfigs_test.go @@ -43,7 +43,6 @@ func TestSpanConfigUpdateAppliedToReplica(t *testing.T) { ctx := context.Background() args := base.TestServerArgs{ - EnableSpanConfigs: true, Knobs: base.TestingKnobs{ Store: &kvserver.StoreTestingKnobs{ DisableMergeQueue: true, diff --git a/pkg/kv/kvserver/client_split_test.go b/pkg/kv/kvserver/client_split_test.go index bd81cdb34f55..7f94dd73b1e0 100644 --- a/pkg/kv/kvserver/client_split_test.go +++ b/pkg/kv/kvserver/client_split_test.go @@ -1014,6 +1014,8 @@ func TestStoreZoneUpdateAndRangeSplit(t *testing.T) { Knobs: base.TestingKnobs{ Store: &kvserver.StoreTestingKnobs{ DisableMergeQueue: true, + // This test was written with the SystemConfigSpan in mind. 
+ UseSystemConfigSpanForQueues: true, }, }, }) @@ -1083,6 +1085,8 @@ func TestStoreRangeSplitWithMaxBytesUpdate(t *testing.T) { Knobs: base.TestingKnobs{ Store: &kvserver.StoreTestingKnobs{ DisableMergeQueue: true, + // This test was written with the system config span in mind. + UseSystemConfigSpanForQueues: true, }, }, }) @@ -1308,7 +1312,10 @@ func TestStoreRangeSystemSplits(t *testing.T) { // Intentionally leave the merge queue enabled. This indirectly tests that the // merge queue respects these split points. ctx := context.Background() - s, _, _ := serverutils.StartServer(t, base.TestServerArgs{}) + s, _, _ := serverutils.StartServer(t, base.TestServerArgs{ + // This test was written with the system config span in mind. + DisableSpanConfigs: true, + }) defer s.Stopper().Stop(ctx) userTableMax := keys.TestingUserDescID(4) @@ -3557,8 +3564,10 @@ func TestStoreRangeSplitAndMergeWithGlobalReads(t *testing.T) { serv, _, _ := serverutils.StartServer(t, base.TestServerArgs{ Knobs: base.TestingKnobs{ Store: &kvserver.StoreTestingKnobs{ - DisableMergeQueue: true, - TestingResponseFilter: respFilter, + DisableMergeQueue: true, + // This test was written with the system config span in mind. + UseSystemConfigSpanForQueues: true, + TestingResponseFilter: respFilter, }, }, }) diff --git a/pkg/kv/kvserver/replicate_queue_test.go b/pkg/kv/kvserver/replicate_queue_test.go index b8c1abbafa91..c9ea04bff06c 100644 --- a/pkg/kv/kvserver/replicate_queue_test.go +++ b/pkg/kv/kvserver/replicate_queue_test.go @@ -33,6 +33,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness/livenesspb" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/server" + "github.com/cockroachdb/cockroach/pkg/spanconfig" "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/skip" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" @@ -414,7 +415,16 @@ func TestReplicateQueueDecommissioningNonVoters(t *testing.T) { // Setup a scratch range on a test cluster with 2 non-voters and 1 voter. setupFn := func(t *testing.T) (*testcluster.TestCluster, roachpb.RangeDescriptor) { tc := testcluster.StartTestCluster(t, 5, - base.TestClusterArgs{ReplicationMode: base.ReplicationAuto}, + base.TestClusterArgs{ + ReplicationMode: base.ReplicationAuto, + ServerArgs: base.TestServerArgs{ + Knobs: base.TestingKnobs{ + SpanConfig: &spanconfig.TestingKnobs{ + ConfigureScratchRange: true, + }, + }, + }, + }, ) scratchKey := tc.ScratchRange(t) @@ -529,6 +539,9 @@ func TestReplicateQueueDeadNonVoters(t *testing.T) { ReplicationMode: base.ReplicationAuto, ServerArgs: base.TestServerArgs{ Knobs: base.TestingKnobs{ + SpanConfig: &spanconfig.TestingKnobs{ + ConfigureScratchRange: true, + }, NodeLiveness: kvserver.NodeLivenessTestingKnobs{ StorePoolNodeLivenessFn: func( id roachpb.NodeID, now time.Time, duration time.Duration, @@ -678,6 +691,11 @@ func TestReplicateQueueSwapVotersWithNonVoters(t *testing.T) { }, }, }, + Knobs: base.TestingKnobs{ + SpanConfig: &spanconfig.TestingKnobs{ + ConfigureScratchRange: true, + }, + }, } } clusterArgs := base.TestClusterArgs{ diff --git a/pkg/kv/kvserver/store.go b/pkg/kv/kvserver/store.go index f2e91f9689fd..893094aaa550 100644 --- a/pkg/kv/kvserver/store.go +++ b/pkg/kv/kvserver/store.go @@ -1049,12 +1049,12 @@ type StoreConfig struct { // tests. KVMemoryMonitor *mon.BytesMonitor - // SpanConfigsEnabled determines whether we're able to use the span configs - // infrastructure. 
- SpanConfigsEnabled bool + // SpanConfigsDisabled disables the use of the span configs + // infrastructure, falling back to the gossiped system config span. + SpanConfigsDisabled bool // Used to subscribe to span configuration changes, keeping up-to-date a // data structure useful for retrieving span configs. Only available if - // SpanConfigsEnabled. + // SpanConfigsDisabled is unset. SpanConfigSubscriber spanconfig.KVSubscriber // KVAdmissionController is an optional field used for admission control. @@ -1945,7 +1945,7 @@ func (s *Store) Start(ctx context.Context, stopper *stop.Stopper) error { }) } - if s.cfg.SpanConfigsEnabled { + if !s.cfg.SpanConfigsDisabled { s.cfg.SpanConfigSubscriber.Subscribe(func(update roachpb.Span) { s.onSpanConfigUpdate(ctx, update) }) @@ -2107,11 +2107,37 @@ func (s *Store) GetConfReader(ctx context.Context) (spanconfig.StoreReader, erro return nil, errSysCfgUnavailable } - if s.cfg.SpanConfigsEnabled && spanconfigstore.EnabledSetting.Get(&s.ClusterSettings().SV) { - return s.cfg.SpanConfigSubscriber, nil + // We need a version gate here before switching over to the span configs + // infrastructure. In a mixed-version cluster we need to wait for + // the host tenant to have fully populated `system.span_configurations` + // (read: reconciled) at least once before using it as a view for all + // split/config decisions. + _ = clusterversion.EnsureSpanConfigReconciliation + // + // We also want to ensure that the KVSubscriber on each store is at least as + // up-to-date as some full reconciliation timestamp. + _ = clusterversion.EnsureSpanConfigSubscription + // + // Without a version gate, it would be possible for a replica on a + // new-binary-server to apply the static fallback config (assuming no + // entries in `system.span_configurations`), in violation of explicit + // configs directly set by the user. Though unlikely, it's also possible for + // us to merge all ranges into a single one -- with no entries in + // system.span_configurations, the infrastructure can erroneously conclude + // that there are zero split points. + // + // We achieve all this through a three-step migration process, culminating + // in the following cluster version gate: + _ = clusterversion.EnableSpanConfigStore + + if s.cfg.SpanConfigsDisabled || + !spanconfigstore.EnabledSetting.Get(&s.ClusterSettings().SV) || + !s.cfg.Settings.Version.IsActive(ctx, clusterversion.EnableSpanConfigStore) || + s.TestingKnobs().UseSystemConfigSpanForQueues { + return sysCfg, nil } - return sysCfg, nil + return s.cfg.SpanConfigSubscriber, nil } // startLeaseRenewer runs an infinite loop in a goroutine which regularly @@ -3346,6 +3372,25 @@ func (s *Store) PurgeOutdatedReplicas(ctx context.Context, version roachpb.Versi return g.Wait() } +// WaitForSpanConfigSubscription waits until the store is wholly subscribed to +// the global span configurations state. +func (s *Store) WaitForSpanConfigSubscription(ctx context.Context) error { + if s.cfg.SpanConfigsDisabled { + return nil // nothing to do here + } + + for r := retry.StartWithCtx(ctx, base.DefaultRetryOptions()); r.Next(); { + if !s.cfg.SpanConfigSubscriber.LastUpdated().IsEmpty() { + return nil + } + + log.Warningf(ctx, "waiting for span config subscription...") + continue + } + + return errors.Newf("unable to subscribe to span configs") +} + // registerLeaseholder registers the provided replica as a leaseholder in the // node's closed timestamp side transport.
func (s *Store) registerLeaseholder( diff --git a/pkg/kv/kvserver/store_test.go b/pkg/kv/kvserver/store_test.go index d6025ea6d3ff..2760897ea23e 100644 --- a/pkg/kv/kvserver/store_test.go +++ b/pkg/kv/kvserver/store_test.go @@ -189,6 +189,12 @@ func createTestStoreWithoutStart( // and merge queues separately to cover event-driven splits and merges. cfg.TestingKnobs.DisableSplitQueue = true cfg.TestingKnobs.DisableMergeQueue = true + // When using the span configs infrastructure, we initialize dependencies + // (spanconfig.KVSubscriber) outside of pkg/kv/kvserver due to circular + // dependency reasons. Tests using this harness can probably be refactored + // to do the same (with some effort). That's unlikely to happen soon, so + // let's continue to use the system config span. + cfg.SpanConfigsDisabled = true eng := storage.NewDefaultInMemForTesting() stopper.AddCloser(eng) require.Nil(t, cfg.Transport) diff --git a/pkg/kv/kvserver/testing_knobs.go b/pkg/kv/kvserver/testing_knobs.go index 8e21061a3373..9c693b93f637 100644 --- a/pkg/kv/kvserver/testing_knobs.go +++ b/pkg/kv/kvserver/testing_knobs.go @@ -371,6 +371,10 @@ type StoreTestingKnobs struct { // MakeSystemConfigSpanUnavailableToQueues makes the system config span // unavailable to queues that ask for it. MakeSystemConfigSpanUnavailableToQueues bool + // UseSystemConfigSpanForQueues uses the system config span infrastructure + // for internal queues (as opposed to the span configs infrastructure). This + // is used only for (old) tests written with the system config span in mind. + UseSystemConfigSpanForQueues bool } // ModuleTestingKnobs is part of the base.ModuleTestingKnobs interface. diff --git a/pkg/migration/migrations/BUILD.bazel b/pkg/migration/migrations/BUILD.bazel index e9911386e6a4..8d78e75d2c22 100644 --- a/pkg/migration/migrations/BUILD.bazel +++ b/pkg/migration/migrations/BUILD.bazel @@ -8,6 +8,7 @@ go_library( "alter_table_statistics_avg_size.go", "ensure_no_draining_names.go", "insert_missing_public_schema_namespace_entry.go", + "migrate_span_configs.go", "migrations.go", "public_schema_migration.go", "schema_changes.go", @@ -24,6 +25,7 @@ go_library( "//pkg/migration", "//pkg/roachpb:with-mocks", "//pkg/security", + "//pkg/server/serverpb", "//pkg/sql", "//pkg/sql/catalog", "//pkg/sql/catalog/catalogkeys", @@ -56,6 +58,7 @@ go_test( "ensure_no_draining_names_external_test.go", "helpers_test.go", "main_test.go", + "migrate_span_configs_test.go", "public_schema_migration_external_test.go", ], data = glob(["testdata/**"]), @@ -67,10 +70,12 @@ go_test( "//pkg/keys", "//pkg/kv", "//pkg/kv/kvserver", + "//pkg/roachpb:with-mocks", "//pkg/security", "//pkg/security/securitytest", "//pkg/server", "//pkg/settings/cluster", + "//pkg/spanconfig", "//pkg/sql", "//pkg/sql/catalog", "//pkg/sql/catalog/catalogkv", @@ -82,6 +87,7 @@ go_test( "//pkg/sql/sem/tree", "//pkg/sql/sqlutil", "//pkg/sql/types", + "//pkg/testutils", "//pkg/testutils/serverutils", "//pkg/testutils/skip", "//pkg/testutils/sqlutils", diff --git a/pkg/migration/migrations/migrate_span_configs.go b/pkg/migration/migrations/migrate_span_configs.go new file mode 100644 index 000000000000..33f07ea8fd49 --- /dev/null +++ b/pkg/migration/migrations/migrate_span_configs.go @@ -0,0 +1,85 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package migrations + +import ( + "context" + "time" + + "github.com/cockroachdb/cockroach/pkg/clusterversion" + "github.com/cockroachdb/cockroach/pkg/jobs" + "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" + "github.com/cockroachdb/cockroach/pkg/migration" + "github.com/cockroachdb/cockroach/pkg/server/serverpb" + "github.com/cockroachdb/cockroach/pkg/sql/sessiondata" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/retry" + "github.com/cockroachdb/errors" +) + +func ensureSpanConfigReconciliation( + ctx context.Context, _ clusterversion.ClusterVersion, d migration.TenantDeps, j *jobs.Job, +) error { + if !d.Codec.ForSystemTenant() { + return nil + } + + retryOpts := retry.Options{ + InitialBackoff: time.Second, + MaxBackoff: 2 * time.Second, + Multiplier: 2, + MaxRetries: 50, + } + for r := retry.StartWithCtx(ctx, retryOpts); r.Next(); { + row, err := d.InternalExecutor.QueryRowEx(ctx, "get-spanconfig-progress", nil, + sessiondata.NodeUserSessionDataOverride, ` +SELECT progress + FROM system.jobs + WHERE id = (SELECT job_id FROM [SHOW AUTOMATIC JOBS] WHERE job_type = 'AUTO SPAN CONFIG RECONCILIATION') +`) + if err != nil { + return err + } + if row == nil { + log.Info(ctx, "reconciliation job not found, retrying...") + continue + } + progress, err := jobs.UnmarshalProgress(row[0]) + if err != nil { + return err + } + sp, ok := progress.GetDetails().(*jobspb.Progress_AutoSpanConfigReconciliation) + if !ok { + log.Fatal(ctx, "unexpected job progress type") + } + if sp.AutoSpanConfigReconciliation.Checkpoint.IsEmpty() { + log.Info(ctx, "waiting for span config reconciliation...") + continue + } + + return nil + } + + return errors.Newf("unable to reconcile span configs") +} + +func ensureSpanConfigSubscription( + ctx context.Context, _ clusterversion.ClusterVersion, deps migration.SystemDeps, _ *jobs.Job, +) error { + return deps.Cluster.UntilClusterStable(ctx, func() error { + return deps.Cluster.ForEveryNode(ctx, "ensure-span-config-subscription", + func(ctx context.Context, client serverpb.MigrationClient) error { + req := &serverpb.WaitForSpanConfigSubscriptionRequest{} + _, err := client.WaitForSpanConfigSubscription(ctx, req) + return err + }) + }) +} diff --git a/pkg/migration/migrations/migrate_span_configs_test.go b/pkg/migration/migrations/migrate_span_configs_test.go new file mode 100644 index 000000000000..95c18f612951 --- /dev/null +++ b/pkg/migration/migrations/migrate_span_configs_test.go @@ -0,0 +1,244 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +package migrations_test + +import ( + "context" + "fmt" + "testing" + + "github.com/cockroachdb/cockroach/pkg/base" + "github.com/cockroachdb/cockroach/pkg/clusterversion" + "github.com/cockroachdb/cockroach/pkg/keys" + "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/server" + "github.com/cockroachdb/cockroach/pkg/spanconfig" + "github.com/cockroachdb/cockroach/pkg/testutils" + "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" + "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/stretchr/testify/require" +) + +// TestEnsureSpanConfigReconciliation verifies that the migration waits for a +// span config reconciliation attempt, blocking until it occurs. +func TestEnsureSpanConfigReconciliation(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + ctx := context.Background() + + blockReconcilerCh := make(chan struct{}) + tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ + ServerArgs: base.TestServerArgs{ + Knobs: base.TestingKnobs{ + Server: &server.TestingKnobs{ + DisableAutomaticVersionUpgrade: 1, + BinaryVersionOverride: clusterversion.ByKey( + clusterversion.EnsureSpanConfigReconciliation - 1, + ), + }, + SpanConfig: &spanconfig.TestingKnobs{ + ReconcilerInitialInterceptor: func() { + <-blockReconcilerCh + }, + }, + }, + }, + }) + + defer tc.Stopper().Stop(ctx) + ts := tc.Server(0) + tdb := sqlutils.MakeSQLRunner(tc.ServerConn(0)) + scKVAccessor := ts.SpanConfigKVAccessor().(spanconfig.KVAccessor) + scReconciler := ts.SpanConfigReconciler().(spanconfig.Reconciler) + + tdb.Exec(t, `SET CLUSTER SETTING spanconfig.reconciliation_job.enabled = true`) + tdb.Exec(t, `SET CLUSTER SETTING spanconfig.reconciliation_job.checkpoint_interval = '100ms'`) + + { // Ensure that no span config entries are found. + entries, err := scKVAccessor.GetSpanConfigEntriesFor(ctx, []roachpb.Span{ + keys.EverythingSpan, + }) + require.NoError(t, err) + require.Empty(t, entries) + } + + // Ensure that upgrade attempts without having reconciled simply fail. + tdb.Exec(t, "SET statement_timeout='500ms'") + tdb.ExpectErr(t, "query execution canceled due to statement timeout", + "SET CLUSTER SETTING version = $1", + clusterversion.ByKey(clusterversion.EnsureSpanConfigReconciliation).String(), + ) + + close(blockReconcilerCh) // unblocking the reconciliation process allows the upgrade to proceed + tdb.ExecSucceedsSoon(t, + "SET CLUSTER SETTING version = $1", + clusterversion.ByKey(clusterversion.EnsureSpanConfigReconciliation).String(), + ) + require.False(t, scReconciler.Checkpoint().IsEmpty()) + + { // Ensure that the host tenant's span configs are installed.
+ entries, err := scKVAccessor.GetSpanConfigEntriesFor(ctx, []roachpb.Span{ + keys.EverythingSpan, + }) + require.NoError(t, err) + require.NotEmpty(t, entries) + } +} + +func TestEnsureSpanConfigReconciliationMultiNode(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + ctx := context.Background() + + blockReconcilerCh := make(chan struct{}) + + serverArgs := make(map[int]base.TestServerArgs) + const numNodes = 2 + for i := 0; i < numNodes; i++ { + var spanConfigKnobs = spanconfig.TestingKnobs{} + if i == 0 { + spanConfigKnobs.ManagerDisableJobCreation = true + } else { + spanConfigKnobs.ReconcilerInitialInterceptor = func() { + <-blockReconcilerCh + } + } + serverArgs[i] = base.TestServerArgs{ + Knobs: base.TestingKnobs{ + Server: &server.TestingKnobs{ + DisableAutomaticVersionUpgrade: 1, + BinaryVersionOverride: clusterversion.ByKey( + clusterversion.EnsureSpanConfigReconciliation - 1, + ), + }, + SpanConfig: &spanConfigKnobs, + }, + } + } + tc := testcluster.StartTestCluster(t, numNodes, base.TestClusterArgs{ + ServerArgs: base.TestServerArgs{ + Knobs: base.TestingKnobs{ + Server: &server.TestingKnobs{ + DisableAutomaticVersionUpgrade: 1, + BinaryVersionOverride: clusterversion.ByKey( + clusterversion.EnsureSpanConfigReconciliation - 1, + ), + }, + }, + }, + ServerArgsPerNode: serverArgs, + }) + defer tc.Stopper().Stop(ctx) + + ts := tc.Server(0) + tdb := sqlutils.MakeSQLRunner(tc.ServerConn(0)) + scKVAccessor := ts.SpanConfigKVAccessor().(spanconfig.KVAccessor) + scReconciler := tc.Server(1).SpanConfigReconciler().(spanconfig.Reconciler) + + tdb.Exec(t, `SET CLUSTER SETTING spanconfig.reconciliation_job.enabled = true`) + tdb.Exec(t, `SET CLUSTER SETTING spanconfig.reconciliation_job.checkpoint_interval = '100ms'`) + + { // Ensure that no span config entries are to be found. + entries, err := scKVAccessor.GetSpanConfigEntriesFor(ctx, []roachpb.Span{ + keys.EverythingSpan, + }) + require.NoError(t, err) + require.Empty(t, entries) + } + + // Ensure that upgrade attempts without having reconciled simply fail. + tdb.Exec(t, "SET statement_timeout='500ms'") + tdb.ExpectErr(t, "query execution canceled due to statement timeout", + "SET CLUSTER SETTING version = $1", + clusterversion.ByKey(clusterversion.EnsureSpanConfigReconciliation).String(), + ) + + close(blockReconcilerCh) // unblocking the reconciliation process allows the upgrade to proceed + tdb.ExecSucceedsSoon(t, + "SET CLUSTER SETTING version = $1", + clusterversion.ByKey(clusterversion.EnsureSpanConfigReconciliation).String(), + ) + require.False(t, scReconciler.Checkpoint().IsEmpty()) + + { // Ensure that the host tenant's span configs are installed. + entries, err := scKVAccessor.GetSpanConfigEntriesFor(ctx, []roachpb.Span{ + keys.EverythingSpan, + }) + require.NoError(t, err) + require.NotEmpty(t, entries) + } +} + +// TestEnsureSpanConfigSubscription verifies that the migration waits for all +// stores to have observed a reconciliation state.
+func TestEnsureSpanConfigSubscription(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + ctx := context.Background() + + blockSubscriberCh := make(chan struct{}) + tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ + ServerArgs: base.TestServerArgs{ + Knobs: base.TestingKnobs{ + Server: &server.TestingKnobs{ + DisableAutomaticVersionUpgrade: 1, + BinaryVersionOverride: clusterversion.ByKey( + clusterversion.EnsureSpanConfigSubscription - 1, + ), + }, + SpanConfig: &spanconfig.TestingKnobs{ + KVSubscriberPostRangefeedStartInterceptor: func() { + <-blockSubscriberCh + }, + }, + }, + }, + }) + + defer tc.Stopper().Stop(ctx) + ts := tc.Server(0) + tdb := sqlutils.MakeSQLRunner(tc.ServerConn(0)) + scKVAccessor := ts.SpanConfigKVAccessor().(spanconfig.KVAccessor) + scKVSubscriber := ts.SpanConfigKVSubscriber().(spanconfig.KVSubscriber) + + tdb.Exec(t, `SET CLUSTER SETTING spanconfig.reconciliation_job.enabled = true`) + + testutils.SucceedsSoon(t, func() error { + entries, err := scKVAccessor.GetSpanConfigEntriesFor(ctx, []roachpb.Span{ + keys.EverythingSpan, + }) + require.NoError(t, err) + if len(entries) == 0 { + return fmt.Errorf("empty global span configuration state") + } + return nil + }) + + // Ensure that upgrade attempts without having subscribed simply fail. + tdb.Exec(t, "SET statement_timeout='500ms'") + tdb.ExpectErr(t, "query execution canceled due to statement timeout", + "SET CLUSTER SETTING version = $1", + clusterversion.ByKey(clusterversion.EnsureSpanConfigSubscription).String(), + ) + + // Unblocking the subscription process allows the upgrade to proceed. + close(blockSubscriberCh) + tdb.ExecSucceedsSoon(t, + "SET CLUSTER SETTING version = $1", + clusterversion.ByKey(clusterversion.EnsureSpanConfigSubscription).String(), + ) + require.False(t, scKVSubscriber.LastUpdated().IsEmpty()) +} diff --git a/pkg/migration/migrations/migrations.go b/pkg/migration/migrations/migrations.go index a2998f5402d4..34657761ef7a 100644 --- a/pkg/migration/migrations/migrations.go +++ b/pkg/migration/migrations/migrations.go @@ -80,6 +80,17 @@ var migrations = []migration.Migration{ NoPrecondition, publicSchemaMigration, ), + migration.NewTenantMigration( + "enable span configs infrastructure", + toCV(clusterversion.EnsureSpanConfigReconciliation), + NoPrecondition, + ensureSpanConfigReconciliation, + ), + migration.NewSystemMigration( + "enable span configs infrastructure", + toCV(clusterversion.EnsureSpanConfigSubscription), + ensureSpanConfigSubscription, + ), } func init() { diff --git a/pkg/migration/tenant_migration.go b/pkg/migration/tenant_migration.go index 010b016b4627..4c6ca923bf66 100644 --- a/pkg/migration/tenant_migration.go +++ b/pkg/migration/tenant_migration.go @@ -36,7 +36,8 @@ type TenantDeps struct { CollectionFactory *descs.CollectionFactory LeaseManager *lease.Manager InternalExecutor sqlutil.InternalExecutor - SpanConfig struct { // deps for SeedTenantSpanConfigs; can be removed accordingly + + SpanConfig struct { // deps for span config migrations; can be removed accordingly spanconfig.KVAccessor Default roachpb.SpanConfig } diff --git a/pkg/server/BUILD.bazel b/pkg/server/BUILD.bazel index 6c195218fdc2..11ae212ded4a 100644 --- a/pkg/server/BUILD.bazel +++ b/pkg/server/BUILD.bazel @@ -342,6 +342,7 @@ go_test( "//pkg/server/telemetry", "//pkg/settings", "//pkg/settings/cluster", + "//pkg/spanconfig", "//pkg/sql", "//pkg/sql/catalog/catalogkeys", "//pkg/sql/catalog/catconstants", diff --git a/pkg/server/config.go 
b/pkg/server/config.go index 1370d4828f7e..e67a412505bc 100644 --- a/pkg/server/config.go +++ b/pkg/server/config.go @@ -174,10 +174,10 @@ type BaseConfig struct { // instantiate stores. StorageEngine enginepb.EngineType - // Enables the use of the (experimental) span configs infrastructure. + // SpanConfigsDisabled disables the use of the span configs infrastructure. // - // Environment Variable: COCKROACH_EXPERIMENTAL_SPAN_CONFIGS - SpanConfigsEnabled bool + // Environment Variable: COCKROACH_DISABLE_SPAN_CONFIGS + SpanConfigsDisabled bool // TestingKnobs is used for internal test controls only. TestingKnobs base.TestingKnobs @@ -469,8 +469,8 @@ func (cfg *Config) String() string { if cfg.Linearizable { fmt.Fprintln(w, "linearizable\t", cfg.Linearizable) } - if cfg.SpanConfigsEnabled { - fmt.Fprintln(w, "span configs enabled\t", cfg.SpanConfigsEnabled) + if !cfg.SpanConfigsDisabled { + fmt.Fprintln(w, "span configs enabled\t", !cfg.SpanConfigsDisabled) } _ = w.Flush() @@ -715,7 +715,7 @@ func (cfg *Config) RequireWebSession() bool { // variable based. Note that this only happens when initializing a node and not // when NewContext is called. func (cfg *Config) readEnvironmentVariables() { - cfg.SpanConfigsEnabled = envutil.EnvOrDefaultBool("COCKROACH_EXPERIMENTAL_SPAN_CONFIGS", cfg.SpanConfigsEnabled) + cfg.SpanConfigsDisabled = envutil.EnvOrDefaultBool("COCKROACH_DISABLE_SPAN_CONFIGS", cfg.SpanConfigsDisabled) cfg.Linearizable = envutil.EnvOrDefaultBool("COCKROACH_EXPERIMENTAL_LINEARIZABLE", cfg.Linearizable) cfg.ScanInterval = envutil.EnvOrDefaultDuration("COCKROACH_SCAN_INTERVAL", cfg.ScanInterval) cfg.ScanMinIdleTime = envutil.EnvOrDefaultDuration("COCKROACH_SCAN_MIN_IDLE_TIME", cfg.ScanMinIdleTime) diff --git a/pkg/server/config_test.go b/pkg/server/config_test.go index 7a6e4a735713..426438c6c9a5 100644 --- a/pkg/server/config_test.go +++ b/pkg/server/config_test.go @@ -95,7 +95,7 @@ func TestReadEnvironmentVariables(t *testing.T) { if err := os.Unsetenv("COCKROACH_EXPERIMENTAL_LINEARIZABLE"); err != nil { t.Fatal(err) } - if err := os.Unsetenv("COCKROACH_EXPERIMENTAL_SPAN_CONFIGS"); err != nil { + if err := os.Unsetenv("COCKROACH_DISABLE_SPAN_CONFIGS"); err != nil { t.Fatal(err) } if err := os.Unsetenv("COCKROACH_SCAN_INTERVAL"); err != nil { @@ -130,10 +130,10 @@ func TestReadEnvironmentVariables(t *testing.T) { // Set all the environment variables to valid values and ensure they are set // correctly. - if err := os.Setenv("COCKROACH_EXPERIMENTAL_SPAN_CONFIGS", "true"); err != nil { + if err := os.Setenv("COCKROACH_DISABLE_SPAN_CONFIGS", "true"); err != nil { t.Fatal(err) } - cfgExpected.SpanConfigsEnabled = true + cfgExpected.SpanConfigsDisabled = true if err := os.Setenv("COCKROACH_EXPERIMENTAL_LINEARIZABLE", "true"); err != nil { t.Fatal(err) } @@ -158,7 +158,7 @@ func TestReadEnvironmentVariables(t *testing.T) { } for _, envVar := range []string{ - "COCKROACH_EXPERIMENTAL_SPAN_CONFIGS", + "COCKROACH_DISABLE_SPAN_CONFIGS", "COCKROACH_EXPERIMENTAL_LINEARIZABLE", "COCKROACH_SCAN_INTERVAL", "COCKROACH_SCAN_MIN_IDLE_TIME", diff --git a/pkg/server/migration.go b/pkg/server/migration.go index 2bd4356f4ede..1861f3954352 100644 --- a/pkg/server/migration.go +++ b/pkg/server/migration.go @@ -240,3 +240,30 @@ func (m *migrationServer) DeprecateBaseEncryptionRegistry( resp := &serverpb.DeprecateBaseEncryptionRegistryResponse{} return resp, nil } + +// WaitForSpanConfigSubscription implements the MigrationServer interface. 
+func (m *migrationServer) WaitForSpanConfigSubscription( + ctx context.Context, _ *serverpb.WaitForSpanConfigSubscriptionRequest, +) (*serverpb.WaitForSpanConfigSubscriptionResponse, error) { + const opName = "wait-for-spanconfig-subscription" + ctx, span := m.server.AnnotateCtxWithSpan(ctx, opName) + defer span.Finish() + ctx = logtags.AddTag(ctx, opName, nil) + + if err := m.server.stopper.RunTaskWithErr(ctx, opName, func( + ctx context.Context, + ) error { + // Same as in SyncAllEngines, because stores can be added asynchronously, we + // need to ensure that the bootstrap process has happened. + m.server.node.waitForAdditionalStoreInit() + + return m.server.node.stores.VisitStores(func(s *kvserver.Store) error { + return s.WaitForSpanConfigSubscription(ctx) + }) + }); err != nil { + return nil, err + } + + resp := &serverpb.WaitForSpanConfigSubscriptionResponse{} + return resp, nil +} diff --git a/pkg/server/server.go b/pkg/server/server.go index afaf015edc87..68e0c78ae46f 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -646,8 +646,8 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) { // tenant records. kvAccessorForTenantRecords spanconfig.KVAccessor } - if cfg.SpanConfigsEnabled { - storeCfg.SpanConfigsEnabled = true + storeCfg.SpanConfigsDisabled = cfg.SpanConfigsDisabled + if !cfg.SpanConfigsDisabled { spanConfigKnobs, _ := cfg.TestingKnobs.SpanConfig.(*spanconfig.TestingKnobs) if spanConfigKnobs != nil && spanConfigKnobs.StoreKVSubscriberOverride != nil { storeCfg.SpanConfigSubscriber = spanConfigKnobs.StoreKVSubscriberOverride @@ -1797,7 +1797,7 @@ func (s *Server) PreStart(ctx context.Context) error { return err } - if s.cfg.SpanConfigsEnabled && s.spanConfigSubscriber != nil { + if !s.cfg.SpanConfigsDisabled && s.spanConfigSubscriber != nil { if err := s.spanConfigSubscriber.Start(ctx); err != nil { return err } diff --git a/pkg/server/server_sql.go b/pkg/server/server_sql.go index 38ccd9ad15c6..fbd28fbbdbd6 100644 --- a/pkg/server/server_sql.go +++ b/pkg/server/server_sql.go @@ -755,6 +755,9 @@ func newSQLServer(ctx context.Context, cfg sqlServerArgs) (*SQLServer, error) { if telemetryLoggingKnobs := cfg.TestingKnobs.TelemetryLoggingKnobs; telemetryLoggingKnobs != nil { execCfg.TelemetryLoggingTestingKnobs = telemetryLoggingKnobs.(*sql.TelemetryLoggingTestingKnobs) } + if spanConfigKnobs := cfg.TestingKnobs.SpanConfig; spanConfigKnobs != nil { + execCfg.SpanConfigTestingKnobs = spanConfigKnobs.(*spanconfig.TestingKnobs) + } statsRefresher := stats.MakeRefresher( cfg.AmbientCtx, @@ -873,9 +876,9 @@ func newSQLServer(ctx context.Context, cfg sqlServerArgs) (*SQLServer, error) { sqlTranslator *spanconfigsqltranslator.SQLTranslator sqlWatcher *spanconfigsqlwatcher.SQLWatcher }{} - if !codec.ForSystemTenant() || cfg.SpanConfigsEnabled { + if !codec.ForSystemTenant() || !cfg.SpanConfigsDisabled { // Instantiate a span config manager. If we're the host tenant we'll - // only do it if COCKROACH_EXPERIMENTAL_SPAN_CONFIGS is set. + // only do it if COCKROACH_DISABLE_SPAN_CONFIGS isn't set.
spanConfigKnobs, _ := cfg.TestingKnobs.SpanConfig.(*spanconfig.TestingKnobs) spanConfig.sqlTranslator = spanconfigsqltranslator.New(execCfg, codec, spanConfigKnobs) spanConfig.sqlWatcher = spanconfigsqlwatcher.New( diff --git a/pkg/server/serverpb/migration.proto b/pkg/server/serverpb/migration.proto index c9d7af5f9954..57b2e69e91a9 100644 --- a/pkg/server/serverpb/migration.proto +++ b/pkg/server/serverpb/migration.proto @@ -62,6 +62,14 @@ message DeprecateBaseEncryptionRegistryRequest { // DeprecateBaseEncryptionRegistryRequest. message DeprecateBaseEncryptionRegistryResponse{} +// WaitForSpanConfigSubscriptionRequest waits until the target node is wholly +// subscribed to the global span configurations state. +message WaitForSpanConfigSubscriptionRequest{} + +// WaitForSpanConfigSubscriptionResponse is the response to a +// WaitForSpanConfigSubscriptionRequest. +message WaitForSpanConfigSubscriptionResponse{} + service Migration { // ValidateTargetClusterVersion is used to verify that the target node is // running a binary that's able to support the specified cluster version. @@ -90,4 +98,8 @@ service Migration { // DeprecateBaseRegistry is used to instruct the target node to stop // using the Base version monolithic encryption-at-rest registry. rpc DeprecateBaseEncryptionRegistry (DeprecateBaseEncryptionRegistryRequest) returns (DeprecateBaseEncryptionRegistryResponse) { } + + // WaitForSpanConfigSubscription waits until the target node is wholly + // subscribed to the global span configurations state. + rpc WaitForSpanConfigSubscription (WaitForSpanConfigSubscriptionRequest) returns (WaitForSpanConfigSubscriptionResponse) { } } diff --git a/pkg/server/status_test.go b/pkg/server/status_test.go index b03b820d9f7a..f3ba499aace6 100644 --- a/pkg/server/status_test.go +++ b/pkg/server/status_test.go @@ -41,6 +41,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/server/diagnostics/diagnosticspb" "github.com/cockroachdb/cockroach/pkg/server/serverpb" "github.com/cockroachdb/cockroach/pkg/server/status/statuspb" + "github.com/cockroachdb/cockroach/pkg/spanconfig" "github.com/cockroachdb/cockroach/pkg/sql" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catconstants" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" @@ -1819,6 +1820,9 @@ func TestStatusAPIStatements(t *testing.T) { AOSTClause: "AS OF SYSTEM TIME '-1us'", StubTimeNow: func() time.Time { return timeutil.Unix(aggregatedTs, 0) }, }, + SpanConfig: &spanconfig.TestingKnobs{ + ManagerDisableJobCreation: true, // TODO(irfansharif): #74919. + }, }, }, }) @@ -1932,6 +1936,9 @@ func TestStatusAPICombinedStatements(t *testing.T) { AOSTClause: "AS OF SYSTEM TIME '-1us'", StubTimeNow: func() time.Time { return timeutil.Unix(aggregatedTs, 0) }, }, + SpanConfig: &spanconfig.TestingKnobs{ + ManagerDisableJobCreation: true, // TODO(irfansharif): #74919. + }, }, }, }) diff --git a/pkg/server/testserver.go b/pkg/server/testserver.go index 39eb3cfe3783..0a142f883316 100644 --- a/pkg/server/testserver.go +++ b/pkg/server/testserver.go @@ -236,8 +236,8 @@ func makeTestConfigFromParams(params base.TestServerArgs) Config { if params.EnableDemoLoginEndpoint { cfg.EnableDemoLoginEndpoint = true } - if params.EnableSpanConfigs { - cfg.SpanConfigsEnabled = true + if params.DisableSpanConfigs { + cfg.SpanConfigsDisabled = true } // Ensure we have the correct number of engines. 
Add in-memory ones where diff --git a/pkg/spanconfig/spanconfigjob/BUILD.bazel b/pkg/spanconfig/spanconfigjob/BUILD.bazel index f814a5f32fd4..b49af153407c 100644 --- a/pkg/spanconfig/spanconfigjob/BUILD.bazel +++ b/pkg/spanconfig/spanconfigjob/BUILD.bazel @@ -8,9 +8,12 @@ go_library( deps = [ "//pkg/jobs", "//pkg/jobs/jobspb", + "//pkg/settings", "//pkg/settings/cluster", "//pkg/sql", + "//pkg/util", "//pkg/util/hlc", + "//pkg/util/timeutil", "@com_github_cockroachdb_errors//:errors", ], ) diff --git a/pkg/spanconfig/spanconfigjob/job.go b/pkg/spanconfig/spanconfigjob/job.go index cfadc4d9ef86..9853b934d41f 100644 --- a/pkg/spanconfig/spanconfigjob/job.go +++ b/pkg/spanconfig/spanconfigjob/job.go @@ -12,12 +12,16 @@ package spanconfigjob import ( "context" + "time" "github.com/cockroachdb/cockroach/pkg/jobs" "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" + "github.com/cockroachdb/cockroach/pkg/settings" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql" + "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/hlc" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" ) @@ -27,6 +31,14 @@ type resumer struct { var _ jobs.Resumer = (*resumer)(nil) +var reconciliationJobCheckpointInterval = settings.RegisterDurationSetting( + settings.TenantWritable, + "spanconfig.reconciliation_job.checkpoint_interval", + "the frequency at which the span config reconciliation job checkpoints itself", + 5*time.Second, + settings.NonNegativeDuration, +) + // Resume implements the jobs.Resumer interface. func (r *resumer) Resume(ctx context.Context, execCtxI interface{}) error { execCtx := execCtxI.(sql.JobExecContext) @@ -41,11 +53,35 @@ func (r *resumer) Resume(ctx context.Context, execCtxI interface{}) error { // the job all over again after some time, it's just that the checks for // failed jobs happen infrequently. + // TODO(irfansharif): We're still not using a persisted checkpoint, both + // here when starting at the empty timestamp and down in the reconciler + // (does the full reconciliation at every start). Once we do, it'll be + // possible that the checkpoint timestamp provided is too stale -- for a + // suspended tenant perhaps data was GC-ed from underneath it. When we + // bubble up said error, we want to retry at this level with an empty + // timestamp. + + settingValues := &execCtx.ExecCfg().Settings.SV + persistCheckpoints := util.Every(reconciliationJobCheckpointInterval.Get(settingValues)) + reconciliationJobCheckpointInterval.SetOnChange(settingValues, func(ctx context.Context) { + persistCheckpoints = util.Every(reconciliationJobCheckpointInterval.Get(settingValues)) + }) + + shouldPersistCheckpoint := true + if knobs := execCtx.ExecCfg().SpanConfigTestingKnobs; knobs != nil && knobs.JobDisablePersistingCheckpoints { + shouldPersistCheckpoint = false + } if err := rc.Reconcile(ctx, hlc.Timestamp{}, func() error { - // TODO(irfansharif): Stash this checkpoint somewhere and use it when - // starting back up. 
- _ = rc.Checkpoint() - return nil + if !shouldPersistCheckpoint { + return nil + } + if !persistCheckpoints.ShouldProcess(timeutil.Now()) { + return nil + } + + return r.job.SetProgress(ctx, nil, jobspb.AutoSpanConfigReconciliationProgress{ + Checkpoint: rc.Checkpoint(), + }) }); err != nil { return err } diff --git a/pkg/spanconfig/spanconfigkvaccessor/kvaccessor_test.go b/pkg/spanconfig/spanconfigkvaccessor/kvaccessor_test.go index 9d8380f0682b..11addd70b8b9 100644 --- a/pkg/spanconfig/spanconfigkvaccessor/kvaccessor_test.go +++ b/pkg/spanconfig/spanconfigkvaccessor/kvaccessor_test.go @@ -53,11 +53,7 @@ func TestDataDriven(t *testing.T) { datadriven.Walk(t, testutils.TestDataPath(t), func(t *testing.T, path string) { ctx := context.Background() - tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ - ServerArgs: base.TestServerArgs{ - EnableSpanConfigs: true, - }, - }) + tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{}) defer tc.Stopper().Stop(ctx) const dummySpanConfigurationsFQN = "defaultdb.public.dummy_span_configurations" diff --git a/pkg/spanconfig/spanconfigkvsubscriber/datadriven_test.go b/pkg/spanconfig/spanconfigkvsubscriber/datadriven_test.go index 6d3144861368..1e3cca97a003 100644 --- a/pkg/spanconfig/spanconfigkvsubscriber/datadriven_test.go +++ b/pkg/spanconfig/spanconfigkvsubscriber/datadriven_test.go @@ -104,11 +104,7 @@ func TestDataDriven(t *testing.T) { datadriven.Walk(t, testutils.TestDataPath(t), func(t *testing.T, path string) { ctx := context.Background() ctx, cancel := context.WithCancel(ctx) - tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ - ServerArgs: base.TestServerArgs{ - EnableSpanConfigs: true, - }, - }) + tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{}) defer cancel() defer tc.Stopper().Stop(ctx) diff --git a/pkg/spanconfig/spanconfigmanager/manager.go b/pkg/spanconfig/spanconfigmanager/manager.go index a82bcc466d76..17e329cfacca 100644 --- a/pkg/spanconfig/spanconfigmanager/manager.go +++ b/pkg/spanconfig/spanconfigmanager/manager.go @@ -32,7 +32,7 @@ import ( // check if the span config reconciliation job exists. If it's not found, it // will be started. It has no effect unless // spanconfig.reconciliation_job.enabled is configured. For host -// tenants, COCKROACH_EXPERIMENTAL_SPAN_CONFIGS needs to be additionally set. +// tenants, COCKROACH_DISABLE_SPAN_CONFIGS must not be set. var checkReconciliationJobInterval = settings.RegisterDurationSetting( settings.TenantWritable, "spanconfig.reconciliation_job.check_interval", @@ -42,16 +42,15 @@ var checkReconciliationJobInterval = settings.RegisterDurationSetting( ) // jobEnabledSetting gates the activation of the span config reconciliation job. -// -// For the host tenant it has no effect unless -// COCKROACH_EXPERIMENTAL_SPAN_CONFIGS is also set. +// For the host tenant it has no effect if COCKROACH_DISABLE_SPAN_CONFIGS is +// set. // // TODO(irfansharif): This should be a tenant read-only setting once the work // for #73349 is completed. var jobEnabledSetting = settings.RegisterBoolSetting( settings.TenantWritable, "spanconfig.reconciliation_job.enabled", - "enable the use of the kv accessor", false) + "enable the use of the kv accessor", true) // Manager is the coordinator of the span config subsystem. It ensures that // there's only one span config reconciliation job[1] for every tenant. 
It also diff --git a/pkg/spanconfig/spanconfigmanager/manager_test.go b/pkg/spanconfig/spanconfigmanager/manager_test.go index 9600ebef2bfd..22f9e240cf6c 100644 --- a/pkg/spanconfig/spanconfigmanager/manager_test.go +++ b/pkg/spanconfig/spanconfigmanager/manager_test.go @@ -49,7 +49,6 @@ func TestManagerConcurrentJobCreation(t *testing.T) { ctx := context.Background() tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ ServerArgs: base.TestServerArgs{ - EnableSpanConfigs: true, Knobs: base.TestingKnobs{ SpanConfig: &spanconfig.TestingKnobs{ ManagerDisableJobCreation: true, // disable the automatic job creation @@ -140,7 +139,6 @@ func TestManagerStartsJobIfFailed(t *testing.T) { ctx := context.Background() tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ ServerArgs: base.TestServerArgs{ - EnableSpanConfigs: true, Knobs: base.TestingKnobs{ SpanConfig: &spanconfig.TestingKnobs{ ManagerDisableJobCreation: true, // disable the automatic job creation @@ -189,7 +187,6 @@ func TestManagerCheckJobConditions(t *testing.T) { ctx := context.Background() tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ ServerArgs: base.TestServerArgs{ - EnableSpanConfigs: true, Knobs: base.TestingKnobs{ SpanConfig: &spanconfig.TestingKnobs{ ManagerDisableJobCreation: true, // disable the automatic job creation diff --git a/pkg/spanconfig/spanconfigreconciler/reconciler.go b/pkg/spanconfig/spanconfigreconciler/reconciler.go index 8e8d19eb6457..1db604eae484 100644 --- a/pkg/spanconfig/spanconfigreconciler/reconciler.go +++ b/pkg/spanconfig/spanconfigreconciler/reconciler.go @@ -136,11 +136,16 @@ func (r *Reconciler) Reconcile( // pass every time the reconciliation job kicks us off. _ = startTS + if fn := r.knobs.ReconcilerInitialInterceptor; fn != nil { + fn() + } + full := fullReconciler{ sqlTranslator: r.sqlTranslator, kvAccessor: r.kvAccessor, codec: r.codec, tenID: r.tenID, + knobs: r.knobs, } latestStore, reconciledUpUntil, err := full.reconcile(ctx) if err != nil { @@ -190,6 +195,7 @@ type fullReconciler struct { codec keys.SQLCodec tenID roachpb.TenantID + knobs *spanconfig.TestingKnobs } // reconcile runs the full reconciliation process, returning: @@ -276,6 +282,9 @@ func (f *fullReconciler) fetchExistingSpanConfigs( Key: keys.EverythingSpan.Key, EndKey: keys.TableDataMax, } + if f.knobs.ConfigureScratchRange { + tenantSpan.EndKey = keys.ScratchRangeMax + } } else { // Secondary tenants govern everything prefixed by their tenant ID. 
tenPrefix := keys.MakeTenantPrefix(f.tenID) @@ -408,7 +417,9 @@ func (r *incrementalReconciler) reconcile( func (r *incrementalReconciler) filterForMissingTableIDs( ctx context.Context, updates []spanconfig.DescriptorUpdate, ) (descpb.IDs, error) { + seen := make(map[descpb.ID]struct{}) var missingIDs descpb.IDs + if err := sql.DescsTxn(ctx, r.execCfg, func(ctx context.Context, txn *kv.Txn, descsCol *descs.Collection) error { for _, update := range updates { @@ -433,7 +444,10 @@ func (r *incrementalReconciler) filterForMissingTableIDs( } if considerAsMissing { - missingIDs = append(missingIDs, update.ID) // accumulate the set of missing table IDs + if _, found := seen[update.ID]; !found { + seen[update.ID] = struct{}{} + missingIDs = append(missingIDs, update.ID) // accumulate the set of missing table IDs + } } } diff --git a/pkg/spanconfig/spanconfigsqltranslator/sqltranslator.go b/pkg/spanconfig/spanconfigsqltranslator/sqltranslator.go index f53b38ad7bac..392a7060361d 100644 --- a/pkg/spanconfig/spanconfigsqltranslator/sqltranslator.go +++ b/pkg/spanconfig/spanconfigsqltranslator/sqltranslator.go @@ -93,6 +93,14 @@ func (s *SQLTranslator) Translate( } entries = append(entries, pseudoTableEntries...) + scratchRangeEntry, err := s.maybeGenerateScratchRangeEntry(ctx, txn, ids) + if err != nil { + return err + } + if !scratchRangeEntry.Empty() { + entries = append(entries, scratchRangeEntry) + } + // For every unique leaf ID, generate span configurations. for _, leafID := range leafIDs { translatedEntries, err := s.generateSpanConfigurations(ctx, leafID, txn, descsCol) @@ -535,3 +543,32 @@ func (s *SQLTranslator) maybeGeneratePseudoTableEntries( return nil, nil } + +func (s *SQLTranslator) maybeGenerateScratchRangeEntry( + ctx context.Context, txn *kv.Txn, ids descpb.IDs, +) (roachpb.SpanConfigEntry, error) { + if !s.knobs.ConfigureScratchRange || !s.codec.ForSystemTenant() { + return roachpb.SpanConfigEntry{}, nil // nothing to do + } + + for _, id := range ids { + if id != keys.RootNamespaceID { + continue // nothing to do + } + + zone, err := sql.GetHydratedZoneConfigForDatabase(ctx, txn, s.codec, keys.RootNamespaceID) + if err != nil { + return roachpb.SpanConfigEntry{}, err + } + + return roachpb.SpanConfigEntry{ + Span: roachpb.Span{ + Key: keys.ScratchRangeMin, + EndKey: keys.ScratchRangeMax, + }, + Config: zone.AsSpanConfig(), + }, nil + } + + return roachpb.SpanConfigEntry{}, nil +} diff --git a/pkg/spanconfig/spanconfigstore/store.go b/pkg/spanconfig/spanconfigstore/store.go index fea96b40fa4d..e6b743c1c226 100644 --- a/pkg/spanconfig/spanconfigstore/store.go +++ b/pkg/spanconfig/spanconfigstore/store.go @@ -28,13 +28,13 @@ import ( // EnabledSetting is a hidden cluster setting to enable the use of the span // configs infrastructure in KV. It switches each store in the cluster from // using the gossip backed system config span to instead using the span configs -// infrastructure. It has no effect unless COCKROACH_EXPERIMENTAL_SPAN_CONFIGS +// infrastructure. It has no effect if COCKROACH_DISABLE_SPAN_CONFIGS // is set. var EnabledSetting = settings.RegisterBoolSetting( settings.SystemOnly, "spanconfig.store.enabled", `use the span config infrastructure in KV instead of the system config span`, - false, + true, ) // Store is an in-memory data structure to store and retrieve span configs. 
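The job.go hunk above gates checkpoint persistence behind the new spanconfig.reconciliation_job.checkpoint_interval setting, using util.Every so that frequent reconciliation callbacks result in at most one progress write per interval. The following standalone Go sketch illustrates that rate-limiting pattern using only the standard library; everyN, onCheckpoint, and the 5s interval are illustrative stand-ins chosen for this sketch, not CockroachDB APIs.

package main

import (
	"fmt"
	"time"
)

// everyN lets an action through at most once per interval: a simplified,
// standard-library stand-in for the util.EveryN value built in the hunk above.
type everyN struct {
	interval time.Duration
	lastRun  time.Time
}

func (e *everyN) shouldProcess(now time.Time) bool {
	if e.lastRun.IsZero() || now.Sub(e.lastRun) >= e.interval {
		e.lastRun = now
		return true
	}
	return false
}

func main() {
	// Illustrative stand-in for spanconfig.reconciliation_job.checkpoint_interval.
	limiter := &everyN{interval: 5 * time.Second}

	persisted := 0
	// The reconciler would invoke this callback on every incremental update;
	// only a subset of invocations results in a persisted checkpoint.
	onCheckpoint := func(now time.Time) {
		if !limiter.shouldProcess(now) {
			return // too soon since the last persisted checkpoint
		}
		persisted++ // the real job persists progress via r.job.SetProgress(...)
	}

	start := time.Now()
	for i := 0; i < 20; i++ {
		onCheckpoint(start.Add(time.Duration(i) * time.Second))
	}
	fmt.Printf("callbacks=20 persisted=%d\n", persisted) // persisted=4 (t=0s, 5s, 10s, 15s)
}

Rebuilding the limiter inside the setting's SetOnChange callback, as the hunk does, lets an operator tune the checkpointing rate without restarting the reconciliation job.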
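Similarly, the reconciler.go hunk above starts deduplicating the missing-table IDs it accumulates by consulting a seen map before appending. A minimal sketch of that accumulate-once pattern (dedupeIDs and the plain int64 ID type are hypothetical, used only for illustration):

package main

import "fmt"

// dedupeIDs returns ids with duplicates removed, preserving first-seen order,
// the same seen-map technique filterForMissingTableIDs now applies before
// appending to missingIDs.
func dedupeIDs(ids []int64) []int64 {
	seen := make(map[int64]struct{}, len(ids))
	out := make([]int64, 0, len(ids))
	for _, id := range ids {
		if _, ok := seen[id]; ok {
			continue // already accumulated
		}
		seen[id] = struct{}{}
		out = append(out, id)
	}
	return out
}

func main() {
	fmt.Println(dedupeIDs([]int64{54, 54, 56, 54, 57, 56})) // [54 56 57]
}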
diff --git a/pkg/spanconfig/testing_knobs.go b/pkg/spanconfig/testing_knobs.go index 132f3ca777d4..2a45e2676f92 100644 --- a/pkg/spanconfig/testing_knobs.go +++ b/pkg/spanconfig/testing_knobs.go @@ -37,7 +37,12 @@ type TestingKnobs struct { // not. ManagerAfterCheckedReconciliationJobExistsInterceptor func(exists bool) - // KVSubscriberPostRangefeedStartInterceptor is invoked after the rangefeed is started. + // JobDisablePersistingCheckpoints disables the span config reconciliation + // job from persisting checkpoints. + JobDisablePersistingCheckpoints bool + + // KVSubscriberPostRangefeedStartInterceptor is invoked after the rangefeed + // is started. KVSubscriberPostRangefeedStartInterceptor func() // KVSubscriberPreExitInterceptor is invoked right before returning from @@ -69,6 +74,14 @@ type TestingKnobs struct { // descriptors appear as missing -- a convenient+faster alternative to // waiting for the descriptor to actually get GC-ed in tests. ExcludeDroppedDescriptorsFromLookup bool + + // ConfigureScratchRange controls whether the scratch range (used in tests) + // applies the RANGE DEFAULT configuration. + ConfigureScratchRange bool + + // ReconcilerInitialInterceptor, if set, is invoked at the very outset of + // the reconciliation process. + ReconcilerInitialInterceptor func() } // ModuleTestingKnobs is part of the base.ModuleTestingKnobs interface. diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index 7d2d69e82a69..02e5d1b5b884 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -198,7 +198,7 @@ var secondaryTenantZoneConfigsEnabled = settings.RegisterBoolSetting( settings.TenantWritable, secondaryTenantsZoneConfigsEnabledSettingName, "allow secondary tenants to set zone configurations; does not affect the system tenant", - false, + true, ) // traceTxnThreshold can be used to log SQL transactions that take @@ -1164,6 +1164,7 @@ type ExecutorConfig struct { StreamingTestingKnobs *StreamingTestingKnobs SQLStatsTestingKnobs *sqlstats.TestingKnobs TelemetryLoggingTestingKnobs *TelemetryLoggingTestingKnobs + SpanConfigTestingKnobs *spanconfig.TestingKnobs // HistogramWindowInterval is (server.Config).HistogramWindowInterval. HistogramWindowInterval time.Duration diff --git a/pkg/sql/logictest/logic.go b/pkg/sql/logictest/logic.go index d476259073cc..8b556681c2c0 100644 --- a/pkg/sql/logictest/logic.go +++ b/pkg/sql/logictest/logic.go @@ -165,8 +165,8 @@ import ( // # cluster-opt: opt1 opt2 // // The options are: -// - enable-span-config: If specified, the span configs infrastructure will be -// enabled. This is equivalent to setting COCKROACH_EXPERIMENTAL_SPAN_CONFIGS. +// - disable-span-config: If specified, the span configs infrastructure will be +// disabled. // - tracing-off: If specified, tracing defaults to being turned off. This is // used to override the environment, which may ask for tracing to be on by // default. @@ -1877,14 +1877,15 @@ type clusterOpt interface { apply(args *base.TestServerArgs) } -// clusterOptSpanConfigs corresponds to the enable-span-configs directive. -type clusterOptSpanConfigs struct{} +// clusterOptDisableSpanConfigs corresponds to the disable-span-configs +// directive. +type clusterOptDisableSpanConfigs struct{} -var _ clusterOpt = clusterOptSpanConfigs{} +var _ clusterOpt = clusterOptDisableSpanConfigs{} // apply implements the clusterOpt interface. 
-func (c clusterOptSpanConfigs) apply(args *base.TestServerArgs) { - args.EnableSpanConfigs = true +func (c clusterOptDisableSpanConfigs) apply(args *base.TestServerArgs) { + args.DisableSpanConfigs = true } // clusterOptTracingOff corresponds to the tracing-off directive. @@ -1933,8 +1934,8 @@ func readClusterOptions(t *testing.T, path string) []clusterOpt { } for _, opt := range fields[2:] { switch opt { - case "enable-span-configs": - res = append(res, clusterOptSpanConfigs{}) + case "disable-span-configs": + res = append(res, clusterOptDisableSpanConfigs{}) case "tracing-off": res = append(res, clusterOptTracingOff{}) default: diff --git a/pkg/sql/logictest/testdata/logic_test/auto_span_config_reconciliation_job b/pkg/sql/logictest/testdata/logic_test/auto_span_config_reconciliation_job index 81c6a5422519..cdae485e1716 100644 --- a/pkg/sql/logictest/testdata/logic_test/auto_span_config_reconciliation_job +++ b/pkg/sql/logictest/testdata/logic_test/auto_span_config_reconciliation_job @@ -1,5 +1,3 @@ -# cluster-opt: enable-span-configs - statement ok SET CLUSTER SETTING spanconfig.reconciliation_job.enabled = true; diff --git a/pkg/sql/logictest/testdata/logic_test/jobs b/pkg/sql/logictest/testdata/logic_test/jobs index 3f65cc73fe0a..5b9749bbbde7 100644 --- a/pkg/sql/logictest/testdata/logic_test/jobs +++ b/pkg/sql/logictest/testdata/logic_test/jobs @@ -24,10 +24,11 @@ SCHEMA CHANGE CREATE INDEX ON test.public.t (x) root query TTT SELECT job_type, description, user_name FROM crdb_internal.jobs WHERE user_name = 'root' ---- -SCHEMA CHANGE updating version for users table root -SCHEMA CHANGE updating version for role options table root -SCHEMA CHANGE updating privileges for database 54 root -SCHEMA CHANGE CREATE INDEX ON test.public.t (x) root +AUTO SPAN CONFIG RECONCILIATION reconciling span configurations root +SCHEMA CHANGE updating version for users table root +SCHEMA CHANGE updating version for role options table root +SCHEMA CHANGE updating privileges for database 54 root +SCHEMA CHANGE CREATE INDEX ON test.public.t (x) root user testuser @@ -73,11 +74,12 @@ SCHEMA CHANGE CREATE INDEX ON test.public.u (x) testuser query TTT SELECT job_type, description, user_name FROM crdb_internal.jobs WHERE user_name IN ('root', 'testuser') ---- -SCHEMA CHANGE updating version for users table root -SCHEMA CHANGE updating version for role options table root -SCHEMA CHANGE updating privileges for database 54 root -SCHEMA CHANGE CREATE INDEX ON test.public.t (x) root -SCHEMA CHANGE CREATE INDEX ON test.public.u (x) testuser +AUTO SPAN CONFIG RECONCILIATION reconciling span configurations root +SCHEMA CHANGE updating version for users table root +SCHEMA CHANGE updating version for role options table root +SCHEMA CHANGE updating privileges for database 54 root +SCHEMA CHANGE CREATE INDEX ON test.public.t (x) root +SCHEMA CHANGE CREATE INDEX ON test.public.u (x) testuser statement ok CREATE USER testuser2 diff --git a/pkg/sql/tests/split_test.go b/pkg/sql/tests/split_test.go index ac84cbc3dcb0..0c30e0bac272 100644 --- a/pkg/sql/tests/split_test.go +++ b/pkg/sql/tests/split_test.go @@ -73,6 +73,7 @@ func TestSplitOnTableBoundaries(t *testing.T) { params.ScanInterval = time.Millisecond params.ScanMinIdleTime = time.Millisecond params.ScanMaxIdleTime = time.Millisecond + params.DisableSpanConfigs = true s, sqlDB, kvDB := serverutils.StartServer(t, params) defer s.Stopper().Stop(context.Background()) diff --git a/pkg/testutils/localtestcluster/BUILD.bazel b/pkg/testutils/localtestcluster/BUILD.bazel index 
b13280b52997..c9245aff4812 100644 --- a/pkg/testutils/localtestcluster/BUILD.bazel +++ b/pkg/testutils/localtestcluster/BUILD.bazel @@ -13,12 +13,14 @@ go_library( "//pkg/gossip", "//pkg/keys", "//pkg/kv", + "//pkg/kv/kvclient/rangefeed:with-mocks", "//pkg/kv/kvserver", "//pkg/kv/kvserver/closedts/sidetransport", "//pkg/kv/kvserver/liveness", "//pkg/roachpb:with-mocks", "//pkg/rpc", "//pkg/settings/cluster", + "//pkg/spanconfig/spanconfigkvsubscriber", "//pkg/sql/catalog/bootstrap", "//pkg/storage", "//pkg/util", diff --git a/pkg/testutils/localtestcluster/local_test_cluster.go b/pkg/testutils/localtestcluster/local_test_cluster.go index 05e0dd6a6cad..a66566d81b56 100644 --- a/pkg/testutils/localtestcluster/local_test_cluster.go +++ b/pkg/testutils/localtestcluster/local_test_cluster.go @@ -23,12 +23,14 @@ import ( "github.com/cockroachdb/cockroach/pkg/gossip" "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv" + "github.com/cockroachdb/cockroach/pkg/kv/kvclient/rangefeed" "github.com/cockroachdb/cockroach/pkg/kv/kvserver" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts/sidetransport" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/rpc" "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigkvsubscriber" "github.com/cockroachdb/cockroach/pkg/sql/catalog/bootstrap" "github.com/cockroachdb/cockroach/pkg/storage" "github.com/cockroachdb/cockroach/pkg/util" @@ -209,6 +211,22 @@ func (ltc *LocalTestCluster) Start(t testing.TB, baseCtx *base.Config, initFacto ); err != nil { t.Fatalf("unable to start local test cluster: %s", err) } + + rangeFeedFactory, err := rangefeed.NewFactory(ltc.stopper, ltc.DB, cfg.Settings, nil /* knobs */) + if err != nil { + t.Fatal(err) + } + cfg.SpanConfigSubscriber = spanconfigkvsubscriber.New( + ltc.stopper, + ltc.DB, + clock, + rangeFeedFactory, + keys.SpanConfigurationsTableID, + 1<<20, /* 1 MB */ + cfg.DefaultSpanConfig, + nil, + ) + ltc.Store = kvserver.NewStore(ctx, cfg, ltc.Eng, nodeDesc) var initialValues []roachpb.KeyValue