diff --git a/docs/design/2024-05-23-predicate-columns.md b/docs/design/2024-05-23-predicate-columns.md index 9598db10643ae..be07397448e55 100644 --- a/docs/design/2024-05-23-predicate-columns.md +++ b/docs/design/2024-05-23-predicate-columns.md @@ -26,7 +26,7 @@ - [Performance Tests](#performance-tests) - [Impacts \& Risks](#impacts--risks) - [If new predicate columns appear, they cannot be analyzed in time](#if-new-predicate-columns-appear-they-cannot-be-analyzed-in-time) - - [Use PREDICATE COLUMNS when your workload's query pattern is relatively stable](#use-predicate-columns-when-your-workloads-query-pattern-is--relatively-stable) + - [Use PREDICATE COLUMNS when your workload's query pattern is relatively stable](#use-predicate-columns-when-your-workloads-query-pattern-is-relatively-stable) - [Investigation \& Alternatives](#investigation--alternatives) - [CRDB](#crdb) - [Summary](#summary) @@ -214,14 +214,14 @@ In the experimental implementation, we introduce a new global variable `tidb_ena However, because we decided to track all columns by default, this variable is no longer necessary. We will mark it deprecated and remove it in the future. -In this feature, we introduce a new global variable `tidb_analyze_default_column_choice` to control whether to use predicate columns or all columns in the analyze process. +In this feature, we introduce a new global variable `tidb_analyze_column_options` to control whether to use predicate columns or all columns in the analyze process. Users can set this variable to `ALL` or `PREDICATE` to analyze all columns or only predicate columns. The default value will be `PREDICATE` after this feature is fully implemented. 
```sql -SET GLOBAL tidb_analyze_default_column_choice = 'PREDICATE'; +SET GLOBAL tidb_analyze_column_options = 'PREDICATE'; -SET GLOBAL tidb_analyze_default_column_choice = 'ALL'; +SET GLOBAL tidb_analyze_column_options = 'ALL'; ``` | Value | Description | diff --git a/pkg/executor/set_test.go b/pkg/executor/set_test.go index 447f031e8c4a0..e56d9d41b7680 100644 --- a/pkg/executor/set_test.go +++ b/pkg/executor/set_test.go @@ -681,6 +681,14 @@ func TestSetVar(t *testing.T) { require.Error(t, tk.ExecToErr("set tidb_enable_column_tracking = 0")) require.Error(t, tk.ExecToErr("set global tidb_enable_column_tracking = -1")) + // test for tidb_analyze_column_options + tk.MustQuery("select @@tidb_analyze_column_options").Check(testkit.Rows("ALL")) + tk.MustExec("set global tidb_analyze_column_options = 'PREDICATE'") + tk.MustQuery("select @@tidb_analyze_column_options").Check(testkit.Rows("PREDICATE")) + tk.MustExec("set global tidb_analyze_column_options = 'all'") + tk.MustQuery("select @@tidb_analyze_column_options").Check(testkit.Rows("ALL")) + require.Error(t, tk.ExecToErr("set global tidb_analyze_column_options = 'UNKNOWN'")) + // test for tidb_ignore_prepared_cache_close_stmt tk.MustQuery("select @@global.tidb_ignore_prepared_cache_close_stmt").Check(testkit.Rows("0")) // default value is 0 tk.MustExec("set global tidb_ignore_prepared_cache_close_stmt=1") diff --git a/pkg/sessionctx/variable/sysvar.go b/pkg/sessionctx/variable/sysvar.go index 1f15cdc67a8a0..c47010cef57af 100644 --- a/pkg/sessionctx/variable/sysvar.go +++ b/pkg/sessionctx/variable/sysvar.go @@ -34,6 +34,7 @@ import ( "github.com/pingcap/tidb/pkg/parser" "github.com/pingcap/tidb/pkg/parser/ast" "github.com/pingcap/tidb/pkg/parser/charset" + "github.com/pingcap/tidb/pkg/parser/model" "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/planner/util/fixcontrol" "github.com/pingcap/tidb/pkg/privilege/privileges/ldap" @@ -1010,7 +1011,33 @@ var defaultSysVars = []*SysVar{ 
RunAutoAnalyze.Store(TiDBOptOn(val)) return nil }, - }, { + }, + { + Scope: ScopeGlobal, + Name: TiDBAnalyzeColumnOptions, + Value: DefTiDBAnalyzeColumnOptions, + Type: TypeStr, + GetGlobal: func(ctx context.Context, s *SessionVars) (string, error) { + return AnalyzeColumnOptions.Load(), nil + }, + SetGlobal: func(_ context.Context, s *SessionVars, val string) error { + AnalyzeColumnOptions.Store(strings.ToUpper(val)) + return nil + }, + Validation: func(s *SessionVars, normalizedValue string, originalValue string, scope ScopeFlag) (string, error) { + choice := strings.ToUpper(normalizedValue) + if choice != model.AllColumns.String() && choice != model.PredicateColumns.String() { + return "", errors.Errorf( + "invalid value for %s, it should be either '%s' or '%s'", + TiDBAnalyzeColumnOptions, + model.AllColumns.String(), + model.PredicateColumns.String(), + ) + } + return normalizedValue, nil + }, + }, + { Scope: ScopeGlobal, Name: TiDBEnableAutoAnalyzePriorityQueue, Value: BoolToOnOff(DefTiDBEnableAutoAnalyzePriorityQueue), Type: TypeBool, GetGlobal: func(_ context.Context, s *SessionVars) (string, error) { return BoolToOnOff(EnableAutoAnalyzePriorityQueue.Load()), nil diff --git a/pkg/sessionctx/variable/tidb_vars.go b/pkg/sessionctx/variable/tidb_vars.go index d71675d74e667..35509e36e5e7e 100644 --- a/pkg/sessionctx/variable/tidb_vars.go +++ b/pkg/sessionctx/variable/tidb_vars.go @@ -977,6 +977,11 @@ const ( // TiDBEnableColumnTracking enables collecting predicate columns. // DEPRECATED: This variable is deprecated, please do not use this variable. TiDBEnableColumnTracking = "tidb_enable_column_tracking" + // TiDBAnalyzeColumnOptions specifies the default column selection strategy for both manual and automatic analyze operations. + // It accepts two values: + // `PREDICATE`: Analyze only the columns that are used in the predicates of the query. + // `ALL`: Analyze all columns in the table. 
+ TiDBAnalyzeColumnOptions = "tidb_analyze_column_options" // TiDBDisableColumnTrackingTime records the last time TiDBEnableColumnTracking is set off. // It is used to invalidate the collected predicate columns after turning off TiDBEnableColumnTracking, which avoids physical deletion. // It doesn't have cache in memory, and we directly get/set the variable value from/to mysql.tidb. @@ -1372,6 +1377,7 @@ const ( DefTiDBMemQuotaAnalyze = -1 DefTiDBEnableAutoAnalyze = true DefTiDBEnableAutoAnalyzePriorityQueue = true + DefTiDBAnalyzeColumnOptions = "ALL" DefTiDBMemOOMAction = "CANCEL" DefTiDBMaxAutoAnalyzeTime = 12 * 60 * 60 DefTiDBEnablePrepPlanCache = true @@ -1498,20 +1504,29 @@ const ( // Process global variables. var ( - ProcessGeneralLog = atomic.NewBool(false) - RunAutoAnalyze = atomic.NewBool(DefTiDBEnableAutoAnalyze) - EnableAutoAnalyzePriorityQueue = atomic.NewBool(DefTiDBEnableAutoAnalyzePriorityQueue) - GlobalLogMaxDays = atomic.NewInt32(int32(config.GetGlobalConfig().Log.File.MaxDays)) - QueryLogMaxLen = atomic.NewInt32(DefTiDBQueryLogMaxLen) - EnablePProfSQLCPU = atomic.NewBool(false) - EnableBatchDML = atomic.NewBool(false) - EnableTmpStorageOnOOM = atomic.NewBool(DefTiDBEnableTmpStorageOnOOM) - ddlReorgWorkerCounter int32 = DefTiDBDDLReorgWorkerCount - ddlReorgBatchSize int32 = DefTiDBDDLReorgBatchSize - ddlFlashbackConcurrency int32 = DefTiDBDDLFlashbackConcurrency - ddlErrorCountLimit int64 = DefTiDBDDLErrorCountLimit - ddlReorgRowFormat int64 = DefTiDBRowFormatV2 - maxDeltaSchemaCount int64 = DefTiDBMaxDeltaSchemaCount + ProcessGeneralLog = atomic.NewBool(false) + RunAutoAnalyze = atomic.NewBool(DefTiDBEnableAutoAnalyze) + EnableAutoAnalyzePriorityQueue = atomic.NewBool(DefTiDBEnableAutoAnalyzePriorityQueue) + // AnalyzeColumnOptions is a global variable that indicates the default column choice for ANALYZE. + // The value of this variable is a string that can be one of the following values: + // "PREDICATE", "ALL". 
+ // The behavior of the analyze operation depends on the value of `tidb_persist_analyze_options`: + // 1. If `tidb_persist_analyze_options` is enabled and the column choice from the analyze options record is set to `default`, + // the value of `tidb_analyze_column_options` determines the behavior of the analyze operation. + // 2. If `tidb_persist_analyze_options` is disabled, `tidb_analyze_column_options` is used directly to decide + // whether to analyze all columns or just the predicate columns. + AnalyzeColumnOptions = atomic.NewString(DefTiDBAnalyzeColumnOptions) + GlobalLogMaxDays = atomic.NewInt32(int32(config.GetGlobalConfig().Log.File.MaxDays)) + QueryLogMaxLen = atomic.NewInt32(DefTiDBQueryLogMaxLen) + EnablePProfSQLCPU = atomic.NewBool(false) + EnableBatchDML = atomic.NewBool(false) + EnableTmpStorageOnOOM = atomic.NewBool(DefTiDBEnableTmpStorageOnOOM) + ddlReorgWorkerCounter int32 = DefTiDBDDLReorgWorkerCount + ddlReorgBatchSize int32 = DefTiDBDDLReorgBatchSize + ddlFlashbackConcurrency int32 = DefTiDBDDLFlashbackConcurrency + ddlErrorCountLimit int64 = DefTiDBDDLErrorCountLimit + ddlReorgRowFormat int64 = DefTiDBRowFormatV2 + maxDeltaSchemaCount int64 = DefTiDBMaxDeltaSchemaCount // DDLSlowOprThreshold is the threshold for ddl slow operations, the unit is milliseconds. DDLSlowOprThreshold = config.GetGlobalConfig().Instance.DDLSlowOprThreshold ForcePriority = int32(DefTiDBForcePriority)