Skip to content

Commit

Permalink
slack-19.0: add flag to control vtorc recoveries
Browse files Browse the repository at this point in the history
Signed-off-by: Tim Vaillancourt <[email protected]>
  • Loading branch information
timvaillancourt committed Oct 18, 2024
1 parent 33a4559 commit 2004549
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 0 deletions.
1 change: 1 addition & 0 deletions go/flags/endtoend/vtorc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ vtorc \

Flags:
--allow-emergency-reparent Whether VTOrc should be allowed to run emergency reparent operation when it detects a dead primary (default true)
--allow-recovery Allow recovery actions (default true)
--alsologtostderr log to standard error as well as files
--audit-file-location string File location where the audit logs are to be stored
--audit-purge-duration duration Duration for which audit logs are held before being purged. Should be in multiples of days (default 168h0m0s)
Expand Down
4 changes: 4 additions & 0 deletions go/vt/vtorc/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ var (
auditToBackend = false
auditToSyslog = false
auditPurgeDuration = 7 * 24 * time.Hour // Equivalent of 7 days
allowRecovery = true
recoveryPeriodBlockDuration = 30 * time.Second
preventCrossCellFailover = false
waitReplicasTimeout = 30 * time.Second
Expand All @@ -76,6 +77,7 @@ func RegisterFlags(fs *pflag.FlagSet) {
fs.BoolVar(&auditToBackend, "audit-to-backend", auditToBackend, "Whether to store the audit log in the VTOrc database")
fs.BoolVar(&auditToSyslog, "audit-to-syslog", auditToSyslog, "Whether to store the audit log in the syslog")
fs.DurationVar(&auditPurgeDuration, "audit-purge-duration", auditPurgeDuration, "Duration for which audit logs are held before being purged. Should be in multiples of days")
fs.BoolVar(&allowRecovery, "allow-recovery", allowRecovery, "Allow recovery actions")
fs.DurationVar(&recoveryPeriodBlockDuration, "recovery-period-block-duration", recoveryPeriodBlockDuration, "Duration for which a new recovery is blocked on an instance after running a recovery")
fs.BoolVar(&preventCrossCellFailover, "prevent-cross-cell-failover", preventCrossCellFailover, "Prevent VTOrc from promoting a primary in a different cell than the current primary in case of a failover")
fs.DurationVar(&waitReplicasTimeout, "wait-replicas-timeout", waitReplicasTimeout, "Duration for which to wait for replica's to respond when issuing RPCs")
Expand Down Expand Up @@ -104,6 +106,7 @@ type Configuration struct {
WaitReplicasTimeoutSeconds int // Timeout on amount of time to wait for the replicas in case of ERS. Should be a small value because we should fail-fast. Should not be larger than LockTimeout since that is the total time we use for an ERS.
TolerableReplicationLagSeconds int // Amount of replication lag that is considered acceptable for a tablet to be eligible for promotion when Vitess makes the choice of a new primary in PRS.
TopoInformationRefreshSeconds int // Timer duration on which VTOrc refreshes the keyspace and vttablet records from the topo-server.
AllowRecovery bool // Whether VTOrc may run recovery actions. When false, ContinuousDiscovery skips the work on each recovery tick.
RecoveryPollSeconds int // Timer duration on which VTOrc recovery analysis runs
}

Expand Down Expand Up @@ -134,6 +137,7 @@ func UpdateConfigValuesFromFlags() {
Config.WaitReplicasTimeoutSeconds = int(waitReplicasTimeout / time.Second)
Config.TolerableReplicationLagSeconds = int(tolerableReplicationLag / time.Second)
Config.TopoInformationRefreshSeconds = int(topoInformationRefreshDuration / time.Second)
Config.AllowRecovery = allowRecovery
Config.RecoveryPollSeconds = int(recoveryPollDuration / time.Second)
}

Expand Down
3 changes: 3 additions & 0 deletions go/vt/vtorc/logic/vtorc.go
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,9 @@ func ContinuousDiscovery() {
}
}()
case <-recoveryTick:
if !config.Config.AllowRecovery {
continue
}
go func() {
if IsLeaderOrActive() {
go ClearActiveFailureDetections()
Expand Down

0 comments on commit 2004549

Please sign in to comment.