diff --git a/pkg/backup/client.go b/pkg/backup/client.go index 20d56e876..8f1bca928 100644 --- a/pkg/backup/client.go +++ b/pkg/backup/client.go @@ -807,7 +807,6 @@ func (bc *Client) handleFineGrained( return 0, errors.Trace(pderr) } storeID := leader.GetStoreId() - max := 0 req := backuppb.BackupRequest{ ClusterId: bc.clusterID, @@ -834,21 +833,27 @@ func (bc *Client) handleFineGrained( log.Error("fail to connect store", zap.Uint64("StoreID", storeID)) return 0, errors.Annotatef(err, "failed to connect to store %d", storeID) } + hasProgress := false + backoffMill := 0 err = SendBackup( ctx, storeID, client, req, // Handle responses with the same backoffer. func(resp *backuppb.BackupResponse) error { - response, backoffMs, err1 := + response, shouldBackoff, err1 := OnBackupResponse(storeID, bo, backupTS, lockResolver, resp) if err1 != nil { return err1 } - if max < backoffMs { - max = backoffMs + if backoffMill < shouldBackoff { + backoffMill = shouldBackoff } if response != nil { respCh <- response } + // When meet an error, we need to set hasProgress too, in case of + // overriding the backoffTime of original error. + // hasProgress would be false iff there is a early io.EOF from the stream. + hasProgress = true return nil }, func() (backuppb.BackupClient, error) { @@ -866,7 +871,14 @@ func (bc *Client) handleFineGrained( return 0, errors.Annotatef(err, "failed to send fine-grained backup [%s, %s)", redact.Key(req.StartKey), redact.Key(req.EndKey)) } - return max, nil + + // If no progress, backoff 10s for debouncing. + // 10s is the default interval of stores sending a heartbeat to the PD. + // And is the average new leader election timeout, which would be a reasonable back off time. + if !hasProgress { + backoffMill = 10000 + } + return backoffMill, nil } // SendBackup send backup request to the given store.