-
Notifications
You must be signed in to change notification settings - Fork 1.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[fix] - resource leak #3402
[fix] - resource leak #3402
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1213,7 +1213,14 @@ func getSafeRemoteURL(repo *git.Repository, preferred string) string { | |
return safeURL | ||
} | ||
|
||
func (s *Git) handleBinary(ctx context.Context, gitDir string, reporter sources.ChunkReporter, chunkSkel *sources.Chunk, commitHash plumbing.Hash, path string) error { | ||
func (s *Git) handleBinary( | ||
ctx context.Context, | ||
gitDir string, | ||
reporter sources.ChunkReporter, | ||
chunkSkel *sources.Chunk, | ||
commitHash plumbing.Hash, | ||
path string, | ||
) (err error) { | ||
fileCtx := context.WithValues(ctx, "commit", commitHash.String()[:7], "path", path) | ||
fileCtx.Logger().V(5).Info("handling binary file") | ||
|
||
|
@@ -1227,9 +1234,32 @@ func (s *Git) handleBinary(ctx context.Context, gitDir string, reporter sources. | |
return nil | ||
} | ||
|
||
cmd := exec.Command("git", "-C", gitDir, "cat-file", "blob", commitHash.String()+":"+path) | ||
const ( | ||
cmdTimeout = 60 * time.Second | ||
waitDelay = 5 * time.Second | ||
) | ||
// NOTE: This kludge ensures the context timeout for the 'git cat-file' command | ||
// matches the timeout for the HandleFile operation. | ||
// By setting both timeouts to the same value, we can be more confident | ||
// that both operations will run for the same duration. | ||
// The command execution includes a small Wait delay before terminating the process, | ||
// giving HandleFile time to respect the context | ||
// and return before the process is forcibly killed. | ||
// This approach helps prevent premature termination and allows for more complete processing. | ||
|
||
// TODO: Develop a more robust mechanism to ensure consistent timeout behavior between the command execution | ||
// and the HandleFile operation. This should prevent premature termination and allow for complete processing. | ||
handlers.SetArchiveMaxTimeout(cmdTimeout) | ||
|
||
// Create a timeout context for the 'git cat-file' command to ensure it does not run indefinitely. | ||
// This prevents potential resource exhaustion by terminating the command if it exceeds the specified duration. | ||
catFileCtx, cancel := context.WithTimeoutCause(fileCtx, cmdTimeout, errors.New("git cat-file timeout")) | ||
defer cancel() | ||
|
||
cmd := exec.CommandContext(catFileCtx, "git", "-C", gitDir, "cat-file", "blob", commitHash.String()+":"+path) | ||
var stderr bytes.Buffer | ||
cmd.Stderr = &stderr | ||
cmd.WaitDelay = waitDelay // give the command a chance to finish before the timeout :) | ||
|
||
stdout, err := cmd.StdoutPipe() | ||
if err != nil { | ||
|
@@ -1240,9 +1270,30 @@ func (s *Git) handleBinary(ctx context.Context, gitDir string, reporter sources. | |
return fmt.Errorf("error starting git cat-file: %w\n%s", err, stderr.Bytes()) | ||
} | ||
|
||
defer func() { _ = cmd.Wait() }() | ||
// Ensure all data from the reader (stdout) is consumed to prevent broken pipe errors. | ||
// This operation discards any remaining data after HandleFile completion. | ||
// If the reader is fully consumed, the copy is essentially a no-op. | ||
// If an error occurs while discarding, it will be logged and combined with any existing error. | ||
// The command's completion is then awaited and any execution errors are handled. | ||
defer func() { | ||
n, copyErr := io.Copy(io.Discard, stdout) | ||
if copyErr != nil { | ||
ctx.Logger().Error( | ||
copyErr, | ||
"Failed to discard remaining stdout data after HandleFile completion", | ||
) | ||
} | ||
|
||
ctx.Logger().V(3).Info( | ||
"HandleFile did not consume all stdout data; excess discarded", | ||
"bytes_discarded", n) | ||
|
||
// Wait for the command to finish and handle any errors. | ||
waitErr := cmd.Wait() | ||
err = errors.Join(err, copyErr, waitErr) | ||
}() | ||
|
||
return handlers.HandleFile(fileCtx, stdout, chunkSkel, reporter, handlers.WithSkipArchives(s.skipArchives)) | ||
return handlers.HandleFile(catFileCtx, stdout, chunkSkel, reporter, handlers.WithSkipArchives(s.skipArchives)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It does, but the So I feel like this approach effectively separates ownership: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay, so Yeah, this is a great approach! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It used to be an |
||
} | ||
|
||
func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) error { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is only necessary when the underlying reader is a pipe. If the reader isn't fully consumed,
cmd.Wait()
hangs indefinitely. SinceHandleFile
is used by other parts of the codebase with different readers, the logic to consume the reader (if it's a pipe) has been moved to the caller.