diff --git a/internal/config/server/provider.go b/internal/config/server/provider.go index fa14fc778f..9ad315771f 100644 --- a/internal/config/server/provider.go +++ b/internal/config/server/provider.go @@ -22,6 +22,7 @@ type ProviderConfig struct { Git GitConfig `mapstructure:"git"` } +// GitConfig provides server-side configuration for Git operations like "clone" type GitConfig struct { MaxFiles int64 `mapstructure:"max_files" default:"10000"` MaxBytes int64 `mapstructure:"max_bytes" default:"100_000_000"` diff --git a/internal/engine/ingester/git/git_test.go b/internal/engine/ingester/git/git_test.go index 7c690f0735..bbb6869cb8 100644 --- a/internal/engine/ingester/git/git_test.go +++ b/internal/engine/ingester/git/git_test.go @@ -17,17 +17,17 @@ package git_test import ( "bytes" "context" - "github.com/stacklok/minder/internal/config/server" - v1 "github.com/stacklok/minder/pkg/providers/v1" "testing" "github.com/stretchr/testify/require" + "github.com/stacklok/minder/internal/config/server" engerrors "github.com/stacklok/minder/internal/engine/errors" gitengine "github.com/stacklok/minder/internal/engine/ingester/git" "github.com/stacklok/minder/internal/providers/credentials" gitclient "github.com/stacklok/minder/internal/providers/git" pb "github.com/stacklok/minder/pkg/api/protobuf/go/minder/v1" + v1 "github.com/stacklok/minder/pkg/providers/v1" ) func TestGitIngestWithCloneURLFromRepo(t *testing.T) { diff --git a/internal/providers/git/git.go b/internal/providers/git/git.go index 6cdac94ebb..57f6caf0a0 100644 --- a/internal/providers/git/git.go +++ b/internal/providers/git/git.go @@ -19,15 +19,14 @@ import ( "context" "errors" "fmt" - "io/fs" "github.com/go-git/go-billy/v5/memfs" - "github.com/go-git/go-git/v5/plumbing/cache" - "github.com/go-git/go-git/v5/storage/filesystem" - "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/cache" "github.com/go-git/go-git/v5/plumbing/transport" + "github.com/go-git/go-git/v5/storage/filesystem" + "github.com/stacklok/minder/internal/config/server" "github.com/stacklok/minder/internal/providers/git/memboxfs" minderv1 "github.com/stacklok/minder/pkg/api/protobuf/go/minder/v1" @@ -46,6 +45,7 @@ const maxCachedObjectSize = 100 * 1024 // 100KiB // Ensure that the Git client implements the Git interface var _ provifv1.Git = (*Git)(nil) +// Options implements the "functional options" pattern for Git type Options func(*Git) // NewGit creates a new GitHub client @@ -59,6 +59,7 @@ func NewGit(token provifv1.GitCredential, opts ...Options) *Git { return ret } +// WithConfig configures the Git implementation with server-side configuration options. func WithConfig(cfg server.GitConfig) Options { return func(g *Git) { g.maxFiles = cfg.MaxFiles @@ -97,9 +98,17 @@ func (g *Git) Clone(ctx context.Context, url, branch string) (*git.Repository, e TotalFileSize: g.maxBytes, } } + // go-git seems to want separate filesystems for the storer and the checked out files + storerFs := memfs.New() + if g.maxFiles != 0 && g.maxBytes != 0 { + storerFs = &memboxfs.LimitedFs{ + Fs: storerFs, + MaxFiles: g.maxFiles, + TotalFileSize: g.maxBytes, + } + } storerCache := cache.NewObjectLRU(maxCachedObjectSize) - // reuse same FS for storage and cloned files - storer := filesystem.NewStorage(memFS, storerCache) + storer := filesystem.NewStorage(storerFs, storerCache) // We clone to the memfs go-billy filesystem driver, which doesn't // allow for direct access to the underlying filesystem. This is @@ -112,8 +121,10 @@ func (g *Git) Clone(ctx context.Context, url, branch string) (*git.Repository, e return nil, provifv1.ErrProviderGitBranchNotFound } else if errors.Is(err, transport.ErrEmptyRemoteRepository) { return nil, provifv1.ErrRepositoryEmpty - } else if errors.Is(err, fs.ErrPermission) { - return nil, provifv1.ErrRepositoryTooLarge + } else if errors.Is(err, memboxfs.ErrTooManyFiles) { + return nil, fmt.Errorf("%w: %w", provifv1.ErrRepositoryTooLarge, err) + } else if errors.Is(err, memboxfs.ErrTooBig) { + return nil, fmt.Errorf("%w: %w", provifv1.ErrRepositoryTooLarge, err) } return nil, fmt.Errorf("could not clone repo: %w", err) } diff --git a/internal/providers/git/memboxfs/fs.go b/internal/providers/git/memboxfs/fs.go index 34e533e0dc..1380e82c59 100644 --- a/internal/providers/git/memboxfs/fs.go +++ b/internal/providers/git/memboxfs/fs.go @@ -28,17 +28,23 @@ import ( // LimitedFs provides a size-limited billy.Filesystem. This is a struct, there's // no constructor here. Note that LimitedFs is not thread-safe. type LimitedFs struct { - Fs billy.Filesystem + Fs billy.Filesystem MaxFiles int64 TotalFileSize int64 - currentFiles int64 - currentSize int64 + currentFiles int64 + currentSize int64 } // ErrNotImplemented is returned when a method is not implemented. var ErrNotImplemented = fmt.Errorf("not implemented") +// ErrTooBig is returned when a file is too big. +var ErrTooBig = fmt.Errorf("file too big") + +// ErrTooManyFiles is returned when there are too many files. +var ErrTooManyFiles = fmt.Errorf("too many files") + var _ billy.Filesystem = (*LimitedFs)(nil) // Chroot implements billy.Filesystem. @@ -50,7 +56,7 @@ func (_ *LimitedFs) Chroot(_ string) (billy.Filesystem, error) { func (f *LimitedFs) Create(filename string) (billy.File, error) { f.currentFiles++ if f.currentFiles >= f.MaxFiles { - return nil, fs.ErrPermission + return nil, fmt.Errorf("%w: %s", ErrTooManyFiles, filename) } file, err := f.Fs.Create(filename) return &fileWrapper{f: file, fs: f}, err @@ -71,7 +77,7 @@ func (f *LimitedFs) MkdirAll(filename string, perm fs.FileMode) error { // TODO: account for path segments correctly f.currentFiles++ if f.currentFiles >= f.MaxFiles { - return fs.ErrPermission + return fmt.Errorf("%w: %s", ErrTooManyFiles, filename) } return f.Fs.MkdirAll(filename, perm) } @@ -86,7 +92,7 @@ func (f *LimitedFs) OpenFile(filename string, flag int, perm fs.FileMode) (billy if flag&os.O_CREATE != 0 { f.currentFiles++ if f.currentFiles >= f.MaxFiles { - return nil, fs.ErrPermission + return nil, fmt.Errorf("%w: %s", ErrTooManyFiles, filename) } } file, err := f.Fs.OpenFile(filename, flag, perm) @@ -129,7 +135,7 @@ func (f *LimitedFs) Stat(filename string) (fs.FileInfo, error) { func (f *LimitedFs) Symlink(target string, link string) error { f.currentFiles++ if f.currentFiles >= f.MaxFiles { - return fs.ErrPermission + return fmt.Errorf("%w: %s", ErrTooManyFiles, link) } return f.Fs.Symlink(target, link) } @@ -138,7 +144,7 @@ func (f *LimitedFs) Symlink(target string, link string) error { func (f *LimitedFs) TempFile(dir string, prefix string) (billy.File, error) { f.currentFiles++ if f.currentFiles >= f.MaxFiles { - return nil, fs.ErrPermission + return nil, fmt.Errorf("%w: %s/%s", ErrTooManyFiles, dir, prefix) } file, err := f.Fs.TempFile(dir, prefix) return &fileWrapper{f: file, fs: f}, err @@ -191,7 +197,7 @@ func (f *fileWrapper) Truncate(size int64) error { growth := size - existingSize if growth+f.fs.currentSize > f.fs.TotalFileSize { - return fs.ErrPermission + return fmt.Errorf("%w: %s", ErrTooBig, f.Name()) } f.fs.currentSize += growth @@ -219,7 +225,7 @@ func (f *fileWrapper) Write(p []byte) (n int, err error) { growth = 0 } if growth+f.fs.currentSize > f.fs.TotalFileSize { - return 0, fs.ErrPermission + return 0, fmt.Errorf("%w: %s", ErrTooBig, f.Name()) } f.fs.currentSize += growth