From da17d77f280a936a7d7cbb97c40247af793acdf4 Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Fri, 8 Sep 2023 09:51:28 +0200 Subject: [PATCH] Use a pager to iterate through a GCS bucket Iterating through a GCS bucket can be resource intensive, especially when doing it recursively for all objects in the bucket. This commit tries to resolve that by using a pager with a fixed page size and reusing a single slice to process results. Signed-off-by: Filip Petkovski --- go.mod | 2 +- go.sum | 1 - providers/gcs/gcs.go | 21 +++++++++++++-------- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/go.mod b/go.mod index dee14caa..d66fc2fe 100644 --- a/go.mod +++ b/go.mod @@ -27,6 +27,7 @@ require ( golang.org/x/oauth2 v0.4.0 golang.org/x/sync v0.1.0 google.golang.org/api v0.103.0 + google.golang.org/grpc v1.53.0 gopkg.in/alecthomas/kingpin.v2 v2.2.6 gopkg.in/yaml.v2 v2.4.0 ) @@ -92,7 +93,6 @@ require ( golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/genproto v0.0.0-20230110181048-76db0878b65f // indirect - google.golang.org/grpc v1.53.0 // indirect google.golang.org/protobuf v1.28.1 // indirect gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect gopkg.in/ini.v1 v1.67.0 // indirect diff --git a/go.sum b/go.sum index 860aca72..45756f80 100644 --- a/go.sum +++ b/go.sum @@ -491,7 +491,6 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/providers/gcs/gcs.go b/providers/gcs/gcs.go index 5ea45c7e..4ab45165 100644 --- a/providers/gcs/gcs.go +++ b/providers/gcs/gcs.go @@ -108,25 +108,30 @@ func (b *Bucket) Iter(ctx context.Context, dir string, f func(string) error, opt delimiter = "" } - it := b.bkt.Objects(ctx, &storage.Query{ + const pageSize = 1000 + items := make([]*storage.ObjectAttrs, 0, pageSize) + it := iterator.NewPager(b.bkt.Objects(ctx, &storage.Query{ Prefix: dir, Delimiter: delimiter, - }) + }), pageSize, "") for { select { case <-ctx.Done(): return ctx.Err() default: } - attrs, err := it.Next() - if err == iterator.Done { - return nil - } + token, err := it.NextPage(&items) if err != nil { return err } - if err := f(attrs.Prefix + attrs.Name); err != nil { - return err + for _, attrs := range items { + if err := f(attrs.Prefix + attrs.Name); err != nil { + return err + } + } + items = items[:0] + if token == "" { + return nil } } }