Skip to content

Commit

Permalink
Use a pager to iterate through a GCS bucket
Browse files Browse the repository at this point in the history
Iterating through a GCS bucket can be resource intensive, especially
when doing it recursively for all objects in the bucket.

This commit reduces that cost by listing objects through a pager with a
fixed page size and reusing a single slice to hold each page of results.

Signed-off-by: Filip Petkovski <[email protected]>
  • Loading branch information
fpetkovski committed Sep 8, 2023
1 parent 1b257a3 commit da17d77
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 10 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ require (
golang.org/x/oauth2 v0.4.0
golang.org/x/sync v0.1.0
google.golang.org/api v0.103.0
google.golang.org/grpc v1.53.0
gopkg.in/alecthomas/kingpin.v2 v2.2.6
gopkg.in/yaml.v2 v2.4.0
)
Expand Down Expand Up @@ -92,7 +93,6 @@ require (
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto v0.0.0-20230110181048-76db0878b65f // indirect
google.golang.org/grpc v1.53.0 // indirect
google.golang.org/protobuf v1.28.1 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
Expand Down
1 change: 0 additions & 1 deletion go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,6 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
Expand Down
21 changes: 13 additions & 8 deletions providers/gcs/gcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,25 +108,30 @@ func (b *Bucket) Iter(ctx context.Context, dir string, f func(string) error, opt
delimiter = ""
}

it := b.bkt.Objects(ctx, &storage.Query{
const pageSize = 1000
items := make([]*storage.ObjectAttrs, 0, pageSize)
it := iterator.NewPager(b.bkt.Objects(ctx, &storage.Query{
Prefix: dir,
Delimiter: delimiter,
})
}), pageSize, "")
for {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
attrs, err := it.Next()
if err == iterator.Done {
return nil
}
token, err := it.NextPage(&items)
if err != nil {
return err
}
if err := f(attrs.Prefix + attrs.Name); err != nil {
return err
for _, attrs := range items {
if err := f(attrs.Prefix + attrs.Name); err != nil {
return err
}
}
items = items[:0]
if token == "" {
return nil
}
}
}
Expand Down

0 comments on commit da17d77

Please sign in to comment.