Skip to content

Commit

Permalink
Implement reindex command
Browse files Browse the repository at this point in the history
Ref: #22
  • Loading branch information
hypnoglow committed Jan 9, 2018
1 parent 40c3ccf commit 168e6e1
Show file tree
Hide file tree
Showing 9 changed files with 371 additions and 10 deletions.
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,14 @@ The chart is deleted from the repo:
## Uninstall

$ helm plugin remove s3


## Documentation

Additional documentation is available in the [docs](docs) directory. This currently includes:
- estimated [usage cost calculation](docs/usage-cost.md)
- [best practices](docs/best-practice.md)
for organizing your repositories.

## Contributing

Contributions are welcome. Please see [these instructions](.github/CONTRIBUTING.md)
Expand Down
2 changes: 1 addition & 1 deletion cmd/helms3/delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func runDelete(name, version, repoName string) error {
if err := storage.Delete(ctx, uri); err != nil {
return errors.WithMessage(err, "delete chart file from s3")
}
if _, err := storage.Upload(ctx, repoEntry.URL+"/index.yaml", idxReader); err != nil {
if err := storage.PutIndex(ctx, repoEntry.URL, idxReader); err != nil {
return errors.WithMessage(err, "upload new index to s3")
}

Expand Down
11 changes: 11 additions & 0 deletions cmd/helms3/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ const (
actionVersion = "version"
actionInit = "init"
actionPush = "push"
actionReindex = "reindex"
actionDelete = "delete"

defaultTimeout = time.Second * 5
Expand Down Expand Up @@ -46,6 +47,11 @@ func main() {
Required().
String()

reindexCmd := cli.Command(actionReindex, "Reindex the repository.")
reindexTargetRepository := reindexCmd.Arg("repo", "Target repository to reindex").
Required().
String()

deleteCmd := cli.Command(actionDelete, "Delete chart from the repository.").Alias("del")
deleteChartName := deleteCmd.Arg("chartName", "Name of chart to delete").
Required().
Expand Down Expand Up @@ -81,6 +87,11 @@ func main() {
}
return

case actionReindex:
if err := runReindex(*reindexTargetRepository); err != nil {
log.Fatal(err)
}

case actionDelete:
if err := runDelete(*deleteChartName, *deleteChartVersion, *deleteTargetRepository); err != nil {
log.Fatal(err)
Expand Down
5 changes: 5 additions & 0 deletions cmd/helms3/proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package main
import (
"context"
"fmt"
"path"
"strings"

"github.com/pkg/errors"

Expand All @@ -18,6 +20,9 @@ func runProxy(uri string) error {

b, err := storage.FetchRaw(ctx, uri)
if err != nil {
if strings.HasSuffix(uri, "index.yaml") && err == awss3.ErrObjectNotFound {
return fmt.Errorf("The index file does not exist by the path %s. If you haven't initialized the repository yet, try running \"helm s3 init %s\"", uri, path.Dir(uri))
}
return errors.WithMessage(err, "fetch from s3")
}

Expand Down
21 changes: 13 additions & 8 deletions cmd/helms3/push.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package main

import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
Expand Down Expand Up @@ -52,25 +53,29 @@ func runPush(chartPath string, repoName string) error {
return err
}

hash, err := provenance.DigestFile(fname)
if err != nil {
return errors.WithMessage(err, "get chart digest")
}

fchart, err := os.Open(fname)
if err != nil {
return errors.Wrap(err, "open chart file")
}

if _, err := storage.Upload(ctx, repoEntry.URL+"/"+fname, fchart); err != nil {
serializedChartMeta, err := json.Marshal(chart.Metadata)
if err != nil {
return errors.Wrap(err, "encode chart metadata to json")
}

if _, err := storage.PutChart(ctx, repoEntry.URL+"/"+fname, fchart, string(serializedChartMeta), hash); err != nil {
return errors.WithMessage(err, "upload chart to s3")
}

// Next, update the repository index.
// The gap between index fetching and uploading should be as small as
// possible to make the best effort to avoid race conditions.
// See https://github.com/hypnoglow/helm-s3/issues/18 for more info.

hash, err := provenance.DigestFile(fname)
if err != nil {
return errors.WithMessage(err, "get chart digest")
}

// Fetch current index, update it and upload it back.

b, err := storage.FetchRaw(ctx, repoEntry.URL+"/index.yaml")
Expand All @@ -91,7 +96,7 @@ func runPush(chartPath string, repoName string) error {
return errors.WithMessage(err, "get index reader")
}

if _, err := storage.Upload(ctx, repoEntry.URL+"/index.yaml", idxReader); err != nil {
if err := storage.PutIndex(ctx, repoEntry.URL, idxReader); err != nil {
return errors.WithMessage(err, "upload index to s3")
}

Expand Down
70 changes: 70 additions & 0 deletions cmd/helms3/reindex.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package main

import (
"context"
"time"

"github.com/pkg/errors"

"github.com/hypnoglow/helm-s3/pkg/awss3"
"github.com/hypnoglow/helm-s3/pkg/awsutil"
"github.com/hypnoglow/helm-s3/pkg/helmutil"
"github.com/hypnoglow/helm-s3/pkg/index"
)

const (
// reindexCommandDefaultTimeput is the duration passed to
// context.WithTimeout in runReindex.
// NOTE(review): the name looks like a typo of "Timeout"; it is left
// unchanged here because runReindex refers to it by this name.
reindexCommandDefaultTimeput = time.Second * 15
)

// runReindex rebuilds the index of the chart repository registered under
// repoName from the items reported by the storage traversal, then uploads
// the rebuilt index back to the repository.
//
// It returns an error if the repository entry cannot be looked up, the AWS
// config cannot be obtained, the traversal reports an error, the index reader
// cannot be created, or the index upload fails.
func runReindex(repoName string) error {
	// Just one big timeout for the whole operation. The same ctx is handed
	// to every storage call below so that the deadline is actually enforced
	// and the deferred cancel stops the traversal if we return early.
	ctx, cancel := context.WithTimeout(context.Background(), reindexCommandDefaultTimeput)
	defer cancel()

	repoEntry, err := helmutil.LookupRepoEntry(repoName)
	if err != nil {
		return err
	}

	awsConfig, err := awsutil.Config()
	if err != nil {
		return errors.Wrap(err, "get aws config")
	}

	storage := awss3.NewStorage(awsConfig)

	items := make(chan awss3.ChartInfo, 1)
	errs := make(chan error, 1)

	// NOTE(review): this relies on Traverse closing both items and errs
	// when it finishes — confirm against awss3.Storage.Traverse.
	go storage.Traverse(ctx, repoEntry.URL, items, errs)

	// Build the index concurrently while the traversal streams items.
	builtIndex := make(chan *index.Index, 1)
	go func() {
		idx := index.New()
		for item := range items {
			idx.Add(item.Meta, item.Filename, repoEntry.URL, item.Hash)
		}
		idx.SortEntries()

		builtIndex <- idx
	}()

	// Block until the traversal either reports its first error or closes
	// the errs channel; the first error aborts the whole operation.
	if traverseErr, ok := <-errs; ok {
		return errors.Wrap(traverseErr, "traverse the chart repository")
	}

	idx := <-builtIndex

	r, err := idx.Reader()
	if err != nil {
		return errors.Wrap(err, "get index reader")
	}

	if err := storage.PutIndex(ctx, repoEntry.URL, r); err != nil {
		return errors.Wrap(err, "upload index to the repository")
	}

	return nil
}
41 changes: 41 additions & 0 deletions docs/best-practice.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Best Practice

## Reindexing your repository

In short, due to limitations of AWS your chart repository index can be broken
by accident. This means that it may not reflect the "real" state of your chart
files in S3 bucket. Nothing serious, but can be annoying.

To work around this, the `helm s3 reindex <repo>` command is available. *Note: this
operation is [much more expensive](usage-cost.md#reindex) than the others in this plugin*.

## Organizing your repositories

A chart repository file structure is always flat.
It cannot contain nested directories.

The number of AWS S3 requests for reindex operation depends on your repository structure.
Due to limitations of the AWS S3 API you cannot list objects of the folder under the key
excluding subfolders. `ListObjects` can only list objects under the key recursively.

The plugin code does its best to ignore subfolders, because a chart repository is always flat.
But still, not all cases are covered.

Imagine the worst case scenario: you have 100 chart files in your repository, which is the
bucket root. And 1 million files in the "foo-bar" subfolder, which are not related to
the chart repository. In this case the plugin **has to** call `ListObjects`
about 1000 times (1000 objects per call) to make sure it did not miss any chart file.

Therefore, the golden rule is to **never have subfolders in your chart repository folder**.

So, there are two good options for your chart repository file structure inside S3 bucket:

1. One bucket - one repository. Create a bucket "yourcompany-charts-stable", or
"yourcompany-productname-charts" and use the bucket root as your chart repository.
In this case, never put any other files in that bucket.

2. One bucket - many repositories, each in separate subfolder. Create a bucket
"yourcompany-charts". Create a subfolder in it for each repository you need, for
example "stable" and "testing". Another option is to separate the repositories
by the product or by group of services, for example "backoffice", "order-processing", etc.
And again, never put any other files in the repository folder.
19 changes: 19 additions & 0 deletions docs/usage-cost.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Usage pricing

I hope this document helps you to calculate the AWS S3 usage cost for your use case.

Disclaimer: the plugin author is not responsible for your unexpected expenses.

**Make sure to consult the pricing for your region [here](https://aws.amazon.com/s3/pricing)!**

## Reindex

The `helm s3 reindex <repo>` command is a much more expensive operation than the others in
this plugin. For example, reindexing a repository with 1000 chart files in it
results in 1 GET (`ListObjects`) request and 1000 HEAD (`HeadObject`) requests.
Plus, it can make additional GET (`GetObject`) requests if it does not find the
required metadata in the HEAD request response.

At the time of writing this document, the price for HEAD/GET requests in `eu-central-1` is `$0.0043 for 10 000 requests`.
So the whole reindex operation for this case may cost approximately **$0.00043** or even **$0.00086**.
This seems small, but multiple reindex operations per day may hurt your budget.
Loading

0 comments on commit 168e6e1

Please sign in to comment.