Skip to content

Commit

Permalink
Add option to filter layers from tarball
Browse files Browse the repository at this point in the history
Continuation of this PR:
google#209

This should be considered a relatively advanced option, but for folks that know
what they are doing you can reduce the amount of data that you need to encode in
the tarball for the daemon to load it.

The ultimate use case of this option will be daemon.Write, which
currently uses the docker load interface to load images into the daemon.
However, this currently re-uploads (and re-downloads) the base image on each
write in contexts like ko. If we can determine the set of layers that already
exist in the daemon, we can elide these from the tarball to dramatically improve
performance.

Related: google#205
  • Loading branch information
jonjohnsonjr committed Oct 3, 2019
1 parent ff1ac7f commit 2dce6f7
Show file tree
Hide file tree
Showing 5 changed files with 150 additions and 12 deletions.
2 changes: 1 addition & 1 deletion go.mod

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

55 changes: 55 additions & 0 deletions pkg/v1/tarball/options.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Copyright 2019 Google LLC All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package tarball

import (
v1 "github.com/google/go-containerregistry/pkg/v1"
)

// Option is a functional option for tarball operations.
// Each Option receives the options value being built and may modify it; a
// non-nil error stops option processing in makeOptions and is returned to the
// caller.
type Option func(*options) error

// LayerFilter defines a function for filtering layers.
// It returns true if the layer should be kept and false if the layer should
// be excluded. A non-nil error is propagated to the caller of the operation
// that invoked the filter.
type LayerFilter func(v1.Layer) (bool, error)

// options holds the settings assembled from the Option values passed to a
// tarball operation.
type options struct {
// filter reports, for a given layer, whether that layer should be kept.
filter LayerFilter
}

// makeOptions assembles the effective options for a tarball operation.
// It starts from a default filter that keeps every layer, then applies each
// supplied Option in order, stopping at and returning the first error.
func makeOptions(opts ...Option) (*options, error) {
	// Default behaviour: no layer is excluded.
	keepAll := func(v1.Layer) (bool, error) { return true, nil }
	cfg := &options{filter: keepAll}

	for i := range opts {
		if err := opts[i](cfg); err != nil {
			return nil, err
		}
	}
	return cfg, nil
}

// WithLayerFilter allows omitting layers when writing a tarball.
// Layers for which lf returns false are excluded; an error returned by lf is
// propagated to the caller of the write.
//
// A nil lf is ignored and leaves the current filter in place (by default,
// every layer is kept). Installing a nil function would otherwise panic the
// first time the filter is invoked.
func WithLayerFilter(lf LayerFilter) Option {
	return func(o *options) error {
		if lf == nil {
			return nil
		}
		o.filter = lf
		return nil
	}
}
36 changes: 25 additions & 11 deletions pkg/v1/tarball/write.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,62 +29,67 @@ import (

// WriteToFile writes img, tagged as ref, in the compressed format to a tarball
// on disk at path p.
// This is just syntactic sugar wrapping tarball.Write with a new file; any
// options are passed through to Write.
func WriteToFile(p string, ref name.Reference, img v1.Image) error {
func WriteToFile(p string, ref name.Reference, img v1.Image, opt ...Option) error {
// os.Create truncates p if it already exists.
w, err := os.Create(p)
if err != nil {
return err
}
// NOTE(review): the error returned by Close is discarded here.
defer w.Close()

return Write(ref, img, w)
return Write(ref, img, w, opt...)
}

// MultiWriteToFile writes the images in tagToImage in the compressed format to
// a tarball on disk at path p.
// It re-keys the map by name.Reference and delegates to MultiRefWriteToFile;
// any options are passed through unchanged.
func MultiWriteToFile(p string, tagToImage map[name.Tag]v1.Image) error {
func MultiWriteToFile(p string, tagToImage map[name.Tag]v1.Image, opt ...Option) error {
// Copy into a map keyed by the more general name.Reference type.
refToImage := make(map[name.Reference]v1.Image, len(tagToImage))
for i, d := range tagToImage {
refToImage[i] = d
}
return MultiRefWriteToFile(p, refToImage)
return MultiRefWriteToFile(p, refToImage, opt...)
}

// MultiRefWriteToFile writes the images in refToImage in the compressed format
// to a tarball on disk at path p.
// This is just syntactic sugar wrapping tarball.MultiRefWrite with a new file;
// any options are passed through to MultiRefWrite.
func MultiRefWriteToFile(p string, refToImage map[name.Reference]v1.Image) error {
func MultiRefWriteToFile(p string, refToImage map[name.Reference]v1.Image, opt ...Option) error {
// os.Create truncates p if it already exists.
w, err := os.Create(p)
if err != nil {
return err
}
// NOTE(review): the error returned by Close is discarded here.
defer w.Close()

return MultiRefWrite(refToImage, w)
return MultiRefWrite(refToImage, w, opt...)
}

// Write is a wrapper to write a single image and tag to a tarball.
// It builds a one-entry map from ref to img and hands it to MultiRefWrite,
// which writes to w; any options are passed through unchanged.
func Write(ref name.Reference, img v1.Image, w io.Writer) error {
return MultiRefWrite(map[name.Reference]v1.Image{ref: img}, w)
func Write(ref name.Reference, img v1.Image, w io.Writer, opt ...Option) error {
return MultiRefWrite(map[name.Reference]v1.Image{ref: img}, w, opt...)
}

// MultiWrite writes the contents of each image to the provided writer, in the compressed format.
// The contents are written in the following format:
// One manifest.json file at the top level containing information about several images.
// One file for each layer, named after the layer's SHA.
// One file for the config blob, named after its SHA.
//
// It re-keys tagToImage by name.Reference and delegates to MultiRefWrite; any
// options are passed through unchanged.
func MultiWrite(tagToImage map[name.Tag]v1.Image, w io.Writer) error {
func MultiWrite(tagToImage map[name.Tag]v1.Image, w io.Writer, opt ...Option) error {
// Copy into a map keyed by the more general name.Reference type.
refToImage := make(map[name.Reference]v1.Image, len(tagToImage))
for i, d := range tagToImage {
refToImage[i] = d
}
return MultiRefWrite(refToImage, w)
return MultiRefWrite(refToImage, w, opt...)
}

// MultiRefWrite writes the contents of each image to the provided writer, in the compressed format.
// The contents are written in the following format:
// One manifest.json file at the top level containing information about several images.
// One file for each layer, named after the layer's SHA.
// One file for the config blob, named after its SHA.
func MultiRefWrite(refToImage map[name.Reference]v1.Image, w io.Writer) error {
func MultiRefWrite(refToImage map[name.Reference]v1.Image, w io.Writer, opt ...Option) error {
o, err := makeOptions(opt...)
if err != nil {
return err
}

tf := tar.NewWriter(w)
defer tf.Close()

Expand Down Expand Up @@ -144,6 +149,15 @@ func MultiRefWrite(refToImage map[name.Reference]v1.Image, w io.Writer) error {
// https://www.gnu.org/software/gzip/manual/html_node/Overview.html
layerFiles[i] = fmt.Sprintf("%s.tar.gz", hex)

// We filter late because the length of layerFiles must match the diff_ids
// in config file. It is ok if the file doesn't exist when the daemon
// already has a given layer, since it won't try to read it.
if keep, err := o.filter(l); err != nil {
return err
} else if !keep {
continue
}

r, err := l.Compressed()
if err != nil {
return err
Expand Down
67 changes: 67 additions & 0 deletions pkg/v1/tarball/write_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@
package tarball_test

import (
"archive/tar"
"io"
"io/ioutil"
"os"
"strings"
"testing"

"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -350,3 +353,67 @@ func getDiffIDs(t *testing.T, layers []v1.Layer) []v1.Hash {

return diffIDs
}

// TestFilteredWrite verifies that a layer rejected by a LayerFilter does not
// appear in the tarball produced by WriteToFile.
func TestFilteredWrite(t *testing.T) {
	// Make a tempfile for tarball writes. Only its path is needed, so the
	// handle is closed right away; WriteToFile re-creates the file itself.
	fp, err := ioutil.TempFile("", "")
	if err != nil {
		t.Fatalf("Error creating temp file: %v", err)
	}
	path := fp.Name()
	t.Log(path)
	if err := fp.Close(); err != nil {
		t.Fatalf("Error closing temp file: %v", err)
	}
	defer os.Remove(path)

	// Make a random image
	randImage, err := random.Image(256, 8)
	if err != nil {
		t.Fatalf("Error creating random image: %v", err)
	}
	tag, err := name.NewTag("gcr.io/foo/bar:latest", name.StrictValidation)
	if err != nil {
		t.Fatalf("Error creating test tag: %v", err)
	}

	layers, err := randImage.Layers()
	if err != nil {
		t.Fatalf("Layers() = %v", err)
	}
	if len(layers) == 0 {
		t.Fatal("Layers() returned no layers, want at least one")
	}
	rld, err := layers[0].Digest()
	if err != nil {
		t.Fatalf("Digest() = %v", err)
	}

	// Filter the first layer in the image; keep everything else.
	lf := func(l v1.Layer) (bool, error) {
		ld, err := l.Digest()
		if err != nil {
			return false, err
		}
		return ld != rld, nil
	}

	if err := tarball.WriteToFile(path, tag, randImage, tarball.WithLayerFilter(lf)); err != nil {
		t.Fatalf("Unexpected error writing tarball: %v", err)
	}

	f, err := os.Open(path)
	if err != nil {
		t.Fatalf("os.Open() = %v", err)
	}
	defer f.Close()

	// Walk every entry in the tarball and make sure none is named after the
	// filtered layer's digest.
	tarReader := tar.NewReader(f)
	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			t.Fatalf("scanning tarfile: %v", err)
		}

		if strings.Contains(header.Name, rld.Hex) {
			t.Errorf("Saw file %v in tarball, want %v elided.", header.Name, rld)
		}
	}
}

0 comments on commit 2dce6f7

Please sign in to comment.