Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

store: simplify imagestore implementation #1784

Merged
merged 4 commits into from
Mar 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 60 additions & 106 deletions drivers/overlay/overlay.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,16 +310,6 @@ func isNetworkFileSystem(fsMagic graphdriver.FsMagic) bool {
// If overlay filesystem is not supported on the host, a wrapped graphdriver.ErrNotSupported is returned as error.
// If an overlay filesystem is not supported over an existing filesystem then a wrapped graphdriver.ErrIncompatibleFS is returned.
func Init(home string, options graphdriver.Options) (graphdriver.Driver, error) {
// If custom --imagestore is selected never
// ditch the original graphRoot, instead add it as
// additionalImageStore so its images can still be
// read and used.
if options.ImageStore != "" {
graphRootAsAdditionalStore := fmt.Sprintf("AdditionalImageStore=%s", options.ImageStore)
options.DriverOptions = append(options.DriverOptions, graphRootAsAdditionalStore)
// complete base name with driver name included
options.ImageStore = filepath.Join(options.ImageStore, "overlay")
}
opts, err := parseOptions(options.DriverOptions)
if err != nil {
return nil, err
Expand Down Expand Up @@ -863,22 +853,15 @@ func (d *Driver) Status() [][2]string {
// Metadata returns meta data about the overlay driver such as
// LowerDir, UpperDir, WorkDir and MergeDir used to store data.
func (d *Driver) Metadata(id string) (map[string]string, error) {
dir, imagestore, _ := d.dir2(id)
dir := d.dir(id)
if _, err := os.Stat(dir); err != nil {
return nil, err
}
workDirBase := dir
if imagestore != "" {
if _, err := os.Stat(dir); err != nil {
return nil, err
}
workDirBase = imagestore
}

metadata := map[string]string{
"WorkDir": path.Join(workDirBase, "work"),
"MergedDir": path.Join(workDirBase, "merged"),
"UpperDir": path.Join(workDirBase, "diff"),
"WorkDir": path.Join(dir, "work"),
"MergedDir": path.Join(dir, "merged"),
"UpperDir": path.Join(dir, "diff"),
}

lowerDirs, err := d.getLowerDirs(id)
Expand All @@ -896,7 +879,7 @@ func (d *Driver) Metadata(id string) (map[string]string, error) {
// is being shutdown. For now, we just have to unmount the bind mounted
// we had created.
func (d *Driver) Cleanup() error {
_ = os.RemoveAll(d.getStagingDir())
_ = os.RemoveAll(filepath.Join(d.home, stagingDir))
return mount.Unmount(d.home)
}

Expand Down Expand Up @@ -992,8 +975,10 @@ func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) (retErr
return d.create(id, parent, opts, true)
}

func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, disableQuota bool) (retErr error) {
dir, imageStore, _ := d.dir2(id)
func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, readOnly bool) (retErr error) {
dir, homedir, _ := d.dir2(id, readOnly)

disableQuota := readOnly

uidMaps := d.uidMaps
gidMaps := d.gidMaps
Expand All @@ -1004,7 +989,7 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, disable
}

// Make the link directory if it does not exist
if err := idtools.MkdirAllAs(path.Join(d.home, linkDir), 0o755, 0, 0); err != nil {
if err := idtools.MkdirAllAs(path.Join(homedir, linkDir), 0o755, 0, 0); err != nil {
return err
}

Expand All @@ -1021,20 +1006,8 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, disable
if err := idtools.MkdirAllAndChownNew(path.Dir(dir), 0o755, idPair); err != nil {
return err
}
workDirBase := dir
if imageStore != "" {
workDirBase = imageStore
if err := idtools.MkdirAllAndChownNew(path.Dir(imageStore), 0o755, idPair); err != nil {
return err
}
}
if parent != "" {
parentBase, parentImageStore, inAdditionalStore := d.dir2(parent)
// If parentBase path is additional image store, select the image contained in parentBase.
// See https://github.com/containers/podman/issues/19748
if parentImageStore != "" && !inAdditionalStore {
parentBase = parentImageStore
}
parentBase := d.dir(parent)
st, err := system.Stat(filepath.Join(parentBase, "diff"))
if err != nil {
return err
Expand All @@ -1055,23 +1028,13 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, disable
if err := idtools.MkdirAllAndChownNew(dir, 0o700, idPair); err != nil {
return err
}
if imageStore != "" {
if err := idtools.MkdirAllAndChownNew(imageStore, 0o700, idPair); err != nil {
return err
}
}

defer func() {
// Clean up on failure
if retErr != nil {
if err2 := os.RemoveAll(dir); err2 != nil {
logrus.Errorf("While recovering from a failure creating a layer, error deleting %#v: %v", dir, err2)
}
if imageStore != "" {
if err2 := os.RemoveAll(workDirBase); err2 != nil {
logrus.Errorf("While recovering from a failure creating a layer, error deleting %#v: %v", workDirBase, err2)
}
}
}
}()

Expand All @@ -1094,11 +1057,6 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, disable
if err := d.quotaCtl.SetQuota(dir, quota); err != nil {
return err
}
if imageStore != "" {
if err := d.quotaCtl.SetQuota(imageStore, quota); err != nil {
return err
}
}
}

perms := defaultPerms
Expand All @@ -1107,30 +1065,22 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, disable
}

if parent != "" {
parentBase, parentImageStore, inAdditionalStore := d.dir2(parent)
// If parentBase path is additional image store, select the image contained in parentBase.
// See https://github.com/containers/podman/issues/19748
if parentImageStore != "" && !inAdditionalStore {
parentBase = parentImageStore
}
parentBase := d.dir(parent)
st, err := system.Stat(filepath.Join(parentBase, "diff"))
if err != nil {
return err
}
perms = os.FileMode(st.Mode())
}

if err := idtools.MkdirAs(path.Join(workDirBase, "diff"), perms, rootUID, rootGID); err != nil {
if err := idtools.MkdirAs(path.Join(dir, "diff"), perms, rootUID, rootGID); err != nil {
return err
}

lid := generateID(idLength)

linkBase := path.Join("..", id, "diff")
if imageStore != "" {
linkBase = path.Join(imageStore, "diff")
}
if err := os.Symlink(linkBase, path.Join(d.home, linkDir, lid)); err != nil {
if err := os.Symlink(linkBase, path.Join(homedir, linkDir, lid)); err != nil {
return err
}

Expand All @@ -1139,10 +1089,10 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, disable
return err
}

if err := idtools.MkdirAs(path.Join(workDirBase, "work"), 0o700, rootUID, rootGID); err != nil {
if err := idtools.MkdirAs(path.Join(dir, "work"), 0o700, rootUID, rootGID); err != nil {
return err
}
if err := idtools.MkdirAs(path.Join(workDirBase, "merged"), 0o700, rootUID, rootGID); err != nil {
if err := idtools.MkdirAs(path.Join(dir, "merged"), 0o700, rootUID, rootGID); err != nil {
return err
}

Expand Down Expand Up @@ -1224,26 +1174,39 @@ func (d *Driver) getLower(parent string) (string, error) {
}

func (d *Driver) dir(id string) string {
p, _, _ := d.dir2(id)
p, _, _ := d.dir2(id, false)
return p
}

func (d *Driver) dir2(id string) (string, string, bool) {
newpath := path.Join(d.home, id)
imageStore := ""
func (d *Driver) getAllImageStores() []string {
additionalImageStores := d.AdditionalImageStores()
if d.imageStore != "" {
imageStore = path.Join(d.imageStore, id)
additionalImageStores = append([]string{d.imageStore}, additionalImageStores...)
}
return additionalImageStores
}

func (d *Driver) dir2(id string, useImageStore bool) (string, string, bool) {
var homedir string

if useImageStore && d.imageStore != "" {
homedir = path.Join(d.imageStore, d.name)
} else {
homedir = d.home
}

newpath := path.Join(homedir, id)

if _, err := os.Stat(newpath); err != nil {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ How does this handle any locking? The generic code maintains RW/RO locks on the individual images / layers. This… does nothing?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is still outstanding.

I don't see how this is different than the previous version, what locking do we need here?

Copy link
Collaborator

@mtrmac mtrmac Jan 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, see the top-level comment #1784 (review) .

One immediate thing that springs to mind is deleting the parent layer while the child is being created/mounted/unmounted. I don’t know that we can fully defend against those kinds of things (we don’t hold layers locked for the whole time a child is mounted, and mount reference counts are, I think, basically per-machine and not visible over network shares).

Or maybe a layer with the same ID (pulled layers have deterministic IDs) being simultaneously created in both stores, making the path returned by d.dir[2]() change over time — e.g. for getStagingDir as an example.

There might well be more, or maybe these are handled by some mechanism I don’t know about.


Assuming the concern is real:

  • I’d like to see the scope of the problem restricted to the unavoidable minimum (whatever that is); in all situations where the generic code is iterating over getROLayerStoresLocked to find the right layer, and handling locking, the driver should not be iterating over the secondary layer stores again / redundantly without locking. Structure the code, or document it, in a way that is likely to be maintained correctly over time.
  • In the situations where the problem remains outstanding afterwards and is not fixable short-term, we should loudly document the problem, the situations where it arises, and the constraints, for future maintainers (compare LOCKING BUG in layers.go)
  • Alternatively, if this is never going to be fixed, and users need to use the feature appropriately (“the image store must never be modified while it is used with outer stores” ?), clearly document that in end-user documentation, and maybe point at it in the code to acknowledge and justify the unsynchronized accesses.
  • (As another option, a few years ago overlay used to do its own per-layer locking. I have removed that because I thought it’s redundant; actually external users can get a pointer to the driver object, so it is not 100% redundant, and doing that locking here might also be an option. I would prefer not to add that cost to every operation.)

I could see an argument that this is out of scope of this PR … I’m thinking that with this touching dir2 and dir, while the feature is still comparatively new, now is the best chance for fixing this we’ll ever have. Admittedly the timing is not super convenient now, but it will probably never be ideal.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the assumption with additional image stores is that images/layers are not removed. There are cases where we don't grab any lock, e.g. a read-only file system, so that is already the current state.

When we use a read-only layer, the layer could disappear also while a container is running, and there is nothing that prevents the removal now:

# mkdir -p /tmp/store/{rw,ro,empty}
# podman --root /tmp/store/rw pull busybox
# mount --bind --read-only /tmp/store/rw /tmp/store/ro
# podman --root /tmp/store/empty --storage-opt overlay.imagestore=/tmp/store/ro run -d busybox top
5d4baa2643c77deca49ff9b1a0a10df5c2965626b725b8a2225db54706e69980
# podman --root /tmp/store/rw rmi busybox
Untagged: docker.io/library/busybox:latest
Deleted: 9211bbaa0dbd68fed073065eb9f0a6ed00a75090a9235eca2554c62d1e75c58f

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, the silent deletion of dependent parents seems hard to fix. It’s also not really documented that the users must prevent that, AFAICT.

What about concurrent creations, and other operations?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've not thought about them, but are they very different than delete though? We would end up with the same layer in multiple stores and use it from the first one where we find it.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could see e.g. a partial pull of a layer being started on our store, triggering the creation of a staging directory; before it finishes, the other image store creates the same layer; and now the code trying too commit the partially-pulled layer sees the new layer, determines a different staging directory path, and rejects the commit attempt. (I’m not saying that this is the only problem.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's address this in a separate PR. I've played with it this morning, but I am not sure yet how this should be done.

Do you've any suggestions?

for _, p := range d.AdditionalImageStores() {
for _, p := range d.getAllImageStores() {
l := path.Join(p, d.name, id)
_, err = os.Stat(l)
if err == nil {
return l, imageStore, true
return l, homedir, true
}
}
}
return newpath, imageStore, false
return newpath, homedir, false
}

func (d *Driver) getLowerDirs(id string) ([]string, error) {
Expand Down Expand Up @@ -1453,14 +1416,11 @@ func (d *Driver) Get(id string, options graphdriver.MountOpts) (string, error) {
}

func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountOpts) (_ string, retErr error) {
dir, imageStore, inAdditionalStore := d.dir2(id)
dir, _, inAdditionalStore := d.dir2(id, false)
if _, err := os.Stat(dir); err != nil {
return "", err
}
workDirBase := dir
if imageStore != "" {
workDirBase = imageStore
}

readWrite := !inAdditionalStore

if !d.SupportsShifting() || options.DisableShifting {
Expand Down Expand Up @@ -1565,7 +1525,7 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
}()

composeFsLayers := []string{}
composeFsLayersDir := filepath.Join(workDirBase, "composefs-layers")
composeFsLayersDir := filepath.Join(dir, "composefs-layers")
maybeAddComposefsMount := func(lowerID string, i int, readWrite bool) (string, error) {
composefsBlob := d.getComposefsData(lowerID)
_, err = os.Stat(composefsBlob)
Expand Down Expand Up @@ -1599,7 +1559,7 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
return dest, nil
}

diffDir := path.Join(workDirBase, "diff")
diffDir := path.Join(dir, "diff")

if dest, err := maybeAddComposefsMount(id, 0, readWrite); err != nil {
return "", err
Expand All @@ -1617,7 +1577,7 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
lower := ""
newpath := path.Join(d.home, l)
if st, err := os.Stat(newpath); err != nil {
for _, p := range d.AdditionalImageStores() {
for _, p := range d.getAllImageStores() {
lower = path.Join(p, d.name, l)
if st2, err2 := os.Stat(lower); err2 == nil {
if !permsKnown {
Expand Down Expand Up @@ -1685,16 +1645,16 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
optsList = append(optsList, "metacopy=on", "redirect_dir=on")
}

if len(absLowers) == 0 {
absLowers = append(absLowers, path.Join(dir, "empty"))
}

// user namespace requires this to move a directory from lower to upper.
rootUID, rootGID, err := idtools.GetRootUIDGID(options.UidMaps, options.GidMaps)
if err != nil {
return "", err
}

if len(absLowers) == 0 {
absLowers = append(absLowers, path.Join(dir, "empty"))
}

if err := idtools.MkdirAllAs(diffDir, perms, rootUID, rootGID); err != nil {
if !inAdditionalStore {
return "", err
Expand All @@ -1705,7 +1665,7 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
}
}

mergedDir := path.Join(workDirBase, "merged")
mergedDir := path.Join(dir, "merged")
// Create the driver merged dir
if err := idtools.MkdirAs(mergedDir, 0o700, rootUID, rootGID); err != nil && !os.IsExist(err) {
return "", err
Expand All @@ -1723,7 +1683,7 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
}
}()

workdir := path.Join(workDirBase, "work")
workdir := path.Join(dir, "work")

if d.options.mountProgram == "" && unshare.IsRootless() {
optsList = append(optsList, "userxattr")
Expand Down Expand Up @@ -1873,7 +1833,7 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO

// Put unmounts the mount path created for the give id.
func (d *Driver) Put(id string) error {
dir, _, inAdditionalStore := d.dir2(id)
dir, _, inAdditionalStore := d.dir2(id, false)
if _, err := os.Stat(dir); err != nil {
return err
}
Expand Down Expand Up @@ -2042,8 +2002,9 @@ func (g *overlayFileGetter) Close() error {
return nil
}

func (d *Driver) getStagingDir() string {
return filepath.Join(d.home, stagingDir)
func (d *Driver) getStagingDir(id string) string {
_, homedir, _ := d.dir2(id, d.imageStore != "")
return filepath.Join(homedir, stagingDir)
}

// DiffGetter returns a FileGetCloser that can read files from the directory that
Expand Down Expand Up @@ -2100,11 +2061,12 @@ func (d *Driver) ApplyDiffWithDiffer(id, parent string, options *graphdriver.App
var applyDir string

if id == "" {
err := os.MkdirAll(d.getStagingDir(), 0o700)
stagingDir := d.getStagingDir(id)
err := os.MkdirAll(stagingDir, 0o700)
if err != nil && !os.IsExist(err) {
return graphdriver.DriverWithDifferOutput{}, err
}
applyDir, err = os.MkdirTemp(d.getStagingDir(), "")
applyDir, err = os.MkdirTemp(stagingDir, "")
if err != nil {
return graphdriver.DriverWithDifferOutput{}, err
}
Expand Down Expand Up @@ -2148,7 +2110,7 @@ func (d *Driver) ApplyDiffWithDiffer(id, parent string, options *graphdriver.App
// ApplyDiffFromStagingDirectory applies the changes using the specified staging directory.
func (d *Driver) ApplyDiffFromStagingDirectory(id, parent string, diffOutput *graphdriver.DriverWithDifferOutput, options *graphdriver.ApplyDiffWithDifferOpts) error {
stagingDirectory := diffOutput.Target
if filepath.Dir(stagingDirectory) != d.getStagingDir() {
if filepath.Dir(stagingDirectory) != d.getStagingDir(id) {
return fmt.Errorf("%q is not a staging directory", stagingDirectory)
}
diffPath, err := d.getDiffPath(id)
Expand Down Expand Up @@ -2234,12 +2196,8 @@ func (d *Driver) getComposefsData(id string) string {
}

func (d *Driver) getDiffPath(id string) (string, error) {
dir, imagestore, _ := d.dir2(id)
base := dir
if imagestore != "" {
base = imagestore
}
return redirectDiffIfAdditionalLayer(path.Join(base, "diff"))
dir := d.dir(id)
return redirectDiffIfAdditionalLayer(path.Join(dir, "diff"))
}

func (d *Driver) getLowerDiffPaths(id string) ([]string, error) {
Expand Down Expand Up @@ -2330,12 +2288,8 @@ func (d *Driver) AdditionalImageStores() []string {
// by toContainer to those specified by toHost.
func (d *Driver) UpdateLayerIDMap(id string, toContainer, toHost *idtools.IDMappings, mountLabel string) error {
var err error
dir, imagestore, _ := d.dir2(id)
base := dir
if imagestore != "" {
base = imagestore
}
diffDir := filepath.Join(base, "diff")
dir := d.dir(id)
diffDir := filepath.Join(dir, "diff")

rootUID, rootGID := 0, 0
if toHost != nil {
Expand Down
Loading