From 7d43c9aa6b7730fab8c749ef275ee3ad30a7de50 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Wed, 20 Mar 2024 16:05:18 +0400 Subject: [PATCH] chore: annotate installer errors I want to catch a spurious error `ENODEV`, where exactly it comes from. Signed-off-by: Andrey Smirnov --- cmd/installer/pkg/install/install.go | 20 +++++++++---------- .../machined/pkg/controllers/block/devices.go | 2 +- .../block/internal/inotify/inotify.go | 7 +++++-- .../block/internal/inotify/inotify_test.go | 12 +++++++++++ .../block/internal/kobject/kobject.go | 6 +++++- internal/pkg/etcd/etcd.go | 5 +++++ 6 files changed, 38 insertions(+), 14 deletions(-) diff --git a/cmd/installer/pkg/install/install.go b/cmd/installer/pkg/install/install.go index 262d98e7ae..fc70c6bdf0 100644 --- a/cmd/installer/pkg/install/install.go +++ b/cmd/installer/pkg/install/install.go @@ -270,7 +270,7 @@ func (i *Installer) Install(ctx context.Context, mode Mode) (err error) { bd, err = retryBlockdeviceOpen(device) if err != nil { - return err + return fmt.Errorf("error opening blockdevice %s: %w", device, err) } defer bd.Close() //nolint:errcheck @@ -279,19 +279,19 @@ func (i *Installer) Install(ctx context.Context, mode Mode) (err error) { mountpoint, err = mount.SystemMountPointForLabel(ctx, bd, label, mount.WithPrefix(i.options.MountPrefix)) if err != nil { - return err + return fmt.Errorf("error finding system mount points for %s: %w", device, err) } mountpoints.Set(label, mountpoint) return nil }(); err != nil { - return err + return fmt.Errorf("error mounting for label %q: %w", label, err) } } if err = mount.Mount(mountpoints); err != nil { - return err + return fmt.Errorf("failed to mount standard mountpoints: %w", err) } defer func() { @@ -312,7 +312,7 @@ func (i *Installer) Install(ctx context.Context, mode Mode) (err error) { BootAssets: i.options.BootAssets, Printf: i.options.Printf, }); err != nil { - return err + return fmt.Errorf("failed to install bootloader: %w", err) } if i.options.Board != constants.BoardNone { @@ -333,7 +333,7 @@ func (i *Installer) Install(ctx context.Context, mode Mode) (err error) { RPiFirmwarePath: i.options.BootAssets.RPiFirmwarePath, Printf: i.options.Printf, }); err != nil { - return err + return fmt.Errorf("failed to install for board %s: %w", b.Name(), err) } } @@ -344,7 +344,7 @@ func (i *Installer) Install(ctx context.Context, mode Mode) (err error) { ArtifactsPath: filepath.Join(i.options.OverlayExtractedDir, constants.ImagerOverlayArtifactsPath), ExtraOptions: i.options.ExtraOptions, }); err != nil { - return err + return fmt.Errorf("failed to run overlay installer: %w", err) } } @@ -373,7 +373,7 @@ func (i *Installer) Install(ctx context.Context, mode Mode) (err error) { } if metaState, err = meta.New(context.Background(), nil, meta.WithPrinter(i.options.Printf), meta.WithFixedPath(metaPartitionName)); err != nil { - return err + return fmt.Errorf("failed to open META: %w", err) } var ok bool @@ -391,7 +391,7 @@ func (i *Installer) Install(ctx context.Context, mode Mode) (err error) { } if err = metaState.Flush(); err != nil { - return err + return fmt.Errorf("failed to flush META: %w", err) } } @@ -423,7 +423,7 @@ func retryBlockdeviceOpen(device string) (*blockdevice.BlockDevice, error) { err := retry.Constant(10*time.Second, retry.WithUnits(100*time.Millisecond)).Retry(func() error { var openErr error - bd, openErr = blockdevice.Open(device) + bd, openErr = blockdevice.Open(device, blockdevice.WithExclusiveLock(true)) if openErr == nil { return nil } diff --git a/internal/app/machined/pkg/controllers/block/devices.go b/internal/app/machined/pkg/controllers/block/devices.go index 3c0c78c229..29ecf21b78 100644 --- a/internal/app/machined/pkg/controllers/block/devices.go +++ b/internal/app/machined/pkg/controllers/block/devices.go @@ -230,7 +230,7 @@ func (ctrl *DevicesController) processEvent(ctx context.Context, r controller.Ru } if err := inotifyWatcher.Remove(devPath); err != nil { - return fmt.Errorf("failed to remove inotify watch for %q: %w", devPath, err) + logger.Debug("failed to remove inotify watch", zap.String("device", devPath), zap.Error(err)) } default: logger.Debug("skipped, as action is not supported") diff --git a/internal/app/machined/pkg/controllers/block/internal/inotify/inotify.go b/internal/app/machined/pkg/controllers/block/internal/inotify/inotify.go index 2a6cd565ce..d02d605b78 100644 --- a/internal/app/machined/pkg/controllers/block/internal/inotify/inotify.go +++ b/internal/app/machined/pkg/controllers/block/internal/inotify/inotify.go @@ -39,7 +39,10 @@ func (w *watches) remove(wd uint32) { w.mu.Lock() defer w.mu.Unlock() - delete(w.path, w.wd[wd].path) + if _, ok := w.wd[wd]; ok { + delete(w.path, w.wd[wd].path) + } + delete(w.wd, wd) } @@ -207,7 +210,7 @@ func (w *Watcher) Run() (<-chan string, <-chan error) { } // Send the events that are not ignored on the events channel - if mask&unix.IN_IGNORED == 0 { + if mask&unix.IN_IGNORED == 0 && mask&unix.IN_CLOSE_WRITE != 0 { eventCh <- name } diff --git a/internal/app/machined/pkg/controllers/block/internal/inotify/inotify_test.go b/internal/app/machined/pkg/controllers/block/internal/inotify/inotify_test.go index 32924e5145..ca57c2d9b9 100644 --- a/internal/app/machined/pkg/controllers/block/internal/inotify/inotify_test.go +++ b/internal/app/machined/pkg/controllers/block/internal/inotify/inotify_test.go @@ -15,6 +15,7 @@ import ( "github.com/siderolabs/talos/internal/app/machined/pkg/controllers/block/internal/inotify" ) +//nolint:gocyclo func TestWatcher(t *testing.T) { watcher, err := inotify.NewWatcher() require.NoError(t, err) @@ -67,6 +68,17 @@ func TestWatcher(t *testing.T) { case <-time.After(100 * time.Millisecond): } + // remove file2 + require.NoError(t, os.Remove(filepath.Join(d, "file2"))) + + select { + case path := <-watchCh: + require.FailNow(t, "unexpected path", "%s", path) + case err = <-errCh: + require.FailNow(t, "unexpected error", "%s", err) + case <-time.After(100 * time.Millisecond): + } + require.NoError(t, watcher.Remove(filepath.Join(d, "file2"))) require.NoError(t, watcher.Close()) diff --git a/internal/app/machined/pkg/controllers/block/internal/kobject/kobject.go b/internal/app/machined/pkg/controllers/block/internal/kobject/kobject.go index 68bc715312..ed64c4ce8d 100644 --- a/internal/app/machined/pkg/controllers/block/internal/kobject/kobject.go +++ b/internal/app/machined/pkg/controllers/block/internal/kobject/kobject.go @@ -6,11 +6,13 @@ package kobject import ( + "errors" "fmt" "sync" "github.com/mdlayher/kobject" "go.uber.org/zap" + "golang.org/x/sys/unix" ) const readBufferSize = 64 * 1024 * 1024 @@ -77,7 +79,9 @@ func (w *Watcher) Run(logger *zap.Logger) <-chan *Event { for { ev, err := w.cli.Receive() if err != nil { - logger.Error("failed to receive kobject event", zap.Error(err)) + if !errors.Is(err, unix.EBADF) { + logger.Error("failed to receive kobject event", zap.Error(err)) + } return } diff --git a/internal/pkg/etcd/etcd.go b/internal/pkg/etcd/etcd.go index 99e8ac516e..605ba51707 100644 --- a/internal/pkg/etcd/etcd.go +++ b/internal/pkg/etcd/etcd.go @@ -172,6 +172,11 @@ func (c *Client) LeaveCluster(ctx context.Context, st state.State) error { return retry.ExpectedError(err) } + if errors.Is(err, rpctypes.ErrStopped) { + // retry the stopped errors as the member might be in the process of shutting down + return retry.ExpectedError(err) + } + return err }); err != nil { return err