From 3be7c061298545629870ce581cb348792a3dfabc Mon Sep 17 00:00:00 2001 From: sam boyer Date: Mon, 16 Oct 2017 20:55:51 -0400 Subject: [PATCH 1/5] gps: Adaptively clean dirty git repos Instead of forcing the user to clean up dirty git repositories, we can take at least basic steps to doing it ourselves - or, if we detect problems, instructing the user to fix it. The overhead introduced here is a `git status` call, which will be non-negligible on larger repos, but it's probably worth it for the resilient behavior. --- internal/gps/maybe_source.go | 8 ++++- internal/gps/vcs_source.go | 68 ++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/internal/gps/maybe_source.go b/internal/gps/maybe_source.go index f5f3f69e27..99ed56f757 100644 --- a/internal/gps/maybe_source.go +++ b/internal/gps/maybe_source.go @@ -100,13 +100,19 @@ func (m maybeGitSource) try(ctx context.Context, cachedir string, c singleSource return nil, 0, err } - c.setVersionMap(vl) state := sourceIsSetUp | sourceExistsUpstream | sourceHasLatestVersionList if r.CheckLocal() { state |= sourceExistsLocally + + // If repository already exists on disk, make a pass to be sure + // everything's clean. + if err = src.cleanup(ctx); err != nil { + return nil, 0, err + } } + c.setVersionMap(vl) return src, state, nil } diff --git a/internal/gps/vcs_source.go b/internal/gps/vcs_source.go index 33f0b5afdb..29c2b4c4eb 100644 --- a/internal/gps/vcs_source.go +++ b/internal/gps/vcs_source.go @@ -123,6 +123,74 @@ type gitSource struct { baseVCSSource } +// ensureClean sees to it that a git repository is clean and in working order, +// or returns an error if it can't. +func (s *gitSource) cleanup(ctx context.Context) error { + r := s.repo.(*gitRepo) + cmd := commandContext( + ctx, + "git", + "status", + "--porcelain", + ) + cmd.SetDir(r.LocalPath()) + + out, err := cmd.CombinedOutput() + if err != nil { + // An error on simple git status indicates some aggressive repository + // corruption, outside of the purview that we can deal with here. + return err + } + + if len(bytes.TrimSpace(out)) == 0 { + // No output from status indicates a clean tree, without any modified or + // untracked files - we're in good shape. + return nil + } + + // We could be more parsimonious about this, but it's probably not worth it + // - it's a rare case to have to do any cleanup anyway, so when we do, we + // might as well just throw the kitchen sink at it. + cmd = commandContext( + ctx, + "git", + "reset", + "--hard", + ) + cmd.SetDir(r.LocalPath()) + _, err = cmd.CombinedOutput() + if err != nil { + return err + } + + // We also need to git clean -df; just reuse defendAgainstSubmodules here, + // even though it's a bit layer-breaky. + err = r.defendAgainstSubmodules(ctx) + if err != nil { + return err + } + + // Check status one last time. If it's still not clean, give up. + cmd = commandContext( + ctx, + "git", + "status", + "--porcelain", + ) + cmd.SetDir(r.LocalPath()) + + out, err = cmd.CombinedOutput() + if err != nil { + return err + } + + if len(bytes.TrimSpace(out)) != 0 { + return errors.Errorf("failed to clean up git repository at %s - dirty? corrupted? status output: \n%s", r.LocalPath(), string(out)) + } + + return nil +} + func (s *gitSource) exportRevisionTo(ctx context.Context, rev Revision, to string) error { r := s.repo From fbaade344055cb49ee509c9e8e571c0c158183e5 Mon Sep 17 00:00:00 2001 From: sam boyer Date: Mon, 16 Oct 2017 21:05:38 -0400 Subject: [PATCH 2/5] gps: Supervise git cleanup, use new ct constant --- internal/gps/maybe_source.go | 8 +++++--- internal/gps/source_manager.go | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/internal/gps/maybe_source.go b/internal/gps/maybe_source.go index 99ed56f757..c306a92405 100644 --- a/internal/gps/maybe_source.go +++ b/internal/gps/maybe_source.go @@ -105,9 +105,11 @@ func (m maybeGitSource) try(ctx context.Context, cachedir string, c singleSource if r.CheckLocal() { state |= sourceExistsLocally - // If repository already exists on disk, make a pass to be sure - // everything's clean. - if err = src.cleanup(ctx); err != nil { + if err := superv.do(ctx, "git", ctValidateLocal, func(ctx context.Context) error { + // If repository already exists on disk, make a pass to be sure + // everything's clean. + return src.cleanup(ctx) + }); err != nil { return nil, 0, err } } diff --git a/internal/gps/source_manager.go b/internal/gps/source_manager.go index 021fca8ffc..4e7bda3a08 100644 --- a/internal/gps/source_manager.go +++ b/internal/gps/source_manager.go @@ -738,6 +738,7 @@ const ( ctSourceInit ctSourceFetch ctExportTree + ctValidateLocal ) func (ct callType) String() string { From cde77d8e72efb2b1ea037fc798f06e63549d5b15 Mon Sep 17 00:00:00 2001 From: sam boyer Date: Wed, 25 Oct 2017 21:31:39 -0400 Subject: [PATCH 3/5] gps: Tests for adaptive git repo recovery/cleanup This covers three basic cases - untracked files, modified files, and some corruption in the .git directory. The first two are plausible; the third is less so, as we don't know much about what real git failure patterns could look like in the .git directory itself. However, it's an adequate test inasmuch as it triggers failure in the basic git calls we make, thereby triggering the recovery procedure. --- internal/gps/manager_test.go | 6 --- internal/gps/maybe_source.go | 2 +- internal/gps/vcs_source.go | 4 +- internal/gps/vcs_source_test.go | 96 +++++++++++++++++++++++++++++++++ 4 files changed, 99 insertions(+), 9 deletions(-) diff --git a/internal/gps/manager_test.go b/internal/gps/manager_test.go index 98e264ec6e..430d64f923 100644 --- a/internal/gps/manager_test.go +++ b/internal/gps/manager_test.go @@ -259,12 +259,6 @@ func TestSourceInit(t *testing.T) { os.Stat(filepath.Join(cpath, "metadata", "github.com", "sdboyer", "gpkt", "cache.json")) - // TODO(sdboyer) disabled until we get caching working - //_, err = os.Stat(filepath.Join(cpath, "metadata", "github.com", "sdboyer", "gpkt", "cache.json")) - //if err != nil { - //t.Error("Metadata cache json file does not exist in expected location") - //} - // Ensure source existence values are what we expect var exists bool exists, err = sm.SourceExists(id) diff --git a/internal/gps/maybe_source.go b/internal/gps/maybe_source.go index c306a92405..3128181dcd 100644 --- a/internal/gps/maybe_source.go +++ b/internal/gps/maybe_source.go @@ -108,7 +108,7 @@ func (m maybeGitSource) try(ctx context.Context, cachedir string, c singleSource if err := superv.do(ctx, "git", ctValidateLocal, func(ctx context.Context) error { // If repository already exists on disk, make a pass to be sure // everything's clean. - return src.cleanup(ctx) + return src.ensureClean(ctx) }); err != nil { return nil, 0, err } diff --git a/internal/gps/vcs_source.go b/internal/gps/vcs_source.go index 29c2b4c4eb..0957128e1a 100644 --- a/internal/gps/vcs_source.go +++ b/internal/gps/vcs_source.go @@ -124,8 +124,8 @@ type gitSource struct { } // ensureClean sees to it that a git repository is clean and in working order, -// or returns an error if it can't. -func (s *gitSource) cleanup(ctx context.Context) error { +// or returns an error if the adaptive recovery attempts fail. +func (s *gitSource) ensureClean(ctx context.Context) error { r := s.repo.(*gitRepo) cmd := commandContext( ctx, diff --git a/internal/gps/vcs_source_test.go b/internal/gps/vcs_source_test.go index d48e34412d..47a1e44628 100644 --- a/internal/gps/vcs_source_test.go +++ b/internal/gps/vcs_source_test.go @@ -7,6 +7,7 @@ package gps import ( "context" "io/ioutil" + "log" "net/url" "os" "os/exec" @@ -647,6 +648,101 @@ func TestGitSourceListVersionsNoDupes(t *testing.T) { } } +func TestGitSourceAdaptiveCleanup(t *testing.T) { + t.Parallel() + + // This test is slowish, skip it on -short + if testing.Short() { + t.Skip("Skipping git adaptive failure recovery test in short mode") + } + requiresBins(t, "git") + + cpath, err := ioutil.TempDir("", "smcache") + if err != nil { + t.Fatalf("Failed to create temp dir: %s", err) + } + + var sm *SourceMgr + mkSM := func() { + // If sm is already set, make sure it's released, then create a new one. + if sm != nil { + sm.Release() + } + + var err error + sm, err = NewSourceManager(SourceManagerConfig{ + Cachedir: cpath, + Logger: log.New(test.Writer{TB: t}, "", 0), + }) + if err != nil { + t.Fatalf("Unexpected error on SourceManager creation: %s", err) + } + } + + mkSM() + id := mkPI("github.com/sdboyer/gpkt") + err = sm.SyncSourceFor(id) + if err != nil { + t.Fatal(err) + } + + repodir := filepath.Join(sm.cachedir, "sources", "https---github.com-sdboyer-gpkt") + if _, err := os.Stat(repodir); err != nil { + if os.IsNotExist(err) { + t.Fatalf("expected location for repodir did not exist: %q", repodir) + } else { + t.Fatal(err) + } + } + + // Create a file that git will see as untracked. + untrackedPath := filepath.Join(repodir, "untrackedfile") + f, err := os.Create(untrackedPath) + if err != nil { + t.Fatal(err) + } + f.Close() + + mkSM() + err = sm.SyncSourceFor(id) + if err != nil { + t.Fatalf("choked after adding dummy file: %q", err) + } + + if _, err := os.Stat(untrackedPath); err == nil { + t.Fatal("untracked file still existed after cleanup should've been triggered") + } + + // Remove a file that we know exists, which `git status` checks should catch. + readmePath := filepath.Join(repodir, "README.md") + os.Remove(readmePath) + + mkSM() + err = sm.SyncSourceFor(id) + if err != nil { + t.Fatalf("choked after removing known file: %q", err) + } + + if _, err := os.Stat(readmePath); err != nil { + t.Fatal("README was still absent after cleanup should've been triggered") + } + + // Remove .git/objects directory, which should make git bite it. + err = os.RemoveAll(filepath.Join(repodir, ".git", "objects")) + if err != nil { + t.Fatal(err) + } + + mkSM() + err = sm.SyncSourceFor(id) + if err != nil { + t.Fatalf("choked after removing .git/objects directory: %q", err) + } + + sm.Release() + os.RemoveAll(cpath) +} + func Test_bzrSource_exportRevisionTo_removeVcsFiles(t *testing.T) { t.Parallel() From ea9a941c0f9c488818db2a2b78d94ad9bc9c7e4e Mon Sep 17 00:00:00 2001 From: sam boyer Date: Wed, 25 Oct 2017 21:38:19 -0400 Subject: [PATCH 4/5] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6855d30c97..d66dc79222 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ BUG FIXES: * Releases targeting Windows now have a `.exe` suffix (#1291). +* Adaptively recover from dirty and corrupted git repositories in cache (#1279). IMPROVEMENTS: From 123ba6be14f9112112a886ffc63ccbd7a05c5b26 Mon Sep 17 00:00:00 2001 From: sam boyer Date: Fri, 27 Oct 2017 00:29:33 -0400 Subject: [PATCH 5/5] gps: See if ioutil.WriteFile() makes windows happy --- internal/gps/vcs_source_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/internal/gps/vcs_source_test.go b/internal/gps/vcs_source_test.go index 47a1e44628..e3cfb83b9e 100644 --- a/internal/gps/vcs_source_test.go +++ b/internal/gps/vcs_source_test.go @@ -697,11 +697,10 @@ func TestGitSourceAdaptiveCleanup(t *testing.T) { // Create a file that git will see as untracked. untrackedPath := filepath.Join(repodir, "untrackedfile") - f, err := os.Create(untrackedPath) + err = ioutil.WriteFile(untrackedPath, []byte("foo"), 0644) if err != nil { t.Fatal(err) } - f.Close() mkSM() err = sm.SyncSourceFor(id)