diff --git a/cmd/lakectl/cmd/index.go b/cmd/lakectl/cmd/index.go deleted file mode 100644 index 6d6b199e4de..00000000000 --- a/cmd/lakectl/cmd/index.go +++ /dev/null @@ -1,59 +0,0 @@ -package cmd - -import ( - "errors" - "os" - "path/filepath" - - "github.com/treeverse/lakefs/pkg/uri" - "gopkg.in/yaml.v3" -) - -const ( - IndexFileName = ".lakefs_ref.yaml" - IgnoreMarker = "ignored by lakectl local:" - IndexFileMode = 0644 -) - -// Index defines the structure of the lakefs local reference file -// consisting of the information linking local directory with lakefs path -type Index struct { - root string `yaml:"-"` - PathURI string `yaml:"src"` - AtHead string `yaml:"at_head"` -} - -func (l *Index) LocalPath() string { - return l.root -} - -func (l *Index) GetCurrentURI() (*uri.URI, error) { - return uri.Parse(l.PathURI) -} - -func WriteIndex(path string, remote *uri.URI, atHead string) error { - idx := &Index{ - PathURI: remote.String(), - AtHead: atHead, - } - data, err := yaml.Marshal(idx) - if err != nil { - return err - } - idxPath := filepath.Join(path, IndexFileName) - return os.WriteFile(idxPath, data, IndexFileMode) -} - -func IndexExists(baseAbs string) bool { - refPath := filepath.Join(baseAbs, IndexFileName) - _, err := os.Stat(refPath) - switch { - case err == nil: - return true - case errors.Is(err, os.ErrNotExist): - return false - default: - DieErr(err) - return false // go fmt - } -} diff --git a/cmd/lakectl/cmd/local_init.go b/cmd/lakectl/cmd/local_init.go index a9ea4f3b6b2..ef942086ca8 100644 --- a/cmd/lakectl/cmd/local_init.go +++ b/cmd/lakectl/cmd/local_init.go @@ -8,6 +8,7 @@ import ( "github.com/spf13/cobra" "github.com/treeverse/lakefs/pkg/git" + "github.com/treeverse/lakefs/pkg/local" ) const ( @@ -36,18 +37,22 @@ var localInitCmd = &cobra.Command{ if err := os.MkdirAll(dir, os.ModePerm); err != nil { DieErr(err) } - if IndexExists(localPath) && !force { + exists, err := local.IndexExists(localPath) + if err != nil { + DieErr(err) + } + if exists && !force { DieFmt("directory '%s' already linked to a lakefs path, run command with --force to overwrite", localPath) } // dereference head := resolveCommitOrDie(cmd.Context(), getClient(), remote.Repository, remote.Ref) - err = WriteIndex(localPath, remote, head) + err = local.WriteIndex(localPath, remote, head) if err != nil { DieErr(err) } - ignoreFile, err := git.Ignore(localPath, []string{localPath, IndexFileName}, []string{IndexFileName}, IgnoreMarker) + ignoreFile, err := git.Ignore(localPath, []string{localPath, local.IndexFileName}, []string{local.IndexFileName}, local.IgnoreMarker) if err == nil { fmt.Println("location added to", ignoreFile) } else if !errors.Is(err, git.ErrNotARepository) { diff --git a/cmd/lakectl/cmd/local_list.go b/cmd/lakectl/cmd/local_list.go new file mode 100644 index 00000000000..f63329fad6c --- /dev/null +++ b/cmd/lakectl/cmd/local_list.go @@ -0,0 +1,76 @@ +package cmd + +import ( + "errors" + "path/filepath" + + "github.com/jedib0t/go-pretty/v6/table" + "github.com/spf13/cobra" + "github.com/treeverse/lakefs/pkg/git" + "github.com/treeverse/lakefs/pkg/local" +) + +const ( + localListMinArgs = 0 + localListMaxArgs = 1 + + indicesListTemplate = `{{.IndicesListTable | table -}}` +) + +var localListCmd = &cobra.Command{ + Use: "list [directory]", + Short: "find and list directories that are synced with lakeFS", + Args: cobra.RangeArgs(localListMinArgs, localListMaxArgs), + Run: func(cmd *cobra.Command, args []string) { + dir := "." + if len(args) > 0 { + dir = args[0] + } + abs, err := filepath.Abs(dir) + if err != nil { + DieErr(err) + } + gitRoot, err := git.GetRepositoryPath(abs) + if err == nil { + abs = gitRoot + } else if !(errors.Is(err, git.ErrNotARepository) || errors.Is(err, git.ErrNoGit)) { // allow support in environments with no git + DieErr(err) + } + + dirs, err := local.FindIndices(abs) + if err != nil { + DieErr(err) + } + + var rows [][]interface{} + for _, d := range dirs { + idx, err := local.ReadIndex(d) + if err != nil { + DieErr(err) + } + remote, err := idx.GetCurrentURI() + if err != nil { + DieErr(err) + } + rows = append(rows, table.Row{d, remote, idx.AtHead}) + } + data := struct { + IndicesListTable *Table + }{ + IndicesListTable: &Table{ + Headers: []interface{}{ + "Directory", + "Remote URI", + "Synced commit", + }, + Rows: rows, + }, + } + Write(indicesListTemplate, data) + }, +} + +//nolint:gochecknoinits +func init() { + localCmd.AddCommand(localListCmd) +} diff --git a/docs/reference/cli.md b/docs/reference/cli.md index d823d53a2cb..b6bb178d66e 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -2616,6 +2616,23 @@ lakectl local init [directory] [flags] +### lakectl local list + +find and list directories that are synced with lakeFS + +``` +lakectl local list [directory] [flags] +``` + +#### Options +{:.no_toc} + +``` + -h, --help help for list +``` + + + ### lakectl log Show log of commits diff --git a/pkg/fileutil/io.go b/pkg/fileutil/io.go index c067907c2dd..8eb66ff5231 100644 --- a/pkg/fileutil/io.go +++ b/pkg/fileutil/io.go @@ -1,6 +1,16 @@ package fileutil -import "os" +import ( + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" +) + +const ( + DefaultDirectoryMask = 0o755 +) // IsDir Returns true if p is a directory, otherwise false func IsDir(p string) (bool, error) { @@ -10,3 +20,34 @@ func IsDir(p string) (bool, error) { } return stat.IsDir(), nil } + +func FindInParents(path, filename string) (string, error) { + var lookup string + fullPath, err := filepath.Abs(path) + if err != nil { + return "", err + } + for fullPath != string(filepath.Separator) && fullPath != filepath.VolumeName(fullPath) { + info, err := os.Stat(fullPath) + if errors.Is(err, fs.ErrNotExist) { + return "", fmt.Errorf("%s: %w", fullPath, fs.ErrNotExist) + } else if err != nil { + return "", err + } + if !info.IsDir() { + // find filename here + lookup = filepath.Join(filepath.Dir(fullPath), filename) + } else { + lookup = filepath.Join(fullPath, filename) + } + _, err = os.Stat(lookup) + if os.IsNotExist(err) { + fullPath = filepath.Dir(fullPath) + continue + } else if err != nil { + return "", err + } + return lookup, nil + } + return "", nil +} diff --git a/pkg/fileutil/io_test.go b/pkg/fileutil/io_test.go new file mode 100644 index 00000000000..437a8a3faff --- /dev/null +++ b/pkg/fileutil/io_test.go @@ -0,0 +1,85 @@ +package fileutil_test + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + "github.com/treeverse/lakefs/pkg/fileutil" +) + +func TestFindInParents(t *testing.T) { + root := t.TempDir() + dirTree := filepath.Join(root, "foo", "bar", "baz", "taz") + require.NoError(t, os.MkdirAll(dirTree, fileutil.DefaultDirectoryMask)) + t.Run("does not exist", func(t *testing.T) { + found, err := fileutil.FindInParents(root, ".doesnotexist21348329043289") + if err != nil { + t.Fatal(err) + } + if found != "" { + t.Errorf("expected found to be empty, got %v", found) + } + }) + + tests := []struct { + name string + deep string + filename string + filepath string + find bool + }{ + { + name: "find_at_leaf", + deep: filepath.Join(root, "foo", "bar", "baz"), + filename: "some_file0", + filepath: filepath.Join(root, "foo", "bar", "baz", "some_file0"), + find: true, + }, + { + name: "find_at_root", + deep: filepath.Join(root, "foo", "bar", "baz"), + filename: "some_file1", + filepath: filepath.Join(root, "some_file1"), + find: true, + }, + { + name: "find_at_subpath", + deep: filepath.Join(root, "foo", "bar", "baz", "taz"), + filename: "some_file2", + filepath: filepath.Join(root, "foo", "some_file2"), + find: true, + }, + { + name: "not_found_above", + deep: filepath.Join(root, "foo", "bar", "baz"), + filename: "some_file3", + filepath: filepath.Join(root, "foo", "bar", "baz", "taz", "some_file3"), + find: false, + }, + { + name: "doesnt_exist", + deep: filepath.Join(root, "foo", "bar", "baz"), + filename: ".doesnotexist21348329043289", + filepath: filepath.Join(root, "foo", "bar", "some_file4"), + find: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f, err := os.Create(tt.filepath) + require.NoError(t, err) + require.NoError(t, f.Close()) + + found, err := fileutil.FindInParents(tt.deep, tt.filename) + require.NoError(t, err) + if tt.find { + require.Equal(t, tt.filepath, found) + } else { + require.Equal(t, "", found) + } + }) + } +} diff --git a/pkg/git/errors.go b/pkg/git/errors.go index ad8089c1051..746ea9b40b7 100644 --- a/pkg/git/errors.go +++ b/pkg/git/errors.go @@ -5,6 +5,6 @@ import ( ) var ( - ErrGitError = errors.New("git error") ErrNotARepository = errors.New("not a git repository") + ErrNoGit = errors.New("no git support") ) diff --git a/pkg/git/git.go b/pkg/git/git.go index f1c8bf67e9b..198ee23a28f 100644 --- a/pkg/git/git.go +++ b/pkg/git/git.go @@ -21,6 +21,10 @@ const ( ) func git(dir string, args ...string) (string, error) { + _, err := exec.LookPath("git") // assume git is in path, otherwise consider as not having git support + if err != nil { + return "", ErrNoGit + } cmd := exec.Command("git", args...) cmd.Dir = dir out, err := cmd.CombinedOutput() @@ -42,7 +46,7 @@ func GetRepositoryPath(dir string) (string, error) { if strings.Contains(out, "not a git repository") { return "", ErrNotARepository } - return "", fmt.Errorf("%s: %w", out, ErrGitError) + return "", fmt.Errorf("%s: %w", out, err) } func createEntriesForIgnore(dir string, paths []string, exclude bool) ([]string, error) { diff --git a/pkg/git/git_test.go b/pkg/git/git_test.go index f38a7588292..e41a1424359 100644 --- a/pkg/git/git_test.go +++ b/pkg/git/git_test.go @@ -13,7 +13,7 @@ import ( "github.com/treeverse/lakefs/pkg/git" ) -func TestIsGitRepository(t *testing.T) { +func TestIsRepository(t *testing.T) { tmpdir := t.TempDir() tmpSubdir, err := os.MkdirTemp(tmpdir, "") require.NoError(t, err) @@ -35,7 +35,7 @@ func TestIsGitRepository(t *testing.T) { require.True(t, git.IsRepository(tmpSubdir)) } -func TestGetGitRepositoryPath(t *testing.T) { +func TestGetRepositoryPath(t *testing.T) { tmpdir := t.TempDir() tmpSubdir, err := os.MkdirTemp(tmpdir, "") require.NoError(t, err) @@ -50,7 +50,7 @@ func TestGetGitRepositoryPath(t *testing.T) { _, err = git.GetRepositoryPath(tmpdir) require.ErrorIs(t, err, git.ErrNotARepository) _, err = git.GetRepositoryPath(tmpFile.Name()) - require.ErrorIs(t, err, git.ErrGitError) + require.Error(t, err) // Init git repo on root require.NoError(t, exec.Command("git", "init", "-q", tmpdir).Run()) @@ -58,7 +58,7 @@ func TestGetGitRepositoryPath(t *testing.T) { require.NoError(t, err) require.Equal(t, tmpdir, gitPath) _, err = git.GetRepositoryPath(tmpFile.Name()) - require.ErrorIs(t, err, git.ErrGitError) + require.Error(t, err) gitPath, err = git.GetRepositoryPath(tmpSubdir) require.NoError(t, err) require.Equal(t, tmpdir, gitPath) @@ -85,7 +85,7 @@ func TestIgnore(t *testing.T) { _, err = git.Ignore(tmpdir, []string{}, []string{}, marker) require.ErrorIs(t, err, git.ErrNotARepository) _, err = git.Ignore(tmpFile.Name(), []string{}, []string{}, marker) - require.ErrorIs(t, err, git.ErrGitError) + require.Error(t, err) // Init git repo on tmpdir require.NoError(t, exec.Command("git", "init", "-q", tmpdir).Run()) @@ -107,7 +107,7 @@ func TestIgnore(t *testing.T) { verifyPathTracked(t, []string{filepath.Base(tmpSubdir), trackedFile}) _, err = git.Ignore(tmpFile.Name(), []string{}, []string{excludedPath}, marker) - require.ErrorIs(t, err, git.ErrGitError) + require.Error(t, err) result, err := git.Ignore(tmpdir, []string{}, []string{excludedPath}, marker) require.NoError(t, err) require.Equal(t, ignorePath, result) diff --git a/pkg/local/index.go b/pkg/local/index.go new file mode 100644 index 00000000000..c8b82329f7d --- /dev/null +++ b/pkg/local/index.go @@ -0,0 +1,113 @@ +package local + +import ( + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + "strings" + + "github.com/treeverse/lakefs/pkg/fileutil" + "github.com/treeverse/lakefs/pkg/uri" + "gopkg.in/yaml.v3" +) + +const ( + IndexFileName = ".lakefs_ref.yaml" + IgnoreMarker = "ignored by lakectl local:" + IndexFileMode = 0644 +) + +// Index defines the structure of the lakefs local reference file +// consisting of the information linking local directory with lakefs path +type Index struct { + root string `yaml:"-"` + PathURI string `yaml:"src"` + AtHead string `yaml:"at_head"` +} + +func (l *Index) LocalPath() string { + return l.root +} + +func (l *Index) GetCurrentURI() (*uri.URI, error) { + return uri.Parse(l.PathURI) +} + +func WriteIndex(path string, remote *uri.URI, atHead string) error { + idx := &Index{ + PathURI: remote.String(), + AtHead: atHead, + } + data, err := yaml.Marshal(idx) + if err != nil { + return err + } + idxPath := filepath.Join(path, IndexFileName) + return os.WriteFile(idxPath, data, IndexFileMode) +} + +func IndexExists(baseAbs string) (bool, error) { + refPath := filepath.Join(baseAbs, IndexFileName) + _, err := os.Stat(refPath) + switch { + case err == nil: + return true, nil + case errors.Is(err, os.ErrNotExist): + return false, nil + default: + return false, err + } +} + +func ReadIndex(path string) (*Index, error) { + idxPath, err := fileutil.FindInParents(path, IndexFileName) + if err != nil { + return nil, err + } + if idxPath == "" { + return nil, fmt.Errorf("%s: %w", path, fs.ErrNotExist) + } + data, err := os.ReadFile(idxPath) + if err != nil { + return nil, err + } + idx := &Index{ + root: filepath.Dir(idxPath), + } + err = yaml.Unmarshal(data, idx) + if err != nil { + return nil, err + } + return idx, nil +} + +func FindIndices(root string) ([]string, error) { + locs := make([]string, 0) + + err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + // don't traverse hidden folders like '.git', etc. + if d.IsDir() && strings.HasPrefix(d.Name(), ".") { + return filepath.SkipDir + } + // if we found an index, no need to further traverse + if filepath.Base(path) == IndexFileName { + // add the relative location of the directory containing the index + rel, err := filepath.Rel(root, filepath.Dir(path)) + if err != nil { + return err + } + locs = append(locs, rel) + return filepath.SkipDir // no need to traverse further! + } + return nil + }) + if err != nil { + return nil, err + } + return locs, nil +} diff --git a/pkg/local/index_test.go b/pkg/local/index_test.go new file mode 100644 index 00000000000..78b7543d1e9 --- /dev/null +++ b/pkg/local/index_test.go @@ -0,0 +1,107 @@ +package local_test + +import ( + "fmt" + "io/fs" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + "github.com/treeverse/lakefs/pkg/local" + "github.com/treeverse/lakefs/pkg/uri" +) + +const ( + repo = "foo" + ref = "bar" + uPath = "baz" + head = "head" +) + +var ( + testPath = uPath + testUri = &uri.URI{ + Repository: repo, + Ref: ref, + Path: &testPath, + } +) + +func writeIndex(t *testing.T, dir string) { + require.NoError(t, local.WriteIndex(dir, testUri, head)) +} + +func TestWriteIndex(t *testing.T) { + expectedContent := fmt.Sprintf("src: lakefs://%s/%s/%s\nat_head: %s\n", repo, ref, uPath, head) + tmpDir := t.TempDir() + writeIndex(t, tmpDir) + buf, err := os.ReadFile(filepath.Join(tmpDir, local.IndexFileName)) + require.NoError(t, err) + require.Equal(t, expectedContent, string(buf)) +} + +func TestReadIndex(t *testing.T) { + tmpDir := t.TempDir() + indexPath := filepath.Join(tmpDir, "path", "to", "index") + require.NoError(t, os.MkdirAll(indexPath, os.ModePerm)) + writeIndex(t, indexPath) + + // Verify error on no index + _, err := local.ReadIndex(tmpDir) + require.ErrorIs(t, err, fs.ErrNotExist) + + // Read index from path + res, err := local.ReadIndex(indexPath) + require.NoError(t, err) + require.Equal(t, indexPath, res.LocalPath()) + require.Equal(t, head, res.AtHead) + require.Equal(t, testUri.String(), res.PathURI) +} + +func TestFindIndices(t *testing.T) { + root := t.TempDir() + indicesFound := []string{ + filepath.Join(root, "path", "one"), + filepath.Join(root, "path", "two"), + filepath.Join(root, "path", "three", "four"), + filepath.Join(root, "path", "three", "five", "six"), + filepath.Join(root, "path2"), + } + indicesNotFound := []string{ + filepath.Join(root, "path", "one", "shouldNotFind"), + filepath.Join(root, "path", "three", "four", "five", "shouldNotFind"), + } + for _, dir := range append(indicesFound, indicesNotFound...) { + require.NoError(t, os.MkdirAll(dir, os.ModePerm)) + writeIndex(t, dir) + } + // Create some files + for i, dir := range indicesFound { + _, err := os.Create(filepath.Join(dir, fmt.Sprintf("file_%d", i))) + require.NoError(t, err) + } + + // Check on root + dirs, err := local.FindIndices(root) + require.NoError(t, err) + require.Equal(t, len(indicesFound), len(dirs)) + for _, dir := range indicesFound { + rel, err := filepath.Rel(root, dir) + require.NoError(t, err) + require.Contains(t, dirs, rel) + } + + // Check on different sub path that was not supposed to be found from root + dirs, err = local.FindIndices(filepath.Join(root, "path", "three", "four", "five")) + require.NoError(t, err) + require.Equal(t, 1, len(dirs)) + require.Equal(t, "shouldNotFind", dirs[0]) + + // Create file on root and check only one result + writeIndex(t, root) + dirs, err = local.FindIndices(filepath.Join(root)) + require.NoError(t, err) + require.Equal(t, 1, len(dirs)) + require.Equal(t, ".", dirs[0]) +}