diff --git a/cmd/lakectl/cmd/index.go b/cmd/lakectl/cmd/index.go new file mode 100644 index 00000000000..6d6b199e4de --- /dev/null +++ b/cmd/lakectl/cmd/index.go @@ -0,0 +1,59 @@ +package cmd + +import ( + "errors" + "os" + "path/filepath" + + "github.com/treeverse/lakefs/pkg/uri" + "gopkg.in/yaml.v3" +) + +const ( + IndexFileName = ".lakefs_ref.yaml" + IgnoreMarker = "ignored by lakectl local:" + IndexFileMode = 0644 +) + +// Index defines the structure of the lakefs local reference file +// consisting of the information linking local directory with lakefs path +type Index struct { + root string `yaml:"-"` + PathURI string `yaml:"src"` + AtHead string `yaml:"at_head"` +} + +func (l *Index) LocalPath() string { + return l.root +} + +func (l *Index) GetCurrentURI() (*uri.URI, error) { + return uri.Parse(l.PathURI) +} + +func WriteIndex(path string, remote *uri.URI, atHead string) error { + idx := &Index{ + PathURI: remote.String(), + AtHead: atHead, + } + data, err := yaml.Marshal(idx) + if err != nil { + return err + } + idxPath := filepath.Join(path, IndexFileName) + return os.WriteFile(idxPath, data, IndexFileMode) +} + +func IndexExists(baseAbs string) bool { + refPath := filepath.Join(baseAbs, IndexFileName) + _, err := os.Stat(refPath) + switch { + case err == nil: + return true + case errors.Is(err, os.ErrNotExist): + return false + default: + DieErr(err) + return false // go fmt + } +} diff --git a/cmd/lakectl/cmd/local.go b/cmd/lakectl/cmd/local.go new file mode 100644 index 00000000000..4ad28066bcd --- /dev/null +++ b/cmd/lakectl/cmd/local.go @@ -0,0 +1,18 @@ +package cmd + +import ( + "github.com/spf13/cobra" +) + +var localCmd = &cobra.Command{ + Use: "local", + // TODO: Remove BETA when feature complete + Short: "BETA: sync local directories with lakeFS paths", +} + +//nolint:gochecknoinits +func init() { + // TODO: Remove line when feature complete + localCmd.Hidden = true + rootCmd.AddCommand(localCmd) +} diff --git a/cmd/lakectl/cmd/local_init.go b/cmd/lakectl/cmd/local_init.go new file mode 100644 index 00000000000..a9ea4f3b6b2 --- /dev/null +++ b/cmd/lakectl/cmd/local_init.go @@ -0,0 +1,66 @@ +package cmd + +import ( + "errors" + "fmt" + "os" + "path/filepath" + + "github.com/spf13/cobra" + "github.com/treeverse/lakefs/pkg/git" +) + +const ( + localInitMinArgs = 1 + localInitMaxArgs = 2 +) + +var localInitCmd = &cobra.Command{ + Use: "init [directory]", + Short: "set a local directory to sync with a lakeFS path", + Args: cobra.RangeArgs(localInitMinArgs, localInitMaxArgs), + Run: func(cmd *cobra.Command, args []string) { + remote := MustParsePathURI("path", args[0]) + dir := "." + if len(args) == localInitMaxArgs { + dir = args[1] + } + flagSet := cmd.Flags() + force := Must(flagSet.GetBool("force")) + + localPath, err := filepath.Abs(dir) + if err != nil { + DieErr(err) + } + + if err := os.MkdirAll(dir, os.ModePerm); err != nil { + DieErr(err) + } + if IndexExists(localPath) && !force { + DieFmt("directory '%s' already linked to a lakefs path, run command with --force to overwrite", localPath) + } + + // dereference + head := resolveCommitOrDie(cmd.Context(), getClient(), remote.Repository, remote.Ref) + err = WriteIndex(localPath, remote, head) + if err != nil { + DieErr(err) + } + + ignoreFile, err := git.Ignore(localPath, []string{localPath, IndexFileName}, []string{IndexFileName}, IgnoreMarker) + if err == nil { + fmt.Println("location added to", ignoreFile) + } else if !errors.Is(err, git.ErrNotARepository) { + DieErr(err) + } + + fmt.Printf("Successfully linked local directory '%s' with remote '%s'\n", localPath, remote) + }, +} + +//nolint:gochecknoinits +func init() { + AssignAutoConfirmFlag(localInitCmd.Flags()) + localInitCmd.Flags().Bool("force", false, "Overwrites if directory already linked to a lakeFS path") + localCmd.AddCommand(localInitCmd) +} diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 9a7dacf2541..d823d53a2cb 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -2558,6 +2558,64 @@ lakectl ingest --from --to [--dry-run] [fla +### lakectl local + +**note:** This command is a lakeFS plumbing command. Don't use it unless you're really sure you know what you're doing. +{: .note .note-warning } + +BETA: sync local directories with lakeFS paths + +#### Options +{:.no_toc} + +``` + -h, --help help for local +``` + + + +### lakectl local help + +Help about any command + +#### Synopsis +{:.no_toc} + +Help provides help for any command in the application. +Simply type local help [path to command] for full details. + +``` +lakectl local help [command] [flags] +``` + +#### Options +{:.no_toc} + +``` + -h, --help help for help +``` + + + +### lakectl local init + +set a local directory to sync with a lakeFS path + +``` +lakectl local init [directory] [flags] +``` + +#### Options +{:.no_toc} + +``` + --force Overwrites if directory already linked to a lakeFS path + -h, --help help for init + -y, --yes Automatically say yes to all confirmations +``` + + + ### lakectl log Show log of commits diff --git a/pkg/fileutil/io.go b/pkg/fileutil/io.go new file mode 100644 index 00000000000..c067907c2dd --- /dev/null +++ b/pkg/fileutil/io.go @@ -0,0 +1,12 @@ +package fileutil + +import "os" + +// IsDir Returns true if p is a directory, otherwise false +func IsDir(p string) (bool, error) { + stat, err := os.Stat(p) + if err != nil { + return false, err + } + return stat.IsDir(), nil +} diff --git a/pkg/git/errors.go b/pkg/git/errors.go new file mode 100644 index 00000000000..ad8089c1051 --- /dev/null +++ b/pkg/git/errors.go @@ -0,0 +1,10 @@ +package git + +import ( + "errors" +) + +var ( + ErrGitError = errors.New("git error") + ErrNotARepository = errors.New("not a git repository") +) diff --git a/pkg/git/git.go b/pkg/git/git.go new file mode 100644 index 00000000000..f1c8bf67e9b --- /dev/null +++ b/pkg/git/git.go @@ -0,0 +1,148 @@ +package git + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io/fs" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/treeverse/lakefs/pkg/fileutil" + "golang.org/x/exp/slices" +) + +const ( + IgnoreFile = ".gitignore" + IgnoreDefaultMode = 0644 +) + +func git(dir string, args ...string) (string, error) { + cmd := exec.Command("git", args...) + cmd.Dir = dir + out, err := cmd.CombinedOutput() + return string(out), err +} + +// IsRepository Return true if dir is a path to a directory in a git repository, false otherwise +func IsRepository(dir string) bool { + _, err := git(dir, "rev-parse", "--is-inside-work-tree") + return err == nil +} + +// GetRepositoryPath Returns the git repository root path if dir is a directory inside a git repository, otherwise returns error +func GetRepositoryPath(dir string) (string, error) { + out, err := git(dir, "rev-parse", "--show-toplevel") + if err == nil { + return strings.TrimSpace(out), nil + } + if strings.Contains(out, "not a git repository") { + return "", ErrNotARepository + } + return "", fmt.Errorf("%s: %w", out, ErrGitError) +} + +func createEntriesForIgnore(dir string, paths []string, exclude bool) ([]string, error) { + var entries []string + for _, p := range paths { + pathInRepo, err := filepath.Rel(dir, p) + if err != nil { + return nil, fmt.Errorf("%s :%w", p, err) + } + isDir, err := fileutil.IsDir(p) + if err != nil && !errors.Is(err, fs.ErrNotExist) { + return nil, fmt.Errorf("%s :%w", p, err) + } + if isDir { + pathInRepo = filepath.Join(pathInRepo, "*") + } + if exclude { + pathInRepo = "!" + pathInRepo + } + entries = append(entries, pathInRepo) + } + return entries, nil +} + +func updateIgnoreFileSection(contents []byte, marker string, entries []string) []byte { + var newContent []byte + scanner := bufio.NewScanner(bytes.NewReader(contents)) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + newContent = append(newContent, []byte(fmt.Sprintln(line))...) + if line == marker { + for scanner.Scan() { + line = strings.TrimSpace(scanner.Text()) + if line == "" { + break + } + if !slices.Contains(entries, line) { + newContent = append(newContent, []byte(fmt.Sprintln(line))...) + } + } + buffer := strings.Join(entries, fmt.Sprintln("")) + fmt.Sprintln("") + newContent = append(newContent, buffer...) + } + } + + return newContent +} + +// Ignore modify/create .ignore file to include a section headed by the marker string and contains the provided ignore and exclude paths. +// If section exists, it will append paths to the given section, otherwise writes the section at the end of the file. +// All file paths must be absolute. +// dir is a path in the git repository, if a .gitignore file is not found, a new file will be created in the repository root +func Ignore(dir string, ignorePaths, excludePaths []string, marker string) (string, error) { + gitDir, err := GetRepositoryPath(dir) + if err != nil { + return "", err + } + + ignoreEntries, err := createEntriesForIgnore(gitDir, ignorePaths, false) + if err != nil { + return "", err + } + excludeEntries, err := createEntriesForIgnore(gitDir, excludePaths, true) + if err != nil { + return "", err + } + ignoreEntries = append(ignoreEntries, excludeEntries...) + + var ( + mode os.FileMode = IgnoreDefaultMode + ignoreFile []byte + ) + ignoreFilePath := filepath.Join(gitDir, IgnoreFile) + markerLine := "# " + marker + info, err := os.Stat(ignoreFilePath) + switch { + case err == nil: // ignore file exists + mode = info.Mode() + ignoreFile, err = os.ReadFile(ignoreFilePath) + if err != nil { + return "", err + } + idx := bytes.Index(ignoreFile, []byte(markerLine)) + if idx == -1 { + section := fmt.Sprintln(markerLine) + strings.Join(ignoreEntries, fmt.Sprintln("")) + fmt.Sprintln("") + ignoreFile = append(ignoreFile, section...) + } else { // Update section + ignoreFile = updateIgnoreFileSection(ignoreFile, markerLine, ignoreEntries) + } + + case !os.IsNotExist(err): + return "", err + default: // File doesn't exist + section := fmt.Sprintln(markerLine) + strings.Join(ignoreEntries, fmt.Sprintln("")) + fmt.Sprintln("") + ignoreFile = append(ignoreFile, []byte(section)...) + } + + if err = os.WriteFile(ignoreFilePath, ignoreFile, mode); err != nil { + return "", err + } + + return ignoreFilePath, nil +} diff --git a/pkg/git/git_test.go b/pkg/git/git_test.go new file mode 100644 index 00000000000..f38a7588292 --- /dev/null +++ b/pkg/git/git_test.go @@ -0,0 +1,166 @@ +package git_test + +import ( + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "syscall" + "testing" + + "github.com/stretchr/testify/require" + "github.com/treeverse/lakefs/pkg/git" +) + +func TestIsGitRepository(t *testing.T) { + tmpdir := t.TempDir() + tmpSubdir, err := os.MkdirTemp(tmpdir, "") + require.NoError(t, err) + defer os.Remove(tmpSubdir) + tmpFile, err := os.CreateTemp(tmpSubdir, "") + require.NoError(t, err) + defer func() { + _ = os.Remove(tmpFile.Name()) + _ = tmpFile.Close() + }() + + require.False(t, git.IsRepository(tmpFile.Name())) + require.False(t, git.IsRepository(tmpdir)) + + // Init git repo on root + require.NoError(t, exec.Command("git", "init", "-q", tmpdir).Run()) + require.False(t, git.IsRepository(tmpFile.Name())) + require.True(t, git.IsRepository(tmpdir)) + require.True(t, git.IsRepository(tmpSubdir)) +} + +func TestGetGitRepositoryPath(t *testing.T) { + tmpdir := t.TempDir() + tmpSubdir, err := os.MkdirTemp(tmpdir, "") + require.NoError(t, err) + defer os.Remove(tmpSubdir) + tmpFile, err := os.CreateTemp(tmpSubdir, "") + require.NoError(t, err) + defer func() { + _ = os.Remove(tmpFile.Name()) + _ = tmpFile.Close() + }() + + _, err = git.GetRepositoryPath(tmpdir) + require.ErrorIs(t, err, git.ErrNotARepository) + _, err = git.GetRepositoryPath(tmpFile.Name()) + require.ErrorIs(t, err, git.ErrGitError) + + // Init git repo on root + require.NoError(t, exec.Command("git", "init", "-q", tmpdir).Run()) + gitPath, err := git.GetRepositoryPath(tmpdir) + require.NoError(t, err) + require.Equal(t, tmpdir, gitPath) + _, err = git.GetRepositoryPath(tmpFile.Name()) + require.ErrorIs(t, err, git.ErrGitError) + gitPath, err = git.GetRepositoryPath(tmpSubdir) + require.NoError(t, err) + require.Equal(t, tmpdir, gitPath) +} + +func TestIgnore(t *testing.T) { + const ( + excludedFile = ".excluded.ex" + trackedFile = "file1" + marker = "Test Marker" + ) + tmpdir := t.TempDir() + tmpSubdir, err := os.MkdirTemp(tmpdir, "") + require.NoError(t, err) + defer os.Remove(tmpSubdir) + tmpFile, err := os.CreateTemp(tmpSubdir, "") + require.NoError(t, err) + defer func() { + _ = os.Remove(tmpFile.Name()) + _ = tmpFile.Close() + }() + excludedPath := filepath.Join(tmpSubdir, excludedFile) + + _, err = git.Ignore(tmpdir, []string{}, []string{}, marker) + require.ErrorIs(t, err, git.ErrNotARepository) + _, err = git.Ignore(tmpFile.Name(), []string{}, []string{}, marker) + require.ErrorIs(t, err, git.ErrGitError) + + // Init git repo on tmpdir + require.NoError(t, exec.Command("git", "init", "-q", tmpdir).Run()) + ignorePath := filepath.Join(tmpdir, git.IgnoreFile) + + // Create files in repo + fd, err := os.Create(filepath.Join(tmpdir, trackedFile)) + require.NoError(t, err) + fd.Close() + fd, err = os.Create(filepath.Join(tmpSubdir, "should_be_ignored")) + require.NoError(t, err) + fd.Close() + fd, err = os.Create(excludedPath) + require.NoError(t, err) + fd.Close() + + // Changing the working directory + require.NoError(t, syscall.Chdir(tmpdir)) + verifyPathTracked(t, []string{filepath.Base(tmpSubdir), trackedFile}) + + _, err = git.Ignore(tmpFile.Name(), []string{}, []string{excludedPath}, marker) + require.ErrorIs(t, err, git.ErrGitError) + result, err := git.Ignore(tmpdir, []string{}, []string{excludedPath}, marker) + require.NoError(t, err) + require.Equal(t, ignorePath, result) + rel, err := filepath.Rel(tmpdir, excludedPath) + require.NoError(t, err) + expected := fmt.Sprintf("!%s\n", rel) + + verifyPathTracked(t, []string{filepath.Base(tmpSubdir), trackedFile}) + + _, err = git.Ignore(tmpSubdir, []string{tmpFile.Name()}, []string{excludedPath}, marker) + require.NoError(t, err) + rel, err = filepath.Rel(tmpdir, tmpFile.Name()) + require.NoError(t, err) + expected = fmt.Sprintf("%s\n", rel) + expected + verifyPathTracked(t, []string{trackedFile}) + + _, err = git.Ignore(tmpSubdir, []string{tmpSubdir, filepath.Join(tmpdir, trackedFile)}, []string{}, marker) + rel, err = filepath.Rel(tmpdir, tmpSubdir) + require.NoError(t, err) + expected += fmt.Sprintf("%s\n%s\n", filepath.Join(rel, "*"), trackedFile) + require.NoError(t, err) + verifyPathTracked(t, []string{git.IgnoreFile}) + + _, err = git.Ignore(tmpSubdir, []string{tmpSubdir, filepath.Join(tmpdir, trackedFile), ignorePath}, []string{}, marker) + require.NoError(t, err) + require.Equal(t, ignorePath, result) + expected += fmt.Sprintf("%s\n", git.IgnoreFile) + expected = fmt.Sprintf("# %s\n", marker) + expected + + contents, _ := os.ReadFile(ignorePath) + fmt.Println(string(contents)) + verifyPathTracked(t, []string{}) + + contents, err = os.ReadFile(ignorePath) + require.NoError(t, err) + require.Equal(t, expected, string(contents)) +} + +func verifyPathTracked(t *testing.T, paths []string) { + cmd := exec.Command("git", "status") + r, w, _ := os.Pipe() + cmd.Stdout = w + require.NoError(t, cmd.Run()) + require.NoError(t, w.Close()) + out, err := io.ReadAll(r) + require.NoError(t, err) + outStr := string(out) + + if len(paths) == 0 { + require.Contains(t, outStr, "nothing to commit") + } else { + for _, p := range paths { + require.Contains(t, outStr, p) + } + } +}