Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lakectl local: implement init #6280

Merged
merged 11 commits into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions cmd/lakectl/cmd/index.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package cmd

import (
"errors"
"os"
"path/filepath"

"github.com/treeverse/lakefs/pkg/uri"
"gopkg.in/yaml.v3"
)

const (
// IndexFileName is the name of the reference file written inside a linked local directory.
IndexFileName = ".lakefs_ref.yaml"
// IgnoreMarker is the marker text passed to git.Ignore to head the section managed by lakectl local.
IgnoreMarker = "ignored by lakectl local:"
// IndexFileMode is the permission mode used when writing the index file.
IndexFileMode = 0644
)

// Index defines the structure of the lakeFS local reference file,
// which holds the information linking a local directory with a lakeFS path.
type Index struct {
// root is the value returned by LocalPath; the "-" tag keeps it out of the YAML file.
root string `yaml:"-"`
// PathURI is the lakeFS path URI in string form, stored under the "src" key.
PathURI string `yaml:"src"`
// AtHead is stored under the "at_head" key; WriteIndex fills it from its atHead argument.
AtHead string `yaml:"at_head"`
}

// LocalPath returns the root path stored on the index.
func (idx *Index) LocalPath() string {
	return idx.root
}

// GetCurrentURI parses the stored PathURI string and returns the resulting
// URI, or the parse error.
func (idx *Index) GetCurrentURI() (*uri.URI, error) {
	parsed, err := uri.Parse(idx.PathURI)
	return parsed, err
}

// WriteIndex serializes an Index holding remote (in string form) and atHead
// as YAML and writes it to the IndexFileName file inside path, using
// IndexFileMode. It returns any marshaling or write error.
func WriteIndex(path string, remote *uri.URI, atHead string) error {
	data, err := yaml.Marshal(&Index{
		PathURI: remote.String(),
		AtHead:  atHead,
	})
	if err != nil {
		return err
	}
	return os.WriteFile(filepath.Join(path, IndexFileName), data, IndexFileMode)
}

func IndexExists(baseAbs string) bool {
refPath := filepath.Join(baseAbs, IndexFileName)
_, err := os.Stat(refPath)
switch {
case err == nil:
return true
case errors.Is(err, os.ErrNotExist):
return false
default:
DieErr(err)
return false // go fmt
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

go fmt?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's going to change in the next PR

}
}
18 changes: 18 additions & 0 deletions cmd/lakectl/cmd/local.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package cmd

import (
"github.com/spf13/cobra"
)

// localCmd is the parent cobra command for the "lakectl local" subcommands.
var localCmd = &cobra.Command{
Use: "local",
// TODO: Remove BETA when feature complete
Short: "BETA: sync local directories with remote lakeFS locations",
}

// init marks the "local" command as hidden and registers it on the root command.
//
//nolint:gochecknoinits
func init() {
// TODO: Remove line when feature complete
localCmd.Hidden = true
rootCmd.AddCommand(localCmd)
}
66 changes: 66 additions & 0 deletions cmd/lakectl/cmd/local_init.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package cmd

import (
"errors"
"fmt"
"os"
"path/filepath"

"github.com/spf13/cobra"
"github.com/treeverse/lakefs/pkg/git"
)

// Bounds on the positional arguments accepted by "local init":
// the required path URI and the optional directory.
const (
localInitMinArgs = 1
localInitMaxArgs = 2
)

var localInitCmd = &cobra.Command{
Use: "init <path uri> [directory]",
Short: "set a local directory to sync with a lakeFS path",
Args: cobra.RangeArgs(localInitMinArgs, localInitMaxArgs),
Run: func(cmd *cobra.Command, args []string) {
remote := MustParsePathURI("path", args[0])
dir := "."
if len(args) == localInitMaxArgs {
dir = args[1]
}
flagSet := cmd.Flags()
force := Must(flagSet.GetBool("force"))

localPath, err := filepath.Abs(dir)
if err != nil {
DieErr(err)
}

if err := os.MkdirAll(dir, os.ModePerm); err != nil {
DieErr(err)
}
if IndexExists(localPath) && !force {
Die(fmt.Sprintf("directory '%s' already linked to a lakefs path, run command with --force to overwrite", localPath), 1)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DieFmt

}

// dereference
head := resolveCommitOrDie(cmd.Context(), getClient(), remote.Repository, remote.Ref)
nopcoder marked this conversation as resolved.
Show resolved Hide resolved
err = WriteIndex(localPath, remote, head)
if err != nil {
DieErr(err)
}

ignoreFile, err := git.Ignore(localPath, []string{localPath, IndexFileName}, []string{IndexFileName}, IgnoreMarker)
if err != nil && !errors.Is(err, git.ErrNotARepository) {
DieErr(err)
} else if err == nil {
fmt.Println("location added to", ignoreFile)
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alternative indent vs less 'if'

Suggested change
if err != nil && !errors.Is(err, git.ErrNotARepository) {
DieErr(err)
} else if err == nil {
fmt.Println("location added to", ignoreFile)
}
if err != nil {
if !errors.Is(err, git.ErrNotARepository) {
DieErr(err)
}
fmt.Println("location added to", ignoreFile)
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not exactly, but modified similarly


fmt.Printf("Successfully linked local directory '%s' with remote '%s'\n", localPath, remote)
},
}

//nolint:gochecknoinits
func init() {
AssignAutoConfirmFlag(localInitCmd.Flags())
localInitCmd.Flags().Bool("force", false, "Overwrites if directory already linked to a lakeFS path")
localCmd.AddCommand(localInitCmd)
}
58 changes: 58 additions & 0 deletions docs/reference/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -2558,6 +2558,64 @@ lakectl ingest --from <object store URI> --to <lakeFS path URI> [--dry-run] [fla



### lakectl local

**note:** This command is a lakeFS plumbing command. Don't use it unless you're really sure you know what you're doing.
{: .note .note-warning }

BETA: sync local directories with remote lakeFS locations

#### Options
{:.no_toc}

```
-h, --help help for local
```



### lakectl local help

Help about any command

#### Synopsis
{:.no_toc}

Help provides help for any command in the application.
Simply type local help [path to command] for full details.

```
lakectl local help [command] [flags]
```

#### Options
{:.no_toc}

```
-h, --help help for help
```



### lakectl local init

set a local directory to sync with a lakeFS path

```
lakectl local init <path uri> [directory] [flags]
```

#### Options
{:.no_toc}

```
--force Overwrites if directory already linked to a lakeFS path
-h, --help help for init
-y, --yes Automatically say yes to all confirmations
```



### lakectl log

Show log of commits
Expand Down
12 changes: 12 additions & 0 deletions pkg/fileutil/io.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package fileutil

import "os"

// IsDir reports whether p names a directory. When p cannot be stat'ed the
// result is false and the os.Stat error is returned as-is.
func IsDir(p string) (bool, error) {
	info, err := os.Stat(p)
	if err == nil {
		return info.IsDir(), nil
	}
	return false, err
}
10 changes: 10 additions & 0 deletions pkg/git/errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package git

import (
"errors"
)

var (
// ErrGitError is the sentinel wrapped around the output of a failed git invocation.
ErrGitError = errors.New("git error")
// ErrNotARepository is returned when the given directory is not inside a git repository.
ErrNotARepository = errors.New("not a git repository")
)
148 changes: 148 additions & 0 deletions pkg/git/git.go
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

General suggestion - package name is git so we can drop it from a lot of the function names.

Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
package git

import (
"bufio"
"bytes"
"errors"
"fmt"
"io/fs"
"os"
"os/exec"
"path/filepath"
"strings"

"github.com/treeverse/lakefs/pkg/fileutil"
"golang.org/x/exp/slices"
)

const (
// IgnoreFile is the name of the ignore file maintained at the repository root.
IgnoreFile = ".gitignore"
// IgnoreDefaultMode is the permission mode used when the ignore file has to be created.
IgnoreDefaultMode = 0644
)

// git runs the "git" executable with args using dir as the working directory.
// It returns the combined stdout and stderr as a string, together with the
// error reported by the command, if any.
func git(dir string, args ...string) (string, error) {
	command := exec.Command("git", args...)
	command.Dir = dir
	combined, err := command.CombinedOutput()
	return string(combined), err
}

// IsRepository reports whether `git rev-parse --is-inside-work-tree` succeeds
// when run in dir. Any failure of the command yields false.
func IsRepository(dir string) bool {
	if _, err := git(dir, "rev-parse", "--is-inside-work-tree"); err != nil {
		return false
	}
	return true
}

// GetRepositoryPath returns the top-level directory of the git repository
// that contains dir, as printed by `git rev-parse --show-toplevel`.
// It returns ErrNotARepository when git's output contains
// "not a git repository", and otherwise wraps the output with ErrGitError.
//
// NOTE(review): the detection relies on git's English message text; confirm
// behavior when git runs under a non-English locale.
func GetRepositoryPath(dir string) (string, error) {
	out, err := git(dir, "rev-parse", "--show-toplevel")
	switch {
	case err == nil:
		return strings.TrimSpace(out), nil
	case strings.Contains(out, "not a git repository"):
		return "", ErrNotARepository
	default:
		return "", fmt.Errorf("%s: %w", out, ErrGitError)
	}
}

// createEntriesForIgnore converts paths into .gitignore patterns relative to dir.
//
// Each path is made relative to dir; a path that is an existing directory gets
// a trailing "/*" so the pattern covers its contents, and when exclude is true
// the pattern is prefixed with "!" (a gitignore negation). Paths that do not
// exist are treated as plain files. Patterns always use "/" as the separator,
// as the gitignore format requires, regardless of the host OS.
//
// It returns an error when a path cannot be made relative to dir or cannot be
// stat'ed for a reason other than not existing.
func createEntriesForIgnore(dir string, paths []string, exclude bool) ([]string, error) {
	entries := make([]string, 0, len(paths))
	for _, p := range paths {
		pathInRepo, err := filepath.Rel(dir, p)
		if err != nil {
			return nil, fmt.Errorf("%s: %w", p, err)
		}
		isDir, err := fileutil.IsDir(p)
		if err != nil && !errors.Is(err, fs.ErrNotExist) {
			return nil, fmt.Errorf("%s: %w", p, err)
		}
		if isDir {
			pathInRepo = filepath.Join(pathInRepo, "*")
		}
		// gitignore patterns are slash-separated; filepath yields "\" on Windows.
		pathInRepo = filepath.ToSlash(pathInRepo)
		if exclude {
			pathInRepo = "!" + pathInRepo
		}
		entries = append(entries, pathInRepo)
	}
	return entries, nil
}

// updateIgnoreFileSection rewrites contents so that every section headed by a
// line equal to marker ends with entries.
//
// Every line is written back with surrounding whitespace trimmed. A section
// runs from the marker line to the next blank line (or end of input). Lines
// already in the section that are not in entries are kept, followed by all
// of entries in order. If the section was terminated by a blank line, that
// blank line is written back so the content after it stays outside the
// section.
func updateIgnoreFileSection(contents []byte, marker string, entries []string) []byte {
	scanner := bufio.NewScanner(bytes.NewReader(contents))
	// The default 64 KiB token limit would stop the scan on a longer line and
	// silently drop the rest of the file; allow a line as long as the input.
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), len(contents)+1)

	var out bytes.Buffer
	out.Grow(len(contents))
	writeLine := func(s string) {
		out.WriteString(s)
		out.WriteByte('\n')
	}

	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		writeLine(line)
		if line != marker {
			continue
		}

		// Inside the section: keep existing lines that are not being re-added.
		terminated := false
		for scanner.Scan() {
			line = strings.TrimSpace(scanner.Text())
			if line == "" {
				terminated = true
				break
			}
			if !slices.Contains(entries, line) {
				writeLine(line)
			}
		}
		for _, entry := range entries {
			writeLine(entry)
		}
		if terminated {
			// Restore the blank line that closed the section.
			writeLine("")
		}
	}

	return out.Bytes()
}
Comment on lines +70 to +92
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggest the following to update/append the content:

func updateIgnoreFileSection(contents []byte, marker string, entries []string) ([]byte, error) {
	// Read content and remove/collect entries under marker
	var (
		result   bytes.Buffer
		inMarker bool
		scanner  = bufio.NewScanner(bytes.NewReader(contents))
	)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		switch {
		case inMarker && line == "":
			inMarker = false
		case inMarker && !slices.Contains(entries, line):
			entries = append(entries, line)
		case !inMarker && line == marker:
			inMarker = true
		default:
			_, _ = fmt.Fprintln(&result, line)
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}

	// Render marker and entries at the end
	_, _ = fmt.Fprintln(&result, marker)
	for _, entry := range entries {
		_, _ = fmt.Fprintln(&result, entry)
	}
	_, _ = fmt.Fprintln(&result)

	return result.Bytes(), nil
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would prefer not to rewrite it to the end of the file for now. I will open another PR to also add an end marker


// Ignore creates or modifies the .gitignore file at the root of the git
// repository containing dir, so that it includes a section headed by the
// marker string ("# " + marker) listing the provided ignore and exclude paths.
// If the marker is already present, the paths are added to the existing
// section; otherwise the section is written at the end of the file. If no
// .gitignore exists at the repository root, a new one is created with
// IgnoreDefaultMode.
// All file paths must be absolute.
// It returns the path of the .gitignore file that was written, or
// ErrNotARepository (via GetRepositoryPath) when dir is not inside a git
// repository.
func Ignore(dir string, ignorePaths, excludePaths []string, marker string) (string, error) {
	gitDir, err := GetRepositoryPath(dir)
	if err != nil {
		return "", err
	}

	ignoreEntries, err := createEntriesForIgnore(gitDir, ignorePaths, false)
	if err != nil {
		return "", err
	}
	excludeEntries, err := createEntriesForIgnore(gitDir, excludePaths, true)
	if err != nil {
		return "", err
	}
	ignoreEntries = append(ignoreEntries, excludeEntries...)

	var (
		mode       os.FileMode = IgnoreDefaultMode
		ignoreFile []byte
	)
	ignoreFilePath := filepath.Join(gitDir, IgnoreFile)
	markerLine := "# " + marker
	info, err := os.Stat(ignoreFilePath)
	switch {
	case err == nil: // ignore file exists
		mode = info.Mode()
		ignoreFile, err = os.ReadFile(ignoreFilePath)
		if err != nil {
			return "", err
		}
		if bytes.Contains(ignoreFile, []byte(markerLine)) {
			ignoreFile = updateIgnoreFileSection(ignoreFile, markerLine, ignoreEntries)
		} else {
			ignoreFile = appendIgnoreSection(ignoreFile, markerLine, ignoreEntries)
		}
	case errors.Is(err, fs.ErrNotExist): // file doesn't exist, start from scratch
		ignoreFile = appendIgnoreSection(nil, markerLine, ignoreEntries)
	default:
		return "", err
	}

	if err = os.WriteFile(ignoreFilePath, ignoreFile, mode); err != nil {
		return "", err
	}

	return ignoreFilePath, nil
}

// appendIgnoreSection appends a new section (marker line followed by one entry
// per line) to contents and returns the result.
func appendIgnoreSection(contents []byte, markerLine string, entries []string) []byte {
	// Existing content may lack a trailing newline; without this the marker
	// would be glued onto the last existing pattern.
	if n := len(contents); n > 0 && contents[n-1] != '\n' {
		contents = append(contents, '\n')
	}
	section := markerLine + "\n" + strings.Join(entries, "\n") + "\n"
	return append(contents, section...)
}
Loading