diff --git a/files/README.md b/files/README.md
new file mode 100644
index 000000000..f8be00c99
--- /dev/null
+++ b/files/README.md
@@ -0,0 +1,31 @@
+# go-ipfs-files
+
+[![](https://img.shields.io/badge/made%20by-Protocol%20Labs-blue.svg?style=flat-square)](http://ipn.io)
+[![](https://img.shields.io/badge/project-IPFS-blue.svg?style=flat-square)](http://ipfs.io/)
+[![](https://img.shields.io/badge/freenode-%23ipfs-blue.svg?style=flat-square)](http://webchat.freenode.net/?channels=%23ipfs)
+[![standard-readme compliant](https://img.shields.io/badge/standard--readme-OK-green.svg?style=flat-square)](https://github.com/RichardLitt/standard-readme)
+
+> File interfaces and utils used in IPFS
+
+## Lead Maintainer
+
+[Steven Allen](https://github.com/Stebalien)
+
+## Documentation
+
+https://godoc.org/github.com/ipfs/go-ipfs-files
+
+## Contribute
+
+Feel free to join in. All welcome. Open an [issue](https://github.com/ipfs/go-ipfs-files/issues)!
+
+This repository falls under the IPFS [Code of Conduct](https://github.com/ipfs/community/blob/master/code-of-conduct.md).
+
+### Want to hack on IPFS?
+
+[![](https://cdn.rawgit.com/jbenet/contribute-ipfs-gif/master/img/contribute.gif)](https://github.com/ipfs/community/blob/master/CONTRIBUTING.md)
+
+## License
+
+MIT
+
diff --git a/files/file.go b/files/file.go
new file mode 100644
index 000000000..7ac1fc98a
--- /dev/null
+++ b/files/file.go
@@ -0,0 +1,96 @@
+package files
+
+import (
+	"errors"
+	"io"
+	"os"
+)
+
+var (
+	ErrNotDirectory = errors.New("file isn't a directory")
+	ErrNotReader    = errors.New("file isn't a regular file")
+
+	ErrNotSupported = errors.New("operation not supported")
+)
+
+// Node is a common interface for files, directories and other special files
+type Node interface {
+	io.Closer
+
+	// Size returns size of this file (if this file is a directory, total size of
+	// all files stored in the tree should be returned). Some implementations may
+	// choose not to implement this
+	Size() (int64, error)
+}
+
+// File represents a regular Unix file
+type File interface {
+	Node
+
+	io.Reader
+	io.Seeker
+}
+
+// DirEntry exposes information about a directory entry
+type DirEntry interface {
+	// Name returns base name of this entry, which is the base name of referenced
+	// file
+	Name() string
+
+	// Node returns the file referenced by this DirEntry
+	Node() Node
+}
+
+// DirIterator is an iterator over directory entries.
+// See Directory.Entries for more
+type DirIterator interface {
+	// DirEntry holds information about current directory entry.
+	// Note that after creating new iterator you MUST call Next() at least once
+	// before accessing these methods. Calling these methods without prior calls
+	// to Next() and after Next() returned false may result in undefined behavior
+	DirEntry
+
+	// Next advances iterator to the next file.
+	Next() bool
+
+	// Err may return an error after previous call to Next() returned `false`.
+	// If previous call to Next() returned `true`, Err() is guaranteed to
+	// return nil
+	Err() error
+}
+
+// Directory is a special file which can link to any number of files.
+type Directory interface {
+	Node
+
+	// Entries returns a stateful iterator over directory entries. The iterator
+	// may consume the Directory state so it must be called only once (this
+	// applies specifically to the multipartIterator).
+	//
+	// Example usage:
+	//
+	// it := dir.Entries()
+	// for it.Next() {
+	//   name := it.Name()
+	//   file := it.Node()
+	//   [...]
+ // } + // if it.Err() != nil { + // return err + // } + // + // Note that you can't store the result of it.Node() and use it after + // advancing the iterator + Entries() DirIterator +} + +// FileInfo exposes information on files in local filesystem +type FileInfo interface { + Node + + // AbsPath returns full real file path. + AbsPath() string + + // Stat returns os.Stat of this file, may be nil for some files + Stat() os.FileInfo +} diff --git a/files/file_test.go b/files/file_test.go new file mode 100644 index 000000000..8c6c62229 --- /dev/null +++ b/files/file_test.go @@ -0,0 +1,142 @@ +package files + +import ( + "io" + "mime/multipart" + "strings" + "testing" +) + +func TestSliceFiles(t *testing.T) { + sf := NewMapDirectory(map[string]Node{ + "1": NewBytesFile([]byte("Some text!\n")), + "2": NewBytesFile([]byte("beep")), + "3": NewBytesFile([]byte("boop")), + }) + + CheckDir(t, sf, []Event{ + { + kind: TFile, + name: "1", + value: "Some text!\n", + }, + { + kind: TFile, + name: "2", + value: "beep", + }, + { + kind: TFile, + name: "3", + value: "boop", + }, + }) +} + +func TestReaderFiles(t *testing.T) { + message := "beep boop" + rf := NewBytesFile([]byte(message)) + buf := make([]byte, len(message)) + + if n, err := rf.Read(buf); n == 0 || err != nil { + t.Fatal("Expected to be able to read") + } + if err := rf.Close(); err != nil { + t.Fatal("Should be able to close") + } + if n, err := rf.Read(buf); n != 0 || err != io.EOF { + t.Fatal("Expected EOF when reading after close") + } +} +func TestMultipartFiles(t *testing.T) { + data := ` +--Boundary! +Content-Type: text/plain +Content-Disposition: file; filename="name" +Some-Header: beep + +beep +--Boundary! +Content-Type: application/x-directory +Content-Disposition: file; filename="dir" + +--Boundary! +Content-Type: text/plain +Content-Disposition: file; filename="dir/nested" + +some content +--Boundary! +Content-Type: application/symlink +Content-Disposition: file; filename="dir/simlynk" + +anotherfile +--Boundary! +Content-Type: text/plain +Content-Disposition: file; filename="implicit1/implicit2/deep_implicit" + +implicit file1 +--Boundary! 
+Content-Type: text/plain +Content-Disposition: file; filename="implicit1/shallow_implicit" + +implicit file2 +--Boundary!-- + +` + + reader := strings.NewReader(data) + mpReader := multipart.NewReader(reader, "Boundary!") + dir, err := NewFileFromPartReader(mpReader, multipartFormdataType) + if err != nil { + t.Fatal(err) + } + + CheckDir(t, dir, []Event{ + { + kind: TFile, + name: "name", + value: "beep", + }, + { + kind: TDirStart, + name: "dir", + }, + { + kind: TFile, + name: "nested", + value: "some content", + }, + { + kind: TSymlink, + name: "simlynk", + value: "anotherfile", + }, + { + kind: TDirEnd, + }, + { + kind: TDirStart, + name: "implicit1", + }, + { + kind: TDirStart, + name: "implicit2", + }, + { + kind: TFile, + name: "deep_implicit", + value: "implicit file1", + }, + { + kind: TDirEnd, + }, + { + kind: TFile, + name: "shallow_implicit", + value: "implicit file2", + }, + { + kind: TDirEnd, + }, + }) +} diff --git a/files/filewriter.go b/files/filewriter.go new file mode 100644 index 000000000..bf4bcf649 --- /dev/null +++ b/files/filewriter.go @@ -0,0 +1,59 @@ +package files + +import ( + "errors" + "fmt" + "io" + "os" + "path/filepath" +) + +var ErrInvalidDirectoryEntry = errors.New("invalid directory entry name") +var ErrPathExistsOverwrite = errors.New("path already exists and overwriting is not allowed") + +// WriteTo writes the given node to the local filesystem at fpath. +func WriteTo(nd Node, fpath string) error { + if _, err := os.Lstat(fpath); err == nil { + return ErrPathExistsOverwrite + } else if !os.IsNotExist(err) { + return err + } + switch nd := nd.(type) { + case *Symlink: + return os.Symlink(nd.Target, fpath) + case File: + f, err := createNewFile(fpath) + defer f.Close() + if err != nil { + return err + } + _, err = io.Copy(f, nd) + if err != nil { + return err + } + return nil + case Directory: + err := os.Mkdir(fpath, 0777) + if err != nil { + return err + } + + entries := nd.Entries() + for entries.Next() { + entryName := entries.Name() + if entryName == "" || + entryName == "." || + entryName == ".." 
|| + !isValidFilename(entryName) { + return ErrInvalidDirectoryEntry + } + child := filepath.Join(fpath, entryName) + if err := WriteTo(entries.Node(), child); err != nil { + return err + } + } + return entries.Err() + default: + return fmt.Errorf("file type %T at %q is not supported", nd, fpath) + } +} diff --git a/files/filewriter_test.go b/files/filewriter_test.go new file mode 100644 index 000000000..00a0b1ce2 --- /dev/null +++ b/files/filewriter_test.go @@ -0,0 +1,100 @@ +package files + +import ( + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestWriteTo(t *testing.T) { + sf := NewMapDirectory(map[string]Node{ + "1": NewBytesFile([]byte("Some text!\n")), + "2": NewBytesFile([]byte("beep")), + "3": NewMapDirectory(nil), + "4": NewBytesFile([]byte("boop")), + "5": NewMapDirectory(map[string]Node{ + "a": NewBytesFile([]byte("foobar")), + }), + }) + tmppath, err := os.MkdirTemp("", "files-test") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmppath) + + path := filepath.Join(tmppath, "output") + + err = WriteTo(sf, path) + if err != nil { + t.Fatal(err) + } + expected := map[string]string{ + ".": "", + "1": "Some text!\n", + "2": "beep", + "3": "", + "4": "boop", + "5": "", + filepath.FromSlash("5/a"): "foobar", + } + err = filepath.Walk(path, func(cpath string, info os.FileInfo, err error) error { + if err != nil { + return err + } + rpath, err := filepath.Rel(path, cpath) + if err != nil { + return err + } + data, ok := expected[rpath] + if !ok { + return fmt.Errorf("expected something at %q", rpath) + } + delete(expected, rpath) + + if info.IsDir() { + if data != "" { + return fmt.Errorf("expected a directory at %q", rpath) + } + } else { + actual, err := os.ReadFile(cpath) + if err != nil { + return err + } + if string(actual) != data { + return fmt.Errorf("expected %q, got %q", data, string(actual)) + } + } + return nil + }) + if err != nil { + t.Fatal(err) + } + if len(expected) > 0 { + t.Fatalf("failed to find: %#v", expected) + } +} + +func TestDontAllowOverwrite(t *testing.T) { + tmppath, err := os.MkdirTemp("", "files-test") + assert.NoError(t, err) + defer os.RemoveAll(tmppath) + + path := filepath.Join(tmppath, "output") + + // Check we can actually write to the output path before trying invalid entries + // and leave an existing entry to test overwrite protection. 
+ assert.NoError(t, WriteTo(NewMapDirectory(map[string]Node{ + "exisiting-entry": NewBytesFile(nil), + }), path)) + + assert.Equal(t, ErrPathExistsOverwrite, WriteTo(NewBytesFile(nil), filepath.Join(path))) + assert.Equal(t, ErrPathExistsOverwrite, WriteTo(NewBytesFile(nil), filepath.Join(path, "exisiting-entry"))) + // The directory in `path` has already been created so this should fail too: + assert.Equal(t, ErrPathExistsOverwrite, WriteTo(NewMapDirectory(map[string]Node{ + "any-name": NewBytesFile(nil), + }), filepath.Join(path))) + os.RemoveAll(path) +} diff --git a/files/filewriter_unix.go b/files/filewriter_unix.go new file mode 100644 index 000000000..98d040018 --- /dev/null +++ b/files/filewriter_unix.go @@ -0,0 +1,19 @@ +//go:build darwin || linux || netbsd || openbsd || freebsd || dragonfly + +package files + +import ( + "os" + "strings" + "syscall" +) + +var invalidChars = `/` + "\x00" + +func isValidFilename(filename string) bool { + return !strings.ContainsAny(filename, invalidChars) +} + +func createNewFile(path string) (*os.File, error) { + return os.OpenFile(path, os.O_EXCL|os.O_CREATE|os.O_WRONLY|syscall.O_NOFOLLOW, 0666) +} diff --git a/files/filewriter_unix_test.go b/files/filewriter_unix_test.go new file mode 100644 index 000000000..ffc33ce51 --- /dev/null +++ b/files/filewriter_unix_test.go @@ -0,0 +1,33 @@ +//go:build darwin || linux || netbsd || openbsd + +package files + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestWriteToInvalidPaths(t *testing.T) { + tmppath, err := os.MkdirTemp("", "files-test") + assert.NoError(t, err) + defer os.RemoveAll(tmppath) + + path := filepath.Join(tmppath, "output") + + // Check we can actually write to the output path before trying invalid entries. 
+ assert.NoError(t, WriteTo(NewMapDirectory(map[string]Node{ + "valid-entry": NewBytesFile(nil), + }), path)) + os.RemoveAll(path) + + // Now try all invalid entry names + for _, entryName := range []string{"", ".", "..", "/", "", "not/a/base/path"} { + assert.Equal(t, ErrInvalidDirectoryEntry, WriteTo(NewMapDirectory(map[string]Node{ + entryName: NewBytesFile(nil), + }), filepath.Join(path))) + os.RemoveAll(path) + } +} diff --git a/files/filewriter_windows.go b/files/filewriter_windows.go new file mode 100644 index 000000000..a5d626199 --- /dev/null +++ b/files/filewriter_windows.go @@ -0,0 +1,45 @@ +//go:build windows + +package files + +import ( + "os" + "strings" +) + +var invalidChars = `<>:"/\|?*` + "\x00" + +var reservedNames = map[string]struct{}{ + "CON": {}, + "PRN": {}, + "AUX": {}, + "NUL": {}, + "COM1": {}, + "COM2": {}, + "COM3": {}, + "COM4": {}, + "COM5": {}, + "COM6": {}, + "COM7": {}, + "COM8": {}, + "COM9": {}, + "LPT1": {}, + "LPT2": {}, + "LPT3": {}, + "LPT4": {}, + "LPT5": {}, + "LPT6": {}, + "LPT7": {}, + "LPT8": {}, + "LPT9": {}, +} + +func isValidFilename(filename string) bool { + _, isReservedName := reservedNames[filename] + return !strings.ContainsAny(filename, invalidChars) && + !isReservedName +} + +func createNewFile(path string) (*os.File, error) { + return os.OpenFile(path, os.O_EXCL|os.O_CREATE|os.O_WRONLY, 0666) +} diff --git a/files/filewriter_windows_test.go b/files/filewriter_windows_test.go new file mode 100644 index 000000000..ca0222ba3 --- /dev/null +++ b/files/filewriter_windows_test.go @@ -0,0 +1,35 @@ +//go:build windows + +package files + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestWriteToInvalidPaths(t *testing.T) { + tmppath, err := os.MkdirTemp("", "files-test") + assert.NoError(t, err) + defer os.RemoveAll(tmppath) + + path := filepath.Join(tmppath, "output") + + // Check we can actually write to the output path before trying invalid entries. + assert.NoError(t, WriteTo(NewMapDirectory(map[string]Node{ + "valid-entry": NewBytesFile(nil), + }), path)) + os.RemoveAll(path) + + // Now try all invalid entry names + for _, entryName := range []string{"", ".", "..", "/", "", "not/a/base/path", + "<", ">", ":", "\"", "\\", "|", "?", "*", "\x00", + "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9"} { + assert.Equal(t, ErrInvalidDirectoryEntry, WriteTo(NewMapDirectory(map[string]Node{ + entryName: NewBytesFile(nil), + }), filepath.Join(path))) + os.RemoveAll(path) + } +} diff --git a/files/filter.go b/files/filter.go new file mode 100644 index 000000000..6b90f1f34 --- /dev/null +++ b/files/filter.go @@ -0,0 +1,49 @@ +package files + +import ( + "os" + + ignore "github.com/crackcomm/go-gitignore" +) + +// Filter represents a set of rules for determining if a file should be included or excluded. +// A rule follows the syntax for patterns used in .gitgnore files for specifying untracked files. +// Examples: +// foo.txt +// *.app +// bar/ +// **/baz +// fizz/** +type Filter struct { + // IncludeHidden - Include hidden files + IncludeHidden bool + // Rules - File filter rules + Rules *ignore.GitIgnore +} + +// NewFilter creates a new file filter from a .gitignore file and/or a list of ignore rules. 
+// An ignoreFile is a path to a file with .gitignore-style patterns to exclude, one per line +// rules is an array of strings representing .gitignore-style patterns +// For reference on ignore rule syntax, see https://git-scm.com/docs/gitignore +func NewFilter(ignoreFile string, rules []string, includeHidden bool) (*Filter, error) { + var ignoreRules *ignore.GitIgnore + var err error + if ignoreFile == "" { + ignoreRules, err = ignore.CompileIgnoreLines(rules...) + } else { + ignoreRules, err = ignore.CompileIgnoreFileAndLines(ignoreFile, rules...) + } + if err != nil { + return nil, err + } + return &Filter{IncludeHidden: includeHidden, Rules: ignoreRules}, nil +} + +// ShouldExclude takes an os.FileInfo object and applies rules to determine if its target should be excluded. +func (filter *Filter) ShouldExclude(fileInfo os.FileInfo) (result bool) { + path := fileInfo.Name() + if !filter.IncludeHidden && isHidden(fileInfo) { + return true + } + return filter.Rules.MatchesPath(path) +} diff --git a/files/filter_test.go b/files/filter_test.go new file mode 100644 index 000000000..8ce25ee3b --- /dev/null +++ b/files/filter_test.go @@ -0,0 +1,53 @@ +package files + +import ( + "os" + "path/filepath" + "testing" +) + +type mockFileInfo struct { + os.FileInfo + name string +} + +func (m *mockFileInfo) Name() string { + return m.name +} + +func (m *mockFileInfo) Sys() interface{} { + return nil +} + +var _ os.FileInfo = &mockFileInfo{} + +func TestFileFilter(t *testing.T) { + includeHidden := true + filter, err := NewFilter("", nil, includeHidden) + if err != nil { + t.Errorf("failed to create filter with empty rules") + } + if filter.IncludeHidden != includeHidden { + t.Errorf("new filter should include hidden files") + } + _, err = NewFilter("ignoreFileThatDoesNotExist", nil, false) + if err == nil { + t.Errorf("creating a filter without an invalid ignore file path should have failed") + } + tmppath, err := os.MkdirTemp("", "filter-test") + if err != nil { + t.Fatal(err) + } + ignoreFilePath := filepath.Join(tmppath, "ignoreFile") + ignoreFileContents := []byte("a.txt") + if err := os.WriteFile(ignoreFilePath, ignoreFileContents, 0666); err != nil { + t.Fatal(err) + } + filterWithIgnoreFile, err := NewFilter(ignoreFilePath, nil, false) + if err != nil { + t.Errorf("failed to create filter with ignore file") + } + if !filterWithIgnoreFile.ShouldExclude(&mockFileInfo{name: "a.txt"}) { + t.Errorf("filter should've excluded expected file from ignoreFile: %s", "a.txt") + } +} diff --git a/files/helpers_test.go b/files/helpers_test.go new file mode 100644 index 000000000..0180b8f27 --- /dev/null +++ b/files/helpers_test.go @@ -0,0 +1,126 @@ +package files + +import ( + "io" + "testing" +) + +type Kind int + +const ( + TFile Kind = iota + TSymlink + TDirStart + TDirEnd +) + +type Event struct { + kind Kind + name string + value string +} + +func CheckDir(t *testing.T, dir Directory, expected []Event) { + expectedIndex := 0 + expect := func() (Event, int) { + t.Helper() + + if expectedIndex > len(expected) { + t.Fatal("no more expected entries") + } + i := expectedIndex + expectedIndex++ + + // Add an implicit "end" event at the end. It makes this + // function a bit easier to write. 
+ next := Event{kind: TDirEnd} + if i < len(expected) { + next = expected[i] + } + return next, i + } + var check func(d Directory) + check = func(d Directory) { + it := d.Entries() + + for it.Next() { + next, i := expect() + + if it.Name() != next.name { + t.Fatalf("[%d] expected filename to be %q", i, next.name) + } + + switch next.kind { + case TFile: + mf, ok := it.Node().(File) + if !ok { + t.Fatalf("[%d] expected file to be a normal file: %T", i, it.Node()) + } + out, err := io.ReadAll(mf) + if err != nil { + t.Errorf("[%d] failed to read file", i) + continue + } + if string(out) != next.value { + t.Errorf( + "[%d] while reading %q, expected %q, got %q", + i, + it.Name(), + next.value, + string(out), + ) + continue + } + case TSymlink: + mf, ok := it.Node().(*Symlink) + if !ok { + t.Errorf("[%d] expected file to be a symlink: %T", i, it.Node()) + continue + } + if mf.Target != next.value { + t.Errorf( + "[%d] target of symlink %q should have been %q but was %q", + i, + it.Name(), + next.value, + mf.Target, + ) + continue + } + case TDirStart: + mf, ok := it.Node().(Directory) + if !ok { + t.Fatalf( + "[%d] expected file to be a directory: %T", + i, + it.Node(), + ) + } + check(mf) + case TDirEnd: + t.Errorf( + "[%d] expected end of directory, found %#v at %q", + i, + it.Node(), + it.Name(), + ) + return + default: + t.Fatal("unhandled type", next.kind) + } + if err := it.Node().Close(); err != nil { + t.Fatalf("[%d] expected to be able to close node", i) + } + } + next, i := expect() + + if it.Err() != nil { + t.Fatalf("[%d] got error: %s", i, it.Err()) + } + + if next.kind != TDirEnd { + t.Fatalf("[%d] found end of directory, expected %#v", i, next) + } + } + check(dir) +} diff --git a/files/is_hidden.go b/files/is_hidden.go new file mode 100644 index 000000000..9842ca232 --- /dev/null +++ b/files/is_hidden.go @@ -0,0 +1,17 @@ +//go:build !windows + +package files + +import ( + "os" +) + +func isHidden(fi os.FileInfo) bool { + fName := fi.Name() + switch fName { + case "", ".", "..": + return false + default: + return fName[0] == '.' + } +} diff --git a/files/is_hidden_windows.go b/files/is_hidden_windows.go new file mode 100644 index 000000000..9a0703863 --- /dev/null +++ b/files/is_hidden_windows.go @@ -0,0 +1,32 @@ +//go:build windows + +package files + +import ( + "os" + + windows "golang.org/x/sys/windows" +) + +func isHidden(fi os.FileInfo) bool { + fName := fi.Name() + switch fName { + case "", ".", "..": + return false + } + + if fName[0] == '.' 
{ + return true + } + + sys := fi.Sys() + if sys == nil { + return false + } + wi, ok := sys.(*windows.Win32FileAttributeData) + if !ok { + return false + } + + return wi.FileAttributes&windows.FILE_ATTRIBUTE_HIDDEN != 0 +} diff --git a/files/linkfile.go b/files/linkfile.go new file mode 100644 index 000000000..526998652 --- /dev/null +++ b/files/linkfile.go @@ -0,0 +1,42 @@ +package files + +import ( + "os" + "strings" +) + +type Symlink struct { + Target string + + stat os.FileInfo + reader strings.Reader +} + +func NewLinkFile(target string, stat os.FileInfo) File { + lf := &Symlink{Target: target, stat: stat} + lf.reader.Reset(lf.Target) + return lf +} + +func (lf *Symlink) Close() error { + return nil +} + +func (lf *Symlink) Read(b []byte) (int, error) { + return lf.reader.Read(b) +} + +func (lf *Symlink) Seek(offset int64, whence int) (int64, error) { + return lf.reader.Seek(offset, whence) +} + +func (lf *Symlink) Size() (int64, error) { + return lf.reader.Size(), nil +} + +func ToSymlink(n Node) *Symlink { + l, _ := n.(*Symlink) + return l +} + +var _ File = &Symlink{} diff --git a/files/multifilereader.go b/files/multifilereader.go new file mode 100644 index 000000000..f6f225a38 --- /dev/null +++ b/files/multifilereader.go @@ -0,0 +1,152 @@ +package files + +import ( + "bytes" + "fmt" + "io" + "mime/multipart" + "net/textproto" + "net/url" + "path" + "sync" +) + +// MultiFileReader reads from a `commands.Node` (which can be a directory of files +// or a regular file) as HTTP multipart encoded data. +type MultiFileReader struct { + io.Reader + + // directory stack for NextFile + files []DirIterator + path []string + + currentFile Node + buf bytes.Buffer + mpWriter *multipart.Writer + closed bool + mutex *sync.Mutex + + // if true, the content disposition will be "form-data" + // if false, the content disposition will be "attachment" + form bool +} + +// NewMultiFileReader constructs a MultiFileReader. `file` can be any `commands.Directory`. +// If `form` is set to true, the Content-Disposition will be "form-data". +// Otherwise, it will be "attachment". 
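To make the multipart round trip concrete, here is a minimal sketch pairing the constructor described above with NewFileFromPartReader from multipartfile.go. It is written as if it were another file in this package (so no module import path has to be assumed); demoMultipartRoundTrip and the entry names are made up, mirroring what the tests below do.

```go
package files

import (
	"fmt"
	"io"
	"mime/multipart"
)

// demoMultipartRoundTrip is a hypothetical helper: it encodes an in-memory
// directory as multipart data and decodes it again.
func demoMultipartRoundTrip() error {
	dir := NewMapDirectory(map[string]Node{
		"hello.txt": NewBytesFile([]byte("hello")),
	})

	// form=true emits "form-data" Content-Disposition headers; form=false
	// would emit "attachment" instead.
	mfr := NewMultiFileReader(dir, true)

	// The boundary chosen by the writer must be handed to the reader.
	decoded, err := NewFileFromPartReader(multipart.NewReader(mfr, mfr.Boundary()), multipartFormdataType)
	if err != nil {
		return err
	}

	it := decoded.Entries()
	for it.Next() {
		if f, ok := it.Node().(File); ok {
			data, err := io.ReadAll(f)
			if err != nil {
				return err
			}
			fmt.Printf("%s: %s\n", it.Name(), data)
		}
	}
	return it.Err()
}
```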
+func NewMultiFileReader(file Directory, form bool) *MultiFileReader { + it := file.Entries() + + mfr := &MultiFileReader{ + files: []DirIterator{it}, + path: []string{""}, + form: form, + mutex: &sync.Mutex{}, + } + mfr.mpWriter = multipart.NewWriter(&mfr.buf) + + return mfr +} + +func (mfr *MultiFileReader) Read(buf []byte) (written int, err error) { + mfr.mutex.Lock() + defer mfr.mutex.Unlock() + + // if we are closed and the buffer is flushed, end reading + if mfr.closed && mfr.buf.Len() == 0 { + return 0, io.EOF + } + + // if the current file isn't set, advance to the next file + if mfr.currentFile == nil { + var entry DirEntry + + for entry == nil { + if len(mfr.files) == 0 { + mfr.mpWriter.Close() + mfr.closed = true + return mfr.buf.Read(buf) + } + + if !mfr.files[len(mfr.files)-1].Next() { + if mfr.files[len(mfr.files)-1].Err() != nil { + return 0, mfr.files[len(mfr.files)-1].Err() + } + mfr.files = mfr.files[:len(mfr.files)-1] + mfr.path = mfr.path[:len(mfr.path)-1] + continue + } + + entry = mfr.files[len(mfr.files)-1] + } + + // handle starting a new file part + if !mfr.closed { + + mfr.currentFile = entry.Node() + + // write the boundary and headers + header := make(textproto.MIMEHeader) + filename := url.QueryEscape(path.Join(path.Join(mfr.path...), entry.Name())) + dispositionPrefix := "attachment" + if mfr.form { + dispositionPrefix = "form-data; name=\"file\"" + } + + header.Set("Content-Disposition", fmt.Sprintf("%s; filename=\"%s\"", dispositionPrefix, filename)) + + var contentType string + + switch f := entry.Node().(type) { + case *Symlink: + contentType = "application/symlink" + case Directory: + newIt := f.Entries() + mfr.files = append(mfr.files, newIt) + mfr.path = append(mfr.path, entry.Name()) + contentType = "application/x-directory" + case File: + // otherwise, use the file as a reader to read its contents + contentType = "application/octet-stream" + default: + return 0, ErrNotSupported + } + + header.Set("Content-Type", contentType) + if rf, ok := entry.Node().(FileInfo); ok { + header.Set("abspath", rf.AbsPath()) + } + + _, err := mfr.mpWriter.CreatePart(header) + if err != nil { + return 0, err + } + } + } + + // if the buffer has something in it, read from it + if mfr.buf.Len() > 0 { + return mfr.buf.Read(buf) + } + + // otherwise, read from file data + switch f := mfr.currentFile.(type) { + case File: + written, err = f.Read(buf) + if err != io.EOF { + return written, err + } + } + + if err := mfr.currentFile.Close(); err != nil { + return written, err + } + + mfr.currentFile = nil + return written, nil +} + +// Boundary returns the boundary string to be used to separate files in the multipart data +func (mfr *MultiFileReader) Boundary() string { + return mfr.mpWriter.Boundary() +} diff --git a/files/multifilereader_test.go b/files/multifilereader_test.go new file mode 100644 index 000000000..e36788a91 --- /dev/null +++ b/files/multifilereader_test.go @@ -0,0 +1,197 @@ +package files + +import ( + "io" + "mime/multipart" + "testing" +) + +var text = "Some text! 
:)" + +func getTestMultiFileReader(t *testing.T) *MultiFileReader { + sf := NewMapDirectory(map[string]Node{ + "file.txt": NewBytesFile([]byte(text)), + "boop": NewMapDirectory(map[string]Node{ + "a.txt": NewBytesFile([]byte("bleep")), + "b.txt": NewBytesFile([]byte("bloop")), + }), + "beep.txt": NewBytesFile([]byte("beep")), + }) + + // testing output by reading it with the go stdlib "mime/multipart" Reader + return NewMultiFileReader(sf, true) +} + +func TestMultiFileReaderToMultiFile(t *testing.T) { + mfr := getTestMultiFileReader(t) + mpReader := multipart.NewReader(mfr, mfr.Boundary()) + mf, err := NewFileFromPartReader(mpReader, multipartFormdataType) + if err != nil { + t.Fatal(err) + } + + it := mf.Entries() + + if !it.Next() || it.Name() != "beep.txt" { + t.Fatal("iterator didn't work as expected") + } + + if !it.Next() || it.Name() != "boop" || DirFromEntry(it) == nil { + t.Fatal("iterator didn't work as expected") + } + + subIt := DirFromEntry(it).Entries() + + if !subIt.Next() || subIt.Name() != "a.txt" || DirFromEntry(subIt) != nil { + t.Fatal("iterator didn't work as expected") + } + + if !subIt.Next() || subIt.Name() != "b.txt" || DirFromEntry(subIt) != nil { + t.Fatal("iterator didn't work as expected") + } + + if subIt.Next() || it.Err() != nil { + t.Fatal("iterator didn't work as expected") + } + + // try to break internal state + if subIt.Next() || it.Err() != nil { + t.Fatal("iterator didn't work as expected") + } + + if !it.Next() || it.Name() != "file.txt" || DirFromEntry(it) != nil || it.Err() != nil { + t.Fatal("iterator didn't work as expected") + } + + if it.Next() || it.Err() != nil { + t.Fatal("iterator didn't work as expected") + } +} + +func TestMultiFileReaderToMultiFileSkip(t *testing.T) { + mfr := getTestMultiFileReader(t) + mpReader := multipart.NewReader(mfr, mfr.Boundary()) + mf, err := NewFileFromPartReader(mpReader, multipartFormdataType) + if err != nil { + t.Fatal(err) + } + + it := mf.Entries() + + if !it.Next() || it.Name() != "beep.txt" { + t.Fatal("iterator didn't work as expected") + } + + if !it.Next() || it.Name() != "boop" || DirFromEntry(it) == nil { + t.Fatal("iterator didn't work as expected") + } + + if !it.Next() || it.Name() != "file.txt" || DirFromEntry(it) != nil || it.Err() != nil { + t.Fatal("iterator didn't work as expected") + } + + if it.Next() || it.Err() != nil { + t.Fatal("iterator didn't work as expected") + } +} + +func TestOutput(t *testing.T) { + mfr := getTestMultiFileReader(t) + walker := &multipartWalker{reader: multipart.NewReader(mfr, mfr.Boundary())} + buf := make([]byte, 20) + + mpf, err := walker.nextFile() + if mpf == nil || err != nil { + t.Fatal("Expected non-nil multipartFile, nil error") + } + mpr, ok := mpf.(File) + if !ok { + t.Fatal("Expected file to be a regular file") + } + if n, err := mpr.Read(buf); n != 4 || err != nil { + t.Fatal("Expected to read from file", n, err) + } + if string(buf[:4]) != "beep" { + t.Fatal("Data read was different than expected") + } + + mpf, err = walker.nextFile() + if mpf == nil || err != nil { + t.Fatal("Expected non-nil multipartFile, nil error") + } + mpd, ok := mpf.(Directory) + if !ok { + t.Fatal("Expected file to be a directory") + } + + child, err := walker.nextFile() + if child == nil || err != nil { + t.Fatal("Expected to be able to read a child file") + } + if _, ok := child.(File); !ok { + t.Fatal("Expected file to not be a directory") + } + + child, err = walker.nextFile() + if child == nil || err != nil { + t.Fatal("Expected to be able to read a child file") + } 
+ if _, ok := child.(File); !ok { + t.Fatal("Expected file to not be a directory") + } + + it := mpd.Entries() + if it.Next() { + t.Fatal("Expected to get false") + } + + mpf, err = walker.nextFile() + if mpf == nil || err != nil { + t.Fatal("Expected non-nil multipartFile, nil error") + } + + part, err := walker.getPart() + if part != nil || err != io.EOF { + t.Fatal("Expected to get (nil, io.EOF)") + } +} + +func TestCommonPrefix(t *testing.T) { + sf := NewMapDirectory(map[string]Node{ + "boop": NewMapDirectory(map[string]Node{ + "a": NewBytesFile([]byte("bleep")), + "aa": NewBytesFile([]byte("bleep")), + "aaa": NewBytesFile([]byte("bleep")), + }), + }) + mfr := NewMultiFileReader(sf, true) + reader, err := NewFileFromPartReader(multipart.NewReader(mfr, mfr.Boundary()), multipartFormdataType) + if err != nil { + t.Fatal(err) + } + + CheckDir(t, reader, []Event{ + { + kind: TDirStart, + name: "boop", + }, + { + kind: TFile, + name: "a", + value: "bleep", + }, + { + kind: TFile, + name: "aa", + value: "bleep", + }, + { + kind: TFile, + name: "aaa", + value: "bleep", + }, + { + kind: TDirEnd, + }, + }) +} diff --git a/files/multipartfile.go b/files/multipartfile.go new file mode 100644 index 000000000..27653982c --- /dev/null +++ b/files/multipartfile.go @@ -0,0 +1,232 @@ +package files + +import ( + "io" + "mime" + "mime/multipart" + "net/url" + "path" + "strings" +) + +const ( + multipartFormdataType = "multipart/form-data" + multipartMixedType = "multipart/mixed" + + applicationDirectory = "application/x-directory" + applicationSymlink = "application/symlink" + applicationFile = "application/octet-stream" + + contentTypeHeader = "Content-Type" +) + +type multipartDirectory struct { + path string + walker *multipartWalker + + // part is the part describing the directory. It's nil when implicit. + part *multipart.Part +} + +type multipartWalker struct { + part *multipart.Part + reader *multipart.Reader +} + +func (m *multipartWalker) consumePart() { + m.part = nil +} + +func (m *multipartWalker) getPart() (*multipart.Part, error) { + if m.part != nil { + return m.part, nil + } + if m.reader == nil { + return nil, io.EOF + } + + var err error + m.part, err = m.reader.NextPart() + if err == io.EOF { + m.reader = nil + } + return m.part, err +} + +// NewFileFromPartReader creates a Directory from a multipart reader. +func NewFileFromPartReader(reader *multipart.Reader, mediatype string) (Directory, error) { + switch mediatype { + case applicationDirectory, multipartFormdataType: + default: + return nil, ErrNotDirectory + } + + return &multipartDirectory{ + path: "/", + walker: &multipartWalker{ + reader: reader, + }, + }, nil +} + +func (w *multipartWalker) nextFile() (Node, error) { + part, err := w.getPart() + if err != nil { + return nil, err + } + w.consumePart() + + contentType := part.Header.Get(contentTypeHeader) + if contentType != "" { + var err error + contentType, _, err = mime.ParseMediaType(contentType) + if err != nil { + return nil, err + } + } + + switch contentType { + case multipartFormdataType, applicationDirectory: + return &multipartDirectory{ + part: part, + path: fileName(part), + walker: w, + }, nil + case applicationSymlink: + out, err := io.ReadAll(part) + if err != nil { + return nil, err + } + + return NewLinkFile(string(out), nil), nil + default: + return &ReaderFile{ + reader: part, + abspath: part.Header.Get("abspath"), + }, nil + } +} + +// fileName returns a normalized filename from a part. 
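As an aside, here is a small package-internal sketch of how the cleaned names produced by fileName (defined next) are classified by the isChild and makeRelative helpers further below; demoPathHelpers and the literal paths are made up.

```go
package files

import "fmt"

// demoPathHelpers is a hypothetical illustration of the path helpers in this
// file, which all operate on cleaned, slash-separated paths.
func demoPathHelpers() {
	parent := "/dir"                // the directory currently being iterated
	entry := "/dir/nested/file.txt" // a cleaned name as returned by fileName()

	if isChild(entry, parent) {
		rel := makeRelative(entry, parent) // "nested/file.txt"
		// A remaining '/' means the iterator has to synthesize an implicit
		// intermediate directory ("nested") before it can yield the file.
		fmt.Println(rel)
	}
}
```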
+func fileName(part *multipart.Part) string { + v := part.Header.Get("Content-Disposition") + _, params, err := mime.ParseMediaType(v) + if err != nil { + return "" + } + filename := params["filename"] + if escaped, err := url.QueryUnescape(filename); err == nil { + filename = escaped + } // if there is a unescape error, just treat the name as unescaped + + return path.Clean("/" + filename) +} + +// dirName appends a slash to the end of the filename, if not present. +// expects a _cleaned_ path. +func dirName(filename string) string { + if !strings.HasSuffix(filename, "/") { + filename += "/" + } + return filename +} + +// isChild checks if child is a child of parent directory. +// expects a _cleaned_ path. +func isChild(child, parent string) bool { + return strings.HasPrefix(child, dirName(parent)) +} + +// makeRelative makes the child path relative to the parent path. +// expects a _cleaned_ path. +func makeRelative(child, parent string) string { + return strings.TrimPrefix(child, dirName(parent)) +} + +type multipartIterator struct { + f *multipartDirectory + + curFile Node + curName string + err error +} + +func (it *multipartIterator) Name() string { + return it.curName +} + +func (it *multipartIterator) Node() Node { + return it.curFile +} + +func (it *multipartIterator) Next() bool { + if it.f.walker.reader == nil || it.err != nil { + return false + } + var part *multipart.Part + for { + part, it.err = it.f.walker.getPart() + if it.err != nil { + return false + } + + name := fileName(part) + + // Is the file in a different directory? + if !isChild(name, it.f.path) { + return false + } + + // Have we already entered this directory? + if it.curName != "" && isChild(name, path.Join(it.f.path, it.curName)) { + it.f.walker.consumePart() + continue + } + + // Make the path relative to the current directory. + name = makeRelative(name, it.f.path) + + // Check if we need to create a fake directory (more than one + // path component). + if idx := strings.IndexByte(name, '/'); idx >= 0 { + it.curName = name[:idx] + it.curFile = &multipartDirectory{ + path: path.Join(it.f.path, it.curName), + walker: it.f.walker, + } + return true + } + it.curName = name + + // Finally, advance to the next file. + it.curFile, it.err = it.f.walker.nextFile() + + return it.err == nil + } +} + +func (it *multipartIterator) Err() error { + // We use EOF to signal that this iterator is done. That way, we don't + // need to check every time `Next` is called. + if it.err == io.EOF { + return nil + } + return it.err +} + +func (f *multipartDirectory) Entries() DirIterator { + return &multipartIterator{f: f} +} + +func (f *multipartDirectory) Close() error { + if f.part != nil { + return f.part.Close() + } + return nil +} + +func (f *multipartDirectory) Size() (int64, error) { + return 0, ErrNotSupported +} + +var _ Directory = &multipartDirectory{} diff --git a/files/readerfile.go b/files/readerfile.go new file mode 100644 index 000000000..a03dae23f --- /dev/null +++ b/files/readerfile.go @@ -0,0 +1,81 @@ +package files + +import ( + "bytes" + "io" + "os" + "path/filepath" +) + +// ReaderFile is a implementation of File created from an `io.Reader`. +// ReaderFiles are never directories, and can be read from and closed. 
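For illustration, a minimal package-internal sketch of the three constructors declared below; demoReaderFiles and the /tmp path are made up.

```go
package files

import (
	"os"
	"strings"
)

// demoReaderFiles is a hypothetical helper showing the common ways to build a
// File: from bytes, from any io.Reader, and from an open *os.File.
func demoReaderFiles() (File, File, File, error) {
	// From a byte slice; the size is known up front.
	byteFile := NewBytesFile([]byte("hello"))

	// From an arbitrary io.Reader; Size() returns ErrNotSupported because no
	// os.FileInfo is attached.
	streamFile := NewReaderFile(strings.NewReader("streamed data"))

	// From an open file, keeping its absolute path and stat so the result
	// also implements FileInfo. The path is made up for illustration.
	f, err := os.Open("/tmp/example.txt")
	if err != nil {
		return nil, nil, nil, err
	}
	st, err := f.Stat()
	if err != nil {
		f.Close()
		return nil, nil, nil, err
	}
	pathFile, err := NewReaderPathFile("/tmp/example.txt", f, st)
	if err != nil {
		f.Close()
		return nil, nil, nil, err
	}
	return byteFile, streamFile, pathFile, nil
}
```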
+type ReaderFile struct { + abspath string + reader io.ReadCloser + stat os.FileInfo + + fsize int64 +} + +func NewBytesFile(b []byte) File { + return &ReaderFile{"", NewReaderFile(bytes.NewReader(b)), nil, int64(len(b))} +} + +func NewReaderFile(reader io.Reader) File { + return NewReaderStatFile(reader, nil) +} + +func NewReaderStatFile(reader io.Reader, stat os.FileInfo) File { + rc, ok := reader.(io.ReadCloser) + if !ok { + rc = io.NopCloser(reader) + } + + return &ReaderFile{"", rc, stat, -1} +} + +func NewReaderPathFile(path string, reader io.ReadCloser, stat os.FileInfo) (*ReaderFile, error) { + abspath, err := filepath.Abs(path) + if err != nil { + return nil, err + } + + return &ReaderFile{abspath, reader, stat, -1}, nil +} + +func (f *ReaderFile) AbsPath() string { + return f.abspath +} + +func (f *ReaderFile) Read(p []byte) (int, error) { + return f.reader.Read(p) +} + +func (f *ReaderFile) Close() error { + return f.reader.Close() +} + +func (f *ReaderFile) Stat() os.FileInfo { + return f.stat +} + +func (f *ReaderFile) Size() (int64, error) { + if f.stat == nil { + if f.fsize >= 0 { + return f.fsize, nil + } + return 0, ErrNotSupported + } + return f.stat.Size(), nil +} + +func (f *ReaderFile) Seek(offset int64, whence int) (int64, error) { + if s, ok := f.reader.(io.Seeker); ok { + return s.Seek(offset, whence) + } + + return 0, ErrNotSupported +} + +var _ File = &ReaderFile{} +var _ FileInfo = &ReaderFile{} diff --git a/files/serialfile.go b/files/serialfile.go new file mode 100644 index 000000000..176038cde --- /dev/null +++ b/files/serialfile.go @@ -0,0 +1,168 @@ +package files + +import ( + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" +) + +// serialFile implements Node, and reads from a path on the OS filesystem. +// No more than one file will be opened at a time. +type serialFile struct { + path string + files []os.FileInfo + stat os.FileInfo + filter *Filter +} + +type serialIterator struct { + files []os.FileInfo + path string + filter *Filter + + curName string + curFile Node + + err error +} + +// NewSerialFile takes a filepath, a bool specifying if hidden files should be included, +// and a fileInfo and returns a Node representing file, directory or special file. +func NewSerialFile(path string, includeHidden bool, stat os.FileInfo) (Node, error) { + filter, err := NewFilter("", nil, includeHidden) + if err != nil { + return nil, err + } + return NewSerialFileWithFilter(path, filter, stat) +} + +// NewSerialFileWith takes a filepath, a filter for determining which files should be +// operated upon if the filepath is a directory, and a fileInfo and returns a +// Node representing file, directory or special file. 
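As a usage sketch for the constructor that follows, here is a hypothetical package-internal helper combining it with the Filter type from filter.go; the path and ignore rules are made up.

```go
package files

import (
	"fmt"
	"os"
)

// demoSerialDirectory opens a local directory with ignore rules applied and
// lists its immediate entries.
func demoSerialDirectory() error {
	filter, err := NewFilter("", []string{"*.log", "node_modules/"}, false)
	if err != nil {
		return err
	}

	st, err := os.Stat("/tmp/project")
	if err != nil {
		return err
	}

	nd, err := NewSerialFileWithFilter("/tmp/project", filter, st)
	if err != nil {
		return err
	}
	defer nd.Close()

	dir, ok := nd.(Directory)
	if !ok {
		return ErrNotDirectory
	}

	it := dir.Entries()
	for it.Next() {
		fmt.Println(it.Name())
	}
	return it.Err()
}
```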
+func NewSerialFileWithFilter(path string, filter *Filter, stat os.FileInfo) (Node, error) { + switch mode := stat.Mode(); { + case mode.IsRegular(): + file, err := os.Open(path) + if err != nil { + return nil, err + } + return NewReaderPathFile(path, file, stat) + case mode.IsDir(): + // for directories, stat all of the contents first, so we know what files to + // open when Entries() is called + entries, err := os.ReadDir(path) + if err != nil { + return nil, err + } + contents := make([]fs.FileInfo, 0, len(entries)) + for _, entry := range entries { + content, err := entry.Info() + if err != nil { + return nil, err + } + contents = append(contents, content) + } + return &serialFile{path, contents, stat, filter}, nil + case mode&os.ModeSymlink != 0: + target, err := os.Readlink(path) + if err != nil { + return nil, err + } + return NewLinkFile(target, stat), nil + default: + return nil, fmt.Errorf("unrecognized file type for %s: %s", path, mode.String()) + } +} + +func (it *serialIterator) Name() string { + return it.curName +} + +func (it *serialIterator) Node() Node { + return it.curFile +} + +func (it *serialIterator) Next() bool { + // if there aren't any files left in the root directory, we're done + if len(it.files) == 0 { + return false + } + + stat := it.files[0] + it.files = it.files[1:] + for it.filter.ShouldExclude(stat) { + if len(it.files) == 0 { + return false + } + + stat = it.files[0] + it.files = it.files[1:] + } + + // open the next file + filePath := filepath.ToSlash(filepath.Join(it.path, stat.Name())) + + // recursively call the constructor on the next file + // if it's a regular file, we will open it as a ReaderFile + // if it's a directory, files in it will be opened serially + sf, err := NewSerialFileWithFilter(filePath, it.filter, stat) + if err != nil { + it.err = err + return false + } + + it.curName = stat.Name() + it.curFile = sf + return true +} + +func (it *serialIterator) Err() error { + return it.err +} + +func (f *serialFile) Entries() DirIterator { + return &serialIterator{ + path: f.path, + files: f.files, + filter: f.filter, + } +} + +func (f *serialFile) Close() error { + return nil +} + +func (f *serialFile) Stat() os.FileInfo { + return f.stat +} + +func (f *serialFile) Size() (int64, error) { + if !f.stat.IsDir() { + //something went terribly, terribly wrong + return 0, errors.New("serialFile is not a directory") + } + + var du int64 + err := filepath.Walk(f.path, func(p string, fi os.FileInfo, err error) error { + if err != nil || fi == nil { + return err + } + + if f.filter.ShouldExclude(fi) { + if fi.Mode().IsDir() { + return filepath.SkipDir + } + } else if fi.Mode().IsRegular() { + du += fi.Size() + } + + return nil + }) + + return du, err +} + +var _ Directory = &serialFile{} +var _ DirIterator = &serialIterator{} diff --git a/files/serialfile_test.go b/files/serialfile_test.go new file mode 100644 index 000000000..80c252a7e --- /dev/null +++ b/files/serialfile_test.go @@ -0,0 +1,194 @@ +package files + +import ( + "fmt" + "io" + "os" + "path/filepath" + "sort" + "strings" + "testing" +) + +func isFullPathHidden(p string) bool { + return strings.HasPrefix(p, ".") || strings.Contains(p, "/.") +} + +func TestSerialFile(t *testing.T) { + t.Run("Hidden/NoFilter", func(t *testing.T) { testSerialFile(t, true, false) }) + t.Run("Hidden/Filter", func(t *testing.T) { testSerialFile(t, true, true) }) + t.Run("NotHidden/NoFilter", func(t *testing.T) { testSerialFile(t, false, false) }) + t.Run("NotHidden/Filter", func(t *testing.T) { testSerialFile(t, 
false, true) }) +} + +func testSerialFile(t *testing.T, hidden, withIgnoreRules bool) { + tmppath, err := os.MkdirTemp("", "files-test") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmppath) + + testInputs := map[string]string{ + "1": "Some text!\n", + "2": "beep", + "3": "", + "4": "boop", + "5": "", + filepath.FromSlash("5/a"): "foobar", + ".6": "thing", + "7": "", + filepath.FromSlash("7/.foo"): "bla", + ".8": "", + filepath.FromSlash(".8/foo"): "bla", + } + fileFilter, err := NewFilter("", []string{"9", "10"}, hidden) + if err != nil { + t.Fatal(err) + } + if withIgnoreRules { + testInputs["9"] = "" + testInputs[filepath.FromSlash("9/b")] = "bebop" + testInputs["10"] = "" + testInputs[filepath.FromSlash("10/.c")] = "doowop" + } + + for p, c := range testInputs { + path := filepath.Join(tmppath, p) + if c != "" { + continue + } + if err := os.MkdirAll(path, 0777); err != nil { + t.Fatal(err) + } + } + + for p, c := range testInputs { + path := filepath.Join(tmppath, p) + if c == "" { + continue + } + if err := os.WriteFile(path, []byte(c), 0666); err != nil { + t.Fatal(err) + } + } + expectedPaths := make([]string, 0, 4) + expectedSize := int64(0) + +testInputs: + for p := range testInputs { + components := strings.Split(p, string(filepath.Separator)) + var stat os.FileInfo + for i := range components { + stat, err = os.Stat(filepath.Join( + append([]string{tmppath}, components[:i+1]...)..., + )) + if err != nil { + t.Fatal(err) + } + if fileFilter.ShouldExclude(stat) { + continue testInputs + } + } + expectedPaths = append(expectedPaths, p) + if stat.Mode().IsRegular() { + expectedSize += stat.Size() + } + } + + sort.Strings(expectedPaths) + + stat, err := os.Stat(tmppath) + if err != nil { + t.Fatal(err) + } + + sf, err := NewSerialFile(tmppath, hidden, stat) + if withIgnoreRules { + sf, err = NewSerialFileWithFilter(tmppath, fileFilter, stat) + } + if err != nil { + t.Fatal(err) + } + defer sf.Close() + + if size, err := sf.Size(); err != nil { + t.Fatalf("failed to determine size: %s", err) + } else if size != expectedSize { + t.Fatalf("expected size %d, got size %d", expectedSize, size) + } + + rootFound := false + actualPaths := make([]string, 0, len(expectedPaths)) + err = Walk(sf, func(path string, nd Node) error { + defer nd.Close() + + // root node. 
+ if path == "" { + if rootFound { + return fmt.Errorf("found root twice") + } + if sf != nd { + return fmt.Errorf("wrong root") + } + rootFound = true + return nil + } + actualPaths = append(actualPaths, path) + if !hidden && isFullPathHidden(path) { + return fmt.Errorf("found a hidden file") + } + components := filepath.SplitList(path) + for i := range components { + if fileFilter.Rules.MatchesPath(filepath.Join(components[:i+1]...)) { + return fmt.Errorf("found a file that should be excluded") + } + } + + data, ok := testInputs[path] + if !ok { + return fmt.Errorf("expected something at %q", path) + } + delete(testInputs, path) + + switch nd := nd.(type) { + case *Symlink: + return fmt.Errorf("didn't expect a symlink") + case Directory: + if data != "" { + return fmt.Errorf("expected a directory at %q", path) + } + case File: + actual, err := io.ReadAll(nd) + if err != nil { + return err + } + if string(actual) != data { + return fmt.Errorf("expected %q, got %q", data, string(actual)) + } + } + return nil + }) + if err != nil { + t.Fatal(err) + } + if !rootFound { + t.Fatal("didn't find the root") + } + + if len(expectedPaths) != len(actualPaths) { + t.Fatalf("expected %d paths, found %d", + len(expectedPaths), + len(actualPaths), + ) + } + + for i := range expectedPaths { + if expectedPaths[i] != actualPaths[i] { + t.Errorf( + "expected path %q does not match actual %q", + expectedPaths[i], + actualPaths[i], + ) + } + } +} diff --git a/files/slicedirectory.go b/files/slicedirectory.go new file mode 100644 index 000000000..d11656261 --- /dev/null +++ b/files/slicedirectory.go @@ -0,0 +1,97 @@ +package files + +import "sort" + +type fileEntry struct { + name string + file Node +} + +func (e fileEntry) Name() string { + return e.name +} + +func (e fileEntry) Node() Node { + return e.file +} + +func FileEntry(name string, file Node) DirEntry { + return fileEntry{ + name: name, + file: file, + } +} + +type sliceIterator struct { + files []DirEntry + n int +} + +func (it *sliceIterator) Name() string { + return it.files[it.n].Name() +} + +func (it *sliceIterator) Node() Node { + return it.files[it.n].Node() +} + +func (it *sliceIterator) Next() bool { + it.n++ + return it.n < len(it.files) +} + +func (it *sliceIterator) Err() error { + return nil +} + +// SliceFile implements Node, and provides simple directory handling. +// It contains children files, and is created from a `[]Node`. +// SliceFiles are always directories, and can't be read from or closed. 
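Before the type itself, a small package-internal sketch contrasting the two constructors defined below; demoSliceDirectories and the file names are made up.

```go
package files

// demoSliceDirectories is a hypothetical helper: NewMapDirectory sorts entries
// by name, while NewSliceDirectory preserves the order of the slice it is given.
func demoSliceDirectories() (Directory, Directory) {
	byMap := NewMapDirectory(map[string]Node{
		"b.txt": NewBytesFile([]byte("beta")),
		"a.txt": NewBytesFile([]byte("alpha")), // iterated first, names are sorted
	})

	bySlice := NewSliceDirectory([]DirEntry{
		FileEntry("b.txt", NewBytesFile([]byte("beta"))), // iterated in slice order
		FileEntry("a.txt", NewBytesFile([]byte("alpha"))),
	})

	return byMap, bySlice
}
```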
+type SliceFile struct {
+	files []DirEntry
+}
+
+func NewMapDirectory(f map[string]Node) Directory {
+	ents := make([]DirEntry, 0, len(f))
+	for name, nd := range f {
+		ents = append(ents, FileEntry(name, nd))
+	}
+	sort.Slice(ents, func(i, j int) bool {
+		return ents[i].Name() < ents[j].Name()
+	})
+
+	return NewSliceDirectory(ents)
+}
+
+func NewSliceDirectory(files []DirEntry) Directory {
+	return &SliceFile{files}
+}
+
+func (f *SliceFile) Entries() DirIterator {
+	return &sliceIterator{files: f.files, n: -1}
+}
+
+func (f *SliceFile) Close() error {
+	return nil
+}
+
+func (f *SliceFile) Length() int {
+	return len(f.files)
+}
+
+func (f *SliceFile) Size() (int64, error) {
+	var size int64
+
+	for _, file := range f.files {
+		s, err := file.Node().Size()
+		if err != nil {
+			return 0, err
+		}
+		size += s
+	}
+
+	return size, nil
+}
+
+var _ Directory = &SliceFile{}
+var _ DirEntry = fileEntry{}
diff --git a/files/tarwriter.go b/files/tarwriter.go
new file mode 100644
index 000000000..cecbcae42
--- /dev/null
+++ b/files/tarwriter.go
@@ -0,0 +1,137 @@
+package files
+
+import (
+	"archive/tar"
+	"errors"
+	"fmt"
+	"io"
+	"path"
+	"strings"
+	"time"
+)
+
+var (
+	ErrUnixFSPathOutsideRoot = errors.New("relative UnixFS paths outside the root are not allowed, use CAR instead")
+)
+
+type TarWriter struct {
+	TarW       *tar.Writer
+	baseDirSet bool
+	baseDir    string
+}
+
+// NewTarWriter wraps given io.Writer into a new tar writer
+func NewTarWriter(w io.Writer) (*TarWriter, error) {
+	return &TarWriter{
+		TarW: tar.NewWriter(w),
+	}, nil
+}
+
+func (w *TarWriter) writeDir(f Directory, fpath string) error {
+	if err := writeDirHeader(w.TarW, fpath); err != nil {
+		return err
+	}
+
+	it := f.Entries()
+	for it.Next() {
+		if err := w.WriteFile(it.Node(), path.Join(fpath, it.Name())); err != nil {
+			return err
+		}
+	}
+	return it.Err()
+}
+
+func (w *TarWriter) writeFile(f File, fpath string) error {
+	size, err := f.Size()
+	if err != nil {
+		return err
+	}
+
+	if err := writeFileHeader(w.TarW, fpath, uint64(size)); err != nil {
+		return err
+	}
+
+	if _, err := io.Copy(w.TarW, f); err != nil {
+		return err
+	}
+	w.TarW.Flush()
+	return nil
+}
+
+func validateTarFilePath(baseDir, fpath string) bool {
+	// Ensure the filepath has no ".", "..", etc within the known root directory.
+	fpath = path.Clean(fpath)
+
+	// If we have a non-empty baseDir, check if the filepath starts with baseDir.
+	// If not, we can exclude it immediately. For 'ipfs get' and for the gateway,
+	// the baseDir would be '{cid}.tar'.
+	if baseDir != "" && !strings.HasPrefix(path.Clean(fpath), baseDir) {
+		return false
+	}
+
+	// Otherwise, check if the path starts with '..' which would make it fall
+	// outside the root path. This works since the path has already been cleaned.
+	if strings.HasPrefix(fpath, "..") {
+		return false
+	}
+
+	return true
+}
+
+// WriteFile adds a node to the archive.
+func (w *TarWriter) WriteFile(nd Node, fpath string) error {
+	if !w.baseDirSet {
+		w.baseDirSet = true // Use a variable for this as baseDir may be an empty string.
+		w.baseDir = fpath
+	}
+
+	if !validateTarFilePath(w.baseDir, fpath) {
+		return ErrUnixFSPathOutsideRoot
+	}
+
+	switch nd := nd.(type) {
+	case *Symlink:
+		return writeSymlinkHeader(w.TarW, nd.Target, fpath)
+	case File:
+		return w.writeFile(nd, fpath)
+	case Directory:
+		return w.writeDir(nd, fpath)
+	default:
+		return fmt.Errorf("file type %T is not supported", nd)
+	}
+}
+
+// Close closes the tar writer.
+func (w *TarWriter) Close() error { + return w.TarW.Close() +} + +func writeDirHeader(w *tar.Writer, fpath string) error { + return w.WriteHeader(&tar.Header{ + Name: fpath, + Typeflag: tar.TypeDir, + Mode: 0777, + ModTime: time.Now().Truncate(time.Second), + // TODO: set mode, dates, etc. when added to unixFS + }) +} + +func writeFileHeader(w *tar.Writer, fpath string, size uint64) error { + return w.WriteHeader(&tar.Header{ + Name: fpath, + Size: int64(size), + Typeflag: tar.TypeReg, + Mode: 0644, + ModTime: time.Now().Truncate(time.Second), + // TODO: set mode, dates, etc. when added to unixFS + }) +} + +func writeSymlinkHeader(w *tar.Writer, target, fpath string) error { + return w.WriteHeader(&tar.Header{ + Name: fpath, + Linkname: target, + Mode: 0777, + Typeflag: tar.TypeSymlink, + }) +} diff --git a/files/tarwriter_test.go b/files/tarwriter_test.go new file mode 100644 index 000000000..0e1488e7f --- /dev/null +++ b/files/tarwriter_test.go @@ -0,0 +1,149 @@ +package files + +import ( + "archive/tar" + "errors" + "io" + "testing" + "time" +) + +func TestTarWriter(t *testing.T) { + tf := NewMapDirectory(map[string]Node{ + "file.txt": NewBytesFile([]byte(text)), + "boop": NewMapDirectory(map[string]Node{ + "a.txt": NewBytesFile([]byte("bleep")), + "b.txt": NewBytesFile([]byte("bloop")), + }), + "beep.txt": NewBytesFile([]byte("beep")), + }) + + pr, pw := io.Pipe() + tw, err := NewTarWriter(pw) + if err != nil { + t.Fatal(err) + } + tr := tar.NewReader(pr) + + go func() { + defer tw.Close() + if err := tw.WriteFile(tf, ""); err != nil { + t.Error(err) + } + }() + + var cur *tar.Header + + checkHeader := func(name string, typ byte, size int64) { + if cur.Name != name { + t.Errorf("got wrong name: %s != %s", cur.Name, name) + } + if cur.Typeflag != typ { + t.Errorf("got wrong type: %d != %d", cur.Typeflag, typ) + } + if cur.Size != size { + t.Errorf("got wrong size: %d != %d", cur.Size, size) + } + now := time.Now() + if cur.ModTime.After(now) { + t.Errorf("wrote timestamp in the future: %s (now) < %s", now, cur.ModTime) + } + } + + if cur, err = tr.Next(); err != nil { + t.Fatal(err) + } + checkHeader("", tar.TypeDir, 0) + + if cur, err = tr.Next(); err != nil { + t.Fatal(err) + } + checkHeader("beep.txt", tar.TypeReg, 4) + + if cur, err = tr.Next(); err != nil { + t.Fatal(err) + } + checkHeader("boop", tar.TypeDir, 0) + + if cur, err = tr.Next(); err != nil { + t.Fatal(err) + } + checkHeader("boop/a.txt", tar.TypeReg, 5) + + if cur, err = tr.Next(); err != nil { + t.Fatal(err) + } + checkHeader("boop/b.txt", tar.TypeReg, 5) + + if cur, err = tr.Next(); err != nil { + t.Fatal(err) + } + checkHeader("file.txt", tar.TypeReg, 13) + + if cur, err = tr.Next(); err != io.EOF { + t.Fatal(err) + } +} + +func TestTarWriterRelativePathInsideRoot(t *testing.T) { + tf := NewMapDirectory(map[string]Node{ + "file.txt": NewBytesFile([]byte(text)), + "boop": NewMapDirectory(map[string]Node{ + "../a.txt": NewBytesFile([]byte("bleep")), + "b.txt": NewBytesFile([]byte("bloop")), + }), + "beep.txt": NewBytesFile([]byte("beep")), + }) + + tw, err := NewTarWriter(io.Discard) + if err != nil { + t.Fatal(err) + } + + defer tw.Close() + if err := tw.WriteFile(tf, ""); err != nil { + t.Error(err) + } +} + +func TestTarWriterFailsFileOutsideRoot(t *testing.T) { + tf := NewMapDirectory(map[string]Node{ + "file.txt": NewBytesFile([]byte(text)), + "boop": NewMapDirectory(map[string]Node{ + "../../a.txt": NewBytesFile([]byte("bleep")), + "b.txt": NewBytesFile([]byte("bloop")), + }), + "beep.txt": 
NewBytesFile([]byte("beep")), + }) + + tw, err := NewTarWriter(io.Discard) + if err != nil { + t.Fatal(err) + } + + defer tw.Close() + if err := tw.WriteFile(tf, ""); !errors.Is(err, ErrUnixFSPathOutsideRoot) { + t.Errorf("unexpected error, wanted: %v; got: %v", ErrUnixFSPathOutsideRoot, err) + } +} + +func TestTarWriterFailsFileOutsideRootWithBaseDir(t *testing.T) { + tf := NewMapDirectory(map[string]Node{ + "../file.txt": NewBytesFile([]byte(text)), + "boop": NewMapDirectory(map[string]Node{ + "a.txt": NewBytesFile([]byte("bleep")), + "b.txt": NewBytesFile([]byte("bloop")), + }), + "beep.txt": NewBytesFile([]byte("beep")), + }) + + tw, err := NewTarWriter(io.Discard) + if err != nil { + t.Fatal(err) + } + + defer tw.Close() + if err := tw.WriteFile(tf, "test.tar"); !errors.Is(err, ErrUnixFSPathOutsideRoot) { + t.Errorf("unexpected error, wanted: %v; got: %v", ErrUnixFSPathOutsideRoot, err) + } +} diff --git a/files/util.go b/files/util.go new file mode 100644 index 000000000..e727e7ae6 --- /dev/null +++ b/files/util.go @@ -0,0 +1,25 @@ +package files + +// ToFile is an alias for n.(File). If the file isn't a regular file, nil value +// will be returned +func ToFile(n Node) File { + f, _ := n.(File) + return f +} + +// ToDir is an alias for n.(Directory). If the file isn't directory, a nil value +// will be returned +func ToDir(n Node) Directory { + d, _ := n.(Directory) + return d +} + +// FileFromEntry calls ToFile on Node in the given entry +func FileFromEntry(e DirEntry) File { + return ToFile(e.Node()) +} + +// DirFromEntry calls ToDir on Node in the given entry +func DirFromEntry(e DirEntry) Directory { + return ToDir(e.Node()) +} diff --git a/files/walk.go b/files/walk.go new file mode 100644 index 000000000..f23e7e47f --- /dev/null +++ b/files/walk.go @@ -0,0 +1,27 @@ +package files + +import ( + "path/filepath" +) + +// Walk walks a file tree, like `os.Walk`. +func Walk(nd Node, cb func(fpath string, nd Node) error) error { + var helper func(string, Node) error + helper = func(path string, nd Node) error { + if err := cb(path, nd); err != nil { + return err + } + dir, ok := nd.(Directory) + if !ok { + return nil + } + iter := dir.Entries() + for iter.Next() { + if err := helper(filepath.Join(path, iter.Name()), iter.Node()); err != nil { + return err + } + } + return iter.Err() + } + return helper("", nd) +} diff --git a/files/webfile.go b/files/webfile.go new file mode 100644 index 000000000..594b81c82 --- /dev/null +++ b/files/webfile.go @@ -0,0 +1,89 @@ +package files + +import ( + "errors" + "fmt" + "io" + "net/http" + "net/url" + "os" +) + +// WebFile is an implementation of File which reads it +// from a Web URL (http). A GET request will be performed +// against the source when calling Read(). +type WebFile struct { + body io.ReadCloser + url *url.URL + contentLength int64 +} + +// NewWebFile creates a WebFile with the given URL, which +// will be used to perform the GET request on Read(). +func NewWebFile(url *url.URL) *WebFile { + return &WebFile{ + url: url, + } +} + +func (wf *WebFile) start() error { + if wf.body == nil { + s := wf.url.String() + resp, err := http.Get(s) + if err != nil { + return err + } + if resp.StatusCode < 200 || resp.StatusCode > 299 { + return fmt.Errorf("got non-2XX status code %d: %s", resp.StatusCode, s) + } + wf.body = resp.Body + wf.contentLength = resp.ContentLength + } + return nil +} + +// Read reads the File from it's web location. 
On the first +// call to Read, a GET request will be performed against the +// WebFile's URL, using Go's default HTTP client. Any further +// reads will keep reading from the HTTP Request body. +func (wf *WebFile) Read(b []byte) (int, error) { + if err := wf.start(); err != nil { + return 0, err + } + return wf.body.Read(b) +} + +// Close closes the WebFile (or the request body). +func (wf *WebFile) Close() error { + if wf.body == nil { + return nil + } + return wf.body.Close() +} + +// TODO: implement +func (wf *WebFile) Seek(offset int64, whence int) (int64, error) { + return 0, ErrNotSupported +} + +func (wf *WebFile) Size() (int64, error) { + if err := wf.start(); err != nil { + return 0, err + } + if wf.contentLength < 0 { + return -1, errors.New("Content-Length hearer was not set") + } + + return wf.contentLength, nil +} + +func (wf *WebFile) AbsPath() string { + return wf.url.String() +} + +func (wf *WebFile) Stat() os.FileInfo { + return nil +} + +var _ File = &WebFile{} +var _ FileInfo = &WebFile{} diff --git a/files/webfile_test.go b/files/webfile_test.go new file mode 100644 index 000000000..94cddb5d2 --- /dev/null +++ b/files/webfile_test.go @@ -0,0 +1,97 @@ +package files + +import ( + "fmt" + "io" + "net/http" + "net/http/httptest" + "net/url" + "testing" +) + +func TestWebFile(t *testing.T) { + const content = "Hello world!" + s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprint(w, content) + })) + defer s.Close() + + u, err := url.Parse(s.URL) + if err != nil { + t.Fatal(err) + } + wf := NewWebFile(u) + body, err := io.ReadAll(wf) + if err != nil { + t.Fatal(err) + } + if string(body) != content { + t.Fatalf("expected %q but got %q", content, string(body)) + } +} + +func TestWebFile_notFound(t *testing.T) { + s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Error(w, "File not found.", http.StatusNotFound) + })) + defer s.Close() + + u, err := url.Parse(s.URL) + if err != nil { + t.Fatal(err) + } + wf := NewWebFile(u) + _, err = io.ReadAll(wf) + if err == nil { + t.Fatal("expected error") + } +} + +func TestWebFileSize(t *testing.T) { + body := "Hello world!" + s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprint(w, body) + })) + defer s.Close() + + u, err := url.Parse(s.URL) + if err != nil { + t.Fatal(err) + } + + // Read size before reading file. + + wf1 := NewWebFile(u) + if size, err := wf1.Size(); err != nil { + t.Error(err) + } else if int(size) != len(body) { + t.Errorf("expected size to be %d, got %d", len(body), size) + } + + actual, err := io.ReadAll(wf1) + if err != nil { + t.Fatal(err) + } + if string(actual) != body { + t.Fatal("should have read the web file") + } + + wf1.Close() + + // Read size after reading file. 
+ + wf2 := NewWebFile(u) + actual, err = io.ReadAll(wf2) + if err != nil { + t.Fatal(err) + } + if string(actual) != body { + t.Fatal("should have read the web file") + } + + if size, err := wf2.Size(); err != nil { + t.Error(err) + } else if int(size) != len(body) { + t.Errorf("expected size to be %d, got %d", len(body), size) + } +} diff --git a/go.mod b/go.mod index a04266b34..7f40a640f 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.19 require ( github.com/benbjohnson/clock v1.3.0 + github.com/crackcomm/go-gitignore v0.0.0-20170627025303-887ab5e44cc3 github.com/gorilla/mux v1.8.0 github.com/ipfs/go-cid v0.3.2 github.com/ipfs/go-ipns v0.3.0 @@ -16,6 +17,7 @@ require ( github.com/samber/lo v1.36.0 github.com/stretchr/testify v1.8.1 go.opencensus.io v0.23.0 + golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab ) require ( @@ -46,7 +48,6 @@ require ( go.uber.org/zap v1.23.0 // indirect golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e // indirect golang.org/x/exp v0.0.0-20220916125017-b168a2c6b86b // indirect - golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab // indirect gopkg.in/yaml.v3 v3.0.1 // indirect lukechampine.com/blake3 v1.1.7 // indirect ) diff --git a/go.sum b/go.sum index d21fda2df..ab57d0a12 100644 --- a/go.sum +++ b/go.sum @@ -6,6 +6,8 @@ github.com/benbjohnson/clock v1.3.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZx github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/crackcomm/go-gitignore v0.0.0-20170627025303-887ab5e44cc3 h1:HVTnpeuvF6Owjd5mniCL8DEXo7uYXdQEmOP4FJbV5tg= +github.com/crackcomm/go-gitignore v0.0.0-20170627025303-887ab5e44cc3/go.mod h1:p1d6YEZWvFzEh4KLyvBcVSnrfNDDvK2zfK/4x2v/4pE= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
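Taken together, the pieces added in this diff compose naturally. The following package-internal sketch shows one way they might be combined; the demo* function names, paths, and the archive name are made up and are not part of this change.

```go
package files

import (
	"fmt"
	"io"
	"os"
)

// demoListTree prints every path in a file tree. Walk visits the root first
// (with an empty path) and then descends into directories.
func demoListTree(root Node) error {
	return Walk(root, func(fpath string, nd Node) error {
		fmt.Println(fpath)
		return nil
	})
}

// demoCopyTree reads a directory from disk (skipping hidden files) and writes
// a copy with WriteTo, which refuses to overwrite existing paths and rejects
// unsafe entry names such as "." or "..". The paths are made up.
func demoCopyTree(srcPath, dstPath string) error {
	st, err := os.Stat(srcPath)
	if err != nil {
		return err
	}
	root, err := NewSerialFile(srcPath, false, st)
	if err != nil {
		return err
	}
	defer root.Close()
	return WriteTo(root, dstPath)
}

// demoTarTree streams a directory as a tar archive. The first WriteFile call
// fixes the base directory used by validateTarFilePath, so later entries that
// resolve outside it fail with ErrUnixFSPathOutsideRoot.
func demoTarTree(dir Directory, w io.Writer) error {
	tw, err := NewTarWriter(w)
	if err != nil {
		return err
	}
	if err := tw.WriteFile(dir, "archive.tar"); err != nil {
		tw.Close()
		return err
	}
	return tw.Close()
}
```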