diff --git a/cmd/car/extract.go b/cmd/car/extract.go
index a7bdff4..91c0c03 100644
--- a/cmd/car/extract.go
+++ b/cmd/car/extract.go
@@ -2,26 +2,18 @@ package main
import (
"context"
- "errors"
"fmt"
"io"
"os"
- "path"
- "path/filepath"
"runtime"
"strings"
"sync"
"github.com/ipfs/go-cid"
- "github.com/ipfs/go-unixfsnode"
- "github.com/ipfs/go-unixfsnode/data"
- "github.com/ipfs/go-unixfsnode/file"
+ "github.com/ipld/go-car/cmd/car/lib"
"github.com/ipld/go-car/v2"
carstorage "github.com/ipld/go-car/v2/storage"
- dagpb "github.com/ipld/go-codec-dagpb"
- "github.com/ipld/go-ipld-prime"
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
- basicnode "github.com/ipld/go-ipld-prime/node/basic"
"github.com/ipld/go-ipld-prime/storage"
"github.com/urfave/cli/v2"
)
@@ -86,7 +78,7 @@ func ExtractCar(c *cli.Context) error {
var extractedFiles int
for _, root := range roots {
- count, err := extractRoot(c, &ls, root, outputDir, path)
+ count, err := lib.ExtractToDir(c.Context, &ls, root, outputDir, path, c.IsSet("verbose"), c.App.ErrWriter)
if err != nil {
return err
}
@@ -101,260 +93,6 @@ func ExtractCar(c *cli.Context) error {
return nil
}
-func extractRoot(c *cli.Context, ls *ipld.LinkSystem, root cid.Cid, outputDir string, path []string) (int, error) {
- if root.Prefix().Codec == cid.Raw {
- if c.IsSet("verbose") {
- fmt.Fprintf(c.App.ErrWriter, "skipping raw root %s\n", root)
- }
- return 0, nil
- }
-
- pbn, err := ls.Load(ipld.LinkContext{}, cidlink.Link{Cid: root}, dagpb.Type.PBNode)
- if err != nil {
- return 0, err
- }
- pbnode := pbn.(dagpb.PBNode)
-
- ufn, err := unixfsnode.Reify(ipld.LinkContext{}, pbnode, ls)
- if err != nil {
- return 0, err
- }
-
- var outputResolvedDir string
- if outputDir != "-" {
- outputResolvedDir, err = filepath.EvalSymlinks(outputDir)
- if err != nil {
- return 0, err
- }
- if _, err := os.Stat(outputResolvedDir); os.IsNotExist(err) {
- if err := os.Mkdir(outputResolvedDir, 0755); err != nil {
- return 0, err
- }
- }
- }
-
- count, err := extractDir(c, ls, ufn, outputResolvedDir, "/", path)
- if err != nil {
- if !errors.Is(err, ErrNotDir) {
- return 0, fmt.Errorf("%s: %w", root, err)
- }
-
- // if it's not a directory, it's a file.
- ufsData, err := pbnode.LookupByString("Data")
- if err != nil {
- return 0, err
- }
- ufsBytes, err := ufsData.AsBytes()
- if err != nil {
- return 0, err
- }
- ufsNode, err := data.DecodeUnixFSData(ufsBytes)
- if err != nil {
- return 0, err
- }
- var outputName string
- if outputDir != "-" {
- outputName = filepath.Join(outputResolvedDir, "unknown")
- }
- if ufsNode.DataType.Int() == data.Data_File || ufsNode.DataType.Int() == data.Data_Raw {
- if err := extractFile(c, ls, pbnode, outputName); err != nil {
- return 0, err
- }
- }
- return 1, nil
- }
-
- return count, nil
-}
-
-func resolvePath(root, pth string) (string, error) {
- rp, err := filepath.Rel("/", pth)
- if err != nil {
- return "", fmt.Errorf("couldn't check relative-ness of %s: %w", pth, err)
- }
- joined := path.Join(root, rp)
-
- basename := path.Dir(joined)
- final, err := filepath.EvalSymlinks(basename)
- if err != nil {
- return "", fmt.Errorf("couldn't eval symlinks in %s: %w", basename, err)
- }
- if final != path.Clean(basename) {
- return "", fmt.Errorf("path attempts to redirect through symlinks")
- }
- return joined, nil
-}
-
-func extractDir(c *cli.Context, ls *ipld.LinkSystem, n ipld.Node, outputRoot, outputPath string, matchPath []string) (int, error) {
- if outputRoot != "" {
- dirPath, err := resolvePath(outputRoot, outputPath)
- if err != nil {
- return 0, err
- }
- // make the directory.
- if err := os.MkdirAll(dirPath, 0755); err != nil {
- return 0, err
- }
- }
-
- if n.Kind() != ipld.Kind_Map {
- return 0, ErrNotDir
- }
-
- subPath := matchPath
- if len(matchPath) > 0 {
- subPath = matchPath[1:]
- }
-
- extractElement := func(name string, n ipld.Node) (int, error) {
- var nextRes string
- if outputRoot != "" {
- var err error
- nextRes, err = resolvePath(outputRoot, path.Join(outputPath, name))
- if err != nil {
- return 0, err
- }
- if c.IsSet("verbose") {
- fmt.Fprintf(c.App.ErrWriter, "%s\n", nextRes)
- }
- }
-
- if n.Kind() != ipld.Kind_Link {
- return 0, fmt.Errorf("unexpected map value for %s at %s", name, outputPath)
- }
- // a directory may be represented as a map of name: if unixADL is applied
- vl, err := n.AsLink()
- if err != nil {
- return 0, err
- }
- dest, err := ls.Load(ipld.LinkContext{}, vl, basicnode.Prototype.Any)
- if err != nil {
- if nf, ok := err.(interface{ NotFound() bool }); ok && nf.NotFound() {
- fmt.Fprintf(c.App.ErrWriter, "data for entry not found: %s (skipping...)\n", path.Join(outputPath, name))
- return 0, nil
- }
- return 0, err
- }
- // degenerate files are handled here.
- if dest.Kind() == ipld.Kind_Bytes {
- if err := extractFile(c, ls, dest, nextRes); err != nil {
- return 0, err
- }
- return 1, nil
- }
-
- // dir / pbnode
- pbb := dagpb.Type.PBNode.NewBuilder()
- if err := pbb.AssignNode(dest); err != nil {
- return 0, err
- }
- pbnode := pbb.Build().(dagpb.PBNode)
-
- // interpret dagpb 'data' as unixfs data and look at type.
- ufsData, err := pbnode.LookupByString("Data")
- if err != nil {
- return 0, err
- }
- ufsBytes, err := ufsData.AsBytes()
- if err != nil {
- return 0, err
- }
- ufsNode, err := data.DecodeUnixFSData(ufsBytes)
- if err != nil {
- return 0, err
- }
-
- switch ufsNode.DataType.Int() {
- case data.Data_Directory, data.Data_HAMTShard:
- ufn, err := unixfsnode.Reify(ipld.LinkContext{}, pbnode, ls)
- if err != nil {
- return 0, err
- }
- return extractDir(c, ls, ufn, outputRoot, path.Join(outputPath, name), subPath)
- case data.Data_File, data.Data_Raw:
- if err := extractFile(c, ls, pbnode, nextRes); err != nil {
- return 0, err
- }
- return 1, nil
- case data.Data_Symlink:
- if nextRes == "" {
- return 0, fmt.Errorf("cannot extract a symlink to stdout")
- }
- data := ufsNode.Data.Must().Bytes()
- if err := os.Symlink(string(data), nextRes); err != nil {
- return 0, err
- }
- return 1, nil
- default:
- return 0, fmt.Errorf("unknown unixfs type: %d", ufsNode.DataType.Int())
- }
- }
-
- // specific path segment
- if len(matchPath) > 0 {
- val, err := n.LookupByString(matchPath[0])
- if err != nil {
- return 0, err
- }
- return extractElement(matchPath[0], val)
- }
-
- if outputPath == "-" && len(matchPath) == 0 {
- return 0, fmt.Errorf("cannot extract a directory to stdout, use a path to extract a specific file")
- }
-
- // everything
- var count int
- var shardSkip int
- mi := n.MapIterator()
- for !mi.Done() {
- key, val, err := mi.Next()
- if err != nil {
- if nf, ok := err.(interface{ NotFound() bool }); ok && nf.NotFound() {
- shardSkip++
- continue
- }
- return 0, err
- }
- ks, err := key.AsString()
- if err != nil {
- return 0, err
- }
- ecount, err := extractElement(ks, val)
- if err != nil {
- return 0, err
- }
- count += ecount
- }
- if shardSkip > 0 {
- fmt.Fprintf(c.App.ErrWriter, "data for entry not found for %d unknown sharded entries (skipped...)\n", shardSkip)
- }
- return count, nil
-}
-
-func extractFile(c *cli.Context, ls *ipld.LinkSystem, n ipld.Node, outputName string) error {
- node, err := file.NewUnixFSFile(c.Context, n, ls)
- if err != nil {
- return err
- }
- nlr, err := node.AsLargeBytes()
- if err != nil {
- return err
- }
- var f *os.File
- if outputName == "" {
- f = os.Stdout
- } else {
- f, err = os.Create(outputName)
- if err != nil {
- return err
- }
- defer f.Close()
- }
- _, err = io.Copy(f, nlr)
- return err
-}
-
// TODO: dedupe this with lassie, probably into go-unixfsnode
func pathSegments(path string) ([]string, error) {
segments := strings.Split(path, "/")
diff --git a/cmd/car/filter.go b/cmd/car/filter.go
index 3c16304..d4c6c1d 100644
--- a/cmd/car/filter.go
+++ b/cmd/car/filter.go
@@ -8,8 +8,7 @@ import (
"strings"
"github.com/ipfs/go-cid"
- carv2 "github.com/ipld/go-car/v2"
- "github.com/ipld/go-car/v2/blockstore"
+ "github.com/ipld/go-car/cmd/car/lib"
"github.com/urfave/cli/v2"
)
@@ -19,16 +18,7 @@ func FilterCar(c *cli.Context) error {
return fmt.Errorf("an output filename must be provided")
}
- fd, err := os.Open(c.Args().First())
- if err != nil {
- return err
- }
- defer fd.Close()
- rd, err := carv2.NewBlockReader(fd)
- if err != nil {
- return err
- }
-
+ var err error
// Get the set of CIDs from stdin.
inStream := os.Stdin
if c.IsSet("cidFile") {
@@ -48,83 +38,7 @@ func FilterCar(c *cli.Context) error {
fmt.Printf("filtering to %d cids\n", len(cidMap))
}
- outRoots := make([]cid.Cid, 0)
- for _, r := range rd.Roots {
- if matchFilter(c, r, cidMap) {
- outRoots = append(outRoots, r)
- }
- }
-
- version := c.Int("version")
- options := []carv2.Option{}
- switch version {
- case 1:
- options = []carv2.Option{blockstore.WriteAsCarV1(true)}
- case 2:
- // already the default
- default:
- return fmt.Errorf("invalid CAR version %d", c.Int("version"))
- }
-
- outPath := c.Args().Get(1)
- if !c.Bool("append") {
- if _, err := os.Stat(outPath); err == nil || !os.IsNotExist(err) {
- // output to an existing file.
- if err := os.Truncate(outPath, 0); err != nil {
- return err
- }
- }
- } else {
- if version != 2 {
- return fmt.Errorf("can only append to version 2 car files")
- }
-
- // roots will need to be whatever is in the output already.
- cv2r, err := carv2.OpenReader(outPath)
- if err != nil {
- return err
- }
- if cv2r.Version != 2 {
- return fmt.Errorf("can only append to version 2 car files")
- }
- outRoots, err = cv2r.Roots()
- if err != nil {
- return err
- }
- _ = cv2r.Close()
- }
-
- if len(outRoots) == 0 {
- fmt.Fprintf(os.Stderr, "warning: no roots defined after filtering\n")
- }
-
- bs, err := blockstore.OpenReadWrite(outPath, outRoots, options...)
- if err != nil {
- return err
- }
-
- for {
- blk, err := rd.Next()
- if err != nil {
- if err == io.EOF {
- break
- }
- return err
- }
- if matchFilter(c, blk.Cid(), cidMap) {
- if err := bs.Put(c.Context, blk); err != nil {
- return err
- }
- }
- }
- return bs.Finalize()
-}
-
-func matchFilter(ctx *cli.Context, c cid.Cid, cidMap map[cid.Cid]struct{}) bool {
- if _, ok := cidMap[c]; ok {
- return !ctx.Bool("inverse")
- }
- return ctx.Bool("inverse")
+	return lib.FilterCar(c.Context, c.Args().First(), c.Args().Get(1), cidMap, c.Bool("inverse"), c.Int("version"), c.Bool("append")) // the flag is read as "inverse", matching the matchFilter helper this call replaces
}
func parseCIDS(r io.Reader) (map[cid.Cid]struct{}, error) {
diff --git a/cmd/car/get.go b/cmd/car/get.go
index f5d5b1c..df71f4d 100644
--- a/cmd/car/get.go
+++ b/cmd/car/get.go
@@ -199,7 +199,7 @@ func writeCarV2(ctx context.Context, rootCid cid.Cid, output string, bs *blockst
return outStore.Finalize()
}
-func writeCarV1(rootCid cid.Cid, output string, bs *blockstore.ReadOnly, strict bool, sel datamodel.Node, linkVisitOnlyOnce bool) error {
+func writeCarV1(rootCid cid.Cid, output string, bs *blockstore.ReadOnly, _ bool, sel datamodel.Node, linkVisitOnlyOnce bool) error {
opts := make([]car.Option, 0)
if linkVisitOnlyOnce {
opts = append(opts, car.TraverseLinksOnlyOnce())
diff --git a/cmd/car/inspect.go b/cmd/car/inspect.go
index f320500..512cd8a 100644
--- a/cmd/car/inspect.go
+++ b/cmd/car/inspect.go
@@ -1,15 +1,10 @@
package main
import (
- "bytes"
"fmt"
- "io"
"os"
- "sort"
- "strings"
- carv2 "github.com/ipld/go-car/v2"
- "github.com/multiformats/go-multicodec"
+ "github.com/ipld/go-car/cmd/car/lib"
"github.com/urfave/cli/v2"
)
@@ -23,115 +18,10 @@ func InspectCar(c *cli.Context) (err error) {
}
}
- rd, err := carv2.NewReader(inStream, carv2.ZeroLengthSectionAsEOF(true))
+ rep, err := lib.InspectCar(inStream, c.Bool("full"))
if err != nil {
return err
}
- stats, err := rd.Inspect(c.IsSet("full"))
- if err != nil {
- return err
- }
-
- if stats.Version == 1 && c.IsSet("full") { // check that we've read all the data
- got, err := inStream.Read(make([]byte, 1)) // force EOF
- if err != nil && err != io.EOF {
- return err
- } else if got > 0 {
- return fmt.Errorf("unexpected data after EOF: %d", got)
- }
- }
-
- var v2s string
- if stats.Version == 2 {
- idx := "(none)"
- if stats.IndexCodec != 0 {
- idx = stats.IndexCodec.String()
- }
- var buf bytes.Buffer
- stats.Header.Characteristics.WriteTo(&buf)
- v2s = fmt.Sprintf(`Characteristics: %x
-Data offset: %d
-Data (payload) length: %d
-Index offset: %d
-Index type: %s
-`, buf.Bytes(), stats.Header.DataOffset, stats.Header.DataSize, stats.Header.IndexOffset, idx)
- }
-
- var roots strings.Builder
- switch len(stats.Roots) {
- case 0:
- roots.WriteString(" (none)")
- case 1:
- roots.WriteString(" ")
- roots.WriteString(stats.Roots[0].String())
- default:
- for _, r := range stats.Roots {
- roots.WriteString("\n\t")
- roots.WriteString(r.String())
- }
- }
-
- var codecs strings.Builder
- {
- keys := make([]int, len(stats.CodecCounts))
- i := 0
- for codec := range stats.CodecCounts {
- keys[i] = int(codec)
- i++
- }
- sort.Ints(keys)
- for _, code := range keys {
- codec := multicodec.Code(code)
- codecs.WriteString(fmt.Sprintf("\n\t%s: %d", codec, stats.CodecCounts[codec]))
- }
- }
-
- var hashers strings.Builder
- {
- keys := make([]int, len(stats.MhTypeCounts))
- i := 0
- for codec := range stats.MhTypeCounts {
- keys[i] = int(codec)
- i++
- }
- sort.Ints(keys)
- for _, code := range keys {
- codec := multicodec.Code(code)
- hashers.WriteString(fmt.Sprintf("\n\t%s: %d", codec, stats.MhTypeCounts[codec]))
- }
- }
-
- rp := "No"
- if stats.RootsPresent {
- rp = "Yes"
- }
-
- pfmt := `Version: %d
-%sRoots:%s
-Root blocks present in data: %s
-Block count: %d
-Min / average / max block length (bytes): %d / %d / %d
-Min / average / max CID length (bytes): %d / %d / %d
-Block count per codec:%s
-CID count per multihash:%s
-`
-
- fmt.Printf(
- pfmt,
- stats.Version,
- v2s,
- roots.String(),
- rp,
- stats.BlockCount,
- stats.MinBlockLength,
- stats.AvgBlockLength,
- stats.MaxBlockLength,
- stats.MinCidLength,
- stats.AvgCidLength,
- stats.MaxCidLength,
- codecs.String(),
- hashers.String(),
- )
-
+ fmt.Print(rep.String())
return nil
}
diff --git a/cmd/car/lib/extract.go b/cmd/car/lib/extract.go
new file mode 100644
index 0000000..51ac959
--- /dev/null
+++ b/cmd/car/lib/extract.go
@@ -0,0 +1,301 @@
+package lib
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "path"
+ "path/filepath"
+
+ "github.com/ipfs/go-cid"
+ "github.com/ipfs/go-unixfsnode"
+ "github.com/ipfs/go-unixfsnode/data"
+ "github.com/ipfs/go-unixfsnode/file"
+ carstorage "github.com/ipld/go-car/v2/storage"
+ dagpb "github.com/ipld/go-codec-dagpb"
+ "github.com/ipld/go-ipld-prime"
+ cidlink "github.com/ipld/go-ipld-prime/linking/cid"
+ "github.com/ipld/go-ipld-prime/node/basicnode"
+)
+
+// ErrNotDir is returned by extractDir when the node it is asked to walk is not
+// map-kinded. ExtractToDir checks for it with errors.Is to fall back to
+// extracting the root as a single file.
+var ErrNotDir = errors.New("not a directory")
+
+// ExtractFromFile opens the CAR at carPath and extracts every root listed by
+// its storage into outputDir. Each root is handed to ExtractToDir with an empty
+// path filter and verbose output disabled; logger is passed through unchanged.
+// Extraction stops at the first error, which is returned.
+func ExtractFromFile(c context.Context, carPath string, outputDir string, logger io.Writer) error {
+	carFile, err := os.Open(carPath)
+	if err != nil {
+		return err
+	}
+	// The link system below reads through store, which is opened on carFile,
+	// so the file has to stay open until every root has been extracted.
+	defer carFile.Close()
+
+	store, err := carstorage.OpenReadable(carFile)
+	if err != nil {
+		return err
+	}
+	roots := store.Roots()
+
+	ls := cidlink.DefaultLinkSystem()
+	ls.TrustedStorage = true
+	ls.SetReadStorage(store)
+
+	for _, root := range roots {
+		if _, err := ExtractToDir(c, &ls, root, outputDir, []string{}, false, logger); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// ExtractToDir extracts the UnixFS DAG rooted at root, loaded through ls, into
+// outputDir. An outputDir of "-" selects stdout instead of the filesystem.
+// When path is non-empty, only the entry reached by following those segments
+// from the root is extracted. With verbose set, progress lines are written to
+// logger; notices about skipped entries are written to logger regardless.
+//
+// It returns the number of entries extracted. A root with the raw codec is
+// skipped and reported as zero.
+func ExtractToDir(c context.Context, ls *ipld.LinkSystem, root cid.Cid, outputDir string, path []string, verbose bool, logger io.Writer) (int, error) {
+	if root.Prefix().Codec == cid.Raw {
+		if verbose {
+			fmt.Fprintf(logger, "skipping raw root %s\n", root)
+		}
+		return 0, nil
+	}
+
+	pbn, err := ls.Load(ipld.LinkContext{}, cidlink.Link{Cid: root}, dagpb.Type.PBNode)
+	if err != nil {
+		return 0, err
+	}
+	// Checked assertion: fail with an error rather than a panic if the link
+	// system hands back something other than a dag-pb node.
+	pbnode, ok := pbn.(dagpb.PBNode)
+	if !ok {
+		return 0, fmt.Errorf("%s: unexpected node type %T", root, pbn)
+	}
+
+	ufn, err := unixfsnode.Reify(ipld.LinkContext{}, pbnode, ls)
+	if err != nil {
+		return 0, err
+	}
+
+	var outputResolvedDir string
+	if outputDir != "-" {
+		// Create the directory before resolving it: EvalSymlinks fails on a
+		// path that does not exist, so resolving first would make this
+		// creation step unreachable.
+		if _, err := os.Stat(outputDir); os.IsNotExist(err) {
+			if err := os.Mkdir(outputDir, 0755); err != nil {
+				return 0, err
+			}
+		}
+		outputResolvedDir, err = filepath.EvalSymlinks(outputDir)
+		if err != nil {
+			return 0, err
+		}
+	}
+
+	count, err := extractDir(c, ls, ufn, outputResolvedDir, "/", path, verbose, logger)
+	if err == nil {
+		return count, nil
+	}
+	if !errors.Is(err, ErrNotDir) {
+		return 0, fmt.Errorf("%s: %w", root, err)
+	}
+
+	// if it's not a directory, it's a file.
+	ufsData, err := pbnode.LookupByString("Data")
+	if err != nil {
+		return 0, err
+	}
+	ufsBytes, err := ufsData.AsBytes()
+	if err != nil {
+		return 0, err
+	}
+	ufsNode, err := data.DecodeUnixFSData(ufsBytes)
+	if err != nil {
+		return 0, err
+	}
+	// A bare root carries no name, so it is written as "unknown" inside the
+	// output directory; an empty outputName sends it to stdout.
+	var outputName string
+	if outputDir != "-" {
+		outputName = filepath.Join(outputResolvedDir, "unknown")
+	}
+	if ufsNode.DataType.Int() == data.Data_File || ufsNode.DataType.Int() == data.Data_Raw {
+		if err := extractFile(c, ls, pbnode, outputName); err != nil {
+			return 0, err
+		}
+	}
+	// NOTE(review): 1 is reported even when the data type is neither file nor
+	// raw and nothing was written; confirm that is intended.
+	return 1, nil
+}
+
+// resolvePath maps pth, a slash-separated path rooted at "/", onto the output
+// directory root and returns the joined OS path.
+//
+// It returns an error when the parent directory of the result cannot be
+// resolved (for example because it does not exist yet), or when resolving
+// symlinks in that parent yields a different path than the cleaned parent
+// itself. The latter rejects output that would be redirected through a
+// symlink.
+func resolvePath(root, pth string) (string, error) {
+	rp, err := filepath.Rel("/", pth)
+	if err != nil {
+		return "", fmt.Errorf("couldn't check relative-ness of %s: %w", pth, err)
+	}
+	// root is an OS path, so stay within path/filepath: the slash-only path
+	// package would pick the wrong parent directory wherever the separator
+	// is not "/".
+	joined := filepath.Join(root, rp)
+
+	parent := filepath.Dir(joined)
+	final, err := filepath.EvalSymlinks(parent)
+	if err != nil {
+		return "", fmt.Errorf("couldn't eval symlinks in %s: %w", parent, err)
+	}
+	if final != filepath.Clean(parent) {
+		return "", fmt.Errorf("path attempts to redirect through symlinks")
+	}
+	return joined, nil
+}
+
+// extractDir treats the map-kinded node n as a UnixFS directory and extracts
+// its entries below outputRoot at the slash-separated outputPath.
+//
+// An empty outputRoot means output goes to stdout: no directories are created
+// and only a single file selected through matchPath can be extracted. When
+// matchPath is non-empty, only the entry named by its first segment is
+// extracted and the remaining segments are applied to the recursion; otherwise
+// every entry is extracted. With verbose set, each resolved output path is
+// written to logger; notices about entries whose data is missing are written
+// to logger regardless.
+//
+// It returns the number of files and symlinks extracted, or ErrNotDir when n
+// is not map-kinded.
+func extractDir(c context.Context, ls *ipld.LinkSystem, n ipld.Node, outputRoot, outputPath string, matchPath []string, verbose bool, logger io.Writer) (int, error) {
+	if outputRoot != "" {
+		dirPath, err := resolvePath(outputRoot, outputPath)
+		if err != nil {
+			return 0, err
+		}
+		// make the directory.
+		if err := os.MkdirAll(dirPath, 0755); err != nil {
+			return 0, err
+		}
+	}
+
+	if n.Kind() != ipld.Kind_Map {
+		return 0, ErrNotDir
+	}
+
+	// Segments left over for any nested directory once this level has
+	// consumed the first one.
+	subPath := matchPath
+	if len(matchPath) > 0 {
+		subPath = matchPath[1:]
+	}
+
+	// extractElement extracts the single directory entry name, whose map value
+	// is n, and returns how many files or symlinks it produced.
+	extractElement := func(name string, n ipld.Node) (int, error) {
+		var nextRes string
+		if outputRoot != "" {
+			var err error
+			nextRes, err = resolvePath(outputRoot, path.Join(outputPath, name))
+			if err != nil {
+				return 0, err
+			}
+			if verbose {
+				fmt.Fprintf(logger, "%s\n", nextRes)
+			}
+		}
+
+		if n.Kind() != ipld.Kind_Link {
+			return 0, fmt.Errorf("unexpected map value for %s at %s", name, outputPath)
+		}
+		// a directory may be represented as a map of name: if unixADL is applied
+		vl, err := n.AsLink()
+		if err != nil {
+			return 0, err
+		}
+		dest, err := ls.Load(ipld.LinkContext{}, vl, basicnode.Prototype.Any)
+		if err != nil {
+			// Blocks missing from the storage are reported and skipped
+			// rather than failing the whole extraction.
+			if nf, ok := err.(interface{ NotFound() bool }); ok && nf.NotFound() {
+				fmt.Fprintf(logger, "data for entry not found: %s (skipping...)\n", path.Join(outputPath, name))
+				return 0, nil
+			}
+			return 0, err
+		}
+		// degenerate files are handled here.
+		if dest.Kind() == ipld.Kind_Bytes {
+			if err := extractFile(c, ls, dest, nextRes); err != nil {
+				return 0, err
+			}
+			return 1, nil
+		}
+
+		// dir / pbnode
+		pbb := dagpb.Type.PBNode.NewBuilder()
+		if err := pbb.AssignNode(dest); err != nil {
+			return 0, err
+		}
+		pbnode := pbb.Build().(dagpb.PBNode)
+
+		// interpret dagpb 'data' as unixfs data and look at type.
+		ufsData, err := pbnode.LookupByString("Data")
+		if err != nil {
+			return 0, err
+		}
+		ufsBytes, err := ufsData.AsBytes()
+		if err != nil {
+			return 0, err
+		}
+		ufsNode, err := data.DecodeUnixFSData(ufsBytes)
+		if err != nil {
+			return 0, err
+		}
+
+		switch ufsNode.DataType.Int() {
+		case data.Data_Directory, data.Data_HAMTShard:
+			ufn, err := unixfsnode.Reify(ipld.LinkContext{}, pbnode, ls)
+			if err != nil {
+				return 0, err
+			}
+			return extractDir(c, ls, ufn, outputRoot, path.Join(outputPath, name), subPath, verbose, logger)
+		case data.Data_File, data.Data_Raw:
+			if err := extractFile(c, ls, pbnode, nextRes); err != nil {
+				return 0, err
+			}
+			return 1, nil
+		case data.Data_Symlink:
+			if nextRes == "" {
+				return 0, fmt.Errorf("cannot extract a symlink to stdout")
+			}
+			// Named target so the local does not shadow the data package.
+			target := ufsNode.Data.Must().Bytes()
+			if err := os.Symlink(string(target), nextRes); err != nil {
+				return 0, err
+			}
+			return 1, nil
+		default:
+			return 0, fmt.Errorf("unknown unixfs type: %d", ufsNode.DataType.Int())
+		}
+	}
+
+	// specific path segment
+	if len(matchPath) > 0 {
+		val, err := n.LookupByString(matchPath[0])
+		if err != nil {
+			return 0, err
+		}
+		return extractElement(matchPath[0], val)
+	}
+
+	// Stdout is signalled by an empty outputRoot. outputPath always starts at
+	// "/", so comparing it with "-" could never reject this case.
+	if outputRoot == "" {
+		return 0, fmt.Errorf("cannot extract a directory to stdout, use a path to extract a specific file")
+	}
+
+	// everything
+	var count int
+	var shardSkip int
+	mi := n.MapIterator()
+	for !mi.Done() {
+		key, val, err := mi.Next()
+		if err != nil {
+			// The iterator could not load part of the directory; count it
+			// and report the total once the walk is done.
+			if nf, ok := err.(interface{ NotFound() bool }); ok && nf.NotFound() {
+				shardSkip++
+				continue
+			}
+			return 0, err
+		}
+		ks, err := key.AsString()
+		if err != nil {
+			return 0, err
+		}
+		ecount, err := extractElement(ks, val)
+		if err != nil {
+			return 0, err
+		}
+		count += ecount
+	}
+	if shardSkip > 0 {
+		fmt.Fprintf(logger, "data for entry not found for %d unknown sharded entries (skipped...)\n", shardSkip)
+	}
+	return count, nil
+}
+
+// extractFile streams the content of the UnixFS file node n, loaded through
+// ls, to outputName. An empty outputName writes to stdout. An existing file at
+// outputName is truncated by os.Create.
+func extractFile(c context.Context, ls *ipld.LinkSystem, n ipld.Node, outputName string) error {
+	node, err := file.NewUnixFSFile(c, n, ls)
+	if err != nil {
+		return err
+	}
+	nlr, err := node.AsLargeBytes()
+	if err != nil {
+		return err
+	}
+	if outputName == "" {
+		// stdout is not ours to close.
+		_, err = io.Copy(os.Stdout, nlr)
+		return err
+	}
+
+	f, err := os.Create(outputName)
+	if err != nil {
+		return err
+	}
+	if _, err := io.Copy(f, nlr); err != nil {
+		// The copy error is the one worth reporting; closing is best effort.
+		_ = f.Close()
+		return err
+	}
+	// Close can report a write error that the copy did not see, so its
+	// result is returned instead of being dropped in a defer.
+	return f.Close()
+}
diff --git a/cmd/car/lib/filter.go b/cmd/car/lib/filter.go
new file mode 100644
index 0000000..e55f154
--- /dev/null
+++ b/cmd/car/lib/filter.go
@@ -0,0 +1,100 @@
+package lib
+
+import (
+ "context"
+ "fmt"
+ "io"
+ "os"
+
+ "github.com/ipfs/go-cid"
+ carv2 "github.com/ipld/go-car/v2"
+ "github.com/ipld/go-car/v2/blockstore"
+)
+
+// FilterCar copies blocks from the CAR at infile into the CAR at outfile,
+// keeping those selected by matchFilter against cidMap and invert.
+//
+// outVersion must be 1 or 2 and selects the output format. Without
+// appendOutFile an existing outfile is truncated and the output roots are the
+// input roots that pass the filter. With appendOutFile, which requires version
+// 2 on both sides, the roots already recorded in outfile are kept. A warning
+// is printed to stderr when the resulting root list is empty.
+func FilterCar(ctx context.Context, infile, outfile string, cidMap map[cid.Cid]struct{}, invert bool, outVersion int, appendOutFile bool) error {
+	fd, err := os.Open(infile)
+	if err != nil {
+		return err
+	}
+	defer fd.Close()
+	rd, err := carv2.NewBlockReader(fd)
+	if err != nil {
+		return err
+	}
+
+	outRoots := make([]cid.Cid, 0)
+	for _, r := range rd.Roots {
+		if matchFilter(r, cidMap, invert) {
+			outRoots = append(outRoots, r)
+		}
+	}
+
+	var options []carv2.Option
+	switch outVersion {
+	case 1:
+		options = []carv2.Option{blockstore.WriteAsCarV1(true)}
+	case 2:
+		// already the default
+	default:
+		return fmt.Errorf("invalid CAR version %d", outVersion)
+	}
+
+	if !appendOutFile {
+		// Anything other than "does not exist" is treated as an existing
+		// output file and truncated.
+		if _, err := os.Stat(outfile); !os.IsNotExist(err) {
+			if err := os.Truncate(outfile, 0); err != nil {
+				return err
+			}
+		}
+	} else {
+		if outVersion != 2 {
+			return fmt.Errorf("can only append to version 2 car files")
+		}
+
+		// roots will need to be whatever is in the output already.
+		outRoots, err = existingV2Roots(outfile)
+		if err != nil {
+			return err
+		}
+	}
+
+	if len(outRoots) == 0 {
+		fmt.Fprintf(os.Stderr, "warning: no roots defined after filtering\n")
+	}
+
+	bs, err := blockstore.OpenReadWrite(outfile, outRoots, options...)
+	if err != nil {
+		return err
+	}
+
+	for {
+		blk, err := rd.Next()
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			return err
+		}
+		if matchFilter(blk.Cid(), cidMap, invert) {
+			if err := bs.Put(ctx, blk); err != nil {
+				return err
+			}
+		}
+	}
+	return bs.Finalize()
+}
+
+// existingV2Roots returns the roots recorded in the CAR at outfile, failing if
+// it is not a version 2 CAR. The reader is closed on every path, before the
+// caller reopens the file for writing.
+func existingV2Roots(outfile string) ([]cid.Cid, error) {
+	cv2r, err := carv2.OpenReader(outfile)
+	if err != nil {
+		return nil, err
+	}
+	defer cv2r.Close()
+	if cv2r.Version != 2 {
+		return nil, fmt.Errorf("can only append to version 2 car files")
+	}
+	return cv2r.Roots()
+}
+
+// matchFilter reports whether c is selected: CIDs present in cidMap are
+// selected unless invert is set, in which case exactly the CIDs absent from
+// cidMap are selected.
+func matchFilter(c cid.Cid, cidMap map[cid.Cid]struct{}, invert bool) bool {
+	_, listed := cidMap[c]
+	return listed != invert
+}
diff --git a/cmd/car/lib/inspect.go b/cmd/car/lib/inspect.go
new file mode 100644
index 0000000..e8db011
--- /dev/null
+++ b/cmd/car/lib/inspect.go
@@ -0,0 +1,167 @@
+package lib
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "os"
+ "sort"
+ "strings"
+
+ carv2 "github.com/ipld/go-car/v2"
+ "github.com/multiformats/go-multicodec"
+)
+
+// Stat holds the minimum, mean and maximum of one measured quantity.
+type Stat struct {
+	Min  uint64
+	Mean uint64
+	Max  uint64
+}
+
+// String renders the three values as "min / mean / max".
+func (s Stat) String() string {
+	// Sprint inserts no extra spaces here because every other operand is a string.
+	return fmt.Sprint(s.Min, " / ", s.Mean, " / ", s.Max)
+}
+
+// Roots is a list of root CIDs in string form.
+type Roots []string
+
+// String formats the list for display after a "Roots:" label: " (none)" when
+// empty, the single root after one space, or each root on its own
+// tab-indented line when there are several.
+func (r Roots) String() string {
+	if len(r) == 0 {
+		return " (none)"
+	}
+	if len(r) == 1 {
+		return " " + r[0]
+	}
+	const lead = "\n\t"
+	return lead + strings.Join(r, lead)
+}
+
+// Counts maps a multicodec code to the number of times it was seen.
+type Counts map[multicodec.Code]uint64
+
+// String renders one "\n\t<code>: <count>" line per entry, ordered by numeric
+// code so the output does not depend on map iteration order.
+func (cs Counts) String() string {
+	codes := make([]int, 0, len(cs))
+	for code := range cs {
+		codes = append(codes, int(code))
+	}
+	sort.Ints(codes)
+
+	var out strings.Builder
+	for _, raw := range codes {
+		code := multicodec.Code(raw)
+		out.WriteString(fmt.Sprintf("\n\t%s: %d", code, cs[code]))
+	}
+	return out.String()
+}
+
+// Report is the summary of a CAR that String renders as text.
+type Report struct {
+	// The next five fields are printed only when Version is 2.
+	Characteristics []byte
+	DataOffset      uint64
+	DataLength      uint64
+	IndexOffset     uint64
+	IndexType       string
+
+	Version      int
+	Roots        Roots
+	RootsPresent bool
+	BlockCount   uint64
+	BlkLength    Stat
+	CidLength    Stat
+	Codecs       Counts
+	Hashes       Counts
+}
+
+// String renders the report as a multi-line, human-readable summary. The
+// header details (characteristics, offsets and index type) are included only
+// for version 2.
+func (r *Report) String() string {
+	rootsPresent := "No"
+	if r.RootsPresent {
+		rootsPresent = "Yes"
+	}
+
+	// Stays empty for anything but version 2, so the layout below needs no
+	// special casing.
+	var v2Section string
+	if r.Version == 2 {
+		v2Section = fmt.Sprintf(`Characteristics: %x
+Data offset: %d
+Data (payload) length: %d
+Index offset: %d
+Index type: %s
+`, r.Characteristics, r.DataOffset, r.DataLength, r.IndexOffset, r.IndexType)
+	}
+
+	const layout = `Version: %d
+%sRoots:%s
+Root blocks present in data: %s
+Block count: %d
+Min / average / max block length (bytes): %s
+Min / average / max CID length (bytes): %s
+Block count per codec:%s
+CID count per multihash:%s
+`
+
+	return fmt.Sprintf(
+		layout,
+		r.Version,
+		v2Section,
+		r.Roots.String(),
+		rootsPresent,
+		r.BlockCount,
+		r.BlkLength.String(),
+		r.CidLength.String(),
+		r.Codecs.String(),
+		r.Hashes.String(),
+	)
+}
+
+// InspectCar reads the CAR in inStream and summarises it as a Report.
+//
+// verifyHashes is forwarded to the reader's Inspect call. For a version 1 CAR
+// it additionally makes InspectCar fail if any bytes remain in inStream after
+// inspection. The header details of the Report are filled in only for
+// version 2.
+func InspectCar(inStream *os.File, verifyHashes bool) (*Report, error) {
+	rd, err := carv2.NewReader(inStream, carv2.ZeroLengthSectionAsEOF(true))
+	if err != nil {
+		return nil, err
+	}
+	stats, err := rd.Inspect(verifyHashes)
+	if err != nil {
+		return nil, err
+	}
+
+	if stats.Version == 1 && verifyHashes { // check that we've read all the data
+		got, err := inStream.Read(make([]byte, 1)) // force EOF
+		if err != nil && err != io.EOF {
+			return nil, err
+		} else if got > 0 {
+			return nil, fmt.Errorf("unexpected data after EOF: %d", got)
+		}
+	}
+
+	// Non-nil even when there are no roots, as before.
+	roots := make(Roots, 0, len(stats.Roots))
+	for _, c := range stats.Roots {
+		roots = append(roots, c.String())
+	}
+
+	rep := &Report{
+		Version:      int(stats.Version),
+		Roots:        roots,
+		RootsPresent: stats.RootsPresent,
+		BlockCount:   stats.BlockCount,
+		BlkLength:    Stat{Min: stats.MinBlockLength, Mean: stats.AvgBlockLength, Max: stats.MaxBlockLength},
+		CidLength:    Stat{Min: stats.MinCidLength, Mean: stats.AvgCidLength, Max: stats.MaxCidLength},
+		Codecs:       stats.CodecCounts,
+		Hashes:       stats.MhTypeCounts,
+	}
+
+	if stats.Version == 2 {
+		idx := "(none)"
+		if stats.IndexCodec != 0 {
+			idx = stats.IndexCodec.String()
+		}
+		// Serialise the characteristics so the report can show them as hex.
+		var buf bytes.Buffer
+		if _, err := stats.Header.Characteristics.WriteTo(&buf); err != nil {
+			return nil, err
+		}
+		rep.Characteristics = buf.Bytes()
+		rep.DataOffset = stats.Header.DataOffset
+		rep.DataLength = stats.Header.DataSize
+		rep.IndexOffset = stats.Header.IndexOffset
+		rep.IndexType = idx
+	}
+
+	return rep, nil
+}
diff --git a/cmd/car/lib/root.go b/cmd/car/lib/root.go
new file mode 100644
index 0000000..d51347b
--- /dev/null
+++ b/cmd/car/lib/root.go
@@ -0,0 +1,25 @@
+package lib
+
+import (
+ "os"
+
+ "github.com/ipfs/go-cid"
+ carv2 "github.com/ipld/go-car/v2"
+)
+
+// CarRoot returns the root CIDs listed in the header of the CAR stored at
+// file. When file is empty the CAR is read from stdin instead.
+func CarRoot(file string) (roots []cid.Cid, err error) {
+	inStream := os.Stdin
+	if len(file) >= 1 {
+		inStream, err = os.Open(file)
+		if err != nil {
+			return nil, err
+		}
+		// Only a file opened here is closed; stdin is left alone.
+		defer inStream.Close()
+	}
+
+	rd, err := carv2.NewBlockReader(inStream)
+	if err != nil {
+		return nil, err
+	}
+	return rd.Roots, nil
+}
diff --git a/cmd/car/lib/verify.go b/cmd/car/lib/verify.go
new file mode 100644
index 0000000..cf30a9b
--- /dev/null
+++ b/cmd/car/lib/verify.go
@@ -0,0 +1,107 @@
+package lib
+
+import (
+ "fmt"
+ "io"
+ "os"
+
+ "github.com/ipfs/go-cid"
+ carv2 "github.com/ipld/go-car/v2"
+ "github.com/ipld/go-car/v2/index"
+ "github.com/multiformats/go-multihash"
+)
+
+// VerifyCar checks the CAR stored at file and returns the first problem found,
+// or nil if every check passes. The checks are, in order:
+//
+//   - header: at least one root is listed; for version 2, the data size is
+//     non-zero and the data and index offsets are consistent with the pragma,
+//     header and data sizes and with the file length;
+//   - blocks: every block can be read, and every listed root appears as a block;
+//   - index: for version 2 with an index, every non-identity CID read from the
+//     blocks can be looked up in the index.
+func VerifyCar(file string) error {
+	// header
+	rx, err := carv2.OpenReader(file)
+	if err != nil {
+		return err
+	}
+	defer rx.Close()
+	roots, err := rx.Roots()
+	if err != nil {
+		return err
+	}
+	if len(roots) == 0 {
+		return fmt.Errorf("no roots listed in car header")
+	}
+	// Roots still in this set after the block scan were never seen as a block.
+	rootMap := make(map[cid.Cid]struct{})
+	for _, r := range roots {
+		rootMap[r] = struct{}{}
+	}
+
+	if rx.Version == 2 {
+		if rx.Header.DataSize == 0 {
+			return fmt.Errorf("size of wrapped v1 car listed as '0'")
+		}
+
+		flen, err := os.Stat(file)
+		if err != nil {
+			return err
+		}
+		lengthToIndex := carv2.PragmaSize + carv2.HeaderSize + rx.Header.DataSize
+		if uint64(flen.Size()) > lengthToIndex && rx.Header.IndexOffset == 0 {
+			return fmt.Errorf("header claims no index, but extra bytes in file beyond data size")
+		}
+		if rx.Header.DataOffset < carv2.PragmaSize+carv2.HeaderSize {
+			return fmt.Errorf("data offset places data within carv2 header")
+		}
+		// An index offset of zero means "no index" and cannot overlap the
+		// data, so the overlap check only applies when an index is present.
+		if rx.Header.HasIndex() && rx.Header.IndexOffset < lengthToIndex {
+			return fmt.Errorf("index offset overlaps with data. data ends at %d. index offset of %d", lengthToIndex, rx.Header.IndexOffset)
+		}
+	}
+
+	// blocks
+	fd, err := os.Open(file)
+	if err != nil {
+		return err
+	}
+	defer fd.Close()
+	rd, err := carv2.NewBlockReader(fd)
+	if err != nil {
+		return err
+	}
+
+	cidList := make([]cid.Cid, 0)
+	for {
+		blk, err := rd.Next()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return err
+		}
+		delete(rootMap, blk.Cid())
+		cidList = append(cidList, blk.Cid())
+	}
+
+	if len(rootMap) > 0 {
+		return fmt.Errorf("header lists root(s) not present as a block: %v", rootMap)
+	}
+
+	// index
+	if rx.Version == 2 && rx.Header.HasIndex() {
+		ir, err := rx.IndexReader()
+		if err != nil {
+			return err
+		}
+		idx, err := index.ReadFrom(ir)
+		if err != nil {
+			return err
+		}
+		for _, c := range cidList {
+			cidHash, err := multihash.Decode(c.Hash())
+			if err != nil {
+				return err
+			}
+			// Identity-hashed CIDs are not looked up in the index.
+			if cidHash.Code == multihash.IDENTITY {
+				continue
+			}
+			if err := idx.GetAll(c, func(_ uint64) bool {
+				return true
+			}); err != nil {
+				return fmt.Errorf("could not look up known cid %s in index: %w", c, err)
+			}
+		}
+	}
+	return nil
+}
diff --git a/cmd/car/root.go b/cmd/car/root.go
index 7e8d5b2..136fb94 100644
--- a/cmd/car/root.go
+++ b/cmd/car/root.go
@@ -2,27 +2,18 @@ package main
import (
"fmt"
- "os"
- carv2 "github.com/ipld/go-car/v2"
+ "github.com/ipld/go-car/cmd/car/lib"
"github.com/urfave/cli/v2"
)
// CarRoot prints the root CID in a car
func CarRoot(c *cli.Context) (err error) {
- inStream := os.Stdin
- if c.Args().Len() >= 1 {
- inStream, err = os.Open(c.Args().First())
- if err != nil {
- return err
- }
- }
-
- rd, err := carv2.NewBlockReader(inStream)
+ roots, err := lib.CarRoot(c.Args().First())
if err != nil {
return err
}
- for _, r := range rd.Roots {
+ for _, r := range roots {
fmt.Printf("%s\n", r.String())
}
diff --git a/cmd/car/verify.go b/cmd/car/verify.go
index faee3aa..52dc1fb 100644
--- a/cmd/car/verify.go
+++ b/cmd/car/verify.go
@@ -2,13 +2,8 @@ package main
import (
"fmt"
- "io"
- "os"
- "github.com/ipfs/go-cid"
- carv2 "github.com/ipld/go-car/v2"
- "github.com/ipld/go-car/v2/index"
- "github.com/multiformats/go-multihash"
+ "github.com/ipld/go-car/cmd/car/lib"
"github.com/urfave/cli/v2"
)
@@ -18,97 +13,5 @@ func VerifyCar(c *cli.Context) error {
return fmt.Errorf("usage: car verify ")
}
- // header
- rx, err := carv2.OpenReader(c.Args().First())
- if err != nil {
- return err
- }
- defer rx.Close()
- roots, err := rx.Roots()
- if err != nil {
- return err
- }
- if len(roots) == 0 {
- return fmt.Errorf("no roots listed in car header")
- }
- rootMap := make(map[cid.Cid]struct{})
- for _, r := range roots {
- rootMap[r] = struct{}{}
- }
-
- if rx.Version == 2 {
- if rx.Header.DataSize == 0 {
- return fmt.Errorf("size of wrapped v1 car listed as '0'")
- }
-
- flen, err := os.Stat(c.Args().First())
- if err != nil {
- return err
- }
- lengthToIndex := carv2.PragmaSize + carv2.HeaderSize + rx.Header.DataSize
- if uint64(flen.Size()) > lengthToIndex && rx.Header.IndexOffset == 0 {
- return fmt.Errorf("header claims no index, but extra bytes in file beyond data size")
- }
- if rx.Header.DataOffset < carv2.PragmaSize+carv2.HeaderSize {
- return fmt.Errorf("data offset places data within carv2 header")
- }
- if rx.Header.IndexOffset < lengthToIndex {
- return fmt.Errorf("index offset overlaps with data. data ends at %d. index offset of %d", lengthToIndex, rx.Header.IndexOffset)
- }
- }
-
- // blocks
- fd, err := os.Open(c.Args().First())
- if err != nil {
- return err
- }
- rd, err := carv2.NewBlockReader(fd)
- if err != nil {
- return err
- }
-
- cidList := make([]cid.Cid, 0)
- for {
- blk, err := rd.Next()
- if err == io.EOF {
- break
- }
- if err != nil {
- return err
- }
- delete(rootMap, blk.Cid())
- cidList = append(cidList, blk.Cid())
- }
-
- if len(rootMap) > 0 {
- return fmt.Errorf("header lists root(s) not present as a block: %v", rootMap)
- }
-
- // index
- if rx.Version == 2 && rx.Header.HasIndex() {
- ir, err := rx.IndexReader()
- if err != nil {
- return err
- }
- idx, err := index.ReadFrom(ir)
- if err != nil {
- return err
- }
- for _, c := range cidList {
- cidHash, err := multihash.Decode(c.Hash())
- if err != nil {
- return err
- }
- if cidHash.Code == multihash.IDENTITY {
- continue
- }
- if err := idx.GetAll(c, func(_ uint64) bool {
- return true
- }); err != nil {
- return fmt.Errorf("could not look up known cid %s in index: %w", c, err)
- }
- }
- }
-
- return nil
+ return lib.VerifyCar(c.Args().First())
}