Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Re-factor cmd functions to library #524

Merged
merged 2 commits into from
Jun 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
266 changes: 2 additions & 264 deletions cmd/car/extract.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,18 @@ package main

import (
"context"
"errors"
"fmt"
"io"
"os"
"path"
"path/filepath"
"runtime"
"strings"
"sync"

"github.com/ipfs/go-cid"
"github.com/ipfs/go-unixfsnode"
"github.com/ipfs/go-unixfsnode/data"
"github.com/ipfs/go-unixfsnode/file"
"github.com/ipld/go-car/cmd/car/lib"
"github.com/ipld/go-car/v2"
carstorage "github.com/ipld/go-car/v2/storage"
dagpb "github.com/ipld/go-codec-dagpb"
"github.com/ipld/go-ipld-prime"
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
basicnode "github.com/ipld/go-ipld-prime/node/basic"
"github.com/ipld/go-ipld-prime/storage"
"github.com/urfave/cli/v2"
)
Expand Down Expand Up @@ -86,7 +78,7 @@ func ExtractCar(c *cli.Context) error {

var extractedFiles int
for _, root := range roots {
count, err := extractRoot(c, &ls, root, outputDir, path)
count, err := lib.ExtractToDir(c.Context, &ls, root, outputDir, path, c.IsSet("verbose"), c.App.ErrWriter)
if err != nil {
return err
}
Expand All @@ -101,260 +93,6 @@ func ExtractCar(c *cli.Context) error {
return nil
}

// extractRoot extracts the DAG under root, read through ls, into outputDir.
//
// An outputDir of "-" selects stdout: no directory is resolved and an empty
// output root is handed to extractDir. path is forwarded to extractDir as the
// list of path segments to match.
//
// Roots whose CID codec is raw are skipped (reported on c.App.ErrWriter when
// the "verbose" flag is set) and count as zero. When extractDir reports
// ErrNotDir the root is treated as a single entry: it is written under the
// name "unknown" (or to stdout) if its UnixFS type is File or Raw, and 1 is
// returned either way. Any other extractDir error is returned prefixed with
// the root CID.
func extractRoot(c *cli.Context, ls *ipld.LinkSystem, root cid.Cid, outputDir string, path []string) (int, error) {
	if root.Prefix().Codec == cid.Raw {
		if c.IsSet("verbose") {
			fmt.Fprintf(c.App.ErrWriter, "skipping raw root %s\n", root)
		}
		return 0, nil
	}

	loaded, err := ls.Load(ipld.LinkContext{}, cidlink.Link{Cid: root}, dagpb.Type.PBNode)
	if err != nil {
		return 0, err
	}
	rootNode := loaded.(dagpb.PBNode)

	reified, err := unixfsnode.Reify(ipld.LinkContext{}, rootNode, ls)
	if err != nil {
		return 0, err
	}

	toStdout := outputDir == "-"

	var resolvedDir string
	if !toStdout {
		if resolvedDir, err = filepath.EvalSymlinks(outputDir); err != nil {
			return 0, err
		}
		// NOTE(review): EvalSymlinks normally fails for a path that does not
		// exist, so this create-if-missing branch may never run — confirm.
		if _, statErr := os.Stat(resolvedDir); os.IsNotExist(statErr) {
			if mkErr := os.Mkdir(resolvedDir, 0755); mkErr != nil {
				return 0, mkErr
			}
		}
	}

	count, err := extractDir(c, ls, reified, resolvedDir, "/", path)
	if err == nil {
		return count, nil
	}
	if !errors.Is(err, ErrNotDir) {
		return 0, fmt.Errorf("%s: %w", root, err)
	}

	// Not a directory, so handle the root as a lone file: decode the dag-pb
	// "Data" field as UnixFS metadata to learn its type.
	dataField, err := rootNode.LookupByString("Data")
	if err != nil {
		return 0, err
	}
	rawData, err := dataField.AsBytes()
	if err != nil {
		return 0, err
	}
	meta, err := data.DecodeUnixFSData(rawData)
	if err != nil {
		return 0, err
	}

	// An empty output name makes extractFile write to stdout.
	var outputName string
	if !toStdout {
		outputName = filepath.Join(resolvedDir, "unknown")
	}

	switch meta.DataType.Int() {
	case data.Data_File, data.Data_Raw:
		if err := extractFile(c, ls, rootNode, outputName); err != nil {
			return 0, err
		}
	}
	return 1, nil
}

// resolvePath maps pth, a "/"-rooted path inside the extracted tree, onto the
// output directory root and returns the resulting filesystem path.
//
// pth is made relative to "/" (which also collapses any ".." segments that
// would climb above the root) and joined under root. The parent directory of
// the result is then passed through filepath.EvalSymlinks; if the resolved
// parent differs from the lexical parent, the path travels through a symlink
// and is rejected. Only the parent is checked: the final path element itself
// is not resolved.
//
// An error is returned when pth cannot be made relative to "/", when the
// parent directory cannot be resolved (for example because it does not
// exist), or when a symlink redirect is detected.
func resolvePath(root, pth string) (string, error) {
	rel, err := filepath.Rel("/", pth)
	if err != nil {
		return "", fmt.Errorf("couldn't check relative-ness of %s: %w", pth, err)
	}
	// These are operating-system paths, so use path/filepath throughout; the
	// slash-only path package would disagree with EvalSymlinks' output on
	// platforms whose separator is not "/".
	joined := filepath.Join(root, rel)

	// filepath.Dir returns an already-cleaned path, so it can be compared
	// directly against the symlink-resolved form.
	parent := filepath.Dir(joined)
	resolved, err := filepath.EvalSymlinks(parent)
	if err != nil {
		return "", fmt.Errorf("couldn't eval symlinks in %s: %w", parent, err)
	}
	if resolved != parent {
		return "", fmt.Errorf("path attempts to redirect through symlinks")
	}
	return joined, nil
}

// extractDir extracts the directory node n into outputRoot at the "/"-rooted
// location outputPath and returns the number of entries extracted.
//
// An empty outputRoot means output goes to stdout; in that mode no
// directories are created and no paths are resolved. matchPath, when
// non-empty, restricts extraction to the single entry named matchPath[0],
// with the remaining segments passed down on recursion. With an empty
// matchPath every entry of the directory is extracted.
//
// ErrNotDir is returned when n is not a map. Note that the output directory
// for outputPath is created before that check. Entries whose data cannot be
// found are reported on c.App.ErrWriter and skipped rather than failing the
// whole extraction. Files and symlinks each count as one extracted entry;
// sub-directories contribute the count of their own contents.
func extractDir(c *cli.Context, ls *ipld.LinkSystem, n ipld.Node, outputRoot, outputPath string, matchPath []string) (int, error) {
	if outputRoot != "" {
		dirPath, err := resolvePath(outputRoot, outputPath)
		if err != nil {
			return 0, err
		}
		// make the directory.
		if err := os.MkdirAll(dirPath, 0755); err != nil {
			return 0, err
		}
	}

	if n.Kind() != ipld.Kind_Map {
		return 0, ErrNotDir
	}

	// Segments left over for sub-directories once this level's segment has
	// been consumed.
	subPath := matchPath
	if len(matchPath) > 0 {
		subPath = matchPath[1:]
	}

	// isNotFound reports whether err, or any error it wraps, identifies
	// itself as a not-found condition.
	isNotFound := func(err error) bool {
		var nf interface{ NotFound() bool }
		return errors.As(err, &nf) && nf.NotFound()
	}

	// extractElement extracts the single directory entry name, whose map
	// value is entry, and returns how many entries were written.
	extractElement := func(name string, entry ipld.Node) (int, error) {
		// nextRes stays empty in stdout mode, which extractFile treats as
		// "write to stdout".
		var nextRes string
		if outputRoot != "" {
			var err error
			nextRes, err = resolvePath(outputRoot, path.Join(outputPath, name))
			if err != nil {
				return 0, err
			}
			if c.IsSet("verbose") {
				fmt.Fprintf(c.App.ErrWriter, "%s\n", nextRes)
			}
		}

		if entry.Kind() != ipld.Kind_Link {
			return 0, fmt.Errorf("unexpected map value for %s at %s", name, outputPath)
		}
		// a directory may be represented as a map of name:<link> if unixADL is applied
		vl, err := entry.AsLink()
		if err != nil {
			return 0, err
		}
		dest, err := ls.Load(ipld.LinkContext{}, vl, basicnode.Prototype.Any)
		if err != nil {
			if isNotFound(err) {
				fmt.Fprintf(c.App.ErrWriter, "data for entry not found: %s (skipping...)\n", path.Join(outputPath, name))
				return 0, nil
			}
			return 0, err
		}
		// degenerate files are handled here.
		if dest.Kind() == ipld.Kind_Bytes {
			if err := extractFile(c, ls, dest, nextRes); err != nil {
				return 0, err
			}
			return 1, nil
		}

		// dir / pbnode
		pbb := dagpb.Type.PBNode.NewBuilder()
		if err := pbb.AssignNode(dest); err != nil {
			return 0, err
		}
		pbnode := pbb.Build().(dagpb.PBNode)

		// interpret dagpb 'data' as unixfs data and look at type.
		ufsData, err := pbnode.LookupByString("Data")
		if err != nil {
			return 0, err
		}
		ufsBytes, err := ufsData.AsBytes()
		if err != nil {
			return 0, err
		}
		ufsNode, err := data.DecodeUnixFSData(ufsBytes)
		if err != nil {
			return 0, err
		}

		switch ufsNode.DataType.Int() {
		case data.Data_Directory, data.Data_HAMTShard:
			ufn, err := unixfsnode.Reify(ipld.LinkContext{}, pbnode, ls)
			if err != nil {
				return 0, err
			}
			return extractDir(c, ls, ufn, outputRoot, path.Join(outputPath, name), subPath)
		case data.Data_File, data.Data_Raw:
			if err := extractFile(c, ls, pbnode, nextRes); err != nil {
				return 0, err
			}
			return 1, nil
		case data.Data_Symlink:
			if nextRes == "" {
				return 0, fmt.Errorf("cannot extract a symlink to stdout")
			}
			// Named target rather than data so the imported data package is
			// not shadowed.
			target := ufsNode.Data.Must().Bytes()
			if err := os.Symlink(string(target), nextRes); err != nil {
				return 0, err
			}
			return 1, nil
		default:
			return 0, fmt.Errorf("unknown unixfs type: %d", ufsNode.DataType.Int())
		}
	}

	// specific path segment
	if len(matchPath) > 0 {
		val, err := n.LookupByString(matchPath[0])
		if err != nil {
			return 0, err
		}
		return extractElement(matchPath[0], val)
	}

	// Stdout mode is signalled by an empty outputRoot; outputPath is always a
	// "/"-rooted path and never "-". With no path segment left to select a
	// single entry, a whole directory would be written to stdout, so refuse.
	if outputRoot == "" {
		return 0, fmt.Errorf("cannot extract a directory to stdout, use a path to extract a specific file")
	}

	// everything
	var count int
	var shardSkip int
	mi := n.MapIterator()
	for !mi.Done() {
		key, val, err := mi.Next()
		if err != nil {
			if isNotFound(err) {
				shardSkip++
				continue
			}
			return 0, err
		}
		ks, err := key.AsString()
		if err != nil {
			return 0, err
		}
		ecount, err := extractElement(ks, val)
		if err != nil {
			return 0, err
		}
		count += ecount
	}
	if shardSkip > 0 {
		fmt.Fprintf(c.App.ErrWriter, "data for entry not found for %d unknown sharded entries (skipped...)\n", shardSkip)
	}
	return count, nil
}

// extractFile streams the content of the UnixFS file node n, read through ls,
// to the file named outputName. An empty outputName writes to os.Stdout
// instead.
//
// The destination is created (or truncated) with os.Create. Errors from
// building the file reader, creating the destination, copying, or closing the
// destination are returned; a close failure is reported because it can mean
// the written data never reached the file.
func extractFile(c *cli.Context, ls *ipld.LinkSystem, n ipld.Node, outputName string) error {
	node, err := file.NewUnixFSFile(c.Context, n, ls)
	if err != nil {
		return err
	}
	nlr, err := node.AsLargeBytes()
	if err != nil {
		return err
	}

	// Stdout is not ours to close.
	if outputName == "" {
		_, err = io.Copy(os.Stdout, nlr)
		return err
	}

	f, err := os.Create(outputName)
	if err != nil {
		return err
	}
	if _, err := io.Copy(f, nlr); err != nil {
		// The copy error is the one worth reporting; closing here is only
		// cleanup, so its error is deliberately dropped.
		_ = f.Close()
		return err
	}
	return f.Close()
}

// TODO: dedupe this with lassie, probably into go-unixfsnode
func pathSegments(path string) ([]string, error) {
segments := strings.Split(path, "/")
Expand Down
Loading
Loading