Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add car split command #226

Merged
merged 10 commits into from
Sep 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 14 additions & 128 deletions v2/cmd/car/car.go
Original file line number Diff line number Diff line change
@@ -1,152 +1,38 @@
package main

import (
"bufio"
"fmt"
"io"
"log"
"os"

"github.com/ipfs/go-cid"
carv2 "github.com/ipld/go-car/v2"
"github.com/ipld/go-car/v2/index"
icarv1 "github.com/ipld/go-car/v2/internal/carv1"
"github.com/multiformats/go-multicodec"
"github.com/multiformats/go-varint"
"github.com/urfave/cli/v2"
)

func main() {
app := &cli.App{
Name: "car",
Usage: "Utility for working with car files",
Flags: []cli.Flag{
&cli.StringFlag{
Name: "codec",
Aliases: []string{"c"},
Usage: "The type of index to write",
Value: multicodec.CarMultihashIndexSorted.String(),
},
},
Commands: []*cli.Command{
{
Name: "index",
Aliases: []string{"i"},
Usage: "write out the car with an index",
Action: func(c *cli.Context) error {
r, err := carv2.OpenReader(c.Args().Get(0))
if err != nil {
return err
}
defer r.Close()

var idx index.Index
if c.String("codec") != "none" {
var mc multicodec.Code
if err := mc.Set(c.String("codec")); err != nil {
return err
}
idx, err = index.New(mc)
if err != nil {
return err
}
}

outStream := os.Stdout
if c.Args().Len() >= 2 {
outStream, err = os.Create(c.Args().Get(1))
if err != nil {
return err
}
}
defer outStream.Close()

v1r := r.DataReader()

v2Header := carv2.NewHeader(r.Header.DataSize)
if c.String("codec") == "none" {
v2Header.IndexOffset = 0
if _, err := outStream.Write(carv2.Pragma); err != nil {
return err
}
if _, err := v2Header.WriteTo(outStream); err != nil {
return err
}
if _, err := io.Copy(outStream, v1r); err != nil {
return err
}
return nil
}

if _, err := outStream.Write(carv2.Pragma); err != nil {
return err
}
if _, err := v2Header.WriteTo(outStream); err != nil {
return err
}

// collect records as we go through the v1r
hdr, err := icarv1.ReadHeader(v1r)
if err != nil {
return fmt.Errorf("error reading car header: %w", err)
}
if err := icarv1.WriteHeader(hdr, outStream); err != nil {
return err
}

records := make([]index.Record, 0)
var sectionOffset int64
if sectionOffset, err = v1r.Seek(0, io.SeekCurrent); err != nil {
return err
}

br := bufio.NewReader(v1r)
for {
// Read the section's length.
sectionLen, err := varint.ReadUvarint(br)
if err != nil {
if err == io.EOF {
break
}
return err
}
if _, err := outStream.Write(varint.ToUvarint(sectionLen)); err != nil {
return err
}

// Null padding; by default it's an error.
// TODO: integrate corresponding ReadOption
if sectionLen == 0 {
// TODO: pad writer to expected length.
break
}

// Read the CID.
cidLen, c, err := cid.CidFromReader(br)
if err != nil {
return err
}
records = append(records, index.Record{Cid: c, Offset: uint64(sectionOffset)})
if _, err := c.WriteBytes(outStream); err != nil {
return err
}

// Seek to the next section by skipping the block.
// The section length includes the CID, so subtract it.
remainingSectionLen := int64(sectionLen) - int64(cidLen)
if _, err := io.CopyN(outStream, br, remainingSectionLen); err != nil {
return err
}
sectionOffset += int64(sectionLen) + int64(varint.UvarintSize(sectionLen))
}

if err := idx.Load(records); err != nil {
return err
}

return index.WriteTo(idx, outStream)
Action: IndexCar,
Flags: []cli.Flag{
&cli.StringFlag{
Name: "codec",
Aliases: []string{"c"},
Usage: "The type of index to write",
Value: multicodec.CarMultihashIndexSorted.String(),
},
},
},
{
Name: "split",
Aliases: []string{"s"},
Usage: "Split an index to a detached file",
Action: SplitCar,
},
},
}

Expand Down
131 changes: 131 additions & 0 deletions v2/cmd/car/index.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
package main

import (
"bufio"
"fmt"
"io"
"os"

"github.com/ipfs/go-cid"
carv2 "github.com/ipld/go-car/v2"
"github.com/ipld/go-car/v2/index"
icarv1 "github.com/ipld/go-car/v2/internal/carv1"
"github.com/multiformats/go-multicodec"
"github.com/multiformats/go-varint"
"github.com/urfave/cli/v2"
)

// IndexCar is a command to add an index to a car.
//
// The first positional argument is the path of the car to read. The optional
// second positional argument is the path of a file to create for the output;
// when it is absent the result is written to standard output.
//
// The output always starts with the CARv2 pragma and a v2 header built from
// the data size recorded in the input's header, followed by the data payload.
// The "codec" flag selects the type of index written after the payload; the
// special value "none" copies the payload unchanged and writes no index.
//
// It returns the first error encountered while reading, writing, or closing
// the output.
func IndexCar(c *cli.Context) (err error) {
	r, err := carv2.OpenReader(c.Args().Get(0))
	if err != nil {
		return err
	}
	defer r.Close()

	codec := c.String("codec")
	withIndex := codec != "none"

	var idx index.Index
	if withIndex {
		var mc multicodec.Code
		if err := mc.Set(codec); err != nil {
			return err
		}
		idx, err = index.New(mc)
		if err != nil {
			return err
		}
	}

	outStream := os.Stdout
	if c.Args().Len() >= 2 {
		outStream, err = os.Create(c.Args().Get(1))
		if err != nil {
			return err
		}
	}
	// Report a failed Close: for a freshly written file it can be the only
	// signal that data did not reach storage. An earlier error takes priority.
	defer func() {
		if cerr := outStream.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	v1r := r.DataReader()

	// Both modes share the same framing; only the index offset differs.
	v2Header := carv2.NewHeader(r.Header.DataSize)
	if !withIndex {
		v2Header.IndexOffset = 0
	}
	if _, err := outStream.Write(carv2.Pragma); err != nil {
		return err
	}
	if _, err := v2Header.WriteTo(outStream); err != nil {
		return err
	}

	if !withIndex {
		// No index requested: stream the payload through untouched.
		_, err = io.Copy(outStream, v1r)
		return err
	}

	// collect records as we go through the v1r
	hdr, err := icarv1.ReadHeader(v1r)
	if err != nil {
		return fmt.Errorf("error reading car header: %w", err)
	}
	if err := icarv1.WriteHeader(hdr, outStream); err != nil {
		return err
	}

	// Offset of the first section, i.e. where the reader stands after the
	// header. From here on reads go through br, so the offset is advanced by
	// hand from the section lengths instead of asking v1r again.
	sectionOffset, err := v1r.Seek(0, io.SeekCurrent)
	if err != nil {
		return err
	}

	var records []index.Record
	br := bufio.NewReader(v1r)
	for {
		// Read the section's length.
		sectionLen, err := varint.ReadUvarint(br)
		if err != nil {
			if err == io.EOF {
				break
			}
			return err
		}
		if _, err := outStream.Write(varint.ToUvarint(sectionLen)); err != nil {
			return err
		}

		// Null padding; by default it's an error.
		// TODO: integrate corresponding ReadOption
		if sectionLen == 0 {
			// TODO: pad writer to expected length.
			break
		}

		// Read the CID.
		cidLen, blockCid, err := cid.CidFromReader(br)
		if err != nil {
			return err
		}
		records = append(records, index.Record{Cid: blockCid, Offset: uint64(sectionOffset)})
		if _, err := blockCid.WriteBytes(outStream); err != nil {
			return err
		}

		// The section length includes the CID, so subtract it to get the
		// size of the block data that follows. A negative result means the
		// input is malformed (CID longer than the declared section, or a
		// length that does not fit in int64); io.CopyN would silently copy
		// nothing in that case, so reject it explicitly.
		remainingSectionLen := int64(sectionLen) - int64(cidLen)
		if remainingSectionLen < 0 {
			return fmt.Errorf("invalid section at offset %d: length %d is smaller than its CID length %d", sectionOffset, sectionLen, cidLen)
		}
		if _, err := io.CopyN(outStream, br, remainingSectionLen); err != nil {
			return err
		}
		sectionOffset += int64(sectionLen) + int64(varint.UvarintSize(sectionLen))
	}

	if err := idx.Load(records); err != nil {
		return err
	}

	return index.WriteTo(idx, outStream)
}
35 changes: 35 additions & 0 deletions v2/cmd/car/split.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package main

import (
"fmt"
"io"
"os"

carv2 "github.com/ipld/go-car/v2"
"github.com/urfave/cli/v2"
)

// SplitCar is a command to output the index part of a car.
//
// The first positional argument is the path of the car to read. The optional
// second positional argument is the path of a file to create for the index;
// when it is absent the index is written to standard output.
//
// It returns an error if the car cannot be opened, if its header reports no
// index, or if copying the index or closing the output fails.
func SplitCar(c *cli.Context) (err error) {
	r, err := carv2.OpenReader(c.Args().Get(0))
	if err != nil {
		return err
	}
	defer r.Close()

	if !r.Header.HasIndex() {
		return fmt.Errorf("no index present")
	}

	outStream := os.Stdout
	if c.Args().Len() >= 2 {
		outStream, err = os.Create(c.Args().Get(1))
		if err != nil {
			return err
		}
	}
	// Report a failed Close: for a freshly written file it can be the only
	// signal that data did not reach storage. An earlier error takes priority.
	defer func() {
		if cerr := outStream.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	_, err = io.Copy(outStream, r.IndexReader())
	return err
}