Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: expose index for StorageCar #431

Merged
merged 1 commit into from
May 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions v2/storage/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type ReadableCar interface {
ipldstorage.ReadableStorage
ipldstorage.StreamingReadableStorage
Roots() []cid.Cid
Index() index.Index
}

// WritableCar is compatible with storage.WritableStorage but also returns
Expand All @@ -40,6 +41,7 @@ type ReadableCar interface {
type WritableCar interface {
// Embedding ipldstorage.WritableStorage means every WritableCar also
// satisfies that interface.
ipldstorage.WritableStorage
// Roots returns a slice of CIDs.
// NOTE(review): presumably the root CIDs of the CAR — confirm against the
// implementation.
Roots() []cid.Cid
// Index returns the index.Index associated with the CAR. Implementations
// may hand back their live index rather than a copy, so callers should
// treat the result as read-only.
Index() index.Index
// Finalize returns an error on failure.
// NOTE(review): presumably completes/flushes the CAR being written — the
// exact semantics are not visible from this declaration; confirm.
Finalize() error
}

Expand Down Expand Up @@ -293,6 +295,12 @@ func (sc *StorageCar) Roots() []cid.Cid {
return sc.roots
}

// Index gives direct access to the index by returning the StorageCar's idx
// field as-is. It should be used with care: modifying the index may result in
// corruption or invalid reads.
func (sc *StorageCar) Index() index.Index {
return sc.idx
}

// Put adds a block to the CAR, where the block is identified by the given CID
// provided in string form. The keyStr value must be a valid CID binary string
// (not a multibase string representation), i.e. generated with CID#KeyString().
Expand Down
104 changes: 104 additions & 0 deletions v2/storage/storage_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
carv2 "github.com/ipld/go-car/v2"
"github.com/ipld/go-car/v2/index"
"github.com/ipld/go-car/v2/internal/carv1"
"github.com/ipld/go-car/v2/internal/store"
"github.com/ipld/go-car/v2/storage"
"github.com/multiformats/go-multicodec"
"github.com/multiformats/go-multihash"
Expand Down Expand Up @@ -1211,6 +1212,68 @@ func TestWholeCID(t *testing.T) {
}
}

// TestIndex verifies that the index exposed through Index() on a readable
// StorageCar can resolve every non-identity CID found in the sample CARv1 and
// wrapped CARv2 fixtures. When the index also implements index.IterableIndex,
// it additionally checks that iterating it yields exactly the multihashes of
// those CIDs (order-insensitive).
func TestIndex(t *testing.T) {
	tests := []struct {
		name     string
		path     string
		wantCIDs []cid.Cid
	}{
		{
			name:     "IndexCarV1",
			path:     "../testdata/sample-v1.car",
			wantCIDs: listCids(t, newV1ReaderFromV1File(t, "../testdata/sample-v1.car", false)),
		},
		{
			name:     "IndexCarV2",
			path:     "../testdata/sample-wrapped-v2.car",
			wantCIDs: listCids(t, newV1ReaderFromV2File(t, "../testdata/sample-wrapped-v2.car", false)),
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			f, err := os.Open(tt.path)
			require.NoError(t, err)
			t.Cleanup(func() { require.NoError(t, f.Close()) })
			subject, err := storage.OpenReadable(f, carv2.UseWholeCIDs(true))
			require.NoError(t, err)

			idx := subject.Index()
			// Fail with a clear message instead of a nil dereference below.
			require.NotNil(t, idx)

			// Single pass over the wanted CIDs: confirm each one can be looked
			// up, and collect its multihash for the iteration check below.
			expected := make([]multihash.Multihash, 0, len(tt.wantCIDs))
			for _, c := range tt.wantCIDs {
				_, isIdentity, err := store.IsIdentity(c)
				require.NoError(t, err)
				if isIdentity {
					// the index doesn't hold identity CIDs
					continue
				}
				_, err = index.GetFirst(idx, c)
				require.NoError(t, err)
				expected = append(expected, c.Hash())
			}

			// The iteration check only applies to indexes that support it.
			iterable, ok := idx.(index.IterableIndex)
			if !ok {
				return
			}

			var got []multihash.Multihash
			err = iterable.ForEach(func(m multihash.Multihash, _ uint64) error {
				got = append(got, m)
				return nil
			})
			require.NoError(t, err)
			require.ElementsMatch(t, expected, got)
		})
	}
}

// writerOnly wraps an io.Writer in a struct so that only the Write method of
// the wrapped value is promoted onto writerOnly.
// NOTE(review): presumably used to hide any other interfaces (e.g. io.WriterAt
// or io.Seeker) implemented by the underlying value — confirm against usages.
type writerOnly struct {
io.Writer
}
Expand Down Expand Up @@ -1274,3 +1337,44 @@ type simpleBlock struct {
cid cid.Cid
data []byte
}

// newV1Reader builds a carv1.CarReader over r. When zeroLenSectionAsEOF is
// set, the reader is created with carv1.NewCarReaderWithZeroLengthSectionAsEOF;
// otherwise the plain carv1.NewCarReader constructor is used. The
// constructor's result and error are returned unchanged.
func newV1Reader(r io.Reader, zeroLenSectionAsEOF bool) (*carv1.CarReader, error) {
	if !zeroLenSectionAsEOF {
		return carv1.NewCarReader(r)
	}
	return carv1.NewCarReaderWithZeroLengthSectionAsEOF(r)
}

// newV1ReaderFromV1File opens the file at carv1Path and returns a
// carv1.CarReader over it, built via newV1Reader with the given
// zeroLenSectionAsEOF setting. The file is closed through t.Cleanup. Any
// failure to open the file or construct the reader fails the test immediately.
func newV1ReaderFromV1File(t *testing.T, carv1Path string, zeroLenSectionAsEOF bool) *carv1.CarReader {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()
	f, err := os.Open(carv1Path)
	require.NoError(t, err)
	// Best-effort close: the file is only read, so a close error carries no
	// useful signal for the test.
	t.Cleanup(func() { _ = f.Close() })
	v1r, err := newV1Reader(f, zeroLenSectionAsEOF)
	require.NoError(t, err)
	return v1r
}

// newV1ReaderFromV2File opens the file at carv2Path, wraps it in a
// carv2.Reader, and returns a carv1.CarReader over the reader obtained from
// DataReader(), built via newV1Reader with the given zeroLenSectionAsEOF
// setting. The file is closed through t.Cleanup. Any failure along the way
// fails the test immediately.
func newV1ReaderFromV2File(t *testing.T, carv2Path string, zeroLenSectionAsEOF bool) *carv1.CarReader {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()
	f, err := os.Open(carv2Path)
	require.NoError(t, err)
	// Best-effort close: the file is only read, so a close error carries no
	// useful signal for the test.
	t.Cleanup(func() { _ = f.Close() })
	v2r, err := carv2.NewReader(f)
	require.NoError(t, err)
	dr, err := v2r.DataReader()
	require.NoError(t, err)
	v1r, err := newV1Reader(dr, zeroLenSectionAsEOF)
	require.NoError(t, err)
	return v1r
}

// listCids drains v1r by calling Next until it reports io.EOF and returns the
// CID of every block read, in read order. Any other error from Next fails the
// test immediately. The result is nil when the reader yields no blocks.
func listCids(t *testing.T, v1r *carv1.CarReader) (cids []cid.Cid) {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()
	for {
		block, err := v1r.Next()
		if err == io.EOF {
			break
		}
		require.NoError(t, err)
		cids = append(cids, block.Cid())
	}
	return cids
}