Skip to content

Commit

Permalink
fix: BlockMetadata#Offset should be for section, not block data
Browse files Browse the repository at this point in the history
  • Loading branch information
rvagg committed Sep 6, 2023
1 parent 5b2e696 commit 86d6af2
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 12 deletions.
22 changes: 14 additions & 8 deletions v2/block_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,19 +143,25 @@ func (br *BlockReader) Next() (blocks.Block, error) {

// BlockMetadata contains metadata about a block's section in a CAR file/stream.
//
// There are two offsets for the block data which will be the same if the
// original CAR is a CARv1, but will differ if the original CAR is a CARv2. In
// the case of a CARv2, SourceOffset will be the offset from the beginning of
// There are two offsets for the block section which will be the same if the
// original CAR is a CARv1, but will differ if the original CAR is a CARv2.
//
// The block section offset is the position where the CAR section begins; that is,
// the beginning of the length prefix (varint) prior to the CID and the block
// data. Reading the varint at the offset will give the length of the rest of
// the section (CID+data).
//
// In the case of a CARv2, SourceOffset will be the offset from the beginning of
// the file/stream, and Offset will be the offset from the beginning of the CARv1
// payload container within the CARv2.
//
// Offset is useful for index generation which requires an offset from the CARv1
// payload; while SourceOffset is useful for direct block reads out of the
// payload; while SourceOffset is useful for direct section reads out of the
// source file/stream regardless of version.
type BlockMetadata struct {
cid.Cid
Offset uint64 // Offset of the block data in the container CARv1
SourceOffset uint64 // SourceOffset is the offset of block data in the source file/stream
Offset uint64 // Offset of the section data in the container CARv1
SourceOffset uint64 // SourceOffset is the offset of section data in the source file/stream
Size uint64
}

Expand Down Expand Up @@ -185,7 +191,7 @@ func (br *BlockReader) SkipNext() (*BlockMetadata, error) {
}

blockSize := sectionSize - uint64(cidSize)
blockOffset := br.offset + lenSize + uint64(cidSize)
blockOffset := br.offset

// move our reader forward; either by seeking or slurping

Expand Down Expand Up @@ -231,7 +237,7 @@ func (br *BlockReader) SkipNext() (*BlockMetadata, error) {
}
}

br.offset = blockOffset + blockSize
br.offset = br.offset + lenSize + uint64(cidSize) + blockSize

return &BlockMetadata{
Cid: c,
Expand Down
8 changes: 4 additions & 4 deletions v2/block_reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,17 +245,17 @@ func TestBlockReader(t *testing.T) {
vb := make([]byte, 2)
for i := 0; i < 100; i++ {
blk := randBlock(100 + i) // we should cross the varint two-byte boundary in here somewhere
blks[i] = struct {
block blocks.Block
dataOffset uint64
}{block: blk, dataOffset: uint64(v1buf.Len())}
vn := varint.PutUvarint(vb, uint64(len(blk.Cid().Bytes())+len(blk.RawData())))
n, err := v1buf.Write(vb[:vn])
req.NoError(err)
req.Equal(n, vn)
n, err = v1buf.Write(blk.Cid().Bytes())
req.NoError(err)
req.Equal(len(blk.Cid().Bytes()), n)
blks[i] = struct {
block blocks.Block
dataOffset uint64
}{block: blk, dataOffset: uint64(v1buf.Len())}
n, err = v1buf.Write(blk.RawData())
req.NoError(err)
req.Equal(len(blk.RawData()), n)
Expand Down

0 comments on commit 86d6af2

Please sign in to comment.