Skip to content

Commit

Permalink
add Bao functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
lukechampine committed May 15, 2023
1 parent 85fa20d commit f9980aa
Show file tree
Hide file tree
Showing 8 changed files with 282 additions and 4 deletions.
146 changes: 146 additions & 0 deletions bao.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
package blake3

import (
"bytes"
"encoding/binary"
"io"
"math/bits"
)

// BaoEncode computes the intermediate BLAKE3 tree hashes of data and writes
// them to dst. If outboard is false, the contents of data are also written to
// dst, interleaved with the tree hashes. It also returns the tree root, i.e.
// the 256-bit BLAKE3 hash.
func BaoEncode(dst io.WriterAt, data io.Reader, dataLen int64, outboard bool) ([32]byte, error) {
	var counter uint64
	var chunkBuf [chunkSize]byte
	var err error
	// read and write are sticky-error helpers: after the first failure they
	// become no-ops, so the recursion below can run to completion and the
	// error is reported once at the end.
	read := func(p []byte) []byte {
		if err == nil {
			_, err = io.ReadFull(data, p)
		}
		return p
	}
	write := func(p []byte, off uint64) {
		if err == nil {
			_, err = dst.WriteAt(p, int64(off))
		}
	}

	// NOTE: unlike the reference implementation, we write directly in
	// pre-order, rather than writing in post-order and then flipping. This cuts
	// the I/O required in half, but also makes hashing multiple chunks in SIMD
	// a lot trickier. I'll save that optimization for a rainy day.
	//
	// rec encodes the subtree covering the next bufLen bytes of data, writing
	// at byte offset off in dst. It returns the number of 32-byte chaining
	// values written within the subtree, along with the subtree's root CV.
	var rec func(bufLen uint64, flags uint32, off uint64) (uint64, [8]uint32)
	rec = func(bufLen uint64, flags uint32, off uint64) (uint64, [8]uint32) {
		if err != nil {
			return 0, [8]uint32{}
		} else if bufLen <= chunkSize {
			// Leaf: hash a single chunk, and (in interleaved mode) copy its
			// contents into the encoding.
			cv := chainingValue(compressChunk(read(chunkBuf[:bufLen]), &iv, counter, flags))
			counter++
			if !outboard {
				write(chunkBuf[:bufLen], off)
			}
			return 0, cv
		}
		// Interior node: split at the largest power of two strictly less
		// than bufLen, per the BLAKE3/Bao tree structure.
		mid := uint64(1) << (bits.Len64(bufLen-1) - 1)
		// The 64 bytes at off hold this node's two child CVs; the left
		// subtree is laid out immediately after them (pre-order), followed
		// by the right subtree.
		lchildren, l := rec(mid, 0, off+64)
		llen := lchildren * 32
		if !outboard {
			// In interleaved mode, the left subtree also contains mid bytes
			// of content (mid is a multiple of chunkSize here).
			llen += (mid / chunkSize) * chunkSize
		}
		rchildren, r := rec(bufLen-mid, 0, off+64+llen)
		write(cvToBytes(&l)[:], off)
		write(cvToBytes(&r)[:], off+32)
		return 2 + lchildren + rchildren, chainingValue(parentNode(l, r, iv, flags))
	}

	// The encoding begins with the content length as a little-endian uint64,
	// followed by the tree. chunkBuf doubles as scratch space for the prefix.
	binary.LittleEndian.PutUint64(chunkBuf[:8], uint64(dataLen))
	write(chunkBuf[:8], 0)
	_, root := rec(uint64(dataLen), flagRoot, 8)
	return *cvToBytes(&root), err
}

// BaoDecode reads content and tree data from the provided reader(s), and
// streams the verified content to dst. It returns false if verification fails.
// If the content and tree data are interleaved, outboard should be nil.
func BaoDecode(dst io.Writer, data, outboard io.Reader, root [32]byte) (bool, error) {
	if outboard == nil {
		outboard = data
	}
	var counter uint64
	var buf [chunkSize]byte
	var err error
	// read and write are sticky-error helpers: after the first failure they
	// become no-ops, and the error is reported once at the end.
	read := func(r io.Reader, p []byte) []byte {
		if err == nil {
			_, err = io.ReadFull(r, p)
		}
		return p
	}
	write := func(p []byte) {
		if err == nil {
			_, err = dst.Write(p)
		}
	}
	readParent := func() (l, r [8]uint32) {
		read(outboard, buf[:64])
		return bytesToCV(buf[:32]), bytesToCV(buf[32:])
	}

	// rec verifies the subtree covering the next bufLen bytes of content
	// against its expected chaining value cv, streaming verified chunks to
	// dst as it goes.
	var rec func(cv [8]uint32, bufLen uint64, flags uint32) bool
	rec = func(cv [8]uint32, bufLen uint64, flags uint32) bool {
		if err != nil {
			return false
		} else if bufLen <= chunkSize {
			// Leaf: verify the chunk before forwarding it to dst. Parent CVs
			// are verified before descending (below), so any chunk written
			// here is authenticated all the way up to the root.
			n := compressChunk(read(data, buf[:bufLen]), &iv, counter, flags)
			counter++
			if cv != chainingValue(n) {
				return false
			}
			// BUG FIX: previously, dst was never written to, despite the
			// documented contract that verified content is streamed to it.
			write(buf[:bufLen])
			return true
		}
		// Interior node: check that the two child CVs hash to the expected
		// parent CV before trusting either subtree (&& short-circuits).
		l, r := readParent()
		n := parentNode(l, r, iv, flags)
		mid := uint64(1) << (bits.Len64(bufLen-1) - 1)
		return chainingValue(n) == cv && rec(l, mid, 0) && rec(r, bufLen-mid, 0)
	}

	// The encoding begins with the content length, which determines the
	// shape of the tree.
	read(outboard, buf[:8])
	dataLen := binary.LittleEndian.Uint64(buf[:8])
	ok := rec(bytesToCV(root[:]), dataLen, flagRoot)
	return ok, err
}

// bufferAt implements io.WriterAt on top of a fixed-size in-memory buffer.
// The buffer must be allocated to its full final size up front.
type bufferAt struct {
	buf []byte
}

// WriteAt copies p into the buffer at offset off. It panics if the write
// would extend past the end of the buffer, since that indicates the buffer
// was sized incorrectly by the caller.
func (b *bufferAt) WriteAt(p []byte, off int64) (int, error) {
	n := copy(b.buf[off:], p)
	if n != len(p) {
		panic("bad buffer size")
	}
	return n, nil
}

// baoOutboardSize returns the size of the outboard Bao encoding for dataLen
// bytes of content: an 8-byte length prefix, plus 32 bytes per chaining
// value stored in the tree.
func baoOutboardSize(dataLen int) int {
	// The empty input has no tree at all, just the length prefix.
	if dataLen == 0 {
		return 8
	}
	// A binary tree over n leaf chunks has n-1 parent nodes, and each parent
	// stores its two children's CVs, i.e. 2*(n-1) CVs in total.
	numChunks := (dataLen + chunkSize - 1) / chunkSize
	return 8 + 32*(2*numChunks-2)
}

// BaoEncodeBuf returns the Bao encoding and root (i.e. BLAKE3 hash) for data.
func BaoEncodeBuf(data []byte, outboard bool) ([]byte, [32]byte) {
	size := baoOutboardSize(len(data))
	if !outboard {
		// Interleaved encodings also contain the content itself.
		size += len(data)
	}
	dst := bufferAt{buf: make([]byte, size)}
	// The buffer is pre-sized exactly, so BaoEncode cannot fail.
	root, _ := BaoEncode(&dst, bytes.NewReader(data), int64(len(data)), outboard)
	return dst.buf, root
}

// BaoVerifyBuf verifies the Bao encoding and root (i.e. BLAKE3 hash) for data.
// If the content and tree data are interleaved, outboard should be nil.
func BaoVerifyBuf(data, outboard []byte, root [32]byte) bool {
	// Pass a genuinely nil io.Reader when outboard is nil, so that BaoDecode
	// treats the encoding as interleaved.
	var or io.Reader
	if outboard != nil {
		or = bytes.NewReader(outboard)
	}
	ok, _ := BaoDecode(io.Discard, bytes.NewReader(data), or, root)
	return ok
}
107 changes: 107 additions & 0 deletions bao_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package blake3_test

import (
"bytes"
"os"
"testing"

"lukechampine.com/blake3"
)

// TestBaoGolden checks the Bao encoder against golden outputs produced by the
// reference implementation, covering both the interleaved and outboard
// encodings as well as the empty-input edge case.
func TestBaoGolden(t *testing.T) {
	data, err := os.ReadFile("testdata/vectors.json")
	if err != nil {
		t.Fatal(err)
	}
	goldenInterleaved, err := os.ReadFile("testdata/bao-golden.bao")
	if err != nil {
		t.Fatal(err)
	}
	goldenOutboard, err := os.ReadFile("testdata/bao-golden.obao")
	if err != nil {
		t.Fatal(err)
	}

	interleaved, root := blake3.BaoEncodeBuf(data, false)
	if toHex(root[:]) != "6654fbd1836b531b25e2782c9cc9b792c80abb36b024f59db5d5f6bd3187ddfe" {
		t.Errorf("bad root: %x", root)
	} else if !bytes.Equal(interleaved, goldenInterleaved) {
		t.Error("bad interleaved encoding")
	}

	outboard, root := blake3.BaoEncodeBuf(data, true)
	if toHex(root[:]) != "6654fbd1836b531b25e2782c9cc9b792c80abb36b024f59db5d5f6bd3187ddfe" {
		t.Errorf("bad root: %x", root)
	} else if !bytes.Equal(outboard, goldenOutboard) {
		t.Error("bad outboard encoding")
	}

	// test empty input
	interleaved, root = blake3.BaoEncodeBuf(nil, false)
	if toHex(root[:]) != "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262" {
		t.Errorf("bad root: %x", root)
	} else if toHex(interleaved[:]) != "0000000000000000" {
		t.Errorf("bad interleaved encoding: %x", interleaved)
	} else if !blake3.BaoVerifyBuf(interleaved, nil, root) {
		t.Error("verify failed")
	}
	// BUG FIX: this previously called BaoEncodeBuf(nil, false), re-testing
	// the interleaved path; the outboard path requires outboard == true.
	// (The two encodings happen to coincide for empty input, so the expected
	// values are unchanged.)
	outboard, root = blake3.BaoEncodeBuf(nil, true)
	if toHex(root[:]) != "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262" {
		t.Errorf("bad root: %x", root)
	} else if toHex(outboard[:]) != "0000000000000000" {
		t.Errorf("bad outboard encoding: %x", outboard)
	} else if !blake3.BaoVerifyBuf(nil, outboard, root) {
		t.Error("verify failed")
	}
}

// TestBaoInterleaved round-trips an interleaved Bao encoding and confirms
// that corrupting any region of it (root, length prefix, tree CVs, content)
// causes verification to fail.
func TestBaoInterleaved(t *testing.T) {
	data, _ := os.ReadFile("testdata/vectors.json")
	interleaved, root := blake3.BaoEncodeBuf(data, false)
	if !blake3.BaoVerifyBuf(interleaved, nil, root) {
		t.Fatal("verify failed")
	}

	// corrupt returns a copy of b with the byte at index i bit-flipped.
	corrupt := func(b []byte, i int) []byte {
		c := append([]byte(nil), b...)
		c[i] ^= 1
		return c
	}

	badRoot := root
	badRoot[0] ^= 1
	if blake3.BaoVerifyBuf(interleaved, nil, badRoot) {
		t.Fatal("verify succeeded with bad root")
	}
	if blake3.BaoVerifyBuf(corrupt(interleaved, 0), nil, root) {
		t.Fatal("verify succeeded with bad length prefix")
	}
	if blake3.BaoVerifyBuf(corrupt(interleaved, 8), nil, root) {
		t.Fatal("verify succeeded with bad cv data")
	}
	if blake3.BaoVerifyBuf(corrupt(interleaved, len(interleaved)-1), nil, root) {
		t.Fatal("verify succeeded with bad content")
	}
}

// TestBaoOutboard round-trips an outboard Bao encoding and confirms that
// corrupting the root, length prefix, or tree CVs causes verification to fail.
func TestBaoOutboard(t *testing.T) {
	data, _ := os.ReadFile("testdata/vectors.json")
	outboard, root := blake3.BaoEncodeBuf(data, true)
	if !blake3.BaoVerifyBuf(data, outboard, root) {
		t.Fatal("verify failed")
	}

	// corrupt returns a copy of b with the byte at index i bit-flipped.
	corrupt := func(b []byte, i int) []byte {
		c := append([]byte(nil), b...)
		c[i] ^= 1
		return c
	}

	badRoot := root
	badRoot[0] ^= 1
	if blake3.BaoVerifyBuf(data, outboard, badRoot) {
		t.Fatal("verify succeeded with bad root")
	}
	if blake3.BaoVerifyBuf(data, corrupt(outboard, 0), root) {
		t.Fatal("verify succeeded with bad length prefix")
	}
	if blake3.BaoVerifyBuf(data, corrupt(outboard, 8), root) {
		t.Fatal("verify succeeded with bad cv data")
	}
}
6 changes: 3 additions & 3 deletions blake3_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
"encoding/hex"
"encoding/json"
"io"
"io/ioutil"
"os"
"testing"

"lukechampine.com/blake3"
Expand All @@ -22,7 +22,7 @@ var testVectors = func() (vecs struct {
DeriveKey string `json:"derive_key"`
}
}) {
data, err := ioutil.ReadFile("testdata/vectors.json")
data, err := os.ReadFile("testdata/vectors.json")
if err != nil {
panic(err)
}
Expand Down Expand Up @@ -197,7 +197,7 @@ func BenchmarkWrite(b *testing.B) {
func BenchmarkXOF(b *testing.B) {
b.ReportAllocs()
b.SetBytes(1024)
io.CopyN(ioutil.Discard, blake3.New(0, nil).XOF(), int64(b.N*1024))
io.CopyN(io.Discard, blake3.New(0, nil).XOF(), int64(b.N*1024))
}

func BenchmarkSum256(b *testing.B) {
Expand Down
8 changes: 8 additions & 0 deletions compress_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,11 @@ func mergeSubtrees(cvs *[maxSIMD][8]uint32, numCVs uint64, key *[8]uint32, flags
// wordsToBytes serializes sixteen compression-output words into a 64-byte
// block. The direct cast is safe here: amd64 is little-endian, and
// [16]uint32 and [64]byte have identical size.
func wordsToBytes(words [16]uint32, block *[64]byte) {
	*block = *(*[64]byte)(unsafe.Pointer(&words))
}

// bytesToCV interprets the first 32 bytes of b as a chaining value of eight
// little-endian uint32 words. b must be at least 32 bytes long; unaligned
// loads are permitted on amd64.
func bytesToCV(b []byte) [8]uint32 {
	return *(*[8]uint32)(unsafe.Pointer(&b[0]))
}

// cvToBytes reinterprets a chaining value as its 32-byte little-endian
// serialization. The returned pointer aliases cv's memory — it is a view,
// not a copy, so it is only valid as long as cv is.
func cvToBytes(cv *[8]uint32) *[32]byte {
	return (*[32]byte)(unsafe.Pointer(cv))
}
17 changes: 17 additions & 0 deletions compress_noasm.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//go:build !amd64
// +build !amd64

package blake3
Expand Down Expand Up @@ -74,3 +75,19 @@ func wordsToBytes(words [16]uint32, block *[64]byte) {
binary.LittleEndian.PutUint32(block[4*i:], w)
}
}

// bytesToCV decodes the first 32 bytes of b as a chaining value of eight
// little-endian uint32 words. (Portable fallback for non-amd64 platforms.)
func bytesToCV(b []byte) [8]uint32 {
	var cv [8]uint32
	for i := 0; i < len(cv); i++ {
		cv[i] = binary.LittleEndian.Uint32(b[4*i : 4*i+4])
	}
	return cv
}

// cvToBytes serializes a chaining value as 32 little-endian bytes.
// (Portable fallback for non-amd64 platforms; returns a fresh copy.)
func cvToBytes(cv *[8]uint32) *[32]byte {
	var out [32]byte
	for i := 0; i < len(cv); i++ {
		binary.LittleEndian.PutUint32(out[4*i:4*i+4], cv[i])
	}
	return &out
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module lukechampine.com/blake3

go 1.13
go 1.17

require github.com/klauspost/cpuid/v2 v2.0.9
Binary file added testdata/bao-golden.bao
Binary file not shown.
Binary file added testdata/bao-golden.obao
Binary file not shown.

0 comments on commit f9980aa

Please sign in to comment.