Skip to content

Commit

Permalink
GH-41159: [Go][Parquet] Improvement Parquet BitWriter WriteVlqInt Per…
Browse files Browse the repository at this point in the history
…formance (#41160)

[GH-41159](#41159)
### Rationale for this change

This change improves Parquet FileWriter performance when writing Parquet files from Arrow records.
We saw write throughput improve from 320k rows/sec to 650k rows/sec after making this change.

### What changes are included in this PR?
This PR moves the varint scratch buffer into a `buf` field on `BitWriter`, so `WriteVlqInt` reuses it on every call instead of declaring a new local array each time.

### Are these changes tested?
Yes

### Are there any user-facing changes?
No

Authored-by: @hhoughgg

* GitHub Issue: #41159

Lead-authored-by: Andy Fan <[email protected]>
Co-authored-by: andyfan <[email protected]>
Signed-off-by: Matt Topol <[email protected]>
  • Loading branch information
DuanWeiFan authored Apr 12, 2024
1 parent 48a9639 commit ec2d7cb
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 3 deletions.
17 changes: 17 additions & 0 deletions go/parquet/internal/utils/bit_reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,23 @@ func TestBitWriter(t *testing.T) {

assert.Equal(t, byte(0xAA), buf[0])
assert.Equal(t, byte(0xCC), buf[1])

for i := 0; i < 3; i++ {
assert.True(t, bw.WriteVlqInt(uint64(i)))
}
assert.Equal(t, byte(0xAA), buf[0])
assert.Equal(t, byte(0xCC), buf[1])
assert.Equal(t, byte(0), buf[2])
assert.Equal(t, byte(1), buf[3])
assert.Equal(t, byte(2), buf[4])
}

// BenchmarkBitWriter measures the cost of repeated BitWriter.WriteVlqInt calls
// that each write the value 1 into a pre-allocated in-memory buffer.
func BenchmarkBitWriter(b *testing.B) {
	// One byte per iteration is sufficient: the varint encoding of 1 occupies
	// a single byte.
	buf := make([]byte, b.N)
	bw := utils.NewBitWriter(utils.NewWriterAtBuffer(buf))

	// Report allocations and exclude the setup above from the timed region so
	// the numbers reflect WriteVlqInt alone.
	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// A plain check keeps assertion-library overhead out of the hot loop.
		if !bw.WriteVlqInt(1) {
			b.Fatal("WriteVlqInt failed")
		}
	}
}

func TestBitReader(t *testing.T) {
Expand Down
6 changes: 3 additions & 3 deletions go/parquet/internal/utils/bit_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ type BitWriter struct {
byteoffset int
bitoffset uint
raw [8]byte
buf [binary.MaxVarintLen64]byte
}

// NewBitWriter initializes a new bit writer to write to the passed in interface
Expand Down Expand Up @@ -163,9 +164,8 @@ func (b *BitWriter) WriteAligned(val uint64, nbytes int) bool {
// without buffering.
func (b *BitWriter) WriteVlqInt(v uint64) bool {
b.Flush(true)
var buf [binary.MaxVarintLen64]byte
nbytes := binary.PutUvarint(buf[:], v)
if _, err := b.wr.WriteAt(buf[:nbytes], int64(b.byteoffset)); err != nil {
nbytes := binary.PutUvarint(b.buf[:], v)
if _, err := b.wr.WriteAt(b.buf[:nbytes], int64(b.byteoffset)); err != nil {
log.Println(err)
return false
}
Expand Down

0 comments on commit ec2d7cb

Please sign in to comment.