Skip to content

Commit

Permalink
Improve memory alignment
Browse files Browse the repository at this point in the history
This runs betteralign to pack structs smarter.
Originally proposed by etcd-io#673

Co-authored-by: Manuel Rüger <[email protected]>
Signed-off-by: Thomas Jungblut <[email protected]>
  • Loading branch information
tjungblu and mrueg committed Jul 8, 2024
1 parent 3fd94a4 commit 97dddf5
Show file tree
Hide file tree
Showing 8 changed files with 109 additions and 104 deletions.
2 changes: 1 addition & 1 deletion bucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,6 @@ func cloneBytes(v []byte) []byte {

type BucketStructure struct {
Name string `json:"name"` // name of the bucket
KeyN int `json:"keyN"` // number of key/value pairs
Children []BucketStructure `json:"buckets,omitempty"` // child buckets
KeyN int `json:"keyN"` // number of key/value pairs
}
4 changes: 2 additions & 2 deletions cmd/bbolt/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -370,10 +370,10 @@ func newPageItemCommand(m *Main) *pageItemCommand {
}

type pageItemOptions struct {
format string
help bool
keyOnly bool
valueOnly bool
format string
}

// Run executes the command.
Expand Down Expand Up @@ -1617,8 +1617,8 @@ func (r *BenchResults) OpsPerSecond() int {
}

type PageError struct {
ID int
Err error
ID int
}

func (e *PageError) Error() string {
Expand Down
181 changes: 93 additions & 88 deletions db.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,28 +41,23 @@ type DB struct {
// refer to discussion in https://github.com/etcd-io/bbolt/issues/577.
stats Stats

// When enabled, the database will perform a Check() after every commit.
// A panic is issued if the database is in an inconsistent state. This
// flag has a large performance impact so it should only be used for
// debugging purposes.
StrictMode bool
pagePool sync.Pool

// Setting the NoSync flag will cause the database to skip fsync()
// calls after each commit. This can be useful when bulk loading data
// into a database and you can restart the bulk load in the event of
// a system failure or database corruption. Do not set this flag for
// normal use.
//
// If the package global IgnoreNoSync constant is true, this value is
// ignored. See the comment on that constant for more details.
//
// THIS IS UNSAFE. PLEASE USE WITH CAUTION.
NoSync bool
logger Logger

// When true, skips syncing freelist to disk. This improves the database
// write performance under normal operation, but requires a full database
// re-sync during recovery.
NoFreelistSync bool
openFile func(string, int, os.FileMode) (*os.File, error)
file *os.File
data *[maxMapSize]byte
meta0 *common.Meta
meta1 *common.Meta
rwtx *Tx

freelist *freelist
batch *batch

ops struct {
writeAt func(b []byte, off int64) (n int, err error)
}

// FreelistType sets the backend freelist type. There are two options. Array is simple but suffers
// dramatic performance degradation if the database is large and fragmentation in the freelist is common.
Expand All @@ -71,18 +66,12 @@ type DB struct {
// The default type is array
FreelistType FreelistType

// When true, skips the truncate call when growing the database.
// Setting this to true is only safe on non-ext3/ext4 systems.
// Skipping truncation avoids preallocation of hard drive space and
// bypasses a truncate() and fsync() syscall on remapping.
//
// https://github.com/boltdb/bolt/issues/284
NoGrowSync bool

// When `true`, bbolt will always load the free pages when opening the DB.
// When opening the db in write mode, this flag is always automatically
// set to `true`.
PreLoadFreelist bool
path string
// `dataref` isn't used at all on Windows, and golangci-lint
// always fails on the Windows platform.
//nolint
dataref []byte // mmap'ed readonly, write throws SEGV
txs []*Tx

// If you want to read the entire database fast, you can set MmapFlag to
// syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead.
Expand All @@ -109,46 +98,61 @@ type DB struct {
// of truncate() and fsync() when growing the data file.
AllocSize int

// Mlock locks database file in memory when set to true.
// It prevents major page faults; however, used memory can't be reclaimed.
//
// Supported only on Unix via mlock/munlock syscalls.
Mlock bool

logger Logger

path string
openFile func(string, int, os.FileMode) (*os.File, error)
file *os.File
// `dataref` isn't used at all on Windows, and golangci-lint
// always fails on the Windows platform.
//nolint
dataref []byte // mmap'ed readonly, write throws SEGV
data *[maxMapSize]byte
datasz int
meta0 *common.Meta
meta1 *common.Meta
pageSize int
opened bool
rwtx *Tx
txs []*Tx
mmaplock sync.RWMutex // Protects mmap access during remapping.
statlock sync.RWMutex // Protects stats access.

freelist *freelist
freelistLoad sync.Once

pagePool sync.Pool

batchMu sync.Mutex
batch *batch

rwlock sync.Mutex // Allows only one writer at a time.
metalock sync.Mutex // Protects meta page access.
mmaplock sync.RWMutex // Protects mmap access during remapping.
statlock sync.RWMutex // Protects stats access.
rwlock sync.Mutex // Allows only one writer at a time.
metalock sync.Mutex // Protects meta page access.

ops struct {
writeAt func(b []byte, off int64) (n int, err error)
}
// When enabled, the database will perform a Check() after every commit.
// A panic is issued if the database is in an inconsistent state. This
// flag has a large performance impact so it should only be used for
// debugging purposes.
StrictMode bool

// Setting the NoSync flag will cause the database to skip fsync()
// calls after each commit. This can be useful when bulk loading data
// into a database and you can restart the bulk load in the event of
// a system failure or database corruption. Do not set this flag for
// normal use.
//
// If the package global IgnoreNoSync constant is true, this value is
// ignored. See the comment on that constant for more details.
//
// THIS IS UNSAFE. PLEASE USE WITH CAUTION.
NoSync bool

// When true, skips syncing freelist to disk. This improves the database
// write performance under normal operation, but requires a full database
// re-sync during recovery.
NoFreelistSync bool

// When true, skips the truncate call when growing the database.
// Setting this to true is only safe on non-ext3/ext4 systems.
// Skipping truncation avoids preallocation of hard drive space and
// bypasses a truncate() and fsync() syscall on remapping.
//
// https://github.com/boltdb/bolt/issues/284
NoGrowSync bool

// When `true`, bbolt will always load the free pages when opening the DB.
// When opening the db in write mode, this flag is always automatically
// set to `true`.
PreLoadFreelist bool

// Mlock locks database file in memory when set to true.
// It prevents major page faults; however, used memory can't be reclaimed.
//
// Supported only on Unix via mlock/munlock syscalls.
Mlock bool

opened bool

// Read only mode.
// When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately.
Expand Down Expand Up @@ -995,8 +999,8 @@ type call struct {
type batch struct {
db *DB
timer *time.Timer
start sync.Once
calls []call
start sync.Once
}

// trigger runs the batch if it hasn't already been run.
Expand Down Expand Up @@ -1263,21 +1267,13 @@ func (db *DB) freepages() []common.Pgid {

// Options represents the options that can be set when opening a database.
type Options struct {
// Timeout is the amount of time to wait to obtain a file lock.
// When set to zero it will wait indefinitely.
Timeout time.Duration

// Sets the DB.NoGrowSync flag before memory mapping the file.
NoGrowSync bool

// Do not sync freelist to disk. This improves the database write performance
// under normal operation, but requires a full database re-sync during recovery.
NoFreelistSync bool
// Logger is the logger used for bbolt.
Logger Logger

// PreLoadFreelist sets whether to load the free pages when opening
// the db file. Note that when opening the db in write mode, bbolt will
// always load the free pages.
PreLoadFreelist bool
// OpenFile is used to open files. It defaults to os.OpenFile. This option
// is useful for writing hermetic tests.
OpenFile func(string, int, os.FileMode) (*os.File, error)

// FreelistType sets the backend freelist type. There are two options. Array is simple but suffers
// dramatic performance degradation if the database is large and fragmentation in the freelist is common.
Expand All @@ -1286,9 +1282,9 @@ type Options struct {
// The default type is array
FreelistType FreelistType

// Open database in read-only mode. Uses flock(..., LOCK_SH | LOCK_NB) to
// grab a shared lock (UNIX).
ReadOnly bool
// Timeout is the amount of time to wait to obtain a file lock.
// When set to zero it will wait indefinitely.
Timeout time.Duration

// Sets the DB.MmapFlags flag before memory mapping the file.
MmapFlags int
Expand All @@ -1306,22 +1302,31 @@ type Options struct {
// PageSize overrides the default OS page size.
PageSize int

// Sets the DB.NoGrowSync flag before memory mapping the file.
NoGrowSync bool

// Do not sync freelist to disk. This improves the database write performance
// under normal operation, but requires a full database re-sync during recovery.
NoFreelistSync bool

// PreLoadFreelist sets whether to load the free pages when opening
// the db file. Note that when opening the db in write mode, bbolt will
// always load the free pages.
PreLoadFreelist bool

// Open database in read-only mode. Uses flock(..., LOCK_SH | LOCK_NB) to
// grab a shared lock (UNIX).
ReadOnly bool

// NoSync sets the initial value of DB.NoSync. Normally this can just be
// set directly on the DB itself when returned from Open(), but this option
// is useful in APIs which expose Options but not the underlying DB.
NoSync bool

// OpenFile is used to open files. It defaults to os.OpenFile. This option
// is useful for writing hermetic tests.
OpenFile func(string, int, os.FileMode) (*os.File, error)

// Mlock locks database file in memory when set to true.
// It prevents potential page faults; however,
// used memory can't be reclaimed. (UNIX only)
Mlock bool

// Logger is the logger used for bbolt.
Logger Logger
}

func (o *Options) String() string {
Expand Down
6 changes: 3 additions & 3 deletions freelist.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@ type pidSet map[common.Pgid]struct{}
// freelist represents a list of all pages that are available for allocation.
// It also tracks pages that have been freed but are still in use by open transactions.
type freelist struct {
freelistType FreelistType // freelist type
ids []common.Pgid // all free and available free page ids.
readonlyTXIDs []common.Txid // all readonly transaction IDs.
allocs map[common.Pgid]common.Txid // mapping of Txid that allocated a pgid.
pending map[common.Txid]*txPending // mapping of soon-to-be free page ids by tx.
cache map[common.Pgid]struct{} // fast lookup of all free and pending page ids.
Expand All @@ -38,6 +35,9 @@ type freelist struct {
mergeSpans func(ids common.Pgids) // the mergeSpan func
getFreePageIDs func() []common.Pgid // get free pgids func
readIDs func(pgids []common.Pgid) // readIDs func reads a list of pages and initializes the freelist
freelistType FreelistType // freelist type
ids []common.Pgid // all free and available free page ids.
readonlyTXIDs []common.Txid // all readonly transaction IDs.
}

// newFreelist returns an empty, initialized freelist.
Expand Down
4 changes: 2 additions & 2 deletions internal/btesting/btesting.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ const (

// DB is a test wrapper for bolt.DB.
type DB struct {
t testing.TB
*bolt.DB
f string
o *bolt.Options
t testing.TB
f string
}

// MustCreateDB returns a new, open DB at a temporary location.
Expand Down
4 changes: 2 additions & 2 deletions internal/common/inode.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ import "unsafe"
// It can be used to point to elements in a page or point
// to an element which hasn't been added to a page yet.
type Inode struct {
flags uint32
pgid Pgid
key []byte
value []byte
pgid Pgid
flags uint32
}

type Inodes []Inode
Expand Down
2 changes: 1 addition & 1 deletion internal/common/page.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,8 +322,8 @@ func (n *leafPageElement) Bucket() *InBucket {

// PageInfo represents human readable information about a page.
type PageInfo struct {
ID int
Type string
ID int
Count int
OverflowCount int
}
Expand Down
10 changes: 5 additions & 5 deletions node.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ import (
// node represents an in-memory, deserialized page.
type node struct {
bucket *Bucket
isLeaf bool
unbalanced bool
spilled bool
key []byte
pgid common.Pgid
parent *node
key []byte
children nodes
inodes common.Inodes
pgid common.Pgid
isLeaf bool
unbalanced bool
spilled bool
}

// root returns the top-level node this node is attached to.
Expand Down

0 comments on commit 97dddf5

Please sign in to comment.