Skip to content

Commit

Permalink
refactor: Generate field ids using a sequence (#2339)
Browse files Browse the repository at this point in the history
## Relevant issue(s)

Resolves #2333

## Description

Generates field ids using a sequence instead of their index in the
array.

This is required for allowing field deletion and schema branching as
otherwise those two will alter the position and thus id of fields. For
example, previously if two schema branches were to add a new field each,
the new fields would share the same id and mess the datastore cache up
in interesting ways.
  • Loading branch information
AndrewSisley committed Feb 28, 2024
1 parent db75564 commit 9248cbc
Show file tree
Hide file tree
Showing 9 changed files with 129 additions and 48 deletions.
70 changes: 56 additions & 14 deletions core/key.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ const (
COLLECTION_INDEX = "/collection/index"
SCHEMA_VERSION = "/schema/version/v"
SCHEMA_VERSION_ROOT = "/schema/version/r"
SEQ = "/seq"
COLLECTION_SEQ = "/seq/collection"
INDEX_ID_SEQ = "/seq/index"
FIELD_ID_SEQ = "/seq/field"
PRIMARY_KEY = "/pk"
DATASTORE_DOC_VERSION_FIELD_ID = "v"
REPLICATOR = "/replicator/id"
Expand Down Expand Up @@ -165,11 +167,29 @@ type P2PCollectionKey struct {

var _ Key = (*P2PCollectionKey)(nil)

type SequenceKey struct {
SequenceName string
// CollectionIDSequenceKey is used to key the sequence used to generate collection ids.
type CollectionIDSequenceKey struct{}

var _ Key = (*CollectionIDSequenceKey)(nil)

// IndexIDSequenceKey is used to key the sequence used to generate index ids.
//
// The sequence is specific to each collection version.
type IndexIDSequenceKey struct {
CollectionID uint32
}

var _ Key = (*SequenceKey)(nil)
var _ Key = (*IndexIDSequenceKey)(nil)

// FieldIDSequenceKey is used to key the sequence used to generate field ids.
//
// The sequence is specific to each collection root. Multiple collection of the same root
// must maintain consistent field ids.
type FieldIDSequenceKey struct {
CollectionRoot uint32
}

var _ Key = (*FieldIDSequenceKey)(nil)

type ReplicatorKey struct {
ReplicatorID string
Expand Down Expand Up @@ -364,8 +384,12 @@ func NewSchemaRootKeyFromString(keyString string) (SchemaRootKey, error) {
}, nil
}

func NewSequenceKey(name string) SequenceKey {
return SequenceKey{SequenceName: name}
func NewIndexIDSequenceKey(collectionID uint32) IndexIDSequenceKey {
return IndexIDSequenceKey{CollectionID: collectionID}
}

func NewFieldIDSequenceKey(collectionRoot uint32) FieldIDSequenceKey {
return FieldIDSequenceKey{CollectionRoot: collectionRoot}
}

func (k DataStoreKey) WithValueFlag() DataStoreKey {
Expand Down Expand Up @@ -690,21 +714,39 @@ func (k SchemaRootKey) ToDS() ds.Key {
return ds.NewKey(k.ToString())
}

func (k SequenceKey) ToString() string {
result := SEQ
func (k CollectionIDSequenceKey) ToString() string {
return COLLECTION_SEQ
}

if k.SequenceName != "" {
result = result + "/" + k.SequenceName
}
func (k CollectionIDSequenceKey) Bytes() []byte {
return []byte(k.ToString())
}

return result
func (k CollectionIDSequenceKey) ToDS() ds.Key {
return ds.NewKey(k.ToString())
}

func (k IndexIDSequenceKey) ToString() string {
return INDEX_ID_SEQ + "/" + strconv.Itoa(int(k.CollectionID))
}

func (k IndexIDSequenceKey) Bytes() []byte {
return []byte(k.ToString())
}

func (k IndexIDSequenceKey) ToDS() ds.Key {
return ds.NewKey(k.ToString())
}

func (k FieldIDSequenceKey) ToString() string {
return FIELD_ID_SEQ + "/" + strconv.Itoa(int(k.CollectionRoot))
}

func (k SequenceKey) Bytes() []byte {
func (k FieldIDSequenceKey) Bytes() []byte {
return []byte(k.ToString())
}

func (k SequenceKey) ToDS() ds.Key {
func (k FieldIDSequenceKey) ToDS() ds.Key {
return ds.NewKey(k.ToString())
}

Expand Down
79 changes: 58 additions & 21 deletions db/collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,14 +107,20 @@ func (db *db) createCollection(
}
}

colSeq, err := db.getSequence(ctx, txn, core.COLLECTION)
colSeq, err := db.getSequence(ctx, txn, core.CollectionIDSequenceKey{})
if err != nil {
return nil, err
}
colID, err := colSeq.next(ctx, txn)
if err != nil {
return nil, err
}

fieldSeq, err := db.getSequence(ctx, txn, core.NewFieldIDSequenceKey(uint32(colID)))
if err != nil {
return nil, err
}

desc.ID = uint32(colID)
desc.RootID = desc.ID

Expand All @@ -123,14 +129,25 @@ func (db *db) createCollection(
return nil, err
}
desc.SchemaVersionID = schema.VersionID
for i, globalField := range schema.Fields {
for _, globalField := range schema.Fields {
var fieldID uint64
if globalField.Name == request.DocIDFieldName {
// There is no hard technical requirement for this, we just think it looks nicer
// if the doc id is at the zero index. It makes it look a little nicer in commit
// queries too.
fieldID = 0
} else {
fieldID, err = fieldSeq.next(ctx, txn)
if err != nil {
return nil, err
}
}

desc.Fields = append(
desc.Fields,
client.CollectionFieldDescription{
Name: globalField.Name,
// For now just set the field id to it's index. This does not work
// for branching schema and field deletion.
ID: client.FieldID(i),
ID: client.FieldID(fieldID),
},
)
}
Expand Down Expand Up @@ -218,7 +235,7 @@ func (db *db) updateSchema(
return err
}

colSeq, err := db.getSequence(ctx, txn, core.COLLECTION)
colSeq, err := db.getSequence(ctx, txn, core.CollectionIDSequenceKey{})
if err != nil {
return err
}
Expand Down Expand Up @@ -247,14 +264,32 @@ func (db *db) updateSchema(
if existingCol.RootID == client.OrphanRootID {
existingCol.RootID = col.RootID
}
for i, globalField := range schema.Fields {

fieldSeq, err := db.getSequence(ctx, txn, core.NewFieldIDSequenceKey(existingCol.RootID))
if err != nil {
return err
}

for _, globalField := range schema.Fields {
var fieldID client.FieldID
// We must check the source collection if the field already exists, and take its ID
// from there, otherwise the field must be generated by the sequence.
existingField, ok := col.GetFieldByName(globalField.Name)
if ok {
fieldID = existingField.ID
} else {
nextFieldID, err := fieldSeq.next(ctx, txn)
if err != nil {
return err
}
fieldID = client.FieldID(nextFieldID)
}

existingCol.Fields = append(
existingCol.Fields,
client.CollectionFieldDescription{
Name: globalField.Name,
// For now just set the field id to it's index. This does not work
// for branching schema and field deletion.
ID: client.FieldID(i),
ID: fieldID,
},
)
}
Expand All @@ -274,6 +309,11 @@ func (db *db) updateSchema(
return err
}

fieldSeq, err := db.getSequence(ctx, txn, core.NewFieldIDSequenceKey(col.RootID))
if err != nil {
return err
}

// Create any new collections without a name (inactive), if [setAsActiveVersion] is true
// they will be activated later along with any existing collection versions.
col.Name = immutable.None[string]()
Expand All @@ -286,22 +326,19 @@ func (db *db) updateSchema(
},
}

for i, globalField := range schema.Fields {
isNew := true
for _, localField := range col.Fields {
if localField.Name == globalField.Name {
isNew = false
break
for _, globalField := range schema.Fields {
_, exists := col.GetFieldByName(globalField.Name)
if !exists {
fieldID, err := fieldSeq.next(ctx, txn)
if err != nil {
return err
}
}
if isNew {

col.Fields = append(
col.Fields,
client.CollectionFieldDescription{
Name: globalField.Name,
// For now just set the field id to it's index. This does not work
// for branching schema and field deletion.
ID: client.FieldID(i),
ID: client.FieldID(fieldID),
},
)
}
Expand Down
6 changes: 5 additions & 1 deletion db/collection_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,11 @@ func (c *collection) createIndex(
return nil, err
}

colSeq, err := c.db.getSequence(ctx, txn, fmt.Sprintf("%s/%d", core.COLLECTION_INDEX, c.ID()))
colSeq, err := c.db.getSequence(
ctx,
txn,
core.NewIndexIDSequenceKey(c.ID()),
)
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion db/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ func (db *db) initialize(ctx context.Context) error {

// init meta data
// collection sequence
_, err = db.getSequence(ctx, txn, core.COLLECTION)
_, err = db.getSequence(ctx, txn, core.CollectionIDSequenceKey{})
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion db/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ func TestCreateIndex_WithMultipleCollectionsAndIndexes_AssignIncrementedIDPerCol
desc, err := f.createCollectionIndexFor(col.Name().Value(), makeIndex(fieldName))
require.NoError(t, err)
assert.Equal(t, expectedID, desc.ID)
seqKey := core.NewSequenceKey(fmt.Sprintf("%s/%d", core.COLLECTION_INDEX, col.ID()))
seqKey := core.NewIndexIDSequenceKey(col.ID())
storedSeqKey, err := f.txn.Systemstore().Get(f.ctx, seqKey.ToDS())
assert.NoError(t, err)
storedSeqVal := binary.BigEndian.Uint64(storedSeqKey)
Expand Down
3 changes: 1 addition & 2 deletions db/indexed_docs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import (
"context"
"encoding/json"
"errors"
"fmt"
"testing"

ipfsDatastore "github.com/ipfs/go-datastore"
Expand Down Expand Up @@ -261,7 +260,7 @@ func (f *indexTestFixture) stubSystemStore(systemStoreOn *mocks.DSReaderWriter_E
systemStoreOn.Get(mock.Anything, colIndexOnNameKey.ToDS()).Maybe().Return(indexOnNameDescData, nil)

if f.users != nil {
sequenceKey := core.NewSequenceKey(fmt.Sprintf("%s/%d", core.COLLECTION_INDEX, f.users.ID()))
sequenceKey := core.NewIndexIDSequenceKey(f.users.ID())
systemStoreOn.Get(mock.Anything, sequenceKey.ToDS()).Maybe().Return([]byte{0, 0, 0, 0, 0, 0, 0, 1}, nil)
}

Expand Down
2 changes: 1 addition & 1 deletion db/lens.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func (db *db) setMigration(ctx context.Context, txn datastore.Txn, cfg client.Le
return err
}

colSeq, err := db.getSequence(ctx, txn, core.COLLECTION)
colSeq, err := db.getSequence(ctx, txn, core.CollectionIDSequenceKey{})
if err != nil {
return err
}
Expand Down
10 changes: 3 additions & 7 deletions db/sequence.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,13 @@ import (
)

type sequence struct {
key core.SequenceKey
key core.Key
val uint64
}

func (db *db) getSequence(ctx context.Context, txn datastore.Txn, key string) (*sequence, error) {
if key == "" {
return nil, ErrKeyEmpty
}
seqKey := core.NewSequenceKey(key)
func (db *db) getSequence(ctx context.Context, txn datastore.Txn, key core.Key) (*sequence, error) {
seq := &sequence{
key: seqKey,
key: key,
val: uint64(0),
}

Expand Down
3 changes: 3 additions & 0 deletions docs/data_format_changes/i2333-field-id-seq.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Generate field ids using a sequence

The index and collection id sequences were also moved (for consistency).

0 comments on commit 9248cbc

Please sign in to comment.