Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Generate field ids using a sequence #2339

Merged
merged 8 commits into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 56 additions & 14 deletions core/key.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ const (
COLLECTION_INDEX = "/collection/index"
SCHEMA_VERSION = "/schema/version/v"
SCHEMA_VERSION_ROOT = "/schema/version/r"
SEQ = "/seq"
COLLECTION_SEQ = "/seq/collection"
INDEX_ID_SEQ = "/seq/index"
FIELD_ID_SEQ = "/seq/field"
PRIMARY_KEY = "/pk"
DATASTORE_DOC_VERSION_FIELD_ID = "v"
REPLICATOR = "/replicator/id"
Expand Down Expand Up @@ -165,11 +167,29 @@ type P2PCollectionKey struct {

var _ Key = (*P2PCollectionKey)(nil)

type SequenceKey struct {
SequenceName string
// CollectionIDSequenceKey is used to key the sequence used to generate collection ids.
type CollectionIDSequenceKey struct{}

var _ Key = (*CollectionIDSequenceKey)(nil)

// IndexIDSequenceKey is used to key the sequence used to generate index ids.
//
// The sequence is specific to each collection version.
type IndexIDSequenceKey struct {
CollectionID uint32
}

var _ Key = (*SequenceKey)(nil)
var _ Key = (*IndexIDSequenceKey)(nil)

// FieldIDSequenceKey is used to key the sequence used to generate field ids.
//
// The sequence is specific to each collection root. Multiple collection of the same root
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: Can you remind me what the root is? Do you mean the root schema?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nevermind. I saw the answer bellow 😅

// must maintain consistent field ids.
type FieldIDSequenceKey struct {
CollectionRoot uint32
}

var _ Key = (*FieldIDSequenceKey)(nil)

type ReplicatorKey struct {
ReplicatorID string
Expand Down Expand Up @@ -364,8 +384,12 @@ func NewSchemaRootKeyFromString(keyString string) (SchemaRootKey, error) {
}, nil
}

func NewSequenceKey(name string) SequenceKey {
return SequenceKey{SequenceName: name}
func NewIndexIDSequenceKey(collectionID uint32) IndexIDSequenceKey {
return IndexIDSequenceKey{CollectionID: collectionID}
}

func NewFieldIDSequenceKey(collectionRoot uint32) FieldIDSequenceKey {
return FieldIDSequenceKey{CollectionRoot: collectionRoot}
}

func (k DataStoreKey) WithValueFlag() DataStoreKey {
Expand Down Expand Up @@ -690,21 +714,39 @@ func (k SchemaRootKey) ToDS() ds.Key {
return ds.NewKey(k.ToString())
}

func (k SequenceKey) ToString() string {
result := SEQ
func (k CollectionIDSequenceKey) ToString() string {
return COLLECTION_SEQ
}

if k.SequenceName != "" {
result = result + "/" + k.SequenceName
}
func (k CollectionIDSequenceKey) Bytes() []byte {
return []byte(k.ToString())
}

return result
func (k CollectionIDSequenceKey) ToDS() ds.Key {
return ds.NewKey(k.ToString())
}

func (k IndexIDSequenceKey) ToString() string {
return INDEX_ID_SEQ + "/" + strconv.Itoa(int(k.CollectionID))
}

func (k IndexIDSequenceKey) Bytes() []byte {
return []byte(k.ToString())
}

func (k IndexIDSequenceKey) ToDS() ds.Key {
return ds.NewKey(k.ToString())
}

func (k FieldIDSequenceKey) ToString() string {
return FIELD_ID_SEQ + "/" + strconv.Itoa(int(k.CollectionRoot))
}

func (k SequenceKey) Bytes() []byte {
func (k FieldIDSequenceKey) Bytes() []byte {
return []byte(k.ToString())
}

func (k SequenceKey) ToDS() ds.Key {
func (k FieldIDSequenceKey) ToDS() ds.Key {
return ds.NewKey(k.ToString())
}

Expand Down
79 changes: 58 additions & 21 deletions db/collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,14 +107,20 @@ func (db *db) createCollection(
}
}

colSeq, err := db.getSequence(ctx, txn, core.COLLECTION)
colSeq, err := db.getSequence(ctx, txn, core.CollectionIDSequenceKey{})
if err != nil {
return nil, err
}
colID, err := colSeq.next(ctx, txn)
if err != nil {
return nil, err
}

fieldSeq, err := db.getSequence(ctx, txn, core.NewFieldIDSequenceKey(uint32(colID)))
if err != nil {
return nil, err
}

desc.ID = uint32(colID)
desc.RootID = desc.ID

Expand All @@ -123,14 +129,25 @@ func (db *db) createCollection(
return nil, err
}
desc.SchemaVersionID = schema.VersionID
for i, globalField := range schema.Fields {
for _, globalField := range schema.Fields {
var fieldID uint64
if globalField.Name == request.DocIDFieldName {
// There is no hard technical requirement for this, we just think it looks nicer
// if the doc id is at the zero index. It makes it look a little nicer in commit
// queries too.
fieldID = 0
} else {
fieldID, err = fieldSeq.next(ctx, txn)
if err != nil {
return nil, err
}
}

desc.Fields = append(
desc.Fields,
client.CollectionFieldDescription{
Name: globalField.Name,
// For now just set the field id to it's index. This does not work
// for branching schema and field deletion.
ID: client.FieldID(i),
ID: client.FieldID(fieldID),
},
)
}
Expand Down Expand Up @@ -218,7 +235,7 @@ func (db *db) updateSchema(
return err
}

colSeq, err := db.getSequence(ctx, txn, core.COLLECTION)
colSeq, err := db.getSequence(ctx, txn, core.CollectionIDSequenceKey{})
if err != nil {
return err
}
Expand Down Expand Up @@ -247,14 +264,32 @@ func (db *db) updateSchema(
if existingCol.RootID == client.OrphanRootID {
existingCol.RootID = col.RootID
}
for i, globalField := range schema.Fields {

fieldSeq, err := db.getSequence(ctx, txn, core.NewFieldIDSequenceKey(existingCol.RootID))
if err != nil {
return err
}

for _, globalField := range schema.Fields {
var fieldID client.FieldID
// We must check the source collection if the field already exists, and take its ID
// from there, otherwise the field must be generated by the sequence.
existingField, ok := col.GetFieldByName(globalField.Name)
if ok {
fieldID = existingField.ID
} else {
nextFieldID, err := fieldSeq.next(ctx, txn)
if err != nil {
return err
}
fieldID = client.FieldID(nextFieldID)
}

existingCol.Fields = append(
existingCol.Fields,
client.CollectionFieldDescription{
Name: globalField.Name,
// For now just set the field id to it's index. This does not work
// for branching schema and field deletion.
ID: client.FieldID(i),
ID: fieldID,
},
)
}
Expand All @@ -274,6 +309,11 @@ func (db *db) updateSchema(
return err
}

fieldSeq, err := db.getSequence(ctx, txn, core.NewFieldIDSequenceKey(col.RootID))
if err != nil {
return err
}

// Create any new collections without a name (inactive), if [setAsActiveVersion] is true
// they will be activated later along with any existing collection versions.
col.Name = immutable.None[string]()
Expand All @@ -286,22 +326,19 @@ func (db *db) updateSchema(
},
}

for i, globalField := range schema.Fields {
isNew := true
for _, localField := range col.Fields {
if localField.Name == globalField.Name {
isNew = false
break
for _, globalField := range schema.Fields {
_, exists := col.GetFieldByName(globalField.Name)
if !exists {
fieldID, err := fieldSeq.next(ctx, txn)
if err != nil {
return err
}
}
if isNew {

col.Fields = append(
col.Fields,
client.CollectionFieldDescription{
Name: globalField.Name,
// For now just set the field id to it's index. This does not work
// for branching schema and field deletion.
ID: client.FieldID(i),
ID: client.FieldID(fieldID),
},
)
}
Expand Down
6 changes: 5 additions & 1 deletion db/collection_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,11 @@ func (c *collection) createIndex(
return nil, err
}

colSeq, err := c.db.getSequence(ctx, txn, fmt.Sprintf("%s/%d", core.COLLECTION_INDEX, c.ID()))
colSeq, err := c.db.getSequence(
ctx,
txn,
core.NewIndexIDSequenceKey(c.ID()),
)
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion db/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ func (db *db) initialize(ctx context.Context) error {

// init meta data
// collection sequence
_, err = db.getSequence(ctx, txn, core.COLLECTION)
_, err = db.getSequence(ctx, txn, core.CollectionIDSequenceKey{})
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion db/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ func TestCreateIndex_WithMultipleCollectionsAndIndexes_AssignIncrementedIDPerCol
desc, err := f.createCollectionIndexFor(col.Name().Value(), makeIndex(fieldName))
require.NoError(t, err)
assert.Equal(t, expectedID, desc.ID)
seqKey := core.NewSequenceKey(fmt.Sprintf("%s/%d", core.COLLECTION_INDEX, col.ID()))
seqKey := core.NewIndexIDSequenceKey(col.ID())
storedSeqKey, err := f.txn.Systemstore().Get(f.ctx, seqKey.ToDS())
assert.NoError(t, err)
storedSeqVal := binary.BigEndian.Uint64(storedSeqKey)
Expand Down
3 changes: 1 addition & 2 deletions db/indexed_docs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import (
"context"
"encoding/json"
"errors"
"fmt"
"testing"

ipfsDatastore "github.com/ipfs/go-datastore"
Expand Down Expand Up @@ -261,7 +260,7 @@ func (f *indexTestFixture) stubSystemStore(systemStoreOn *mocks.DSReaderWriter_E
systemStoreOn.Get(mock.Anything, colIndexOnNameKey.ToDS()).Maybe().Return(indexOnNameDescData, nil)

if f.users != nil {
sequenceKey := core.NewSequenceKey(fmt.Sprintf("%s/%d", core.COLLECTION_INDEX, f.users.ID()))
sequenceKey := core.NewIndexIDSequenceKey(f.users.ID())
systemStoreOn.Get(mock.Anything, sequenceKey.ToDS()).Maybe().Return([]byte{0, 0, 0, 0, 0, 0, 0, 1}, nil)
}

Expand Down
2 changes: 1 addition & 1 deletion db/lens.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func (db *db) setMigration(ctx context.Context, txn datastore.Txn, cfg client.Le
return err
}

colSeq, err := db.getSequence(ctx, txn, core.COLLECTION)
colSeq, err := db.getSequence(ctx, txn, core.CollectionIDSequenceKey{})
if err != nil {
return err
}
Expand Down
10 changes: 3 additions & 7 deletions db/sequence.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,13 @@ import (
)

type sequence struct {
key core.SequenceKey
key core.Key
val uint64
}

func (db *db) getSequence(ctx context.Context, txn datastore.Txn, key string) (*sequence, error) {
if key == "" {
return nil, ErrKeyEmpty
}
seqKey := core.NewSequenceKey(key)
func (db *db) getSequence(ctx context.Context, txn datastore.Txn, key core.Key) (*sequence, error) {
seq := &sequence{
key: seqKey,
key: key,
val: uint64(0),
}

Expand Down
3 changes: 3 additions & 0 deletions docs/data_format_changes/i2333-field-id-seq.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Generate field ids using a sequence

The index and collection id sequences were also moved (for consistency).
Loading