Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Virtualize WAL methods #53

Merged
merged 16 commits into from
Dec 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 165 additions & 0 deletions doc/libsql_extensions.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,168 @@ CREATE FUNCTION your_function LANGUAGE wasm AS <source-code>
, where `<source-code>` is either a binary .wasm blob or text presented in WebAssembly Text format.

See an example in `CREATE FUNCTION` paragraph above.

## Virtual WAL

Write-ahead log is a journaling mode which enables nice write concurrency characteristics - it not only allows a single writer to run in parallel with readers, but also makes `BEGIN CONCURRENT` transactions with optimistic locking possible. In SQLite, WAL is not a virtual interface, it only has a single file-based implementation, with an additional WAL index kept in shared memory (in form of another mapped file). In libSQL, akin to VFS, it's possible to override WAL routines with custom code. That allows implementing pluggable backends for write-ahead log, which opens many possibilities (again, similar to the VFS mechanism).

### API

In order to register a new set of virtual WAL methods, these methods need to be implemented. This is the current API:
```c
typedef struct libsql_wal_methods {
int iVersion; /* Current version is 1, versioning is here for backward compatibility */
/* Open and close a connection to a write-ahead log. */
int (*xOpen)(sqlite3_vfs*, sqlite3_file* , const char*, int no_shm_mode, i64 max_size, struct libsql_wal_methods*, Wal**);
int (*xClose)(Wal*, sqlite3* db, int sync_flags, int nBuf, u8 *zBuf);

/* Set the limiting size of a WAL file. */
void (*xLimit)(Wal*, i64 limit);

/* Used by readers to open (lock) and close (unlock) a snapshot. A
** snapshot is like a read-transaction. It is the state of the database
** at an instant in time. sqlite3WalOpenSnapshot gets a read lock and
** preserves the current state even if the other threads or processes
** write to or checkpoint the WAL. sqlite3WalCloseSnapshot() closes the
** transaction and releases the lock.
*/
int (*xBeginReadTransaction)(Wal *, int *);
void (*xEndReadTransaction)(Wal *);

/* Read a page from the write-ahead log, if it is present. */
int (*xFindFrame)(Wal *, Pgno, u32 *);
int (*xReadFrame)(Wal *, u32, int, u8 *);

/* If the WAL is not empty, return the size of the database. */
Pgno (*xDbsize)(Wal *pWal);

/* Obtain or release the WRITER lock. */
int (*xBeginWriteTransaction)(Wal *pWal);
int (*xEndWriteTransaction)(Wal *pWal);

/* Undo any frames written (but not committed) to the log */
int (*xUndo)(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx);

/* Return an integer that records the current (uncommitted) write
** position in the WAL */
void (*xSavepoint)(Wal *pWal, u32 *aWalData);

/* Move the write position of the WAL back to iFrame. Called in
** response to a ROLLBACK TO command. */
int (*xSavepointUndo)(Wal *pWal, u32 *aWalData);

/* Write a frame or frames to the log. */
int (*xFrames)(Wal *pWal, int, PgHdr *, Pgno, int, int);

/* Copy pages from the log to the database file */
int (*xCheckpoint)(
Wal *pWal, /* Write-ahead log connection */
sqlite3 *db, /* Check this handle's interrupt flag */
int eMode, /* One of PASSIVE, FULL and RESTART */
int (*xBusy)(void*), /* Function to call when busy */
void *pBusyArg, /* Context argument for xBusyHandler */
int sync_flags, /* Flags to sync db file with (or 0) */
int nBuf, /* Size of buffer zBuf */
u8 *zBuf, /* Temporary buffer to use */
int *pnLog, /* OUT: Number of frames in WAL */
int *pnCkpt /* OUT: Number of backfilled frames in WAL */
);

/* Return the value to pass to a sqlite3_wal_hook callback, the
** number of frames in the WAL at the point of the last commit since
** sqlite3WalCallback() was called. If no commits have occurred since
** the last call, then return 0.
*/
int (*xCallback)(Wal *pWal);

/* Tell the wal layer that an EXCLUSIVE lock has been obtained (or released)
** by the pager layer on the database file.
*/
int (*xExclusiveMode)(Wal *pWal, int op);

/* Return true if the argument is non-NULL and the WAL module is using
** heap-memory for the wal-index. Otherwise, if the argument is NULL or the
** WAL module is using shared-memory, return false.
*/
int (*xHeapMemory)(Wal *pWal);

// Only needed with SQLITE_ENABLE_SNAPSHOT, but part of the ABI
int (*xSnapshotGet)(Wal *pWal, sqlite3_snapshot **ppSnapshot);
void (*xSnapshotOpen)(Wal *pWal, sqlite3_snapshot *pSnapshot);
int (*xSnapshotRecover)(Wal *pWal);
int (*xSnapshotCheck)(Wal *pWal, sqlite3_snapshot *pSnapshot);
void (*xSnapshotUnlock)(Wal *pWal);

// Only needed with SQLITE_ENABLE_ZIPVFS, but part of the ABI
/* If the WAL file is not empty, return the number of bytes of content
** stored in each frame (i.e. the db page-size when the WAL was created).
*/
int (*xFramesize)(Wal *pWal);


/* Return the sqlite3_file object for the WAL file */
sqlite3_file *(*xFile)(Wal *pWal);

// Only needed with SQLITE_ENABLE_SETLK_TIMEOUT
int (*xWriteLock)(Wal *pWal, int bLock);

void (*xDb)(Wal *pWal, sqlite3 *db);

/* Return the WAL pathname length based on the owning pager's pathname len.
** For WAL implementations not based on a single file, 0 should be returned. */
int (*xPathnameLen)(int origPathname);

/* Get the WAL pathname to given buffer. Assumes that the buffer can hold
** at least xPathnameLen bytes. For WAL implementations not based on a single file,
** this operation can safely be a no-op.
** */
void (*xGetWalPathname)(char *buf, const char *orig, int orig_len);

/*
** This optional callback gets called before the main database file which owns
** the WAL file is open. It is a good place for initialization routines, as WAL
** is otherwise open lazily.
*/
int (*xPreMainDbOpen)(libsql_wal_methods *methods, const char *main_db_path);

/* True if the implementation relies on shared memory routines (e.g. locks) */
int bUsesShm;

const char *zName;
struct libsql_wal_methods *pNext;
} libsql_wal_methods;
```

### Registering WAL methods

After the implementation is ready, the following public functions can be used
to manage it:
```c
libsql_wal_methods_find
libsql_wal_methods_register
libsql_wal_methods_unregister
```
, and they are quite self-descriptive. They also work similarly to their `sqlite3_vfs*` counterparts, which they were modeled after.

### Using WAL methods

Custom WAL methods need to be declared when opening a new database connection.
That can be achieved either programmatically by using a new flavor of the `sqlite3_open*` function:
```c
int libsql_open(
const char *filename, /* Database filename (UTF-8) */
sqlite3 **ppDb, /* OUT: SQLite db handle */
int flags, /* Flags */
const char *zVfs, /* Name of VFS module to use, NULL for default */
const char *zWal /* Name of WAL module to use, NULL for default */
)
```

... or via URI, by using a new `wal` parameter:
```
.open file:test.db?wal=my_impl_of_wal_methods
```

### Example

An example implementation can be browsed in the Rust test suite, at `test/rust_suite/src/virtual_wal.rs`
159 changes: 159 additions & 0 deletions ext/vwal/vwal.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
#include "sqliteInt.h"
#include "wal.h"

/*
** This file contains a stub for implementing one's own WAL routines.
** Registering a new set of WAL methods can be done through
** libsql_wal_methods_register(). Later, a registered set can
** be used by passing its name as a parameter to libsql_open().
*/

/* Not defined in this file. Per the header comment above, this is the entry
** point used to register a new set of WAL methods so that the set can later
** be selected by name. Returns an int; NOTE(review): presumably an SQLite
** result code - confirm against the definition. */
extern int libsql_wal_methods_register(libsql_wal_methods*);

/*
** xOpen slot of the "vwal" method table: open a connection to the
** write-ahead log. Placeholder only - no argument is inspected, *ppWal is
** left untouched, and SQLITE_MISUSE is always returned.
*/
static int v_open(
  sqlite3_vfs *pVfs,
  sqlite3_file *pDbFd,
  const char *zWalName,
  int bNoShm,
  i64 mxWalSize,
  libsql_wal_methods *pMethods,
  Wal **ppWal
){
  /* TODO: implement */
  (void)pVfs;
  (void)pDbFd;
  (void)zWalName;
  (void)bNoShm;
  (void)mxWalSize;
  (void)pMethods;
  (void)ppWal;
  return SQLITE_MISUSE;
}

/*
** xClose slot of the "vwal" method table: close a connection to the
** write-ahead log. Placeholder only - every argument is ignored and
** SQLITE_MISUSE is always returned.
*/
static int v_close(
  Wal *wal,
  sqlite3 *db,
  int sync_flags,
  int nBuf,
  u8 *zBuf
){
  /* TODO: implement */
  (void)wal;
  (void)db;
  (void)sync_flags;
  (void)nBuf;
  (void)zBuf;
  return SQLITE_MISUSE;
}

/*
** xLimit slot of the "vwal" method table: set the limiting size of the
** WAL. Placeholder only - the requested limit is discarded.
*/
static void v_limit(Wal *wal, i64 limit){
  /* TODO: implement */
  (void)wal;
  (void)limit;
}

/*
** xBeginReadTransaction slot of the "vwal" method table: open (lock) a
** read snapshot. Placeholder only - always returns SQLITE_MISUSE.
**
** The second parameter is now named: omitting a parameter name in a
** function definition is only valid from C23 onwards and is rejected or
** diagnosed by older compilers. The name follows the corresponding
** argument of upstream SQLite's WAL read-transaction routine; the stub
** never writes through the pointer.
*/
static int v_begin_read_transaction(Wal *wal, int *pChanged){
  /* TODO: implement */
  (void)wal;
  (void)pChanged;
  return SQLITE_MISUSE;
}

/*
** xEndReadTransaction slot of the "vwal" method table: close (unlock) a
** read snapshot. Placeholder only - does nothing.
*/
static void v_end_read_transaction(Wal *wal){
  /* TODO: implement */
  (void)wal;
}

/*
** xFindFrame slot of the "vwal" method table: look a page up in the
** write-ahead log. Placeholder only - *frame is never written and
** SQLITE_MISUSE is always returned.
*/
static int v_find_frame(Wal *wal, Pgno pgno, u32 *frame){
  /* TODO: implement */
  (void)wal;
  (void)pgno;
  (void)frame;
  return SQLITE_MISUSE;
}

/*
** xReadFrame slot of the "vwal" method table: read a frame's content from
** the write-ahead log. Placeholder only - pOut is never filled and
** SQLITE_MISUSE is always returned.
*/
static int v_read_frame(Wal *wal, u32 frame, int nOut, u8 *pOut){
  /* TODO: implement */
  (void)wal;
  (void)frame;
  (void)nOut;
  (void)pOut;
  return SQLITE_MISUSE;
}

/*
** xDbsize slot of the "vwal" method table: report the size of the
** database. Placeholder only - always reports 0.
*/
static Pgno v_dbsize(Wal *wal){
  /* TODO: implement */
  (void)wal;
  return 0;
}

/*
** xBeginWriteTransaction slot of the "vwal" method table: obtain the
** WRITER lock. Placeholder only - always returns SQLITE_MISUSE.
*/
static int v_begin_write_transaction(Wal *wal){
  /* TODO: implement */
  (void)wal;
  return SQLITE_MISUSE;
}

/*
** xEndWriteTransaction slot of the "vwal" method table: release the
** WRITER lock. Placeholder only - always returns SQLITE_MISUSE.
*/
static int v_end_write_transaction(Wal *wal){
  /* TODO: implement */
  (void)wal;
  return SQLITE_MISUSE;
}

/*
** xUndo slot of the "vwal" method table: undo frames written but not yet
** committed to the log. Placeholder only - the xUndo callback is never
** invoked and SQLITE_MISUSE is always returned.
*/
static int v_undo(
  Wal *wal,
  int (*xUndo)(void *, Pgno),
  void *pUndoCtx
){
  /* TODO: implement */
  (void)wal;
  (void)xUndo;
  (void)pUndoCtx;
  return SQLITE_MISUSE;
}

/*
** xSavepoint slot of the "vwal" method table: record the current
** (uncommitted) write position. Placeholder only - wal_data is left
** unmodified.
*/
static void v_savepoint(Wal *wal, u32 *wal_data){
  /* TODO: implement */
  (void)wal;
  (void)wal_data;
}

/*
** xSavepointUndo slot of the "vwal" method table: move the write position
** back in response to ROLLBACK TO. Placeholder only - always returns
** SQLITE_MISUSE.
*/
static int v_savepoint_undo(Wal *wal, u32 *wal_data){
  /* TODO: implement */
  (void)wal;
  (void)wal_data;
  return SQLITE_MISUSE;
}

/*
** xFrames slot of the "vwal" method table: write a frame or frames to the
** log. Placeholder only - nothing is written and SQLITE_MISUSE is always
** returned.
*/
static int v_frames(
  Wal *pWal,
  int szPage,
  PgHdr *pList,
  Pgno nTruncate,
  int isCommit,
  int sync_flags
){
  /* TODO: implement */
  (void)pWal;
  (void)szPage;
  (void)pList;
  (void)nTruncate;
  (void)isCommit;
  (void)sync_flags;
  return SQLITE_MISUSE;
}

/*
** xCheckpoint slot of the "vwal" method table: copy pages from the log to
** the database file. Placeholder only - the busy handler is never called,
** *pnLog and *pnCkpt are never written, and SQLITE_MISUSE is always
** returned.
*/
static int v_checkpoint(
  Wal *wal,
  sqlite3 *db,
  int eMode,
  int (xBusy)(void *),
  void *pBusyArg,
  int sync_flags,
  int nBuf,
  u8 *zBuf,
  int *pnLog,
  int *pnCkpt
){
  /* TODO: implement */
  (void)wal;
  (void)db;
  (void)eMode;
  (void)xBusy;
  (void)pBusyArg;
  (void)sync_flags;
  (void)nBuf;
  (void)zBuf;
  (void)pnLog;
  (void)pnCkpt;
  return SQLITE_MISUSE;
}

/*
** xCallback slot of the "vwal" method table. Placeholder only - always
** returns SQLITE_MISUSE.
** NOTE(review): the slot is described as yielding a frame count for the
** wal-hook callback, so an error code here is presumably just a marker
** until a real implementation exists - confirm before relying on it.
*/
static int v_callback(Wal *wal){
  /* TODO: implement */
  (void)wal;
  return SQLITE_MISUSE;
}

/*
** xExclusiveMode slot of the "vwal" method table: notification that the
** pager obtained or released an EXCLUSIVE lock on the database file.
** Placeholder only - always returns SQLITE_MISUSE.
*/
static int v_exclusive_mode(Wal *wal, int op){
  /* TODO: implement */
  (void)wal;
  (void)op;
  return SQLITE_MISUSE;
}

/*
** xHeapMemory slot of the "vwal" method table. Placeholder only - always
** returns SQLITE_MISUSE.
** NOTE(review): the slot is described as a true/false query about
** heap-memory use for the wal-index; returning an error code instead is
** presumably a stub marker - confirm how callers interpret it.
*/
static int v_heap_memory(Wal *wal){
  /* TODO: implement */
  (void)wal;
  return SQLITE_MISUSE;
}

/*
** xFile slot of the "vwal" method table: return the sqlite3_file object
** for the WAL file. Placeholder only - always yields NULL.
*/
static sqlite3_file *v_file(Wal *wal){
  /* TODO: implement */
  (void)wal;
  return NULL;
}

/*
** xDb slot of the "vwal" method table. Placeholder only - both arguments
** are ignored and nothing is recorded.
*/
static void v_db(Wal *wal, sqlite3 *db){
  /* TODO: implement */
  (void)wal;
  (void)db;
}

/*
** xPathnameLen slot of the "vwal" method table. Reports a WAL pathname
** length of 0 regardless of the length n passed in.
*/
static int v_pathname_len(int n){
  (void)n;
  return 0;
}

/*
** xGetWalPathname slot of the "vwal" method table. Deliberately a no-op:
** buf is never written and orig is never read.
*/
static void v_get_wal_pathname(char *buf, const char *orig, int orig_len){
  (void)buf;
  (void)orig;
  (void)orig_len;
}

/*
** Register the stub WAL method table under the name "vwal".
**
** The table has static storage duration, so it remains valid after this
** function returns (NOTE(review): presumably required because the registry
** keeps the pointer it is given - confirm against
** libsql_wal_methods_register()). Members without an initializer
** (e.g. bUsesShm, pNext, and the optional snapshot/zipvfs/setlk slots when
** their feature macros are undefined) are zero-initialized.
**
** The function is exported with default visibility and takes no arguments;
** it is declared with a (void) prototype so that calls passing arguments
** are diagnosed.
*/
__attribute__((__visibility__("default")))
void libsql_register_vwal(void) {
  static libsql_wal_methods methods = {
    .iVersion = 1,
    .xOpen = v_open,
    .xClose = v_close,
    .xLimit = v_limit,
    .xBeginReadTransaction = v_begin_read_transaction,
    .xEndReadTransaction = v_end_read_transaction,
    .xFindFrame = v_find_frame,
    .xReadFrame = v_read_frame,
    .xDbsize = v_dbsize,
    .xBeginWriteTransaction = v_begin_write_transaction,
    .xEndWriteTransaction = v_end_write_transaction,
    .xUndo = v_undo,
    .xSavepoint = v_savepoint,
    .xSavepointUndo = v_savepoint_undo,
    .xFrames = v_frames,
    .xCheckpoint = v_checkpoint,
    .xCallback = v_callback,
    .xExclusiveMode = v_exclusive_mode,
    .xHeapMemory = v_heap_memory,
#ifdef SQLITE_ENABLE_SNAPSHOT
    .xSnapshotGet = NULL,
    .xSnapshotOpen = NULL,
    .xSnapshotRecover = NULL,
    .xSnapshotCheck = NULL,
    .xSnapshotUnlock = NULL,
#endif
#ifdef SQLITE_ENABLE_ZIPVFS
    .xFramesize = NULL,
#endif
    .xFile = v_file,
#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
    .xWriteLock = NULL,
#endif
    .xDb = v_db,
    .xPathnameLen = v_pathname_len,
    .xGetWalPathname = v_get_wal_pathname,
    .xPreMainDbOpen = NULL,
    .zName = "vwal"
  };
  /* The void return type leaves no channel for reporting a failed
  ** registration, so the result is discarded explicitly rather than
  ** silently. */
  (void)libsql_wal_methods_register(&methods);
}
9 changes: 6 additions & 3 deletions src/attach.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
** This file contains code used to implement the ATTACH and DETACH commands.
*/
#include "sqliteInt.h"
#include "wal.h"

#ifndef SQLITE_OMIT_ATTACH
/*
Expand Down Expand Up @@ -88,6 +89,7 @@ static void attachFunc(
Db *pNew; /* Db object for the newly attached database */
char *zErrDyn = 0;
sqlite3_vfs *pVfs;
libsql_wal_methods *pWal;

UNUSED_PARAMETER(NotUsed);
zFile = (const char *)sqlite3_value_text(argv[0]);
Expand All @@ -106,12 +108,13 @@ static void attachFunc(
** from sqlite3_deserialize() to close database db->init.iDb and
** reopen it as a MemDB */
pVfs = sqlite3_vfs_find("memdb");
pWal = libsql_wal_methods_find(NULL);
if( pVfs==0 ) return;
pNew = &db->aDb[db->init.iDb];
if( pNew->pBt ) sqlite3BtreeClose(pNew->pBt);
pNew->pBt = 0;
pNew->pSchema = 0;
rc = sqlite3BtreeOpen(pVfs, "x\0", db, &pNew->pBt, 0, SQLITE_OPEN_MAIN_DB);
rc = sqlite3BtreeOpen(pVfs, pWal, "x\0", db, &pNew->pBt, 0, SQLITE_OPEN_MAIN_DB);
}else{
/* This is a real ATTACH
**
Expand Down Expand Up @@ -155,7 +158,7 @@ static void attachFunc(
** or may not be initialized.
*/
flags = db->openFlags;
rc = sqlite3ParseUri(db->pVfs->zName, zFile, &flags, &pVfs, &zPath, &zErr);
rc = sqlite3ParseUri(db->pVfs->zName, db->pWalMethods->zName, zFile, &flags, &pVfs, &pWal, &zPath, &zErr);
if( rc!=SQLITE_OK ){
if( rc==SQLITE_NOMEM ) sqlite3OomFault(db);
sqlite3_result_error(context, zErr, -1);
Expand All @@ -164,7 +167,7 @@ static void attachFunc(
}
assert( pVfs );
flags |= SQLITE_OPEN_MAIN_DB;
rc = sqlite3BtreeOpen(pVfs, zPath, db, &pNew->pBt, 0, flags);
rc = sqlite3BtreeOpen(pVfs, pWal, zPath, db, &pNew->pBt, 0, flags);
db->nDb++;
pNew->zDbSName = sqlite3DbStrDup(db, zName);
}
Expand Down
Loading