powersync-ja · rkistner · Aug 27, 2024 · May 13, 2024 · May 13, 2024 · May 13, 2024
diff --git a/.env b/.env
diff --git a/.envrc b/.envrc
@@ -0,0 +1,3 @@
+layout node
+use node
+[ -f .env ] && dotenv
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -0,0 +1,31 @@
+# Ensures packages test correctly
+name: Test Packages
+
+on:
+  push:
+
+jobs:
+  test:
+    name: Test Packages
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+
+      - uses: pnpm/action-setup@v4
+        name: Install pnpm
+
+      - name: Setup NodeJS
+        uses: actions/setup-node@v4
+        with:
+          node-version-file: '.node-version'
+
+      - name: Install dependencies
+        run: pnpm install
+
+      - name: Build
+        run: pnpm build
+
+      - name: Test
+        run: pnpm test
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,6 @@
 node_modules/
 test-db/
 *.db
+lib/
+tsconfig.tsbuildinfo
+benchmarks/db
diff --git a/.node-version b/.node-version
@@ -0,0 +1 @@
+v22.5.1
diff --git a/.prettierignore b/.prettierignore
@@ -0,0 +1,3 @@
+node_modules/
+lib/
+pnpm-lock.yaml
diff --git a/.prettierrc b/.prettierrc
@@ -0,0 +1,6 @@
+{
+  "trailingComma": "none",
+  "tabWidth": 2,
+  "semi": true,
+  "singleQuote": true
+}
diff --git a/DRIVER-API.md b/DRIVER-API.md
@@ -0,0 +1,139 @@
+## Driver API
+
+The driver API aims to have a small surface area, with little performance overhead. Ease of use is not important.
+
+To support all potential implementations, the main APIs are asynchronous. This does add overhead, but this is unavoidable when our goal is to have a universal driver API. We do however aim to keep the performance overhead as low as possible.
+
+The driver API primarily exposes:
+
+1. Connection pooling. Even when using a single connection, that connection should be locked for exclusive use by one consumer at a time.
+2. Prepared statements. Even if the underlying implementation does not use actual prepared statements, the same APIs can be exposed.
+
+In general, the setup of prepared statements (preparing a statement, binding parameters) uses synchronous APIs that don't throw on invalid queries. Executing the statement is asynchronous, and this is where errors are thrown.
+
+The driver API does not include transaction management. This is easily implemented on top of connection pooling/locking + prepared statements for begin/commit/rollback.
+
+### The API
+
+This is a simplified version of the API. For full details, see:
+[packages/driver/src/driver-api.ts](packages/driver/src/driver-api.ts).
+
+```ts
+export interface SqliteDriverConnectionPool {
+  /**
+   * Reserve a connection for exclusive use.
+   *
+   * If there is no available connection, this will wait until one is available.
+   */
+  reserveConnection(
+    options?: ReserveConnectionOptions
+  ): Promise<ReservedConnection>;
+
+  close(): Promise<void>;
+
+  [Symbol.asyncDispose](): Promise<void>;
+}
+
+export interface ReservedConnection {
+  /** Direct handle to the underlying connection. */
+  connection: SqliteDriverConnection;
+
+  /** Proxied to the underlying connection */
+  prepare(sql: string, options?: PrepareOptions): SqliteDriverStatement;
+
+  [Symbol.asyncDispose](): Promise<void>;
+}
+
+export interface SqliteDriverConnection {
+  /**
+   * Prepare a statement.
+   *
+   * Does not return any errors.
+   */
+  prepare(sql: string, options?: PrepareOptions): SqliteDriverStatement;
+}
+
+/**
+ * Represents a single prepared statement.
+ * Loosely modeled on the SQLite API.
+ */
+export interface SqliteDriverStatement {
+  bind(parameters: SqliteParameterBinding): void;
+
+  step(n?: number, options?: StepOptions): Promise<SqliteStepResult>;
+  getColumns(): Promise<string[]>;
+  finalize(): void;
+
+  reset(options?: ResetOptions): void;
+
+  [Symbol.dispose](): void;
+}
+```
+
+## Design decisions
+
+### Small surface area
+
+We want the driver to have as small a surface area as possible. In rare cases we do allow exceptions for performance or simplicity reasons.
+
+### Reusability
+
+The same driver connection pool should be usable by multiple different consumers within the same process. For example, the same connection pool can be used directly, by an ORM, and/or by a sync library, without running into concurrency issues. This specifically affects connection pooling (see below).
+
+### Synchronous vs asynchronous
+
+Many implementations can only support asynchronous methods. However, having _every_ method asynchronous can add significant overhead, if you need to chain multiple methods to run a single query. We therefore aim to have a single asynchronous call per query for most use cases. This does mean that we defer errors until that asynchronous call, and do not throw errors in `prepare()` or `bind()`.
+
+### Transactions
+
+Full transaction support requires a large surface area, with many design possibilities. For example, do we support nested transactions (savepoints in SQLite)? Do we expose immediate/deferred/exclusive transactions? Do we use a wrapper function, explicit resource management, or manual commit/rollback calls to manage transactions?
+
+Instead, the driver API just provides the building blocks for transactions - connection pooling and prepared statements.
+
+### Connection pooling
+
+The driver API requires a connection pooling implementation, even if there is only a single underlying connection. Even in that case, it is important that a connection can be "reserved" for a single consumer at a time. This is needed for example to implement transactions, without requiring additional locking mechanisms (which would break the reusability requirement).
+
+Connection pooling also supports specifically requesting a read-only vs read-write connection. This is important for concurrency in SQLite, which can only support a single writer at a time, but any number of concurrent readers.
+
+### Read vs write queries
+
+There is no fundamental distinction between read and write queries in the driver prepared statement API. This is important for use cases such as `INSERT INTO ... RETURNING *` - a "write" api that also returns data. However, read vs write locks are taken into account with connection pooling.
+
+### "run" with results
+
+The `run` API that returns the last insert row id and number of changes is primarily for compatibility with current libraries/APIs. Many libraries in use return that automatically for any "run" statement, and splitting that out into a separate prepared statement could add significant performance overhead (requiring two prepared statements for every single "write" query).
+
+### Row arrays vs objects
+
+Returning an array of cells for each row, along with a separate "columns" array, is more flexible than just using an object per row. It is always possible to convert the array to an object, given the columns header.
+
+However, many current SQLite bindings do not expose the raw array calls. Even if they do, this path may be slower than using objects from the start. Since using the results as an array is quite rare in practice, this is left as an optional configuration, rather than a requirement for all queries.
+
+### Separate bind/step/reset
+
+This allows a lot of flexibility, for example partial rebinding of parameters instead of specifying all parameters each time a prepared statement is used. However, those types of use cases are rare, and this is not important in the overall architecture. These could all be combined into a single "query with parameters" call, but would need to take into account optional streaming of results.
+
+### bigint
+
+SQLite supports up to 8-byte signed integers (up to 2^63-1), while JavaScript's number can only represent integers exactly up to 2^53-1. General approaches include:
+
+1. Always use JS numbers. This requires using TEXT for larger integers, but can still store as INTEGER and cast when inserting or returning results.
+2. Automatically switching to bigint if the number is `>= 2^53`. This can easily introduce issues in the client, since `bigint` and `number` are not interoperable.
+3. Require an explicit option to get `bigint` results. This is the approach we went for here.
+4. Always use `number` for `REAL`, and `bigint` for `INTEGER`. You can use `cast(n as REAL)` to get a value back as a `number`. Since many users will just use small integers, this may not be ideal.
+
+### Pipelining
+
+The APIs guarantee that statements on a connection will be ordered in the order that calls were made. This allows pipelining statements to improve performance - the client can issue many queries before waiting for the results. One place where this breaks down is within transactions: It is possible for one statement to trigger a transaction rollback, in which case the next pipelined statement will run outside the transaction.
+
+The current API includes a flag to indicate a statement may only be run within a transaction to work around this issue, but other suggestions are welcome.
+
+## Driver implementation helpers
+
+The driver package also includes helpers to assist in implementing drivers. These are optional, and not part of the driver spec. They do however make it simple to support:
+
+1. Connection pooling - the driver itself just needs to implement logic for a single connection, and the utilities will handle connection pooling.
+2. Worker threads - this can assist in spawning a separate worker thread per connection, to get true concurrency. The same approaches could work to support web workers in browsers in the future.
+
+Some drivers may use different approaches for concurrency and connection pooling, without using these utilities.
diff --git a/README.md b/README.md
@@ -0,0 +1,47 @@
+# sqlite-js
+
+Universal SQLite APIs for JavaScript.
+
+The project provides two primary APIs:
+
+1. The driver API. This aims to expose a minimum API for drivers to implement, while supporting a rich set of functionality. This should have as little performance overhead as possible, while still supporting asynchronous implementations.
+
+2. The end-user API. This is a library built on top of the driver API, that exposes higher-level functionality such as transactions, convenience methods, template strings (later), pipelining.
+
+## @sqlite-js/driver
+
+This is a universal driver API and utilities for implementing drivers.
+
+The APIs here are low-level. These are intended to be implemented by drivers, and used by higher-level libraries.
+
+See [DRIVER-API.md](./DRIVER-API.md) for details on the design.
+
+### @sqlite-js/driver/node
+
+This is a driver implementation for NodeJS based on the experimental `node:sqlite` package.
+
+## @sqlite-js/better-sqlite3-driver
+
+This is a driver implementation for NodeJS based on `better-sqlite3`.
+
+## @sqlite-js/api
+
+This contains a higher-level API, with simple methods to execute queries, and supports transactions and pipelining.
+
+This is largely a proof-of-concept to validate and test the underlying driver APIs, rather than having a fixed design.
+
+The current iteration of the APIs is visible at [packages/api/src/api.ts](packages/api/src/api.ts).
+
+# Why split the APIs?
+
+A previous iteration used a single API for both the end-user API and the driver API. This had several disadvantages:
+
+1. The implementation per driver requires a lot more effort.
+2. Iterating on the API becomes much more difficult.
+   1. Implementing minor quality-of-life improvements for the end user becomes a required change in every driver.
+3. Optimizing the end-user API for performance is difficult. To cover all the different use cases, it requires implementing many different features such as prepared statements, batching, pipelining. This becomes a very large API for drivers to implement.
+4. The goals for the end-user API are different from those of the driver API:
+   1. End-users want a rich but simple-to-use API to access the database.
+   2. Drivers want a small surface area, that doesn't change often.
+
+Splitting out a separate driver API, and implementing the end-user API as a separate library, avoids all the above issues.
diff --git a/benchmarks/package.json b/benchmarks/package.json
@@ -0,0 +1,21 @@
+{
+  "name": "benchmarks",
+  "type": "module",
+  "scripts": {
+    "build": "tsc -b",
+    "start": "NODE_OPTIONS=\"--experimental-sqlite --disable-warning=ExperimentalWarning\" node lib/index.js"
+  },
+  "dependencies": {
+    "better-sqlite3": "^11.0.0",
+    "prando": "^6.0.1",
+    "sqlite": "^5.1.1",
+    "sqlite3": "^5.1.7",
+    "@sqlite-js/driver": "workspace:^",
+    "@sqlite-js/better-sqlite3-driver": "workspace:^",
+    "@sqlite-js/api": "workspace:^"
+  },
+  "devDependencies": {
+    "@types/node": "^20.14.2",
+    "typescript": "^5.4.5"
+  }
+}
diff --git a/benchmarks/src/Benchmark.ts b/benchmarks/src/Benchmark.ts
@@ -0,0 +1,108 @@
+import { BenchmarkResults } from './BenchmarkResults.js';
+
+/**
+ * Base class for a benchmark suite.
+ *
+ * Subclasses provide `setUp`, `tearDown` and the individual `testN`
+ * implementations; `runAll` executes them in a fixed order and records each
+ * test through a `BenchmarkResults` instance.
+ */
+export abstract class Benchmark {
+  /** Name passed to the `BenchmarkResults` created for each run. */
+  abstract name: string;
+
+  /**
+   * Run `setUp`, every test in order, and `tearDown`, recording each test
+   * under its label.
+   *
+   * While the suite runs, a 1ms interval timer measures the gap between
+   * consecutive ticks. Every full 16ms contained in a gap of 16ms or more is
+   * counted as a "dropped frame", and the total is logged at the end.
+   *
+   * @returns The results object that each test was recorded into.
+   * @throws Whatever `setUp`, a test, or `tearDown` rejects with. The interval
+   *   timer is cleared in that case too, so a failed run does not leave a
+   *   timer behind.
+   */
+  async runAll(): Promise<BenchmarkResults> {
+    // A gap of at least this many milliseconds counts as one dropped frame.
+    const frameMs = 16;
+
+    const results = new BenchmarkResults(this.name);
+    let droppedFrames = 0;
+    let last = performance.now();
+
+    // Measures the time since the previous sample, adds any dropped frames
+    // to the running total, and returns the measured gap in milliseconds.
+    const sampleGap = (): number => {
+      const now = performance.now();
+      const gap = now - last;
+      last = now;
+      if (gap >= frameMs) {
+        droppedFrames += Math.floor(gap / frameMs);
+      }
+      return gap;
+    };
+
+    // Labels and implementations, in execution order. The numbering skips
+    // "Test 6": this class declares no test6.
+    const tests: Array<[string, () => Promise<void>]> = [
+      ['Test 1: 1000 INSERTs', this.test1.bind(this)],
+      ['Test 2: 25000 INSERTs in a transaction', this.test2.bind(this)],
+      ['Test 3: 25000 INSERTs into an indexed table', this.test3.bind(this)],
+      ['Test 4: 100 SELECTs without an index', this.test4.bind(this)],
+      ['Test 5: 100 SELECTs on a string comparison', this.test5.bind(this)],
+      ['Test 7: 5000 SELECTs with an index', this.test7.bind(this)],
+      ['Test 8: 1000 UPDATEs without an index', this.test8.bind(this)],
+      ['Test 9: 25000 UPDATEs with an index', this.test9.bind(this)],
+      ['Test 10: 25000 text UPDATEs with an index', this.test10.bind(this)],
+      ['Test 11: INSERTs from a SELECT', this.test11.bind(this)],
+      ['Test 12: DELETE without an index', this.test12.bind(this)],
+      ['Test 13: DELETE with an index', this.test13.bind(this)],
+      ['Test 14: A big INSERT after a big DELETE', this.test14.bind(this)],
+      [
+        'Test 15: A big DELETE followed by many small INSERTs',
+        this.test15.bind(this)
+      ],
+      ['Test 16: Clear table', this.test16.bind(this)]
+    ];
+
+    const timer = setInterval(sampleGap, 1);
+    try {
+      await this.setUp();
+
+      for (const [label, run] of tests) {
+        await results.record(label, run);
+      }
+
+      await this.tearDown();
+    } finally {
+      // Always stop the timer: a live interval would otherwise keep running
+      // (and keep the event loop alive) after a failed run.
+      clearInterval(timer);
+    }
+
+    // Account for the time between the last timer tick and now.
+    const diff = sampleGap();
+
+    console.log(`Dropped frames: ${droppedFrames} (diff ${diff})`);
+    return results;
+  }
+
+  /** Called once before the first test. */
+  abstract setUp(): Promise<void>;
+
+  abstract test1(): Promise<void>;
+  abstract test2(): Promise<void>;
+  abstract test3(): Promise<void>;
+  abstract test4(): Promise<void>;
+  abstract test5(): Promise<void>;
+  abstract test7(): Promise<void>;
+  abstract test8(): Promise<void>;
+  abstract test9(): Promise<void>;
+  abstract test10(): Promise<void>;
+  abstract test11(): Promise<void>;
+  abstract test12(): Promise<void>;
+  abstract test13(): Promise<void>;
+  abstract test14(): Promise<void>;
+  abstract test15(): Promise<void>;
+  abstract test16(): Promise<void>;
+
+  /** Called once after the last test, when all tests have succeeded. */
+  abstract tearDown(): Promise<void>;
+}