From 0f0cdfb20367418b143cc9402be8e8c1fcd4d94a Mon Sep 17 00:00:00 2001 From: Nathan Disidore Date: Thu, 25 Jul 2024 10:37:43 -0500 Subject: [PATCH] Implement Vectorize GA binding changes --- .../python-vectorize-api-test.wd-test | 2 +- .../test/vectorize/vectorize-api-test.js | 2 +- .../test/vectorize/vectorize-api-test.wd-test | 2 +- .../internal/test/vectorize/vectorize-mock.js | 2 +- src/cloudflare/internal/vectorize-v2-api.ts | 187 ++++++++++++++++++ src/cloudflare/internal/vectorize.d.ts | 19 +- types/defines/vectorize.d.ts | 17 ++ 7 files changed, 226 insertions(+), 5 deletions(-) create mode 100644 src/cloudflare/internal/vectorize-v2-api.ts diff --git a/src/cloudflare/internal/test/vectorize/python-vectorize-api-test.wd-test b/src/cloudflare/internal/test/vectorize/python-vectorize-api-test.wd-test index 7afcdccbb292..98ff3bd2fe21 100644 --- a/src/cloudflare/internal/test/vectorize/python-vectorize-api-test.wd-test +++ b/src/cloudflare/internal/test/vectorize/python-vectorize-api-test.wd-test @@ -12,7 +12,7 @@ const unitTests :Workerd.Config = ( bindings = [ ( name = "vectorSearch", wrapped = ( - moduleName = "cloudflare-internal:vectorize-api", + moduleName = "cloudflare-internal:vectorize-v2-api", innerBindings = [( name = "fetcher", service = "vector-search-mock" diff --git a/src/cloudflare/internal/test/vectorize/vectorize-api-test.js b/src/cloudflare/internal/test/vectorize/vectorize-api-test.js index d7917f20585d..010dcb5c826e 100644 --- a/src/cloudflare/internal/test/vectorize/vectorize-api-test.js +++ b/src/cloudflare/internal/test/vectorize/vectorize-api-test.js @@ -26,7 +26,7 @@ export const test_vector_search_vector_query = { returnMetadata: "indexed", }); assert.equal(true, results.count > 0); - /** @type {VectorizeQueryMatches} */ + /** @type {VectorizeMatches} */ const expected = { matches: [ { diff --git a/src/cloudflare/internal/test/vectorize/vectorize-api-test.wd-test b/src/cloudflare/internal/test/vectorize/vectorize-api-test.wd-test 
index b714176bfa28..c5ecfe02a878 100644 --- a/src/cloudflare/internal/test/vectorize/vectorize-api-test.wd-test +++ b/src/cloudflare/internal/test/vectorize/vectorize-api-test.wd-test @@ -12,7 +12,7 @@ const unitTests :Workerd.Config = ( bindings = [ ( name = "vector-search", wrapped = ( - moduleName = "cloudflare-internal:vectorize-api", + moduleName = "cloudflare-internal:vectorize-v2-api", innerBindings = [( name = "fetcher", service = "vector-search-mock" diff --git a/src/cloudflare/internal/test/vectorize/vectorize-mock.js b/src/cloudflare/internal/test/vectorize/vectorize-mock.js index 04fc1c0c7ab3..ad4bf64b22dd 100644 --- a/src/cloudflare/internal/test/vectorize/vectorize-mock.js +++ b/src/cloudflare/internal/test/vectorize/vectorize-mock.js @@ -2,7 +2,7 @@ // Licensed under the Apache 2.0 license found in the LICENSE file or at: // https://opensource.org/licenses/Apache-2.0 -/** @type {Array} */ +/** @type {Array} */ const exampleVectorMatches = [ { id: "b0daca4a-ffd8-4865-926b-e24800af2a2d", diff --git a/src/cloudflare/internal/vectorize-v2-api.ts b/src/cloudflare/internal/vectorize-v2-api.ts new file mode 100644 index 000000000000..25ffca9de096 --- /dev/null +++ b/src/cloudflare/internal/vectorize-v2-api.ts @@ -0,0 +1,187 @@ +// Copyright (c) 2023 Cloudflare, Inc. 
+// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 +interface Fetcher { + fetch: typeof fetch; +} + +enum Operation { + INDEX_GET = 0, + VECTOR_QUERY = 1, + VECTOR_INSERT = 2, + VECTOR_UPSERT = 3, + VECTOR_GET = 4, + VECTOR_DELETE = 5, +} + +class VectorizeImpl implements Vectorize { + public constructor(private readonly fetcher: Fetcher) {} + + public async describe(): Promise { + const res = await this._send(Operation.INDEX_GET, `/info`, { + method: "GET", + }); + + return await toJson(res); + } + + public async query( + vector: VectorFloatArray | number[], + options: VectorizeQueryOptions + ): Promise { + const res = await this._send(Operation.VECTOR_QUERY, `/query`, { + method: "POST", + body: JSON.stringify({ + ...options, + vector: Array.isArray(vector) ? vector : Array.from(vector), + }), + headers: { + "content-type": "application/json", + accept: "application/json", + }, + }); + + return await toJson(res); + } + + public async insert( + vectors: VectorizeVector[] + ): Promise { + const res = await this._send(Operation.VECTOR_INSERT, `/insert`, { + method: "POST", + body: JSON.stringify({ + vectors: vectors.map((vec) => ({ + ...vec, + values: Array.isArray(vec.values) + ? vec.values + : Array.from(vec.values), + })), + }), + headers: { + "content-type": "application/json", + "cf-vector-search-dim-width": String( + vectors.length ? vectors[0]?.values?.length : 0 + ), + "cf-vector-search-dim-height": String(vectors.length), + accept: "application/json", + }, + }); + + return await toJson(res); + } + + public async upsert( + vectors: VectorizeVector[] + ): Promise { + const res = await this._send(Operation.VECTOR_UPSERT, `/upsert`, { + method: "POST", + body: JSON.stringify({ + vectors: vectors.map((vec) => ({ + ...vec, + values: Array.isArray(vec.values) + ? 
vec.values + : Array.from(vec.values), + })), + }), + headers: { + "content-type": "application/json", + "cf-vector-search-dim-width": String( + vectors.length ? vectors[0]?.values?.length : 0 + ), + "cf-vector-search-dim-height": String(vectors.length), + accept: "application/json", + }, + }); + + return await toJson(res); + } + + public async getByIds(ids: string[]): Promise { + const res = await this._send(Operation.VECTOR_GET, `/getByIds`, { + method: "POST", + body: JSON.stringify({ ids }), + headers: { + "content-type": "application/json", + accept: "application/json", + }, + }); + + return await toJson(res); + } + + public async deleteByIds(ids: string[]): Promise { + const res = await this._send(Operation.VECTOR_DELETE, `/deleteByIds`, { + method: "POST", + body: JSON.stringify({ ids }), + headers: { + "content-type": "application/json", + accept: "application/json", + }, + }); + + return await toJson(res); + } + + private async _send( + operation: Operation, + endpoint: string, + init: RequestInit + ): Promise { + const res = await this.fetcher.fetch( + `http://vector-search/${endpoint}`, // `http://vector-search` is just a dummy host, the attached fetcher will receive the request + init + ); + if (res.status !== 200) { + let err: Error | null = null; + + try { + const errResponse = (await res.json()) as VectorizeError; + err = new Error( + `${Operation[operation]}_ERROR${ + typeof errResponse.code === "number" + ? 
` (code = ${errResponse.code})` + : "" + }: ${errResponse.error}`, + { + cause: new Error(errResponse.error), + } + ); + } catch {} + + if (err) { + throw err; + } else { + throw new Error( + `${Operation[operation]}_ERROR: Status + ${res.status}`, + { + cause: new Error(`Status ${res.status}`), + } + ); + } + } + + return res; + } +} + +const maxBodyLogChars = 1_000; +async function toJson(response: Response): Promise { + const body = await response.text(); + try { + return JSON.parse(body) as T; + } catch { + throw new Error( + `Failed to parse body as JSON, got: ${ + body.length > maxBodyLogChars + ? `${body.slice(0, maxBodyLogChars)}…` + : body + }` + ); + } +} + +export function makeBinding(env: { fetcher: Fetcher }): Vectorize { + return new VectorizeImpl(env.fetcher); +} + +export default makeBinding; diff --git a/src/cloudflare/internal/vectorize.d.ts b/src/cloudflare/internal/vectorize.d.ts index c9f6361ec6a7..c22a29bb051e 100644 --- a/src/cloudflare/internal/vectorize.d.ts +++ b/src/cloudflare/internal/vectorize.d.ts @@ -91,6 +91,9 @@ type VectorizeIndexConfig = /** * Metadata about an existing index. + * + * This type is exclusively for the Vectorize **beta** and will be deprecated once Vectorize RC is released. + * See {@link VectorizeIndexInfo} for its post-beta equivalent. */ interface VectorizeIndexDetails { /** The unique ID of the index */ @@ -105,6 +108,20 @@ interface VectorizeIndexDetails { vectorsCount: number; } +/** + * Metadata about an existing index. + */ +interface VectorizeIndexInfo { + /** The number of records containing vectors within the index. */ + vectorsCount: number; + /** Number of dimensions the index has been configured for. */ + dimensions: number; + /** ISO 8601 datetime of the last processed mutation in the index. All changes before this mutation will be reflected in the index state. */ + processedUpToDatetime: number; + /** UUIDv4 of the last mutation processed by the index.
All changes before this mutation will be reflected in the index state. */ + processedUpToMutation: number; +} + /** * Represents a single vector value set along with its associated metadata. */ @@ -217,7 +234,7 @@ declare abstract class Vectorize { * Get information about the currently bound index. * @returns A promise that resolves with information about the current index. */ - public describe(): Promise; + public describe(): Promise; /** * Use the provided vector to perform a similarity search across the index. * @param vector Input vector that will be used to drive the similarity search. diff --git a/types/defines/vectorize.d.ts b/types/defines/vectorize.d.ts index 0d14327237fa..fbe0816b47a1 100644 --- a/types/defines/vectorize.d.ts +++ b/types/defines/vectorize.d.ts @@ -83,6 +83,9 @@ type VectorizeIndexConfig = /** * Metadata about an existing index. + * + * This type is exclusively for the Vectorize **beta** and will be deprecated once Vectorize RC is released. + * See {@link VectorizeIndexInfo} for its post-beta equivalent. */ interface VectorizeIndexDetails { /** The unique ID of the index */ @@ -97,6 +100,20 @@ interface VectorizeIndexDetails { vectorsCount: number; } +/** + * Metadata about an existing index. + */ +interface VectorizeIndexInfo { + /** The number of records containing vectors within the index. */ + vectorsCount: number; + /** Number of dimensions the index has been configured for. */ + dimensions: number; + /** ISO 8601 datetime of the last processed mutation in the index. All changes before this mutation will be reflected in the index state. */ + processedUpToDatetime: number; + /** UUIDv4 of the last mutation processed by the index. All changes before this mutation will be reflected in the index state. */ + processedUpToMutation: number; +} + /** * Represents a single vector value set along with its associated metadata. */