From 3c1e9cc9722d0e9c7fa8035a9fc8ea8b5bf543e8 Mon Sep 17 00:00:00 2001 From: 0xcodercrane <108444211+0xcodercrane@users.noreply.github.com> Date: Mon, 30 Oct 2023 17:55:10 +0800 Subject: [PATCH 1/6] feat: create todo methods --- src/adapters/supabase/helpers/client.ts | 12 +++++ src/handlers/issue/embeddings.ts | 7 +++ src/handlers/issue/index.ts | 1 + src/handlers/processors.ts | 8 ++-- src/helpers/gpt.ts | 46 +++++++++++++++++++ .../20231030085814_create_embedding_table.sql | 11 +++++ 6 files changed, 81 insertions(+), 4 deletions(-) create mode 100644 src/handlers/issue/embeddings.ts create mode 100644 supabase/migrations/20231030085814_create_embedding_table.sql diff --git a/src/adapters/supabase/helpers/client.ts b/src/adapters/supabase/helpers/client.ts index b8a3b23da..07f46fbfc 100644 --- a/src/adapters/supabase/helpers/client.ts +++ b/src/adapters/supabase/helpers/client.ts @@ -573,3 +573,15 @@ export const _approveLabelChange = async (changeId: number) => { return; }; + +/** + * Upserts embeddings vector to the `embeddings` table + * @param org - The organization name + * @param repo - The repository name + * @param issue - The issue number + * @param embeddings - The vector of floating point numbers + */ +export const upsertEmbeddings = async (org: string, repo: string, issue: number, embeddings: Array) => { + // TODO: Insert or Update embeddings data + return; +}; diff --git a/src/handlers/issue/embeddings.ts b/src/handlers/issue/embeddings.ts new file mode 100644 index 000000000..99e5ff715 --- /dev/null +++ b/src/handlers/issue/embeddings.ts @@ -0,0 +1,7 @@ +/** + * Generates an embedding vector for the current issue + */ + +export const generateEmbeddings = async () => { + return; +}; diff --git a/src/handlers/issue/index.ts b/src/handlers/issue/index.ts index 12e1212ae..24abdc279 100644 --- a/src/handlers/issue/index.ts +++ b/src/handlers/issue/index.ts @@ -1 +1,2 @@ export * from "./pre"; +export * from "./embeddings"; diff --git a/src/handlers/processors.ts b/src/handlers/processors.ts index 17bec49c7..afda85579 100644 --- a/src/handlers/processors.ts +++ b/src/handlers/processors.ts @@ -7,14 +7,14 @@ import { handleComment, issueClosedCallback, issueCreatedCallback, issueReopened import { checkPullRequests } from "./assign/auto"; import { createDevPoolPR } from "./pull-request"; import { runOnPush, validateConfigChange } from "./push"; -import { findDuplicateOne } from "./issue"; +import { findDuplicateOne, generateEmbeddings } from "./issue"; import { watchLabelChange } from "./label"; export const processors: Record = { [GithubEvent.ISSUES_OPENED]: { pre: [nullHandler], action: [findDuplicateOne, issueCreatedCallback], - post: [nullHandler], + post: [generateEmbeddings], }, [GithubEvent.ISSUES_REOPENED]: { pre: [nullHandler], @@ -44,12 +44,12 @@ export const processors: Record = { [GithubEvent.ISSUE_COMMENT_CREATED]: { pre: [nullHandler], action: [handleComment], - post: [nullHandler], + post: [generateEmbeddings], }, [GithubEvent.ISSUE_COMMENT_EDITED]: { pre: [nullHandler], action: [handleComment], - post: [nullHandler], + post: [generateEmbeddings], }, [GithubEvent.ISSUES_CLOSED]: { pre: [nullHandler], diff --git a/src/helpers/gpt.ts b/src/helpers/gpt.ts index 046503d79..d3d4dac51 100644 --- a/src/helpers/gpt.ts +++ b/src/helpers/gpt.ts @@ -178,3 +178,49 @@ export const askGPT = async (question: string, chatHistory: CreateChatCompletion return { answer, tokenUsage }; }; + +/** + * What is embedding? + * An embedding is a vector of floating point numbers to measure the relatedness of text strings. + * How can I get an embedding using OpenAI? + * To get an embedding, send your text string to the embeddings API endpoint along with a choice of embedding model ID (e.g., text-embedding-ada-002). + * The response will contain an embedding, which you can extract, save, and use. + * + * Example Request: + * + * curl https://api.openai.com/v1/embeddings \ + * -H "Content-Type: application/json" \ + * -H "Authorization: Bearer $OPENAI_API_KEY" \ + * -d '{ + * "input": "Your text string goes here", + * "model": "text-embedding-ada-002" + * }' + * + * Example Response: + * + * { + * "data": [ + * { + * "embedding": [ + * -0.006929283495992422, + * -0.005336422007530928, + * ... + * -4.547132266452536e-05, + * -0.024047505110502243 + * ], + * "index": 0, + * "object": "embedding" + * } + * ], + * "model": "text-embedding-ada-002", + * "object": "list", + * "usage": { + * "prompt_tokens": 5, + * "total_tokens": 5 + * } + * } + * @param words - The input data to generate the embedding for + */ +export const generateEmbeddings = async (words: string): Promise> => { + throw new Error("Not implemented yet"); +}; diff --git a/supabase/migrations/20231030085814_create_embedding_table.sql b/supabase/migrations/20231030085814_create_embedding_table.sql new file mode 100644 index 000000000..224e99aa9 --- /dev/null +++ b/supabase/migrations/20231030085814_create_embedding_table.sql @@ -0,0 +1,11 @@ +-- Creates the `embeddings` table to store embeddings for every issue +-- We've decided to use the `text-embedding-ada-002` model in the beginning which has 1536 output dimensions +CREATE TABLE IF NOT EXISTS embeddings ( + id serial PRIMARY KEY, + org character varying(255) NOT NULL, + repo character varying(255) NOT NULL, + issue integer NOT NULL, + embeddings vector(1536) NOT NULL, + created_at timestamptz NOT NULL, + updated_at timestamptz NOT NULL, +); \ No newline at end of file From 8b3ed232109b4bbcadb957e80b5d66c8a2d7e2ea Mon Sep 17 00:00:00 2001 From: 0xcodercrane <108444211+0xcodercrane@users.noreply.github.com> Date: Mon, 30 Oct 2023 18:30:50 +0800 Subject: [PATCH 2/6] feat: integrate embedding endpoint --- src/helpers/gpt.ts | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/helpers/gpt.ts b/src/helpers/gpt.ts index d3d4dac51..5305b9ddd 100644 --- a/src/helpers/gpt.ts +++ b/src/helpers/gpt.ts @@ -221,6 +221,26 @@ export const askGPT = async (question: string, chatHistory: CreateChatCompletion * } * @param words - The input data to generate the embedding for */ -export const generateEmbeddings = async (words: string): Promise> => { - throw new Error("Not implemented yet"); +export const generateEmbeddings = async (words: string): Promise => { + const logger = getLogger(); + const config = getBotConfig(); + + if (!config.ask.apiKey) { + logger.info(`No OpenAI API Key provided`); + throw new Error("You must configure the `openai-api-key` property in the bot configuration in order to use AI powered features."); + } + + const openai = new OpenAI({ + apiKey: config.ask.apiKey, + }); + + const embedding = await openai.embeddings.create({ + // TODO: A couple of embedding models exist and `text-embedding-ada-002` is the one recommended by OpenAI + // because it's better, cheaper and simpler to use. + // We might need to move the hardcoded model: `text-embedding-ada-002` to the bot configuration for better extensibility + model: "text-embedding-ada-002", + input: words, + }); + + return embedding.data[0]["embedding"] as number[]; }; From e431c79ea7571eb273462ec9bb574043ab1057d1 Mon Sep 17 00:00:00 2001 From: 0xcodercrane <108444211+0xcodercrane@users.noreply.github.com> Date: Tue, 31 Oct 2023 09:52:52 +0800 Subject: [PATCH 3/6] feat: complete the supabase adapter method --- src/adapters/supabase/helpers/client.ts | 32 +++++++++++++++++-- .../20231030085814_create_embedding_table.sql | 4 +-- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/src/adapters/supabase/helpers/client.ts b/src/adapters/supabase/helpers/client.ts index 07f46fbfc..6e513c44a 100644 --- a/src/adapters/supabase/helpers/client.ts +++ b/src/adapters/supabase/helpers/client.ts @@ -581,7 +581,33 @@ export const _approveLabelChange = async (changeId: number) => { * @param issue - The issue number * @param embeddings - The vector of floating point numbers */ -export const upsertEmbeddings = async (org: string, repo: string, issue: number, embeddings: Array) => { - // TODO: Insert or Update embeddings data - return; +export const upsertEmbeddings = async (org: string, repo: string, issue: number, embedding: number[]) => { + const { supabase } = getAdapters(); + const logger = getLogger(); + const { data } = await supabase.from("embeddings").select("*").eq("org", org).eq("repo", repo).eq("issue", issue).single(); + if (data) { + // Update the existing record with the new embedding for a given set. + const id = data["id"] as number; + const { error } = await supabase.from("embeddings").upsert({ + id: id, + embedding, + updated_at: new Date().toISOString(), + }); + if (error) { + logger.info(`Updating embedding table failed. id: ${id}, org: ${org}, repo: ${repo}, issue: ${issue}, embedding: ${embedding.join(", ")}`); + } + } else { + // Insert a new record to the `embeddings` table. + const { error } = await supabase.from("embeddings").upsert({ + org, + repo, + issue, + embedding, + created_at: new Date().toISOString(), + updated_at: new Date().toISOString(), + }); + if (error) { + logger.info(`Inserting to embedding table failed. org: ${org}, repo: ${repo}, issue: ${issue}, embedding: ${embedding.join(", ")}`); + } + } }; diff --git a/supabase/migrations/20231030085814_create_embedding_table.sql b/supabase/migrations/20231030085814_create_embedding_table.sql index 224e99aa9..70d34bf79 100644 --- a/supabase/migrations/20231030085814_create_embedding_table.sql +++ b/supabase/migrations/20231030085814_create_embedding_table.sql @@ -1,11 +1,11 @@ --- Creates the `embeddings` table to store embeddings for every issue +-- Creates the `embeddings` table to store an embedding for every issue -- We've decided to use the `text-embedding-ada-002` model in the beginning which has 1536 output dimensions CREATE TABLE IF NOT EXISTS embeddings ( id serial PRIMARY KEY, org character varying(255) NOT NULL, repo character varying(255) NOT NULL, issue integer NOT NULL, - embeddings vector(1536) NOT NULL, + embedding vector(1536) NOT NULL, created_at timestamptz NOT NULL, updated_at timestamptz NOT NULL, ); \ No newline at end of file From 950b8a299f0826dbccf71cc4e49b723029fb1d5f Mon Sep 17 00:00:00 2001 From: 0xcodercrane <108444211+0xcodercrane@users.noreply.github.com> Date: Mon, 6 Nov 2023 09:37:33 +0800 Subject: [PATCH 4/6] feat: link the handler to event --- src/handlers/comment/action.ts | 6 ++---- src/handlers/issue/embeddings.ts | 26 ++++++++++++++++++++++++-- src/helpers/index.ts | 1 + 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/handlers/comment/action.ts b/src/handlers/comment/action.ts index 60ea5bbae..5b7e52483 100644 --- a/src/handlers/comment/action.ts +++ b/src/handlers/comment/action.ts @@ -51,11 +51,9 @@ export const handleComment = async (): Promise => { const callbackComment = response ?? successComment ?? ""; if (callbackComment) await callback(issue.number, callbackComment, payload.action, payload.comment); } catch (err: unknown) { - // Use failureComment for failed command if it is available - if (failureComment) { - await callback(issue.number, failureComment, payload.action, payload.comment); + for (const comment of [failureComment, ErrorDiff(err)]) { + if (comment) await callback(issue.number, comment, payload.action, payload.comment); } - await callback(issue.number, ErrorDiff(err), payload.action, payload.comment); } } else { logger.info(`Skipping for a command: ${command}`); diff --git a/src/handlers/issue/embeddings.ts b/src/handlers/issue/embeddings.ts index 99e5ff715..d8fb6004f 100644 --- a/src/handlers/issue/embeddings.ts +++ b/src/handlers/issue/embeddings.ts @@ -1,7 +1,29 @@ +import { getAdapters, getBotConfig, getBotContext, getLogger } from "../../bindings"; +import { Payload } from "../../types"; +import { generateEmbeddings } from "../../helpers"; +import { upsertEmbeddings } from "../../adapters/supabase"; + /** * Generates an embedding vector for the current issue */ +export const embeddings = async () => { + const { payload: _payload } = getBotContext(); + const logger = getLogger(); + const payload = _payload as Payload; + const issue = payload.issue; + + if (!issue) { + logger.info(`Skip to generate embeddings because of no issue instance`); + return; + } + + if (!issue.body) { + logger.info("Skip to generate embeddings because of empty body"); + return; + } -export const generateEmbeddings = async () => { - return; + const embeddings = await generateEmbeddings(issue.body); + if (embeddings.length > 0) { + await upsertEmbeddings(payload.repository.owner.login, payload.repository.name, issue.number, embeddings); + } }; diff --git a/src/helpers/index.ts b/src/helpers/index.ts index 07cf66d74..496669f8b 100644 --- a/src/helpers/index.ts +++ b/src/helpers/index.ts @@ -10,3 +10,4 @@ export * from "./payout"; export * from "./file"; export * from "./similarity"; export * from "./commit"; +export * from "./gpt"; From 152c49c94ceba7660841ba893f92fe3d2ecc734c Mon Sep 17 00:00:00 2001 From: 0xcodercrane <108444211+0xcodercrane@users.noreply.github.com> Date: Mon, 6 Nov 2023 09:39:38 +0800 Subject: [PATCH 5/6] build: remove never read methods --- src/handlers/issue/embeddings.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/handlers/issue/embeddings.ts b/src/handlers/issue/embeddings.ts index d8fb6004f..d3a4955b5 100644 --- a/src/handlers/issue/embeddings.ts +++ b/src/handlers/issue/embeddings.ts @@ -1,4 +1,4 @@ -import { getAdapters, getBotConfig, getBotContext, getLogger } from "../../bindings"; +import { getBotContext, getLogger } from "../../bindings"; import { Payload } from "../../types"; import { generateEmbeddings } from "../../helpers"; import { upsertEmbeddings } from "../../adapters/supabase"; From b5b9e8b583431cf885db5fd6f92799746545162e Mon Sep 17 00:00:00 2001 From: 0xcodercrane <108444211+0xcodercrane@users.noreply.github.com> Date: Mon, 6 Nov 2023 09:59:45 +0800 Subject: [PATCH 6/6] build: update method names --- src/handlers/processors.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/handlers/processors.ts b/src/handlers/processors.ts index afda85579..ac7814a23 100644 --- a/src/handlers/processors.ts +++ b/src/handlers/processors.ts @@ -7,14 +7,14 @@ import { handleComment, issueClosedCallback, issueCreatedCallback, issueReopened import { checkPullRequests } from "./assign/auto"; import { createDevPoolPR } from "./pull-request"; import { runOnPush, validateConfigChange } from "./push"; -import { findDuplicateOne, generateEmbeddings } from "./issue"; +import { findDuplicateOne, embeddings } from "./issue"; import { watchLabelChange } from "./label"; export const processors: Record = { [GithubEvent.ISSUES_OPENED]: { pre: [nullHandler], action: [findDuplicateOne, issueCreatedCallback], - post: [generateEmbeddings], + post: [embeddings], }, [GithubEvent.ISSUES_REOPENED]: { pre: [nullHandler], @@ -44,12 +44,12 @@ export const processors: Record = { [GithubEvent.ISSUE_COMMENT_CREATED]: { pre: [nullHandler], action: [handleComment], - post: [generateEmbeddings], + post: [embeddings], }, [GithubEvent.ISSUE_COMMENT_EDITED]: { pre: [nullHandler], action: [handleComment], - post: [generateEmbeddings], + post: [embeddings], }, [GithubEvent.ISSUES_CLOSED]: { pre: [nullHandler],