From 4f35c8c5e5830ddd4c47da65387c8caab619b64e Mon Sep 17 00:00:00 2001 From: Biz Melesse Date: Thu, 24 Oct 2024 12:12:04 -0700 Subject: [PATCH] Output eval results as a csv --- .../evals/component_retrieval/evaluator.mjs | 35 ++++++++++++++++++- .../component_retrieval/package-lock.json | 29 +++++++++++++++ .../evals/component_retrieval/package.json | 1 + 3 files changed, 64 insertions(+), 1 deletion(-) diff --git a/packages/evals/component_retrieval/evaluator.mjs b/packages/evals/component_retrieval/evaluator.mjs index 7dabb7ca181b0..a0a9f2d9122a2 100644 --- a/packages/evals/component_retrieval/evaluator.mjs +++ b/packages/evals/component_retrieval/evaluator.mjs @@ -4,12 +4,14 @@ import "dotenv/config"; import fs from "fs/promises"; import path from "path"; import { diff } from "json-diff"; +import { json2csv } from "json-2-csv"; const GREEN_CHECK = "\x1b[32m✔\x1b[0m"; const RED_CROSS = "\x1b[31m✖\x1b[0m"; let totalEvals = 0; let totalSuccesses = 0; +let apiResults = [] const apiHost = process.env.API_BASE_URL || "https://api.pipedream.com"; @@ -41,7 +43,23 @@ function customDiff(original, updated, oldLabel = "expected", newLabel = "actual return replaceLabels(result); } +async function exportToCsv(filePath, limit, threshold) { + const csvData = json2csv(apiResults, { + fields: ["query", "evalTriggers", "apiTriggers", "evalActions", "apiActions", "success"] + }); + const parts = filePath.split("/") + const path = parts[parts.length -1].split(".json")[0] + await fs.writeFile(`./csv/${path}-${limit}-${threshold}.csv`, csvData); +} + +function arrayToString(items) { + if (items) return items.join(",") + return "" +} + async function processEvalFile(filePath) { + const limit = 3 + const threshold = 0.65 try { const content = await fs.readFile(filePath, "utf-8"); const evalData = JSON.parse(content); @@ -53,7 +71,7 @@ async function processEvalFile(filePath) { } = evalTest; const encodedQuery = encodeURIComponent(query); - const apiUrl = `${apiHost}/v1/components/search?query=${encodedQuery}`; + const apiUrl = `${apiHost}/v1/components/search?query=${encodedQuery}&similarity_threshold=${threshold}&limit=${limit}`; const response = await fetch(apiUrl, { headers: { @@ -63,16 +81,20 @@ async function processEvalFile(filePath) { }); const apiData = await response.json(); + // Compare actual and expected const apiTriggers = apiData?.triggers ?? []; const apiActions = apiData?.actions ?? []; + const triggersMatch = JSON.stringify(apiTriggers.sort()) === JSON.stringify(triggers.sort()); const actionsMatch = JSON.stringify(apiActions.sort()) === JSON.stringify(actions.sort()); + let success = false if (triggersMatch && actionsMatch) { totalSuccesses++; + success = true console.log(`${GREEN_CHECK} Success for query: "${query}"`); } else { console.log(`${RED_CROSS} Failure for query: "${query}"`); @@ -82,10 +104,21 @@ async function processEvalFile(filePath) { actions, }, apiData)); } + + const record = { + query: query.replace("\"", ""), + apiTriggers: arrayToString(apiTriggers), + apiActions: arrayToString(apiActions), + evalTriggers: arrayToString(triggers), + evalActions: arrayToString(actions), + success: success + }; + apiResults.push(record) } } catch (error) { console.error(`Error processing file ${filePath}:`, error.message); } + await exportToCsv(filePath, limit, threshold) } async function main() { diff --git a/packages/evals/component_retrieval/package-lock.json b/packages/evals/component_retrieval/package-lock.json index eeebfb5eb738d..0f1736b44be18 100644 --- a/packages/evals/component_retrieval/package-lock.json +++ b/packages/evals/component_retrieval/package-lock.json @@ -9,6 +9,7 @@ "version": "0.0.1", "dependencies": { "dotenv": "^16.4.5", + "json-2-csv": "^5.5.6", "json-diff": "^1.0.6" } }, @@ -28,6 +29,22 @@ "node": ">=0.1.90" } }, + "node_modules/deeks": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/deeks/-/deeks-3.1.0.tgz", + "integrity": "sha512-e7oWH1LzIdv/prMQ7pmlDlaVoL64glqzvNgkgQNgyec9ORPHrT2jaOqMtRyqJuwWjtfb6v+2rk9pmaHj+F137A==", + "engines": { + "node": ">= 16" + } + }, + "node_modules/doc-path": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/doc-path/-/doc-path-4.1.1.tgz", + "integrity": "sha512-h1ErTglQAVv2gCnOpD3sFS6uolDbOKHDU1BZq+Kl3npPqroU3dYL42lUgMfd5UimlwtRgp7C9dLGwqQ5D2HYgQ==", + "engines": { + "node": ">=16" + } + }, "node_modules/dotenv": { "version": "16.4.5", "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", @@ -55,6 +72,18 @@ "resolved": "https://registry.npmjs.org/heap/-/heap-0.2.7.tgz", "integrity": "sha512-2bsegYkkHO+h/9MGbn6KWcE45cHZgPANo5LXF7EvWdT0yT2EguSVO1nDgU5c8+ZOPwp2vMNa7YFsJhVcDR9Sdg==" }, + "node_modules/json-2-csv": { + "version": "5.5.6", + "resolved": "https://registry.npmjs.org/json-2-csv/-/json-2-csv-5.5.6.tgz", + "integrity": "sha512-N673XbJgHwUq9JreKpk530jSywPF/rEAQ08dV99QQpkluP/4HTwshpoP9hmDz26iSFqu7eNAPgyJfu/77HvPGA==", + "dependencies": { + "deeks": "3.1.0", + "doc-path": "4.1.1" + }, + "engines": { + "node": ">= 16" + } + }, "node_modules/json-diff": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/json-diff/-/json-diff-1.0.6.tgz", diff --git a/packages/evals/component_retrieval/package.json b/packages/evals/component_retrieval/package.json index 5b5ca619bc3f6..6f16913e997f5 100644 --- a/packages/evals/component_retrieval/package.json +++ b/packages/evals/component_retrieval/package.json @@ -5,6 +5,7 @@ "main": "evaluator.mjs", "dependencies": { "dotenv": "^16.4.5", + "json-2-csv": "^5.5.6", "json-diff": "^1.0.6" } }