Skip to content

Commit

Permalink
Output eval results as a csv
Browse files Browse the repository at this point in the history
  • Loading branch information
bzwrk committed Oct 24, 2024
1 parent 74b4b5b commit 4f35c8c
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 1 deletion.
35 changes: 34 additions & 1 deletion packages/evals/component_retrieval/evaluator.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@ import "dotenv/config";
import fs from "fs/promises";
import path from "path";
import { diff } from "json-diff";
import { json2csv } from "json-2-csv";

const GREEN_CHECK = "\x1b[32m✔\x1b[0m";
const RED_CROSS = "\x1b[31m✖\x1b[0m";

let totalEvals = 0;
let totalSuccesses = 0;
let apiResults = []

const apiHost = process.env.API_BASE_URL || "https://api.pipedream.com";

Expand Down Expand Up @@ -41,7 +43,23 @@ function customDiff(original, updated, oldLabel = "expected", newLabel = "actual
return replaceLabels(result);
}

async function exportToCsv(filePath, limit, threshold) {
const csvData = json2csv(apiResults, {
fields: ["query", "evalTriggers", "apiTriggers", "evalActions", "apiActions", "success"]
});
const parts = filePath.split("/")
const path = parts[parts.length -1].split(".json")[0]
await fs.writeFile(`./csv/${path}-${limit}-${threshold}.csv`, csvData);
}

function arrayToString(items) {
if (items) return items.join(",")
return ""
}

async function processEvalFile(filePath) {
const limit = 3
const threshold = 0.65
try {
const content = await fs.readFile(filePath, "utf-8");
const evalData = JSON.parse(content);
Expand All @@ -53,7 +71,7 @@ async function processEvalFile(filePath) {
} = evalTest;

const encodedQuery = encodeURIComponent(query);
const apiUrl = `${apiHost}/v1/components/search?query=${encodedQuery}`;
const apiUrl = `${apiHost}/v1/components/search?query=${encodedQuery}&similarity_threshold=${threshold}&limit=${limit}`;

const response = await fetch(apiUrl, {
headers: {
Expand All @@ -63,16 +81,20 @@ async function processEvalFile(filePath) {
});
const apiData = await response.json();


// Compare actual and expected
const apiTriggers = apiData?.triggers ?? [];
const apiActions = apiData?.actions ?? [];

const triggersMatch =
JSON.stringify(apiTriggers.sort()) === JSON.stringify(triggers.sort());
const actionsMatch =
JSON.stringify(apiActions.sort()) === JSON.stringify(actions.sort());

let success = false
if (triggersMatch && actionsMatch) {
totalSuccesses++;
success = true
console.log(`${GREEN_CHECK} Success for query: "${query}"`);
} else {
console.log(`${RED_CROSS} Failure for query: "${query}"`);
Expand All @@ -82,10 +104,21 @@ async function processEvalFile(filePath) {
actions,
}, apiData));
}

const record = {
query: query.replace("\"", ""),
apiTriggers: arrayToString(apiTriggers),
apiActions: arrayToString(apiActions),
evalTriggers: arrayToString(triggers),
evalActions: arrayToString(actions),
success: success
};
apiResults.push(record)
}
} catch (error) {
console.error(`Error processing file ${filePath}:`, error.message);
}
await exportToCsv(filePath, limit, threshold)
}

async function main() {
Expand Down
29 changes: 29 additions & 0 deletions packages/evals/component_retrieval/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions packages/evals/component_retrieval/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"main": "evaluator.mjs",
"dependencies": {
"dotenv": "^16.4.5",
"json-2-csv": "^5.5.6",
"json-diff": "^1.0.6"
}
}

0 comments on commit 4f35c8c

Please sign in to comment.