From 0245531e6a0284e94bebde647a44ec9c18bb5ad5 Mon Sep 17 00:00:00 2001 From: nickevansuk <2616208+nickevansuk@users.noreply.github.com> Date: Wed, 5 Apr 2023 09:59:55 +0100 Subject: [PATCH] feat: Data quality measures --- packages/openactive-broker-microservice/app.js | 18 ++++++++++++++++-- .../src/broker-config.js | 4 ++-- .../src/validator/types.d.ts | 1 + .../src/validator/validator-worker-pool.js | 17 +++++++++++++++++ .../src/validator/validator-worker.js | 9 +++++++-- .../templates/validation-errors.handlebars | 13 +++++++++++++ 6 files changed, 56 insertions(+), 6 deletions(-) diff --git a/packages/openactive-broker-microservice/app.js b/packages/openactive-broker-microservice/app.js index 5f0f412f1d..3ee8e5e550 100644 --- a/packages/openactive-broker-microservice/app.js +++ b/packages/openactive-broker-microservice/app.js @@ -103,6 +103,18 @@ async function renderValidationErrorsHtml(validatorWorkerPool) { errorKey, ...obj, })), + profileMeasuresPerFeed: [...Object.entries(validatorWorkerPool.getProfileMeasuresPerFeed())].map(([feedContextIdentifier, profileMeasures]) => ({ + feedContextIdentifier, + totalItemCount: profileMeasures.totalItemCount, + profileMeasures: [...Object.entries(profileMeasures.profiles)].map(([profile, measures]) => ({ + profile, + measures: [...Object.entries(measures)].map(([measure, count]) => ({ + measure, + count, + percentage: Math.round((count / profileMeasures.totalItemCount) * 100), + })), + })), + })), }); } @@ -232,7 +244,7 @@ async function harvestRPDE({ json, pageIndex: context.pages, contentType: response.headers['content-type'], - cacheControl: response.headers['Cache-Control'], + cacheControl: response.headers['cache-control'], status: response.status, isInitialHarvestComplete, isOrdersFeed, @@ -1474,7 +1486,9 @@ Validation errors found in Dataset Site JSON-LD: `); - throw new Error('Unable to read valid JSON-LD from Dataset Site.'); + // TODO: Set this via an env var or similar + // if (continueOnInvalidDatasetSite) + // throw new Error('Unable to read valid JSON-LD from Dataset Site.'); } // Set global based on data result diff --git a/packages/openactive-broker-microservice/src/broker-config.js b/packages/openactive-broker-microservice/src/broker-config.js index 56b6dae42e..0bd7075b08 100644 --- a/packages/openactive-broker-microservice/src/broker-config.js +++ b/packages/openactive-broker-microservice/src/broker-config.js @@ -28,8 +28,8 @@ const ORDER_PROPOSALS_FEED_IDENTIFIER = 'OrderProposalsFeed'; // These options are not recommended for general use, but are available for specific test environment configuration and debugging const OPPORTUNITY_FEED_REQUEST_HEADERS = config.has('broker.opportunityFeedRequestHeaders') ? config.get('broker.opportunityFeedRequestHeaders') : {}; const DATASET_DISTRIBUTION_OVERRIDE = config.has('broker.datasetDistributionOverride') ? config.get('broker.datasetDistributionOverride') : []; -const DO_NOT_FILL_BUCKETS = config.has('broker.disableBucketAllocation') ? config.get('broker.disableBucketAllocation') : false; -const DO_NOT_HARVEST_ORDERS_FEED = config.has('broker.disableOrdersFeedHarvesting') ? config.get('broker.disableOrdersFeedHarvesting') : false; +const DO_NOT_FILL_BUCKETS = VALIDATE_ONLY || (config.has('broker.disableBucketAllocation') ? config.get('broker.disableBucketAllocation') : false); +const DO_NOT_HARVEST_ORDERS_FEED = VALIDATE_ONLY || (config.has('broker.disableOrdersFeedHarvesting') ? config.get('broker.disableOrdersFeedHarvesting') : false); const DISABLE_BROKER_TIMEOUT = config.has('broker.disableBrokerMicroserviceTimeout') ? config.get('broker.disableBrokerMicroserviceTimeout') : false; const LOG_AUTH_CONFIG = config.has('broker.logAuthConfig') ? config.get('broker.logAuthConfig') : false; diff --git a/packages/openactive-broker-microservice/src/validator/types.d.ts b/packages/openactive-broker-microservice/src/validator/types.d.ts index 368c9f366f..74418d75ee 100644 --- a/packages/openactive-broker-microservice/src/validator/types.d.ts +++ b/packages/openactive-broker-microservice/src/validator/types.d.ts @@ -6,6 +6,7 @@ export type ValidatorWorkerResponse = { numItemsPerFeed: { [feedContextIdentifier: string]: number; }; + profileMeasuresPerFeed: any }; export type ValidatorWorkerRequestParsedItem = { diff --git a/packages/openactive-broker-microservice/src/validator/validator-worker-pool.js b/packages/openactive-broker-microservice/src/validator/validator-worker-pool.js index 9cf97f384c..2bdf457e0d 100644 --- a/packages/openactive-broker-microservice/src/validator/validator-worker-pool.js +++ b/packages/openactive-broker-microservice/src/validator/validator-worker-pool.js @@ -3,6 +3,7 @@ * * Validator is computationally expensive, so we parallelise the work in order to get Broker up to speed more quickly. */ +const { addMeasures } = require('@openactive/data-model-validator'); const { execPipe, take, toArray, map } = require('iter-tools'); const { isNil } = require('lodash'); const fs = require('fs').promises; @@ -45,6 +46,7 @@ class ValidatorWorkerPool { * }>} */ this._validationResults = new Map(); + this._profileMeasuresPerFeed = {}; // TODO: Convert this to a map /** * Info that relates to stopping the Validator Worker Pool. * @@ -102,6 +104,10 @@ class ValidatorWorkerPool { return this._validationResults; } + getProfileMeasuresPerFeed() { + return this._profileMeasuresPerFeed; + } + /** * Start running Validator Worker Pool. Once started, this will run indefinitely until it is stopped with * `stopWhenTimedOut()`. @@ -158,6 +164,7 @@ class ValidatorWorkerPool { for (const { opportunityId, error } of message.errors) { this._processValidationError(opportunityId, error); } + this._processProfileMeasures(message.profileMeasuresPerFeed); // Inform Broker that some items have been validated (so it can update its progress bars) for (const [feedContextIdentifier, numItems] of Object.entries(message.numItemsPerFeed)) { this._onValidateItems(feedContextIdentifier, numItems); @@ -185,6 +192,16 @@ class ValidatorWorkerPool { callback(numItems); } + /** + * Aggregate the profile measures to minimise memory usage + */ + _processProfileMeasures(profileMeasuresPerFeed) { + for (const [feedContextIdentifier, profileMeasures] of Object.entries(profileMeasuresPerFeed)) { + if (!this._profileMeasuresPerFeed[feedContextIdentifier]) this._profileMeasuresPerFeed[feedContextIdentifier] = {}; + addMeasures(this._profileMeasuresPerFeed[feedContextIdentifier], profileMeasures); + } + } + /** * Prepare validation results for eventual render. This involves a compression, which is essential to make the * results readable and to minimise memory usage, as there can be a vast number of errors. diff --git a/packages/openactive-broker-microservice/src/validator/validator-worker.js b/packages/openactive-broker-microservice/src/validator/validator-worker.js index 9bec114ede..9da074a829 100644 --- a/packages/openactive-broker-microservice/src/validator/validator-worker.js +++ b/packages/openactive-broker-microservice/src/validator/validator-worker.js @@ -1,4 +1,4 @@ -const { validate } = require('@openactive/data-model-validator'); +const { validateWithMeasures, addMeasures } = require('@openactive/data-model-validator'); const fs = require('fs').promises; const { execPipe, filter, toArray, map } = require('iter-tools'); const { workerData, parentPort } = require('worker_threads'); @@ -22,10 +22,11 @@ async function run() { const numItemsPerFeed = {}; /** @type {ValidatorWorkerResponse['errors']} */ const errors = []; + const profileMeasuresPerFeed = {}; for (const { feedContextIdentifier, validationMode, item } of requestParsed) { numItemsPerFeed[feedContextIdentifier] = (numItemsPerFeed[feedContextIdentifier] ?? 0) + 1; - const allOaValidationErrors = await validate(item, { + const { errors: allOaValidationErrors, profileMeasures: allOaProfileMeasures } = await validateWithMeasures(item, { loadRemoteJson: true, remoteJsonCachePath: VALIDATOR_TMP_DIR, remoteJsonCacheTimeToLive: 3600, @@ -42,11 +43,15 @@ async function run() { }))), toArray); errors.push(...newErrors); + + if (!profileMeasuresPerFeed[feedContextIdentifier]) profileMeasuresPerFeed[feedContextIdentifier] = {}; + addMeasures(profileMeasuresPerFeed[feedContextIdentifier], allOaProfileMeasures); } /** @type {ValidatorWorkerResponse} */ const response = { errors, numItemsPerFeed, + profileMeasuresPerFeed, }; parentPort.postMessage(response); } diff --git a/packages/openactive-broker-microservice/templates/validation-errors.handlebars b/packages/openactive-broker-microservice/templates/validation-errors.handlebars index 6c670cf729..86ed2ae2a1 100644 --- a/packages/openactive-broker-microservice/templates/validation-errors.handlebars +++ b/packages/openactive-broker-microservice/templates/validation-errors.handlebars @@ -20,6 +20,19 @@ +
+{{#each profileMeasuresPerFeed }} +{{{ feedContextIdentifier }}} + Total Items: {{{ totalItemCount }}} + {{#each profileMeasures }} + Profile: {{{ profile }}} + {{#each measures }} + {{{ measure }}}: {{{ count }}} ({{{ percentage }}}%) + {{/each}} + {{/each}} +{{/each}} ++