Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Data quality measures #533

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion packages/openactive-broker-microservice/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,18 @@ async function renderValidationErrorsHtml(validatorWorkerPool) {
errorKey,
...obj,
})),
profileMeasuresPerFeed: [...Object.entries(validatorWorkerPool.getProfileMeasuresPerFeed())].map(([feedContextIdentifier, profileMeasures]) => ({
feedContextIdentifier,
totalItemCount: profileMeasures.totalItemCount,
profileMeasures: [...Object.entries(profileMeasures.profiles)].map(([profile, measures]) => ({
profile,
measures: [...Object.entries(measures)].map(([measure, count]) => ({
measure,
count,
percentage: Math.round((count / profileMeasures.totalItemCount) * 100),
})),
})),
})),
});
}

Expand Down Expand Up @@ -1474,7 +1486,9 @@ Validation errors found in Dataset Site JSON-LD:

`);

throw new Error('Unable to read valid JSON-LD from Dataset Site.');
// TODO: Gate this throw behind a setting (e.g. an env var) so that an invalid Dataset Site can optionally be tolerated
// if (!continueOnInvalidDatasetSite)
// throw new Error('Unable to read valid JSON-LD from Dataset Site.');
}

// Set global based on data result
Expand Down
4 changes: 2 additions & 2 deletions packages/openactive-broker-microservice/src/broker-config.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ const ORDER_PROPOSALS_FEED_IDENTIFIER = 'OrderProposalsFeed';
// These options are not recommended for general use, but are available for specific test environment configuration and debugging
const OPPORTUNITY_FEED_REQUEST_HEADERS = config.has('broker.opportunityFeedRequestHeaders') ? config.get('broker.opportunityFeedRequestHeaders') : {};
const DATASET_DISTRIBUTION_OVERRIDE = config.has('broker.datasetDistributionOverride') ? config.get('broker.datasetDistributionOverride') : [];
const DO_NOT_FILL_BUCKETS = config.has('broker.disableBucketAllocation') ? config.get('broker.disableBucketAllocation') : false;
const DO_NOT_HARVEST_ORDERS_FEED = config.has('broker.disableOrdersFeedHarvesting') ? config.get('broker.disableOrdersFeedHarvesting') : false;
const DO_NOT_FILL_BUCKETS = VALIDATE_ONLY || (config.has('broker.disableBucketAllocation') ? config.get('broker.disableBucketAllocation') : false);
const DO_NOT_HARVEST_ORDERS_FEED = VALIDATE_ONLY || (config.has('broker.disableOrdersFeedHarvesting') ? config.get('broker.disableOrdersFeedHarvesting') : false);
const DISABLE_BROKER_TIMEOUT = config.has('broker.disableBrokerMicroserviceTimeout') ? config.get('broker.disableBrokerMicroserviceTimeout') : false;
const LOG_AUTH_CONFIG = config.has('broker.logAuthConfig') ? config.get('broker.logAuthConfig') : false;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export type ValidatorWorkerResponse = {
numItemsPerFeed: {
[feedContextIdentifier: string]: number;
};
profileMeasuresPerFeed: any
};

export type ValidatorWorkerRequestParsedItem = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
*
* Validator is computationally expensive, so we parallelise the work in order to get Broker up to speed more quickly.
*/
const { addMeasures } = require('@openactive/data-model-validator');
const { execPipe, take, toArray, map } = require('iter-tools');
const { isNil } = require('lodash');
const fs = require('fs').promises;
Expand Down Expand Up @@ -45,6 +46,7 @@ class ValidatorWorkerPool {
* }>}
*/
this._validationResults = new Map();
this._profileMeasuresPerFeed = {}; // TODO: Convert this to a map
/**
* Info that relates to stopping the Validator Worker Pool.
*
Expand Down Expand Up @@ -102,6 +104,10 @@ class ValidatorWorkerPool {
return this._validationResults;
}

getProfileMeasuresPerFeed() {
return this._profileMeasuresPerFeed;
}

/**
* Start running Validator Worker Pool. Once started, this will run indefinitely until it is stopped with
* `stopWhenTimedOut()`.
Expand Down Expand Up @@ -158,6 +164,7 @@ class ValidatorWorkerPool {
for (const { opportunityId, error } of message.errors) {
this._processValidationError(opportunityId, error);
}
this._processProfileMeasures(message.profileMeasuresPerFeed);
// Inform Broker that some items have been validated (so it can update its progress bars)
for (const [feedContextIdentifier, numItems] of Object.entries(message.numItemsPerFeed)) {
this._onValidateItems(feedContextIdentifier, numItems);
Expand Down Expand Up @@ -185,6 +192,16 @@ class ValidatorWorkerPool {
callback(numItems);
}

/**
* Aggregate the profile measures to minimise memory usage
*/
_processProfileMeasures(profileMeasuresPerFeed) {
for (const [feedContextIdentifier, profileMeasures] of Object.entries(profileMeasuresPerFeed)) {
if (!this._profileMeasuresPerFeed[feedContextIdentifier]) this._profileMeasuresPerFeed[feedContextIdentifier] = {};
addMeasures(this._profileMeasuresPerFeed[feedContextIdentifier], profileMeasures);
}
}

/**
* Prepare validation results for eventual render. This involves a compression, which is essential to make the
* results readable and to minimise memory usage, as there can be a vast number of errors.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const { validate } = require('@openactive/data-model-validator');
const { validateWithMeasures, addMeasures } = require('@openactive/data-model-validator');
const fs = require('fs').promises;
const { execPipe, filter, toArray, map } = require('iter-tools');
const { workerData, parentPort } = require('worker_threads');
Expand All @@ -22,10 +22,11 @@ async function run() {
const numItemsPerFeed = {};
/** @type {ValidatorWorkerResponse['errors']} */
const errors = [];
const profileMeasuresPerFeed = {};
for (const { feedContextIdentifier, validationMode, item } of requestParsed) {
numItemsPerFeed[feedContextIdentifier] = (numItemsPerFeed[feedContextIdentifier] ?? 0) + 1;

const allOaValidationErrors = await validate(item, {
const { errors: allOaValidationErrors, profileMeasures: allOaProfileMeasures } = await validateWithMeasures(item, {
loadRemoteJson: true,
remoteJsonCachePath: VALIDATOR_TMP_DIR,
remoteJsonCacheTimeToLive: 3600,
Expand All @@ -42,11 +43,15 @@ async function run() {
}))),
toArray);
errors.push(...newErrors);

if (!profileMeasuresPerFeed[feedContextIdentifier]) profileMeasuresPerFeed[feedContextIdentifier] = {};
addMeasures(profileMeasuresPerFeed[feedContextIdentifier], allOaProfileMeasures);
}
/** @type {ValidatorWorkerResponse} */
const response = {
errors,
numItemsPerFeed,
profileMeasuresPerFeed,
};
parentPort.postMessage(response);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,19 @@
</div>
</section>

{{!-- Values are rendered with double-stash so that Handlebars HTML-escapes them before they are placed inside the <pre>. --}}
<pre>
{{#each profileMeasuresPerFeed }}
{{ feedContextIdentifier }}
Total Items: {{ totalItemCount }}
{{#each profileMeasures }}
Profile: {{ profile }}
{{#each measures }}
{{ measure }}: {{ count }} ({{ percentage }}%)
{{/each}}
{{/each}}
{{/each}}
</pre>

<div class="album py-5 bg-light">
<div class="container">

Expand Down