Skip to content

Commit

Permalink
feat(deduplication): add deduplication post processing script
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink committed May 16, 2019
1 parent e61690c commit 1d9c00d
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 1 deletion.
1 change: 1 addition & 0 deletions Document.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ function Document( source, layer, source_id ){

// define default post-processing scripts
this.addPostProcessingScript( require('./post/intersections') );
this.addPostProcessingScript( require('./post/deduplication') );

// mandatory properties
this.setSource( source );
Expand Down
23 changes: 23 additions & 0 deletions post/deduplication.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/**
* Deduplication post-processing script ensures that array
* values only contain unique entries.
*/

const _ = require('lodash');
const prefixes = [ 'name', 'address_parts' ];

/**
 * Post-processing script which strips repeated entries from array-valued
 * fields of a document.
 *
 * Every top-level property named in the module-level `prefixes` list is
 * inspected. When that property is a plain object, each of its fields
 * holding an array with more than one element is replaced by the result
 * of `_.uniq()` on that array. Everything else is left untouched.
 *
 * @param {Object} doc - the document to clean up; it is modified in place.
 * @returns {undefined} nothing is returned.
 */
function deduplication( doc ){
  for( const prefix of prefixes ){
    const section = doc[prefix];

    // anything which is not a plain object has no fields to scan
    if( !_.isPlainObject( section ) ){ continue; }

    for( const key in section ){
      const entries = section[key];

      // non-arrays, and arrays too short to hold a repeat, are kept as they are
      if( !_.isArray( entries ) || entries.length <= 1 ){ continue; }

      section[key] = _.uniq( entries );
    }
  }
}

module.exports = deduplication;
3 changes: 2 additions & 1 deletion test/document/post.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@

const Document = require('../../Document');
const intersections = require('../../post/intersections');
const DEFAULT_SCRIPTS = [ intersections ];
const deduplication = require('../../post/deduplication');
const DEFAULT_SCRIPTS = [ intersections, deduplication ];

module.exports.tests = {};

Expand Down
47 changes: 47 additions & 0 deletions test/post/deduplication.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@

var Document = require('../../Document');
var deduplication = require('../../post/deduplication');

module.exports.tests = {};

module.exports.tests.dedupe = function (test) {

  // Replays [setterName, value] pairs, in order, against one field of the
  // document. Each setter is invoked as a method of `doc`.
  function replay(doc, field, steps) {
    steps.forEach(function (step) {
      doc[step[0]](field, step[1]);
    });
  }

  // the same value is supplied several times through both the primary
  // setter and the alias setter; only distinct values should remain
  test('dedupe - name', function (t) {
    const doc = new Document('mysource', 'mylayer', 'myid');

    replay(doc, 'default', [
      ['setNameAlias', 'test'],
      ['setName', 'test'],
      ['setNameAlias', 'test'],
      ['setNameAlias', 'test 2'],
      ['setNameAlias', 'test']
    ]);

    deduplication(doc);

    const expected = ['test', 'test 2'];
    t.deepEquals(doc.name.default, expected);

    t.end();
  });

  // identical scenario, exercised through the address setters
  test('dedupe - address_parts', function (t) {
    const doc = new Document('mysource', 'mylayer', 'myid');

    replay(doc, 'street', [
      ['setAddressAlias', 'test'],
      ['setAddress', 'test'],
      ['setAddressAlias', 'test'],
      ['setAddressAlias', 'test 2'],
      ['setAddressAlias', 'test']
    ]);

    deduplication(doc);

    const expected = ['test', 'test 2'];
    t.deepEquals(doc.address_parts.street, expected);

    t.end();
  });
};

/**
 * Runs every test group registered on `module.exports.tests`.
 *
 * @param {Function} tape - test function; called with a prefixed name and the test body.
 * @param {*} common - passed through unchanged to each test group.
 */
module.exports.all = function (tape, common) {

  // prefix each test name so the output identifies this suite
  const prefixedTest = function (name, testFunction) {
    return tape('post/deduplication: ' + name, testFunction);
  };

  const groups = module.exports.tests;
  for (const groupName in groups) {
    groups[groupName](prefixedTest, common);
  }
};
1 change: 1 addition & 0 deletions test/run.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ var tests = [
require('./document/toESDocument.js'),
require('./document/post.js'),
require('./post/intersections.js'),
require('./post/deduplication.js'),
require('./DocumentMapperStream.js'),
require('./util/transform.js'),
require('./util/valid.js'),
Expand Down

0 comments on commit 1d9c00d

Please sign in to comment.