Skip to content

Commit

Permalink
feat(parent_json): Add support for importing parent properties. (#97)
Browse files Browse the repository at this point in the history
* Added support for importing parent property.

* Updated the parent_json structure to match the db model. New/fixed unit tests, Upgraded the palias-model.

* Adding the documentation for parent_json field.

Co-authored-by: mansoor.sajjad <[email protected]>
  • Loading branch information
mansoor-sajjad and mansoor.sajjad authored Aug 9, 2022
1 parent 6af5516 commit d062fa5
Show file tree
Hide file tree
Showing 4 changed files with 191 additions and 4 deletions.
42 changes: 42 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,48 @@ Popularity values can be specified to mark records as more important than others

Category values can be added to a record. For a single category, use the `category` field. For multiple categories, use `category_json`, with the same formatting as for alias names.

## Parent

Parent information for a record can be added using the `parent_json` field.

Only the valid parent field names specified in `pelias/model` are supported. Records whose parent contains unsupported field names will be ignored, with a warning in the logs, and will not be imported.

[List of valid fields in pelias/model](https://github.com/pelias/model/blob/master/Document.js), which eventually should match the [list of valid fields in pelias/schema](https://github.com/pelias/schema/blob/master/mappings/document.js).

The contents of the `parent_json` field must be a valid JSON object. An example of valid contents for the `parent_json` field is:

```
{
  "county": [{
    "id": "34",
    "name": "Innlandet",
    "abbr": "InL",
    "source": "OSM"
  }],
  "country": [{
    "id": "NOR",
    "name": "Norway",
    "abbr": "NO"
  }],
  "locality": [{
    "id": "3403",
    "name": "Hamar",
    "source": "SomeSource"
  }]
}
```

In CSV files, values that contain commas must be enclosed in double quotes, and any double quote inside such a value must be escaped by doubling it (`""`), as shown below in the example for the `parent_json` field.

```
"{""county"":[{""id"":""34"",""name"":""Innlandet""}],""country"":[{""id"":""NOR"",""name"":""NO""}],""locality"":[{""id"":""3403"",""name"":""Hamar""}]}"
```

The valid properties for any parent field are `id`, `name`, `abbr` (abbreviation) and `source`, where `id` and `name` are mandatory.
Any other properties will be ignored without any warning. If a mandatory property is missing, the record will be ignored, with a warning in the logs, and will not be imported.

In the case where multiple parent values are provided for the same field name, we store all copies in the elastic index, making them all searchable, but only the first entry is used for displaying the label.

## Custom data

Arbitrary custom data that does not fit into the standard Pelias schema can be stored for later retrieval under the `addendum` property.
Expand Down
16 changes: 13 additions & 3 deletions lib/streams/documentStream.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
'use strict';

const _ = require("lodash");
const through = require( 'through2' );
const logger = require( 'pelias-logger' ).get( 'csv-importer' );

Expand Down Expand Up @@ -135,6 +136,15 @@ function processRecord(record, next_uid, stats) {

const pelias_document = new peliasModel.Document( source, layer, model_id );

const parent = getCaseInsensitiveAsJSON( `parent_json`, record );
if (_.isPlainObject(parent)) {
_.each(parent, (values, field) => {
_.each(values, (value) => {
pelias_document.addParent(field, value.name, value.id, value.abbr, value.source)
})
})
}

getMultiLangNames(record).forEach(([lang, value]) => {
const names = getNames(value, lang === 'default' ? '' : `_${lang}`);
if (names && names.length > 0) {
Expand Down Expand Up @@ -162,9 +172,9 @@ function processRecord(record, next_uid, stats) {
pelias_document.setAddress('cross_street', cross_street);
}

const housenumber = getHousenumber(record);
if (housenumber) {
pelias_document.setAddress('number', housenumber);
const houseNumber = getHousenumber(record);
if (houseNumber) {
pelias_document.setAddress('number', houseNumber);
}

const postcode = getPostalCode(record);
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"pelias-config": "^4.12.0",
"pelias-dbclient": "^2.13.0",
"pelias-logger": "^1.3.0",
"pelias-model": "^9.2.0",
"pelias-model": "^9.4.0",
"pelias-wof-admin-lookup": "^7.7.0",
"through2": "^3.0.0",
"through2-filter": "^3.0.0",
Expand Down
135 changes: 135 additions & 0 deletions test/streams/documentStream.js
Original file line number Diff line number Diff line change
Expand Up @@ -522,3 +522,138 @@ tape( 'documentStream rejects invalid popularity', function(test) {
test.end();
});
});

// A record carrying one parent entry with both `id` and `name` is expected to
// be emitted by the stream without bumping the bad-record counter.
tape('documentStream accepts parent value if present', (test) => {
  const counters = { badRecordCount: 0 };
  const stream = DocumentStream.create('prefix', counters);

  const record = {
    LAT: 5,
    LON: 6,
    popularity: '500',
    parent_json: {
      country: [{ id: "NOR", name: "NO" }]
    }
  };

  test_stream([record], stream, (err, docs) => {
    test.equal(docs.length, 1, 'the document should be pushed');
    test.equal(counters.badRecordCount, 0, 'bad record count unchanged');
    test.end();
  });
});

// Two different parent fields on the same record (one of them using the
// optional `abbr` and `source` properties) are expected to be accepted.
tape('documentStream accepts multiple parent values if present', (test) => {
  const parents = {
    country: [{ id: "NOR", name: "Norway" }],
    locality: [{ id: "0301", name: "Oslo", abbr: "osl", source: "nationalRegistry" }]
  };
  const record = { LAT: 5, LON: 6, popularity: '500', parent_json: parents };

  const counters = { badRecordCount: 0 };
  const stream = DocumentStream.create('prefix', counters);

  test_stream([record], stream, (err, docs) => {
    test.equal(docs.length, 1, 'the document should be pushed');
    test.equal(counters.badRecordCount, 0, 'bad record count unchanged');
    test.end();
  });
});

// A parent entry that only has an `id` (no `name`) is expected to make the
// stream drop the record and count it as bad.
tape('documentStream rejects parent without name property', (test) => {
  const counters = { badRecordCount: 0 };
  const stream = DocumentStream.create('prefix', counters);

  const record = {
    LAT: 5,
    LON: 6,
    popularity: '500',
    parent_json: {
      country: [{ id: "NOR" }],
    }
  };

  test_stream([record], stream, (err, docs) => {
    test.equal(docs.length, 0, 'the document should be rejected');
    test.equal(counters.badRecordCount, 1, 'bad record count went up by 1');
    test.end();
  });
});

// A parent entry that only has a `name` (no `id`) is expected to make the
// stream drop the record and count it as bad.
tape('documentStream rejects parent without id property', (test) => {
  const parents = {
    country: [{ name: "NOR" }],
  };
  const record = { LAT: 5, LON: 6, popularity: '500', parent_json: parents };

  const counters = { badRecordCount: 0 };
  const stream = DocumentStream.create('prefix', counters);

  test_stream([record], stream, (err, docs) => {
    test.equal(docs.length, 0, 'the document should be rejected');
    test.equal(counters.badRecordCount, 1, 'bad record count went up by 1');
    test.end();
  });
});

// Here the parent field maps directly to an object instead of an array of
// entries; the record is expected to be dropped and counted as bad.
tape('documentStream rejects parent with invalid structure', (test) => {
  const counters = { badRecordCount: 0 };
  const stream = DocumentStream.create('prefix', counters);

  const record = {
    LAT: 5,
    LON: 6,
    popularity: '500',
    parent_json: {
      country: { name: "NOR" },
    }
  };

  test_stream([record], stream, (err, docs) => {
    test.equal(docs.length, 0, 'the document should be rejected');
    test.equal(counters.badRecordCount, 1, 'bad record count went up by 1');
    test.end();
  });
});

// An unknown parent field name ('land') is expected to make the stream drop
// the record and count it as bad.
// The entry itself is deliberately well-formed (wrapped in an array, with both
// `id` and `name`) so that the unsupported field name is the only thing wrong
// with the record; otherwise the test could pass for an unrelated reason.
tape('documentStream rejects parent with non-existing parent', function (test) {
  const input = {
    LAT: 5,
    LON: 6,
    popularity: '500',
    parent_json: {
      land: [{ id: "NOR", name: "Norway" }],
    }
  };
  const stats = { badRecordCount: 0 };
  const documentStream = DocumentStream.create('prefix', stats);

  test_stream([input], documentStream, function (err, actual) {
    test.equal(actual.length, 0, 'the document should be rejected');
    test.equal(stats.badRecordCount, 1, 'bad record count went up by 1');
    test.end();
  });
});

// An extra, unrecognised property (`someProperty`) inside a parent entry is
// expected not to cause rejection: the record is still emitted.
tape('documentStream ignores non-existing parent field property', (test) => {
  const parents = {
    country: [{ id: "NOR", name: "Norway", someProperty: "Norge" }],
    locality: [{ id: "0301", name: "Oslo", abbr: "osl", source: "nationalRegistry" }]
  };
  const record = { LAT: 5, LON: 6, popularity: '500', parent_json: parents };

  const counters = { badRecordCount: 0 };
  const stream = DocumentStream.create('prefix', counters);

  test_stream([record], stream, (err, docs) => {
    test.equal(docs.length, 1, 'the document should be pushed');
    test.equal(counters.badRecordCount, 0, 'bad record count unchanged');
    test.end();
  });
});

0 comments on commit d062fa5

Please sign in to comment.