Skip to content

Commit

Permalink
convert libpostal calls to a microservice
Browse files Browse the repository at this point in the history
  • Loading branch information
trescube committed Nov 14, 2017
1 parent b5e48af commit e0c25d0
Show file tree
Hide file tree
Showing 9 changed files with 945 additions and 176 deletions.
105 changes: 91 additions & 14 deletions controller/libpostal.js
Original file line number Diff line number Diff line change
@@ -1,31 +1,108 @@
const text_analyzer = require('pelias-text-analyzer');
const _ = require('lodash');
const iso3166 = require('iso3166-1');
const Debug = require('../helper/debug');
const debugLog = new Debug('controller:libpostal');
const logger = require('pelias-logger').get('api');

function setup(should_execute) {
// mapping object from libpostal fields to pelias fields
var field_mapping = {
island: 'island',
category: 'category',
house: 'query',
house_number: 'number',
road: 'street',
suburb: 'neighbourhood',
city_district: 'borough',
city: 'city',
state_district: 'county',
state: 'state',
postcode: 'postalcode',
country: 'country'
};

// This controller calls the hosted libpostal service and converts the response
// to a generic format for later use. The hosted service returns an array like:
//
// ```
// [
// {
// label: 'house_number',
// value: '30'
// },
// {
// label: 'road',
// value: 'west 26th street'
// },
// {
// label: 'city',
// value: 'new york'
// },
// {
// label: 'state',
// value: 'ny'
// }
// ]
// ```
//
// where `label` can be any of (currently):
// - house (generally interpreted as unknown, treated by pelias like a query term)
// - category (like "restaurants")
// - house_number
// - road
// - unit (apt or suite #)
// - suburb (like a neighbourhood)
// - city
// - city_district (like an NYC borough)
// - state_district (like a county)
// - state
// - postcode
// - country
//
// The Pelias query module is not concerned with unit.
//
function setup(libpostalService, should_execute) {
function controller( req, res, next ){
// bail early if req/res don't pass conditions for execution
if (!should_execute(req, res)) {
return next();
}

const initialTime = debugLog.beginTimer(req);
// parse text with query parser
const parsed_text = text_analyzer.parse(req.clean.text);

if (parsed_text !== undefined) {
// if a known ISO2 country was parsed, convert it to ISO3
if (_.has(parsed_text, 'country') && iso3166.is2(_.toUpper(parsed_text.country))) {
parsed_text.country = iso3166.to3(_.toUpper(parsed_text.country));
libpostalService(req, (err, response) => {
if (err) {
// push err.message or err onto req.errors
req.errors.push( _.get(err, 'message', err) );

} else if (_.some(_.countBy(response, o => o.label), count => count > 1)) {
logger.warn(`discarding libpostal parse of '${req.clean.text}' due to duplicate field assignments`);
return next();

} else if (_.isEmpty(response)) {
return next();

} else {
req.clean.parser = 'libpostal';
req.clean.parsed_text = response.reduce(function(o, f) {
if (field_mapping.hasOwnProperty(f.label)) {
o[field_mapping[f.label]] = f.value;
}

return o;
}, {});

if (_.has(req.clean.parsed_text, 'country') && iso3166.is2(_.toUpper(req.clean.parsed_text.country))) {
req.clean.parsed_text.country = iso3166.to3(_.toUpper(req.clean.parsed_text.country));
}

debugLog.push(req, {parsed_text: req.clean.parsed_text});

}

req.clean.parser = 'libpostal';
req.clean.parsed_text = parsed_text;
debugLog.push(req, {parsed_text: req.clean.parsed_text});
}
debugLog.stopTimer(req, initialTime);
return next();
debugLog.stopTimer(req, initialTime);
return next();

});

}

Expand Down
75 changes: 75 additions & 0 deletions controller/structured_libpostal.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
const _ = require('lodash');
const Debug = require('../helper/debug');
const debugLog = new Debug('controller:libpostal');
const logger = require('pelias-logger').get('api');

// if there's a house_number in the libpostal response, return it
// otherwise return the postcode field (which may be undefined)
// Picks the libpostal response entry most likely to hold the house number.
//
// `response` is an array of `{ label, value }` entries. The entry labelled
// `house_number` wins when present; otherwise the entry labelled `postcode`
// is returned, which is `undefined` when neither label occurs.
function findHouseNumberField(response) {
  const byLabel = (wanted) => response.find((entry) => entry.label === wanted);

  return byLabel('house_number') || byLabel('postcode');
}

// Builds the middleware that splits the structured `address` input into
// `number` and `street` using the libpostal service.
//
// - libpostalService: function(req, callback(err, response)) that performs
//   the libpostal call
// - should_execute: predicate(req, res); when it returns falsy the controller
//   does nothing and immediately hands off to `next`
//
// Returns the `(req, res, next)` controller function.
function setup(libpostalService, should_execute) {
  function controller(req, res, next) {
    // nothing to do unless the request/response satisfy the preconditions
    if (!should_execute(req, res)) {
      return next();
    }

    const initialTime = debugLog.beginTimer(req);

    libpostalService(req, (err, response) => {
      if (!err) {
        // Prefer the house_number field, falling back to postcode: libpostal
        // parses some inputs, like `3370 cobbe ave`, as postcode+street, and
        // since the whole input is being treated as a street address an
        // identified postcode can be assumed to be the house number.
        const numberField = findHouseNumberField(response);
        const parsedText = req.clean.parsed_text;

        if (numberField) {
          // eg - '1090 N Charlotte St' becomes number=1090, street=N Charlotte St
          const houseNumber = numberField.value;
          parsedText.number = houseNumber;

          // strip only the first occurrence of the number, then trim whitespace
          const withoutNumber = _.replace(parsedText.address, houseNumber, '');
          parsedText.street = _.trim(withoutNumber);
        } else {
          // no house number could be identified: the entire input is the street
          parsedText.street = parsedText.address;
        }

        // address has been fully consumed by the parse, so drop it
        delete parsedText.address;

        debugLog.push(req, {parsed_text: response});
      } else {
        // record err.message when available, otherwise err itself
        req.errors.push(_.get(err, 'message', err));
      }

      debugLog.stopTimer(req, initialTime);
      return next();
    });
  }

  return controller;
}

module.exports = setup;
1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@
"pelias-model": "5.2.0",
"pelias-query": "9.1.1",
"pelias-sorting": "1.1.0",
"pelias-text-analyzer": "1.10.2",
"predicates": "^2.0.0",
"retry": "^0.10.1",
"stats-lite": "^2.0.4",
Expand Down
23 changes: 22 additions & 1 deletion routes/v1.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ var controllers = {
coarse_reverse: require('../controller/coarse_reverse'),
mdToHTML: require('../controller/markdownToHtml'),
libpostal: require('../controller/libpostal'),
structured_libpostal: require('../controller/structured_libpostal'),
place: require('../controller/place'),
placeholder: require('../controller/placeholder'),
search: require('../controller/search'),
Expand Down Expand Up @@ -96,6 +97,7 @@ const PlaceHolder = require('../service/configurations/PlaceHolder');
const PointInPolygon = require('../service/configurations/PointInPolygon');
const Language = require('../service/configurations/Language');
const Interpolation = require('../service/configurations/Interpolation');
const Libpostal = require('../service/configurations/Libpostal');

/**
* Append routes to app
Expand All @@ -122,6 +124,18 @@ function addRoutes(app, peliasConfig) {
const interpolationService = serviceWrapper(interpolationConfiguration);
const isInterpolationEnabled = _.constant(interpolationConfiguration.isEnabled());

// standard libpostal should use req.clean.text for the `address` parameter
const libpostalConfiguration = new Libpostal(
_.defaultTo(peliasConfig.api.services.libpostal, {}),
_.property('clean.text'));
const libpostalService = serviceWrapper(libpostalConfiguration);

// structured libpostal should use req.clean.parsed_text.address for the `address` parameter
const structuredLibpostalConfiguration = new Libpostal(
_.defaultTo(peliasConfig.api.services.libpostal, {}),
_.property('clean.parsed_text.address'));
const structuredLibpostalService = serviceWrapper(structuredLibpostalConfiguration);

// fallback to coarse reverse when regular reverse didn't return anything
const coarseReverseShouldExecute = all(
isPipServiceEnabled, not(hasRequestErrors), not(hasResponseData)
Expand All @@ -132,6 +146,12 @@ function addRoutes(app, peliasConfig) {
not(isRequestSourcesOnlyWhosOnFirst)
);

// for libpostal to execute for structured requests, req.clean.parsed_text.address must exist
const structuredLibpostalShouldExecute = all(
not(hasRequestErrors),
hasParsedTextProperties.all('address')
);

// execute placeholder if libpostal only parsed as admin-only and needs to
// be geodisambiguated
const placeholderGeodisambiguationShouldExecute = all(
Expand Down Expand Up @@ -256,7 +276,7 @@ function addRoutes(app, peliasConfig) {
sanitizers.search.middleware(peliasConfig.api),
middleware.requestLanguage,
middleware.calcSize(),
controllers.libpostal(libpostalShouldExecute),
controllers.libpostal(libpostalService, libpostalShouldExecute),
controllers.placeholder(placeholderService, geometricFiltersApply, placeholderGeodisambiguationShouldExecute),
controllers.placeholder(placeholderService, geometricFiltersDontApply, placeholderIdsLookupShouldExecute),
controllers.search_with_ids(peliasConfig.api, esclient, queries.address_using_ids, searchWithIdsShouldExecute),
Expand Down Expand Up @@ -286,6 +306,7 @@ function addRoutes(app, peliasConfig) {
sanitizers.structured_geocoding.middleware(peliasConfig.api),
middleware.requestLanguage,
middleware.calcSize(),
controllers.structured_libpostal(structuredLibpostalService, structuredLibpostalShouldExecute),
controllers.search(peliasConfig.api, esclient, queries.structured_geocoding, not(hasResponseDataOrRequestErrors)),
postProc.trimByGranularityStructured(),
postProc.distances('focus.point.'),
Expand Down
33 changes: 33 additions & 0 deletions service/configurations/Libpostal.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
'use strict';

const url = require('url');

const ServiceConfiguration = require('pelias-microservice-wrapper').ServiceConfiguration;

// Service configuration for the hosted libpostal parser.
//
// The same class serves both free-text and structured requests; the only
// difference between the two is which request property is sent as the
// `address` parameter, so that choice is injected via `propertyExtractor`.
class Libpostal extends ServiceConfiguration {
  // - o: service options handed straight to ServiceConfiguration
  // - propertyExtractor: function(req) returning the text to parse, eg:
  //     * _.property('clean.text')
  //     * _.property('clean.parsed_text.address')
  constructor(o, propertyExtractor) {
    super('libpostal', o);

    // kept on the instance so getParameters can pull the value out of req
    this.propertyExtractor = propertyExtractor;
  }

  // Query parameters for the request: the extracted text, sent as `address`.
  getParameters(req) {
    const address = this.propertyExtractor(req);

    return { address };
  }

  // Endpoint to call: `parse` resolved against this.baseUrl
  // (presumably populated by ServiceConfiguration from `o` - confirm).
  getUrl(req) {
    const endpoint = 'parse';

    return url.resolve(this.baseUrl, endpoint);
  }
}

module.exports = Libpostal;
Loading

0 comments on commit e0c25d0

Please sign in to comment.