Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add "Railway Station alias" to improve searches #470

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
21 changes: 21 additions & 0 deletions config/venue_normalization.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@

/**
expansion and normalization of venue names to allow for consistent searching and more effective synonym use
**/

// Venue normalization rules, keyed first by tag key and then by tag value.
// Each rule lists the trailing words to strip from a venue name
// ('alt_suffixes') and the canonical 'suffix' appended to build the alias.
var mapping = {

  // railway=station
  railway: {
    station: {
      alt_suffixes: [ 'station' ],
      suffix: 'Railway Station'
    }
  },

  // station=light_rail
  station: {
    light_rail: {
      alt_suffixes: [ 'station', 'light rail' ],
      suffix: 'light rail station'
    }
  }
};

module.exports = mapping;
6 changes: 5 additions & 1 deletion stream/importPipeline.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
var categoryDefaults = require('../config/category_map');
var venueNormalizations = require('../config/venue_normalization');

var streams = {};

streams.config = {
categoryDefaults: categoryDefaults
categoryDefaults: categoryDefaults,
venueNormalizations: venueNormalizations
};

streams.pbfParser = require('./multiple_pbfs').create;
Expand All @@ -13,6 +15,7 @@ streams.tagMapper = require('./tag_mapper');
streams.adminLookup = require('pelias-wof-admin-lookup').create;
streams.addressExtractor = require('./address_extractor');
streams.categoryMapper = require('./category_mapper');
streams.venueNormalization = require('./venue_normalization');
streams.dbMapper = require('pelias-model').createDocumentMapperStream;
streams.elasticsearch = require('pelias-dbclient');

Expand All @@ -24,6 +27,7 @@ streams.import = function(){
.pipe( streams.addressExtractor() )
.pipe( streams.blacklistStream() )
.pipe( streams.categoryMapper( categoryDefaults ) )
.pipe( streams.venueNormalization( venueNormalizations ) )
.pipe( streams.adminLookup() )
.pipe( streams.dbMapper() )
.pipe( streams.elasticsearch({name: 'openstreetmap'}) );
Expand Down
76 changes: 76 additions & 0 deletions stream/venue_normalization.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@

/**
The venue normalization is similar to the category mapper
It's designed to add standardized aliases for different venue types to allow easier searching
**/

var through = require('through2');
var peliasLogger = require( 'pelias-logger' ).get( 'openstreetmap' );

module.exports = function( mapping ){

return through.obj( function( doc, enc, next ){

try {

// do not normalize addresses
if( doc.getType().match('address') ){
return next( null, doc );
}

// skip records with no tags
var tags = doc.getMeta('tags');
if( !tags ){
return next( null, doc );
}

var name = doc.getName( 'default' );
if ( !name ) {
name = doc.getNameAliases( 'default' );
}
if ( !name || name.legnth === 0 ){
return next( null, doc );
}

// iterate over mapping
for( var key in mapping ){

// check each mapping key against document tags
if( !tags.hasOwnProperty( key ) ){ continue; }

// handle regular features
for( var feature in mapping[key] ){
if( tags[key] === feature ){
var rule = mapping[key][feature];
addAliases( name, rule, doc );
}
}
}
}

catch( e ){
peliasLogger.error( 'venue normalization error' );
peliasLogger.error( e.stack );
peliasLogger.error( JSON.stringify( doc, null, 2 ) );
}

return next( null, doc );

});

};

/**
  Add a normalized alias to a document by replacing known trailing words of
  its name with the canonical suffix of the rule.

  e.g. name 'Central Station' + suffix 'Railway Station'
       => alias 'Central Railway Station'

  @param {string} name - the venue name to normalize; any non-string value
    (such as a list of aliases) is ignored.
  @param {Object} rule - may contain 'alt_suffixes' (array of trailing words
    to strip, applied in order) and 'suffix' (the canonical suffix to append).
  @param {Object} doc - the document; receives the alias via
    doc.setNameAlias( 'default', alias ).

  No alias is added when the rule has no 'suffix' or when nothing remains of
  the name after stripping.
**/
function addAliases( name, rule, doc ) {
  if( typeof name !== 'string' || !rule ){ return; }

  var hasOwn = Object.prototype.hasOwnProperty;
  var hasSuffix = hasOwn.call( rule, 'suffix' );
  var base = name;

  // strip the canonical suffix first, otherwise a name which already carries
  // it would end up doubled (eg. 'X Railway Railway Station')
  if( hasSuffix ){
    base = stripTrailingWords( base, rule.suffix );
  }

  if( hasOwn.call( rule, 'alt_suffixes' ) && Array.isArray( rule.alt_suffixes ) ){
    rule.alt_suffixes.forEach( function( alt ){
      base = stripTrailingWords( base, alt );
    });
  }

  // drop whitespace left behind by the stripping above
  base = base.trim();

  if( hasSuffix && base.length > 0 ){
    doc.setNameAlias( 'default', base + ' ' + rule.suffix );
  }
}

// remove $words from the end of $name (case-insensitive, whole words only)
function stripTrailingWords( name, words ) {
  var tail = ' ' + String( words ).toLowerCase();
  if( name.toLowerCase().endsWith( tail ) ){
    return name.slice( 0, -tail.length );
  }
  return name;
}
1 change: 1 addition & 0 deletions test/end-to-end.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ streams.pbfParser()
.pipe( streams.tagMapper() )
.pipe( streams.addressExtractor() )
.pipe( streams.categoryMapper( streams.config.categoryDefaults ) )
.pipe( streams.venueNormalization( streams.config.venueNormalizations ) )
.pipe( model.createDocumentMapperStream() )
.pipe( sink.obj(function (doc) {
results.push(doc);
Expand Down
Loading