Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add "Railway Station alias" to improve searches #470

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
69 changes: 69 additions & 0 deletions config/venue_normalization.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@

/**
expansion and normalization of venue names to allow for consistent searching and more effective synonym use
**/

// Venue normalization rules.
// Each rule lists OSM tag conditions as [key, value] pairs, the trailing
// name synonyms that may be stripped, and the canonical suffix to append.
var mapping = [

  // railway stations
  {
    conditions: [ [ 'railway', 'station' ] ],
    synonyms: [ 'train station', 'station' ],
    suffix: 'railway station'
  },

  // light rail stations
  {
    conditions: [
      [ 'public_transport', 'station' ],
      [ 'station', 'light_rail' ]
    ],
    synonyms: [ 'light rail' ],
    suffix: 'light rail station'
  },

  // ferry stops
  {
    conditions: [
      [ 'public_transport', 'stop_position' ],
      [ 'ferry', 'yes' ]
    ],
    synonyms: [ 'ferry', 'wharf' ],
    suffix: 'ferry terminal'
  },

  // parking facilities
  {
    conditions: [ [ 'amenity', 'parking' ] ],
    synonyms: [ 'garage', 'car park', 'parking' ],
    suffix: 'car park'
  }

];

/*{
'railway': {
'station': {
'alt_suffixes': ['station'],
'suffix': 'Railway Station',
}
},
'station': {
'light_rail': {
'alt_suffixes': ['station', 'light rail'],
'suffix': 'light rail station',
}
},
'public_transport': {

}
};*/

// export the list of normalization rules defined above
module.exports = mapping;
8 changes: 6 additions & 2 deletions stream/importPipeline.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
var categoryDefaults = require('../config/category_map');
var venueNormalizations = require('../config/venue_normalization');

var streams = {};

streams.config = {
categoryDefaults: categoryDefaults
categoryDefaults: categoryDefaults,
venueNormalizations: venueNormalizations
};

streams.pbfParser = require('./multiple_pbfs').create;
Expand All @@ -13,6 +15,7 @@ streams.tagMapper = require('./tag_mapper');
streams.adminLookup = require('pelias-wof-admin-lookup').create;
streams.addressExtractor = require('./address_extractor');
streams.categoryMapper = require('./category_mapper');
streams.venueNormalization = require('./venue_normalization');
streams.dbMapper = require('pelias-model').createDocumentMapperStream;
streams.elasticsearch = require('pelias-dbclient');

Expand All @@ -24,9 +27,10 @@ streams.import = function(){
.pipe( streams.addressExtractor() )
.pipe( streams.blacklistStream() )
.pipe( streams.categoryMapper( categoryDefaults ) )
.pipe( streams.venueNormalization( venueNormalizations ) )
.pipe( streams.adminLookup() )
.pipe( streams.dbMapper() )
.pipe( streams.elasticsearch({name: 'openstreetmap'}) );
};

module.exports = streams;
module.exports = streams;
87 changes: 87 additions & 0 deletions stream/venue_normalization.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@

/**
The venue normalization is similar to the category mapper
It's designed to add standardized aliases for different venue types to allow easier searching
**/

var through = require('through2');
var peliasLogger = require( 'pelias-logger' ).get( 'openstreetmap' );

module.exports = function( mapping ){

return through.obj( function( doc, enc, next ){

try {

// do not normalize addresses
if( doc.getType().match('address') ){
return next( null, doc );
}

// skip records with no tags
var tags = doc.getMeta('tags');
if( !tags ){
return next( null, doc );
}

var name = doc.getName( 'default' );
if ( !name ) {
name = doc.getNameAliases( 'default' );
}
if ( !name || name.legnth === 0 ){
return next( null, doc );
}

// iterate over mapping
for( var idx in mapping ){
var process = true;
var rule = mapping[idx];

loop_conditions:
for( var condition in rule.conditions ) {
var cond = rule.conditions[condition];

if( !tags.hasOwnProperty( cond[0] ) ) {
process = false;
break loop_conditions;
}

if ( cond.length === 2 && cond[1] !== tags[ cond[0] ] )
{
process = false;
break loop_conditions;
}
}

if (!process) {
continue;
}

var current_name = name;

if( rule.hasOwnProperty( 'synonyms' ) ) {
for( var synonym_idx in rule.synonyms ) {
var synonym = rule.synonyms[synonym_idx];
if( current_name.toLowerCase().endsWith( ' ' + synonym ) ) {
current_name = current_name.slice( 0, -synonym.length - 1 );
}
}
}

if( rule.hasOwnProperty( 'suffix' ) && current_name.length > 0 ){
doc.setNameAlias( 'default', current_name + ' ' + rule.suffix );
}
}
}

catch( e ){
peliasLogger.error( 'venue normalization error' );
peliasLogger.error( e.stack );
peliasLogger.error( JSON.stringify( doc, null, 2 ) );
}

return next( null, doc );

});

};
1 change: 1 addition & 0 deletions test/end-to-end.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ streams.pbfParser()
.pipe( streams.tagMapper() )
.pipe( streams.addressExtractor() )
.pipe( streams.categoryMapper( streams.config.categoryDefaults ) )
.pipe( streams.venueNormalization( streams.config.venueNormalizations ) )
.pipe( model.createDocumentMapperStream() )
.pipe( sink.obj(function (doc) {
results.push(doc);
Expand Down
Loading