Skip to content

Commit

Permalink
feat(names): Detect near duplicate names
Browse files Browse the repository at this point in the history
This handles the case where one alt name is a substring fully contained
in another.
  • Loading branch information
orangejulius committed Sep 24, 2021
1 parent 71b4b89 commit 481144e
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 20 deletions.
2 changes: 1 addition & 1 deletion stream/tag_mapper.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ module.exports = function(){
if (trimmed_value) {
if (!trimmed_name ) {
doc.setName('default', trim( tags[key]));
} else {
} else if(!trimmed_name.includes(trimmed_value)) {
doc.setNameAlias('default', trim( tags[key]));
}
}
Expand Down
25 changes: 6 additions & 19 deletions test/fixtures/combined_vancouver_queens.json
Original file line number Diff line number Diff line change
Expand Up @@ -856355,16 +856355,10 @@
"_type": "_doc",
"data": {
"name": {
"default": [
"Mountain Equipment Co-op (MEC)",
"MEC"
]
"default": "Mountain Equipment Co-op (MEC)"
},
"phrase": {
"default": [
"Mountain Equipment Co-op (MEC)",
"MEC"
]
"default": "Mountain Equipment Co-op (MEC)"
},
"address_parts": {
"number": "212",
Expand Down Expand Up @@ -892394,8 +892388,7 @@
"_type": "_doc",
"data": {
"name": {
"default": "IPOH Asian House"
},
"default": "IPOH Asian House" },
"phrase": {
"default": "IPOH Asian House"
},
Expand Down Expand Up @@ -892429,16 +892422,10 @@
"_type": "_doc",
"data": {
"name": {
"default": [
"On Lok Restaurant & Wun Tun House",
"On Lok"
]
"default": "On Lok Restaurant & Wun Tun House"
},
"phrase": {
"default": [
"On Lok Restaurant & Wun Tun House",
"On Lok"
]
"default": "On Lok Restaurant & Wun Tun House"
},
"address_parts": {
"number": "2010",
Expand Down Expand Up @@ -956784,4 +956771,4 @@
"bounding_box": "{\"min_lat\":49.2174915,\"max_lat\":49.2194865,\"min_lon\":-123.2018987,\"max_lon\":-123.1991481}"
}
}
]
]
32 changes: 32 additions & 0 deletions test/stream/tag_mapper.js
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,38 @@ module.exports.tests.osm_names = function(test, common) {
});
};

// An alt_name that is fully contained in the main name is redundant:
// after mapping, the document is expected to carry only the main name.
module.exports.tests.substring_alt_name = function(test, common) {
  var fixture = new Document('a','b',1);
  fixture.setMeta('tags', { 'name': 'test place', 'alt_name': 'test pl' });

  test('rejects - substring alt name', function(t) {
    // Receives the document emitted by the mapper stream and checks that
    // no alias was recorded next to the default name.
    var assertOnlyMainName = function( mapped, enc, next ){
      t.deepEqual(mapped.name, { default: 'test place' }, 'substring name removed');
      t.end(); // test will fail if not called (or called twice).
      next();
    };

    var stream = mapper();
    stream.pipe( through.obj( assertOnlyMainName ) );
    stream.write(fixture);
  });
};

// Reject alt names that are a substring of the main name, even if they appear
// before the name in the list of tags. The fixture deliberately lists
// 'alt_name' ahead of 'name' so the result must not depend on tag order.
module.exports.tests.substring_alt_name2 = function(test, common) {
  var doc = new Document('a','b',1);
  doc.setMeta('tags', { 'alt_name': 'test pl', 'name': 'test place' });

  // The title is distinct from the sibling substring test so that a failure
  // in the test output identifies which tag ordering broke.
  test('rejects - substring alt name listed before name', function(t) {
    var stream = mapper();
    stream.pipe( through.obj( function( doc, enc, next ){
      t.deepEqual(doc.name, { default: 'test place' }, 'substring name removed');
      t.end(); // test will fail if not called (or called twice).
      next();
    }));
    stream.write(doc);
  });
};

// Cover the case of a tag key being 'name:' eg. { 'name:': 'foo' }
// Not to be confused with { 'name': 'foo' } (note the extraneous colon)
module.exports.tests.extraneous_colon = function(test, common) {
Expand Down

0 comments on commit 481144e

Please sign in to comment.