Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Require GID in Elasticsearch _id field #1364

Merged
merged 2 commits into from
Oct 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions helper/decode_gid.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
const _ = require('lodash');

// helper method to decode a GID from a string
// for additional validation see sanitizer/_ids.js

function decodeGID(gid) {
const parts = gid.split(':');

if ( parts.length < 3 ) {
return;
}

// empty strings and other invalid values are expected to be handled by the caller
return {
source: parts[0],
layer: parts[1],
id: parts.slice(2).join(':'),
};
}

module.exports = decodeGID;
8 changes: 5 additions & 3 deletions helper/geojsonify.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ const _ = require('lodash');
const Document = require('pelias-model').Document;
const codec = require('pelias-model').codec;
const field = require('./fieldValue');
const decode_gid = require('./decode_gid');

function geojsonifyPlaces( params, docs ){

Expand Down Expand Up @@ -40,13 +41,14 @@ function geojsonifyPlaces( params, docs ){
}

function geojsonifyPlace(params, place) {
const gid_components = decode_gid(place._id);
// setup the base doc
const doc = {
id: place.source_id,
gid: new Document(place.source, place.layer, place.source_id).getGid(),
id: gid_components.id,
gid: new Document(place.source, place.layer, gid_components.id).getGid(),
layer: place.layer,
source: place.source,
source_id: place.source_id,
source_id: gid_components.id,
bounding_box: place.bounding_box,
lat: parseFloat(place.center_point.lat),
lng: parseFloat(place.center_point.lon)
Expand Down
20 changes: 9 additions & 11 deletions sanitizer/_ids.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
var _ = require('lodash'),
check = require('check-types'),
type_mapping = require('../helper/type_mapping');

var ID_DELIM = ':';
const _ = require('lodash');
const check = require('check-types');
const type_mapping = require('../helper/type_mapping');
const decode_gid = require('../helper/decode_gid');

// validate inputs, convert types and apply defaults id generally looks like
// 'geonames:venue:4163334' (source:layer:id) so, all three are required
Expand All @@ -18,23 +17,22 @@ var targetError = function(target, target_list) {
};

function sanitizeId(rawId, messages) {
var parts = rawId.split(ID_DELIM);
const gid = decode_gid(rawId);

if ( parts.length < 3 ) {
if (!gid ) {
messages.errors.push( formatError(rawId) );
return;
}

var source = parts[0];
var layer = parts[1];
var id = parts.slice(2).join(ID_DELIM);
const { source, layer, id } = gid;

// check if any parts of the gid are empty
if (_.includes([source, layer, id], '')) {
messages.errors.push( formatError(rawId) );
return;
}
var valid_values = Object.keys(type_mapping.source_mapping);

const valid_values = Object.keys(type_mapping.source_mapping);
if (!_.includes(valid_values, source)) {
messages.errors.push( targetError(source, valid_values) );
return;
Expand Down
14 changes: 0 additions & 14 deletions test/unit/fixture/dedupe_elasticsearch_results.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ module.exports = [
'address_parts': {},
'alpha3': 'USA',
'source': 'openstreetmap',
'source_id': 'node:357289197',
'category': [
'education'
],
Expand Down Expand Up @@ -118,7 +117,6 @@ module.exports = [
'address_parts': {},
'alpha3': 'USA',
'source': 'geonames',
'source_id': '5219083',
'category': [
'education'
],
Expand Down Expand Up @@ -197,7 +195,6 @@ module.exports = [
'address_parts': {},
'alpha3': 'USA',
'source': 'geonames',
'source_id': '5183465',
'category': [
'entertainment',
'recreation'
Expand Down Expand Up @@ -277,7 +274,6 @@ module.exports = [
'address_parts': {},
'alpha3': 'USA',
'source': 'openstreetmap',
'source_id': 'node:368338500',
'category': [
'education'
],
Expand Down Expand Up @@ -339,7 +335,6 @@ module.exports = [
'address_parts': {},
'alpha3': 'USA',
'source': 'openstreetmap',
'source_id': 'way:84969670',
'category': [
'education'
],
Expand Down Expand Up @@ -409,7 +404,6 @@ module.exports = [
'address_parts': {},
'alpha3': 'USA',
'source': 'geonames',
'source_id': '5192545',
'category': [
'education'
],
Expand Down Expand Up @@ -488,7 +482,6 @@ module.exports = [
'address_parts': {},
'alpha3': 'USA',
'source': 'geonames',
'source_id': '5198085',
'category': [
'education'
],
Expand Down Expand Up @@ -558,7 +551,6 @@ module.exports = [
'address_parts': {},
'alpha3': 'USA',
'source': 'geonames',
'source_id': '5208101',
'category': [
'education'
],
Expand Down Expand Up @@ -638,7 +630,6 @@ module.exports = [
'address_parts': {},
'alpha3': 'USA',
'source': 'openstreetmap',
'source_id': 'way:161088588',
'category': [
'education'
],
Expand Down Expand Up @@ -717,7 +708,6 @@ module.exports = [
'address_parts': {},
'alpha3': 'USA',
'source': 'geonames',
'source_id': '5200263',
'category': [
'education'
],
Expand Down Expand Up @@ -788,7 +778,6 @@ module.exports = [
'address_parts': {},
'alpha3': 'USA',
'source': 'openstreetmap',
'source_id': 'way:34212977',
'category': [
'education'
],
Expand Down Expand Up @@ -867,7 +856,6 @@ module.exports = [
'address_parts': {},
'alpha3': 'USA',
'source': 'openstreetmap',
'source_id': 'node:357330916',
'category': [
'education'
],
Expand Down Expand Up @@ -937,7 +925,6 @@ module.exports = [
'address_parts': {},
'alpha3': 'USA',
'source': 'openstreetmap',
'source_id': 'node:357330919',
'category': [
'education'
],
Expand Down Expand Up @@ -1007,7 +994,6 @@ module.exports = [
'address_parts': {},
'alpha3': 'USA',
'source': 'geonames',
'source_id': '5197082',
'category': [
'education'
],
Expand Down
51 changes: 51 additions & 0 deletions test/unit/helper/decode_gid.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
const decode_gid = require('../../../helper/decode_gid');

module.exports.tests = {};

module.exports.tests.valid_gid = function(test, common) {
test('standard GID is decoded correctly', function(t) {
const gid = decode_gid('whosonfirst:locality:123');

t.equal(gid.source, 'whosonfirst');
t.equal(gid.layer, 'locality');
t.equal(gid.id, '123');
t.end();
});

test('GID with colons in ID decoded correctly', function(t) {
const gid = decode_gid('openaddresses:address:city_of_new_york:123');

t.equal(gid.source, 'openaddresses');
t.equal(gid.layer, 'address');
t.equal(gid.id, 'city_of_new_york:123');
t.end();
});
};

module.exports.tests.invalid_gid = function(test, common) {
test('gid without 3 parts returns undefined', function(t) {
const gid = decode_gid('whosonfirst:locality');

t.equal(gid, undefined);
t.end();
});

test('gid without 3 full parts returns empty strings', function(t) {
const gid = decode_gid('whosonfirst:locality:');

t.equal(gid.source, 'whosonfirst');
t.equal(gid.layer, 'locality');
t.equal(gid.id, '');
t.end();
});
};

module.exports.all = (tape, common) => {
function test(name, testFunction) {
return tape(`decode GID: ${name}`, testFunction);
}

for( var testCase in module.exports.tests ){
module.exports.tests[testCase](test, common);
}
};
Loading