From 3c35f5635aa83f86d3c2e581ba2eae07ce4e0518 Mon Sep 17 00:00:00 2001 From: Joxit Date: Thu, 28 Jan 2021 15:02:58 +0100 Subject: [PATCH] feat: global configuration option to select stored languages --- Document.js | 4 +- package.json | 1 + post/language_default.js | 19 ++++ post/language_field_filter.js | 33 +++++++ schema.js | 18 ++++ test/document/post.js | 23 +++-- test/document/toESDocument.js | 3 +- test/post/language_default.js | 115 +++++++++++++++++++++++++ test/post/language_field_filter.js | 134 +++++++++++++++++++++++++++++ test/run.js | 3 + test/schema.js | 68 +++++++++++++++ 11 files changed, 414 insertions(+), 7 deletions(-) create mode 100644 post/language_default.js create mode 100644 post/language_field_filter.js create mode 100644 schema.js create mode 100644 test/post/language_default.js create mode 100644 test/post/language_field_filter.js create mode 100644 test/schema.js diff --git a/Document.js b/Document.js index d28e820..351cb6a 100644 --- a/Document.js +++ b/Document.js @@ -1,4 +1,4 @@ -const config = require('pelias-config').generate(); +const config = require('pelias-config').generate(require('./schema')); const validate = require('./util/valid'); const transform = require('./util/transform'); const _ = require('lodash'); @@ -44,6 +44,8 @@ function Document( source, layer, source_id ){ this.addPostProcessingScript( require('./post/intersections') ); this.addPostProcessingScript( require('./post/seperable_street_names').post ); this.addPostProcessingScript( require('./post/deduplication') ); + this.addPostProcessingScript( require('./post/language_default')(config) ); + this.addPostProcessingScript( require('./post/language_field_filter')(config) ); this.addPostProcessingScript( require('./post/language_field_trimming') ); // mandatory properties diff --git a/package.json b/package.json index 7d010aa..f1729ad 100644 --- a/package.json +++ b/package.json @@ -28,6 +28,7 @@ "node": ">= 10.0.0" }, "dependencies": { + "joi": "^17.1.1", 
"lodash": "^4.6.1", "pelias-config": "^4.5.0", "through2": "^3.0.0" diff --git a/post/language_default.js b/post/language_default.js new file mode 100644 index 0000000..120ef68 --- /dev/null +++ b/post/language_default.js @@ -0,0 +1,19 @@ +const _ = require('lodash'); +const prefixes = ['name', 'phrase']; + +module.exports = config => { + return function defaultLang(doc) { + const defaultLang = config.get('imports.langs.default'); + if (!_.isString(defaultLang)) { return; } + + prefixes.forEach((prefix) => { + let field = doc[prefix]; + + if (!_.isPlainObject(field)) { return; } + + if (!_.isEmpty(field[defaultLang])) { + field['default'] = _.concat(field[defaultLang], field['default']).filter((e) => _.isString(e)); + } + }); + }; +}; diff --git a/post/language_field_filter.js b/post/language_field_filter.js new file mode 100644 index 0000000..c96bf45 --- /dev/null +++ b/post/language_field_filter.js @@ -0,0 +1,33 @@ +const _ = require('lodash'); +const prefixes = ['name', 'phrase']; + +function normalizeConfig(config) { + const keep = config.get('imports.langs.keep') || config.get('imports.langs'); + if (_.isString(keep)) { + return new Set([keep]); + } + if (_.isArray(keep)) { + return new Set(keep); + } +} + +module.exports = (config) => { + const keep = normalizeConfig(config); + return function filter (doc) { + if (!keep) { return; } + + prefixes.forEach((prefix) => { + let field = doc[prefix]; + + if (!_.isPlainObject(field)) { + return; + } + + _.each(field, (_names, lang) => { + if (lang !== 'default' && !keep.has(lang)) { + delete field[lang]; + } + }); + }); + }; +}; diff --git a/schema.js b/schema.js new file mode 100644 index 0000000..13dd04b --- /dev/null +++ b/schema.js @@ -0,0 +1,18 @@ + +const Joi = require('joi'); + +module.exports = Joi.object().keys({ + imports: Joi.object().keys({ + langs: Joi.alternatives().try( + Joi.string().regex(/^[a-z]{2}$/), + Joi.array().items(Joi.string().regex(/^[a-z]{2}$/)).min(1), + Joi.object().keys({ + keep: 
Joi.alternatives().try( + Joi.string().regex(/^[a-z]{2}$/), + Joi.array().items(Joi.string().regex(/^[a-z]{2}$/)).min(1) + ), + default: Joi.string().regex(/^[a-z]{2}$/) + }) + ) + }).unknown(true) +}).unknown(true); diff --git a/test/document/post.js b/test/document/post.js index 74b9928..4fd684a 100644 --- a/test/document/post.js +++ b/test/document/post.js @@ -1,17 +1,26 @@ -const Document = require('../../Document'); +const _ = require('lodash'); +const proxyquire = require('proxyquire'); +const config = { get: _.get.bind(null, {}) }; +const Document = proxyquire('../../Document', { 'pelias-config': config }); const intersections = require('../../post/intersections'); const seperable_street_names = require('../../post/seperable_street_names').post; const deduplication = require('../../post/deduplication'); const language_field_trimming = require('../../post/language_field_trimming'); -const DEFAULT_SCRIPTS = [intersections, seperable_street_names, deduplication, language_field_trimming ]; +const language_field_filter = require('../../post/language_field_filter'); +const language_default = require('../../post/language_default'); +const DEFAULT_SCRIPT_NAMES = [ + intersections, seperable_street_names, deduplication, + language_default(config), language_field_filter(config), language_field_trimming +].map(f => f.name); module.exports.tests = {}; module.exports.tests.addPostProcessingScript = function(test) { test('default scripts', function(t) { let doc = new Document('mysource','mylayer','myid'); - t.deepEqual(doc._post, DEFAULT_SCRIPTS, 'default processing scripts'); + t.equal(doc._post.length, 6); + t.deepEqual(doc._post.map(f => f.name), DEFAULT_SCRIPT_NAMES, 'default processing scripts'); t.end(); }); test('invalid type', function(t) { @@ -26,7 +35,7 @@ module.exports.tests.addPostProcessingScript = function(test) { let script = function(){}; let doc = new Document('mysource','mylayer','myid'); doc.addPostProcessingScript( script ); - t.deepEqual(doc._post, 
DEFAULT_SCRIPTS.concat( script ), 'default processing scripts'); + t.deepEqual(doc._post.map(f => f.name), DEFAULT_SCRIPT_NAMES.concat( script.name ), 'default processing scripts'); t.end(); }); test('set same function twice (allowed)', function(t) { @@ -34,7 +43,11 @@ module.exports.tests.addPostProcessingScript = function(test) { let doc = new Document('mysource','mylayer','myid'); doc.addPostProcessingScript( script ); doc.addPostProcessingScript( script ); - t.deepEqual(doc._post, DEFAULT_SCRIPTS.concat( script, script ), 'default processing scripts'); + t.deepEqual( + doc._post.map(f => f.name), + DEFAULT_SCRIPT_NAMES.concat( script.name, script.name ), + 'default processing scripts' + ); t.end(); }); }; diff --git a/test/document/toESDocument.js b/test/document/toESDocument.js index 567a410..3179ef5 100644 --- a/test/document/toESDocument.js +++ b/test/document/toESDocument.js @@ -1,5 +1,6 @@ const proxyquire = require('proxyquire'); const codec = require('../../codec'); +const _ = require('lodash'); var fakeGeneratedConfig = { schema: { @@ -10,7 +11,7 @@ var fakeGeneratedConfig = { const fakeConfig = { generate: function fakeGenerate() { - return fakeGeneratedConfig; + return Object.assign({ get: _.get.bind(null, fakeGeneratedConfig) }, fakeGeneratedConfig); } }; diff --git a/test/post/language_default.js b/test/post/language_default.js new file mode 100644 index 0000000..c0c2b81 --- /dev/null +++ b/test/post/language_default.js @@ -0,0 +1,115 @@ +const language_default = require('../../post/language_default'); +const Document = require('../../Document'); +const _ = require('lodash'); + +const generateFakeConfig = (config) => { + return { get: _.get.bind(null, config) }; +}; + +module.exports.tests = {}; + +module.exports.tests.default = function (test) { + test('default - empty config', function (t) { + const fakeConfig = generateFakeConfig({}); + const doc = new Document('mysource', 'mylayer', 'myid'); + + doc.setName('default', 'test1'); + 
doc.setNameAlias('default', 'test2'); + doc.setNameAlias('default', 'test3'); + + doc.setName('en', 'test4'); + doc.setNameAlias('en', 'test3'); + doc.setNameAlias('en', 'test5'); + + language_default(fakeConfig)(doc); + + t.deepEquals(doc.name.default, ['test1', 'test2', 'test3']); + t.deepEquals(doc.phrase.default, ['test1', 'test2', 'test3']); + + t.end(); + }); + + test('default - en with aliases', function (t) { + const fakeConfig = generateFakeConfig({ imports: { langs: { default: 'en' } } }); + const doc = new Document('mysource', 'mylayer', 'myid'); + + doc.setName('default', 'test1'); + doc.setNameAlias('default', 'test2'); + doc.setNameAlias('default', 'test3'); + + doc.setName('en', 'test4'); + doc.setNameAlias('en', 'test3'); + doc.setNameAlias('en', 'test5'); + + language_default(fakeConfig)(doc); + + t.deepEquals(doc.name.default, ['test4', 'test3', 'test5', 'test1', 'test2', 'test3']); + t.deepEquals(doc.phrase.default, ['test4', 'test3', 'test5', 'test1', 'test2', 'test3']); + + t.end(); + }); + + test('default - en without aliases', function (t) { + const fakeConfig = generateFakeConfig({ imports: { langs: { default: 'en' } } }); + + const doc = new Document('mysource', 'mylayer', 'myid'); + + doc.setName('default', 'test1'); + doc.setNameAlias('default', 'test2'); + doc.setNameAlias('default', 'test3'); + + doc.setName('en', 'test4'); + + language_default(fakeConfig)(doc); + + t.deepEquals(doc.name.default, ['test4', 'test1', 'test2', 'test3']); + t.deepEquals(doc.phrase.default, ['test4', 'test1', 'test2', 'test3']); + + t.end(); + }); + + test('default - without en name', function (t) { + const fakeConfig = generateFakeConfig({ imports: { langs: { default: 'en' } } }); + + const doc = new Document('mysource', 'mylayer', 'myid'); + + doc.setName('default', 'test1'); + doc.setNameAlias('default', 'test2'); + doc.setNameAlias('default', 'test3'); + + language_default(fakeConfig)(doc); + + t.deepEquals(doc.name.default, ['test1', 'test2', 'test3']); 
+ t.deepEquals(doc.phrase.default, ['test1', 'test2', 'test3']); + + t.end(); + }); + + test('default - without default name', function (t) { + const fakeConfig = generateFakeConfig({ imports: { langs: { default: 'en' } } }); + + const doc = new Document('mysource', 'mylayer', 'myid'); + + doc.setName('en', 'test1'); + + language_default(fakeConfig)(doc); + + t.deepEquals(doc.name.default, ['test1']); + t.deepEquals(doc.phrase.default, ['test1']); + + t.deepEquals(doc.name.en, 'test1'); + t.deepEquals(doc.phrase.en, 'test1'); + + t.end(); + }); +}; + +module.exports.all = function (tape, common) { + function test(name, testFunction) { + return tape('post/language_default: ' + name, testFunction); + } + + for (var testCase in module.exports.tests) { + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/post/language_field_filter.js b/test/post/language_field_filter.js new file mode 100644 index 0000000..e79a8df --- /dev/null +++ b/test/post/language_field_filter.js @@ -0,0 +1,134 @@ +const language_field_filter = require('../../post/language_field_filter'); +const Document = require('../../Document'); +const _ = require('lodash'); + +const generateDocument = () => { + const doc = new Document('mysource', 'mylayer', 'myid'); + + doc.setName('default', 'test1'); + doc.setNameAlias('default', 'test2'); + doc.setNameAlias('default', 'test3'); + + doc.setName('en', 'test4'); + doc.setNameAlias('en', 'test3'); + doc.setNameAlias('en', 'test5'); + + doc.setName('de', 'test6'); + + doc.setName('fr', 'test7'); + + return doc; +}; + +const generateFakeConfig = (config) => { + return { get: _.get.bind(null, config) }; +}; + +module.exports.tests = {}; + +module.exports.tests.filter = function (test) { + test('filter - empty config', function (t) { + const fakeConfig = generateFakeConfig({}); + const doc = generateDocument(); + + language_field_filter(fakeConfig)(doc); + + t.deepEquals(doc, generateDocument()); + + t.end(); + }); + + test('filter - langs as 
string', function (t) { + const fakeConfig = generateFakeConfig({ imports: { langs: 'en' } }); + const doc = generateDocument(); + + language_field_filter(fakeConfig)(doc); + + t.ok(doc.name.default, 'default should be truthy'); + t.ok(doc.phrase.default, 'default should be truthy'); + + t.ok(doc.name.en, 'en should be truthy'); + t.ok(doc.phrase.en, 'en should be truthy'); + + t.false(doc.name.de, 'de should be falsy'); + t.false(doc.phrase.de, 'de should be falsy'); + + t.false(doc.name.fr, 'fr should be falsy'); + t.false(doc.phrase.fr, 'fr should be falsy'); + + t.end(); + }); + + test('filter - langs as array', function (t) { + const fakeConfig = generateFakeConfig({ imports: { langs: ['en', 'de'] } }); + const doc = generateDocument(); + + language_field_filter(fakeConfig)(doc); + + t.ok(doc.name.default, 'default should be truthy'); + t.ok(doc.phrase.default, 'default should be truthy'); + + t.ok(doc.name.en, 'en should be truthy'); + t.ok(doc.phrase.en, 'en should be truthy'); + + t.ok(doc.name.de, 'de should be truthy'); + t.ok(doc.phrase.de, 'de should be truthy'); + + t.false(doc.name.fr, 'fr should be falsy'); + t.false(doc.phrase.fr, 'fr should be falsy'); + + t.end(); + }); + + test('filter - keep as string', function (t) { + const fakeConfig = generateFakeConfig({ imports: { langs: { keep: 'en' } } }); + const doc = generateDocument(); + + language_field_filter(fakeConfig)(doc); + + t.ok(doc.name.default, 'default should be truthy'); + t.ok(doc.phrase.default, 'default should be truthy'); + + t.ok(doc.name.en, 'en should be truthy'); + t.ok(doc.phrase.en, 'en should be truthy'); + + t.false(doc.name.de, 'de should be falsy'); + t.false(doc.phrase.de, 'de should be falsy'); + + t.false(doc.name.fr, 'fr should be falsy'); + t.false(doc.phrase.fr, 'fr should be falsy'); + + t.end(); + }); + + test('filter - keep as array', function (t) { + const fakeConfig = generateFakeConfig({ imports: { langs: { keep: ['en', 'de'] } } }); + const doc = 
generateDocument(); + + language_field_filter(fakeConfig)(doc); + + t.ok(doc.name.default, 'default should be truthy'); + t.ok(doc.phrase.default, 'default should be truthy'); + + t.ok(doc.name.en, 'en should be truthy'); + t.ok(doc.phrase.en, 'en should be truthy'); + + t.ok(doc.name.de, 'de should be truthy'); + t.ok(doc.phrase.de, 'de should be truthy'); + + t.false(doc.name.fr, 'fr should be falsy'); + t.false(doc.phrase.fr, 'fr should be falsy'); + + t.end(); + }); +}; + +module.exports.all = function (tape, common) { + function test(name, testFunction) { + return tape('post/language_field_filter: ' + name, testFunction); + } + + for (var testCase in module.exports.tests) { + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/run.js b/test/run.js index 8214362..f705020 100644 --- a/test/run.js +++ b/test/run.js @@ -25,11 +25,14 @@ var tests = [ require('./post/intersections.js'), require('./post/deduplication.js'), require('./post/seperable_street_names.js'), + require('./post/language_default.js'), + require('./post/language_field_filter.js'), require('./post/language_field_trimming.js'), require('./DocumentMapperStream.js'), require('./util/transform.js'), require('./util/valid.js'), require('./serialize/test.js'), + require('./schema.js'), ]; tests.map(function(t) { diff --git a/test/schema.js b/test/schema.js new file mode 100644 index 0000000..7c23d53 --- /dev/null +++ b/test/schema.js @@ -0,0 +1,68 @@ +const schema = require('../schema'); + +function validate(config) { + const result = schema.validate(config); + if (result.error) { + throw new Error(result.error.details[0].message); + } +} + +module.exports.tests = {}; + +module.exports.tests.schema = function (test) { + test('missing imports should not throw error', function (t) { + t.doesNotThrow(validate.bind(null, {})); + t.end(); + }); + + test('missing langs should not throw error', function (t) { + t.doesNotThrow(validate.bind(null, { imports: {}})); + t.end(); + }); + + 
test('missing keep/default should not throw error', function (t) { + t.doesNotThrow(validate.bind(null, { imports: { langs: {}}})); + t.end(); + }); + + test('correct values should not throw error', function (t) { + t.doesNotThrow(validate.bind(null, { imports: { langs: { keep: ['en'], default: 'en'}}})); + t.doesNotThrow(validate.bind(null, { imports: { langs: { keep: 'en', default: 'en'}}})); + t.doesNotThrow(validate.bind(null, { imports: { langs: 'en' }})); + t.end(); + }); + + test('incorrect `langs` languages should throw error', function (t) { + t.throws(validate.bind(null, { imports: { langs: ['english']}})); + t.throws(validate.bind(null, { imports: { langs: [123]}})); + t.throws(validate.bind(null, { imports: { langs: 'english'}})); + t.throws(validate.bind(null, { imports: { langs: 123}})); + t.end(); + }); + + test('incorrect `langs.keep` languages should throw error', function (t) { + t.throws(validate.bind(null, { imports: { langs: { keep: ['english']}}})); + t.throws(validate.bind(null, { imports: { langs: { keep: [123]}}})); + t.throws(validate.bind(null, { imports: { langs: { keep: 'english'}}})); + t.throws(validate.bind(null, { imports: { langs: { keep: 123}}})); + t.end(); + }); + + test('incorrect `langs.default` languages should throw error', function (t) { + t.throws(validate.bind(null, { imports: { langs: { default: ['english']}}})); + t.throws(validate.bind(null, { imports: { langs: { default: [123]}}})); + t.throws(validate.bind(null, { imports: { langs: { default: 'english'}}})); + t.throws(validate.bind(null, { imports: { langs: { default: 123}}})); + t.end(); + }); +}; + +module.exports.all = function (tape, common) { + function test(name, testFunction) { + return tape('schema: ' + name, testFunction); + } + + for (var testCase in module.exports.tests) { + module.exports.tests[testCase](test, common); + } +};