Skip to content

Commit

Permalink
feat(post): add alphanumeric postcodes post-processing script
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink committed May 27, 2024
1 parent 6cfffc0 commit f64cd68
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 4 deletions.
1 change: 1 addition & 0 deletions Document.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ function Document( source, layer, source_id ){
// define default post-processing scripts
this.addPostProcessingScript( require('./post/intersections') );
this.addPostProcessingScript( require('./post/seperable_street_names').post );
this.addPostProcessingScript( require('./post/alphanumeric_postcodes') );
this.addPostProcessingScript( require('./post/deduplication') );
this.addPostProcessingScript( require('./post/language_field_trimming') );
this.addPostProcessingScript( require('./post/popularity') );
Expand Down
50 changes: 50 additions & 0 deletions post/alphanumeric_postcodes.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
const _ = require('lodash');
// layer name a document must report (via doc.getLayer()) for this script to apply
const ADDRESS_LAYER_NAME = 'address';
// matches a whole postcode made of exactly four digits, optional whitespace and
// exactly two ASCII letters, eg. '1383GN' or '1383 GN'.
// capture groups: 1 = digits, 2 = whitespace between the parts (may be empty), 3 = letters
const ALPHANUMERIC_POSTCODE = /^([0-9]{4})(\s*)([A-Za-z]{2})$/;

/**
 * Alphanumeric postcodes post-processing script ensures that both the expanded
 * and contracted version of alphanumeric postcodes are indexed.
 *
 * Without this script a postcode such as '1383GN' would not be matched to the
 * query '1383'.
 *
 * The script is intended to detect these alphanumeric postcodes and index both
 * permutations, ie. '1383GN' = ['1383GN', '1383 GN'].
 *
 * The inverse case should also be covered. ie. '1383 GN' = ['1383 GN', '1383GN'].
 *
 * Note: the regex is currently restrictive by design, the UK for instance uses
 * alphanumeric postcodes in the format 'E81DN' which could cause errors when
 * splitting with this method, they are currently ignored. Future work should
 * consider global postcode formats.
 *
 * Note: this script is intended to run *before* the 'deduplication' post processing
 * script so that prior aliases don't generate duplicate terms.
 *
 * The script is a no-op (nothing is written to the document) when:
 *  - the document is not in the 'address' layer
 *  - the 'zip' address field is not a non-empty string
 *  - the 'zip' value does not match the alphanumeric postcode pattern
 *
 * @param {Object} doc - document to process; must provide getLayer(),
 *   getAddress(prop) and setAddressAlias(prop, value).
 * @returns {undefined} the document is modified in place.
 */
function postcodes( doc ){

  // only apply to docs from the address layer
  if( doc.getLayer() !== ADDRESS_LAYER_NAME ){ return; }

  // ensure postcode is set
  const postcode = doc.getAddress('zip');
  if( !_.isString(postcode) || _.isEmpty(postcode) ){ return; }

  // ensure postcode is alphanumeric
  // note: String.prototype.match() returns null (not an empty array) when the
  // pattern does not match, so the result must be null-checked before use.
  const matches = postcode.match(ALPHANUMERIC_POSTCODE);
  if( !matches ){ return; }

  // split the postcode into its component parts
  const [ , numeric, spaces, alpha ] = matches;

  // detect if the existing postcode is expanded or not and alias the other form:
  // expanded ('1383 GN') gets the contracted alias ('1383GN') and vice versa.
  const alias = ( spaces.length > 0 ) ? `${numeric}${alpha}` : `${numeric} ${alpha}`;
  doc.setAddressAlias('zip', alias);
}

module.exports = postcodes;
6 changes: 5 additions & 1 deletion test/document/post.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@
const Document = require('../../Document');
const intersections = require('../../post/intersections');
const seperable_street_names = require('../../post/seperable_street_names').post;
const alphanumeric_postcodes = require('../../post/alphanumeric_postcodes');
const deduplication = require('../../post/deduplication');
const language_field_trimming = require('../../post/language_field_trimming');
const popularity = require('../../post/popularity');
const DEFAULT_SCRIPTS = [intersections, seperable_street_names, deduplication, language_field_trimming, popularity];
const DEFAULT_SCRIPTS = [
intersections, seperable_street_names, alphanumeric_postcodes,
deduplication, language_field_trimming, popularity
];

module.exports.tests = {};

Expand Down
76 changes: 76 additions & 0 deletions test/post/alphanumeric_postcodes.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
const Document = require('../../Document');
const postcodes = require('../../post/alphanumeric_postcodes');

module.exports.tests = {};

module.exports.tests.alias = function(test) {

  // Shared scenario: run the script on an address-layer document before and
  // after its zip is set, checking that an alias only appears once a matching
  // postcode is present.
  const assertAliasGenerated = (t, zip, expectedAliases) => {
    const doc = new Document('mysource','address','myid');

    // zip not set
    postcodes(doc);
    t.deepEqual(doc.getAddressAliases('zip'), [], 'no alias set');

    // set postcode
    doc.setAddress('zip', zip);

    // add the alternate version
    postcodes(doc);
    t.deepEqual(doc.getAddressAliases('zip'), expectedAliases, 'alias set');

    t.end();
  };

  // contracted input gains the expanded form as an alias
  test('expand', (t) => {
    assertAliasGenerated(t, '1383GN', ['1383 GN']);
  });

  // expanded input gains the contracted form as an alias
  test('contract', (t) => {
    assertAliasGenerated(t, '1383 GN', ['1383GN']);
  });
};

// Cases where the script must leave the document untouched. Each test has to
// actually invoke postcodes(doc); asserting on a freshly built document alone
// would pass regardless of what the script does.
module.exports.tests.noop = function(test) {
  test('noop: invalid layer != "address"', function(t) {
    const doc = new Document('mysource','not_address','myid');

    // set postcode
    doc.setAddress('zip', '1383GN');

    // run the script, it should ignore docs outside the address layer
    postcodes(doc);

    // no alias added
    t.deepEqual(doc.getAddressAliases('zip'), [], 'no alias set');

    t.end();
  });

  test('noop: postcode doesnt match regex', function(t) {
    const doc = new Document('mysource','address','myid');

    // set postcode
    doc.setAddress('zip', 'E81DN');

    // run the script, a non-matching postcode must be skipped without throwing
    t.doesNotThrow(function() { postcodes(doc); }, 'does not throw');

    // no alias added
    t.deepEqual(doc.getAddressAliases('zip'), [], 'no alias set');

    t.end();
  });
};

/**
 * Registers every test group defined on module.exports.tests with the
 * supplied tape instance.
 *
 * @param {Function} tape - test harness function, called as tape(name, fn).
 * @param {Object} common - shared helpers handed through to each test group.
 */
module.exports.all = function (tape, common) {

  // prefix every test name so the output identifies this suite
  const test = (name, testFunction) =>
    tape('post/alphanumeric_postcodes: ' + name, testFunction);

  Object.keys(module.exports.tests).forEach((groupName) => {
    module.exports.tests[groupName](test, common);
  });
};
7 changes: 4 additions & 3 deletions test/run.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
var tape = require('tape');
var common = {};
const tape = require('tape');
const common = {};

var tests = [
const tests = [
require('./Document.js'),
require('./errors.js'),
require('./document/centroid.js'),
Expand All @@ -23,6 +23,7 @@ var tests = [
require('./document/toESDocument.js'),
require('./document/post.js'),
require('./post/intersections.js'),
require('./post/alphanumeric_postcodes.js'),
require('./post/deduplication.js'),
require('./post/seperable_street_names.js'),
require('./post/language_field_trimming.js'),
Expand Down

0 comments on commit f64cd68

Please sign in to comment.