diff --git a/src/peliasDocGenerators.js b/src/peliasDocGenerators.js index 18092ff5..52e495e9 100644 --- a/src/peliasDocGenerators.js +++ b/src/peliasDocGenerators.js @@ -1,4 +1,4 @@ -var map_stream = require('through2-map'); +var through2 = require('through2'); var _ = require('lodash'); var iso3166 = require('iso3166-1'); @@ -6,83 +6,111 @@ var Document = require('pelias-model').Document; module.exports = {}; -module.exports.create = function(hierarchy_finder) { - return map_stream.obj(function(record) { - var wofDoc = new Document( 'whosonfirst', record.place_type, record.id ); - - if (record.name) { - wofDoc.setName('default', record.name); - } - wofDoc.setCentroid({ lat: record.lat, lon: record.lon }); +function assignField(hierarchyElement, wofDoc) { + switch (hierarchyElement.place_type) { + case 'neighbourhood': + case 'locality': + case 'borough': + case 'localadmin': + case 'county': + case 'macrocounty': + case 'macroregion': + // the above place_types don't have abbrevations (yet) + wofDoc.addParent(hierarchyElement.place_type, hierarchyElement.name, hierarchyElement.id.toString()); + break; + case 'region': + case 'dependency': + if (hierarchyElement.hasOwnProperty('abbreviation')) { + wofDoc.addParent(hierarchyElement.place_type, hierarchyElement.name, hierarchyElement.id.toString(), hierarchyElement.abbreviation); + } else { + wofDoc.addParent(hierarchyElement.place_type, hierarchyElement.name, hierarchyElement.id.toString()); + } + break; + case 'country': + // this is placetype=country, so lookup and set the iso3 from iso2 + if (iso3166.is2(hierarchyElement.iso2)) { + var iso3 = iso3166.to3(hierarchyElement.iso2); - // only set population if available - if (record.population) { - wofDoc.setPopulation(record.population); - } + wofDoc.setAlpha3(iso3); + wofDoc.addParent('country', hierarchyElement.name, hierarchyElement.id.toString(), iso3); - // only set popularity if available - if (record.popularity) { - wofDoc.setPopularity(record.popularity); - } + } else { + wofDoc.addParent('country', hierarchyElement.name, hierarchyElement.id.toString()); - // WOF bbox is defined as: - // lowerLeft.lon, lowerLeft.lat, upperRight.lon, upperRight.lat - // so convert to what ES understands - if (!_.isUndefined(record.bounding_box)) { - var parsedBoundingBox = record.bounding_box.split(',').map(parseFloat); - var marshaledBoundingBoxBox = { - upperLeft: { - lat: parsedBoundingBox[3], - lon: parsedBoundingBox[0] - }, - lowerRight: { - lat: parsedBoundingBox[1], - lon: parsedBoundingBox[2] - } - - }; - wofDoc.setBoundingBox(marshaledBoundingBoxBox); - } + } - // iterate the hierarchy, assigning fields - hierarchy_finder(record).forEach(function(hierarchy_element) { - switch (hierarchy_element.place_type) { - case 'neighbourhood': - case 'locality': - case 'borough': - case 'localadmin': - case 'county': - case 'macrocounty': - case 'macroregion': - // the above place_types don't have abbrevations (yet) - wofDoc.addParent(hierarchy_element.place_type, hierarchy_element.name, hierarchy_element.id.toString()); - break; - case 'region': - case 'dependency': - if (hierarchy_element.hasOwnProperty('abbreviation')) { - wofDoc.addParent(hierarchy_element.place_type, hierarchy_element.name, hierarchy_element.id.toString(), hierarchy_element.abbreviation); - } else { - wofDoc.addParent(hierarchy_element.place_type, hierarchy_element.name, hierarchy_element.id.toString()); - } - break; - case 'country': - // this is placetype=country, so lookup and set the iso3 from iso2 - if (iso3166.is2(hierarchy_element.iso2)) { - var iso3 = iso3166.to3(hierarchy_element.iso2); - - wofDoc.setAlpha3(iso3); - wofDoc.addParent('country', hierarchy_element.name, hierarchy_element.id.toString(), iso3); - - } else { - wofDoc.addParent('country', hierarchy_element.name, hierarchy_element.id.toString()); - - } - - break; + break; + } + +} + +// method that extracts the logic for Document creation. `hierarchy` is optional +function setupDocument(record, hierarchy) { + var wofDoc = new Document( 'whosonfirst', record.place_type, record.id ); + + if (record.name) { + wofDoc.setName('default', record.name); + } + wofDoc.setCentroid({ lat: record.lat, lon: record.lon }); + + // only set population if available + if (record.population) { + wofDoc.setPopulation(record.population); + } + + // only set popularity if available + if (record.popularity) { + wofDoc.setPopularity(record.popularity); + } + + // WOF bbox is defined as: + // lowerLeft.lon, lowerLeft.lat, upperRight.lon, upperRight.lat + // so convert to what ES understands + if (!_.isUndefined(record.bounding_box)) { + var parsedBoundingBox = record.bounding_box.split(',').map(parseFloat); + var marshaledBoundingBoxBox = { + upperLeft: { + lat: parsedBoundingBox[3], + lon: parsedBoundingBox[0] + }, + lowerRight: { + lat: parsedBoundingBox[1], + lon: parsedBoundingBox[2] } + + }; + wofDoc.setBoundingBox(marshaledBoundingBoxBox); + } + + // a `hierarchy` is composed of potentially multiple WOF records, so iterate + // and assign fields + if (!_.isUndefined(hierarchy)) { + hierarchy.forEach(function(hierarchyElement) { + assignField(hierarchyElement, wofDoc); }); - return wofDoc; + } + + return wofDoc; + +} + +module.exports.create = function(hierarchy_finder) { + return through2.obj(function(record, enc, next) { + // if there are no hierarchies, then just return the doc as-is + var hierarchies = hierarchy_finder(record); + + if (hierarchies && hierarchies.length > 0) { + hierarchies.forEach(function(hierarchy) { + this.push(setupDocument(record, hierarchy)); + }, this); + + } else { + this.push(setupDocument(record)); + + } + + next(); }); diff --git a/test/components/extractFieldsTest.js b/test/components/extractFieldsTest.js index 81687c5b..acbe31eb 100644 --- a/test/components/extractFieldsTest.js +++ b/test/components/extractFieldsTest.js @@ -115,7 +115,7 @@ tape('readStreamComponents', function(test) { 'iso:country': 'YZ', 'wof:abbreviation': 'XY', 'gn:population': 98765, - 'zs:pop10': 87654, + 'zs:pop10': 87654 } } ]; @@ -157,7 +157,7 @@ tape('readStreamComponents', function(test) { 'geom:bbox': '-13.691314,49.909613,1.771169,60.847886', 'iso:country': 'YZ', 'wof:abbreviation': 'XY', - 'zs:pop10': 98765, + 'zs:pop10': 98765 } } ]; @@ -199,7 +199,7 @@ tape('readStreamComponents', function(test) { 'geom:bbox': '-13.691314,49.909613,1.771169,60.847886', 'iso:country': 'YZ', 'wof:abbreviation': 'XY', - 'qs:pop': 98765, + 'qs:pop': 98765 } } ]; @@ -217,6 +217,7 @@ tape('readStreamComponents', function(test) { popularity: undefined, abbreviation: 'XY', bounding_box: '-13.691314,49.909613,1.771169,60.847886', + hierarchies: [] } ]; @@ -240,7 +241,7 @@ tape('readStreamComponents', function(test) { 'geom:bbox': '-13.691314,49.909613,1.771169,60.847886', 'iso:country': 'YZ', 'wof:abbreviation': 'XY', - 'mz:population': 98765, + 'mz:population': 98765 } } ]; @@ -258,6 +259,7 @@ tape('readStreamComponents', function(test) { popularity: undefined, abbreviation: 'XY', bounding_box: '-13.691314,49.909613,1.771169,60.847886', + hierarchies: [] } ]; @@ -281,7 +283,7 @@ tape('readStreamComponents', function(test) { 'geom:bbox': '-13.691314,49.909613,1.771169,60.847886', 'iso:country': 'YZ', 'wof:abbreviation': 'XY', - 'zs:pop10': 0, + 'zs:pop10': 0 } } ]; @@ -541,7 +543,8 @@ tape('readStreamComponents', function(test) { population: undefined, popularity: undefined, bounding_box: undefined, - abbreviation: undefined + abbreviation: undefined, + hierarchies: [] } ]; @@ -580,7 +583,8 @@ tape('readStreamComponents', function(test) { population: undefined, popularity: undefined, bounding_box: '-14.691314,50.909613,2.771169,61.847886', - abbreviation: undefined + abbreviation: undefined, + hierarchies: [] } ]; @@ -619,7 +623,8 @@ tape('readStreamComponents', function(test) { population: undefined, popularity: undefined, bounding_box: '', - abbreviation: undefined + abbreviation: undefined, + hierarchies: [] } ]; @@ -657,7 +662,8 @@ tape('readStreamComponents', function(test) { population: undefined, popularity: undefined, bounding_box: '', - abbreviation: undefined + abbreviation: undefined, + hierarchies: [] } ]; diff --git a/test/peliasDocGeneratorsTest.js b/test/peliasDocGeneratorsTest.js index 6dbc3ac3..07d176aa 100644 --- a/test/peliasDocGeneratorsTest.js +++ b/test/peliasDocGeneratorsTest.js @@ -41,11 +41,12 @@ tape('create', function(test) { var hierarchies_finder = function() { return [ - wofRecords['1'] + [ + wofRecords['1'] + ] ]; }; - // seed the parent_id_walker with wofRecords var docGenerator = peliasDocGenerators.create(hierarchies_finder); test_stream(input, docGenerator, function(err, actual) { @@ -58,7 +59,7 @@ tape('create', function(test) { }); - test.test('place_types that allow abbreviations should honor them when available', function(t) { + test.test('region and dependency (that allow abbreviations) should honor them when available', function(t) { ['region', 'dependency'].forEach(function(place_type) { var wofRecords = { 1: { @@ -87,11 +88,12 @@ tape('create', function(test) { var hierarchies_finder = function() { return [ - wofRecords['1'] + [ + wofRecords['1'] + ] ]; }; - // seed the parent_id_walker with wofRecords var docGenerator = peliasDocGenerators.create(hierarchies_finder); test_stream(input, docGenerator, function(err, actual) { @@ -132,7 +134,6 @@ tape('create', function(test) { return []; }; - // seed the parent_id_walker with wofRecords var docGenerator = peliasDocGenerators.create(hierarchies_finder); test_stream(input, docGenerator, function(err, actual) { @@ -144,32 +145,31 @@ tape('create', function(test) { test.test('wofRecord without bounding_box should have undefined bounding box in output', function(t) { var wofRecords = { - 1: { - id: 1, - name: 'name 1', - lat: 12.121212, - lon: 21.212121, - parent_id: undefined, - place_type: 'continent' - } + 1: { + id: 1, + name: 'name 1', + lat: 12.121212, + lon: 21.212121, + parent_id: undefined, + place_type: 'continent' + } }; var input = [ - wofRecords['1'] + wofRecords['1'] ]; var expected = [ - new Document( 'whosonfirst', 'continent', '1' ) - .setName('default', 'name 1') - .setCentroid({ lat: 12.121212, lon: 21.212121 }) + new Document( 'whosonfirst', 'continent', '1' ) + .setName('default', 'name 1') + .setCentroid({ lat: 12.121212, lon: 21.212121 }) ]; // don't care about hierarchies in this test var hierarchies_finder = function() { - return []; + return []; }; - // seed the parent_id_walker with wofRecords var docGenerator = peliasDocGenerators.create(hierarchies_finder); test_stream(input, docGenerator, function(err, actual) { @@ -207,7 +207,6 @@ tape('create', function(test) { return []; }; - // seed the parent_id_walker with wofRecords var docGenerator = peliasDocGenerators.create(hierarchies_finder); test_stream(input, docGenerator, function(err, actual) { @@ -243,11 +242,12 @@ tape('create', function(test) { var hierarchies_finder = function() { return [ - wofRecords['1'] + [ + wofRecords['1'] + ] ]; }; - // seed the parent_id_walker with wofRecords var docGenerator = peliasDocGenerators.create(hierarchies_finder); test_stream(input, docGenerator, function(err, actual) { @@ -284,11 +284,12 @@ tape('create', function(test) { var hierarchies_finder = function() { return [ - wofRecords['1'] + [ + wofRecords['1'] + ] ]; }; - // seed the parent_id_walker with wofRecords var docGenerator = peliasDocGenerators.create(hierarchies_finder); test_stream(input, docGenerator, function(err, actual) { @@ -327,11 +328,12 @@ tape('create', function(test) { var hierarchies_finder = function() { return [ - wofRecords['1'] + [ + wofRecords['1'] + ] ]; }; - // seed the parent_id_walker with wofRecords var docGenerator = peliasDocGenerators.create(hierarchies_finder); test_stream(input, docGenerator, function(err, actual) { @@ -371,11 +373,12 @@ tape('create', function(test) { var hierarchies_finder = function() { return [ - wofRecords['1'] + [ + wofRecords['1'] + ] ]; }; - // seed the parent_id_walker with wofRecords var docGenerator = peliasDocGenerators.create(hierarchies_finder); test_stream(input, docGenerator, function(err, actual) { @@ -414,11 +417,12 @@ tape('create', function(test) { var hierarchies_finder = function() { return [ - wofRecords['1'] + [ + wofRecords['1'] + ] ]; }; - // seed the parent_id_walker with wofRecords var docGenerator = peliasDocGenerators.create(hierarchies_finder); test_stream(input, docGenerator, function(err, actual) { @@ -458,11 +462,12 @@ tape('create', function(test) { var hierarchies_finder = function() { return [ - wofRecords['1'] + [ + wofRecords['1'] + ] ]; }; - // seed the parent_id_walker with wofRecords var docGenerator = peliasDocGenerators.create(hierarchies_finder); test_stream(input, docGenerator, function(err, actual) { @@ -472,31 +477,28 @@ tape('create', function(test) { }); - test.test('hierarchy should be walked to populate parentage', function(t) { + test.test('a document should be created for each available hierarchy', function(t) { var wofRecords = { 1: { id: 1, name: 'neighbourhood name', lat: 12.121212, lon: 21.212121, - place_type: 'neighbourhood', - iso2: 'US' + place_type: 'neighbourhood' }, 2: { id: 2, - name: 'locality name', + name: 'country name 1', lat: 13.131313, lon: 31.313131, - place_type: 'locality', - iso2: 'US' + place_type: 'country' }, 3: { id: 3, - name: 'region name', + name: 'country name 2', lat: 14.141414, lon: 41.414141, - place_type: 'region', - iso2: 'US' + place_type: 'country' } }; @@ -511,19 +513,27 @@ tape('create', function(test) { .setName('default', 'neighbourhood name') .setCentroid({ lat: 12.121212, lon: 21.212121 }) .addParent( 'neighbourhood', 'neighbourhood name', '1') - .addParent( 'locality', 'locality name', '2') - .addParent( 'region', 'region name', '3') + .addParent( 'country', 'country name 1', '2'), + new Document( 'whosonfirst', 'neighbourhood', '1') + .setName('default', 'neighbourhood name') + .setCentroid({ lat: 12.121212, lon: 21.212121 }) + .addParent( 'neighbourhood', 'neighbourhood name', '1') + .addParent( 'country', 'country name 2', '3') ]; var hierarchies_finder = function() { return [ - wofRecords['1'], - wofRecords['2'], - wofRecords['3'] + [ + wofRecords['1'], + wofRecords['2'] + ], + [ + wofRecords['1'], + wofRecords['3'] + ] ]; }; - // seed the parent_id_walker with wofRecords var docGenerator = peliasDocGenerators.create(hierarchies_finder); test_stream(input, docGenerator, function(err, actual) {