Skip to content

Commit

Permalink
modified peliasDocGenerators to accommodate multiple hierarchies
Browse files Browse the repository at this point in the history
  • Loading branch information
trescube committed Oct 17, 2016
1 parent 8591951 commit d8a1b45
Show file tree
Hide file tree
Showing 3 changed files with 172 additions and 128 deletions.
170 changes: 99 additions & 71 deletions src/peliasDocGenerators.js
Original file line number Diff line number Diff line change
@@ -1,88 +1,116 @@
var map_stream = require('through2-map');
var through2 = require('through2');
var _ = require('lodash');
var iso3166 = require('iso3166-1');

var Document = require('pelias-model').Document;

module.exports = {};

module.exports.create = function(hierarchy_finder) {
return map_stream.obj(function(record) {
var wofDoc = new Document( 'whosonfirst', record.place_type, record.id );

if (record.name) {
wofDoc.setName('default', record.name);
}
wofDoc.setCentroid({ lat: record.lat, lon: record.lon });
function assignField(hierarchyElement, wofDoc) {
switch (hierarchyElement.place_type) {
case 'neighbourhood':
case 'locality':
case 'borough':
case 'localadmin':
case 'county':
case 'macrocounty':
case 'macroregion':
// the above place_types don't have abbreviations (yet)
wofDoc.addParent(hierarchyElement.place_type, hierarchyElement.name, hierarchyElement.id.toString());
break;
case 'region':
case 'dependency':
if (hierarchyElement.hasOwnProperty('abbreviation')) {
wofDoc.addParent(hierarchyElement.place_type, hierarchyElement.name, hierarchyElement.id.toString(), hierarchyElement.abbreviation);
} else {
wofDoc.addParent(hierarchyElement.place_type, hierarchyElement.name, hierarchyElement.id.toString());
}
break;
case 'country':
// this is placetype=country, so lookup and set the iso3 from iso2
if (iso3166.is2(hierarchyElement.iso2)) {
var iso3 = iso3166.to3(hierarchyElement.iso2);

// only set population if available
if (record.population) {
wofDoc.setPopulation(record.population);
}
wofDoc.setAlpha3(iso3);
wofDoc.addParent('country', hierarchyElement.name, hierarchyElement.id.toString(), iso3);

// only set popularity if available
if (record.popularity) {
wofDoc.setPopularity(record.popularity);
}
} else {
wofDoc.addParent('country', hierarchyElement.name, hierarchyElement.id.toString());

// WOF bbox is defined as:
// lowerLeft.lon, lowerLeft.lat, upperRight.lon, upperRight.lat
// so convert to what ES understands
if (!_.isUndefined(record.bounding_box)) {
var parsedBoundingBox = record.bounding_box.split(',').map(parseFloat);
var marshaledBoundingBoxBox = {
upperLeft: {
lat: parsedBoundingBox[3],
lon: parsedBoundingBox[0]
},
lowerRight: {
lat: parsedBoundingBox[1],
lon: parsedBoundingBox[2]
}

};
wofDoc.setBoundingBox(marshaledBoundingBoxBox);
}
}

// iterate the hierarchy, assigning fields
hierarchy_finder(record).forEach(function(hierarchy_element) {
switch (hierarchy_element.place_type) {
case 'neighbourhood':
case 'locality':
case 'borough':
case 'localadmin':
case 'county':
case 'macrocounty':
case 'macroregion':
// the above place_types don't have abbreviations (yet)
wofDoc.addParent(hierarchy_element.place_type, hierarchy_element.name, hierarchy_element.id.toString());
break;
case 'region':
case 'dependency':
if (hierarchy_element.hasOwnProperty('abbreviation')) {
wofDoc.addParent(hierarchy_element.place_type, hierarchy_element.name, hierarchy_element.id.toString(), hierarchy_element.abbreviation);
} else {
wofDoc.addParent(hierarchy_element.place_type, hierarchy_element.name, hierarchy_element.id.toString());
}
break;
case 'country':
// this is placetype=country, so lookup and set the iso3 from iso2
if (iso3166.is2(hierarchy_element.iso2)) {
var iso3 = iso3166.to3(hierarchy_element.iso2);

wofDoc.setAlpha3(iso3);
wofDoc.addParent('country', hierarchy_element.name, hierarchy_element.id.toString(), iso3);

} else {
wofDoc.addParent('country', hierarchy_element.name, hierarchy_element.id.toString());

}

break;
break;
}

}

/**
 * Extracts the logic for Document creation: builds one Document for a single
 * WOF record and, optionally, one hierarchy.
 *
 * The record's name, centroid, population, popularity and bounding box are
 * copied onto a new 'whosonfirst' Document. When a hierarchy is supplied,
 * every element of it is handed to `assignField` together with the Document.
 *
 * @param {object} record - WOF record. `place_type`, `id`, `lat` and `lon` are
 *   always read; `name`, `population` and `popularity` are only applied when
 *   truthy; `bounding_box` is only applied when it is a string whose first
 *   four comma-separated values parse to finite numbers.
 * @param {Array} [hierarchy] - optional list of hierarchy elements; when
 *   omitted the Document is returned without any parents assigned here.
 * @returns {Document} the populated Document
 */
function setupDocument(record, hierarchy) {
  var wofDoc = new Document( 'whosonfirst', record.place_type, record.id );

  if (record.name) {
    wofDoc.setName('default', record.name);
  }
  wofDoc.setCentroid({ lat: record.lat, lon: record.lon });

  // only set population if available
  if (record.population) {
    wofDoc.setPopulation(record.population);
  }

  // only set popularity if available
  if (record.popularity) {
    wofDoc.setPopularity(record.popularity);
  }

  // WOF bbox is defined as:
  // lowerLeft.lon, lowerLeft.lat, upperRight.lon, upperRight.lat
  // so convert to what ES understands
  //
  // Only strings are parsed: a null (or otherwise non-string) bounding_box has
  // no `split` and used to throw here.
  if (typeof record.bounding_box === 'string') {
    var parsedBoundingBox = record.bounding_box.split(',').map(parseFloat);

    // An empty or malformed string parses to NaN / missing entries; skip the
    // bounding box entirely rather than handing unusable corners to the Document.
    var hasUsableCorners = parsedBoundingBox.length >= 4 &&
      parsedBoundingBox.slice(0, 4).every(function(coordinate) {
        return isFinite(coordinate);
      });

    if (hasUsableCorners) {
      wofDoc.setBoundingBox({
        upperLeft: {
          lat: parsedBoundingBox[3],
          lon: parsedBoundingBox[0]
        },
        lowerRight: {
          lat: parsedBoundingBox[1],
          lon: parsedBoundingBox[2]
        }
      });
    }
  }

  // a `hierarchy` is composed of potentially multiple WOF records, so iterate
  // and assign fields
  if (!_.isUndefined(hierarchy)) {
    hierarchy.forEach(function(hierarchyElement) {
      assignField(hierarchyElement, wofDoc);
    });
  }

  return wofDoc;

}

module.exports.create = function(hierarchy_finder) {
return through2.obj(function(record, enc, next) {
// if there are no hierarchies, then just return the doc as-is
var hierarchies = hierarchy_finder(record);

if (hierarchies && hierarchies.length > 0) {
hierarchies.forEach(function(hierarchy) {
this.push(setupDocument(record, hierarchy));
}, this);

} else {
this.push(setupDocument(record));

}

next();

});

Expand Down
24 changes: 15 additions & 9 deletions test/components/extractFieldsTest.js
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ tape('readStreamComponents', function(test) {
'iso:country': 'YZ',
'wof:abbreviation': 'XY',
'gn:population': 98765,
'zs:pop10': 87654,
'zs:pop10': 87654
}
}
];
Expand Down Expand Up @@ -157,7 +157,7 @@ tape('readStreamComponents', function(test) {
'geom:bbox': '-13.691314,49.909613,1.771169,60.847886',
'iso:country': 'YZ',
'wof:abbreviation': 'XY',
'zs:pop10': 98765,
'zs:pop10': 98765
}
}
];
Expand Down Expand Up @@ -199,7 +199,7 @@ tape('readStreamComponents', function(test) {
'geom:bbox': '-13.691314,49.909613,1.771169,60.847886',
'iso:country': 'YZ',
'wof:abbreviation': 'XY',
'qs:pop': 98765,
'qs:pop': 98765
}
}
];
Expand All @@ -217,6 +217,7 @@ tape('readStreamComponents', function(test) {
popularity: undefined,
abbreviation: 'XY',
bounding_box: '-13.691314,49.909613,1.771169,60.847886',
hierarchies: []
}
];

Expand All @@ -240,7 +241,7 @@ tape('readStreamComponents', function(test) {
'geom:bbox': '-13.691314,49.909613,1.771169,60.847886',
'iso:country': 'YZ',
'wof:abbreviation': 'XY',
'mz:population': 98765,
'mz:population': 98765
}
}
];
Expand All @@ -258,6 +259,7 @@ tape('readStreamComponents', function(test) {
popularity: undefined,
abbreviation: 'XY',
bounding_box: '-13.691314,49.909613,1.771169,60.847886',
hierarchies: []
}
];

Expand All @@ -281,7 +283,7 @@ tape('readStreamComponents', function(test) {
'geom:bbox': '-13.691314,49.909613,1.771169,60.847886',
'iso:country': 'YZ',
'wof:abbreviation': 'XY',
'zs:pop10': 0,
'zs:pop10': 0
}
}
];
Expand Down Expand Up @@ -541,7 +543,8 @@ tape('readStreamComponents', function(test) {
population: undefined,
popularity: undefined,
bounding_box: undefined,
abbreviation: undefined
abbreviation: undefined,
hierarchies: []
}
];

Expand Down Expand Up @@ -580,7 +583,8 @@ tape('readStreamComponents', function(test) {
population: undefined,
popularity: undefined,
bounding_box: '-14.691314,50.909613,2.771169,61.847886',
abbreviation: undefined
abbreviation: undefined,
hierarchies: []
}
];

Expand Down Expand Up @@ -619,7 +623,8 @@ tape('readStreamComponents', function(test) {
population: undefined,
popularity: undefined,
bounding_box: '',
abbreviation: undefined
abbreviation: undefined,
hierarchies: []
}
];

Expand Down Expand Up @@ -657,7 +662,8 @@ tape('readStreamComponents', function(test) {
population: undefined,
popularity: undefined,
bounding_box: '',
abbreviation: undefined
abbreviation: undefined,
hierarchies: []
}
];

Expand Down
Loading

0 comments on commit d8a1b45

Please sign in to comment.