From 6796cfca4ceb0e0a071290812bfd1d1c04f978f6 Mon Sep 17 00:00:00 2001 From: Bhargav Kowshik Date: Tue, 14 Mar 2017 12:52:38 +0530 Subject: [PATCH 1/9] WIP: Initial setup of matches-to-wikidata comparator --- comparators/matches-to-wikidata.js | 27 +++++++++ tests/fixtures/matches-to-wikidata.json | 75 +++++++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 comparators/matches-to-wikidata.js create mode 100644 tests/fixtures/matches-to-wikidata.json diff --git a/comparators/matches-to-wikidata.js b/comparators/matches-to-wikidata.js new file mode 100644 index 0000000..a2469ac --- /dev/null +++ b/comparators/matches-to-wikidata.js @@ -0,0 +1,27 @@ +'use strict'; + +var sqlite = require('sqlite3'); + +module.exports = matchesToWikidata; + +function matchesToWikidata(newVersion, oldVersion, callback) { + var result = {}; + + // If feature does not have a Wikidata tag, nothing to do. + if (!newVersion.properties.hasOwnProperty('wikidata')) return callback(null, result); + + var db = new sqlite.Database('landmarks.spatialite'); + var query = 'SELECT score FROM mb_landmark WHERE qid=? LIMIT 1;'; + var args = [newVersion.properties['wikidata']]; + + db.get(query, args, function(error, record) { + // If error or feature not in local database, nothing to do. + if (error || !record) { + db.close(); + return callback(null, result); + } + db.close(); + }); + + return callback(null, result); +} diff --git a/tests/fixtures/matches-to-wikidata.json b/tests/fixtures/matches-to-wikidata.json new file mode 100644 index 0000000..3a2ad29 --- /dev/null +++ b/tests/fixtures/matches-to-wikidata.json @@ -0,0 +1,75 @@ +{ + "compareFunction": "matches-to-wikidata", + "fixtures": [ + { + "description": "Test feature with no Wikidata tag", + "expectedResult": {}, + "newVersion": { + "type": "Feature", + "id": "node!4007681408!2", + "properties": { + "name": "Colônia Terra Nova", + "place": "suburb" + }, + "geometry": null + }, + "oldVersion": null + }, + { + "description": "Test feature that does not exist in local Wikidata database", + "expectedResult": {}, + "newVersion": { + "type": "Feature", + "id": "way!24464264!33", + "properties": { + "name": "Liwonde National Park", + "wikidata": "Q2739189" + }, + "geometry": null + }, + "oldVersion": null + }, + { + "description": "Test feature exists in local Wikidata database but OSM featues does not have a name", + "expectedResult": {}, + "newVersion": { + "type": "Feature", + "id": "node!4007681408!2", + "properties": { + "place": "suburb" + }, + "geometry": null + }, + "oldVersion": null + }, + { + "description": "Test feature exists in local Wikidata database and OSM feature with name tag match", + "expectedResult": {}, + "newVersion": { + "type": "Feature", + "id": "node!4007681408!2", + "properties": { + "name": "Colônia Terra Nova", + "place": "suburb" + }, + "geometry": null + }, + "oldVersion": null + }, + { + "description": "Test feature exists in local Wikidata database and OSM feature with name tag does not match", + "expectedResult": {}, + "newVersion": { + "type": "Feature", + "id": "node!4007681408!2", + "properties": { + "name": "Terra Nova", + "place": "suburb", + "wikidata": "Q35525" + }, + "geometry": null + }, + "oldVersion": null + } + ] +} From 581ec9c1339f318b5d6bcd69b684a05c792f2c1b Mon Sep 17 00:00:00 2001 From: Bhargav Kowshik Date: Thu, 16 Mar 2017 10:58:24 +0530 Subject: [PATCH 2/9] Script to extract name and Wikidata ID --- scripts/wikidata/parse-wikidata.js | 44 ++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 scripts/wikidata/parse-wikidata.js diff --git a/scripts/wikidata/parse-wikidata.js b/scripts/wikidata/parse-wikidata.js new file mode 100644 index 0000000..1910168 --- /dev/null +++ b/scripts/wikidata/parse-wikidata.js @@ -0,0 +1,44 @@ +var readline = require('readline'); +var fs = require('fs'); +var argv = require('minimist')(process.argv.slice(2)); + +if (!argv.wikidata) { + console.log(''); + console.log('USAGE: node parse-wikidata.js [OPTIONS]'); + console.log(''); + console.log(' OPTIONS:'); + console.log(' --wikidata Line delimited wikidata file'); + console.log(''); + return; +} + +var reader = readline.createInterface({ + input: fs.createReadStream(argv.wikidata), + output: null +}); + + +function getFeatureName(feature) { + if (feature.hasOwnProperty('labels') && feature['labels'].hasOwnProperty('en')) + return feature['labels']['en']['value']; + else + return undefined; +} + +reader.on('line', function (line) { + // Handling the trailing comma at the end of line. + line = line.slice(0, -1); + try { + var feature = JSON.parse(line); + + var featureID = feature['id']; + var featureName = getFeatureName(feature); + if (featureName !== undefined) { + console.log(featureID + ', ' + featureName); + } + } catch(error) { + } +}); + +reader.on('close', function () { +}); From fb3334c2d07b68021e06b8439d39a429fc6f9c1e Mon Sep 17 00:00:00 2001 From: Bhargav Kowshik Date: Thu, 16 Mar 2017 14:01:54 +0530 Subject: [PATCH 3/9] First version of comparator to match name to Wikidata --- comparators/name-matches-to-wikidata.js | 41 ++++++++++++++++++++ tests/fixtures/name-matches-to-wikidata.json | 40 +++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 comparators/name-matches-to-wikidata.js create mode 100644 tests/fixtures/name-matches-to-wikidata.json diff --git a/comparators/name-matches-to-wikidata.js b/comparators/name-matches-to-wikidata.js new file mode 100644 index 0000000..c61c946 --- /dev/null +++ b/comparators/name-matches-to-wikidata.js @@ -0,0 +1,41 @@ +'use strict'; + +var request = require('request'); + +module.exports = nameMatchesToWikidata; + +function getWikidataName(feature, id) { + if (feature.hasOwnProperty('entities') && + feature['entities'].hasOwnProperty(id) && + feature['entities'][id].hasOwnProperty(['labels']) && + feature['entities'][id]['labels'].hasOwnProperty('en')) + return feature['entities'][id]['labels']['en']['value']; + else + return undefined; +} + +function nameMatchesToWikidata(newVersion, oldVersion, callback) { + var result = {}; + + if (!newVersion) return callback(null, result); + + if (newVersion.properties.hasOwnProperty('wikidata') && newVersion.properties.hasOwnProperty('name')) { + + var osmName = newVersion.properties['name']; + var wikidataID = newVersion.properties['wikidata']; + var url = 'https://www.wikidata.org/w/api.php?action=wbgetentities&ids=' + wikidataID + '&format=json'; + + request(url, function (error, response, body) { + if (!error && response && (response.statusCode === 200)) { + var wikidataFeature = JSON.parse(body); + var wikidataName = getWikidataName(wikidataFeature, wikidataID); + if (osmName === wikidataName) return callback(null, { + 'result:name_matches_to_wikidata': false + }); + } + return callback(null, result); + }); + } else { + return callback(null, result); + } +} diff --git a/tests/fixtures/name-matches-to-wikidata.json b/tests/fixtures/name-matches-to-wikidata.json new file mode 100644 index 0000000..1a79d18 --- /dev/null +++ b/tests/fixtures/name-matches-to-wikidata.json @@ -0,0 +1,40 @@ +{ + "compareFunction": "name-matches-to-wikidata", + "fixtures": [ + { + "description": "Test OSM name different from Wikidata name", + "expectedResult": { + "result:name_matches_to_wikidata": false + }, + "newVersion": { + "type": "Feature", + "properties": { + "osm:id": 427818536, + "osm:type": "way", + "name": "Central Park", + "wikidata": "Q160409" + }, + "geometry": null + }, + "oldVersion": null + }, + { + "description": "Test OSM name matches with alternate name on Wikidata. Ex: Bengaluru", + "expectedResult": {}, + "newVersion": null, + "oldVersion": null + }, + { + "description": "Test feature with no Wikidata tag", + "expectedResult": {}, + "newVersion": null, + "oldVersion": null + }, + { + "description": "Test feature with Wikidata tag and not a name modification", + "expectedResult": {}, + "newVersion": null, + "oldVersion": null + } + ] +} From c08283b67383ac958f5380ba9fb80eb40fa363ab Mon Sep 17 00:00:00 2001 From: Bhargav Kowshik Date: Thu, 16 Mar 2017 14:02:33 +0530 Subject: [PATCH 4/9] Script to parse through Wikidata is not required --- scripts/wikidata/parse-wikidata.js | 44 ------------------------------ 1 file changed, 44 deletions(-) delete mode 100644 scripts/wikidata/parse-wikidata.js diff --git a/scripts/wikidata/parse-wikidata.js b/scripts/wikidata/parse-wikidata.js deleted file mode 100644 index 1910168..0000000 --- a/scripts/wikidata/parse-wikidata.js +++ /dev/null @@ -1,44 +0,0 @@ -var readline = require('readline'); -var fs = require('fs'); -var argv = require('minimist')(process.argv.slice(2)); - -if (!argv.wikidata) { - console.log(''); - console.log('USAGE: node parse-wikidata.js [OPTIONS]'); - console.log(''); - console.log(' OPTIONS:'); - console.log(' --wikidata Line delimited wikidata file'); - console.log(''); - return; -} - -var reader = readline.createInterface({ - input: fs.createReadStream(argv.wikidata), - output: null -}); - - -function getFeatureName(feature) { - if (feature.hasOwnProperty('labels') && feature['labels'].hasOwnProperty('en')) - return feature['labels']['en']['value']; - else - return undefined; -} - -reader.on('line', function (line) { - // Handling the trailing comma at the end of line. - line = line.slice(0, -1); - try { - var feature = JSON.parse(line); - - var featureID = feature['id']; - var featureName = getFeatureName(feature); - if (featureName !== undefined) { - console.log(featureID + ', ' + featureName); - } - } catch(error) { - } -}); - -reader.on('close', function () { -}); From d2fb517e580c042f5fe1e52ab263dcdd3bfadbf1 Mon Sep 17 00:00:00 2001 From: Bhargav Kowshik Date: Thu, 16 Mar 2017 14:04:15 +0530 Subject: [PATCH 5/9] Remove files from a previous iteration of the script --- comparators/matches-to-wikidata.js | 27 --------- tests/fixtures/matches-to-wikidata.json | 75 ------------------------- 2 files changed, 102 deletions(-) delete mode 100644 comparators/matches-to-wikidata.js delete mode 100644 tests/fixtures/matches-to-wikidata.json diff --git a/comparators/matches-to-wikidata.js b/comparators/matches-to-wikidata.js deleted file mode 100644 index a2469ac..0000000 --- a/comparators/matches-to-wikidata.js +++ /dev/null @@ -1,27 +0,0 @@ -'use strict'; - -var sqlite = require('sqlite3'); - -module.exports = matchesToWikidata; - -function matchesToWikidata(newVersion, oldVersion, callback) { - var result = {}; - - // If feature does not have a Wikidata tag, nothing to do. - if (!newVersion.properties.hasOwnProperty('wikidata')) return callback(null, result); - - var db = new sqlite.Database('landmarks.spatialite'); - var query = 'SELECT score FROM mb_landmark WHERE qid=? LIMIT 1;'; - var args = [newVersion.properties['wikidata']]; - - db.get(query, args, function(error, record) { - // If error or feature not in local database, nothing to do. - if (error || !record) { - db.close(); - return callback(null, result); - } - db.close(); - }); - - return callback(null, result); -} diff --git a/tests/fixtures/matches-to-wikidata.json b/tests/fixtures/matches-to-wikidata.json deleted file mode 100644 index 3a2ad29..0000000 --- a/tests/fixtures/matches-to-wikidata.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "compareFunction": "matches-to-wikidata", - "fixtures": [ - { - "description": "Test feature with no Wikidata tag", - "expectedResult": {}, - "newVersion": { - "type": "Feature", - "id": "node!4007681408!2", - "properties": { - "name": "Colônia Terra Nova", - "place": "suburb" - }, - "geometry": null - }, - "oldVersion": null - }, - { - "description": "Test feature that does not exist in local Wikidata database", - "expectedResult": {}, - "newVersion": { - "type": "Feature", - "id": "way!24464264!33", - "properties": { - "name": "Liwonde National Park", - "wikidata": "Q2739189" - }, - "geometry": null - }, - "oldVersion": null - }, - { - "description": "Test feature exists in local Wikidata database but OSM featues does not have a name", - "expectedResult": {}, - "newVersion": { - "type": "Feature", - "id": "node!4007681408!2", - "properties": { - "place": "suburb" - }, - "geometry": null - }, - "oldVersion": null - }, - { - "description": "Test feature exists in local Wikidata database and OSM feature with name tag match", - "expectedResult": {}, - "newVersion": { - "type": "Feature", - "id": "node!4007681408!2", - "properties": { - "name": "Colônia Terra Nova", - "place": "suburb" - }, - "geometry": null - }, - "oldVersion": null - }, - { - "description": "Test feature exists in local Wikidata database and OSM feature with name tag does not match", - "expectedResult": {}, - "newVersion": { - "type": "Feature", - "id": "node!4007681408!2", - "properties": { - "name": "Terra Nova", - "place": "suburb", - "wikidata": "Q35525" - }, - "geometry": null - }, - "oldVersion": null - } - ] -} From 2dfb7ed07c79af80b4398b893e5f31584453dc20 Mon Sep 17 00:00:00 2001 From: Bhargav Kowshik Date: Thu, 16 Mar 2017 14:06:55 +0530 Subject: [PATCH 6/9] Corrected both logic and test to check not equal --- comparators/name-matches-to-wikidata.js | 2 +- tests/fixtures/name-matches-to-wikidata.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/comparators/name-matches-to-wikidata.js b/comparators/name-matches-to-wikidata.js index c61c946..9a4cca1 100644 --- a/comparators/name-matches-to-wikidata.js +++ b/comparators/name-matches-to-wikidata.js @@ -29,7 +29,7 @@ function nameMatchesToWikidata(newVersion, oldVersion, callback) { if (!error && response && (response.statusCode === 200)) { var wikidataFeature = JSON.parse(body); var wikidataName = getWikidataName(wikidataFeature, wikidataID); - if (osmName === wikidataName) return callback(null, { + if (osmName !== wikidataName) return callback(null, { 'result:name_matches_to_wikidata': false }); } diff --git a/tests/fixtures/name-matches-to-wikidata.json b/tests/fixtures/name-matches-to-wikidata.json index 1a79d18..c94eb34 100644 --- a/tests/fixtures/name-matches-to-wikidata.json +++ b/tests/fixtures/name-matches-to-wikidata.json @@ -11,7 +11,7 @@ "properties": { "osm:id": 427818536, "osm:type": "way", - "name": "Central Park", + "name": "Central Park is now something else", "wikidata": "Q160409" }, "geometry": null From 9d0aea18d4aad7876ada0201f1305221a58da5ea Mon Sep 17 00:00:00 2001 From: Bhargav Kowshik Date: Thu, 16 Mar 2017 14:08:48 +0530 Subject: [PATCH 7/9] Fixed some linting errors --- comparators/name-matches-to-wikidata.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/comparators/name-matches-to-wikidata.js b/comparators/name-matches-to-wikidata.js index 9a4cca1..07b249f 100644 --- a/comparators/name-matches-to-wikidata.js +++ b/comparators/name-matches-to-wikidata.js @@ -5,13 +5,13 @@ var request = require('request'); module.exports = nameMatchesToWikidata; function getWikidataName(feature, id) { - if (feature.hasOwnProperty('entities') && - feature['entities'].hasOwnProperty(id) && - feature['entities'][id].hasOwnProperty(['labels']) && - feature['entities'][id]['labels'].hasOwnProperty('en')) - return feature['entities'][id]['labels']['en']['value']; - else - return undefined; + if (feature.hasOwnProperty('entities') && + feature['entities'].hasOwnProperty(id) && + feature['entities'][id].hasOwnProperty(['labels']) && + feature['entities'][id]['labels'].hasOwnProperty('en')) + return feature['entities'][id]['labels']['en']['value']; + else + return undefined; } function nameMatchesToWikidata(newVersion, oldVersion, callback) { From b39c8c8e3888b0686badcb07483a9c36671d6d7a Mon Sep 17 00:00:00 2001 From: Bhargav Kowshik Date: Thu, 16 Mar 2017 14:25:56 +0530 Subject: [PATCH 8/9] Check alias on Wikidata --- comparators/name-matches-to-wikidata.js | 18 +++++- tests/fixtures/name-matches-to-wikidata.json | 58 ++++++++++++++++++-- 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/comparators/name-matches-to-wikidata.js b/comparators/name-matches-to-wikidata.js index 07b249f..96a49f5 100644 --- a/comparators/name-matches-to-wikidata.js +++ b/comparators/name-matches-to-wikidata.js @@ -14,6 +14,20 @@ function getWikidataName(feature, id) { return undefined; } +function getWikidataAliasNames(feature, id) { + var names = []; + if (feature.hasOwnProperty('entities') && + feature['entities'].hasOwnProperty(id) && + feature['entities'][id].hasOwnProperty(['aliases']) && + feature['entities'][id]['aliases'].hasOwnProperty('en')) { + var aliases = feature['entities'][id]['aliases']['en']; + for (var i = 0; i < aliases.length; i++) { + names.push(aliases[i]['value']); + } + } + return names; +} + function nameMatchesToWikidata(newVersion, oldVersion, callback) { var result = {}; @@ -29,7 +43,9 @@ function nameMatchesToWikidata(newVersion, oldVersion, callback) { if (!error && response && (response.statusCode === 200)) { var wikidataFeature = JSON.parse(body); var wikidataName = getWikidataName(wikidataFeature, wikidataID); - if (osmName !== wikidataName) return callback(null, { + var wikidataAliasNames = getWikidataAliasNames(wikidataFeature, wikidataID); + + if ((osmName !== wikidataName) && (wikidataAliasNames.indexOf(osmName) === -1)) return callback(null, { 'result:name_matches_to_wikidata': false }); } diff --git a/tests/fixtures/name-matches-to-wikidata.json b/tests/fixtures/name-matches-to-wikidata.json index c94eb34..36d0164 100644 --- a/tests/fixtures/name-matches-to-wikidata.json +++ b/tests/fixtures/name-matches-to-wikidata.json @@ -19,21 +19,71 @@ "oldVersion": null }, { - "description": "Test OSM name matches with alternate name on Wikidata. Ex: Bengaluru", + "description": "Test OSM name matches with aliases on Wikidata", "expectedResult": {}, - "newVersion": null, + "newVersion": { + "type": "Feature", + "properties": { + "osm:id": 3401391999, + "osm:type": "node", + "name": "Bengaluru", + "wikidata": "Q1355" + }, + "geometry": null + }, "oldVersion": null }, { "description": "Test feature with no Wikidata tag", "expectedResult": {}, - "newVersion": null, + "newVersion": { + "type": "Feature", + "properties": { + "osm:id": 3401391999, + "osm:type": "node", + "name": "Bengaluru" + }, + "geometry": null + }, "oldVersion": null }, { "description": "Test feature with Wikidata tag and not a name modification", "expectedResult": {}, - "newVersion": null, + "newVersion": { + "type": "Feature", + "properties": { + "osm:id": 3401391999, + "osm:type": "node", + "name": "Bengaluru", + "osm:version": 2 + }, + "geometry": null + }, + "oldVersion": { + "type": "Feature", + "properties": { + "osm:id": 3401391999, + "osm:type": "node", + "name": "Bengaluru", + "osm:version": 1 + }, + "geometry": null + } + }, + { + "description": "Test new feature with Wikidata", + "expectedResult": {}, + "newVersion": { + "type": "Feature", + "properties": { + "osm:id": 3401391999, + "osm:type": "node", + "name": "Bengaluru", + "osm:version": 1 + }, + "geometry": null + }, "oldVersion": null } ] From b9fdfdc4f9fa6e99d1ff00e1cd872848654d7314 Mon Sep 17 00:00:00 2001 From: Bhargav Kowshik Date: Thu, 16 Mar 2017 14:37:11 +0530 Subject: [PATCH 9/9] Test if feature is new or is a name modification --- comparators/name-matches-to-wikidata.js | 5 +++++ tests/fixtures/name-matches-to-wikidata.json | 22 +++++++++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/comparators/name-matches-to-wikidata.js b/comparators/name-matches-to-wikidata.js index 96a49f5..054f861 100644 --- a/comparators/name-matches-to-wikidata.js +++ b/comparators/name-matches-to-wikidata.js @@ -33,6 +33,11 @@ function nameMatchesToWikidata(newVersion, oldVersion, callback) { if (!newVersion) return callback(null, result); + // Check if feature is newly created. + if (newVersion.properties['osm:version'] !== 1) { + if (!oldVersion || (newVersion.properties['name'] === oldVersion.properties['name'])) return callback(null, result); + } + if (newVersion.properties.hasOwnProperty('wikidata') && newVersion.properties.hasOwnProperty('name')) { var osmName = newVersion.properties['name']; diff --git a/tests/fixtures/name-matches-to-wikidata.json b/tests/fixtures/name-matches-to-wikidata.json index 36d0164..bbb890a 100644 --- a/tests/fixtures/name-matches-to-wikidata.json +++ b/tests/fixtures/name-matches-to-wikidata.json @@ -12,7 +12,8 @@ "osm:id": 427818536, "osm:type": "way", "name": "Central Park is now something else", - "wikidata": "Q160409" + "wikidata": "Q160409", + "osm:version": 1 }, "geometry": null }, @@ -55,8 +56,9 @@ "properties": { "osm:id": 3401391999, "osm:type": "node", - "name": "Bengaluru", - "osm:version": 2 + "name": "Bengaluru is somethong else", + "osm:version": 2, + "wikidata": "Q1355" }, "geometry": null }, @@ -65,22 +67,26 @@ "properties": { "osm:id": 3401391999, "osm:type": "node", - "name": "Bengaluru", - "osm:version": 1 + "name": "Bengaluru is somethong else", + "osm:version": 1, + "wikidata": "Q1355" }, "geometry": null } }, { "description": "Test new feature with Wikidata", - "expectedResult": {}, + "expectedResult": { + "result:name_matches_to_wikidata": false + }, "newVersion": { "type": "Feature", "properties": { "osm:id": 3401391999, "osm:type": "node", - "name": "Bengaluru", - "osm:version": 1 + "name": "Bengaluru is something else", + "osm:version": 1, + "wikidata": "Q1355" }, "geometry": null },