-
Notifications
You must be signed in to change notification settings - Fork 15
OSM name of a feature matches to Wikidata name #105
Changes from all commits
6796cfc
581ec9c
fb3334c
c08283b
d2fb517
2dfb7ed
9d0aea1
b39c8c8
b9fdfdc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
'use strict'; | ||
|
||
var request = require('request'); | ||
|
||
module.exports = nameMatchesToWikidata; | ||
|
||
function getWikidataName(feature, id) { | ||
if (feature.hasOwnProperty('entities') && | ||
feature['entities'].hasOwnProperty(id) && | ||
feature['entities'][id].hasOwnProperty(['labels']) && | ||
feature['entities'][id]['labels'].hasOwnProperty('en')) | ||
return feature['entities'][id]['labels']['en']['value']; | ||
else | ||
return undefined; | ||
} | ||
|
||
function getWikidataAliasNames(feature, id) { | ||
var names = []; | ||
if (feature.hasOwnProperty('entities') && | ||
feature['entities'].hasOwnProperty(id) && | ||
feature['entities'][id].hasOwnProperty(['aliases']) && | ||
feature['entities'][id]['aliases'].hasOwnProperty('en')) { | ||
var aliases = feature['entities'][id]['aliases']['en']; | ||
for (var i = 0; i < aliases.length; i++) { | ||
names.push(aliases[i]['value']); | ||
} | ||
} | ||
return names; | ||
} | ||
|
||
function nameMatchesToWikidata(newVersion, oldVersion, callback) { | ||
var result = {}; | ||
|
||
if (!newVersion) return callback(null, result); | ||
|
||
// Check if feature is newly created. | ||
if (newVersion.properties['osm:version'] !== 1) { | ||
if (!oldVersion || (newVersion.properties['name'] === oldVersion.properties['name'])) return callback(null, result); | ||
} | ||
|
||
if (newVersion.properties.hasOwnProperty('wikidata') && newVersion.properties.hasOwnProperty('name')) { | ||
|
||
var osmName = newVersion.properties['name']; | ||
var wikidataID = newVersion.properties['wikidata']; | ||
var url = 'https://www.wikidata.org/w/api.php?action=wbgetentities&ids=' + wikidataID + '&format=json'; | ||
|
||
request(url, function (error, response, body) { | ||
if (!error && response && (response.statusCode === 200)) { | ||
var wikidataFeature = JSON.parse(body); | ||
var wikidataName = getWikidataName(wikidataFeature, wikidataID); | ||
var wikidataAliasNames = getWikidataAliasNames(wikidataFeature, wikidataID); | ||
|
||
if ((osmName !== wikidataName) && (wikidataAliasNames.indexOf(osmName) === -1)) return callback(null, { | ||
'result:name_matches_to_wikidata': false | ||
}); | ||
} | ||
return callback(null, result); | ||
}); | ||
} else { | ||
return callback(null, result); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
{ | ||
"compareFunction": "name-matches-to-wikidata", | ||
"fixtures": [ | ||
{ | ||
"description": "Test OSM name different from Wikidata name", | ||
"expectedResult": { | ||
"result:name_matches_to_wikidata": false | ||
}, | ||
"newVersion": { | ||
"type": "Feature", | ||
"properties": { | ||
"osm:id": 427818536, | ||
"osm:type": "way", | ||
"name": "Central Park is now something else", | ||
"wikidata": "Q160409", | ||
"osm:version": 1 | ||
}, | ||
"geometry": null | ||
}, | ||
"oldVersion": null | ||
}, | ||
{ | ||
"description": "Test OSM name matches with aliases on Wikidata", | ||
"expectedResult": {}, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this give There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @planemad the current design of all compare functions is to return results only if interesting, and by interesting we mean mostly harmful changes. When things are good, the compare functions return nothing, i.e: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This design essentially does not differentiate between no data and a positive result. If the goal is to have every changeset reviewed by the community, comparators should pass on as much useful knowledge as possible to a human reviewer to make the final decision. This comparator has essentially done the tedious effort of looking up Wikidata and comparing names, witholding this finding will lead to duplicate human effort on the same activity. What do we gain by this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Totally agree @planemad, 💯 The easier bit is returning I am curious to hear more on this, shall we create a separate ticket for the same? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes please. We should have consistent design principles that will serve as a guide to build useful compare functions without being constrained by limitations of osmcha. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Created a new ticket here: #106 |
||
"newVersion": { | ||
"type": "Feature", | ||
"properties": { | ||
"osm:id": 3401391999, | ||
"osm:type": "node", | ||
"name": "Bengaluru", | ||
"wikidata": "Q1355" | ||
}, | ||
"geometry": null | ||
}, | ||
"oldVersion": null | ||
}, | ||
{ | ||
"description": "Test feature with no Wikidata tag", | ||
"expectedResult": {}, | ||
"newVersion": { | ||
"type": "Feature", | ||
"properties": { | ||
"osm:id": 3401391999, | ||
"osm:type": "node", | ||
"name": "Bengaluru" | ||
}, | ||
"geometry": null | ||
}, | ||
"oldVersion": null | ||
}, | ||
{ | ||
"description": "Test feature with Wikidata tag and not a name modification", | ||
"expectedResult": {}, | ||
"newVersion": { | ||
"type": "Feature", | ||
"properties": { | ||
"osm:id": 3401391999, | ||
"osm:type": "node", | ||
"name": "Bengaluru is somethong else", | ||
"osm:version": 2, | ||
"wikidata": "Q1355" | ||
}, | ||
"geometry": null | ||
}, | ||
"oldVersion": { | ||
"type": "Feature", | ||
"properties": { | ||
"osm:id": 3401391999, | ||
"osm:type": "node", | ||
"name": "Bengaluru is somethong else", | ||
"osm:version": 1, | ||
"wikidata": "Q1355" | ||
}, | ||
"geometry": null | ||
} | ||
}, | ||
{ | ||
"description": "Test new feature with Wikidata", | ||
"expectedResult": { | ||
"result:name_matches_to_wikidata": false | ||
}, | ||
"newVersion": { | ||
"type": "Feature", | ||
"properties": { | ||
"osm:id": 3401391999, | ||
"osm:type": "node", | ||
"name": "Bengaluru is something else", | ||
"osm:version": 1, | ||
"wikidata": "Q1355" | ||
}, | ||
"geometry": null | ||
}, | ||
"oldVersion": null | ||
} | ||
] | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@bkowshik Though we are assuming that we won't be hitting wikidata API too hard, but just to be 💯 , what we can do is, catch the errors when wikidata API is
ratelimited
and find out a way for it to report to us. Maybe we can also use: 'result:wikidataApiLimitExceeded: true, the way we do it for escalate and then read it on vandalism side to send us these error. [This](https://www.mediawiki.org/wiki/API:Errors_and_warnings) list the error codes returned by wikidata API. We can catch for
ratelimited`. If we get such errors from vandalism, we can figure out some other way, so as to not hit wikidata API hard and also be ensured that this comparator has worked the way it is expected.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you @amishas157 copied over your comments to a new ticket about best practices for working with external APIs here: #107