Skip to content
This repository has been archived by the owner on Mar 15, 2024. It is now read-only.

OSM name of a feature matches to Wikidata name #105

Merged
merged 9 commits into from
Mar 17, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions comparators/name-matches-to-wikidata.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
'use strict';

var request = require('request');

module.exports = nameMatchesToWikidata;

function getWikidataName(feature, id) {
if (feature.hasOwnProperty('entities') &&
feature['entities'].hasOwnProperty(id) &&
feature['entities'][id].hasOwnProperty(['labels']) &&
feature['entities'][id]['labels'].hasOwnProperty('en'))
return feature['entities'][id]['labels']['en']['value'];
else
return undefined;
}

function getWikidataAliasNames(feature, id) {
var names = [];
if (feature.hasOwnProperty('entities') &&
feature['entities'].hasOwnProperty(id) &&
feature['entities'][id].hasOwnProperty(['aliases']) &&
feature['entities'][id]['aliases'].hasOwnProperty('en')) {
var aliases = feature['entities'][id]['aliases']['en'];
for (var i = 0; i < aliases.length; i++) {
names.push(aliases[i]['value']);
}
}
return names;
}

function nameMatchesToWikidata(newVersion, oldVersion, callback) {
var result = {};

if (!newVersion) return callback(null, result);

// Check if feature is newly created.
if (newVersion.properties['osm:version'] !== 1) {
if (!oldVersion || (newVersion.properties['name'] === oldVersion.properties['name'])) return callback(null, result);
}

if (newVersion.properties.hasOwnProperty('wikidata') && newVersion.properties.hasOwnProperty('name')) {

var osmName = newVersion.properties['name'];
var wikidataID = newVersion.properties['wikidata'];
var url = 'https://www.wikidata.org/w/api.php?action=wbgetentities&ids=' + wikidataID + '&format=json';

request(url, function (error, response, body) {
if (!error && response && (response.statusCode === 200)) {
var wikidataFeature = JSON.parse(body);
var wikidataName = getWikidataName(wikidataFeature, wikidataID);
var wikidataAliasNames = getWikidataAliasNames(wikidataFeature, wikidataID);

if ((osmName !== wikidataName) && (wikidataAliasNames.indexOf(osmName) === -1)) return callback(null, {
'result:name_matches_to_wikidata': false
});
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bkowshik Though we are assuming that we won't be hitting wikidata API too hard, but just to be 💯 , what we can do is, catch the errors when wikidata API is ratelimited and find out a way for it to report to us. Maybe we can also use: 'result:wikidataApiLimitExceeded: true, the way we do it for escalate and then read it on vandalism side to send us these error. [This](https://www.mediawiki.org/wiki/API:Errors_and_warnings) list the error codes returned by wikidata API. We can catch for ratelimited`. If we get such errors from vandalism, we can figure out some other way, so as to not hit wikidata API hard and also be ensured that this comparator has worked the way it is expected.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you @amishas157 copied over your comments to a new ticket about best practices for working with external APIs here: #107

return callback(null, result);
});
} else {
return callback(null, result);
}
}
96 changes: 96 additions & 0 deletions tests/fixtures/name-matches-to-wikidata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
{
"compareFunction": "name-matches-to-wikidata",
"fixtures": [
{
"description": "Test OSM name different from Wikidata name",
"expectedResult": {
"result:name_matches_to_wikidata": false
},
"newVersion": {
"type": "Feature",
"properties": {
"osm:id": 427818536,
"osm:type": "way",
"name": "Central Park is now something else",
"wikidata": "Q160409",
"osm:version": 1
},
"geometry": null
},
"oldVersion": null
},
{
"description": "Test OSM name matches with aliases on Wikidata",
"expectedResult": {},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this give "result:name_matches_to_wikidata": true ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@planemad the current design of all compare functions is to return results only if interesting, and by interesting we mean mostly harmful changes. When things are good, the compare functions return nothing, i.e: {}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This design essentially does not differentiate between no data and a positive result.

If the goal is to have every changeset reviewed by the community, comparators should pass on as much useful knowledge as possible to a human reviewer to make the final decision. This comparator has essentially done the tedious effort of looking up Wikidata and comparing names, witholding this finding will lead to duplicate human effort on the same activity. What do we gain by this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comparators should pass on as much useful knowledge as possible to a human reviewer to make the final decision.

Totally agree @planemad, 💯 The easier bit is returning "result:name_matches_to_wikidata": true from the compare function. The harder one is on osmcha's side. Just to keep the scale of things manageable, osmcha stores just the features that the comparators have flagged for being potentially harmful/problematic and not all the features. Yes, osmcha has all the changesets but not all the features.

I am curious to hear more on this, shall we create a separate ticket for the same?


cc: @willemarcel @batpad @geohacker @amishas157

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bkowshik @planemad That makes sense. But need to find a good way to deal with parsing these different kind of results and scale it as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shall we create a separate ticket for the same?

Yes please. We should have consistent design principles that will serve as a guide to build useful compare functions without being constrained by limitations of osmcha.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Created a new ticket here: #106

"newVersion": {
"type": "Feature",
"properties": {
"osm:id": 3401391999,
"osm:type": "node",
"name": "Bengaluru",
"wikidata": "Q1355"
},
"geometry": null
},
"oldVersion": null
},
{
"description": "Test feature with no Wikidata tag",
"expectedResult": {},
"newVersion": {
"type": "Feature",
"properties": {
"osm:id": 3401391999,
"osm:type": "node",
"name": "Bengaluru"
},
"geometry": null
},
"oldVersion": null
},
{
"description": "Test feature with Wikidata tag and not a name modification",
"expectedResult": {},
"newVersion": {
"type": "Feature",
"properties": {
"osm:id": 3401391999,
"osm:type": "node",
"name": "Bengaluru is somethong else",
"osm:version": 2,
"wikidata": "Q1355"
},
"geometry": null
},
"oldVersion": {
"type": "Feature",
"properties": {
"osm:id": 3401391999,
"osm:type": "node",
"name": "Bengaluru is somethong else",
"osm:version": 1,
"wikidata": "Q1355"
},
"geometry": null
}
},
{
"description": "Test new feature with Wikidata",
"expectedResult": {
"result:name_matches_to_wikidata": false
},
"newVersion": {
"type": "Feature",
"properties": {
"osm:id": 3401391999,
"osm:type": "node",
"name": "Bengaluru is something else",
"osm:version": 1,
"wikidata": "Q1355"
},
"geometry": null
},
"oldVersion": null
}
]
}