Skip to content

Commit

Permalink
fixes wikidata entry creation
Browse files Browse the repository at this point in the history
  • Loading branch information
WolfgangFahl committed Mar 6, 2024
1 parent 7e4a95c commit ca51737
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 67 deletions.
2 changes: 1 addition & 1 deletion ceurws/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class Version(object):
name = "CEUR-WS Volume Browser"
version = ceurws.__version__
date = "2022-08-14"
updated = "2024-03-05"
updated = "2024-03-06"
description = "CEUR-WS Volume browser"

authors = "Tim Holzheim, Wolfgang Fahl"
Expand Down
32 changes: 16 additions & 16 deletions ceurws/volume_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,17 +179,18 @@ async def onWikidataButtonClick(self, _args):
"""
try:
wdRecord = self.wdSync.getWikidataProceedingsRecord(self.volume)
qId, err = self.wdSync.addProceedingsToWikidata(
result = self.wdSync.addProceedingsToWikidata(
wdRecord, write=True, ignoreErrors=False
)
qId=result.qid
if qId is not None:
msg = f"wikidata export of {self.volume.number} to {qId} done"
ui.notify(msg)
self.updateWikidataSpan(
qId=qId, volume=self.volume
)
else:
err_msg = f"error:{err}"
err_msg = f"error:{result.error}"
self.solution.log_view.push(err_msg)
except Exception as ex:
self.solution.handle_exception(ex)
Expand Down Expand Up @@ -417,15 +418,16 @@ async def createProceedingsItemFromVolume(self, volume: Volume):
if self.dry_run:
markup=self.get_dict_as_html_table(wdRecord)
self.add_msg(markup)
qId, errors = self.wdSync.addProceedingsToWikidata(
result= self.wdSync.addProceedingsToWikidata(
wdRecord, write=write, ignoreErrors=self.ignore_errors
)
qId=result.qid
if qId is not None:
proc_link=self.createWdLink(qId, f"Proceedings entry for Vol {volume.number} {qId} was created")
self.add_msg(proc_link)
else:
self.add_msg(f"Creating wikidata Proceedings entry for Vol {volume.number} failed")
for key,value in errors.items():
for key,value in result.errors.items():
msg=f"{key}:{value}"
self.add_msg(msg)
except Exception as ex:
Expand All @@ -444,20 +446,18 @@ async def createEventItemAndLinkProceedings(
"""
try:
write=self.optional_login()
(
proceedingsQId,
eventQId,
msg,
) = self.wdSync.doCreateEventItemAndLinkProceedings(volume, proceedingsWikidataId,write=write)
results = self.wdSync.doCreateEventItemAndLinkProceedings(volume, proceedingsWikidataId,write=write)
if write:
self.wdSync.logout()
ui.notify(msg)
self.add_msg("<br>"+msg)
if eventQId:
event_link=self.createWdLink(eventQId,f"Event for Vol {volume.number} {eventQId}<br>")
proc_link=self.createWdLink(proceedingsQId, f"Proceedings for Vol {volume.number} {proceedingsQId}<br>")
self.add_msg(event_link)
self.add_msg(proc_link)
for key,result in results.items():
if result.qid:
link=self.createWdLink(result.qid,f"{key} for Vol {volume.number} {result.qid}")
self.add_msg("<br>"+link)
if result.msg:
self.add_msg("<br>"+result.msg)
if len(result.errors)>0:
for error in result.errors.values():
self.add_msg(f"error {str(error)}")
except Exception as ex:
self.solution.handle_exception(ex)
pass
109 changes: 61 additions & 48 deletions ceurws/wikidatasync.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,15 @@
from lodstorage.query import EndpointManager, QueryManager
from lodstorage.sparql import SPARQL
from lodstorage.sql import SQLDB
from ez_wikidata.wikidata import UrlReference, Wikidata
from ez_wikidata.wikidata import UrlReference, Wikidata, WikidataResult
from ez_wikidata.wdproperty import PropertyMapping, WdDatatype

from ceurws.ceur_ws import CEURWS, PaperManager, Volume, VolumeManager
from ceurws.models.dblp import DblpPaper, DblpProceeding, DblpScholar
from ceurws.utils.json_cache import JsonCacheManager
from ceurws.indexparser import ParserConfig


class WikidataSync(object):
"""
synchronize with wikidata
Expand Down Expand Up @@ -400,22 +401,26 @@ def addProceedingsToWikidata(
"""
if write:
self.login()
qid, errors = self.doAddProceedingsToWikidata(record, write, ignoreErrors)
result = self.doAddProceedingsToWikidata(record, write, ignoreErrors)
if write:
self.logout()
return qid, errors
return result

def doAddProceedingsToWikidata(
self, record: dict, write: bool = True, ignoreErrors: bool = False
):
self,
record: dict,
write: bool = True,
ignoreErrors: bool = False
)->WikidataResult:
"""
Creates a wikidata proceedings entry for the given record
Args:
record(dict): the data to add
write(bool): if True actually write
ignoreErrors(bool): if True ignore errors
Returns:
WikidataResult: the result of the add operation
"""
mappings = [
PropertyMapping(
Expand Down Expand Up @@ -484,14 +489,14 @@ def doAddProceedingsToWikidata(
),
]
reference = UrlReference(url=record.get("ceurwsUrl"))
qId, errors = self.wd.add_record(
result = self.wd.add_record(
record=record,
property_mappings=mappings,
write=write,
ignore_errors=ignoreErrors,
reference=reference,
)
return qId, errors
return result

def askWikidata(self, askQuery: str) -> bool:
try:
Expand Down Expand Up @@ -532,7 +537,7 @@ def addLinkBetweenProceedingsAndEvent(
proceedingsWikidataId: str = None,
write: bool = True,
ignoreErrors: bool = False,
):
)->WikidataResult:
"""
add the link between the wikidata proceedings item and the given event wikidata item
Args:
Expand All @@ -541,6 +546,9 @@ def addLinkBetweenProceedingsAndEvent(
proceedingsWikidataId: wikidata id of the proceedings item
write(bool): if True actually write
ignoreErrors(bool): if True ignore errors
Returns:
WikidataResult: the result of the add operation
"""
if proceedingsWikidataId is None:
proceedingsWikidataId = self.getWikidataIdByVolumeNumber(
Expand All @@ -559,15 +567,15 @@ def addLinkBetweenProceedingsAndEvent(
volume_url = Volume.getVolumeUrlOf(volumeNumber)
reference = UrlReference(volume_url)
record = {"isProceedingsFrom": eventItemQid}
_, errors = self.wd.add_record(
result = self.wd.add_record(
item_id=proceedingsWikidataId,
record=record,
property_mappings=mappings,
write=write,
ignore_errors=ignoreErrors,
reference=reference,
)
return proceedingsWikidataId, errors
return result

def doAddEventToWikidata(
self, record: dict, write: bool = True, ignoreErrors: bool = False
Expand All @@ -580,7 +588,7 @@ def doAddEventToWikidata(
ignoreErrors(bool): if True ignore errors
Returns:
(qid, errors) id of the created entry and occurred errors
WikidataResult: the result of the add operation
"""
entityQid = record.get("instanceOf")
entity = record.get("description")
Expand Down Expand Up @@ -657,29 +665,32 @@ def doAddEventToWikidata(
]
reference_url = record.pop("referenceUrl")
reference = UrlReference(url=reference_url)
qId, errors = self.wd.add_record(
result = self.wd.add_record(
record=record,
property_mappings=mappings,
write=write,
ignore_errors=ignoreErrors,
reference=reference,
)
return qId, errors
return result

def addDblpPublicationId(
self,
volumeNumber: int,
dblpRecordId: str = None,
write: bool = True,
ignoreErrors: bool = False,
):
)->WikidataResult:
"""
try to add the dblp publication id (P8978) to the proceedings record
Args:
volumeNumber: ceurws volumenumber of the proceedings
dblpRecordId: dblp record id to add to the proceedings item. If None query dblp for the record id
write: if True actually write
ignoreErrors(bool): if True ignore errors
Returns:
WikidataResult: the result of the add operation
"""
proceedingsWikidataId = self.getWikidataIdByVolumeNumber(number=volumeNumber)
if proceedingsWikidataId is None:
Expand Down Expand Up @@ -723,15 +734,15 @@ def addDblpPublicationId(
volume_url = Volume.getVolumeUrlOf(volumeNumber)
reference = UrlReference(volume_url)
record = {"DBLP publication ID": dblpRecordId}
_, errors = self.wd.add_record(
result = self.wd.add_record(
item_id=proceedingsWikidataId,
record=record,
property_mappings=mappings,
write=write,
ignore_errors=ignoreErrors,
reference=reference,
)
return True, errors
return result

def addAcronymToItem(
self,
Expand Down Expand Up @@ -789,7 +800,7 @@ def addOfficialWebsiteToItem(
ignoreErrors(bool): if True ignore errors
Returns:
(qid, errors) id of the created entry and occurred errors
WikidataResult: the result of the add operation
"""
mappings = [
PropertyMapping(
Expand Down Expand Up @@ -1002,8 +1013,11 @@ def getEventTypeFromTitle(cls, title: str) -> (str, str):
return academicWorkshop

def doCreateEventItemAndLinkProceedings(
self, volume: Volume, proceedingsWikidataId: str = None, write: bool = False
):
self,
volume: Volume,
proceedingsWikidataId: str = None,
write: bool = False
)->Dict[str,WikidataResult]:
"""
Create event wikidata item for given volume and link the proceedings with the event
Args:
Expand All @@ -1014,63 +1028,63 @@ def doCreateEventItemAndLinkProceedings(
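Returns:
Dict[str,WikidataResult]: the results keyed by step name
("Proceedings", "dblp", "Event", "link"); a key is only present
if the corresponding step produced a result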
"""
results={}
volNumber = getattr(volume, "number")
if proceedingsWikidataId is None and self.checkIfProceedingsFromExists(
volNumber, eventItemQid=None
):
# link between proceedings and event already exists
proceedingsWikidataId = self.getWikidataIdByVolumeNumber(number=volNumber)
return (
proceedingsWikidataId,
None,
"Event and Link between proceedings and event already exists",
results["Proceedings"]=WikidataResult(
qid=proceedingsWikidataId,
msg=f"Proceedings for Vol-{volNumber} already exists",
)
dblpEntityIds = self.dbpEndpoint.getDblpIdByVolumeNumber(volNumber)
if len(dblpEntityIds) > 1:
return (
None,
None,
f"Multiple dblpEventIds found for Vol-{volNumber}: {','.join(dblpEntityIds)}",
)
dblpEntityId=None
msg=None
if len(dblpEntityIds) > 1:
msg=f"Multiple dblpEventIds found for Vol-{volNumber}: {','.join(dblpEntityIds)}",
elif len(dblpEntityIds) == 1:
dblpEntityId = dblpEntityIds[0]
else:
dblpEntityId = None
results["dblp"]=WikidataResult(
qid=dblpEntityId,
msg=msg
)
wdItems = self.getWikidataIdByDblpEventId(dblpEntityId, volNumber)
msg = ""
eventQid = None
errors = None
if len(wdItems) == 0:
# event item does not exist → create a new one
volume.resolveLoctime()
eventRecord = self.getWikidataEventRecord(volume)
eventQid, errors = self.doAddEventToWikidata(
event_result= self.doAddEventToWikidata(
record=eventRecord, write=write
)
msg += "Created Event item;"
eventQid=event_result.qid
results["Event"]=event_result
elif len(wdItems) == 1:
# the event item already exists
eventQid = wdItems[0]
msg += "Event item already exists;"
results["Event"]=WikidataResult(
# the event item already exists
qid = wdItems[0],
msg = "Event item already exists;"
)
else:
return None, None, f"Multiple event entries exist: {','.join(wdItems)}"
results["Event"]=WikidataResult(
msg=f"Multiple event entries exist: {','.join(wdItems)}"
)
if eventQid is not None:
# add link between Proceedings and the event item
proceedingsWikidataId, errors = self.addLinkBetweenProceedingsAndEvent(
link_result = self.addLinkBetweenProceedingsAndEvent(
volumeNumber=volNumber,
eventItemQid=eventQid,
proceedingsWikidataId=proceedingsWikidataId,
write=write,
)
msg += "Added Link between Proceedings and Event item;"
return proceedingsWikidataId, eventQid, msg

else:
return (
None,
None,
f"An error occured during the creation of the event entry for {volume}",
)
link_result.msg="Added Link between Proceedings and Event item;"
results["link"]=link_result
return results

@classmethod
def removeWdPrefix(cls, value: str):
Expand Down Expand Up @@ -1126,7 +1140,6 @@ def getAuthorByIds(self, identifiers: dict) -> Dict[str, str]:
res[item_id] = name
return res


class DblpEndpoint:
"""
provides queries and a dblp endpoint to execute them
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ dependencies = [
# https://pypi.org/project/pyGenericSpreadSheet/
#'pyGenericSpreadSheet>=0.2.4',
# https://pypi.org/project/py-ez-wikidata/
'py-ez-wikidata>=0.1.2',
'py-ez-wikidata>=0.1.5',
# https://pypi.org/project/py-3rdparty-mediawiki/
'py-3rdparty-mediawiki>=0.9.3',
# https://github.com/tqdm/tqdm
Expand Down
6 changes: 5 additions & 1 deletion tests/test_wikidatasync.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,13 +623,17 @@ def test_add_missing_event_homepages(self):
if event_record.get("homepage", None) is None:
print("adding", homepage)
record = {"homepage": homepage}
self.wdSync.wd.add_record(
result=self.wdSync.wd.add_record(
record=record,
item_id=event_qid,
property_mappings=prop_mapping,
write=True,
reference=UrlReference(url=parser.volumeUrl(volnumber)),
)
if (len(result.errors)>0):
print(f"error adding homepage for volume {volnumber} failed")
for index,error in enumerate(result.errors.values()):
print(f"{index+1}:{str(error)}")
else:
print("event has already a homepage")
else:
Expand Down

0 comments on commit ca51737

Please sign in to comment.