Skip to content

Commit

Permalink
refactors to use py_ez_wikidata
Browse files Browse the repository at this point in the history
  • Loading branch information
WolfgangFahl committed Mar 5, 2024
1 parent 6b197dc commit 7e4a95c
Show file tree
Hide file tree
Showing 8 changed files with 122 additions and 37 deletions.
3 changes: 0 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,4 @@ CEUR make python implementation for https://ceur-ws.org/
## Docs and Tutorials
[Wiki](https://wiki.bitplan.com/index.php/PyCEURmake)

## CEUR-WS Volume Browser
[Volume Browser http://ceur-ws-browser.bitplan.com](http://ceur-ws-browser.bitplan.com)


1 change: 0 additions & 1 deletion ceurws/ceur_ws_web_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from ceurws.webserver import CeurWsWebServer
from ceurws.wikidatasync import WikidataSync


class CeurWsCmd(WebserverCmd):
"""
command line handling for CEUR-WS Volume browser
Expand Down
10 changes: 10 additions & 0 deletions ceurws/models/dblp.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@

@dataclass
class DblpScholar:
"""
a dblp scholar
example: Tim Berners-Lee
https://dblp.org/pid/b/TimBernersLee.html
"""
dblp_author_id: str
label: Optional[str] = None
wikidata_id: Optional[str] = None
Expand All @@ -27,6 +34,9 @@ def __post_init__(self):

@dataclass
class DblpProceeding:
"""
"""
dblp_publication_id: str
volume_number: int
title: str
Expand Down
2 changes: 1 addition & 1 deletion ceurws/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class Version(object):
name = "CEUR-WS Volume Browser"
version = ceurws.__version__
date = "2022-08-14"
updated = "2024-02-22"
updated = "2024-03-05"
description = "CEUR-WS Volume browser"

authors = "Tim Holzheim, Wolfgang Fahl"
Expand Down
17 changes: 15 additions & 2 deletions ceurws/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
@author: wf
"""
from ngwidgets.widgets import Link

from tabulate import tabulate

class View:
"""
Expand Down Expand Up @@ -38,7 +38,20 @@ def createLink(self, url: str, text: str):
"""
link = Link.create(url, text, target="_blank")
return link


def createWdLink(self, qid: str, text: str):
    """
    create a link to the Wikidata item with the given id

    Args:
        qid(str): the Wikidata item id that is appended to View.wdPrefix
        text(str): the text to show for the link

    Returns:
        the link as created by createLink
    """
    return self.createLink(f"{View.wdPrefix}/{qid}", text)

def get_dict_as_html_table(self, data_dict) -> str:
    """
    render the given dictionary as a two column HTML table

    Args:
        data_dict: the dictionary to render - each item becomes one
            row with the key in the first and the value in the second column

    Returns:
        str: the table markup as generated by tabulate with tablefmt "html"
    """
    # tabulate expects a list of rows - one [key, value] row per entry
    rows = []
    for entry_key, entry_value in data_dict.items():
        rows.append([entry_key, entry_value])
    return tabulate(rows, tablefmt="html", headers=["Key", "Value"])

def createExternalLink(
self,
row: dict,
Expand Down
107 changes: 88 additions & 19 deletions ceurws/volume_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from ceurws.wikidatasync import DblpEndpoint
from ngwidgets.progress import NiceguiProgressbar


class VolumeView(View):
"""
displays a single volume
Expand Down Expand Up @@ -70,7 +69,7 @@ def updateWikidataSpan(self, qId: str, volume: Volume):
if self.wdSpan is None:
self.wdSpan = ui.html()
volume_link = Link.create(
url=self.volume.url, text=f"{volume}:{volume.acronym}"
url=self.volume.url, text=f"{volume.number}:{volume.acronym}"
)
wd_url = self.wdSync.itemUrl(qId)
wd_link = Link.create(url=wd_url, text=f"{qId} ")
Expand Down Expand Up @@ -184,10 +183,10 @@ async def onWikidataButtonClick(self, _args):
wdRecord, write=True, ignoreErrors=False
)
if qId is not None:
msg = f"wikidata export of {self.volume.volumeNumber} to {qId} done"
msg = f"wikidata export of {self.volume.number} to {qId} done"
ui.notify(msg)
self.updateWikidataSpan(
wdSync=self.app.wdSync, qId=qId, volume=self.volume
qId=qId, volume=self.volume
)
else:
err_msg = f"error:{err}"
Expand All @@ -213,6 +212,7 @@ def __init__(self, solution, parent):
self.parent = parent
self.wdSync=self.solution.wdSync
self.dry_run=True
self.ignore_errors=False
self.get_volume_lod()
self.setup_ui()

Expand All @@ -238,6 +238,8 @@ def setup_ui(self):
.tooltip("Export to Wikidata")
)
self.dry_run_switch = ui.switch("dry run").bind_value(self,"dry_run")
self.ignore_errors_check_box=ui.checkbox("ignore_errors", value=self.ignore_errors).bind_value(self, "ignore_errors")

pass
self.progress_bar=NiceguiProgressbar(total=100,desc="added",unit="volume")
with ui.row() as self.log_row:
Expand All @@ -255,6 +257,24 @@ def setup_ui(self):
except Exception as ex:
self.solution.handle_exception(ex)

def clear_msg(self, msg: str = ""):
    """
    replace the complete content of the log_view

    Args:
        msg(str): the new content to display - empty by default
    """
    log_view = self.log_view
    log_view.content = msg

def add_msg(self, html_markup: str):
    """
    append html markup to the current content of the log_view

    Args:
        html_markup(str): the html formatted message to append
    """
    log_view = self.log_view
    log_view.content += html_markup

async def onWikidataButtonClick(self, _args):
"""
handle wikidata sync request
Expand All @@ -264,7 +284,8 @@ async def onWikidataButtonClick(self, _args):
for row in selected_rows:
vol_number=row["#"]
volume = self.wdSync.volumesByNumber[vol_number]
self.log_view.content=f"{len(selected_rows)} Volumes selected<br>"
msg=f"{len(selected_rows)} Volumes selected<br>"
self.clear_msg(msg)
await self.add_or_update_volume_in_wikidata(volume)
pass
except Exception as ex:
Expand All @@ -281,37 +302,37 @@ async def on_check_recently_update_volumes_button_click(self,args):
volumesByNumber,
addedVolumeNumberList,
) = self.wdSync.getRecentlyAddedVolumeList()
self.log_view.content+= f"<br>found {len(addedVolumeNumberList)} new volumes"
self.add_msg(f"<br>found {len(addedVolumeNumberList)} new volumes")
total = len(addedVolumeNumberList)
self.progress_bar.total=total
for i, volumeNumber in enumerate(addedVolumeNumberList):
if i % 100 == 0 and i != 0:
self.wdSync.storeVolumes()
time.sleep(60)
volume = volumesByNumber[volumeNumber]
self.updateRecentlyAddedVolume(volume, self.log_view, i + 1, total)
self.updateRecentlyAddedVolume(volume,i + 1, total)
url=f"/volume/{volume.number}"
text=f"{volume}:{volume.acronym}"
link=self.createLink(url,text)
self.log_view.content+=f"{link}&nbsp;"
self.add_msg+=f":{link}"
pass
self.wdSync.storeVolumes()
self.progress_bar.reset()
self.lod_grid.update()
except Exception as ex:
self.solution.handle_exception(ex)

def updateRecentlyAddedVolume(self, volume, feedback, index, total):
def updateRecentlyAddedVolume(self, volume, index, total):
"""
update a recently added Volume
Args:
volume(Volume): the volume to update
feedback: the div where to but the feedback message
index(int): the relative index of the volume currently being added
total(int): the total number of volumes currently being added
"""
feedback.content+= f"reading {index}/{total} from {volume.url}"
html_msg=f"<br>reading {index}/{total} from {volume.url}"
self.add_msg(html_msg)
volume.extractValuesFromVolumePage()
self.wdSync.addVolume(volume)
self.progress_bar.update_value(index)
Expand Down Expand Up @@ -344,6 +365,7 @@ async def add_or_update_volume_in_wikidata(self,volume:Volume):
try:
msg=f"trying to add Volume {volume.number} to wikidata"
ui.notify(msg)
self.add_msg(msg+"<br>")
proceedingsWikidataId = await self.createProceedingsItemFromVolume(
volume
)
Expand All @@ -352,37 +374,66 @@ async def add_or_update_volume_in_wikidata(self,volume:Volume):
volume, proceedingsWikidataId
)
else:
msg=f"Volume {volume.vol_number} proceedings creation in wikidata failed"
msg=f"<br>adding Volume {volume.number} proceedings to wikidata failed"
self.add_msg(msg)
ui.notify(msg)
except Exception as ex:
self.solution.handle_exception(ex)

def optional_login(self) -> bool:
    """
    login via wdSync unless the dry run mode is active

    Returns:
        bool: True if writing is enabled (dry_run is not set),
            False otherwise
    """
    # in dry run mode nothing is written so no login is needed
    if self.dry_run:
        return False
    self.wdSync.login()
    return True

async def createProceedingsItemFromVolume(self, volume: Volume):
    """
    Create the wikidata item for the proceedings of the given volume

    If wikidata items are already found for the URN of the volume
    links to these items are added to the log view and no new item is
    created. Otherwise the proceedings record for the volume is added
    via wdSync (only written if dry_run is not set).

    Args:
        volume(Volume): the volume to create the proceedings item for

    Returns:
        the wikidata id of the (last) existing or the newly created
        proceedings item - None if there is no such item or an
        exception was handled
    """
    qId = None
    try:
        write = self.optional_login()
        # check if already in wikidata → use URN
        urn = volume.urn
        wdItems = self.wdSync.getProceedingWdItemsByUrn(urn)
        if len(wdItems) > 0:
            links = []
            for wdItem in wdItems:
                # the item id is the last path segment of the item url
                qId = wdItem.split("/")[-1]
                link = self.createLink(wdItem, qId)
                links.append(f"{link}")
            # join so that the comma ends up between the links and not
            # trailing after the last one
            html = f"Volume {volume.number} already in Wikidata see "
            html += ",".join(links)
            self.add_msg(html + "<br>")
        else:
            # A proceedings volume for the URN is not known → create wd entry
            wdRecord = self.wdSync.getWikidataProceedingsRecord(volume)
            if self.dry_run:
                # show the record that would be written
                markup = self.get_dict_as_html_table(wdRecord)
                self.add_msg(markup)
            qId, errors = self.wdSync.addProceedingsToWikidata(
                wdRecord, write=write, ignoreErrors=self.ignore_errors
            )
            if qId is not None:
                proc_link = self.createWdLink(
                    qId,
                    f"Proceedings entry for Vol {volume.number} {qId} was created",
                )
                self.add_msg(proc_link)
            else:
                self.add_msg(
                    f"Creating wikidata Proceedings entry for Vol {volume.number} failed"
                )
                for key, value in errors.items():
                    msg = f"{key}:{value}"
                    self.add_msg(msg)
    except Exception as ex:
        self.solution.handle_exception(ex)
    return qId

async def createEventItemAndLinkProceedings(
self, volume: Volume, proceedingsWikidataId: str = None
):
self, volume: Volume, proceedingsWikidataId: str = None):
"""
Create event wikidata item for given volume and link
the proceedings with the event
Expand All @@ -391,4 +442,22 @@ async def createEventItemAndLinkProceedings(
volume(Volume): the volume for which to create the event item
proceedingsWikidataId: wikidata id of the proceedings
"""
try:
write=self.optional_login()
(
proceedingsQId,
eventQId,
msg,
) = self.wdSync.doCreateEventItemAndLinkProceedings(volume, proceedingsWikidataId,write=write)
if write:
self.wdSync.logout()
ui.notify(msg)
self.add_msg("<br>"+msg)
if eventQId:
event_link=self.createWdLink(eventQId,f"Event for Vol {volume.number} {eventQId}<br>")
proc_link=self.createWdLink(proceedingsQId, f"Proceedings for Vol {volume.number} {proceedingsQId}<br>")
self.add_msg(event_link)
self.add_msg(proc_link)
except Exception as ex:
self.solution.handle_exception(ex)
pass
8 changes: 4 additions & 4 deletions ceurws/wikidatasync.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@
from lodstorage.query import EndpointManager, QueryManager
from lodstorage.sparql import SPARQL
from lodstorage.sql import SQLDB
from spreadsheet.wikidata import PropertyMapping, UrlReference, WdDatatype, Wikidata
from ez_wikidata.wikidata import UrlReference, Wikidata
from ez_wikidata.wdproperty import PropertyMapping, WdDatatype

from ceurws.ceur_ws import CEURWS, PaperManager, Volume, VolumeManager
from ceurws.models.dblp import DblpPaper, DblpProceeding, DblpScholar
from ceurws.utils.download import Download
from ceurws.utils.json_cache import JsonCacheManager
from ceurws.indexparser import ParserConfig

Expand Down Expand Up @@ -51,7 +51,7 @@ def __init__(
self.prepareRDF()
self.wdQuery = self.qm.queriesByName["Proceedings"]
self.baseurl = baseurl
self.wd = Wikidata(baseurl=self.baseurl, debug=debug)
self.wd = Wikidata(debug=debug)
self.sqldb = SQLDB(CEURWS.CACHE_FILE)
self.procRecords = None
self.dbpEndpoint = DblpEndpoint(endpoint=dblp_endpoint_url)
Expand Down Expand Up @@ -1069,7 +1069,7 @@ def doCreateEventItemAndLinkProceedings(
return (
None,
None,
f"An error occured during the creation of the proceedings entry for {volume}",
f"An error occured during the creation of the event entry for {volume}",
)

@classmethod
Expand Down
11 changes: 4 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,17 @@ dependencies = [
#https://github.com/martinblech/xmltodict
'xmltodict>=0.13.0',
# https://pypi.org/project/pylodstorage/
'pylodstorage>=0.4.11',
'pylodstorage>=0.9.0',
# https://github.com/pyparsing/pyparsing
'pyparsing>=3.0.9',
# https://pypi.org/project/beautifulsoup4/
'BeautifulSoup4>=4.11.2',
# https://pypi.org/project/lxml/
'lxml>=4.9.2',
# pyjustpywidgets
# https://pypi.org/project/pyJustpyWidgets/
# 'pyJustpyWidgets>=0.1.13',
# https://pypi.org/project/justpy/
# 'justpy>=0.13.0',
# https://pypi.org/project/pyGenericSpreadSheet/
'pyGenericSpreadSheet>=0.2.4',
#'pyGenericSpreadSheet>=0.2.4',
# https://pypi.org/project/py-ez-wikidata/
'py-ez-wikidata>=0.1.2',
# https://pypi.org/project/py-3rdparty-mediawiki/
'py-3rdparty-mediawiki>=0.9.3',
# https://github.com/tqdm/tqdm
Expand Down

0 comments on commit 7e4a95c

Please sign in to comment.