From 5c07e63bfd26fb8faa84b585112db8cf1f248b77 Mon Sep 17 00:00:00 2001 From: Wolfgang Fahl Date: Mon, 26 Feb 2024 10:20:48 +0100 Subject: [PATCH] reformats code --- scripts/blackisort | 7 + skg/citeproc.py | 115 +++++------ skg/crossref.py | 53 ++--- skg/dblp.py | 50 +++-- skg/dblp2wikidata.py | 26 +++ skg/doi.py | 126 ++++++------ skg/event.py | 46 ++--- skg/graph.py | 232 +++++++++++----------- skg/kg.py | 277 ++++++++++++++------------ skg/location.py | 15 +- skg/orcid.py | 98 +++++----- skg/owl.py | 163 ++++++++-------- skg/paper.py | 43 +++-- skg/profiler.py | 6 +- skg/schema.py | 105 +++++----- skg/scholar.py | 51 +++-- skg/scholargrid.py | 341 ++++++++++++++++++--------------- skg/search.py | 42 ++-- skg/searchengine.py | 44 +++-- skg/semantic_scholar.py | 16 +- skg/skgbrowser.py | 231 ++++++++++++---------- skg/smw.py | 103 ++++++---- skg/sotsog.py | 284 +++++++++++++++------------ skg/version.py | 38 ++-- skg/wdsearch.py | 89 +++++---- skg/wikidata.py | 70 +++---- tests/base_skg_test.py | 1 + tests/basetest.py | 4 +- tests/testScholarGrid.py | 5 +- tests/test_crossref.py | 6 +- tests/test_dblp.py | 89 +++++---- tests/test_dblp2wikidata.py | 27 +++ tests/test_doi.py | 10 +- tests/test_location.py | 41 ++-- tests/test_orcid.py | 4 +- tests/test_scholar.py | 65 ++++--- tests/test_searchengine.py | 8 +- tests/test_semantic_scholar.py | 4 +- tests/test_skg.py | 1 + tests/test_smw.py | 8 +- tests/test_sotsog.py | 3 +- tests/test_wdsearch.py | 6 +- 42 files changed, 1626 insertions(+), 1327 deletions(-) create mode 100755 scripts/blackisort create mode 100644 skg/dblp2wikidata.py create mode 100644 tests/test_dblp2wikidata.py diff --git a/scripts/blackisort b/scripts/blackisort new file mode 100755 index 0000000..fe7b3b8 --- /dev/null +++ b/scripts/blackisort @@ -0,0 +1,7 @@ +#!/bin/bash +# WF 2024-01-10 +package=skg +isort tests/*.py +black tests/*.py +isort $package/*.py +black $package/*.py diff --git a/skg/citeproc.py b/skg/citeproc.py index 78ec404..b15b701 100644 --- a/skg/citeproc.py +++ b/skg/citeproc.py @@ -1,114 +1,117 @@ -''' +""" Created on 2022-12-21 @author: wf -''' +""" import datetime + + class Citeproc: """ see https://en.wikipedia.org/wiki/CiteProc """ - + @classmethod - def asScite(cls,meta_data:dict,retrieved_from:str)->str: + def asScite(cls, meta_data: dict, retrieved_from: str) -> str: """ convert the given meta data to #Scite format - + see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php Args: meta_data(dict): the citeproc compatible metadata dict to convert retrieved_from(str): the url the metadata was retrieved from - + Returns: str: Semantic Mediawiki markup """ + def unlist(value): - if type(value)!=list: + if type(value) != list: return value - text="" - delim="" + text = "" + delim = "" for item in value: - text+=f"{delim}{item}" - delim=";" - if len(value)>1: - text+="|+sep=;" + text += f"{delim}{item}" + delim = ";" + if len(value) > 1: + text += "|+sep=;" return text - + def firstValue(value): - if type(value)!=list: + if type(value) != list: return value else: return value[0] - - def get_author(value)->str: + + def get_author(value) -> str: """ get the author markup - + Args: value(list): the list to disassemble - + Returns: str: Mediawiki markup """ - author="" - delim="" + author = "" + delim = "" for arec in value: if "given" in arec and "family" in arec: - author+= f"""{delim}{arec["given"]} {arec["family"]}""" - delim=";" + author += f"""{delim}{arec["given"]} {arec["family"]}""" + delim = ";" elif "family" in arec: - author+= f"""{delim}{arec["family"]}""" - delim=";" + author += f"""{delim}{arec["family"]}""" + delim = ";" else: # incomplete author record ignored pass return author - - timestamp=datetime.datetime.utcnow().strftime('%Y-%m-%d') - ref_type="journal-article" - title=meta_data["title"] + + timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%d") + ref_type = "journal-article" + title = meta_data["title"] if type(title) is list: - title=title[0] - title_2=title.lower()[:2] - author_lower="" + title = title[0] + title_2 = title.lower()[:2] + author_lower = "" if "author" in meta_data: - first_author=firstValue(meta_data["author"]) + first_author = firstValue(meta_data["author"]) if "family" in first_author: - family=firstValue(first_author["family"]) - author_lower=family.lower() + family = firstValue(first_author["family"]) + author_lower = family.lower() else: # debug break point pass - year="" + year = "" if "published-print" in meta_data: - year=meta_data["published-print"]["date-parts"][0][0] + year = meta_data["published-print"]["date-parts"][0][0] if not year and "issued" in meta_data: - year=meta_data["issued"]["date-parts"][0][0] - reference=f"{author_lower}{year}{title_2}" - markup="" - for skey,mkey,func in [ - ("title","title",unlist), - ("subtitle","subtitle",unlist), - ("authors","author",get_author), - ("journal","container-title",unlist), - ("publisher","publisher",str), - ("issn","ISSN",unlist), - ("subject","subject",unlist), - ("volume","volume",str), - ("pages","page",str), - ("doi","DOI",str) + year = meta_data["issued"]["date-parts"][0][0] + reference = f"{author_lower}{year}{title_2}" + markup = "" + for skey, mkey, func in [ + ("title", "title", unlist), + ("subtitle", "subtitle", unlist), + ("authors", "author", get_author), + ("journal", "container-title", unlist), + ("publisher", "publisher", str), + ("issn", "ISSN", unlist), + ("subject", "subject", unlist), + ("volume", "volume", str), + ("pages", "page", str), + ("doi", "DOI", str), ]: if mkey in meta_data: - value=meta_data[mkey] + value = meta_data[mkey] if value: - value=func(value) - markup+=f"\n|{skey}={value}" - markup=f"""{{{{#scite: + value = func(value) + markup += f"\n|{skey}={value}" + markup = f"""{{{{#scite: |reference={reference} |type={ref_type}{markup} |year={year} |retrieved-from={retrieved_from} |retrieved-on={timestamp} }}}}""" - full_markup=f"{title}\n[[CiteRef::{reference}]]\n{markup}" + full_markup = f"{title}\n[[CiteRef::{reference}]]\n{markup}" return full_markup diff --git a/skg/crossref.py b/skg/crossref.py index ff2f496..e02b5a2 100644 --- a/skg/crossref.py +++ b/skg/crossref.py @@ -1,58 +1,63 @@ -''' +""" Created on 17.11.2022 @author: wf -''' -import skg +""" import habanero import habanero.cn as cn + +import skg from skg.citeproc import Citeproc + class Crossref: """ Crossref access """ - - def __init__(self,mailto=None,ua_string=None): + + def __init__(self, mailto=None, ua_string=None): """ constructor """ if mailto is None: - mailto="wf@bitplan.com" + mailto = "wf@bitplan.com" if ua_string is None: - ua_string=f"pysotsog/{skg.__version__} (https://pypi.org/project/pysotsog/; mailto:{mailto})" - #self.cr = habanero.Crossref(mailto=mailto,ua_string=ua_string) - self.cr = habanero.Crossref(ua_string="") - - def doiMetaData(self, dois:list): - """ + ua_string = f"pysotsog/{skg.__version__} (https://pypi.org/project/pysotsog/; mailto:{mailto})" + # self.cr = habanero.Crossref(mailto=mailto,ua_string=ua_string) + self.cr = habanero.Crossref(ua_string="") + + def doiMetaData(self, dois: list): + """ get the meta data for the given dois - + Args: doi(list): a list of dois """ metadata = None response = self.cr.works(ids=dois) - if 'status' in response and 'message' in response and response['status'] == 'ok': - metadata = response['message'] + if ( + "status" in response + and "message" in response + and response["status"] == "ok" + ): + metadata = response["message"] return metadata - - def doiBibEntry(self,dois:list): + + def doiBibEntry(self, dois: list): """ get bib entries for the given dois """ - bibentry=cn.content_negotiation(ids = dois, format = "bibentry") + bibentry = cn.content_negotiation(ids=dois, format="bibentry") return bibentry - - def asScite(self,meta_data:dict)->str: + + def asScite(self, meta_data: dict) -> str: """ convert the given meta data to #Scite format - + see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php - + Returns: str: Semantic Mediawiki markup """ - markup=Citeproc.asScite(meta_data,retrieved_from=self.cr.base_url) + markup = Citeproc.asScite(meta_data, retrieved_from=self.cr.base_url) return markup - \ No newline at end of file diff --git a/skg/dblp.py b/skg/dblp.py index 25aa695..20eeeed 100644 --- a/skg/dblp.py +++ b/skg/dblp.py @@ -1,42 +1,51 @@ -''' +""" Created on 2022-11-17 @author: wf -''' +""" from lodstorage.sparql import SPARQL + from skg.owl import Owl + class Dblp: """ Schloss Dagstuhl Dblp computer science bibliography """ - - def __init__(self,endpoint:str="https://qlever.cs.uni-freiburg.de/api/dblp"): + + def __init__(self, endpoint: str = "https://qlever.cs.uni-freiburg.de/api/dblp"): """ constructor - + Args: endpoint(str): the endpoint to use """ - self.endpoint=endpoint - self.schema=Owl("dblp","https://dblp.org/rdf/schema", "Wolfgang Fahl","2022-11-19") - self.sparql=SPARQL(self.endpoint) - - - def get_paper_records(self,regex:str,prop_name:str="title",limit:int=100,debug:bool=False)->list: + self.endpoint = endpoint + self.schema = Owl( + "dblp", "https://dblp.org/rdf/schema", "Wolfgang Fahl", "2022-11-19" + ) + self.sparql = SPARQL(self.endpoint) + + def get_paper_records( + self, + regex: str, + prop_name: str = "title", + limit: int = 100, + debug: bool = False, + ) -> list: """ get papers fitting the given regex - + Args: prop_name(str): the property to filter regex(str): the regex to filter for limit(int): the maximum number of records to return debug(bool): if True show debug information - + Returns: list: a list of dict of paper records """ - sparql_query="""PREFIX dblp: + sparql_query = """PREFIX dblp: PREFIX xsd: SELECT ?paper @@ -58,8 +67,8 @@ def get_paper_records(self,regex:str,prop_name:str="title",limit:int=100,debug:b ?paper dblp:yearOfPublication ?year. OPTIONAL { ?paper dblp:monthOfPublication ?month}. """ - sparql_query+=f"""FILTER regex(?{prop_name}, "{regex}").\n""" - sparql_query+=f""" + sparql_query += f"""FILTER regex(?{prop_name}, "{regex}").\n""" + sparql_query += f""" }} GROUP BY ?paper @@ -74,11 +83,11 @@ def get_paper_records(self,regex:str,prop_name:str="title",limit:int=100,debug:b LIMIT {limit}""" if debug: print(sparql_query) - records=self.sparql.queryAsListOfDicts(sparql_query) + records = self.sparql.queryAsListOfDicts(sparql_query) return records - - def get_random_papers(self,year:int=2020,limit:int=10): - sparql_query=f"""PREFIX dblp: + + def get_random_papers(self, year: int = 2020, limit: int = 10): + sparql_query = f"""PREFIX dblp: SELECT ?paper (SAMPLE(?doi_o) as ?doi) @@ -100,4 +109,3 @@ def get_random_papers(self,year:int=2020,limit:int=10): ORDER BY ?sortKey LIMIT {limit} """ - \ No newline at end of file diff --git a/skg/dblp2wikidata.py b/skg/dblp2wikidata.py new file mode 100644 index 0000000..65f107e --- /dev/null +++ b/skg/dblp2wikidata.py @@ -0,0 +1,26 @@ +""" +Created on 2024-02-26 + +@author: wf +""" +from argparse import Namespace + +class Dblp2Wikidata: + """ + utility to transfering Dblp person entries to Wikidata + """ + + def __init__(self,debug:bool=False): + self.debug=debug + pass + + def transfer(self, args:Namespace): + """ + Main method to handle the transfer of DBLP entries to Wikidata. + + Args: + args(Namespace): Command line arguments. + """ + search_term = getattr(args, 'dblp2wikidata', None) + if self.debug: + print(f"trying to transfer DBLP person entry for {search_term}") \ No newline at end of file diff --git a/skg/doi.py b/skg/doi.py index f75e2c4..73761e3 100644 --- a/skg/doi.py +++ b/skg/doi.py @@ -1,147 +1,147 @@ -''' +""" Created on 2022-11-22 @author: wf -''' -#import aiohttp -import urllib.request +""" import json import re + +# import aiohttp +import urllib.request + from skg.citeproc import Citeproc + class DOI: """ Digital Object Identifier handling - + see e.g. https://www.wikidata.org/wiki/Property:P356 see https://www.doi.org/doi_handbook/2_Numbering.html#2.2 see https://github.com/davidagraf/doi2bib2/blob/master/server/doi2bib.js see https://citation.crosscite.org/docs.html - + """ - pattern=re.compile(r"((?P10)\.(?P[0-9]{4,})(?:\.[0-9]+)*(?:\/|%2F)(?:(?![\"&\'])\S)+)") - - def __init__(self,doi:str): + + pattern = re.compile( + r"((?P10)\.(?P[0-9]{4,})(?:\.[0-9]+)*(?:\/|%2F)(?:(?![\"&\'])\S)+)" + ) + + def __init__(self, doi: str): """ a DOI """ - self.doi=doi - match=re.match(DOI.pattern,doi) - self.ok=bool(match) + self.doi = doi + match = re.match(DOI.pattern, doi) + self.ok = bool(match) if self.ok: - self.registrant_code=match.group("registrant_code") - + self.registrant_code = match.group("registrant_code") + @classmethod - def isDOI(cls,doi:str): + def isDOI(cls, doi: str): """ check that the given string is a doi - + Args: doi(str): the potential DOI string """ if not doi: return False - if isinstance(doi,list): - ok=len(doi)>0 + if isinstance(doi, list): + ok = len(doi) > 0 for single_doi in doi: - ok=ok and cls.isDOI(single_doi) + ok = ok and cls.isDOI(single_doi) return ok - if not isinstance(doi,str): + if not isinstance(doi, str): return False - doi_obj=DOI(doi) + doi_obj = DOI(doi) return doi_obj.ok - - def fetch_response(self,url:str,headers:dict): + + def fetch_response(self, url: str, headers: dict): """ fetch reponse for the given url with the given headers - + Args: url(str): the url to fetch the data for headers(dict): the headers to use """ - req=urllib.request.Request(url,headers=headers) - response=urllib.request.urlopen(req) + req = urllib.request.Request(url, headers=headers) + response = urllib.request.urlopen(req) return response - - def fetch_json(self,url:str,headers:dict): + + def fetch_json(self, url: str, headers: dict): """ fetch json for the given url with the given headers - + Args: url(str): the url to fetch the data for headers(dict): the headers to use - + Returns: json: json data """ - #async with aiohttp.ClientSession(headers=headers) as session: + # async with aiohttp.ClientSession(headers=headers) as session: # async with session.get(url) as response: # return await response.json() - text=self.fetch_text(url, headers) - json_data=json.loads(text) + text = self.fetch_text(url, headers) + json_data = json.loads(text) return json_data - - def fetch_text(self,url,headers)->str: + + def fetch_text(self, url, headers) -> str: """ fetch text for the given url with the given headers - + Args: url(str): the url to fetch the data for headers(dict): the headers to use - + Returns: str: the text """ - #async with aiohttp.ClientSession(headers=headers) as session: + # async with aiohttp.ClientSession(headers=headers) as session: # async with session.get(url) as response: # return await response.text() - response=self.fetch_response(url, headers) - encoding = response.headers.get_content_charset('utf-8') + response = self.fetch_response(url, headers) + encoding = response.headers.get_content_charset("utf-8") content = response.read() text = content.decode(encoding) return text - + def doi2bibTex(self): """ get the bibtex result for my doi """ - url=f"https://doi.org/{self.doi}" - headers= { - 'Accept': 'application/x-bibtex; charset=utf-8' - } - return self.fetch_text(url,headers) - + url = f"https://doi.org/{self.doi}" + headers = {"Accept": "application/x-bibtex; charset=utf-8"} + return self.fetch_text(url, headers) + def doi2Citeproc(self): """ get the Citeproc JSON result for my doi see https://citeproc-js.readthedocs.io/en/latest/csl-json/markup.html """ - url=f"https://doi.org/{self.doi}" - headers= { - 'Accept': 'application/vnd.citationstyles.csl+json; charset=utf-8' - } + url = f"https://doi.org/{self.doi}" + headers = {"Accept": "application/vnd.citationstyles.csl+json; charset=utf-8"} return self.fetch_json(url, headers) - + def dataCiteLookup(self): """ get the dataCite json result for my doi """ - url=f"https://api.datacite.org/dois/{self.doi}" - headers= { - 'Accept': 'application/vnd.api+json; charset=utf-8' - } + url = f"https://api.datacite.org/dois/{self.doi}" + headers = {"Accept": "application/vnd.api+json; charset=utf-8"} return self.fetch_json(url, headers) - - def asScite(self)->str: + + def asScite(self) -> str: """ get DOI metadata and convert to Semantic Cite markup - + see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php - + Returns: str: Semantic Mediawiki markup """ if not hasattr(self, "meta_data"): - self.meta_data=self.doi2Citeproc() - markup=Citeproc.asScite(self.meta_data,retrieved_from="https://doi.org/") - return markup \ No newline at end of file + self.meta_data = self.doi2Citeproc() + markup = Citeproc.asScite(self.meta_data, retrieved_from="https://doi.org/") + return markup diff --git a/skg/event.py b/skg/event.py index 8065fae..f68f386 100644 --- a/skg/event.py +++ b/skg/event.py @@ -1,76 +1,76 @@ -''' +""" Created on 2022-11-16 @author: wf -''' +""" import skg.graph + class Event(skg.graph.Node): """ an instance of a scientific event """ - + @classmethod def getSamples(cls): - samples=[ + samples = [ { - "wikiDataId":"Q112055391", + "wikiDataId": "Q112055391", "title": "The Third Wikidata Workshop", "location": "Hangzhou", "point_in_time": "2022-10-24", - "official_website": "https://wikidataworkshop.github.io/2022/" + "official_website": "https://wikidataworkshop.github.io/2022/", } ] return samples - + + class EventSeries(skg.graph.Node): """ an instance of an academic event series """ - + @classmethod def getSamples(cls): - samples=[ - { - "wikiDataId":"Q6053150", - "short_name":"ISWC" - }, + samples = [ + {"wikiDataId": "Q6053150", "short_name": "ISWC"}, { "wikiDataId": "Q105491257", "short_name": "ECDL", "title": "European Conference on Research and Advanced Technology for Digital Libraries (English)", - "official_website": "http://ecdlconference.isti.cnr.it/" + "official_website": "http://ecdlconference.isti.cnr.it/", }, { "wikiDataId": "Q105695678", "short_name": "VNC (English)", "DBLP_venue_ID": "conf/vnc", "VIAF_ID": "267408611", - "title": "IEEE Vehicular Networking Conference" + "title": "IEEE Vehicular Networking Conference", }, { - "wikiDataId":"Q17012957", - "short_name":"ESWC", + "wikiDataId": "Q17012957", + "short_name": "ESWC", "inception": "2004", - "gndId": "1091749205" - } + "gndId": "1091749205", + }, ] return samples - + + class Proceedings(skg.graph.Node): """ Proceedings of an event """ + @classmethod def getSamples(cls): - samples=[ + samples = [ { "wikiDataId": "Q115053286", "short_name": "Wikidata 2022 (English)", "title": "Proceedings of the 3rd Wikidata Workshop 2022 (English)", "publication_date": "2022-11-03", - "full_work_available_at_URL": "http://ceur-ws.org/Vol-3262/" + "full_work_available_at_URL": "http://ceur-ws.org/Vol-3262/", } ] return samples - diff --git a/skg/graph.py b/skg/graph.py index 2774436..c5ea58d 100644 --- a/skg/graph.py +++ b/skg/graph.py @@ -1,167 +1,173 @@ -''' +""" Created on 2022-11-16 @author: wf -''' -from skg.wikidata import Wikidata -from skg.dblp import Dblp +""" from lodstorage.sparql import SPARQL +from skg.dblp import Dblp +from skg.wikidata import Wikidata + + class Concept: """ an Entity """ - def __init__(self,name:str,cls): + + def __init__(self, name: str, cls): """ constructor - + Args: name(str): the name of the node cls: a class """ - self.name=name - self.props={} - self.cls=cls - if hasattr(cls,"getSamples"): + self.name = name + self.props = {} + self.cls = cls + if hasattr(cls, "getSamples"): for sample in cls.getSamples(): for key in sample.keys(): if not key in self.props: - self.props[key]=Property(self,key) - - def map(self,map_name:str,map_list:list): + self.props[key] = Property(self, key) + + def map(self, map_name: str, map_list: list): """ map the given list of property mappings under the given map_name - + Args: map_name(str): the name of the mapping e.g. "wikidata" map_list(list): a list of mapping tuples """ - for prop_name,mapped_prop in map_list: + for prop_name, mapped_prop in map_list: if prop_name in self.props: - prop=self.props[prop_name] - prop.setmap(map_name,mapped_prop) + prop = self.props[prop_name] + prop.setmap(map_name, mapped_prop) return self - - def map_wikidata(self,wd_class:str,scholia_suffix,map_list:list): + + def map_wikidata(self, wd_class: str, scholia_suffix, map_list: list): """ map wikidata entries - + Args: wd_class(str): the main wikidata base class scholia_suffix(str): the scholia suffix """ - self.wd_class=wd_class - self.scholia_suffix=scholia_suffix - self.map("wikidata",map_list) - return self - - + self.wd_class = wd_class + self.scholia_suffix = scholia_suffix + self.map("wikidata", map_list) + return self + + class Property: """ a Property """ - def __init__(self,concept:Concept,name:str): + + def __init__(self, concept: Concept, name: str): """ constructor - + Args: concept(Concept): the concept this property belongs to name(str): the name of the property - + """ - self.concept=concept - self.name=name - self.maps={} - - def setmap(self,map_name,mapped_prop): + self.concept = concept + self.name = name + self.maps = {} + + def setmap(self, map_name, mapped_prop): """ map the given property """ - self.maps[map_name]=mapped_prop - - def getmap(self,map_name): + self.maps[map_name] = mapped_prop + + def getmap(self, map_name): return self.maps[map_name] - - def hasmap(self,map_name:str)->bool: + + def hasmap(self, map_name: str) -> bool: """ check whether there is a mapping for the given map_name - + Args: map_name(str): the map name to check - + Returns: bool: True if there is mapping """ return map_name in self.maps - + + class Node: """ a Node in the scholary knowledge graph """ - debug=False - + + debug = False + def __init__(self): """ constructor """ - + def __str__(self): """ return a text representation of me """ - text=f"{self.concept.name} ➞ {self.label}:" - delim="\n " + text = f"{self.concept.name} ➞ {self.label}:" + delim = "\n " for prop in self.concept.props.values(): if hasattr(self, prop.name): - text+=f"{delim}{prop.name}={getattr(self,prop.name)}" + text += f"{delim}{prop.name}={getattr(self,prop.name)}" return text - - def from_dict(self,concept,record:str): + + def from_dict(self, concept, record: str): """ get my values from the given record """ - self.concept=concept - self.label=record[concept.name] + self.concept = concept + self.label = record[concept.name] for key in concept.props.keys(): if key in record: setattr(self, key, record[key]) - + def browser_url(self): """ get my browser url """ - if self.provenance=="wikidata": - url=self.scholia_url() + if self.provenance == "wikidata": + url = self.scholia_url() else: - url=self.label + url = self.label return url - + def scholia_url(self): """ get my scholia url """ - prefix=f"https://scholia.toolforge.org/{self.concept.scholia_suffix}" - wd_url=getattr(self, "wikiDataId",None) + prefix = f"https://scholia.toolforge.org/{self.concept.scholia_suffix}" + wd_url = getattr(self, "wikiDataId", None) if wd_url is None: return None else: - qid=wd_url.replace("http://www.wikidata.org/entity/","") + qid = wd_url.replace("http://www.wikidata.org/entity/", "") return f"{prefix}/{qid}" - + @classmethod - def setProvenance(cls,instances:list,provenance:str): + def setProvenance(cls, instances: list, provenance: str): """ set the provenance of the given instances """ for instance in instances: - instance.provenance=provenance - + instance.provenance = provenance + @classmethod - def from_sparql(cls,sparql:SPARQL,sparql_query:str,concept:Concept): + def from_sparql(cls, sparql: SPARQL, sparql_query: str, concept: Concept): """ get instance from the given sparql access point with the given sparql_query for the given concept - + Args: sparql(SPARQL): the sparql access point sparql_query(str): the query to execute @@ -169,51 +175,53 @@ def from_sparql(cls,sparql:SPARQL,sparql_query:str,concept:Concept): """ if Node.debug: print(sparql_query) - records=sparql.queryAsListOfDicts(sparql_query) - instances=cls.from_records(records,concept) + records = sparql.queryAsListOfDicts(sparql_query) + instances = cls.from_records(records, concept) return instances - + @classmethod - def from_records(cls,records:list,concept:Concept): + def from_records(cls, records: list, concept: Concept): """ get instances from the given records for the given concept - + Args: records(list): a list of dicts to get instances for concept(Concept): the concept to create instances for """ - instances=[] + instances = [] for record in records: # call my constructor - instance=cls() - instance.from_dict(concept,record) + instance = cls() + instance.from_dict(concept, record) instances.append(instance) return instances - + @classmethod - def from_wikidata_via_id(cls,concept:Concept,id_name:str,id_value:str,lang:str="en"): + def from_wikidata_via_id( + cls, concept: Concept, id_name: str, id_value: str, lang: str = "en" + ): """ get a node instance from wikidata for the given parameters - + Args: concept(Concept): the concept to return id_name(str): the name of the id to search / lookup with id_value(str): the value of the id lang(str): the language code to apply """ - wikidata=Wikidata() - if id_name=="wikiDataId": - value_clause=f"" + wikidata = Wikidata() + if id_name == "wikiDataId": + value_clause = f"" else: - value_clause=f'''"{id_value}"''' - sparql_query=f"""# Query for {concept.name} details via ID {id_name} value {id_value} + value_clause = f'''"{id_value}"''' + sparql_query = f"""# Query for {concept.name} details via ID {id_name} value {id_value} PREFIX wd: PREFIX wdt: PREFIX rdfs: SELECT DISTINCT ?{concept.name} ?qId""" for prop in concept.props.values(): - sparql_query+=f" ?{prop.name}" - sparql_query+=f""" + sparql_query += f" ?{prop.name}" + sparql_query += f""" WHERE {{ VALUES ?{id_name} {{ {value_clause} @@ -224,47 +232,50 @@ def from_wikidata_via_id(cls,concept:Concept,id_name:str,id_value:str,lang:str=" FILTER(LANG(?{concept.name})="{lang}"). """ for prop in concept.props.values(): - if prop.name=="wikiDataId": + if prop.name == "wikiDataId": continue if not (prop.hasmap("wikidata")): - raise Exception(f"Property {prop.name} of {concept.name} has no wikidata mapping") - wd_prop=prop.getmap("wikidata") - clause=f"?wikiDataId wdt:{wd_prop} ?{prop.name}." - if prop.name!=id_name: - clause=f"OPTIONAL {{ {clause} }}" - sparql_query+="\n "+clause - sparql_query+="\n}" - instances=cls.from_sparql(wikidata.sparql,sparql_query,concept) + raise Exception( + f"Property {prop.name} of {concept.name} has no wikidata mapping" + ) + wd_prop = prop.getmap("wikidata") + clause = f"?wikiDataId wdt:{wd_prop} ?{prop.name}." + if prop.name != id_name: + clause = f"OPTIONAL {{ {clause} }}" + sparql_query += "\n " + clause + sparql_query += "\n}" + instances = cls.from_sparql(wikidata.sparql, sparql_query, concept) cls.setProvenance(instances, "wikidata") return instances - - + @classmethod - def from_dblp_via_id(cls,concept:Concept,id_name:str,id_value:str,lang:str="en"): + def from_dblp_via_id( + cls, concept: Concept, id_name: str, id_value: str, lang: str = "en" + ): """ get a node instance from dblp for the given parameters - + Args: concept(Concept): the concept to return id_name(str): the name of the id to search / lookup with id_value(str): the value of the id lang(str): the language code to apply """ - dblp=Dblp() - sparql_query=f""" + dblp = Dblp() + sparql_query = f""" PREFIX dblp: SELECT ?{concept.name}""" for prop in concept.props.values(): if prop.hasmap("dblp"): - sparql_query+=f" ?{prop.name}" - if id_name=="doi": - value_clause=f"" - elif id_name=="orcid": - value_clause=f"" + sparql_query += f" ?{prop.name}" + if id_name == "doi": + value_clause = f"" + elif id_name == "orcid": + value_clause = f"" else: - value_clause=f'''"{id_value}"''' - sparql_query+=f""" + value_clause = f'''"{id_value}"''' + sparql_query += f""" WHERE {{ VALUES ?{id_name} {{ {value_clause} @@ -272,10 +283,9 @@ def from_dblp_via_id(cls,concept:Concept,id_name:str,id_value:str,lang:str="en") """ for prop in concept.props.values(): if prop.hasmap("dblp"): - dblp_prop=prop.getmap("dblp") - sparql_query+=f"""?{concept.name} dblp:{dblp_prop} ?{dblp_prop}.\n""" - sparql_query+="}\n" - instances=cls.from_sparql(dblp.sparql,sparql_query,concept) + dblp_prop = prop.getmap("dblp") + sparql_query += f"""?{concept.name} dblp:{dblp_prop} ?{dblp_prop}.\n""" + sparql_query += "}\n" + instances = cls.from_sparql(dblp.sparql, sparql_query, concept) cls.setProvenance(instances, "dblp") return instances - \ No newline at end of file diff --git a/skg/kg.py b/skg/kg.py index e46eea2..b6bea38 100644 --- a/skg/kg.py +++ b/skg/kg.py @@ -1,152 +1,184 @@ -''' +""" Created on 2022-11-16 @author: wf -''' -from skg.scholar import Scholar,Institution -from skg.paper import Paper -from skg.event import Event,EventSeries,Proceedings -from skg.location import Country -from skg.graph import Concept +""" import datetime + +from skg.event import Event, EventSeries, Proceedings +from skg.graph import Concept +from skg.location import Country +from skg.paper import Paper +from skg.scholar import Institution, Scholar from skg.version import Version + class SKG_Def: """ scholary knowledge graph """ - + def __init__(self): """ constructor """ - self.concepts={ + self.concepts = { # main concepts - "Scholar": Concept(name="Scholar",cls=Scholar), - "Institution": Concept(name="Institution",cls=Institution), - "Paper": Concept(name="Paper",cls=Paper), - "Event": Concept(name="Event",cls=Event), - "EventSeries": Concept(name="EventSeries",cls=EventSeries), - "Proceedings": Concept(name="Proceedings",cls=Proceedings), + "Scholar": Concept(name="Scholar", cls=Scholar), + "Institution": Concept(name="Institution", cls=Institution), + "Paper": Concept(name="Paper", cls=Paper), + "Event": Concept(name="Event", cls=Event), + "EventSeries": Concept(name="EventSeries", cls=EventSeries), + "Proceedings": Concept(name="Proceedings", cls=Proceedings), # neighbour concepts - "Country": Concept(name="Country",cls=Country) + "Country": Concept(name="Country", cls=Country), } - self.concepts["Scholar"].map_wikidata("Q5","author",[ - ("name","label"), - ("dblpId","P2456"), - ("gndId","P227"), - ("linkedInId","P6634"), - ("homepage","P856"), - ("googleScholarUser","P1960"), - ("orcid","P496"), - ("givenName","P735"), - ("familyName","P734"), - ("gender","P21"), - ("image","P18"), - ("occupation","P106"), - ("Semantic_Scholar_author_ID","P4012") - ]).map("dblp",[ - ("name","primaryCreatorName"), - ("homepage","primaryHomepage"), - ("orcid","orcid") - ]).map("smw",[ - ("wikiDataId","wikiDataId"), - ("familyName","name"), - ("givenName","firstName"), - ("googleScholarUser","googleScholarUser"), - ("homepage","homepage"), - ("dblpId","dblpId"), - ("orcid","orcid"), - ("linkedInId","linkedInId") - ]) - self.concepts["Institution"].map_wikidata("Q4671277","organization",[ - ("short_name","P1813"), # 2.0 % - ("inception","P571"), # 65.8 % - ("image","P18"), # 15.2 % - ("country","P17"), # 88.8 % - ("located_in","P131"), # 51.9 % - ("official_website","P856"), # 59.1% - ("coordinate_location","P625") # 44.0 % - ]) - self.concepts["Paper"].map_wikidata("Q13442814","work",[ - ("title","label"), - ("doi","P356"), - ("DBLP_publication_ID","P8978"), - ("publication_date","P577") - ]).map("dblp",[ - ("title","title"), - ("doi","doi") - ]) + self.concepts["Scholar"].map_wikidata( + "Q5", + "author", + [ + ("name", "label"), + ("dblpId", "P2456"), + ("gndId", "P227"), + ("linkedInId", "P6634"), + ("homepage", "P856"), + ("googleScholarUser", "P1960"), + ("orcid", "P496"), + ("givenName", "P735"), + ("familyName", "P734"), + ("gender", "P21"), + ("image", "P18"), + ("occupation", "P106"), + ("Semantic_Scholar_author_ID", "P4012"), + ], + ).map( + "dblp", + [ + ("name", "primaryCreatorName"), + ("homepage", "primaryHomepage"), + ("orcid", "orcid"), + ], + ).map( + "smw", + [ + ("wikiDataId", "wikiDataId"), + ("familyName", "name"), + ("givenName", "firstName"), + ("googleScholarUser", "googleScholarUser"), + ("homepage", "homepage"), + ("dblpId", "dblpId"), + ("orcid", "orcid"), + ("linkedInId", "linkedInId"), + ], + ) + self.concepts["Institution"].map_wikidata( + "Q4671277", + "organization", + [ + ("short_name", "P1813"), # 2.0 % + ("inception", "P571"), # 65.8 % + ("image", "P18"), # 15.2 % + ("country", "P17"), # 88.8 % + ("located_in", "P131"), # 51.9 % + ("official_website", "P856"), # 59.1% + ("coordinate_location", "P625"), # 44.0 % + ], + ) + self.concepts["Paper"].map_wikidata( + "Q13442814", + "work", + [ + ("title", "label"), + ("doi", "P356"), + ("DBLP_publication_ID", "P8978"), + ("publication_date", "P577"), + ], + ).map("dblp", [("title", "title"), ("doi", "doi")]) # scientific event - self.concepts["Event"].map_wikidata("Q52260246","event",[ - ("title","P1476"), - ("country","P17"), # 93.9% -> Human Settlement - ("location","P276"), # 94.6% - ("point_in_time","P585"), - ("official_website","P856") - ]) + self.concepts["Event"].map_wikidata( + "Q52260246", + "event", + [ + ("title", "P1476"), + ("country", "P17"), # 93.9% -> Human Settlement + ("location", "P276"), # 94.6% + ("point_in_time", "P585"), + ("official_website", "P856"), + ], + ) # academic event series - self.concepts["EventSeries"].map_wikidata("Q47258130","event-series",[ - ("title","P1476"), # 96.7 % - ("short_name","P1813"), # 93.1 % - ("VIAF_ID","P214"), # 60.5 % - ("DBLP_venue_ID","P8926"), # 96.4 % - ("gndId","P227"), #42.3 % - ("inception","P571"), # 22.3 % - ("official_website","P856") # 13.5 % - ]) + self.concepts["EventSeries"].map_wikidata( + "Q47258130", + "event-series", + [ + ("title", "P1476"), # 96.7 % + ("short_name", "P1813"), # 93.1 % + ("VIAF_ID", "P214"), # 60.5 % + ("DBLP_venue_ID", "P8926"), # 96.4 % + ("gndId", "P227"), # 42.3 % + ("inception", "P571"), # 22.3 % + ("official_website", "P856"), # 13.5 % + ], + ) # proceedings - self.concepts["Proceedings"].map_wikidata("Q1143604","venue",[ - ("title","P1476"), - ("short_name","P1813"), - ("full_work_available_at_URL","P953"), - ("publication_date","P577") - ]) + self.concepts["Proceedings"].map_wikidata( + "Q1143604", + "venue", + [ + ("title", "P1476"), + ("short_name", "P1813"), + ("full_work_available_at_URL", "P953"), + ("publication_date", "P577"), + ], + ) # country - self.concepts["Country"].map_wikidata("Q6256","topic",[ - ("name","label"), # 100% ? - ("homepage","P856"), # 49.4% - ("population","P1082"), # 57.4% - ("capital","P36"), #59.8% - ("coordinate_location","P625"), #58.6% - ("iso_code","P297") # 53.3% - - ]) - - self.concepts_by_qid={} + self.concepts["Country"].map_wikidata( + "Q6256", + "topic", + [ + ("name", "label"), # 100% ? + ("homepage", "P856"), # 49.4% + ("population", "P1082"), # 57.4% + ("capital", "P36"), # 59.8% + ("coordinate_location", "P625"), # 58.6% + ("iso_code", "P297"), # 53.3% + ], + ) + + self.concepts_by_qid = {} for concept in self.concepts.values(): if concept.wd_class in self.concepts_by_qid: raise Exception(f"duplicate wd_class definition: {concept.wd_class}") - self.concepts_by_qid[concept.wd_class]=concept - - def conceptForQid(self,qid:str)->Concept: + self.concepts_by_qid[concept.wd_class] = concept + + def conceptForQid(self, qid: str) -> Concept: """ get the concept for the given wikidata Q Identifieer - + Args: qid(str): get the concept for the given Qid - + Return: Concept: or None if none is found """ - concept=self.concepts_by_qid.get(qid,None) + concept = self.concepts_by_qid.get(qid, None) return concept - - def toPlantuml(self,header:str=None, footer:str=None)->str: + + def toPlantuml(self, header: str = None, footer: str = None) -> str: """ get a plantuml version of this knowledge graph - + Args: header(str): the header to apply footer(str): the footer to apply - + Returns: str: the plantuml markup - + """ - timestamp=datetime.datetime.utcnow().strftime('%Y-%m-%d') + timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%d") if header is None: - header=f"""/'{Version.name}:{Version.description} + header = f"""/'{Version.name}:{Version.description} updated {timestamp} authors:{Version.authors} @@ -156,28 +188,27 @@ def toPlantuml(self,header:str=None, footer:str=None)->str: package skg {{ """ if footer is None: - footer="}\n" - markup=f"{header}" - indent=" " - for concept_name,concept in self.concepts.items(): - markup+=f"""{indent}class {concept_name} {{\n""" - for prop_name,prop in concept.props.items(): - markup+=f"""{indent} {prop_name}\n""" - markup+=f"""\n{indent}}}\n""" - markup+=f"{footer}" + footer = "}\n" + markup = f"{header}" + indent = " " + for concept_name, concept in self.concepts.items(): + markup += f"""{indent}class {concept_name} {{\n""" + for prop_name, prop in concept.props.items(): + markup += f"""{indent} {prop_name}\n""" + markup += f"""\n{indent}}}\n""" + markup += f"{footer}" return markup - - def toSiDiF(self)->str: + + def toSiDiF(self) -> str: """ convert me to SiDiF format """ - sidif="" - for concept_name,concept in self.concepts.items(): - sidif+=f"""# + sidif = "" + for concept_name, concept in self.concepts.items(): + sidif += f"""# # {concept_name} # {concept_name} isA Topic "{concept_name} is name of it """ return sidif - \ No newline at end of file diff --git a/skg/location.py b/skg/location.py index 772dcbd..84e590c 100644 --- a/skg/location.py +++ b/skg/location.py @@ -1,25 +1,26 @@ -''' +""" Created on 2022-11-21 @author: wf -''' +""" import skg.graph + class Country(skg.graph.Node): """ an instance of a country """ - + @classmethod def getSamples(cls): - samples=[ + samples = [ { - "wikiDataId":"Q334", + "wikiDataId": "Q334", "name": "Singapore", - "iso_code": "SG", + "iso_code": "SG", "homepage": "https://www.gov.sg/", "population": 5866139, - "coordinate_location": "1°18'N, 103°48'E" + "coordinate_location": "1°18'N, 103°48'E", } ] return samples diff --git a/skg/orcid.py b/skg/orcid.py index a9c4f18..d43a84e 100644 --- a/skg/orcid.py +++ b/skg/orcid.py @@ -1,88 +1,92 @@ -''' +""" Created on 2022-11-19 @author: wf -''' -import requests +""" import re -from stdnum.iso7064.mod_11_2 import validate + +import requests +from stdnum.iso7064.mod_11_2 import validate + class ORCID: """ ORCID handling - - see e.g. + + see e.g. https://info.orcid.org/brand-guidelines/#h-orcid-logos-and-icons https://pub.orcid.org/v3.0/ """ - pattern=re.compile(r"^(\d{4}-){3}\d{3}(\d|X)$") - - def __init__(self,orcid:str): + + pattern = re.compile(r"^(\d{4}-){3}\d{3}(\d|X)$") + + def __init__(self, orcid: str): """ constructor - + Args: orcid(str): the orcid """ - self.orcid=orcid - #https://support.orcid.org/hc/en-us/articles/360006897674-Structure-of-the-ORCID-Identifier - self.orcid_num=orcid.replace("-","") - match=re.match(ORCID.pattern,orcid) - self.ok=bool(match) and validate(self.orcid_num) - + self.orcid = orcid + # https://support.orcid.org/hc/en-us/articles/360006897674-Structure-of-the-ORCID-Identifier + self.orcid_num = orcid.replace("-", "") + match = re.match(ORCID.pattern, orcid) + self.ok = bool(match) and validate(self.orcid_num) + @classmethod - def isORCID(cls,orcid:str)->bool: + def isORCID(cls, orcid: str) -> bool: """ check that the given string is an ORCID - + Args: orcid(str): the potential ORCID string - + Returns: bool: True if the string represents a valid ORCID otherwise false """ if not orcid: return False - orcid_obj=ORCID(orcid) + orcid_obj = ORCID(orcid) return orcid_obj.ok - - def getMetadata(self,op:str=None)->dict: + + def getMetadata(self, op: str = None) -> dict: """ get the ORCID metadata data - + Args: - op(str): the https://pub.orcid.org/v3.0/ API + op(str): the https://pub.orcid.org/v3.0/ API operation to apply - default is "Fetch record details" - + Returns: dict: the dictionary derived from the JSON response - + """ - op="" if op is None else f"/{op}" - url=f'https://pub.orcid.org/v3.0/{self.orcid}{op}' - r = requests.get(url, - headers = {'User-Agent':'Mozilla/5.0', 'accept' : 'application/json'}) - json_data=r.json() + op = "" if op is None else f"/{op}" + url = f"https://pub.orcid.org/v3.0/{self.orcid}{op}" + r = requests.get( + url, headers={"User-Agent": "Mozilla/5.0", "accept": "application/json"} + ) + json_data = r.json() return json_data - - def asHtml(self,mode:str="full",inline:str="")->str: + + def asHtml(self, mode: str = "full", inline: str = "") -> str: """ - the orcid logo - + the orcid logo + Args: mode(str): the mode - inline(str): in inline mode this is the text to be displayed inline - + inline(str): in inline mode this is the text to be displayed inline + Returns: str: the html code - + """ - href=f"""https://orcid.org/{self.orcid}""" - logo="""ORCID logo""" - if mode=="full": - html=f"""{logo}{href}""" - elif mode=="compact": - html=f"""{logo}{self.orcid}""" - elif mode=="inline": - html=f"""{inline}{logo}""" - return html \ No newline at end of file + href = f"""https://orcid.org/{self.orcid}""" + logo = """ORCID logo""" + if mode == "full": + html = f"""{logo}{href}""" + elif mode == "compact": + html = f"""{logo}{self.orcid}""" + elif mode == "inline": + html = f"""{inline}{logo}""" + return html diff --git a/skg/owl.py b/skg/owl.py index 1c37a66..8a74549 100644 --- a/skg/owl.py +++ b/skg/owl.py @@ -1,176 +1,179 @@ -''' +""" Created on 2022-11-22 @author: wf -''' +""" import json -from rdflib.namespace import OWL -from skg.schema import Schema + import rdflib +from rdflib.namespace import OWL + from skg.profiler import Profiler +from skg.schema import Schema + class Owl(Schema): """ Web Ontology Language access see https://en.wikipedia.org/wiki/Web_Ontology_Language """ - - def __init__(self,name:str,url:str,authors:str,inception:str): + + def __init__(self, name: str, url: str, authors: str, inception: str): """ constructor - + Args: name(str): the name of this schema url(str): the url of this schema authors(str): the authors of this schema inception(str): the inception of this schema """ - Schema.__init__(self,name,url,authors,inception) - self.schema_url=url - self.schema=None - - def show_triples(self,result): + Schema.__init__(self, name, url, authors, inception) + self.schema_url = url + self.schema = None + + def show_triples(self, result): """ show the triples for the given query result """ - for i,row in enumerate(result): + for i, row in enumerate(result): print(f"{i+1}:{row}") - - def query_schema(self,query:str,formats:str="",profile:bool=False): + + def query_schema(self, query: str, formats: str = "", profile: bool = False): """ query the schema - + Args: query(str): the SPARQL query to execute formats(str): if "triples" is in th format string show the results string profile(bool): if True show timing information for the query """ - profiler=Profiler(f"query {query}",profile=profile) - result=self.schema.query(query) + profiler = Profiler(f"query {query}", profile=profile) + result = self.schema.query(query) if "triples" in formats: self.show_triples(result) if profile: profiler.time(f" for {len(result)} triples") - return result - - def loadSchema(self,formats:str="",profile:bool=False): + return result + + def loadSchema(self, formats: str = "", profile: bool = False): """ load the schema - + Args: formats(str): the formats to dump profile(bool): if True show timing """ # https://stackoverflow.com/questions/56631109/how-to-parse-and-load-an-ontology-in-python - profiler=Profiler(f"reading {self.name} schema",profile=profile) + profiler = Profiler(f"reading {self.name} schema", profile=profile) self.schema = rdflib.Graph() - self.schema.parse (self.schema_url, format='application/rdf+xml') + self.schema.parse(self.schema_url, format="application/rdf+xml") if profile: profiler.time(f" for {len(self.schema)} triples") for t_format in formats.split(","): - if t_format and t_format!="triples": - print (self.schema.serialize(format=t_format)) - self.schema.bind('owl',OWL) + if t_format and t_format != "triples": + print(self.schema.serialize(format=t_format)) + self.schema.bind("owl", OWL) query = """select distinct ?s ?p ?o where { ?s ?p ?o} """ - self.query_schema(query,formats=formats,profile=profile) + self.query_schema(query, formats=formats, profile=profile) return self.schema - - def unprefix_value(self,value:object,prefixes:list=["http://xmlns.com/foaf/0.1/"])->str: + + def unprefix_value( + self, value: object, prefixes: list = ["http://xmlns.com/foaf/0.1/"] + ) -> str: """ get rid of RDF prefixes to simplify our life - + Args: value(object): the RDFLib value to unprefix prefixes(list): list of prefixes to remove Returns: str: a simple string representation """ - if isinstance(value,list): - if len(value)>=1: - value=value[0] - if isinstance(value,dict): - for akey in ["@id","@value"]: + if isinstance(value, list): + if len(value) >= 1: + value = value[0] + if isinstance(value, dict): + for akey in ["@id", "@value"]: if akey in value: - value=value[akey] - if isinstance(value,str): - parts=value.split("#") - if len(parts)==2: - value=parts[1] + value = value[akey] + if isinstance(value, str): + parts = value.split("#") + if len(parts) == 2: + value = parts[1] for prefix in prefixes: if value.startswith(prefix): - value=value.replace(prefix,"") + value = value.replace(prefix, "") return value - - def unprefix_row(self,row:dict): + + def unprefix_row(self, row: dict): """ get rid of the RDF prefixes in keys and values of the given row to simplify our life - + Args: row(dict): a dict of RDF values to unprefix """ for key in list(row.keys()): - org_value=row[key] - value=self.unprefix_value(org_value) - row[key]=value + org_value = row[key] + value = self.unprefix_value(org_value) + row[key] = value if "#" in key: - noprefix_key=self.unprefix_value(key) + noprefix_key = self.unprefix_value(key) row[noprefix_key] = row.pop(key) - row[f"{key}_rdf"]=org_value - + row[f"{key}_rdf"] = org_value + def toClasses(self): """ convert to a classes dict of dicts - + Returns: dict: a dict of dictionaries """ - json_ld=self.schema.serialize(format="json-ld") - schema_dict=json.loads(json_ld) - classes={} + json_ld = self.schema.serialize(format="json-ld") + schema_dict = json.loads(json_ld) + classes = {} # get rid of prefixes for row in schema_dict: self.unprefix_row(row) # pass 1 - classes for row in schema_dict: - name=row["@id"] - ptype=row["@type"] - comment=row.get("comment","") - label=row.get("label","") - subClassOf=row.get("subClassOf","") - if ptype=="Class": + name = row["@id"] + ptype = row["@type"] + comment = row.get("comment", "") + label = row.get("label", "") + subClassOf = row.get("subClassOf", "") + if ptype == "Class": if name in classes: - clazz=classes[name] + clazz = classes[name] else: - clazz={ - "@comment":comment, + clazz = { + "@comment": comment, "@label": label, - "@subClassOf": subClassOf + "@subClassOf": subClassOf, } - classes[name]=clazz + classes[name] = clazz # pass 2 - properties for row in schema_dict: - name=row["@id"] - ptype=row["@type"] - comment=row.get("comment","") - domain=row.get("domain","") - prange=row.get("range","") - plabel=row.get("label") - if ptype=="Property": - prop={ + name = row["@id"] + ptype = row["@type"] + comment = row.get("comment", "") + domain = row.get("domain", "") + prange = row.get("range", "") + plabel = row.get("label") + if ptype == "Property": + prop = { "name": name, "comment": comment, "label": plabel, "domain": domain, - "range": prange + "range": prange, } if domain in classes: - clazz=classes[domain] - clazz[name]=prop + clazz = classes[domain] + clazz[name] = prop pass - wrapped_classes={ - "classes":classes - } + wrapped_classes = {"classes": classes} return wrapped_classes diff --git a/skg/paper.py b/skg/paper.py index 7b7f559..4d8306c 100644 --- a/skg/paper.py +++ b/skg/paper.py @@ -1,46 +1,47 @@ -''' +""" Created on 2022-11-16 @author: wf -''' +""" import skg.graph from skg.doi import DOI + class Paper(skg.graph.Node): - ''' + """ a scientific paper - ''' + """ @classmethod def getSamples(cls): - samples=[ - { + samples = [ + { "wikiDataId": "Q55693406", - "title":"Designing the web for an open society", + "title": "Designing the web for an open society", "doi": "10.1145/1963405.1963408", - "DBLP_publication_ID": "conf/www/Berners-Lee11", + "DBLP_publication_ID": "conf/www/Berners-Lee11", "publication_date": 2011, }, { "doi": "10.1007/978-3-031-19433-7_21", - "title": "An Analysis of Content Gaps Versus User Needs in the Wikidata Knowledge Graph" - } + "title": "An Analysis of Content Gaps Versus User Needs in the Wikidata Knowledge Graph", + }, ] return samples - + def __init__(self): - ''' + """ Constructor - ''' - - def fromDOI(self,doi:str): + """ + + def fromDOI(self, doi: str): """ construct me from the given doi """ - self.doi=doi - self.doi_obj=DOI(doi) - self.doi_obj.meta_data=self.doi_obj.doi2Citeproc() + self.doi = doi + self.doi_obj = DOI(doi) + self.doi_obj.meta_data = self.doi_obj.doi2Citeproc() if not hasattr(self, "title"): - self.title=self.doi_obj.meta_data["title"] - if not hasattr(self,"label"): - self.label=f"https://doi.org/{self.doi}" \ No newline at end of file + self.title = self.doi_obj.meta_data["title"] + if not hasattr(self, "label"): + self.label = f"https://doi.org/{self.doi}" diff --git a/skg/profiler.py b/skg/profiler.py index 1a1b8d9..8fa56f6 100644 --- a/skg/profiler.py +++ b/skg/profiler.py @@ -1,9 +1,11 @@ -''' +""" Created on 2022-11-18 @author: wf -''' +""" import time + + class Profiler: """ simple profiler diff --git a/skg/schema.py b/skg/schema.py index 3c66722..39e59d9 100644 --- a/skg/schema.py +++ b/skg/schema.py @@ -1,83 +1,84 @@ -''' +""" Created on 2022-11-22 @author: wf -''' +""" import datetime -class Schema(): + +class Schema: """ a schema """ - - def __init__(self,name:str,url:str,authors:str,inception:str): + + def __init__(self, name: str, url: str, authors: str, inception: str): """ constructor - + Args: name(str): the name of this schema url(str): the url of this schema authors(str): the authors of this schema inception(str): the inception of this schema """ - self.name=name - self.url=url - self.authors=authors - self.inception=inception - - def classesToPlantUml(self,classes:dict,indent:str=" "): + self.name = name + self.url = url + self.authors = authors + self.inception = inception + + def classesToPlantUml(self, classes: dict, indent: str = " "): """ convert the given classes dict to plantuml - + Args: classes(dict): a dictionary of classes indent(str): the indentation to apply """ - classes=classes["classes"] - markup="" - for cname,clazz in classes.items(): - class_markup="" - rel_markup="" # relations - for pname,prop in clazz.items(): + classes = classes["classes"] + markup = "" + for cname, clazz in classes.items(): + class_markup = "" + rel_markup = "" # relations + for pname, prop in clazz.items(): if pname.startswith("@"): pass else: - prange=prop['range'] + prange = prop["range"] if prange in classes: # Class01 "1" *-- "many" Class02 : contains - rel_markup+=f"{indent}{cname}--{prange}:{pname}\n" + rel_markup += f"{indent}{cname}--{prange}:{pname}\n" else: - class_markup+=f"{indent} {pname}:{prange}\n" - class_markup=f"{indent}class {cname}{{\n{class_markup}\n{indent}}}\n" - class_markup+=rel_markup + class_markup += f"{indent} {pname}:{prange}\n" + class_markup = f"{indent}class {cname}{{\n{class_markup}\n{indent}}}\n" + class_markup += rel_markup if "@subClassOf" in clazz: - general=clazz["@subClassOf"] + general = clazz["@subClassOf"] if general: - class_markup+=f"{indent}{general} <|-- {cname}\n" - note=f"{indent}note top of {cname}\n" + class_markup += f"{indent}{general} <|-- {cname}\n" + note = f"{indent}note top of {cname}\n" if "@label" in clazz: - note+=f"""{indent}{clazz["@label"]}\n""" + note += f"""{indent}{clazz["@label"]}\n""" if "@comment" in clazz: - note+=f"""{indent}{clazz["@comment"]}\n""" - note+=f"{indent}end note\n" - class_markup=note+class_markup - markup+=class_markup + note += f"""{indent}{clazz["@comment"]}\n""" + note += f"{indent}end note\n" + class_markup = note + class_markup + markup += class_markup return markup - - def toPlantUml(self,header=None,footer=None)->str: - """ - get a plantuml version of the schema - - Args: - header(str): the header to apply - footer(str): the footer to apply - - Returns: - str: the plantuml markup - """ - timestamp=datetime.datetime.utcnow().strftime('%Y-%m-%d') - if header is None: - header=f"""/' + + def toPlantUml(self, header=None, footer=None) -> str: + """ + get a plantuml version of the schema + + Args: + header(str): the header to apply + footer(str): the footer to apply + + Returns: + str: the plantuml markup + """ + timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%d") + if header is None: + header = f"""/' {self.authors} {self.inception} updated {timestamp} @@ -92,8 +93,8 @@ class Document {{ }} package dblp {{ """ - if footer is None: - footer="}\n" - classes=self.toClasses() - markup=header+self.classesToPlantUml(classes,indent=" ")+footer - return markup \ No newline at end of file + if footer is None: + footer = "}\n" + classes = self.toClasses() + markup = header + self.classesToPlantUml(classes, indent=" ") + footer + return markup diff --git a/skg/scholar.py b/skg/scholar.py index 4085314..e645e32 100644 --- a/skg/scholar.py +++ b/skg/scholar.py @@ -1,8 +1,8 @@ -''' +""" Created on 2022-11-16 @author: wf -''' +""" import skg.graph @@ -10,19 +10,19 @@ class Scholar(skg.graph.Node): """ an instance of a scholar that writes papers to be an author """ - + @classmethod def getSamples(cls): - samples=[ + samples = [ { - "wikiDataId":"Q54303353", + "wikiDataId": "Q54303353", "name": "Stefan Decker", - "gndId":"", - "dblpId":"d/StefanDecker", - "orcid":"0000-0001-6324-7164", - "linkedInId":"", - "googleScholarUser":"uhVkSswAAAAJ", - "homepage":"http://www.stefandecker.org" + "gndId": "", + "dblpId": "d/StefanDecker", + "orcid": "0000-0001-6324-7164", + "linkedInId": "", + "googleScholarUser": "uhVkSswAAAAJ", + "homepage": "http://www.stefandecker.org", }, { "name": "Tim Berners-Lee", @@ -36,29 +36,26 @@ def getSamples(cls): { "name": "Anna Lisa Gentile", "wikiDataId": "Q54832532", - "Semantic_Scholar_author_ID": "Anna Lisa Gentile" - } + "Semantic_Scholar_author_ID": "Anna Lisa Gentile", + }, ] return samples - - + def __init__(self): """ constructor """ - + + class Institution(skg.graph.Node): """ academic institution a scholar might be affiliated with """ - + @classmethod def getSamples(cls): - samples=[ - { - "wikiDataId": "Q273263", - "short_name": "RWTH Aachen (German)" - }, + samples = [ + {"wikiDataId": "Q273263", "short_name": "RWTH Aachen (German)"}, { "wikiDataId": "Q391028", "inception": "1908", @@ -66,12 +63,12 @@ def getSamples(cls): "country": "Canada", "image": "https://commons.wikimedia.org/wiki/File:Irving_K._Barber_Library.jpg", "located_in": "Vancouver", - "official_website": "https://www.ubc.ca/" - } + "official_website": "https://www.ubc.ca/", + }, ] - return samples - + return samples + def __init__(self): """ constructor - """ \ No newline at end of file + """ diff --git a/skg/scholargrid.py b/skg/scholargrid.py index 2c23e9a..b441a08 100644 --- a/skg/scholargrid.py +++ b/skg/scholargrid.py @@ -1,150 +1,177 @@ -''' +""" Created on 2023-01-04 @author: wf -''' -from skg.smw import SemWiki -from wd.wdgrid import WikidataGrid,GridSync -from spreadsheet.wbquery import WikibaseQuery -from lodstorage.sparql import SPARQL +""" from typing import Callable +from lodstorage.sparql import SPARQL +from spreadsheet.wbquery import WikibaseQuery +from wd.wdgrid import GridSync, WikidataGrid + +from skg.smw import SemWiki + -class ScholarQuery(): - @classmethod +class ScholarQuery: + @classmethod def get(cls) -> WikibaseQuery: """ get the WikiBaseQuery for scholars - + Returns: WikibaseQuery: the wikibase query """ - scholar_mapping=[ + scholar_mapping = [ # @TODO use metamodel info and read from wiki - {'Column': '', - 'Entity': 'Scholar', - 'Lookup': '', - 'PropVarname': 'instanceof', - 'PropertyId': 'P31', - 'PropertyName': 'instanceof', - 'Qualifier': '', - 'Type': '', - 'Value': 'Q5'}, - {'Column': 'wikiDataId', - 'Entity': 'Scholar', - 'Lookup': '', - 'PropVarname': '', - 'PropertyId': '', - 'PropertyName': '', - 'Qualifier': '', - 'Type': 'item', - 'Value': ''}, - {'Column': 'name', - 'Entity': 'Scholar', - 'Lookup': 'Q101352', - 'PropVarname': 'family_name', - 'PropertyId': 'P734', - 'PropertyName': 'family name', - 'Qualifier': '', - 'Type': '', - 'Value': ''}, - {'Column': 'firstName', - 'Entity': 'Scholar', - 'Lookup': 'Q202444', - 'PropVarname': 'given_name', - 'PropertyId': 'P735', - 'PropertyName': 'given name', - 'Qualifier': '', - 'Type': '', - 'Value': ''}, - {'Column': 'homepage', - 'Entity': 'Scholar', - 'Lookup': '', - 'PropVarname': 'official_website', - 'PropertyId': 'P856', - 'PropertyName': 'official website', - 'Qualifier': '', - 'Type': 'url', - 'Value': ''}, - {'Column': 'linkedInId', - 'Entity': 'Scholar', - 'Lookup': '', - 'PropVarname': 'LinkedIn_personal_profile_ID', - 'PropertyId': 'P6634', - 'PropertyName': 'LinkedIn personal profile ID', - 'Qualifier': '', - 'Type': 'extid', - 'Value': ''}, - {'Column': 'orcid', - 'Entity': 'Scholar', - 'Lookup': '', - 'PropVarname': 'ORCID_iD', - 'PropertyId': 'P496', - 'PropertyName': 'ORCID iD', - 'Qualifier': '', - 'Type': 'extid', - 'Value': ''}, - {'Column': 'googleScholarUser', - 'Entity': 'Scholar', - 'Lookup': '', - 'PropVarname': 'Google_Scholar_author_ID', - 'PropertyId': 'P1960', - 'PropertyName': 'Google Scholar author ID', - 'Qualifier': '', - 'Type': 'extid', - 'Value': ''}, - {'Column': 'researchGate', - 'Entity': 'Scholar', - 'Lookup': '', - 'PropVarname': 'ResearchGate_profile_ID', - 'PropertyId': 'P2038', - 'PropertyName': 'ResearchGate profile ID', - 'Qualifier': '', - 'Type': 'extid', - 'Value': ''}, - {'Column': 'gndId', - 'Entity': 'Scholar', - 'Lookup': '', - 'PropVarname': 'GND_ID', - 'PropertyId': 'P227', - 'PropertyName': 'GND ID', - 'Qualifier': '', - 'Type': 'extid', - 'Value': ''}, - {'Column': 'dblpId', - 'Entity': 'Scholar', - 'Lookup': '', - 'PropVarname': 'DBLP_author_ID', - 'PropertyId': 'P2456', - 'PropertyName': 'DBLP author ID', - 'Qualifier': '', - 'Type': 'extid', - 'Value': ''} + { + "Column": "", + "Entity": "Scholar", + "Lookup": "", + "PropVarname": "instanceof", + "PropertyId": "P31", + "PropertyName": "instanceof", + "Qualifier": "", + "Type": "", + "Value": "Q5", + }, + { + "Column": "wikiDataId", + "Entity": "Scholar", + "Lookup": "", + "PropVarname": "", + "PropertyId": "", + "PropertyName": "", + "Qualifier": "", + "Type": "item", + "Value": "", + }, + { + "Column": "name", + "Entity": "Scholar", + "Lookup": "Q101352", + "PropVarname": "family_name", + "PropertyId": "P734", + "PropertyName": "family name", + "Qualifier": "", + "Type": "", + "Value": "", + }, + { + "Column": "firstName", + "Entity": "Scholar", + "Lookup": "Q202444", + "PropVarname": "given_name", + "PropertyId": "P735", + "PropertyName": "given name", + "Qualifier": "", + "Type": "", + "Value": "", + }, + { + "Column": "homepage", + "Entity": "Scholar", + "Lookup": "", + "PropVarname": "official_website", + "PropertyId": "P856", + "PropertyName": "official website", + "Qualifier": "", + "Type": "url", + "Value": "", + }, + { + "Column": "linkedInId", + "Entity": "Scholar", + "Lookup": "", + "PropVarname": "LinkedIn_personal_profile_ID", + "PropertyId": "P6634", + "PropertyName": "LinkedIn personal profile ID", + "Qualifier": "", + "Type": "extid", + "Value": "", + }, + { + "Column": "orcid", + "Entity": "Scholar", + "Lookup": "", + "PropVarname": "ORCID_iD", + "PropertyId": "P496", + "PropertyName": "ORCID iD", + "Qualifier": "", + "Type": "extid", + "Value": "", + }, + { + "Column": "googleScholarUser", + "Entity": "Scholar", + "Lookup": "", + "PropVarname": "Google_Scholar_author_ID", + "PropertyId": "P1960", + "PropertyName": "Google Scholar author ID", + "Qualifier": "", + "Type": "extid", + "Value": "", + }, + { + "Column": "researchGate", + "Entity": "Scholar", + "Lookup": "", + "PropVarname": "ResearchGate_profile_ID", + "PropertyId": "P2038", + "PropertyName": "ResearchGate profile ID", + "Qualifier": "", + "Type": "extid", + "Value": "", + }, + { + "Column": "gndId", + "Entity": "Scholar", + "Lookup": "", + "PropVarname": "GND_ID", + "PropertyId": "P227", + "PropertyName": "GND ID", + "Qualifier": "", + "Type": "extid", + "Value": "", + }, + { + "Column": "dblpId", + "Entity": "Scholar", + "Lookup": "", + "PropVarname": "DBLP_author_ID", + "PropertyId": "P2456", + "PropertyName": "DBLP author ID", + "Qualifier": "", + "Type": "extid", + "Value": "", + }, ] - wbQuery=WikibaseQuery("scholar") + wbQuery = WikibaseQuery("scholar") for row in scholar_mapping: wbQuery.addPropertyFromDescriptionRow(row) return wbQuery + class SmwGrid(GridSync): """ a semantic mediawiki based grid synchable with WikiData - - """ + + """ + def __init__( - self, - app, - entityName: str, - entityPluralName: str, - pk: str, - getLod: Callable, - wikiUsers: list, - wikiId: str, - sparql: SPARQL, - debug: bool = False): + self, + app, + entityName: str, + entityPluralName: str, + pk: str, + getLod: Callable, + wikiUsers: list, + wikiId: str, + sparql: SPARQL, + debug: bool = False, + ): """ constructor - + Args: app(App): the app that i am part of entityName(str): the name of the entity type of items to be shown in the grid @@ -156,21 +183,21 @@ def __init__( sparql(SPARQL): the SPARQL endpoint to use debug(bool): if True show debugging information """ - self.app=app - self.wikiUsers=wikiUsers - self.wikiId=wikiId - wikiUser=self.wikiUsers[wikiId] - self.semwiki=SemWiki(wikiUser) + self.app = app + self.wikiUsers = wikiUsers + self.wikiId = wikiId + wikiUser = self.wikiUsers[wikiId] + self.semwiki = SemWiki(wikiUser) wdGrid = WikidataGrid( - app=app, - source=wikiId, - entityName=entityName, - entityPluralName=entityPluralName, - getLod=getLod, - debug=debug + app=app, + source=wikiId, + entityName=entityName, + entityPluralName=entityPluralName, + getLod=getLod, + debug=debug, ) # we'd rather lazy load - #wdGrid.lod=wdGrid.getLod() + # wdGrid.lod=wdGrid.getLod() super().__init__(wdGrid, entityName, pk, sparql=sparql, debug=debug) @@ -178,11 +205,13 @@ class ScholarGrid(SmwGrid): """ show a grid of scholars """ - - def __init__(self, app, wikiUsers, wikiId: str, sparql: SPARQL, debug: bool = False): + + def __init__( + self, app, wikiUsers, wikiId: str, sparql: SPARQL, debug: bool = False + ): """ constructor - + Args: app(App): the app that I am part of wikiUsers(list): the wikiUsers @@ -194,30 +223,30 @@ def __init__(self, app, wikiUsers, wikiId: str, sparql: SPARQL, debug: bool = Fa entityPluralName = "Scholars" pk = "item" super().__init__( - app=app, - wikiUsers=wikiUsers, - wikiId=wikiId, - entityName=entityName, - entityPluralName=entityPluralName, - pk=pk, - getLod=self.getScholars, - sparql=sparql, - debug=debug + app=app, + wikiUsers=wikiUsers, + wikiId=wikiId, + entityName=entityName, + entityPluralName=entityPluralName, + pk=pk, + getLod=self.getScholars, + sparql=sparql, + debug=debug, ) - - def getScholars(self)->list: + + def getScholars(self) -> list: """ - get the list of scholars - + get the list of scholars + Returns: list: the list of dicts of scholars """ # get a dict of dict - scholars_dod=self.semwiki.scholars() + scholars_dod = self.semwiki.scholars() # get a list of dicts - scholars_lod=list(scholars_dod.values()) + scholars_lod = list(scholars_dod.values()) # @TODO - shouldn't this be better specified in the mapping? for row in scholars_lod: - row["label"]=row["Scholar"] - self.wbQuery=ScholarQuery.get() - return scholars_lod \ No newline at end of file + row["label"] = row["Scholar"] + self.wbQuery = ScholarQuery.get() + return scholars_lod diff --git a/skg/search.py b/skg/search.py index 0d0167f..3d1ff81 100644 --- a/skg/search.py +++ b/skg/search.py @@ -1,18 +1,26 @@ -''' +""" Created on 2022-11-19 @author: wf -''' +""" + class SearchOptions: """ wrapper for search results """ - def __init__(self,limit:int=9,lang='en',show:bool=True, - markup_names=["bibtex"],open_browser:bool=False): + + def __init__( + self, + limit: int = 9, + lang="en", + show: bool = True, + markup_names=["bibtex"], + open_browser: bool = False, + ): """ constructor - + Args: limit(int): limit for the maximum number of results lang(str): the language code to use for the search @@ -21,24 +29,26 @@ def __init__(self,limit:int=9,lang='en',show:bool=True, open_browser(bool): if True open a browser for the target page of the item e.g. scholia """ - self.limit=limit - self.lang=lang - self.show=show - self.markup_names=markup_names - self.open_browser=open_browser - + self.limit = limit + self.lang = lang + self.show = show + self.markup_names = markup_names + self.open_browser = open_browser + + class SearchResult: """ wrapper for search results """ - def __init__(self,search_list:list,options=SearchOptions): + + def __init__(self, search_list: list, options=SearchOptions): """ constructor - + Args: search_list(list): a list of search terms options(SearchOptions): the search options to apply """ - self.search_list=search_list - self.options=options - self.items=[] \ No newline at end of file + self.search_list = search_list + self.options = options + self.items = [] diff --git a/skg/searchengine.py b/skg/searchengine.py index dfee2c0..0bab772 100644 --- a/skg/searchengine.py +++ b/skg/searchengine.py @@ -1,51 +1,53 @@ -''' +""" Created on 18.11.2022 @author: wf -''' +""" +import sys + from search_engine_parser.core.engines.bing import Search as BingSearch +from search_engine_parser.core.engines.duckduckgo import Search as DuckDuckGoSearch from search_engine_parser.core.engines.google import Search as GoogleSearch +from search_engine_parser.core.engines.googlescholar import ( + Search as GoogleScholarSearch, +) from search_engine_parser.core.engines.yahoo import Search as YahooSearch -from search_engine_parser.core.engines.duckduckgo import Search as DuckDuckGoSearch -from search_engine_parser.core.engines.googlescholar import Search as GoogleScholarSearch -import sys - + + class InternetSearch: """ generic internet search """ - - def __init__(self,debug:bool=False): + + def __init__(self, debug: bool = False): """ constructor """ - self.debug=debug + self.debug = debug self.gsearch = GoogleSearch() self.ysearch = YahooSearch() self.bsearch = BingSearch() self.dsearch = DuckDuckGoSearch() - self.gs_search=GoogleScholarSearch() - self.engines=[self.gs_search,self.ysearch,self.dsearch,self.bsearch] - - def handleException(self,ex): + self.gs_search = GoogleScholarSearch() + self.engines = [self.gs_search, self.ysearch, self.dsearch, self.bsearch] + + def handleException(self, ex): """ handle the given exception """ if self.debug: - print(f"{str(ex)}",file=sys.stderr) - - def search(self,search_term:str): + print(f"{str(ex)}", file=sys.stderr) + + def search(self, search_term: str): """ search my engines for the given search_term """ - search_args=(search_term, 1) + search_args = (search_term, 1) for engine in self.engines: try: - result=engine.search(*search_args) - yield engine.name,result.results + result = engine.search(*search_args) + yield engine.name, result.results pass except Exception as ex: self.handleException(ex) pass - - \ No newline at end of file diff --git a/skg/semantic_scholar.py b/skg/semantic_scholar.py index 2ba0e0e..f7750b8 100644 --- a/skg/semantic_scholar.py +++ b/skg/semantic_scholar.py @@ -1,32 +1,32 @@ -''' +""" Created on 2022-11-22 @author: wf -''' +""" # see https://pypi.org/project/semanticscholar/ from semanticscholar import SemanticScholar as SemScholar + class SemanticScholar: """ wrapper for Semantic Scholar API """ - + def __init__(self): """ constructor """ self.sch = SemScholar() - - def get_paper(self,doi:str): + + def get_paper(self, doi: str): """ get the paper with the given DOI identifier """ - paper=self.sch.get_paper(doi) + paper = self.sch.get_paper(doi) return paper - + def get_author(self): """ https://api.semanticscholar.org/api-docs/graph#tag/Author-Data/operation/get_graph_get_author_search """ pass - \ No newline at end of file diff --git a/skg/skgbrowser.py b/skg/skgbrowser.py index 1a35715..37e6f9a 100644 --- a/skg/skgbrowser.py +++ b/skg/skgbrowser.py @@ -1,18 +1,21 @@ -''' +""" Created on 2022-11-18 @author: wf -''' -from nicegui import ui, Client +""" +from urllib import parse + from ngwidgets.input_webserver import InputWebserver from ngwidgets.webserver import WebserverConfig -from ngwidgets.widgets import Link, Lang -from urllib import parse +from ngwidgets.widgets import Lang, Link +from nicegui import Client, ui +from wikibot3rd.wikiuser import WikiUser + from skg.orcid import ORCID from skg.scholargrid import ScholarGrid -from wikibot3rd.wikiuser import WikiUser -from skg.wikidata import Wikidata from skg.version import Version +from skg.wikidata import Wikidata + class SkgBrowser(InputWebserver): """ @@ -20,162 +23,178 @@ class SkgBrowser(InputWebserver): """ @classmethod - def get_config(cls)->WebserverConfig: - copy_right="(c)2022 Wolfgang Fahl" + def get_config(cls) -> WebserverConfig: + copy_right = "(c)2022 Wolfgang Fahl" if not hasattr(cls, "config"): - cls.config=WebserverConfig(copy_right=copy_right,version=Version(),default_port=8765) + cls.config = WebserverConfig( + copy_right=copy_right, version=Version(), default_port=8765 + ) return cls.config - + def __init__(self): """Constructs all the necessary attributes for the WebServer object.""" - config=SkgBrowser.get_config() - self.sotsog=config.sotsog - self.options=config.options - InputWebserver.__init__(self,config=config) - self.language="en" - self.wikiId="or" - self.markup_name=None - + config = SkgBrowser.get_config() + self.sotsog = config.sotsog + self.options = config.options + InputWebserver.__init__(self, config=config) + self.language = "en" + self.wikiId = "or" + self.markup_name = None + def configure_run(self): - self.markup_names=["-","bibtex","scite","smw"] - self.markup_name=self.markup_names[1] + self.markup_names = ["-", "bibtex", "scite", "smw"] + self.markup_name = self.markup_names[1] # wiki users - self.wikiUsers=WikiUser.getWikiUsers() - self.wikiId=self.args.wikiId - wikidata=Wikidata() - self.sparql=wikidata.sparql - - @ui.page('/scholars') + self.wikiUsers = WikiUser.getWikiUsers() + self.wikiId = self.args.wikiId + wikidata = Wikidata() + self.sparql = wikidata.sparql + + @ui.page("/scholars") async def scholars(client: Client): return await self.scholars(client) - + def configure_menu(self): """ configure additional non-standard menu entries """ - #self.link_button(name='Scholars',icon_name='account-school',target='/scholars') + # self.link_button(name='Scholars',icon_name='account-school',target='/scholars') pass - - def createItemLink(self,item,term:str,index:int)->str: + + def createItemLink(self, item, term: str, index: int) -> str: """ create a link for the given item - + Args: item(Node): the item to create a link for term(str): the """ - if index>0: - style="color:grey" - text=f"{term}{index+1}" - delim=" " + if index > 0: + style = "color:grey" + text = f"{term}{index+1}" + delim = " " else: - style="" - text=term - delim="" - link=Link.create(item.browser_url(),text,tooltip=item.label,target="_blank",style=style) - if item.concept.name=="Scholar": - if hasattr(item,"orcid"): - orcid=ORCID(item.orcid) - link+=orcid.asHtml() - markup=delim+link - return markup - - async def onSearchButton(self,_msg): + style = "" + text = term + delim = "" + link = Link.create( + item.browser_url(), text, tooltip=item.label, target="_blank", style=style + ) + if item.concept.name == "Scholar": + if hasattr(item, "orcid"): + orcid = ORCID(item.orcid) + link += orcid.asHtml() + markup = delim + link + return markup + + async def onSearchButton(self, _msg): """ handle button to search for terms """ try: - self.results.content="" - self.markup.content="" - terms=self.searchTerms.value.split("\n") - self.messages.content="Searching" - delim="" + self.results.content = "" + self.markup.content = "" + terms = self.searchTerms.value.split("\n") + self.messages.content = "Searching" + delim = "" for term in terms: if term: - msg=f"... {term}\n" - self.messages.content+=msg - if self.markup_name=="-": - self.options.markup_names=[] + msg = f"... {term}\n" + self.messages.content += msg + if self.markup_name == "-": + self.options.markup_names = [] else: - self.options.markup_names=[self.markup_name] - search_result=self.sotsog.search([term],self.options) - items=search_result.items - rmarkup="" - if len(items)==0: + self.options.markup_names = [self.markup_name] + search_result = self.sotsog.search([term], self.options) + items = search_result.items + rmarkup = "" + if len(items) == 0: # TODO check google search # https://pypi.org/project/googlesearch-python/ - params=parse.urlencode({'q':term}) - search_url=f"https://www.google.com/search?{params}" - rmarkup=Link.create(search_url, term, "not found", target="_blank",style="color:red") + params = parse.urlencode({"q": term}) + search_url = f"https://www.google.com/search?{params}" + rmarkup = Link.create( + search_url, + term, + "not found", + target="_blank", + style="color:red", + ) else: - for i,item in enumerate(items): - rmarkup+=self.createItemLink(item,term,i) - if len(item.markups)>0: - markups="" - for _markup_name,markup in item.markups.items(): - markups+=markup - self.markup.content+=f"
{markups}
" - #break - self.results.content+=delim+rmarkup - delim="
" - + for i, item in enumerate(items): + rmarkup += self.createItemLink(item, term, i) + if len(item.markups) > 0: + markups = "" + for _markup_name, markup in item.markups.items(): + markups += markup + self.markup.content += f"
{markups}
" + # break + self.results.content += delim + rmarkup + delim = "
" + except BaseException as ex: self.handle_exception(ex) - + def addLanguageSelect(self): """ add a language selector """ - lang_dict=Lang.get_language_dict() - self.add_select("language:",lang_dict).bind_value(self, "language") - + lang_dict = Lang.get_language_dict() + self.add_select("language:", lang_dict).bind_value(self, "language") + def addWikiUserSelect(self): """ add a wiki user selector """ - if len(self.wikiUsers)>0: - wu_dict={} + if len(self.wikiUsers) > 0: + wu_dict = {} for wikiUser in sorted(self.wikiUsers): - wu_dict[wikiUser]=wikiUser - self.add_select("wiki:",wu_dict).bind_value(self,"wikiId") - - async def scholars(self,client:Client): - ''' + wu_dict[wikiUser] = wikiUser + self.add_select("wiki:", wu_dict).bind_value(self, "wikiId") + + async def scholars(self, client: Client): + """ scholar display - - ''' + + """ self.setup_menu() with ui.element("div").classes("w-full h-full"): try: - self.scholarsGrid=ScholarGrid(self,self.wikiUsers,self.wikiId,sparql=self.sparql) + self.scholarsGrid = ScholarGrid( + self, self.wikiUsers, self.wikiId, sparql=self.sparql + ) # @TODO refactor the two setup calls to one to hide wdgrid details - #self.scholarsGrid.setup(a=self.rowB, header=self.rowA) - #self.scholarsGrid.wdgrid.setup(a=self.rowC) + # self.scholarsGrid.setup(a=self.rowB, header=self.rowA) + # self.scholarsGrid.wdgrid.setup(a=self.rowC) except BaseException as ex: - self.handle_exception(ex) + self.handle_exception(ex) await self.setup_footer() - + def configure_settings(self): """ configure settings """ self.addLanguageSelect() self.addWikiUserSelect() - - async def home(self,_client:Client): - ''' + + async def home(self, _client: Client): + """ provide the main content page - - ''' + + """ self.setup_menu() with ui.element("div").classes("w-full h-full"): with ui.splitter() as splitter: with splitter.before: - self.add_select("markup", self.markup_names).bind_value(self,"markup_name") - self.searchTerms=ui.textarea(placeholder="enter search terms") - self.searchButton=ui.button("search",on_click=self.onSearchButton) + self.add_select("markup", self.markup_names).bind_value( + self, "markup_name" + ) + self.searchTerms = ui.textarea(placeholder="enter search terms") + self.searchButton = ui.button( + "search", on_click=self.onSearchButton + ) with splitter.after: - self.markup=ui.html() - self.messages=ui.html() - self.results=ui.html() - await self.setup_footer() \ No newline at end of file + self.markup = ui.html() + self.messages = ui.html() + self.results = ui.html() + await self.setup_footer() diff --git a/skg/smw.py b/skg/smw.py index 7d66ee8..c067f56 100644 --- a/skg/smw.py +++ b/skg/smw.py @@ -1,37 +1,48 @@ -''' +""" Created on 22.11.2022 @author: wf -''' -from wikibot3rd.wikiuser import WikiUser -from wikibot3rd.wikiclient import WikiClient +""" from wikibot3rd.smw import SMWClient +from wikibot3rd.wikiclient import WikiClient +from wikibot3rd.wikiuser import WikiUser + from skg.wikidata import Wikidata + + class SemWiki: """ access to Semantic mediawiki """ - - def __init__(self,wikiUser:WikiUser,withLogin:bool=None): + + def __init__(self, wikiUser: WikiUser, withLogin: bool = None): """ - + constructor - + Args: wikiUser:WikiUser """ - self.wikiUser=wikiUser - self.wikiClient=WikiClient.ofWikiId(wikiUser.wikiId) + self.wikiUser = wikiUser + self.wikiClient = WikiClient.ofWikiId(wikiUser.wikiId) if withLogin is None: - withLogin=self.wikiClient.needsLogin() + withLogin = self.wikiClient.needsLogin() if withLogin: self.wikiClient.login() - self.smw=SMWClient(self.wikiClient.getSite()) - - def id_refs(self,mainlabel="pageTitle",condition="DOI::+",title:str="DOI references",askExtra:str="",id_prop="DOI",id_name="doi")->list: + self.smw = SMWClient(self.wikiClient.getSite()) + + def id_refs( + self, + mainlabel="pageTitle", + condition="DOI::+", + title: str = "DOI references", + askExtra: str = "", + id_prop="DOI", + id_name="doi", + ) -> list: """ get a list of id references from the given wiki - + Args: mainlabel(str): the mainlabel to use condition(str): the condition to apply @@ -45,24 +56,26 @@ def id_refs(self,mainlabel="pageTitle",condition="DOI::+",title:str="DOI referen |?Last_editor_is=lastEditor }}}} """ - refs=self.smw.query(ask,title) + refs = self.smw.query(ask, title) return refs - + def papers(self): """ get the paper records """ - askExtra="""\n|?Citation_text=reference""" - paper_records=self.id_refs(condition="Citation_text::+",title="doi paper referencs", askExtra=askExtra) + askExtra = """\n|?Citation_text=reference""" + paper_records = self.id_refs( + condition="Citation_text::+", title="doi paper referencs", askExtra=askExtra + ) return paper_records - + def scholars(self): """ get scholars """ - condition="Concept:Scholar" - mainlabel="Scholar" - askExtra="""|?Scholar wikiDataId = wikiDataId + condition = "Concept:Scholar" + mainlabel = "Scholar" + askExtra = """|?Scholar wikiDataId = wikiDataId |?Scholar name = name |?Scholar firstName = firstName |?Scholar description = description @@ -75,36 +88,42 @@ def scholars(self): |?Scholar smartCRMId = smartCRMId |sort=Scholar name,Scholar firstName |order=ascending,ascending -""" - scholars=self.id_refs(mainlabel, condition, "scholars", askExtra, "Scholar wikiDataId", "wikiDataId") +""" + scholars = self.id_refs( + mainlabel, + condition, + "scholars", + askExtra, + "Scholar wikiDataId", + "wikiDataId", + ) return scholars - + @classmethod - def asMarkup(self,scholar)->str: + def asMarkup(self, scholar) -> str: """ return the markup for the given scholar - + Args: scholar(Node): the scholar Returns: str: the semantic mediawiki markup """ - markup="{{Scholar" - - for prop_name,prop in scholar.concept.props.items(): + markup = "{{Scholar" + + for prop_name, prop in scholar.concept.props.items(): if prop.hasmap("smw"): - smw_prop=prop.getmap("smw") - if hasattr(scholar,prop_name): - value=getattr(scholar,prop_name) + smw_prop = prop.getmap("smw") + if hasattr(scholar, prop_name): + value = getattr(scholar, prop_name) # @TODO refactor - qid=Wikidata.getQid(value) - if value!=qid: + qid = Wikidata.getQid(value) + if value != qid: # potential lookup need - if prop_name!="wikiDataId": - value=Wikidata.getLabelForQid(qid) + if prop_name != "wikiDataId": + value = Wikidata.getLabelForQid(qid) else: - value=qid - markup+=f"\n|{smw_prop}={value}" - markup+="\n}}" + value = qid + markup += f"\n|{smw_prop}={value}" + markup += "\n}}" return markup - \ No newline at end of file diff --git a/skg/sotsog.py b/skg/sotsog.py index 340aae4..c883959 100644 --- a/skg/sotsog.py +++ b/skg/sotsog.py @@ -1,209 +1,251 @@ -''' +""" Created on 2022-11-16 @author: wf -''' +""" import sys import webbrowser from argparse import ArgumentParser -from skg.wdsearch import WikidataSearch -from skg.wikidata import Wikidata -from skg.smw import SemWiki -from skg.kg import SKG_Def -from skg.graph import Node -from skg.paper import Paper + +from ngwidgets.ngwidgets_cmd import WebserverCmd + +from skg.crossref import Crossref from skg.doi import DOI +from skg.graph import Node +from skg.kg import SKG_Def from skg.orcid import ORCID -from skg.crossref import Crossref -from skg.skgbrowser import SkgBrowser +from skg.paper import Paper from skg.search import SearchOptions, SearchResult -from ngwidgets.ngwidgets_cmd import WebserverCmd +from skg.skgbrowser import SkgBrowser +from skg.smw import SemWiki +from skg.wdsearch import WikidataSearch +from skg.wikidata import Wikidata +from skg.dblp2wikidata import Dblp2Wikidata class SotSog(WebserverCmd): """ - Standing on the shoulders of giants + Standing on the shoulders of giants """ - + def __init__(self): """ constructor - + """ - self.config=SkgBrowser.get_config() - self.config.sotsog=self + self.config = SkgBrowser.get_config() + self.config.sotsog = self WebserverCmd.__init__(self, self.config, SkgBrowser, DEBUG) - Node.debug=self.debug - self.wikipedia_url="https://en.wikipedia.org/wiki/Standing_on_the_shoulders_of_giants" - self.skg_def=SKG_Def() - self.scholar_concept=self.skg_def.concepts["Scholar"] - - def getMarkups(self,item,options:SearchOptions)->dict: + Node.debug = self.debug + self.wikipedia_url = ( + "https://en.wikipedia.org/wiki/Standing_on_the_shoulders_of_giants" + ) + self.skg_def = SKG_Def() + self.scholar_concept = self.skg_def.concepts["Scholar"] + + def getMarkups(self, item, options: SearchOptions) -> dict: """ get the markups for the given item and search options - + Args: item(Node): the item to get the markup for options(SearchOptions): the search options to apply """ - markups={} - do_markup=len(options.markup_names)>0 + markups = {} + do_markup = len(options.markup_names) > 0 if do_markup: - if item.concept.name=="Paper": - doi=getattr(item, "doi",None) + if item.concept.name == "Paper": + doi = getattr(item, "doi", None) if doi is not None: - crossref=Crossref() + crossref = Crossref() if "bibtex" in options.markup_names: - bibentry=crossref.doiBibEntry([doi]) - markups["bibtex"]=bibentry + bibentry = crossref.doiBibEntry([doi]) + markups["bibtex"] = bibentry if "scite" in options.markup_names: - #meta_data=crossref.doiMetaData([doi]) - #scite_entry=crossref.asScite(meta_data) + # meta_data=crossref.doiMetaData([doi]) + # scite_entry=crossref.asScite(meta_data) if not hasattr(item, "doi_obj"): item.fromDOI(doi) - scite_entry=item.doi_obj.asScite() - markups["scite"]=scite_entry - if item.concept.name=="Scholar": + scite_entry = item.doi_obj.asScite() + markups["scite"] = scite_entry + if item.concept.name == "Scholar": if "smw" in options.markup_names: - markups["smw"]=SemWiki.asMarkup(item) + markups["smw"] = SemWiki.asMarkup(item) return markups - - def wd_search(self,wd:Wikidata,search_term:str,options)->list: + + def wd_search(self, wd: Wikidata, search_term: str, options) -> list: """ do a wikidata search """ - items=[] - wds=WikidataSearch(language=options.lang,debug=self.debug) - search_options=wds.searchOptions(search_term,limit=options.limit) - qids=[] - for qid,itemLabel,desc in search_options: + items = [] + wds = WikidataSearch(language=options.lang, debug=self.debug) + search_options = wds.searchOptions(search_term, limit=options.limit) + qids = [] + for qid, itemLabel, desc in search_options: qids.append(qid) - class_map=wd.getClassQids(qids) - for qid,itemLabel,desc in search_options: + class_map = wd.getClassQids(qids) + for qid, itemLabel, desc in search_options: if qid in class_map: - class_rows=class_map[qid] + class_rows = class_map[qid] for class_row in class_rows: - class_qid=class_row["class_qid"] - concept=self.skg_def.conceptForQid(class_qid) + class_qid = class_row["class_qid"] + concept = self.skg_def.conceptForQid(class_qid) if concept is not None: - wd_items=concept.cls.from_wikidata_via_id(concept,"wikiDataId", qid, lang=options.lang) - if len(wd_items)>0: - item=wd_items[0] + wd_items = concept.cls.from_wikidata_via_id( + concept, "wikiDataId", qid, lang=options.lang + ) + if len(wd_items) > 0: + item = wd_items[0] items.append(item) - self.handleItem(item,qid,itemLabel,desc,options) - return items - - def handleItem(self,item,item_id,itemLabel,desc,options): + self.handleItem(item, qid, itemLabel, desc, options) + return items + + def handleItem(self, item, item_id, itemLabel, desc, options): """ handle the given item as a search result """ if options.show: print(f"{itemLabel}({item_id}):{desc}✅") print(item) - item.markups=self.getMarkups(item,options) + item.markups = self.getMarkups(item, options) if options.show: - for markup_name,markup in item.markups.items(): + for markup_name, markup in item.markups.items(): print(f"{markup_name} markup:") print(markup) pass if options.open_browser: - browser_url=item.browser_url() + browser_url = item.browser_url() if browser_url is not None: print(f"opening {browser_url} in browser") webbrowser.open(browser_url) - def handleItems(self,items,options): + def handleItems(self, items, options): """ handle the given items """ for item in items: - item_id=item.wikiDataId - itemLabel=item.label - desc="?" + item_id = item.wikiDataId + itemLabel = item.label + desc = "?" self.handleItem(item, item_id, itemLabel, desc, options) - - def handleDoiItem(self,item,options:SearchOptions): - item_id=item.doi - itemLabel=item.title - desc=item.title + + def handleDoiItem(self, item, options: SearchOptions): + item_id = item.doi + itemLabel = item.title + desc = item.title self.handleItem(item, item_id, itemLabel, desc, options) - - def search(self,search_list,options:SearchOptions)->SearchResult: + + def search(self, search_list, options: SearchOptions) -> SearchResult: """ search with the given search list - + Args: search_list(list): a list of search terms options(SearchOptions): the search options to apply """ - search_result=SearchResult(search_list,options) - search_term=' '.join(search_list) + search_result = SearchResult(search_list, options) + search_term = " ".join(search_list) for prefix in ["https://doi.org"]: if search_term.startswith(prefix): - search_term=search_term.replace(prefix,"") - wd=Wikidata(debug=self.debug) + search_term = search_term.replace(prefix, "") + wd = Wikidata(debug=self.debug) if ORCID.isORCID(search_term): - scholar_concept=self.skg_def.concepts["Scholar"] - items=Node.from_wikidata_via_id(scholar_concept, "orcid", search_term, options.lang) - self.handleItems(items,options) + scholar_concept = self.skg_def.concepts["Scholar"] + items = Node.from_wikidata_via_id( + scholar_concept, "orcid", search_term, options.lang + ) + self.handleItems(items, options) elif DOI.isDOI(search_term): # DOI may not be referencing paper but something else - paper_concept=self.skg_def.concepts["Paper"] - items=Paper.from_wikidata_via_id(paper_concept, "doi", search_term, options.lang) - self.handleItems(items,options) - dblp_items=Paper.from_dblp_via_id(paper_concept, "doi", search_term.lower()) - if len(dblp_items)==0: - paper=Paper() - paper.concept=paper_concept + paper_concept = self.skg_def.concepts["Paper"] + items = Paper.from_wikidata_via_id( + paper_concept, "doi", search_term, options.lang + ) + self.handleItems(items, options) + dblp_items = Paper.from_dblp_via_id( + paper_concept, "doi", search_term.lower() + ) + if len(dblp_items) == 0: + paper = Paper() + paper.concept = paper_concept paper.fromDOI(search_term) - paper.provenance="doi" - dblp_items=[paper] + paper.provenance = "doi" + dblp_items = [paper] for item in dblp_items: - self.handleDoiItem(item,options) + self.handleDoiItem(item, options) items.extend(dblp_items) else: - items=self.wd_search(wd,search_term,options) - search_result.items=items + items = self.wd_search(wd, search_term, options) + search_result.items = items return search_result - - def getArgParser(self,description:str,version_msg)->ArgumentParser: + + def getArgParser(self, description: str, version_msg) -> ArgumentParser: """ override the default argparser call - """ - parser=super().getArgParser(description, version_msg) - parser.add_argument('search', action='store', nargs='*', help="search terms") - parser.add_argument("--bibtex",help="output bibtex format",action="store_true") - parser.add_argument("-la", "--lang",help="language code to use",default="en") - parser.add_argument("-li", "--limit",help="limit the number of search results",type=int,default=9) - parser.add_argument("-nb","--nobrowser",help="do not open browser",action="store_true") - parser.add_argument("--scite",help="output #scite format",action="store_true") - parser.add_argument("--smw",help="output Semantic MediaWiki (SMW) format",action="store_true") - parser.add_argument("--wikiId",help="the id of the SMW wiki to connect with",default="ceur-ws") + """ + parser = super().getArgParser(description, version_msg) + parser.add_argument("search", action="store", nargs="*", help="search terms") + parser.add_argument( + "--bibtex", help="output bibtex format", action="store_true" + ) + parser.add_argument("-la", "--lang", help="language code to use", default="en") + parser.add_argument( + "-li", + "--limit", + help="limit the number of search results", + type=int, + default=9, + ) + parser.add_argument( + "-nb", "--nobrowser", help="do not open browser", action="store_true" + ) + parser.add_argument("--scite", help="output #scite format", action="store_true") + parser.add_argument( + "--smw", help="output Semantic MediaWiki (SMW) format", action="store_true" + ) + parser.add_argument( + "--wikiId", help="the id of the SMW wiki to connect with", default="ceur-ws" + ) + parser.add_argument("-dw","--dblp2wikidata", action="store_true", help="Transfer DBLP entries to Wikidata") + return parser - - def handle_args(self)->bool: - markup_names=[] - args=self.args - if args.bibtex: markup_names.append("bibtex") - if args.scite: markup_names.append("scite") - if args.smw: markup_names.append("smw") - self.config.options=SearchOptions(limit=args.limit,lang=args.lang, - markup_names=markup_names, - open_browser=not args.nobrowser) - handled=super().handle_args() + + def handle_args(self) -> bool: + """ + handle the command line args + """ + markup_names = [] + args = self.args + if args.bibtex: + markup_names.append("bibtex") + if args.scite: + markup_names.append("scite") + if args.smw: + markup_names.append("smw") + self.config.options = SearchOptions( + limit=args.limit, + lang=args.lang, + markup_names=markup_names, + open_browser=not args.nobrowser, + ) + handled = super().handle_args() if not handled: - self.search(args.search,self.config.options) - handled=True - return handled - - -def main(argv:list=None): + if args.dblp2wikidata: + d2w=Dblp2Wikidata() + d2w.transfer(args) + self.search(args.search, self.config.options) + handled = True + return handled + + +def main(argv: list = None): """ main call """ - cmd=SotSog() - exit_code=cmd.cmd_main(argv) + cmd = SotSog() + exit_code = cmd.cmd_main(argv) return exit_code - + + DEBUG = 0 if __name__ == "__main__": if DEBUG: diff --git a/skg/version.py b/skg/version.py index 6d03d4c..4e0dbc5 100644 --- a/skg/version.py +++ b/skg/version.py @@ -1,30 +1,32 @@ -''' +""" Created on 2022-04-01 @author: wf -''' +""" import skg + class Version(object): - ''' + """ Version handling for pysotsog - ''' - name="pysotsog" - description='sotsog: Standing on the shoulders of giants - with direct access to the clouds' - version=skg.__version__ - date = '2022-11-16' - updated = '2023-10-28' - authors='Wolfgang Fahl' - doc_url="https://wiki.bitplan.com/index.php/Pysotsog" - chat_url="https://github.com/WolfgangFahl/pysotsog/discussions" - cm_url="https://github.com/WolfgangFahl/pysotsog" - license=f'''Copyright 2022 contributors. All rights reserved. + """ + + name = "pysotsog" + description = ( + "sotsog: Standing on the shoulders of giants - with direct access to the clouds" + ) + version = skg.__version__ + date = "2022-11-16" + updated = "2023-10-28" + authors = "Wolfgang Fahl" + doc_url = "https://wiki.bitplan.com/index.php/Pysotsog" + chat_url = "https://github.com/WolfgangFahl/pysotsog/discussions" + cm_url = "https://github.com/WolfgangFahl/pysotsog" + license = f"""Copyright 2022 contributors. All rights reserved. Licensed under the Apache License 2.0 http://www.apache.org/licenses/LICENSE-2.0 Distributed on an "AS IS" basis without warranties - or conditions of any kind, either express or implied.''' - longDescription=f"""{name} version {version} + or conditions of any kind, either express or implied.""" + longDescription = f"""{name} version {version} {description} Created by {authors} on {date} last updated {updated}""" - - \ No newline at end of file diff --git a/skg/wdsearch.py b/skg/wdsearch.py index 56c7f1a..1ce392b 100644 --- a/skg/wdsearch.py +++ b/skg/wdsearch.py @@ -1,78 +1,85 @@ -''' +""" Created on 24.07.2022 @author: wf -''' +""" import json import os -import urllib.request import urllib.parse +import urllib.request + class WikidataSearch(object): - ''' + """ Wikidata Search - ''' + """ - def __init__(self,language='en',timeout=2.0,debug:bool=False): - ''' + def __init__(self, language="en", timeout=2.0, debug: bool = False): + """ Constructor - + Args: language(str): the language to use e.g. en/fr timeout(float): maximum time to wait for result debug(bool): if True debug details should be shown - ''' - self.language=language - self.timeout=timeout - self.debug=debug - - def searchOptions(self,searchFor:str,limit:int=9)->list: - ''' + """ + self.language = language + self.timeout = timeout + self.debug = debug + + def searchOptions(self, searchFor: str, limit: int = 9) -> list: + """ search and return a list of qid,itemLabel description tuples - + Args: searchFor(str): the string to search for limit(int): the maximum amount of results to search for - ''' - options=[] - srlist=self.search(searchFor, limit) + """ + options = [] + srlist = self.search(searchFor, limit) if srlist is not None: for sr in srlist: - qid=sr["id"] - itemLabel=sr["label"] - desc="" + qid = sr["id"] + itemLabel = sr["label"] + desc = "" if "display" in sr: - display=sr["display"] + display = sr["display"] if "description" in display: - desc=display["description"]["value"] - options.append((qid,itemLabel,desc,)) + desc = display["description"]["value"] + options.append( + ( + qid, + itemLabel, + desc, + ) + ) return options - def search(self,searchFor:str,limit:int=9): - ''' - + def search(self, searchFor: str, limit: int = 9): + """ + Args: searchFor(str): the string to search for limit(int): the maximum amount of results to search for - ''' + """ try: - apiurl=f"https://www.wikidata.org/w/api.php?action=wbsearchentities&language={self.language}&format=json&limit={limit}&search=" + apiurl = f"https://www.wikidata.org/w/api.php?action=wbsearchentities&language={self.language}&format=json&limit={limit}&search=" if self.debug: print(apiurl) - searchEncoded=urllib.parse.quote_plus(searchFor) - apisearch=apiurl+searchEncoded - with urllib.request.urlopen(apisearch,timeout=self.timeout) as url: + searchEncoded = urllib.parse.quote_plus(searchFor) + apisearch = apiurl + searchEncoded + with urllib.request.urlopen(apisearch, timeout=self.timeout) as url: searchResult = json.loads(url.read().decode()) return searchResult["search"] except Exception as _error: return None - + def getProperties(self): - ''' - get the Wikidata Properties - ''' - scriptdir=os.path.dirname(__file__) - jsonPath=f"{scriptdir}/resources/wdprops.json" + """ + get the Wikidata Properties + """ + scriptdir = os.path.dirname(__file__) + jsonPath = f"{scriptdir}/resources/wdprops.json" with open(jsonPath) as jsonFile: - props=json.load(jsonFile) - return props \ No newline at end of file + props = json.load(jsonFile) + return props diff --git a/skg/wikidata.py b/skg/wikidata.py index 3d0d8c4..1d3fe4f 100644 --- a/skg/wikidata.py +++ b/skg/wikidata.py @@ -1,67 +1,73 @@ -''' +""" Created on 2022-11-16 @author: wf -''' -from lodstorage.sparql import SPARQL +""" from lodstorage.lod import LOD +from lodstorage.sparql import SPARQL + + class Wikidata: """ Wikidata access wrapper """ - instance=None - def __init__(self,endpoint:str="https://query.wikidata.org/sparql",debug:bool=False): + + instance = None + + def __init__( + self, endpoint: str = "https://query.wikidata.org/sparql", debug: bool = False + ): """ constructor """ - self.endpoint=endpoint + self.endpoint = endpoint self.sparql = SPARQL(endpoint) - self.debug=debug - Wikidata.instance=self - + self.debug = debug + Wikidata.instance = self + @classmethod def getInstance(cls): if cls.instance is None: Wikidata() return cls.instance - + @classmethod - def getQid(self,wd_url:str): - qid=wd_url.replace("http://www.wikidata.org/entity/","") + def getQid(self, wd_url: str): + qid = wd_url.replace("http://www.wikidata.org/entity/", "") return qid - + @classmethod - def getLabelForQid(self,qid:str,lang:str="en")->str: + def getLabelForQid(self, qid: str, lang: str = "en") -> str: """ get a label for the given Wikidata QID - + Args: qid(str): the Wikidata ID lang(str): the language """ - sparql_query=f"""SELECT ?itemLabel WHERE {{ + sparql_query = f"""SELECT ?itemLabel WHERE {{ VALUES ?item {{ wd:{qid} }} ?item rdfs:label ?itemLabel. FILTER(LANG(?itemLabel)="{lang}"). }}""" - wd=Wikidata.getInstance() - lod=wd.sparql.queryAsListOfDicts(sparql_query) - label=None - if len(lod)==1: - label=lod[0]["itemLabel"] + wd = Wikidata.getInstance() + lod = wd.sparql.queryAsListOfDicts(sparql_query) + label = None + if len(lod) == 1: + label = lod[0]["itemLabel"] return label - - def getClassQids(self,qids:list)->dict: + + def getClassQids(self, qids: list) -> dict: """ - get the Wikidata Q-Identifiers + get the Wikidata Q-Identifiers for the given wikidata ids - + Args: qids(list): the list of id """ - sparql_query=f"""# get the instanceof values for a given entity + sparql_query = f"""# get the instanceof values for a given entity SELECT ?item ?itemLabel ?qid ?class_qid ?class ?classLabel WHERE {{ @@ -69,11 +75,11 @@ def getClassQids(self,qids:list)->dict: """ for qid in qids: if not qid.startswith("http:"): - wd_url=f"http://www.wikidata.org/entity/{qid}" + wd_url = f"http://www.wikidata.org/entity/{qid}" else: - wd_url=qid - sparql_query+=f" <{wd_url}>\n" - sparql_query+=f"""}} + wd_url = qid + sparql_query += f" <{wd_url}>\n" + sparql_query += f"""}} ?item wdt:P31/wdt:P279* ?class. ?item rdfs:label ?itemLabel FILTER(LANG(?itemLabel)="en") @@ -84,6 +90,6 @@ def getClassQids(self,qids:list)->dict: }}""" if self.debug: print(sparql_query) - class_rows=self.sparql.queryAsListOfDicts(sparql_query) - class_map=LOD.getLookup(class_rows, "qid", withDuplicates=True) + class_rows = self.sparql.queryAsListOfDicts(sparql_query) + class_map = LOD.getLookup(class_rows, "qid", withDuplicates=True) return class_map diff --git a/tests/base_skg_test.py b/tests/base_skg_test.py index 5a5b8f2..1fe6238 100644 --- a/tests/base_skg_test.py +++ b/tests/base_skg_test.py @@ -4,6 +4,7 @@ @author: wf """ from ngwidgets.basetest import Basetest + from skg.kg import SKG_Def diff --git a/tests/basetest.py b/tests/basetest.py index 8b157a1..f7e796c 100644 --- a/tests/basetest.py +++ b/tests/basetest.py @@ -5,10 +5,12 @@ """ import getpass -from unittest import TestCase import os +from unittest import TestCase + from skg.profiler import Profiler + class Basetest(TestCase): """ base test case diff --git a/tests/testScholarGrid.py b/tests/testScholarGrid.py index bced9ab..37b452c 100644 --- a/tests/testScholarGrid.py +++ b/tests/testScholarGrid.py @@ -4,9 +4,10 @@ @author: wf """ from ngwidgets.basetest import Basetest -from skg.scholargrid import ScholarGrid, ScholarQuery -from wikibot3rd.wikiuser import WikiUser from spreadsheet.wbquery import WikibaseQuery +from wikibot3rd.wikiuser import WikiUser + +from skg.scholargrid import ScholarGrid, ScholarQuery from skg.wikidata import Wikidata diff --git a/tests/test_crossref.py b/tests/test_crossref.py index bf2624f..b16bea7 100644 --- a/tests/test_crossref.py +++ b/tests/test_crossref.py @@ -3,11 +3,13 @@ @author: wf """ -from ngwidgets.basetest import Basetest -from skg.crossref import Crossref import json from dataclasses import dataclass +from ngwidgets.basetest import Basetest + +from skg.crossref import Crossref + @dataclass class Example: diff --git a/tests/test_dblp.py b/tests/test_dblp.py index 191c648..15f63e4 100644 --- a/tests/test_dblp.py +++ b/tests/test_dblp.py @@ -1,27 +1,29 @@ -''' +""" Created on 2022-11-17 @author: wf -''' -from tests.base_skg_test import BaseSkgTest +""" +import json + from skg.dblp import Dblp from skg.graph import Node -import json +from tests.base_skg_test import BaseSkgTest + class TestDblp(BaseSkgTest): """ test dblp access """ - + def setUp(self, debug=False, profile=True): BaseSkgTest.setUp(self, debug=debug, profile=profile) - self.dblp=Dblp() - + self.dblp = Dblp() + def test_dblp_papers(self): """ test dblp paper access """ - sparql_query=""" + sparql_query = """ PREFIX dblp: SELECT ?paper @@ -42,73 +44,78 @@ def test_dblp_papers(self): LIMIT 10 """ # rows since the query above returns truly tabular results - paper_rows=self.dblp.sparql.queryAsListOfDicts(sparql_query) - debug=self.debug - debug=True + paper_rows = self.dblp.sparql.queryAsListOfDicts(sparql_query) + debug = self.debug + debug = True if debug: for row in paper_rows: print(row) - + def test_dblp_schema(self): """ test loading the dblp schema """ - schema=self.dblp.schema - schema.loadSchema(formats="n3,json-ld") # xml - classes=schema.toClasses() - debug=self.debug - debug=True + schema = self.dblp.schema + schema.loadSchema(formats="n3,json-ld") # xml + classes = schema.toClasses() + debug = self.debug + debug = True if debug: - print(json.dumps(classes,indent=2)) - classes=classes["classes"] + print(json.dumps(classes, indent=2)) + classes = classes["classes"] self.assertTrue("Entity" in classes) - entity=classes["Entity"] + entity = classes["Entity"] self.assertTrue("@subClassOf" in entity) - self.assertEqual("Thing",entity["@subClassOf"]) - + self.assertEqual("Thing", entity["@subClassOf"]) + def test_uml(self): """ test getting uml markup """ - schema=self.dblp.schema + schema = self.dblp.schema schema.loadSchema() - uml_markup=schema.toPlantUml() - debug=True + uml_markup = schema.toPlantUml() + debug = True if debug: print(uml_markup) - + def test_dblp_item_via_id_search(self): """ test getting papers by id from dblp """ - debug=self.debug - debug=True - paper_concept=self.skg_def.concepts["Paper"] - author_concept=self.skg_def.concepts["Scholar"] - id_examples=[ + debug = self.debug + debug = True + paper_concept = self.skg_def.concepts["Paper"] + author_concept = self.skg_def.concepts["Scholar"] + id_examples = [ { "id_name": "orcid", "id_value": "0000-0003-1279-3709", - "concept": author_concept + "concept": author_concept, }, { "id_name": "doi", "id_value": "10.1007/978-3-031-19433-7_21", - "concept": paper_concept - } + "concept": paper_concept, + }, ] - - def checkItem(item:Node,id_name:str,id_value:str,debug:bool=False): + + def checkItem(item: Node, id_name: str, id_value: str, debug: bool = False): """ check the given item - + Args: item(Node): the item to check id_name(str): the name of the id used to retrieve the item id_value(str) the value that has been used to retriebe the item debug(bool): if True show debug information """ - if id_name=="doi": - self.assertEqual(f"http://dx.doi.org/{id_value}",item.doi) - - self.check_id_examples(id_examples, createFunc=Node.from_dblp_via_id,checkItem=checkItem,debug=debug) \ No newline at end of file + if id_name == "doi": + self.assertEqual(f"http://dx.doi.org/{id_value}", item.doi) + + self.check_id_examples( + id_examples, + createFunc=Node.from_dblp_via_id, + checkItem=checkItem, + debug=debug, + ) diff --git a/tests/test_dblp2wikidata.py b/tests/test_dblp2wikidata.py new file mode 100644 index 0000000..17fa572 --- /dev/null +++ b/tests/test_dblp2wikidata.py @@ -0,0 +1,27 @@ +''' +Created on 2024-02-26 + +@author: wf +''' +from ngwidgets.basetest import Basetest +from skg.dblp2wikidata import Dblp2Wikidata +from argparse import Namespace + +class TestDblp2Wikidata(Basetest): + """ + test Dblp2Wikidata utility + """ + + def setUp(self, debug=True, profile=True): + Basetest.setUp(self, debug=debug, profile=profile) + self.d2w = Dblp2Wikidata(debug=debug) + + def test_transfer(self): + """ + Test the transfer method for a known DBLP entry + """ + test_search_terms=["82/6542","Donald C. Gause"] + for test_search_term in test_search_terms: + test_args = Namespace(dblp2wikidata=test_search_term) + self.d2w.transfer(test_args) + \ No newline at end of file diff --git a/tests/test_doi.py b/tests/test_doi.py index e88a8bf..73bc4a0 100644 --- a/tests/test_doi.py +++ b/tests/test_doi.py @@ -3,12 +3,14 @@ @author: wf """ -from ngwidgets.basetest import Basetest -from skg.doi import DOI -from skg.dblp import Dblp +import json from dataclasses import dataclass from unittest import IsolatedAsyncioTestCase -import json + +from ngwidgets.basetest import Basetest + +from skg.dblp import Dblp +from skg.doi import DOI @dataclass diff --git a/tests/test_location.py b/tests/test_location.py index f0eba7b..6cedc1b 100644 --- a/tests/test_location.py +++ b/tests/test_location.py @@ -1,35 +1,36 @@ -''' +""" Created on 2022-11-21 @author: wf -''' -from tests.base_skg_test import BaseSkgTest +""" from skg.graph import Node from skg.location import Country +from tests.base_skg_test import BaseSkgTest + class TestLocation(BaseSkgTest): """ test concerning the scholar/author concept """ - + def test_locations_by_wikidata_id(self): """ - test searching locations + test searching locations """ - country_concept=self.skg_def.concepts["Country"] - id_examples=[ + country_concept = self.skg_def.concepts["Country"] + id_examples = [ { "id_name": "iso_code", "id_value": "SG", "concept": country_concept, - "example": Country.getSamples()[0] + "example": Country.getSamples()[0], }, ] - - def checkItem(item:Node,id_name:str,id_value:str,debug:bool=False): + + def checkItem(item: Node, id_name: str, id_value: str, debug: bool = False): """ check the given item - + Args: item(Node): the item to check id_name(str): the name of the id used to retrieve the item @@ -37,12 +38,16 @@ def checkItem(item:Node,id_name:str,id_value:str,debug:bool=False): debug(bool): if True show debug information """ # @TODO check against id_example - self.assertEqual("SG",item.iso_code) - self.assertEqual(5866139.0,item.population) + self.assertEqual("SG", item.iso_code) + self.assertEqual(5866139.0, item.population) self.assertTrue("Q334" in item.wikiDataId) pass - - - debug=self.debug - debug=True - self.check_id_examples(id_examples, createFunc=Node.from_wikidata_via_id,checkItem=checkItem,debug=debug) + + debug = self.debug + debug = True + self.check_id_examples( + id_examples, + createFunc=Node.from_wikidata_via_id, + checkItem=checkItem, + debug=debug, + ) diff --git a/tests/test_orcid.py b/tests/test_orcid.py index 289b778..2d31892 100644 --- a/tests/test_orcid.py +++ b/tests/test_orcid.py @@ -3,9 +3,11 @@ @author: wf """ +import json + from ngwidgets.basetest import Basetest + from skg.orcid import ORCID -import json class TestORCID(Basetest): diff --git a/tests/test_scholar.py b/tests/test_scholar.py index 312c2b4..da749dd 100644 --- a/tests/test_scholar.py +++ b/tests/test_scholar.py @@ -1,68 +1,73 @@ -''' +""" Created on 2022-11-16 @author: wf -''' -from tests.base_skg_test import BaseSkgTest -from skg.scholar import Scholar +""" from skg.graph import Node +from skg.scholar import Scholar from skg.smw import SemWiki +from tests.base_skg_test import BaseSkgTest + class TestScholar(BaseSkgTest): """ test concerning the scholar/author concept """ - + def test_scholar_by_wikidata_id(self): """ test searching a scholar/author by ORCID,dblpId or wikiDataId from wikidata """ - author_concept=self.skg_def.concepts["Scholar"] - id_examples=[ + author_concept = self.skg_def.concepts["Scholar"] + id_examples = [ { "id_name": "orcid", "id_value": "0000-0003-1279-3709", - "concept": author_concept + "concept": author_concept, }, { "id_name": "dblpId", "id_value": "b/TimBernersLee", - "concept": author_concept + "concept": author_concept, }, - { - "id_name": "wikiDataId", - "id_value": "Q80", - "concept": author_concept - }, + {"id_name": "wikiDataId", "id_value": "Q80", "concept": author_concept}, ] - - def checkItem(scholar:Scholar,id_name:str,id_value:str,debug:bool=False): + + def checkItem( + scholar: Scholar, id_name: str, id_value: str, debug: bool = False + ): """ check the given item - + Args: item(Node): the item to check id_name(str): the name of the id used to retrieve the item id_value(str) the value that has been used to retriebe the item debug(bool): if True show debug information """ - self.assertEqual("Tim Berners-Lee",scholar.label) - self.assertEqual("https://scholia.toolforge.org/author/Q80",scholar.scholia_url()) - - debug=self.debug - debug=True - self.check_id_examples(id_examples, createFunc=Node.from_wikidata_via_id,checkItem=checkItem,debug=debug) + self.assertEqual("Tim Berners-Lee", scholar.label) + self.assertEqual( + "https://scholia.toolforge.org/author/Q80", scholar.scholia_url() + ) + + debug = self.debug + debug = True + self.check_id_examples( + id_examples, + createFunc=Node.from_wikidata_via_id, + checkItem=checkItem, + debug=debug, + ) def test_smw_markup(self): """ test Semantic MediaWiki markup for a scholar """ - orcids=["0000-0002-4030-0978"] - author_concept=self.skg_def.concepts["Scholar"] + orcids = ["0000-0002-4030-0978"] + author_concept = self.skg_def.concepts["Scholar"] for orcid in orcids: - scholars=Node.from_wikidata_via_id(author_concept, "orcid", orcid) - scholar=scholars[0] - markup=SemWiki.asMarkup(scholar) - print (markup) - \ No newline at end of file + scholars = Node.from_wikidata_via_id(author_concept, "orcid", orcid) + scholar = scholars[0] + markup = SemWiki.asMarkup(scholar) + print(markup) diff --git a/tests/test_searchengine.py b/tests/test_searchengine.py index 6e78a95..cd01587 100644 --- a/tests/test_searchengine.py +++ b/tests/test_searchengine.py @@ -3,12 +3,14 @@ @author: wf """ -from ngwidgets.basetest import Basetest -from skg.searchengine import InternetSearch -from skg.dblp import Dblp import pprint from collections import Counter +from ngwidgets.basetest import Basetest + +from skg.dblp import Dblp +from skg.searchengine import InternetSearch + class TestSearchEngine(Basetest): """ diff --git a/tests/test_semantic_scholar.py b/tests/test_semantic_scholar.py index 773267b..5dff51c 100644 --- a/tests/test_semantic_scholar.py +++ b/tests/test_semantic_scholar.py @@ -3,9 +3,11 @@ @author: wf """ +import json + from ngwidgets.basetest import Basetest + from skg.semantic_scholar import SemanticScholar -import json class TestSemanticScholar(Basetest): diff --git a/tests/test_skg.py b/tests/test_skg.py index 85f6698..fcd0f62 100644 --- a/tests/test_skg.py +++ b/tests/test_skg.py @@ -4,6 +4,7 @@ @author: wf """ from ngwidgets.basetest import Basetest + from skg.kg import SKG_Def diff --git a/tests/test_smw.py b/tests/test_smw.py index cc1bc57..b9d56ba 100644 --- a/tests/test_smw.py +++ b/tests/test_smw.py @@ -3,12 +3,14 @@ @author: wf """ +import json +from collections import Counter + from ngwidgets.basetest import Basetest +from wikibot3rd.wikiuser import WikiUser + from skg.doi import DOI from skg.smw import SemWiki -from wikibot3rd.wikiuser import WikiUser -import json -from collections import Counter class TestSMW(Basetest): diff --git a/tests/test_sotsog.py b/tests/test_sotsog.py index a9a6e1e..1da4975 100644 --- a/tests/test_sotsog.py +++ b/tests/test_sotsog.py @@ -4,8 +4,9 @@ @author: wf """ from ngwidgets.basetest import Basetest -from skg.sotsog import SotSog + from skg.search import SearchOptions +from skg.sotsog import SotSog class TestSotsog(Basetest): diff --git a/tests/test_wdsearch.py b/tests/test_wdsearch.py index a8f98fa..8dc96f1 100644 --- a/tests/test_wdsearch.py +++ b/tests/test_wdsearch.py @@ -3,12 +3,14 @@ @author: wf """ +import json import unittest + +from lodstorage.lod import LOD from ngwidgets.basetest import Basetest + from skg.wdsearch import WikidataSearch from skg.wikidata import Wikidata -import json -from lodstorage.lod import LOD class TestWikidataSearch(Basetest):