Skip to content

Commit

Permalink
reformats code
Browse files Browse the repository at this point in the history
  • Loading branch information
WolfgangFahl committed Feb 26, 2024
1 parent 8c7d443 commit 5c07e63
Show file tree
Hide file tree
Showing 42 changed files with 1,626 additions and 1,327 deletions.
7 changes: 7 additions & 0 deletions scripts/blackisort
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# WF 2024-01-10
# Sort imports with isort and then reformat with black, first for the
# test suite and then for the package sources.
package=skg
for dir in tests "$package"
do
  # quoting the directory keeps the glob intact should the name ever
  # contain whitespace or glob characters
  isort "$dir"/*.py
  black "$dir"/*.py
done
115 changes: 59 additions & 56 deletions skg/citeproc.py
Original file line number Diff line number Diff line change
@@ -1,114 +1,117 @@
'''
"""
Created on 2022-12-21
@author: wf
'''
"""
import datetime


class Citeproc:
    """
    see https://en.wikipedia.org/wiki/CiteProc
    """

    @classmethod
    def asScite(cls, meta_data: dict, retrieved_from: str) -> str:
        """
        convert the given meta data to #Scite format

        see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php

        Args:
            meta_data(dict): the citeproc compatible metadata dict to convert
            retrieved_from(str): the url the metadata was retrieved from

        Returns:
            str: Semantic Mediawiki markup

        Raises:
            KeyError: if meta_data has no "title" entry
        """

        def unlist(value):
            """
            join list values with ";" - non-list values are returned as is
            """
            if not isinstance(value, list):
                return value
            text = ";".join(f"{item}" for item in value)
            if len(value) > 1:
                # announce the separator only when there is more than one value
                text += "|+sep=;"
            return text

        def firstValue(value):
            """
            get the first element of a list (None if the list is empty)
            - non-list values are returned as is
            """
            if not isinstance(value, list):
                return value
            return value[0] if value else None

        def get_author(value) -> str:
            """
            get the author markup

            Args:
                value(list): the list of author records to disassemble

            Returns:
                str: Mediawiki markup
            """
            if isinstance(value, dict):
                # tolerate a single author record that is not wrapped in a list
                value = [value]
            names = []
            for arec in value:
                if "given" in arec and "family" in arec:
                    names.append(f"""{arec["given"]} {arec["family"]}""")
                elif "family" in arec:
                    names.append(f"""{arec["family"]}""")
                # incomplete author records are ignored
            return ";".join(names)

        # timezone-aware replacement for the deprecated datetime.utcnow()
        timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
        ref_type = "journal-article"
        # an empty title list yields an empty title instead of an IndexError
        title = firstValue(meta_data["title"]) or ""
        title_2 = title.lower()[:2]
        author_lower = ""
        if "author" in meta_data:
            first_author = firstValue(meta_data["author"])
            if first_author and "family" in first_author:
                family = firstValue(first_author["family"])
                if family:
                    author_lower = family.lower()
        year = ""
        if "published-print" in meta_data:
            year = meta_data["published-print"]["date-parts"][0][0]
        if not year and "issued" in meta_data:
            year = meta_data["issued"]["date-parts"][0][0]
        # the citation key is built from author family name, year and the
        # first two characters of the lower-cased title
        reference = f"{author_lower}{year}{title_2}"
        markup = ""
        # (scite key, citeproc key, conversion function)
        for skey, mkey, func in [
            ("title", "title", unlist),
            ("subtitle", "subtitle", unlist),
            ("authors", "author", get_author),
            ("journal", "container-title", unlist),
            ("publisher", "publisher", str),
            ("issn", "ISSN", unlist),
            ("subject", "subject", unlist),
            ("volume", "volume", str),
            ("pages", "page", str),
            ("doi", "DOI", str),
        ]:
            if mkey in meta_data:
                value = meta_data[mkey]
                if value:
                    value = func(value)
                    markup += f"\n|{skey}={value}"
        markup = f"""{{{{#scite:
|reference={reference}
|type={ref_type}{markup}
|year={year}
|retrieved-from={retrieved_from}
|retrieved-on={timestamp}
}}}}"""
        full_markup = f"{title}\n[[CiteRef::{reference}]]\n{markup}"
        return full_markup
53 changes: 29 additions & 24 deletions skg/crossref.py
Original file line number Diff line number Diff line change
@@ -1,58 +1,63 @@
'''
"""
Created on 17.11.2022
@author: wf
'''
import skg
"""
import habanero
import habanero.cn as cn

import skg
from skg.citeproc import Citeproc


class Crossref:
    """
    Crossref access
    """

    def __init__(self, mailto=None, ua_string=None):
        """
        constructor

        Args:
            mailto(str): contact e-mail address used in the default user agent string
            ua_string(str): the user agent string - if None it is derived
                from the package version and mailto
        """
        if mailto is None:
            # NOTE(review): this literal looks like an e-mail obfuscation
            # placeholder - confirm the intended address
            mailto = "[email protected]"
        if ua_string is None:
            ua_string = f"pysotsog/{skg.__version__} (https://pypi.org/project/pysotsog/; mailto:{mailto})"
        # keep the resolved values so that they are not silently discarded
        self.mailto = mailto
        self.ua_string = ua_string
        # NOTE(review): the client is created with an empty user agent string
        # instead of habanero.Crossref(mailto=mailto,ua_string=ua_string)
        # - confirm that this is intentional
        self.cr = habanero.Crossref(ua_string="")

    def doiMetaData(self, dois: list):
        """
        get the meta data for the given dois

        Args:
            dois(list): a list of dois

        Returns:
            the "message" part of the response if the response has
            status "ok", else None
        """
        metadata = None
        response = self.cr.works(ids=dois)
        # NOTE(review): presumably a lookup of several dois returns a list of
        # responses, for which this check yields None - verify against habanero
        if (
            "status" in response
            and "message" in response
            and response["status"] == "ok"
        ):
            metadata = response["message"]
        return metadata

    def doiBibEntry(self, dois: list):
        """
        get bib entries for the given dois

        Args:
            dois(list): a list of dois

        Returns:
            the result of the content negotiation in "bibentry" format
        """
        bibentry = cn.content_negotiation(ids=dois, format="bibentry")
        return bibentry

    def asScite(self, meta_data: dict) -> str:
        """
        convert the given meta data to #Scite format

        see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php

        Args:
            meta_data(dict): the metadata dict to convert

        Returns:
            str: Semantic Mediawiki markup
        """
        markup = Citeproc.asScite(meta_data, retrieved_from=self.cr.base_url)
        return markup

50 changes: 29 additions & 21 deletions skg/dblp.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,51 @@
'''
"""
Created on 2022-11-17
@author: wf
'''
"""
from lodstorage.sparql import SPARQL

from skg.owl import Owl


class Dblp:
"""
Schloss Dagstuhl Dblp computer science bibliography
"""
def __init__(self,endpoint:str="https://qlever.cs.uni-freiburg.de/api/dblp"):

def __init__(self, endpoint: str = "https://qlever.cs.uni-freiburg.de/api/dblp"):
"""
constructor
Args:
endpoint(str): the endpoint to use
"""
self.endpoint=endpoint
self.schema=Owl("dblp","https://dblp.org/rdf/schema", "Wolfgang Fahl","2022-11-19")
self.sparql=SPARQL(self.endpoint)


def get_paper_records(self,regex:str,prop_name:str="title",limit:int=100,debug:bool=False)->list:
self.endpoint = endpoint
self.schema = Owl(
"dblp", "https://dblp.org/rdf/schema", "Wolfgang Fahl", "2022-11-19"
)
self.sparql = SPARQL(self.endpoint)

def get_paper_records(
self,
regex: str,
prop_name: str = "title",
limit: int = 100,
debug: bool = False,
) -> list:
"""
get papers fitting the given regex
Args:
prop_name(str): the property to filter
regex(str): the regex to filter for
limit(int): the maximum number of records to return
debug(bool): if True show debug information
Returns:
list: a list of dict of paper records
"""
sparql_query="""PREFIX dblp: <https://dblp.org/rdf/schema#>
sparql_query = """PREFIX dblp: <https://dblp.org/rdf/schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT
?paper
Expand All @@ -58,8 +67,8 @@ def get_paper_records(self,regex:str,prop_name:str="title",limit:int=100,debug:b
?paper dblp:yearOfPublication ?year.
OPTIONAL { ?paper dblp:monthOfPublication ?month}.
"""
sparql_query+=f"""FILTER regex(?{prop_name}, "{regex}").\n"""
sparql_query+=f"""
sparql_query += f"""FILTER regex(?{prop_name}, "{regex}").\n"""
sparql_query += f"""
}}
GROUP BY
?paper
Expand All @@ -74,11 +83,11 @@ def get_paper_records(self,regex:str,prop_name:str="title",limit:int=100,debug:b
LIMIT {limit}"""
if debug:
print(sparql_query)
records=self.sparql.queryAsListOfDicts(sparql_query)
records = self.sparql.queryAsListOfDicts(sparql_query)
return records
def get_random_papers(self,year:int=2020,limit:int=10):
sparql_query=f"""PREFIX dblp: <https://dblp.org/rdf/schema#>

def get_random_papers(self, year: int = 2020, limit: int = 10):
sparql_query = f"""PREFIX dblp: <https://dblp.org/rdf/schema#>
SELECT
?paper
(SAMPLE(?doi_o) as ?doi)
Expand All @@ -100,4 +109,3 @@ def get_random_papers(self,year:int=2020,limit:int=10):
ORDER BY ?sortKey
LIMIT {limit}
"""

26 changes: 26 additions & 0 deletions skg/dblp2wikidata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
Created on 2024-02-26
@author: wf
"""
from argparse import Namespace

class Dblp2Wikidata:
    """
    utility for transferring Dblp person entries to Wikidata
    """

    def __init__(self, debug: bool = False):
        """
        constructor

        Args:
            debug(bool): if True show debug information
        """
        self.debug = debug

    def transfer(self, args: Namespace):
        """
        Main method to handle the transfer of DBLP entries to Wikidata.

        Currently only reads the search term and, in debug mode, prints it.

        Args:
            args(Namespace): Command line arguments - the optional
                dblp2wikidata attribute holds the search term.
        """
        # a missing attribute is treated like an unset search term
        search_term = getattr(args, "dblp2wikidata", None)
        if self.debug:
            print(f"trying to transfer DBLP person entry for {search_term}")
Loading

0 comments on commit 5c07e63

Please sign in to comment.