Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pdf2reproschema #41

Draft
wants to merge 28 commits into
base: ref/linkml
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
87f7a6a
add print for testing
yibeichan Jan 25, 2024
3162782
update clear_header
yibeichan Jan 25, 2024
64521d9
remove print
yibeichan Jan 25, 2024
4c1081a
fix order and other errors
yibeichan Feb 7, 2024
50fe4ce
change ui yesno to radio
yibeichan Feb 7, 2024
4359791
fix typo
yibeichan Feb 7, 2024
b17b5c7
update context, field->item, fix isVis
yibeichan Feb 22, 2024
ca3162d
remove useless due to failed validation
yibeichan Feb 23, 2024
4288f8a
remove visibility at the item level & remove matrixInfo
yibeichan Feb 23, 2024
57ca52e
fix choice
yibeichan Feb 25, 2024
82e2300
remove identifier
yibeichan Feb 28, 2024
c6cabf5
updating validate command to the new pydantic model
djarecka Apr 5, 2024
ad8a82c
updating/fixing the tests; updating the model to use CreativeWork; ch…
djarecka Apr 18, 2024
3c7049f
fix conversion tests
yibeichan Apr 21, 2024
a60612f
remove test output
yibeichan Apr 21, 2024
e1e847d
change test output directory
yibeichan Apr 21, 2024
51d30b7
final improvements on tests
yibeichan Apr 21, 2024
ab7c051
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 21, 2024
0543e41
model version after adding Thing class
djarecka Apr 27, 2024
105489b
Merge pull request #37 from yibeichan/master
yibeichan Apr 29, 2024
5c0dff1
Merge branch 'master' into ref/linkml
djarecka May 2, 2024
36bbb36
updating model after removing CreativeWork and ImageUrl
djarecka May 9, 2024
2f3e3ca
adding tests to initialize the model classes
djarecka May 9, 2024
2e54331
fixing load_file; adding write_obj_jsonld function and expanding test…
djarecka May 10, 2024
71e90f0
changing redcap2reproschema to use new pydantic classes; some small c…
djarecka May 13, 2024
ecc93b7
Merge remote-tracking branch 'remotes/djarecka/ref/linkml' into ref/l…
ibevers May 13, 2024
311fd17
Initiate pdf2reproschema
ibevers May 13, 2024
4a4d84b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions reproschema/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,15 @@ def main(log_level):


@main.command()
@click.option("--shapefile", default=None, type=click.Path(exists=True, dir_okay=False))
@click.argument("path", nargs=1, type=str)
def validate(shapefile, path):
def validate(path):
if not (path.startswith("http") or os.path.exists(path)):
raise ValueError(f"{path} must be a URL or an existing file or directory")
from .validate import validate

validate(shapefile, path)
result = validate(path)
if result:
click.echo("Validation successful")


@main.command()
Expand Down
129 changes: 82 additions & 47 deletions reproschema/jsonldutils.py
Original file line number Diff line number Diff line change
@@ -1,77 +1,112 @@
from pyld import jsonld
from pyshacl import validate as shacl_validate
import json
import os
from .utils import start_server, stop_server, lgr
from pathlib import Path
from copy import deepcopy
from urllib.parse import urlparse
from .utils import start_server, stop_server, lgr, fixing_old_schema
from .models import Item, Activity, Protocol, ResponseOption, ResponseActivity, Response


def _is_url(path):
"""
Determine whether the given path is a URL.
"""
parsed = urlparse(path)
return parsed.scheme in ("http", "https", "ftp", "ftps")


def _is_file(path):
"""
Determine whether the given path is a valid file path.
"""
return os.path.isfile(path)


def load_file(path_or_url, started=False, http_kwargs={}):
try:
"""Load a file or URL and return the expanded JSON-LD data."""
path_or_url = str(path_or_url)
if _is_url(path_or_url):
data = jsonld.expand(path_or_url)
if len(data) == 1:
if "@id" not in data[0]:
if "@id" not in data[0] and "id" not in data[0]:
data[0]["@id"] = path_or_url
except jsonld.JsonLdError as e:
if 'only "http" and "https"' in str(e):
lgr.debug("Reloading with local server")
root = os.path.dirname(path_or_url)
if not started:
stop, port = start_server(**http_kwargs)
else:
if "port" not in http_kwargs:
raise KeyError("port key missing in http_kwargs")
port = http_kwargs["port"]
base_url = f"http://localhost:{port}/"
if root:
base_url += f"{root}/"
with open(path_or_url) as json_file:
data = json.load(json_file)
try:
data = jsonld.expand(data, options={"base": base_url})
except:
raise
finally:
if not started:
stop_server(stop)
if len(data) == 1:
if "@id" not in data[0]:
data[0]["@id"] = base_url + os.path.basename(path_or_url)
elif _is_file(path_or_url):
lgr.debug("Reloading with local server")
root = os.path.dirname(path_or_url)
if not started:
stop, port = start_server(**http_kwargs)
else:
if "port" not in http_kwargs:
raise KeyError("port key missing in http_kwargs")
port = http_kwargs["port"]
base_url = f"http://localhost:{port}/"
if root:
base_url += f"{root}/"
with open(path_or_url) as json_file:
data = json.load(json_file)
try:
data = jsonld.expand(data, options={"base": base_url})
except:
raise
finally:
if not started:
stop_server(stop)
if len(data) == 1:
if "@id" not in data[0] and "id" not in data[0]:
data[0]["@id"] = base_url + os.path.basename(path_or_url)
else:
raise Exception(f"{path_or_url} is not a valid URL or file path")
return data


def validate_data(data, shape_file_path):
"""Validate an expanded jsonld document against a shape.
def validate_data(data):
"""Validate an expanded jsonld document against the pydantic model.

Parameters
----------
data : dict
Python dictionary containing JSONLD object
shape_file_path : str
SHACL file for the document

Returns
-------
conforms: bool
Whether the document is conformant with the shape
v_text: str
Validation information returned by PySHACL
Validation errors if any returned by pydantic

"""
kwargs = {"algorithm": "URDNA2015", "format": "application/n-quads"}
normalized = jsonld.normalize(data, kwargs)
data_file_format = "nquads"
shape_file_format = "turtle"
conforms, v_graph, v_text = shacl_validate(
normalized,
shacl_graph=shape_file_path,
data_graph_format=data_file_format,
shacl_graph_format=shape_file_format,
inference="rdfs",
debug=False,
serialize_report_graph=True,
)
# do we need it?
# kwargs = {"algorithm": "URDNA2015", "format": "application/n-quads"}
# normalized = jsonld.normalize(data, kwargs)
if data[0]["@type"][0] == "http://schema.repronim.org/Field":
obj_type = Item
elif data[0]["@type"][0] == "http://schema.repronim.org/ResponseOption":
obj_type = ResponseOption
elif data[0]["@type"][0] == "http://schema.repronim.org/Activity":
obj_type = Activity
elif data[0]["@type"][0] == "http://schema.repronim.org/Protocol":
obj_type = Protocol
elif data[0]["@type"][0] == "http://schema.repronim.org/ResponseActivity":
obj_type = ResponseActivity
elif data[0]["@type"][0] == "http://schema.repronim.org/Response":
obj_type = Response
else:
raise ValueError("Unknown type")
data_fixed = [fixing_old_schema(data[0], copy_data=True)]
# TODO: where should we load the context from?
contexfile = Path(__file__).resolve().parent / "models/reproschema"
with open(contexfile) as fp:
context = json.load(fp)
data_fixed_comp = jsonld.compact(data_fixed, context)
del data_fixed_comp["@context"]
conforms = False
v_text = ""
try:
obj_type(**data_fixed_comp)
conforms = True
except Exception as e:
v_text = str(e)
return conforms, v_text


Expand Down
5 changes: 2 additions & 3 deletions reproschema/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
from .protocol import Protocol
from .activity import Activity
from .item import Item
from .model import Activity, Item, Protocol, ResponseOption, ResponseActivity, Response
from .utils import load_schema, write_obj_jsonld
66 changes: 0 additions & 66 deletions reproschema/models/activity.py

This file was deleted.

76 changes: 0 additions & 76 deletions reproschema/models/base.py

This file was deleted.

Loading