diff --git a/reproschema/cli.py b/reproschema/cli.py index e31b03e..685af39 100644 --- a/reproschema/cli.py +++ b/reproschema/cli.py @@ -42,14 +42,15 @@ def main(log_level): @main.command() -@click.option("--shapefile", default=None, type=click.Path(exists=True, dir_okay=False)) @click.argument("path", nargs=1, type=str) -def validate(shapefile, path): +def validate(path): if not (path.startswith("http") or os.path.exists(path)): raise ValueError(f"{path} must be a URL or an existing file or directory") from .validate import validate - validate(shapefile, path) + result = validate(path) + if result: + click.echo("Validation successful") @main.command() diff --git a/reproschema/jsonldutils.py b/reproschema/jsonldutils.py index f88f38f..c82f0d2 100644 --- a/reproschema/jsonldutils.py +++ b/reproschema/jsonldutils.py @@ -1,77 +1,112 @@ from pyld import jsonld -from pyshacl import validate as shacl_validate import json import os -from .utils import start_server, stop_server, lgr +from pathlib import Path +from copy import deepcopy +from urllib.parse import urlparse +from .utils import start_server, stop_server, lgr, fixing_old_schema +from .models import Item, Activity, Protocol, ResponseOption, ResponseActivity, Response + + +def _is_url(path): + """ + Determine whether the given path is a URL. + """ + parsed = urlparse(path) + return parsed.scheme in ("http", "https", "ftp", "ftps") + + +def _is_file(path): + """ + Determine whether the given path is a valid file path. + """ + return os.path.isfile(path) def load_file(path_or_url, started=False, http_kwargs={}): - try: + """Load a file or URL and return the expanded JSON-LD data.""" + path_or_url = str(path_or_url) + if _is_url(path_or_url): data = jsonld.expand(path_or_url) if len(data) == 1: - if "@id" not in data[0]: + if "@id" not in data[0] and "id" not in data[0]: data[0]["@id"] = path_or_url - except jsonld.JsonLdError as e: - if 'only "http" and "https"' in str(e): - lgr.debug("Reloading with local server") - root = os.path.dirname(path_or_url) - if not started: - stop, port = start_server(**http_kwargs) - else: - if "port" not in http_kwargs: - raise KeyError("port key missing in http_kwargs") - port = http_kwargs["port"] - base_url = f"http://localhost:{port}/" - if root: - base_url += f"{root}/" - with open(path_or_url) as json_file: - data = json.load(json_file) - try: - data = jsonld.expand(data, options={"base": base_url}) - except: - raise - finally: - if not started: - stop_server(stop) - if len(data) == 1: - if "@id" not in data[0]: - data[0]["@id"] = base_url + os.path.basename(path_or_url) + elif _is_file(path_or_url): + lgr.debug("Reloading with local server") + root = os.path.dirname(path_or_url) + if not started: + stop, port = start_server(**http_kwargs) else: + if "port" not in http_kwargs: + raise KeyError("port key missing in http_kwargs") + port = http_kwargs["port"] + base_url = f"http://localhost:{port}/" + if root: + base_url += f"{root}/" + with open(path_or_url) as json_file: + data = json.load(json_file) + try: + data = jsonld.expand(data, options={"base": base_url}) + except: raise + finally: + if not started: + stop_server(stop) + if len(data) == 1: + if "@id" not in data[0] and "id" not in data[0]: + data[0]["@id"] = base_url + os.path.basename(path_or_url) + else: + raise Exception(f"{path_or_url} is not a valid URL or file path") return data -def validate_data(data, shape_file_path): - """Validate an expanded jsonld document against a shape. +def validate_data(data): + """Validate an expanded jsonld document against the pydantic model. Parameters ---------- data : dict Python dictionary containing JSONLD object - shape_file_path : str - SHACL file for the document Returns ------- conforms: bool Whether the document is conformant with the shape v_text: str - Validation information returned by PySHACL + Validation errors if any returned by pydantic """ - kwargs = {"algorithm": "URDNA2015", "format": "application/n-quads"} - normalized = jsonld.normalize(data, kwargs) - data_file_format = "nquads" - shape_file_format = "turtle" - conforms, v_graph, v_text = shacl_validate( - normalized, - shacl_graph=shape_file_path, - data_graph_format=data_file_format, - shacl_graph_format=shape_file_format, - inference="rdfs", - debug=False, - serialize_report_graph=True, - ) + # do we need it? + # kwargs = {"algorithm": "URDNA2015", "format": "application/n-quads"} + # normalized = jsonld.normalize(data, kwargs) + if data[0]["@type"][0] == "http://schema.repronim.org/Field": + obj_type = Item + elif data[0]["@type"][0] == "http://schema.repronim.org/ResponseOption": + obj_type = ResponseOption + elif data[0]["@type"][0] == "http://schema.repronim.org/Activity": + obj_type = Activity + elif data[0]["@type"][0] == "http://schema.repronim.org/Protocol": + obj_type = Protocol + elif data[0]["@type"][0] == "http://schema.repronim.org/ResponseActivity": + obj_type = ResponseActivity + elif data[0]["@type"][0] == "http://schema.repronim.org/Response": + obj_type = Response + else: + raise ValueError("Unknown type") + data_fixed = [fixing_old_schema(data[0], copy_data=True)] + # TODO: where should we load the context from? + contexfile = Path(__file__).resolve().parent / "models/reproschema" + with open(contexfile) as fp: + context = json.load(fp) + data_fixed_comp = jsonld.compact(data_fixed, context) + del data_fixed_comp["@context"] + conforms = False + v_text = "" + try: + obj_type(**data_fixed_comp) + conforms = True + except Exception as e: + v_text = str(e) return conforms, v_text diff --git a/reproschema/models/__init__.py b/reproschema/models/__init__.py index 1c1a154..347ad42 100644 --- a/reproschema/models/__init__.py +++ b/reproschema/models/__init__.py @@ -1,3 +1,2 @@ -from .protocol import Protocol -from .activity import Activity -from .item import Item +from .model import Activity, Item, Protocol, ResponseOption, ResponseActivity, Response +from .utils import load_schema, write_obj_jsonld diff --git a/reproschema/models/activity.py b/reproschema/models/activity.py deleted file mode 100644 index 0ad79e2..0000000 --- a/reproschema/models/activity.py +++ /dev/null @@ -1,66 +0,0 @@ -from .base import SchemaBase - - -class Activity(SchemaBase): - """ - class to deal with reproschema activities - """ - - schema_type = "reproschema:Activity" - - def __init__(self, version=None): - super().__init__(version) - self.schema["ui"] = {"shuffle": [], "order": [], "addProperties": []} - - def set_ui_shuffle(self, shuffle=False): - self.schema["ui"]["shuffle"] = shuffle - - def set_URI(self, URI): - self.URI = URI - - def get_URI(self): - return self.URI - - # TODO - # preamble - # compute - # citation - # image - - def set_defaults(self, name): - self._ReproschemaSchema__set_defaults(name) # this looks wrong - self.set_ui_shuffle(False) - - def update_activity(self, item_info): - # TODO - # - remove the hard coding on visibility and valueRequired - - # update the content of the activity schema with new item - - item_info["URI"] = "items/" + item_info["name"] - - append_to_activity = { - "variableName": item_info["name"], - "isAbout": item_info["URI"], - "isVis": item_info["visibility"], - "valueRequired": False, - } - - self.schema["ui"]["order"].append(item_info["URI"]) - self.schema["ui"]["addProperties"].append(append_to_activity) - - def sort(self): - schema_order = [ - "@context", - "@type", - "@id", - "prefLabel", - "description", - "schemaVersion", - "version", - "ui", - ] - self.sort_schema(schema_order) - - ui_order = ["shuffle", "order", "addProperties"] - self.sort_ui(ui_order) diff --git a/reproschema/models/base.py b/reproschema/models/base.py deleted file mode 100644 index f3ecf93..0000000 --- a/reproschema/models/base.py +++ /dev/null @@ -1,76 +0,0 @@ -import json -import os - - -class SchemaBase: - """ - class to deal with reproschema schemas - """ - - schema_type = None - - def __init__(self, version): - URL = "https://raw.githubusercontent.com/ReproNim/reproschema/" - VERSION = version or "1.0.0-rc2" - - self.schema = { - "@context": URL + VERSION + "/contexts/generic", - "@type": self.schema_type, - "schemaVersion": VERSION, - "version": "0.0.1", - } - - def set_filename(self, name): - self.schema_file = name + "_schema" - self.schema["@id"] = name + "_schema" - - def get_name(self): - return self.schema_file.replace("_schema", "") - - def get_filename(self): - return self.schema_file - - def set_pref_label(self, pref_label): - self.schema["prefLabel"] = pref_label - - def set_description(self, description): - self.schema["description"] = description - - def set_directory(self, output_directory): - self.dir = output_directory - - def __set_defaults(self, name): - self.set_filename(name) - self.set_directory(name) - self.set_pref_label(name.replace("_", " ")) - self.set_description(name.replace("_", " ")) - - def sort_schema(self, schema_order): - reordered_dict = {k: self.schema[k] for k in schema_order} - self.schema = reordered_dict - - def sort_ui(self, ui_order): - reordered_dict = {k: self.schema["ui"][k] for k in ui_order} - self.schema["ui"] = reordered_dict - - def write(self, output_dir): - with open(os.path.join(output_dir, self.schema_file), "w") as ff: - json.dump(self.schema, ff, sort_keys=False, indent=4) - - @classmethod - def from_data(cls, data): - if cls.schema_type is None: - raise ValueError("SchemaBase cannot be used to instantiate class") - if cls.schema_type != data["@type"]: - raise ValueError(f"Mismatch in type {data['@type']} != {cls.schema_type}") - klass = cls() - klass.schema = data - return klass - - @classmethod - def from_file(cls, filepath): - with open(filepath) as fp: - data = json.load(fp) - if "@type" not in data: - raise ValueError("Missing @type key") - return cls.from_data(data) diff --git a/reproschema/models/item.py b/reproschema/models/item.py deleted file mode 100644 index fc08c70..0000000 --- a/reproschema/models/item.py +++ /dev/null @@ -1,156 +0,0 @@ -from .base import SchemaBase - - -class Item(SchemaBase): - """ - class to deal with reproschema activities - """ - - schema_type = "reproschema:Field" - - def __init__(self, version=None): - super().__init__(version) - self.schema["ui"] = {"inputType": []} - self.schema["question"] = {} - self.schema["responseOptions"] = {} - # default input type is "char" - self.set_input_type_as_char() - - def set_URI(self, URI): - self.URI = URI - - # TODO - # image - # readonlyValue - - def set_defaults(self, name): - self._ReproschemaSchema__set_defaults(name) # this looks wrong - self.schema_file = name - self.schema["@id"] = name - self.set_input_type_as_char() - - def set_question(self, question, lang="en"): - self.schema["question"][lang] = question - - def set_input_type(self, input_type): - self.schema["ui"]["inputType"] = input_type - - def set_response_options(self, response_options): - self.schema["responseOptions"] = response_options - - """ - - input types with different response choices - - """ - - def set_input_type_as_radio(self, response_options): - self.set_input_type("radio") - self.set_response_options(response_options) - - def set_input_type_as_select(self, response_options): - self.set_input_type("select") - self.set_response_options(response_options) - - def set_input_type_as_slider(self): - self.set_input_type_as_char() # until the slide item of the ui is fixed - # self.set_input_type("slider") - # self.set_response_options({"valueType": "xsd:string"}) - - def set_input_type_as_language(self): - URL = "https://raw.githubusercontent.com/ReproNim/reproschema/" - - self.set_input_type("selectLanguage") - - response_options = { - "valueType": "xsd:string", - "multipleChoice": True, - "choices": URL + "master/resources/languages.json", - } - self.set_response_options(response_options) - - """ - - input types with no response choice - - """ - - def set_input_type_as_char(self): - self.set_input_type("text") - self.set_response_options({"valueType": "xsd:string"}) - - def set_input_type_as_int(self): - self.set_input_type("number") - self.set_response_options({"valueType": "xsd:integer"}) - - def set_input_type_as_float(self): - self.set_input_type("float") - self.set_response_options({"valueType": "xsd:float"}) - - def set_input_type_as_time_range(self): - self.set_input_type("timeRange") - self.set_response_options({"valueType": "datetime"}) - - def set_input_type_as_date(self): - self.set_input_type("date") - self.set_response_options({"valueType": "xsd:date"}) - - """ - - input types with no response choice but with some parameters - - """ - - def set_input_type_as_multitext(self, max_length=300): - self.set_input_type("text") - self.set_response_options({"valueType": "xsd:string", "maxLength": max_length}) - - # TODO - # email: EmailInput/EmailInput.vue - # audioCheck: AudioCheck/AudioCheck.vue - # audioRecord: WebAudioRecord/Audio.vue - # audioPassageRecord: WebAudioRecord/Audio.vue - # audioImageRecord: WebAudioRecord/Audio.vue - # audioRecordNumberTask: WebAudioRecord/Audio.vue - # audioAutoRecord: AudioCheckRecord/AudioCheckRecord.vue - # year: YearInput/YearInput.vue - # selectCountry: SelectInput/SelectInput.vue - # selectState: SelectInput/SelectInput.vue - # documentUpload: DocumentUpload/DocumentUpload.vue - # save: SaveData/SaveData.vue - # static: Static/Static.vue - # StaticReadOnly: Static/Static.vue - - def set_basic_response_type(self, response_type): - # default (also valid for "char" input type) - self.set_input_type_as_char() - - if response_type == "int": - self.set_input_type_as_int() - - elif response_type == "float": - self.set_input_type_as_float() - - elif response_type == "date": - self.set_input_type_as_date() - - elif response_type == "time range": - self.set_input_type_as_time_range() - - elif response_type == "language": - self.set_input_type_as_language() - - def sort(self): - schema_order = [ - "@context", - "@type", - "@id", - "prefLabel", - "description", - "schemaVersion", - "version", - "ui", - "question", - "responseOptions", - ] - self.sort_schema(schema_order) diff --git a/reproschema/model.py b/reproschema/models/model.py similarity index 61% rename from reproschema/model.py rename to reproschema/models/model.py index 5d8808e..9c387fa 100644 --- a/reproschema/model.py +++ b/reproschema/models/model.py @@ -1,21 +1,19 @@ from __future__ import annotations from datetime import datetime, date -from enum import Enum - from decimal import Decimal -from typing import List, Dict, Optional, Any, Union -from pydantic import BaseModel as BaseModel, ConfigDict, Field, field_validator +from enum import Enum import re import sys +from typing import Any, List, Literal, Dict, Optional, Union +from pydantic.version import VERSION as PYDANTIC_VERSION -if sys.version_info >= (3, 8): - from typing import Literal +if int(PYDANTIC_VERSION[0]) >= 2: + from pydantic import BaseModel, ConfigDict, Field, field_validator else: - from typing_extensions import Literal - + from pydantic import BaseModel, Field, validator metamodel_version = "None" -version = "None" +version = "1.0.0" class ConfiguredBaseModel(BaseModel): @@ -25,8 +23,8 @@ class ConfiguredBaseModel(BaseModel): extra="forbid", arbitrary_types_allowed=True, use_enum_values=True, + strict=False, ) - pass @@ -56,7 +54,101 @@ class MissingType(str, Enum): TimedOut = "reproschema:TimedOut" -class AdditionalNoteObj(ConfiguredBaseModel): +class Agent(ConfiguredBaseModel): + pass + + +class Participant(Agent): + """ + An Agent describing characteristics associated with a participant. + """ + + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + subject_id: Optional[str] = Field(None) + + +class Thing(ConfiguredBaseModel): + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) + + +class Activity(Thing): + """ + An assessment in a protocol. + """ + + about: Optional[str] = Field( + None, description="""The subject matter of the Field.""" + ) + altLabel: Optional[Dict[str, str]] = Field( + default_factory=dict, + title="alternate label", + description="""The alternate label.""", + ) + associatedMedia: Optional[str] = Field( + None, + title="associatedMedia", + description="""A media object that encodes this creative work. This property is a synonym for encoding.""", + ) + citation: Optional[Dict[str, str]] = Field(default_factory=dict) + compute: Optional[List[ComputeSpecification]] = Field( + default_factory=list, + title="computation", + description="""An array of objects indicating computations in an activity or protocol and maps it to the corresponding Item. scoring logic is a subset of all computations that could be performed and not all computations will be scoring. For example, one may want to do conversion from one unit to another.""", + ) + cronTable: Optional[str] = Field( + None, title="cronTable", description="""TODO not described in reproschema""" + ) + description: Optional[Dict[str, str]] = Field(default_factory=dict) + image: Optional[Union[ImageObject, str]] = Field( + None, + title="image", + description="""An image of the item. This can be a URL or a fully described ImageObject.""", + ) + messages: Optional[List[MessageSpecification]] = Field( + default_factory=list, + title="messages", + description="""An array of objects to define conditional messages in an activity or protocol.""", + ) + preamble: Optional[Dict[str, str]] = Field( + default_factory=dict, + title="Preamble", + description="""The preamble for an assessment""", + ) + prefLabel: Optional[Dict[str, str]] = Field( + default_factory=dict, + title="preferred label", + description="""The preferred label.""", + ) + schemaVersion: Optional[str] = Field(None) + ui: Optional[UI] = Field( + None, + title="UI", + description="""An element to control UI specifications. Originally @nest in jsonld, but using a class in the model.""", + ) + version: Optional[str] = Field(None) + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) + + +class AdditionalNoteObj(Thing): """ A set of objects to define notes in a Item. For example, most Redcap and NDA data dictionaries have notes for each item which needs to be captured in reproschema """ @@ -78,9 +170,18 @@ class AdditionalNoteObj(ConfiguredBaseModel): title="value", description="""The value for each option in choices or in additionalNotesObj""", ) + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) -class AdditionalProperty(ConfiguredBaseModel): +class AdditionalProperty(Thing): """ An object to describe the various properties added to assessments and Items. """ @@ -136,13 +237,18 @@ class AdditionalProperty(ConfiguredBaseModel): title="UI", description="""An element to control UI specifications. Originally @nest in jsonld, but using a class in the model.""", ) + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) -class Agent(ConfiguredBaseModel): - None - - -class Choice(ConfiguredBaseModel): +class Choice(Thing): """ An object to describe a response option. """ @@ -154,15 +260,25 @@ class Choice(ConfiguredBaseModel): description="""An image of the item. This can be a URL or a fully described ImageObject.""", ) value: Optional[ - Union[Decimal, Dict[str, str], MissingType, StructuredValue, bool, str] + Union[ + float, int, Decimal, Dict[str, str], MissingType, StructuredValue, bool, str + ] ] = Field( None, title="value", description="""The value for each option in choices or in additionalNotesObj""", ) + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) -class ComputeSpecification(ConfiguredBaseModel): +class ComputeSpecification(Thing): """ An object to define computations in an activity or protocol. """ @@ -177,73 +293,18 @@ class ComputeSpecification(ConfiguredBaseModel): title="variableName", description="""The name used to represent an item.""", ) - - -class CreativeWork(ConfiguredBaseModel): - id: Optional[str] = Field(None) - category: Optional[str] = Field(None) - - -class Activity(CreativeWork): - """ - An assessment in a protocol. - """ - - about: Optional[str] = Field( - None, description="""The subject matter of the Field.""" - ) - altLabel: Optional[Dict[str, str]] = Field( - default_factory=dict, - title="alternate label", - description="""The alternate label.""", - ) - associatedMedia: Optional[str] = Field( - None, - title="associatedMedia", - description="""A media object that encodes this CreativeWork. This property is a synonym for encoding.""", - ) - citation: Optional[Dict[str, str]] = Field(default_factory=dict) - compute: Optional[List[ComputeSpecification]] = Field( - default_factory=list, - title="computation", - description="""An array of objects indicating computations in an activity or protocol and maps it to the corresponding Item. scoring logic is a subset of all computations that could be performed and not all computations will be scoring. For example, one may want to do conversion from one unit to another.""", - ) - cronTable: Optional[str] = Field( - None, title="cronTable", description="""TODO not described in reproschema""" - ) - description: Optional[Dict[str, str]] = Field(default_factory=dict) - image: Optional[Union[ImageObject, str]] = Field( + id: Optional[str] = Field( None, - title="image", - description="""An image of the item. This can be a URL or a fully described ImageObject.""", - ) - messages: Optional[List[MessageSpecification]] = Field( - default_factory=list, - title="messages", - description="""An array of objects to define conditional messages in an activity or protocol.""", - ) - preamble: Optional[Dict[str, str]] = Field( - default_factory=dict, - title="Preamble", - description="""The preamble for an assessment""", + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", ) - prefLabel: Optional[Dict[str, str]] = Field( - default_factory=dict, - title="preferred label", - description="""The preferred label.""", - ) - schemaVersion: Optional[str] = Field(None) - ui: Optional[UI] = Field( + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( None, - title="UI", - description="""An element to control UI specifications. Originally @nest in jsonld, but using a class in the model.""", + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", ) - version: Optional[str] = Field(None) - id: Optional[str] = Field(None) - category: Optional[str] = Field(None) -class Item(CreativeWork): +class Item(Thing): """ An item in an assessment. """ @@ -264,7 +325,7 @@ class Item(CreativeWork): associatedMedia: Optional[str] = Field( None, title="associatedMedia", - description="""A media object that encodes this CreativeWork. This property is a synonym for encoding.""", + description="""A media object that encodes this creative work. This property is a synonym for encoding.""", ) audio: Optional[Union[AudioObject, str]] = Field( None, title="audio", description="""TODO""" @@ -275,9 +336,6 @@ class Item(CreativeWork): title="image", description="""An image of the item. This can be a URL or a fully described ImageObject.""", ) - imageUrl: Optional[str] = Field( - None, title="imageUrl", description="""An image url.""" - ) isPartOf: Optional[Activity] = Field(None) preamble: Optional[Dict[str, str]] = Field( default_factory=dict, @@ -302,46 +360,82 @@ class Item(CreativeWork): description="""An element to control UI specifications. Originally @nest in jsonld, but using a class in the model.""", ) version: Optional[str] = Field(None) - video: Optional[VideoObject] = Field(None) - id: Optional[str] = Field(None) - category: Optional[str] = Field(None) + video: Optional[Union[VideoObject, str]] = Field(None) + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) -class LandingPage(ConfiguredBaseModel): +class LandingPage(Thing): """ An object to define the landing page of a protocol. """ inLanguage: Optional[str] = Field(None) - id: Optional[str] = Field(None) + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) -class MediaObject(CreativeWork): +class MediaObject(Thing): """ Add description """ - contentUrl: str = Field(...) inLanguage: Optional[str] = Field(None) - id: Optional[str] = Field(None) - category: Optional[str] = Field(None) + contentUrl: str = Field(...) + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) class AudioObject(MediaObject): - contentUrl: str = Field(...) inLanguage: Optional[str] = Field(None) - id: Optional[str] = Field(None) - category: Optional[str] = Field(None) + contentUrl: str = Field(...) + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) class ImageObject(MediaObject): - contentUrl: str = Field(...) inLanguage: Optional[str] = Field(None) - id: Optional[str] = Field(None) - category: Optional[str] = Field(None) + contentUrl: str = Field(...) + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) -class MessageSpecification(ConfiguredBaseModel): +class MessageSpecification(Thing): """ An object to define messages in an activity or protocol. """ @@ -356,9 +450,18 @@ class MessageSpecification(ConfiguredBaseModel): title="Message", description="""The message to be conditionally displayed for an item.""", ) + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) -class OverrideProperty(ConfiguredBaseModel): +class OverrideProperty(Thing): """ An object to override the various properties added to assessments and Items. """ @@ -404,17 +507,18 @@ class OverrideProperty(ConfiguredBaseModel): title="variableName", description="""The name used to represent an item.""", ) + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) -class Participant(Agent): - """ - An Agent describing characteristics associated with a participant. - """ - - subject_id: Optional[str] = Field(None) - - -class Protocol(CreativeWork): +class Protocol(Thing): """ A representation of a study which comprises one or more assessments. """ @@ -430,7 +534,7 @@ class Protocol(CreativeWork): associatedMedia: Optional[str] = Field( None, title="associatedMedia", - description="""A media object that encodes this CreativeWork. This property is a synonym for encoding.""", + description="""A media object that encodes this creative work. This property is a synonym for encoding.""", ) compute: Optional[List[ComputeSpecification]] = Field( default_factory=list, @@ -463,11 +567,18 @@ class Protocol(CreativeWork): description="""An element to control UI specifications. Originally @nest in jsonld, but using a class in the model.""", ) version: Optional[str] = Field(None) - id: Optional[str] = Field(None) - category: Optional[str] = Field(None) + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) -class Response(CreativeWork): +class Response(Thing): """ Describes the response of an item. """ @@ -485,11 +596,18 @@ class Response(CreativeWork): description="""The value for each option in choices or in additionalNotesObj""", ) wasAttributedTo: Optional[Participant] = Field(None) - id: Optional[str] = Field(None) - category: Optional[str] = Field(None) + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) -class ResponseActivity(CreativeWork): +class ResponseActivity(Thing): """ Captures information about some action that took place. It also links to information (entities) that were used during the activity """ @@ -500,11 +618,18 @@ class ResponseActivity(CreativeWork): startedAtTime: Optional[datetime] = Field(None) used: Optional[List[str]] = Field(default_factory=list) wasAssociatedWith: Optional[SoftwareAgent] = Field(None) - id: Optional[str] = Field(None) - category: Optional[str] = Field(None) + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) -class ResponseOption(CreativeWork): +class ResponseOption(Thing): """ An element (object or by URL)to describe the properties of response of the Item. """ @@ -531,31 +656,50 @@ class ResponseOption(CreativeWork): title="unitOptions", description="""A list of objects to represent a human displayable name alongside the more formal value for units.""", ) - valueType: Optional[List[str]] = Field( + valueType: Optional[Union[str, List[str]]] = Field( default_factory=list, title="The type of the response", description="""The type of the response of an item. For example, string, integer, etc.""", ) - id: Optional[str] = Field(None) - category: Optional[str] = Field(None) + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) -class SoftwareAgent(ConfiguredBaseModel): +class SoftwareAgent(Thing): """ Captures information about some action that took place. It also links to information (entities) that were used during the activity """ version: Optional[str] = Field(None) url: Optional[str] = Field(None) + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) -class StructuredValue(CreativeWork): - id: Optional[str] = Field(None) - category: Optional[str] = Field(None) - - -class Thing(ConfiguredBaseModel): - None +class StructuredValue(Thing): + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) class UI(ConfiguredBaseModel): @@ -596,7 +740,7 @@ class UI(ConfiguredBaseModel): readonlyValue: Optional[bool] = Field(None) -class UnitOption(ConfiguredBaseModel): +class UnitOption(Thing): """ An object to represent a human displayable name alongside the more formal value for units. """ @@ -611,24 +755,41 @@ class UnitOption(ConfiguredBaseModel): title="value", description="""The value for each option in choices or in additionalNotesObj""", ) + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) class VideoObject(MediaObject): - contentUrl: str = Field(...) inLanguage: Optional[str] = Field(None) - id: Optional[str] = Field(None) - category: Optional[str] = Field(None) + contentUrl: str = Field(...) + id: Optional[str] = Field( + None, + description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""", + ) + name: Optional[Dict[str, str]] = Field(default_factory=dict) + category: Optional[str] = Field( + None, + description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""", + ) # Model rebuild # see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model +Agent.model_rebuild() +Participant.model_rebuild() +Thing.model_rebuild() +Activity.model_rebuild() AdditionalNoteObj.model_rebuild() AdditionalProperty.model_rebuild() -Agent.model_rebuild() Choice.model_rebuild() ComputeSpecification.model_rebuild() -CreativeWork.model_rebuild() -Activity.model_rebuild() Item.model_rebuild() LandingPage.model_rebuild() MediaObject.model_rebuild() @@ -636,14 +797,12 @@ class VideoObject(MediaObject): ImageObject.model_rebuild() MessageSpecification.model_rebuild() OverrideProperty.model_rebuild() -Participant.model_rebuild() Protocol.model_rebuild() Response.model_rebuild() ResponseActivity.model_rebuild() ResponseOption.model_rebuild() SoftwareAgent.model_rebuild() StructuredValue.model_rebuild() -Thing.model_rebuild() UI.model_rebuild() UnitOption.model_rebuild() VideoObject.model_rebuild() diff --git a/reproschema/models/protocol.py b/reproschema/models/protocol.py deleted file mode 100644 index 10fa951..0000000 --- a/reproschema/models/protocol.py +++ /dev/null @@ -1,77 +0,0 @@ -from .base import SchemaBase - - -class Protocol(SchemaBase): - """ - class to deal with reproschema protocols - """ - - schema_type = "reproschema:Protocol" - - def __init__(self, version=None): - super().__init__(version) - self.schema["ui"] = { - "allow": [], - "shuffle": [], - "order": [], - "addProperties": [], - } - - def set_landing_page(self, landing_page_url, lang="en"): - self.schema["landingPage"] = {"@id": landing_page_url, "inLanguage": lang} - - # TODO - # def add_landing_page(self, landing_page_url, lang="en"): - # preamble - # compute - - def set_image(self, image_url): - self.schema["image"] = image_url - - def set_ui_allow(self): - self.schema["ui"]["allow"] = [ - "reproschema:AutoAdvance", - "reproschema:AllowExport", - ] - - def set_ui_shuffle(self, shuffle=False): - self.schema["ui"]["shuffle"] = shuffle - - def set_defaults(self, name): - self._ReproschemaSchema__set_defaults(name) # this looks wrong - self.set_landing_page("../../README-en.md") - self.set_ui_allow() - self.set_ui_shuffle(False) - - def append_activity(self, activity): - # TODO - # - remove the hard coding on visibility and valueRequired - - # update the content of the protocol with this new activity - append_to_protocol = { - "variableName": activity.get_name(), - "isAbout": activity.get_URI(), - "prefLabel": {"en": activity.schema["prefLabel"]}, - "isVis": True, - "valueRequired": False, - } - - self.schema["ui"]["order"].append(activity.URI) - self.schema["ui"]["addProperties"].append(append_to_protocol) - - def sort(self): - schema_order = [ - "@context", - "@type", - "@id", - "prefLabel", - "description", - "schemaVersion", - "version", - "landingPage", - "ui", - ] - self.sort_schema(schema_order) - - ui_order = ["allow", "shuffle", "order", "addProperties"] - self.sort_ui(ui_order) diff --git a/reproschema/models/reproschema b/reproschema/models/reproschema new file mode 100644 index 0000000..1d8e6dd --- /dev/null +++ b/reproschema/models/reproschema @@ -0,0 +1,252 @@ +{ + "comments": { + "description": "Auto generated by LinkML jsonld context generator", + "generation_date": "2024-02-16T13:37:16", + "source": "reproschema.yaml" + }, + "@context": { + "linkml": "https://w3id.org/linkml/", + "nidm": "http://purl.org/nidash/nidm#", + "owl": "http://www.w3.org/2002/07/owl#", + "prov": "http://www.w3.org/ns/prov#", + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "reproschema": "http://schema.repronim.org/", + "schema": "http://schema.org/", + "skos": "http://www.w3.org/2004/02/skos/core#", + "xml": { + "@id": "http://www.w3.org/XML/1998/namespace", + "@prefix": true + }, + "xsd": "http://www.w3.org/2001/XMLSchema#", + "@vocab": "http://schema.repronim.org/", + "@version": 1.1, + "@language": "en", + "id": "@id", + "category": "@type", + "ui": "@nest", + "about": { + "@id": "schema:about" + }, + "addProperties": { + "@container": "@set", + "@nest": "ui" + }, + "additionalNotesObj": { + "@container": "@set" + }, + "allow": { + "@type": "@id", + "@container": "@set", + "@nest": "ui" + }, + "altLabel": { + "@id": "skos:altLabel", + "@container": "@language" + }, + "associatedMedia": { + "@id": "schema:associatedMedia" + }, + "audio": { + "@type": "@id", + "@id": "schema:audio" + }, + "choices": { + "@container": "@set" + }, + "citation": { + "@id": "schema:citation", + "@container": "@language" + }, + "column": { + "@type": "xsd:string" + }, + "compute": { + "@id": "reproschema:compute", + "@container": "@set" + }, + "contentUrl": { + "@type": "@id", + "@id": "schema:contentUrl" + }, + "cronTable": { + "@id": "reproschema:cronTable" + }, + "datumType": { + "@id": "reproschema:datumType" + }, + "description": { + "@id": "schema:description", + "@container": "@language" + }, + "endedAtTime": { + "@type": "xsd:dateTime", + "@id": "prov:endedAtTime" + }, + "generated": { + "@id": "prov:generated" + }, + "image": { + "@type": "@id", + "@id": "schema:image" + }, + "imageUrl": { + "@type": "@id" + }, + "inLanguage": { + "@id": "schema:inLanguage", + "@language": null + }, + "inputType": { + "@type": "xsd:string", + "@nest": "ui" + }, + "isAbout": { + "@type": "@id" + }, + "isPartOf": { + "@type": "@id", + "@id": "schema:isPartOf" + }, + "landingPage": { + "@type": "@id", + "@container": "@set" + }, + "limit": { + "@language": null + }, + "maxValue": { + "@id": "schema:maxValue" + }, + "message": { + "@container": "@language" + }, + "messages": { + "@container": "@set" + }, + "minValue": { + "@id": "schema:minValue" + }, + "multipleChoice": { + "@type": "xsd:boolean" + }, + "name": { + "@id": "schema:name", + "@container": "@language" + }, + "order": { + "@type": "@id", + "@container": "@list", + "@nest": "ui" + }, + "overrideProperties": { + "@container": "@set", + "@nest": "ui" + }, + "preamble": { + "@id": "reproschema:preamble", + "@container": "@language" + }, + "prefLabel": { + "@id": "skos:prefLabel", + "@container": "@language" + }, + "question": { + "@id": "schema:question", + "@container": "@language" + }, + "randomMaxDelay": { + "@language": null + }, + "readonlyValue": { + "@type": "xsd:boolean", + "@id": "schema:readonlyValue", + "@nest": "ui" + }, + "responseOptions": { + "@type": "@id" + }, + "schedule": { + "@language": null + }, + "schemaVersion": { + "@id": "schema:schemaVersion", + "@language": null + }, + "shuffle": { + "@type": "xsd:boolean", + "@nest": "ui" + }, + "source": { + "@type": "xsd:string" + }, + "startedAtTime": { + "@type": "xsd:dateTime", + "@id": "prov:startedAtTime" + }, + "subject_id": { + "@id": "nidm:subject_id" + }, + "unitOptions": { + "@type": "@id", + "@container": "@set" + }, + "url": { + "@type": "@id", + "@id": "schema:url" + }, + "used": { + "@type": "@id", + "@container": "@set", + "@id": "prov:used" + }, + "valueRequired": { + "@type": "xsd:boolean", + "@id": "schema:valueRequired" + }, + "valueType": { + "@type": "@id", + "@container": "@set" + }, + "version": { + "@id": "schema:version", + "@language": null + }, + "video": { + "@type": "@id", + "@id": "schema:video" + }, + "wasAssociatedWith": { + "@type": "@id", + "@id": "prov:wasAssociatedWith" + }, + "wasAttributedTo": { + "@type": "@id", + "@id": "prov:wasAttributedTo" + }, + "Activity": { + "@id": "reproschema:Activity" + }, + "Agent": { + "@id": "prov:Agent" + }, + "CreativeWork": { + "@id": "schema:CreativeWork" + }, + "LangString": { + "@id": "rdf:langString" + }, + "MediaObject": { + "@id": "schema:MediaObject" + }, + "StructuredValue": { + "@id": "schema:StructuredValue" + }, + "Thing": { + "@id": "schema:Thing" + }, + "VideoObject": { + "@id": "schema:VideoObject" + } + } +} diff --git a/reproschema/models/tests/test_schema.py b/reproschema/models/tests/test_schema.py index a68e808..04a4658 100644 --- a/reproschema/models/tests/test_schema.py +++ b/reproschema/models/tests/test_schema.py @@ -1,20 +1,139 @@ -from .. import Protocol, Activity, Item - - -def test_constructors(): - Protocol() - Activity() - Item() - version = "1.0.0-rc2" - proto = Protocol(version=version) - assert proto.schema["schemaVersion"] == version - act = Activity(version) - assert act.schema["schemaVersion"] == version - item = Item(version) - assert item.schema["schemaVersion"] == version - - -def test_constructors_from_data(): - Protocol.from_data({"@type": "reproschema:Protocol"}) - Activity.from_data({"@type": "reproschema:Activity"}) - Item.from_data({"@type": "reproschema:Field"}) +from .. import Protocol, Activity, Item, ResponseOption +from ..utils import write_obj_jsonld +from ...utils import start_server, stop_server +from ...jsonldutils import load_file + +from pyld import jsonld +import json, os +from pathlib import Path + +import pytest + + +@pytest.fixture +def server_http_kwargs(request): + http_kwargs = {} + stop, port = start_server() + http_kwargs["port"] = port + + olddir = os.getcwd() + os.chdir(os.path.dirname(__file__)) + + def stoping_server(): + stop_server(stop) + os.chdir(olddir) + + request.addfinalizer(stoping_server) + return http_kwargs + + +@pytest.mark.parametrize("model_class", [Protocol, Activity, Item, ResponseOption]) +def test_constructors(model_class): + ob = model_class() + assert hasattr(ob, "id") + assert hasattr(ob, "category") + + +def test_protocol(tmp_path, server_http_kwargs): + """check if protocol is created correctly for a simple example + and if it can be written to the file as jsonld. + """ + protocol_dict = { + "category": "reproschema:Protocol", + "id": "protocol1.jsonld", + "prefLabel": {"en": "Protocol1", "es": "Protocol1_es"}, + "description": {"en": "example Protocol"}, + "schemaVersion": "1.0.0-rc4", + "version": "0.0.1", + "messages": [ + { + "message": { + "en": "Test message: Triggered when item1 value is greater than 0" + }, + "jsExpression": "item1 > 0", + } + ], + } + protocol_obj = Protocol(**protocol_dict) + + file_path = tmp_path / "protocol1.jsonld" + write_obj_jsonld(protocol_obj, file_path) + data = load_file(file_path, started=True, http_kwargs=server_http_kwargs) + expanded = jsonld.expand(data) + assert len(expanded) > 0 + + +def test_activity(tmp_path, server_http_kwargs): + """check if activity is created correctly for a simple example + and if it can be written to the file as jsonld.""" + activity_dict = { + "category": "reproschema:Activity", + "id": "activity1.jsonld", + "prefLabel": {"en": "Example 1"}, + "description": {"en": "Activity example 1"}, + "schemaVersion": "1.0.0-rc4", + "version": "0.0.1", + "image": { + "category": "AudioObject", + "contentUrl": "http://example.com/sample-image.png", + }, + "preamble": { + "en": "Over the last 2 weeks, how often have you been bothered by any of the following problems?", + "es": "Durante las últimas 2 semanas, ¿con qué frecuencia le han molestado los siguintes problemas?", + }, + "compute": [ + {"variableName": "activity1_total_score", "jsExpression": "item1 + item2"} + ], + } + activity_obj = Activity(**activity_dict) + + file_path = tmp_path / "activity1.jsonld" + write_obj_jsonld(activity_obj, file_path) + data = load_file(file_path, started=True, http_kwargs=server_http_kwargs) + expanded = jsonld.expand(data) + assert len(expanded) > 0 + + +def test_item(tmp_path, server_http_kwargs): + """check if item is created correctly for a simple example" + and if it can be written to the file as jsonld.""" + + item_dict = { + "category": "reproschema:Field", + "id": "item1.jsonld", + "prefLabel": {"en": "item1"}, + "altLabel": {"en": "item1_alt"}, + "description": {"en": "Q1 of example 1"}, + "schemaVersion": "1.0.0-rc4", + "version": "0.0.1", + "audio": { + "category": "AudioObject", + "contentUrl": "http://media.freesound.org/sample-file.mp4", + }, + "image": { + "category": "ImageObject", + "contentUrl": "http://example.com/sample-image.jpg", + }, + "question": { + "en": "Little interest or pleasure in doing things", + "es": "Poco interés o placer en hacer cosas", + }, + # "ui": {"inputType": "radio"}, + "responseOptions": { + "minValue": 0, + "maxValue": 3, + "multipleChoice": False, + "choices": [ + {"name": {"en": "Not at all", "es": "Para nada"}, "value": "a"}, + {"name": {"en": "Several days", "es": "Varios días"}, "value": "b"}, + ], + }, + } + + item_obj = Item(**item_dict) + + file_path = tmp_path / "item1.jsonld" + write_obj_jsonld(item_obj, file_path) + data = load_file(file_path, started=True, http_kwargs=server_http_kwargs) + expanded = jsonld.expand(data) + assert len(expanded) > 0 diff --git a/reproschema/models/utils.py b/reproschema/models/utils.py index 745ec4a..e6452df 100644 --- a/reproschema/models/utils.py +++ b/reproschema/models/utils.py @@ -1,7 +1,8 @@ import json -from . import Protocol, Activity, Item +from .model import Protocol, Activity, Item, ResponseOption +# TODO: where can we be used? def load_schema(filepath): with open(filepath) as fp: data = json.load(fp) @@ -14,3 +15,16 @@ def load_schema(filepath): return Activity.from_data(data) if schema_type == "reproschema:Item": return Item.from_data(data) + + +def write_obj_jsonld(model_obj, path): + """Write a pydantic model object to a jsonld file.""" + contextfile = "https://raw.githubusercontent.com/djarecka/reproschema/linkml_new_tmp/contexts/reproschema_new" + model_dict = model_obj.model_dump( + exclude_unset=True, + ) + model_dict["@context"] = contextfile + + with open(path, "w") as f: + json.dump(model_dict, f, indent=4) + return path diff --git a/reproschema/pdf2reproschema.py b/reproschema/pdf2reproschema.py new file mode 100644 index 0000000..63bc305 --- /dev/null +++ b/reproschema/pdf2reproschema.py @@ -0,0 +1,16 @@ +""" +Converts pdfs of questionnaires to ReproSchema activities. +""" + + +# parse pdf text +# LLM(parsed_text, pydantic_models) +# format validation +# lexical match +# feed back to LLM until converges +# evaluate +# if good, optionally subset +# push to UI for check + + +# modes: human-in-the-loop, maximally automated diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index f267a2f..cefb8a4 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -5,24 +5,49 @@ import re import yaml from bs4 import BeautifulSoup +from .models import Activity, Item, Protocol, write_obj_jsonld matrix_group_count = {} def clean_header(header): - return {k.lstrip("\ufeff"): v for k, v in header.items()} + cleaned_header = {} + for k, v in header.items(): + # Strip BOM, whitespace, and enclosing quotation marks if present + cleaned_key = k.lstrip("\ufeff").strip().strip('"') + cleaned_header[cleaned_key] = v + return cleaned_header def normalize_condition(condition_str): + # Regular expressions for various pattern replacements re_parentheses = re.compile(r"\(([0-9]*)\)") re_non_gt_lt_equal = re.compile(r"([^>|<])=") re_brackets = re.compile(r"\[([^\]]*)\]") + re_extra_spaces = re.compile(r"\s+") + re_double_quotes = re.compile(r'"') + re_or = re.compile(r"\bor\b") # Match 'or' as whole word + # Apply regex replacements condition_str = re_parentheses.sub(r"___\1", condition_str) condition_str = re_non_gt_lt_equal.sub(r"\1 ==", condition_str) - condition_str = condition_str.replace(" and ", " && ").replace(" or ", " || ") condition_str = re_brackets.sub(r" \1 ", condition_str) - return condition_str + + # Replace 'or' with '||', ensuring not to replace '||' + condition_str = re_or.sub("||", condition_str) + + # Replace 'and' with '&&' + condition_str = condition_str.replace(" and ", " && ") + + # Trim extra spaces and replace double quotes with single quotes + condition_str = re_extra_spaces.sub( + " ", condition_str + ).strip() # Reduce multiple spaces to a single space + condition_str = re_double_quotes.sub( + "'", condition_str + ) # Replace double quotes with single quotes + + return condition_str.strip() def process_visibility(data): @@ -42,7 +67,11 @@ def process_visibility(data): def parse_field_type_and_value(field, input_type_map): field_type = field.get("Field Type", "") - input_type = input_type_map.get(field_type, field_type) + # Check if field_type is 'yesno' and directly assign 'radio' as the input type + if field_type == "yesno": + input_type = "radio" # Directly set to 'radio' for 'yesno' fields + else: + input_type = input_type_map.get(field_type, field_type) # Original logic # Initialize the default value type as string value_type = "xsd:string" @@ -55,7 +84,8 @@ def parse_field_type_and_value(field, input_type_map): "time_": "xsd:time", "email": "xsd:string", "phone": "xsd:string", - } # todo: input_type="signature" + # No change needed here for 'yesno', as it's handled above + } # Get the validation type from the field, if available validation_type = field.get( @@ -91,27 +121,15 @@ def process_choices(field_type, choices_str): except ValueError: value = parts[0] - choice_obj = {"name": parts[1], "value": value} - if len(parts) == 3: - # Handle image url - choice_obj["schema:image"] = f"{parts[2]}.png" + choice_obj = {"name": {"en": " ".join(parts[1:])}, "value": value} + # remove image for now + # if len(parts) == 3: + # # Handle image url + # choice_obj["image"] = f"{parts[2]}.png" choices.append(choice_obj) return choices -def write_to_file(abs_folder_path, form_name, field_name, rowData): - file_path = os.path.join( - f"{abs_folder_path}", "activities", form_name, "items", f"{field_name}" - ) - os.makedirs(os.path.dirname(file_path), exist_ok=True) - try: - with open(file_path, "w") as file: - json.dump(rowData, file, indent=4) - print(f"Item schema for {form_name} written successfully.") - except Exception as e: - print(f"Error in writing item schema for {form_name}: {e}") - - def parse_html(input_string, default_language="en"): result = {} soup = BeautifulSoup(input_string, "html.parser") @@ -144,6 +162,7 @@ def process_row( response_list, additional_notes_list, ): + """Process a row of the REDCap data and generate the jsonld file for the item.""" global matrix_group_count matrix_group_name = field.get("Matrix Group Name", "") if matrix_group_name: @@ -155,11 +174,10 @@ def process_row( item_id = field.get("Variable / Field Name", "") rowData = { - "@context": schema_context_url, - "@type": "reproschema:Field", - "@id": item_id, - "prefLabel": item_id, - "description": f"{item_id} of {form_name}", + "category": "reproschema:Item", + "id": item_id, + "prefLabel": {"en": item_id}, + "description": {"en": f"{item_id} of {form_name}"}, } field_type = field.get("Field Type", "") @@ -179,10 +197,7 @@ def process_row( } for key, value in field.items(): - if ( - schema_map.get(key) in ["question", "schema:description", "preamble"] - and value - ): + if schema_map.get(key) in ["question", "description", "preamble"] and value: rowData.update({schema_map[key]: parse_html(value)}) elif schema_map.get(key) == "allow" and value: @@ -214,32 +229,35 @@ def process_row( } ) - elif schema_map.get(key) == "visibility" and value: - condition = normalize_condition(value) - rowData.setdefault("visibility", []).append( - {"variableName": field["Variable / Field Name"], "isVis": condition} - ) - - elif key == "Identifier?" and value: - identifier_val = value.lower() == "y" - rowData.update( - { - schema_map[key]: [ - {"legalStandard": "unknown", "isIdentifier": identifier_val} - ] - } - ) + # elif key == "Identifier?" and value: + # identifier_val = value.lower() == "y" + # rowData.update( + # { + # schema_map[key]: [ + # {"legalStandard": "unknown", "isIdentifier": identifier_val} + # ] + # } + # ) elif key in additional_notes_list and value: notes_obj = {"source": "redcap", "column": key, "value": value} rowData.setdefault("additionalNotesObj", []).append(notes_obj) - write_to_file(abs_folder_path, form_name, field["Variable / Field Name"], rowData) + it = Item(**rowData) + file_path_item = os.path.join( + f"{abs_folder_path}", + "activities", + form_name, + "items", + f'{field["Variable / Field Name"]}', + ) + write_obj_jsonld(it, file_path_item) def create_form_schema( abs_folder_path, schema_context_url, + redcap_version, form_name, activity_display_name, activity_description, @@ -248,42 +266,37 @@ def create_form_schema( matrix_list, scores_list, ): + """Create the JSON-LD schema for the Activity.""" # Use a set to track unique items and preserve order unique_order = list(dict.fromkeys(order.get(form_name, []))) # Construct the JSON-LD structure json_ld = { - "@context": schema_context_url, - "@type": "reproschema:Activity", - "@id": f"{form_name}_schema", - "prefLabel": activity_display_name, - "description": activity_description, + "category": "reproschema:Activity", + "id": f"{form_name}_schema", + "prefLabel": {"en": activity_display_name}, + "description": {"en": activity_description}, "schemaVersion": "1.0.0-rc4", - "version": "0.0.1", + "version": redcap_version, "ui": { "order": unique_order, "addProperties": bl_list, "shuffle": False, }, } - - if matrix_list: - json_ld["matrixInfo"] = matrix_list + act = Activity(**json_ld) + # remove matrixInfo to pass validataion + # if matrix_list: + # json_ld["matrixInfo"] = matrix_list if scores_list: json_ld["scoringLogic"] = scores_list path = os.path.join(f"{abs_folder_path}", "activities", form_name) + os.makedirs(path, exist_ok=True) filename = f"{form_name}_schema" file_path = os.path.join(path, filename) - try: - os.makedirs(path, exist_ok=True) - with open(file_path, "w") as file: - json.dump(json_ld, file, indent=4) - print(f"{form_name} Instrument schema created") - except OSError as e: - print(f"Error creating directory {path}: {e}") - except IOError as e: - print(f"Error writing to file {file_path}: {e}") + write_obj_jsonld(act, file_path) + print(f"{form_name} Instrument schema created") def process_activities(activity_name, protocol_visibility_obj, protocol_order): @@ -296,6 +309,7 @@ def process_activities(activity_name, protocol_visibility_obj, protocol_order): def create_protocol_schema( abs_folder_path, schema_context_url, + redcap_version, protocol_name, protocol_display_name, protocol_description, @@ -304,48 +318,44 @@ def create_protocol_schema( ): # Construct the protocol schema protocol_schema = { - "@context": schema_context_url, - "@type": "reproschema:Protocol", - "@id": f"{protocol_name}_schema", - "skos:prefLabel": protocol_display_name, - "skos:altLabel": f"{protocol_name}_schema", - "schema:description": protocol_description, - "schema:schemaVersion": "1.0.0-rc4", - "schema:version": "0.0.1", + "category": "reproschema:Protocol", + "id": f"{protocol_name}_schema", + "prefLabel": {"en": protocol_display_name}, + "altLabel": {"en": f"{protocol_name}_schema"}, + "description": {"en": protocol_description}, + "schemaVersion": "1.0.0-rc4", + "version": redcap_version, "ui": { "addProperties": [], - "order": protocol_order, + "order": [], "shuffle": False, }, } # Populate addProperties list for activity in protocol_order: + full_path = f"../activities/{activity}/{activity}_schema" add_property = { - "isAbout": f"../activities/{activity}/{activity}_schema", + "isAbout": full_path, "variableName": f"{activity}_schema", # Assuming activity name as prefLabel, update as needed - "prefLabel": activity.replace("_", " ").title(), + "prefLabel": {"en": activity.replace("_", " ").title()}, + "isVis": protocol_visibility_obj.get( + activity, True + ), # Default to True if not specified } protocol_schema["ui"]["addProperties"].append(add_property) + # Add the full path to the order list + protocol_schema["ui"]["order"].append(full_path) - # Add visibility if needed - if protocol_visibility_obj: - protocol_schema["ui"]["visibility"] = protocol_visibility_obj - + prot = Protocol(**protocol_schema) + # Write the protocol schema to file protocol_dir = f"{abs_folder_path}/{protocol_name}" + os.makedirs(protocol_dir, exist_ok=True) schema_file = f"{protocol_name}_schema" file_path = os.path.join(protocol_dir, schema_file) - - try: - os.makedirs(protocol_dir, exist_ok=True) - with open(file_path, "w") as file: - json.dump(protocol_schema, file, indent=4) - print("Protocol schema created") - except OSError as e: - print(f"Error creating directory {protocol_dir}: {e}") - except IOError as e: - print(f"Error writing to file {file_path}: {e}") + write_obj_jsonld(prot, file_path) + print("Protocol schema created") def parse_language_iso_codes(input_string): @@ -388,6 +398,7 @@ def process_csv( for field in datas[form_name]: field_name = field["Variable / Field Name"] order[form_name].append(f"items/{field_name}") + print("Processing field: ", field_name, " in form: ", form_name) process_row( abs_folder_path, schema_context_url, @@ -420,6 +431,8 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None): protocol_name = protocol.get("protocol_name") protocol_display_name = protocol.get("protocol_display_name") protocol_description = protocol.get("protocol_description") + redcap_version = protocol.get("redcap_version") + # we can add reproschema version here (or automatically extract) if not protocol_name: raise ValueError("Protocol name not specified in the YAML file.") @@ -434,7 +447,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None): abs_folder_path = os.path.abspath(protocol_name) if schema_context_url is None: - schema_context_url = "https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic" + schema_context_url = "https://raw.githubusercontent.com/ReproNim/reproschema/efb74e155c09e13aa009ea04609ba4f1152fcbc6/contexts/reproschema_new" # Initialize variables schema_map = { @@ -451,7 +464,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None): "Choices, Calculations, OR Slider Labels": "choices", # column F "Branching Logic (Show field only if...)": "visibility", # column L "Custom Alignment": "customAlignment", # column N - "Identifier?": "identifiable", # column K + # "Identifier?": "identifiable", # column K "multipleChoice": "multipleChoice", "responseType": "@type", } @@ -515,6 +528,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None): create_form_schema( abs_folder_path, schema_context_url, + redcap_version, form_name, activity_display_name, activity_description, @@ -530,6 +544,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None): create_protocol_schema( abs_folder_path, schema_context_url, + redcap_version, protocol_name, protocol_display_name, protocol_description, diff --git a/reproschema/reproschema2redcap.py b/reproschema/reproschema2redcap.py index 3d03cf3..298c56e 100644 --- a/reproschema/reproschema2redcap.py +++ b/reproschema/reproschema2redcap.py @@ -142,6 +142,7 @@ def get_csv_data(dir_path): if protocol_dir.is_dir(): # Check for a _schema file in each directory schema_file = next(protocol_dir.glob("*_schema"), None) + print(f"Found schema file: {schema_file}") if schema_file: # Process the found _schema file parsed_protocol_json = read_json_file(schema_file) @@ -152,8 +153,14 @@ def get_csv_data(dir_path): normalized_relative_path = Path( relative_activity_path.lstrip("../") ) - activity_path = dir_path / normalized_relative_path - print(f"Processing activity {activity_path}") + + activity_path = ( + dir_path + / "activities" + / normalized_relative_path + / (normalized_relative_path.name + "_schema") + ) + parsed_activity_json = read_json_file(activity_path) if parsed_activity_json: diff --git a/reproschema/tests/test_redcap2reproschema.py b/reproschema/tests/test_redcap2reproschema.py index 2386a8c..ac24cf5 100644 --- a/reproschema/tests/test_redcap2reproschema.py +++ b/reproschema/tests/test_redcap2reproschema.py @@ -1,10 +1,10 @@ import os import shutil import pytest +import yaml from click.testing import CliRunner -from ..cli import main # Import the Click group +from ..cli import main -# Assuming your test files are located in a 'tests' directory CSV_FILE_NAME = "redcap_dict.csv" YAML_FILE_NAME = "redcap2rs.yaml" CSV_TEST_FILE = os.path.join( @@ -15,17 +15,34 @@ ) -def test_redcap2reproschema_success(): +def test_redcap2reproschema(tmpdir): runner = CliRunner() - with runner.isolated_filesystem(): - # Copy the test files to the isolated filesystem - shutil.copy(CSV_TEST_FILE, CSV_FILE_NAME) - shutil.copy(YAML_TEST_FILE, YAML_FILE_NAME) + temp_csv_file = tmpdir.join(CSV_FILE_NAME) + temp_yaml_file = tmpdir.join(YAML_FILE_NAME) + + shutil.copy(CSV_TEST_FILE, str(temp_csv_file)) # Convert to string + shutil.copy(YAML_TEST_FILE, str(temp_yaml_file)) # Convert to string + print("tmpdir: ", tmpdir) + # Change the current working directory to tmpdir + with tmpdir.as_cwd(): + # Read YAML to find the expected output directory name + with open(str(temp_yaml_file), "r") as file: # Convert to string + protocol = yaml.safe_load(file) + protocol_name = protocol.get("protocol_name", "").replace(" ", "_") - # Run the command within the isolated filesystem result = runner.invoke( - main, ["redcap2reproschema", CSV_FILE_NAME, YAML_FILE_NAME] + main, + [ + "redcap2reproschema", + str(temp_csv_file), + str(temp_yaml_file), + ], # Convert to string ) - print(result.output) - assert result.exit_code == 0 + + assert ( + result.exit_code == 0 + ), f"The command failed to execute successfully: {result.output}" + assert os.path.isdir( + protocol_name + ), f"Expected output directory '{protocol_name}' does not exist" diff --git a/reproschema/tests/test_redcap2rs_data/redcap2rs.yaml b/reproschema/tests/test_redcap2rs_data/redcap2rs.yaml index 3330f3b..95d4a9c 100644 --- a/reproschema/tests/test_redcap2rs_data/redcap2rs.yaml +++ b/reproschema/tests/test_redcap2rs_data/redcap2rs.yaml @@ -9,13 +9,8 @@ protocol_name: "test_redcap2rs" # Example: "My_Protocol" # This name will be displayed in the application. protocol_display_name: "redcap protocols" -# GitHub Repository Information: -# Create a GitHub repository named 'reproschema' to store your reproschema protocols. -# Replace 'your_github_username' with your actual GitHub username. -user_name: "yibeichan" -repo_name: "redcap2reproschema" # Recommended name; can be different if preferred. -repo_url: "https://github.com/{{user_name}}/{{repo_name}}" - # Protocol Description: # Provide a brief description of your protocol. protocol_description: "testing" # Example: "This protocol is for ..." + +redcap_version: "3.0.0" diff --git a/reproschema/tests/test_reproschema2redcap.py b/reproschema/tests/test_reproschema2redcap.py index f0a02ce..eff26b3 100644 --- a/reproschema/tests/test_reproschema2redcap.py +++ b/reproschema/tests/test_reproschema2redcap.py @@ -2,46 +2,41 @@ import pytest from click.testing import CliRunner from ..cli import main -from shutil import copytree +from shutil import copytree, rmtree from pathlib import Path import csv -def test_reproschema2redcap_success(): +def test_reproschema2redcap(tmpdir): runner = CliRunner() with runner.isolated_filesystem(): # Copy necessary test data into the isolated filesystem original_data_dir = os.path.join( - os.path.dirname(__file__), "test_rs2redcap_data" + os.path.dirname(__file__), "test_rs2redcap_data", "test_redcap2rs" ) copytree(original_data_dir, "input_data") - input_path = Path("input_data") # Using Path object - output_csv_path = "output.csv" + input_path = Path("input_data") + output_csv_path = os.path.join(tmpdir, "output.csv") - # Invoke the reproschema2redcap command result = runner.invoke( main, ["reproschema2redcap", str(input_path), output_csv_path] ) - # Print the output for debugging print(result.output) - # Assert the expected outcomes assert result.exit_code == 0 - # Check if the output CSV file has been created assert os.path.exists(output_csv_path) - # Read and print the contents of the CSV file with open(output_csv_path, "r", encoding="utf-8") as csv_file: reader = csv.reader(csv_file) csv_contents = list(reader) - print("CSV File Contents:") - for row in csv_contents: - print(row) - # Optionally, assert conditions about the CSV contents - # For example, assert that the file is not empty - assert len(csv_contents) > 0 + assert ( + len(csv_contents) > 1 + ) # More than one row indicates content beyond headers + + # Clean up temporary directory after use (optional) + # rmtree(tmpdir) diff --git a/reproschema/tests/test_validate.py b/reproschema/tests/test_validate.py index 96e40db..43c4d08 100644 --- a/reproschema/tests/test_validate.py +++ b/reproschema/tests/test_validate.py @@ -5,15 +5,15 @@ def test_validate(): os.chdir(os.path.dirname(__file__)) - assert validate_dir("data", os.path.abspath("reproschema-shacl.ttl")) + assert validate_dir("data") def test_type_error(): os.chdir(os.path.dirname(__file__)) with pytest.raises(ValueError): - validate_dir("contexts", os.path.abspath("reproschema-shacl.ttl")) + validate_dir("contexts") def test_url(): url = "https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc1/examples/activities/activity1.jsonld" - assert validate(os.path.abspath("reproschema-shacl.ttl"), url) + assert validate(url) diff --git a/reproschema/utils.py b/reproschema/utils.py index 2f85d1a..d08ba87 100644 --- a/reproschema/utils.py +++ b/reproschema/utils.py @@ -4,6 +4,7 @@ from tempfile import mkdtemp import requests import requests_cache +from copy import deepcopy from . import get_logger @@ -52,3 +53,105 @@ def start_server(port=8000, path=None, tmpdir=None): def stop_server(stop): stop() requests_cache.clear() + + +# items that have to be fixed in the old schema +LANG_FIX = [ + "http://schema.org/schemaVersion", + "http://schema.org/version", + "http://schema.repronim.org/limit", + "http://schema.repronim.org/randomMaxDelay", + "http://schema.org/inLanguage", + "http://schema.repronim.org/schedule", +] +BOOL_FIX = [ + "http://schema.repronim.org/shuffle", + "http://schema.org/readonlyValue", + "http://schema.repronim.org/multipleChoice", + "http://schema.org/valueRequired", +] + +ALLOWTYPE_FIX = ["http://schema.repronim.org/allow"] +ALLOWTYPE_MAPPING = { + "http://schema.repronim.org/Skipped": "http://schema.repronim.org/AllowSkip", + "http://schema.repronim.org/DontKnow": "http://schema.repronim.org/AllowAltResponse", +} + +IMAGE_FIX = ["http://schema.org/image"] + + +def _lang_fix(data_el): + if isinstance(data_el, dict): + data_el.pop("@language", None) + elif isinstance(data_el, list) and len(data_el) == 1: + data_el = data_el[0] + data_el.pop("@language", None) + else: + raise Exception(f"expected a list or dictionary, got {data_el}") + return data_el + + +def _image_fix(data_el): + if isinstance(data_el, dict): + if "@id" not in data_el and "@value" in data_el: + data_el["@id"] = data_el.pop("@value") + data_el.pop("@language", None) + elif isinstance(data_el, list) and len(data_el) == 1: + data_el = data_el[0] + data_el = _image_fix(data_el) + else: + raise Exception(f"expected a list or dictionary, got {data_el}") + return data_el + + +def _bool_fix(data_el): + if isinstance(data_el, dict): + data_el["@type"] = "http://www.w3.org/2001/XMLSchema#boolean" + elif isinstance(data_el, list): + for el in data_el: + _bool_fix(el) + else: + raise Exception(f"expected a list or dictionary, got {data_el}") + + +def _allowtype_fix(data_el): + if isinstance(data_el, dict): + if data_el["@id"] in ALLOWTYPE_MAPPING: + data_el["@id"] = ALLOWTYPE_MAPPING[data_el["@id"]] + elif isinstance(data_el, list): + for el in data_el: + _allowtype_fix(el) + else: + raise Exception(f"expected a list or dictionary, got {data_el}") + + +def fixing_old_schema(data, copy_data=False): + """Fixes the old schema so it can be load to teh new model""" + if copy_data: + data = deepcopy(data) + for key, val in data.items(): + if key in LANG_FIX: + data[key] = _lang_fix(val) + elif key in BOOL_FIX: + _bool_fix(val) + elif key in ALLOWTYPE_FIX: + _allowtype_fix(val) + elif key in IMAGE_FIX: + data[key] = _image_fix(val) + elif isinstance(val, (str, bool, int, float)): + pass + elif isinstance(val, dict): + fix_rec(val) + elif isinstance(val, list): + for el in val: + if isinstance(el, (str, bool, int, float)): + pass + elif isinstance(el, dict): + fixing_old_schema(el) + else: + raise Exception( + f"expected a list, str, bool or numerics, got {data_el}" + ) + else: + raise Exception(f"type {type(val)} not supported yet") + return data diff --git a/reproschema/validate.py b/reproschema/validate.py index 64b612e..a4f7fec 100644 --- a/reproschema/validate.py +++ b/reproschema/validate.py @@ -3,8 +3,8 @@ from .jsonldutils import load_file, validate_data -def validate_dir(directory, shape_file, started=False, http_kwargs={}): - """Validate a directory containing JSONLD documents +def validate_dir(directory, started=False, http_kwargs={}): + """Validate a directory containing JSONLD documents against the ReproSchema pydantic model. .. warning:: This assumes every file in the directory can be read by a json parser. @@ -12,8 +12,6 @@ def validate_dir(directory, shape_file, started=False, http_kwargs={}): ---------- directory: str Path to directory to walk for validation - shape_file: str - Path containing validation SHACL shape files started : bool Whether an http server exists or not http_kwargs : dict @@ -27,6 +25,9 @@ def validate_dir(directory, shape_file, started=False, http_kwargs={}): if any document is non-conformant. """ + if not os.path.isdir(directory): + raise Exception(f"{directory} is not a directory") + print(f"Validating directory {directory}") stop = None if not started: stop, port = start_server(**http_kwargs) @@ -41,7 +42,8 @@ def validate_dir(directory, shape_file, started=False, http_kwargs={}): data = load_file(full_file_name, started=True, http_kwargs=http_kwargs) if len(data) == 0: raise ValueError("Empty data graph") - conforms, vtext = validate_data(data, shape_file) + print(f"Validating {full_file_name}") + conforms, vtext = validate_data(data) except (ValueError,): if stop is not None: stop_server(stop) @@ -57,13 +59,11 @@ def validate_dir(directory, shape_file, started=False, http_kwargs={}): return True -def validate(shapefile, path): +def validate(path): """Helper function to validate directory or path Parameters ---------- - shapefile : path-like - Path to folder or file containing ReproSchema SHACL descriptors path : path-like Path to folder or file containing JSONLD documents. @@ -74,15 +74,11 @@ def validate(shapefile, path): exception. """ - if shapefile is None: - shapefile = os.path.join( - os.path.dirname(__file__), "tests", "reproschema-shacl.ttl" - ) if os.path.isdir(path): - conforms = validate_dir(path, shapefile) + conforms = validate_dir(path) else: data = load_file(path, started=False) - conforms, vtext = validate_data(data, shapefile) + conforms, vtext = validate_data(data) if not conforms: lgr.critical(f"File {path} has validation errors.") raise ValueError(vtext) diff --git a/templates/redcap2rs.yaml b/templates/redcap2rs.yaml index 1e1dbc3..4bbf78f 100644 --- a/templates/redcap2rs.yaml +++ b/templates/redcap2rs.yaml @@ -12,3 +12,5 @@ protocol_display_name: "Your protocol display name" # Protocol Description: # Provide a brief description of your protocol. protocol_description: "Description for your protocol" # Example: "This protocol is for ..." + +redcap_version: "x.y.z" # Example: "3.0.0"