diff --git a/reproschema/cli.py b/reproschema/cli.py
index e31b03e..685af39 100644
--- a/reproschema/cli.py
+++ b/reproschema/cli.py
@@ -42,14 +42,15 @@ def main(log_level):
@main.command()
-@click.option("--shapefile", default=None, type=click.Path(exists=True, dir_okay=False))
@click.argument("path", nargs=1, type=str)
-def validate(shapefile, path):
+def validate(path):
if not (path.startswith("http") or os.path.exists(path)):
raise ValueError(f"{path} must be a URL or an existing file or directory")
from .validate import validate
- validate(shapefile, path)
+ result = validate(path)
+ if result:
+ click.echo("Validation successful")
@main.command()
diff --git a/reproschema/jsonldutils.py b/reproschema/jsonldutils.py
index f88f38f..c82f0d2 100644
--- a/reproschema/jsonldutils.py
+++ b/reproschema/jsonldutils.py
@@ -1,77 +1,112 @@
from pyld import jsonld
-from pyshacl import validate as shacl_validate
import json
import os
-from .utils import start_server, stop_server, lgr
+from pathlib import Path
+from copy import deepcopy
+from urllib.parse import urlparse
+from .utils import start_server, stop_server, lgr, fixing_old_schema
+from .models import Item, Activity, Protocol, ResponseOption, ResponseActivity, Response
+
+
+def _is_url(path):
+ """
+ Determine whether the given path is a URL.
+ """
+ parsed = urlparse(path)
+ return parsed.scheme in ("http", "https", "ftp", "ftps")
+
+
+def _is_file(path):
+ """
+ Determine whether the given path is a valid file path.
+ """
+ return os.path.isfile(path)
def load_file(path_or_url, started=False, http_kwargs={}):
- try:
+ """Load a file or URL and return the expanded JSON-LD data.
+
+ Parameters
+ ----------
+ path_or_url : str or path-like
+ URL (http, https, ftp or ftps scheme) or path of an existing local file.
+ started : bool
+ If False, a local server is started for the expansion of a local file
+ and stopped afterwards. If True, an already running server is used.
+ http_kwargs : dict
+ Passed to ``start_server`` when ``started`` is False; must contain a
+ ``port`` key when ``started`` is True.
+
+ Returns
+ -------
+ data : list
+ Expanded JSON-LD document as returned by ``jsonld.expand``.
+
+ Raises
+ ------
+ KeyError
+ If ``started`` is True and ``http_kwargs`` has no ``port`` key.
+ Exception
+ If ``path_or_url`` is neither a supported URL nor an existing file.
+ """
+ path_or_url = str(path_or_url)
+ if _is_url(path_or_url):
data = jsonld.expand(path_or_url)
if len(data) == 1:
- if "@id" not in data[0]:
+ if "@id" not in data[0] and "id" not in data[0]:
data[0]["@id"] = path_or_url
- except jsonld.JsonLdError as e:
- if 'only "http" and "https"' in str(e):
- lgr.debug("Reloading with local server")
- root = os.path.dirname(path_or_url)
- if not started:
- stop, port = start_server(**http_kwargs)
- else:
- if "port" not in http_kwargs:
- raise KeyError("port key missing in http_kwargs")
- port = http_kwargs["port"]
- base_url = f"http://localhost:{port}/"
- if root:
- base_url += f"{root}/"
- with open(path_or_url) as json_file:
- data = json.load(json_file)
- try:
- data = jsonld.expand(data, options={"base": base_url})
- except:
- raise
- finally:
- if not started:
- stop_server(stop)
- if len(data) == 1:
- if "@id" not in data[0]:
- data[0]["@id"] = base_url + os.path.basename(path_or_url)
+ elif _is_file(path_or_url):
+ lgr.debug("Reloading with local server")
+ root = os.path.dirname(path_or_url)
+ # Read the document before a server is started, so that an unreadable or
+ # malformed file cannot leave a freshly started server running.
+ with open(path_or_url, encoding="utf-8") as json_file:
+ data = json.load(json_file)
+ if not started:
+ stop, port = start_server(**http_kwargs)
else:
+ if "port" not in http_kwargs:
+ raise KeyError("port key missing in http_kwargs")
+ port = http_kwargs["port"]
+ base_url = f"http://localhost:{port}/"
+ if root:
+ base_url += f"{root}/"
+ try:
+ data = jsonld.expand(data, options={"base": base_url})
+ except:
raise
+ finally:
+ # Only stop a server that this call started itself.
+ if not started:
+ stop_server(stop)
+ if len(data) == 1:
+ if "@id" not in data[0] and "id" not in data[0]:
+ data[0]["@id"] = base_url + os.path.basename(path_or_url)
+ else:
+ raise Exception(f"{path_or_url} is not a valid URL or file path")
return data
-def validate_data(data, shape_file_path):
- """Validate an expanded jsonld document against a shape.
+def validate_data(data):
+ """Validate an expanded jsonld document against the pydantic model.
Parameters
----------
- data : dict
- Python dictionary containing JSONLD object
- shape_file_path : str
- SHACL file for the document
+ data : list
+ Expanded JSON-LD document; only its first element is validated
Returns
-------
conforms: bool
- Whether the document is conformant with the shape
+ Whether the document is conformant with the pydantic model
v_text: str
- Validation information returned by PySHACL
+ Validation errors returned by pydantic, or an empty string if there are none
+
+ Raises
+ ------
+ ValueError
+ If the document is empty, has no ``@type``, or its first type is not
+ one of the supported reproschema types.
"""
- kwargs = {"algorithm": "URDNA2015", "format": "application/n-quads"}
- normalized = jsonld.normalize(data, kwargs)
- data_file_format = "nquads"
- shape_file_format = "turtle"
- conforms, v_graph, v_text = shacl_validate(
- normalized,
- shacl_graph=shape_file_path,
- data_graph_format=data_file_format,
- shacl_graph_format=shape_file_format,
- inference="rdfs",
- debug=False,
- serialize_report_graph=True,
- )
+ # NOTE: the URDNA2015 normalization (jsonld.normalize) that preceded the
+ # SHACL validation is not applied here; revisit if it turns out to be needed.
+ type_to_model = {
+ "http://schema.repronim.org/Field": Item,
+ "http://schema.repronim.org/ResponseOption": ResponseOption,
+ "http://schema.repronim.org/Activity": Activity,
+ "http://schema.repronim.org/Protocol": Protocol,
+ "http://schema.repronim.org/ResponseActivity": ResponseActivity,
+ "http://schema.repronim.org/Response": Response,
+ }
+ # Fail with a clear message instead of a bare IndexError/KeyError.
+ if not data or not data[0].get("@type"):
+ raise ValueError("Document is empty or has no @type")
+ schema_type = data[0]["@type"][0]
+ if schema_type not in type_to_model:
+ raise ValueError(f"Unknown type: {schema_type}")
+ obj_type = type_to_model[schema_type]
+ data_fixed = [fixing_old_schema(data[0], copy_data=True)]
+ # TODO: where should we load the context from?
+ context_file = Path(__file__).resolve().parent / "models/reproschema"
+ with open(context_file, encoding="utf-8") as fp:
+ context = json.load(fp)
+ data_fixed_comp = jsonld.compact(data_fixed, context)
+ # Drop the context before instantiating the model.
+ del data_fixed_comp["@context"]
+ conforms = False
+ v_text = ""
+ try:
+ obj_type(**data_fixed_comp)
+ conforms = True
+ except Exception as e:
+ # Any failure to build the model is reported as a validation error.
+ v_text = str(e)
return conforms, v_text
diff --git a/reproschema/models/__init__.py b/reproschema/models/__init__.py
index 1c1a154..347ad42 100644
--- a/reproschema/models/__init__.py
+++ b/reproschema/models/__init__.py
@@ -1,3 +1,2 @@
-from .protocol import Protocol
-from .activity import Activity
-from .item import Item
+from .model import Activity, Item, Protocol, ResponseOption, ResponseActivity, Response
+from .utils import load_schema, write_obj_jsonld
diff --git a/reproschema/models/activity.py b/reproschema/models/activity.py
deleted file mode 100644
index 0ad79e2..0000000
--- a/reproschema/models/activity.py
+++ /dev/null
@@ -1,66 +0,0 @@
-from .base import SchemaBase
-
-
-class Activity(SchemaBase):
- """
- class to deal with reproschema activities
- """
-
- schema_type = "reproschema:Activity"
-
- def __init__(self, version=None):
- super().__init__(version)
- self.schema["ui"] = {"shuffle": [], "order": [], "addProperties": []}
-
- def set_ui_shuffle(self, shuffle=False):
- self.schema["ui"]["shuffle"] = shuffle
-
- def set_URI(self, URI):
- self.URI = URI
-
- def get_URI(self):
- return self.URI
-
- # TODO
- # preamble
- # compute
- # citation
- # image
-
- def set_defaults(self, name):
- self._ReproschemaSchema__set_defaults(name) # this looks wrong
- self.set_ui_shuffle(False)
-
- def update_activity(self, item_info):
- # TODO
- # - remove the hard coding on visibility and valueRequired
-
- # update the content of the activity schema with new item
-
- item_info["URI"] = "items/" + item_info["name"]
-
- append_to_activity = {
- "variableName": item_info["name"],
- "isAbout": item_info["URI"],
- "isVis": item_info["visibility"],
- "valueRequired": False,
- }
-
- self.schema["ui"]["order"].append(item_info["URI"])
- self.schema["ui"]["addProperties"].append(append_to_activity)
-
- def sort(self):
- schema_order = [
- "@context",
- "@type",
- "@id",
- "prefLabel",
- "description",
- "schemaVersion",
- "version",
- "ui",
- ]
- self.sort_schema(schema_order)
-
- ui_order = ["shuffle", "order", "addProperties"]
- self.sort_ui(ui_order)
diff --git a/reproschema/models/base.py b/reproschema/models/base.py
deleted file mode 100644
index f3ecf93..0000000
--- a/reproschema/models/base.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import json
-import os
-
-
-class SchemaBase:
- """
- class to deal with reproschema schemas
- """
-
- schema_type = None
-
- def __init__(self, version):
- URL = "https://raw.githubusercontent.com/ReproNim/reproschema/"
- VERSION = version or "1.0.0-rc2"
-
- self.schema = {
- "@context": URL + VERSION + "/contexts/generic",
- "@type": self.schema_type,
- "schemaVersion": VERSION,
- "version": "0.0.1",
- }
-
- def set_filename(self, name):
- self.schema_file = name + "_schema"
- self.schema["@id"] = name + "_schema"
-
- def get_name(self):
- return self.schema_file.replace("_schema", "")
-
- def get_filename(self):
- return self.schema_file
-
- def set_pref_label(self, pref_label):
- self.schema["prefLabel"] = pref_label
-
- def set_description(self, description):
- self.schema["description"] = description
-
- def set_directory(self, output_directory):
- self.dir = output_directory
-
- def __set_defaults(self, name):
- self.set_filename(name)
- self.set_directory(name)
- self.set_pref_label(name.replace("_", " "))
- self.set_description(name.replace("_", " "))
-
- def sort_schema(self, schema_order):
- reordered_dict = {k: self.schema[k] for k in schema_order}
- self.schema = reordered_dict
-
- def sort_ui(self, ui_order):
- reordered_dict = {k: self.schema["ui"][k] for k in ui_order}
- self.schema["ui"] = reordered_dict
-
- def write(self, output_dir):
- with open(os.path.join(output_dir, self.schema_file), "w") as ff:
- json.dump(self.schema, ff, sort_keys=False, indent=4)
-
- @classmethod
- def from_data(cls, data):
- if cls.schema_type is None:
- raise ValueError("SchemaBase cannot be used to instantiate class")
- if cls.schema_type != data["@type"]:
- raise ValueError(f"Mismatch in type {data['@type']} != {cls.schema_type}")
- klass = cls()
- klass.schema = data
- return klass
-
- @classmethod
- def from_file(cls, filepath):
- with open(filepath) as fp:
- data = json.load(fp)
- if "@type" not in data:
- raise ValueError("Missing @type key")
- return cls.from_data(data)
diff --git a/reproschema/models/item.py b/reproschema/models/item.py
deleted file mode 100644
index fc08c70..0000000
--- a/reproschema/models/item.py
+++ /dev/null
@@ -1,156 +0,0 @@
-from .base import SchemaBase
-
-
-class Item(SchemaBase):
- """
- class to deal with reproschema activities
- """
-
- schema_type = "reproschema:Field"
-
- def __init__(self, version=None):
- super().__init__(version)
- self.schema["ui"] = {"inputType": []}
- self.schema["question"] = {}
- self.schema["responseOptions"] = {}
- # default input type is "char"
- self.set_input_type_as_char()
-
- def set_URI(self, URI):
- self.URI = URI
-
- # TODO
- # image
- # readonlyValue
-
- def set_defaults(self, name):
- self._ReproschemaSchema__set_defaults(name) # this looks wrong
- self.schema_file = name
- self.schema["@id"] = name
- self.set_input_type_as_char()
-
- def set_question(self, question, lang="en"):
- self.schema["question"][lang] = question
-
- def set_input_type(self, input_type):
- self.schema["ui"]["inputType"] = input_type
-
- def set_response_options(self, response_options):
- self.schema["responseOptions"] = response_options
-
- """
-
- input types with different response choices
-
- """
-
- def set_input_type_as_radio(self, response_options):
- self.set_input_type("radio")
- self.set_response_options(response_options)
-
- def set_input_type_as_select(self, response_options):
- self.set_input_type("select")
- self.set_response_options(response_options)
-
- def set_input_type_as_slider(self):
- self.set_input_type_as_char() # until the slide item of the ui is fixed
- # self.set_input_type("slider")
- # self.set_response_options({"valueType": "xsd:string"})
-
- def set_input_type_as_language(self):
- URL = "https://raw.githubusercontent.com/ReproNim/reproschema/"
-
- self.set_input_type("selectLanguage")
-
- response_options = {
- "valueType": "xsd:string",
- "multipleChoice": True,
- "choices": URL + "master/resources/languages.json",
- }
- self.set_response_options(response_options)
-
- """
-
- input types with no response choice
-
- """
-
- def set_input_type_as_char(self):
- self.set_input_type("text")
- self.set_response_options({"valueType": "xsd:string"})
-
- def set_input_type_as_int(self):
- self.set_input_type("number")
- self.set_response_options({"valueType": "xsd:integer"})
-
- def set_input_type_as_float(self):
- self.set_input_type("float")
- self.set_response_options({"valueType": "xsd:float"})
-
- def set_input_type_as_time_range(self):
- self.set_input_type("timeRange")
- self.set_response_options({"valueType": "datetime"})
-
- def set_input_type_as_date(self):
- self.set_input_type("date")
- self.set_response_options({"valueType": "xsd:date"})
-
- """
-
- input types with no response choice but with some parameters
-
- """
-
- def set_input_type_as_multitext(self, max_length=300):
- self.set_input_type("text")
- self.set_response_options({"valueType": "xsd:string", "maxLength": max_length})
-
- # TODO
- # email: EmailInput/EmailInput.vue
- # audioCheck: AudioCheck/AudioCheck.vue
- # audioRecord: WebAudioRecord/Audio.vue
- # audioPassageRecord: WebAudioRecord/Audio.vue
- # audioImageRecord: WebAudioRecord/Audio.vue
- # audioRecordNumberTask: WebAudioRecord/Audio.vue
- # audioAutoRecord: AudioCheckRecord/AudioCheckRecord.vue
- # year: YearInput/YearInput.vue
- # selectCountry: SelectInput/SelectInput.vue
- # selectState: SelectInput/SelectInput.vue
- # documentUpload: DocumentUpload/DocumentUpload.vue
- # save: SaveData/SaveData.vue
- # static: Static/Static.vue
- # StaticReadOnly: Static/Static.vue
-
- def set_basic_response_type(self, response_type):
- # default (also valid for "char" input type)
- self.set_input_type_as_char()
-
- if response_type == "int":
- self.set_input_type_as_int()
-
- elif response_type == "float":
- self.set_input_type_as_float()
-
- elif response_type == "date":
- self.set_input_type_as_date()
-
- elif response_type == "time range":
- self.set_input_type_as_time_range()
-
- elif response_type == "language":
- self.set_input_type_as_language()
-
- def sort(self):
- schema_order = [
- "@context",
- "@type",
- "@id",
- "prefLabel",
- "description",
- "schemaVersion",
- "version",
- "ui",
- "question",
- "responseOptions",
- ]
- self.sort_schema(schema_order)
diff --git a/reproschema/model.py b/reproschema/models/model.py
similarity index 61%
rename from reproschema/model.py
rename to reproschema/models/model.py
index 5d8808e..9c387fa 100644
--- a/reproschema/model.py
+++ b/reproschema/models/model.py
@@ -1,21 +1,19 @@
from __future__ import annotations
from datetime import datetime, date
-from enum import Enum
-
from decimal import Decimal
-from typing import List, Dict, Optional, Any, Union
-from pydantic import BaseModel as BaseModel, ConfigDict, Field, field_validator
+from enum import Enum
import re
import sys
+from typing import Any, List, Literal, Dict, Optional, Union
+from pydantic.version import VERSION as PYDANTIC_VERSION
-if sys.version_info >= (3, 8):
- from typing import Literal
+# Compare the whole major component; indexing the first character would
+# misread a two-digit major version such as "10.0" as 1.
+if int(PYDANTIC_VERSION.split(".")[0]) >= 2:
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
else:
- from typing_extensions import Literal
-
+ from pydantic import BaseModel, Field, validator
metamodel_version = "None"
-version = "None"
+version = "1.0.0"
class ConfiguredBaseModel(BaseModel):
@@ -25,8 +23,8 @@ class ConfiguredBaseModel(BaseModel):
extra="forbid",
arbitrary_types_allowed=True,
use_enum_values=True,
+ strict=False,
)
-
pass
@@ -56,7 +54,101 @@ class MissingType(str, Enum):
TimedOut = "reproschema:TimedOut"
-class AdditionalNoteObj(ConfiguredBaseModel):
+class Agent(ConfiguredBaseModel):
+ pass
+
+
+class Participant(Agent):
+ """
+ An Agent describing characteristics associated with a participant.
+ """
+
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ subject_id: Optional[str] = Field(None)
+
+
+class Thing(ConfiguredBaseModel):
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
+
+
+class Activity(Thing):
+ """
+ An assessment in a protocol.
+ """
+
+ about: Optional[str] = Field(
+ None, description="""The subject matter of the Field."""
+ )
+ altLabel: Optional[Dict[str, str]] = Field(
+ default_factory=dict,
+ title="alternate label",
+ description="""The alternate label.""",
+ )
+ associatedMedia: Optional[str] = Field(
+ None,
+ title="associatedMedia",
+ description="""A media object that encodes this creative work. This property is a synonym for encoding.""",
+ )
+ citation: Optional[Dict[str, str]] = Field(default_factory=dict)
+ compute: Optional[List[ComputeSpecification]] = Field(
+ default_factory=list,
+ title="computation",
+ description="""An array of objects indicating computations in an activity or protocol and maps it to the corresponding Item. scoring logic is a subset of all computations that could be performed and not all computations will be scoring. For example, one may want to do conversion from one unit to another.""",
+ )
+ cronTable: Optional[str] = Field(
+ None, title="cronTable", description="""TODO not described in reproschema"""
+ )
+ description: Optional[Dict[str, str]] = Field(default_factory=dict)
+ image: Optional[Union[ImageObject, str]] = Field(
+ None,
+ title="image",
+ description="""An image of the item. This can be a URL or a fully described ImageObject.""",
+ )
+ messages: Optional[List[MessageSpecification]] = Field(
+ default_factory=list,
+ title="messages",
+ description="""An array of objects to define conditional messages in an activity or protocol.""",
+ )
+ preamble: Optional[Dict[str, str]] = Field(
+ default_factory=dict,
+ title="Preamble",
+ description="""The preamble for an assessment""",
+ )
+ prefLabel: Optional[Dict[str, str]] = Field(
+ default_factory=dict,
+ title="preferred label",
+ description="""The preferred label.""",
+ )
+ schemaVersion: Optional[str] = Field(None)
+ ui: Optional[UI] = Field(
+ None,
+ title="UI",
+ description="""An element to control UI specifications. Originally @nest in jsonld, but using a class in the model.""",
+ )
+ version: Optional[str] = Field(None)
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
+
+
+class AdditionalNoteObj(Thing):
"""
A set of objects to define notes in a Item. For example, most Redcap and NDA data dictionaries have notes for each item which needs to be captured in reproschema
"""
@@ -78,9 +170,18 @@ class AdditionalNoteObj(ConfiguredBaseModel):
title="value",
description="""The value for each option in choices or in additionalNotesObj""",
)
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
-class AdditionalProperty(ConfiguredBaseModel):
+class AdditionalProperty(Thing):
"""
An object to describe the various properties added to assessments and Items.
"""
@@ -136,13 +237,18 @@ class AdditionalProperty(ConfiguredBaseModel):
title="UI",
description="""An element to control UI specifications. Originally @nest in jsonld, but using a class in the model.""",
)
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
-class Agent(ConfiguredBaseModel):
- None
-
-
-class Choice(ConfiguredBaseModel):
+class Choice(Thing):
"""
An object to describe a response option.
"""
@@ -154,15 +260,25 @@ class Choice(ConfiguredBaseModel):
description="""An image of the item. This can be a URL or a fully described ImageObject.""",
)
value: Optional[
- Union[Decimal, Dict[str, str], MissingType, StructuredValue, bool, str]
+ Union[
+ float, int, Decimal, Dict[str, str], MissingType, StructuredValue, bool, str
+ ]
] = Field(
None,
title="value",
description="""The value for each option in choices or in additionalNotesObj""",
)
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
-class ComputeSpecification(ConfiguredBaseModel):
+class ComputeSpecification(Thing):
"""
An object to define computations in an activity or protocol.
"""
@@ -177,73 +293,18 @@ class ComputeSpecification(ConfiguredBaseModel):
title="variableName",
description="""The name used to represent an item.""",
)
-
-
-class CreativeWork(ConfiguredBaseModel):
- id: Optional[str] = Field(None)
- category: Optional[str] = Field(None)
-
-
-class Activity(CreativeWork):
- """
- An assessment in a protocol.
- """
-
- about: Optional[str] = Field(
- None, description="""The subject matter of the Field."""
- )
- altLabel: Optional[Dict[str, str]] = Field(
- default_factory=dict,
- title="alternate label",
- description="""The alternate label.""",
- )
- associatedMedia: Optional[str] = Field(
- None,
- title="associatedMedia",
- description="""A media object that encodes this CreativeWork. This property is a synonym for encoding.""",
- )
- citation: Optional[Dict[str, str]] = Field(default_factory=dict)
- compute: Optional[List[ComputeSpecification]] = Field(
- default_factory=list,
- title="computation",
- description="""An array of objects indicating computations in an activity or protocol and maps it to the corresponding Item. scoring logic is a subset of all computations that could be performed and not all computations will be scoring. For example, one may want to do conversion from one unit to another.""",
- )
- cronTable: Optional[str] = Field(
- None, title="cronTable", description="""TODO not described in reproschema"""
- )
- description: Optional[Dict[str, str]] = Field(default_factory=dict)
- image: Optional[Union[ImageObject, str]] = Field(
+ id: Optional[str] = Field(
None,
- title="image",
- description="""An image of the item. This can be a URL or a fully described ImageObject.""",
- )
- messages: Optional[List[MessageSpecification]] = Field(
- default_factory=list,
- title="messages",
- description="""An array of objects to define conditional messages in an activity or protocol.""",
- )
- preamble: Optional[Dict[str, str]] = Field(
- default_factory=dict,
- title="Preamble",
- description="""The preamble for an assessment""",
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
)
- prefLabel: Optional[Dict[str, str]] = Field(
- default_factory=dict,
- title="preferred label",
- description="""The preferred label.""",
- )
- schemaVersion: Optional[str] = Field(None)
- ui: Optional[UI] = Field(
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
None,
- title="UI",
- description="""An element to control UI specifications. Originally @nest in jsonld, but using a class in the model.""",
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
)
- version: Optional[str] = Field(None)
- id: Optional[str] = Field(None)
- category: Optional[str] = Field(None)
-class Item(CreativeWork):
+class Item(Thing):
"""
An item in an assessment.
"""
@@ -264,7 +325,7 @@ class Item(CreativeWork):
associatedMedia: Optional[str] = Field(
None,
title="associatedMedia",
- description="""A media object that encodes this CreativeWork. This property is a synonym for encoding.""",
+ description="""A media object that encodes this creative work. This property is a synonym for encoding.""",
)
audio: Optional[Union[AudioObject, str]] = Field(
None, title="audio", description="""TODO"""
@@ -275,9 +336,6 @@ class Item(CreativeWork):
title="image",
description="""An image of the item. This can be a URL or a fully described ImageObject.""",
)
- imageUrl: Optional[str] = Field(
- None, title="imageUrl", description="""An image url."""
- )
isPartOf: Optional[Activity] = Field(None)
preamble: Optional[Dict[str, str]] = Field(
default_factory=dict,
@@ -302,46 +360,82 @@ class Item(CreativeWork):
description="""An element to control UI specifications. Originally @nest in jsonld, but using a class in the model.""",
)
version: Optional[str] = Field(None)
- video: Optional[VideoObject] = Field(None)
- id: Optional[str] = Field(None)
- category: Optional[str] = Field(None)
+ video: Optional[Union[VideoObject, str]] = Field(None)
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
-class LandingPage(ConfiguredBaseModel):
+class LandingPage(Thing):
"""
An object to define the landing page of a protocol.
"""
inLanguage: Optional[str] = Field(None)
- id: Optional[str] = Field(None)
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
-class MediaObject(CreativeWork):
+class MediaObject(Thing):
"""
Add description
"""
- contentUrl: str = Field(...)
inLanguage: Optional[str] = Field(None)
- id: Optional[str] = Field(None)
- category: Optional[str] = Field(None)
+ contentUrl: str = Field(...)
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
class AudioObject(MediaObject):
- contentUrl: str = Field(...)
inLanguage: Optional[str] = Field(None)
- id: Optional[str] = Field(None)
- category: Optional[str] = Field(None)
+ contentUrl: str = Field(...)
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
class ImageObject(MediaObject):
- contentUrl: str = Field(...)
inLanguage: Optional[str] = Field(None)
- id: Optional[str] = Field(None)
- category: Optional[str] = Field(None)
+ contentUrl: str = Field(...)
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
-class MessageSpecification(ConfiguredBaseModel):
+class MessageSpecification(Thing):
"""
An object to define messages in an activity or protocol.
"""
@@ -356,9 +450,18 @@ class MessageSpecification(ConfiguredBaseModel):
title="Message",
description="""The message to be conditionally displayed for an item.""",
)
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
-class OverrideProperty(ConfiguredBaseModel):
+class OverrideProperty(Thing):
"""
An object to override the various properties added to assessments and Items.
"""
@@ -404,17 +507,18 @@ class OverrideProperty(ConfiguredBaseModel):
title="variableName",
description="""The name used to represent an item.""",
)
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
-class Participant(Agent):
- """
- An Agent describing characteristics associated with a participant.
- """
-
- subject_id: Optional[str] = Field(None)
-
-
-class Protocol(CreativeWork):
+class Protocol(Thing):
"""
A representation of a study which comprises one or more assessments.
"""
@@ -430,7 +534,7 @@ class Protocol(CreativeWork):
associatedMedia: Optional[str] = Field(
None,
title="associatedMedia",
- description="""A media object that encodes this CreativeWork. This property is a synonym for encoding.""",
+ description="""A media object that encodes this creative work. This property is a synonym for encoding.""",
)
compute: Optional[List[ComputeSpecification]] = Field(
default_factory=list,
@@ -463,11 +567,18 @@ class Protocol(CreativeWork):
description="""An element to control UI specifications. Originally @nest in jsonld, but using a class in the model.""",
)
version: Optional[str] = Field(None)
- id: Optional[str] = Field(None)
- category: Optional[str] = Field(None)
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
-class Response(CreativeWork):
+class Response(Thing):
"""
Describes the response of an item.
"""
@@ -485,11 +596,18 @@ class Response(CreativeWork):
description="""The value for each option in choices or in additionalNotesObj""",
)
wasAttributedTo: Optional[Participant] = Field(None)
- id: Optional[str] = Field(None)
- category: Optional[str] = Field(None)
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
-class ResponseActivity(CreativeWork):
+class ResponseActivity(Thing):
"""
Captures information about some action that took place. It also links to information (entities) that were used during the activity
"""
@@ -500,11 +618,18 @@ class ResponseActivity(CreativeWork):
startedAtTime: Optional[datetime] = Field(None)
used: Optional[List[str]] = Field(default_factory=list)
wasAssociatedWith: Optional[SoftwareAgent] = Field(None)
- id: Optional[str] = Field(None)
- category: Optional[str] = Field(None)
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
-class ResponseOption(CreativeWork):
+class ResponseOption(Thing):
"""
An element (object or by URL)to describe the properties of response of the Item.
"""
@@ -531,31 +656,50 @@ class ResponseOption(CreativeWork):
title="unitOptions",
description="""A list of objects to represent a human displayable name alongside the more formal value for units.""",
)
- valueType: Optional[List[str]] = Field(
+ valueType: Optional[Union[str, List[str]]] = Field(
default_factory=list,
title="The type of the response",
description="""The type of the response of an item. For example, string, integer, etc.""",
)
- id: Optional[str] = Field(None)
- category: Optional[str] = Field(None)
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
-class SoftwareAgent(ConfiguredBaseModel):
+class SoftwareAgent(Thing):
"""
Captures information about some action that took place. It also links to information (entities) that were used during the activity
"""
version: Optional[str] = Field(None)
url: Optional[str] = Field(None)
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
-class StructuredValue(CreativeWork):
- id: Optional[str] = Field(None)
- category: Optional[str] = Field(None)
-
-
-class Thing(ConfiguredBaseModel):
- None
+class StructuredValue(Thing):
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
class UI(ConfiguredBaseModel):
@@ -596,7 +740,7 @@ class UI(ConfiguredBaseModel):
readonlyValue: Optional[bool] = Field(None)
-class UnitOption(ConfiguredBaseModel):
+class UnitOption(Thing):
"""
An object to represent a human displayable name alongside the more formal value for units.
"""
@@ -611,24 +755,41 @@ class UnitOption(ConfiguredBaseModel):
title="value",
description="""The value for each option in choices or in additionalNotesObj""",
)
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
class VideoObject(MediaObject):
- contentUrl: str = Field(...)
inLanguage: Optional[str] = Field(None)
- id: Optional[str] = Field(None)
- category: Optional[str] = Field(None)
+ contentUrl: str = Field(...)
+ id: Optional[str] = Field(
+ None,
+ description="""A unique identifier for an entity. Must be either a CURIE shorthand for a URI or a complete URI.""",
+ )
+ name: Optional[Dict[str, str]] = Field(default_factory=dict)
+ category: Optional[str] = Field(
+ None,
+ description="""Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In an RDF database it should be a model class URI. This field is multi-valued.""",
+ )
# Model rebuild
# see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model
+Agent.model_rebuild()
+Participant.model_rebuild()
+Thing.model_rebuild()
+Activity.model_rebuild()
AdditionalNoteObj.model_rebuild()
AdditionalProperty.model_rebuild()
-Agent.model_rebuild()
Choice.model_rebuild()
ComputeSpecification.model_rebuild()
-CreativeWork.model_rebuild()
-Activity.model_rebuild()
Item.model_rebuild()
LandingPage.model_rebuild()
MediaObject.model_rebuild()
@@ -636,14 +797,12 @@ class VideoObject(MediaObject):
ImageObject.model_rebuild()
MessageSpecification.model_rebuild()
OverrideProperty.model_rebuild()
-Participant.model_rebuild()
Protocol.model_rebuild()
Response.model_rebuild()
ResponseActivity.model_rebuild()
ResponseOption.model_rebuild()
SoftwareAgent.model_rebuild()
StructuredValue.model_rebuild()
-Thing.model_rebuild()
UI.model_rebuild()
UnitOption.model_rebuild()
VideoObject.model_rebuild()
diff --git a/reproschema/models/protocol.py b/reproschema/models/protocol.py
deleted file mode 100644
index 10fa951..0000000
--- a/reproschema/models/protocol.py
+++ /dev/null
@@ -1,77 +0,0 @@
-from .base import SchemaBase
-
-
-class Protocol(SchemaBase):
- """
- class to deal with reproschema protocols
- """
-
- schema_type = "reproschema:Protocol"
-
- def __init__(self, version=None):
- super().__init__(version)
- self.schema["ui"] = {
- "allow": [],
- "shuffle": [],
- "order": [],
- "addProperties": [],
- }
-
- def set_landing_page(self, landing_page_url, lang="en"):
- self.schema["landingPage"] = {"@id": landing_page_url, "inLanguage": lang}
-
- # TODO
- # def add_landing_page(self, landing_page_url, lang="en"):
- # preamble
- # compute
-
- def set_image(self, image_url):
- self.schema["image"] = image_url
-
- def set_ui_allow(self):
- self.schema["ui"]["allow"] = [
- "reproschema:AutoAdvance",
- "reproschema:AllowExport",
- ]
-
- def set_ui_shuffle(self, shuffle=False):
- self.schema["ui"]["shuffle"] = shuffle
-
- def set_defaults(self, name):
- self._ReproschemaSchema__set_defaults(name) # this looks wrong
- self.set_landing_page("../../README-en.md")
- self.set_ui_allow()
- self.set_ui_shuffle(False)
-
- def append_activity(self, activity):
- # TODO
- # - remove the hard coding on visibility and valueRequired
-
- # update the content of the protocol with this new activity
- append_to_protocol = {
- "variableName": activity.get_name(),
- "isAbout": activity.get_URI(),
- "prefLabel": {"en": activity.schema["prefLabel"]},
- "isVis": True,
- "valueRequired": False,
- }
-
- self.schema["ui"]["order"].append(activity.URI)
- self.schema["ui"]["addProperties"].append(append_to_protocol)
-
- def sort(self):
- schema_order = [
- "@context",
- "@type",
- "@id",
- "prefLabel",
- "description",
- "schemaVersion",
- "version",
- "landingPage",
- "ui",
- ]
- self.sort_schema(schema_order)
-
- ui_order = ["allow", "shuffle", "order", "addProperties"]
- self.sort_ui(ui_order)
diff --git a/reproschema/models/reproschema b/reproschema/models/reproschema
new file mode 100644
index 0000000..1d8e6dd
--- /dev/null
+++ b/reproschema/models/reproschema
@@ -0,0 +1,252 @@
+{
+ "comments": {
+ "description": "Auto generated by LinkML jsonld context generator",
+ "generation_date": "2024-02-16T13:37:16",
+ "source": "reproschema.yaml"
+ },
+ "@context": {
+ "linkml": "https://w3id.org/linkml/",
+ "nidm": "http://purl.org/nidash/nidm#",
+ "owl": "http://www.w3.org/2002/07/owl#",
+ "prov": "http://www.w3.org/ns/prov#",
+ "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+ "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+ "reproschema": "http://schema.repronim.org/",
+ "schema": "http://schema.org/",
+ "skos": "http://www.w3.org/2004/02/skos/core#",
+ "xml": {
+ "@id": "http://www.w3.org/XML/1998/namespace",
+ "@prefix": true
+ },
+ "xsd": "http://www.w3.org/2001/XMLSchema#",
+ "@vocab": "http://schema.repronim.org/",
+ "@version": 1.1,
+ "@language": "en",
+ "id": "@id",
+ "category": "@type",
+ "ui": "@nest",
+ "about": {
+ "@id": "schema:about"
+ },
+ "addProperties": {
+ "@container": "@set",
+ "@nest": "ui"
+ },
+ "additionalNotesObj": {
+ "@container": "@set"
+ },
+ "allow": {
+ "@type": "@id",
+ "@container": "@set",
+ "@nest": "ui"
+ },
+ "altLabel": {
+ "@id": "skos:altLabel",
+ "@container": "@language"
+ },
+ "associatedMedia": {
+ "@id": "schema:associatedMedia"
+ },
+ "audio": {
+ "@type": "@id",
+ "@id": "schema:audio"
+ },
+ "choices": {
+ "@container": "@set"
+ },
+ "citation": {
+ "@id": "schema:citation",
+ "@container": "@language"
+ },
+ "column": {
+ "@type": "xsd:string"
+ },
+ "compute": {
+ "@id": "reproschema:compute",
+ "@container": "@set"
+ },
+ "contentUrl": {
+ "@type": "@id",
+ "@id": "schema:contentUrl"
+ },
+ "cronTable": {
+ "@id": "reproschema:cronTable"
+ },
+ "datumType": {
+ "@id": "reproschema:datumType"
+ },
+ "description": {
+ "@id": "schema:description",
+ "@container": "@language"
+ },
+ "endedAtTime": {
+ "@type": "xsd:dateTime",
+ "@id": "prov:endedAtTime"
+ },
+ "generated": {
+ "@id": "prov:generated"
+ },
+ "image": {
+ "@type": "@id",
+ "@id": "schema:image"
+ },
+ "imageUrl": {
+ "@type": "@id"
+ },
+ "inLanguage": {
+ "@id": "schema:inLanguage",
+ "@language": null
+ },
+ "inputType": {
+ "@type": "xsd:string",
+ "@nest": "ui"
+ },
+ "isAbout": {
+ "@type": "@id"
+ },
+ "isPartOf": {
+ "@type": "@id",
+ "@id": "schema:isPartOf"
+ },
+ "landingPage": {
+ "@type": "@id",
+ "@container": "@set"
+ },
+ "limit": {
+ "@language": null
+ },
+ "maxValue": {
+ "@id": "schema:maxValue"
+ },
+ "message": {
+ "@container": "@language"
+ },
+ "messages": {
+ "@container": "@set"
+ },
+ "minValue": {
+ "@id": "schema:minValue"
+ },
+ "multipleChoice": {
+ "@type": "xsd:boolean"
+ },
+ "name": {
+ "@id": "schema:name",
+ "@container": "@language"
+ },
+ "order": {
+ "@type": "@id",
+ "@container": "@list",
+ "@nest": "ui"
+ },
+ "overrideProperties": {
+ "@container": "@set",
+ "@nest": "ui"
+ },
+ "preamble": {
+ "@id": "reproschema:preamble",
+ "@container": "@language"
+ },
+ "prefLabel": {
+ "@id": "skos:prefLabel",
+ "@container": "@language"
+ },
+ "question": {
+ "@id": "schema:question",
+ "@container": "@language"
+ },
+ "randomMaxDelay": {
+ "@language": null
+ },
+ "readonlyValue": {
+ "@type": "xsd:boolean",
+ "@id": "schema:readonlyValue",
+ "@nest": "ui"
+ },
+ "responseOptions": {
+ "@type": "@id"
+ },
+ "schedule": {
+ "@language": null
+ },
+ "schemaVersion": {
+ "@id": "schema:schemaVersion",
+ "@language": null
+ },
+ "shuffle": {
+ "@type": "xsd:boolean",
+ "@nest": "ui"
+ },
+ "source": {
+ "@type": "xsd:string"
+ },
+ "startedAtTime": {
+ "@type": "xsd:dateTime",
+ "@id": "prov:startedAtTime"
+ },
+ "subject_id": {
+ "@id": "nidm:subject_id"
+ },
+ "unitOptions": {
+ "@type": "@id",
+ "@container": "@set"
+ },
+ "url": {
+ "@type": "@id",
+ "@id": "schema:url"
+ },
+ "used": {
+ "@type": "@id",
+ "@container": "@set",
+ "@id": "prov:used"
+ },
+ "valueRequired": {
+ "@type": "xsd:boolean",
+ "@id": "schema:valueRequired"
+ },
+ "valueType": {
+ "@type": "@id",
+ "@container": "@set"
+ },
+ "version": {
+ "@id": "schema:version",
+ "@language": null
+ },
+ "video": {
+ "@type": "@id",
+ "@id": "schema:video"
+ },
+ "wasAssociatedWith": {
+ "@type": "@id",
+ "@id": "prov:wasAssociatedWith"
+ },
+ "wasAttributedTo": {
+ "@type": "@id",
+ "@id": "prov:wasAttributedTo"
+ },
+ "Activity": {
+ "@id": "reproschema:Activity"
+ },
+ "Agent": {
+ "@id": "prov:Agent"
+ },
+ "CreativeWork": {
+ "@id": "schema:CreativeWork"
+ },
+ "LangString": {
+ "@id": "rdf:langString"
+ },
+ "MediaObject": {
+ "@id": "schema:MediaObject"
+ },
+ "StructuredValue": {
+ "@id": "schema:StructuredValue"
+ },
+ "Thing": {
+ "@id": "schema:Thing"
+ },
+ "VideoObject": {
+ "@id": "schema:VideoObject"
+ }
+ }
+}
diff --git a/reproschema/models/tests/test_schema.py b/reproschema/models/tests/test_schema.py
index a68e808..04a4658 100644
--- a/reproschema/models/tests/test_schema.py
+++ b/reproschema/models/tests/test_schema.py
@@ -1,20 +1,139 @@
-from .. import Protocol, Activity, Item
-
-
-def test_constructors():
- Protocol()
- Activity()
- Item()
- version = "1.0.0-rc2"
- proto = Protocol(version=version)
- assert proto.schema["schemaVersion"] == version
- act = Activity(version)
- assert act.schema["schemaVersion"] == version
- item = Item(version)
- assert item.schema["schemaVersion"] == version
-
-
-def test_constructors_from_data():
- Protocol.from_data({"@type": "reproschema:Protocol"})
- Activity.from_data({"@type": "reproschema:Activity"})
- Item.from_data({"@type": "reproschema:Field"})
+from .. import Protocol, Activity, Item, ResponseOption
+from ..utils import write_obj_jsonld
+from ...utils import start_server, stop_server
+from ...jsonldutils import load_file
+
+from pyld import jsonld
+import json, os
+from pathlib import Path
+
+import pytest
+
+
+@pytest.fixture
+def server_http_kwargs(request):
+ http_kwargs = {}
+ stop, port = start_server()
+ http_kwargs["port"] = port
+
+ olddir = os.getcwd()
+ os.chdir(os.path.dirname(__file__))
+
+ def stoping_server():
+ stop_server(stop)
+ os.chdir(olddir)
+
+ request.addfinalizer(stoping_server)
+ return http_kwargs
+
+
+@pytest.mark.parametrize("model_class", [Protocol, Activity, Item, ResponseOption])
+def test_constructors(model_class):
+ ob = model_class()
+ assert hasattr(ob, "id")
+ assert hasattr(ob, "category")
+
+
+def test_protocol(tmp_path, server_http_kwargs):
+ """check if protocol is created correctly for a simple example
+ and if it can be written to the file as jsonld.
+ """
+ protocol_dict = {
+ "category": "reproschema:Protocol",
+ "id": "protocol1.jsonld",
+ "prefLabel": {"en": "Protocol1", "es": "Protocol1_es"},
+ "description": {"en": "example Protocol"},
+ "schemaVersion": "1.0.0-rc4",
+ "version": "0.0.1",
+ "messages": [
+ {
+ "message": {
+ "en": "Test message: Triggered when item1 value is greater than 0"
+ },
+ "jsExpression": "item1 > 0",
+ }
+ ],
+ }
+ protocol_obj = Protocol(**protocol_dict)
+
+ file_path = tmp_path / "protocol1.jsonld"
+ write_obj_jsonld(protocol_obj, file_path)
+ data = load_file(file_path, started=True, http_kwargs=server_http_kwargs)
+ expanded = jsonld.expand(data)
+ assert len(expanded) > 0
+
+
+def test_activity(tmp_path, server_http_kwargs):
+ """check if activity is created correctly for a simple example
+ and if it can be written to the file as jsonld."""
+ activity_dict = {
+ "category": "reproschema:Activity",
+ "id": "activity1.jsonld",
+ "prefLabel": {"en": "Example 1"},
+ "description": {"en": "Activity example 1"},
+ "schemaVersion": "1.0.0-rc4",
+ "version": "0.0.1",
+ "image": {
+ "category": "AudioObject",
+ "contentUrl": "http://example.com/sample-image.png",
+ },
+ "preamble": {
+ "en": "Over the last 2 weeks, how often have you been bothered by any of the following problems?",
+ "es": "Durante las últimas 2 semanas, ¿con qué frecuencia le han molestado los siguintes problemas?",
+ },
+ "compute": [
+ {"variableName": "activity1_total_score", "jsExpression": "item1 + item2"}
+ ],
+ }
+ activity_obj = Activity(**activity_dict)
+
+ file_path = tmp_path / "activity1.jsonld"
+ write_obj_jsonld(activity_obj, file_path)
+ data = load_file(file_path, started=True, http_kwargs=server_http_kwargs)
+ expanded = jsonld.expand(data)
+ assert len(expanded) > 0
+
+
+def test_item(tmp_path, server_http_kwargs):
+ """check if item is created correctly for a simple example
+ and if it can be written to the file as jsonld."""
+
+ item_dict = {
+ "category": "reproschema:Field",
+ "id": "item1.jsonld",
+ "prefLabel": {"en": "item1"},
+ "altLabel": {"en": "item1_alt"},
+ "description": {"en": "Q1 of example 1"},
+ "schemaVersion": "1.0.0-rc4",
+ "version": "0.0.1",
+ "audio": {
+ "category": "AudioObject",
+ "contentUrl": "http://media.freesound.org/sample-file.mp4",
+ },
+ "image": {
+ "category": "ImageObject",
+ "contentUrl": "http://example.com/sample-image.jpg",
+ },
+ "question": {
+ "en": "Little interest or pleasure in doing things",
+ "es": "Poco interés o placer en hacer cosas",
+ },
+ # "ui": {"inputType": "radio"},
+ "responseOptions": {
+ "minValue": 0,
+ "maxValue": 3,
+ "multipleChoice": False,
+ "choices": [
+ {"name": {"en": "Not at all", "es": "Para nada"}, "value": "a"},
+ {"name": {"en": "Several days", "es": "Varios días"}, "value": "b"},
+ ],
+ },
+ }
+
+ item_obj = Item(**item_dict)
+
+ file_path = tmp_path / "item1.jsonld"
+ write_obj_jsonld(item_obj, file_path)
+ data = load_file(file_path, started=True, http_kwargs=server_http_kwargs)
+ expanded = jsonld.expand(data)
+ assert len(expanded) > 0
diff --git a/reproschema/models/utils.py b/reproschema/models/utils.py
index 745ec4a..e6452df 100644
--- a/reproschema/models/utils.py
+++ b/reproschema/models/utils.py
@@ -1,7 +1,8 @@
import json
-from . import Protocol, Activity, Item
+from .model import Protocol, Activity, Item, ResponseOption
+# TODO: where can this be used?
def load_schema(filepath):
with open(filepath) as fp:
data = json.load(fp)
@@ -14,3 +15,16 @@ def load_schema(filepath):
return Activity.from_data(data)
if schema_type == "reproschema:Item":
return Item.from_data(data)
+
+
+def write_obj_jsonld(model_obj, path):
+ """Write a pydantic model object to a jsonld file."""
+ contextfile = "https://raw.githubusercontent.com/djarecka/reproschema/linkml_new_tmp/contexts/reproschema_new"
+ model_dict = model_obj.model_dump(
+ exclude_unset=True,
+ )
+ model_dict["@context"] = contextfile
+
+ with open(path, "w") as f:
+ json.dump(model_dict, f, indent=4)
+ return path
diff --git a/reproschema/pdf2reproschema.py b/reproschema/pdf2reproschema.py
new file mode 100644
index 0000000..63bc305
--- /dev/null
+++ b/reproschema/pdf2reproschema.py
@@ -0,0 +1,16 @@
+"""
+Converts pdfs of questionnaires to ReproSchema activities.
+"""
+
+
+# parse pdf text
+# LLM(parsed_text, pydantic_models)
+# format validation
+# lexical match
+# feed back to LLM until converges
+# evaluate
+# if good, optionally subset
+# push to UI for check
+
+
+# modes: human-in-the-loop, maximally automated
diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index f267a2f..cefb8a4 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -5,24 +5,49 @@
import re
import yaml
from bs4 import BeautifulSoup
+from .models import Activity, Item, Protocol, write_obj_jsonld
matrix_group_count = {}
def clean_header(header):
- return {k.lstrip("\ufeff"): v for k, v in header.items()}
+ cleaned_header = {}
+ for k, v in header.items():
+ # Strip BOM, whitespace, and enclosing quotation marks if present
+ cleaned_key = k.lstrip("\ufeff").strip().strip('"')
+ cleaned_header[cleaned_key] = v
+ return cleaned_header
def normalize_condition(condition_str):
+ # Regular expressions for various pattern replacements
re_parentheses = re.compile(r"\(([0-9]*)\)")
re_non_gt_lt_equal = re.compile(r"([^>|<])=")
re_brackets = re.compile(r"\[([^\]]*)\]")
+ re_extra_spaces = re.compile(r"\s+")
+ re_double_quotes = re.compile(r'"')
+ re_or = re.compile(r"\bor\b") # Match 'or' as whole word
+ # Apply regex replacements
condition_str = re_parentheses.sub(r"___\1", condition_str)
condition_str = re_non_gt_lt_equal.sub(r"\1 ==", condition_str)
- condition_str = condition_str.replace(" and ", " && ").replace(" or ", " || ")
condition_str = re_brackets.sub(r" \1 ", condition_str)
- return condition_str
+
+ # Replace 'or' with '||', ensuring not to replace '||'
+ condition_str = re_or.sub("||", condition_str)
+
+ # Replace 'and' with '&&'
+ condition_str = condition_str.replace(" and ", " && ")
+
+ # Trim extra spaces and replace double quotes with single quotes
+ condition_str = re_extra_spaces.sub(
+ " ", condition_str
+ ).strip() # Reduce multiple spaces to a single space
+ condition_str = re_double_quotes.sub(
+ "'", condition_str
+ ) # Replace double quotes with single quotes
+
+ return condition_str.strip()
def process_visibility(data):
@@ -42,7 +67,11 @@ def process_visibility(data):
def parse_field_type_and_value(field, input_type_map):
field_type = field.get("Field Type", "")
- input_type = input_type_map.get(field_type, field_type)
+ # Check if field_type is 'yesno' and directly assign 'radio' as the input type
+ if field_type == "yesno":
+ input_type = "radio" # Directly set to 'radio' for 'yesno' fields
+ else:
+ input_type = input_type_map.get(field_type, field_type) # Original logic
# Initialize the default value type as string
value_type = "xsd:string"
@@ -55,7 +84,8 @@ def parse_field_type_and_value(field, input_type_map):
"time_": "xsd:time",
"email": "xsd:string",
"phone": "xsd:string",
- } # todo: input_type="signature"
+ # No change needed here for 'yesno', as it's handled above
+ }
# Get the validation type from the field, if available
validation_type = field.get(
@@ -91,27 +121,15 @@ def process_choices(field_type, choices_str):
except ValueError:
value = parts[0]
- choice_obj = {"name": parts[1], "value": value}
- if len(parts) == 3:
- # Handle image url
- choice_obj["schema:image"] = f"{parts[2]}.png"
+ choice_obj = {"name": {"en": " ".join(parts[1:])}, "value": value}
+ # remove image for now
+ # if len(parts) == 3:
+ # # Handle image url
+ # choice_obj["image"] = f"{parts[2]}.png"
choices.append(choice_obj)
return choices
-def write_to_file(abs_folder_path, form_name, field_name, rowData):
- file_path = os.path.join(
- f"{abs_folder_path}", "activities", form_name, "items", f"{field_name}"
- )
- os.makedirs(os.path.dirname(file_path), exist_ok=True)
- try:
- with open(file_path, "w") as file:
- json.dump(rowData, file, indent=4)
- print(f"Item schema for {form_name} written successfully.")
- except Exception as e:
- print(f"Error in writing item schema for {form_name}: {e}")
-
-
def parse_html(input_string, default_language="en"):
result = {}
soup = BeautifulSoup(input_string, "html.parser")
@@ -144,6 +162,7 @@ def process_row(
response_list,
additional_notes_list,
):
+ """Process a row of the REDCap data and generate the jsonld file for the item."""
global matrix_group_count
matrix_group_name = field.get("Matrix Group Name", "")
if matrix_group_name:
@@ -155,11 +174,10 @@ def process_row(
item_id = field.get("Variable / Field Name", "")
rowData = {
- "@context": schema_context_url,
- "@type": "reproschema:Field",
- "@id": item_id,
- "prefLabel": item_id,
- "description": f"{item_id} of {form_name}",
+ "category": "reproschema:Item",
+ "id": item_id,
+ "prefLabel": {"en": item_id},
+ "description": {"en": f"{item_id} of {form_name}"},
}
field_type = field.get("Field Type", "")
@@ -179,10 +197,7 @@ def process_row(
}
for key, value in field.items():
- if (
- schema_map.get(key) in ["question", "schema:description", "preamble"]
- and value
- ):
+ if schema_map.get(key) in ["question", "description", "preamble"] and value:
rowData.update({schema_map[key]: parse_html(value)})
elif schema_map.get(key) == "allow" and value:
@@ -214,32 +229,35 @@ def process_row(
}
)
- elif schema_map.get(key) == "visibility" and value:
- condition = normalize_condition(value)
- rowData.setdefault("visibility", []).append(
- {"variableName": field["Variable / Field Name"], "isVis": condition}
- )
-
- elif key == "Identifier?" and value:
- identifier_val = value.lower() == "y"
- rowData.update(
- {
- schema_map[key]: [
- {"legalStandard": "unknown", "isIdentifier": identifier_val}
- ]
- }
- )
+ # elif key == "Identifier?" and value:
+ # identifier_val = value.lower() == "y"
+ # rowData.update(
+ # {
+ # schema_map[key]: [
+ # {"legalStandard": "unknown", "isIdentifier": identifier_val}
+ # ]
+ # }
+ # )
elif key in additional_notes_list and value:
notes_obj = {"source": "redcap", "column": key, "value": value}
rowData.setdefault("additionalNotesObj", []).append(notes_obj)
- write_to_file(abs_folder_path, form_name, field["Variable / Field Name"], rowData)
+ it = Item(**rowData)
+ file_path_item = os.path.join(
+ f"{abs_folder_path}",
+ "activities",
+ form_name,
+ "items",
+ f'{field["Variable / Field Name"]}',
+ )
+ write_obj_jsonld(it, file_path_item)
def create_form_schema(
abs_folder_path,
schema_context_url,
+ redcap_version,
form_name,
activity_display_name,
activity_description,
@@ -248,42 +266,37 @@ def create_form_schema(
matrix_list,
scores_list,
):
+ """Create the JSON-LD schema for the Activity."""
# Use a set to track unique items and preserve order
unique_order = list(dict.fromkeys(order.get(form_name, [])))
# Construct the JSON-LD structure
json_ld = {
- "@context": schema_context_url,
- "@type": "reproschema:Activity",
- "@id": f"{form_name}_schema",
- "prefLabel": activity_display_name,
- "description": activity_description,
+ "category": "reproschema:Activity",
+ "id": f"{form_name}_schema",
+ "prefLabel": {"en": activity_display_name},
+ "description": {"en": activity_description},
"schemaVersion": "1.0.0-rc4",
- "version": "0.0.1",
+ "version": redcap_version,
"ui": {
"order": unique_order,
"addProperties": bl_list,
"shuffle": False,
},
}
-
- if matrix_list:
- json_ld["matrixInfo"] = matrix_list
+ act = Activity(**json_ld)
+ # remove matrixInfo to pass validation
+ # if matrix_list:
+ # json_ld["matrixInfo"] = matrix_list
if scores_list:
json_ld["scoringLogic"] = scores_list
path = os.path.join(f"{abs_folder_path}", "activities", form_name)
+ os.makedirs(path, exist_ok=True)
filename = f"{form_name}_schema"
file_path = os.path.join(path, filename)
- try:
- os.makedirs(path, exist_ok=True)
- with open(file_path, "w") as file:
- json.dump(json_ld, file, indent=4)
- print(f"{form_name} Instrument schema created")
- except OSError as e:
- print(f"Error creating directory {path}: {e}")
- except IOError as e:
- print(f"Error writing to file {file_path}: {e}")
+ write_obj_jsonld(act, file_path)
+ print(f"{form_name} Instrument schema created")
def process_activities(activity_name, protocol_visibility_obj, protocol_order):
@@ -296,6 +309,7 @@ def process_activities(activity_name, protocol_visibility_obj, protocol_order):
def create_protocol_schema(
abs_folder_path,
schema_context_url,
+ redcap_version,
protocol_name,
protocol_display_name,
protocol_description,
@@ -304,48 +318,44 @@ def create_protocol_schema(
):
# Construct the protocol schema
protocol_schema = {
- "@context": schema_context_url,
- "@type": "reproschema:Protocol",
- "@id": f"{protocol_name}_schema",
- "skos:prefLabel": protocol_display_name,
- "skos:altLabel": f"{protocol_name}_schema",
- "schema:description": protocol_description,
- "schema:schemaVersion": "1.0.0-rc4",
- "schema:version": "0.0.1",
+ "category": "reproschema:Protocol",
+ "id": f"{protocol_name}_schema",
+ "prefLabel": {"en": protocol_display_name},
+ "altLabel": {"en": f"{protocol_name}_schema"},
+ "description": {"en": protocol_description},
+ "schemaVersion": "1.0.0-rc4",
+ "version": redcap_version,
"ui": {
"addProperties": [],
- "order": protocol_order,
+ "order": [],
"shuffle": False,
},
}
# Populate addProperties list
for activity in protocol_order:
+ full_path = f"../activities/{activity}/{activity}_schema"
add_property = {
- "isAbout": f"../activities/{activity}/{activity}_schema",
+ "isAbout": full_path,
"variableName": f"{activity}_schema",
# Assuming activity name as prefLabel, update as needed
- "prefLabel": activity.replace("_", " ").title(),
+ "prefLabel": {"en": activity.replace("_", " ").title()},
+ "isVis": protocol_visibility_obj.get(
+ activity, True
+ ), # Default to True if not specified
}
protocol_schema["ui"]["addProperties"].append(add_property)
+ # Add the full path to the order list
+ protocol_schema["ui"]["order"].append(full_path)
- # Add visibility if needed
- if protocol_visibility_obj:
- protocol_schema["ui"]["visibility"] = protocol_visibility_obj
-
+ prot = Protocol(**protocol_schema)
+ # Write the protocol schema to file
protocol_dir = f"{abs_folder_path}/{protocol_name}"
+ os.makedirs(protocol_dir, exist_ok=True)
schema_file = f"{protocol_name}_schema"
file_path = os.path.join(protocol_dir, schema_file)
-
- try:
- os.makedirs(protocol_dir, exist_ok=True)
- with open(file_path, "w") as file:
- json.dump(protocol_schema, file, indent=4)
- print("Protocol schema created")
- except OSError as e:
- print(f"Error creating directory {protocol_dir}: {e}")
- except IOError as e:
- print(f"Error writing to file {file_path}: {e}")
+ write_obj_jsonld(prot, file_path)
+ print("Protocol schema created")
def parse_language_iso_codes(input_string):
@@ -388,6 +398,7 @@ def process_csv(
for field in datas[form_name]:
field_name = field["Variable / Field Name"]
order[form_name].append(f"items/{field_name}")
+ print("Processing field: ", field_name, " in form: ", form_name)
process_row(
abs_folder_path,
schema_context_url,
@@ -420,6 +431,8 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
protocol_name = protocol.get("protocol_name")
protocol_display_name = protocol.get("protocol_display_name")
protocol_description = protocol.get("protocol_description")
+ redcap_version = protocol.get("redcap_version")
+ # we can add reproschema version here (or automatically extract)
if not protocol_name:
raise ValueError("Protocol name not specified in the YAML file.")
@@ -434,7 +447,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
abs_folder_path = os.path.abspath(protocol_name)
if schema_context_url is None:
- schema_context_url = "https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic"
+ schema_context_url = "https://raw.githubusercontent.com/ReproNim/reproschema/efb74e155c09e13aa009ea04609ba4f1152fcbc6/contexts/reproschema_new"
# Initialize variables
schema_map = {
@@ -451,7 +464,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
"Choices, Calculations, OR Slider Labels": "choices", # column F
"Branching Logic (Show field only if...)": "visibility", # column L
"Custom Alignment": "customAlignment", # column N
- "Identifier?": "identifiable", # column K
+ # "Identifier?": "identifiable", # column K
"multipleChoice": "multipleChoice",
"responseType": "@type",
}
@@ -515,6 +528,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
create_form_schema(
abs_folder_path,
schema_context_url,
+ redcap_version,
form_name,
activity_display_name,
activity_description,
@@ -530,6 +544,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
create_protocol_schema(
abs_folder_path,
schema_context_url,
+ redcap_version,
protocol_name,
protocol_display_name,
protocol_description,
diff --git a/reproschema/reproschema2redcap.py b/reproschema/reproschema2redcap.py
index 3d03cf3..298c56e 100644
--- a/reproschema/reproschema2redcap.py
+++ b/reproschema/reproschema2redcap.py
@@ -142,6 +142,7 @@ def get_csv_data(dir_path):
if protocol_dir.is_dir():
# Check for a _schema file in each directory
schema_file = next(protocol_dir.glob("*_schema"), None)
+ print(f"Found schema file: {schema_file}")
if schema_file:
# Process the found _schema file
parsed_protocol_json = read_json_file(schema_file)
@@ -152,8 +153,14 @@ def get_csv_data(dir_path):
normalized_relative_path = Path(
relative_activity_path.lstrip("../")
)
- activity_path = dir_path / normalized_relative_path
- print(f"Processing activity {activity_path}")
+
+ activity_path = (
+ dir_path
+ / "activities"
+ / normalized_relative_path
+ / (normalized_relative_path.name + "_schema")
+ )
+
parsed_activity_json = read_json_file(activity_path)
if parsed_activity_json:
diff --git a/reproschema/tests/test_redcap2reproschema.py b/reproschema/tests/test_redcap2reproschema.py
index 2386a8c..ac24cf5 100644
--- a/reproschema/tests/test_redcap2reproschema.py
+++ b/reproschema/tests/test_redcap2reproschema.py
@@ -1,10 +1,10 @@
import os
import shutil
import pytest
+import yaml
from click.testing import CliRunner
-from ..cli import main # Import the Click group
+from ..cli import main
-# Assuming your test files are located in a 'tests' directory
CSV_FILE_NAME = "redcap_dict.csv"
YAML_FILE_NAME = "redcap2rs.yaml"
CSV_TEST_FILE = os.path.join(
@@ -15,17 +15,34 @@
)
-def test_redcap2reproschema_success():
+def test_redcap2reproschema(tmpdir):
runner = CliRunner()
- with runner.isolated_filesystem():
- # Copy the test files to the isolated filesystem
- shutil.copy(CSV_TEST_FILE, CSV_FILE_NAME)
- shutil.copy(YAML_TEST_FILE, YAML_FILE_NAME)
+ temp_csv_file = tmpdir.join(CSV_FILE_NAME)
+ temp_yaml_file = tmpdir.join(YAML_FILE_NAME)
+
+ shutil.copy(CSV_TEST_FILE, str(temp_csv_file)) # Convert to string
+ shutil.copy(YAML_TEST_FILE, str(temp_yaml_file)) # Convert to string
+ print("tmpdir: ", tmpdir)
+ # Change the current working directory to tmpdir
+ with tmpdir.as_cwd():
+ # Read YAML to find the expected output directory name
+ with open(str(temp_yaml_file), "r") as file: # Convert to string
+ protocol = yaml.safe_load(file)
+ protocol_name = protocol.get("protocol_name", "").replace(" ", "_")
- # Run the command within the isolated filesystem
result = runner.invoke(
- main, ["redcap2reproschema", CSV_FILE_NAME, YAML_FILE_NAME]
+ main,
+ [
+ "redcap2reproschema",
+ str(temp_csv_file),
+ str(temp_yaml_file),
+ ], # Convert to string
)
- print(result.output)
- assert result.exit_code == 0
+
+ assert (
+ result.exit_code == 0
+ ), f"The command failed to execute successfully: {result.output}"
+ assert os.path.isdir(
+ protocol_name
+ ), f"Expected output directory '{protocol_name}' does not exist"
diff --git a/reproschema/tests/test_redcap2rs_data/redcap2rs.yaml b/reproschema/tests/test_redcap2rs_data/redcap2rs.yaml
index 3330f3b..95d4a9c 100644
--- a/reproschema/tests/test_redcap2rs_data/redcap2rs.yaml
+++ b/reproschema/tests/test_redcap2rs_data/redcap2rs.yaml
@@ -9,13 +9,8 @@ protocol_name: "test_redcap2rs" # Example: "My_Protocol"
# This name will be displayed in the application.
protocol_display_name: "redcap protocols"
-# GitHub Repository Information:
-# Create a GitHub repository named 'reproschema' to store your reproschema protocols.
-# Replace 'your_github_username' with your actual GitHub username.
-user_name: "yibeichan"
-repo_name: "redcap2reproschema" # Recommended name; can be different if preferred.
-repo_url: "https://github.com/{{user_name}}/{{repo_name}}"
-
# Protocol Description:
# Provide a brief description of your protocol.
protocol_description: "testing" # Example: "This protocol is for ..."
+
+redcap_version: "3.0.0"
diff --git a/reproschema/tests/test_reproschema2redcap.py b/reproschema/tests/test_reproschema2redcap.py
index f0a02ce..eff26b3 100644
--- a/reproschema/tests/test_reproschema2redcap.py
+++ b/reproschema/tests/test_reproschema2redcap.py
@@ -2,46 +2,41 @@
import pytest
from click.testing import CliRunner
from ..cli import main
-from shutil import copytree
+from shutil import copytree, rmtree
from pathlib import Path
import csv
-def test_reproschema2redcap_success():
+def test_reproschema2redcap(tmpdir):
runner = CliRunner()
with runner.isolated_filesystem():
# Copy necessary test data into the isolated filesystem
original_data_dir = os.path.join(
- os.path.dirname(__file__), "test_rs2redcap_data"
+ os.path.dirname(__file__), "test_rs2redcap_data", "test_redcap2rs"
)
copytree(original_data_dir, "input_data")
- input_path = Path("input_data") # Using Path object
- output_csv_path = "output.csv"
+ input_path = Path("input_data")
+ output_csv_path = os.path.join(tmpdir, "output.csv")
- # Invoke the reproschema2redcap command
result = runner.invoke(
main, ["reproschema2redcap", str(input_path), output_csv_path]
)
- # Print the output for debugging
print(result.output)
- # Assert the expected outcomes
assert result.exit_code == 0
- # Check if the output CSV file has been created
assert os.path.exists(output_csv_path)
- # Read and print the contents of the CSV file
with open(output_csv_path, "r", encoding="utf-8") as csv_file:
reader = csv.reader(csv_file)
csv_contents = list(reader)
- print("CSV File Contents:")
- for row in csv_contents:
- print(row)
- # Optionally, assert conditions about the CSV contents
- # For example, assert that the file is not empty
- assert len(csv_contents) > 0
+ assert (
+ len(csv_contents) > 1
+ ) # More than one row indicates content beyond headers
+
+ # Clean up temporary directory after use (optional)
+ # rmtree(tmpdir)
diff --git a/reproschema/tests/test_validate.py b/reproschema/tests/test_validate.py
index 96e40db..43c4d08 100644
--- a/reproschema/tests/test_validate.py
+++ b/reproschema/tests/test_validate.py
@@ -5,15 +5,15 @@
def test_validate():
os.chdir(os.path.dirname(__file__))
- assert validate_dir("data", os.path.abspath("reproschema-shacl.ttl"))
+ assert validate_dir("data")
def test_type_error():
os.chdir(os.path.dirname(__file__))
with pytest.raises(ValueError):
- validate_dir("contexts", os.path.abspath("reproschema-shacl.ttl"))
+ validate_dir("contexts")
def test_url():
url = "https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc1/examples/activities/activity1.jsonld"
- assert validate(os.path.abspath("reproschema-shacl.ttl"), url)
+ assert validate(url)
diff --git a/reproschema/utils.py b/reproschema/utils.py
index 2f85d1a..d08ba87 100644
--- a/reproschema/utils.py
+++ b/reproschema/utils.py
@@ -4,6 +4,7 @@
from tempfile import mkdtemp
import requests
import requests_cache
+from copy import deepcopy
from . import get_logger
@@ -52,3 +53,105 @@ def start_server(port=8000, path=None, tmpdir=None):
def stop_server(stop):
stop()
requests_cache.clear()
+
+
+# Property IRIs that fixing_old_schema looks for as dictionary keys; each table is routed to one fixer
+LANG_FIX = [  # values go through _lang_fix, which drops "@language"
+ "http://schema.org/schemaVersion",
+ "http://schema.org/version",
+ "http://schema.repronim.org/limit",
+ "http://schema.repronim.org/randomMaxDelay",
+ "http://schema.org/inLanguage",
+ "http://schema.repronim.org/schedule",
+]
+BOOL_FIX = [  # values go through _bool_fix, which sets "@type" to XMLSchema#boolean
+ "http://schema.repronim.org/shuffle",
+ "http://schema.org/readonlyValue",
+ "http://schema.repronim.org/multipleChoice",
+ "http://schema.org/valueRequired",
+]
+
+ALLOWTYPE_FIX = ["http://schema.repronim.org/allow"]  # values go through _allowtype_fix
+ALLOWTYPE_MAPPING = {  # old "@id" -> replacement "@id", applied by _allowtype_fix
+ "http://schema.repronim.org/Skipped": "http://schema.repronim.org/AllowSkip",
+ "http://schema.repronim.org/DontKnow": "http://schema.repronim.org/AllowAltResponse",
+}
+
+IMAGE_FIX = ["http://schema.org/image"]  # values go through _image_fix
+
+
+def _lang_fix(data_el):
+    """Return the value dict (unwrapped from a 1-item list) without "@language"."""
+    if isinstance(data_el, dict):
+        data_el.pop("@language", None)  # in place; a missing key is fine
+    elif isinstance(data_el, list) and len(data_el) == 1:
+        data_el = _lang_fix(data_el[0])  # recurse so a non-dict element hits the error below
+    else:
+        raise Exception(f"expected a list or dictionary, got {data_el}")  # incl. empty/longer lists
+    return data_el
+
+
+def _image_fix(data_el):  # normalise an image value so its target sits under "@id"; returns the dict
+ if isinstance(data_el, dict):
+ if "@id" not in data_el and "@value" in data_el:
+ data_el["@id"] = data_el.pop("@value")  # move the literal value into "@id"
+ data_el.pop("@language", None)  # NOTE(review): indentation is lost in this patch text, so it is unclear whether this runs for every dict or only inside the "@value" branch - confirm
+ elif isinstance(data_el, list) and len(data_el) == 1:
+ data_el = data_el[0]  # unwrap the single-element list
+ data_el = _image_fix(data_el)  # recurse so the element is checked and fixed the same way
+ else:
+ raise Exception(f"expected a list or dictionary, got {data_el}")  # also reached for lists whose length is not 1
+ return data_el
+
+
+def _bool_fix(data_el):  # tag a value dict, or every entry of a (nested) list, as boolean in place
+    if isinstance(data_el, list):
+        for entry in data_el:
+            _bool_fix(entry)
+    elif isinstance(data_el, dict):
+        data_el["@type"] = "http://www.w3.org/2001/XMLSchema#boolean"
+    else:
+        raise Exception(f"expected a list or dictionary, got {data_el}")
+
+
+def _allowtype_fix(data_el):
+    """Replace old allow-type "@id" values with their ALLOWTYPE_MAPPING targets, in place."""
+    if isinstance(data_el, list):
+        for entry in data_el:
+            _allowtype_fix(entry)
+    elif isinstance(data_el, dict):
+        data_el["@id"] = ALLOWTYPE_MAPPING.get(data_el["@id"], data_el["@id"])
+    else:
+        raise Exception(f"expected a list or dictionary, got {data_el}")
+
+
+def fixing_old_schema(data, copy_data=False):
+    """Fix old-schema data (a dict) in place so the new model can load it; return it."""
+    if copy_data:  # work on a deep copy so the caller's object is left untouched
+        data = deepcopy(data)
+    for key, val in data.items():  # only existing keys are reassigned, so the dict size is stable
+        if key in LANG_FIX:
+            data[key] = _lang_fix(val)
+        elif key in BOOL_FIX:
+            _bool_fix(val)
+        elif key in ALLOWTYPE_FIX:
+            _allowtype_fix(val)
+        elif key in IMAGE_FIX:
+            data[key] = _image_fix(val)
+        elif isinstance(val, (str, bool, int, float)):
+            pass
+        elif isinstance(val, dict):
+            fixing_old_schema(val)  # recurse into the nested dict (already copied if requested)
+        elif isinstance(val, list):
+            for el in val:
+                if isinstance(el, (str, bool, int, float)):
+                    pass
+                elif isinstance(el, dict):
+                    fixing_old_schema(el)
+                else:
+                    raise Exception(
+                        f"expected a dict, str, bool or numerics, got {el}"
+                    )
+        else:
+            raise Exception(f"type {type(val)} not supported yet")
+    return data
diff --git a/reproschema/validate.py b/reproschema/validate.py
index 64b612e..a4f7fec 100644
--- a/reproschema/validate.py
+++ b/reproschema/validate.py
@@ -3,8 +3,8 @@
from .jsonldutils import load_file, validate_data
-def validate_dir(directory, shape_file, started=False, http_kwargs={}):
- """Validate a directory containing JSONLD documents
+def validate_dir(directory, started=False, http_kwargs={}):
+ """Validate a directory containing JSONLD documents against the ReproSchema pydantic model.
.. warning:: This assumes every file in the directory can be read by a json parser.
@@ -12,8 +12,6 @@ def validate_dir(directory, shape_file, started=False, http_kwargs={}):
----------
directory: str
Path to directory to walk for validation
- shape_file: str
- Path containing validation SHACL shape files
started : bool
Whether an http server exists or not
http_kwargs : dict
@@ -27,6 +25,9 @@ def validate_dir(directory, shape_file, started=False, http_kwargs={}):
if any document is non-conformant.
"""
+ if not os.path.isdir(directory):
+ raise Exception(f"{directory} is not a directory")
+ print(f"Validating directory {directory}")
stop = None
if not started:
stop, port = start_server(**http_kwargs)
@@ -41,7 +42,8 @@ def validate_dir(directory, shape_file, started=False, http_kwargs={}):
data = load_file(full_file_name, started=True, http_kwargs=http_kwargs)
if len(data) == 0:
raise ValueError("Empty data graph")
- conforms, vtext = validate_data(data, shape_file)
+ print(f"Validating {full_file_name}")
+ conforms, vtext = validate_data(data)
except (ValueError,):
if stop is not None:
stop_server(stop)
@@ -57,13 +59,11 @@ def validate_dir(directory, shape_file, started=False, http_kwargs={}):
return True
-def validate(shapefile, path):
+def validate(path):
"""Helper function to validate directory or path
Parameters
----------
- shapefile : path-like
- Path to folder or file containing ReproSchema SHACL descriptors
path : path-like
Path to folder or file containing JSONLD documents.
@@ -74,15 +74,11 @@ def validate(shapefile, path):
exception.
"""
- if shapefile is None:
- shapefile = os.path.join(
- os.path.dirname(__file__), "tests", "reproschema-shacl.ttl"
- )
if os.path.isdir(path):
- conforms = validate_dir(path, shapefile)
+ conforms = validate_dir(path)
else:
data = load_file(path, started=False)
- conforms, vtext = validate_data(data, shapefile)
+ conforms, vtext = validate_data(data)
if not conforms:
lgr.critical(f"File {path} has validation errors.")
raise ValueError(vtext)
diff --git a/templates/redcap2rs.yaml b/templates/redcap2rs.yaml
index 1e1dbc3..4bbf78f 100644
--- a/templates/redcap2rs.yaml
+++ b/templates/redcap2rs.yaml
@@ -12,3 +12,5 @@ protocol_display_name: "Your protocol display name"
# Protocol Description:
# Provide a brief description of your protocol.
protocol_description: "Description for your protocol" # Example: "This protocol is for ..."
+
+redcap_version: "x.y.z" # Example: "3.0.0"