diff --git a/.github/workflows/pdoc.yml b/.github/workflows/pdoc.yml
new file mode 100644
index 000000000..654b6cb5c
--- /dev/null
+++ b/.github/workflows/pdoc.yml
@@ -0,0 +1,95 @@
+name: pdoc
+
+# build the documentation whenever there are new commits on develop
+on:
+  push:
+    branches:
+      - develop
+  workflow_dispatch: # Allow manually triggering the workflow
+
+# security: restrict permissions for CI jobs.
+permissions:
+  contents: read
+
+concurrency:
+  # Cancel the currently running workflow from the same branch or PR when a new workflow is triggered.
+  # When the trigger is a push rather than a PR, the commit SHA is used to generate the concurrency group.
+  # {{ github.workflow }}: the workflow name is used to generate the concurrency group; this lets more than one workflow define its own group
+  # {{ github.ref_type }}: the type of Git ref that triggered the run; can be either branch or tag
+  # {{ github.event.pull_request.number }}: the PR number
+  # {{ github.sha }}: the full commit SHA
+  # credit: https://github.com/Sage-Bionetworks-Workflows/sagetasks/blob/main/.github/workflows/ci.yml
+  group: >-
+    ${{ github.workflow }}-${{ github.ref_type }}-
+    ${{ github.event.pull_request.number || github.sha }}
+  cancel-in-progress: true
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    env:
+      POETRY_VERSION: 1.2.0
+    strategy:
+      matrix:
+        python-version: ["3.9", "3.10"]
+
+    steps:
+      #----------------------------------------------
+      # check-out repo and set-up python
+      #----------------------------------------------
+      - name: Check out repository
+        uses: actions/checkout@v3
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      #----------------------------------------------
+      # install & configure poetry
+      #----------------------------------------------
+      - name: Install Poetry
+        run: |
+          curl -sSL https://install.python-poetry.org \
+            | python3 - --version ${{ env.POETRY_VERSION }};
+          poetry config virtualenvs.create true;
+          poetry config virtualenvs.in-project true;
+
+      #----------------------------------------------
+      # load cached venv if cache exists
+      #----------------------------------------------
+      - name: Load cached venv
+        id: cached-poetry-dependencies
+        uses: actions/cache@v3
+        with:
+          path: .venv
+          key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}
+
+      #----------------------------------------------
+      # install dependencies if cache does not exist
+      #----------------------------------------------
+      - name: Install dependencies
+        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
+        run: poetry install --no-interaction --no-root
+
+      # create documentation
+      - run: poetry show pdoc
+      - run: poetry run pdoc --docformat google -o docs/schematic schematic/manifest schematic/models schematic/schemas schematic/store schematic/utils schematic/visualization
+
+      - uses: actions/upload-pages-artifact@v1
+        with:
+          path: docs/schematic
+
+  # Deploy the artifact to GitHub pages.
+  # This is a separate job so that only actions/deploy-pages has the necessary permissions.
+  deploy:
+    needs: build
+    runs-on: ubuntu-latest
+    permissions:
+      pages: write
+      id-token: write
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    steps:
+      - id: deployment
+        uses: actions/deploy-pages@v1
\ No newline at end of file
diff --git a/README.md b/README.md
index 1b4c36b34..486262f27 100644
--- a/README.md
+++ b/README.md
@@ -179,6 +179,72 @@ For new features, bugs, enhancements
*Note*: Make sure you have the latest version of the `develop` branch on your local machine.
+## Installation Guide - Docker
+
+1. Install Docker from https://www.docker.com/.
+2. Identify the Docker image of interest from [Schematic DockerHub](https://hub.docker.com/r/sagebionetworks/schematic/tags).
+   For example, run `docker pull sagebionetworks/schematic:latest` from the CLI, or run `docker compose up` after cloning the schematic GitHub repo.
+   In this case, `sagebionetworks/schematic:latest` is the name of the chosen image.
+3. Run a schematic command with `docker run`, as shown in the examples below.
+   - For more information on the flags for `docker run` and what they do, visit the [Docker Documentation](https://docs.docker.com/engine/reference/commandline/run/).
+   - These example commands assume that you have navigated to the directory you want to run schematic from. To specify your working directory, use `$(pwd)` on macOS/Linux or `%cd%` on Windows.
+   - If not using the latest image, specify the full image name, e.g. `sagebionetworks/schematic:commit-e611e4a`.
+   - If using a local image created by `docker compose up`, use that image name instead, e.g. `schematic_schematic`.
+   - The `--name` flag sets the name of the container running locally on your machine.
+
+### Example For REST API
+
+#### Use file path of `config.yml` to run API endpoints:
+```
+docker run --rm -p 3001:3001 \
+ -v $(pwd):/schematic -w /schematic --name schematic \
+ -e SCHEMATIC_CONFIG=/schematic/config.yml \
+ -e GE_HOME=/usr/src/app/great_expectations/ \
+ sagebionetworks/schematic \
+ python /usr/src/app/run_api.py
+```
+
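+Once the container is running, a quick way to confirm the API is reachable is to request the Swagger UI. This assumes the API keeps its default `/v1` base path; adjust the URL if your deployment differs:
+
+```
+curl -I http://localhost:3001/v1/ui/
+```
+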
+#### Use content of `config.yml` as an environment variable to run API endpoints:
+1. Save the content of `config.yml` to the environment variable `SCHEMATIC_CONFIG_CONTENT`: `export SCHEMATIC_CONFIG_CONTENT=$(cat config.yml)`
+
+2. Pass `SCHEMATIC_CONFIG_CONTENT` as an environment variable with `docker run`:
+
+```
+docker run --rm -p 3001:3001 \
+ -v $(pwd):/schematic -w /schematic --name schematic \
+ -e GE_HOME=/usr/src/app/great_expectations/ \
+ -e SCHEMATIC_CONFIG_CONTENT=$SCHEMATIC_CONFIG_CONTENT \
+ sagebionetworks/schematic \
+ python /usr/src/app/run_api.py
+```
+
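+#### Pass the content of `config.yml` through Docker Compose:
+Because `docker-compose.yml` forwards `SCHEMATIC_CONFIG_CONTENT` from the host environment, the same configuration can be supplied to `docker compose`. A minimal sketch, assuming it is run from the cloned repo root where `config.yml` lives:
+
+```
+export SCHEMATIC_CONFIG_CONTENT=$(cat config.yml)
+docker compose up
+```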
+
+### Example For Schematic on mac/linux
+To run the example below, first clone schematic into your home directory: `git clone https://github.com/sage-bionetworks/schematic ~/schematic`.
+Then update `.synapseConfig` with your credentials.
+```
+docker run \
+ -v ~/schematic:/schematic \
+ -w /schematic \
+ -e SCHEMATIC_CONFIG=/schematic/config.yml \
+ -e GE_HOME=/usr/src/app/great_expectations/ \
+ sagebionetworks/schematic schematic model \
+ -c /schematic/config.yml validate \
+ -mp /schematic/tests/data/mock_manifests/Valid_Test_Manifest.csv \
+ -dt MockComponent \
+ -js /schematic/tests/data/example.model.jsonld
+```
+
+### Example For Schematic on Windows
+```
+docker run -v %cd%:/schematic \
+ -w /schematic \
+ -e GE_HOME=/usr/src/app/great_expectations/ \
+ sagebionetworks/schematic \
+ schematic model \
+ -c config.yml validate -mp tests/data/mock_manifests/inValid_Test_Manifest.csv -dt MockComponent -js /schematic/tests/data/example.model.jsonld
+```
+
# Other Contribution Guidelines
## Updating readthedocs documentation
1. `cd docs`
diff --git a/api/__init__.py b/api/__init__.py
index 07c1c5c3b..fe8fd1777 100644
--- a/api/__init__.py
+++ b/api/__init__.py
@@ -15,7 +15,10 @@ def create_app():
     # path to config.yml file saved as a Flask config variable
     default_config = os.path.abspath(os.path.join(__file__, "../../config.yml"))
     schematic_config = os.environ.get("SCHEMATIC_CONFIG", default_config)
+    schematic_config_content = os.environ.get("SCHEMATIC_CONFIG_CONTENT")
+
     app.config["SCHEMATIC_CONFIG"] = schematic_config
+    app.config["SCHEMATIC_CONFIG_CONTENT"] = schematic_config_content
 
     # Configure flask app
     # app.config[] = schematic[]
diff --git a/api/routes.py b/api/routes.py
index 20a4b74b7..9a3c3ac11 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -37,7 +37,14 @@
 def config_handler(asset_view=None):
     path_to_config = app.config["SCHEMATIC_CONFIG"]
 
-    # check if file exists at the path created, i.e., app.config['SCHEMATIC_CONFIG']
+    # if the content of the config file is provided:
+    content_of_config = app.config["SCHEMATIC_CONFIG_CONTENT"]
+
+    # if the environment variable exists, load the config from its content
+    if content_of_config:
+        CONFIG.load_config_content_from_env()
+
+    # check if a path to the config file is provided
     if os.path.isfile(path_to_config):
         CONFIG.load_config(path_to_config, asset_view = asset_view)
diff --git a/docker-compose.yml b/docker-compose.yml
index 51aefdeb6..f6e15a901 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -13,4 +13,5 @@ services:
       APP_HOST: "0.0.0.0"
       APP_PORT: "3001"
       SCHEMATIC_CONFIG: /schematic/config.yml
+      SCHEMATIC_CONFIG_CONTENT: "${SCHEMATIC_CONFIG_CONTENT}"
       GE_HOME: /usr/src/app/great_expectations/
diff --git a/poetry.lock b/poetry.lock
index 6b1104156..569fae7ed 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -73,6 +73,18 @@ cffi = ">=1.0.1"
dev = ["cogapp", "pre-commit", "pytest", "wheel"]
tests = ["pytest"]
+[[package]]
+name = "astunparse"
+version = "1.6.3"
+description = "An AST unparser for Python"
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+six = ">=1.6.1,<2.0"
+wheel = ">=0.23.0,<1.0"
+
[[package]]
name = "atomicwrites"
version = "1.4.1"
@@ -1160,6 +1172,23 @@ category = "dev"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
+[[package]]
+name = "pdoc"
+version = "12.2.0"
+description = "API Documentation for Python Projects"
+category = "main"
+optional = false
+python-versions = ">=3.7"
+
+[package.dependencies]
+astunparse = {version = "*", markers = "python_version < \"3.9\""}
+Jinja2 = ">=2.11.0"
+MarkupSafe = "*"
+pygments = ">=2.12.0"
+
+[package.extras]
+dev = ["flake8", "hypothesis", "mypy", "pytest", "pytest-cov", "pytest-timeout", "tox"]
+
[[package]]
name = "pexpect"
version = "4.8.0"
@@ -1970,6 +1999,17 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
dev = ["coverage", "pallets-sphinx-themes", "pytest", "pytest-timeout", "sphinx", "sphinx-issues", "tox"]
watchdog = ["watchdog"]
+[[package]]
+name = "wheel"
+version = "0.37.1"
+description = "A built-package format for Python"
+category = "main"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
+
+[package.extras]
+test = ["pytest (>=3.0.0)", "pytest-cov"]
+
[[package]]
name = "widgetsnbextension"
version = "4.0.2"
@@ -2001,7 +2041,7 @@ testing = ["func-timeout", "jaraco-itertools", "pytest (>=6)", "pytest-black (>=
[metadata]
lock-version = "1.1"
python-versions = ">=3.7.1,<3.11"
-content-hash = "77f2068fe6df8f020fe207e2f6ce49d8850e3b507d11288f7f7e0bdc69dc0c0d"
+content-hash = "4546028400d18ed2c6b6de3057a5537db284b8d2e9263b320f1f823280a900b1"
[metadata.files]
alabaster = [
@@ -2047,6 +2087,10 @@ argon2-cffi-bindings = [
{file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed2937d286e2ad0cc79a7087d3c272832865f779430e0cc2b4f3718d3159b0cb"},
{file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:5e00316dabdaea0b2dd82d141cc66889ced0cdcbfa599e8b471cf22c620c329a"},
]
+astunparse = [
+ {file = "astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8"},
+ {file = "astunparse-1.6.3.tar.gz", hash = "sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872"},
+]
atomicwrites = [
{file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"},
]
@@ -2643,6 +2687,10 @@ pathspec = [
{file = "pathspec-0.9.0-py2.py3-none-any.whl", hash = "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a"},
{file = "pathspec-0.9.0.tar.gz", hash = "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"},
]
+pdoc = [
+ {file = "pdoc-12.2.0-py3-none-any.whl", hash = "sha256:043ab59983ea166ba15c9950eca3683194e5fbc7bbcdd61595f95d18fdc5c05c"},
+ {file = "pdoc-12.2.0.tar.gz", hash = "sha256:1a4f2ca3f02772941b7b7fe85cb50b9b0c86ed4c2417bcf7d0bd7ad189ae1ba8"},
+]
pexpect = [
{file = "pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"},
{file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"},
@@ -3280,6 +3328,10 @@ werkzeug = [
{file = "Werkzeug-1.0.1-py2.py3-none-any.whl", hash = "sha256:2de2a5db0baeae7b2d2664949077c2ac63fbd16d98da0ff71837f7d1dea3fd43"},
{file = "Werkzeug-1.0.1.tar.gz", hash = "sha256:6c80b1e5ad3665290ea39320b91e1be1e0d5f60652b964a3070216de83d2e47c"},
]
+wheel = [
+ {file = "wheel-0.37.1-py2.py3-none-any.whl", hash = "sha256:4bdcd7d840138086126cd09254dc6195fb4fc6f01c050a1d7236f2630db1d22a"},
+ {file = "wheel-0.37.1.tar.gz", hash = "sha256:e9a504e793efbca1b8e0e9cb979a249cf4a0a7b5b8c9e8b65a5e39d49529c1c4"},
+]
widgetsnbextension = [
{file = "widgetsnbextension-4.0.2-py3-none-any.whl", hash = "sha256:966bd61443926b6adcc0abef9f499c48bdeda181c333b0f49842d7385d440579"},
{file = "widgetsnbextension-4.0.2.tar.gz", hash = "sha256:07f0e8582f920b24316cef16490f1aeb498f2c875d48980540e5c5dbf0ff5e2d"},
diff --git a/pyproject.toml b/pyproject.toml
index 4cf0a526d..d44157406 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -66,6 +66,7 @@ Jinja2 = "2.11.3"
openpyxl = "^3.0.9"
"backports.zoneinfo" = {markers = "python_version < \"3.9\"", version = "^0.2.1"}
Flask-Cors = "^3.0.10"
+pdoc = "^12.2.0"
[tool.poetry.dev-dependencies]
diff --git a/schematic/configuration.py b/schematic/configuration.py
index 4ba69b32e..df1d200c2 100644
--- a/schematic/configuration.py
+++ b/schematic/configuration.py
@@ -9,12 +9,16 @@ def __init__(self):
         # entire configuration data
         self.DATA = None
+
     def __getattribute__(self, name):
         value = super().__getattribute__(name)
-        if value is None and "SCHEMATIC_CONFIG" in os.environ:
+        if value is None and "SCHEMATIC_CONFIG_CONTENT" in os.environ:
+            self.load_config_content_from_env()
+            value = super().__getattribute__(name)
+        elif value is None and "SCHEMATIC_CONFIG" in os.environ:
             self.load_config_from_env()
             value = super().__getattribute__(name)
-        elif value is None and "SCHEMATIC_CONFIG" not in os.environ:
+        elif value is None and "SCHEMATIC_CONFIG" not in os.environ and "SCHEMATIC_CONFIG_CONTENT" not in os.environ:
             raise AttributeError(
                 "The '%s' configuration field was accessed, but it hasn't been "
                 "set yet, presumably because the schematic.CONFIG.load_config() "
@@ -34,6 +38,14 @@ def get(self, key, default):
             value = default
         return value
 
+    def load_config_content(self, str_yaml: str) -> dict:
+        try:
+            config_data = yaml.safe_load(str_yaml)
+        except yaml.YAMLError as exc:
+            print(exc)
+            return None
+        return config_data
+
     @staticmethod
     def load_yaml(file_path: str) -> dict:
         with open(file_path, "r") as stream:
@@ -45,9 +57,15 @@ def load_yaml(file_path: str) -> dict:
         return config_data
 
     def normalize_path(self, path):
-        # Retrieve parent directory of the config to decode relative paths
-        parent_dir = os.path.dirname(self.CONFIG_PATH)
-        # Ensure absolute file paths
+
+        if self.CONFIG_PATH:
+            # Retrieve parent directory of the config to decode relative paths
+            parent_dir = os.path.dirname(self.CONFIG_PATH)
+        else:
+            # assume the parent dir is the current working directory
+            parent_dir = os.getcwd()
+
+        # Ensure absolute file paths
         if not os.path.isabs(path):
             path = os.path.join(parent_dir, path)
         # And lastly, normalize file paths
@@ -61,7 +79,19 @@ def load_config_from_env(self):
         )
         return self.load_config(schematic_config)
 
-    def load_config(self, config_path=None, asset_view=None):
+    def load_config_content_from_env(self):
+        schematic_config_content = os.environ["SCHEMATIC_CONFIG_CONTENT"]
+
+        print(
+            'Loading content of config file: %s' % schematic_config_content
+        )
+
+        config_content_yaml = self.load_config_content(schematic_config_content)
+        self.DATA = config_content_yaml
+
+        return self.DATA
+
+    def load_config(self, config_path=None, asset_view=None):
         # If config_path is None, try loading from environment
         if config_path is None and "SCHEMATIC_CONFIG" in os.environ:
             return self.load_config_from_env()
diff --git a/schematic/schemas/df_parser.py b/schematic/schemas/df_parser.py
index 5f46b1626..d4e6c7081 100644
--- a/schematic/schemas/df_parser.py
+++ b/schematic/schemas/df_parser.py
@@ -165,10 +165,10 @@ def get_property(
se: a schema explorer object allowing the traversal and modification of a schema graph
property_display_name: human readable label for the schema object/attribute: key characteristic X of the assay, related protocol, or downstream data that we want to record as metadata feature
property_class_name: *schema* label of the class/object that this is a property of
- description: definition or a reference containing the definition of attribute X. Preferably provide a source ontology link or code in addition to the definition.
- requires_range: what is the set/domain of values that this attribute can be assigned to; currently only used to specify primitive types. TODO: extend to reg exp patterns
- requires_dependencies: important characteristics, if any, of property X that need to be recorded as metadata features given property X is specified. These characteristics are attributes themselves and need to pre-exist in the schema as such
- validation_rules: a list of validation rules defined for this class (e.g. defining what is a valid object of this property)
+ description: definition or a reference containing the definition of attribute X. Preferably provide a source ontology link or code in addition to the definition.
+ requires_range: what is the set/domain of values that this attribute can be assigned to; currently only used to specify primitive types. TODO: extend to reg exp patterns
+ requires_dependencies: important characteristics, if any, of property X that need to be recorded as metadata features given property X is specified. These characteristics are attributes themselves and need to pre-exist in the schema as such
+ validation_rules: a list of validation rules defined for this class (e.g. defining what is a valid object of this property)
Returns: a json schema.org property object
diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py
index 206a9957d..ca57490ba 100644
--- a/schematic/store/synapse.py
+++ b/schematic/store/synapse.py
@@ -79,8 +79,23 @@ def __init__(
         self.syn = self.login(token, access_token, input_token)
         self.project_scope = project_scope
+
+
+        # check if "master_fileview" has been set
+        try:
+            self.storageFileview = CONFIG["synapse"]["master_fileview"]
+        except KeyError:
+            raise MissingConfigValueError(("synapse", "master_fileview"))
+
+        # check if "manifest_basename" has been set
+        try:
+            self.manifest = CONFIG["synapse"]["manifest_basename"]
+        except KeyError:
+            raise MissingConfigValueError(("synapse", "manifest_basename"))
+
         try:
             self.storageFileview = CONFIG["synapse"]["master_fileview"]
+            self.manifest = CONFIG["synapse"]["manifest_basename"]
             if self.project_scope:
                 self.storageFileviewTable = self.syn.tableQuery(
                     f"SELECT * FROM {self.storageFileview} WHERE projectId IN {tuple(self.project_scope + [''])}"
@@ -90,17 +105,10 @@ def __init__(
                 self.storageFileviewTable = self.syn.tableQuery(
                     "SELECT * FROM " + self.storageFileview
                 ).asDataFrame()
-
-            self.manifest = CONFIG["synapse"]["manifest_basename"]
-
-        except KeyError:
-            raise MissingConfigValueError(("synapse", "master_fileview"))
         except AttributeError:
             raise AttributeError("storageFileview attribute has not been set.")
         except SynapseHTTPError:
             raise AccessCredentialsError(self.storageFileview)
-        except ValueError:
-            raise MissingConfigValueError(("synapse", "master_fileview"))
 
     @staticmethod
     def login(token=None, access_token=None, input_token=None):
@@ -1413,7 +1421,7 @@ def make_synapse_table(self,
# remove rows
self.syn.delete(existing_results)
# wait for row deletion to finish on synapse before getting empty table
- sleep(1)
+ sleep(10)
# removes all current columns
current_table = self.syn.get(existingTableId)