diff --git a/.github/workflows/pdoc.yml b/.github/workflows/pdoc.yml
new file mode 100644
index 000000000..654b6cb5c
--- /dev/null
+++ b/.github/workflows/pdoc.yml
@@ -0,0 +1,95 @@
+name: pdoc
+
+# build the documentation whenever there are new commits on develop
+on:
+  push:
+    branches:
+      - develop
+  workflow_dispatch: # Allow manually triggering the workflow
+
+# security: restrict permissions for CI jobs.
+permissions:
+  contents: read
+
+concurrency:
+  # cancel the currently running workflow from the same branch or PR when a new workflow is triggered
+  # when the trigger is a push rather than a PR, the commit sha is used to generate the concurrency group
+  # {{ github.workflow }}: the workflow name is used to generate the concurrency group. This allows you to have more than one workflow
+  # {{ github.ref_type }}: the type of Git ref object created in the repository. Can be either branch or tag
+  # {{ github.event.pull_request.number }}: the PR number
+  # {{ github.sha }}: the full commit sha
+  # credit: https://github.com/Sage-Bionetworks-Workflows/sagetasks/blob/main/.github/workflows/ci.yml
+  group: >-
+    ${{ github.workflow }}-${{ github.ref_type }}-
+    ${{ github.event.pull_request.number || github.sha }}
+  cancel-in-progress: true
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    env:
+      POETRY_VERSION: 1.2.0
+    strategy:
+      matrix:
+        python-version: ["3.9", "3.10"]
+
+    steps:
+      #----------------------------------------------
+      # check-out repo and set-up python
+      #----------------------------------------------
+      - name: Check out repository
+        uses: actions/checkout@v3
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      #----------------------------------------------
+      # install & configure poetry
+      #----------------------------------------------
+      - name: Install Poetry
+        run: |
+          curl -sSL https://install.python-poetry.org \
+            | python3 - --version ${{ env.POETRY_VERSION }};
+          poetry config virtualenvs.create true;
+          poetry config virtualenvs.in-project true;
+
+      #----------------------------------------------
+      # load cached venv if cache exists
+      #----------------------------------------------
+      - name: Load cached venv
+        id: cached-poetry-dependencies
+        uses: actions/cache@v3
+        with:
+          path: .venv
+          key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}
+
+      #----------------------------------------------
+      # install dependencies if cache does not exist
+      #----------------------------------------------
+      - name: Install dependencies
+        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
+        run: poetry install --no-interaction --no-root
+
+      # create documentation
+      - run: poetry show pdoc
+      - run: poetry run pdoc --docformat google -o docs/schematic schematic/manifest schematic/models schematic/schemas schematic/store schematic/utils schematic/visualization
+
+      - uses: actions/upload-pages-artifact@v1
+        with:
+          path: docs/schematic
+
+  # Deploy the artifact to GitHub Pages.
+  # This is a separate job so that only actions/deploy-pages has the necessary permissions.
+  deploy:
+    needs: build
+    runs-on: ubuntu-latest
+    permissions:
+      pages: write
+      id-token: write
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    steps:
+      - id: deployment
+        uses: actions/deploy-pages@v1
\ No newline at end of file
diff --git a/README.md b/README.md
index 1b4c36b34..486262f27 100644
--- a/README.md
+++ b/README.md
@@ -179,6 +179,72 @@ For new features, bugs, enhancements
 *Note*: Make sure you have the latest version of the `develop` branch on your local machine.
+## Installation Guide - Docker
+
+1. Install Docker from https://www.docker.com/.
+2. Identify the docker image of interest from [Schematic DockerHub](https://hub.docker.com/r/sagebionetworks/schematic/tags).
+   For example, run `docker pull sagebionetworks/schematic:latest` from the CLI, or run `docker compose up` after cloning the schematic GitHub repo.
+   In this case, `sagebionetworks/schematic:latest` is the name of the chosen image.
+3. Run a Schematic command with `docker run`.
+   - For more information on `docker run` flags and what they do, visit the [Docker Documentation](https://docs.docker.com/engine/reference/commandline/run/).
+   - These example commands assume that you have navigated to the directory you want to run schematic from. To specify your working directory, use `$(pwd)` on macOS/Linux or `%cd%` on Windows.
+   - If you are not using the latest image, specify the full image name, e.g. `sagebionetworks/schematic:commit-e611e4a`.
+   - If you are using a local image created by `docker compose up`, use that image name instead, e.g. `schematic_schematic`.
+   - The `--name` flag sets the name of the container running locally on your machine; a generic template combining these flags is sketched below.
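+
+   A minimal template combining these flags might look like the following (the image tag, container name, and trailing `--help` command are illustrative placeholders; substitute the image and the schematic command you actually want to run):
+
+   ```
+   # Mount the current directory into the container, make it the working
+   # directory, and run a schematic CLI command inside the image.
+   docker run --rm \
+     -v $(pwd):/schematic \
+     -w /schematic \
+     --name schematic \
+     sagebionetworks/schematic \
+     schematic --help
+   ```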
+
+### Example For REST API
+
+#### Use the file path of `config.yml` to run API endpoints:
+```
+docker run --rm -p 3001:3001 \
+  -v $(pwd):/schematic -w /schematic --name schematic \
+  -e SCHEMATIC_CONFIG=/schematic/config.yml \
+  -e GE_HOME=/usr/src/app/great_expectations/ \
+  sagebionetworks/schematic \
+  python /usr/src/app/run_api.py
+```
+
+#### Use the content of `config.yml` as an environment variable to run API endpoints:
+1. Save the content of `config.yml` to the environment variable `SCHEMATIC_CONFIG_CONTENT` by running: `export SCHEMATIC_CONFIG_CONTENT=$(cat config.yml)`
+
+2. Pass `SCHEMATIC_CONFIG_CONTENT` as an environment variable by using `docker run`:
+
+```
+docker run --rm -p 3001:3001 \
+  -v $(pwd):/schematic -w /schematic --name schematic \
+  -e GE_HOME=/usr/src/app/great_expectations/ \
+  -e SCHEMATIC_CONFIG_CONTENT=$SCHEMATIC_CONFIG_CONTENT \
+  sagebionetworks/schematic \
+  python /usr/src/app/run_api.py
+```
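+
+Once one of the containers above is running, the API listens on port 3001 of the host (per `-p 3001:3001`). A quick reachability check is sketched below; any HTTP response, even a 404, confirms the server is up (the exact routes depend on the API specification):
+
+```
+curl -i http://localhost:3001/
+```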
+
+### Example For Schematic on macOS/Linux
+To run the example below, first clone schematic into your home directory: `git clone https://github.com/sage-bionetworks/schematic ~/schematic`.
+Then update `.synapseConfig` with your credentials.
+```
+docker run \
+  -v ~/schematic:/schematic \
+  -w /schematic \
+  -e SCHEMATIC_CONFIG=/schematic/config.yml \
+  -e GE_HOME=/usr/src/app/great_expectations/ \
+  sagebionetworks/schematic schematic model \
+  -c /schematic/config.yml validate \
+  -mp /schematic/tests/data/mock_manifests/Valid_Test_Manifest.csv \
+  -dt MockComponent \
+  -js /schematic/tests/data/example.model.jsonld
+```
+
+### Example For Schematic on Windows
+``` +docker run -v %cd%:/schematic \ + -w /schematic \ + -e GE_HOME=/usr/src/app/great_expectations/ \ + sagebionetworks/schematic \ + schematic model \ + -c config.yml validate -mp tests/data/mock_manifests/inValid_Test_Manifest.csv -dt MockComponent -js /schematic/data/example.model.jsonld +``` + # Other Contribution Guidelines ## Updating readthedocs documentation 1. `cd docs` diff --git a/api/__init__.py b/api/__init__.py index 07c1c5c3b..fe8fd1777 100644 --- a/api/__init__.py +++ b/api/__init__.py @@ -15,7 +15,10 @@ def create_app(): # path to config.yml file saved as a Flask config variable default_config = os.path.abspath(os.path.join(__file__, "../../config.yml")) schematic_config = os.environ.get("SCHEMATIC_CONFIG", default_config) + schematic_config_content = os.environ.get("SCHEMATIC_CONFIG_CONTENT") + app.config["SCHEMATIC_CONFIG"] = schematic_config + app.config["SCHEMATIC_CONFIG_CONTENT"] = schematic_config_content # Configure flask app # app.config[] = schematic[] diff --git a/api/routes.py b/api/routes.py index 20a4b74b7..9a3c3ac11 100644 --- a/api/routes.py +++ b/api/routes.py @@ -37,7 +37,14 @@ def config_handler(asset_view=None): path_to_config = app.config["SCHEMATIC_CONFIG"] - # check if file exists at the path created, i.e., app.config['SCHEMATIC_CONFIG'] + # if content of the config file is provided: + content_of_config = app.config["SCHEMATIC_CONFIG_CONTENT"] + + # if the environment variable exists + if content_of_config: + CONFIG.load_config_content_from_env() + + # check if path to config is provided if os.path.isfile(path_to_config): CONFIG.load_config(path_to_config, asset_view = asset_view) diff --git a/docker-compose.yml b/docker-compose.yml index 51aefdeb6..f6e15a901 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,4 +13,5 @@ services: APP_HOST: "0.0.0.0" APP_PORT: "3001" SCHEMATIC_CONFIG: /schematic/config.yml + SCHEMATIC_CONFIG_CONTENT: "${SCHEMATIC_CONFIG_CONTENT}" GE_HOME: /usr/src/app/great_expectations/ diff --git a/poetry.lock b/poetry.lock index 6b1104156..569fae7ed 100644 --- a/poetry.lock +++ b/poetry.lock @@ -73,6 +73,18 @@ cffi = ">=1.0.1" dev = ["cogapp", "pre-commit", "pytest", "wheel"] tests = ["pytest"] +[[package]] +name = "astunparse" +version = "1.6.3" +description = "An AST unparser for Python" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +six = ">=1.6.1,<2.0" +wheel = ">=0.23.0,<1.0" + [[package]] name = "atomicwrites" version = "1.4.1" @@ -1160,6 +1172,23 @@ category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" +[[package]] +name = "pdoc" +version = "12.2.0" +description = "API Documentation for Python Projects" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +astunparse = {version = "*", markers = "python_version < \"3.9\""} +Jinja2 = ">=2.11.0" +MarkupSafe = "*" +pygments = ">=2.12.0" + +[package.extras] +dev = ["flake8", "hypothesis", "mypy", "pytest", "pytest-cov", "pytest-timeout", "tox"] + [[package]] name = "pexpect" version = "4.8.0" @@ -1970,6 +1999,17 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" dev = ["coverage", "pallets-sphinx-themes", "pytest", "pytest-timeout", "sphinx", "sphinx-issues", "tox"] watchdog = ["watchdog"] +[[package]] +name = "wheel" +version = "0.37.1" +description = "A built-package format for Python" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" + +[package.extras] +test = 
["pytest (>=3.0.0)", "pytest-cov"] + [[package]] name = "widgetsnbextension" version = "4.0.2" @@ -2001,7 +2041,7 @@ testing = ["func-timeout", "jaraco-itertools", "pytest (>=6)", "pytest-black (>= [metadata] lock-version = "1.1" python-versions = ">=3.7.1,<3.11" -content-hash = "77f2068fe6df8f020fe207e2f6ce49d8850e3b507d11288f7f7e0bdc69dc0c0d" +content-hash = "4546028400d18ed2c6b6de3057a5537db284b8d2e9263b320f1f823280a900b1" [metadata.files] alabaster = [ @@ -2047,6 +2087,10 @@ argon2-cffi-bindings = [ {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed2937d286e2ad0cc79a7087d3c272832865f779430e0cc2b4f3718d3159b0cb"}, {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:5e00316dabdaea0b2dd82d141cc66889ced0cdcbfa599e8b471cf22c620c329a"}, ] +astunparse = [ + {file = "astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8"}, + {file = "astunparse-1.6.3.tar.gz", hash = "sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872"}, +] atomicwrites = [ {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, ] @@ -2643,6 +2687,10 @@ pathspec = [ {file = "pathspec-0.9.0-py2.py3-none-any.whl", hash = "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a"}, {file = "pathspec-0.9.0.tar.gz", hash = "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"}, ] +pdoc = [ + {file = "pdoc-12.2.0-py3-none-any.whl", hash = "sha256:043ab59983ea166ba15c9950eca3683194e5fbc7bbcdd61595f95d18fdc5c05c"}, + {file = "pdoc-12.2.0.tar.gz", hash = "sha256:1a4f2ca3f02772941b7b7fe85cb50b9b0c86ed4c2417bcf7d0bd7ad189ae1ba8"}, +] pexpect = [ {file = "pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"}, {file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"}, @@ -3280,6 +3328,10 @@ werkzeug = [ {file = "Werkzeug-1.0.1-py2.py3-none-any.whl", hash = "sha256:2de2a5db0baeae7b2d2664949077c2ac63fbd16d98da0ff71837f7d1dea3fd43"}, {file = "Werkzeug-1.0.1.tar.gz", hash = "sha256:6c80b1e5ad3665290ea39320b91e1be1e0d5f60652b964a3070216de83d2e47c"}, ] +wheel = [ + {file = "wheel-0.37.1-py2.py3-none-any.whl", hash = "sha256:4bdcd7d840138086126cd09254dc6195fb4fc6f01c050a1d7236f2630db1d22a"}, + {file = "wheel-0.37.1.tar.gz", hash = "sha256:e9a504e793efbca1b8e0e9cb979a249cf4a0a7b5b8c9e8b65a5e39d49529c1c4"}, +] widgetsnbextension = [ {file = "widgetsnbextension-4.0.2-py3-none-any.whl", hash = "sha256:966bd61443926b6adcc0abef9f499c48bdeda181c333b0f49842d7385d440579"}, {file = "widgetsnbextension-4.0.2.tar.gz", hash = "sha256:07f0e8582f920b24316cef16490f1aeb498f2c875d48980540e5c5dbf0ff5e2d"}, diff --git a/pyproject.toml b/pyproject.toml index 4cf0a526d..d44157406 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,6 +66,7 @@ Jinja2 = "2.11.3" openpyxl = "^3.0.9" "backports.zoneinfo" = {markers = "python_version < \"3.9\"", version = "^0.2.1"} Flask-Cors = "^3.0.10" +pdoc = "^12.2.0" [tool.poetry.dev-dependencies] diff --git a/schematic/configuration.py b/schematic/configuration.py index 4ba69b32e..df1d200c2 100644 --- a/schematic/configuration.py +++ b/schematic/configuration.py @@ -9,12 +9,16 @@ def __init__(self): # entire configuration data self.DATA = None + def __getattribute__(self, name): value = 
super().__getattribute__(name) - if value is None and "SCHEMATIC_CONFIG" in os.environ: + if value is None and "SCHEMATIC_CONFIG_CONTENT" in os.environ: + self.load_config_content_from_env() + value = super().__getattribute__(name) + elif value is None and "SCHEMATIC_CONFIG" in os.environ: self.load_config_from_env() value = super().__getattribute__(name) - elif value is None and "SCHEMATIC_CONFIG" not in os.environ: + elif value is None and "SCHEMATIC_CONFIG" not in os.environ and "SCHEMATIC_CONFIG_CONTENT" not in os.environ: raise AttributeError( "The '%s' configuration field was accessed, but it hasn't been " "set yet, presumably because the schematic.CONFIG.load_config() " @@ -34,6 +38,14 @@ def get(self, key, default): value = default return value + def load_config_content(self, str_yaml: str) -> dict: + try: + config_data = yaml.safe_load(str_yaml) + except yaml.YAMLError as exc: + print(exc) + return None + return config_data + @staticmethod def load_yaml(file_path: str) -> dict: with open(file_path, "r") as stream: @@ -45,9 +57,15 @@ def load_yaml(file_path: str) -> dict: return config_data def normalize_path(self, path): - # Retrieve parent directory of the config to decode relative paths - parent_dir = os.path.dirname(self.CONFIG_PATH) - # Ensure absolute file paths + + if self.CONFIG_PATH: + # Retrieve parent directory of the config to decode relative paths + parent_dir = os.path.dirname(self.CONFIG_PATH) + else: + # assume the parent dir would be the current work dir + parent_dir = os.getcwd() + + # Ensure absolute file paths if not os.path.isabs(path): path = os.path.join(parent_dir, path) # And lastly, normalize file paths @@ -61,7 +79,19 @@ def load_config_from_env(self): ) return self.load_config(schematic_config) - def load_config(self, config_path=None, asset_view=None): + def load_config_content_from_env(self): + schematic_config_content = os.environ["SCHEMATIC_CONFIG_CONTENT"] + + print( + 'Loading content of config file: %s' % schematic_config_content + ) + + config_content_yaml = self.load_config_content(schematic_config_content) + self.DATA = config_content_yaml + + return self.DATA + + def load_config(self, config_path=None, asset_view=None): # If config_path is None, try loading from environment if config_path is None and "SCHEMATIC_CONFIG" in os.environ: return self.load_config_from_env() diff --git a/schematic/schemas/df_parser.py b/schematic/schemas/df_parser.py index 5f46b1626..d4e6c7081 100644 --- a/schematic/schemas/df_parser.py +++ b/schematic/schemas/df_parser.py @@ -165,10 +165,10 @@ def get_property( se: a schema explorer object allowing the traversal and modification of a schema graph property_display_name: human readable label for the schema object/attribute: key characteristic X of the assay, related protocol, or downstream data that we want to record as metadata feature property_class_name: *schema* label of the class/object that this is a property of - description: definition or a reference containing the definition of attribute X. Preferably provide a source ontology link or code in addition to the definition. - requires_range: what is the set/domain of values that this attribute can be assigned to; currently only used to specify primitive types. TODO: extend to reg exp patterns - requires_dependencies: important characteristics, if any, of property X that need to be recorded as metadata features given property X is specified. 
These characteristics are attributes themselves and need to pre-exist in the schema as such - validation_rules: a list of validation rules defined for this class (e.g. defining what is a valid object of this property) + description: definition or a reference containing the definition of attribute X. Preferably provide a source ontology link or code in addition to the definition. + requires_range: what is the set/domain of values that this attribute can be assigned to; currently only used to specify primitive types. TODO: extend to reg exp patterns + requires_dependencies: important characteristics, if any, of property X that need to be recorded as metadata features given property X is specified. These characteristics are attributes themselves and need to pre-exist in the schema as such + validation_rules: a list of validation rules defined for this class (e.g. defining what is a valid object of this property) Returns: a json schema.org property object diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 206a9957d..ca57490ba 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -79,8 +79,23 @@ def __init__( self.syn = self.login(token, access_token, input_token) self.project_scope = project_scope + + + # check if "master_fileview" has been set + try: + self.storageFileview = CONFIG["synapse"]["master_fileview"] + except KeyError: + raise MissingConfigValueError(("synapse", "master_fileview")) + + # check if "manifest_basename" has been set + try: + self.manifest = CONFIG["synapse"]["manifest_basename"] + except KeyError: + raise MissingConfigValueError(("synapse", "manifest_basename")) + try: self.storageFileview = CONFIG["synapse"]["master_fileview"] + self.manifest = CONFIG["synapse"]["manifest_basename"] if self.project_scope: self.storageFileviewTable = self.syn.tableQuery( f"SELECT * FROM {self.storageFileview} WHERE projectId IN {tuple(self.project_scope + [''])}" @@ -90,17 +105,10 @@ def __init__( self.storageFileviewTable = self.syn.tableQuery( "SELECT * FROM " + self.storageFileview ).asDataFrame() - - self.manifest = CONFIG["synapse"]["manifest_basename"] - - except KeyError: - raise MissingConfigValueError(("synapse", "master_fileview")) except AttributeError: raise AttributeError("storageFileview attribute has not been set.") except SynapseHTTPError: raise AccessCredentialsError(self.storageFileview) - except ValueError: - raise MissingConfigValueError(("synapse", "master_fileview")) @staticmethod def login(token=None, access_token=None, input_token=None): @@ -1413,7 +1421,7 @@ def make_synapse_table(self, # remove rows self.syn.delete(existing_results) # wait for row deletion to finish on synapse before getting empty table - sleep(1) + sleep(10) # removes all current columns current_table = self.syn.get(existingTableId)