diff --git a/README.md b/README.md index 5c3df66..be500f6 100644 --- a/README.md +++ b/README.md @@ -17,11 +17,13 @@ These notebooks perform optimally within a HEAL Gen3 Workspace and the notebooks ### VLMD extraction and validation -The [VLMD docs](heal/vlmd/README.md) describe how to use the SDK for extracting and validating VLMD dictionaries. + +The [VLMD documentation](heal/vlmd/README.md) describes how to use the SDK for extracting and validating VLMD dictionaries. + ### Run tests -``` +```bash poetry run pytest -vv tests ``` @@ -33,19 +35,38 @@ reference the git repo. As an example, `pip install` can be called from the command line for getting the master branch of the `heal-platform-sdk`, -``` +```bash pip install -e git+https://github.com/uc-cdis/heal-platform-sdk.git#egg=heal ``` or a particular branch, eg, -``` +```bash pip install -e git+https://github.com/uc-cdis/heal-platform-sdk.git@my-branch#egg=heal ``` The specification can also be listed in requirements.txt file (with, say, a tag specification of 0.1.0) -``` +```bash pip install -e git+https://github.com/uc-cdis/heal-platform-sdk.git@0.1.0#egg=heal ``` + +### CLI + +The SDK exposes a Command Line Interface (CLI) for some functions. + +The CLI can be invoked as follows + +`heal [OPTIONS] COMMAND [ARGS]` + +For a list of commands and options run + +`heal --help` + +For example, the following can validate a VLMD file in csv format: + +`heal vlmd validate --input_file "vlmd_for_validation.csv"` + +The [VLMD documentation](heal/vlmd/README.md) provides information on +using the VLMD functions, such as `extract` and `validate`. 
diff --git a/heal/cli/__init__.py b/heal/cli/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/heal/cli/extract.py b/heal/cli/extract.py new file mode 100644 index 0000000..0f1d07a --- /dev/null +++ b/heal/cli/extract.py @@ -0,0 +1,35 @@ +import click +from cdislogging import get_logger + +from heal.vlmd.extract.extract import vlmd_extract + +logger = get_logger(__name__) + + +@click.command() +@click.option( + "--input_file", + "input_file", + required=True, + help="name of file to extract HEAL-compliant VLMD file", + type=click.Path(exists=True, readable=True), +) +@click.option( + "--output_dir", + "output_dir", + help="directory to write converted dictionary'", + default=".", + type=click.Path(writable=True), + show_default=True, +) +def extract(input_file, output_dir): + """Extract HEAL-compliant VLMD file from input file""" + + logger.info(f"Extracting VLMD from {input_file}") + + try: + vlmd_extract(input_file, output_dir=output_dir) + except Exception as e: + logger.error(f"Extraction error {str(e)}") + # exit non-zero so shells and CI can detect the failed extraction + raise SystemExit(1) from e diff --git a/heal/cli/heal_cli.py b/heal/cli/heal_cli.py new file mode 100644 index 0000000..57a2a2d --- /dev/null +++ b/heal/cli/heal_cli.py @@ -0,0 +1,32 @@ +import logging + +import cdislogging +import click + +import heal.cli.vlmd as vlmd + + +@click.group() +@click.option( + "--silent", + "silent", + is_flag=True, + default=False, + help="don't show ANY logs", +) +@click.pass_context +def main(ctx, silent): + """HEAL-Platform SDK Command Line Interface""" + ctx.ensure_object(dict) + + if silent: + # we still need to define the logger, the log_level here doesn't + # really matter b/c we immediately disable all logging + cdislogging.get_logger("heal_cli", log_level="debug") + # disables all logging + logging.disable(logging.CRITICAL) + + +main.add_command(vlmd.vlmd) +if __name__ == "__main__": + main() diff --git a/heal/cli/validate.py b/heal/cli/validate.py new file mode 100644 index 0000000..0052bc5 --- /dev/null +++ 
b/heal/cli/validate.py @@ -0,0 +1,29 @@ +import click +from cdislogging import get_logger + +from heal.vlmd.validate.validate import vlmd_validate + +logger = get_logger(__name__) + + +@click.command() +@click.option( + "--input_file", + "input_file", + required=True, + help="name of file to validate", + type=click.Path(exists=True, readable=True), +) +def validate(input_file): + """Validate VLMD input file""" + + logger.info(f"Validating VLMD file {input_file}") + + try: + vlmd_validate(input_file) + logger.info("Valid") + except Exception as e: + logger.error(f"Validation error {str(e)}") + logger.error("Invalid file") + # exit non-zero so shells and CI can detect the invalid file + raise SystemExit(1) from e diff --git a/heal/cli/vlmd.py b/heal/cli/vlmd.py new file mode 100644 index 0000000..6026b79 --- /dev/null +++ b/heal/cli/vlmd.py @@ -0,0 +1,19 @@ +import click + +from heal.cli import extract, validate + + +@click.group() +def main(): + """HEAL Command Line Interface""" + pass + + +@click.group() +def vlmd(): + """Commands for VLMD""" + pass + + +vlmd.add_command(extract.extract) +vlmd.add_command(validate.validate) diff --git a/heal/vlmd/README.md b/heal/vlmd/README.md index 01c0ebe..65c3474 100644 --- a/heal/vlmd/README.md +++ b/heal/vlmd/README.md @@ -1,5 +1,31 @@ # VLMD methods +## VLMD extract + +The extract module implements extraction and conversion of dictionaries into different formats. + +The current formats are csv, json, and tsv. + +The `vlmd_extract()` method raises a `jsonschema.ValidationError` for invalid input files and raises +`ExtractionError` for any other type of error. + +Example extraction code: + +```python +from jsonschema import ValidationError + +from heal.vlmd import vlmd_extract, ExtractionError + +try: + vlmd_extract("vlmd_for_extraction.csv", output_dir="./output") + +except ValidationError as v_err: + # handle validation error + +except ExtractionError as e_err: + # handle extraction error +``` + ## VLMD validation This module validates VLMD data dictionaries against stored schemas. 
The `vlmd_validate()` method @@ -10,7 +36,7 @@ will raise an `ExtractionError` if the input_file cannot be converted Example validation code: -``` +```python from jsonschema import ValidationError from heal.vlmd import vlmd_validate, ExtractionError @@ -38,7 +64,7 @@ and raises an `ExtractionError` for any other type of error. Example extraction code: -``` +```python from jsonschema import ValidationError from heal.vlmd import vlmd_extract, ExtractionError @@ -70,3 +96,23 @@ To add code for a new dictionary file type: * Call the new extractor module from the `conversion.py` module * Add new file writing utilities if saving converted dictionaries in the new format * Create unit tests as needed for new code + + +## CLI + +The CLI can be invoked as follows + +`heal [OPTIONS] COMMAND [ARGS]` + +For a list of VLMD commands and options run + +`heal vlmd --help` + +For example, the following can validate a VLMD file in csv format: + +`heal vlmd validate --input_file "vlmd_for_validation.csv"` + +The following would extract a json format VLMD file from a csv format input file and +write a json file in the directory `output`: + +`heal vlmd extract --input_file "vlmd_for_extraction.csv" --output_dir "./output"` diff --git a/heal/vlmd/extract/extract.py b/heal/vlmd/extract/extract.py index 846c79b..a4f7b9a 100644 --- a/heal/vlmd/extract/extract.py +++ b/heal/vlmd/extract/extract.py @@ -42,19 +42,25 @@ def vlmd_extract( file_suffix = Path(input_file).suffix.replace(".", "") if file_suffix not in ALLOWED_INPUT_TYPES: - raise ExtractionError(f"Input file must be one of {ALLOWED_INPUT_TYPES}") + message = f"Input file must be one of {ALLOWED_INPUT_TYPES}" + logger.error(message) + raise ExtractionError(message) if not isfile(input_file): - raise ExtractionError(f"Input file does not exist: {input_file}") + message = f"Input file does not exist: {input_file}" + logger.error(message) + raise ExtractionError(message) if file_type not in ALLOWED_FILE_TYPES: - raise 
ExtractionError(f"File type must be one of {ALLOWED_FILE_TYPES}") + message = f"File type must be one of {ALLOWED_FILE_TYPES}" + logger.error(message) + raise ExtractionError(message) if file_type == "auto": file_type = file_suffix if output_type not in ALLOWED_OUTPUT_TYPES: - raise ExtractionError( - f"Unrecognized output_type '{output_type}' - should be in {ALLOWED_OUTPUT_TYPES}" - ) + message = f"Unrecognized output_type '{output_type}' - should be in {ALLOWED_OUTPUT_TYPES}" + logger.error(message) + raise ExtractionError(message) # validate try: diff --git a/heal/vlmd/validate/validate.py b/heal/vlmd/validate/validate.py index e4b23be..01d6857 100644 --- a/heal/vlmd/validate/validate.py +++ b/heal/vlmd/validate/validate.py @@ -62,21 +62,29 @@ def vlmd_validate( ) file_suffix = Path(input_file).suffix.replace(".", "") if file_suffix not in ALLOWED_INPUT_TYPES: - raise ValueError(f"Input file must be one of {ALLOWED_INPUT_TYPES}") + message = f"Input file must be one of {ALLOWED_INPUT_TYPES}" + logger.error(message) + raise ValueError(message) if not isfile(input_file): - raise IOError(f"Input file does not exist: {input_file}") + message = f"Input file does not exist: {input_file}" + logger.error(message) + raise IOError(message) if schema_type not in ALLOWED_SCHEMA_TYPES: - raise ValueError(f"Schema type must be in {ALLOWED_SCHEMA_TYPES}") + message = f"Schema type must be in {ALLOWED_SCHEMA_TYPES}" + logger.error(message) + raise ValueError(message) schema = get_schema(input_file, schema_type) if schema is None: - raise ValueError(f"Could not get schema for type = {schema_type}") + message = f"Could not get schema for type = {schema_type}" + logger.error(message) + raise ValueError(message) output_type = output_type if output_type else "json" if output_type not in ALLOWED_OUTPUT_TYPES: - raise ValueError( - f"Unrecognized output_type '{output_type}' - should be in {ALLOWED_OUTPUT_TYPES}" - ) + message = f"Unrecognized output_type '{output_type}' - should be in 
{ALLOWED_OUTPUT_TYPES}" + logger.error(message) + raise ValueError(message) # TODO: We need this for csv - see if we can add this to get_schema if file_suffix in ["csv", "tsv"]: @@ -89,7 +97,9 @@ def vlmd_validate( logger.debug("Getting csv data from file") data = read_delim(input_file).to_dict(orient="records") if len(data) == 0: - raise ValidationError("Could not read csv data from input") + message = "Could not read csv data from input" + logger.error(message) + raise ValidationError(message) elif file_suffix == "json": logger.debug("Getting json data from file") data = read_data_from_json_file(input_file) @@ -106,9 +116,9 @@ def vlmd_validate( # convert input_type = file_type_to_fxn_map.get(file_suffix) if not input_type: - raise ExtractionError( - f"Could not get conversion function from file_suffix '{file_suffix}'" - ) + message = f"Could not get conversion function from file_suffix '{file_suffix}'" + logger.error(message) + raise ExtractionError(message) data_dictionaries = {} logger.debug(f"Verifying vlmd can be converted using input_type '{input_type}'") data_dictionary_props = {} @@ -139,7 +149,9 @@ def vlmd_validate( # TODO: see if we can add this to get_schema schema = add_types_to_props(schema) if schema is None: - raise ValueError(f"Could not get schema for type = {schema_type}") + message = f"Could not get schema for type = {schema_type}" + logger.error(message) + raise ValueError(message) try: jsonschema.validate(instance=converted_dictionary, schema=schema) diff --git a/pyproject.toml b/pyproject.toml index 9551243..6894124 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,9 @@ pytest = "^7.0.0" pytest-cov = "*" requests-mock = "*" +[tool.poetry.scripts] +heal = "heal.cli.heal_cli:main" + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" diff --git a/tests/conftest.py b/tests/conftest.py index 31b75a4..d28d12c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,5 @@ import json + import 
pytest from heal.vlmd.config import ( diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..f0cceef --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,108 @@ +import os + +from click.testing import CliRunner + +import heal.cli.heal_cli as cli_module +import heal.vlmd.file_utils as file_utils + + +def test_help(): + """Test the help menu""" + runner = CliRunner() + expected_text = "HEAL-Platform SDK Command Line Interface" + expected_commands = ["vlmd Commands for VLMD"] + result = runner.invoke(cli_module.main, ["--help"]) + assert result.exit_code == 0 + assert expected_text in result.output + for command_text in expected_commands: + assert command_text in result.output + + +def test_vlmd_help(): + """Test the VLMD submenu""" + runner = CliRunner() + expected_text = "Commands for VLMD" + expected_commands = [ + "extract Extract HEAL-compliant VLMD file from input file", + "validate Validate VLMD input file", + ] + result = runner.invoke(cli_module.main, ["vlmd", "--help"]) + assert result.exit_code == 0 + assert expected_text in result.output + for command_text in expected_commands: + assert command_text in result.output + + +def test_extract_help(): + """Test the extract submenu""" + runner = CliRunner() + expected_text = "Extract HEAL-compliant VLMD file from input file" + expected_commands = [ + "--input_file PATH name of file to extract HEAL-compliant VLMD file", + "--output_dir PATH directory to write converted dictionary' [default: .]", + ] + result = runner.invoke(cli_module.main, ["vlmd", "extract", "--help"]) + assert result.exit_code == 0 + assert expected_text in result.output + for command_text in expected_commands: + assert command_text in result.output + + +def test_extract(tmp_path): + """Test the cli extract""" + runner = CliRunner() + input_file = "tests/test_data/vlmd/valid/vlmd_valid.csv" + expected_output_file = file_utils.get_output_filepath( + tmp_path, input_file, output_type="json" + ) + result = 
runner.invoke( + cli_module.main, + ["vlmd", "extract", "--input_file", input_file, "--output_dir", tmp_path], + ) + assert result.exit_code == 0 + assert os.path.isfile(expected_output_file) + + +def test_extract_missing_input_file(tmp_path): + """Test the cli extract""" + runner = CliRunner() + input_file = None + result = runner.invoke( + cli_module.main, + ["vlmd", "extract", "--input_file", input_file, "--output_dir", tmp_path], + ) + assert result.exit_code != 0 + + +def test_validate_help(): + """Test the validate submenu""" + runner = CliRunner() + expected_text = "Validate VLMD input file" + expected_commands = [ + "--input_file PATH name of file to validate", + ] + result = runner.invoke(cli_module.main, ["vlmd", "validate", "--help"]) + assert result.exit_code == 0 + assert expected_text in result.output + for command_text in expected_commands: + assert command_text in result.output + + +def test_validate(tmp_path): + """Test the cli validation""" + runner = CliRunner() + input_file = "tests/test_data/vlmd/valid/vlmd_valid.json" + result = runner.invoke( + cli_module.main, ["vlmd", "validate", "--input_file", input_file] + ) + assert result.exit_code == 0 + + +def test_validate_missing_input_file(tmp_path): + """Test the cli validation""" + runner = CliRunner() + input_file = None + result = runner.invoke( + cli_module.main, ["vlmd", "validate", "--input_file", input_file] + ) + assert result.exit_code != 0