Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/add vlmd cli #15

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ In the notebooks directory there are jupyter notebooks that may be used to downl

These notebooks perform optimally within a HEAL Gen3 Workspace and the notebooks will be automatically installed to a user's workspace when the workspace is initiated. However, you may also use these notebooks on your local machine.

### VLMD validation
### VLMD extraction and validation

The [VLMD validation docs](heal/vlmd/README.md) describe how to use the SDK for validating VLMD dictionaries.
The [VLMD documentation](heal/vlmd/README.md) describes how to use the SDK for extracting and validating VLMD dictionaries.

### Run tests

Expand Down Expand Up @@ -49,3 +49,22 @@ The specification can also be listed in requirements.txt file
```
pip install -e git+https://github.com/uc-cdis/[email protected]#egg=heal
```

### CLI

The SDK exposes a Command Line Interface (CLI) for some functions.

The CLI can be invoked as follows

`heal [OPTIONS] COMMAND [ARGS]`

For a list of commands and options run

`heal --help`

For example, the following can validate a VLMD file in csv format:

`heal vlmd validate --input_file "vlmd_for_validation.csv"`

The [VLMD documentation](heal/vlmd/README.md) provides information on
using the VLMD functions, such as `extract` and `validate`.
Empty file added heal/cli/__init__.py
Empty file.
34 changes: 34 additions & 0 deletions heal/cli/extract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import click

from cdislogging import get_logger

from heal.vlmd.extract.extract import vlmd_extract
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This won't work here until the extract PR is merged and we pull master into this PR.


# Module-level logger. Pass the module's real ``__name__`` (not the literal
# string "__name__") so the logger is named after this module.
logging = get_logger(__name__)


# CLI command: `heal vlmd extract --input_file FILE [--output_dir DIR]`.
# Delegates to `vlmd_extract`; any exception it raises is logged and re-raised
# so the caller still sees the failure.
@click.command()
@click.option(
    "--input_file",
    "input_file",
    help="name of file to extract HEAL-compliant VLMD file",
    # The input file is only read, so it must not be required to be writable
    # (a read-only input would otherwise be rejected by click).
    type=click.Path(readable=True),
)
@click.option(
    "--output_dir",
    "output_dir",
    # NOTE(review): the trailing apostrophe looks like a typo, but the help
    # text appears to be asserted verbatim by the CLI tests, so it is kept.
    help="directory to write converted dictionary'",
    default=".",
    type=click.Path(writable=True),
    show_default=True,
)
def extract(input_file, output_dir):
    """Extract HEAL-compliant VLMD file from input file"""
    # The docstring above doubles as the click help text; keep it short.
    logging.info(f"Extracting VLMD from {input_file}")

    try:
        vlmd_extract(input_file, output_dir=output_dir)
    except Exception as e:
        # Surface the reason in the log, then propagate the original
        # exception unchanged (bare `raise` keeps the traceback intact).
        logging.warning(str(e))
        raise
31 changes: 31 additions & 0 deletions heal/cli/heal_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import click
import logging

import cdislogging
import heal.cli.vlmd as vlmd


# Top-level click group for the `heal` command. Sub-command groups are
# attached to it after the definition.
@click.group()
@click.option(
    "--silent",
    "silent",
    is_flag=True,
    default=False,
    help="don't show ANY logs",
)
@click.pass_context
def main(ctx, silent):
    """HEAL-Platform SDK Command Line Interface"""
    # The docstring above doubles as the click help text; keep it short.
    # Guarantee that ctx.obj is a dict.
    ctx.ensure_object(dict)

    if silent:
        # The logger still needs to be defined; the log level passed here
        # does not really matter because all logging is disabled right after.
        # The returned logger object itself is not needed.
        cdislogging.get_logger("heal_cli", log_level="debug")
        # Disable every log record at CRITICAL severity and below.
        logging.disable(logging.CRITICAL)


# Attach the `vlmd` command group to the top-level `heal` group.
main.add_command(vlmd.vlmd)
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
27 changes: 27 additions & 0 deletions heal/cli/validate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import click

from cdislogging import get_logger

from heal.vlmd.validate.validate import vlmd_validate

# Module-level logger. Pass the module's real ``__name__`` (not the literal
# string "__name__") so the logger is named after this module.
logging = get_logger(__name__)


# CLI command: `heal vlmd validate --input_file FILE`.
# Delegates to `vlmd_validate`; any exception it raises is logged and
# re-raised, otherwise "Valid" is logged.
@click.command()
@click.option(
    "--input_file",
    "input_file",
    help="name of file to validate",
    # The input file is only read, so it must not be required to be writable
    # (a read-only input would otherwise be rejected by click).
    type=click.Path(readable=True),
)
def validate(input_file):
    """Validate VLMD input file"""
    # The docstring above doubles as the click help text; keep it short.
    logging.info(f"Validating VLMD file {input_file}")

    try:
        vlmd_validate(input_file)
    except Exception as e:
        # Surface the reason in the log, then propagate the original
        # exception unchanged (bare `raise` keeps the traceback intact).
        logging.warning(str(e))
        raise
    else:
        # Only reached when validation raised nothing.
        logging.info("Valid")
19 changes: 19 additions & 0 deletions heal/cli/vlmd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import click

from heal.cli import extract, validate


# NOTE(review): the installed `heal` script targets heal.cli.heal_cli:main,
# and nothing visible in this module attaches commands to this group --
# confirm whether it is still needed.
@click.group()
def main():
    """HEAL Command Line Interface"""


# Container group for the VLMD sub-commands; the body is intentionally empty
# because the group only dispatches to the commands registered on it.
@click.group()
def vlmd():
    """Commands for VLMD"""


# Register each VLMD sub-command on the group, in the same order as before.
for _command in (extract.extract, validate.validate):
    vlmd.add_command(_command)
55 changes: 52 additions & 3 deletions heal/vlmd/README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,31 @@
# VLMD methods

## VLMD extract

The extract module implements extraction and conversion of dictionaries into different formats.

The current formats are csv, json, and tsv.

The `vlmd_extract()` method raises a `jsonschema.ValidationError` for invalid input files and raises
`ExtractionError` for any other type of error.

Example extraction code:

```
from jsonschema import ValidationError

from heal.vlmd.extract.extract import vlmd_extract

try:
vlmd_extract("vlmd_for_extraction.csv", output_dir="./output")

except ValidationError as v_err:
# handle validation error

except ExtractionError as e_err:
# handle extraction error
```

## VLMD validation

This module validates VLMD data dictionaries against stored schemas.
Expand All @@ -21,13 +47,36 @@ except ValidationError as e:

```

### Adding new validators
## Adding new file types for extraction and validation

The module currently validates the following types of dictionaries: csv, json, tsv.
The above modules currently handle the following types of dictionaries: csv, json, tsv.

To add code for a new dictionary file type:

* Create a new schema for the data type or validate against the existing json schema
* Create a new validator module for the new file type
* Call the new module from the `validator.py` module
* Call the new validator module from the `validator.py` module
* Create a new extractor module for the new file type
* Call the new extractor module from the `conversion.py` module
* Add new file writing utilities if needed
* Create unit tests as needed for new code


## CLI

The CLI can be invoked as follows

`heal [OPTIONS] COMMAND [ARGS]`

For a list of VLMD commands and options run

`heal vlmd --help`

For example, the following can validate a VLMD file in csv format:

`heal vlmd validate --input_file "vlmd_for_validation.csv"`

The following would extract a json format VLMD file from a csv format input file and
write a json file in the directory `output`:

`heal vlmd extract --input_file "vlmd_for_extraction.csv" --output_dir "./output"`
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "heal"
version = "0.1.2"
description = "HEAL Platform SDK"
authors = ["Center for Translational Data Science at the University of Chicago <support@datacommons.io>"]
authors = ["Center for Translational Data Science at the University of Chicago <support@gen3.org>"]
license = "Apache-2.0"
packages = [
{ include = "heal" },
Expand All @@ -19,6 +19,9 @@ pytest = "^7.0.0"
pytest-cov = "*"
requests-mock = "*"

[tool.poetry.scripts]
heal = "heal.cli.heal_cli:main"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
Expand Down
83 changes: 83 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import os

from click.testing import CliRunner
import pytest

import heal.vlmd.file_utils as file_utils
import heal.cli.heal_cli as cli_module


def test_help():
    """Top-level ``--help`` exits cleanly and lists the ``vlmd`` group."""
    outcome = CliRunner().invoke(cli_module.main, ["--help"])

    assert outcome.exit_code == 0
    assert "HEAL-Platform SDK Command Line Interface" in outcome.output
    # Every registered command group must be listed with its summary.
    for listing in ["vlmd Commands for VLMD"]:
        assert listing in outcome.output


def test_vlmd_help():
    """``vlmd --help`` exits cleanly and lists both VLMD sub-commands."""
    outcome = CliRunner().invoke(cli_module.main, ["vlmd", "--help"])

    assert outcome.exit_code == 0
    assert "Commands for VLMD" in outcome.output
    # Each sub-command must be listed with its summary.
    listings = (
        "extract Extract HEAL-compliant VLMD file from input file",
        "validate Validate VLMD input file",
    )
    for listing in listings:
        assert listing in outcome.output


def test_extract_help():
    """``vlmd extract --help`` exits cleanly and documents both options."""
    outcome = CliRunner().invoke(cli_module.main, ["vlmd", "extract", "--help"])

    assert outcome.exit_code == 0
    assert "Extract HEAL-compliant VLMD file from input file" in outcome.output
    # Each option must be listed with its help text.
    option_lines = (
        "--input_file PATH name of file to extract HEAL-compliant VLMD file",
        "--output_dir PATH directory to write converted dictionary' [default: .]",
    )
    for option_line in option_lines:
        assert option_line in outcome.output


def test_extract(tmp_path):
    """``vlmd extract`` on a valid csv writes the json file into ``tmp_path``."""
    runner = CliRunner()
    input_file = "tests/test_data/vlmd/valid/vlmd_valid.csv"
    expected_output_file = file_utils.get_output_filepath(
        tmp_path, input_file, output_type="json"
    )
    print(f"Expected output file {expected_output_file}")
    result = runner.invoke(
        cli_module.main,
        [
            "vlmd",
            "extract",
            "--input_file",
            input_file,
            "--output_dir",
            # Command-line arguments are strings; convert the pathlib.Path
            # fixture explicitly instead of relying on click tolerating it.
            str(tmp_path),
        ],
    )
    assert result.exit_code == 0
    assert os.path.isfile(expected_output_file)


def test_validate_help():
    """``vlmd validate --help`` exits cleanly and documents ``--input_file``."""
    outcome = CliRunner().invoke(cli_module.main, ["vlmd", "validate", "--help"])

    assert outcome.exit_code == 0
    assert "Validate VLMD input file" in outcome.output
    # Each option must be listed with its help text.
    option_lines = ("--input_file PATH name of file to validate",)
    for option_line in option_lines:
        assert option_line in outcome.output


def test_validate(tmp_path):
    """``vlmd validate`` on a known-valid json dictionary exits with code 0."""
    valid_dictionary = "tests/test_data/vlmd/valid/vlmd_valid.json"
    cli_args = ["vlmd", "validate", "--input_file", valid_dictionary]

    outcome = CliRunner().invoke(cli_module.main, cli_args)

    assert outcome.exit_code == 0
Loading