uc-cdis · george42-ctds · Nov 11, 2024 · Nov 11, 2024 · Nov 11, 2024 · Nov 11, 2024
diff --git a/README.md b/README.md
@@ -15,9 +15,9 @@ In the notebooks directory there are jupyter notebooks that may be used to downl
 
 These notebooks perform optimally within a HEAL Gen3 Workspace and the notebooks will be automatically installed to a user's workspace when the workspace is initiated. However, you may also use these notebooks on your local machine.
 
-### VLMD validation
+### VLMD extraction and validation
 
-The [VLMD validation docs](heal/vlmd/README.md) describe how to use the SDK for validating VLMD dictionaries.
+The [VLMD documentation](heal/vlmd/README.md) describes how to use the SDK for extracting and validating VLMD dictionaries.
 
 ### Run tests
 
@@ -49,3 +49,22 @@ The specification can also be listed in requirements.txt file
 ```
 pip install -e git+https://github.com/uc-cdis/[email protected]#egg=heal
 ```
+
+### CLI
+
+The SDK exposes a Command Line Interface (CLI) for some functions.
+
+The CLI can be invoked as follows
+
+`heal [OPTIONS] COMMAND [ARGS]`
+
+For a list of commands and options run
+
+`heal --help`
+
+For example, the following can validate a VLMD file in csv format:
+
+`heal vlmd validate --input_file "vlmd_for_validation.csv"`
+
+The [VLMD documentation](heal/vlmd/README.md) provides information on
+using the VLMD functions, such as `extract` and `validate`.
diff --git a/heal/cli/__init__.py b/heal/cli/__init__.py
diff --git a/heal/cli/extract.py b/heal/cli/extract.py
@@ -0,0 +1,34 @@
+import click
+
+from cdislogging import get_logger
+
+from heal.vlmd.extract.extract import vlmd_extract
+
+logging = get_logger("__name__")
+
+
+@click.command()
+@click.option(
+    "--input_file",
+    "input_file",
+    help="name of file to extract HEAL-compliant VLMD file",
+    type=click.Path(writable=True),
+)
+@click.option(
+    "--output_dir",
+    "output_dir",
+    help="directory to write converted dictionary'",
+    default=".",
+    type=click.Path(writable=True),
+    show_default=True,
+)
+def extract(input_file, output_dir):
+    """Extract HEAL-compliant VLMD file from input file"""
+
+    logging.info(f"Extracting VLMD from {input_file}")
+
+    try:
+        vlmd_extract(input_file, output_dir=output_dir)
+    except Exception as e:
+        logging.warning(str(e))
+        raise e
diff --git a/heal/cli/heal_cli.py b/heal/cli/heal_cli.py
@@ -0,0 +1,31 @@
+import click
+import logging
+
+import cdislogging
+import heal.cli.vlmd as vlmd
+
+
+@click.group()
+@click.option(
+    "--silent",
+    "silent",
+    is_flag=True,
+    default=False,
+    help="don't show ANY logs",
+)
+@click.pass_context
+def main(ctx, silent):
+    """HEAL-Platform SDK Command Line Interface"""
+    ctx.ensure_object(dict)
+
+    if silent:
+        # we still need to define the logger, the log_level here doesn't
+        # really matter b/c we immediately disable all logging
+        logger = cdislogging.get_logger("heal_cli", log_level="debug")
+        # disables all logging
+        logging.disable(logging.CRITICAL)
+
+
+main.add_command(vlmd.vlmd)  # attach the `vlmd` group under the top-level CLI
+if __name__ == "__main__":
+    main()  # run the CLI when this module is executed directly
diff --git a/heal/cli/validate.py b/heal/cli/validate.py
@@ -0,0 +1,27 @@
+import click
+
+from cdislogging import get_logger
+
+from heal.vlmd.validate.validate import vlmd_validate
+
+logging = get_logger("__name__")
+
+
+@click.command()
+@click.option(
+    "--input_file",
+    "input_file",
+    help="name of file to validate",
+    type=click.Path(writable=True),
+)
+def validate(input_file):
+    """Validate VLMD input file"""
+
+    logging.info(f"Validating VLMD file{input_file}")
+
+    try:
+        vlmd_validate(input_file)
+    except Exception as e:
+        logging.warning(str(e))
+        raise e
+    logging.info("Valid")
diff --git a/heal/cli/vlmd.py b/heal/cli/vlmd.py
@@ -0,0 +1,19 @@
+import click
+
+from heal.cli import extract, validate
+
+
+@click.group()
+def main():
+    """HEAL Command Line Interface"""
+    pass
+
+
+@click.group()
+def vlmd():
+    """Commands for VLMD"""
+    pass
+
+
+vlmd.add_command(extract.extract)  # exposed as `vlmd extract`
+vlmd.add_command(validate.validate)  # exposed as `vlmd validate`
diff --git a/heal/vlmd/README.md b/heal/vlmd/README.md
@@ -1,5 +1,31 @@
 # VLMD methods
 
+## VLMD extract
+
+The extract module implements extraction and conversion of dictionaries into different formats.
+
+The current formats are csv, json, and tsv.
+
+The `vlmd_extract()` method raises a `jsonschema.ValidationError` for invalid input files and raises
+`ExtractionError` for any other type of error.
+
+Example extraction code:
+
+```
+from jsonschema import ValidationError
+
+from healsdk.vlmd import vlmd_extract
+
+try:
+  vlmd_extract("vlmd_for_extraction.csv", output_dir="./output")
+
+except ValidationError as v_err:
+  # handle validation error
+
+except ExtractionError as e_err:
+  # handle extraction error
+```
+
 ## VLMD validation
 
 This module validates VLMD data dictionaries against stored schemas.
@@ -21,13 +47,36 @@ except ValidationError as e:
 
 ```
 
-### Adding new validators
+## Adding new file types for extraction and validation
 
-The module currently validates the following types of dictionaries: csv, json, tsv.
+The above modules currently handle the following types of dictionaries: csv, json, tsv.
 
 To add code for a new dictionary file type:
 
 * Create a new schema for the data type or validate against the existing json schema
 * Create a new validator module for the new file type
-* Call the new module from the `validator.py` module
+* Call the new validator module from the `validator.py` module
+* Create a new extractor module for the new file type
+* Call the new extractor module from the `conversion.py` module
+* Add new file writing utilities if needed
 * Create unit tests as needed for new code
+
+
+## CLI
+
+The CLI can be invoked as follows
+
+`heal [OPTIONS] COMMAND [ARGS]`
+
+For a list of VLMD commands and options run
+
+`heal vlmd --help`
+
+For example, the following can validate a VLMD file in csv format:
+
+`heal vlmd validate --input_file "vlmd_for_validation.csv"`
+
+The following would extract a json format VLMD file from a csv format input file and
+write a json file in the directory `output`:
+
+`heal vlmd extract --input_file "vlmd_for_extraction.csv" --output_dir "./output"`
diff --git a/pyproject.toml b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "heal"
 version = "0.1.2"
 description = "HEAL Platform SDK"
-authors = ["Center for Translational Data Science at the University of Chicago <support@datacommons.io>"]
+authors = ["Center for Translational Data Science at the University of Chicago <support@gen3.org>"]
 license = "Apache-2.0"
 packages = [
     { include = "heal" },
@@ -19,6 +19,9 @@ pytest = "^7.0.0"
 pytest-cov = "*"
 requests-mock = "*"
 
+[tool.poetry.scripts]
+heal = "heal.cli.heal_cli:main"
+
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -0,0 +1,83 @@
+import os
+
+from click.testing import CliRunner
+import pytest
+
+import heal.vlmd.file_utils as file_utils
+import heal.cli.heal_cli as cli_module
+
+
+def test_help():
+    runner = CliRunner()
+    expected_text = "HEAL-Platform SDK Command Line Interface"
+    expected_commands = ["vlmd  Commands for VLMD"]
+    result = runner.invoke(cli_module.main, ["--help"])
+    assert result.exit_code == 0
+    assert expected_text in result.output
+    for command_text in expected_commands:
+        assert command_text in result.output
+
+
+def test_vlmd_help():
+    runner = CliRunner()
+    expected_text = "Commands for VLMD"
+    expected_commands = [
+        "extract   Extract HEAL-compliant VLMD file from input file",
+        "validate  Validate VLMD input file",
+    ]
+    result = runner.invoke(cli_module.main, ["vlmd", "--help"])
+    assert result.exit_code == 0
+    assert expected_text in result.output
+    for command_text in expected_commands:
+        assert command_text in result.output
+
+
+def test_extract_help():
+    runner = CliRunner()
+    expected_text = "Extract HEAL-compliant VLMD file from input file"
+    expected_commands = [
+        "--input_file PATH  name of file to extract HEAL-compliant VLMD file",
+        "--output_dir PATH  directory to write converted dictionary'  [default: .]",
+    ]
+    result = runner.invoke(cli_module.main, ["vlmd", "extract", "--help"])
+    assert result.exit_code == 0
+    assert expected_text in result.output
+    for command_text in expected_commands:
+        assert command_text in result.output
+
+
+def test_extract(tmp_path):
+    runner = CliRunner()
+    input_file = "tests/test_data/vlmd/valid/vlmd_valid.csv"
+    expected_output_file = file_utils.get_output_filepath(
+        tmp_path, input_file, output_type="json"
+    )
+    print(f"Expected output file {expected_output_file}")
+    result = runner.invoke(
+        cli_module.main,
+        ["vlmd", "extract", "--input_file", input_file, "--output_dir", tmp_path],
+    )
+    assert result.exit_code == 0
+    assert os.path.isfile(expected_output_file)
+
+
+def test_validate_help():
+    runner = CliRunner()
+    expected_text = "Validate VLMD input file"
+    expected_commands = [
+        "--input_file PATH  name of file to validate",
+    ]
+    result = runner.invoke(cli_module.main, ["vlmd", "validate", "--help"])
+    assert result.exit_code == 0
+    assert expected_text in result.output
+    for command_text in expected_commands:
+        assert command_text in result.output
+
+
+def test_validate(tmp_path):
+    runner = CliRunner()
+    input_file = "tests/test_data/vlmd/valid/vlmd_valid.json"
+    result = runner.invoke(
+        cli_module.main, ["vlmd", "validate", "--input_file", input_file]
+    )
+    assert result.exit_code == 0