Skip to content
This repository has been archived by the owner on Oct 15, 2020. It is now read-only.

Initial cli #8

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ ignore =
[isort]
default_section = THIRDPARTY
known_first_party = sgkit
known_third_party = dask,numpy,pysnptools,pytest,xarray
known_third_party = dask,numpy,pysnptools,pytest,setuptools,xarray
multi_line_output = 3
include_trailing_comma = True
force_grid_wrap = 0
Expand All @@ -62,3 +62,7 @@ ignore_missing_imports = True

[mypy-sgkit_plink.tests.*]
disallow_untyped_defs = False

[options.entry_points]
console_scripts =
plink2sg = sgkit_plink.cli:plink2sg_main
4 changes: 4 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env python
# Minimal setuptools entry point: the only argument passed to setup() here
# is use_scm_version=True.
# NOTE(review): presumably the version is derived from source-control
# metadata via setuptools_scm and the remaining package metadata lives in
# setup.cfg -- confirm.
from setuptools import setup

setup(use_scm_version=True)
4 changes: 4 additions & 0 deletions sgkit_plink/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Lets the package be executed with ``python -m sgkit_plink``.
from . import cli

if __name__ == "__main__":
    # Hand over to the subcommand dispatcher defined in the cli module.
    cli.sgkit_plink_main()
95 changes: 95 additions & 0 deletions sgkit_plink/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""
Command line utilities for conversion.
"""
import argparse
import os
import signal

from dask.diagnostics import ProgressBar

from . import read_plink


def set_sigpipe_handler():
    """
    Install the default SIGPIPE action when running on a POSIX platform.

    Does nothing when ``os.name`` is anything other than ``"posix"``.
    """
    if os.name != "posix":
        return
    # With the default action a SIGPIPE quietly kills the program.
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)


def run_plink2sg(args):
# TODO add args for bim and fam seps
# TODO add logging and verbosity so we can write out info messages
# about what we're doing here.
ds = read_plink(args.plink_path, bim_sep=" ", fam_sep=" ")

# TODO add a command line option for this progress bar.
with ProgressBar():
jeromekelleher marked this conversation as resolved.
Show resolved Hide resolved

# TODO Do the dask compute manually so that we can have
# more influence over the number of threads used.

# TODO add options to give more control over the zarr
# format used. We probably want to have the option of
# writing to a ZipFile anyway, so that we don't have
# gazillions of files.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW, here's an example that sets custom numcodecs filters for Zarr: https://github.com/related-sciences/gwas-analysis/blob/master/notebooks/platform/xarray/lib/io/core.py#L60. I can see that becoming another thing that we do often between read_* and ds.to_zarr in addition to using options for zipping files and maybe rechunking.

# TODO catch keyboard interrupts here, clean up the
# zarr file, and write a meaningful error message.
ds.to_zarr(args.sgkit_path, mode="w")
# TODO write out an INFO summary of the size of the
# input and output


def add_plink2sg_arguments(parser):
    """
    Register the plink2sg positional arguments on ``parser``.

    ``plink_path`` is added first and ``sgkit_path`` second, so they are
    expected on the command line in that order.
    """
    # TODO add a -V/--version option (action="version") once the package
    # defines a __version__ to report.
    positionals = (
        ("plink_path", "The plink dataset to read"),
        ("sgkit_path", "The path to write the converted dataset to"),
    )
    for dest, help_text in positionals:
        parser.add_argument(dest, help=help_text)


def get_plink2sg_parser(parser=None):
    """
    Return an argument parser configured for the plink2sg command.

    If ``parser`` is given, the plink2sg arguments are added to it and the
    same object is returned; otherwise a new ``argparse.ArgumentParser`` is
    created. The returned parser always has its ``runner`` default set to
    ``run_plink2sg``, matching the plink2sg subparser built by
    ``get_sgkit_plink_parser``.
    """
    if parser is None:
        parser = argparse.ArgumentParser(
            description="Convert plink files to sgkit format"
        )
    # Callers dispatch through ``args.runner(args)``; without this default
    # the parsed namespace has no ``runner`` attribute at all.
    parser.set_defaults(runner=run_plink2sg)
    add_plink2sg_arguments(parser)
    return parser


def get_sgkit_plink_parser():
top_parser = argparse.ArgumentParser(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have any strong opinions on using argparse vs Click or Fire?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, haven't got strong opinions. I haven't tried either of these. How about I get something basic up and running with argparse, and we can weigh up the pros and cons of external packages based on this?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's also tiangolo/typer which is not likely to be our solution but is worth knowing about. I like it because it's from the same author as FastAPI so you get a consistent experience across CLI and web Interfaces.

description=("Utilities for converting data from plink to sgkit and vice versa")
)
subparsers = top_parser.add_subparsers(dest="subcommand")
subparsers.required = True
parser = subparsers.add_parser("plink2sg")
parser.set_defaults(runner=run_plink2sg)
add_plink2sg_arguments(parser)
# TODO add sg2plink also
return top_parser


def plink2sg_main(arg_list=None):
    """
    Top-level hook for the plink2sg console script.

    ``arg_list`` is the list of argument strings to parse; when it is None,
    argparse falls back to ``sys.argv[1:]``.
    """
    parser = get_plink2sg_parser()
    # Same SIGPIPE behaviour as the ``python -m sgkit_plink`` entry point.
    set_sigpipe_handler()
    args = parser.parse_args(arg_list)
    # This entry point only ever performs the plink2sg conversion, so call it
    # directly rather than relying on a ``runner`` attribute that is present
    # only when the parser has registered one via set_defaults().
    run_plink2sg(args)


def sgkit_plink_main(arg_list=None):
    """
    Entry point used by ``python -m sgkit_plink``. Parses the subcommand
    (plink2sg or sg2plink) and hands the parsed arguments to the runner
    registered for it.
    """
    cli_parser = get_sgkit_plink_parser()
    set_sigpipe_handler()
    parsed = cli_parser.parse_args(arg_list)
    handler = parsed.runner
    handler(parsed)