
Commit

Merge branch 'main' into ingest
sellth committed Oct 9, 2023
2 parents 96a93e7 + e9c49b6 · commit 25baf0b
Showing 86 changed files with 644 additions and 2,080 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
@@ -66,7 +66,7 @@ jobs:
           lfs: true
 
       - name: Install mamba
-        run: conda install -y mamba==0.27.0
+        run: conda install -y mamba
 
       - name: Prepare environment.yaml file
         run: >
2 changes: 1 addition & 1 deletion README.md
@@ -12,7 +12,7 @@ Tooling for connecting GitLab, pipelines, and SODAR at CUBI.
 Prerequisites when using conda:
 
 ```bash
-$ conda create -n cubi-tk python=3.7
+$ conda create -n cubi-tk python=3.10
 $ conda activate cubi-tk
 ```

2 changes: 1 addition & 1 deletion cubi_tk/archive/readme.py
@@ -12,11 +12,11 @@
 
 import attr
 from cookiecutter.main import cookiecutter
+from cubi_isa_templates import IsaTabTemplate
 from logzero import logger
 
 from . import common
 from ..common import execute_shell_commands
-from ..isa_tpl import IsaTabTemplate
 
 _TEMPLATE_DIR = os.path.join(os.path.dirname(__file__), "templates")

4 changes: 3 additions & 1 deletion cubi_tk/common.py
@@ -158,9 +158,11 @@ def yield_files_recursively(path, print_=False, file=sys.stderr):
     """Recursively yield below path to ``file`` in sorted order, print optionally"""
     while len(path) > 1 and path[-1] == "/":  # trim trailing slashes
         path = path[:-1]  # pragma: no cover
-    paths = glob.glob(os.path.join(path, "**"))
+    paths = glob.glob(os.path.join(path, "**"), recursive=True)
     for p in sorted(paths):
         p = p[len(path) + 1 :]
+        if not p:
+            continue
         if print_:
             print(p, file=file)  # pragma: no cover
         yield p
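
A note on the two additions above: without `recursive=True`, the `**` pattern expands like `*` and only matches a single directory level, so the function did not actually recurse. With `recursive=True`, `**` also matches the starting directory itself, which strips down to an empty string once the path prefix is removed, hence the new `if not p` guard. A minimal standalone sketch of the fixed traversal (the directory path is hypothetical):

```python
import glob
import os

path = "/tmp/example_tree"  # hypothetical directory, for illustration only

# "**" with recursive=True matches the directory itself plus everything below it;
# without recursive=True it would expand only one level, like "*".
paths = glob.glob(os.path.join(path, "**"), recursive=True)
for p in sorted(paths):
    p = p[len(path) + 1 :]
    if not p:  # the match for the root directory strips to "", so skip it
        continue
    print(p)
```
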
33 changes: 26 additions & 7 deletions cubi_tk/irods/check.py
@@ -9,7 +9,10 @@
 import typing
 
 from irods.collection import iRODSCollection
+from irods.column import Like
 from irods.data_object import iRODSDataObject
+from irods.models import Collection as CollectionModel
+from irods.models import DataObject as DataObjectModel
 from irods.session import iRODSSession
 from logzero import logger
 import tqdm
@@ -103,16 +106,32 @@ def get_irods_error(cls, e: Exception):
         es = str(e)
         return es if es != "None" else e.__class__.__name__
 
-    def get_data_objs(self, root_coll: iRODSCollection):
+    def get_data_objs(
+        self, root_coll: iRODSCollection
+    ) -> typing.Dict[
+        str, typing.Union[typing.Dict[str, iRODSDataObject], typing.List[iRODSDataObject]]
+    ]:
         """Get data objects recursively under the given iRODS path."""
         data_objs = dict(files=[], checksums={})
         ignore_schemes = [k.lower() for k in HASH_SCHEMES if k != self.args.hash_scheme.upper()]
-        for res in root_coll.walk():
-            for obj in res[2]:
-                if obj.path.endswith("." + self.args.hash_scheme.lower()):
-                    data_objs["checksums"][obj.path] = obj
-                elif obj.path.split(".")[-1] not in ignore_schemes:
-                    data_objs["files"].append(obj)
+        irods_sess = root_coll.manager.sess
+
+        query = irods_sess.query(DataObjectModel, CollectionModel).filter(
+            Like(CollectionModel.name, f"{root_coll.path}%")
+        )
+
+        for res in query:
+            # If the 'res' dict is not split into Collection & Object, the resulting iRODSDataObject is not fully functional, likely because a name/path/... attribute is overwritten somewhere
+            coll_res = {k: v for k, v in res.items() if k.icat_id >= 500}
+            obj_res = {k: v for k, v in res.items() if k.icat_id < 500}
+            coll = iRODSCollection(root_coll.manager, coll_res)
+            obj = iRODSDataObject(irods_sess.data_objects, parent=coll, results=[obj_res])
+
+            if obj.path.endswith("." + self.args.hash_scheme.lower()):
+                data_objs["checksums"][obj.path] = obj
+            elif obj.path.split(".")[-1] not in ignore_schemes:
+                data_objs["files"].append(obj)
+
         return data_objs
 
     def check_args(self, _args):
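
The rewrite above replaces the per-collection `root_coll.walk()` traversal with a single GenQuery over the `DataObject` and `Collection` models, so all objects below the root collection are fetched in one request instead of one query per subcollection. A rough sketch of that query pattern with python-irodsclient, outside cubi-tk (host, credentials, and paths are placeholders, not values from this repository):

```python
from irods.column import Like
from irods.models import Collection as CollectionModel
from irods.models import DataObject as DataObjectModel
from irods.session import iRODSSession

# Placeholder connection settings; replace with a real zone and account.
with iRODSSession(
    host="irods.example.org", port=1247, user="alice", password="secret", zone="exampleZone"
) as session:
    root_path = "/exampleZone/home/alice/project"
    # One query across both models; each row carries Collection and DataObject columns.
    query = session.query(DataObjectModel, CollectionModel).filter(
        Like(CollectionModel.name, f"{root_path}%")
    )
    for row in query:
        # Collection name plus object name gives the full logical path.
        print(f"{row[CollectionModel.name]}/{row[DataObjectModel.name]}")
```

Splitting each row by `icat_id` (collection columns at 500 and above, data-object columns below) is what lets `get_data_objs` rebuild usable `iRODSCollection` and `iRODSDataObject` instances from the raw rows.
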
169 changes: 37 additions & 132 deletions cubi_tk/isa_tpl/__init__.py
@@ -18,152 +18,37 @@
Available Templates
-------------------
The `Cookiecutter`_ directories are located in this module's directory. Currently available templates are:
- ``isatab-generic``
- ``isatab-germline``
- ``isatab-microarray``
- ``isatab-ms_meta_biocrates``
- ``isatab-single_cell_rnaseq``
- ``isatab-bulk_rnaseq``
- ``isatab-tumor_normal_dna``
- ``isatab-tumor_normal_triplets``
- ``isatab-stem_cell_core_bulk``
- ``isatab-stem_cell_core_sc``
These have been moved to a separate repository: see `cubi-isa-templates`_.
Adding Templates
----------------
Adding templates consists of the following steps:
1. Add a new template directory below ``cubi_tk/isa_tpl``.
2. Register it appending a ``IsaTabTemplate`` object to ``_TEMPLATES`` in ``cubi_tk.isa_tpl``.
3. Add it to the list above in the docstring.
The easiest way to start out is to copy an existing cookiecutter template and registration.
See `cubi-isa-templates`_.
More Information
----------------
Also see ``cubi-tk isa-tpl`` CLI documentation and ``cubi-tk isa-tab --help`` for more information.
.. _Cookiecutter: https://cookiecutter.readthedocs.io/
.. _cubi-isa-templates: https://github.com/bihealth/cubi-isa-templates
"""

import argparse
from functools import partial
import json
import os
from pathlib import Path
import typing
import shutil
import warnings

import attr
import altamisa
from cookiecutter.main import cookiecutter
from cubi_isa_templates import TEMPLATES
from logzero import logger
from toolz import curry

from ..common import run_nocmd, yield_files_recursively


@attr.s(frozen=True, auto_attribs=True)
class IsaTabTemplate:
"""Information regarding an ISA-tab template."""

#: Name of the ISA-tab template.
name: str

#: Path to template directory.
path: str

#: Configuration loaded from ``cookiecutter.json``.
configuration: typing.Dict[str, typing.Any]

#: Optional description string.
description: typing.Optional[str] = None


#: Base directory to this file.
_BASE_DIR = os.path.dirname(__file__)


def load_variables(template_name, extra=None):
"""Load variables given the template name."""
extra = extra or {}
config_path = os.path.join(_BASE_DIR, template_name, "cookiecutter.json")
with open(config_path, "rt") as inputf:
result = json.load(inputf)
result.update(extra)
return result


#: Known ISA-tab templates (internal, mapping generated below).
_TEMPLATES = (
IsaTabTemplate(
name="single_cell_rnaseq",
path=os.path.join(_BASE_DIR, "isatab-single_cell_rnaseq"),
description="single cell RNA sequencing ISA-tab template",
configuration=load_variables("isatab-single_cell_rnaseq"),
),
IsaTabTemplate(
name="bulk_rnaseq",
path=os.path.join(_BASE_DIR, "isatab-bulk_rnaseq"),
description="bulk RNA sequencing ISA-tab template",
configuration=load_variables("isatab-generic"),
),
IsaTabTemplate(
name="tumor_normal_dna",
path=os.path.join(_BASE_DIR, "isatab-tumor_normal_dna"),
description="Tumor-Normal DNA sequencing ISA-tab template",
configuration=load_variables("isatab-tumor_normal_dna", {"is_triplet": False}),
),
IsaTabTemplate(
name="tumor_normal_triplets",
path=os.path.join(_BASE_DIR, "isatab-tumor_normal_triplets"),
description="Tumor-Normal DNA+RNA sequencing ISA-tab template",
configuration=load_variables("isatab-tumor_normal_triplets", {"is_triplet": True}),
),
IsaTabTemplate(
name="germline",
path=os.path.join(_BASE_DIR, "isatab-germline"),
description="germline DNA sequencing ISA-tab template",
configuration=load_variables("isatab-germline"),
),
IsaTabTemplate(
name="generic",
path=os.path.join(_BASE_DIR, "isatab-generic"),
description="generic RNA sequencing ISA-tab template",
configuration=load_variables("isatab-generic"),
),
IsaTabTemplate(
name="microarray",
path=os.path.join(_BASE_DIR, "isatab-microarray"),
description="microarray ISA-tab template",
configuration=load_variables("isatab-microarray"),
),
IsaTabTemplate(
name="ms_meta_biocrates",
path=os.path.join(_BASE_DIR, "isatab-ms_meta_biocrates"),
description="MS Metabolomics Biocrates kit ISA-tab template",
configuration=load_variables("isatab-ms_meta_biocrates"),
),
IsaTabTemplate(
name="stem_cell_core_bulk",
path=os.path.join(_BASE_DIR, "isatab-stem_cell_core_bulk"),
description="Bulk RNA sequencing ISA-tab template from hiPSC for stem cell core projects",
configuration=load_variables("isatab-stem_cell_core_bulk"),
),
IsaTabTemplate(
name="stem_cell_core_sc",
path=os.path.join(_BASE_DIR, "isatab-stem_cell_core_sc"),
description="Single cell RNA sequencing ISA-tab template from hiPSC for stem cell core projects",
configuration=load_variables("isatab-stem_cell_core_sc"),
),
)

#: Known ISA-tab templates.
TEMPLATES = {tpl.name: tpl for tpl in _TEMPLATES}


@curry
def run_cookiecutter(tpl, args, _parser=None, _subparser=None, no_input=False):
"""Run cookiecutter, ``tpl`` will be bound with ``toolz.curry``."""
@@ -172,16 +57,13 @@ def run_cookiecutter(tpl, args, _parser=None, _subparser=None, no_input=False):
         if getattr(args, "var_%s" % name, None) is not None:
             extra_context[name] = getattr(args, "var_%s" % name)
 
-    logger.info(tpl.configuration)
-    logger.info(args)
-
-    output_dir = os.path.realpath(args.output_dir)
-    output_base = os.path.dirname(output_dir)
-    extra_context["__output_dir"] = os.path.basename(output_dir)
+    if args.verbose:
+        logger.info(tpl.configuration)
+        logger.info(args)
 
-    # FIXME: better solution? (added because args.var_is_triplet is None)
-    if "is_triplet" in tpl.configuration:
-        extra_context["is_triplet"] = tpl.configuration["is_triplet"]
+    output_dir = Path(args.output_dir).resolve()
+    output_base = output_dir.parent
+    extra_context["__output_dir"] = Path(output_dir).name
 
     logger.info("Start running cookiecutter")
     logger.info(" template path: %s", tpl.path)
@@ -255,4 +137,27 @@ def run(args, parser, subparser): # pragma: nocover
     if not args.tpl:  # pragma: nocover
         return run_nocmd(args, parser, subparser)
     else:
-        return args.isa_tpl_cmd(args, parser, subparser)
+        status = args.isa_tpl_cmd(args, parser, subparser)
+
+        # output validation
+        if not status:
+            logger.info("Running AltamISA validator:")
+            i_files = Path(args.output_dir).rglob("i_*")
+            args.show_duplicate_warnings = False
+            warnings.filterwarnings(
+                "error", category=altamisa.exceptions.CriticalIsaValidationWarning
+            )
+            for i in i_files:
+                try:
+                    with i.open() as i_file:
+                        args.input_investigation_file = i_file
+                        altamisa.apps.isatab_validate.run(args)
+                except (
+                    altamisa.exceptions.ParseIsatabException,
+                    altamisa.exceptions.CriticalIsaValidationWarning,
+                ):
+                    shutil.rmtree(args.output_dir)
+                    raise
+            return 0
+        else:
+            return status
17 changes: 0 additions & 17 deletions cubi_tk/isa_tpl/isatab-bulk_rnaseq/cookiecutter.json

This file was deleted.


