Skip to content

Commit

Permalink
Changes based on Code-Review & feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
Nicolai-vKuegelgen committed Nov 9, 2023
1 parent 58916de commit 78ae06a
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 62 deletions.
22 changes: 14 additions & 8 deletions cubi_tk/snappy/itransfer_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,15 +190,21 @@ def check_args(self, args):

toml_config = load_toml_config(args)
if not args.sodar_url:
if toml_config:
args.sodar_url = toml_config.get("global", {}).get("sodar_server_url")
else:
if not toml_config:

Check warning on line 193 in cubi_tk/snappy/itransfer_common.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/snappy/itransfer_common.py#L193

Added line #L193 was not covered by tests
logger.error("SODAR URL not found in config files. Please specify on command line.")
res = 1
args.sodar_url = toml_config.get("global", {}).get("sodar_server_url")
if not args.sodar_url:

Check warning on line 197 in cubi_tk/snappy/itransfer_common.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/snappy/itransfer_common.py#L195-L197

Added lines #L195 - L197 were not covered by tests
logger.error("SODAR URL not found in config files. Please specify on command line.")
res = 1
if not args.sodar_api_token:
if toml_config:
args.sodar_api_token = toml_config.get("global", {}).get("sodar_api_token")
else:
if not toml_config:

Check warning on line 201 in cubi_tk/snappy/itransfer_common.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/snappy/itransfer_common.py#L201

Added line #L201 was not covered by tests
logger.error(
"SODAR API token not found in config files. Please specify on command line."
)
res = 1
args.sodar_api_token = toml_config.get("global", {}).get("sodar_api_token")
if not args.sodar_api_token:

Check warning on line 207 in cubi_tk/snappy/itransfer_common.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/snappy/itransfer_common.py#L205-L207

Added lines #L205 - L207 were not covered by tests
logger.error(
"SODAR API token not found in config files. Please specify on command line."
)
Expand Down Expand Up @@ -454,8 +460,8 @@ def get_landing_zone_by_uuid(self, lz_uuid):

def get_landing_zone_uuid_by_path(self, lz_irods_path, project_uuid, assay_uuid=None):
"""
:param lz_path: Landing zone path.
:type lz_path: str
:param lz_irods_path: Landing zone path.
:type lz_irods_path: str
:param project_uuid: Project UUID.
:type project_uuid: str
Expand Down
150 changes: 96 additions & 54 deletions cubi_tk/sodar/ingest_fastq.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import tqdm

from ..common import check_irods_icommands, load_toml_config, sizeof_fmt
from ..exceptions import MissingFileException, ParameterException
from ..exceptions import MissingFileException, ParameterException, UserCanceledException
from ..snappy.itransfer_common import (
SnappyItransferCommandBase,
TransferJob,
Expand Down Expand Up @@ -98,7 +98,7 @@ def setup_argparse(cls, parser: argparse.ArgumentParser) -> None:
"--remote-dir-pattern",
default=DEFAULT_DEST_PATTERN,
help=f"Pattern to use for constructing remote pattern, default: {DEFAULT_DEST_PATTERN}. "
"'collection_name' is the target irods collection and will be filled with the (-m regex modified) "
"'collection_name' is the target iRODS collection and will be filled with the (-m regex modified) "
"'sample' unless --match-column is not used to fill it from the assay table. Any capture group of the "
"src-regex ('sample', 'lane', ...) can be used along with 'date' and 'filename'.",
)
Expand All @@ -107,7 +107,8 @@ def setup_argparse(cls, parser: argparse.ArgumentParser) -> None:
default=None,
help="Alternative assay column against which the {sample} from the src-regex should be matched, "
"in order to determine collections based on the assay table (e.g. last material or collection-column). "
"If not set it is assumed that {sample} matches the irods collections directly.",
"If not set it is assumed that {sample} matches the iRODS collections directly. If it matches multiple "
"columns the last one can be used.",
)
parser.add_argument(
"-m",
Expand All @@ -118,7 +119,7 @@ def setup_argparse(cls, parser: argparse.ArgumentParser) -> None:
default=[],
type=str,
help="Substitutions applied to the extracted sample name, "
"which is used to determine iRods collections."
"which is used to determine iRODS collections."
"Can be used to change extracted string to correct collections names "
"or to match the values of '--match-column'."
"Use pythons regex syntax of 're.sub' package. "
Expand All @@ -133,8 +134,9 @@ def setup_argparse(cls, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"--collection-column",
default=None,
help="Assay column from that matchs irods collection names. "
"If not set, the last material column will be used.",
help="Assay column from that matches iRODS collection names. "
"If not set, the last material column will be used. If it matches multiple "
"columns the last one can be used.",
)
parser.add_argument(
"--tmp",
Expand All @@ -159,15 +161,21 @@ def check_args(self, args):

toml_config = load_toml_config(args)
if not args.sodar_url:
if toml_config:
args.sodar_url = toml_config.get("global", {}).get("sodar_server_url")
else:
if not toml_config:

Check warning on line 164 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L164

Added line #L164 was not covered by tests
logger.error("SODAR URL not found in config files. Please specify on command line.")
res = 1
args.sodar_url = toml_config.get("global", {}).get("sodar_server_url")
if not args.sodar_url:

Check warning on line 168 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L166-L168

Added lines #L166 - L168 were not covered by tests
logger.error("SODAR URL not found in config files. Please specify on command line.")
res = 1

Check warning on line 170 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L170

Added line #L170 was not covered by tests
if not args.sodar_api_token:
if toml_config:
args.sodar_api_token = toml_config.get("global", {}).get("sodar_api_token")
else:
if not toml_config:

Check warning on line 172 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L172

Added line #L172 was not covered by tests
logger.error(
"SODAR API token not found in config files. Please specify on command line."
)
res = 1
args.sodar_api_token = toml_config.get("global", {}).get("sodar_api_token")
if not args.sodar_api_token:

Check warning on line 178 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L176-L178

Added lines #L176 - L178 were not covered by tests
logger.error(
"SODAR API token not found in config files. Please specify on command line."
)
Expand All @@ -180,7 +188,7 @@ def get_project_uuid(self, lz_uuid: str):
:param lz_uuid: Landing zone UUID.
:type lz_uuid: str
:return: Returns Sodar UUID of corresponding project.
:return: Returns SODAR UUID of corresponding project.
"""
from sodar_cli.api import landingzone

Expand All @@ -202,35 +210,77 @@ def get_match_to_collection_mapping(
"""Return a dict that matches all values from a specific `in_column` of the assay table
to a corresponding `out_column` (default if not defined: last Material column)."""

# This part is only needed to get `assay.file_name`
# -> could be removed if we can get around that
investigation = api.samplesheet.retrieve(
sodar_url=self.args.sodar_url,
sodar_api_token=self.args.sodar_api_token,
project_uuid=project_uuid,
)
assay = None
for study in investigation.studies.values():
for assay_uuid in study.assays.keys():
if (self.args.assay is None) and (assay is None):
assay = study.assays[assay_uuid]
if (self.args.assay is not None) and (self.args.assay == assay_uuid):
assay = study.assays[assay_uuid]
logger.info("Using irods path of assay %s: %s", assay_uuid, assay.irods_path)
break

isa_dict = api.samplesheet.export(

Check warning on line 213 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L213

Added line #L213 was not covered by tests
sodar_url=self.args.sodar_url,
sodar_api_token=self.args.sodar_api_token,
project_uuid=project_uuid,
)
if len(isa_dict["assays"]) > 1:
if not self.args.assay:
msg = "Multiple assays found in investigation, please specify which one to use with --assay."

Check warning on line 220 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L218-L220

Added lines #L218 - L220 were not covered by tests
logger.error(msg)
raise ParameterException(msg)

Check warning on line 222 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L222

Added line #L222 was not covered by tests

investigation = api.samplesheet.retrieve(

Check warning on line 224 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L224

Added line #L224 was not covered by tests
sodar_url=self.args.sodar_url,
sodar_api_token=self.args.sodar_api_token,
project_uuid=project_uuid,
)
for study in investigation.studies.values():
for assay_uuid in study.assays.keys():
if self.args.assay == assay_uuid:
assay_file_name = study.assays[assay_uuid].file_name
break

Check warning on line 233 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L229-L233

Added lines #L229 - L233 were not covered by tests
# First break can only break out of inner loop
else:
continue
break

Check warning on line 237 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L236-L237

Added lines #L236 - L237 were not covered by tests
else:
msg = f"Assay with UUID {self.args.assay} not found in investigation."

Check warning on line 239 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L239

Added line #L239 was not covered by tests
logger.error(msg)
raise ParameterException(msg)

Check warning on line 241 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L241

Added line #L241 was not covered by tests
else:
assay_file_name = list(isa_dict["assays"].keys())[0]

Check warning on line 243 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L243

Added line #L243 was not covered by tests

assay_tsv = isa_dict["assays"][assay.file_name]["tsv"]
assay_tsv = isa_dict["assays"][assay_file_name]["tsv"]
assay_header, *assay_lines = assay_tsv.rstrip("\n").split("\n")
assay_header = assay_header.split("\t")
assay_lines = map(lambda x: x.split("\t"), assay_lines)

Check warning on line 248 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L245-L248

Added lines #L245 - L248 were not covered by tests

# Never match these assay cols
def check_col_index(column_index):
    """Reduce a list of matching assay-column indices to a single index.

    :param column_index: Indices of assay header columns that matched the requested column name.
    :type column_index: list
    :return: The only index if exactly one column matched; otherwise the highest (last) index,
        once ``--yes`` is set or the user confirmed that choice interactively.
    :raises ParameterException: If no column matched.
    :raises UserCanceledException: If several columns matched and the user declined to use the last one.
    """
    # This helper validates the lookup for --match-column as well as --collection-column,
    # so report every option that was actually given instead of always blaming --match-column.
    options = "--match-column={0}".format(in_column)
    if out_column is not None:
        options += ", --collection-column={0}".format(out_column)

    if not column_index:
        msg = (
            "Could not identify any column in the assay sheet matching provided data. "
            "Please review input: {0}".format(options)
        )
        logger.error(msg)
        raise ParameterException(msg)

    if len(column_index) == 1:
        return column_index[0]

    # Several columns matched: the last one is the candidate, subject to confirmation.
    last_index = max(column_index)
    ambiguous = "Multiple columns in the assay sheet match the provided column name ({})".format(
        assay_header[last_index]
    )
    if self.args.yes:
        logger.info(ambiguous + ", using the last one.")
        return last_index

    answer = input(ambiguous + ", use the last one? [yN] ")
    if answer.lower().startswith("y"):
        return last_index

    msg = (
        "Not possible to continue the process without a uniquely defined assay column "
        "({0}). Breaking...".format(options)
    )
    logger.info(msg)
    raise UserCanceledException(msg)

Check warning on line 281 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L280-L281

Added lines #L280 - L281 were not covered by tests

# Never match these (hidden) assay columns
ignore_cols = (

Check warning on line 284 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L284

Added line #L284 was not covered by tests
"Performer",
"Date",
Expand All @@ -247,35 +297,27 @@ def get_match_to_collection_mapping(
and in_column.lower()
in re.sub("(Parameter Value|Comment|Characteristics)", "", head).lower()
]
if not in_column_index or len(in_column_index) > 1:
msg = "Could not identify a valid unique column of the assay sheet matching provided data. Please review input: --match-column={0}".format(
in_column
)
logger.error(msg)
raise ParameterException(msg)
in_column_index = in_column_index[0]
in_column_index = check_col_index(in_column_index)

Check warning on line 300 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L300

Added line #L300 was not covered by tests

if out_column is None:

Check warning on line 302 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L302

Added line #L302 was not covered by tests
# Get index of last material column that is not 'Raw Data File'
out_column_index = max(
[
i
for i, head in enumerate(assay_header)
if head not in ignore_cols
and not re.match("Raw Data File|Parameter Value|Comment|Characteristics", head)
]
materials = (

Check warning on line 304 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L304

Added line #L304 was not covered by tests
"Extract Name",
"Labeled Extract Name",
"Library Name",
"Sample Name",
"Source Name",
)
out_column_index = max([i for i, head in enumerate(assay_header) if head in materials])

Check warning on line 311 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L311

Added line #L311 was not covered by tests
else:
out_column_index = [

Check warning on line 313 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L313

Added line #L313 was not covered by tests
i for i, head in enumerate(assay_header) if re.match(out_column, head)
i
for i, head in enumerate(assay_header)
if head not in ignore_cols
and out_column.lower()
in re.sub("(Parameter Value|Comment|Characteristics)", "", head).lower()
]
if not out_column_index or len(out_column_index) > 1:
msg = "Could not identify a valid unique column of the assay sheet matching provided data. Please review input: --collection-column={0}".format(
out_column
)
logger.error(msg)
raise ParameterException(msg)
out_column_index = out_column_index[0]
out_column_index = check_col_index(out_column_index)

Check warning on line 320 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L320

Added line #L320 was not covered by tests

return {line[in_column_index]: line[out_column_index] for line in assay_lines}

Check warning on line 322 in cubi_tk/sodar/ingest_fastq.py

View check run for this annotation

Codecov / codecov/patch

cubi_tk/sodar/ingest_fastq.py#L322

Added line #L322 was not covered by tests

Expand Down
17 changes: 17 additions & 0 deletions tests/test_sodar_ingest_fastq.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import json
import os
import re
from unittest import mock

from pyfakefs import fake_filesystem, fake_pathlib
Expand Down Expand Up @@ -40,6 +41,22 @@ def test_run_sodar_ingest_fastq_nothing(capsys):
assert res.err


def test_run_sodar_ingest_fastq_src_regex():
    """Check that DEFAULT_SRC_REGEX captures the expected ``sample`` group from example file names."""
    from cubi_tk.sodar.ingest_fastq import DEFAULT_SRC_REGEX

    # Pairs of (example file name, value the regex is expected to capture as {sample})
    cases = (
        ("Sample1-N1-RNA1-RNA_seq1.fastq.gz", "Sample1-N1-RNA1-RNA_seq1"),
        ("P1234_Samplename_S14_L006_R2_001.fastq.gz", "P1234_Samplename"),
        ("P1234_Samplename2_R1.fastq.gz", "P1234_Samplename2"),
    )

    for file_name, sample in cases:
        matched = re.match(DEFAULT_SRC_REGEX, file_name)
        assert matched is not None
        captured = matched.groupdict()
        assert captured["sample"] == sample


def test_run_sodar_ingest_fastq_smoke_test(mocker, requests_mock):
# --- setup arguments
irods_path = "/irods/dest"
Expand Down

0 comments on commit 78ae06a

Please sign in to comment.