From a0526c49bc0b8171ca5d83765588747902f24336 Mon Sep 17 00:00:00 2001
From: Manuel Holtgrewe
Date: Fri, 12 Jan 2024 14:20:53 +0100
Subject: [PATCH 1/3] feat: proper setup of type checking (#112)

---
 MANIFEST.in                                 |   3 +-
 Makefile                                    |  50 +++-
 altamisa/_version.py                        |   2 +-
 altamisa/apps/isatab2dot.py                 |  12 +-
 altamisa/apps/isatab2isatab.py              |   8 +-
 altamisa/apps/isatab_validate.py            |  76 +++---
 altamisa/constants/investigation_headers.py |   4 +-
 altamisa/constants/table_headers.py         |   4 +-
 altamisa/constants/table_restrictions.py    |   5 +-
 altamisa/constants/table_tokens.py          |   4 +-
 altamisa/exceptions.py                      |   2 +-
 altamisa/isatab/__init__.py                 |   2 +-
 altamisa/isatab/headers.py                  |  55 ++--
 altamisa/isatab/helpers.py                  |   9 +-
 altamisa/isatab/models.py                   | 177 +++++++++----
 altamisa/isatab/parse_assay_study.py        | 232 +++++++++++------
 altamisa/isatab/parse_investigation.py      |  84 ++++---
 altamisa/isatab/validate_assay_study.py     |  77 +++---
 altamisa/isatab/validate_investigation.py   |  44 ++--
 altamisa/isatab/write_assay_study.py        | 101 +++++---
 altamisa/isatab/write_investigation.py      | 262 +++++++-------------
 altamisa/py.typed                           |   0
 codemeta.json                               |  10 +-
 docs/examples/create_isa_model.py           |  13 +-
 docs/examples/process_isa_model.py          |   2 +-
 pyproject.toml                              |  19 ++
 requirements/base.txt                       |   1 +
 requirements/test.txt                       |   7 +-
 setup.cfg                                   |   5 -
 setup.py                                    |  12 +-
 tests/conftest.py                           |  62 ++---
 tests/test_apps.py                          |  12 +-
 tests/test_exceptions.py                    |   1 -
 tests/test_parse_assay.py                   |  32 ++-
 tests/test_parse_investigation.py           |   5 +-
 tests/test_parse_study.py                   |  17 +-
 tests/test_write_assay.py                   |   7 +-
 tests/test_write_investigation.py           |   8 +-
 tests/test_write_study.py                   |  11 +-
 versioneer.py                               |   8 +-
 40 files changed, 850 insertions(+), 595 deletions(-)
 create mode 100644 altamisa/py.typed
 create mode 100644 pyproject.toml

diff --git a/MANIFEST.in b/MANIFEST.in
index 9e9ba50..fd162b2 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -11,6 +11,7 @@ recursive-exclude * *.py[co]
 recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif

 include versioneer.py
-include vcfpy/_version.py
+include altamisa/_version.py
+include altamisa/py.typed

 include requirements.txt requirements/*.txt

diff --git a/Makefile b/Makefile
index 149e196..7f316f9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,21 +1,59 @@
-.PHONY: default black flake8 test test-v test-vv
+.PHONY: default
+default: help

-default: black flake8
+.PHONY: help
+help:
+	@echo "make help - show this help"
+	@echo "make lint - run all linting"
+	@echo "make format - run all formatting"
+	@echo "make lint-isort - run isort linting"
+	@echo "make format-isort - run isort formatting"
+	@echo "make lint-black - run black linting"
+	@echo "make format-black - run black formatting"
+	@echo "make lint-flake8 - run flake8 linting"
+	@echo "make lint-pyright - run pyright linting"
+	@echo "make test - run all tests"
+	@echo "make test-v - run all tests with verbose output"
+	@echo "make test-vv - run all tests with very verbose output"

-black:
-	black -l 100 --exclude "versioneer.py|_version.py" .
+.PHONY: lint
+lint: lint-isort lint-black lint-flake8 lint-pyright
+
+.PHONY: format
+format: format-isort format-black
+
+.PHONY: lint-isort
+lint-isort:
+	isort --check-only --diff --force-sort-within-sections --profile=black .
+
+.PHONY: format-isort
+format-isort:
+	isort --force-sort-within-sections --profile=black .

-black-check:
+.PHONY: lint-black
+lint-black:
 	black -l 100 --exclude "versioneer.py|_version.py" --check .
-flake8: +.PHONY: format-black +format-black: + black -l 100 --exclude "versioneer.py|_version.py" . + +.PHONY: lint-flake8 +lint-flake8: flake8 . +.PHONY: lint-pyright +lint-pyright: + pyright + +.PHONY: test test: pytest +.PHONY: test-v test-v: pytest -v +.PHONY: test-vv test-vv: pytest -vv diff --git a/altamisa/_version.py b/altamisa/_version.py index ff1a769..87a902a 100644 --- a/altamisa/_version.py +++ b/altamisa/_version.py @@ -12,12 +12,12 @@ """Git implementation of _version.py.""" import errno +import functools import os import re import subprocess import sys from typing import Any, Callable, Dict, List, Optional, Tuple -import functools def get_keywords() -> Dict[str, str]: diff --git a/altamisa/apps/isatab2dot.py b/altamisa/apps/isatab2dot.py index 1590574..e48a44c 100644 --- a/altamisa/apps/isatab2dot.py +++ b/altamisa/apps/isatab2dot.py @@ -2,12 +2,12 @@ """Conversion of ISA-Tab to dot. """ +import argparse import json -import sys import os -import argparse +import sys -from altamisa.isatab import InvestigationReader, StudyReader, AssayReader +from altamisa.isatab import AssayReader, InvestigationReader, StudyReader def print_dot( @@ -59,6 +59,9 @@ def run(args): print(' rankdir = "LR";', file=args.output_file) for s, study_info in enumerate(investigation.studies): + if not study_info.info.path: + print(" /* no file for study {} */".format(s + 1), file=args.output_file) + continue with open(os.path.join(path, study_info.info.path), "rt") as inputf: study = StudyReader.from_stream("S{}".format(s + 1), inputf).read() print(" /* study {} */".format(study_info.info.path), file=args.output_file) @@ -68,6 +71,9 @@ def run(args): print(" }", file=args.output_file) for a, assay_info in enumerate(study_info.assays): + if not assay_info.path: + print(" /* no file for assay {} */".format(a + 1), file=args.output_file) + continue with open(os.path.join(path, assay_info.path), "rt") as inputf: assay = AssayReader.from_stream( "S{}".format(s + 1), "A{}".format(a + 1), inputf diff --git a/altamisa/apps/isatab2isatab.py b/altamisa/apps/isatab2isatab.py index c4d5b7d..688af69 100644 --- a/altamisa/apps/isatab2isatab.py +++ b/altamisa/apps/isatab2isatab.py @@ -7,6 +7,7 @@ import sys import warnings +from altamisa.exceptions import IsaException from altamisa.isatab import ( AssayReader, AssayValidator, @@ -18,7 +19,6 @@ StudyValidator, StudyWriter, ) -from altamisa.exceptions import IsaException def run(args): @@ -30,7 +30,11 @@ def run(args): if not args.no_warnings: for record in records: warnings.showwarning( - record.message, record.category, record.filename, record.lineno, record.line + record.message, + record.category, + record.filename, + lineno=record.lineno, + line=record.line, ) diff --git a/altamisa/apps/isatab_validate.py b/altamisa/apps/isatab_validate.py index 8e56deb..32acb34 100644 --- a/altamisa/apps/isatab_validate.py +++ b/altamisa/apps/isatab_validate.py @@ -2,11 +2,13 @@ """Read from ISA-Tab and print validation warnings, if any. """ -import argparse import os -import sys import warnings +import attrs +import typer +from typing_extensions import Annotated + from altamisa.isatab import ( AssayReader, AssayValidator, @@ -16,8 +18,40 @@ StudyValidator, ) +#: Typer application instance. 
+app = typer.Typer()
+
+
+@attrs.define
+class Arguments:
+    input_investigation_file: str
+    show_duplicate_warnings: bool
+

-def run(args):
+@app.command()
+def main(
+    input_investigation_file: Annotated[
+        str,
+        typer.Option(
+            "--input-investigation-file",
+            "-i",
+            help="Path to input investigation file",
+        ),
+    ],
+    show_duplicate_warnings: Annotated[
+        bool,
+        typer.Option(
+            "--show-duplicate-warnings/--no-show-duplicate-warnings",
+            help="Show duplicated warnings, i.e. with same message and same category (False by default)",
+        ),
+    ] = False,
+):
+    """Main entry point."""
+    # Convert to `Arguments` object.
+    args = Arguments(
+        input_investigation_file=input_investigation_file,
+        show_duplicate_warnings=show_duplicate_warnings,
+    )
     # Show all warnings of same type and content
     if args.show_duplicate_warnings:
         warnings.simplefilter("always")
@@ -29,20 +63,20 @@
     # Print warnings
     for record in records:
         warnings.showwarning(
-            record.message, record.category, record.filename, record.lineno, record.line
+            record.message, record.category, record.filename, lineno=record.lineno, line=record.line
         )


-def run_warnings_caught(args):
+def run_warnings_caught(args: Arguments):
     # Read investigation
-    investigation = InvestigationReader.from_stream(args.input_investigation_file).read()
-    args.input_investigation_file.close()
+    with open(args.input_investigation_file, "rt") as inputf:
+        investigation = InvestigationReader.from_stream(inputf).read()

     # Validate investigation
     InvestigationValidator(investigation).validate()

     # Read studies and assays
-    path_in = os.path.normpath(os.path.dirname(args.input_investigation_file.name))
+    path_in = os.path.normpath(os.path.dirname(args.input_investigation_file))
     studies = {}
     assays = {}
     for s, study_info in enumerate(investigation.studies):
@@ -69,29 +103,5 @@
     ).validate()


-def main(argv=None):
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument(
-        "-i",
-        "--input-investigation-file",
-        required=True,
-        type=argparse.FileType("rt"),
-        help="Path to input investigation file",
-    )
-    parser.add_argument(
-        "--show-duplicate-warnings",
-        dest="show_duplicate_warnings",
-        action="store_true",
-        help=(
-            "Show duplicated warnings, i.e. with same message and same category (False by default)"
-        ),
-    )
-    parser.set_defaults(no_warnings=False)
-
-    args = parser.parse_args(argv)
-    return run(args)
-
-
 if __name__ == "__main__":  # pragma: no cover
-    sys.exit(main())
+    typer.run(main)
diff --git a/altamisa/constants/investigation_headers.py b/altamisa/constants/investigation_headers.py
index 3aaf583..ceb4115 100644
--- a/altamisa/constants/investigation_headers.py
+++ b/altamisa/constants/investigation_headers.py
@@ -7,8 +7,8 @@


 __author__ = (
-    "Manuel Holtgrewe , "
-    "Mathias Kuhring "
+    "Manuel Holtgrewe , "
+    "Mathias Kuhring "
 )


diff --git a/altamisa/constants/table_headers.py b/altamisa/constants/table_headers.py
index 3718fc0..a9ea534 100644
--- a/altamisa/constants/table_headers.py
+++ b/altamisa/constants/table_headers.py
@@ -7,8 +7,8 @@


 __author__ = (
-    "Manuel Holtgrewe , "
-    "Mathias Kuhring "
+    "Manuel Holtgrewe , "
+    "Mathias Kuhring "
 )


diff --git a/altamisa/constants/table_restrictions.py b/altamisa/constants/table_restrictions.py
index 1954e8c..fe8a53c 100644
--- a/altamisa/constants/table_restrictions.py
+++ b/altamisa/constants/table_restrictions.py
@@ -3,14 +3,13 @@


 __author__ = (
-    "Manuel Holtgrewe , "
-    "Mathias Kuhring "
+    "Manuel Holtgrewe , "
+    "Mathias Kuhring "
 )

 from .
import table_headers - # Assay measurement types (only the once needed for special validations) PROTEIN_EXPRESSION_PROFILING = "protein expression profiling" #: PROTEIN_IDENTIFICATION = "protein identification" #: diff --git a/altamisa/constants/table_tokens.py b/altamisa/constants/table_tokens.py index 13c6331..8937378 100644 --- a/altamisa/constants/table_tokens.py +++ b/altamisa/constants/table_tokens.py @@ -3,8 +3,8 @@ __author__ = ( - "Manuel Holtgrewe , " - "Mathias Kuhring " + "Manuel Holtgrewe , " + "Mathias Kuhring " ) diff --git a/altamisa/exceptions.py b/altamisa/exceptions.py index 875f599..b47764c 100644 --- a/altamisa/exceptions.py +++ b/altamisa/exceptions.py @@ -3,7 +3,7 @@ Exceptions and Warnings used in the AltamISA library. """ -__author__ = "Manuel Holtgrewe " +__author__ = "Manuel Holtgrewe " class IsaException(Exception): diff --git a/altamisa/isatab/__init__.py b/altamisa/isatab/__init__.py index 419c951..e0bdd40 100644 --- a/altamisa/isatab/__init__.py +++ b/altamisa/isatab/__init__.py @@ -14,5 +14,5 @@ from .parse_investigation import InvestigationReader # noqa: F401 from .validate_assay_study import AssayValidator, StudyValidator # noqa: F401 from .validate_investigation import InvestigationValidator # noqa: F401 -from .write_assay_study import AssayWriter, StudyWriter, RefTableBuilder # noqa: F401 +from .write_assay_study import AssayWriter, RefTableBuilder, StudyWriter # noqa: F401 from .write_investigation import InvestigationWriter # noqa: F401 diff --git a/altamisa/isatab/headers.py b/altamisa/isatab/headers.py index 8b7e95b..5dc1005 100644 --- a/altamisa/isatab/headers.py +++ b/altamisa/isatab/headers.py @@ -4,29 +4,30 @@ """ from __future__ import generator_stop -from typing import Iterator, List -from ..constants import table_headers -from ..exceptions import ParseIsatabException +from typing import Iterator, List, Optional, Tuple +import warnings +from ..constants import table_headers +from ..exceptions import ParseIsatabException, ParseIsatabWarning -__author__ = "Manuel Holtgrewe " +__author__ = "Manuel Holtgrewe " class ColumnHeader: """Column header in a study or assay file""" - def __init__(self, column_type, col_no, span): + def __init__(self, column_type: str, col_no: int, span: int): #: The type of this header - self.column_type = column_type + self.column_type: str = column_type #: The column number this header refers to - self.col_no = col_no + self.col_no: int = col_no #: Number of columns this header spans - self.span = span + self.span: int = span #: Link to the TermSourceRefHeader to use - self.term_source_ref_header = None + self.term_source_ref_header: Optional[ColumnHeader] = None #: Link to the UnitHeader to use - self.unit_header = None + self.unit_header: Optional[ColumnHeader] = None def __str__(self): tpl = "ColumnHeader(column_type={}, col_no={}, span={})" @@ -44,9 +45,9 @@ class SimpleColumnHeader(ColumnHeader): """Base class for simple column headers.""" #: The value to use for the ``type`` argument. - column_type = None + column_type: str - def __init__(self, col_no): + def __init__(self, col_no: int): super().__init__(self.column_type, col_no, 1) @@ -295,10 +296,10 @@ class UnitHeader(SimpleColumnHeader): class LabeledColumnHeader(ColumnHeader): """Base class for labeled column headers.""" - #: The value to use for the ``type`` argument. 
-    column_type = None
+    #: The label of the header
+    label: str

-    def __init__(self, col_no, label):
+    def __init__(self, col_no: int, label: str):
         super().__init__(self.column_type, col_no, 1)
         self.label = label

@@ -311,7 +312,7 @@ def __repr__(self):

     def get_simple_string(self):
         """Return a list of simple string representations of the column types"""
-        return ["".join((self.column_type, "[", self.label, "]"))]
+        return ["".join((self.column_type or "MISSING", "[", self.label, "]"))]


 class CharacteristicsHeader(LabeledColumnHeader):
@@ -349,8 +350,11 @@ class HeaderParserBase:
     :param tokens: List of strings, e.g. a split line read from a tsv/cvs file.
     """

+    #: The file type this parser is configured for.
+    file_type: str
+
     #: Names of the allowed headers
-    allowed_headers = None
+    allowed_headers: Tuple[str, ...]

     #: Headers that are mapped to ``SimpleColumnHeader``
     simple_headers = {
@@ -419,6 +423,10 @@ def run(self) -> Iterator[ColumnHeader]:
                 break

     def _parse_next(self):
+        # Only warn if no allowed headers are configured.
+        if self.allowed_headers is None:
+            msg = f"Allowed headers not configured for {self.file_type} file."
+            warnings.warn(msg, ParseIsatabWarning)
         # Get next value from header
         val = next(self.it)  # StopIteration is OK here
         # Process either by exact match to "Term Source REF", or other exact
@@ -426,17 +434,16 @@ def _parse_next(self):
         if val == table_headers.TERM_SOURCE_REF:
             return self._parse_term_source_ref()
         elif val in self.simple_headers:
-            if val not in self.allowed_headers:
+            if self.allowed_headers and val not in self.allowed_headers:
                 tpl = 'Header "{}" not allowed in {}.'
                 msg = tpl.format(val, self.file_type)
                 raise ParseIsatabException(msg)
             return self._parse_simple_column_header(self.simple_headers[val])
         else:
             for label, type_ in self.labeled_headers.items():
                 if val.startswith(label):
-                    if label not in self.allowed_headers:
-                        tpl = 'Header "{}" not allowed in {}.'
-                        msg = tpl.format(label, self.file_type)
+                    if self.allowed_headers and label not in self.allowed_headers:
+                        msg = f'Header "{label}" not allowed in {self.file_type}.'
                         raise ParseIsatabException(msg)
                     return self._parse_labeled_column_header(val, label, type_)
             # None of the if-statements above was taken
@@ -477,7 +484,7 @@ class StudyHeaderParser(HeaderParserBase):

     file_type = "study"  # for exceptions only

-    allowed_headers = (
+    allowed_headers: Tuple[str, ...] = (
         # Material names
         table_headers.SAMPLE_NAME,
         table_headers.SOURCE_NAME,
@@ -502,7 +509,7 @@ class AssayHeaderParser(HeaderParserBase):

     file_type = "assay"  # for exceptions only

-    allowed_headers = (
+    allowed_headers: Tuple[str, ...] = (
         # Material names
         table_headers.EXTRACT_NAME,
         table_headers.LABELED_EXTRACT_NAME,
diff --git a/altamisa/isatab/helpers.py b/altamisa/isatab/helpers.py
index 24aad32..a1666d8 100644
--- a/altamisa/isatab/helpers.py
+++ b/altamisa/isatab/helpers.py
@@ -4,21 +4,20 @@
 """

+from typing import Any, List
 import warnings
-
 from ..exceptions import ParseIsatabWarning

-
-__author__ = "Mathias Kuhring "
+__author__ = "Mathias Kuhring "


-def is_ontology_term_ref(v):
+def is_ontology_term_ref(v: Any):
     """Duck typing check for objects of class `models.OntologyTermRef`"""
     return hasattr(v, "name") and hasattr(v, "ontology_name") and hasattr(v, "accession")


-def list_strip(line: list):
+def list_strip(line: List[str]) -> List[str]:
     """Remove trailing space from strings in a list (e.g.
a csv line)""" new_line = [field.strip() for field in line] if new_line != line: diff --git a/altamisa/isatab/models.py b/altamisa/isatab/models.py index d61255c..2481d60 100644 --- a/altamisa/isatab/models.py +++ b/altamisa/isatab/models.py @@ -6,14 +6,14 @@ comments for a material). """ +import datetime from datetime import date from pathlib import Path -from typing import Dict, List, Tuple, Union +from typing import Dict, List, Literal, Optional, Sequence, Tuple, Union import attr - -__author__ = "Manuel Holtgrewe " +__author__ = "Manuel Holtgrewe " # Base types, used throughout ------------------------------------------------- @@ -66,16 +66,46 @@ def __attrs_post_init__(self): object.__setattr__(self, "ontology_name", None) #: Human-readable name of the term - name: str = None + name: Optional[str] = None #: The accession of the referenced term - accession: str = None + accession: Optional[str] = None #: Name of the ontology (links to ``OntologyRef.name``) - ontology_name: str = None + ontology_name: Optional[str] = None #: Shortcut for the commonly used "free text or reference to a term in an #: ontology" idiom. -FreeTextOrTermRef = Union[OntologyTermRef, str] +FreeTextOrTermRef = Union[OntologyTermRef, str, None] + + +def free_text_or_term_ref_to_str(value: FreeTextOrTermRef) -> Optional[str]: + """Extract ``name`` from a ``FreeTextOrTermRef`` or use String.""" + if isinstance(value, str): + return value + elif isinstance(value, OntologyTermRef): + return value.name + else: + return "" + + +def free_text_or_term_ref_accession(value: FreeTextOrTermRef) -> Optional[str]: + """Obtain accession from a ``FreeTextOrTermRef`` or ``None`` if is ``str``.""" + if isinstance(value, str): + return None + elif isinstance(value, OntologyTermRef): + return value.accession + else: + return "" + + +def free_text_or_term_ref_ontology(value: FreeTextOrTermRef) -> Optional[str]: + """Obtain ontology name from a ``FreeTextOrTermRef`` or ``None`` if is ``str``.""" + if isinstance(value, str): + return None + elif isinstance(value, OntologyTermRef): + return value.ontology_name + else: + return "" @attr.s(auto_attribs=True, frozen=True) @@ -104,7 +134,7 @@ class OntologyRef: #: Description of the ontology description: str #: Comments - comments: Tuple[Comment] + comments: Tuple[Comment, ...] #: Headers from/for ISA-tab parsing/writing headers: List[str] @@ -114,7 +144,7 @@ class BasicInfo: """Basic metadata for an investigation or study (``INVESTIGATION`` or ``STUDY``).""" #: Path to the investigation or study file - path: Path + path: Optional[Path] #: Investigation/Study identifier identifier: str #: Investigation/Study title @@ -122,11 +152,11 @@ class BasicInfo: #: Investigation/Study description description: str #: Investigation/Study submission date - submission_date: date + submission_date: Optional[date] #: Investigation/Study public release date - public_release_date: date + public_release_date: Optional[date] #: Comments - comments: Tuple[Comment] + comments: Tuple[Comment, ...] #: Headers from/for ISA-tab parsing/writing headers: List[str] @@ -146,7 +176,7 @@ class PublicationInfo: #: Publication status status: FreeTextOrTermRef #: Comments - comments: Tuple[Comment] + comments: Tuple[Comment, ...] #: Headers from/for ISA-tab parsing/writing headers: List[str] @@ -174,7 +204,7 @@ class ContactInfo: #: Role of contact role: FreeTextOrTermRef #: Comments - comments: Tuple[Comment] + comments: Tuple[Comment, ...] 
     #: Headers from/for ISA-tab parsing/writing
     headers: List[str]

@@ -186,7 +216,7 @@ class DesignDescriptorsInfo:
     #: Design descriptors type
     type: FreeTextOrTermRef
     #: Comments
-    comments: Tuple[Comment]
+    comments: Tuple[Comment, ...]
     #: Headers from/for ISA-tab parsing/writing
     headers: List[str]

@@ -200,7 +230,7 @@ class FactorInfo:
     #: Factor type
     type: FreeTextOrTermRef
     #: Comments
-    comments: Tuple[Comment]
+    comments: Tuple[Comment, ...]
     #: Headers from/for ISA-tab parsing/writing
     headers: List[str]

@@ -216,9 +246,9 @@ class AssayInfo:
     #: Assay platform
     platform: str
     #: Path to assay file
-    path: Path
+    path: Optional[Path]
     #: Comments
-    comments: Tuple[Comment]
+    comments: Tuple[Comment, ...]
     #: Headers from/for ISA-tab parsing/writing
     headers: List[str]

@@ -252,7 +282,7 @@ class ProtocolInfo:
     #: Protocol components
     components: Dict[str, ProtocolComponentInfo]
     #: Comments
-    comments: Tuple[Comment]
+    comments: Tuple[Comment, ...]
     #: Headers from/for ISA-tab parsing/writing
     headers: List[str]

@@ -264,17 +294,17 @@ class StudyInfo:
     #: Basic study information
     info: BasicInfo
     #: Study designs by name
-    designs: Tuple[DesignDescriptorsInfo]
+    designs: Tuple[DesignDescriptorsInfo, ...]
     #: Publication list for study
-    publications: Tuple[PublicationInfo]
+    publications: Tuple[PublicationInfo, ...]
     #: Study factors by name
     factors: Dict[str, FactorInfo]
     #: Study assays
-    assays: Tuple[AssayInfo]
+    assays: Tuple[AssayInfo, ...]
     #: Study protocols by name
     protocols: Dict[str, ProtocolInfo]
     #: Study contact list
-    contacts: Tuple[ContactInfo]
+    contacts: Tuple[ContactInfo, ...]


 @attr.s(auto_attribs=True, frozen=True)
@@ -286,12 +316,24 @@ class InvestigationInfo:
     #: Basic information on investigation
     info: BasicInfo
     #: List of investigation publications
-    publications: Tuple[PublicationInfo]
+    publications: Tuple[PublicationInfo, ...]
     #: Contact list for investigation
-    contacts: Tuple[ContactInfo]
+    contacts: Tuple[ContactInfo, ...]
     #: List of studies in this investigation
-    studies: Tuple[StudyInfo]
+    studies: Tuple[StudyInfo, ...]
+
+
+#: Type alias for types in investigation file that have comments.
+InvestigationFieldWithComments = Union[
+    OntologyRef,
+    BasicInfo,
+    PublicationInfo,
+    ContactInfo,
+    DesignDescriptorsInfo,
+    FactorInfo,
+    ProtocolInfo,
+    AssayInfo,
+]


 # Types used in study and assay files -----------------------------------------

@@ -305,7 +347,7 @@ class Characteristics:
     #: Characteristics value
     value: List[FreeTextOrTermRef]
     #: Characteristics unit
-    unit: FreeTextOrTermRef
+    unit: Optional[FreeTextOrTermRef]


 @attr.s(auto_attribs=True, frozen=True)
@@ -315,9 +357,9 @@ class FactorValue:
     #: Factor name
     name: str
     #: Factor value
-    value: FreeTextOrTermRef
+    value: List[FreeTextOrTermRef]
     #: Factor value unit
-    unit: FreeTextOrTermRef
+    unit: Optional[FreeTextOrTermRef]


 @attr.s(auto_attribs=True, frozen=True)
@@ -327,9 +369,46 @@ class ParameterValue:
     #: Parameter name
     name: str
     #: Parameter value
-    value: List[FreeTextOrTermRef]
+    value: Optional[List[FreeTextOrTermRef]]
     #: Parameter value unit
-    unit: FreeTextOrTermRef
+    unit: Optional[FreeTextOrTermRef]
+
+
+#: Type alias for "complex" values.
+ComplexValue = Union[Characteristics, FactorValue, ParameterValue] + + +def build_characteristics( + name: str, value: Union[FreeTextOrTermRef, Sequence[FreeTextOrTermRef]], unit: FreeTextOrTermRef +) -> Characteristics: + """Helper function that builds a `Characteristics`""" + if value is None or isinstance(value, (str, OntologyTermRef)): + # is free text or term ref + return Characteristics(name=name, value=[value], unit=unit) + else: + return Characteristics(name=name, value=list(value), unit=unit) + + +def build_factor_value( + name: str, value: Union[FreeTextOrTermRef, Sequence[FreeTextOrTermRef]], unit: FreeTextOrTermRef +) -> FactorValue: + """Helper function that builds a `FactorValue`""" + if value is None or isinstance(value, (str, OntologyTermRef)): + # is free text or term ref + return FactorValue(name=name, value=[value], unit=unit) + else: + return FactorValue(name=name, value=list(value), unit=unit) + + +def build_parameter_value( + name: str, value: Union[FreeTextOrTermRef, Sequence[FreeTextOrTermRef]], unit: FreeTextOrTermRef +) -> ParameterValue: + """Helper function that builds a `ParameterValue`""" + if value is None or isinstance(value, (str, OntologyTermRef)): + # is free text or term ref + return ParameterValue(name=name, value=[value], unit=unit) + else: + return ParameterValue(name=name, value=list(value), unit=unit) @attr.s(auto_attribs=True, frozen=True) @@ -347,15 +426,15 @@ class Material: #: Original name of a material or data file name: str #: The label of a Labeled Extract - extract_label: FreeTextOrTermRef + extract_label: Optional[Union[FreeTextOrTermRef, Sequence[FreeTextOrTermRef]]] #: Material characteristics - characteristics: Tuple[Characteristics] + characteristics: Tuple[Characteristics, ...] #: Material comments - comments: Tuple[Comment] + comments: Tuple[Comment, ...] #: Material factor values - factor_values: Tuple[FactorValue] + factor_values: Tuple[FactorValue, ...] #: Material type - material_type: FreeTextOrTermRef + material_type: Optional[Union[FreeTextOrTermRef, Sequence[FreeTextOrTermRef]]] #: Columns headers from/for ISA-tab parsing/writing headers: List[str] @@ -375,31 +454,31 @@ class Process: #: protocol reference with an auto-incrementing number. unique_name: str #: Original name of a process (e.g. from Assay Name etc.) - name: str + name: Optional[str] #: Type of original name (e.g. Assay Name) - name_type: str + name_type: Optional[str] #: Process date - date: date + date: Optional[Union[datetime.date, Literal[""]]] #: Performer of process - performer: str + performer: Optional[str] #: Tuple of parameters values - parameter_values: Tuple[ParameterValue] + parameter_values: Tuple[ParameterValue, ...] #: Tuple of process comments - comments: Tuple[Comment] + comments: Tuple[Comment, ...] 
- array_design_ref: str + array_design_ref: Optional[str] """ Array design reference (special case annotation)\n Technology types: "DNA microarray", "protein microarray"\n Protocol types: "nucleic acid hybridization", "hybridization" """ - first_dimension: FreeTextOrTermRef + first_dimension: Optional[Union[FreeTextOrTermRef, Sequence[FreeTextOrTermRef]]] """ First dimension (special case annotation, INSTEAD of Gel Electrophoresis Assay Name)\n Technology types: "gel electrophoresis"\n Protocol types: "electrophoresis" """ - second_dimension: FreeTextOrTermRef + second_dimension: Optional[Union[FreeTextOrTermRef, Sequence[FreeTextOrTermRef]]] """ Second dimension (special case annotation, INSTEAD of Gel Electrophoresis Assay Name)\n Technology types: "gel electrophoresis"\n @@ -410,6 +489,10 @@ class Process: headers: List[str] +#: Type alias for a node in the graph. +Node = Union[Material, Process] + + @attr.s(auto_attribs=True, frozen=True) class Arc: """Representation of an arc between two ``Material`` and/or ``Process`` nodes.""" @@ -443,7 +526,7 @@ class Study: #: A mapping from process name to ``Process`` object processes: Dict[str, Process] #: The processing arcs - arcs: Tuple[Arc] + arcs: Tuple[Arc, ...] @attr.s(auto_attribs=True, frozen=True) @@ -460,4 +543,4 @@ class Assay: #: A mapping from process name to ``Process`` object processes: Dict[str, Process] #: The processing arcs - arcs: Tuple[Arc] + arcs: Tuple[Arc, ...] diff --git a/altamisa/isatab/parse_assay_study.py b/altamisa/isatab/parse_assay_study.py index 9947df6..fc5cb96 100644 --- a/altamisa/isatab/parse_assay_study.py +++ b/altamisa/isatab/parse_assay_study.py @@ -8,61 +8,99 @@ import csv from datetime import datetime from pathlib import Path -from typing import List, TextIO +from typing import ( + Callable, + Dict, + Generator, + Generic, + List, + Optional, + Sequence, + TextIO, + Tuple, + Type, + TypeVar, + Union, +) -from ..constants import table_tokens -from ..constants import table_headers +from . import models +from ..constants import table_headers, table_tokens from ..exceptions import ParseIsatabException -from .headers import ColumnHeader, StudyHeaderParser, AssayHeaderParser, LabeledColumnHeader +from .headers import ( + ArrayDesignRefHeader, + AssayHeaderParser, + CharacteristicsHeader, + ColumnHeader, + CommentHeader, + DateHeader, + FactorValueHeader, + FirstDimensionHeader, + LabeledColumnHeader, + LabeledExtractHeader, + MaterialTypeHeader, + ParameterValueHeader, + PerformerHeader, + SecondDimensionHeader, + StudyHeaderParser, + UnitHeader, +) from .helpers import list_strip -from . import models + +__author__ = "Manuel Holtgrewe " -__author__ = "Manuel Holtgrewe " +#: Type variable for generic Material/Process. +TNode = TypeVar("TNode") +#: Type variable for cells/value. +TCell = TypeVar("TCell") -class _NodeBuilderBase: +class _NodeBuilderBase(Generic[TNode]): """Base class for Material and Process builder objects""" #: Headers to use for naming - name_headers = None + name_headers: Tuple[str, ...] #: Allowed ``column_type``s. - allowed_column_types = None + allowed_column_types: Tuple[str, ...] def __init__( - self, column_headers: List[ColumnHeader], filename: str, study_id: str, assay_id: str + self, + column_headers: List[ColumnHeader], + filename: str, + study_id: str, + assay_id: Optional[str], ): #: The column descriptions to build ``Material`` from. 
- self.column_headers = column_headers + self.column_headers: List[ColumnHeader] = column_headers #: The "Protocol REF" header, if any - self.protocol_ref_header = None + self.protocol_ref_header: Optional[ColumnHeader] = None #: The header to use for building names, if any - self.name_header = None + self.name_header: Optional[ColumnHeader] = None #: The headers for the characteristics - self.characteristic_headers = [] + self.characteristic_headers: List[CharacteristicsHeader] = [] #: The headers for comments - self.comment_headers = [] + self.comment_headers: List[CommentHeader] = [] #: The factor value headers - self.factor_value_headers = [] + self.factor_value_headers: List[FactorValueHeader] = [] #: The parameter value headers - self.parameter_value_headers = [] + self.parameter_value_headers: List[ParameterValueHeader] = [] #: The header for array design ref self.array_design_ref = None #: The header for array design ref - self.array_design_ref_header = None + self.array_design_ref_header: Optional[ArrayDesignRefHeader] = None #: The header for first and second dimension - self.first_dimension_header = None - self.second_dimension_header = None + self.first_dimension_header: Optional[FirstDimensionHeader] = None + self.second_dimension_header: Optional[SecondDimensionHeader] = None #: The header for extract label type - self.extract_label_header = None + self.extract_label_header: Optional[LabeledExtractHeader] = None #: The header for material type - self.material_type_header = None + self.material_type_header: Optional[MaterialTypeHeader] = None #: The header for the performer - self.performer_header = None + self.performer_header: Optional[PerformerHeader] = None #: The header for the date - self.date_header = None + self.date_header: Optional[DateHeader] = None #: The header for the unit - self.unit_header = None + self.unit_header: Optional[UnitHeader] = None #: Current counter value self.counter_value = 0 #: Assign column headers to their roles (properties above) @@ -73,6 +111,10 @@ def __init__( #: Original file name self.filename = filename + def build(self, line: List[str]) -> TNode: + _ = line + raise NotImplementedError() + def _next_counter(self): """Increment counter value and return""" self.counter_value += 1 @@ -99,49 +141,49 @@ def _assign_column_headers(self): # noqa: C901 elif header.column_type in self.name_headers: assert not self.name_header self.name_header = header - elif header.column_type == table_headers.CHARACTERISTICS: - self.characteristic_headers.append(header) - elif header.column_type == table_headers.COMMENT: - self.comment_headers.append(header) - elif header.column_type == table_headers.FACTOR_VALUE: - self.factor_value_headers.append(header) - elif header.column_type == table_headers.PARAMETER_VALUE: - self.parameter_value_headers.append(header) - elif header.column_type == table_headers.MATERIAL_TYPE: + elif header.column_type == table_headers.CHARACTERISTICS: # type check here + self.characteristic_headers.append(header) # type: ignore + elif header.column_type == table_headers.COMMENT: # type check here + self.comment_headers.append(header) # type: ignore + elif header.column_type == table_headers.FACTOR_VALUE: # type check here + self.factor_value_headers.append(header) # type: ignore + elif header.column_type == table_headers.PARAMETER_VALUE: # type check here + self.parameter_value_headers.append(header) # type: ignore + elif header.column_type == table_headers.MATERIAL_TYPE: # type check here if self.material_type_header: # pragma: no 
cover self._raise_seen_before("Material Type", header.col_no) else: - self.material_type_header = header - elif header.column_type == table_headers.ARRAY_DESIGN_REF: + self.material_type_header = header # type: ignore + elif header.column_type == table_headers.ARRAY_DESIGN_REF: # type check here if self.array_design_ref_header: # pragma: no cover self._raise_seen_before("Array Design REF", header.col_no) else: - self.array_design_ref_header = header - elif header.column_type == table_headers.FIRST_DIMENSION: + self.array_design_ref_header = header # type: ignore + elif header.column_type == table_headers.FIRST_DIMENSION: # type check here if self.first_dimension_header: # pragma: no cover self._raise_seen_before("First Dimension", header.col_no) else: - self.first_dimension_header = header - elif header.column_type == table_headers.SECOND_DIMENSION: + self.first_dimension_header = header # type: ignore + elif header.column_type == table_headers.SECOND_DIMENSION: # type check here if self.second_dimension_header: # pragma: no cover self._raise_seen_before("Second Dimension", header.col_no) else: - self.second_dimension_header = header - elif header.column_type == table_headers.LABEL: + self.second_dimension_header = header # type: ignore + elif header.column_type == table_headers.LABEL: # type check here if self.extract_label_header: # pragma: no cover self._raise_seen_before("Label", header.col_no) else: - self.extract_label_header = header - elif header.column_type == table_headers.DATE: + self.extract_label_header = header # type: ignore + elif header.column_type == table_headers.DATE: # type check here if self.date_header: # pragma: no cover self._raise_seen_before("Date", header.col_no) else: - self.date_header = header - elif header.column_type == table_headers.PERFORMER: + self.date_header = header # type: ignore + elif header.column_type == table_headers.PERFORMER: # type check here if self.performer_header: # pragma: no cover self._raise_seen_before("Performer", header.col_no) else: - self.performer_header = header + self.performer_header = header # type: ignore elif header.column_type == table_headers.TERM_SOURCE_REF: # Guard against misuse / errors if not prev: # pragma: no cover @@ -175,9 +217,10 @@ def _assign_column_headers(self): # noqa: C901 raise ParseIsatabException(msg) else: # The previous non-secondary header is annotated with an ontology term. 
-                prev.term_source_ref_header = header
+                if prev:
+                    prev.term_source_ref_header = header
                 is_secondary = True
-            elif header.column_type == table_headers.UNIT:
+            elif prev and header.column_type == table_headers.UNIT:
                 if prev.unit_header or prev.column_type == table_headers.UNIT:  # pragma: no cover
                     self._raise_seen_before("Unit", header.col_no)
                 else:
@@ -193,17 +236,34 @@ def _raise_seen_before(name, col_no):  # pragma: no cover
         msg = tpl.format(name, col_no)
         raise ParseIsatabException(msg)

-    def _build_complex(self, header, line, klass, allow_list=False):
+    def _build_complex(
+        self,
+        header: LabeledColumnHeader,
+        line: List[str],
+        klass: Callable[
+            [
+                str,
+                Union[models.FreeTextOrTermRef, Sequence[models.FreeTextOrTermRef]],
+                models.FreeTextOrTermRef,
+            ],
+            TCell,
+        ],
+        allow_list=False,
+    ) -> TCell:
         """Build a complex annotation (e.g., may have term reference or unit."""
         # First, build the individual components
         value = self._build_freetext_or_term_ref(header, line, allow_list=allow_list)
         unit = self._build_freetext_or_term_ref(header.unit_header, line)
+        if unit is not None and not isinstance(unit, (str, models.OntologyTermRef)):
+            raise ParseIsatabException(
+                "Unit must be a string or an OntologyTermRef, not {}".format(type(unit))
+            )
         # Then, constructing ``klass`` is easy
         return klass(header.label, value, unit)

     def _build_freetext_or_term_ref(
-        self, header, line: List[str], allow_list=False
-    ) -> models.FreeTextOrTermRef:
+        self, header: Optional[ColumnHeader], line: List[str], allow_list=False
+    ) -> Optional[Union[models.FreeTextOrTermRef, Sequence[models.FreeTextOrTermRef]]]:
         if not header:
             return None
         elif header.term_source_ref_header:
@@ -243,7 +306,7 @@ def _build_freetext_or_term_ref(
     def _build_simple_headers_list(self) -> List[str]:
         return [h for headers in self.column_headers for h in headers.get_simple_string()]

     @staticmethod
-    def _token_with_escape(string, escape="\\", separator=";"):
+    def _token_with_escape(string: str, escape: str = "\\", separator: str = ";") -> List[str]:
         # Source: https://rosettacode.org/wiki/Tokenize_a_string_with_escaping#Python
         result = []
         segment = ""
@@ -264,12 +327,12 @@ def _token_with_escape(string, escape="\\", separator=";"):
         return result


-class _MaterialBuilder(_NodeBuilderBase):
+class _MaterialBuilder(_NodeBuilderBase[models.Material]):
     """Helper class to construct a ``Material`` object from a line"""

-    name_headers = table_headers.MATERIAL_NAME_HEADERS
+    name_headers: Tuple[str, ...] = table_headers.MATERIAL_NAME_HEADERS

-    allowed_column_types = (
+    allowed_column_types: Tuple[str, ...] = (
         # Primary annotations (not parametrized)
         table_headers.MATERIAL_TYPE,
         # Primary annotations (parametrized)
@@ -286,7 +349,10 @@ def build(self, line: List[str]) -> models.Material:
         """Build and return ``Material`` from TSV file line."""
         counter_value = self._next_counter()
         # First, build the individual components
-        assert self.name_header or self.protocol_ref_header
+        if not self.name_header:
+            raise ParseIsatabException(
+                "No name header found for material in file {}".format(self.filename)
+            )
         type_ = self.name_header.column_type
         assay_id = "-{}".format(self.assay_id) if self.assay_id else ""
         name = line[self.name_header.col_no]
@@ -315,14 +381,15 @@ def build(self, line: List[str]) -> models.Material:
             unique_name = models.AnnotatedStr(name_val, was_empty=True)
         extract_label = self._build_freetext_or_term_ref(self.extract_label_header, line)
         characteristics = tuple(
-            self._build_complex(hdr, line, models.Characteristics, allow_list=True)
+            self._build_complex(hdr, line, models.build_characteristics, allow_list=True)
             for hdr in self.characteristic_headers
         )
         comments = tuple(
             models.Comment(hdr.label, line[hdr.col_no]) for hdr in self.comment_headers
         )
         factor_values = tuple(
-            self._build_complex(hdr, line, models.FactorValue) for hdr in self.factor_value_headers
+            self._build_complex(hdr, line, models.build_factor_value)
+            for hdr in self.factor_value_headers
         )
         material_type = self._build_freetext_or_term_ref(self.material_type_header, line)
         # Then, constructing ``Material`` is easy
@@ -339,12 +406,12 @@ def build(self, line: List[str]) -> models.Material:
         )


-class _ProcessBuilder(_NodeBuilderBase):
+class _ProcessBuilder(_NodeBuilderBase[models.Process]):
     """Helper class to construct ``Process`` objects."""

-    name_headers = table_headers.PROCESS_NAME_HEADERS
+    name_headers: Tuple[str, ...] = table_headers.PROCESS_NAME_HEADERS

-    allowed_column_types = (
+    allowed_column_types: Tuple[str, ...] = (
         table_headers.PROTOCOL_REF,
         # Primary annotations (not parametrized)
         table_headers.PERFORMER,
@@ -385,7 +452,7 @@ def build(self, line: List[str]) -> models.Process:
             models.Comment(hdr.label, line[hdr.col_no]) for hdr in self.comment_headers
         )
         parameter_values = tuple(
-            self._build_complex(hdr, line, models.ParameterValue, allow_list=True)
+            self._build_complex(hdr, line, models.build_parameter_value, allow_list=True)
             for hdr in self.parameter_value_headers
         )
         # Check for special case annotations
@@ -410,9 +477,16 @@ def build(self, line: List[str]) -> models.Process:
             self._build_simple_headers_list(),
         )

-    def _build_protocol_ref_and_name(self, line: List[str]):
+    def _build_protocol_ref_and_name(
+        self, line: List[str]
+    ) -> Tuple[str, Union[models.AnnotatedStr, str], Optional[str], Optional[str]]:
         # At least one of these headers has to be specified
-        assert self.name_header or self.protocol_ref_header
+        if not self.protocol_ref_header:  # pragma: no cover
+            raise ParseIsatabException(
+                "No protocol reference header found for process in file {}".format(self.filename)
+            )
         # Perform case distinction on which case is actually true
         counter_value = self._next_counter()
         assay_id = "-{}".format(self.assay_id) if self.assay_id else ""
@@ -477,10 +551,14 @@ class _RowBuilderBase:
     """Base class for row builders from study and assay files"""

     #: Registry of column header to node builder
-    node_builders = None
+    node_builders: Dict[str, Type[_NodeBuilderBase]]

     def __init__(
-        self, header: List[ColumnHeader], filename: str, study_id: str, assay_id: str = None
+        self,
+        header: List[ColumnHeader],
+        filename: str,
+        study_id: str,
+        assay_id: Optional[str] = None,
     ):
         self.header = header
         self.filename = filename
@@ -488,7 +566,7 @@ def __init__(
         self.assay_id = assay_id
         self._builders = list(self._make_builders())

-    def _make_builders(self):
+    def _make_builders(self) -> Generator[_NodeBuilderBase, None, None]:
         """Construct the builder objects for the objects"""
         breaks = list(self._make_breaks())
         for start, end in zip(breaks, breaks[1:]):
@@ -566,7 +644,7 @@ def _make_breaks(self):
                 noname_protocol_ref = False
         yield len(self.header)  # index to end of list

-    def build(self, line):
+    def build(self, line: List[str]) -> List[models.Node]:
         return [b.build(line) for b in self._builders]
@@ -730,11 +808,11 @@ class StudyRowReader:
     """

     @classmethod
-    def from_stream(klass, study_id: str, input_file: TextIO, filename: str = None):
+    def from_stream(cls, study_id: str, input_file: TextIO, filename: Optional[str] = None):
         """Construct from file-like object"""
         return StudyRowReader(study_id, input_file, filename)

-    def __init__(self, study_id: str, input_file: TextIO, filename: str):
+    def __init__(self, study_id: str, input_file: TextIO, filename: Optional[str]):
         self.study_id = study_id
         self.input_file = input_file
         self.filename = filename or getattr(input_file, "name", "")
@@ -804,7 +882,7 @@ class StudyReader:
     """

     @classmethod
-    def from_stream(klass, study_id: str, input_file: TextIO, filename=None):
+    def from_stream(cls, study_id: str, input_file: TextIO, filename: Optional[str] = None):
         """Construct from file-like object"""
         return StudyReader(study_id, input_file, filename)
@@ -849,11 +927,13 @@ class AssayRowReader:
     """

     @classmethod
-    def from_stream(klass, study_id: str, assay_id: str, input_file: TextIO, filename: str = None):
+    def from_stream(
+        cls, study_id: str, assay_id: str, input_file: TextIO, filename: Optional[str] = None
+    ):
         """Construct from file-like object"""
         return
AssayRowReader(study_id, assay_id, input_file, filename) - def __init__(self, study_id: str, assay_id: str, input_file: TextIO, filename: str): + def __init__(self, study_id: str, assay_id: str, input_file: TextIO, filename: Optional[str]): self.study_id = study_id self.assay_id = assay_id self.input_file = input_file @@ -926,7 +1006,9 @@ class AssayReader: """ @classmethod - def from_stream(klass, study_id: str, assay_id: str, input_file: TextIO, filename=None): + def from_stream( + cls, study_id: str, assay_id: str, input_file: TextIO, filename: Optional[str] = None + ): """Construct from file-like object""" return AssayReader(study_id, assay_id, input_file, filename) diff --git a/altamisa/isatab/parse_investigation.py b/altamisa/isatab/parse_investigation.py index 5ac8aff..6e9fb64 100644 --- a/altamisa/isatab/parse_investigation.py +++ b/altamisa/isatab/parse_investigation.py @@ -4,20 +4,19 @@ from __future__ import generator_stop -import os import csv -from datetime import datetime +import datetime +import os from pathlib import Path -from typing import Iterator, TextIO +from typing import Dict, Iterator, List, Optional, Sequence, TextIO import warnings +from . import models from ..constants import investigation_headers from ..exceptions import ParseIsatabException, ParseIsatabWarning from .helpers import list_strip -from . import models - -__author__ = "Manuel Holtgrewe " +__author__ = "Manuel Holtgrewe " # Helper function to extract comment headers and values from a section dict @@ -91,17 +90,14 @@ def _split_study_protocols_components( # Helper function to validate and convert string dates to date objects -def _parse_date(date_string) -> datetime.date: +def _parse_date(date_string: str) -> Optional[datetime.date]: if date_string: try: - date = datetime.strptime(date_string, "%Y-%m-%d").date() + return datetime.datetime.strptime(date_string, "%Y-%m-%d").date() except ValueError as e: # pragma: no cover - tpl = 'Invalid ISO8601 date "{}"' - msg = tpl.format(date_string) - raise ParseIsatabException(msg) from e + raise ParseIsatabException(f'Invalid ISO8601 date "{date_string}"') from e else: - date = None - return date + return None class InvestigationReader: @@ -113,17 +109,17 @@ class InvestigationReader: """ @classmethod - def from_stream(self, input_file: TextIO, filename=None): + def from_stream(cls, input_file: TextIO, filename=None): """Construct from file-like object""" return InvestigationReader(input_file, filename) def __init__(self, input_file: TextIO, filename=None): self._filename = filename or getattr(input_file, "name", "") self._reader = csv.reader(input_file, delimiter="\t", quotechar='"') - self._line = None + self._line: Optional[List[str]] = None self._read_next_line() - def _read_next_line(self): + def _read_next_line(self) -> Optional[List[str]]: """Read next line, skipping comments starting with ``'#'``.""" prev_line = self._line try: @@ -158,9 +154,9 @@ def read(self) -> models.InvestigationInfo: # ("section headings MUST appear in the Investigation file (in order)") ontology_refs = {o.name: o for o in self._read_ontology_source_reference()} info = self._read_basic_info() - publications = list(self._read_publications()) - contacts = list(self._read_contacts()) - studies = list(self._read_studies()) + publications = tuple(self._read_publications()) + contacts = tuple(self._read_contacts()) + studies = tuple(self._read_studies()) investigation = models.InvestigationInfo( ontology_refs, info, publications, contacts, studies ) @@ -170,11 +166,12 @@ def read(self) 
-> models.InvestigationInfo: # i.e. ONTOLOGY SOURCE REFERENCE, INVESTIGATION PUBLICATIONS, # INVESTIGATION CONTACTS, STUDY DESIGN DESCRIPTORS, STUDY PUBLICATIONS, # STUDY FACTORS, STUDY ASSAYS, STUDY PROTOCOLS, STUDY CONTACTS - def _read_multi_column_section(self, prefix, ref_keys, section_name): + def _read_multi_column_section(self, prefix: str, ref_keys: Sequence[str], section_name: str): section = {} comment_keys = [] while self._next_line_startswith(prefix) or self._next_line_startswith_comment(): line = self._read_next_line() + assert line is not None key = line[0] if key.startswith("Comment"): comment_keys.append(key) @@ -206,6 +203,8 @@ def _read_single_column_section(self, prefix, ref_keys, section_name): comment_keys = [] while self._next_line_startswith(prefix) or self._next_line_startswith_comment(): line = self._read_next_line() + if line is None: + break if len(line) > 2: # pragma: no cover tpl = "Line {} contains more than one value: {}" msg = tpl.format(line[0], line[1:]) @@ -233,7 +232,9 @@ def _read_single_column_section(self, prefix, ref_keys, section_name): def _read_ontology_source_reference(self) -> Iterator[models.OntologyRef]: # Read ONTOLOGY SOURCE REFERENCE header line = self._read_next_line() - if not line[0] == investigation_headers.ONTOLOGY_SOURCE_REFERENCE: # pragma: no cover + if ( + not line or not line[0] == investigation_headers.ONTOLOGY_SOURCE_REFERENCE + ): # pragma: no cover tpl = "Expected {} but got {}" msg = tpl.format(investigation_headers.ONTOLOGY_SOURCE_REFERENCE, line) raise ParseIsatabException(msg) @@ -259,7 +260,7 @@ def _read_ontology_source_reference(self) -> Iterator[models.OntologyRef]: def _read_basic_info(self) -> models.BasicInfo: # Read INVESTIGATION header line = self._read_next_line() - if not line[0] == investigation_headers.INVESTIGATION: # pragma: no cover + if not line or not line[0] == investigation_headers.INVESTIGATION: # pragma: no cover tpl = "Expected {} but got {}" msg = tpl.format(investigation_headers.INVESTIGATION, line) raise ParseIsatabException(msg) @@ -286,7 +287,9 @@ def _read_basic_info(self) -> models.BasicInfo: def _read_publications(self) -> Iterator[models.PublicationInfo]: # Read INVESTIGATION PUBLICATIONS header line = self._read_next_line() - if not line[0] == investigation_headers.INVESTIGATION_PUBLICATIONS: # pragma: no cover + if ( + not line or not line[0] == investigation_headers.INVESTIGATION_PUBLICATIONS + ): # pragma: no cover tpl = "Expected {} but got {}" msg = tpl.format(investigation_headers.INVESTIGATION_PUBLICATIONS, line) raise ParseIsatabException(msg) @@ -311,7 +314,9 @@ def _read_publications(self) -> Iterator[models.PublicationInfo]: def _read_contacts(self) -> Iterator[models.ContactInfo]: # Read INVESTIGATION CONTACTS header line = self._read_next_line() - if not line[0] == investigation_headers.INVESTIGATION_CONTACTS: # pragma: no cover + if ( + not line or not line[0] == investigation_headers.INVESTIGATION_CONTACTS + ): # pragma: no cover tpl = "Expected {} but got {}" msg = tpl.format(investigation_headers.INVESTIGATION_CONTACTS, line) raise ParseIsatabException(msg) @@ -359,7 +364,7 @@ def _read_studies(self) -> Iterator[models.StudyInfo]: while self._line: # Read STUDY header line = self._read_next_line() - if not line[0] == investigation_headers.STUDY: # pragma: no cover + if not line or not line[0] == investigation_headers.STUDY: # pragma: no cover tpl = "Expected {} but got {}" msg = tpl.format(investigation_headers.INVESTIGATION, line) raise ParseIsatabException(msg) @@ -396,10 
+401,12 @@ def _read_studies(self) -> Iterator[models.StudyInfo]: basic_info, design_descriptors, publications, factors, assays, protocols, contacts ) - def _read_study_design_descriptors(self) -> Iterator[models.FreeTextOrTermRef]: + def _read_study_design_descriptors(self) -> Iterator[models.DesignDescriptorsInfo]: # Read STUDY DESIGN DESCRIPTORS header line = self._read_next_line() - if not line[0] == investigation_headers.STUDY_DESIGN_DESCRIPTORS: # pragma: no cover + if ( + not line or not line[0] == investigation_headers.STUDY_DESIGN_DESCRIPTORS + ): # pragma: no cover tpl = "Expected {} but got {}" msg = tpl.format(investigation_headers.STUDY_DESIGN_DESCRIPTORS, line) raise ParseIsatabException(msg) @@ -419,7 +426,7 @@ def _read_study_design_descriptors(self) -> Iterator[models.FreeTextOrTermRef]: def _read_study_publications(self) -> Iterator[models.PublicationInfo]: # Read STUDY PUBLICATIONS header line = self._read_next_line() - if not line[0] == investigation_headers.STUDY_PUBLICATIONS: # pragma: no cover + if not line or not line[0] == investigation_headers.STUDY_PUBLICATIONS: # pragma: no cover tpl = "Expected {} but got {}" msg = tpl.format(investigation_headers.STUDY_PUBLICATIONS, line) raise ParseIsatabException(msg) @@ -444,7 +451,7 @@ def _read_study_publications(self) -> Iterator[models.PublicationInfo]: def _read_study_factors(self) -> Iterator[models.FactorInfo]: # Read STUDY FACTORS header line = self._read_next_line() - if not line[0] == investigation_headers.STUDY_FACTORS: # pragma: no cover + if not line or not line[0] == investigation_headers.STUDY_FACTORS: # pragma: no cover tpl = "Expected {} but got {}" msg = tpl.format(investigation_headers.STUDY_FACTORS, line) raise ParseIsatabException(msg) @@ -464,7 +471,7 @@ def _read_study_factors(self) -> Iterator[models.FactorInfo]: def _read_study_assays(self) -> Iterator[models.AssayInfo]: # Read STUDY ASSAYS header line = self._read_next_line() - if not line[0] == investigation_headers.STUDY_ASSAYS: # pragma: no cover + if not line or not line[0] == investigation_headers.STUDY_ASSAYS: # pragma: no cover tpl = "Expected {} but got {}" msg = tpl.format(investigation_headers.STUDY_ASSAYS, line) raise ParseIsatabException(msg) @@ -517,7 +524,7 @@ def _read_study_assays(self) -> Iterator[models.AssayInfo]: def _read_study_protocols(self) -> Iterator[models.ProtocolInfo]: # Read STUDY PROTOCOLS header line = self._read_next_line() - if not line[0] == investigation_headers.STUDY_PROTOCOLS: # pragma: no cover + if not line or not line[0] == investigation_headers.STUDY_PROTOCOLS: # pragma: no cover tpl = "Expected {} but got {}" msg = tpl.format(investigation_headers.STUDY_PROTOCOLS, line) raise ParseIsatabException(msg) @@ -553,12 +560,13 @@ def _read_study_protocols(self) -> Iterator[models.ProtocolInfo]: msg = tpl.format(investigation_headers.STUDY_PROTOCOL_NAME, name) raise ParseIsatabException(msg) type_ont = models.OntologyTermRef(type_term, type_term_acc, type_term_src) - paras = { - p.name if hasattr(p, "name") else p: p - for p in _split_study_protocols_parameters( - para_names, para_name_term_accs, para_name_term_srcs - ) - } + paras: Dict[str, models.FreeTextOrTermRef] = {} + for p in _split_study_protocols_parameters( + para_names, para_name_term_accs, para_name_term_srcs + ): + key = models.free_text_or_term_ref_to_str(p) + if key: + paras[key] = p comps = { c.name: c for c in _split_study_protocols_components( @@ -581,7 +589,7 @@ def _read_study_protocols(self) -> Iterator[models.ProtocolInfo]: def 
_read_study_contacts(self) -> Iterator[models.ContactInfo]: # Read STUDY CONTACTS header line = self._read_next_line() - if not line[0] == investigation_headers.STUDY_CONTACTS: # pragma: no cover + if not line or not line[0] == investigation_headers.STUDY_CONTACTS: # pragma: no cover tpl = "Expected {} but got {}" msg = tpl.format(investigation_headers.STUDY_CONTACTS, line) raise ParseIsatabException(msg) diff --git a/altamisa/isatab/validate_assay_study.py b/altamisa/isatab/validate_assay_study.py index f78f8c5..9cd6d3b 100644 --- a/altamisa/isatab/validate_assay_study.py +++ b/altamisa/isatab/validate_assay_study.py @@ -7,16 +7,15 @@ writing) and provide a comprehensive list of warnings of different degree. """ -from typing import Dict +from typing import Dict, Optional, Union import warnings +from . import models from ..constants import table_headers, table_restrictions, table_tokens -from ..exceptions import ModerateIsaValidationWarning, CriticalIsaValidationWarning +from ..exceptions import CriticalIsaValidationWarning, ModerateIsaValidationWarning from .helpers import is_ontology_term_ref -from . import models - -__author__ = "Mathias Kuhring " +__author__ = "Mathias Kuhring " # Constants to differentiate models when validating materials, processes or arcs @@ -69,7 +68,7 @@ def __init__( model_type, factor_refs: Dict[str, models.FactorInfo], ontology_validator: _OntologyTermRefValidator, - assay_info: models.AssayInfo = None, + assay_info: Optional[models.AssayInfo] = None, ): self._model_type = model_type self._factor_refs = factor_refs @@ -215,15 +214,15 @@ def _validate_single_assay_restriction( def _validate_ontology_term_refs(self, material: models.Material): # Validate consistency of all potential ontology term references in a material - if material.extract_label and is_ontology_term_ref(material.extract_label): + if isinstance(material.extract_label, models.OntologyTermRef): self._ontology_validator.validate(material.extract_label) - if material.material_type and is_ontology_term_ref(material.material_type): + if isinstance(material.material_type, models.OntologyTermRef): self._ontology_validator.validate(material.material_type) for c in material.characteristics: for v in c.value: - if is_ontology_term_ref(v): + if isinstance(v, models.OntologyTermRef): self._ontology_validator.validate(v) - if is_ontology_term_ref(c.unit): + if isinstance(c.unit, models.OntologyTermRef): self._ontology_validator.validate(c.unit) def _validate_factor_values(self, factor_values): @@ -241,8 +240,8 @@ class _ProcessValidator: def __init__( self, protocols: Dict[str, models.ProtocolInfo], - ontology_validator: _OntologyTermRefValidator = None, - assay_info: models.AssayInfo = None, + ontology_validator: Optional[_OntologyTermRefValidator] = None, + assay_info: Optional[models.AssayInfo] = None, ): self._protocols = protocols self._ontology_validator = ontology_validator @@ -305,6 +304,9 @@ def _validate_restrictions_by_assay_tech(self, test, assay_tech_restrictions): # Check if restricted to assay technology if ( test in assay_tech_restrictions + and self._assay_info + and isinstance(self._assay_info.technology_type, models.OntologyTermRef) + and self._assay_info.technology_type.name and self._assay_info.technology_type.name.lower() not in assay_tech_restrictions[test] ): tpl = '"{}" not supported by assay technology "{}" (only "{}")' @@ -322,15 +324,19 @@ def _validate_restrictions_by_protocol_type( if test in protocol_type_restrictions: # Check prototype with partial matching, as types are 
sometimes extended any_match = False + protocol = self._protocols[process.protocol_ref] + if isinstance(protocol.type, models.OntologyTermRef) and protocol.type.name: + protocol_name = protocol.type.name + else: + protocol_name = None for res_type in protocol_type_restrictions[test]: - any_match = ( - any_match or res_type in self._protocols[process.protocol_ref].type.name.lower() - ) + if isinstance(protocol.type, models.OntologyTermRef) and protocol.type.name: + any_match = any_match or res_type in protocol.type.name.lower() if not any_match: tpl = '"{}" not supported by protocol type "{}" (only "{}")' msg = tpl.format( test, - self._protocols[process.protocol_ref].type.name, + protocol_name, ", ".join(protocol_type_restrictions[test]), ) warnings.warn(msg, ModerateIsaValidationWarning) @@ -368,23 +374,25 @@ def _validate_special_case_annotations(self, process: models.Process): ) def _validate_ontology_term_refs(self, process: models.Process): + if not self._ontology_validator: + return # skip # Validate consistency of all potential ontology term references in a process for parameter in process.parameter_values: - for v in parameter.value: - if is_ontology_term_ref(v): + for v in parameter.value or []: + if isinstance(v, models.OntologyTermRef): self._ontology_validator.validate(v) - if is_ontology_term_ref(parameter.unit): + if isinstance(parameter.unit, models.OntologyTermRef): self._ontology_validator.validate(parameter.unit) - if process.first_dimension and is_ontology_term_ref(process.first_dimension): + if isinstance(process.first_dimension, models.OntologyTermRef): self._ontology_validator.validate(process.first_dimension) - if process.second_dimension and is_ontology_term_ref(process.second_dimension): + if isinstance(process.second_dimension, models.OntologyTermRef): self._ontology_validator.validate(process.second_dimension) class _ArcValidator: """Validator for Arcs""" - def __init__(self, materials, processes, model_type): + def __init__(self, materials, processes, model_type: str): self._nodes = {**materials, **processes} self._model_type = model_type @@ -419,10 +427,10 @@ def validate(self, arc: models.Arc): class _AssayAndStudyValidator: """Base validator for Study and Assay""" - _study_info = None - _assay_info = None - _model = None - _model_type = None + _study_info: models.StudyInfo + _assay_info: Optional[models.AssayInfo] + _model: Union[models.Study, models.Assay] + _model_type: str def __init__(self, investigation: models.InvestigationInfo): self._ontology_validator = _OntologyTermRefValidator(investigation.ontology_source_refs) @@ -506,7 +514,7 @@ def __init__( study_info: models.StudyInfo, assay_info: models.AssayInfo, assay: models.Assay, - parent_study: models.Study = None, + parent_study: Optional[models.Study] = None, ): self._study_info = study_info self._assay_info = assay_info @@ -524,6 +532,8 @@ def validate(self): def _validate_dependency(self): """Validate if assay complies with parent study""" + if not self._parent_study: + return # skip # Check if all samples in the assays are declared in the parent study # Collect materials of type "Sample Name" @@ -539,9 +549,12 @@ def _validate_dependency(self): samples_not_in_study = [s for s in assay_samples if s not in study_samples] if samples_not_in_study: tpl = "Found samples in assay '{}' but not in parent study '{}':\\n{}" - msg = tpl.format( - self._assay_info.path.name, - self._study_info.info.path.name, - ", ".join(samples_not_in_study), - ) + if self._assay_info: + msg = tpl.format( + 
self._assay_info.path.name if self._assay_info.path else "",
+                    self._study_info.info.path.name if self._study_info.info.path else "",
+                    ", ".join(samples_not_in_study),
+                )
+            else:
+                msg = "Found samples in assay but not in parent study"
             warnings.warn(msg, CriticalIsaValidationWarning)
diff --git a/altamisa/isatab/validate_investigation.py b/altamisa/isatab/validate_investigation.py
index 771f8af..136cfc3 100644
--- a/altamisa/isatab/validate_investigation.py
+++ b/altamisa/isatab/validate_investigation.py
@@ -11,17 +11,15 @@
 from typing import Dict, Tuple
 import warnings
 
+from . import models
 from ..exceptions import (
     AdvisoryIsaValidationWarning,
     CriticalIsaValidationWarning,
     ModerateIsaValidationWarning,
 )
-from .helpers import is_ontology_term_ref
-from . import models
 from .validate_assay_study import _OntologyTermRefValidator
-
-__author__ = "Mathias Kuhring <mathias.kuhring@bihealth.de>"
+__author__ = "Mathias Kuhring <mathias.kuhring@bih-charite.de>"
 
 
 # Pattern and helper functions for validation ------------------------------------------------------
@@ -34,7 +32,7 @@
 PMID_PATTERN = re.compile("^\\d+$")
 
 
-def _validate_mail_address(mail_address) -> str:
+def _validate_mail_address(mail_address: str) -> None:
     """Helper function to validate mail strings"""
     if mail_address and not MAIL_PATTERN.match(mail_address):
         tpl = "Invalid mail address: {}"
@@ -42,7 +40,7 @@ def _validate_mail_address(mail_address) -> str:
         warnings.warn(msg, AdvisoryIsaValidationWarning)
 
 
-def _validate_phone_number(phone_number) -> str:
+def _validate_phone_number(phone_number: str) -> None:
     """Helper function to validate phone/fax number strings"""
     if phone_number and not PHONE_PATTERN.match(phone_number):
         tpl = "Invalid phone/fax number: {}"
@@ -50,7 +48,7 @@ def _validate_phone_number(phone_number) -> str:
         warnings.warn(msg, AdvisoryIsaValidationWarning)
 
 
-def _validate_doi(doi) -> str:
+def _validate_doi(doi: str) -> None:
     """Helper function to validate doi strings"""
     if doi and not DOI_PATTERN.match(doi):
         tpl = "Invalid doi string: {}"
@@ -58,7 +56,7 @@ def _validate_doi(doi) -> str:
         warnings.warn(msg, AdvisoryIsaValidationWarning)
 
 
-def _validate_pubmed_id(pubmed_id) -> str:
+def _validate_pubmed_id(pubmed_id: str) -> None:
     """Helper function to validate pubmed id strings"""
     if pubmed_id and not PMID_PATTERN.match(pubmed_id):
         tpl = "Invalid pubmed_id string: {}"
@@ -195,36 +193,36 @@ def _validate_studies(self):
         self._validate_assays(study.assays, study.info.identifier)
         self._validate_protocols(study.protocols)
 
-    def _validate_publications(self, publications: Tuple[models.PublicationInfo]):
+    def _validate_publications(self, publications: Tuple[models.PublicationInfo, ...]):
         # Validate format of specific fields in publications
         for publication in publications:
             _validate_pubmed_id(publication.pubmed_id)
             _validate_doi(publication.doi)
-            if is_ontology_term_ref(publication.status):
+            if isinstance(publication.status, models.OntologyTermRef):
                 self._ontology_validator.validate(publication.status)
 
-    def _validate_contacts(self, contacts: Tuple[models.ContactInfo]):
+    def _validate_contacts(self, contacts: Tuple[models.ContactInfo, ...]):
         # Validate format of specific fields in contacts
         for contact in contacts:
             _validate_mail_address(contact.email)
             _validate_phone_number(contact.phone)
             _validate_phone_number(contact.fax)
-            if is_ontology_term_ref(contact.role):
+            if isinstance(contact.role, models.OntologyTermRef):
                 self._ontology_validator.validate(contact.role)
 
-    def _validate_designs(self, designs: Tuple[models.DesignDescriptorsInfo]):
+    def _validate_designs(self, designs: 
Tuple[models.DesignDescriptorsInfo, ...]): # Validate format of specific fields in designs for design in designs: - if is_ontology_term_ref(design.type): + if isinstance(design.type, models.OntologyTermRef): self._ontology_validator.validate(design.type) def _validate_factors(self, factors: Dict[str, models.FactorInfo]): # Validate format of specific fields in factors for factor in factors.values(): - if is_ontology_term_ref(factor.type): + if isinstance(factor.type, models.OntologyTermRef): self._ontology_validator.validate(factor.type) - def _validate_assays(self, assays: Tuple[models.AssayInfo], study_id: str): + def _validate_assays(self, assays: Tuple[models.AssayInfo, ...], study_id: str): # Check if any assays exists (according to specs, having an assays is not mandatory) if not assays: tpl = "No assays declared in study '{}' of investigation '{}'" @@ -236,12 +234,12 @@ def _validate_assays(self, assays: Tuple[models.AssayInfo], study_id: str): # (path, measurement type, technology type and technology platform) meas_type = ( assay.measurement_type.name - if is_ontology_term_ref(assay.measurement_type) + if isinstance(assay.measurement_type, models.OntologyTermRef) else assay.measurement_type ) tech_type = ( assay.technology_type.name - if is_ontology_term_ref(assay.technology_type) + if isinstance(assay.technology_type, models.OntologyTermRef) else assay.technology_type ) if not (assay.path and meas_type and tech_type): @@ -268,19 +266,19 @@ def _validate_assays(self, assays: Tuple[models.AssayInfo], study_id: str): else: self._assay_paths.add(assay.path) # Validate format of specific fields in assays - if is_ontology_term_ref(assay.measurement_type): + if isinstance(assay.measurement_type, models.OntologyTermRef): self._ontology_validator.validate(assay.measurement_type) - if is_ontology_term_ref(assay.technology_type): + if isinstance(assay.technology_type, models.OntologyTermRef): self._ontology_validator.validate(assay.technology_type) def _validate_protocols(self, protocols: Dict[str, models.ProtocolInfo]): # Validate format of specific fields in protocols for protocol in protocols.values(): - if is_ontology_term_ref(protocol.type): + if isinstance(protocol.type, models.OntologyTermRef): self._ontology_validator.validate(protocol.type) for parameter in protocol.parameters.values(): - if is_ontology_term_ref(parameter): + if isinstance(parameter, models.OntologyTermRef): self._ontology_validator.validate(parameter) for component in protocol.components.values(): - if is_ontology_term_ref(component.type): + if isinstance(component.type, models.OntologyTermRef): self._ontology_validator.validate(component.type) diff --git a/altamisa/isatab/write_assay_study.py b/altamisa/isatab/write_assay_study.py index 5c065d2..d711d5d 100644 --- a/altamisa/isatab/write_assay_study.py +++ b/altamisa/isatab/write_assay_study.py @@ -4,22 +4,27 @@ from __future__ import generator_stop + import csv import functools import os -from typing import NamedTuple, TextIO +from typing import Callable, Dict, List, Optional, Sequence, TextIO, Tuple, Type, Union from ..constants import table_headers from ..constants.table_tokens import TOKEN_UNKNOWN from ..exceptions import WriteIsatabException -from .headers import AssayHeaderParser, StudyHeaderParser +from .headers import ( + AssayHeaderParser, + ColumnHeader, + HeaderParserBase, + StudyHeaderParser, +) from .helpers import is_ontology_term_ref -from .models import Material, OntologyTermRef, Process - +from .models import Arc, Assay, Material, OntologyTermRef, 
Process, Study
 
 __author__ = (
-    "Manuel Holtgrewe <manuel.holtgrewe@bihealth.de>, "
-    "Mathias Kuhring <mathias.kuhring@bihealth.de>"
+    "Manuel Holtgrewe <manuel.holtgrewe@bih-charite.de>, "
+    "Mathias Kuhring <mathias.kuhring@bih-charite.de>"
 )
 
 
@@ -29,19 +34,28 @@ class _Digraph:
     """Simple class encapsulating directed graph with vertices and arcs"""
 
-    def __init__(self, vertices, arcs, predicate_is_starting):
+    def __init__(
+        self,
+        vertices: List[Union[Material, Process]],
+        arcs: List[Arc],
+        predicate_is_starting: Callable[[Union[Material, Process]], bool],
+    ):
         #: Graph vertices/nodes (models.Material and models.Process)
-        self.vertices = vertices
+        self.vertices: List[Union[Material, Process]] = vertices
         #: Graph arcs/edges (models.Arc)
-        self.arcs = arcs
+        self.arcs: List[Arc] = arcs
         #: Name to node mapping
-        self.v_by_name = {v.unique_name: v for v in self.vertices}
+        self.v_by_name: Dict[str, Union[Material, Process]] = {
+            v.unique_name: v for v in self.vertices
+        }
         #: Arcs as tuple of tail and head
-        self.a_by_name = {(a[0], a[1]): None for a in self.arcs}
+        self.a_by_name: Dict[Tuple[str, str], Optional[Arc]] = {
+            (a[0], a[1]): None for a in self.arcs
+        }
         #: Names of starting nodes
         self.source_names = [v.unique_name for v in self.vertices if predicate_is_starting(v)]
         #: Outgoing vertices/nodes
-        self.outgoing = {}
+        self.outgoing: Dict[str, List[str]] = {}
 
         for s_name, t_name in self.a_by_name.keys():
             self.outgoing.setdefault(s_name, []).append(t_name)
@@ -51,15 +65,15 @@ class _UnionFind:
     """Union-Find (disjoint set) data structure allowing to address by vertex name"""
 
-    def __init__(self, vertex_names):
+    def __init__(self, vertex_names: List[str]):
         #: Node name to id mapping
-        self._name_to_id = {v: i for i, v in enumerate(vertex_names)}
+        self._name_to_id: Dict[str, int] = {v: i for i, v in enumerate(vertex_names)}
         #: Pointer to the containing sets
-        self._id = list(range(len(vertex_names)))
+        self._id: List[int] = list(range(len(vertex_names)))
         #: Size of the set (_sz[_id[v]] is the size of the set that contains v)
-        self._sz = [1] * len(vertex_names)
+        self._sz: List[int] = [1] * len(vertex_names)
 
-    def find(self, v):
+    def find(self, v: int) -> int:
         assert type(v) is int
 
         j = v
@@ -69,13 +83,13 @@ def find(self, v):
 
         return j
 
-    def find_by_name(self, v_name):
+    def find_by_name(self, v_name: str) -> int:
         return self.find(self._name_to_id[v_name])
 
-    def union_by_name(self, v_name, w_name):
+    def union_by_name(self, v_name: str, w_name: str):
         self.union(self.find_by_name(v_name), self.find_by_name(w_name))
 
-    def union(self, v, w):
+    def union(self, v: int, w: int):
         assert type(v) is int
         assert type(w) is int
         i = self.find(v)
@@ -94,7 +108,7 @@ def union(self, v, w):
             self._sz[i] += self._sz[j]
 
 
-def _is_of_starting_type(starting_type, v):
+def _is_of_starting_type(starting_type: str, v: Union[Material, Process]) -> bool:
     """Predicate to select vertices based on starting type."""
     return getattr(v, "type", None) == starting_type
@@ -104,12 +118,12 @@ class RefTableBuilder:
 
     def __init__(self, nodes, arcs, predicate_is_starting):
         # Input directed graph
-        self.digraph = _Digraph(nodes, arcs, predicate_is_starting)
+        self.digraph: _Digraph = _Digraph(nodes, arcs, predicate_is_starting)
         #: Output table rows
-        self._rows = []
+        self._rows: List[List[str]] = []
 
-    def _partition(self):
-        uf = _UnionFind(self.digraph.v_by_name.keys())
+    def _partition(self) -> List[List[str]]:
+        uf = _UnionFind(list(self.digraph.v_by_name.keys()))
 
         for arc in self.digraph.arcs:
             uf.union_by_name(arc[0], arc[1])
@@ -121,10 +135,10 @@ def _partition(self):
 
         return list(result.values())
 
-    def _dump_row(self, v_names):
+    def 
_dump_row(self, v_names: Sequence[str]): self._rows.append(list(v_names)) - def _dfs(self, source, path): + def _dfs(self, source: str, path: List[str]): next_v_names = None if source in self.digraph.outgoing: @@ -139,13 +153,13 @@ def _dfs(self, source, path): else: self._dump_row(path) - def _process_component(self, v_names): + def _process_component(self, v_names: List[str]): # NB: The algorithm below looks a bit involved but it's the simplest way without an # external library to get the intersection of two lists of strings in the same order as # in the input file and still using hashing for lookup. intersection = set(v_names) & set(self.digraph.source_names) sources_set = set() - sources = [] + sources: List[str] = [] for name in self.digraph.source_names: if name in intersection and name not in sources_set: sources_set.add(name) @@ -154,7 +168,7 @@ def _process_component(self, v_names): for source in sources: self._dfs(source, [source]) - def run(self): + def run(self) -> List[List[str]]: components = self._partition() for component in components: @@ -170,20 +184,33 @@ class _WriterBase: """Base class that writes a file from an ``Study`` or ``Assay`` object.""" #: Note type starting a graph - _starting_type = None + _starting_type: str #: Parser for study or assay headers - _header_parser = None + _header_parser: Type[HeaderParserBase] + + # Reference table for output + _ref_table: List[List[str]] + # Headers for output + _headers: List[List[ColumnHeader]] @classmethod def from_stream( - cls, study_or_assay: NamedTuple, output_file: TextIO, quote=None, lineterminator=None + cls, + study_or_assay: Union[Study, Assay], + output_file: TextIO, + quote=None, + lineterminator=None, ): """Construct from file-like object""" return cls(study_or_assay, output_file, quote, lineterminator) def __init__( - self, study_or_assay: NamedTuple, output_file: TextIO, quote=None, lineterminator=None + self, + study_or_assay: Union[Study, Assay], + output_file: TextIO, + quote=None, + lineterminator=None, ): # Study or Assay model self._model = study_or_assay @@ -203,12 +230,8 @@ def __init__( escapechar="\\", quotechar=self.quote if self.quote else "|", ) - # Reference table for output - self._ref_table = None - # Headers for output - self._headers = None - def _write_next_line(self, line: [str]): + def _write_next_line(self, line: List[str]): """Write next line.""" self._writer.writerow(line) diff --git a/altamisa/isatab/write_investigation.py b/altamisa/isatab/write_investigation.py index 087b0c1..9412d1b 100644 --- a/altamisa/isatab/write_investigation.py +++ b/altamisa/isatab/write_investigation.py @@ -3,25 +3,24 @@ """ from __future__ import generator_stop + import csv import os -from typing import Dict, List, TextIO +from typing import Collection, Dict, List, Optional, TextIO import warnings -from ..exceptions import WriteIsatabException, WriteIsatabWarning -from ..constants import investigation_headers -from .helpers import is_ontology_term_ref from . 
import models
-
+from ..constants import investigation_headers
+from ..exceptions import WriteIsatabException, WriteIsatabWarning
 
 __author__ = (
-    "Manuel Holtgrewe <manuel.holtgrewe@bihealth.de>, "
-    "Mathias Kuhring <mathias.kuhring@bihealth.de>"
+    "Manuel Holtgrewe <manuel.holtgrewe@bih-charite.de>, "
+    "Mathias Kuhring <mathias.kuhring@bih-charite.de>"
 )
 
 
 # Helper to extract comments and align them into rows
-def _extract_comments(section_objects: list):
+def _extract_comments(section_objects: Collection[models.InvestigationFieldWithComments]):
     names = sorted({comment.name for obj in section_objects for comment in obj.comments})
     comments = {name: [""] * len(section_objects) for name in names}
     for i, obj in enumerate(section_objects):
@@ -121,7 +120,7 @@ def _write_section(
         section_name: str,
         section: Dict[str, list],
         comments: Dict[str, list],
-        headers: List[str] = None,
+        headers: Optional[List[str]] = None,
     ):
         # Add comments to section dict
         if comments:
@@ -200,26 +199,15 @@ def _write_publications(self):
                 publication.authors
             )
             section[investigation_headers.INVESTIGATION_PUBLICATION_TITLE].append(publication.title)
-            if is_ontology_term_ref(publication.status):
-                section[investigation_headers.INVESTIGATION_PUBLICATION_STATUS].append(
-                    publication.status.name or ""
-                )
-                section[
-                    investigation_headers.INVESTIGATION_PUBLICATION_STATUS_TERM_ACCESSION_NUMBER
-                ].append(publication.status.accession or "")
-                section[
-                    investigation_headers.INVESTIGATION_PUBLICATION_STATUS_TERM_SOURCE_REF
-                ].append(publication.status.ontology_name or "")
-            else:
-                section[investigation_headers.INVESTIGATION_PUBLICATION_STATUS].append(
-                    publication.status
-                )
-                section[
-                    investigation_headers.INVESTIGATION_PUBLICATION_STATUS_TERM_ACCESSION_NUMBER
-                ].append("")
-                section[
-                    investigation_headers.INVESTIGATION_PUBLICATION_STATUS_TERM_SOURCE_REF
-                ].append("")
+            section[investigation_headers.INVESTIGATION_PUBLICATION_STATUS].append(
+                models.free_text_or_term_ref_to_str(publication.status) or ""
+            )
+            section[
+                investigation_headers.INVESTIGATION_PUBLICATION_STATUS_TERM_ACCESSION_NUMBER
+            ].append(models.free_text_or_term_ref_accession(publication.status) or "")
+            section[investigation_headers.INVESTIGATION_PUBLICATION_STATUS_TERM_SOURCE_REF].append(
+                models.free_text_or_term_ref_ontology(publication.status) or ""
+            )
         comments = _extract_comments(self.investigation.publications)
         headers = _extract_section_header(
             list(self.investigation.publications)[0] if self.investigation.publications else None,
@@ -247,22 +235,15 @@ def _write_contacts(self):
             section[investigation_headers.INVESTIGATION_PERSON_AFFILIATION].append(
                 contact.affiliation
             )
-            if is_ontology_term_ref(contact.role):
-                section[investigation_headers.INVESTIGATION_PERSON_ROLES].append(
-                    contact.role.name or ""
-                )
-                section[
-                    investigation_headers.INVESTIGATION_PERSON_ROLES_TERM_ACCESSION_NUMBER
-                ].append(contact.role.accession or "")
-                section[investigation_headers.INVESTIGATION_PERSON_ROLES_TERM_SOURCE_REF].append(
-                    contact.role.ontology_name or ""
-                )
-            else:
-                section[investigation_headers.INVESTIGATION_PERSON_ROLES].append(contact.role)
-                section[
-                    investigation_headers.INVESTIGATION_PERSON_ROLES_TERM_ACCESSION_NUMBER
-                ].append("")
-                section[investigation_headers.INVESTIGATION_PERSON_ROLES_TERM_SOURCE_REF].append("")
+            section[investigation_headers.INVESTIGATION_PERSON_ROLES].append(
+                models.free_text_or_term_ref_to_str(contact.role) or ""
+            )
+            section[investigation_headers.INVESTIGATION_PERSON_ROLES_TERM_ACCESSION_NUMBER].append(
+                models.free_text_or_term_ref_accession(contact.role) or ""
+            )
+            
section[investigation_headers.INVESTIGATION_PERSON_ROLES_TERM_SOURCE_REF].append( + models.free_text_or_term_ref_ontology(contact.role) or "" + ) comments = _extract_comments(self.investigation.contacts) headers = _extract_section_header( list(self.investigation.contacts)[0] if self.investigation.contacts else None, @@ -302,18 +283,15 @@ def _write_study_design_descriptors(self, study: models.StudyInfo): # Read STUDY DESIGN DESCRIPTORS section section = _init_multi_column_section(investigation_headers.STUDY_DESIGN_DESCR_KEYS) for design in study.designs: - if is_ontology_term_ref(design.type): - section[investigation_headers.STUDY_DESIGN_TYPE].append(design.type.name or "") - section[investigation_headers.STUDY_DESIGN_TYPE_TERM_ACCESSION_NUMBER].append( - design.type.accession or "" - ) - section[investigation_headers.STUDY_DESIGN_TYPE_TERM_SOURCE_REF].append( - design.type.ontology_name or "" - ) - else: - section[investigation_headers.STUDY_DESIGN_TYPE].append(design.type) - section[investigation_headers.STUDY_DESIGN_TYPE_TERM_ACCESSION_NUMBER].append("") - section[investigation_headers.STUDY_DESIGN_TYPE_TERM_SOURCE_REF].append("") + section[investigation_headers.STUDY_DESIGN_TYPE].append( + models.free_text_or_term_ref_to_str(design.type) or "" + ) + section[investigation_headers.STUDY_DESIGN_TYPE_TERM_ACCESSION_NUMBER].append( + models.free_text_or_term_ref_accession(design.type) or "" + ) + section[investigation_headers.STUDY_DESIGN_TYPE_TERM_SOURCE_REF].append( + models.free_text_or_term_ref_ontology(design.type) or "" + ) comments = _extract_comments(study.designs) headers = _extract_section_header( list(study.designs)[0] if study.designs else None, @@ -331,22 +309,15 @@ def _write_study_publications(self, study: models.StudyInfo): section[investigation_headers.STUDY_PUBLICATION_DOI].append(publication.doi) section[investigation_headers.STUDY_PUBLICATION_AUTHOR_LIST].append(publication.authors) section[investigation_headers.STUDY_PUBLICATION_TITLE].append(publication.title) - if is_ontology_term_ref(publication.status): - section[investigation_headers.STUDY_PUBLICATION_STATUS].append( - publication.status.name or "" - ) - section[ - investigation_headers.STUDY_PUBLICATION_STATUS_TERM_ACCESSION_NUMBER - ].append(publication.status.accession or "") - section[investigation_headers.STUDY_PUBLICATION_STATUS_TERM_SOURCE_REF].append( - publication.status.ontology_name or "" - ) - else: - section[investigation_headers.STUDY_PUBLICATION_STATUS].append(publication.status) - section[ - investigation_headers.STUDY_PUBLICATION_STATUS_TERM_ACCESSION_NUMBER - ].append("") - section[investigation_headers.STUDY_PUBLICATION_STATUS_TERM_SOURCE_REF].append("") + section[investigation_headers.STUDY_PUBLICATION_STATUS].append( + models.free_text_or_term_ref_to_str(publication.status) or "" + ) + section[investigation_headers.STUDY_PUBLICATION_STATUS_TERM_ACCESSION_NUMBER].append( + models.free_text_or_term_ref_accession(publication.status) or "" + ) + section[investigation_headers.STUDY_PUBLICATION_STATUS_TERM_SOURCE_REF].append( + models.free_text_or_term_ref_ontology(publication.status) or "" + ) comments = _extract_comments(study.publications) headers = _extract_section_header( list(study.publications)[0] if study.publications else None, @@ -359,18 +330,15 @@ def _write_study_factors(self, study: models.StudyInfo): section = _init_multi_column_section(investigation_headers.STUDY_FACTORS_KEYS) for factor in study.factors.values(): section[investigation_headers.STUDY_FACTOR_NAME].append(factor.name) - 
if is_ontology_term_ref(factor.type): - section[investigation_headers.STUDY_FACTOR_TYPE].append(factor.type.name) - section[investigation_headers.STUDY_FACTOR_TYPE_TERM_ACCESSION_NUMBER].append( - factor.type.accession - ) - section[investigation_headers.STUDY_FACTOR_TYPE_TERM_SOURCE_REF].append( - factor.type.ontology_name - ) - else: - section[investigation_headers.STUDY_FACTOR_TYPE].append(factor.type) - section[investigation_headers.STUDY_FACTOR_TYPE_TERM_ACCESSION_NUMBER].append("") - section[investigation_headers.STUDY_FACTOR_TYPE_TERM_SOURCE_REF].append("") + section[investigation_headers.STUDY_FACTOR_TYPE].append( + models.free_text_or_term_ref_to_str(factor.type) or "" + ) + section[investigation_headers.STUDY_FACTOR_TYPE_TERM_ACCESSION_NUMBER].append( + models.free_text_or_term_ref_accession(factor.type) or "" + ) + section[investigation_headers.STUDY_FACTOR_TYPE_TERM_SOURCE_REF].append( + models.free_text_or_term_ref_ontology(factor.type) or "" + ) comments = _extract_comments(study.factors.values()) headers = _extract_section_header( list(study.factors.values())[0] if study.factors else None, @@ -384,47 +352,25 @@ def _write_study_assays(self, study: models.StudyInfo): for assay in study.assays: section[investigation_headers.STUDY_ASSAY_FILE_NAME].append(assay.path or "") - if is_ontology_term_ref(assay.measurement_type): - section[investigation_headers.STUDY_ASSAY_MEASUREMENT_TYPE].append( - assay.measurement_type.name or "" - ) - section[ - investigation_headers.STUDY_ASSAY_MEASUREMENT_TYPE_TERM_ACCESSION_NUMBER - ].append(assay.measurement_type.accession or "") - section[investigation_headers.STUDY_ASSAY_MEASUREMENT_TYPE_TERM_SOURCE_REF].append( - assay.measurement_type.ontology_name or "" - ) - else: - section[investigation_headers.STUDY_ASSAY_MEASUREMENT_TYPE].append( - assay.measurement_type - ) - section[ - investigation_headers.STUDY_ASSAY_MEASUREMENT_TYPE_TERM_ACCESSION_NUMBER - ].append("") - section[investigation_headers.STUDY_ASSAY_MEASUREMENT_TYPE_TERM_SOURCE_REF].append( - "" - ) - - if is_ontology_term_ref(assay.technology_type): - section[investigation_headers.STUDY_ASSAY_TECHNOLOGY_TYPE].append( - assay.technology_type.name or "" - ) - section[ - investigation_headers.STUDY_ASSAY_TECHNOLOGY_TYPE_TERM_ACCESSION_NUMBER - ].append(assay.technology_type.accession or "") - section[investigation_headers.STUDY_ASSAY_TECHNOLOGY_TYPE_TERM_SOURCE_REF].append( - assay.technology_type.ontology_name or "" - ) - else: - section[investigation_headers.STUDY_ASSAY_TECHNOLOGY_TYPE].append( - assay.technology_type - ) - section[ - investigation_headers.STUDY_ASSAY_TECHNOLOGY_TYPE_TERM_ACCESSION_NUMBER - ].append("") - section[investigation_headers.STUDY_ASSAY_TECHNOLOGY_TYPE_TERM_SOURCE_REF].append( - "" - ) + section[investigation_headers.STUDY_ASSAY_MEASUREMENT_TYPE].append( + models.free_text_or_term_ref_to_str(assay.measurement_type) or "" + ) + section[ + investigation_headers.STUDY_ASSAY_MEASUREMENT_TYPE_TERM_ACCESSION_NUMBER + ].append(models.free_text_or_term_ref_accession(assay.measurement_type) or "") + section[investigation_headers.STUDY_ASSAY_MEASUREMENT_TYPE_TERM_SOURCE_REF].append( + models.free_text_or_term_ref_ontology(assay.measurement_type) or "" + ) + + section[investigation_headers.STUDY_ASSAY_TECHNOLOGY_TYPE].append( + models.free_text_or_term_ref_to_str(assay.technology_type) or "" + ) + section[investigation_headers.STUDY_ASSAY_TECHNOLOGY_TYPE_TERM_ACCESSION_NUMBER].append( + models.free_text_or_term_ref_accession(assay.technology_type) or "" + ) + 
section[investigation_headers.STUDY_ASSAY_TECHNOLOGY_TYPE_TERM_SOURCE_REF].append( + models.free_text_or_term_ref_ontology(assay.technology_type) or "" + ) section[investigation_headers.STUDY_ASSAY_TECHNOLOGY_PLATFORM].append(assay.platform) @@ -440,18 +386,15 @@ def _write_study_protocols(self, study: models.StudyInfo): for protocol in study.protocols.values(): section[investigation_headers.STUDY_PROTOCOL_NAME].append(protocol.name) - if is_ontology_term_ref(protocol.type): - section[investigation_headers.STUDY_PROTOCOL_TYPE].append(protocol.type.name or "") - section[investigation_headers.STUDY_PROTOCOL_TYPE_TERM_ACCESSION_NUMBER].append( - protocol.type.accession or "" - ) - section[investigation_headers.STUDY_PROTOCOL_TYPE_TERM_SOURCE_REF].append( - protocol.type.ontology_name or "" - ) - else: - section[investigation_headers.STUDY_PROTOCOL_TYPE].append(protocol.type) - section[investigation_headers.STUDY_PROTOCOL_TYPE_TERM_ACCESSION_NUMBER].append("") - section[investigation_headers.STUDY_PROTOCOL_TYPE_TERM_SOURCE_REF].append("") + section[investigation_headers.STUDY_PROTOCOL_TYPE].append( + models.free_text_or_term_ref_to_str(protocol.type) or "" + ) + section[investigation_headers.STUDY_PROTOCOL_TYPE_TERM_ACCESSION_NUMBER].append( + models.free_text_or_term_ref_accession(protocol.type) or "" + ) + section[investigation_headers.STUDY_PROTOCOL_TYPE_TERM_SOURCE_REF].append( + models.free_text_or_term_ref_ontology(protocol.type) or "" + ) section[investigation_headers.STUDY_PROTOCOL_DESCRIPTION].append(protocol.description) section[investigation_headers.STUDY_PROTOCOL_URI].append(protocol.uri) @@ -461,14 +404,9 @@ def _write_study_protocols(self, study: models.StudyInfo): accessions = [] ontologies = [] for parameter in protocol.parameters.values(): - if is_ontology_term_ref(parameter): - names.append(parameter.name or "") - accessions.append(parameter.accession or "") - ontologies.append(parameter.ontology_name or "") - else: - names.append(parameter.name) - accessions.append("") - ontologies.append("") + names.append(models.free_text_or_term_ref_to_str(parameter) or "") + accessions.append(models.free_text_or_term_ref_accession(parameter) or "") + ontologies.append(models.free_text_or_term_ref_ontology(parameter) or "") section[investigation_headers.STUDY_PROTOCOL_PARAMETERS_NAME].append(";".join(names)) section[ investigation_headers.STUDY_PROTOCOL_PARAMETERS_NAME_TERM_ACCESSION_NUMBER @@ -483,14 +421,9 @@ def _write_study_protocols(self, study: models.StudyInfo): ontologies = [] for component in protocol.components.values(): names.append(component.name) - if is_ontology_term_ref(component.type): - types.append(component.type.name or "") - accessions.append(component.type.accession or "") - ontologies.append(component.type.ontology_name or "") - else: - names.append(component.type) - accessions.append("") - ontologies.append("") + types.append(models.free_text_or_term_ref_to_str(component.type) or "") + accessions.append(models.free_text_or_term_ref_accession(component.type) or "") + ontologies.append(models.free_text_or_term_ref_ontology(component.type) or "") section[investigation_headers.STUDY_PROTOCOL_COMPONENTS_NAME].append(";".join(names)) section[investigation_headers.STUDY_PROTOCOL_COMPONENTS_TYPE].append(";".join(types)) section[ @@ -519,18 +452,15 @@ def _write_study_contacts(self, study: models.StudyInfo): section[investigation_headers.STUDY_PERSON_FAX].append(contact.fax) section[investigation_headers.STUDY_PERSON_ADDRESS].append(contact.address) 
section[investigation_headers.STUDY_PERSON_AFFILIATION].append(contact.affiliation) - if is_ontology_term_ref(contact.role): - section[investigation_headers.STUDY_PERSON_ROLES].append(contact.role.name or "") - section[investigation_headers.STUDY_PERSON_ROLES_TERM_ACCESSION_NUMBER].append( - contact.role.accession or "" - ) - section[investigation_headers.STUDY_PERSON_ROLES_TERM_SOURCE_REF].append( - contact.role.ontology_name or "" - ) - else: - section[investigation_headers.STUDY_PERSON_ROLES].append(contact.role) - section[investigation_headers.STUDY_PERSON_ROLES_TERM_ACCESSION_NUMBER].append("") - section[investigation_headers.STUDY_PERSON_ROLES_TERM_SOURCE_REF].append("") + section[investigation_headers.STUDY_PERSON_ROLES].append( + models.free_text_or_term_ref_to_str(contact.role) or "" + ) + section[investigation_headers.STUDY_PERSON_ROLES_TERM_ACCESSION_NUMBER].append( + models.free_text_or_term_ref_accession(contact.role) or "" + ) + section[investigation_headers.STUDY_PERSON_ROLES_TERM_SOURCE_REF].append( + models.free_text_or_term_ref_ontology(contact.role) or "" + ) comments = _extract_comments(study.contacts) headers = _extract_section_header( list(study.contacts)[0] if study.contacts else None, diff --git a/altamisa/py.typed b/altamisa/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/codemeta.json b/codemeta.json index 4f5f2f0..65021d3 100644 --- a/codemeta.json +++ b/codemeta.json @@ -5,35 +5,35 @@ { "@id": "https://orcid.org/0000-0002-3287-0313", "@type": "Person", - "email": "mathias.kuhring@bihealth.de", + "email": "mathias.kuhring@bih-charite.de", "name": "Mathias Kuhring", "affiliation": "Berlin Institute of Health (BIH), Berlin, German" }, { "@id": "https://orcid.org/0000-0002-4180-8810", "@type": "Person", - "email": "mikko.nieminen@bihealth.de", + "email": "mikko.nieminen@bih-charite.de", "name": "Mikko Nieminen", "affiliation": "Berlin Institute of Health (BIH), Berlin, German" }, { "@id": "https://orcid.org/0000-0002-5423-1651", "@type": "Person", - "email": "jennifer.kirwan@bihealth.de", + "email": "jennifer.kirwan@bih-charite.de", "name": "Jennifer Kirwan", "affiliation": "Berlin Institute of Health (BIH), Berlin, German" }, { "@id": "https://orcid.org/0000-0002-3284-0632", "@type": "Person", - "email": "dieter.beule@bihealth.de", + "email": "dieter.beule@bih-charite.de", "name": "Dieter Beule", "affiliation": "Berlin Institute of Health (BIH), Berlin, German" }, { "@id": "https://orcid.org/0000-0002-3051-1763", "@type": "Person", - "email": "manuel.holtgrewe@bihealth.de", + "email": "manuel.holtgrewe@bih-charite.de", "name": "Manuel Holtgrewe", "affiliation": "Berlin Institute of Health (BIH), Berlin, German" } diff --git a/docs/examples/create_isa_model.py b/docs/examples/create_isa_model.py index 1c618e5..e45139a 100644 --- a/docs/examples/create_isa_model.py +++ b/docs/examples/create_isa_model.py @@ -3,9 +3,16 @@ import os import sys -from altamisa.isatab import models, table_headers -from altamisa.isatab import AssayValidator, InvestigationValidator, StudyValidator -from altamisa.isatab import AssayWriter, InvestigationWriter, StudyWriter +from altamisa.isatab import ( + AssayValidator, + AssayWriter, + InvestigationValidator, + InvestigationWriter, + StudyValidator, + StudyWriter, + models, + table_headers, +) def create_and_write(out_path): diff --git a/docs/examples/process_isa_model.py b/docs/examples/process_isa_model.py index a8e4719..d9e31ce 100644 --- a/docs/examples/process_isa_model.py +++ b/docs/examples/process_isa_model.py @@ 
-1,9 +1,9 @@
 # Process ISA model data
 
-from altamisa.isatab import *
 import os
 import warnings
 
+from altamisa.isatab import *
 
 # Parse and validate an investigation file
 with open("i_investigation.txt", "rt") as investigation_file:
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..afa94ca
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,19 @@
+[tool.pyright]
+include = [
+    "altamisa",
+    "tests",
+]
+exclude = [
+    "**/node_modules",
+    "**/__pycache__",
+    "altamisa/versioneer.py",
+    "altamisa/_version.py"
+]
+defineConstant = { DEBUG = true }
+stubPath = "stubs"
+
+reportMissingImports = true
+reportMissingTypeStubs = false
+
+pythonVersion = "3.8"
+pythonPlatform = "Linux"
diff --git a/requirements/base.txt b/requirements/base.txt
index 751689e..1850f45 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -1,3 +1,4 @@
 # Base requirements
 
 attrs
+typer[all]
diff --git a/requirements/test.txt b/requirements/test.txt
index 2b49c57..42f9503 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -11,7 +11,10 @@ pytest-cov
 
 # Coverage report
 coverage
-codacy-coverage >=1.3.6
 
-# Flake8 for lintoing
+# Flake8 for linting
 flake8 >=3.5.0
+
+isort
+
+pyright
diff --git a/setup.cfg b/setup.cfg
index 3768989..574711b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -19,11 +19,6 @@ select = B,C,E,F,W,T4,B9
 [tool:pytest]
 addopts = --cov=altamisa --cov=tests --cov-report=xml
 testpaths = tests
-pep8ignore =
-    docs/* ALL
-    examples/*.py E501
-    tests/*.py E501
-    vcfpy/*.py F401
 
 [coverage:run]
 omit =
diff --git a/setup.py b/setup.py
index f4476ac..9b8603e 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
 
 import os.path
 
-from setuptools import setup, find_packages
+from setuptools import find_packages, setup
 
 import versioneer
 
@@ -35,8 +35,8 @@ def parse_requirements(path):
 setup(
     author="Dieter Beule, Jennifer Kirwan, Mathias Kuhring, Manuel Holtgrewe, Mikko Nieminen",
     author_email=(
-        "dieter.beule@bihealth.de, jennifer.kirwan@bihalth.de, mathias.kuhring@bihealth.de, "
-        "manuel.holtgrewe@bihealth.de, mikko.nieminen@bihealth.de"
+        "dieter.beule@bih-charite.de, jennifer.kirwan@bih-charite.de, mathias.kuhring@bih-charite.de, "
+        "manuel.holtgrewe@bih-charite.de, mikko.nieminen@bih-charite.de"
     ),
     classifiers=[
         "Development Status :: 4 - Beta",
@@ -52,9 +52,9 @@ def parse_requirements(path):
     ],
     entry_points={
         "console_scripts": (
-            "isatab2dot = altamisa.apps.isatab2dot:main",
-            "isatab2isatab = altamisa.apps.isatab2isatab:main",
-            "isatab_validate = altamisa.apps.isatab_validate:main",
+            "isatab2dot = altamisa.apps.isatab2dot:app",
+            "isatab2isatab = altamisa.apps.isatab2isatab:app",
+            "isatab_validate = altamisa.apps.isatab_validate:app",
         )
     },
     description="Implementation of ISA-tools data model and ISA-TAB",
diff --git a/tests/conftest.py b/tests/conftest.py
index 0498f76..d7d6c5e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -3,25 +3,27 @@
 """
 
 import os.path
+from typing import Iterator, TextIO
+
 import pytest
 
 
 @pytest.fixture
-def minimal_investigation_file():
+def minimal_investigation_file() -> Iterator[TextIO]:
     path = os.path.join(os.path.dirname(__file__), "data/i_minimal/i_minimal.txt")
     with open(path, "rt") as file:
         yield file
 
 
 @pytest.fixture
-def minimal2_investigation_file():
+def minimal2_investigation_file() -> Iterator[TextIO]:
     path = os.path.join(os.path.dirname(__file__), "data/i_minimal2/i_minimal2.txt")
     with open(path, "rt") as file:
         yield file
 
 
 @pytest.fixture
-def minimal_study_file():
+def minimal_study_file() -> Iterator[TextIO]:
    """This 
file only contains the bare essentials, although ISA-Tab might actually forgive us having no ``Process``. """ @@ -31,21 +33,21 @@ def minimal_study_file(): @pytest.fixture -def minimal_assay_file(): +def minimal_assay_file() -> Iterator[TextIO]: path = os.path.join(os.path.dirname(__file__), "data/i_minimal/a_minimal.txt") with open(path, "rt") as file: yield file @pytest.fixture -def small_investigation_file(): +def small_investigation_file() -> Iterator[TextIO]: path = os.path.join(os.path.dirname(__file__), "data/i_small/i_small.txt") with open(path, "rt") as file: yield file @pytest.fixture -def small_study_file(): +def small_study_file() -> Iterator[TextIO]: """This file contains a very limited number of annotations and one sample that is split (tumor-normal case). """ @@ -55,14 +57,14 @@ def small_study_file(): @pytest.fixture -def small_assay_file(): +def small_assay_file() -> Iterator[TextIO]: path = os.path.join(os.path.dirname(__file__), "data/i_small/a_small.txt") with open(path, "rt") as file: yield file @pytest.fixture -def full_investigation_file(): +def full_investigation_file() -> Iterator[TextIO]: """This file contains values for each normal investigation section and key.""" path = os.path.join(os.path.dirname(__file__), "data/i_fullinvest/i_fullinvest.txt") with open(path, "rt") as file: @@ -70,7 +72,7 @@ def full_investigation_file(): @pytest.fixture -def full2_investigation_file(): +def full2_investigation_file() -> Iterator[TextIO]: """This file contains values for each normal investigation section and key.""" path = os.path.join(os.path.dirname(__file__), "data/i_fullinvest2/i_fullinvest2.txt") with open(path, "rt") as file: @@ -78,7 +80,7 @@ def full2_investigation_file(): @pytest.fixture -def comment_investigation_file(): +def comment_investigation_file() -> Iterator[TextIO]: """This file contains comments for each investigation section.""" path = os.path.join(os.path.dirname(__file__), "data/i_comments/i_comments.txt") with open(path, "rt") as file: @@ -86,7 +88,7 @@ def comment_investigation_file(): @pytest.fixture -def assays_investigation_file(): +def assays_investigation_file() -> Iterator[TextIO]: """This file contains two studies with no assays, once with tab-separation (empty column) and once without (no column). """ @@ -96,7 +98,7 @@ def assays_investigation_file(): @pytest.fixture -def assays2_investigation_file(): +def assays2_investigation_file() -> Iterator[TextIO]: """This file contains two studies with no assays, once with tab-separation (empty column) and once without (no column). 
""" @@ -106,21 +108,21 @@ def assays2_investigation_file(): @pytest.fixture -def small2_investigation_file(): +def small2_investigation_file() -> Iterator[TextIO]: path = os.path.join(os.path.dirname(__file__), "data/i_small2/i_small2.txt") with open(path, "rt") as file: yield file @pytest.fixture -def small2_study_file(): +def small2_study_file() -> Iterator[TextIO]: path = os.path.join(os.path.dirname(__file__), "data/i_small2/s_small2.txt") with open(path, "rt") as file: yield file @pytest.fixture -def small2_assay_file(): +def small2_assay_file() -> Iterator[TextIO]: """This file contains splitting and pooling examples.""" path = os.path.join(os.path.dirname(__file__), "data/i_small2/a_small2.txt") with open(path, "rt") as file: @@ -128,14 +130,14 @@ def small2_assay_file(): @pytest.fixture -def gelelect_investigation_file(): +def gelelect_investigation_file() -> Iterator[TextIO]: path = os.path.join(os.path.dirname(__file__), "data/test_gelelect/i_Investigation.txt") with open(path, "rt") as file: yield file @pytest.fixture -def gelelect_assay_file(): +def gelelect_assay_file() -> Iterator[TextIO]: """This file contains special cases for gel electrophoresis assays.""" path = os.path.join( os.path.dirname(__file__), @@ -146,14 +148,14 @@ def gelelect_assay_file(): @pytest.fixture -def BII_I_1_investigation_file(): +def BII_I_1_investigation_file() -> Iterator[TextIO]: path = os.path.join(os.path.dirname(__file__), "data/BII-I-1/i_investigation.txt") with open(path, "rt") as file: yield file @pytest.fixture -def BII_I_2_investigation_file(): +def BII_I_2_investigation_file() -> Iterator[TextIO]: path = os.path.join(os.path.dirname(__file__), "data/BII-I-2/i_investigation.txt") with open(path, "rt") as file: yield file @@ -163,7 +165,7 @@ def BII_I_2_investigation_file(): @pytest.fixture -def assay_file_exception_labeled_header_format(): +def assay_file_exception_labeled_header_format() -> Iterator[TextIO]: path = os.path.join( os.path.dirname(__file__), "data/test_exceptions/a_exception_labeled_header_format.txt" ) @@ -172,7 +174,7 @@ def assay_file_exception_labeled_header_format(): @pytest.fixture -def assay_file_exception_labeled_header_not_allowed(): +def assay_file_exception_labeled_header_not_allowed() -> Iterator[TextIO]: path = os.path.join( os.path.dirname(__file__), "data/test_exceptions/a_exception_labeled_header_not_allowed.txt" ) @@ -181,7 +183,7 @@ def assay_file_exception_labeled_header_not_allowed(): @pytest.fixture -def assay_file_exception_duplicated_header(): +def assay_file_exception_duplicated_header() -> Iterator[TextIO]: path = os.path.join( os.path.dirname(__file__), "data/test_exceptions/a_exception_duplicated_header.txt" ) @@ -190,7 +192,7 @@ def assay_file_exception_duplicated_header(): @pytest.fixture -def assay_file_exception_simple_header_not_allowed(): +def assay_file_exception_simple_header_not_allowed() -> Iterator[TextIO]: path = os.path.join( os.path.dirname(__file__), "data/test_exceptions/a_exception_simple_header_not_allowed.txt" ) @@ -199,7 +201,7 @@ def assay_file_exception_simple_header_not_allowed(): @pytest.fixture -def assay_file_exception_term_source_ref_next_column(): +def assay_file_exception_term_source_ref_next_column() -> Iterator[TextIO]: path = os.path.join( os.path.dirname(__file__), "data/test_exceptions/a_exception_term_source_ref_next_column.txt", @@ -209,7 +211,7 @@ def assay_file_exception_term_source_ref_next_column(): @pytest.fixture -def assay_file_exception_term_source_ref_stop_iteration(): +def 
assay_file_exception_term_source_ref_stop_iteration() -> Iterator[TextIO]:
     path = os.path.join(
         os.path.dirname(__file__),
         "data/test_exceptions/a_exception_term_source_ref_stop_iteration.txt",
@@ -219,7 +221,7 @@
 
 
 @pytest.fixture
-def assay_file_exception_unknown_header():
+def assay_file_exception_unknown_header() -> Iterator[TextIO]:
     path = os.path.join(
         os.path.dirname(__file__), "data/test_exceptions/a_exception_unknown_header.txt"
     )
@@ -228,7 +230,7 @@
 
 
 @pytest.fixture
-def assay_file_exception_invalid_column_type():
+def assay_file_exception_invalid_column_type() -> Iterator[TextIO]:
     path = os.path.join(
         os.path.dirname(__file__), "data/test_exceptions/a_exception_invalid_column_type.txt"
     )
@@ -237,14 +239,14 @@
 
 
 @pytest.fixture
-def only_investigation_file():
+def only_investigation_file() -> Iterator[TextIO]:
     path = os.path.join(os.path.dirname(__file__), "data/i_onlyinvest/i_onlyinvest.txt")
     with open(path, "rt") as file:
         yield file
 
 
 @pytest.fixture
-def investigation_file_exception_comment_format():
+def investigation_file_exception_comment_format() -> Iterator[TextIO]:
     path = os.path.join(
         os.path.dirname(__file__), "data/test_exceptions/i_invest_comment_format.txt"
     )
@@ -256,7 +258,7 @@
 
 
 @pytest.fixture
-def warnings_investigation_file():
+def warnings_investigation_file() -> Iterator[TextIO]:
     path = os.path.join(os.path.dirname(__file__), "data/i_warnings/i_warnings.txt")
     with open(path, "rt") as file:
         yield file
diff --git a/tests/test_apps.py b/tests/test_apps.py
index 56d9dc9..f580f18 100644
--- a/tests/test_apps.py
+++ b/tests/test_apps.py
@@ -4,9 +4,12 @@
 import os.path
 
 import pytest
+from typer.testing import CliRunner
 
-from altamisa.apps import isatab2isatab, isatab2dot, isatab_validate
-from altamisa.exceptions import IsaWarning, IsaException
+from altamisa.apps import isatab2dot, isatab2isatab, isatab_validate
+from altamisa.exceptions import IsaException, IsaWarning
+
+runner = CliRunner()
 
 
 def test_isatab_validate():
@@ -14,7 +17,10 @@ def test_isatab_validate():
     argv = ["--input-investigation-file", i_file, "--show-duplicate-warnings"]
 
     with pytest.warns(IsaWarning) as record:
-        assert not isatab_validate.main(argv)
+        result = runner.invoke(isatab_validate.app, argv)
+        assert result.exit_code == 1
+        assert "Warning" in result.stdout
+        assert "Warning" in result.stderr
 
     assert 17 == len(record)
diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py
index 3d2868e..bff59fe 100644
--- a/tests/test_exceptions.py
+++ b/tests/test_exceptions.py
@@ -7,7 +7,6 @@
 from altamisa.exceptions import ParseIsatabException
 from altamisa.isatab import AssayReader, InvestigationReader
 
-
 # Test header exceptions ---------------------------------------------------------------------------
 
 
diff --git a/tests/test_parse_assay.py b/tests/test_parse_assay.py
index e3f3db1..fa8b253 100644
--- a/tests/test_parse_assay.py
+++ b/tests/test_parse_assay.py
@@ -4,21 +4,25 @@
 
 import io
 import os
+from typing import TextIO
+
 import pytest
 
 from altamisa.constants import table_headers
 from altamisa.exceptions import IsaWarning
-from altamisa.isatab import models
 from altamisa.isatab import (
-    InvestigationReader,
-    InvestigationValidator,
-    AssayRowReader,
     AssayReader,
+    AssayRowReader,
     AssayValidator,
+    InvestigationReader,
+    InvestigationValidator,
+    models,
 )
 
 
-def 
test_assay_row_reader_minimal_assay(minimal_investigation_file, minimal_assay_file): +def test_assay_row_reader_minimal_assay( + minimal_investigation_file: TextIO, minimal_assay_file: TextIO +): """Use ``AssayRowReader`` to read in minimal assay file.""" # Create new row reader and check read headers @@ -87,7 +91,7 @@ def test_assay_row_reader_minimal_assay(minimal_investigation_file, minimal_assa assert expected == first_row[3] -def test_assay_reader_minimal_assay(minimal_investigation_file, minimal_assay_file): +def test_assay_reader_minimal_assay(minimal_investigation_file: TextIO, minimal_assay_file: TextIO): """Use ``AssayReader`` to read in minimal assay file. Using the ``AssayReader`` instead of the ``AssayRowReader`` gives us @@ -186,7 +190,7 @@ def test_assay_reader_minimal_assay(minimal_investigation_file, minimal_assay_fi assert expected == assay.arcs -def test_assay_row_reader_small_assay(small_investigation_file, small_assay_file): +def test_assay_row_reader_small_assay(small_investigation_file: TextIO, small_assay_file: TextIO): """Use ``AssayRowReader`` to read in small assay file.""" # Create new row reader and check read headers @@ -432,7 +436,7 @@ def test_assay_row_reader_small_assay(small_investigation_file, small_assay_file assert expected == second_row[7] -def test_assay_reader_small_assay(small_investigation_file, small_assay_file): +def test_assay_reader_small_assay(small_investigation_file: TextIO, small_assay_file: TextIO): """Use ``AssayReader`` to read in small assay file.""" # Load investigation (tested elsewhere) investigation = InvestigationReader.from_stream(small_investigation_file).read() @@ -641,7 +645,7 @@ def test_assay_reader_small_assay(small_investigation_file, small_assay_file): assert expected == assay.arcs -def test_assay_reader_small2_assay(small2_investigation_file, small2_assay_file): +def test_assay_reader_small2_assay(small2_investigation_file: TextIO, small2_assay_file: TextIO): """Use ``AssayReader`` to read in small assay file.""" # Load investigation (tested elsewhere) investigation = InvestigationReader.from_stream(small2_investigation_file).read() @@ -754,7 +758,7 @@ def test_assay_reader_small2_assay(small2_investigation_file, small2_assay_file) assert sorted(expected) == sorted(assay.arcs) -def test_assay_reader_gelelect(gelelect_investigation_file, gelelect_assay_file): +def test_assay_reader_gelelect(gelelect_investigation_file: TextIO, gelelect_assay_file: TextIO): """Use ``AssayReader`` to read in small assay file.""" with pytest.warns(IsaWarning) as record: # Load investigation @@ -858,7 +862,9 @@ def test_assay_reader_gelelect(gelelect_investigation_file, gelelect_assay_file) assert expected == assay.processes["S1-A1-electrophoresis-9-2"] -def test_assay_reader_minimal_assay_iostring(minimal_investigation_file, minimal_assay_file): +def test_assay_reader_minimal_assay_iostring( + minimal_investigation_file: TextIO, minimal_assay_file: TextIO +): # Load investigation (tested elsewhere) stringio = io.StringIO(minimal_investigation_file.read()) investigation = InvestigationReader.from_stream(stringio).read() @@ -890,7 +896,9 @@ def test_assay_reader_minimal_assay_iostring(minimal_investigation_file, minimal assert 3 == len(assay.arcs) -def test_assay_reader_minimal_assay_iostring2(minimal_investigation_file, minimal_assay_file): +def test_assay_reader_minimal_assay_iostring2( + minimal_investigation_file: TextIO, minimal_assay_file: TextIO +): # Load investigation (tested elsewhere) stringio = 
io.StringIO(minimal_investigation_file.read()) investigation = InvestigationReader.from_stream(stringio).read() diff --git a/tests/test_parse_investigation.py b/tests/test_parse_investigation.py index 29c0bd0..f5e855d 100644 --- a/tests/test_parse_investigation.py +++ b/tests/test_parse_investigation.py @@ -4,9 +4,9 @@ from datetime import date from pathlib import Path -import pytest import warnings +import pytest from altamisa.constants import investigation_headers from altamisa.exceptions import ( @@ -16,8 +16,7 @@ ModerateIsaValidationWarning, ParseIsatabWarning, ) -from altamisa.isatab import models -from altamisa.isatab import InvestigationReader, InvestigationValidator +from altamisa.isatab import InvestigationReader, InvestigationValidator, models def test_parse_minimal_investigation(minimal_investigation_file): diff --git a/tests/test_parse_study.py b/tests/test_parse_study.py index 0c6db65..6b4cc7e 100644 --- a/tests/test_parse_study.py +++ b/tests/test_parse_study.py @@ -5,17 +5,18 @@ from datetime import date import io import os + import pytest from altamisa.constants import table_headers from altamisa.exceptions import IsaWarning -from altamisa.isatab import models from altamisa.isatab import ( InvestigationReader, InvestigationValidator, - StudyRowReader, StudyReader, + StudyRowReader, StudyValidator, + models, ) @@ -269,7 +270,7 @@ def test_study_row_reader_small_study(small_investigation_file, small_study_file None, (models.Characteristics("status", ["0"], None),), (), - (models.FactorValue("treatment", "yes", None),), + (models.FactorValue("treatment", ["yes"], None),), None, headers_sample, ) @@ -310,7 +311,7 @@ def test_study_row_reader_small_study(small_investigation_file, small_study_file None, (models.Characteristics("status", ["2"], None),), (), - (models.FactorValue("treatment", "", None),), + (models.FactorValue("treatment", [""], None),), None, headers_sample, ) @@ -438,7 +439,7 @@ def test_study_reader_small_study(small_investigation_file, small_study_file): None, (models.Characteristics("status", ["0"], None),), (), - (models.FactorValue("treatment", "yes", None),), + (models.FactorValue("treatment", ["yes"], None),), None, headers_sample, ) @@ -450,7 +451,7 @@ def test_study_reader_small_study(small_investigation_file, small_study_file): None, (models.Characteristics("status", ["2"], None),), (), - (models.FactorValue("treatment", "", None),), + (models.FactorValue("treatment", [""], None),), None, headers_sample, ) @@ -462,7 +463,7 @@ def test_study_reader_small_study(small_investigation_file, small_study_file): None, (models.Characteristics("status", ["1"], None),), (), - (models.FactorValue("treatment", "yes", None),), + (models.FactorValue("treatment", ["yes"], None),), None, headers_sample, ) @@ -474,7 +475,7 @@ def test_study_reader_small_study(small_investigation_file, small_study_file): None, (models.Characteristics("status", [""], None),), (), - (models.FactorValue("treatment", "", None),), + (models.FactorValue("treatment", [""], None),), None, headers_sample, ) diff --git a/tests/test_write_assay.py b/tests/test_write_assay.py index 0b6cde5..bcfc631 100644 --- a/tests/test_write_assay.py +++ b/tests/test_write_assay.py @@ -4,6 +4,7 @@ import filecmp import os + import pytest from altamisa.exceptions import ( @@ -14,11 +15,11 @@ ParseIsatabWarning, ) from altamisa.isatab import ( - InvestigationReader, - InvestigationValidator, AssayReader, AssayValidator, AssayWriter, + InvestigationReader, + InvestigationValidator, ) @@ -34,6 +35,8 @@ def 
_parse_write_assert_assay(investigation_file, tmp_path, quote=None, normaliz if skip and str(assay_info.path) in skip: continue # Load assay + if not assay_info.path: + raise ValueError("Assay {} has no path".format(assay_info)) path_in = os.path.join(directory, assay_info.path) with open(path_in, "rt") as inputf: assay = AssayReader.from_stream( diff --git a/tests/test_write_investigation.py b/tests/test_write_investigation.py index ebffeca..a93b4d1 100644 --- a/tests/test_write_investigation.py +++ b/tests/test_write_investigation.py @@ -3,6 +3,7 @@ import filecmp + import pytest from altamisa.exceptions import ( @@ -12,8 +13,11 @@ ParseIsatabWarning, WriteIsatabWarning, ) -from altamisa.isatab import InvestigationReader, InvestigationWriter, InvestigationValidator - +from altamisa.isatab import ( + InvestigationReader, + InvestigationValidator, + InvestigationWriter, +) # Tests with one-time reading and writing diff --git a/tests/test_write_study.py b/tests/test_write_study.py index c188079..88210b7 100644 --- a/tests/test_write_study.py +++ b/tests/test_write_study.py @@ -3,10 +3,15 @@ import filecmp -import pytest import os -from altamisa.exceptions import ModerateIsaValidationWarning, IsaWarning, ParseIsatabWarning +import pytest + +from altamisa.exceptions import ( + IsaWarning, + ModerateIsaValidationWarning, + ParseIsatabWarning, +) from altamisa.isatab import ( InvestigationReader, InvestigationValidator, @@ -25,6 +30,8 @@ def _parse_write_assert(investigation_file, tmp_path, quote=None): # Iterate studies for s, study_info in enumerate(investigation.studies): # Load study + if not study_info.info.path: + raise ValueError("Study {} has no path".format(study_info)) path_in = os.path.join(directory, study_info.info.path) with open(path_in, "rt") as inputf: study = StudyReader.from_stream("S{}".format(s + 1), inputf).read() diff --git a/versioneer.py b/versioneer.py index 1e3753e..db2c2bd 100644 --- a/versioneer.py +++ b/versioneer.py @@ -310,15 +310,14 @@ import configparser import errno +import functools import json import os +from pathlib import Path import re import subprocess import sys -from pathlib import Path -from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union -from typing import NoReturn -import functools +from typing import Any, Callable, Dict, List, NoReturn, Optional, Tuple, Union, cast have_tomllib = True if sys.version_info >= (3, 11): @@ -1995,6 +1994,7 @@ def run(self) -> None: if "cx_Freeze" in sys.modules: # cx_freeze enabled? from cx_Freeze.dist import build_exe as _build_exe # type: ignore + # nczeczulin reports that py2exe won't like the pep440-style string # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. # setup(console=[{ From 70bfab947cc75991e10782b3b0fc1c82280cf2d6 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Mon, 15 Jan 2024 02:46:48 +0100 Subject: [PATCH 2/3] wip --- altamisa/apps/isatab2dot.py | 128 ++++++++++++-------- altamisa/apps/isatab2isatab.py | 169 +++++++++++++++++---------- altamisa/isatab/headers.py | 3 +- altamisa/isatab/parse_assay_study.py | 16 +-- tests/test_apps.py | 17 +-- tests/test_parse_study.py | 5 +- tests/test_write_study.py | 2 +- 7 files changed, 208 insertions(+), 132 deletions(-) diff --git a/altamisa/apps/isatab2dot.py b/altamisa/apps/isatab2dot.py index e48a44c..30f4cb0 100644 --- a/altamisa/apps/isatab2dot.py +++ b/altamisa/apps/isatab2dot.py @@ -2,13 +2,26 @@ """Conversion of ISA-Tab to dot. 
""" -import argparse +from contextlib import ExitStack import json import os import sys +import attrs +import typer +from typing_extensions import Annotated + from altamisa.isatab import AssayReader, InvestigationReader, StudyReader +#: Typer application instance. +app = typer.Typer() + + +@attrs.define +class Arguments: + investigation_file: str + output_file: str + def print_dot( obj, @@ -49,61 +62,78 @@ def print_dot( print("{}{} -> {};".format(indent, json.dumps(arc.tail), json.dumps(arc.head)), file=outf) -def run(args): +def run(args: Arguments): with open(args.investigation_file, "rt") as inputf: investigation = InvestigationReader.from_stream(inputf).read() path = os.path.dirname(args.investigation_file) - print("digraph investigation {", file=args.output_file) - print(' rankdir = "LR";', file=args.output_file) - - for s, study_info in enumerate(investigation.studies): - if not study_info.info.path: - print(" /* no file for study {} */".format(s + 1), file=args.output_file) - continue - with open(os.path.join(path, study_info.info.path), "rt") as inputf: - study = StudyReader.from_stream("S{}".format(s + 1), inputf).read() - print(" /* study {} */".format(study_info.info.path), file=args.output_file) - print(" subgraph clusterStudy{} {{".format(s), file=args.output_file) - print(' label = "Study: {}"'.format(study_info.info.path), file=args.output_file) - print_dot(study, args.output_file) - print(" }", file=args.output_file) - - for a, assay_info in enumerate(study_info.assays): - if not assay_info.path: - print(" /* no file for assay {} */".format(a + 1), file=args.output_file) - continue - with open(os.path.join(path, assay_info.path), "rt") as inputf: - assay = AssayReader.from_stream( - "S{}".format(s + 1), "A{}".format(a + 1), inputf - ).read() - print(" /* assay {} */".format(assay_info.path), file=args.output_file) - print(" subgraph clusterAssayS{}A{} {{".format(s, a), file=args.output_file) - print(' label = "Assay: {}"'.format(assay_info.path), file=args.output_file) - print_dot(assay, args.output_file) - print(" }", file=args.output_file) - - print("}", file=args.output_file) + with ExitStack() as stack: + if args.output_file == "-": + output_file = sys.stdout + else: + output_file = stack.enter_context(open(args.output_file, "wt")) + print("digraph investigation {", file=output_file) + print(' rankdir = "LR";', file=output_file) -def main(argv=None): - parser = argparse.ArgumentParser() - - parser.add_argument( - "-i", "--investigation-file", required=True, help="Path to investigation file" - ) - parser.add_argument( - "-o", - "--output-file", - default="-", - type=argparse.FileType("wt"), - help='Path to output file, stdout ("-") by default', + for s, study_info in enumerate(investigation.studies): + if not study_info.info.path: + print(" /* no file for study {} */".format(s + 1), file=output_file) + continue + with open(os.path.join(path, study_info.info.path), "rt") as inputf: + study = StudyReader.from_stream("S{}".format(s + 1), inputf).read() + print(" /* study {} */".format(study_info.info.path), file=output_file) + print(" subgraph clusterStudy{} {{".format(s), file=output_file) + print(' label = "Study: {}"'.format(study_info.info.path), file=output_file) + print_dot(study, output_file) + print(" }", file=output_file) + + for a, assay_info in enumerate(study_info.assays): + if not assay_info.path: + print(" /* no file for assay {} */".format(a + 1), file=output_file) + continue + with open(os.path.join(path, assay_info.path), "rt") as inputf: + assay = 
AssayReader.from_stream( + "S{}".format(s + 1), "A{}".format(a + 1), inputf + ).read() + print(" /* assay {} */".format(assay_info.path), file=output_file) + print(" subgraph clusterAssayS{}A{} {{".format(s, a), file=output_file) + print(' label = "Assay: {}"'.format(assay_info.path), file=output_file) + print_dot(assay, output_file) + print(" }", file=output_file) + + print("}", file=output_file) + + +@app.command() +def main( + investigation_file: Annotated[ + str, + typer.Option( + "--investigation-file", + "-i", + help="Path to input investigation file", + ), + ], + output_file: Annotated[ + str, + typer.Option( + "--output-file", + "-o", + help="Path to output file, stdout ('-') by default", + ), + ] = "-", +): + """Main entry point.""" + # Convert to `Arguments` object. + args = Arguments( + investigation_file=investigation_file, + output_file=output_file, ) - - args = parser.parse_args(argv) - return run(args) + # Actually run. + run(args) -if __name__ == "__main__": - sys.exit(main()) # pragma: no cover +if __name__ == "__main__": # pragma: no cover + typer.run(main) diff --git a/altamisa/apps/isatab2isatab.py b/altamisa/apps/isatab2isatab.py index 688af69..bc889df 100644 --- a/altamisa/apps/isatab2isatab.py +++ b/altamisa/apps/isatab2isatab.py @@ -2,11 +2,17 @@ """Read from ISA-Tab and directly write to ISA-Tab. """ -import argparse +from contextlib import ExitStack import os import sys +import typing +from typing import Dict, Optional, Tuple import warnings +import attrs +import typer +from typing_extensions import Annotated + from altamisa.exceptions import IsaException from altamisa.isatab import ( AssayReader, @@ -19,15 +25,27 @@ StudyValidator, StudyWriter, ) +from altamisa.isatab.models import Assay, InvestigationInfo, Study + +#: Typer application instance. 
+app = typer.Typer()
+
+
+@attrs.define
+class Arguments:
+    input_investigation_file: str
+    output_investigation_file: str
+    quotes: Optional[str]
+    warnings: bool
 
 
-def run(args):
+def run(args: Arguments):
     # Collect warnings
     with warnings.catch_warnings(record=True) as records:
         run_warnings_caught(args)
 
     # Print warnings
-    if not args.no_warnings:
+    if args.warnings:
         for record in records:
             warnings.showwarning(
                 record.message,
@@ -38,7 +56,7 @@ def run(args):
             )
 
 
-def run_warnings_caught(args):
+def run_warnings_caught(args: Arguments):
     # Check if input and output directory are different
     path_in = os.path.realpath(os.path.dirname(args.input_investigation_file))
     path_out = os.path.realpath(os.path.dirname(args.output_investigation_file))
@@ -47,29 +65,36 @@
         msg = tpl.format(path_in, path_out)
         raise IsaException(msg)
 
-    if args.input_investigation_file == "-":  # pragma: no cover
-        args.input_investigation_file = sys.stdin
-    else:
-        args.input_investigation_file = open(args.input_investigation_file, "rt")
-    if args.output_investigation_file == "-":  # pragma: no cover
-        args.output_investigation_file = sys.stdout
-    else:
-        args.output_investigation_file = open(args.output_investigation_file, "wt")
-
-    investigation, studies, assays = run_reading(args, path_in)
-    run_writing(args, path_out, investigation, studies, assays)
-
-
-def run_reading(args, path_in):
+    with ExitStack() as stack:
+        if args.output_investigation_file == "-":  # pragma: no cover
+            output_investigation_file = sys.stdout
+        else:
+            output_investigation_file = stack.enter_context(open(args.output_investigation_file, "wt"))
+
+        investigation, studies, assays = run_reading(args, path_in)
+        run_writing(
+            args,
+            path_out,
+            output_investigation_file,
+            investigation,
+            studies,
+            assays,
+        )
+
+
+def run_reading(
+    args, path_in
+) -> Tuple[InvestigationInfo, Dict[int, Study], Dict[int, Dict[int, Assay]]]:
     # Read investigation
-    investigation = InvestigationReader.from_stream(args.input_investigation_file).read()
+    with open(args.input_investigation_file, "rt") as inputf:
+        investigation = InvestigationReader.from_stream(inputf).read()
 
     # Validate investigation
     InvestigationValidator(investigation).validate()
 
     # Read studies and assays
-    studies = {}
-    assays = {}
+    studies: Dict[int, Study] = {}
+    assays: Dict[int, Dict[int, Assay]] = {}
     for s, study_info in enumerate(investigation.studies):
         if study_info.info.path:
             with open(os.path.join(path_in, study_info.info.path), "rt") as inputf:
@@ -94,27 +119,34 @@
     return investigation, studies, assays
 
 
-def run_writing(args, path_out, investigation, studies, assays):
+def run_writing(
+    args,
+    path_out,
+    output_investigation_file: typing.TextIO,
+    investigation: InvestigationInfo,
+    studies: Dict[int, Study],
+    assays: Dict[int, Dict[int, Assay]],
+):
     # Write investigation
-    if args.output_investigation_file.name == "<stdout>":
+    if output_investigation_file.name == "<stdout>":
         InvestigationWriter.from_stream(
-            investigation, args.output_investigation_file, quote=args.quotes
+            investigation, output_investigation_file, quote=args.quotes
         ).write()
     else:
-        with open(args.output_investigation_file.name, "wt", newline="") as outputf:
+        with open(output_investigation_file.name, "wt", newline="") as outputf:
             InvestigationWriter.from_stream(investigation, outputf, quote=args.quotes).write()
 
     # Write studies and assays
     for s, study_info in enumerate(investigation.studies):
-        if args.output_investigation_file.name == "<stdout>":
+        if output_investigation_file.name == "<stdout>":
             if study_info.info.path:
                 StudyWriter.from_stream(
-                    studies[s], args.output_investigation_file, quote=args.quotes
+                    studies[s], output_investigation_file, quote=args.quotes
                 ).write()
             for a, assay_info in enumerate(study_info.assays):
                 if assay_info.path:
                     AssayWriter.from_stream(
-                        assays[s][a], args.output_investigation_file, quote=args.quotes
+                        assays[s][a], output_investigation_file, quote=args.quotes
                     ).write()
         else:
             if study_info.info.path:
@@ -128,44 +160,53 @@ def run_writing(args, path_out, investigation, studies, assays):
                     AssayWriter.from_stream(assays[s][a], outputf, quote=args.quotes).write()
 
 
-def main(argv=None):
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument(
-        "-i",
-        "--input-investigation-file",
-        required=True,
-        type=str,
-        help="Path to input investigation file",
-    )
-    parser.add_argument(
-        "-o",
-        "--output-investigation-file",
-        default="-",
-        type=str,
-        help=(
-            'Path to output investigation file, stdout ("-") by default. '
-            "Needs to be in a different directory!"
+@app.command()
+def main(
+    input_investigation_file: Annotated[
+        str,
+        typer.Option(
+            "--input-investigation-file",
+            "-i",
+            help="Path to input investigation file",
+        ),
+    ],
+    output_investigation_file: Annotated[
+        str,
+        typer.Option(
+            "--output-investigation-file",
+            "-o",
+            help=(
+                'Path to output investigation file, stdout ("-") by default. '
+                "Needs to be in a different directory!"
+            ),
         ),
+    ],
+    quotes: Annotated[
+        Optional[str],
+        typer.Option(
+            "--quotes",
+            "-q",
+            help='Character for quoting, e.g. "\\"" (None by default)',
+        ),
+    ] = None,
+    warnings: Annotated[
+        bool,
+        typer.Option(
+            "--warnings/--no-warnings",
+            help="Show ISA-tab related warnings (default is to show)",
+        ),
+    ] = True,
+):
+    # Convert to `Arguments` object.
+    args = Arguments(
+        input_investigation_file=input_investigation_file,
+        output_investigation_file=output_investigation_file,
+        quotes=quotes,
+        warnings=warnings,
     )
-    parser.add_argument(
-        "-q",
-        "--quotes",
-        default=None,
-        type=str,
-        help='Character for quoting, e.g. "\\"" (None by default)',
-    )
-    parser.add_argument(
-        "--no-warnings",
-        dest="no_warnings",
-        action="store_true",
-        help="Suppress ISA-tab related warnings (False by default)",
-    )
-    parser.set_defaults(no_warnings=False)
-
-    args = parser.parse_args(argv)
+    # Start application
     return run(args)
 
 
-if __name__ == "__main__":
-    sys.exit(main())  # pragma: no cover
+if __name__ == "__main__":  # pragma: no cover
+    typer.run(main)
diff --git a/altamisa/isatab/headers.py b/altamisa/isatab/headers.py
index 5dc1005..09849ae 100644
--- a/altamisa/isatab/headers.py
+++ b/altamisa/isatab/headers.py
@@ -435,8 +435,7 @@ def _parse_next(self):
             return self._parse_term_source_ref()
         elif val in self.simple_headers:
             if self.allowed_headers and val not in self.allowed_headers:
-                tpl = 'Header "{}" not allowed in {}.'
-                msg = tpl.format(val, self)
+                msg = f'Header "{val}" not allowed in {self}.'
                 raise ParseIsatabException(msg)
             return self._parse_simple_column_header(self.simple_headers[val])
         else:
diff --git a/altamisa/isatab/parse_assay_study.py b/altamisa/isatab/parse_assay_study.py
index fc5cb96..6e89e2f 100644
--- a/altamisa/isatab/parse_assay_study.py
+++ b/altamisa/isatab/parse_assay_study.py
@@ -284,9 +284,6 @@ def _build_freetext_or_term_ref(
             ]
             return term_refs
         else:  # pragma: no cover
-            import pdb
-
-            pdb.set_trace()
            tpl = (
                 "Irregular numbers of fields in ontology term columns"
                 "(i.e. 
';'-separated fields): {}" @@ -481,7 +478,7 @@ def _build_protocol_ref_and_name( self, line: List[str] ) -> Tuple[str, Union[models.AnnotatedStr, str], Optional[str], Optional[str]]: # At least one of these headers has to be specified - if not self.protocol_ref_header: # pragma: no cover + if not self.name_header and not self.protocol_ref_header: # pragma: no cover raise ParseIsatabException( "No protocol reference header found for process found for file {}".format( self.filename @@ -492,7 +489,8 @@ def _build_protocol_ref_and_name( assay_id = "-{}".format(self.assay_id) if self.assay_id else "" name = None name_type = None - if not self.name_header: + if not self.name_header: # and self.protocol_ref_header: + assert self.protocol_ref_header, "invariant: checked above" # Name header is not given, will use auto-generated unique name # based on protocol ref. protocol_ref = line[self.protocol_ref_header.col_no] @@ -505,6 +503,7 @@ def _build_protocol_ref_and_name( ) unique_name = models.AnnotatedStr(name_val, was_empty=True) elif not self.protocol_ref_header: + assert self.name_header, "invariant: checked above" # Name header is given, but protocol ref header is not protocol_ref = table_tokens.TOKEN_UNKNOWN name = line[self.name_header.col_no] @@ -541,8 +540,11 @@ def _build_protocol_ref_and_name( ) unique_name = models.AnnotatedStr(name_val, was_empty=True) if not protocol_ref: # pragma: no cover - tpl = "Missing protocol reference in column {} of file {} " - msg = tpl.format(self.protocol_ref_header.col_no + 1, self.filename) + if self.protocol_ref_header: + tpl = "Missing protocol reference in column {} of file {} " + msg = tpl.format(self.protocol_ref_header.col_no + 1, self.filename) + else: + msg = "Missing protocol reference in file {}".format(self.filename) raise ParseIsatabException(msg) return protocol_ref, unique_name, name, name_type diff --git a/tests/test_apps.py b/tests/test_apps.py index f580f18..9afc7cc 100644 --- a/tests/test_apps.py +++ b/tests/test_apps.py @@ -18,9 +18,7 @@ def test_isatab_validate(): with pytest.warns(IsaWarning) as record: result = runner.invoke(isatab_validate.app, argv) - assert result.exit_code == 1 - assert "Warnign" in result.stdout - assert "Warnign" in result.stderr + assert result.exit_code == 0 assert 17 == len(record) @@ -37,9 +35,10 @@ def test_isatab2isatab(tmpdir): ] with pytest.warns(IsaWarning) as record: - assert not isatab2isatab.main(argv) + result = runner.invoke(isatab2isatab.app, argv) + assert result.exit_code == 0 - assert 10 == len(record) + assert 8 == len(record) def test_isatab2isatab_input_is_output(tmpdir): @@ -53,8 +52,9 @@ def test_isatab2isatab_input_is_output(tmpdir): '"', ] - with pytest.raises(IsaException): - isatab2isatab.main(argv) + result = runner.invoke(isatab2isatab.app, argv) + assert result.exit_code == 1 + assert "Can't output ISA-tab files to same directory as as input" in str(result) def test_isatab2dot(tmpdir): @@ -66,4 +66,5 @@ def test_isatab2dot(tmpdir): str(tmpdir.mkdir("dot").join("out.dot")), ] - assert not isatab2dot.main(argv) + result = runner.invoke(isatab2dot.app, argv) + assert result.exit_code == 0 diff --git a/tests/test_parse_study.py b/tests/test_parse_study.py index 6b4cc7e..887df4c 100644 --- a/tests/test_parse_study.py +++ b/tests/test_parse_study.py @@ -334,7 +334,10 @@ def test_study_reader_small_study(small_investigation_file, small_study_file): # Read study study = reader.read() - StudyValidator(investigation, investigation.studies[0], study).validate() + with pytest.warns(IsaWarning) 
as record: + StudyValidator(investigation, investigation.studies[0], study).validate() + # Check warnings + assert 1 == len(record) # Check results assert os.path.normpath(str(study.file)).endswith(os.path.normpath("data/i_small/s_small.txt")) diff --git a/tests/test_write_study.py b/tests/test_write_study.py index 88210b7..21d9bbb 100644 --- a/tests/test_write_study.py +++ b/tests/test_write_study.py @@ -61,7 +61,7 @@ def test_study_writer_small(small_investigation_file, tmp_path): with pytest.warns(IsaWarning) as record: _parse_write_assert(small_investigation_file, tmp_path) # Check warnings - assert 2 == len(record) + assert 3 == len(record) def test_study_writer_small2(small2_investigation_file, tmp_path): From b95b3d5958bff55a07a2dd47fd5e5597f1fa3fec Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Mon, 15 Jan 2024 02:56:53 +0100 Subject: [PATCH 3/3] wip --- .github/workflows/ci.yml | 31 +++++++++++++++---------------- tests/test_apps.py | 2 +- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 07186fa..bfbcfc1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,32 +16,31 @@ jobs: - '3.12' steps: - - name: Install Python via conda. - uses: s-weigand/setup-conda@v1 + - name: Install Python + uses: actions/setup-python@v4 with: - python-version: ${{ matrix.python-version }} - conda-channels: defaults,bioconda,conda-forge + python-version: "${{ matrix.python-version }}" + - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: lfs: true - - name: Install mamba. - run: conda install -y mamba + - name: Install test dependencies via pip run: pip install -r requirements/test_black.txt + + - name: Lint + run: make lint + - name: Run tests - run: pytest - - name: Create text report - run: coverage report - - name: Create XML report for codacy - run: coverage xml + run: | + make test + coverage report + coverage xml + - name: Upload Python coverage reports to Codecov uses: codecov/codecov-action@v3 with: flags: python env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - - name: Check style with black - run: make black-check - - name: Check style with flake8 - run: flake8 . diff --git a/tests/test_apps.py b/tests/test_apps.py index 9afc7cc..638a726 100644 --- a/tests/test_apps.py +++ b/tests/test_apps.py @@ -7,7 +7,7 @@ from typer.testing import CliRunner from altamisa.apps import isatab2dot, isatab2isatab, isatab_validate -from altamisa.exceptions import IsaException, IsaWarning +from altamisa.exceptions import IsaWarning runner = CliRunner()
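
Note for reviewers: the typer-based entry points introduced in PATCH 2/3 can be smoke-tested
outside of pytest with the same CliRunner pattern that tests/test_apps.py now uses. A minimal
sketch; the example investigation path tests/data/i_small/i_small.txt is an assumption derived
from the fixture names used in the tests above and may need adapting:

    from typer.testing import CliRunner

    from altamisa.apps import isatab2dot

    runner = CliRunner()

    # "-i"/"-o" are the options declared on the new typer `main` command;
    # "-o -" selects stdout, which CliRunner captures in result.stdout.
    result = runner.invoke(
        isatab2dot.app,
        ["-i", "tests/data/i_small/i_small.txt", "-o", "-"],
    )
    assert result.exit_code == 0
    print(result.stdout)  # the rendered dot graph

Because the commands are registered on module-level `app` objects, the same invocation style
works for isatab2isatab and isatab_validate as well.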