Skip to content

Commit

Permalink
more useful errors for parameter/component splits
Browse files: browse the repository at this point in the history
  • Loading branch information
sellth committed Nov 3, 2023
1 parent 9497e70 commit 6644590
Showing 1 changed file with 20 additions and 9 deletions.
29 changes: 20 additions & 9 deletions altamisa/isatab/parse_investigation.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -43,14 +43,18 @@ def _parse_comment_header(val):

# Helper function to extract protocol parameters
def _split_study_protocols_parameters(
names, name_term_accs, name_term_srcs
protocol_name, names, name_term_accs, name_term_srcs
) -> Iterator[models.FreeTextOrTermRef]:
names = names.split(";")
name_term_accs = name_term_accs.split(";")
name_term_srcs = name_term_srcs.split(";")
if not (len(names) == len(name_term_accs) == len(name_term_srcs)): # pragma: no cover
tpl = 'Unequal protocol parameter splits; found: "{}", "{}", "{}"'
msg = tpl.format(names, name_term_accs, name_term_srcs)
msg = (
f"Unequal parameter splits in protocol '{protocol_name}':\n"
f"Parameter Names: {len(names)}\n"
f"Term Accession Numers: {len(name_term_accs)}\n"
f"Term Source REFs: {len(name_term_srcs)}"
)
raise ParseIsatabException(msg)
if len(names) > len(set(names)): # pragma: no cover
tpl = "Repeated protocol parameter; found: {}"
Expand All @@ -63,7 +67,7 @@ def _split_study_protocols_parameters(

# Helper function to extract protocol components
def _split_study_protocols_components(
names, types, type_term_accs, type_term_srcs
protocol_name, names, types, type_term_accs, type_term_srcs
) -> Iterator[models.ProtocolComponentInfo]:
names = names.split(";")
types = types.split(";")
Expand All @@ -72,8 +76,13 @@ def _split_study_protocols_components(
if not (
len(names) == len(types) == len(type_term_accs) == len(type_term_srcs)
): # pragma: no cover
tpl = "Unequal protocol component splits; " 'found: "{}", "{}", "{}", "{}"'
msg = tpl.format(names, types, type_term_accs, type_term_srcs)
msg = (
f"Unequal component splits in protocol '{protocol_name}':\n"
f"Components Names: {len(names)}\n"
f"Components Types: {len(types)}\n"
f"Type Term Accession Numers: {len(type_term_accs)}\n"
f"Type Term Source REFs: {len(type_term_srcs)}"
)
raise ParseIsatabException(msg)
if len(names) > len(set(names)): # pragma: no cover
tpl = "Repeated protocol components; found: {}"
Expand Down Expand Up @@ -193,7 +202,9 @@ def _read_multi_column_section(self, prefix, ref_keys, section_name):
msg = tpl.format(section_name, list(sorted(section)))
raise ParseIsatabException(msg) # TODO: should be warning?
if not len(set([len(v) for v in section.values()])) == 1: # pragma: no cover
lengths = "\n".join(map(str, [f"{key}: {len(value)}" for key, value in section.items()]))
lengths = "\n".join(
map(str, [f"{key}: {len(value)}" for key, value in section.items()])
)
msg = f"Inconsistent entry lengths in section {section_name}:\n{lengths}"
raise ParseIsatabException(msg)
return section, comment_keys
Expand Down Expand Up @@ -556,13 +567,13 @@ def _read_study_protocols(self) -> Iterator[models.ProtocolInfo]:
paras = {
p.name if hasattr(p, "name") else p: p
for p in _split_study_protocols_parameters(
para_names, para_name_term_accs, para_name_term_srcs
name, para_names, para_name_term_accs, para_name_term_srcs
)
}
comps = {
c.name: c
for c in _split_study_protocols_components(
comp_names, comp_types, comp_type_term_accs, comp_type_term_srcs
name, comp_names, comp_types, comp_type_term_accs, comp_type_term_srcs
)
}
comments = _parse_comments(section, comment_keys, i)
Expand Down

0 comments on commit 6644590

Please sign in to comment.