From 307525498995898d8d2c63a08d28d516bc2bb913 Mon Sep 17 00:00:00 2001 From: Joel Welling Date: Thu, 21 Nov 2024 22:20:36 -0500 Subject: [PATCH 01/12] Reactivate bad code --- .../ome_tiff_field_validator.py | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/ingest_validation_tests/ome_tiff_field_validator.py b/src/ingest_validation_tests/ome_tiff_field_validator.py index 0f37bfe..e2c4641 100644 --- a/src/ingest_validation_tests/ome_tiff_field_validator.py +++ b/src/ingest_validation_tests/ome_tiff_field_validator.py @@ -1,8 +1,8 @@ import json import re -# from functools import partial -# from multiprocessing import Pool +from functools import partial +from multiprocessing import Pool from os import cpu_count from pathlib import Path from typing import List, Optional @@ -32,7 +32,7 @@ def expand_terms(dct: dict, prefix: str = "") -> dict: else: child_dct = {} if isinstance(val, list): - assert len(val) == 1, "Expected only one element in list of dicts" + assert len(val) == 1, f"Expected only one element in list of dicts: {val}" child_dct.update(expand_terms(val[0], expanded_prefix + key)) elif isinstance(val, dict): child_dct.update(expand_terms(val, expanded_prefix + key)) @@ -114,7 +114,7 @@ def collect_errors(self, **kwargs) -> List[Optional[str]]: all_tests.update(test_set["fields"]) threads = kwargs.get("coreuse", None) or cpu_count() // 4 or 1 - # pool = Pool(threads) + pool = Pool(threads) _log(f"Threading at OmeTiffFieldValidator with {threads}") filenames_to_test = [] for glob_expr in [ @@ -130,17 +130,16 @@ def collect_errors(self, **kwargs) -> List[Optional[str]]: # TODO: turn back on when issues with XML parsing are resolved # still collecting files so we know if this plugin *should* have run - # rslt_list: List[Optional[str]] = list( - # rslt - # for rslt in pool.imap_unordered( - # partial(_check_ome_tiff_file, tests=all_tests), filenames_to_test - # ) - # if rslt is not None - # ) - # if rslt_list: - # return rslt_list - # elif filenames_to_test: - if filenames_to_test: + rslt_list: List[Optional[str]] = list( + rslt + for rslt in pool.imap_unordered( + partial(_check_ome_tiff_file, tests=all_tests), filenames_to_test + ) + if rslt is not None + ) + if rslt_list: + return rslt_list + elif filenames_to_test: _log( f"Found files to test but skipping ome-tiff field validation. Files: {filenames_to_test}" ) From 82be08e4821096d4fa053f6d76c1ccfc87437b9f Mon Sep 17 00:00:00 2001 From: Joel Welling Date: Thu, 21 Nov 2024 22:33:22 -0500 Subject: [PATCH 02/12] Temporarily kill threading --- .../ome_tiff_field_validator.py | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/ingest_validation_tests/ome_tiff_field_validator.py b/src/ingest_validation_tests/ome_tiff_field_validator.py index e2c4641..dad28d8 100644 --- a/src/ingest_validation_tests/ome_tiff_field_validator.py +++ b/src/ingest_validation_tests/ome_tiff_field_validator.py @@ -130,13 +130,19 @@ def collect_errors(self, **kwargs) -> List[Optional[str]]: # TODO: turn back on when issues with XML parsing are resolved # still collecting files so we know if this plugin *should* have run - rslt_list: List[Optional[str]] = list( - rslt - for rslt in pool.imap_unordered( - partial(_check_ome_tiff_file, tests=all_tests), filenames_to_test - ) - if rslt is not None - ) + rslt_list = [] + for fname in filenames_to_test: + rslt = _check_ome_tiff_file(fname, tests=all_tests) + if rslt is not None: + from pprint import pprint + pprint(rslt) + rslt_list.append(rslt) +# rslt_list: List[Optional[str]] = list( +# for rslt in pool.imap_unordered( +# partial(_check_ome_tiff_file, tests=all_tests), filenames_to_test +# ) +# if rslt is not None +# ) if rslt_list: return rslt_list elif filenames_to_test: From 09f219a747e292d26e93d9da7ff31bc37d91b38e Mon Sep 17 00:00:00 2001 From: Joel Welling Date: Tue, 26 Nov 2024 14:50:03 -0500 Subject: [PATCH 03/12] Handle list of terms; add diagnostics --- .../ome_tiff_field_validator.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/ingest_validation_tests/ome_tiff_field_validator.py b/src/ingest_validation_tests/ome_tiff_field_validator.py index dad28d8..5fef56a 100644 --- a/src/ingest_validation_tests/ome_tiff_field_validator.py +++ b/src/ingest_validation_tests/ome_tiff_field_validator.py @@ -30,18 +30,20 @@ def expand_terms(dct: dict, prefix: str = "") -> dict: elif key == "$" and isinstance(val, str): # special case? rslt[expanded_prefix + key] = val else: - child_dct = {} + child_dct_l = [] if isinstance(val, list): - assert len(val) == 1, f"Expected only one element in list of dicts: {val}" - child_dct.update(expand_terms(val[0], expanded_prefix + key)) + for elt in val: + child_dct_l.append(expand_terms(elt, expanded_prefix + key)) elif isinstance(val, dict): - child_dct.update(expand_terms(val, expanded_prefix + key)) + child_dct_l.append(expand_terms(val, expanded_prefix + key)) elif val is None: - child_dct[expanded_prefix + key] = None + child_dct_l.append({expanded_prefix + key : None}) else: raise ValueError(f"list or dict expected; got {type(val)} {val}") - for key, val in child_dct.items(): - rslt[key] = val + for child_dct in child_dct_l: + for key, val in child_dct.items(): + print(f"HERE {key} {val}") + rslt[key] = val return rslt @@ -80,6 +82,8 @@ def _check_ome_tiff_file(file: str, /, tests: dict) -> Optional[str]: expanded_props = {} for term_dct in image_props: expanded_props.update(expand_terms(term_dct)) + from pprint import pprint + pprint(expanded_props) error_l = [] for key in tests: try: From 0d9ebf656971441bce9c11772924c7fde2f491da Mon Sep 17 00:00:00 2001 From: Joel Welling Date: Tue, 26 Nov 2024 19:22:52 -0500 Subject: [PATCH 04/12] New test case --- test_data/complex_small_ome_tiff.zip | Bin 0 -> 4369 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test_data/complex_small_ome_tiff.zip diff --git a/test_data/complex_small_ome_tiff.zip b/test_data/complex_small_ome_tiff.zip new file mode 100644 index 0000000000000000000000000000000000000000..9df75eafb63c52c8314beb36572c786771f2acbd GIT binary patch literal 4369 zcmeHLdsI_rwr6x4I_=bS-73`b>P)Q&^0HK-goNdFC?X*du^_KRkcVI(3Lyz0f#r2o zR!LDQVjv-42S`*184XD^AyBV3QU#KdBVvdNSJIdsLXrZ9S0Lm@JNMC9%XR;of9~C9 zec#^ukMsNO{q23uI%gkE-2B_OJzfc|f|mZ;_}k{~;p=gVSaOz-PsNv)Aa%du(3W`FWwwC!ZHqP~Y+R&2N9&?BVfxJL(49=7xCdHDCjFjgSNL@G!&Q z@`!pf;DNnx{HZeH*|Yh1_y~MP1pZJY{&+-~n=tGZ;eYYH;`}>4>m+i-_e<{3+|Y~f zmB;pTyUH=ow-j05=dWD7O(`gx3ehzj`Z_OY;JYnT%7*vYdagLF5;wB&Pd_JY`;~<4 z{%^T*>0jKk39MfGVAI(B_XA_hB2qidG5>XdeMDtbZ4@rem2a#M_N<@G&o>d%Qn#*; zKRzEKpocT@nHz@l2r{dj*@PH4fm?D4LtUfi?Sd?qxv6R92UzOXe7%VlJA(J!m@KJ= zlu<0FlO;WH_s%@b)ArnHfWZVk>qhV48kQ-brbORPzfF%1F2~0{o(M2U4Z80ckOyb> zY~{-VBzRZ`_e+cmPKm7tKvEPp(F}YEos>0)b=9UcPWBx>1CI4I_I3qo8o`?oiM9*L zX-ib|y){g*3o4_1qG0#(Co~qY9x9;iQE&|jcHXjP4ZHy5ICe8yQ3>rlwZ;s7cfM&x zMT*cjZS8sh+DO>wZ){0J#~L0#!5T@?+$6JRmGnnsa~LlXeDS0+DpZp}`^?&T2K)-j zqXi?mZ3$|gS+f9s3FXp)6s@=fA&j7DrJlhA#rMqT9ndJpm;t2-(Vh!i&pDyv*a0 zLXiz^XlxfBhy=Az77eAifn^hQks&;Xo#6g)lU@f@Q!4bi#mm&5p5Oy0 z*|DGT4JxUVP_h!2xZWaiUtFk*d+xJpdP}} zVi{LZ=zy-vKp+S}>5iEE$<%*zn!tIxd5-^&JzC)Wpvs8#1hT=S(XBSja_7gOffTa+ zCffNKh7V{-VO*@Y<^q^2yXucgDhB=}=!yq0vTJRJu{^#809zmeEezR?I~>kqX@D-rJAkzS^1Ks7YO) z@WV5l<6o9@+ry*k*5FYSY^y-^Gu2MLx9NYrY5(8;KXn2>^IIKSD9JOP*i$(_Cr}0moF|Bs`dhkC?2r5I-Y#h1Bh*uU$jN&|QHcZ|g$>9y ztg5|zlukLUqB}CifWY}%v-+tU-u%lpLT~$XZTbji>EeQHhx%5o!J5RB`NEjVr-FmAhEFK#M}gP*uq@RWq-V6W?K7Yrjx6q zE`lAw=&LyeeoJ3?s3FcP?#OP_!85~ydWG`PMDNRkvmsBVqWVa_O|Iz?O1rshQK>9p zPO{+{H++C`XKmcr_R_Brd15qikPa^imv|%2Xv;+83bCGcUH_c%*6`Z?@);I$I8738 zS|V(9JOAdblYO-oJlHQ3lO-bABmuXE?-$JsBjo*T7O}jr;`X4<&ba5aavx!$WP_Q) z!RItlOW^6T((Q1ssEy}UwB5DV@N0l&Bd*Qti>v)!e%O?akq7MBL< zxrbyIcq1Ao@qz>rBgJ9jq5AI%4Hl2!8jWcB0f zCFJ`AR6r8DEZSc(`eVV;-N|KS??C?Ls>PE>D>!#PkccEV%nQ8JGY6&3=0`$KLL?jg zIb1ZPU`!beQtMn&>qn&pmo0A3k-2oY_4vX;tI)RUz)V_wT)M} zIBmxF;RV%RWOamV;cANEc@W=G%5R-_&0JkrJf9k-j;y&)UpJOqUHfZ&ae!n@S6lK# zs@@j9+&cXLCoS4#z3~%W|#vZZVKmByi z4DD&?uEuJ^y^6-KaLbmQFAOt5INJf`Le@2)_=i2e18B#de1Q!9+?bGwHt#sFWA;L+ zi>JD}c0f6dvFVquf@EWu!j!{aCxrUd_A#j6L;ztpepAoHM$MWynpiQ zsXwNDq&!{8BNQ5oPpCYB4Tz(%)|Azy!QslPsgk-h>4R;;rJTC7wFqT5qj`E9g`S%s zv!}<~Qu=)-TBZO=b4C1xe!T8nuZ${rp z>8rnw{E*C?>*2tyglFw(9k#v?F~mhTdh7Is^e}~`GCj{>%Am&x>UfACX4$j$cMFX& z0?(L-BtZ-*h_Q>PP z!5f@XREo-B{0qX`)L4yZMh_Dm(9eNeTb7c5?Hy!ky5smaeF7*&R;D{lEH= z<$x(`hlwW1@OO^i_&`-KPTenXnED6CQ~IUmZNy>vllGVIPNFZ~^J*Uz4Rs`ELuH>B z)J?0T=riuD>{fR*R`X)mBb>iXK#|I-;RV(8P$t-^+v+L0#Syxz{)%b*Y}V*ZK?Zay zy}C!aP9q&3BNex_vc!%r$xC(CHhAp zLrXVou1`|>2kr$KT%~U9$D=A>DgB1pAe`M@xaFL>2`S*#16~1=(YuetsX|@$Mpl0@ zGr8uum$h>TMOTV~CZzPb2{(&2FJ=}tRI!&m@@bmGXq)c`hF$C|N|Hq~JN2Z=T8){b zse@4Mm`NEZR$jMF+{1*_#x*t;jy)OQdFWij486^1>w&6fy}0k+{sjB zUPN17^w#)A`lpX022HWvLiZDq)Z)5Wd1?3{JYLy{_`V`xF!-U-){kJb9?-9?B=V@P zm%NpKmnY3_SR<5c?%w;=a;IdsA5fMHkV0L0Zx(My?c&`>W3eM^*SVBoNz2!s9Wsi$ zBWm2yHeX`Dphxe1T-w<3tedC2(R(qjsq$HX=@=nq;3m?b4l+!w;o*E^g4R;7Yl&l0 zE9LoPdDGiEN+Vn9FB->AmkkS*YgH~wfp9`#Y1I{qXjjw->rQu);cW8LfOs{ii z_%#_=Kjb85Y#+ZqQvQ5v**CK+V=gMwKSWX8BKm0*J%=X}ZD=VaN~hH>F4l)XTx?14 zW_RPAmCK5a@c`^brd)hmCX%lx?cWlzOL$r*aiyZov$rJb3{#ud{o_Gbl}HAJdH!pF zK5=6`6d_vAz8<;9_3qKc-)!>V{H7Vf2Y>r?d(Xpp@Rj+-iNTg%CI)}^_x=m`o8|@o i3GU(1cRu;e;Q#CN;7#0He)lTmP3}wU)_;2MmG>W4KNMO3 literal 0 HcmV?d00001 From e4549b3841040fbd8ce70ca235a8912d01d0a973 Mon Sep 17 00:00:00 2001 From: Joel Welling Date: Tue, 26 Nov 2024 19:23:27 -0500 Subject: [PATCH 05/12] json-schema now wants explicit version --- src/ingest_validation_tests/ome_tiff_fields_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ingest_validation_tests/ome_tiff_fields_schema.json b/src/ingest_validation_tests/ome_tiff_fields_schema.json index e88cefe..3837787 100644 --- a/src/ingest_validation_tests/ome_tiff_fields_schema.json +++ b/src/ingest_validation_tests/ome_tiff_fields_schema.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/schema#", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "http://schemata.hubmapconsortium.org/ome_tiff_fields_schema_schema.json", "title": "ome-tiff fields schema", "description": "schema for the definitions file of required ome-tiff fields", From aa7499b363528eee1d5b2249ab77ac96d324a202 Mon Sep 17 00:00:00 2001 From: Joel Welling Date: Tue, 26 Nov 2024 19:24:29 -0500 Subject: [PATCH 06/12] Fix one test case, add another --- tests/test_ome_tiff_field_validator.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/tests/test_ome_tiff_field_validator.py b/tests/test_ome_tiff_field_validator.py index 7453d42..ea500bd 100644 --- a/tests/test_ome_tiff_field_validator.py +++ b/tests/test_ome_tiff_field_validator.py @@ -8,17 +8,24 @@ @pytest.mark.parametrize( ("test_data_fname", "msg_re_list", "assay_type"), ( - # ( - # "test_data/codex_tree_ometiff_bad.zip", - # [ - # ".*tubhiswt_C0_bad.ome.tif is not a valid OME.TIFF file.*", - # ".*sample1.ome.tif is not a valid OME.TIFF file.*", - # ".*sample2.ome.tif is not a valid OME.TIFF file.*", - # ], - # "CODEX", - # ), + ( + "test_data/codex_tree_ometiff_bad.zip", + [ + ".*tubhiswt_C0_bad.ome.tif is not a valid OME.TIFF file.*", + ".*sample1.ome.tif is not a valid OME.TIFF file.*", + ".*sample2.ome.tif is not a valid OME.TIFF file.*", + ], + "CODEX", + ), ("test_data/codex_tree_ometiff_good.zip", [], "CODEX"), ("test_data/fake_snrnaseq_tree_good.zip", [], "snRNAseq"), + ("test_data/complex_small_ome_tiff.zip", + [ + ".*complex_small_ome_tiff/917_cropped_0_Z0_C3_T0.ome.tiff is not" + " a valid OME.TIFF file: Pixels_PhysicalSizeX is required but missing;" + " Pixels_PhysicalSizeY is required but missing;" + " Pixels_PhysicalSizeZ is required but missing" + ], "PAS"), ), ) def test_ome_tiff_field_validator(test_data_fname, msg_re_list, assay_type, tmp_path): From b4596e2478aa77550b5fab21fa7602bd2fba55bf Mon Sep 17 00:00:00 2001 From: Joel Welling Date: Tue, 26 Nov 2024 19:25:06 -0500 Subject: [PATCH 07/12] Apply to all assay types (for ome.tiff files) --- src/ingest_validation_tests/ome_tiff_fields.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ingest_validation_tests/ome_tiff_fields.json b/src/ingest_validation_tests/ome_tiff_fields.json index 0462d77..2c97318 100644 --- a/src/ingest_validation_tests/ome_tiff_fields.json +++ b/src/ingest_validation_tests/ome_tiff_fields.json @@ -1,6 +1,6 @@ [ { - "re": "C.*EX", + "re": ".*", "fields": { "Pixels_DimensionOrder": { "dtype": "categorical", From 678a1379d7a57acfcb7ae2770480b20ea6c679db Mon Sep 17 00:00:00 2001 From: Joel Welling Date: Tue, 26 Nov 2024 19:27:43 -0500 Subject: [PATCH 08/12] Change recursion result to list --- .../ome_tiff_field_validator.py | 66 ++++++++++--------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/src/ingest_validation_tests/ome_tiff_field_validator.py b/src/ingest_validation_tests/ome_tiff_field_validator.py index 5fef56a..b1a32a7 100644 --- a/src/ingest_validation_tests/ome_tiff_field_validator.py +++ b/src/ingest_validation_tests/ome_tiff_field_validator.py @@ -17,37 +17,37 @@ def _log(message: str): print(message) -def expand_terms(dct: dict, prefix: str = "") -> dict: +def expand_terms(dct: dict, prefix: str = "") -> list: """ Convert a dict of of XML info as provided by xmlschema to the form used in the dictionary of expected fields """ - rslt = {} + rslt = [] expanded_prefix = prefix + "_" if prefix else "" for key, val in dct.items(): if key.startswith("@"): # terminal element - rslt[expanded_prefix + key[1:]] = val + rslt.append((expanded_prefix + key[1:], val)) elif key == "$" and isinstance(val, str): # special case? - rslt[expanded_prefix + key] = val + rslt.append((expanded_prefix + key, val)) else: - child_dct_l = [] + child_list_list = [] if isinstance(val, list): for elt in val: - child_dct_l.append(expand_terms(elt, expanded_prefix + key)) + child_list_list.append(expand_terms(elt, expanded_prefix + key)) elif isinstance(val, dict): - child_dct_l.append(expand_terms(val, expanded_prefix + key)) + child_list_list.append(expand_terms(val, expanded_prefix + key)) elif val is None: - child_dct_l.append({expanded_prefix + key : None}) + child_list_list.append([(expanded_prefix + key, None)]) else: raise ValueError(f"list or dict expected; got {type(val)} {val}") - for child_dct in child_dct_l: - for key, val in child_dct.items(): - print(f"HERE {key} {val}") - rslt[key] = val + for child_list in child_list_list: + for key, val in child_list: + rslt.append((key, val)) return rslt -def check_one_prop(key: str, all_prop_dct: dict, this_test: dict) -> None: +def check_one_prop(key: str, all_prop_list: list, this_test: dict) -> None: + all_prop_keys = set(key for key, val in all_prop_list) test_type = this_test["dtype"] if test_type == "trap": # This test is useful when you want to scan lots of ome-tiff files for an @@ -58,18 +58,19 @@ def check_one_prop(key: str, all_prop_dct: dict, this_test: dict) -> None: pass elif test_type == "categorical": allowed_vals = this_test["allowed_values"] - assert key in all_prop_dct, f"{key} is required but missing" - assert all_prop_dct[key] in allowed_vals, ( - f"{key} = {all_prop_dct[key]}" f" not one of {allowed_vals}" - ) + assert key in all_prop_keys, f"{key} is required but missing" + for val in [thisval for thiskey, thisval in all_prop_list if thiskey == key]: + assert val in allowed_vals, ( + f"{key} == {val} is not one of {allowed_vals}" + ) elif test_type == "integer": - assert key in all_prop_dct, f"{key} is required but missing" - assert isinstance(all_prop_dct[key], int), f"{key} = {all_prop_dct[key]}" f" is not an int" + assert key in all_prop_keys, f"{key} is required but missing" + for val in [thisval for thiskey, thisval in all_prop_list if thiskey == key]: + assert isinstance(val, int), f"{key} = {val} is not an int" elif test_type == "float": - assert key in all_prop_dct, f"{key} is required but missing" - assert isinstance(all_prop_dct[key], float), ( - f"{key} = {all_prop_dct[key]}" f" is not a float" - ) + assert key in all_prop_keys, f"{key} is required but missing" + for val in [thisval for thiskey, thisval in all_prop_list if thiskey == key]: + assert isinstance(val, float), f"{key} = {val} is not a float" else: raise NotImplementedError(f"Unimplemented dtype {test_type} for ome-tiff field") @@ -78,12 +79,14 @@ def _check_ome_tiff_file(file: str, /, tests: dict) -> Optional[str]: try: with tifffile.TiffFile(file) as tf: xml_document = xmlschema.XmlDocument(tf.ome_metadata) + except Exception as excp: + return f"{file} is not a valid OME.TIFF file: Failed to read OME XML" + + try: image_props = xmlschema.to_dict(xml_document)["Image"] - expanded_props = {} + expanded_props = [] for term_dct in image_props: - expanded_props.update(expand_terms(term_dct)) - from pprint import pprint - pprint(expanded_props) + expanded_props.extend(expand_terms(term_dct)) error_l = [] for key in tests: try: @@ -117,6 +120,9 @@ def collect_errors(self, **kwargs) -> List[Optional[str]]: if re.fullmatch(test_set["re"], self.assay_type): all_tests.update(test_set["fields"]) + if not all_tests: + return [] # nothing to test for this assay + threads = kwargs.get("coreuse", None) or cpu_count() // 4 or 1 pool = Pool(threads) _log(f"Threading at OmeTiffFieldValidator with {threads}") @@ -138,8 +144,7 @@ def collect_errors(self, **kwargs) -> List[Optional[str]]: for fname in filenames_to_test: rslt = _check_ome_tiff_file(fname, tests=all_tests) if rslt is not None: - from pprint import pprint - pprint(rslt) + _log(f"Result of _check_ome_tiff_file({fname}) is {rslt}") rslt_list.append(rslt) # rslt_list: List[Optional[str]] = list( # for rslt in pool.imap_unordered( @@ -150,9 +155,6 @@ def collect_errors(self, **kwargs) -> List[Optional[str]]: if rslt_list: return rslt_list elif filenames_to_test: - _log( - f"Found files to test but skipping ome-tiff field validation. Files: {filenames_to_test}" - ) return [None] else: return [] From 74f059633215a4019f2d5ba0fe8a9e2c0edea5d6 Mon Sep 17 00:00:00 2001 From: Joel Welling Date: Tue, 26 Nov 2024 19:34:08 -0500 Subject: [PATCH 09/12] Turn threading back on --- .../ome_tiff_field_validator.py | 22 ++++++------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/ingest_validation_tests/ome_tiff_field_validator.py b/src/ingest_validation_tests/ome_tiff_field_validator.py index b1a32a7..aae6ab5 100644 --- a/src/ingest_validation_tests/ome_tiff_field_validator.py +++ b/src/ingest_validation_tests/ome_tiff_field_validator.py @@ -137,21 +137,13 @@ def collect_errors(self, **kwargs) -> List[Optional[str]]: for file in path.glob(glob_expr): filenames_to_test.append(file) - # TODO: turn back on when issues with XML parsing are resolved - # still collecting files so we know if this plugin *should* have run - - rslt_list = [] - for fname in filenames_to_test: - rslt = _check_ome_tiff_file(fname, tests=all_tests) - if rslt is not None: - _log(f"Result of _check_ome_tiff_file({fname}) is {rslt}") - rslt_list.append(rslt) -# rslt_list: List[Optional[str]] = list( -# for rslt in pool.imap_unordered( -# partial(_check_ome_tiff_file, tests=all_tests), filenames_to_test -# ) -# if rslt is not None -# ) + rslt_list: List[Optional[str]] = list( + rslt + for rslt in pool.imap_unordered( + partial(_check_ome_tiff_file, tests=all_tests), filenames_to_test + ) + if rslt is not None + ) if rslt_list: return rslt_list elif filenames_to_test: From cecb69e4e07ebd022313baaa334b9e3113731bc7 Mon Sep 17 00:00:00 2001 From: Joel Welling Date: Tue, 26 Nov 2024 21:37:05 -0500 Subject: [PATCH 10/12] flake8 --- src/ingest_validation_tests/ome_tiff_field_validator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ingest_validation_tests/ome_tiff_field_validator.py b/src/ingest_validation_tests/ome_tiff_field_validator.py index aae6ab5..a4695bc 100644 --- a/src/ingest_validation_tests/ome_tiff_field_validator.py +++ b/src/ingest_validation_tests/ome_tiff_field_validator.py @@ -52,8 +52,8 @@ def check_one_prop(key: str, all_prop_list: list, this_test: dict) -> None: if test_type == "trap": # This test is useful when you want to scan lots of ome-tiff files for an # example of a new field type - if key in all_prop_dct: - raise RuntimeError(f"TRAP: {key} -> {all_prop_dct[key]} vs {this_test}") + if key in all_prop_keys: + raise RuntimeError(f"TRAP: {key} in {all_prop_keys} vs {this_test}") else: pass elif test_type == "categorical": @@ -79,7 +79,7 @@ def _check_ome_tiff_file(file: str, /, tests: dict) -> Optional[str]: try: with tifffile.TiffFile(file) as tf: xml_document = xmlschema.XmlDocument(tf.ome_metadata) - except Exception as excp: + except Exception: return f"{file} is not a valid OME.TIFF file: Failed to read OME XML" try: @@ -138,7 +138,7 @@ def collect_errors(self, **kwargs) -> List[Optional[str]]: filenames_to_test.append(file) rslt_list: List[Optional[str]] = list( - rslt + rslt for rslt in pool.imap_unordered( partial(_check_ome_tiff_file, tests=all_tests), filenames_to_test ) From 6cc9f9835687d994da7b273ba7ca54d5a49d6e44 Mon Sep 17 00:00:00 2001 From: Joel Welling Date: Tue, 26 Nov 2024 21:44:53 -0500 Subject: [PATCH 11/12] black --- .../ome_tiff_field_validator.py | 4 +--- tests/test_ome_tiff_field_validator.py | 17 ++++++++++------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/ingest_validation_tests/ome_tiff_field_validator.py b/src/ingest_validation_tests/ome_tiff_field_validator.py index a4695bc..68dfbae 100644 --- a/src/ingest_validation_tests/ome_tiff_field_validator.py +++ b/src/ingest_validation_tests/ome_tiff_field_validator.py @@ -60,9 +60,7 @@ def check_one_prop(key: str, all_prop_list: list, this_test: dict) -> None: allowed_vals = this_test["allowed_values"] assert key in all_prop_keys, f"{key} is required but missing" for val in [thisval for thiskey, thisval in all_prop_list if thiskey == key]: - assert val in allowed_vals, ( - f"{key} == {val} is not one of {allowed_vals}" - ) + assert val in allowed_vals, f"{key} == {val} is not one of {allowed_vals}" elif test_type == "integer": assert key in all_prop_keys, f"{key} is required but missing" for val in [thisval for thiskey, thisval in all_prop_list if thiskey == key]: diff --git a/tests/test_ome_tiff_field_validator.py b/tests/test_ome_tiff_field_validator.py index ea500bd..fc33f96 100644 --- a/tests/test_ome_tiff_field_validator.py +++ b/tests/test_ome_tiff_field_validator.py @@ -19,13 +19,16 @@ ), ("test_data/codex_tree_ometiff_good.zip", [], "CODEX"), ("test_data/fake_snrnaseq_tree_good.zip", [], "snRNAseq"), - ("test_data/complex_small_ome_tiff.zip", - [ - ".*complex_small_ome_tiff/917_cropped_0_Z0_C3_T0.ome.tiff is not" - " a valid OME.TIFF file: Pixels_PhysicalSizeX is required but missing;" - " Pixels_PhysicalSizeY is required but missing;" - " Pixels_PhysicalSizeZ is required but missing" - ], "PAS"), + ( + "test_data/complex_small_ome_tiff.zip", + [ + ".*complex_small_ome_tiff/917_cropped_0_Z0_C3_T0.ome.tiff is not" + " a valid OME.TIFF file: Pixels_PhysicalSizeX is required but missing;" + " Pixels_PhysicalSizeY is required but missing;" + " Pixels_PhysicalSizeZ is required but missing" + ], + "PAS", + ), ), ) def test_ome_tiff_field_validator(test_data_fname, msg_re_list, assay_type, tmp_path): From 131a93639e56f7d4dc5a86a690b9c81f7efe5fcb Mon Sep 17 00:00:00 2001 From: Joel Welling Date: Tue, 26 Nov 2024 21:54:42 -0500 Subject: [PATCH 12/12] isort --- src/ingest_validation_tests/ome_tiff_field_validator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ingest_validation_tests/ome_tiff_field_validator.py b/src/ingest_validation_tests/ome_tiff_field_validator.py index 68dfbae..21ad1f6 100644 --- a/src/ingest_validation_tests/ome_tiff_field_validator.py +++ b/src/ingest_validation_tests/ome_tiff_field_validator.py @@ -1,6 +1,5 @@ import json import re - from functools import partial from multiprocessing import Pool from os import cpu_count