From 3bee5e14fe6ab098819f52d4235722756db56f31 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 23 May 2024 09:47:28 +1000 Subject: [PATCH 1/5] adds support for getitems in argstring formatting --- pydra/engine/helpers.py | 7 ++----- pydra/engine/tests/test_helpers.py | 21 ++++++++++++++++++++- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index 4d8e84132b..682e7e1933 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -649,12 +649,9 @@ def argstr_formatting(argstr, inputs, value_updates=None): if value_updates: inputs_dict.update(value_updates) # getting all fields that should be formatted, i.e. {field_name}, ... - inp_fields = re.findall(r"{\w+}", argstr) - inp_fields_float = re.findall(r"{\w+:[0-9.]+f}", argstr) - inp_fields += [re.sub(":[0-9.]+f", "", el) for el in inp_fields_float] + inp_fields = re.findall(r"{(\w+)(?::[0-9.]+f|\[[\w]+\])?}", argstr) val_dict = {} - for fld in inp_fields: - fld_name = fld[1:-1] # extracting the name form {field_name} + for fld_name in inp_fields: fld_value = inputs_dict[fld_name] fld_attr = getattr(attrs.fields(type(inputs)), fld_name) if fld_value is attr.NOTHING or ( diff --git a/pydra/engine/tests/test_helpers.py b/pydra/engine/tests/test_helpers.py index 06ce39220d..308980acbb 100644 --- a/pydra/engine/tests/test_helpers.py +++ b/pydra/engine/tests/test_helpers.py @@ -4,6 +4,7 @@ import random import platform import pytest +import attrs import cloudpickle as cp from unittest.mock import Mock from fileformats.generic import Directory, File @@ -15,9 +16,9 @@ load_and_run, position_sort, parse_copyfile, + argstr_formatting, ) from ...utils.hash import hash_function -from .. 
import helpers_file from ..core import Workflow @@ -311,3 +312,21 @@ def mock_field(copyfile): parse_copyfile(mock_field((1, 2))) with pytest.raises(TypeError, match="Unrecognised type for collation copyfile"): parse_copyfile(mock_field((Mode.copy, 2))) + + +def test_argstr_formatting(): + @attrs.define + class Inputs: + a1_field: str + b2_field: float + c3_field: dict[str, str] + d4_field: list[str] + + inputs = Inputs("1", 2.0, {"c": "3"}, ["4"]) + assert ( + argstr_formatting( + "{a1_field} {b2_field:02f} -test {c3_field[c]} -me {d4_field[0]}", + inputs, + ) + == "1 2.000000 -test 3 -me 4" + ) From 97124a781cd382de7842b48dcec4f5a2c8a1b103 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 24 May 2024 09:06:27 +1000 Subject: [PATCH 2/5] Update pydra/engine/tests/test_helpers.py Co-authored-by: Chris Markiewicz --- pydra/engine/tests/test_helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pydra/engine/tests/test_helpers.py b/pydra/engine/tests/test_helpers.py index 308980acbb..7061d5badd 100644 --- a/pydra/engine/tests/test_helpers.py +++ b/pydra/engine/tests/test_helpers.py @@ -319,8 +319,8 @@ def test_argstr_formatting(): class Inputs: a1_field: str b2_field: float - c3_field: dict[str, str] - d4_field: list[str] + c3_field: ty.Dict[str, str] + d4_field: ty.List[str] inputs = Inputs("1", 2.0, {"c": "3"}, ["4"]) assert ( From 5e01f5e233d35c09cf2ff8ed6c388321a077d4e6 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 24 May 2024 09:08:55 +1000 Subject: [PATCH 3/5] Update pydra/engine/helpers.py Co-authored-by: Chris Markiewicz --- pydra/engine/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index 682e7e1933..177eca69d5 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -649,7 +649,7 @@ def argstr_formatting(argstr, inputs, value_updates=None): if value_updates: inputs_dict.update(value_updates) # getting all fields that should be 
formatted, i.e. {field_name}, ... - inp_fields = re.findall(r"{(\w+)(?::[0-9.]+f|\[[\w]+\])?}", argstr) + inp_fields = parse_format_string(argstr) val_dict = {} for fld_name in inp_fields: fld_value = inputs_dict[fld_name] From 7d84fbab33558e455e5696b61dab57a6be760373 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 24 May 2024 11:48:02 +1000 Subject: [PATCH 4/5] implemented chris's suggestions --- pydra/engine/helpers.py | 19 +++++++++++++++++ pydra/engine/tests/test_helpers.py | 34 +++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index 177eca69d5..e6eaa012ef 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -735,3 +735,22 @@ def parse_copyfile(fld: attr.Attribute, default_collation=FileSet.CopyCollation. f"Unrecognised type for collation copyfile metadata of {fld}, {collation}" ) return mode, collation + + +def parse_format_string(fmtstr): + """Parse an argstr format string and return all keywords used in it.""" + identifier = r"[a-zA-Z_]\w*" + attribute = rf"\.{identifier}" + item = r"\[\w+\]" + # Example: var.attr[key][0].attr2 (capture "var") + field_with_lookups = ( + f"({identifier})(?:{attribute}|{item})*" # Capture only the keyword + ) + conversion = "(?:!r|!s)" + nobrace = "[^{}]*" + # Example: 0{pads[hex]}x (capture "pads") + fmtspec = f"{nobrace}(?:{{({identifier}){nobrace}}}{nobrace})?" 
# Capture keywords in spec + full_field = f"{{{field_with_lookups}{conversion}?(?::{fmtspec})?}}" + + all_keywords = re.findall(full_field, fmtstr) + return set().union(*all_keywords) - {""} diff --git a/pydra/engine/tests/test_helpers.py b/pydra/engine/tests/test_helpers.py index 7061d5badd..48fd6e3120 100644 --- a/pydra/engine/tests/test_helpers.py +++ b/pydra/engine/tests/test_helpers.py @@ -3,6 +3,7 @@ from pathlib import Path import random import platform +import typing as ty import pytest import attrs import cloudpickle as cp @@ -17,6 +18,7 @@ position_sort, parse_copyfile, argstr_formatting, + parse_format_string, ) from ...utils.hash import hash_function from ..core import Workflow @@ -51,7 +53,7 @@ def test_hash_file(tmpdir): with open(outdir / "test.file", "w") as fp: fp.write("test") assert ( - hash_function(File(outdir / "test.file")) == "37fcc546dce7e59585f3217bb4c30299" + hash_function(File(outdir / "test.file")) == "f32ab20c4a86616e32bf2504e1ac5a22" ) @@ -330,3 +332,33 @@ class Inputs: ) == "1 2.000000 -test 3 -me 4" ) + + +def test_parse_format_string1(): + assert parse_format_string("{a}") == {"a"} + + +def test_parse_format_string2(): + assert parse_format_string("{abc}") == {"abc"} + + +def test_parse_format_string3(): + assert parse_format_string("{a:{b}}") == {"a", "b"} + + +def test_parse_format_string4(): + assert parse_format_string("{a:{b[2]}}") == {"a", "b"} + + +def test_parse_format_string5(): + assert parse_format_string("{a.xyz[somekey].abc:{b[a][b].d[0]}}") == {"a", "b"} + + +def test_parse_format_string6(): + assert parse_format_string("{a:05{b[a 2][b].e}}") == {"a", "b"} + + +def test_parse_format_string7(): + assert parse_format_string( + "{a1_field} {b2_field:02f} -test {c3_field[c]} -me {d4_field[0]}" + ) == {"a1_field", "b2_field", "c3_field", "d4_field"} From fc3b31fec9758940d69c568774161f163cf73908 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 24 May 2024 11:54:59 +1000 Subject: [PATCH 5/5] updated hashes in test asserts 
to match latest fileformats --- pydra/engine/tests/test_specs.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index 4f54cd4041..77a0f690b7 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -140,7 +140,7 @@ def test_input_file_hash_1(tmp_path): fields = [("in_file", File)] input_spec = SpecInfo(name="Inputs", fields=fields, bases=(BaseSpec,)) inputs = make_klass(input_spec) - assert inputs(in_file=outfile).hash == "0e9306e5cae1de1b4dff1f27cca03bce" + assert inputs(in_file=outfile).hash == "02fa5f6f1bbde7f25349f54335e1adaf" def test_input_file_hash_2(tmp_path): @@ -154,7 +154,7 @@ def test_input_file_hash_2(tmp_path): # checking specific hash value hash1 = inputs(in_file=file).hash - assert hash1 == "17e4e2b4d8ce8f36bf3fd65804958dbb" + assert hash1 == "aaa50d60ed33d3a316d58edc882a34c3" # checking if different name doesn't affect the hash file_diffname = tmp_path / "in_file_2.txt" @@ -185,7 +185,7 @@ def test_input_file_hash_2a(tmp_path): # checking specific hash value hash1 = inputs(in_file=file).hash - assert hash1 == "17e4e2b4d8ce8f36bf3fd65804958dbb" + assert hash1 == "aaa50d60ed33d3a316d58edc882a34c3" # checking if different name doesn't affect the hash file_diffname = tmp_path / "in_file_2.txt" @@ -204,7 +204,7 @@ def test_input_file_hash_2a(tmp_path): # checking if string is also accepted hash4 = inputs(in_file=str(file)).hash - assert hash4 == "aee7c7ae25509fb4c92a081d58d17a67" + assert hash4 == "800af2b5b334c9e3e5c40c0e49b7ffb5" def test_input_file_hash_3(tmp_path): @@ -278,7 +278,7 @@ def test_input_file_hash_4(tmp_path): # checking specific hash value hash1 = inputs(in_file=[[file, 3]]).hash - assert hash1 == "11b7e9c90bc8d9dc5ccfc8d4526ba091" + assert hash1 == "0693adbfac9f675af87e900065b1de00" # the same file, but int field changes hash1a = inputs(in_file=[[file, 5]]).hash @@ -315,7 +315,7 @@ def 
test_input_file_hash_5(tmp_path): # checking specific hash value hash1 = inputs(in_file=[{"file": file, "int": 3}]).hash - assert hash1 == "5fd53b79e55bbf62a4bb3027eb753a2c" + assert hash1 == "56e6e2c9f3bdf0cd5bd3060046dea480" # the same file, but int field changes hash1a = inputs(in_file=[{"file": file, "int": 5}]).hash