diff --git a/cwltool/command_line_tool.py b/cwltool/command_line_tool.py
index 8c778ca21..15255c5f7 100644
--- a/cwltool/command_line_tool.py
+++ b/cwltool/command_line_tool.py
@@ -826,6 +826,7 @@ def job(
             _checksum = partial(
                 compute_checksums,
                 runtimeContext.make_fs_access(runtimeContext.basedir),
+                cachebuilder,
             )
             visit_class(
                 [cachebuilder.files, cachebuilder.bindings],
@@ -1252,7 +1253,7 @@ def collect_output_ports(
                 )
 
                 if compute_checksum:
-                    adjustFileObjs(ret, partial(compute_checksums, fs_access))
+                    adjustFileObjs(ret, partial(compute_checksums, fs_access, builder))
             expected_schema = cast(Schema, self.names.get_name("outputs_record_schema", None))
             validate_ex(
                 expected_schema,
diff --git a/cwltool/process.py b/cwltool/process.py
index e0629b9e5..b61db2062 100644
--- a/cwltool/process.py
+++ b/cwltool/process.py
@@ -391,7 +391,7 @@ def _check_adjust(a_file: CWLObjectType) -> CWLObjectType:
     visit_class(outputObj, ("File", "Directory"), _check_adjust)
 
     if compute_checksum:
-        visit_class(outputObj, ("File",), functools.partial(compute_checksums, fs_access))
+        visit_class(outputObj, ("File",), functools.partial(compute_checksums, fs_access, None))
 
     return outputObj
 
@@ -1341,14 +1341,32 @@ def scandeps(
     return r
 
 
-def compute_checksums(fs_access: StdFsAccess, fileobj: CWLObjectType) -> None:
+def compute_checksums(
+    fs_access: StdFsAccess, builder: Optional[Builder], fileobj: CWLObjectType
+) -> None:
+    """
+    Compute the checksum and size of a file object, storing them in place.
+
+    :param fs_access: Used to read the file contents and to compute its size.
+    :param builder: Optional CWL builder. When given, it must have a :py:class:`PathMapper`,
+        which will be used to resolve the actual file location (not its ``stagedir``).
+    :param fileobj: File object; ``checksum`` and ``size`` are set if ``checksum`` is absent.
+    :raises ValueError: If a builder is provided but without having a :py:class:`PathMapper`.
+    """
     if "checksum" not in fileobj:
         checksum = hashlib.sha1()  # nosec
-        location = cast(str, fileobj["location"])
-        with fs_access.open(location, "rb") as f:
+        location = file_path = cast(str, fileobj["location"])
+        if builder is not None:
+            if not builder.pathmapper:
+                raise ValueError(
+                    "Do not call compute_checksums using a "
+                    "builder that doesn't have a pathmapper."
+                )
+            file_path = builder.pathmapper.mapper(location)[0]
+        with fs_access.open(file_path, "rb") as f:
             contents = f.read(1024 * 1024)
             while contents != b"":
                 checksum.update(contents)
                 contents = f.read(1024 * 1024)
             fileobj["checksum"] = "sha1$%s" % checksum.hexdigest()
-            fileobj["size"] = fs_access.size(location)
+            fileobj["size"] = fs_access.size(file_path)