From 7afa5f1d4af9b64153a84e248c87c891cdffd33b Mon Sep 17 00:00:00 2001 From: Marcel Zwiers Date: Fri, 2 Feb 2024 23:03:08 +0100 Subject: [PATCH] Refactoring of the dynamic runindex logic Part 2 (PR #213) - Revert renaming at the end, instead rename when encountering run-2 - Remove the datasource.targets bookkeeping - Adjust the tests accordingly --- bidscoin/bcoin.py | 2 +- bidscoin/bids.py | 118 ++++++++++----------- bidscoin/plugins/dcm2niix2bids.py | 75 +++++++------- bidscoin/plugins/nibabel2bids.py | 8 +- bidscoin/plugins/spec2nii2bids.py | 10 +- docs/CHANGELOG.md | 2 +- tests/test_bids.py | 163 ++++++++++++------------------ 7 files changed, 158 insertions(+), 220 deletions(-) diff --git a/bidscoin/bcoin.py b/bidscoin/bcoin.py index c0687211..6f0ffa98 100755 --- a/bidscoin/bcoin.py +++ b/bidscoin/bcoin.py @@ -554,7 +554,7 @@ def test_bidscoin(bidsmapfile: Union[Path,dict], options: dict=None, testplugins LOGGER.warning(f"Failed test: {plugin.stem}") if not success: - LOGGER.warning('Not all tests finished successfully (this may be ok, but check the output above)') + LOGGER.warning('Not all tests finished successfully (this may be OK, but check the output above)') trackusage('bidscoin_error') else: LOGGER.success('All tests finished successfully :-)') diff --git a/bidscoin/bids.py b/bidscoin/bids.py index b9b5687e..2de4d715 100644 --- a/bidscoin/bids.py +++ b/bidscoin/bids.py @@ -77,18 +77,17 @@ def __init__(self, provenance: Union[str, Path]='', plugins: Dict[str, Plugin]=N :param targets: A list of output targets of the source """ - self.path: Path = Path(provenance) - self.datatype: str = datatype - self.dataformat: str = dataformat - self.plugins: dict = plugins + self.path: Path = Path(provenance) + self.datatype: str = datatype + self.dataformat: str = dataformat + self.plugins = plugins if not plugins: self.plugins = {} if not dataformat: self.is_datasource() - self.subprefix: str = subprefix - self.sesprefix: str = sesprefix - self.targets: Set[Path] = set(targets) - self._cache: dict = {} + self.subprefix: str = subprefix + self.sesprefix: str = sesprefix + self._cache: dict = {} def resubprefix(self) -> str: """Returns the subprefix with escaped regular expression characters (except '-'). A single '*' wildcard is returned as ''""" @@ -121,7 +120,7 @@ def is_datasource(self) -> bool: return False - def properties(self, tagname: str, run: dict=None) -> Union[str, int]: + def properties(self, tagname: str, run: Run=None) -> Union[str, int]: """ Gets the 'filepath[:regex]', 'filename[:regex]', 'filesize' or 'nrfiles' filesystem property. The filepath (with trailing "/") and filename can be parsed using an optional regular expression re.findall(regex, filepath/filename). The last match is returned @@ -242,7 +241,7 @@ def attributes(self, attributekey: str, validregexp: bool=False, cache: bool=Tru return attributeval - def _extattributes(self) -> dict: + def _extattributes(self) -> Attributes: """ Read attributes from the json sidecar file if it is there @@ -256,10 +255,10 @@ def _extattributes(self) -> dict: attributes = json.load(json_fid) if not isinstance(attributes, dict): LOGGER.warning(f"Skipping unexpectedly formatted meta-data in: {jsonfile}") - return {} + return Attributes({}) self._cache.update(attributes) - return attributes + return Attributes(attributes) def subid_sesid(self, subid: str=None, sesid: str=None) -> Tuple[str, str]: """ @@ -930,7 +929,7 @@ def load_bidsmap(yamlfile: Path=Path(), folder: Path=templatefolder, plugins:Uni if bidsmapversion.rsplit('.', 1)[0] != __version__.rsplit('.', 1)[0] and any(checks): LOGGER.warning(f'BIDScoiner version conflict: {yamlfile} was created with version {bidsmapversion}, but this is version {__version__}') elif bidsmapversion != __version__ and any(checks): - LOGGER.info(f'BIDScoiner version difference: {yamlfile} was created with version {bidsmapversion}, but this is version {__version__}. This is normally ok but check the https://bidscoin.readthedocs.io/en/latest/CHANGELOG.html') + LOGGER.info(f'BIDScoiner version difference: {yamlfile} was created with version {bidsmapversion}, but this is version {__version__}. This is normally OK but check the https://bidscoin.readthedocs.io/en/latest/CHANGELOG.html') # Make sure subprefix and sesprefix are strings subprefix = bidsmap['Options']['bidscoin']['subprefix'] = bidsmap['Options']['bidscoin']['subprefix'] or '' @@ -1481,7 +1480,7 @@ def find_run(bidsmap: Bidsmap, provenance: str, dataformat: str='', datatype: st return Run({}) -def delete_run(bidsmap: Bidsmap, provenance: Union[dict, str], datatype: str= '', dataformat: str='') -> None: +def delete_run(bidsmap: Bidsmap, provenance: Union[Run, str], datatype: str= '', dataformat: str='') -> None: """ Delete the first matching run from the BIDS map @@ -1771,7 +1770,7 @@ def get_derivatives(datatype: str, exceptions: Union[tuple,list]=()) -> list: return [] -def get_bidsname(subid: str, sesid: str, run: dict, validkeys: bool, runtime: bool=False, cleanup: bool=True) -> str: +def get_bidsname(subid: str, sesid: str, run: Run, validkeys: bool, runtime: bool=False, cleanup: bool=True) -> str: """ Composes a filename as it should be according to the BIDS standard using the BIDS keys in run. The bids values are dynamically updated and cleaned, and invalid bids keys and empty bids values are ignored @@ -1928,17 +1927,18 @@ def insert_bidskeyval(bidsfile: Union[str, Path], bidskey: str, newvalue: str, v return newbidsfile -def increment_runindex(outfolder: Path, bidsname: str, run: Run) -> Union[Path, str]: +def increment_runindex(outfolder: Path, bidsname: str, run: Run, scans_table: pd.DataFrame=pd.DataFrame()) -> str: """ Checks if a file with the same bidsname already exists in the folder and then increments the dynamic runindex (if any) until no such file is found. - Important side effect for <<>> dynamic value: - If the run-less file already exists, start with run-2, run-1 will be added later to run-less files + NB: For <<>> runs, if the run-less file already exists, then add 'run-2' to bidsname, rename run-less files to + 'run-1' and update the `scans_table` accordingly. :param outfolder: The full pathname of the bids output folder - :param bidsname: The bidsname with a provisional runindex + :param bidsname: The bidsname with a provisional runindex, e.g. from get_bidsname() :param run: The run mapping with the BIDS key-value pairs + :param scans_table: BIDS scans.tsv dataframe with all filenames and e.g. acquisition timestamps :return: The bidsname with the original or incremented runindex """ @@ -1946,63 +1946,47 @@ def increment_runindex(outfolder: Path, bidsname: str, run: Run) -> Union[Path, runval = str(run['bids'].get('run') or '') if not (runval.startswith('<<') and runval.endswith('>>') and (runval.replace('<','').replace('>','').isdecimal() or runval == '<<>>')): return bidsname + bidsext = ''.join(Path(bidsname).suffixes) + bidsname = bidsname.split('.')[0] - # Increment the run-index if the bidsfile already exists - while list(outfolder.glob(f"{Path(bidsname).with_suffix('').stem}.*")): - runindex = get_bidsvalue(bidsname, 'run') - if not runindex: # The run-less bids file already exists -> start with run-2 - bidsname = insert_bidskeyval(bidsname, 'run', '2', False) - else: # Do the normal increment - bidsname = get_bidsvalue(bidsname, 'run', str(int(runindex) + 1)) + # Make an inventory of the runs + runless_name = insert_bidskeyval(bidsname, 'run', '', False) + run1_name = insert_bidskeyval(bidsname, 'run', '1', False) + runless_files = list(outfolder.glob(f"{runless_name}.*")) + run1_files = list(outfolder.glob(f"{run1_name}.*")) - return bidsname - - -def rename_runless_to_run1(runs: List[Run], scans_table: pd.DataFrame) -> None: - """ - Adds run-1 label to run-less files that use dynamic index (<<>>) in bidsmap run-items for which files with - run-2 label exist in the output folder. Additionally, 'scans_table' is updated based on the changes. - - :param runs: Bidsmap run-items with accumulated files under datasource.targets (all files created via that run-item) - :param scans_table: BIDS scans.tsv dataframe with all filenames and acquisition timestamps - :return: - """ - - for run in runs: - if run['bids'].get('run') != '<<>>': - continue + # Start incrementing from run-1 if we have already renamed runless to run-1 + if run1_files and runval == '<<>>': + bidsname = run1_name - for target in run['datasource'].targets.copy(): # Copy: avoid problems with removing items within loop + # Increment the run-index if the bidsfile already exists until that's no longer the case + while list(outfolder.glob(f"{bidsname}.*")): # The run already exists -> increment the run-index + runindex = get_bidsvalue(bidsname, 'run') or '1' # If run-less -> identify as existing run-1 + bidsname = insert_bidskeyval(bidsname, 'run', str(int(runindex) + 1), False) - outfolder = target.parent - bidsext = ''.join(target.suffixes) - bidsname = target.with_suffix('').stem - run1_bidsname = insert_bidskeyval(bidsname, 'run', '1', False) - run2_bidsname = insert_bidskeyval(bidsname, 'run', '2', False) + # Rename run-less to run-1 when dealing with a new run-2 + if runless_files and get_bidsvalue(bidsname, 'run') == '2': - # Rename runless to run-1 if run-2 exists - if not get_bidsvalue(bidsname, 'run') and list(outfolder.glob(f"{run2_bidsname}.*")): + # Check if everything is OK + if runless_files and run1_files: + LOGGER.error(f"File already exists, cannot rename {outfolder/runless_name}.* -> {run1_name}.*") + return bidsname + bidsext - # Add run-1 to run-less bidsname files because run-2 exists - for runless_file in outfolder.glob(f"{bidsname}.*"): - ext = ''.join(runless_file.suffixes) - run1_file = (outfolder/run1_bidsname).with_suffix(ext) - LOGGER.info(f"Found run-2 files for <<>> index, renaming\n{runless_file} ->\n{run1_file}") - runless_file.replace(run1_file) + # Rename run-less to run-1 + for runless_file in runless_files: + LOGGER.info(f"Found run-2 files for <<>> index, renaming\n{runless_file} -> {run1_name}") + runless_file.replace((outfolder/run1_name).with_suffix(''.join(runless_file.suffixes))) - # Change row name in the scans table - runless_entry = f"{outfolder.name}/{bidsname}{bidsext}" # NB: '/' as_posix - run1_entry = f"{outfolder.name}/{run1_bidsname}{bidsext}" # NB: '/' as_posix - if runless_entry in scans_table.index: - LOGGER.verbose(f"Renaming scans entry:\n{runless_entry} -> {run1_entry}") - scans_table.rename(index={runless_entry: run1_entry}, inplace=True) + # Update the scans-tsv file + runless_entry = f"{outfolder.name}/{runless_file.name}" # NB: as posix + run1_entry = insert_bidskeyval(runless_entry, 'run', '1', False) + if runless_entry in scans_table.index: + scans_table.rename(index={runless_entry: run1_entry}, inplace=True) - # Change target from run-less to run-1 - run['datasource'].targets.remove(target) - run['datasource'].targets.add((outfolder/run1_bidsname).with_suffix(bidsext)) + return bidsname + bidsext -def updatemetadata(datasource: DataSource, targetmeta: Path, usermeta: dict, extensions: list, sourcemeta: Path = Path()) -> dict: +def updatemetadata(datasource: DataSource, targetmeta: Path, usermeta: Meta, extensions: list, sourcemeta: Path = Path()) -> Meta: """ Load the metadata from the target (json sidecar), then add metadata from the source (json sidecar) and finally add the user metadata (meta table). Source metadata other than json sidecars are copied over to the target folder. Special @@ -2076,7 +2060,7 @@ def updatemetadata(datasource: DataSource, targetmeta: Path, usermeta: dict, ext if not metapool.get('B0FieldSource'): metapool.pop('B0FieldSource', None) if not metapool.get('B0FieldIdentifier'): metapool.pop('B0FieldIdentifier', None) - return metapool + return Meta(metapool) def addparticipant(participants_tsv: Path, subid: str='', sesid: str='', data: dict=None, dryrun: bool=False) -> Tuple[pd.DataFrame, dict]: diff --git a/bidscoin/plugins/dcm2niix2bids.py b/bidscoin/plugins/dcm2niix2bids.py index e3138445..1d89c8d8 100644 --- a/bidscoin/plugins/dcm2niix2bids.py +++ b/bidscoin/plugins/dcm2niix2bids.py @@ -14,7 +14,7 @@ from pathlib import Path from bidscoin import bcoin, bids, lsdirs, due, Doi from bidscoin.utilities import physio -from bidscoin.bids import Bidsmap, Run, Plugin +from bidscoin.bids import Bidsmap, Plugin try: from nibabel.testing import data_path except ImportError: @@ -147,7 +147,7 @@ def bidsmapper_plugin(session: Path, bidsmap_new: Bidsmap, bidsmap_old: Bidsmap, if dataformat == 'DICOM': for file in sourcefile.parent.iterdir(): if abs(file.stat().st_size - sourcefile.stat().st_size) > 1024 * 50 and file.suffix == sourcefile.suffix: - LOGGER.warning(f"Not all {file.suffix}-files in '{sourcefile.parent}' have the same size. This may be ok but can also be indicative of a truncated acquisition or file corruption(s)") + LOGGER.warning(f"Not all {file.suffix}-files in '{sourcefile.parent}' have the same size. This may be OK but can also be indicative of a truncated acquisition or file corruption(s)") break # See if we can find a matching run in the old bidsmap @@ -231,7 +231,6 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N scans_table.index.name = 'filename' # Process all the source files or run subfolders - matched_runs: List[Run] = [] sourcefile = Path() for source in sources: @@ -246,6 +245,7 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N # Get a matching run from the bidsmap datasource = bids.DataSource(sourcefile, {'dcm2niix2bids': options}, dataformat) run, match = bids.get_matching_run(datasource, bidsmap, runtime=True) + runindex = run['bids'].get('run', '') # Check if we should ignore this run if datasource.datatype in bidsmap['Options']['bidscoin']['ignoretypes']: @@ -258,7 +258,6 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N continue LOGGER.info(f"--> Coining: {source}") - matched_runs.append(run) # Create the BIDS session/datatype output folder suffix = datasource.dynamicvalue(run['bids']['suffix'], True, True) @@ -269,11 +268,11 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N outfolder.mkdir(parents=True, exist_ok=True) # Compose the BIDS filename using the matched run - ignore = bids.check_ignore(datasource.datatype, bidsignore) - bidsname = bids.get_bidsname(subid, sesid, run, not ignore, runtime=True) - ignore = ignore or bids.check_ignore(bidsname+'.json', bidsignore, 'file') - bidsname = bids.increment_runindex(outfolder, bidsname, run) - jsonfiles = set() # Set -> Collect the associated json-files (for updating them later) -- possibly > 1 + ignore = bids.check_ignore(datasource.datatype, bidsignore) + bidsname = bids.get_bidsname(subid, sesid, run, not ignore, runtime=True) + ignore = ignore or bids.check_ignore(bidsname+'.json', bidsignore, 'file') + bidsname = bids.increment_runindex(outfolder, bidsname, run, scans_table) + sidecars = set() # -> A store for all output targets, in the form of json sidecar-files (which is the easiest) # Check if the bidsname is valid bidstest = (Path('/')/subid/sesid/datasource.datatype/bidsname).with_suffix('.json').as_posix() @@ -290,7 +289,7 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N # Check if the source files all have approximately the same size (difference < 50kB) for file in source.iterdir() if source.is_dir() else []: if abs(file.stat().st_size - sourcefile.stat().st_size) > 1024 * 50 and file.suffix == sourcefile.suffix: - LOGGER.warning(f"Not all {file.suffix}-files in '{source}' have the same size. This may be ok but can also be indicative of a truncated acquisition or file corruption(s)") + LOGGER.warning(f"Not all {file.suffix}-files in '{source}' have the same size. This may be OK but can also be indicative of a truncated acquisition or file corruption(s)") break # Convert physiological log files (dcm2niix can't handle these) @@ -300,8 +299,7 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N try: physiodata = physio.readphysio(sourcefile) physio.physio2tsv(physiodata, outfolder/bidsname) - jsonfiles.update(outfolder.glob(f"{bidsname}.json")) # add existing created json files: bidsname.json - datasource.targets.update(outfolder.glob(f"{bidsname}.*tsv*")) # Add nifti files created using this run-item + sidecars.add(outfolder/f"{bidsname}.json") # Collect the created json file except Exception as physioerror: LOGGER.error(f"Could not read/convert physiological file: {sourcefile}\n{physioerror}") continue @@ -314,27 +312,28 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N filename = bidsname, outfolder = outfolder, source = source) - if bcoin.run_command(command): - if not list(outfolder.glob(f"{bidsname}.*nii*")): continue + if bcoin.run_command(command) and not next(outfolder.glob(f"{bidsname}.*"), None): + continue - jsonfiles.update(outfolder.glob(f"{bidsname}.json")) # add existing created json files: bidsname.json - datasource.targets.update(outfolder.glob(f"{bidsname}.*nii*")) # Add nifti files created using this run-item + # Collect the json file output file + if next(outfolder.glob(f"{bidsname}.nii*"), None): + sidecars.add(outfolder/f"{bidsname}.json") # Handle the ABCD GE pepolar sequence - extrafile = list(outfolder.glob(f"{bidsname}a.nii*")) + extrafile = next(outfolder.glob(f"{bidsname}a.nii*"), None) if extrafile: # Load the json meta-data to see if it's a pepolar sequence - with extrafile[0].with_suffix('').with_suffix('.json').open('r') as json_fid: + with extrafile.with_suffix('').with_suffix('.json').open('r') as json_fid: jsondata = json.load(json_fid) if 'PhaseEncodingPolarityGE' in jsondata: - ext = ''.join(extrafile[0].suffixes) + ext = ''.join(extrafile.suffixes) invfile = bids.get_bidsvalue(outfolder/(bidsname+ext), 'dir', bids.get_bidsvalue(bidsname,'dir') + jsondata['PhaseEncodingPolarityGE']) - LOGGER.verbose(f"Renaming GE reversed polarity image: {extrafile[0]} -> {invfile}") - extrafile[0].replace(invfile) - extrafile[0].with_suffix('').with_suffix('.json').replace(invfile.with_suffix('').with_suffix('.json')) - jsonfiles.add(invfile.with_suffix('').with_suffix('.json')) + LOGGER.verbose(f"Renaming GE reversed polarity image: {extrafile} -> {invfile}") + extrafile.replace(invfile) + extrafile.with_suffix('').with_suffix('.json').replace(invfile.with_suffix('').with_suffix('.json')) + sidecars.add(invfile.with_suffix('').with_suffix('.json')) else: - LOGGER.warning(f"Unexpected variants of {outfolder/bidsname}* were produced by dcm2niix. Possibly this can be remedied by using the dcm2niix -i option (to ignore derived, localizer and 2D images) or by clearing the BIDS folder before running bidscoiner") + LOGGER.error(f"Unexpected variants of {outfolder/bidsname}* were produced by dcm2niix. Possibly this can be remedied by using the dcm2niix -i option (to ignore derived, localizer and 2D images) or by clearing the BIDS folder before running bidscoiner") # Replace uncropped output image with the cropped one if '-x y' in options.get('args',''): @@ -353,7 +352,7 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N # Strip each dcm2niix postfix and assign it to bids entities in a newly constructed bidsname ext = ''.join(dcm2niixfile.suffixes) postfixes = dcm2niixfile.name.split(bidsname)[1].rsplit(ext)[0].split('_')[1:] - newbidsname = dcm2niixfile.name + newbidsname = bids.insert_bidskeyval(dcm2niixfile.name, 'run', bids.sanitize(runindex), ignore) # Restart the run-index. NB: Unlike bidsname, newbidsname has a file extension for postfix in postfixes: # Patch the echo entity in the newbidsname with the dcm2niix echo info # NB: We can't rely on the bids-entity info here because manufacturers can e.g. put multiple echos in one series / run-folder @@ -366,10 +365,10 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N elif echonr[0:-1].isdecimal(): LOGGER.verbose(f"Splitting off echo-number {echonr[0:-1]} from the '{postfix}' postfix") newbidsname = bids.insert_bidskeyval(newbidsname, 'echo', echonr[0:-1].lstrip('0'), ignore) # Strip of the 'a', 'b', etc. from `e1a`, `e1b`, etc - newbidsname = bids.get_bidsvalue(newbidsname, fallback, echonr[-1]) # Append the 'a' to the acq-label + newbidsname = bids.get_bidsvalue(newbidsname, fallback, echonr[-1]) # Append the 'a' to the fallback-label else: LOGGER.error(f"Unexpected postix '{postfix}' found in {dcm2niixfile}") - newbidsname = bids.get_bidsvalue(newbidsname, fallback, postfix) # Append the unknown postfix to the acq-label + newbidsname = bids.get_bidsvalue(newbidsname, fallback, postfix) # Append the unknown postfix to the fallback-label # Patch the phase entity in the newbidsname with the dcm2niix mag/phase info elif 'part' in run['bids'] and postfix in ('ph','real','imaginary'): # e.g. part: ['', 'mag', 'phase', 'real', 'imag', 0] @@ -413,7 +412,7 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N newbidsname = newbidsname.replace('_magnitude_ph', '_fieldmap') # Case 3: One magnitude + one fieldmap image in one folder / datasource newbidsname = newbidsname.replace('_fieldmap_ph', '_fieldmap') # Case 3 - # Append the dcm2niix info to acq-label, may need to be improved / elaborated for future BIDS standards, supporting multi-coil data + # Append the dcm2niix info to the fallback-label, may need to be improved / elaborated for future BIDS standards, supporting multi-coil data else: newbidsname = bids.get_bidsvalue(newbidsname, fallback, postfix) @@ -426,13 +425,12 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N LOGGER.warning(f"The {newbidsname} image is a derivate / not BIDS-compliant -- you can probably delete it safely and update {scans_tsv}") # Save the NIfTI file with the newly constructed name - newbidsname = bids.increment_runindex(outfolder, newbidsname, run) # Update the runindex now that the acq-label has changed + newbidsname = bids.increment_runindex(outfolder, newbidsname, run, scans_table) # Update the runindex now that the name has changed newbidsfile = outfolder/newbidsname LOGGER.verbose(f"Found dcm2niix {postfixes} postfixes, renaming\n{dcm2niixfile} ->\n{newbidsfile}") if newbidsfile.is_file(): LOGGER.warning(f"Overwriting existing {newbidsfile} file -- check your results carefully!") dcm2niixfile.replace(newbidsfile) - datasource.targets.add(newbidsfile) # Rename all associated files (i.e. the json-, bval- and bvec-files) oldjsonfile = dcm2niixfile.with_suffix('').with_suffix('.json') @@ -440,13 +438,21 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N if not oldjsonfile.is_file(): LOGGER.warning(f"Unexpected file conversion result: {oldjsonfile} not found") else: - jsonfiles.discard(oldjsonfile) - jsonfiles.add(newjsonfile) + sidecars.discard(oldjsonfile) + sidecars.add(newjsonfile) for oldfile in outfolder.glob(dcm2niixfile.with_suffix('').stem + '.*'): oldfile.replace(newjsonfile.with_suffix(''.join(oldfile.suffixes))) # Loop over all the newly produced json sidecar-files and adapt the data (NB: assumes every NIfTI-file comes with a json-file) - for jsonfile in sorted(jsonfiles): + for jsonfile in sorted(sidecars): + + # Check if everything went OK + if not next(outfolder.glob(f"{jsonfile.stem}.*"), None): + LOGGER.error(f"Unexpected conversion result, no output files: {outfolder/jsonfile.stem}.*") + continue + if not jsonfile.is_file(): + LOGGER.warning(f"Unexpected conversion result, could not find: {jsonfile}") + continue # Load / copy over the source meta-data metadata = bids.updatemetadata(datasource, jsonfile, run['meta'], options['meta']) @@ -495,9 +501,6 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N scanpath = outputfile[0].relative_to(bidsses) scans_table.loc[scanpath.as_posix(), 'acq_time'] = acq_time - # Handle dynamic index for run-1 - bids.rename_runless_to_run1(matched_runs, scans_table) - # Write the scans_table to disk LOGGER.verbose(f"Writing acquisition time data to: {scans_tsv}") scans_table.sort_values(by=['acq_time','filename'], inplace=True) diff --git a/bidscoin/plugins/nibabel2bids.py b/bidscoin/plugins/nibabel2bids.py index 079f4e0f..276e7c52 100644 --- a/bidscoin/plugins/nibabel2bids.py +++ b/bidscoin/plugins/nibabel2bids.py @@ -190,7 +190,6 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> None: scans_table.index.name = 'filename' # Collect the different Nibabel source files for all files in the session - matched_runs: List[Run] = [] for sourcefile in sourcefiles: datasource = bids.DataSource(sourcefile, {'nibabel2bids':options}) @@ -207,7 +206,6 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> None: continue LOGGER.info(f"--> Coining: {sourcefile}") - matched_runs.append(run) # Create the BIDS session/datatype output folder outfolder = bidsses/datasource.datatype @@ -217,7 +215,7 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> None: bidsignore = bids.check_ignore(datasource.datatype, bidsmap['Options']['bidscoin']['bidsignore']) bidsname = bids.get_bidsname(subid, sesid, run, not bidsignore, runtime=True) bidsignore = bidsignore or bids.check_ignore(bidsname+'.json', bidsmap['Options']['bidscoin']['bidsignore'], 'file') - bidsname = bids.increment_runindex(outfolder, bidsname, run) + bidsname = bids.increment_runindex(outfolder, bidsname, run, scans_table) bidsfile = (outfolder/bidsname).with_suffix(ext) # Check if the bidsname is valid @@ -233,7 +231,6 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> None: # Save the sourcefile as a BIDS NIfTI file nib.save(nib.load(sourcefile), bidsfile) - datasource.targets.add(bidsfile) # Load / copy over the source meta-data sidecar = bidsfile.with_suffix('').with_suffix('.json') @@ -245,9 +242,6 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> None: acq_time = dateutil.parser.parse(f"1925-01-01T{metadata.get('AcquisitionTime', '')}") scans_table.loc[bidsfile.relative_to(bidsses).as_posix(), 'acq_time'] = acq_time.isoformat() - # Handle dynamic index for run-1 - bids.rename_runless_to_run1(matched_runs, scans_table) - # Write the scans_table to disk LOGGER.verbose(f"Writing data to: {scans_tsv}") scans_table.replace('','n/a').to_csv(scans_tsv, sep='\t', encoding='utf-8', na_rep='n/a') diff --git a/bidscoin/plugins/spec2nii2bids.py b/bidscoin/plugins/spec2nii2bids.py index 7677e11d..16e58d4e 100644 --- a/bidscoin/plugins/spec2nii2bids.py +++ b/bidscoin/plugins/spec2nii2bids.py @@ -198,7 +198,6 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N scans_table.index.name = 'filename' # Loop over all MRS source data files and convert them to BIDS - matched_runs: List[Run] = [] for sourcefile in sourcefiles: # Get a data source, a matching run from the bidsmap @@ -216,7 +215,6 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N continue LOGGER.info(f"--> Coining: {sourcefile}") - matched_runs.append(run) # Create the BIDS session/datatype output folder outfolder = bidsses/datasource.datatype @@ -226,7 +224,7 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N bidsignore = bids.check_ignore(datasource.datatype, bidsmap['Options']['bidscoin']['bidsignore']) bidsname = bids.get_bidsname(subid, sesid, run, not bidsignore, runtime=True) bidsignore = bidsignore or bids.check_ignore(bidsname+'.json', bidsmap['Options']['bidscoin']['bidsignore'], 'file') - bidsname = bids.increment_runindex(outfolder, bidsname, run) + bidsname = bids.increment_runindex(outfolder, bidsname, run, scans_table) sidecar = (outfolder/bidsname).with_suffix('.json') # Check if the bidsname is valid @@ -259,9 +257,6 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N if bcoin.run_command(f'{command} {dformat} -j -f "{bidsname}" -o "{outfolder}" {args} {arg} "{sourcefile}"'): if not list(outfolder.glob(f"{bidsname}.nii*")): continue - # Add files created using this bidsmap run-item (except sidecars) - datasource.targets.update(outfolder.glob(f"{bidsname}.*nii*")) - # Load / copy over and adapt the newly produced json sidecar-file (NB: assumes every NIfTI-file comes with a json-file) metadata = bids.updatemetadata(datasource, sidecar, run['meta'], options['meta']) with sidecar.open('w') as json_fid: @@ -290,9 +285,6 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N acq_time = 'n/a' scans_table.loc[sidecar.with_suffix('.nii.gz').relative_to(bidsses).as_posix(), 'acq_time'] = acq_time - # Handle dynamic index for run-1 - bids.rename_runless_to_run1(matched_runs, scans_table) - # Write the scans_table to disk LOGGER.verbose(f"Writing acquisition time data to: {scans_tsv}") scans_table.sort_values(by=['acq_time','filename'], inplace=True) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 7e6cb6d3..775807e1 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -17,7 +17,7 @@ - A DICOM parser for retrieving the `` value (Siemens and GE) ### Changed -- `bidscoiner_plugin()` API: you can (should) return a personals dict (instead of writing it to `participants.tsv`) and the datasource targets +- `bidscoiner_plugin()` API: you can (should) return a personals dict (instead of writing it to `participants.tsv`) - Using DRMAA library for skullstrip (instead of qsub/sbatch) - Removed the pet2bids and phys2bids plugins (code is no longer actively developed) - Sorting of DICOMDIR files is more robust diff --git a/tests/test_bids.py b/tests/test_bids.py index dfbf7d7d..773027d5 100644 --- a/tests/test_bids.py +++ b/tests/test_bids.py @@ -10,6 +10,7 @@ from nibabel.testing import data_path from pydicom.data import get_testdata_file from bidscoin import bcoin, bids, bidsmap_template +from bidscoin.bids import Run, Plugin bcoin.setup_logging() @@ -45,14 +46,14 @@ class TestDataSource: @pytest.fixture() def datasource(self, dcm_file): - return bids.DataSource(dcm_file, {'dcm2niix2bids': {}}, 'DICOM') + return bids.DataSource(dcm_file, {'dcm2niix2bids': Plugin({})}, 'DICOM') @pytest.fixture() def extdatasource(self, dcm_file, tmp_path): ext_dcm_file = shutil.copyfile(dcm_file, tmp_path/dcm_file.name) with ext_dcm_file.with_suffix('.json').open('w') as sidecar: json.dump({'PatientName': 'ExtendedAttributesTest'}, sidecar) - return bids.DataSource(ext_dcm_file, {'dcm2niix2bids': {}}, 'DICOM') + return bids.DataSource(ext_dcm_file, {'dcm2niix2bids': Plugin({})}, 'DICOM') def test_is_datasource(self, datasource): assert datasource.is_datasource() @@ -76,7 +77,7 @@ def test_subid_sesid(self, subid, sesid, subprefix, sesprefix, tmp_path, dcm_fil subsesdir = tmp_path/'data'/subid/sesid subsesdir.mkdir(parents=True) subses_file = shutil.copy(dcm_file, subsesdir) - subses_source = bids.DataSource(subses_file, {'dcm2niix2bids': {}}, 'DICOM', subprefix=subprefix, sesprefix=sesprefix) + subses_source = bids.DataSource(subses_file, {'dcm2niix2bids': Plugin({})}, 'DICOM', subprefix=subprefix, sesprefix=sesprefix) sub, ses = subses_source.subid_sesid(f"<>", f"<>") expected_sub = 'sub-' + bids.sanitize(re.sub(f"^{subses_source.resubprefix()}", '', subid) if subid.startswith(subprefix) or subprefix=='*' else '') # NB: this expression is too complicated / resembles the actual code too much :-/ expected_ses = 'ses-' + bids.sanitize(re.sub(f"^{subses_source.resesprefix()}", '', sesid)) if (subid.startswith(subprefix) or subprefix=='*') and (sesid.startswith(sesprefix) or sesprefix=='*') and sesid else '' @@ -119,7 +120,7 @@ def test_get_dicomfile(dcm_file, dicomdir): def test_get_datasource(dicomdir): - datasource = bids.get_datasource(dicomdir.parent, {'dcm2niix2bids': {}}) + datasource = bids.get_datasource(dicomdir.parent, {'dcm2niix2bids': Plugin({})}) assert datasource.is_datasource() assert datasource.dataformat == 'DICOM' @@ -323,6 +324,14 @@ def test_check_ignore(): assert bids.check_ignore('sub-01_foo.nii', bidsignore, 'file') is True +def test_sanitize(): + + assert bids.sanitize('<<>>') == '' + assert bids.sanitize('<<1>>') == '1' + assert bids.sanitize('@foo-bar.baz#') == 'foobarbaz' + assert bids.sanitize("Joe's reward_task") == 'Joesrewardtask' + + def test_find_run(study_bidsmap): # Load a bidsmap and create a duplicate dataformat section @@ -424,7 +433,7 @@ def test_exist_run(study_bidsmap): def test_insert_bidskeyval(): - bidsname = bids.insert_bidskeyval(Path('bids')/'sub-01'/'anat'/'sub-01_T1w', 'run', 1, True) + bidsname = bids.insert_bidskeyval(Path('bids')/'sub-01'/'anat'/'sub-01_T1w', 'run', '1', True) assert bidsname == Path('bids')/'sub-01'/'anat'/'sub-01_run-1_T1w' bidsname = bids.insert_bidskeyval(Path('bids')/'sub-01'/'anat'/'sub-01_run-2_T1w.nii', 'run', '', True) @@ -433,125 +442,80 @@ def test_insert_bidskeyval(): bidsname = bids.insert_bidskeyval('sub-01_foo-bar_T1w', 'foo', 'baz', True) assert bidsname == 'sub-01_T1w' - bidsname = bids.insert_bidskeyval('sub-01_foo-bar_T1w.nii', 'foo', 'baz', False) - assert bidsname == 'sub-01_foo-baz_T1w.nii' + bidsname = bids.insert_bidskeyval('anat/sub-01_foo-bar_T1w.nii', 'foo', 'baz', False) + assert bidsname == 'anat/sub-01_foo-baz_T1w.nii' -def test_increment_runindex__no_run1(tmp_path): - """Test if run-index is preserved or added to the bidsname""" +def test_increment_runindex(tmp_path): + """Test if run-index is preserved or added to the bidsname, files are renamed and scans-table updated""" + # Define the test data outfolder = tmp_path/'bids'/'sub-01'/'anat' + outfolder.mkdir(parents=True) + runless = 'sub-01_T1w' + run1 = 'sub-01_run-1_T1w' + run2 = 'sub-01_run-2_T1w' + run3 = 'sub-01_run-3_T1w' - # Test runindex is <<>>, so no run is added to the bidsname - bidsname = bids.increment_runindex(outfolder, 'sub-01_T1w', {'bids': {'run': '<<>>'}}) - assert bidsname == 'sub-01_T1w' + # ------- Tests with no existing data ------- - bidsname = bids.increment_runindex(outfolder, 'sub-01_run-1_T1w.nii', {'bids': {'run': '<<1>>'}}) - assert bidsname == 'sub-01_run-1_T1w.nii' + bidsname = bids.increment_runindex(outfolder, runless, Run({'bids': {'run': '<<>>'}})) + assert bidsname == runless - # Test runindex is <<2>>, so run-2 is preserved in the bidsname - bidsname = bids.increment_runindex(outfolder, 'sub-01_run-2_T1w.nii.gz', {'bids': {'run': '<<2>>'}}) - assert bidsname == 'sub-01_run-2_T1w.nii.gz' + bidsname = bids.increment_runindex(outfolder, run1 + '.nii', Run({'bids': {'run': '<<1>>'}})) + assert bidsname == run1 + '.nii' + bidsname = bids.increment_runindex(outfolder, run2 + '.nii.gz', Run({'bids': {'run': '<<2>>'}})) + assert bidsname == run2 + '.nii.gz' -def test_increment_runindex__runless_exist(tmp_path): - """Test run-index is <<>>, so run-2 is added to the bidsname""" + # ------- Tests with run-less data ------- # Create the run-less files - outfolder = tmp_path/'bids'/'sub-01'/'anat' - outfolder.mkdir(parents=True) for suffix in ('.nii.gz', '.json'): - (outfolder/'sub-01_T1w').with_suffix(suffix).touch() - - # Test run-index is <<>>, so the run-index is incremented - bidsname = bids.increment_runindex(outfolder, 'sub-01_T1w', {'bids': {'run': '<<>>'}}) - assert bidsname == 'sub-01_run-2_T1w' - - -def test_increment_runindex__runless_run2_exist(tmp_path): - """Test run-index is <<>>, so run-3 is added to the bidsname""" - - # Create the files - outfolder = tmp_path/'bids'/'sub-01'/'anat' - outfolder.mkdir(parents=True) + (outfolder/runless).with_suffix(suffix).touch() + + # Create the scans test tables + scans_data = {'filename': [f"anat/{runless}.nii.gz", 'anat/otherscan.nii.gz'], 'acq_time': ['runless', 'other']} + new_scans_data = {'filename': [f"anat/{run1}.nii.gz", 'anat/otherscan.nii.gz'], 'acq_time': ['runless', 'other']} + scans_table = pd.DataFrame(scans_data ).set_index('filename') + new_scans_table = pd.DataFrame(new_scans_data).set_index('filename') + + # Test renaming of run-less to run-1 + updating scans_table + bidsname = bids.increment_runindex(outfolder, runless, Run({'bids': {'run': '<<>>'}}), scans_table) + assert bidsname == run2 + assert scans_table.equals(new_scans_table) for suffix in ('.nii.gz', '.json'): - (outfolder/'sub-01_T1w').with_suffix(suffix).touch() - (outfolder/'sub-01_run-2_T1w').with_suffix(suffix).touch() - - # Test run-index is <<>>, so the run-index is incremented - bidsname = bids.increment_runindex(outfolder, 'sub-01_T1w.nii.gz', {'bids': {'run': '<<>>'}}) - assert bidsname == 'sub-01_run-3_T1w.nii.gz' + assert (outfolder/runless).with_suffix(suffix).is_file() is False + assert (outfolder/run1 ).with_suffix(suffix).is_file() is True + # We now have run-1 files only + bidsname = bids.increment_runindex(outfolder, run1, Run({'bids': {'run': '<<1>>'}})) + assert bidsname == run2 -def test_increment_runindex__run1_run2_exist(tmp_path): - """Test if run-3 is added to the bidsname""" + # ------- Tests with run-1 & run-2 data ------- - # Create the run-1 and run-2 files - outfolder = tmp_path/'bids'/'sub-01'/'anat' - outfolder.mkdir(parents=True) + # Create the run-2 files for suffix in ('.nii.gz', '.json'): - (outfolder/'sub-01_run-1_T1w').with_suffix(suffix).touch() - (outfolder/'sub-01_run-2_T1w').with_suffix(suffix).touch() + (outfolder/run2).with_suffix(suffix).touch() - # Test run-index is <<1>>, so the run-index is incremented - bidsname = bids.increment_runindex(outfolder, 'sub-01_run-1_T1w', {'bids': {'run': '<<1>>'}}) - assert bidsname == 'sub-01_run-3_T1w' + bidsname = bids.increment_runindex(outfolder, runless + '.nii.gz', Run({'bids': {'run': '<<>>'}}), scans_table) + assert bidsname == run3 + '.nii.gz' + assert scans_table.equals(new_scans_table) # -> Must remain untouched - # Test run-index is <>, so the run-index is untouched - bidsname = bids.increment_runindex(outfolder, 'sub-01_run-1_T1w', {'bids': {'run': '<>'}}) - assert bidsname == 'sub-01_run-1_T1w' + bidsname = bids.increment_runindex(outfolder, run1, Run({'bids': {'run': '<<1>>'}})) + assert bidsname == run3 - # Test run-index is 2, so the run-index is untouched - bidsname = bids.increment_runindex(outfolder, 'sub-01_run-2_T1w', {'bids': {'run': '2'}}) - assert bidsname == 'sub-01_run-2_T1w' + bidsname = bids.increment_runindex(outfolder, run1, Run({'bids': {'run': '<>'}})) + assert bidsname == run1 # -> Must remain untouched - -def test_rename_runless_to_run1(tmp_path): - """Test <<>> index renaming run-less files to run-1 files.""" - - # Create data - run = bids.create_run() - run['bids'] = {'run': '<<>>'} - matched_runs = [] - old_runless_bidsname = 'sub-01_T1w' - new_run1_bidsname = 'sub-01_run-1_T1w' - run2_bidsname = 'sub-01_run-2_T1w' - outfolder = tmp_path/'bids'/'sub-01'/'anat' - outfolder.mkdir(parents=True) - for suffix in ('.nii.gz', '.json'): - for file_name in (old_runless_bidsname, run2_bidsname): - outfile = (outfolder/file_name).with_suffix(suffix) - outfile.touch() - if suffix == '.nii.gz': - run['datasource'].targets.add(outfile) - matched_runs.append(run) - - # Create the scans table - scans_data = { - 'filename': ['anat/sub-01_T2w.nii.gz', f"anat/{old_runless_bidsname}.nii.gz", f"anat/{run2_bidsname}.nii.gz"], - 'acq_time': ['acq1', 'acq2', 'acq3'], - } - result_scans_data = { - 'filename': ['anat/sub-01_T2w.nii.gz', f"anat/{new_run1_bidsname}.nii.gz", f"anat/{run2_bidsname}.nii.gz"], - 'acq_time': ['acq1', 'acq2', 'acq3'], - } - scans_table = pd.DataFrame(scans_data).set_index('filename') - result_scans_table = pd.DataFrame(result_scans_data).set_index('filename') - - # Run the function - bids.rename_runless_to_run1(matched_runs, scans_table) - - # Check the results - assert result_scans_table.equals(scans_table) - for suffix in ('.nii.gz', '.json'): - assert (outfolder/old_runless_bidsname).with_suffix(suffix).is_file() is False - assert (outfolder/new_run1_bidsname).with_suffix(suffix).is_file() is True + bidsname = bids.increment_runindex(outfolder, run1, Run({'bids': {'run': '2'}})) + assert bidsname == run1 # -> Must remain untouched def test_get_bidsname(raw_dicomdir): dicomfile = raw_dicomdir/'Doe^Archibald'/'01-XR C Spine Comp Min 4 Views'/'001-Cervical LAT'/'6154' - run = {'datasource': bids.DataSource(dicomfile, {'dcm2niix2bids': {}}, 'DICOM')} + run = {'datasource': bids.DataSource(dicomfile, {'dcm2niix2bids': Plugin({})}, 'DICOM')} run['bids'] = {'acq':'py#dicom', 'foo@':'bar#123', 'run':'<>', 'suffix':'T0w'} bidsname = bids.get_bidsname('sub-001', 'ses-01', run, validkeys=False, cleanup=False) # Test default: runtime=False @@ -587,6 +551,7 @@ def test_get_bidsvalue(): bidsfile = 'sub-01_run-1_T1w.nii.gz' + assert bids.get_bidsvalue(bidsfile, 'run', '2') == 'sub-01_run-2_T1w.nii.gz' assert bids.get_bidsvalue(bidsfile, 'fallback', 'bar') == 'sub-01_acq-bar_run-1_T1w.nii.gz' @@ -598,7 +563,7 @@ def test_updatemetadata(dcm_file, tmp_path): sourcefile.with_suffix('.jsn').touch() with sourcefile.with_suffix('.json').open('w') as fid: json.dump({'PatientName': 'SourceTest'}, fid) - extdatasource = bids.DataSource(sourcefile, {'dcm2niix2bids': {}}, 'DICOM') + extdatasource = bids.DataSource(sourcefile, {'dcm2niix2bids': Plugin({})}, 'DICOM') # Create the metadata sidecar file outfolder = tmp_path/'bids'/'sub-001'/'ses-01'/'anat'