From cc14c655b4166ffdcbe88e81ab4cbb69f5b9ff6d Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Wed, 26 Mar 2025 12:59:39 -0400 Subject: [PATCH 1/9] Made cache path validator container agnostic --- nf_core/pipelines/download.py | 4 ++-- nf_core/utils.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nf_core/pipelines/download.py b/nf_core/pipelines/download.py index 3d371ca68..3b27a2cfe 100644 --- a/nf_core/pipelines/download.py +++ b/nf_core/pipelines/download.py @@ -31,7 +31,7 @@ from nf_core.utils import ( NFCORE_CACHE_DIR, NFCORE_DIR, - SingularityCacheFilePathValidator, + CacheFilePathValidator, ) log = logging.getLogger(__name__) @@ -554,7 +554,7 @@ def prompt_singularity_cachedir_remote(self): while cachedir_index is None: prompt_cachedir_index = questionary.path( "Specify a list of the container images that are already present on the remote system:", - validate=SingularityCacheFilePathValidator, + validate=CacheFilePathValidator, style=nf_core.utils.nfcore_question_style, ).unsafe_ask() cachedir_index = os.path.abspath(os.path.expanduser(prompt_cachedir_index)) diff --git a/nf_core/utils.py b/nf_core/utils.py index 3c347d145..5d31b387d 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -993,9 +993,9 @@ def prompt_pipeline_release_branch( ) -class SingularityCacheFilePathValidator(questionary.Validator): +class CacheFilePathValidator(questionary.Validator): """ - Validator for file path specified as --singularity-cache-index argument in nf-core pipelines download + Validator for file path specified for example through the --singularity-cache-index argument in nf-core pipelines download """ def validate(self, value): From a6e187614586f599747e9e23a1edaff9ecac75e0 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Wed, 26 Mar 2025 13:09:01 -0400 Subject: [PATCH 2/9] Added cache env variable for container agnostic functionality --- nf_core/pipelines/download.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/nf_core/pipelines/download.py b/nf_core/pipelines/download.py index 3b27a2cfe..e0a71a4d2 100644 --- a/nf_core/pipelines/download.py +++ b/nf_core/pipelines/download.py @@ -146,6 +146,12 @@ def __init__( self.additional_tags = None # Specifying a cache index or container library implies that containers should be downloaded. self.container_system = "singularity" if container_cache_index or bool(container_library) else container_system + if container_system == "singularity": + self.container_cache_env = "NXF_SINGULARITY_CACHEDIR" + elif container_system == "docker": + self.container_cache_env = "NXF_DOCKER_CACHEDIR" + else: + self.container_cache_env = None # Manually specified container library (registry) if isinstance(container_library, str) and bool(len(container_library)): self.container_library = [container_library] From 797fbd305e93f97d7f20f6a52c2fd34e85f6d095 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Wed, 26 Mar 2025 13:09:58 -0400 Subject: [PATCH 3/9] Added docker pull progress object --- nf_core/pipelines/download.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/nf_core/pipelines/download.py b/nf_core/pipelines/download.py index e0a71a4d2..35d17ce1e 100644 --- a/nf_core/pipelines/download.py +++ b/nf_core/pipelines/download.py @@ -80,6 +80,12 @@ def get_renderables(self): "[blue]{task.fields[current_log]}", rich.progress.BarColumn(bar_width=None), ) + if task.fields.get("progress_type") == "docker_pull": + self.columns = ( + "[magenta]{task.description}", + "[blue]{task.fields[current_log]}", + rich.progress.BarColumn(bar_width=None), + ) yield self.make_tasks_table([task]) From 5a07f28b4f68208235422cd7c6a5a209703502b0 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Wed, 26 Mar 2025 13:13:52 -0400 Subject: [PATCH 4/9] Migrate singularity specific functions to container agnostic functions --- nf_core/pipelines/download.py | 242 ++++++++++++++++++---------------- 1 file changed, 131 insertions(+), 111 deletions(-) diff --git a/nf_core/pipelines/download.py b/nf_core/pipelines/download.py index 35d17ce1e..e6d7577d1 100644 --- a/nf_core/pipelines/download.py +++ b/nf_core/pipelines/download.py @@ -203,10 +203,11 @@ def download_workflow(self): if not self.container_cache_utilisation == "remote": self.prompt_container_download() else: - self.container_system = "singularity" - self.prompt_singularity_cachedir_creation() - self.prompt_singularity_cachedir_utilization() - self.prompt_singularity_cachedir_remote() + if not self.container_system: + self.container_system = "singularity" + self.prompt_cachedir_creation() + self.prompt_cachedir_utilization() + self.prompt_cachedir_remote() # Nothing meaningful to compress here. if not self.platform: self.prompt_compression_type() @@ -334,7 +335,7 @@ def download_workflow_platform(self, location=None): self.gather_registries(self.workflow_repo.access()) try: - self.get_singularity_images(current_revision=revision) + self.get_container_images(current_revision=revision) except OSError as e: raise DownloadError(f"[red]{e}[/]") from e @@ -451,23 +452,19 @@ def prompt_container_download(self): stderr.print("\nIn addition to the pipeline code, this tool can download software containers.") self.container_system = questionary.select( "Download software container images:", - choices=["none", "singularity"], + choices=["none", "singularity","docker"], style=nf_core.utils.nfcore_question_style, ).unsafe_ask() - def prompt_singularity_cachedir_creation(self): - """Prompt about using $NXF_SINGULARITY_CACHEDIR if not already set""" - if ( - self.container_system == "singularity" - and os.environ.get("NXF_SINGULARITY_CACHEDIR") is None - and stderr.is_interactive # Use rich auto-detection of interactive shells - ): + def prompt_cachedir_creation(self): + """Prompt about using the container cachedir if not already set""" + if (os.environ.get(self.container_cache_env) is None and stderr.is_interactive): # Use rich auto-detection of interactive shells stderr.print( - "\nNextflow and nf-core can use an environment variable called [blue]$NXF_SINGULARITY_CACHEDIR[/] that is a path to a directory where remote Singularity images are stored. " + f"\nNextflow and nf-core can use an environment variable called [blue]${self.container_cache_env}[/] that is a path to a directory where remote Singularity images are stored. " "This allows downloaded images to be cached in a central location." ) if rich.prompt.Confirm.ask( - "[blue bold]?[/] [bold]Define [blue not bold]$NXF_SINGULARITY_CACHEDIR[/] for a shared Singularity image download folder?[/]" + f"[blue bold]?[/] [bold]Define [blue not bold]${self.container_cache_env}/] for a shared Singularity image download folder?[/]" ): if not self.container_cache_index: self.container_cache_utilisation == "amend" # retain "remote" choice. @@ -481,83 +478,97 @@ def prompt_singularity_cachedir_creation(self): ).unsafe_ask() cachedir_path = os.path.abspath(os.path.expanduser(prompt_cachedir_path)) if prompt_cachedir_path == "": - log.error("Not using [blue]$NXF_SINGULARITY_CACHEDIR[/]") + log.error(f"Not using [blue]${self.container_cache_env}/]") cachedir_path = False elif not os.path.isdir(cachedir_path): log.error(f"'{cachedir_path}' is not a directory.") cachedir_path = None if cachedir_path: - os.environ["NXF_SINGULARITY_CACHEDIR"] = cachedir_path + os.environ[{self.container_cache_env}] = cachedir_path """ Optionally, create a permanent entry for the NXF_SINGULARITY_CACHEDIR in the terminal profile. Currently support for bash and zsh. ToDo: "sh", "dash", "ash","csh", "tcsh", "ksh", "fish", "cmd", "powershell", "pwsh"? """ - - if os.getenv("SHELL", "") == "/bin/bash": - shellprofile_path = os.path.expanduser("~/~/.bash_profile") - if not os.path.isfile(shellprofile_path): - shellprofile_path = os.path.expanduser("~/.bashrc") + if self.container_system == "singularity": + if os.getenv("SHELL", "") == "/bin/bash": + shellprofile_path = os.path.expanduser("~/~/.bash_profile") if not os.path.isfile(shellprofile_path): - shellprofile_path = False - elif os.getenv("SHELL", "") == "/bin/zsh": - shellprofile_path = os.path.expanduser("~/.zprofile") - if not os.path.isfile(shellprofile_path): - shellprofile_path = os.path.expanduser("~/.zshenv") + shellprofile_path = os.path.expanduser("~/.bashrc") + if not os.path.isfile(shellprofile_path): + shellprofile_path = False + elif os.getenv("SHELL", "") == "/bin/zsh": + shellprofile_path = os.path.expanduser("~/.zprofile") + if not os.path.isfile(shellprofile_path): + shellprofile_path = os.path.expanduser("~/.zshenv") + if not os.path.isfile(shellprofile_path): + shellprofile_path = False + else: + shellprofile_path = os.path.expanduser("~/.profile") if not os.path.isfile(shellprofile_path): shellprofile_path = False - else: - shellprofile_path = os.path.expanduser("~/.profile") - if not os.path.isfile(shellprofile_path): - shellprofile_path = False - - if shellprofile_path: - stderr.print( - f"\nSo that [blue]$NXF_SINGULARITY_CACHEDIR[/] is always defined, you can add it to your [blue not bold]~/{os.path.basename(shellprofile_path)}[/] file ." - "This will then be automatically set every time you open a new terminal. We can add the following line to this file for you: \n" - f'[blue]export NXF_SINGULARITY_CACHEDIR="{cachedir_path}"[/]' - ) - append_to_file = rich.prompt.Confirm.ask( - f"[blue bold]?[/] [bold]Add to [blue not bold]~/{os.path.basename(shellprofile_path)}[/] ?[/]" - ) - if append_to_file: - with open(os.path.expanduser(shellprofile_path), "a") as f: - f.write( - "\n\n#######################################\n" - f"## Added by `nf-core pipelines download` v{nf_core.__version__} ##\n" - + f'export NXF_SINGULARITY_CACHEDIR="{cachedir_path}"' - + "\n#######################################\n" - ) - log.info(f"Successfully wrote to [blue]{shellprofile_path}[/]") - log.warning( - "You will need reload your terminal after the download completes for this to take effect." + + if shellprofile_path + stderr.print( + f"\nSo that [blue]$NXF_SINGULARITY_CACHEDIR[/] is always defined, you can add it to your [blue not bold]~/{os.path.basename(shellprofile_path)}[/] file ." + "This will then be automatically set every time you open a new terminal. We can add the following line to this file for you: \n" + f'[blue]export NXF_SINGULARITY_CACHEDIR="{cachedir_path}"[/]' ) + append_to_file = rich.prompt.Confirm.ask( + f"[blue bold]?[/] [bold]Add to [blue not bold]~/{os.path.basename(shellprofile_path)}[/] ?[/]" + ) + if append_to_file: + with open(os.path.expanduser(shellprofile_path), "a") as f: + f.write( + "\n\n#######################################\n" + f"## Added by `nf-core pipelines download` v{nf_core.__version__} ##\n" + + f'export NXF_SINGULARITY_CACHEDIR="{cachedir_path}"' + + "\n#######################################\n" + ) + log.info(f"Successfully wrote to [blue]{shellprofile_path}[/]") + log.warning( + "You will need reload your terminal after the download completes for this to take effect." + ) - def prompt_singularity_cachedir_utilization(self): - """Ask if we should *only* use $NXF_SINGULARITY_CACHEDIR without copying into target""" + def prompt_cachedir_utilization(self): + """Ask if we should *only* use the container cachedir without copying into target""" if ( - self.container_cache_utilisation is None # no choice regarding singularity cache has been made. - and self.container_system == "singularity" - and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None + self.container_cache_utilisation is None # no choice regarding cache has been made. and stderr.is_interactive + and self.container_cache_env is not None + and os.environ.get(self.container_cache_env) is not None ): - stderr.print( - "\nIf you are working on the same system where you will run Nextflow, you can amend the downloaded images to the ones in the" - "[blue not bold]$NXF_SINGULARITY_CACHEDIR[/] folder, Nextflow will automatically find them. " - "However if you will transfer the downloaded files to a different system then they should be copied to the target folder." - ) + if self.container_system == "singularity": + stderr.print( + "\nIf you are working on the same system where you will run Nextflow, you can amend the downloaded images to the ones in the" + "[blue not bold]$NXF_SINGULARITY_CACHEDIR[/] folder, Nextflow will automatically find them. " + "However if you will transfer the downloaded files to a different system then they should be copied to the target folder." + ) + self.container_cache_utilisation = questionary.select( + "Copy singularity images from $NXF_SINGULARITY_CACHEDIR to the target folder or amend new images to the cache?", + choices=["amend", "copy"], + style=nf_core.utils.nfcore_question_style, + ).unsafe_ask() + if ( + self.container_system == "docker" + ): + stderr.print( + "\nIf you are working on the same system where you will run Nextflow, you can amend the downloaded images with the ones in the" + "Docker local cache, Nextflow will automatically find them. " + "However if you will transfer the downloaded files to a different system then they should be moved before a run." + "Then you should load them into your docker via the command `ls -1 [docker_cache_folder]/*.tar | xargs --no-run-if-empty -L 1 docker load -i`" + ) self.container_cache_utilisation = questionary.select( - "Copy singularity images from $NXF_SINGULARITY_CACHEDIR to the target folder or amend new images to the cache?", + f"Copy container images from {self.container_cache_env} to the target folder or amend new images to the cache?", choices=["amend", "copy"], style=nf_core.utils.nfcore_question_style, ).unsafe_ask() - def prompt_singularity_cachedir_remote(self): - """Prompt about the index of a remote $NXF_SINGULARITY_CACHEDIR""" + def prompt_cachedir_remote(self): + """Prompt about the index of a remote container cachedir""" if ( - self.container_system == "singularity" - and self.container_cache_utilisation == "remote" + self.container_cache_utilisation == "remote" and self.container_cache_index is None and stderr.is_interactive # Use rich auto-detection of interactive shells ): @@ -1155,8 +1166,8 @@ def symlink_singularity_images(self, image_out_path: str) -> None: finally: os.close(image_dir) - def get_singularity_images(self, current_revision: str = "") -> None: - """Loop through container names and download Singularity images""" + def get_container_images(self, current_revision: str = "") -> None: + """Loop through container names and download container images""" if len(self.containers) == 0: log.info("No container names found in workflow") @@ -1179,7 +1190,7 @@ def get_singularity_images(self, current_revision: str = "") -> None: containers_pull: List[Tuple[str, str, Optional[str]]] = [] for container in self.containers: # Fetch the output and cached filenames for this container - out_path, cache_path = self.singularity_image_filenames(container) + out_path, cache_path = self.check_image_cache(container) # Check that the directories exist out_path_dir = os.path.dirname(out_path) @@ -1197,7 +1208,7 @@ def get_singularity_images(self, current_revision: str = "") -> None: containers_exist.append(container) continue - # We have a copy of this in the NXF_SINGULARITY_CACHE dir + # We have a copy of this in the cache dir if cache_path and os.path.exists(cache_path): containers_cache.append((container, out_path, cache_path)) continue @@ -1229,18 +1240,18 @@ def get_singularity_images(self, current_revision: str = "") -> None: if containers_cache: for container in containers_cache: - progress.update(task, description="Copying singularity images from cache") - self.singularity_copy_cache_image(*container) + progress.update(task, description=f"Copying {self.container_system} images from cache") + self.copy_cache_image(*container) progress.update(task, advance=1) if containers_download or containers_pull: # if clause gives slightly better UX, because Download is no longer displayed if nothing is left to be downloaded. with concurrent.futures.ThreadPoolExecutor(max_workers=self.parallel_downloads) as pool: - progress.update(task, description="Downloading singularity images") + progress.update(task, description=f"Downloading {self.container_system} images") # Kick off concurrent downloads future_downloads = [ - pool.submit(self.singularity_download_image, *containers, progress) + pool.submit(self.download_image, *containers, progress) for containers in containers_download ] @@ -1267,12 +1278,12 @@ def get_singularity_images(self, current_revision: str = "") -> None: raise for containers in containers_pull: - progress.update(task, description="Pulling singularity images") + progress.update(task, description=f"Pulling {self.container_system} images") # it is possible to try multiple registries / mirrors if multiple were specified. # Iteration happens over a copy of self.container_library[:], as I want to be able to remove failing registries for subsequent images. for library in self.container_library[:]: try: - self.singularity_pull_image(*containers, library, progress) + self.pull_image(*containers, library, progress) # Pulling the image was successful, no ContainerError was raised, break the library loop break except ContainerError.ImageExistsError: @@ -1314,8 +1325,8 @@ def get_singularity_images(self, current_revision: str = "") -> None: # Task should advance in any case. Failure to pull will not kill the download process. progress.update(task, advance=1) - def singularity_image_filenames(self, container: str) -> Tuple[str, Optional[str]]: - """Check Singularity cache for image, copy to destination folder if found. + def check_image_cache(self, container: str) -> Tuple[str, Optional[str]]: + """Check container cache for image, copy to destination folder if found. Args: container (str): A pipeline's container name. Can be direct download URL @@ -1323,10 +1334,10 @@ def singularity_image_filenames(self, container: str) -> Tuple[str, Optional[str Returns: tuple (str, str): Returns a tuple of (out_path, cache_path). - out_path is the final target output path. it may point to the NXF_SINGULARITY_CACHEDIR, if cache utilisation was set to 'amend'. - If cache utilisation was set to 'copy', it will point to the target folder, a subdirectory of the output directory. In the latter case, - cache_path may either be None (image is not yet cached locally) or point to the image in the NXF_SINGULARITY_CACHEDIR, so it will not be - downloaded from the web again, but directly copied from there. See get_singularity_images() for implementation. + out_path is the final target output path. it may point to the NXF_SINGULARITY_CACHEDIR/NXF_DOCKER_CACHEDIR, if cache utilization was set to 'amend'. + If cache utilization was set to 'copy', it will point to the target folder, a subdirectory of the output directory. In the latter case, + cache_path may either be None (image is not yet cached locally) or point to the image in the NXF_SINGULARITY_CACHEDIR/NXF_DOCKER_CACHEDIR, so it will not be + downloaded from the web again, but directly copied from there. See get_container_images() for implementation. """ # Generate file paths @@ -1358,32 +1369,33 @@ def singularity_image_filenames(self, container: str) -> Tuple[str, Optional[str out_name = re.sub(f"{trim_pattern}", "", out_name) # Full destination and cache paths - out_path = os.path.abspath(os.path.join(self.outdir, "singularity-images", out_name)) + out_path = os.path.abspath(os.path.join(self.outdir, f"{self.container_system}-images", out_name)) cache_path = None - if os.environ.get("NXF_SINGULARITY_CACHEDIR"): - cache_path = os.path.join(os.environ["NXF_SINGULARITY_CACHEDIR"], out_name) + if os.environ.get(self.container_cache_env): + cache_path = os.path.join(os.environ[self.container_cache_env], out_name) # Use only the cache - set this as the main output path if self.container_cache_utilisation == "amend": out_path = cache_path cache_path = None elif self.container_cache_utilisation in ["amend", "copy"]: - raise FileNotFoundError("Singularity cache is required but no '$NXF_SINGULARITY_CACHEDIR' set!") + raise FileNotFoundError(f"Singularity or Docker cache is required but no '${self.container_cache_env} set!") return (out_path, cache_path) - def singularity_copy_cache_image(self, container: str, out_path: str, cache_path: Optional[str]) -> None: - """Copy Singularity image from NXF_SINGULARITY_CACHEDIR to target folder.""" + def copy_cache_image(self, container: str, out_path: str, cache_path: Optional[str]) -> None: + """Copy container image from the cache env to target folder.""" # Copy to destination folder if we have a cached version if cache_path and os.path.exists(cache_path): log.debug(f"Copying {container} from cache: '{os.path.basename(out_path)}'") shutil.copyfile(cache_path, out_path) # Create symlinks to ensure that the images are found even with different registries being used. - self.symlink_singularity_images(out_path) + if self.container_system == 'singularity': + self.symlink_singularity_images(out_path) - def singularity_download_image( + def download_image( self, container: str, out_path: str, cache_path: Optional[str], progress: DownloadProgress ) -> None: - """Download a singularity image from the web. + """Download a container image from the web. Use native Python to download the file. @@ -1457,12 +1469,12 @@ def singularity_download_image( finally: del output_path_tmp - def singularity_pull_image( + def pull_image( self, container: str, out_path: str, cache_path: Optional[str], library: List[str], progress: DownloadProgress ) -> None: - """Pull a singularity image using ``singularity pull`` + """Pull a container image using ``singularity/docker/apptainer pull`` - Attempt to use a local installation of singularity to pull the image. + Attempt to use a local installation of singularity/docker/apptainer to pull the image. Args: container (str): A pipeline's container name. Usually it is of similar format @@ -1490,34 +1502,42 @@ def singularity_pull_image( else: address = f"docker://{library}/{container.replace('docker://', '')}" absolute_URI = False - - if shutil.which("singularity"): - singularity_command = [ - "singularity", - "pull", - "--name", - output_path, - address, - ] - elif shutil.which("apptainer"): - singularity_command = ["apptainer", "pull", "--name", output_path, address] + pull_command = None + if self.container_system == "singularity": + if shutil.which("singularity"): + pull_command = [ + "singularity", + "pull", + "--name", + output_path, + address, + ] + elif shutil.which("apptainer"): + pull_command = ["apptainer", "pull", "--name", output_path, address] + else: + raise OSError("Singularity/Apptainer is needed to pull images, but it is not installed or not in $PATH") + # Pull the docker image, if we caching is enabled save the image as well + elif self.container_system == "docker": + pull_command = ["docker", "image", "pull",address] + if self.container_cache_utilisation: + pull_command += ["docker","save","image","--output",output_path] else: - raise OSError("Singularity/Apptainer is needed to pull images, but it is not installed or not in $PATH") - log.debug(f"Building singularity image: {address}") - log.debug(f"Singularity command: {' '.join(singularity_command)}") + log.error("Container system not recognized") + log.debug(f"Building {self.container_system} image: {address}") + log.debug(f"{self.container_system} command: {' '.join(pull_command)}") # Progress bar to show that something is happening task = progress.add_task( container, start=False, total=False, - progress_type="singularity_pull", + progress_type=f"{self.container_system}_pull", current_log="", ) # Run the singularity pull command with subprocess.Popen( - singularity_command, + pull_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True, From a4484c2564951ac246f58ace28609af8415ff0a2 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Wed, 26 Mar 2025 13:15:56 -0400 Subject: [PATCH 5/9] Add switch and update log messages for docker support --- nf_core/pipelines/download.py | 54 +++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/nf_core/pipelines/download.py b/nf_core/pipelines/download.py index e6d7577d1..fd2cf2257 100644 --- a/nf_core/pipelines/download.py +++ b/nf_core/pipelines/download.py @@ -291,7 +291,7 @@ def download_workflow_static(self): raise DownloadError("Error editing pipeline config file to use local configs!") from e # Collect all required singularity images - if self.container_system == "singularity": + if self.container_system: self.find_container_images(os.path.join(self.outdir, revision_dirname)) self.gather_registries(os.path.join(self.outdir, revision_dirname)) @@ -326,7 +326,7 @@ def download_workflow_platform(self, location=None): self.workflow_repo.bare_clone(os.path.join(self.outdir, self.output_filename)) # extract the required containers - if self.container_system == "singularity": + if self.container_system: for revision, commit in self.wf_sha.items(): # Checkout the repo in the current revision self.workflow_repo.checkout(commit) @@ -582,7 +582,7 @@ def prompt_cachedir_remote(self): ).unsafe_ask() cachedir_index = os.path.abspath(os.path.expanduser(prompt_cachedir_index)) if prompt_cachedir_index == "": - log.error("Will disregard contents of a remote [blue]$NXF_SINGULARITY_CACHEDIR[/]") + log.error(f"Will disregard contents of a remote [blue]${self.self.container_cache_env}[/]") self.container_cache_index = None self.container_cache_utilisation = "copy" elif not os.access(cachedir_index, os.R_OK): @@ -612,14 +612,14 @@ def read_remote_containers(self): raise LookupError("Could not find valid container names in the index file.") self.containers_remote = sorted(list(set(self.containers_remote))) except (FileNotFoundError, LookupError) as e: - log.error(f"[red]Issue with reading the specified remote $NXF_SINGULARITY_CACHE index:[/]\n{e}\n") + log.error(f"[red]Issue with reading the specified remote ${self.container_cache_env} index:[/]\n{e}\n") if stderr.is_interactive and rich.prompt.Confirm.ask("[blue]Specify a new index file and try again?"): self.container_cache_index = None # reset chosen path to index file. - self.prompt_singularity_cachedir_remote() + self.prompt_cachedir_remote() else: - log.info("Proceeding without consideration of the remote $NXF_SINGULARITY_CACHE index.") + log.info(f"Proceeding without consideration of the remote ${self.container_cache_env} index.") self.container_cache_index = None - if os.environ.get("NXF_SINGULARITY_CACHEDIR"): + if os.environ.get(self.container_cache_env): self.container_cache_utilisation = "copy" # default to copy if possible, otherwise skip. else: self.container_cache_utilisation = None @@ -1218,15 +1218,21 @@ def get_container_images(self, current_revision: str = "") -> None: containers_download.append((container, out_path, cache_path)) continue - # Pull using singularity + # Pull using singularity or docker containers_pull.append((container, out_path, cache_path)) - # Exit if we need to pull images and Singularity is not installed + # Exit if we need to pull images and Singularity/Docker is not installed if len(containers_pull) > 0: - if not (shutil.which("singularity") or shutil.which("apptainer")): - raise OSError( - "Singularity/Apptainer is needed to pull images, but it is not installed or not in $PATH" - ) + if self.container_system == "singularity": + if not (shutil.which("singularity") or shutil.which("apptainer")): + raise OSError( + "Singularity/Apptainer is needed to pull images, but it is not installed or not in $PATH" + ) + if self.container_system == "docker": + if not shutil.which("docker"): + raise OSError( + "Docker is needed to pull images, but it is not installed or not in $PATH" + ) if containers_exist: if self.container_cache_index is not None: @@ -1403,10 +1409,10 @@ def download_image( container (str): A pipeline's container name. Usually it is of similar format to ``https://depot.galaxyproject.org/singularity/name:version`` out_path (str): The final target output path - cache_path (str, None): The NXF_SINGULARITY_CACHEDIR path if set, None if not + cache_path (str, None): The NXF_SINGULARITY_CACHEDIR/NXF_DOCKER_CACHEDIR path if set, None if not progress (Progress): Rich progress bar instance to add tasks to. """ - log.debug(f"Downloading Singularity image: '{container}'") + log.debug(f"Downloading {self.container_system} image: '{container}'") # Set output path to save file to output_path = cache_path or out_path @@ -1447,10 +1453,12 @@ def download_image( log.debug(f"Copying {container} from cache: '{os.path.basename(out_path)}'") progress.update(task, description="Copying from cache to target directory") shutil.copyfile(cache_path, out_path) - self.symlink_singularity_images(cache_path) # symlinks inside the cache directory + if self.container_system == "singularity": + self.symlink_singularity_images(cache_path) # symlinks inside the cache directory # Create symlinks to ensure that the images are found even with different registries being used. - self.symlink_singularity_images(out_path) + if self.container_system == "singularity": + self.symlink_singularity_images(out_path) progress.remove_task(task) @@ -1459,7 +1467,7 @@ def download_image( for t in progress.task_ids: progress.remove_task(t) # Try to delete the incomplete download - log.debug(f"Deleting incompleted singularity image download:\n'{output_path_tmp}'") + log.debug(f"Deleting incompleted {self.container_system} image download:\n'{output_path_tmp}'") if output_path_tmp and os.path.exists(output_path_tmp): os.remove(output_path_tmp) if output_path and os.path.exists(output_path): @@ -1482,13 +1490,13 @@ def pull_image( library (list of str): A list of libraries to try for pulling the image. Raises: - Various exceptions possible from `subprocess` execution of Singularity. + Various exceptions possible from `subprocess` execution of Singularity/Docker/Apptainer. """ output_path = cache_path or out_path # where the output of 'singularity pull' is first generated before being copied to the NXF_SINGULARITY_CACHDIR. # if not defined by the Singularity administrators, then use the temporary directory to avoid storing the images in the work directory. - if os.environ.get("SINGULARITY_CACHEDIR") is None: + if os.environ.get("SINGULARITY_CACHEDIR") is None and self.container_system == "singularity": os.environ["SINGULARITY_CACHEDIR"] = str(NFCORE_CACHE_DIR) # Sometimes, container still contain an explicit library specification, which @@ -1568,10 +1576,12 @@ def pull_image( log.debug(f"Copying {container} from cache: '{os.path.basename(out_path)}'") progress.update(task, current_log="Copying from cache to target directory") shutil.copyfile(cache_path, out_path) - self.symlink_singularity_images(cache_path) # symlinks inside the cache directory + if self.container_system == 'singularity': + self.symlink_singularity_images(cache_path) # symlinks inside the cache directory # Create symlinks to ensure that the images are found even with different registries being used. - self.symlink_singularity_images(out_path) + if self.container_system == 'singularity': + self.symlink_singularity_images(out_path) progress.remove_task(task) From f0d94aef297c06f72f53aad56f5ff29da70d0331 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Wed, 26 Mar 2025 13:17:01 -0400 Subject: [PATCH 6/9] Add docker cache creation using save --- nf_core/pipelines/download.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/nf_core/pipelines/download.py b/nf_core/pipelines/download.py index fd2cf2257..dc8c76e5d 100644 --- a/nf_core/pipelines/download.py +++ b/nf_core/pipelines/download.py @@ -596,15 +596,17 @@ def prompt_cachedir_remote(self): def read_remote_containers(self): """Reads the file specified as index for the remote Singularity cache dir""" if ( - self.container_system == "singularity" - and self.container_cache_utilisation == "remote" + self.container_cache_utilisation == "remote" and self.container_cache_index is not None ): n_total_images = 0 try: with open(self.container_cache_index) as indexfile: for line in indexfile.readlines(): - match = re.search(r"([^\/\\]+\.img)", line, re.S) + match_str = r"([^\/\\]+\.img)" + if self.container_system == "docker": + match_str = r"([^\/\\]+\.tar)" + match = re.search(match_str, line, re.S) if match: n_total_images += 1 self.containers_remote.append(match.group(0)) @@ -1354,12 +1356,17 @@ def check_image_cache(self, container: str) -> Tuple[str, Optional[str]]: out_name = re.sub(r"^.*:\/\/", "", out_name) # Detect file extension extension = ".img" - if ".sif:" in out_name: - extension = ".sif" - out_name = out_name.replace(".sif:", "-") - elif out_name.endswith(".sif"): - extension = ".sif" - out_name = out_name[:-4] + if self.container_system == "singularity": + if ".sif:" in out_name: + extension = ".sif" + out_name = out_name.replace(".sif:", "-") + elif out_name.endswith(".sif"): + extension = ".sif" + out_name = out_name[:-4] + if self.container_system == "docker": + if out_name.endswith(".tar"): + extension = ".tar" + out_name = out_name[:-4] # Strip : and / characters out_name = out_name.replace("/", "-").replace(":", "-") # Add file extension From a80c1ad0a4805fc39b7ac70ed904ddb015b25f81 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Wed, 26 Mar 2025 13:20:00 -0400 Subject: [PATCH 7/9] Parse docker error messages from pull --- nf_core/pipelines/download.py | 40 +++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/nf_core/pipelines/download.py b/nf_core/pipelines/download.py index dc8c76e5d..6eab64a51 100644 --- a/nf_core/pipelines/download.py +++ b/nf_core/pipelines/download.py @@ -1563,20 +1563,32 @@ def pull_image( for line in proc.stdout: lines.append(line) progress.update(task, current_log=line.strip()) - - if lines: - # something went wrong with the container retrieval - if any("FATAL: " in line for line in lines): - progress.remove_task(task) - raise ContainerError( - container=container, - registry=library, - address=address, - absolute_URI=absolute_URI, - out_path=out_path if out_path else cache_path or "", - singularity_command=singularity_command, - error_msg=lines, - ) + if self.container_system == "singularity": + if lines: + # something went wrong with the container retrieval + if any("FATAL: " in line for line in lines): + progress.remove_task(task) + raise ContainerError( + container=container, + registry=library, + address=address, + absolute_URI=absolute_URI, + out_path=out_path if out_path else cache_path or "", + singularity_command=pull_command, + error_msg=lines, + ) + if self.container_system == "docker": + if lines: + if any ("Error response from daemon: " in line for line in lines): + raise ContainerError( + container=container, + registry=library, + address=address, + absolute_URI=absolute_URI, + out_path=out_path if out_path else cache_path or "", + singularity_command=pull_command, + error_msg=lines, + ) # Copy cached download if we are using the cache if cache_path: From 535b8fca0ef0c0ed80209b5f2e1b119edadb9e01 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Wed, 26 Mar 2025 13:54:47 -0400 Subject: [PATCH 8/9] Fixed syntax error --- nf_core/pipelines/download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nf_core/pipelines/download.py b/nf_core/pipelines/download.py index 6eab64a51..6503cfaa2 100644 --- a/nf_core/pipelines/download.py +++ b/nf_core/pipelines/download.py @@ -509,7 +509,7 @@ def prompt_cachedir_creation(self): if not os.path.isfile(shellprofile_path): shellprofile_path = False - if shellprofile_path + if shellprofile_path: stderr.print( f"\nSo that [blue]$NXF_SINGULARITY_CACHEDIR[/] is always defined, you can add it to your [blue not bold]~/{os.path.basename(shellprofile_path)}[/] file ." "This will then be automatically set every time you open a new terminal. We can add the following line to this file for you: \n" From 2977cf56379074e686a33fe448b137d42693ddaa Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Wed, 26 Mar 2025 14:21:08 -0400 Subject: [PATCH 9/9] Update pull command default --- nf_core/pipelines/download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nf_core/pipelines/download.py b/nf_core/pipelines/download.py index 6503cfaa2..998006a12 100644 --- a/nf_core/pipelines/download.py +++ b/nf_core/pipelines/download.py @@ -1517,7 +1517,7 @@ def pull_image( else: address = f"docker://{library}/{container.replace('docker://', '')}" absolute_URI = False - pull_command = None + pull_command = [] if self.container_system == "singularity": if shutil.which("singularity"): pull_command = [