diff --git a/.github/actions/build_container/action.yaml b/.github/actions/build_container/action.yaml
index 999aca9..34ad796 100644
--- a/.github/actions/build_container/action.yaml
+++ b/.github/actions/build_container/action.yaml
@@ -21,12 +21,6 @@ inputs:
     description: Extra args to pass to buildah
     required: false
     default: ''
-  DOCKERHUB_USER:
-    description: Username for Dockerhub authentication
-    required: true
-  DOCKERHUB_PASSWORD:
-    description: Password for Dockerhub authentication
-    required: true
   GHCR_USER:
     description: Username for GHCR authentication
     required: true
@@ -55,7 +49,6 @@ runs:
         echo "${{ inputs.SPACK_DEPLOYMENT_KEY_PUB }}" > ${{ inputs.BUILD_PATH }}/key.pub
         echo "${{ inputs.SPACK_DEPLOYMENT_KEY_PRIVATE }}" > ${{ inputs.BUILD_PATH }}/key
         aws ecr get-login-password --region us-east-1 | buildah login --username AWS --password-stdin ${{ inputs.AWS_ECR_URL }}
-        buildah login --username ${{ inputs.DOCKERHUB_USER }} --password ${{ inputs.DOCKERHUB_PASSWORD }} docker.io
         buildah login --username ${{ inputs.GHCR_USER }} --password ${{ inputs.GHCR_TOKEN }} ghcr.io
         # This is written like that in case $BUILDAH_EXTRA_ARGS has args that require spaces,
         # which is tricky with shell variable expansion. Similar to Kaniko, see also:
diff --git a/.github/workflows/spacktainer.yaml b/.github/workflows/spacktainer.yaml
index 5aa96cd..7e5152f 100644
--- a/.github/workflows/spacktainer.yaml
+++ b/.github/workflows/spacktainer.yaml
@@ -28,8 +28,6 @@ jobs:
             --label ch.epfl.bbpgitlab.ci-commit-branch="$GITHUB_REF_NAME"
             --build-arg SPACK_BRANCH=develop
             # ' --label org.opencontainers.image.created="$CI_JOB_STARTED_AT"'
-          DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USER }}
-          DOCKERHUB_PASSWORD: ${{ secrets.DOCKERHUB_PASSWORD }}
           SPACK_DEPLOYMENT_KEY_PUB: ${{ secrets.SPACK_DEPLOYMENT_KEY_PUB }}
           SPACK_DEPLOYMENT_KEY_PRIVATE: ${{ secrets.SPACK_DEPLOYMENT_KEY_PRIVATE }}
   runtime-container-job:
@@ -58,8 +56,6 @@ jobs:
             --label ch.epfl.bbpgitlab.ci-commit-branch="$GITHUB_REF_NAME"
             --build-arg SPACK_BRANCH=develop
             # ' --label org.opencontainers.image.created="$CI_JOB_STARTED_AT"'
-          DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USER }}
-          DOCKERHUB_PASSWORD: ${{ secrets.DOCKERHUB_PASSWORD }}
           SPACK_DEPLOYMENT_KEY_PUB: ${{ secrets.SPACK_DEPLOYMENT_KEY_PUB }}
           SPACK_DEPLOYMENT_KEY_PRIVATE: ${{ secrets.SPACK_DEPLOYMENT_KEY_PRIVATE }}
   spacktainer-build-job:
@@ -115,7 +111,5 @@ jobs:
             --build-arg MIRROR_AUTH_ARG="\"--s3-access-key-id='${{ secrets.AWS_CACHE_ACCESS_KEY_ID }} --s3-access-key-secret=${{ secrets.AWS_CACHE_SECRET_ACCESS_KEY }}'\""
             # ' --label org.opencontainers.image.created="$CI_JOB_STARTED_AT"'
-          DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USER }}
-          DOCKERHUB_PASSWORD: ${{ secrets.DOCKERHUB_PASSWORD }}
           SPACK_DEPLOYMENT_KEY_PUB: ${{ secrets.SPACK_DEPLOYMENT_KEY_PUB }}
           SPACK_DEPLOYMENT_KEY_PRIVATE: ${{ secrets.SPACK_DEPLOYMENT_KEY_PRIVATE }}
diff --git a/README.md b/README.md
index ab2b960..09ee518 100644
--- a/README.md
+++ b/README.md
@@ -9,8 +9,8 @@ This repository aims to be the one-stop shop for all of our container needs.
 
 ## Defining containers
 
-The only files you should have to edit as an end-user are located in the `container_definitions` folder. There's a subfolder per architecture (currently supported: `amd64` and `arm64`) under which both `yaml` (in subdirectories) and `def` files can live.
-* A YAML file file defines a Spack container - in it you can define the Spack specs as you would in a Spack environment. If you have specific requirements for dependencies, you can add `spack: packages: ...` keys to define those, again, as in a Spack environment.
+The only files you should have to edit as an end-user are located in the `container_definitions` folder. There's a subfolder per architecture (currently supported: `amd64` and `arm64`) under which both `spack.yaml` (in subdirectories) and `def` files can live.
+* A `spack.yaml` file defines a Spack container - in it you can define the Spack specs as you would in a Spack environment. If you have specific requirements for dependencies, you can add `spack: packages: ...` keys to define those, again, as in a Spack environment.
 * A def file defines a singularity container that will be built from an existing container on docker-hub. nexus-storage is already defined for amd64 as an example.
 
 In both cases, the filename will be used as the name of your container. In case of a YAML file, the container version will be derived from the first package in your spec. In case of a def file, the version will be the same as the tag on docker hub.
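For illustration, a complete definition of this kind (for example `container_definitions/amd64/my-awesome-container.yaml`, reusing the placeholder package name from the examples below) might look like the following sketch; the `packages:` block is only needed when dependencies must be constrained, and the `mpich` preference simply mirrors the `spack.yaml` example further down:
```
spack:
  specs:
    - my-awesome-package
  packages:
    all:
      providers:
        mpi: [mpich]
```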
@@ -20,7 +20,6 @@ In both cases, the filename will be used as the name of your container. In case
 Create a folder under `spacktainer/files` to hold your container's files. Make sure to use your container's name to keep everything somewhat orderly.
 In your container definition file, add a `spacktainer` section with a `files` key. This key holds a list of `source:target` filepairs (note that there is no space between source and target!)
 Source is specified starting from the level below `spacktainer`; in the example below the folder structure would look like this:
-
 ```
 spacktainer/files
 └── my-awesome-container
@@ -37,10 +36,6 @@ spacktainer/files
 spack:
   specs:
     - my-awesome-package
-spacktainer:
-  files:
-    - files/my-awesome-container/script.sh:/opt/script.sh
-    - files/my-awesome-container/some_folder:/opt/some_folder
 ```
 
 # Developer documentation
@@ -58,27 +53,18 @@ spacktainer:
 * `AWS_CACHE_ACCESS_KEY_ID` / `AWS_CACHE_SECRET_ACCESS_KEY`: AWS keypair for accessing the cache bucket hosted by Amazon
 * `AWS_INFRASTRUCTURE_ACCESS_KEY_ID` / `AWS_INFRASTRUCTURE_SECRET_ACCESS_KEY`: AWS keypair for accessing the containers bucket hosted by Amazon (bbpinfrastructureassets)
-* `BBP_CACHE_ACCESS_KEY_ID` / `BBP_CACHE_SECRET_ACCESS_KEY`: AWS keypair for accessing the cache bucket hosted by BBP
 * `SPACK_DEPLOYMENT_KEY_PRIVATE`: the Spack private deployment key (as a file!)
 * `SPACK_DEPLOYMENT_KEY_PUBLIC`: the Spack public deployment key (as a file!)
-* `DOCKERHUB_USER` / `DOCKERHUB_PASSWORD`: credentials for docker hub
+* `GHCR_USER` / `GHCR_TOKEN`: the user and associated access token to write to the GitHub Container Registry (GHCR)
 * `GITLAB_API_TOKEN`: private (!) gitlab token with API_READ access (CI_JOB_TOKEN does not have enough permissions). Change this once I'm gone
 
 ## Repository layout
 
-There are a few python projects in this repository:
-
-* get_artifacts: download artifacts from a pipeline. It's fairly specific to this repository.
-* job_creator: the main project; this will take care of generating the jobs in this project. Both of the other ones are called at some point in the pipelines it generates. It is further detailed below.
-* spackitor: the spack janitor that will clean the build cache. It has its own readme and comes with some useful scripts for manual actions.
-
-Apart from that, folders of note are:
+Folders of note are:
 
 * builder: base container that contains our spack fork, needed to build the software that will be in the spacktainer
 * container_definitions: this is where users will define their containers
 * runtime: base container that contains everything needed to run the spack-built environment
-* singularitah: base container that contains singularity and s3cmd
-* spacktainer: contains the Dockerfile that will be used to build the spacky containers
 
 ## job_creator
@@ -100,7 +86,7 @@ The main entrypoints can be found, unsurprisingly, in the `__main__.py` file. The
 
 `utils.py` contains utility functions for reading/writing yaml, getting the multiarch job for a container, ...
 
-## Pulling images with Sarus or Podman
+## Pulling images with Apptainer, Podman, or Sarus
 
 Make sure you have your AWS credentials set up. Then identify the image you want to run. In the
 following, `spacktainers/neurodamus-neocortex` is going to be used. Identify the
@@ -135,6 +121,16 @@ Get a login token from AWS:
 [secret]
 ```
 
+### Pulling with Apptainer (or Singularity)
+
+Pull from the registry, logging in at the same time with the `AWS` username and token from
+above:
+```
+❯ apptainer pull --docker-login docker://130659266700.dkr.ecr.us-east-1.amazonaws.com/spacktainers/neurodamus-neocortex
+```
+The resulting `neurodamus-neocortex.sif` file is the container and can be copied to a
+better storage location as desired.
+
 ### Pulling with Podman
 
 Log into the registry, using `AWS` as the username:
@@ -155,11 +151,54 @@ Everything in Sarus goes into one command:
 
 ## Reproducing GitHub Action builds containerized
 
+First, the `builder` and `runtime` containers need to be built locally, with corresponding tags:
+```
+❯ podman build --format=docker builder -t local_builder
+❯ podman build --format=docker runtime -t local_runtime
+```
+
+Then create a new directory and add a `Dockerfile` inside, with the following contents:
+```
+FROM local_builder AS builder
+FROM local_runtime AS runtime
+
+COPY --from=builder /etc/debian_version /etc/debian_version
+```
+The last line is sometimes required to avoid optimizations that would skip including the
+`builder` container.
+
+Use a local Spack installation to create a GPG keypair to sign built packages, e.g.:
+```
+❯ spack gpg create --export-secret key --export key.pub "Le Loup" "le.loup@epfl.ch"
+```
+
+And create a `spack.yaml`, e.g.:
+```
+spack:
+  specs:
+    - zlib
+  packages:
+    all:
+      providers:
+        mpi: [mpich]
+```
+The provider setting that prefers `mpich` may be helpful when the containers are later run
+with a container runtime and SLURM using `srun --mpi=pmi2 ...`, as it allows for better MPI
+communication.
+
+Then build the image from the `Dockerfile`:
+```
+❯ podman build --format=docker .
+```
+
+### Using the official builder
+
 See above instructions under [pulling containers](#user-content-pulling-with-podman) to login and pull the `spacktainers/builder` container.
 
 Then launch the container and install something, i.e., with:
 ```
-❯ podman run -it 130659266700.dkr.ecr.us-east-1.amazonaws.com/spacktainers/builder
+❯ podman pull ghcr.io/bluebrain/spack-builder:latest
+❯ podman run -it ghcr.io/bluebrain/spack-builder:latest
 root@43dec0527c62:/# (cd /opt/spack-repos/ && git pull)
 Already up to date.
 root@43dec0527c62:/# spack install zlib
@@ -168,10 +207,21 @@ root@43dec0527c62:/# spack install zlib
 [...]
 ```
 
 Environments may be recreated as present under [`container_definitions/`](./container_definitions).
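As a sketch of that recreation step (assuming the repository has been mounted into the container, e.g. with `podman run -v "$PWD":/src ...`, and using an arbitrary environment name), the standard Spack environment commands would be:
```
root@43dec0527c62:/# spack env create my-awesome-container /src/container_definitions/amd64/my-awesome-container.yaml
root@43dec0527c62:/# spack env activate my-awesome-container
root@43dec0527c62:/# spack concretize -f
root@43dec0527c62:/# spack install
```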
-## Reproducing GitHub Action builds locally +You may use a `Dockerfile` as constructed above, but replace the local tags with the +GitHub container registry ones to build a `spack.yaml`, too: +``` +FROM ghcr.io/bluebrain/spack-builder AS builder +FROM ghcr.io/bluebrain/spack-runtime AS runtime + +COPY --from=builder /etc/debian_version /etc/debian_version +``` +This will still require a local GPG key pair to sign packages! + +## Reproducing GitHub Action builds locally (outside a container) Prerequisites needed to try the container building locally: +0. A installation using Ubuntu 24.04 LTS, with compilers set up 1. The upstream Spack commit we are using in the [`builder/Dockerfile`](builder/Dockerfile), in the argument `SPACK_BRANCH` (may be overwritten by the CI). Referred to as `${SPACK_BRANCH}` here. diff --git a/get_artifacts/pyproject.toml b/get_artifacts/pyproject.toml deleted file mode 100644 index f0bb786..0000000 --- a/get_artifacts/pyproject.toml +++ /dev/null @@ -1,15 +0,0 @@ -[build-system] -requires = ["setuptools"] -build-backend = "setuptools.build_meta" - -[project] -name = "get_artifacts" -version = "0.1.0" -dependencies = [ - "click", - "furl", - "requests", -] - -[project.scripts] -ga = "get_artifacts.__main__:get_artifacts" diff --git a/get_artifacts/src/get_artifacts/__main__.py b/get_artifacts/src/get_artifacts/__main__.py deleted file mode 100644 index 0d106e9..0000000 --- a/get_artifacts/src/get_artifacts/__main__.py +++ /dev/null @@ -1,134 +0,0 @@ -import glob -import logging -import os -from pathlib import Path -from pprint import pformat - -import click -import requests -from furl import furl - -logger = logging.getLogger(__name__) -fh = logging.FileHandler("get_artifacts.log") -fmt = logging.Formatter("[%(asctime)s] [%(levelname)s] %(msg)s") -fh.setFormatter(fmt) -sh = logging.StreamHandler() -sh.setFormatter(fmt) - -logger.setLevel(logging.DEBUG) -fh.setLevel(logging.DEBUG) -sh.setLevel(logging.DEBUG) - -logger.addHandler(fh) -logger.addHandler(sh) - - -def artifacts_url(base_url, project_id, job_id): - url = base_url / "projects" / str(project_id) / "jobs" / str(job_id) / "artifacts" - logger.debug(f"Artifacts url: {url}") - return url - - -def bridges_url(base_url, project_id, pipeline_id): - url = ( - base_url - / "projects" - / str(project_id) - / "pipelines" - / str(pipeline_id) - / "bridges" - ) - logger.debug(f"Bridges url: {url}") - return url - - -def jobs_url(base_url, project_id, pipeline_id): - url = ( - base_url - / "projects" - / str(project_id) - / "pipelines" - / str(pipeline_id) - / "jobs" - ) - logger.debug(f"Jobs url: {url}") - return url - - -def pipeline_url(base_url, project_id, pipeline_id): - url = base_url / "projects" / str(project_id) / "pipelines" / str(pipeline_id) - logger.debug(f"Pipeline url: {url}") - return url - - -@click.command() -@click.option( - "--parent-pipeline", "-P", help="ID of the parent pipeline", required=True -) -@click.option("--private-token", "-t", help="Private gitlab api token", required=False) -def get_artifacts(parent_pipeline, private_token): - project_id = "2432" - logger.info(f"Getting artifacts for pipeline {parent_pipeline}") - - session = requests.Session() - session.headers["Content-Type"] = "application/json" - if private_token: - # Yes, there is a CI_JOB_TOKEN, but that doesn't have the necessary permissions - # https://docs.gitlab.com/ee/ci/jobs/ci_job_token.html - # it can, for example, not access the pipelines API - logger.debug("Using private token specified on the command line") - 
session.headers["PRIVATE-TOKEN"] = private_token - else: - raise RuntimeError( - "No gitlab api token found, either specify it with `-t` or run this in a job" - ) - - base_url = furl(os.environ.get("CI_API_V4_URL", "https://bbpgitlab.epfl.ch/api/v4")) - logger.info("Finding bridge jobs") - bridges = session.get(bridges_url(base_url, project_id, parent_pipeline)).json() - logger.debug(f"Bridges: {pformat(bridges)}") - bridge = next( - b for b in bridges if b["name"] == "base containers and pipeline generation" - ) - logger.debug(f"Bridge: {pformat(bridge)}") - logger.info("Finding base containers and pipeline generation") - run_pipeline = session.get( - pipeline_url( - base_url, - bridge["downstream_pipeline"]["project_id"], - bridge["downstream_pipeline"]["id"], - ) - ).json() - logger.debug(f"Run pipeline: {pformat(run_pipeline)}") - logger.info("Getting jobs from base containers and pipeline generation") - jobs = session.get(jobs_url(base_url, project_id, run_pipeline["id"])).json() - logger.debug(f"Jobs: {pformat(jobs)}") - for architecture in [ - archdir.name for archdir in Path("container_definitions").glob("[!_]*") - ]: - logger.info(f"Architecture: {architecture}") - process_job = next( - j for j in jobs if j["name"] == f"process spack pipeline for {architecture}" - ) - spack_artifacts = session.get( - artifacts_url(base_url, project_id, process_job["id"]) - ) - logger.info(f"Downloading spack artifacts for {architecture}") - with open(f"spack.artifacts.{architecture}.zip", "wb") as fp: - fp.write(spack_artifacts.content) - - spacktainer_job = next( - j - for j in jobs - if j["name"] == f"generate spacktainer jobs for {architecture}" - ) - spacktainer_artifacts = session.get( - artifacts_url(base_url, project_id, spacktainer_job["id"]) - ) - logger.info(f"Downloading spacktainer artifacts for {architecture}") - with open(f"spacktainer.artifacts.{architecture}.zip", "wb") as fp: - fp.write(spacktainer_artifacts.content) - - -if __name__ == "__main__": - get_artifacts() diff --git a/job_creator/pyproject.toml b/job_creator/pyproject.toml deleted file mode 100644 index 3c0ad43..0000000 --- a/job_creator/pyproject.toml +++ /dev/null @@ -1,18 +0,0 @@ -[build-system] -requires = ["setuptools"] -build-backend = "setuptools.build_meta" - -[project] -name = "job_creator" -version = "0.1.0" -dependencies = [ - "boto3", - "click", - "gitpython", - "natsort", - "requests", - "ruamel.yaml" -] - -[project.scripts] -jc = "job_creator.__main__:jc" diff --git a/job_creator/src/job_creator/__init__.py b/job_creator/src/job_creator/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/job_creator/src/job_creator/__main__.py b/job_creator/src/job_creator/__main__.py deleted file mode 100644 index 292b202..0000000 --- a/job_creator/src/job_creator/__main__.py +++ /dev/null @@ -1,357 +0,0 @@ -import copy -import glob -import logging -import logging.config -import os -from itertools import chain -from pathlib import Path - -import click -from natsort import natsorted - -from job_creator.architectures import architecture_map -from job_creator.ci_objects import Job, Trigger, Workflow -from job_creator.containers import (Spacktainerizer, - generate_base_container_workflow, - generate_spacktainers_workflow) -from job_creator.job_templates import (clean_cache_yaml, - generate_containers_workflow_yaml) -from job_creator.logging_config import LOGGING_CONFIG -from job_creator.packages import generate_packages_workflow -from job_creator.utils import (get_arch_or_multiarch_job, get_architectures, 
- load_yaml, write_yaml) - -logging.config.dictConfig(LOGGING_CONFIG) -logger = logging.getLogger("job_creator") - - -def debug_output(debug): - if debug: - stream_handler = next(h for h in logger.handlers if h.name == "sh") - stream_handler.setLevel(logging.DEBUG) - - -@click.group() -def jc(): - pass - - -@jc.command -@click.option( - "--architecture", - "-a", - help="Architecture to generate spacktainer pipeline for", -) -@click.option( - "--out-dir", - "-o", - help="Which directory to write the spacktainer build pipeline to", -) -@click.option("--s3cmd-version", "-s", default="2.3.0", help="s3cmd version") -def generate_spacktainer_workflow(architecture, out_dir, s3cmd_version): - """ - Generate the workflow that will build the actual spack-package-based containers - for the given container definition - """ - out_dir = Path(out_dir) - out_dir.mkdir(parents=True, exist_ok=True) - workflow = generate_spacktainers_workflow(architecture, out_dir, s3cmd_version) - write_yaml(workflow.to_dict(), f"{out_dir}/spacktainer_pipeline.yaml") - - -@jc.command -@click.option( - "--pipeline-dir", - "-d", - help="Directory containing YAML pipeline files generated by spack", -) -@click.option( - "--out-dir", - "-o", - help="Output dir in which to dump split pipelines. Will be created if necessary.", -) -@click.option( - "--debug/--no-debug", default=False, help="Show debug logging on the console" -) -def process_spack_pipeline(pipeline_dir, out_dir, debug): - """ - Given a directory with spack-generated pipeline files, this will: - * merge all of them into one giant pipeline file, keeping the earliest version of any duplicates - * split it along the generated stages: each stage will become its own workflow - * in each "stage", do the necessary spack mirror manipulation, variable setting, ... 
- * add a job before all the stages run that will collect artifacts needed, so that - the stages can grab them from within the same workflow - * configure "stage" dependencies - """ - debug_output(debug) - logger.info("Processing spack pipeline") - packages_count = 0 - noop_workflow = Workflow() - workflow = Workflow() - architecture = out_dir.split(".")[1] - pipeline_dir = Path(pipeline_dir) - out_dir = Path(out_dir) - for generated_pipeline_file in chain( - pipeline_dir.glob("*/*.yml"), pipeline_dir.glob("*/*.yaml") - ): - logger.info( - f"Processing spack workflow for file {generated_pipeline_file} with output dir {out_dir}" - ) - spack_generated_pipeline = load_yaml(generated_pipeline_file) - - noop_name = "no-specs-to-rebuild" - if noop_name in spack_generated_pipeline: - noop = spack_generated_pipeline[noop_name] - if noop_name not in noop_workflow: - noop_workflow.add_job( - Job( - name=noop_name, - architecture=architecture, - append_arch=False, - **spack_generated_pipeline[noop_name], - ) - ) - noop_workflow.rules = spack_generated_pipeline.get("workflow", {}).get( - "rules" - ) - continue - - artifacts_root = Path( - spack_generated_pipeline["variables"]["SPACK_ARTIFACTS_ROOT"] - ) - container_name = artifacts_root.name - for name, item in spack_generated_pipeline.items(): - if name == "variables": - if "variables" in architecture_map[architecture]: - logger.debug( - f"Adding {architecture} variables to variables section" - ) - item.update(architecture_map[architecture]["variables"]) - logger.debug("Setting workflow-level variables") - workflow.variables.update(item) - continue - elif name == "workflow": - logger.debug("Adding workflow rules") - workflow.rules = item["rules"] - continue - elif name == "stages": - logger.debug("Ignoring stages, we'll roll our own") - continue - logger.debug(f"Found an actual job: {name}") - if name != "rebuild-index": - packages_count += 1 - job = Job(name=name, architecture=architecture, append_arch=False, **item) - logger.debug(f"Job came from the {container_name} population job") - job.needs = [ - { - "pipeline": os.environ["CI_PIPELINE_ID"], - "job": f"generate build cache population job for {container_name} for {architecture}", - "artifacts": True, - } - ] - job.add_spack_mirror() - job.set_aws_variables() - job.timeout = "4h" - job.variables.update(spack_generated_pipeline["variables"]) - - job.image["pull_policy"] = "always" - workflow.add_job(job, special_spack_treatment=True) - - out_dir.mkdir(parents=True, exist_ok=True) - if workflow.jobs: - logger.debug("Sorting workflow stages and jobs") - workflow.stages = natsorted(workflow.stages) - workflow.jobs = natsorted(workflow.jobs, key=lambda x: f"{x.stage}-{x.name}") - logger.debug("Writing to merged_spack_pipeline.yaml for debugging purposes") - write_yaml(workflow.to_dict(), "merged_spack_pipeline.yaml") - else: - write_yaml(noop_workflow.to_dict(), "spack_pipeline.yaml") - return - - logger.debug("Splitting into stage pipelines and generating trigger workflow") - trigger_workflow = Workflow(rules=workflow.rules) - collect_job = Job( - "collect artifacts", - architecture, - needs=[ - { - "pipeline": os.environ.get("CI_PIPELINE_ID"), - "job": f"process spack pipeline for {architecture}", - "artifacts": True, - } - ], - script=[ - "cat spack_pipeline.yaml", - f"find artifacts.{architecture}", - ], - stage="collect artifacts", - artifacts={"when": "always", "paths": ["*.yaml", "artifacts.*"]}, - rules=[{"when": "always"}], - ) - trigger_workflow.add_job(collect_job) - - previous_stage = 
None - - for stage in workflow.stages: - logger.debug(f"Stage: {stage}") - pipeline_file = out_dir / f"pipeline-{stage}.yaml" - stage_workflow = Workflow(rules=workflow.rules, variables=workflow.variables) - stage_workflow.jobs = [job for job in workflow.jobs if job.stage == stage] - - if not stage_workflow.jobs: - logger.debug(f"No jobs for {stage} - skipping") - continue - - for job in stage_workflow.jobs: - job.stage = None - - write_yaml(stage_workflow.to_dict(), pipeline_file) - - needs = [ - { - "job": collect_job.name, - "artifacts": True, - }, - ] - - if previous_stage: - needs.append({"job": previous_stage}) - - stage_trigger = Trigger( - name=stage, - trigger={ - "include": [ - { - "artifact": str(pipeline_file), - "job": collect_job.name, - } - ], - "strategy": "depend", - }, - needs=needs, - stage=stage, - rules=[{"when": "always"}], - ) - - trigger_workflow.add_trigger(stage_trigger) - - previous_stage = stage - - write_yaml(trigger_workflow.to_dict(), "spack_pipeline.yaml") - logger.info(f"{packages_count} packages will be rebuilt.") - - -def generate_containers_workflow(existing_workflow, architectures, s3cmd_version): - """ - Generate the jobs to build the spacktainer containers - """ - builder = Spacktainerizer(name="builder", build_path="builder") - - workflow = Workflow() - for architecture in architectures: - arch_job = Job( - "generate spacktainer jobs", - architecture=architecture, - force_needs=True, - **copy.deepcopy(generate_containers_workflow_yaml), - ) - arch_job.image = { - "name": f"{builder.registry_image}:{builder.registry_image_tag}", - "pull_policy": "always", - } - arch_job.needs.extend( - [j.name for j in get_arch_or_multiarch_job(existing_workflow, architecture)] - ) - arch_job.variables["ARCHITECTURE"] = architecture - arch_job.variables["OUTPUT_DIR"] = f"artifacts.{architecture}" - arch_job.variables["S3CMD_VERSION"] = s3cmd_version - - workflow.add_job(arch_job) - return workflow - - -def generate_clean_cache_workflow(architectures, cache_population_job_names): - """ - Generate the jobs to clean the build cache - """ - workflow = Workflow() - stage = "clean build cache" - workflow.stages = [stage] - for architecture in architectures: - arch_job = Job( - "clean build cache", - architecture=architecture, - stage=stage, - **copy.deepcopy(clean_cache_yaml), - ) - - bucket_info = architecture_map[architecture]["cache_bucket"] - for job_name in cache_population_job_names[architecture]: - arch_job.needs.append( - { - "job": job_name, - "artifacts": True, - } - ) - env_args = [] - arch_container_definitions = Path(f"container_definitions/{architecture}/") - for container_definition in arch_container_definitions.glob("*yaml"): - env_args.append( - f"-e jobs_scratch_dir.{architecture}/{container_definition.stem}/concrete_environment/spack.lock" - ) - arch_job.variables = { - "SPACK_ENV_ARGS": " ".join(env_args), - "BUCKET": bucket_info["name"], - "MAX_AGE": bucket_info["max_age"], - } - workflow.add_job(arch_job) - - return workflow - - -@jc.command -@click.option( - "--singularity-version", "-S", default="4.0.2", help="Singularity version" -) -@click.option("--s3cmd-version", "-s", default="2.3.0", help="s3cmd version") -@click.option( - "--output-file", - "-o", - default="generated_pipeline.yaml", - help="Which file to write the output to", -) -@click.option( - "--debug/--no-debug", default=False, help="Show debug logging on the console" -) -def create_jobs(singularity_version, s3cmd_version, output_file, debug): - debug_output(debug) - architectures = 
get_architectures() - workflow = generate_base_container_workflow( - singularity_version, s3cmd_version, architectures=architectures - ) - packages_workflow, cache_population_job_names = generate_packages_workflow( - architectures - ) - workflow += packages_workflow - workflow += generate_clean_cache_workflow(architectures, cache_population_job_names) - workflow += generate_containers_workflow(workflow, architectures, s3cmd_version) - - for job in [ - j - for j in workflow.jobs - if "generate build cache population" in j.name - or "generate spacktainer" in j.name - ]: - logger.debug(f"Adding needs for {job.name}") - [ - job.add_need(need.name) - for need in get_arch_or_multiarch_job(workflow, job.architecture) - ] - - # TODO - # * rules? - write_yaml(workflow.to_dict(), output_file) - - -if __name__ == "__main__": - jc() diff --git a/job_creator/src/job_creator/architectures.py b/job_creator/src/job_creator/architectures.py deleted file mode 100644 index 44413d3..0000000 --- a/job_creator/src/job_creator/architectures.py +++ /dev/null @@ -1,54 +0,0 @@ -import os - - -prod = os.environ.get("CI_COMMIT_REF_SLUG") == os.environ.get("CI_DEFAULT_BRANCH") - -architecture_map = { - "amd64": { - "tag": "kubernetes", - "proxy": True, - "cache_bucket": { - "name": "spack-build-cache" if prod else "spack-build-cache-dev", - "max_age": 90 if prod else 30, - "endpoint_url": "https://bbpobjectstorage.epfl.ch", - "keypair_variables": { - "access_key": "BBP_CACHE_ACCESS_KEY_ID", - "secret_key": "BBP_CACHE_SECRET_ACCESS_KEY", - }, - }, - "containers_bucket": { - "name": "sboinfrastructureassets", - "keypair_variables": { - "access_key": "AWS_INFRASTRUCTURE_ACCESS_KEY_ID", - "secret_key": "AWS_INFRASTRUCTURE_SECRET_ACCESS_KEY", - }, - }, - "base_arch": "%gcc@12 os=ubuntu22.04 target=x86_64_v3", - "variables": { - "KUBERNETES_CPU_REQUEST": 4, - "KUBERNETES_CPU_LIMIT": 8, - "KUBERNETES_MEMORY_REQUEST": "8Gi", - "KUBERNETES_MEMORY_LIMIT": "8Gi", - }, - }, - "arm64": { - "tag": "aws_graviton", - "proxy": False, - "cache_bucket": { - "name": "spack-cache-xlme2pbun4", - "max_age": 90 if prod else 30, - "keypair_variables": { - "access_key": "AWS_CACHE_ACCESS_KEY_ID", - "secret_key": "AWS_CACHE_SECRET_ACCESS_KEY", - }, - }, - "containers_bucket": { - "name": "sboinfrastructureassets", - "keypair_variables": { - "access_key": "AWS_INFRASTRUCTURE_ACCESS_KEY_ID", - "secret_key": "AWS_INFRASTRUCTURE_SECRET_ACCESS_KEY", - }, - }, - "base_arch": "%gcc@12 os=ubuntu22.04 target=armv8.4a", - }, -} diff --git a/job_creator/src/job_creator/ci_objects.py b/job_creator/src/job_creator/ci_objects.py deleted file mode 100644 index c4b1255..0000000 --- a/job_creator/src/job_creator/ci_objects.py +++ /dev/null @@ -1,385 +0,0 @@ -import copy -import logging -import logging.config -import os -import urllib - -from job_creator.architectures import architecture_map -from job_creator.logging_config import LOGGING_CONFIG - -logging.config.dictConfig(LOGGING_CONFIG) -logger = logging.getLogger("job_creator") - - -class NoArchitecture(Exception): - pass - - -class Workflow: - """ - Gitlab Workflow model - Make sure to add your jobs/stages in the order they need to execute! 
- """ - - def __init__(self, include=None, rules=None, variables=None): - self.stages = [] - self.jobs = [] - self.include = include or [] - self.rules = rules or [] - self.variables = variables or {} - - def add_include(self, include): - if include not in self.include: - self.include.append(include) - - def add_stage(self, stage): - if stage not in self.stages: - self.stages.append(stage) - - def _add_joblike(self, add_type, joblike, replace=False): - if joblike.name in self: - logger.debug(f"{add_type.capitalize()} {joblike.name} already in workflow") - return - self.jobs.append(joblike) - if joblike.stage: - self.add_stage(joblike.stage) - - def add_job(self, job, special_spack_treatment=False): - """ - Add a job to the workflow - - :param job: the Job object to add - :param special_spack_treatment: when merging spack pipelines, we'll encounter duplicate - jobs. In this case, we'll want the job in the earliest - possible stage. - - """ - replace = False - if ( - job.name in self - and not job.name.startswith("rebuild-index") - and special_spack_treatment - ): - logger.debug(f"Job {job.name} already in workflow, special spack treatment set") - existing_job = self.get_job(job.name)[0] - existing_stage_number = int(existing_job.stage.split("-")[1]) - new_stage_number = int(job.stage.split("-")[1]) - if new_stage_number < existing_stage_number: - logger.debug( - f"Stage {new_stage_number} < {existing_stage_number} - replacing" - ) - replace = True - elif new_stage_number == existing_stage_number: - logger.debug( - f"Stage {existing_stage_number} == {new_stage_number} - not replacing" - ) - else: - logger.debug( - f"Stage {existing_stage_number} < {new_stage_number} - not replacing" - ) - self._add_joblike("job", job, replace) - - def get_job(self, job_name, startswith=False): - """ - Return a job with a given name, if it's present in the workflow - - :param startswith: if set to True, return a list of jobs whose names start with the given string - """ - - retval = None - - if startswith: - retval = [job for job in self.jobs if job.name.startswith(job_name)] - else: - retval = [job for job in self.jobs if job.name == job_name] - - return retval - - def add_trigger(self, trigger): - self._add_joblike("trigger", trigger) - - def to_dict(self): - as_dict = {"stages": self.stages} if self.stages else {} - as_dict.update({job.name: job.to_dict() for job in self.jobs}) - if self.include: - as_dict["include"] = self.include - if self.rules: - as_dict["workflow"] = {"rules": self.rules} - if self.variables: - as_dict["variables"] = self.variables - return as_dict - - def _dedup(self, seq): - """ - Deduplicate items in a list while keeping order - See https://stackoverflow.com/questions/480214/how-do-i-remove-duplicates-from-a-list-while-preserving-order - Will keep the first item - """ - seen = list() - seen_append = seen.append - return [x for x in seq if not (x in seen or seen_append(x))] - - def __add__(self, other): - if not isinstance(other, Workflow): - raise TypeError(f"cannot add Workflow and {type(other)}") - - new = Workflow() - my_stages = copy.deepcopy(self.stages) - other_stages = copy.deepcopy(other.stages) - if set(my_stages).issubset(set(other_stages)): - new.stages = other_stages - else: - new.stages = self._dedup(my_stages + other_stages) - - new.jobs = copy.deepcopy(self.jobs) - for other_job in other.jobs: - new.add_job(other_job) - - include = copy.deepcopy(self.include) - include.extend(copy.deepcopy(other.include)) - include = self._dedup(include) - new.include = include - - 
return new - - def __iadd__(self, other): - if not isinstance(other, Workflow): - raise TypeError(f"cannot add Workflow and {type(other)}") - - my_stages = copy.deepcopy(self.stages) - other_stages = copy.deepcopy(other.stages) - if set(my_stages).issubset(set(other_stages)): - self.stages = other_stages - else: - for stage in other.stages: - self.add_stage(stage) - for other_job in other.jobs: - self.add_job(other_job) - for other_include in other.include: - self.add_include(other_include) - - return self - - def __contains__(self, item): - """ - Check whether a specific job name is part of this workflow - """ - return item in [j.name for j in self.jobs] - - -class Job: - def __init__( - self, - name, - architecture=None, - force_needs=False, - needs=None, - script=None, - stage=None, - artifacts=None, - before_script=None, - variables=None, - timeout=None, - bucket="cache", - rules=None, - append_arch=True, - **kwargs, - ): - """ - :param bucket: set to either "cache" to use the cache bucket, - or "infra" to use the infra bucket - """ - self.force_needs = force_needs - self.extra_properties = [] - self.name = name - self.tags = [] - self.needs = needs or [] - self.script = script - self.stage = stage - self.artifacts = artifacts - self.before_script = before_script if before_script else [] - self.variables = variables or {} - self.timeout = timeout - self.rules = rules or [] - self.image = None - self._bucket = bucket - for key, value in kwargs.items(): - logger.debug(f"Setting {key}: {value}") - self.extra_properties.append(key) - setattr(self, key, value) - self.set_architecture(architecture, append_arch) - - def set_architecture(self, architecture=None, append_arch=True): - if architecture: - if append_arch: - self.name += f" for {architecture}" - self.architecture = architecture - architecture_tag = architecture_map[architecture]["tag"] - if architecture_tag not in self.tags: - self.tags.append(architecture_tag) - self.set_proxy_variables() - self.set_aws_variables() - else: - self.architecture = None - - def set_proxy_variables(self): - if not architecture_map[self.architecture].get("proxy", True): - self.update_before_script( - "unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY" - ) - - def configure_s3cmd(self): - """ - * determine proxy - * determine access keys - """ - if not self.architecture: - raise NoArchitecture( - f"Cannot configure s3cmd - no architecture specified for {self.name}" - ) - - script_lines = [] - if architecture_map[self.architecture].get("proxy", False): - proxy = urllib.parse.urlsplit(os.environ["HTTP_PROXY"]) - proxy_host = proxy.hostname - proxy_port = proxy.port - script_lines += [ - f"sed -i 's/^proxy_host.*/proxy_host={proxy_host}/' /root/.s3cfg", - f"sed -i 's/^proxy_port.*/proxy_port={proxy_port}/' /root/.s3cfg", - ] - - if bucket_keypair := architecture_map[self.architecture][self.bucket_key].get( - "keypair_variables" - ): - script_lines += [ - f"sed -i 's/^access_key.*/access_key='${bucket_keypair['access_key']}'/' /root/.s3cfg", - f"sed -i 's/^secret_key.*/secret_key='${bucket_keypair['secret_key']}'/' /root/.s3cfg", - ] - - self.update_before_script(script_lines, append=True) - - @property - def bucket_key(self): - if self._bucket == "cache": - return "cache_bucket" - elif self._bucket == "infra": - return "containers_bucket" - else: - raise ValueError(f"Don't know what to do with bucket {self._bucket}") - - def set_aws_variables(self): - if not self.architecture: - raise NoArchitecture( - f"Cannot set AWS variables - no architecture specified 
for {self.name}" - ) - - script_lines = [] - - if bucket_keypair := architecture_map[self.architecture][self.bucket_key].get( - "keypair_variables" - ): - script_lines = [ - f"export AWS_ACCESS_KEY_ID=${bucket_keypair['access_key']}", - f"export AWS_SECRET_ACCESS_KEY=${bucket_keypair['secret_key']}", - ] - else: - logger.info(f"No keypair defined for {self.architecture}") - if endpoint_url := architecture_map[self.architecture][self.bucket_key].get( - "endpoint_url" - ): - script_lines.append(f"export S3_ENDPOINT_URL={endpoint_url}") - - self.update_before_script(script_lines) - - def add_need(self, need): - if need not in self.needs: - self.needs.append(need) - - def update_before_script(self, lines, append=False): - """ - Set the given lines as the before_script if there isn't one yet - If append=True, insert the lines at the end - If append=False, insert the lines at the start - """ - if isinstance(lines, str): - lines = [lines] - if not set(lines).issubset(set(self.before_script)): - if append: - self.before_script.extend(lines) - else: - self.before_script[0:0] = lines - - def _property_as_dict(self, prop_name): - if hasattr(self, prop_name) and getattr(self, prop_name): - return {prop_name: getattr(self, prop_name)} - else: - if prop_name == "needs" and self.force_needs: - return {"needs": []} - return {} - - def add_spack_mirror(self): - bucket_info = architecture_map[self.architecture]["cache_bucket"] - endpoint_url = bucket_info.get("endpoint_url") - aws_keypair = bucket_info.get("keypair_variables") - - mirror_add_cmd = [ - "spack mirror add", - f"--s3-access-key-id=${aws_keypair['access_key']}", - f"--s3-access-key-secret=${aws_keypair['secret_key']}", - f"--s3-endpoint-url={endpoint_url}" if endpoint_url else "", - f"s3Cache s3://{bucket_info['name']}", - ] - before_script_lines = [ - ". 
${SPACK_ROOT}/share/spack/setup-env.sh", - "spack mirror rm bbpS3 || true", - " ".join(mirror_add_cmd), - ] - - self.update_before_script(before_script_lines, append=True) - - def to_dict(self): - as_dict = {} - - for prop_name in [ - "needs", - "script", - "stage", - "artifacts", - "tags", - "before_script", - "variables", - "image", - "timeout", - "rules", - ] + self.extra_properties: - as_dict.update(self._property_as_dict(prop_name)) - - return as_dict - - def __repr__(self): - return f" {self.name}" - - -class Trigger: - def __init__( - self, name, trigger, needs=None, stage=None, architecture=None, rules=None - ): - self.name = name - if architecture: - self.name += f" for {architecture}" - - self.trigger = trigger - self.needs = needs - self.stage = stage - self.rules = rules or [] - - def to_dict(self): - as_dict = {"trigger": self.trigger} - if self.needs: - as_dict["needs"] = self.needs - if self.stage: - as_dict["stage"] = self.stage - if self.rules: - as_dict["rules"] = self.rules - - return as_dict diff --git a/job_creator/src/job_creator/containers.py b/job_creator/src/job_creator/containers.py deleted file mode 100644 index b2df9d6..0000000 --- a/job_creator/src/job_creator/containers.py +++ /dev/null @@ -1,1122 +0,0 @@ -import copy -import glob -import hashlib -import json -import re -import logging -import logging.config -import os -import shlex -import shutil -import subprocess -from datetime import datetime -from functools import cached_property -from pathlib import Path -from typing import Dict, List - -import boto3 -from botocore.exceptions import ClientError -from git import Repo - -from job_creator.architectures import architecture_map, prod -from job_creator.ci_objects import Job, Workflow -from job_creator.job_templates import (bb5_download_sif_yaml, - bbp_containerizer_include_yaml, - build_custom_containers_yaml, - build_spacktainer_yaml, - buildah_build_yaml, - buildah_include_yaml, create_sif_yaml, - docker_hub_push_yaml, multiarch_yaml) -from job_creator.logging_config import LOGGING_CONFIG -from job_creator.spack_template import spack_template -from job_creator.utils import (docker_hub_login, docker_hub_repo_exists, - docker_hub_repo_tag_exists, load_yaml, - merge_dicts, write_yaml) - -logging.config.dictConfig(LOGGING_CONFIG) -logger = logging.getLogger("job_creator") - - -class ImageNotFoundError(Exception): - pass - - -class BaseContainer: - """ - Base class with common container functionality - """ - - def __init__( - self, - name, - build_path, - architectures="amd64", - registry="bbpgitlab.epfl.ch:5050/hpc/spacktainers/", - ): - self.name = name - self.job_name = f"build {self.name}" - self.registry = registry - self.registry_image = f"{os.environ.get('CI_REGISTRY_IMAGE')}/{build_path}" - self.build_path = build_path - - if isinstance(architectures, str): - self.architectures = [architectures] - else: - self.architectures = list(architectures) - - self.workflow = Workflow(**copy.deepcopy(buildah_include_yaml)) - - @property - def registry_image_tag(self) -> str: - """ - The tag the container will have once created: at least the date, - optionally followed by the branch name if not building on CI_DEFAULT_BRANCH - """ - branch = os.environ.get("CI_COMMIT_REF_SLUG") - if branch == os.environ.get("CI_DEFAULT_BRANCH"): - tag = "latest" - else: - tag = datetime.strftime(datetime.today(), "%Y.%m.%d") - tag += f"-{branch}" - - return tag - - def generate(self, *args, **kwargs) -> Workflow: - """ - The method which will generate a workflow to build this 
container - """ - raise NotImplementedError("Children must implement this") - - def needs_build(self) -> bool: - """ - Does the container need building? - """ - raise NotImplementedError("Children must implement this") - - @cached_property - def container_checksum(self) -> str: - """ - Checksum calculated based on the files needed to build the container (e.g. Dockerfile) - """ - return self._generate_container_checksum() - - def _generate_container_checksum(self) -> str: - """ - Checksum calculated based on the files needed to build the container (e.g. Dockerfile) - """ - checksums = [] - for filepath in sorted(glob.glob(f"{self.build_path}/*")): - with open(filepath, "r") as fp: - checksums.append(hashlib.sha256(fp.read().encode()).hexdigest()) - container_checksum = hashlib.sha256(":".join(checksums).encode()).hexdigest() - return container_checksum - - def container_info( - self, - registry: str | None = None, - registry_user: str | None = None, - registry_password: str | None = None, - registry_image: str | None = None, - ) -> Dict: - """ - Get the container info from the repository through `skopeo inspect` - """ - registry = registry if registry else os.environ.get("CI_REGISTRY") - registry_user = ( - registry_user if registry_user else os.environ.get("CI_REGISTRY_USER") - ) - registry_password = ( - registry_password - if registry_password - else os.environ.get("CI_REGISTRY_PASSWORD") - ) - registry_image = registry_image if registry_image else self.registry_image - skopeo_login_cmd = [ - "skopeo", - "login", - "-u", - registry_user, - "-p", - registry_password, - registry, - ] - logger.debug(f"Running `{skopeo_login_cmd}`") - subprocess.run(skopeo_login_cmd) - - for architecture in self.architectures: - skopeo_inspect_cmd = [ - "skopeo", - "inspect", - f"--override-arch={architecture}", - f"docker://{registry_image}:{self.registry_image_tag}", - ] - logger.debug( - f"Running `{' '.join(skopeo_inspect_cmd).replace(registry_password, 'MASKED')}`" - ) - result = subprocess.run(skopeo_inspect_cmd, capture_output=True) - - if result.returncode != 0: - raise ImageNotFoundError( - f"Image {self.name}:{self.registry_image_tag} not found in {registry}" - ) - info = json.loads(result.stdout) - - # if the override-arch is not found, skopeo just returns whatever other arch - # is available without complaining. Thanks, skopeo! 
- if info["Architecture"] != architecture: - raise ImageNotFoundError( - f"Image {self.name}:{self.registry_image_tag} with architecture {architecture} not found in {registry}" - ) - - return info - - def compose_workflow(self) -> None: - """ - Append architecture to the REGISTRY_IMAGE_TAG if necessary - and create multiarch job if necessary - """ - if len(self.architectures) > 1: - for job in self.workflow.jobs: - job.variables[ - "REGISTRY_IMAGE_TAG" - ] = f"{job.variables['REGISTRY_IMAGE_TAG']}-{job.architecture}" - - self.create_multiarch_job() - - def create_multiarch_job(self) -> None: - """ - If the container is being built for multiple architectures, create a multiarch job - """ - if len(self.architectures) > 1: - multiarch_job_name = f"create multiarch for {self.name}" - multiarch_job = Job(multiarch_job_name, **copy.deepcopy(multiarch_yaml)) - multiarch_job.needs = [job.name for job in self.workflow.jobs] - logger.debug("Replace placeholders in multiarch job script") - for idx, line in enumerate(multiarch_job.script): - multiarch_job.script[idx] = line.replace( - "%REGISTRY_IMAGE%", self.registry_image - ).replace("%REGISTRY_IMAGE_TAG%", self.registry_image_tag) - - self.workflow.add_job(multiarch_job) - - def get_s3_connection(self, bucket: Dict) -> boto3.client: - if keypair_variables := bucket.get("keypair_variables"): - os.environ["AWS_ACCESS_KEY_ID"] = os.environ[ - keypair_variables["access_key"] - ] - os.environ["AWS_SECRET_ACCESS_KEY"] = os.environ[ - keypair_variables["secret_key"] - ] - - s3 = boto3.client("s3") - - return s3 - - -class Spacktainerizer(BaseContainer): - """ - Base class for the runtime and builder containers that contain our Spack fork - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.spack_branch = os.environ.get("SPACK_BRANCH", "develop") - - @cached_property - def spack_commit(self) -> str: - """ - Get the latest spack commit - """ - logger.debug(f"Cloning spack for {self.name} {self.spack_branch}") - spack_clone_dir = "spack" - if os.path.exists(spack_clone_dir): - shutil.rmtree(spack_clone_dir) - spack = Repo.clone_from( - "https://github.com/bluebrain/spack", - to_path=spack_clone_dir, - multi_options=[f"-b {self.spack_branch}", "--depth=1"], - ) - return spack.head.commit.hexsha - - def needs_build(self) -> bool: - """ - Check whether the container needs building: - * Does the container exist? - * Have any of the files needed for it (e.g. Dockerfile) changed? - * Was the existing container built with the most recent spack commit? 
- """ - - logger.info(f"Checking whether we need to build {self.name}") - try: - container_info = self.container_info() - existing_spack_commit = container_info["Labels"][ - "ch.epfl.bbpgitlab.spack_commit" - ] - existing_container_checksum = container_info["Labels"][ - "ch.epfl.bbpgitlab.container_checksum" - ] - except ImageNotFoundError as ex: - logger.info(ex) - logger.info(f"We'll have to build {self.name}") - return True - logger.debug("Image found!") - - logger.debug(f"Existing container checksum: {existing_container_checksum}") - logger.debug(f"My container checksum: {self.container_checksum}") - logger.debug(f"Existing spack commit: {existing_spack_commit}") - logger.debug(f"My spack commit: {self.spack_commit}") - if ( - existing_container_checksum == self.container_checksum - and existing_spack_commit == self.spack_commit - ): - logger.info(f"No need to build {self.name}") - return False - - logger.info(f"We'll have to build {self.name}") - return True - - def generate(self, *args, **kwargs) -> Workflow: - """ - Generate the workflow that will build this container, if necessary - """ - if not self.needs_build(): - return Workflow() - - buildah_extra_args = [ - f"--build-arg SPACK_BRANCH={self.spack_branch}", - f'--label org.opencontainers.image.title="{self.name}"', - f'--label org.opencontainers.image.version="{self.registry_image_tag}"', - f'--label ch.epfl.bbpgitlab.spack_commit="{self.spack_commit}"', - f'--label ch.epfl.bbpgitlab.container_checksum="{self.container_checksum}"', - ] - - for architecture in self.architectures: - arch_job = Job( - self.job_name, - architecture=architecture, - **copy.deepcopy(buildah_build_yaml), - ) - arch_job.variables["CI_REGISTRY_IMAGE"] = self.registry_image - arch_job.variables["REGISTRY_IMAGE_TAG"] = self.registry_image_tag - arch_job.variables[ - "BUILDAH_EXTRA_ARGS" - ] += f" {' '.join(buildah_extra_args)}" - arch_job.variables["BUILD_PATH"] = self.build_path - cache_bucket = architecture_map[architecture]["cache_bucket"] - arch_job.variables[ - "BUILDAH_EXTRA_ARGS" - ] += f' --build-arg CACHE_BUCKET="s3://{cache_bucket["name"]}"' - if endpoint_url := architecture_map[architecture]["cache_bucket"].get( - "endpoint_url" - ): - arch_job.variables[ - "BUILDAH_EXTRA_ARGS" - ] += f' --build-arg MIRROR_URL="{endpoint_url}"' - - arch_job.update_before_script( - ['cp "$SPACK_DEPLOYMENT_KEY_PUBLIC" "$CI_PROJECT_DIR/builder/key.pub"'], - append=True, - ) - self.workflow.add_job(arch_job) - - self.compose_workflow() - - return self.workflow - - -class Singularitah(BaseContainer): - """ - A container containing singularity and s3cmd - """ - - def __init__(self, singularity_version, s3cmd_version, *args, **kwargs): - super().__init__(*args, **kwargs) - self.singularity_version = singularity_version - self.s3cmd_version = s3cmd_version - - def needs_build(self) -> bool: - """ - Check whether the container needs building: - * Does it exist? - * Does it have the correct s3cmd and singularity versions? - * Has the Dockerfile or any of the related files changed? 
- """ - logger.info(f"Checking whether we need to build {self.name}") - try: - container_info = self.container_info() - existing_singularity_version = container_info["Labels"][ - "ch.epfl.bbpgitlab.singularity_version" - ] - existing_s3cmd_version = container_info["Labels"][ - "ch.epfl.bbpgitlab.s3cmd_version" - ] - existing_container_checksum = container_info["Labels"][ - "ch.epfl.bbpgitlab.container_checksum" - ] - except ImageNotFoundError as ex: - logger.info(ex) - logger.info(f"We'll have to build {self.name}") - return True - logger.debug(f"Image {self.name} found") - - if ( - existing_container_checksum == self.container_checksum - and existing_s3cmd_version == self.s3cmd_version - and existing_singularity_version == self.singularity_version - ): - logger.info(f"No need to build {self.name}") - return False - - logger.info(f"We'll have to build {self.name}") - return True - - def generate(self, *args, **kwargs) -> Workflow: - """ - Generate the workflow that will build this container, if necessary - """ - if not self.needs_build(): - return Workflow() - - buildah_extra_args = [ - f'--label org.opencontainers.image.title="{self.name}"', - f'--label org.opencontainers.image.version="{self.registry_image_tag}"', - f'--label ch.epfl.bbpgitlab.singularity_version="{self.singularity_version}"', - f'--label ch.epfl.bbpgitlab.s3cmd_version="{self.s3cmd_version}"', - f'--label ch.epfl.bbpgitlab.container_checksum="{self.container_checksum}"', - f'--build-arg SINGULARITY_VERSION="{self.singularity_version}"', - f'--build-arg S3CMD_VERSION="{self.s3cmd_version}"', - ] - for architecture in self.architectures: - build_job = Job( - self.job_name, - architecture=architecture, - **copy.deepcopy(buildah_build_yaml), - ) - build_job.variables["CI_REGISTRY_IMAGE"] = self.registry_image - build_job.variables["REGISTRY_IMAGE_TAG"] = self.registry_image_tag - build_job.variables[ - "BUILDAH_EXTRA_ARGS" - ] += f" {' '.join(buildah_extra_args)}" - build_job.variables["BUILD_PATH"] = self.build_path - - self.workflow.add_job(build_job) - self.compose_workflow() - - return self.workflow - - -class Spackah(BaseContainer): - """ - A container built based on one or more Spack specs. 
- """ - - def __init__( - self, - name, - architecture, - out_dir, - registry="bbpgitlab.epfl.ch:5050/hpc/spacktainers/", - ): - self.name = name - self.architectures = [architecture] - self.architecture = architecture - self.registry = registry - self.registry_image = f"{os.environ.get('CI_REGISTRY_IMAGE')}/{name}" - self.container_definition_file = ( - f"container_definitions/{self.architecture}/{self.name}.yaml" - ) - self.spacktainer_yaml = load_yaml(self.container_definition_file) - self.container_yaml = {"spack": self.spacktainer_yaml.pop("spack")} - self.hub_namespace = "bluebrain" - self.hub_repo = f"spackah-{self.name}" - - includes = merge_dicts( - copy.deepcopy(buildah_include_yaml), - copy.deepcopy(bbp_containerizer_include_yaml), - ) - self.workflow = Workflow(**includes) - - self.spack_env_dir = out_dir / self.architecture / self.name - self.spack_env_dir.mkdir(parents=True, exist_ok=True) - - self._generate_spack_yaml() - self.concretize_spec() - - def concretize_spec(self) -> None: - """ - Concretize the full container spec with Spack - Will set the spack_lock property - """ - spack_root = os.environ["SPACK_ROOT"] - spack_cmd = shlex.split( - f"bash -c 'source {spack_root}/share/spack/setup-env.sh && " - f"spack env activate {self.spack_env_dir} && spack concretize -f'", - ) - result = subprocess.run(spack_cmd) - if result.returncode != 0: - stdout = result.stdout.decode() if result.stdout else "" - stderr = result.stderr.decode() if result.stderr else "" - raise RuntimeError( - f"Failed to concretize spec for {self.name}:\n{stdout}\n{stderr}" - ) - - self.spack_lock = self.spack_env_dir / "spack.lock" - - def _generate_container_checksum(self) -> str: - """ - Calculate the checksum of the container definition file - """ - with open( - f"container_definitions/{self.architecture}/{self.name}.yaml", "r" - ) as fp: - container_checksum = hashlib.sha256(fp.read().encode()).hexdigest() - - return container_checksum - - def _generate_spack_yaml(self) -> None: - """ - Merges the container definition with the Spack yaml template - """ - spack_yaml = copy.deepcopy(spack_template) - merge_dicts(spack_yaml, self.container_yaml) - spack_yaml["spack"]["packages"]["all"]["require"] = architecture_map[ - self.architecture - ]["base_arch"] - write_yaml(spack_yaml, self.spack_env_dir / "spack.yaml") - - def get_main_package(self) -> str: - """ - Determine the main package for this container (first in the spec list) - """ - main_spec = self.container_yaml["spack"]["specs"][0] - main_package = main_spec.split("~")[0].split("+")[0].strip() - return main_package - - def get_package_version(self, package_name: str) -> str: - """ - Get the version of a package present in the spack lockfile - """ - with open(self.spack_lock, "r") as fp: - spack_lock = json.load(fp) - - logger.debug(f"Looking for package {package_name}") - logger.debug(f"Roots: {spack_lock['roots']}") - name_from_spec = re.compile(r"^\s*([\w-]*)") - spack_hash = next( - root - for root in spack_lock["roots"] - if re.match(name_from_spec, root["spec"])[1] == package_name - )["hash"] - package_version = spack_lock["concrete_specs"][spack_hash]["version"] - - return package_version - - @property - def registry_image_tag(self) -> str: - """ - The tag the container will have once created - main package version followed by architecture, - if not building on main also insert the CI_COMMIT_REF_SLUG - """ - main_package_version = self.get_package_version(self.get_main_package()) - if prod: - tag = f"{main_package_version}" - else: - 
ci_commit_ref_slug = os.environ.get("CI_COMMIT_REF_SLUG") - tag = f"{main_package_version}__{ci_commit_ref_slug}" - - if self.architecture == "arm64": - logger.info("We want an architecture suffix on arm64") - tag = f"{tag}-{self.architecture}" - return tag - - def _create_build_job(self, builder_image_tag: str) -> Job: - """ - Create the job that will build the container image - """ - build_job = Job( - f"build {self.name} container", - architecture=self.architecture, - needs=[ - { - "pipeline": os.environ.get("CI_PIPELINE_ID"), - "job": f"generate spacktainer jobs for {self.architecture}", - "artifacts": True, - }, - ], - **copy.deepcopy(build_spacktainer_yaml), - ) - - bucket_name = architecture_map[self.architecture]["cache_bucket"]["name"] - access_key_var = architecture_map[self.architecture]["cache_bucket"][ - "keypair_variables" - ]["access_key"] - secret_key_var = architecture_map[self.architecture]["cache_bucket"][ - "keypair_variables" - ]["secret_key"] - buildah_extra_args = [ - f"--label org.opencontainers.image.title={self.name}", - f"--label org.opencontainers.image.version={self.registry_image_tag}", - f"--label ch.epfl.bbpgitlab.spack_lock_sha256={self.spack_lock_checksum}", - f"--label ch.epfl.bbpgitlab.container_checksum={self.container_checksum}", - f'--build-arg MIRROR_AUTH_ARG="--s3-access-key-id ${access_key_var} --s3-access-key-secret ${secret_key_var}"', - f"--build-arg CACHE_BUCKET={bucket_name}", - ] - - if endpoint_url := architecture_map[self.architecture]["cache_bucket"].get( - "endpoint_url" - ): - buildah_extra_args.append( - f'--build-arg MIRROR_URL_ARG="--s3-endpoint-url {endpoint_url}"' - ) - - build_path = "spacktainer" - - build_job.variables["CI_REGISTRY_IMAGE"] = self.registry_image - build_job.variables["REGISTRY_IMAGE_TAG"] = self.registry_image_tag - build_job.variables["SPACK_ENV_DIR"] = str(self.spack_env_dir) - build_job.variables["ARCH"] = self.architecture - build_job.variables["BUILD_PATH"] = build_path - build_job.variables["BUILDAH_EXTRA_ARGS"] += f" {' '.join(buildah_extra_args)}" - - dockerfile = Path(f"{build_path}/Dockerfile") - dockerfile_lines = [ - f"FROM bbpgitlab.epfl.ch:5050/hpc/spacktainers/builder:{builder_image_tag} AS builder", - f"FROM bbpgitlab.epfl.ch:5050/hpc/spacktainers/runtime:{builder_image_tag}", - "# Triggers building the 'builder' image, otherwise it is optimized away", - "COPY --from=builder /etc/debian_version /etc/debian_version", - ] - - if self.spacktainer_yaml: - for filepair in self.spacktainer_yaml["spacktainer"].get("files"): - source, target = filepair.split(":") - dockerfile_lines.append(f'"COPY {source} {target}"') - - build_job.update_before_script( - f"mkdir -p {dockerfile.parent}", - ) - build_job.update_before_script( - [f"echo {line} >> {dockerfile}" for line in dockerfile_lines], append=True - ) - build_job.artifacts = { - "when": "always", - "paths": ["spacktainer/Dockerfile"], - } - - return build_job - - def _create_sif_job(self, build_job: Job | None, singularity_image_tag: str) -> Job: - """ - Create the job that will build and upload the SIF image - """ - create_sif_job = Job( - f"create {self.name} sif file", - architecture=self.architecture, - bucket="infra", - **copy.deepcopy(create_sif_yaml), - ) - if build_job: - create_sif_job.needs.append(build_job.name) - - bucket = architecture_map[self.architecture]["containers_bucket"] - fs_container_path = f"/tmp/{self.container_filename}" - - create_sif_job.variables["CI_REGISTRY_IMAGE"] = self.registry_image - 
create_sif_job.variables["REGISTRY_IMAGE_TAG"] = self.registry_image_tag - create_sif_job.variables["FS_CONTAINER_PATH"] = fs_container_path - create_sif_job.variables["CONTAINER_NAME"] = self.name - create_sif_job.variables["SPACK_LOCK_SHA256"] = self.spack_lock_checksum - create_sif_job.variables["CONTAINER_CHECKSUM"] = self.container_checksum - create_sif_job.variables["BUCKET"] = bucket["name"] - create_sif_job.variables[ - "S3_CONTAINER_PATH" - ] = f"s3://{bucket['name']}/containers/{self.container_filename}" - - create_sif_job.image = f"bbpgitlab.epfl.ch:5050/hpc/spacktainers/singularitah:{singularity_image_tag}" - create_sif_job.configure_s3cmd() - - return create_sif_job - - def _create_docker_hub_push_job(self, build_job: Job | None) -> Job: - """ - Create the job that will push the container image to docker hub - """ - job = Job( - f"push {self.name}:{self.registry_image_tag} to docker hub", - **copy.deepcopy(docker_hub_push_yaml), - ) - job.variables["CONTAINER_NAME"] = self.name - job.variables["REGISTRY_IMAGE_TAG"] = self.registry_image_tag - job.variables["HUB_REPO_NAMESPACE"] = self.hub_namespace - job.variables["HUB_REPO_NAME"] = self.hub_repo - if build_job: - job.needs.append(build_job.name) - - return job - - def _create_bb5_download_sif_job( - self, create_sif_job: Job | None, s3cmd_version: str - ): - job = Job( - f"download {self.name} SIF to bb5", **copy.deepcopy(bb5_download_sif_yaml) - ) - sif_root = Path("/gpfs/bbp.cscs.ch/ssd/containers/hpc/spacktainers") - sif_file = sif_root / self.container_filename - job.variables["BUCKET"] = architecture_map[self.architecture][ - "containers_bucket" - ]["name"] - job.variables["SIF_FILENAME"] = self.container_filename - job.variables["FULL_SIF_PATH"] = str(sif_file) - job.variables["SPACK_LOCK_CHECKSUM"] = self.spack_lock_checksum - job.variables["CONTAINER_CHECKSUM"] = self.container_checksum - job.variables["S3CMD_VERSION"] = s3cmd_version - if create_sif_job: - job.needs.append(create_sif_job.name) - return job - - def generate( - self, builder_image_tag: str, singularity_image_tag: str, s3cmd_version: str - ) -> Workflow: - """ - Generate the workflow that will build this container, if necessary - """ - - build_job = None - create_sif_job = None - - if self.needs_build(): - build_job = self._create_build_job(builder_image_tag) - self.workflow.add_job(build_job) - - if self.needs_sif_upload(): - create_sif_job = self._create_sif_job(build_job, singularity_image_tag) - self.workflow.add_job(create_sif_job) - - if self.architecture == "amd64": - logger.info("We want the amd64 containers on bb5") - bb5_download_sif_job = self._create_bb5_download_sif_job( - create_sif_job, s3cmd_version - ) - self.workflow.add_job(bb5_download_sif_job) - - if self.needs_docker_hub_push(): - docker_hub_push_job = self._create_docker_hub_push_job(build_job) - self.workflow.add_job(docker_hub_push_job) - - logger.info(f"Workflow stages for {self.name}: {self.workflow.stages}") - return self.workflow - - @cached_property - def spack_lock_checksum(self) -> str: - """ - Calculate the sha256sum of the spack.lock file for this container - """ - with open(self.spack_lock, "r") as fp: - checksum = hashlib.sha256(fp.read().encode()).hexdigest() - - return checksum - - @property - def container_filename(self) -> str: - """ - SIF filename for the container in the S3 bucket - """ - return f"{self.name}__{self.registry_image_tag}.sif" - - def needs_docker_hub_push(self) -> bool: - """ - Check whether the container needs to be pushed to Docker Hub - * 
repository exists - * tag not present - * checksums mismatch - """ - branch = os.environ.get("CI_COMMIT_REF_SLUG") - if branch != os.environ.get("CI_DEFAULT_BRANCH"): - logger.info("Not on default branch, no need to push to docker hub") - return False - - docker_hub_user = os.environ["DOCKERHUB_USER"] - docker_hub_auth_token = os.environ["DOCKERHUB_PASSWORD"] - dh = docker_hub_login(docker_hub_user, docker_hub_auth_token) - if not docker_hub_repo_exists(dh, self.hub_namespace, self.hub_repo): - logger.info( - f"Docker Hub repository {self.hub_namespace}/{self.hub_repo} does not exist - no need to push" - ) - return False - if not docker_hub_repo_tag_exists( - dh, self.hub_namespace, self.hub_repo, self.registry_image_tag - ): - logger.info( - f"Tag {self.registry_image_tag} does not exist in Docker Hub repo {self.hub_namespace}/{self.hub_repo} - we'll have to push" - ) - return True - - container_info = self.container_info( - "docker.io", - docker_hub_user, - docker_hub_auth_token, - f"{self.hub_namespace}/{self.hub_repo}", - ) - try: - repo_spack_lock_checksum = container_info["Labels"][ - "ch.epfl.bbpgitlab.spack_lock_sha256" - ] - repo_container_checksum = container_info["Labels"][ - "ch.epfl.bbpgitlab.container_checksum" - ] - except KeyError: - logger.info("Missing one of our checksums - need to push to Docker Hub") - return True - - logger.debug(f"existing spack.lock checksum: {repo_spack_lock_checksum}") - logger.debug(f"my spack.lock checksum: {self.spack_lock_checksum}") - - logger.debug(f"existing container checksum: {repo_container_checksum}") - logger.debug(f"my container checksum: {self.container_checksum}") - - if ( - repo_container_checksum == self.container_checksum - and repo_spack_lock_checksum == self.spack_lock_checksum - ): - logger.info( - f"No need to push {self.name}:{self.registry_image_tag} to docker hub" - ) - return False - - logger.info( - f"We'll have to push {self.name}:{self.registry_image_tag} to docker hub" - ) - return True - - def needs_sif_upload(self) -> bool: - """ - Check whether the container needs to be uploaded as a SIF file - """ - bucket = architecture_map[self.architecture]["containers_bucket"] - s3 = self.get_s3_connection(bucket) - try: - object_info = s3.head_object( - Bucket=bucket["name"], - Key=f"containers/{self.container_filename}", - ) - bucket_container_checksum = object_info["ResponseMetadata"][ - "HTTPHeaders" - ].get("x-amz-meta-container-checksum", "container checksum not set") - bucket_spack_sha256 = object_info["ResponseMetadata"]["HTTPHeaders"].get( - "x-amz-meta-spack-lock-sha256", "container spack lock sha256 not set" - ) - except ClientError: - logger.debug(f"No SIF file found for {self.name}") - return True - - if ( - bucket_container_checksum != self.container_checksum - or bucket_spack_sha256 != self.spack_lock_checksum - ): - logger.debug( - f"Rebuild SIF for checksum mismatch: {bucket_container_checksum}/{self.container_checksum} or {bucket_spack_sha256}/{self.spack_lock_checksum}" - ) - return True - - return False - - def needs_build(self) -> bool: - """ - Check whether the container needs building: - * Check whether the container exists - * Check its container_checksum - * Check its spack_sha265 - """ - try: - container_info = self.container_info() - existing_container_checksum = container_info["Labels"][ - "ch.epfl.bbpgitlab.container_checksum" - ] - existing_spack_lock_checksum = container_info["Labels"][ - "ch.epfl.bbpgitlab.spack_lock_sha256" - ] - except ImageNotFoundError as ex: - logger.info(ex) - 
logger.info(f"Image not found - we'll have to build {self.name}") - return True - - logger.debug(f"existing spack.lock checksum: {existing_spack_lock_checksum}") - logger.debug(f"my spack.lock checksum: {self.spack_lock_checksum}") - - if ( - existing_container_checksum == self.container_checksum - and existing_spack_lock_checksum == self.spack_lock_checksum - ): - logger.info(f"No need to build {self.name}") - return False - - logger.info(f"We'll have to build {self.name}") - return True - - def compose_workflow(self): - raise NotImplementedError("Not applicable for Spackah containers") - - def create_multiarch_job(self): - raise NotImplementedError("Not applicable for Spackah containers") - - -class CustomContainer(BaseContainer): - """ - Custom containers are containers which are not built by us, but which already exist on - docker hub. Write a singularity definition file and place it under the desired architecture, - giving it the name you want your container to have. - """ - - def __init__( - self, - name, - architecture, - ): - self.name = name - self.architecture = architecture - self.architectures = [self.architecture] - - @cached_property - def definition(self) -> List[str]: - """ - Read the definition file and return the content as a list of lines - """ - with open( - f"container_definitions/{self.architecture}/{self.name}.def", "r" - ) as fp: - return fp.readlines() - - def get_source(self) -> tuple[str, str]: - """ - Read the definition file and return the image and tag of the source container image - """ - from_line = next( - line for line in self.definition if line.lower().startswith("from:") - ) - _, image, tag = [x.strip() for x in from_line.split(":")] - - return image, tag - - @property - def registry_image_tag(self) -> str: - """ - Determine the tag the container will have in the registry - In this case, it is taken straight from the source container - """ - _, version = self.get_source() - if prod: - tag = f"{version}__{self.architecture}" - else: - ci_commit_ref_slug = os.environ.get("CI_COMMIT_REF_SLUG") - tag = f"{version}__{ci_commit_ref_slug}__{self.architecture}" - - return tag - - @cached_property - def source_container_checksum(self) -> str: - return self.read_source_container_checksum() - - def read_source_container_checksum(self) -> str: - """ - Inspect the container we're converting to SIF and get its checksum - """ - source_image, source_version = self.get_source() - registry = os.environ.get("CI_REGISTRY") - registry_user = os.environ.get("CI_REGISTRY_USER") - registry_password = os.environ.get("CI_REGISTRY_PASSWORD") - skopeo_login_cmd = [ - "skopeo", - "login", - "-u", - registry_user, - "-p", - registry_password, - registry, - ] - logger.debug(f"Running `{skopeo_login_cmd}`") - subprocess.run(skopeo_login_cmd) - - skopeo_inspect_cmd = [ - "skopeo", - "inspect", - f"--override-arch={self.architecture}", - f"docker://{source_image}:{source_version}", - ] - logger.debug(f"Running `{skopeo_inspect_cmd}`") - result = subprocess.run(skopeo_inspect_cmd, capture_output=True) - - if result.returncode != 0: - raise ImageNotFoundError( - f"Issue with skopeo command: {result.stderr.decode()}" - ) - info = json.loads(result.stdout) - container_checksum = info["Digest"].split(":")[-1] - return container_checksum - - @property - def container_filename(self) -> str: - """ - SIF filename for the container in the S3 bucket - """ - return f"{self.name}__{self.registry_image_tag}.sif" - - def needs_build(self) -> bool: - """ - Check whether the container needs building: - 
1. Does the container exist in the bucket? - 2. Compare digest from source with bucket container - - # TODO if necessary, this can probably be refined to a per-job level - instead of the whole chain - """ - bucket = architecture_map[self.architecture]["containers_bucket"] - s3 = self.get_s3_connection(bucket) - try: - object_info = s3.head_object( - Bucket=bucket["name"], - Key=f"containers/{self.container_filename}", - ) - bucket_checksum = object_info["ResponseMetadata"]["HTTPHeaders"][ - "x-amz-meta-digest" - ] - if bucket_checksum != self.source_container_checksum: - logger.debug( - f"{self.name}: local: {self.source_container_checksum}, bucket: {bucket_checksum}" - ) - return True - except ClientError: - logger.debug(f"No container found for {self.name}") - return True - - return False - - def generate(self, singularity_image_tag: str) -> Workflow: - """ - Generate the workflow that will build this container, if necessary - """ - workflow = Workflow() - if self.needs_build(): - build_job = Job( - f"build sif file for {self.name}", - architecture=self.architecture, - force_needs=True, - bucket="infra", - **copy.deepcopy(build_custom_containers_yaml), - ) - - bucket_name = architecture_map[self.architecture]["containers_bucket"][ - "name" - ] - build_job.variables["CONTAINER_FILENAME"] = self.container_filename - build_job.variables[ - "CONTAINER_DEFINITION" - ] = f"container_definitions/{self.architecture}/{self.name}.def" - build_job.variables["SOURCE_DIGEST"] = self.source_container_checksum - build_job.variables[ - "S3_CONTAINER_PATH" - ] = f"s3://{bucket_name}/containers/{self.container_filename}" - build_job.configure_s3cmd() - build_job.image = f"bbpgitlab.epfl.ch:5050/hpc/spacktainers/singularitah:{singularity_image_tag}" - - workflow.add_job(build_job) - - return workflow - - -def generate_base_container_workflow( - singularity_version: str, s3cmd_version: str, architectures: List[str] -) -> Workflow: - """ - Generate the workflow that will build the base containers (builder, runtime, singularitah) - - :param singularity_version: which version of singularity to install in the singularitah container - :param s3cmd_version: which version of s3cmd to install in the singularitah container - :param architectures: which architectures to build for ([amd64, arm64]) - """ - logger.info("Generating base container jobs") - singularitah = Singularitah( - name="singularitah", - singularity_version=singularity_version, - s3cmd_version=s3cmd_version, - build_path="singularitah", - architectures=architectures, - ) - builder = Spacktainerizer( - name="builder", build_path="builder", architectures=architectures - ) - runtime = Spacktainerizer( - name="runtime", build_path="runtime", architectures=architectures - ) - workflow = singularitah.generate() - workflow += builder.generate() - workflow += runtime.generate() - - return workflow - - -def generate_spacktainers_workflow( - architecture: str, out_dir: Path, s3cmd_version: str -) -> Workflow: - """ - Generate the workflow that will build the actual spack-based containers - - :param architecture: which architecture to generate the workflow for (amd64, arm64) - :param out_dir: which directory to put the output into - """ - workflow = Workflow() - builder = Spacktainerizer( - name="builder", build_path="builder", architectures=[architecture] - ) - for container_path in glob.glob(f"container_definitions/{architecture}/*yaml"): - container_name = os.path.splitext(os.path.basename(container_path))[0] - logger.info( - f"Generating workflow for 
container {container_name} on {architecture}" - ) - - singularitah = Singularitah( - name="singularitah", - singularity_version="", - s3cmd_version="", - build_path="singularitah", - architectures=[architecture], - ) - logger.info(f"Generating job for {container_name}") - container = Spackah( - name=container_name, architecture=architecture, out_dir=out_dir - ) - container_workflow = container.generate( - builder.registry_image_tag, singularitah.registry_image_tag, s3cmd_version - ) - logger.debug( - f"Container {container_name} workflow jobs are {container_workflow.jobs}" - ) - workflow += container_workflow - - for custom_container_path in glob.glob( - f"container_definitions/{architecture}/*def" - ): - custom_container_name = os.path.splitext( - os.path.basename(custom_container_path) - )[0] - logger.info( - f"Generating workflow for custom container {custom_container_name} on {architecture}" - ) - custom = CustomContainer(custom_container_name, architecture) - custom_workflow = custom.generate(singularitah.registry_image_tag) - logger.debug( - f"Container {custom_container_name} workflow jobs are {custom_workflow.jobs}" - ) - workflow += custom_workflow - - logger.debug(f"Workflow jobs are {workflow.jobs}") - if not workflow.jobs: - workflow.add_job( - Job( - name="No containers to rebuild", - script="echo No containers to rebuild", - rules=[ - {"if": "$CI_PIPELINE_SOURCE == 'parent_pipeline'"}, - ], - ) - ) - return workflow diff --git a/job_creator/src/job_creator/job_templates.py b/job_creator/src/job_creator/job_templates.py deleted file mode 100644 index 89abed3..0000000 --- a/job_creator/src/job_creator/job_templates.py +++ /dev/null @@ -1,304 +0,0 @@ -parent_pipeline_rule = { - "rules": [ - {"if": "$CI_PIPELINE_SOURCE == 'parent_pipeline'"}, - ] -} - -buildah_include_yaml = { - "include": [ - {"project": "cs/gitlabci-templates", "file": "/build-image-using-buildah.yml"} - ], -} - -bbp_containerizer_include_yaml = { - "include": [ - { - "project": "nse/bbp-containerizer", - "file": "/python/ci/templates/convert-image.yml", - } - ], -} - -buildah_build_yaml = { - "extends": ".build-image-using-buildah", - "stage": "build base containers", - "timeout": "8h", - "variables": { - "KUBERNETES_CPU_LIMIT": 4, - "KUBERNETES_CPU_REQUEST": 2, - "KUBERNETES_MEMORY_LIMIT": "16Gi", - "KUBERNETES_MEMORY_REQUEST": "4Gi", - "REGISTRY_IMAGE_TAG": "", - "BUILD_PATH": "", - "CI_REGISTRY_IMAGE": "", - "BUILDAH_EXTRA_ARGS": ( - '--label org.opencontainers.image.revision="$CI_COMMIT_SHA"' - ' --label org.opencontainers.image.authors="$GITLAB_USER_NAME <$GITLAB_USER_EMAIL>"' - ' --label org.opencontainers.image.url="$CI_PROJECT_URL"' - ' --label org.opencontainers.image.source="$CI_PROJECT_URL"' - ' --label org.opencontainers.image.created="$CI_JOB_STARTED_AT"' - ' --label ch.epfl.bbpgitlab.ci-pipeline-url="$CI_PIPELINE_URL"' - ' --label ch.epfl.bbpgitlab.ci-commit-branch="$CI_COMMIT_REF_SLUG" ' - ), - }, - **parent_pipeline_rule, -} - -multiarch_yaml = { - "image": "ubuntu:22.04", - "stage": "base containers multiarch", - "script": [ - "apt-get update && apt-get install -y podman", - 'echo "Creating multiarch manifest %REGISTRY_IMAGE%:%REGISTRY_IMAGE_TAG%"', - "podman login -u ${CI_REGISTRY_USER} -p ${CI_REGISTRY_PASSWORD} --tls-verify=false ${CI_REGISTRY}", - "podman manifest create mylist", - 'echo "Adding %REGISTRY_IMAGE%:%REGISTRY_IMAGE_TAG%-arm64"', - "podman manifest add --tls-verify=false mylist %REGISTRY_IMAGE%:%REGISTRY_IMAGE_TAG%-arm64", - 'echo "Adding 
%REGISTRY_IMAGE%:%REGISTRY_IMAGE_TAG%-amd64"', - "podman manifest add --tls-verify=false mylist %REGISTRY_IMAGE%:%REGISTRY_IMAGE_TAG%-amd64", - "podman manifest push --tls-verify=false mylist %REGISTRY_IMAGE%:%REGISTRY_IMAGE_TAG%", - 'if [[ "$CI_COMMIT_REF_SLUG" == "$CI_DEFAULT_BRANCH" ]]; then', - ' echo "Also creating multiarch manifest for %REGISTRY_IMAGE%:latest multiarch"', - " podman manifest create mylist-latest", - ' echo "Adding %REGISTRY_IMAGE%:latest-arm64"', - " podman manifest add --tls-verify=false mylist-latest %REGISTRY_IMAGE%:latest-arm64", - ' echo "Adding %REGISTRY_IMAGE%:latest-amd64"', - " podman manifest add --tls-verify=false mylist-latest %REGISTRY_IMAGE%:latest-amd64", - " podman manifest push --tls-verify=false mylist-latest %REGISTRY_IMAGE%:latest", - "fi", - ], - **parent_pipeline_rule, -} - -packages_yaml = { - "timeout": "1h", - "stage": "generate build cache population job", - "script": [ - "cat /proc/cpuinfo", - "cat /proc/meminfo", - 'git config --global url."https://gitlab-ci-token:${CI_JOB_TOKEN}@bbpgitlab.epfl.ch/".insteadOf ssh://git@bbpgitlab.epfl.ch/', - ". $SPACK_ROOT/share/spack/setup-env.sh", - "spack arch", - 'spack gpg trust "$SPACK_DEPLOYMENT_KEY_PUBLIC"', - 'spack gpg trust "$SPACK_DEPLOYMENT_KEY_PRIVATE"', - "cat spack.yaml", - "spack env activate --without-view .", - "spack config blame packages", - "spack config blame mirrors", - "spack compiler find", - "spack concretize -f", - 'spack -d ci generate --check-index-only --artifacts-root "${ENV_DIR}" --output-file "${ENV_DIR}/${CI_JOB_NAME}.yml"', - ], - "artifacts": {"when": "always", "paths": ["${ENV_DIR}"]}, - **parent_pipeline_rule, -} - -process_spack_pipeline_yaml = { - "image": "ubuntu:22.04", - "stage": "process spack-generated pipelines", - "script": [ - "apt-get update && apt-get install -y ca-certificates git python3 python3-pip", - "pip install --upgrade pip setuptools", - "pip install -e ./job_creator", - "find ${SPACK_PIPELINES_ARCH_DIR}", - "jc process-spack-pipeline -d ${SPACK_PIPELINES_ARCH_DIR} -o ${OUTPUT_DIR}", - ], - "artifacts": { - "when": "always", - "paths": ["artifacts.*", "*spack_pipeline.yaml", "job_creator.log"], - }, - **parent_pipeline_rule, -} - -clean_cache_yaml = { - "image": "python:3.10-buster", - "timeout": "4h", - "allow_failure": True, - "script": [ - "apt-get update && apt-get install -y git", - "pip install ./spackitor", - "git clone https://github.com/bluebrain/spack", - "spackitor ${SPACK_ENV_ARGS} --bucket ${BUCKET} --max-age ${MAX_AGE} --spack-directory ./spack", - ], - **parent_pipeline_rule, -} - -generate_containers_workflow_yaml = { - "stage": "generate containers workflow", - "variables": { - "KUBERNETES_CPU_LIMIT": 4, - "KUBERNETES_CPU_REQUEST": 2, - "KUBERNETES_MEMORY_LIMIT": "16Gi", - "KUBERNETES_MEMORY_REQUEST": "4Gi", - }, - "script": [ - "apt-get update && apt-get install -y ca-certificates git python3 python3-pip skopeo", - "pip install --upgrade pip setuptools", - "pip install -e ./job_creator", - "jc generate-spacktainer-workflow -a ${ARCHITECTURE} -o ${OUTPUT_DIR} -s ${S3CMD_VERSION}", - ], - "artifacts": { - "when": "always", - "paths": [ - "artifacts.*/*/*/spack.lock", - "artifacts.*/*/*/spack.yaml", - "${OUTPUT_DIR}", - "job_creator.log", - ], - }, - **parent_pipeline_rule, -} - -build_spacktainer_yaml = { - "stage": "build spacktainer containers", - "extends": ".build-image-using-buildah", - "variables": { - "KUBERNETES_CPU_LIMIT": 4, - "KUBERNETES_CPU_REQUEST": 2, - "KUBERNETES_MEMORY_LIMIT": "16Gi", - 
"KUBERNETES_MEMORY_REQUEST": "4Gi", - "BUILDAH_EXTRA_ARGS": ( - ' --label org.opencontainers.image.revision="$CI_COMMIT_SHA"' - ' --label org.opencontainers.image.authors="$GITLAB_USER_NAME <$GITLAB_USER_EMAIL>"' - ' --label org.opencontainers.image.url="$CI_PROJECT_URL"' - ' --label org.opencontainers.image.source="$CI_PROJECT_URL"' - ' --label org.opencontainers.image.created="$CI_JOB_STARTED_AT"' - ' --label ch.epfl.bbpgitlab.ci-pipeline-url="$CI_PIPELINE_URL"' - ' --label ch.epfl.bbpgitlab.ci-commit-branch="$CI_COMMIT_REF_SLUG"' - ' --build-arg GITLAB_CI="$GITLAB_CI"' - ' --build-arg CI_JOB_TOKEN="$CI_JOB_TOKEN"' - ), - }, - "before_script": [ - "mkdir -p ${BUILD_PATH}", - "cp $SPACK_ENV_DIR/spack.yaml ${BUILD_PATH}/", - ], - **parent_pipeline_rule, -} - -create_sif_yaml = { - "stage": "create SIF files", - "variables": { - "KUBERNETES_CPU_LIMIT": 4, - "KUBERNETES_CPU_REQUEST": 2, - "KUBERNETES_MEMORY_LIMIT": "16Gi", - "KUBERNETES_MEMORY_REQUEST": "4Gi", - }, - "script": [ - "/bin/bash", - "cat /root/.s3cfg", - "ps", - "export SINGULARITY_DOCKER_USERNAME=${CI_REGISTRY_USER}", - "export SINGULARITY_DOCKER_PASSWORD=${CI_JOB_TOKEN}", - 'singularity pull --no-https "${FS_CONTAINER_PATH}" "docker://${CI_REGISTRY_IMAGE}:${REGISTRY_IMAGE_TAG}"', - "set +e", - "container_info=$(s3cmd info ${S3_CONTAINER_PATH}); retval=$?", - "echo $retval", - "set -e", - "if [[ ${retval} -ne 0 ]]; then", - " echo ${S3_CONTAINER_PATH} does not exist yet - deleting old versions and uploading", - " for existing_sif in $(s3cmd ls s3://${BUCKET}/containers/${CONTAINER_NAME}__ | awk '{print $4}'); do", - " LAST_MOD=$(s3cmd info ${existing_sif} | awk '/^\s+Last mod:/' | tr -d ':')", - " echo last mod is ${LAST_MOD}", - " remove=$(python -c \"from datetime import datetime, timedelta; print(datetime.strptime('${LAST_MOD}'.strip(), 'Last mod %a, %d %b %Y %H%M%S %Z') < datetime.now() - timedelta(weeks=1))\")", - " echo remove is ${remove}", - ' if [ "${remove}" == "True" ]; then', - " echo Removing ${existing_sif}", - " s3cmd rm ${existing_sif}", - " else", - " echo ${existing_sif} is less than a week old - keeping it for now as it might still be in use.", - " fi" " done", - " echo Uploading", - " s3cmd put --add-header x-amz-meta-container-checksum:${CONTAINER_CHECKSUM} --add-header x-amz-meta-spack-lock-sha256:${SPACK_LOCK_SHA256} ${FS_CONTAINER_PATH} ${S3_CONTAINER_PATH}", - "else", - " echo ${S3_CONTAINER_PATH} exists - checking sha256sum", - " bucket_spack_lock_sha256=$(echo ${container_info} | awk -F':' '/x-amz-meta-spack-lock-sha256/ {print $2}' | sed 's/ //g')", - " bucket_container_checksum=$(echo ${container_info} | awk -F':' '/x-amz-meta-container-checksum/ {print $2}' | sed 's/ //g')", - ' echo "Bucket spack lock sha256 is ${bucket_spack_lock_sha256} (expected ${SPACK_LOCK_SHA256})"', - ' echo "Bucket container checksum is ${bucket_container_checksum} (expected ${CONTAINER_CHECKSUM})"', - ' if [[ "${CONTAINER_CHECKSUM}" != "${bucket_container_checksum}" ]] || [[ "${SPACK_LOCK_SHA256}" != "${bucket_spack_lock_sha256}" ]]; then', - " echo checksum mismatch - re-uploading", - " s3cmd put --add-header x-amz-meta-container-checksum:${CONTAINER_CHECKSUM} --add-header x-amz-meta-spack-lock-sha256:${SPACK_LOCK_SHA256} ${FS_CONTAINER_PATH} ${S3_CONTAINER_PATH}", - " else", - " echo checksums match - nothing to do here", - " fi", - "fi", - ], - **parent_pipeline_rule, -} - -build_custom_containers_yaml = { - "stage": "create SIF files", - "variables": { - "KUBERNETES_CPU_LIMIT": 4, - "KUBERNETES_CPU_REQUEST": 2, - 
"KUBERNETES_MEMORY_LIMIT": "16Gi", - "KUBERNETES_MEMORY_REQUEST": "4Gi", - }, - "script": [ - "cat /root/.s3cfg", - "echo Building SIF", - "singularity build ${CONTAINER_FILENAME} ${CONTAINER_DEFINITION}", - "echo Uploading ${CONTAINER_FILENAME} to ${S3_CONTAINER_PATH}", - "s3cmd put --add-header x-amz-meta-digest:${SOURCE_DIGEST} ${CONTAINER_FILENAME} ${S3_CONTAINER_PATH}", - ], - **parent_pipeline_rule, -} - -docker_hub_push_yaml = { - "stage": "push to docker hub", - "image": "ubuntu:22.04", - "variables": { - "timeout": "4h", - }, - "script": [ - "apt-get update", - "apt-get install -y ca-certificates podman", - "podman login -u ${CI_REGISTRY_USER} -p ${CI_REGISTRY_PASSWORD} --tls-verify=false ${CI_REGISTRY}", - "podman login -u ${DOCKERHUB_USER} -p ${DOCKERHUB_PASSWORD} --tls-verify=false docker.io", - "podman pull ${CI_REGISTRY_IMAGE}/${CONTAINER_NAME}:${REGISTRY_IMAGE_TAG}", - "echo podman push ${CONTAINER_NAME}:${REGISTRY_IMAGE_TAG} docker://docker.io/bluebrain/${HUB_REPO_NAME}:${REGISTRY_IMAGE_TAG}", - "podman image list", - "echo Pushing, possibly twice because podman sometimes fails on the first attempt", - "podman push ${CONTAINER_NAME}:${REGISTRY_IMAGE_TAG} docker://docker.io/bluebrain/${HUB_REPO_NAME}:${REGISTRY_IMAGE_TAG} || podman --log-level=debug push ${CONTAINER_NAME}:${REGISTRY_IMAGE_TAG} docker://docker.io/bluebrain/${HUB_REPO_NAME}:${REGISTRY_IMAGE_TAG}", - ], - **parent_pipeline_rule, -} - -bb5_download_sif_yaml = { - "stage": "download SIF to bb5", - "tags": ["bb5_map"], - "script": [ - "module load unstable singularityce", - "if [ -e ${FULL_SIF_PATH} ]; then", - " EXISTING_SPACK_LOCK_CHECKSUM=$(singularity inspect ${FULL_SIF_PATH} | awk '/spack_lock_sha256/ {print $2}')", - " EXISTING_CONTAINER_CHECKSUM=$(singularity inspect ${FULL_SIF_PATH} | awk '/container_checksum/ {print $2}')", - " if [[ ${SPACK_LOCK_CHECKSUM} == ${EXISTING_SPACK_LOCK_CHECKSUM} ]] && [[ ${CONTAINER_CHECKSUM} == ${EXISTING_CONTAINER_CHECKSUM} ]]; then", - " echo ${FULL_SIF_PATH} 'exists and checksums match, nothing to do here'", - " exit 0", - " else", - " echo ${FULL_SIF_PATH} 'exists but checksums mismatch, re-downloading'", - " echo ${EXISTING_SPACK_LOCK_CHECKSUM} vs ${SPACK_LOCK_CHECKSUM}", - " echo ${EXISTING_CONTAINER_CHECKSUM} vs ${CONTAINER_CHECKSUM}", - " echo Removing ${FULL_SIF_PATH}", - " rm ${FULL_SIF_PATH}", - " fi", - "fi", - "echo Configuring s3cmd", - "sed -i 's/^access_key.*/access_key='${AWS_INFRASTRUCTURE_ACCESS_KEY_ID}'/' _s3cfg", - "sed -i 's/^secret_key.*/secret_key='${AWS_INFRASTRUCTURE_SECRET_ACCESS_KEY}'/' _s3cfg", - "let length=$(($(echo $(expr index ${HTTP_PROXY:7} :)) - 1))", - "PROXY_HOST=${HTTP_PROXY:7:${length}}", - "PROXY_PORT=${HTTP_PROXY:$((7+${length}+1))}", - "sed -i 's/^proxy_host.*/proxy_host='${PROXY_HOST}'/' _s3cfg", - "sed -i 's/^proxy_port.*/proxy_port='${PROXY_PORT}'/' _s3cfg", - "cat _s3cfg", - "echo Downloading s3cmd", - "wget https://github.com/s3tools/s3cmd/releases/download/v${S3CMD_VERSION}/s3cmd-${S3CMD_VERSION}.tar.gz", - "tar xf s3cmd-${S3CMD_VERSION}.tar.gz", - "export PATH=$(realpath ./s3cmd-${S3CMD_VERSION}):$PATH", - "echo s3cmd get --config=_s3cfg s3://${BUCKET}/containers/${SIF_FILENAME} ${FULL_SIF_PATH}", - "s3cmd get --config=_s3cfg s3://${BUCKET}/containers/${SIF_FILENAME} ${FULL_SIF_PATH}", - ], - **parent_pipeline_rule, -} diff --git a/job_creator/src/job_creator/logging_config.py b/job_creator/src/job_creator/logging_config.py deleted file mode 100644 index 9e98472..0000000 --- a/job_creator/src/job_creator/logging_config.py +++ 
/dev/null @@ -1,24 +0,0 @@ -LOGGING_CONFIG = { - "version": 1, - "disable_existing_loggers": False, - "formatters": {"fmt": {"format": "[%(asctime)s] [%(levelname)s] %(msg)s"}}, - "handlers": { - "sh": { - "class": "logging.StreamHandler", - "level": "INFO", - "formatter": "fmt", - }, - "fh": { - "class": "logging.handlers.RotatingFileHandler", - "level": "DEBUG", - "formatter": "fmt", - "filename": "job_creator.log", - }, - }, - "loggers": { - "job_creator": { - "level": "DEBUG", - "handlers": ["sh", "fh"] - } - } -} diff --git a/job_creator/src/job_creator/packages.py b/job_creator/src/job_creator/packages.py deleted file mode 100644 index 0ff11af..0000000 --- a/job_creator/src/job_creator/packages.py +++ /dev/null @@ -1,181 +0,0 @@ -import copy -import logging -import logging.config -import os -from datetime import datetime -from pathlib import Path -from typing import Dict, List, Tuple - -from ruamel.yaml import YAML - -from job_creator.architectures import architecture_map -from job_creator.ci_objects import Job, Workflow -from job_creator.job_templates import (packages_yaml, - process_spack_pipeline_yaml) -from job_creator.logging_config import LOGGING_CONFIG -from job_creator.utils import merge_dicts - -logging.config.dictConfig(LOGGING_CONFIG) -logger = logging.getLogger("job_creator") - - -def read_container_definitions(arch: str) -> Tuple[str, Dict]: - """ - Read and iterate through all container definitions, returning a tuple containing - the container name and its definition - """ - yaml = YAML(typ="safe", pure=True) - arch_folder = Path(f"container_definitions/{arch}/") - for df in arch_folder.glob(f"*.yaml"): - logger.debug(f"Reading file {df.name}") - with open(df, "r") as fp: - yield df.stem, yaml.load(fp) - - -def process_spack_yaml( - container_name: str, container_definition: Dict, architecture: str -) -> None: - """ - Create the full spack.yaml needed to build a container - """ - yaml = YAML(typ="safe", pure=True) - with open("spack.yaml", "r") as fp: - spack = yaml.load(fp) - - spack["spack"]["specs"] = container_definition["spack"]["specs"] - if package_restrictions := container_definition["spack"].get("packages"): - merge_dicts(spack, {"spack": {"packages": package_restrictions}}) - - for section in spack["spack"]["ci"]["pipeline-gen"]: - for key in section: - section[key]["tags"] = [architecture_map[architecture]["tag"]] - section[key]["image"] = builder_image() - section[key]["image"]["entrypoint"] = [""] - - spack["spack"]["mirrors"][ - "bbpS3_upload" - ] = f"s3://{architecture_map[architecture]['cache_bucket']['name']}" - for package, pkg_conf in spack["spack"]["packages"].items(): - if pkg_conf.get("require") == "%BASE_ARCH%": - pkg_conf["require"] = architecture_map[architecture]["base_arch"] - - yaml = YAML() - yaml.indent(mapping=2, sequence=4, offset=2) - yaml.width = 120 - with open(f"merged_spack_{container_name}_{architecture}.yaml", "w") as fp: - yaml.dump(spack, fp) - - -def builder_image(): - """ - Return the builder image as it needs to appear in the pipeline yaml - """ - current_branch = os.environ.get("CI_COMMIT_REF_SLUG") - if current_branch == os.environ.get("CI_DEFAULT_BRANCH"): - image_tag = "latest" - else: - today = datetime.strftime(datetime.today(), "%Y.%m.%d") - image_tag = f"{today}-{current_branch}" - image = { - "name": f"bbpgitlab.epfl.ch:5050/hpc/spacktainers/builder:{image_tag}", - "pull_policy": "always", - } - - return image - - -def generate_process_spack_jobs(architectures, cache_population_job_names): - """ - Generate the job 
that will process the spack-produced jobs - """ - workflow = Workflow() - for architecture in architectures: - job = Job( - "process spack pipeline", - architecture=architecture, - **copy.deepcopy(process_spack_pipeline_yaml), - ) - for job_name in cache_population_job_names[architecture]: - job.needs.append( - { - "job": job_name, - "artifacts": True, - }, - ) - job.variables.update( - { - "SPACK_PIPELINES_ARCH_DIR": f"jobs_scratch_dir.{architecture}", - "OUTPUT_DIR": f"artifacts.{architecture}", - } - ) - workflow.add_job(job) - - return workflow - - -def generate_packages_workflow(architectures): - """ - Generate the job that will run `spack ci generate` - """ - logger.info("Generating packages jobs") - workflow = Workflow() - cache_population_job_names = {arch: [] for arch in architectures} - - for architecture in architectures: - logger.info( - f"Generating generate build cache population jobs for {architecture}" - ) - for container_name, container_definition in read_container_definitions( - architecture - ): - logger.info( - f"Generating generate build cache population job for {container_name}" - ) - packages_job = Job( - f"generate build cache population job for {container_name}", - architecture=architecture, - **copy.deepcopy(packages_yaml), - ) - cache_population_job_names[architecture].append(packages_job.name) - logger.debug("Adding build cache-related variables") - packages_job.variables["SPACK_BUILD_CACHE_BUCKET"] = architecture_map[ - architecture - ]["cache_bucket"]["name"] - packages_job.variables[ - "ENV_DIR" - ] = f"${{CI_PROJECT_DIR}}/jobs_scratch_dir.{architecture}/{container_name}/" - packages_job.variables["CONTAINER_NAME"] = container_name - packages_job.variables.update( - architecture_map[architecture].get("variables", {}) - ) - logger.debug("Adding tags, image and needs") - packages_job.image = builder_image() - packages_job.needs.append( - { - "pipeline": os.environ.get("CI_PIPELINE_ID"), - "job": "generate base pipeline", - "artifacts": True, - } - ) - logger.debug("Keypair variables") - packages_job.add_spack_mirror() - packages_job.set_aws_variables() - - logger.debug(f"Adding rename merged_spack_{architecture}.yaml command") - packages_job.update_before_script( - f"mv merged_spack_{container_name}_{architecture}.yaml spack.yaml", - append=True, - ) - logger.debug("Generating spack.yaml for containers") - logger.debug(f"{container_name} definition: {container_definition}") - process_spack_yaml( - container_name, - container_definition, - architecture, - ) - workflow.add_job(packages_job) - - logger.debug("Generating job to process spack-generated yaml") - workflow += generate_process_spack_jobs(architectures, cache_population_job_names) - - return workflow, cache_population_job_names diff --git a/job_creator/src/job_creator/spack_template.py b/job_creator/src/job_creator/spack_template.py deleted file mode 100644 index fc53aca..0000000 --- a/job_creator/src/job_creator/spack_template.py +++ /dev/null @@ -1,6 +0,0 @@ -from job_creator.utils import load_yaml - -spack_template = load_yaml("spack.yaml") -spack_template["spack"]["config"]["install_tree"] = "/opt/software" -spack_template["spack"]["view"] = "/opt/view" -spack_template["spack"].pop("specs") diff --git a/job_creator/src/job_creator/utils.py b/job_creator/src/job_creator/utils.py deleted file mode 100644 index a651c73..0000000 --- a/job_creator/src/job_creator/utils.py +++ /dev/null @@ -1,144 +0,0 @@ -import json -import logging -import logging.config -from pathlib import Path - -import requests 
-import ruamel.yaml - -from job_creator.logging_config import LOGGING_CONFIG - -logging.config.dictConfig(LOGGING_CONFIG) -logger = logging.getLogger("job_creator") - -DOCKER_HUB_API = "https://hub.docker.com/v2" - - -class NonAliasingRoundTripRepresenter(ruamel.yaml.representer.RoundTripRepresenter): - def ignore_aliases(self, data): - return True - - -def load_yaml(path): - yaml = ruamel.yaml.YAML(typ="safe") - with open(path, "r") as fp: - loaded = yaml.load(fp) - - return loaded - - -def write_yaml(content, path): - yaml = ruamel.yaml.YAML() - yaml.Representer = NonAliasingRoundTripRepresenter - yaml.indent(mapping=2, sequence=4, offset=2) - yaml.width = 120 - yaml.default_flow_style = False - yaml.default_style = '"' - with open(path, "w") as fp: - yaml.dump(content, fp) - - -def merge_dicts(a, b, path=None): - """Merges b into a - - :param a: dict to merge into - :param b: dict to merge into a - :param path: where we are in the merge, for error reporting - - :returns: dictionary a with values from b merged in - :rtype: dict - """ - path = [] if path is None else path - for key in b: - if key in a: - if isinstance(a[key], dict) and isinstance(b[key], dict): - merge_dicts(a[key], b[key], path + [str(key)]) - elif a[key] == b[key]: - pass # same leaf value - elif isinstance(a[key], list) and isinstance(b[key], list): - a[key].extend(b[key]) - else: - raise Exception("Conflict at %s" % ".".join(path + [str(key)])) - else: - a[key] = b[key] - return a - - -def get_architectures(): - """ - Retrieve the architectures that need to be built based on the container definitions - """ - architectures = [ - archdir.name for archdir in Path("container_definitions").glob("[!_]*") - ] - - return architectures - - -def get_arch_or_multiarch_job(workflow, architecture, container_name="builder"): - """ - Given a workflow and the name of a container (in practice, this will usually be builder), - this method will return a list with either the build job or, if applicable, the multiarch job. 
- If the container doesn't need to be built, will return an empty list - """ - - multiarch_job_name = f"create multiarch for {container_name}" - builder_job_name = f"build {container_name}" - - logger.debug(f"Getting {container_name} build jobs in {workflow.jobs}") - if multiarch_jobs := workflow.get_job(multiarch_job_name, startswith=True): - logger.debug(f"Multi-arch jobs found: {multiarch_jobs}") - return multiarch_jobs - elif build_jobs := workflow.get_job(builder_job_name, startswith=True): - logger.debug(f"Build jobs found: {build_jobs}") - return build_jobs - else: - return [] - - -def docker_hub_login(username: str, auth_token: str) -> requests.Session: - """ - Login to docker hub and return a session with the correct headers set - """ - - session = requests.Session() - response = session.post( - "https://hub.docker.com/v2/users/login/", - data=json.dumps({"username": username, "password": auth_token}), - headers={"Content-Type": "application/json"}, - ) - auth_token = response.json()["token"] - session.headers = { - "Content-Type": "application/json", - "Authorization": f"JWT {auth_token}", - } - - return session - - -def docker_hub_repo_exists(session: requests.Session, namespace: str, repo: str) -> bool: - """ - Check whether a docker hub repository exists - - :param session: an authenticated Session to docker hub - :param namespace: the namespace in which the repository is to be found - :param repo: the name of the repository to check - """ - - response = session.get(f"{DOCKER_HUB_API}/namespaces/{namespace}/repositories/{repo}/") - return response.status_code == 200 - - -def docker_hub_repo_tag_exists(session: requests.Session, namespace: str, repo: str, tag: str) -> bool: - """ - Check whether a tag exists in a given docker hub repository - - :param session: an authenticated Session to docker hub - :param namespace: the namespace in which the repository is to be found - :param repo: the name of the repository - :param tag: the tag to check - """ - - response = session.get(f"{DOCKER_HUB_API}/namespaces/{namespace}/repositories/{repo}/tags/{tag}") - - return response.status_code == 200 diff --git a/singularitah/Dockerfile b/singularitah/Dockerfile deleted file mode 100644 index 6a364f6..0000000 --- a/singularitah/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -FROM alpine:3.18 -# https://github.com/mattn/go-sqlite3/issues/1164#issuecomment-1848677118 - -ARG SINGULARITY_VERSION -ARG S3CMD_VERSION - -RUN apk add autoconf automake bash fuse3-dev gcc git glib-dev libc-dev libseccomp-dev libtool linux-headers make py3-dateutil squashfs-tools go wget -RUN wget https://github.com/sylabs/singularity/releases/download/v${SINGULARITY_VERSION}/singularity-ce-${SINGULARITY_VERSION}.tar.gz -RUN tar xf singularity-ce-${SINGULARITY_VERSION}.tar.gz -RUN cd singularity-ce-${SINGULARITY_VERSION} && \ - ./mconfig && \ - cd builddir && \ - make && \ - make install -RUN singularity --version - -RUN mkdir /opt/s3cmd -COPY _s3cfg /root/.s3cfg -RUN cat /root/.s3cfg -RUN wget https://github.com/s3tools/s3cmd/releases/download/v${S3CMD_VERSION}/s3cmd-${S3CMD_VERSION}.tar.gz -RUN tar xf s3cmd-${S3CMD_VERSION}.tar.gz -C /opt/s3cmd/ --strip-components=1 -ENV PATH="${PATH}:/opt/s3cmd" - -ENTRYPOINT ["/bin/bash", "-c"] diff --git a/singularitah/_s3cfg b/singularitah/_s3cfg deleted file mode 100644 index 5afec2f..0000000 --- a/singularitah/_s3cfg +++ /dev/null @@ -1,87 +0,0 @@ -[default] -access_key = -access_token = -add_encoding_exts = -add_headers = -bucket_location = us-east-1 -ca_certs_file = -cache_file = 
-check_ssl_certificate = True -check_ssl_hostname = True -cloudfront_host = cloudfront.amazonaws.com -connection_max_age = 5 -connection_pooling = True -content_disposition = -content_type = -default_mime_type = binary/octet-stream -delay_updates = False -delete_after = False -delete_after_fetch = False -delete_removed = False -dry_run = False -enable_multipart = True -encoding = UTF-8 -encrypt = False -expiry_date = -expiry_days = -expiry_prefix = -follow_symlinks = False -force = False -get_continue = False -gpg_command = /usr/bin/gpg -gpg_decrypt = %(gpg_command)s -d --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s -gpg_encrypt = %(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s -gpg_passphrase = -guess_mime_type = True -host_base = s3.amazonaws.com -host_bucket = %(bucket)s.s3.amazonaws.com -human_readable_sizes = False -invalidate_default_index_on_cf = False -invalidate_default_index_root_on_cf = True -invalidate_on_cf = False -kms_key = -limit = -1 -limitrate = 0 -list_md5 = False -log_target_prefix = -long_listing = False -max_delete = -1 -mime_type = -multipart_chunk_size_mb = 15 -multipart_copy_chunk_size_mb = 1024 -multipart_max_chunks = 10000 -preserve_attrs = True -progress_meter = True -proxy_host = -proxy_port = 0 -public_url_use_https = False -put_continue = False -recursive = False -recv_chunk = 65536 -reduced_redundancy = False -requester_pays = False -restore_days = 1 -restore_priority = Standard -secret_key = -send_chunk = 65536 -server_side_encryption = False -signature_v2 = False -signurl_use_https = False -simpledb_host = sdb.amazonaws.com -skip_existing = False -socket_timeout = 300 -ssl_client_cert_file = -ssl_client_key_file = -stats = False -stop_on_error = False -storage_class = -throttle_max = 100 -upload_id = -urlencoding_mode = normal -use_http_expect = False -use_https = True -use_mime_magic = True -verbosity = WARNING -website_endpoint = http://%(bucket)s.s3-website-%(location)s.amazonaws.com/ -website_error = -website_index = index.html diff --git a/spackitor/README.md b/spackitor/README.md deleted file mode 100644 index 2b10a07..0000000 --- a/spackitor/README.md +++ /dev/null @@ -1,111 +0,0 @@ -# Spackitor - -Spackitor is the Spack Janitor - this is the tool that cleans up the build cache. It can do so in two ways: - - 1. Either you give it a (list of) spack environment file(s). In that case it will clean anything that is not specified in them and that is older than the specified time. - 2. If you don't give it (a) spack environment file(s), it will simply remove anything older than the specified time. - -Anything that depends on a deleted object will also be deleted. -Spackitor cleanup is simply a matter of removing S3 objects (both the `.spack` object and the `.sig` object). The pipeline itself takes care of updating the index file. - - -## The `scripts` dir - -Scripts in this directory are written as helpers and may or may not be documented. Use at your own risk and make sure to read and edit them for your use case! - -### s3.py - -Some useful operations I've found myself doing on the buckets. - -```python -from s3 import browser -``` - -The browser object has an s3_client property, saving you the trouble of creating it yourself, and various handy functions for bucket cleanup. -Make sure you have `~/.ssh/buildcache_keys` with two lines: first line is the access key, second line is the secret key. 
-Most methods should have a docstring that is clear enough. - -As an example, we'll purge all the `dev` versions of `py-bbp-workflow` in the production bucket. Note that after an operation like this, you *must* rebuild the cache (see below). - -```python -In [1]: from s3 import browser -In [2]: package_objects = browser.find_package_objects("py-bbp-workflow", bucket="spack-build-cache") -In [3]: browser.s3_client.delete_objects(Bucket="spack-build-cache", Delete={"Objects": [{"Key": x["Key"]} for x in package_objects if ".dev" in x["Key"]]}) -Out[3]: -{'ResponseMetadata': {'RequestId': '1694685187482502', - 'HostId': '12763010', - 'HTTPStatusCode': 200, - 'HTTPHeaders': {'date': 'Thu, 14 Sep 2023 12:48:31 GMT', - 'content-type': 'application/xml', - 'transfer-encoding': 'chunked', - 'connection': 'keep-alive', - 'server': 'StorageGRID/11.6.0.7', - 'x-amz-request-id': '1694685187482502', - 'x-amz-id-2': '12763010', - 'x-ntap-sg-trace-id': '85e822f5e0207319'}, - 'RetryAttempts': 0}, - 'Deleted': [{'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.32.dev2-6unhycokwejx7iblm4z3lhi5vxxiq5xj.spec.json.sig'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.32.dev2-oc76gh2y3mt6impaal7vc222uxdrfvol.spec.json.sig'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.32.dev2-xemjhuhcwrc65aweq6zuim3gxwropzzw.spec.json.sig'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.32.dev3-ajsokhwhsb75v4zkpj2fjd75hzf5ha2q.spec.json.sig'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.32.dev3-amobtu74jahfcsnkzokt4hlohyxfepj2.spec.json.sig'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.32.dev3-f3fe5avblvdap7x35toopdrwoojs2wb2.spec.json.sig'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.32.dev3-osibvkeqa2exoxeltfbjtefk7dz256ix.spec.json.sig'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.32.dev3-xi6vf4zrpmjl2bt76zlu22iq4sr4gdmp.spec.json.sig'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.33.dev1-gzdxkjmp2y3wqhnyyhltrczlgci3u6pd.spec.json.sig'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.33.dev2-k2tiw5alqu2jovbhrikwb4liaa5avs3z.spec.json.sig'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.33.dev2-meyyfo45s27ztuuyolv5w5crfbwlytu4.spec.json.sig'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3/gcc-12.3.0/py-bbp-workflow-3.1.32.dev2/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.32.dev2-6unhycokwejx7iblm4z3lhi5vxxiq5xj.spack'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3/gcc-12.3.0/py-bbp-workflow-3.1.32.dev2/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.32.dev2-oc76gh2y3mt6impaal7vc222uxdrfvol.spack'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3/gcc-12.3.0/py-bbp-workflow-3.1.32.dev2/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.32.dev2-xemjhuhcwrc65aweq6zuim3gxwropzzw.spack'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3/gcc-12.3.0/py-bbp-workflow-3.1.32.dev3/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.32.dev3-ajsokhwhsb75v4zkpj2fjd75hzf5ha2q.spack'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3/gcc-12.3.0/py-bbp-workflow-3.1.32.dev3/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.32.dev3-amobtu74jahfcsnkzokt4hlohyxfepj2.spack'}, - {'Key': 
'build_cache/linux-ubuntu22.04-x86_64_v3/gcc-12.3.0/py-bbp-workflow-3.1.32.dev3/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.32.dev3-f3fe5avblvdap7x35toopdrwoojs2wb2.spack'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3/gcc-12.3.0/py-bbp-workflow-3.1.32.dev3/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.32.dev3-osibvkeqa2exoxeltfbjtefk7dz256ix.spack'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3/gcc-12.3.0/py-bbp-workflow-3.1.32.dev3/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.32.dev3-xi6vf4zrpmjl2bt76zlu22iq4sr4gdmp.spack'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3/gcc-12.3.0/py-bbp-workflow-3.1.33.dev1/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.33.dev1-gzdxkjmp2y3wqhnyyhltrczlgci3u6pd.spack'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3/gcc-12.3.0/py-bbp-workflow-3.1.33.dev2/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.33.dev2-k2tiw5alqu2jovbhrikwb4liaa5avs3z.spack'}, - {'Key': 'build_cache/linux-ubuntu22.04-x86_64_v3/gcc-12.3.0/py-bbp-workflow-3.1.33.dev2/linux-ubuntu22.04-x86_64_v3-gcc-12.3.0-py-bbp-workflow-3.1.33.dev2-meyyfo45s27ztuuyolv5w5crfbwlytu4.spack'}]} -``` - - ---- -A note on rebuilding the index. -You don't have to do both, just the one which is relevant. You need: - * AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY shell variables - * The bucket configured as a spack mirror (otherwise spack tries to go to Amazon S3 instead of our own endpoint) - * This git diff: - -``` -diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py -index 8ceeeea738..1d4020a66f 100644 ---- a/lib/spack/spack/binary_distribution.py -+++ b/lib/spack/spack/binary_distribution.py -@@ -986,6 +986,8 @@ def file_read_method(file_path): - sync_command_args = [ - "s3", - "sync", -+ "--endpoint-url", -+ "https://bbpobjectstorage.epfl.ch", - "--exclude", - "*", - "--include", -``` - -```bash -export AWS_ACCESS_KEY_ID=... -export AWS_SECRET_ACCESS_KEY=... -spack mirror add --s3-endpoint-url https://bbpobjectstorage.epfl.ch bbpS3 s3://spack-build-cache # if not done yet -spack mirror add --s3-endpoint-url https://bbpobjectstorage.epfl.ch bbpS3-dev s3://spack-build-cache-dev # if not done yet -spack buildcache update-index -d s3://spack-build-cache -spack buildcache update-index -d s3://spack-build-cache-dev -``` ---- - -### purge_dependents.py - -This script will purge a package and everything that depends on it from the specified cache. Don't forget to rebuild the index afterwards! (see the note above) - -It's also advisable to run the spack-cacher pipeline from `main` after doing this, as a lot of packages will have to be rebuilt. - -It uses the `s3.py` script under the hood, so refer to the documentation higher in this file on how to specify credentials. 
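For reference, the flags the (now deleted) `spackitor/scripts/purge_dependents.py` defines are `-b/--bucket`, `-p/--package` and `-d/--delete`; the sketch below is a rough, illustrative invocation only — the package name is just an example, and it assumes you run it from `spackitor/scripts` so that `s3.py` (and the `~/.ssh/buildcache_keys` credentials it reads) are picked up:

```bash
# Dry run first: prints the package and everything that depends on it, deletes nothing.
./purge_dependents.py --bucket spack-build-cache-dev --package py-bbp-workflow

# Re-run with --delete to remove the whole chain, then rebuild the index as described above.
./purge_dependents.py --bucket spack-build-cache-dev --package py-bbp-workflow --delete
spack buildcache update-index -d s3://spack-build-cache-dev
```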
diff --git a/spackitor/pyproject.toml b/spackitor/pyproject.toml deleted file mode 100644 index d44631a..0000000 --- a/spackitor/pyproject.toml +++ /dev/null @@ -1,27 +0,0 @@ -[build-system] -requires = ["setuptools"] -build_backend = "setuptools" - -[project] -name = "spackitor" -description = "Spack janitor to clean the binary build cache" -version = "0.2.0" -authors = [ { name = "Erik Heeren", email = "erik.heeren@epfl.ch" } ] -dependencies = ["boto3", "click"] -classifiers = [ - "Development Status :: 3 - Alpha", - "License :: OSI Approved :: MIT License", - "Natural Language :: English", - "Operating System :: OS Independent", - "Programming Language :: Python", - "Programming Language :: Python :: 3.10", -] - -[project.optional-dependencies] -test = [ - "pytest", - "coverage" -] - -[project.scripts] -spackitor = "spackitor.__main__:clean_cache" diff --git a/spackitor/scripts/purge_checksum.py b/spackitor/scripts/purge_checksum.py deleted file mode 100644 index 072ee3b..0000000 --- a/spackitor/scripts/purge_checksum.py +++ /dev/null @@ -1,54 +0,0 @@ -import json -from io import BytesIO - -import boto3 -from botocore.exceptions import ClientError - -from src import spackitor - -BUCKET = "spack-build-cache" -S3_PAGINATION = 1000 - - -def main(string_to_purge): - """ - Purge all objects which contain a given string - """ - session = boto3.session.Session() - s3_client = session.client( - service_name="s3", - endpoint_url="https://bbpobjectstorage.epfl.ch", - aws_access_key_id="QNJQ73E6O6HQICUIAM8B", - aws_secret_access_key="TOnJkQX3Uorex1OPfqTEY3P8lAX1Y2ipFdLSNDhx", - ) - objects = { - "IsTruncated": True, - "NextContinuationToken": "", - "Contents": [{"Key": ""}], - } - - all_objects = [] - delete_from_bucket = [] - keep_in_bucket = [] - - while objects["IsTruncated"]: - objects = s3_client.list_objects_v2(Bucket=BUCKET, ContinuationToken=objects["NextContinuationToken"], StartAfter=objects["Contents"][-1]["Key"]) - for s3_object in objects["Contents"]: - all_objects.append(s3_object["Key"]) - - for s3_object_key in all_objects: - if string_to_purge in s3_object_key: - print(f"Object {s3_object_key} contains {string_to_purge} - removing") - delete_from_bucket.append({"Key": s3_object["Key"]}) - - print(f"Will remove {len(delete_from_bucket)} objects.") - - for keys_to_delete in spackitor.split_list(delete_from_bucket, S3_PAGINATION): - spackitor.delete(s3_client, BUCKET, {"Objects": keys_to_delete}) - - -if __name__ == "__main__": - main("l3xgawhlb2tdgsz3mhmmd7bjlrgfv6oi") - main("7dy2hyb3qr44s4qi7de4v2fjj3xisfeq") - main("rrlt4yla3jv52ye3mwiyre7tts2hziro") - main("isnguizzrzkmyxbfhzgrkcbpo5wvw45b") diff --git a/spackitor/scripts/purge_dependents.py b/spackitor/scripts/purge_dependents.py deleted file mode 100755 index 5d5d292..0000000 --- a/spackitor/scripts/purge_dependents.py +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env python -from argparse import ArgumentParser - -from s3 import Browser - -""" -Script to purge everything that depends on a package, all the way to the top. -There's probably a lot of duplicate work being done, to be cleaned up. 
-""" - - -def humanize_checksum(index, checksum): - return f"{index['database']['installs'][checksum]['spec']['name']} ({checksum})" - - -def get_sub_dependents(index, dependents): - """ - Dependents is a list of hashes - """ - - all_sub_dependents = dependents.copy() - - for dependent in dependents: - print(f" * {humanize_checksum(index, dependent)}") - sub_dependents = [ - install - for install in index["database"]["installs"] - if any( - x["hash"] == dependent - for x in index["database"]["installs"][install]["spec"].get( - "dependencies", [] - ) - ) - ] - - if sub_dependents: - humanized_sub_dependents = [ - humanize_checksum(index, sub_dependent) - for sub_dependent in sub_dependents - ] - print(f" -> {humanized_sub_dependents}") - all_sub_dependents.extend(sub_dependents) - all_sub_dependents.extend(get_sub_dependents(index, sub_dependents)) - - return all_sub_dependents - - -def get_dependents(index, package): - print(f"Finding dependents for {package}") - dependents = [ - install - for install in index["database"]["installs"] - if any( - x["name"] == package - for x in index["database"]["installs"][install]["spec"].get( - "dependencies", [] - ) - ) - ] - - dependents.extend(get_sub_dependents(index, dependents)) - - return dependents - - -def main(bucket, package, delete): - browser = Browser() - index = browser.get_index(bucket) - dependents = get_dependents(index, package) - print(f"{dependents} ({len(dependents)})") - - if delete: - for dependent in dependents: - print(f"Looking for {dependent}") - package_objects = browser.find_package_objects(dependent, bucket=bucket) - to_delete = { - "Objects": [ - {"Key": x["Key"]} for x in package_objects if dependent in x["Key"] - ] - } - if to_delete["Objects"]: - print(f"DELETE {to_delete}") - browser.s3_client.delete_objects(Bucket=bucket, Delete=to_delete) - else: - print( - f"SKIP {humanize_checksum(index, dependent)} - no objects in cache" - ) - - -if __name__ == "__main__": - parser = ArgumentParser( - "Dependents purger. Finds a package and everything that depends on it, " - "all the way to the top. Will delete the whole chain if asked to do so.") - ) - parser.add_argument( - "-b", - "--bucket", - default="spack-build-cache", - help="The bucket to purge. spack-build-cache or spack-build-cache-dev", - ) - parser.add_argument("-p", "--package", help="Which package you wish to purge") - parser.add_argument( - "-d", - "--delete", - help="Actually do the delete. 
If not set, just prints", - action="store_true", - default=False, - ) - args = parser.parse_args() - - print(f"Bucket: {args.bucket}") - print(f"Package: {args.package}") - print(f"Delete: {args.delete}") - - main(args.bucket, args.package, args.delete) diff --git a/spackitor/scripts/purge_signatures.py b/spackitor/scripts/purge_signatures.py deleted file mode 100644 index cc5b6f1..0000000 --- a/spackitor/scripts/purge_signatures.py +++ /dev/null @@ -1,73 +0,0 @@ -import json -from io import BytesIO - -import boto3 -from botocore.exceptions import ClientError - -from src import spackitor - -BUCKET = "spack-build-cache" -S3_PAGINATION = 1000 - - -def main(): - """ - Purge all signature objects which no longer have a package - """ - session = boto3.session.Session() - s3_client = session.client( - service_name="s3", - endpoint_url="https://bbpobjectstorage.epfl.ch", - aws_access_key_id="QNJQ73E6O6HQICUIAM8B", - aws_secret_access_key="TOnJkQX3Uorex1OPfqTEY3P8lAX1Y2ipFdLSNDhx", - ) - objects = { - "IsTruncated": True, - "NextContinuationToken": "", - "Contents": [{"Key": ""}], - } - - all_objects = [] - delete_from_bucket = [] - keep_in_bucket = [] - - while objects["IsTruncated"]: - print(objects["Contents"][-1]) - objects = s3_client.list_objects_v2(Bucket=BUCKET, ContinuationToken=objects["NextContinuationToken"], StartAfter=objects["Contents"][-1]["Key"]) - for s3_object in objects["Contents"]: - all_objects.append(s3_object["Key"]) - - for s3_object_key in all_objects: - if s3_object_key.endswith(".spec.json.sig"): - real_spec = download_sig(s3_client, s3_object_key) - obj_key = spackitor.build_key(real_spec["spec"]["nodes"][0]) - if obj_key in all_objects: - print(f"Object {obj_key} found - keeping signature") - keep_in_bucket.append(s3_object["Key"]) - else: - print(f"Object {obj_key} not found - removing signature") - delete_from_bucket.append({"Key": s3_object["Key"]}) - - print(f"Will keep {len(keep_in_bucket)} signatures.") - print(f"Will kill {len(delete_from_bucket)} signatures.") - - for keys_to_delete in spackitor.split_list(delete_from_bucket, S3_PAGINATION): - spackitor.delete(s3_client, BUCKET, {"Objects": keys_to_delete}) - - -def download_sig(s3_client, sig_key): - """ - Download and parse a signature file, return the contents stripped of signature - and parsed through json - """ - bio = BytesIO() - s3_client.download_fileobj(BUCKET, sig_key, bio) - bio.seek(0) - sig_spec = bio.read() - real_spec = json.loads("\n".join(sig_spec.decode().splitlines()[3:-16])) - - return real_spec - - -if __name__ == "__main__": - main() diff --git a/spackitor/scripts/s3.py b/spackitor/scripts/s3.py deleted file mode 100755 index 2944547..0000000 --- a/spackitor/scripts/s3.py +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/env python - -import json -import os -from io import BytesIO - -import boto3 - - -class Browser: - def __init__(self): - self.s3_client = self.get_s3_client() - - def list(self, bucket="spack-build-cache", prefix="", delimiter=""): - """ - Iterator to list all objects in the bucket (max 1000 at a time). - There is a way to play with prefix and delimiter to group them. - TODO: use these two params for more efficient searching. 
- """ - objects = { - "IsTruncated": True, - "NextContinuationToken": "", - "Contents": [{"Key": ""}], - } - - while objects["IsTruncated"]: - objects = self.s3_client.list_objects_v2( - Bucket=bucket, - Prefix=prefix, - Delimiter=delimiter, - ContinuationToken=objects["NextContinuationToken"], - StartAfter=objects["Contents"][-1]["Key"], - ) - - yield objects - - def find_lonely_objects(self, bucket="spack-build-cache", delete=False): - """ - Find "lonely" objects: .spack without matching .spec.json.sig or vice versa - - If you specify delete=True, they will be deleted as well. - """ - found_keys = [] - for object_set in self.list(bucket, prefix="", delimiter=""): - for key in object_set["Contents"]: - found_keys.append(key["Key"]) - - sig_ext = ".spec.json.sig" - spack_ext = ".spack" - replaced = ( - lambda x: os.path.basename(x).replace(sig_ext, spack_ext) - if x.endswith(sig_ext) - else os.path.basename(x).replace(spack_ext, sig_ext) - ) - lonely_keys = [ - x - for x in found_keys - if replaced(x) not in [os.path.basename(y) for y in found_keys] - ] - - if delete: - print( - f"Deleting {len(lonely_keys)} keys - don't forget to rebuild the index!" - ) - for keys_to_delete in self.split_list(lonely_keys): - self.s3_client.delete_objects( - Bucket=bucket, - Delete={"Objects": [{"Key": key} for key in keys_to_delete]}, - ) - - return lonely_keys - - def split_list(self, source, chunk_size=1000): - for x in range(0, len(source), chunk_size): - yield source[x : x + chunk_size] - - def find_package_objects(self, package, bucket="spack-build-cache"): - """ - Find all objects in the bucket that contain the package name - """ - found_keys = [] - for object_set in self.list(bucket, prefix="", delimiter=""): - for key in object_set["Contents"]: - if package in key["Key"]: - found_keys.append(key) - - return found_keys - - def get_index(self, bucket="spack-build-cache"): - """ - Get the contents of the index.json object, nicely parsed - """ - bio = BytesIO() - self.s3_client.download_fileobj(bucket, "build_cache/index.json", bio) - return json.loads(bio.getvalue()) - - def get_s3_client(self): - """ - Return a Boto3 client object that is connected to bbpobjectstorage.epfl.ch - ~/.ssh/buildcache_keys contains two lines: access and secret key. In that order. 
- """ - access_key, secret_key = [ - line.strip() - for line in open(os.path.expanduser("~/.ssh/buildcache_keys"), "r") - ] - - session = boto3.session.Session() - s3_client = session.client( - service_name="s3", - endpoint_url="https://bbpobjectstorage.epfl.ch", - aws_access_key_id=access_key, - aws_secret_access_key=secret_key, - ) - - return s3_client - - -def main(): - browser = Browser() - return browser.s3_client - - -if __name__ == "__main__": - main() - - -browser = Browser() diff --git a/spackitor/src/spackitor/__init__.py b/spackitor/src/spackitor/__init__.py deleted file mode 100644 index da45614..0000000 --- a/spackitor/src/spackitor/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from spackitor import __main__ as spackitor diff --git a/spackitor/src/spackitor/__main__.py b/spackitor/src/spackitor/__main__.py deleted file mode 100644 index 38b6e63..0000000 --- a/spackitor/src/spackitor/__main__.py +++ /dev/null @@ -1,346 +0,0 @@ -import datetime -import json -import logging -import os -from io import BytesIO -from pathlib import Path -from typing import List, Optional, Set, Union - -import boto3 -import click -from botocore.exceptions import ClientError - -DEFAULT_MAX_AGE = 30 -DEFAULT_BUCKET = "spack-build-cache" -S3_PAGINATION = 1000 - - -logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) -fmt = logging.Formatter("%(asctime)s - %(levelname)s - %(msg)s") -fh = logging.FileHandler("./spackitor.log") -fh.setFormatter(fmt) -fh.setLevel(logging.DEBUG) -logger.addHandler(fh) -sh = logging.StreamHandler() -sh.setFormatter(fmt) -sh.setLevel(logging.INFO) -logger.addHandler(sh) - - -def extract_target(spec_target: Union[str, dict]) -> str: - """ """ - return spec_target["name"] if isinstance(spec_target, dict) else spec_target - - -def build_sig_key(spec: dict) -> str: - """ - Take a spack spec and build the expected .json.sig key in the binary build cache for it - """ - target = extract_target(spec["arch"]["target"]) - key_name = "-".join( - [ - spec["arch"]["platform"], - spec["arch"]["platform_os"], - target, - spec["compiler"]["name"], - spec["compiler"]["version"], - spec["name"], - spec["version"], - spec["hash"], - ] - ) - - path = "/".join(["build_cache", f"{key_name}.spec.json.sig"]) - - return path - - -def build_key(spec: dict) -> str: - """ - Take a spack spec and build the expected key in the binary build cache for it - """ - logger.debug(f"Building key with spec {spec}") - target = extract_target(spec["arch"]["target"]) - - part1 = "-".join([spec["arch"]["platform"], spec["arch"]["platform_os"], target]) - part2 = "-".join([spec["compiler"]["name"], spec["compiler"]["version"]]) - part3 = "-".join([spec["name"], spec["version"]]) - part4 = "-".join( - [ - spec["arch"]["platform"], - spec["arch"]["platform_os"], - target, - spec["compiler"]["name"], - spec["compiler"]["version"], - spec["name"], - spec["version"], - spec["hash"], - ] - ) - - path = "/".join(["build_cache", part1, part2, part3, part4]) - path += ".spack" - - return path - - -def parse_spack_env(spack_env_path: str) -> List[str]: - """ - Parse a spack environment file and get all package spec S3 paths in it. 
-    """
-    if not os.path.exists(spack_env_path):
-        raise ValueError(f"{spack_env_path} does not exist")
-
-    paths = []
-
-    with open(spack_env_path, "r") as fp:
-        spack_env = json.load(fp)
-
-    for spec_hash in spack_env["concrete_specs"]:
-        spec = spack_env["concrete_specs"][spec_hash]
-        path = build_key(spec)
-
-        paths.append(path)
-
-    return paths
-
-
-def get_s3_client() -> boto3.session.Session.client:
-    """
-    Get an S3 client object, using the AWS_* environment variables as credentials
-    """
-    aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID")
-    aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY")
-    endpoint_url = os.environ.get("S3_ENDPOINT_URL", None)
-
-    if not aws_access_key_id or not aws_secret_access_key:
-        raise ValueError(
-            "No or incomplete AWS access key found. Please set the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables."
-        )
-
-    logger.info(f"Connecting with endpoint {endpoint_url}")
-    session = boto3.session.Session()
-    s3_client = session.client(
-        service_name="s3",
-        endpoint_url=endpoint_url,
-        aws_access_key_id=aws_access_key_id,
-        aws_secret_access_key=aws_secret_access_key,
-    )
-    return s3_client
-
-
-def list_spack_packages_in_repo(spack_directory: str) -> List[str]:
-    """
-    List all packages in the spack directory
-    Returns a list of package names
-    """
-
-    all_packages = []
-
-    for bbrepo in ["repo-bluebrain", "repo-patches"]:
-        all_packages.extend(
-            [
-                p.name
-                for p in Path(
-                    os.sep.join([spack_directory, "bluebrain", bbrepo, "packages"])
-                ).glob("*")
-            ]
-        )
-
-    for builtin_repo in ["builtin"]:
-        all_packages.extend(
-            [
-                p.name
-                for p in Path(
-                    os.sep.join(
-                        [
-                            spack_directory,
-                            "var",
-                            "spack",
-                            "repos",
-                            builtin_repo,
-                            "packages",
-                        ]
-                    )
-                ).glob("*")
-            ]
-        )
-
-    return all_packages
-
-
-def split_list(source, chunk_size):
-    for x in range(0, len(source), chunk_size):
-        yield source[x : x + chunk_size]
-
-
-def traverse_index(
-    s3_client: boto3.session.Session.client,
-    bucket: str,
-    whitelist: List[str],
-    deleted_hashes: Set[str],
-    max_age: int,
-    spack_directory: str,
-):
-    """
-    Traverse the S3 bucket, and clean paths which are not in the whitelist (if any) and older than
-    the maximum age.
-    If deleted_hashes contains anything, any packages depending on these hashes will also be
-    deleted.
-    """
-    index = download_index(s3_client, bucket)
-    existing_packages = list_spack_packages_in_repo(spack_directory)
-    delete_from_bucket = []
-    for package_checksum in index["database"]["installs"]:
-        package_spec = index["database"]["installs"][package_checksum]["spec"]
-        dependency_hashes = set(
-            [dep["hash"] for dep in package_spec.get("dependencies", [])]
-        )
-        key = build_key(package_spec)
-        sig_key = build_sig_key(package_spec)
-        head = object_exists(s3_client, bucket, key)
-        if head:
-            if deleted_hashes.intersection(dependency_hashes):
-                click.echo(
-                    f"Cleanup: Package {package_spec['name']} / {package_spec['hash']} depended on at least one deleted object"
-                )
-                delete_from_bucket.append({"Key": key})
-                delete_from_bucket.append({"Key": sig_key})
-                deleted_hashes.add(package_spec["hash"])
-            last_modified = head["LastModified"]
-            age = (
-                datetime.datetime.now(head["LastModified"].tzinfo)
-                - head["LastModified"]
-            )
-            if package_spec["name"] not in existing_packages:
-                click.echo(
-                    f"Cleanup: Package {package_spec['name']} not in existing packages"
-                )
-                delete_from_bucket.append({"Key": key})
-                delete_from_bucket.append({"Key": sig_key})
-                deleted_hashes.add(package_spec["hash"])
-                continue
-            if key in whitelist:
-                click.echo(f"Skip: Package {package_spec['name']} is in whitelist")
-                continue
-            if age.days > max_age:
-                click.echo(
-                    f"Cleanup: {package_spec['name']}: {age.days} days > {max_age}: {key}"
                )
-                delete_from_bucket.append({"Key": key})
-                delete_from_bucket.append({"Key": sig_key})
-                deleted_hashes.add(package_spec["hash"])
-
-    for keys_to_delete in split_list(delete_from_bucket, S3_PAGINATION):
-        delete(s3_client, bucket, {"Objects": keys_to_delete})
-
-    return deleted_hashes
-
-
-def delete(s3_client: boto3.session.Session.client, bucket: str, keys_to_delete: dict):
-    """
-    Perform the delete call and raise if a response code >= 400 was returned
-    """
-    click.echo(f"Deleting {len(keys_to_delete['Objects'])} objects")
-    response = s3_client.delete_objects(Bucket=bucket, Delete=keys_to_delete)
-    if response["ResponseMetadata"]["HTTPStatusCode"] >= 400:
-        raise RuntimeError(f"Failed to delete, here is the full response: {response}")
-
-
-def download_index(s3_client: boto3.session.Session.client, bucket: str) -> dict:
-    """
-    Download the index.json file from the build cache and returns the contents.
-
-    Returns the contents of the index
-    """
-    bio = BytesIO()
-    s3_client.download_fileobj(bucket, "build_cache/index.json", bio)
-    index = json.loads(bio.getvalue())
-
-    return index
-
-
-def _clean_cache(
-    spack_envs: tuple,
-    spack_directory: str,
-    bucket: str,
-    max_age: int = DEFAULT_MAX_AGE,
-):
-    if not (os.path.exists(spack_directory)):
-        raise ValueError(f"Spack directory {spack_directory} does not exist - aborting")
-
-    whitelist_paths = []
-
-    if spack_envs:
-        for spack_env in spack_envs:
-            whitelist_paths.extend(parse_spack_env(spack_env))
-
-    whitelist_paths = list(set(whitelist_paths))
-
-    s3_client = get_s3_client()
-
-    deleted_hashes = set()
-    while deleted_hashes := traverse_index(
-        s3_client, bucket, whitelist_paths, deleted_hashes, max_age, spack_directory
-    ):
-        pass
-
-
-def object_exists(
-    s3_client: boto3.session.Session.client, bucket: str, key: str
-) -> Optional[dict]:
-    try:
-        head = s3_client.head_object(Bucket=bucket, Key=key)
-        return head
-    except ClientError as e:
-        if e.response["Error"]["Code"] == "404":
-            return None
-        else:
-            raise
-
-
-@click.command()
-@click.option(
-    "--spack-envs",
-    "-e",
-    help="Comma-separated list of spack environment files",
-    multiple=True,
-)
-@click.option(
-    "--bucket",
-    "-b",
-    default=DEFAULT_BUCKET,
-    help="S3 bucket in which the build cache lives",
-)
-@click.option(
-    "--max-age",
-    "-a",
-    default=DEFAULT_MAX_AGE,
-    type=int,
-    help="Maximum age in days for anything that will be cleaned - older will be removed.",
-)
-@click.option(
-    "--spack-directory", "-s", help="Where the spack repository was checked out"
-)
-def clean_cache(
-    spack_envs: Optional[tuple], spack_directory: str, bucket: str, max_age: int
-):
-    """
-    Clean the specified cache.
-
-    If a (list of) spack environment files is given, anything not in them that is older than the specified time will be removed.
-    If no spack environment file is given, anything older than the specified time will be removed.
-
-    The spack directory is necessary to check whether packages have been removed from the repository. If they have been, they will be deleted from the build cache as well.
- """ - - click.echo(f"Spack envs: {spack_envs}") - click.echo(f"Spack directory: {spack_directory}") - click.echo(f"Bucket: {bucket}") - click.echo(f"Max age: {max_age}") - _clean_cache(spack_envs, spack_directory, bucket, max_age) - - -if __name__ == "__main__": - clean_cache() diff --git a/spackitor/tests/spack.lock b/spackitor/tests/spack.lock deleted file mode 100644 index a8f3a03..0000000 --- a/spackitor/tests/spack.lock +++ /dev/null @@ -1,48 +0,0 @@ -{ - "concrete_specs": { - "4zgnw5n6v32wunbjg4ajkt3tukld2uo6": { - "name": "present_new_enough", - "version": "1.2.3", - "arch": { - "platform": "linux", - "platform_os": "ubuntu20.04", - "target": "x86_64" - }, - "compiler": {"name": "gcc", "version": "9.4.0"}, - "hash": "4zgnw5n6v32wunbjg4ajkt3tukld2uo6" - }, - "rexlp3wrtheojr4o3dsa5lcctgixpa6x": { - "name": "absent_new_enough", - "version": "4.5.6", - "arch": { - "platform": "linux", - "platform_os": "ubuntu20.04", - "target": "x86_64" - }, - "compiler": {"name": "gcc", "version": "9.4.0"}, - "hash": "rexlp3wrtheojr4o3dsa5lcctgixpa6x" - }, - "xi6262rcvjhsobjid63toj7nwvjlj6x5": { - "name": "present_too_old", - "version": "1.4.3", - "arch": { - "platform": "linux", - "platform_os": "ubuntu22.04", - "target": "x86_64" - }, - "compiler": {"name": "gcc", "version": "12.2.0"}, - "hash": "xi6262rcvjhsobjid63toj7nwvjlj6x5" - }, - "ydl77hkfdl6w4tuwv5wznvbxmf6uvvra": { - "name": "absent_too_old", - "version": "4.3.1-4.3.4", - "arch": { - "platform": "linux", - "platform_os": "ubuntu20.04", - "target": "x86_64" - }, - "compiler": {"name": "gcc", "version": "9.4.0"}, - "hash": "ydl77hkfdl6w4tuwv5wznvbxmf6uvvra" - } - } -} diff --git a/spackitor/tests/test_unit.py b/spackitor/tests/test_unit.py deleted file mode 100644 index 94df9dd..0000000 --- a/spackitor/tests/test_unit.py +++ /dev/null @@ -1,671 +0,0 @@ -import datetime -import itertools -import json -import os -from pathlib import PosixPath -from unittest.mock import MagicMock, call, patch -from uuid import uuid4 - -import pytest -from botocore.exceptions import ClientError -from dateutil.tz import tzutc - -from spackitor import spackitor - -EXPECTED_PACKAGES = [ - "build_cache/linux-ubuntu20.04-x86_64/gcc-9.4.0/present_new_enough-1.2.3/linux-ubuntu20.04-x86_64-gcc-9.4.0-present_new_enough-1.2.3-4zgnw5n6v32wunbjg4ajkt3tukld2uo6.spack", - "build_cache/linux-ubuntu20.04-x86_64/gcc-9.4.0/absent_new_enough-4.5.6/linux-ubuntu20.04-x86_64-gcc-9.4.0-absent_new_enough-4.5.6-rexlp3wrtheojr4o3dsa5lcctgixpa6x.spack", - "build_cache/linux-ubuntu22.04-x86_64/gcc-12.2.0/present_too_old-1.4.3/linux-ubuntu22.04-x86_64-gcc-12.2.0-present_too_old-1.4.3-xi6262rcvjhsobjid63toj7nwvjlj6x5.spack", - "build_cache/linux-ubuntu20.04-x86_64/gcc-9.4.0/absent_too_old-4.3.1-4.3.4/linux-ubuntu20.04-x86_64-gcc-9.4.0-absent_too_old-4.3.1-4.3.4-ydl77hkfdl6w4tuwv5wznvbxmf6uvvra.spack", -] - -SECOND_EXPECTED_PACKAGES = [ - "build_cache/linux-ubuntu20.04-x86_64/gcc-9.4.0/from_second_specfile_new_enough-3.5/linux-ubuntu20.04-x86_64-gcc-9.4.0-from_second_specfile_new_enough-3.5-zfzudapqgpxfqj4os2oym3ne6qvkzlap.spack", - "build_cache/linux-ubuntu20.04-x86_64/gcc-9.4.0/from_second_specfile_too_old-39.2-alpha1/linux-ubuntu20.04-x86_64-gcc-9.4.0-from_second_specfile_too_old-39.2-alpha1-mjbs2jb2hwyybpqghgwamdw5exb3s44b.spack", -] - -EXPECTED_SIGNATURES = [ - "build_cache/linux-ubuntu20.04-x86_64-gcc-9.4.0-present_new_enough-1.2.3-4zgnw5n6v32wunbjg4ajkt3tukld2uo6.spec.json.sig", -] - -MAX_AGE = 3 -LAST_MODIFIED_NEW_ENOUGH = datetime.datetime.now(tzutc()) - datetime.timedelta(days=1) 
-LAST_MODIFIED_TOO_OLD = datetime.datetime.now(tzutc()) - datetime.timedelta(days=5) - - -def test_build_sig_key(): - spackitor.build_sig_key( - { - "arch": { - "platform": "linux", - "platform_os": "ubuntu20.04", - "target": "x86_64", - }, - "compiler": {"name": "gcc", "version": "9.4.0"}, - "name": "present_new_enough", - "version": "1.2.3", - "hash": "4zgnw5n6v32wunbjg4ajkt3tukld2uo6", - } - ) - - -@pytest.mark.parametrize("specify_env", [True, False]) -@patch("spackitor.spackitor.parse_spack_env") -@patch("spackitor.spackitor.get_s3_client") -@patch("spackitor.spackitor.traverse_index", return_value=None) -@patch("os.path.exists", return_value=True) -def test_clean_cache( - mock_exists, - mock_traverse_index, - mock_get_s3_client, - mock_parse_spack_env, - specify_env, -): - spack_env = " " if specify_env else None - spackitor._clean_cache( - spack_env, spack_directory="/opt/spack", bucket="spack-build-cache" - ) - if not specify_env: - mock_parse_spack_env.assert_not_called() - else: - mock_parse_spack_env.assert_called_once() - - mock_traverse_index.assert_called_once() - - -@patch("spackitor.spackitor.boto3") -def test_raises_no_access_keys(mocked_boto3): - with pytest.raises( - ValueError, - match="^No or incomplete AWS access key found. Please set the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables.$", - ): - spackitor.get_s3_client() - - -@patch("spackitor.spackitor.boto3") -def test_uses_aws_env_vars(mock_boto3): - mock_access_key = "access key" - mock_secret_key = "secret key" - endpoint_url = "https://bbpobjectstorage.epfl.ch" - os.environ["AWS_ACCESS_KEY_ID"] = mock_access_key - os.environ["AWS_SECRET_ACCESS_KEY"] = mock_secret_key - os.environ["S3_ENDPOINT_URL"] = endpoint_url - - mock_session = MagicMock() - mock_boto3.session.Session.return_value = mock_session - - spackitor.get_s3_client() - - mock_session.client.assert_called_with( - service_name="s3", - endpoint_url=endpoint_url, - aws_access_key_id=mock_access_key, - aws_secret_access_key=mock_secret_key, - ) - - -def test_build_key(): - with open("spackitor/tests/spack.lock", "r") as fp: - spec = json.load(fp) - path = spackitor.build_key( - spec["concrete_specs"]["4zgnw5n6v32wunbjg4ajkt3tukld2uo6"] - ) - assert ( - path - == "build_cache/linux-ubuntu20.04-x86_64/gcc-9.4.0/present_new_enough-1.2.3/linux-ubuntu20.04-x86_64-gcc-9.4.0-present_new_enough-1.2.3-4zgnw5n6v32wunbjg4ajkt3tukld2uo6.spack" - ) - - -def test_parse_nonexistant_spack_env(): - spack_env = "/tmp/this/file/does/not/exist.txt" - with pytest.raises(ValueError, match=f"^{spack_env} does not exist$"): - spackitor.parse_spack_env(spack_env) - - -def test_parse_spack_env(): - spack_env = "spackitor/tests/spack.lock" - - packages = spackitor.parse_spack_env(spack_env) - assert packages == EXPECTED_PACKAGES - - -@patch("spackitor.spackitor.download_index") -@patch("spackitor.spackitor.get_s3_client") -@patch("spackitor.spackitor.list_spack_packages_in_repo") -def test_traverse_index( - mock_list_packages_in_repo, - mock_get_s3_client, - mock_download_index, -): - """ - In the package names, "present/absent" means to "present/absent in the spack environment" - """ - - mock_download_index.return_value = { - "database": { - "installs": { - "4zgnw5n6v32wunbjg4ajkt3tukld2uo6": { - "spec": { - "arch": { - "platform": "linux", - "platform_os": "ubuntu20.04", - "target": "x86_64", - }, - "compiler": {"name": "gcc", "version": "9.4.0"}, - "name": "present_new_enough", - "version": "1.2.3", - "hash": "4zgnw5n6v32wunbjg4ajkt3tukld2uo6", - }, - }, - 
"rexlp3wrtheojr4o3dsa5lcctgixpa6x": { - "spec": { - "arch": { - "platform": "linux", - "platform_os": "ubuntu20.04", - "target": "x86_64", - }, - "compiler": {"name": "gcc", "version": "9.4.0"}, - "name": "absent_new_enough", - "version": "4.5.6", - "hash": "rexlp3wrtheojr4o3dsa5lcctgixpa6x", - }, - }, - "xi6262rcvjhsobjid63toj7nwvjlj6x5": { - "spec": { - "arch": { - "platform": "linux", - "platform_os": "ubuntu22.04", - "target": "x86_64", - }, - "compiler": {"name": "gcc", "version": "12.2.0"}, - "name": "present_too_old", - "version": "1.4.3", - "hash": "xi6262rcvjhsobjid63toj7nwvjlj6x5", - } - }, - "ydl77hkfdl6w4tuwv5wznvbxmf6uvvra": { - "spec": { - "arch": { - "platform": "linux", - "platform_os": "ubuntu20.04", - "target": "x86_64", - }, - "compiler": {"name": "gcc", "version": "9.4.0"}, - "name": "absent_too_old", - "version": "4.3.1-4.3.4", - "hash": "ydl77hkfdl6w4tuwv5wznvbxmf6uvvra", - } - }, - "xqlk7kkfqg6988uwv5wznvbxmf6uv29z": { - "spec": { - "arch": { - "platform": "linux", - "platform_os": "ubuntu20.04", - "target": "x86_64", - }, - "compiler": {"name": "gcc", "version": "9.4.0"}, - "name": "deleted-package", - "version": "4.3.1-4.3.4", - "hash": "xqlk7kkfqg6988uwv5wznvbxmf6uv29z", - } - }, - "zfzudapqgpxfqj4os2oym3ne6qvkzlap": { - "spec": { - "arch": { - "platform": "linux", - "platform_os": "ubuntu20.04", - "target": "x86_64", - }, - "compiler": {"name": "gcc", "version": "9.4.0"}, - "name": "from_second_specfile_new_enough", - "version": "3.5", - "hash": "zfzudapqgpxfqj4os2oym3ne6qvkzlap", - } - }, - "mjbs2jb2hwyybpqghgwamdw5exb3s44b": { - "spec": { - "arch": { - "platform": "linux", - "platform_os": "ubuntu20.04", - "target": "x86_64", - }, - "compiler": {"name": "gcc", "version": "9.4.0"}, - "name": "from_second_specfile_too_old", - "version": "39.2-alpha1", - "hash": "mjbs2jb2hwyybpqghgwamdw5exb3s44b", - } - }, - "zrffl34kskkdfh289045lkkdhsi1l4jh": { - "spec": { - "arch": { - "platform": "linux", - "platform_os": "ubuntu20.04", - "target": "x86_64", - }, - "compiler": {"name": "gcc", "version": "9.4.0"}, - "name": "depends_on_deleted", - "version": "7.8.9", - "hash": "zrffl34kskkdfh289045lkkdhsi1l4jh", - "dependencies": [ - {"name": "absent_too_old", - "hash": "ydl77hkfdl6w4tuwv5wznvbxmf6uvvra", - "type": ["build", "run"]}, - ], - }, - }, - } - } - } - - prod_updated_index = { - "database": { - "installs": { - "4zgnw5n6v32wunbjg4ajkt3tukld2uo6": { - "spec": { - "arch": { - "platform": "linux", - "platform_os": "ubuntu20.04", - "target": "x86_64", - }, - "compiler": {"name": "gcc", "version": "9.4.0"}, - "name": "present_new_enough", - "version": "1.2.3", - "hash": "4zgnw5n6v32wunbjg4ajkt3tukld2uo6", - }, - }, - "rexlp3wrtheojr4o3dsa5lcctgixpa6x": { - "spec": { - "arch": { - "platform": "linux", - "platform_os": "ubuntu20.04", - "target": "x86_64", - }, - "compiler": {"name": "gcc", "version": "9.4.0"}, - "name": "absent_new_enough", - "version": "4.5.6", - "hash": "rexlp3wrtheojr4o3dsa5lcctgixpa6x", - }, - }, - "xi6262rcvjhsobjid63toj7nwvjlj6x5": { - "spec": { - "arch": { - "platform": "linux", - "platform_os": "ubuntu22.04", - "target": "x86_64", - }, - "compiler": {"name": "gcc", "version": "12.2.0"}, - "name": "present_too_old", - "version": "1.4.3", - "hash": "xi6262rcvjhsobjid63toj7nwvjlj6x5", - } - }, - "zfzudapqgpxfqj4os2oym3ne6qvkzlap": { - "spec": { - "arch": { - "platform": "linux", - "platform_os": "ubuntu20.04", - "target": "x86_64", - }, - "compiler": {"name": "gcc", "version": "9.4.0"}, - "name": "from_second_specfile_new_enough", - "version": "3.5", - 
"hash": "zfzudapqgpxfqj4os2oym3ne6qvkzlap", - } - }, - } - } - } - - mock_list_packages_in_repo.return_value = [ - "present_new_enough", - "absent_new_enough", - "present_too_old", - "absent_too_old", - "from_second_specfile_new_enough", - "depends_on_deleted", - ] - whitelist = [ - key - for key in EXPECTED_PACKAGES + SECOND_EXPECTED_PACKAGES - if "absent" not in key - ] - - delete_objects = MagicMock( - return_value={"ResponseMetadata": {"HTTPStatusCode": 200}} - ) - mock_s3_client = MagicMock() - mock_s3_client.head_object = mock_head - mock_s3_client.delete_objects = delete_objects - spackitor.traverse_index( - mock_s3_client, "spack-build-cache", whitelist, set(), MAX_AGE, "/opt/spack" - ) - print(f"Call count: {delete_objects.call_count}") - delete_objects.assert_called_with( - Bucket="spack-build-cache", - Delete={ - "Objects": [ - { - "Key": "build_cache/linux-ubuntu20.04-x86_64/gcc-9.4.0/absent_too_old-4.3.1-4.3.4/linux-ubuntu20.04-x86_64-gcc-9.4.0-absent_too_old-4.3.1-4.3.4-ydl77hkfdl6w4tuwv5wznvbxmf6uvvra.spack" - }, - { - "Key": "build_cache/linux-ubuntu20.04-x86_64-gcc-9.4.0-absent_too_old-4.3.1-4.3.4-ydl77hkfdl6w4tuwv5wznvbxmf6uvvra.spec.json.sig" - }, - { - "Key": "build_cache/linux-ubuntu20.04-x86_64/gcc-9.4.0/deleted-package-4.3.1-4.3.4/linux-ubuntu20.04-x86_64-gcc-9.4.0-deleted-package-4.3.1-4.3.4-xqlk7kkfqg6988uwv5wznvbxmf6uv29z.spack" - }, - { - "Key": "build_cache/linux-ubuntu20.04-x86_64-gcc-9.4.0-deleted-package-4.3.1-4.3.4-xqlk7kkfqg6988uwv5wznvbxmf6uv29z.spec.json.sig" - }, - { - "Key": "build_cache/linux-ubuntu20.04-x86_64/gcc-9.4.0/from_second_specfile_too_old-39.2-alpha1/linux-ubuntu20.04-x86_64-gcc-9.4.0-from_second_specfile_too_old-39.2-alpha1-mjbs2jb2hwyybpqghgwamdw5exb3s44b.spack" - }, - { - "Key": "build_cache/linux-ubuntu20.04-x86_64-gcc-9.4.0-from_second_specfile_too_old-39.2-alpha1-mjbs2jb2hwyybpqghgwamdw5exb3s44b.spec.json.sig" - }, - { - "Key": "build_cache/linux-ubuntu20.04-x86_64/gcc-9.4.0/depends_on_deleted-7.8.9/linux-ubuntu20.04-x86_64-gcc-9.4.0-depends_on_deleted-7.8.9-zrffl34kskkdfh289045lkkdhsi1l4jh.spack" - }, - { - "Key": "build_cache/linux-ubuntu20.04-x86_64-gcc-9.4.0-depends_on_deleted-7.8.9-zrffl34kskkdfh289045lkkdhsi1l4jh.spec.json.sig" - }, - ] - }, - ) - - -class ObjectLister: - def __init__(self, max_age): - self.counter = 0 - self.object_count = 3272 - self.max_age = max_age - self.packages = sorted( - [ - str(uuid4()).replace("-", "_") + "_too_old" - for x in range(self.object_count) - ] - ) - - self.specs = [ - { - "spec": { - "arch": { - "platform": "linux", - "platform_os": "ubuntu", - "target": "x86_64", - }, - "compiler": {"name": "gcc", "version": "9.3.0"}, - "name": package, - "version": "1.2.3", - "hash": str(uuid4()).split("-")[0], - } - } - for package in self.packages - ] - print(f"Packages: {len(self.packages)}") - package_objects = [ - f"build_cache/{spec['spec']['arch']['platform']}-{spec['spec']['arch']['platform_os']}-{spec['spec']['arch']['target']}/{spec['spec']['compiler']['name']}-{spec['spec']['compiler']['version']}/{spec['spec']['name']}-{spec['spec']['version']}/{spec['spec']['arch']['platform']}-{spec['spec']['arch']['platform_os']}-{spec['spec']['arch']['target']}-{spec['spec']['compiler']['name']}-{spec['spec']['compiler']['version']}-{spec['spec']['name']}-{spec['spec']['version']}-{spec['spec']['hash']}.spack" - for spec in self.specs - ] - - sig_objects = [ - 
f"build_cache/{spec['spec']['arch']['platform']}-{spec['spec']['arch']['platform_os']}-{spec['spec']['arch']['target']}-{spec['spec']['compiler']['name']}-{spec['spec']['compiler']['version']}-{spec['spec']['name']}-{spec['spec']['version']}-{spec['spec']['hash']}.spec.json.sig" - for spec in self.specs - ] - - self.s3_objects = [ - x - for x in itertools.chain.from_iterable( - itertools.zip_longest(package_objects, sig_objects) - ) - ] - - print(f"First package: {self.packages[0]}") - print(f"Packages 999-1000: {self.packages[999:1001]}") - print(f"Packages 1999-2000: {self.packages[1999:2001]}") - print(f"Packages 2999-3000: {self.packages[2999:3001]}") - print(f"Last package: {self.packages[-1]}") - - def list(self, *args, **kwargs): - print(f"Counter is {self.counter}") - is_truncated = True if self.counter + 1000 < self.object_count else False - - print(f"Truncated: {is_truncated}") - s3_object_list = { - "IsTruncated": is_truncated, - "NextContinuationToken": "abc", - "Contents": [ - { - "Key": s3_object, - "LastModified": datetime.datetime.now(tzutc()) - - datetime.timedelta(days=self.max_age + 1), - } - for s3_object in self.s3_objects[self.counter : self.counter + 1000] - ], - } - self.counter += 1000 - print(f"Counter is now {self.counter}") - return s3_object_list - - -def test_failed_delete_raises(): - mock_s3_client = MagicMock() - response = {"ResponseMetadata": {"HTTPStatusCode": 500}} - delete_objects = MagicMock(return_value=response) - mock_s3_client.delete_objects = delete_objects - with pytest.raises( - RuntimeError, match=f"^Failed to delete, here is the full response: {response}$" - ): - spackitor.delete(mock_s3_client, "bukkit", {"Objects": []}) - - -@patch("spackitor.spackitor.download_index") -@patch("spackitor.spackitor.get_s3_client") -@patch("spackitor.spackitor.list_spack_packages_in_repo") -def test_traverse_index_many_keys( - mock_list_packages_in_repo, - mock_get_s3_client, - mock_download_index, -): - """ - Test behaviour when there are more than 1000 keys to be deleted. 
- """ - max_age = 3 - lister = ObjectLister(max_age) - mock_index = { - "database": {"installs": {spec["spec"]["hash"]: spec for spec in lister.specs}} - } - - mock_download_index.return_value = mock_index - mock_list_packages_in_repo.return_value = lister.packages - whitelist = [] - delete_objects = MagicMock( - return_value={"ResponseMetadata": {"HTTPStatusCode": 200}} - ) - mock_s3_client = MagicMock() - mock_s3_client.delete_objects = delete_objects - mock_s3_client.head_object = mock_head - spackitor.traverse_index( - mock_s3_client, - bucket="spack-build-cache", - whitelist=whitelist, - deleted_hashes=set(), - max_age=max_age, - spack_directory="/opt/spack", - ) - - assert delete_objects.call_count == 7 - calls = [ - call( - Bucket="spack-build-cache", - Delete={ - "Objects": [ - {"Key": s3_object} for s3_object in lister.s3_objects[0:1000] - ] - }, - ), - call( - Bucket="spack-build-cache", - Delete={ - "Objects": [ - {"Key": s3_object} for s3_object in lister.s3_objects[1000:2000] - ] - }, - ), - call( - Bucket="spack-build-cache", - Delete={ - "Objects": [ - {"Key": s3_object} for s3_object in lister.s3_objects[2000:3000] - ] - }, - ), - call( - Bucket="spack-build-cache", - Delete={ - "Objects": [ - {"Key": s3_object} for s3_object in lister.s3_objects[3000:4000] - ] - }, - ), - call( - Bucket="spack-build-cache", - Delete={ - "Objects": [ - {"Key": s3_object} for s3_object in lister.s3_objects[4000:5000] - ] - }, - ), - call( - Bucket="spack-build-cache", - Delete={ - "Objects": [ - {"Key": s3_object} for s3_object in lister.s3_objects[5000:6000] - ] - }, - ), - call( - Bucket="spack-build-cache", - Delete={ - "Objects": [ - {"Key": s3_object} for s3_object in lister.s3_objects[6000:6546] - ] - }, - ), - ] - delete_objects.assert_has_calls(calls) - - -@patch("spackitor.spackitor.BytesIO") -def test_download_index(mock_bytes_io): - mock_bio = MagicMock() - mock_spec = '{"database": {"version": "6", "installs": {"sjvxlmpwkszvwto62aahpwx3gbfp7s55": "some_spec"}}}' - mock_bio.getvalue = MagicMock(return_value=mock_spec) - mock_bytes_io.return_value = mock_bio - bucket = "bukkit" - mock_s3_client = MagicMock() - mock_s3_client.download_fileobj = MagicMock(return_value=mock_spec) - spackitor.download_index(mock_s3_client, bucket) - mock_s3_client.download_fileobj.assert_called_with( - bucket, "build_cache/index.json", mock_bio - ) - - -def test_nonexistant_spack_directory(): - spack_directory = "/some/path/that/does/not/exist" - with pytest.raises( - ValueError, - match=f"^Spack directory {spack_directory} does not exist - aborting$", - ): - spackitor._clean_cache( - None, spack_directory=spack_directory, bucket="spack-build-cache-dev" - ) - - -def test_list_spack_packages_in_repo(): - """ - * repo-patches - * patch-package1 - * patch-package2 - * repo-bluebrain - * bb-package1 - * bb-package2 - * bb-package3 - * builtin - * builtin-package1 - """ - - class MockPath: - def __init__(self, path): - self.path = path - - def glob(self, glob): - if self.path == "/opt/spack/bluebrain/repo-patches/packages": - return iter( - [ - PosixPath(os.sep.join([self.path, "patch-package1"])), - PosixPath(os.sep.join([self.path, "patch-package2"])), - ] - ) - if self.path == "/opt/spack/bluebrain/repo-bluebrain/packages": - return iter( - [ - PosixPath(os.sep.join([self.path, "bb-package1"])), - PosixPath(os.sep.join([self.path, "bb-package2"])), - PosixPath(os.sep.join([self.path, "bb-package3"])), - ] - ) - if self.path == "/opt/spack/var/spack/repos/builtin/packages": - return iter( - [ - 
PosixPath(os.sep.join([self.path, "builtin-package1"])),
-                ]
-            )
-
-    spack_dir = "/opt/spack"
-    with patch("spackitor.spackitor.Path", MockPath):
-        all_packages = spackitor.list_spack_packages_in_repo(spack_dir)
-
-    assert all_packages == [
-        "bb-package1",
-        "bb-package2",
-        "bb-package3",
-        "patch-package1",
-        "patch-package2",
-        "builtin-package1",
-    ]
-
-
-def mock_head(*args, **kwargs):
-    if "checksum2" in kwargs["Key"]:
-        raise ClientError(
-            error_response={"Error": {"Code": "404"}}, operation_name="HEAD"
-        )
-    elif "checksum3" in kwargs["Key"]:
-        raise ClientError(
-            error_response={"Error": {"Code": "500"}}, operation_name="HEAD"
-        )
-    elif "test_exists_false" in kwargs["Key"]:
-        raise ClientError(
-            error_response={"Error": {"Code": "404"}}, operation_name="HEAD"
-        )
-    elif "too_old" in kwargs["Key"]:
-        return {"ResponseMetadata": {}, "LastModified": LAST_MODIFIED_TOO_OLD}
-    else:
-        return {"ResponseMetadata": {}, "LastModified": LAST_MODIFIED_NEW_ENOUGH}
-
-
-@pytest.mark.parametrize("exists", [True, False])
-def test_object_exists(exists):
-    mock_s3_client = MagicMock()
-    mock_s3_client.head_object = mock_head
-    exists = spackitor.object_exists(
-        mock_s3_client, "bukkit", "test_exists" if exists else "test_exists_false"
-    )
-
-    if exists:
-        assert exists == {
-            "ResponseMetadata": {},
-            "LastModified": LAST_MODIFIED_NEW_ENOUGH,
-        }
-    else:
-        assert exists is None