diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 0ed4c6982..000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,263 +0,0 @@ -version: 2.1 -orbs: - hubploy: - jobs: - build-image: - description: "Build an image via hubploy" - parameters: - deployment: - type: string - push: - type: boolean - default: false - - docker: - - image: python:3.11-slim-bullseye - working_directory: ~/repo - resource_class: large - steps: - - checkout - - run: - name: Install git & curl - command: | - apt-get update && apt-get install --yes --no-install-recommends git curl - - restore_cache: - keys: - - v3.11-dependencies-{{ checksum "requirements.txt" }} - # fallback to using the latest cache if no exact match is found - - v3.11-dependencies- - - - run: - name: install dependencies - command: | - python3 -m venv venv - source venv/bin/activate - pip install --upgrade -r requirements.txt - echo 'export PATH="${HOME}/repo/venv/bin:$PATH"' >> ${BASH_ENV} - - - unless: - condition: << parameters.push >> - steps: - - run: - name: Determine range of commits we are building - command: | - # CircleCI doesn't have equivalent to Travis' COMMIT_RANGE - COMMIT_RANGE=$(./.circleci/get-commit-range.py 2> /tmp/commit-range-err.txt || true) - if [ -s /tmp/commit-range-err.txt ]; then - echo "Unable to get commit range." - cat /tmp/commit-range-err.txt - exit 1 - fi - echo COMMIT_RANGE: ${COMMIT_RANGE} - echo "export COMMIT_RANGE='${COMMIT_RANGE}'" >> ${BASH_ENV} - - - when: - condition: << parameters.push >> - # Currently all our images live on google cloud, so we install gcloud sdk when pushing - steps: - - run: - name: Install google cloud sdk - command: | - export GCLOUD_URL=https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-457.0.0-linux-x86_64.tar.gz - curl -sSL $GCLOUD_URL | tar -xzf - -C ${HOME} - # Be careful with quote ordering here. ${PATH} must not be expanded - # Don't use ~ here - bash can interpret PATHs containing ~, but most other things can't. - # Always use full PATHs in PATH! - echo 'export PATH="${HOME}/google-cloud-sdk/bin:${PATH}"' >> ${BASH_ENV} - # Try to tell cloud sdk to use python3 - echo 'export CLOUDSDK_PYTHON=python3' >> ${BASH_ENV} - - - run: - name: Install gcloud auth plugin - command: | - gcloud components install gke-gcloud-auth-plugin - - - run: - name: Configure credential helper for Google Artifact Registry - command: | - gcloud auth configure-docker us-central1-docker.pkg.dev - - - run: - name: Install sops - command: | - echo $SOPS_ACCOUNT_KEY > ${HOME}/repo/sops.key - echo 'export GOOGLE_APPLICATION_CREDENTIALS=${HOME}/repo/sops.key' >> ${BASH_ENV} - mkdir -p ${HOME}/repo/bin - curl -sSL https://github.com/mozilla/sops/releases/download/v3.7.0/sops-v3.7.0.linux -o ${HOME}/repo/bin/sops - chmod 755 ${HOME}/repo/bin/sops - echo 'export PATH="${HOME}/repo/bin:${PATH}"' >> ${BASH_ENV} - - - setup_remote_docker - - save_cache: - paths: - - ./venv - key: v3.11-dependencies-{{ checksum "requirements.txt" }} - - - run: - name: Build image if needed - command: | - if [ "<< parameters.push >>" == "true" -o -z "${COMMIT_RANGE}" ]; then - HUBPLOY_ARGS="--check-registry --push" - else - HUBPLOY_ARGS="--commit-range ${COMMIT_RANGE}" - fi - hubploy build << parameters.deployment >> ${HUBPLOY_ARGS} - no_output_timeout: 90m - -jobs: - deploy: - docker: - - image: python:3.11-slim-bullseye - working_directory: ~/repo - steps: - - run: - name: Install base apt packages - command: | - apt-get update -qq --yes - apt-get install -qq --yes git curl lsb-release apt-transport-https - - checkout - # Download and cache dependencies - - restore_cache: - keys: - - v3.11-dependencies-gcloud-457-{{ checksum "requirements.txt" }} - # fallback to using the latest cache if no exact match is found - - v3.11-dependencies-gcloud-457- - - - run: - name: install dependencies - command: | - python3 -m venv venv - source venv/bin/activate - pip install --upgrade -r requirements.txt - - # Can be removed once https://github.com/docker/docker-py/issues/2225 is merged and released - pip install --upgrade git+https://github.com/docker/docker-py.git@b6f6e7270ef1acfe7398b99b575d22d0d37ae8bf - - export GCLOUD_URL=https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-457.0.0-linux-x86_64.tar.gz - curl -sSL $GCLOUD_URL | tar -xzf - -C ${HOME} - # Be careful with quote ordering here. ${PATH} must not be expanded - # Don't use ~ here - bash can interpret PATHs containing ~, but most other things can't. - # Always use full PATHs in PATH! - echo 'export PATH="${HOME}/repo/venv/bin:${HOME}/google-cloud-sdk/bin:${PATH}"' >> ${BASH_ENV} - - curl -sSL https://github.com/mozilla/sops/releases/download/v3.7.0/sops-v3.7.0.linux -o venv/bin/sops - chmod 755 venv/bin/sops - - - save_cache: - paths: - - ./venv - key: v3.11-dependencies-gcloud-457-{{ checksum "requirements.txt" }} - - - run: - name: Authenticating with google service account for kms/sops - command: | - echo $SOPS_ACCOUNT_KEY > ${HOME}/repo/sops.key - echo 'export GOOGLE_APPLICATION_CREDENTIALS=${HOME}/repo/sops.key' >> ${BASH_ENV} - - - run: - name: Install helm3 - command: | - curl -L https://get.helm.sh/helm-v3.13.3-linux-amd64.tar.gz | \ - tar -xzf - - mv linux-amd64/helm /usr/local/bin - helm repo add jupyterhub https://jupyterhub.github.io/helm-chart/ - helm repo update - - - run: - name: Post annotation to Grafana - command: | - # We get GRAFANA_API_KEY from circle secret config. FIXME: put this inside git-crypt - export PULL_REQUEST_ID=$(git log -1 --pretty=%B | head -n1 | sed 's/^.*#\([0-9]*\).*/\1/') - export AUTHOR_NAME="$(git log -1 --pretty=%aN)" - export PULL_REQUEST_TITLE="$(git log --pretty=%B -1 | tail -n+3)" - - # added by sknapp 2023-12-19 to unblock builds - # python3 scripts/post-grafana-annotation.py \ - # --grafana-url http://grafana.datahub.berkeley.edu\ - # --tag deployment-start \ - # "$(echo -en ${PULL_REQUEST_TITLE}\\n\\n${AUTHOR_NAME}: https://github.com/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}/pull/${PULL_REQUEST_ID})" - - run: - name: Install gcloud auth plugin - command: | - gcloud components install gke-gcloud-auth-plugin - - - run: - name: Deploy datahub - command: | - hubploy deploy --timeout 30m datahub hub ${CIRCLE_BRANCH} - no_output_timeout: 30m - - - run: - name: Deploy dlab - command: | - hubploy deploy --timeout 30m dlab hub ${CIRCLE_BRANCH} - no_output_timeout: 30m - - - run: - name: Deploy highschool hub - command: | - hubploy deploy --timeout 30m highschool hub ${CIRCLE_BRANCH} - no_output_timeout: 30m - - - run: - name: Deploy prob140 - command: | - hubploy deploy --timeout 30m prob140 hub ${CIRCLE_BRANCH} - no_output_timeout: 30m - - - run: - name: Deploy R - command: | - hubploy deploy --timeout 30m r hub ${CIRCLE_BRANCH} - no_output_timeout: 30m - - - run: - name: Deploy workshop - command: | - hubploy deploy --timeout 30m workshop hub ${CIRCLE_BRANCH} - no_output_timeout: 30m - -workflows: - version: 2 - test-build-images: - jobs: - - hubploy/build-image: - deployment: datahub - # Workshop hub also uses this - name: datahub image build - # Filters can only be per-job? wtf - filters: - branches: - ignore: - - staging - - prod - - deploy: - jobs: - - hubploy/build-image: - # workshop hub also uses this image - deployment: datahub - name: datahub image build - push: true - # Filters can only be per-job? wtf - filters: - branches: - only: - - staging - # Build images only during the staging deploy. All merges - # to prod need to go via staging, so prod should *never* - # use images not built for staging. By enforcing this at the - # CI level, we also make prod deploys go faster! - - deploy: - requires: - - datahub image build - - filters: - branches: - only: - - staging - - deploy: - filters: - branches: - only: - - prod diff --git a/.circleci/get-commit-range.py b/.circleci/get-commit-range.py deleted file mode 100755 index 98429ceec..000000000 --- a/.circleci/get-commit-range.py +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env python3 -import os -import argparse -from github import Github -import sys - - -def from_pr(project, repo, pr_number): - gh = Github() - pr = gh.get_repo(f'{project}/{repo}').get_pull(pr_number) - return f'{pr.base.sha}...{pr.head.sha}' - -def from_branch(project, repo, branch_name): - """ - Return commit_range for a PR from a branch name. - - CircleCI doesn't give us the PR Number when making a PR from the same - repo, rather than a fork. This is terrible. Until this gets fixed, - we iterate through all open PRs and find the PR we're operating on. - """ - gh = Github() - prs = gh.get_repo(f'{project}/{repo}').get_pulls(state='all', sort='updated') - for pr in prs: - if pr.head.ref == branch_name: - return f'{pr.base.sha}...{pr.head.sha}' - - raise ValueError(f'No PR from branch {branch_name} in upstream repo found') - - - -def main(): - argparser = argparse.ArgumentParser() - argparser.add_argument( - 'project', - default=os.environ.get('CIRCLE_PROJECT_USERNAME'), - nargs='?' - ) - argparser.add_argument( - 'repo', - default=os.environ.get('CIRCLE_PROJECT_REPONAME'), - nargs='?' - ) - - argparser.add_argument( - '--pr-number', - type=int, - nargs='?' - ) - argparser.add_argument( - '--branch-name', - nargs='?' - ) - - args = argparser.parse_args() - - pr_number = None - branch_name = None - - if args.pr_number: - pr_number = args.pr_number - else: - if 'CIRCLE_PR_NUMBER' in os.environ: - # When PR is from a fork - pr_number = int(os.environ['CIRCLE_PR_NUMBER']) - else: - if args.branch_name: - branch_name = args.branch_name - else: - if 'CIRCLE_COMPARE_URL' in os.environ: - # Post merge, where we must have CIRCLE_COMPARE_URL override CIRCLE_BRANCH - if '...' in os.environ['CIRCLE_COMPARE_URL']: - print(os.environ['CIRCLE_COMPARE_URL'].split('/')[-1]) - return - if 'CIRCLE_BRANCH' in os.environ: - branch_name = os.environ['CIRCLE_BRANCH'] - else: - print("Must provide one of --branch-name or --pr-number", file=sys.stderr) - sys.exit(1) - - if pr_number: - print(from_pr(args.project, args.repo, pr_number)) - elif branch_name: - print(from_branch(args.project, args.repo, branch_name)) - else: - raise ValueError('Neither pr_number nor branch were set') - - -if __name__ == '__main__': - main() diff --git a/.github/scripts/determine-hub-deployments.py b/.github/scripts/determine-hub-deployments.py index 36bfecce9..3172858b5 100755 --- a/.github/scripts/determine-hub-deployments.py +++ b/.github/scripts/determine-hub-deployments.py @@ -53,6 +53,7 @@ def main(args): "--ignore", "-i", nargs="*", + action="extend", default=["template"], help="Ignore one or more deployment targets." ) diff --git a/.github/workflows/deploy-hubs.yaml b/.github/workflows/deploy-hubs.yaml index 4852593a1..de50a511c 100644 --- a/.github/workflows/deploy-hubs.yaml +++ b/.github/workflows/deploy-hubs.yaml @@ -57,7 +57,6 @@ jobs: run: | python -m pip install --upgrade pip pip install -r requirements.txt - pip install --force-reinstall git+https://github.com/shaneknapp/hubploy.git@major-refactor - name: Auth to gcloud if: ${{ env.DEPLOY }} @@ -103,7 +102,7 @@ jobs: echo "Deploying single-user image and hub config to ${deployment}" hubploy --verbose deploy --timeout 30m ${deployment} hub staging echo - done < <(python .github/scripts/determine-hub-deployments.py --only-deploy gradebook logodev shiny stat159 stat20 nature a11y ugr01 data101 astro biology cee dev publichealth eecs julia data102 ischool data8 data100) + done < <(python .github/scripts/determine-hub-deployments.py --ignore edx) deploy-hubs-to-prod: if: github.event_name == 'push' && github.ref == 'refs/heads/prod' @@ -152,7 +151,6 @@ jobs: run: | python -m pip install --upgrade pip pip install -r requirements.txt - pip install --force-reinstall git+https://github.com/shaneknapp/hubploy.git@major-refactor - name: Auth to gcloud if: ${{ env.DEPLOY }} @@ -198,4 +196,4 @@ jobs: echo "Deploying single-user image and hub config to ${deployment}" hubploy --verbose deploy --timeout 30m ${deployment} hub prod echo - done < <(python .github/scripts/determine-hub-deployments.py --only-deploy gradebook logodev shiny stat159 stat20 nature a11y ugr01 data101 astro biology cee dev publichealth eecs julia data102 ischool data8 data100) + done < <(python .github/scripts/determine-hub-deployments.py --ignore edx) diff --git a/deployments/datahub/hubploy.yaml b/deployments/datahub/hubploy.yaml index 31a953111..2d20c6263 100644 --- a/deployments/datahub/hubploy.yaml +++ b/deployments/datahub/hubploy.yaml @@ -1,12 +1,6 @@ images: images: - - name: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/primary-user-image - path: images/default - registry: - provider: gcloud - gcloud: - project: ucb-datahub-2018 - service_key: gcr-key.json + - name: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/datahub-user-image:8ae97e7410ae cluster: provider: gcloud diff --git a/deployments/datahub/image/README.md b/deployments/datahub/image/README.md new file mode 100644 index 000000000..ccbf8a246 --- /dev/null +++ b/deployments/datahub/image/README.md @@ -0,0 +1,5 @@ +# Datahub Image + +This image is now located [in its own repo](https://github.com/berkeley-dsep-infra/datahub-user-image). + +Please see [the contribution guide](https://github.com/berkeley-dsep-infra/datahub-user-image/blob/main/CONTRIBUTING.md) for instructions on how to propose changes to the image. diff --git a/deployments/datahub/images/default/Dockerfile b/deployments/datahub/images/default/Dockerfile deleted file mode 100644 index 50ddb1a31..000000000 --- a/deployments/datahub/images/default/Dockerfile +++ /dev/null @@ -1,216 +0,0 @@ -FROM buildpack-deps:jammy-scm - -# Set up common env variables -ENV TZ=America/Los_Angeles -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone - -ENV LC_ALL en_US.UTF-8 -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US.UTF-8 -ENV DEBIAN_FRONTEND=noninteractive -ENV NB_USER jovyan -ENV NB_UID 1000 - -ENV CONDA_DIR /srv/conda -ENV R_LIBS_USER /srv/r - -RUN apt-get -qq update --yes && \ - apt-get -qq install --yes locales && \ - echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \ - locale-gen - -RUN adduser --disabled-password --gecos "Default Jupyter user" ${NB_USER} - -# Do not exclude manpages from being installed. -RUN sed -i '/usr.share.man/s/^/#/' /etc/dpkg/dpkg.cfg.d/excludes - -# Reinstall coreutils so that basic man pages are installed. Due to dpkg's -# exclusion, they were not originally installed. -RUN apt --reinstall install coreutils - -# Install all apt packages -COPY apt.txt /tmp/apt.txt -RUN apt-get -qq update --yes && \ - apt-get -qq install --yes --no-install-recommends \ - $(grep -v ^# /tmp/apt.txt) && \ - apt-get -qq purge && \ - apt-get -qq clean && \ - rm -rf /var/lib/apt/lists/* - -# From docker-ce-packaging -# Remove diverted man binary to prevent man-pages being replaced with "minimized" message. See docker/for-linux#639 -RUN if [ "$(dpkg-divert --truename /usr/bin/man)" = "/usr/bin/man.REAL" ]; then \ - rm -f /usr/bin/man; \ - dpkg-divert --quiet --remove --rename /usr/bin/man; \ - fi - -RUN mandb -c - -# Create user owned R libs dir -# This lets users temporarily install packages -RUN install -d -o ${NB_USER} -g ${NB_USER} ${R_LIBS_USER} - -# Install R. -# These packages must be installed into the base stage since they are in system -# paths rather than /srv. -# Pre-built R packages from rspm are built against system libs in jammy. -#ENV R_VERSION=4.3.2-1.2204.0 -#ENV LITTLER_VERSION=0.3.18-2.2204.0 -ENV R_VERSION=4.4.1-1.2204.0 -ENV LITTLER_VERSION=0.3.19-1.2204.0 -RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 -RUN echo "deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/" > /etc/apt/sources.list.d/cran.list -RUN curl --silent --location --fail https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc > /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc -RUN apt-get update --yes > /dev/null && \ - apt-get install --yes -qq r-base-core=${R_VERSION} r-base-dev=${R_VERSION} littler=${LITTLER_VERSION} > /dev/null - -ENV RSTUDIO_URL=https://download2.rstudio.org/server/jammy/amd64/rstudio-server-2024.04.2-764-amd64.deb -RUN curl --silent --location --fail ${RSTUDIO_URL} > /tmp/rstudio.deb && \ - apt install --no-install-recommends --yes /tmp/rstudio.deb && \ - rm /tmp/rstudio.deb - -ENV SHINY_SERVER_URL https://download3.rstudio.org/ubuntu-18.04/x86_64/shiny-server-1.5.20.1002-amd64.deb -RUN curl --silent --location --fail ${SHINY_SERVER_URL} > /tmp/shiny-server.deb && \ - apt install --no-install-recommends --yes /tmp/shiny-server.deb && \ - rm /tmp/shiny-server.deb - -# Install our custom Rprofile.site file -COPY Rprofile.site /usr/lib/R/etc/Rprofile.site -# Create directory for additional R/RStudio setup code -RUN mkdir /etc/R/Rprofile.site.d -# RStudio needs its own config -COPY rsession.conf /etc/rstudio/rsession.conf - -# R_LIBS_USER is set by default in /etc/R/Renviron, which RStudio loads. -# We uncomment the default, and set what we wanna - so it picks up -# the packages we install. Without this, RStudio doesn't see the packages -# that R does. -# Stolen from https://github.com/jupyterhub/repo2docker/blob/6a07a48b2df48168685bb0f993d2a12bd86e23bf/repo2docker/buildpacks/r.py -# To try fight https://community.rstudio.com/t/timedatectl-had-status-1/72060, -# which shows up sometimes when trying to install packages that want the TZ -# timedatectl expects systemd running, which isn't true in our containers -RUN sed -i -e '/^R_LIBS_USER=/s/^/#/' /etc/R/Renviron && \ - echo "R_LIBS_USER=${R_LIBS_USER}" >> /etc/R/Renviron && \ - echo "TZ=${TZ}" >> /etc/R/Renviron - -# For command-line access to quarto, which is installed by rstudio. -RUN ln -s /usr/lib/rstudio-server/bin/quarto/bin/quarto /usr/local/bin/quarto - -# Install R libraries as our user -USER ${NB_USER} - -COPY class-libs.R /tmp/class-libs.R -RUN mkdir -p /tmp/r-packages - -# Install all our base R packages -COPY install.R /tmp/install.R -RUN echo "/tmp/install.R" | /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" /usr/bin/bash -RUN rm -rf /tmp/downloaded_packages - -# DLAB CTAWG, Fall '20 - Summer '21 -# https://github.com/berkeley-dsep-infra/datahub/issues/1942 -COPY r-packages/dlab-ctawg.r /tmp/r-packages/ -RUN echo "/usr/bin/r /tmp/r-packages/dlab-ctawg.r" | /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" /usr/bin/bash -RUN rm -rf /tmp/downloaded_packages - -# Econ 140, Fall '22 and into the future -# https://github.com/berkeley-dsep-infra/datahub/issues/3757 -COPY r-packages/econ-140.r /tmp/r-packages -RUN echo "/usr/bin/r /tmp/r-packages/econ-140.r" | /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" /usr/bin/bash -RUN rm -rf /tmp/downloaded_packages - -# EEP/IAS C119, Spring '23 -# https://github.com/berkeley-dsep-infra/datahub/issues/4203 -COPY r-packages/eep-1118.r /tmp/r-packages -RUN echo "/usr/bin/r /tmp/r-packages/eep-1118.r" | /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" /usr/bin/bash -RUN rm -rf /tmp/downloaded_packages - -# Stat 135, Fall '23 -# https://github.com/berkeley-dsep-infra/datahub/issues/4907 -COPY r-packages/2023-fall-stat-135.r /tmp/r-packages -RUN echo "/usr/bin/r /tmp/r-packages/2023-fall-stat-135.r" | /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" /usr/bin/bash -RUN rm -rf /tmp/downloaded_packages - -# MBA 247, Fall '23 -# issue TBD; discussed over email -COPY r-packages/2023-fall-mba-247.r /tmp/r-packages/ -RUN echo "/usr/bin/r /tmp/r-packages/2023-fall-mba-247.r" | /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" /usr/bin/bash -RUN rm -rf /tmp/downloaded_packages - -# POL SCI 3, SP 24 -# https://github.com/berkeley-dsep-infra/datahub/issues/5496 -COPY r-packages/2024-sp-polsci-3.r /tmp/r-packages/ -RUN echo "/usr/bin/r /tmp/r-packages/2024-sp-polsci-3.r" | /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" /usr/bin/bash -RUN rm -rf /tmp/downloaded_packages - -ENV PATH ${CONDA_DIR}/bin:$PATH:/usr/lib/rstudio-server/bin - -# Set this to be on container storage, rather than under $HOME ENV IPYTHONDIR ${CONDA_DIR}/etc/ipython - -WORKDIR /home/${NB_USER} - -# Install mambaforge as root -USER root -COPY install-mambaforge.bash /tmp/install-mambaforge.bash -RUN echo "/tmp/install-mambaforge.bash" | /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" /usr/bin/bash - -# Install conda environment as our user -USER ${NB_USER} - -COPY infra-requirements.txt /tmp/infra-requirements.txt -COPY environment.yml /tmp/environment.yml - -RUN echo "/srv/conda/bin/mamba env update -p ${CONDA_DIR} -f /tmp/environment.yml" | /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" /usr/bin/bash -RUN echo "/srv/conda/bin/mamba clean -afy" | /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" /usr/bin/bash -RUN echo "/srv/conda/bin/pip install --no-cache -r /tmp/infra-requirements.txt" | /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" /usr/bin/bash - -# 2024-01-13 sknapp: incompatible due to notebook 7 -# RUN jupyter contrib nbextensions install --sys-prefix --symlink && \ -# jupyter nbextensions_configurator enable --sys-prefix - -# Used by MCB32, but incompatible with ipywidgets 8.x -# RUN jupyter nbextension enable --py --sys-prefix qgrid - -# install chromium browser for playwright -# https://github.com/berkeley-dsep-infra/datahub/issues/5062 -# playwright is only availalbe in nbconvert[webpdf], via pip/pypi. -# see also environment.yaml -# DH-164 -ENV PLAYWRIGHT_BROWSERS_PATH ${CONDA_DIR} -RUN playwright install chromium - -# Install IR kernelspec -RUN echo "/usr/bin/r -e \"IRkernel::installspec(user = FALSE, prefix='${CONDA_DIR}')\"" | /usr/bin/time -f "User\t%U\nSys\t%S\nReal\t%E\nCPU\t%P" /usr/bin/bash - -# 2024-01-13 sknapp: incompatible due to notebook 7 -# COPY d8extension.bash /usr/local/sbin/d8extension.bash -# RUN /usr/local/sbin/d8extension.bash - -ENV NLTK_DATA ${CONDA_DIR}/nltk_data -COPY connectors/text.bash /usr/local/sbin/connector-text.bash -RUN /usr/local/sbin/connector-text.bash - -#COPY connectors/2021-fall-phys-188-288.bash /usr/local/sbin/ -#RUN /usr/local/sbin/2021-fall-phys-188-288.bash - -#ESPM, FA 24 -# https://github.com/berkeley-dsep-infra/datahub/issues/5827 -ENV VSCODE_EXTENSIONS=${CONDA_DIR}/share/code-server/extensions -USER root -RUN mkdir -p ${VSCODE_EXTENSIONS} && \ - chown -R jovyan:jovyan ${VSCODE_EXTENSIONS} -USER ${NB_USER} -# Install Code Server Jupyter extension -RUN /srv/conda/bin/code-server --extensions-dir ${VSCODE_EXTENSIONS} --install-extension ms-toolsai.jupyter -# Install Code Server Python extension -RUN /srv/conda/bin/code-server --extensions-dir ${VSCODE_EXTENSIONS} --install-extension ms-python.python - -# clear out /tmp -USER root -RUN rm -rf /tmp/* - -USER ${NB_USER} - -EXPOSE 8888 - -ENTRYPOINT ["tini", "--"] diff --git a/deployments/datahub/images/default/Rprofile.site b/deployments/datahub/images/default/Rprofile.site deleted file mode 100644 index 12886857d..000000000 --- a/deployments/datahub/images/default/Rprofile.site +++ /dev/null @@ -1,36 +0,0 @@ -# Use RStudio's CRAN mirror to get binary packages -# 'latest' just means it has all available versions. -# We can specify version numbers in devtools::install_version -options(repos = c(CRAN = "https://packagemanager.rstudio.com/all/__linux__/jammy/latest")) - -# RStudio's CRAN mirror needs this to figure out which binary package to serve. -# If not set properly, it will just serve up source packages -# Quite hilarious, IMO. -# See https://docs.rstudio.com/rspm/admin/binaries.html -# UPDATE: see the newer setting below... -#options(HTTPUserAgent = sprintf("R/%s R (%s)", getRversion(), paste(getRversion(), R.version$platform, R.version$arch, R.version$os))) - -# If install.packages() is run from an RStudio console, it downloads binaries. -# If it is run from an RStudio terminal, from a Jupyter terminal, or from a -# Jupyter R notebook, it downloads source. Setting the user agent to the string -# below sets it to be binary. This may improve image build times. -# If it works, it'd be better to dynamically set the R version as above, and -# also the RStudio Server version if possible. -options(HTTPUserAgent = "RStudio Server (2024.04.2.764); R (4.4.1 x86_64-pc-linux-gnu x86_64 linux-gnu)") - -## Source extra Rprofile files. -# -# This lets us easily add new code via extraFiles (or other z2jh-related -# magic) without having to rebuild the image each time. - -# Get a list of files in the directory -rprofile_dir <- "/etc/R/Rprofile.site.d/" -rprofile_files <- list.files(rprofile_dir, full.names = TRUE) - -# Source each file -for (rprofile_file in rprofile_files) { - cat("Sourcing:", rprofile_file, "\n") - source(rprofile_file) -} -# Don't keep temporary variables in environment -rm(rprofile_dir, rprofile_files, rprofile_file) diff --git a/deployments/datahub/images/default/apt.txt b/deployments/datahub/images/default/apt.txt deleted file mode 100644 index c49e056e3..000000000 --- a/deployments/datahub/images/default/apt.txt +++ /dev/null @@ -1,87 +0,0 @@ -# editors, pagers, tools -less -tar -vim -micro -tini -locales -nano - -# for ops debugging -net-tools - -# for timing builds -time - -# for jupyter-tree-download, #3979 -zip - -# for nbconvert -pandoc -texlive-xetex -texlive-fonts-recommended -# provides FandolSong-Regular.otf for issue #2714 -texlive-lang-chinese -texlive-plain-generic - -# for LS88-5 and modules basemap -libspatialindex-dev - -# For L&S22 -graphviz - -# for phys 151 -gfortran - -# for issue #2695, fall 2021? -libarmadillo-dev - -# for notebook-as-pdf -libx11-xcb1 -libxtst6 -libxrandr2 -libasound2 -libpangocairo-1.0-0 -libatk1.0-0 -libatk-bridge2.0-0 -libgtk-3-0 -libnss3 -libxss1 -fonts-noto-color-emoji - -# for rstudio -psmisc -sudo -libapparmor1 -lsb-release -libclang-dev - -# R package qpdf -libpoppler-cpp-dev - -# R package imager -libx11-dev -libglpk-dev -libgmp3-dev -libxml2-dev - -# R package units -libudunits2-dev - -# Used by littler for IRkernel::installspec -libzmq5 - -# MUSIC 30: musescore3 -# https://github.com/berkeley-dsep-infra/datahub/issues/5047 -musescore3 - -# additional R packages -# https://github.com/berkeley-dsep-infra/datahub/issues/5061 -libgdal-dev -libgeos-dev -libproj-dev -libmysqlclient-dev - -# 2024-08 workshop -# https://github.com/berkeley-dsep-infra/datahub/issues/5906 -man diff --git a/deployments/datahub/images/default/class-libs.R b/deployments/datahub/images/default/class-libs.R deleted file mode 100644 index 1c9343537..000000000 --- a/deployments/datahub/images/default/class-libs.R +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env Rscript - -class_libs_install_version <- function(class_name, class_libs) { - print(paste("Installing packages for", class_name)) - for (i in seq(1, length(class_libs), 2)) { - installed_packages <- rownames(installed.packages()) - package_name = class_libs[i] - version = class_libs[i+1] - # Only install packages if they haven't already been installed! - # devtools doesn't do that by default - if (!package_name %in% installed_packages) { - print(paste("Installing", package_name, version)) - devtools::install_version(package_name, version, quiet=TRUE) - } else { - # FIXME: This ignores version incompatibilities :'( - print(paste("Not installing", package_name, " as it is already installed")) - } - } - print(paste("Done installing packages for", class_name)) -} diff --git a/deployments/datahub/images/default/connectors/2021-fall-phys-188-288.bash b/deployments/datahub/images/default/connectors/2021-fall-phys-188-288.bash deleted file mode 100755 index bdab34a70..000000000 --- a/deployments/datahub/images/default/connectors/2021-fall-phys-188-288.bash +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -set -euo pipefail - -cd ${CONDA_DIR} -git clone https://github.com/HerculesJack/assignment-1910 -cd assignment-1910 -bash install.sh > /dev/null 2>&1 -rm -rf assignment-1910 \ No newline at end of file diff --git a/deployments/datahub/images/default/connectors/text.bash b/deployments/datahub/images/default/connectors/text.bash deleted file mode 100755 index f9e7284f6..000000000 --- a/deployments/datahub/images/default/connectors/text.bash +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -set -euo pipefail - -mkdir ${NLTK_DATA} - -# MODEL_____________________ TERMS____ -# averaged_perceptron_tagger: 17sp 16fa -# cmudict: 16fa -# maxent_ne_chunker: 17sp 16fa -# punkt: 17sp 16fa -# stopwords: 17sp -# wordnet: 17sp 16fa -# words: 17sp 16fa -python -m nltk.downloader -d ${NLTK_DATA} \ - averaged_perceptron_tagger \ - cmudict \ - maxent_ne_chunker \ - punkt \ - stopwords \ - wordnet \ - words diff --git a/deployments/datahub/images/default/d8extension.bash b/deployments/datahub/images/default/d8extension.bash deleted file mode 100755 index 3f8fdbd13..000000000 --- a/deployments/datahub/images/default/d8extension.bash +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash -set -e - -cd /tmp -git clone https://github.com/minrk/nbextension-scratchpad -cd nbextension-scratchpad/ -git checkout e92fa23 -cd .. -jupyter nbextension install --sys-prefix nbextension-scratchpad -jupyter nbextension enable --sys-prefix nbextension-scratchpad/main -cd .. -rm -rf /tmp/nbextension-scratchpad diff --git a/deployments/datahub/images/default/environment.yml b/deployments/datahub/images/default/environment.yml deleted file mode 100644 index fc23d4e9b..000000000 --- a/deployments/datahub/images/default/environment.yml +++ /dev/null @@ -1,240 +0,0 @@ -# https://docs.conda.io/projects/conda-build/en/latest/resources/package-spec.html#examples -# For conda, == is exact constraint, while = is fuzzy constraint. -# pip uses https://peps.python.org/pep-0440/ which does not have =. -dependencies: -- nodejs=18.* -- traitlets=5.9.* -- pip=22.2.* -- python=3.11.* -- syncthing==1.18.6 -- nbclassic==1.0.0 - -# gh-scoped-creds needs a newer version of git than what jammy provides -- git==2.45.1 - -# pymc3 needs this -- mkl-service=2.4.* - -# Base scientific packages that other conda packages we install depend on -# We don't want to have conda packages depend on pip packages if possible -- numpy=1.26.* -- matplotlib=3.9.* -- scipy=1.10.0 -- ipympl=0.9.* -- pandas==2.2.2 -- statsmodels=0.13.5 -- scikit-learn=1.4.* -- seaborn=0.13.2 -# - bokeh=2.3.* -- decorator=5.0.* -- networkx=2.6.* -- spacy=3.7.3 -- nltk=3.6.* - -# 3577, L&S 22, Spring 2023 -# Packages listed below will be used during SP 25 -# - spacy-model-en_core_web_sm=3.4.0 -# - spacy-model-en_core_web_md=3.4.0 -# - lemminflect=0.2.2 - -# EPS88, data100 -# https://github.com/berkeley-dsep-infra/datahub/issues/1796 -# https://github.com/berkeley-dsep-infra/datahub/issues/2824 -- shapely==2.0.4 -- cartopy==0.23.0 - -# ls 88-3; neuro -- pillow=10.3.0 - -# music 30, spring 2022, https://github.com/berkeley-dsep-infra/datahub/issues/3180 -- pysoundfile==0.10.3.post1 - -# ds100, spring 2022, #3208 -- python-kaleido==0.2.1 - -# https://github.com/berkeley-dsep-infra/datahub/issues/3305, spring 2022 -- fenics==2019.1.0 -- mshr==2019.1.0 - -# https://github.com/berkeley-dsep-infra/datahub/issues/3324, spring 2022 -- gdown==4.4.0 - -# Econ 148, Spring 2023, https://github.com/berkeley-dsep-infra/datahub/issues/4067 -- ipykernel = 6.19.4 - -# Econ 148, Spring 2023 https://github.com/berkeley-dsep-infra/datahub/issues/4251 -- sqlalchemy==1.4.46 - -# UGBA88 Fall 2023, https://github.com/berkeley-dsep-infra/datahub/issues/5019 -- pymysql==1.1.0 - -# MUSIC 30, https://github.com/berkeley-dsep-infra/datahub/issues/5047 -- music21==8.3.0 - -# data8; foundation -#- datascience==0.17.6 -- jupyter-server-proxy==4.2.0 -- jupyter-rsession-proxy==2.2.0 -- folium==0.12.1.post1 - -# cogsci131; spring 2018 -#- nose==1.3.7 - -# modules -- beautifulsoup4==4.9.3 - -# PS88 https://github.com/berkeley-dsep-infra/datahub/issues/2925 -- linearmodels==6.0 - -# JupyterLab pypi extensions -- jupyterlab-geojson==3.4.0 - -# INDENG 142 Spring 2021 (future semesters as well) - https://github.com/berkeley-dsep-infra/datahub/issues/2314 -- fancyimpute==0.7.0 - -# ESPM 167 - https://github.com/berkeley-dsep-infra/datahub/issues/2278 -- contextily==1.1.0 - -# EPS 256, https://github.com/berkeley-dsep-infra/datahub/issues/1775 -- obspy==1.4.1 - -# Install VSCode for ESPM courses - https://github.com/berkeley-dsep-infra/datahub/issues/5716 -- jupyter-vscode-proxy==0.6 -- code-server==4.23.1 - -# 2024-08 workshop, #5908 -- tree==2.1.3 -- ruff==0.5.6 - -# Stat 165/265 requires prophet, Spring, 2024 -- prophet==1.1.5 - -# [DH-319] https://github.com/berkeley-dsep-infra/datahub/issues/5827, ESPM 157 -- altair==5.3.0 -- leafmap==0.36.4 -- mystmd==1.3.1 -- jupyterlab-git==0.50.1 - -# [DH-364] https://github.com/berkeley-dsep-infra/datahub/issues/6155 -- openpyxl==3.1.5 - -- pip: - # - -r /tmp/infra-requirements.txt - # Econ 148, Spring 2023 https://github.com/berkeley-dsep-infra/datahub/issues/4093 - - pycountry-convert==0.7.2 - # Options to convert notebooks to pdf; error converting to PDF https://github.com/berkeley-dsep-infra/datahub/issues/5062 - - nbconvert[webpdf] - # Former contents of requirements.txt - # Some things were moved up to conda above. - # - # Until https://github.com/okpy/ok-client/pull/473 is merged - # - git+https://github.com/yuvipanda/ok-client@6961d778741fe61911be4d00beff9bd8afc1edf7 - # - - jupyter-shiny-proxy==1.1 - # - # nb2pdf==0.6.2 commented out by sknapp 06.10.2023 to unblock https://github.com/berkeley-dsep-infra/datahub/issues/5062 - # - - # astr 128/256; spring 2021 - # - astroquery==0.4.6 - # - astropy==5.3.3 - # - dustmaps==1.0.9 - # - george==0.4.0 - # - exoplanet==0.5.2 - # - torch==1.13.1 - # - torchvision==0.14.1 - # - pyvo==1.1 - # - joblib==1.3.1 - # - theano-pymc==1.1.2 - # - pymc3==3.11.2 - - - requests==2.31.0 - - # Google spreadsheets, Eric Van Dusen / Keeley Takimoto / Modules - - gspread-pandas==2.3.0 - - gspread==4.0.1 - - # eps 109; fall 2019 - # sknapp 2024-08-12: keeping this package as i believe it's still useful - - ffmpeg-python==0.2.0 - - # issue #875, global 150Q/pacs 190 - fall 2019 - # sknapp 2024-08-12: keeping this package as i believe it's still useful - - wordcloud==1.9.3 - - # issue #929, SW 282 - fall 2019 - # sknapp 2024-08-12: keeping this package as i believe it's still useful - - pyreadstat==1.2.7 - - # issue 954, EPS24 - fall 2019 - # sknapp 2024-08-12: keeping this package as i believe it's still useful - - xarray==0.19.0 - - # issue 1001, Physics 188/288 - fall 2019 - #- umap-learn==0.5.1 - #- hdbscan==0.8.31 - - # espm 125/bio 105; fall 2019 - # see https://github.com/berkeley-dsep-infra/datahub/issues/1796 - #- bitarray==2.3.0 - #- nlmpy==1.0.1 - - # physics 188/288 fall, 2019 - #- getdist==1.3.1 - #- tensorflow-hub==0.12.0 - #- tensorflow-probability==0.13.0 - - # cs16A/B, spring 2020 - - lcapy==0.96 - - # ds198 mch infodemiology, fall 2020/spring 2021 - # google apis - - google-api-python-client==2.15.0 - - google-auth-httplib2==0.1.0 - - google-auth-oauthlib==0.4.5 - - google==3.0.0 - - # issue 1847; LS 22 spring 2021 - - graphviz==0.17 - - # issue #1903, data h195A fall 2020 - - habanero==0.7.4 - - # https://github.com/berkeley-dsep-infra/datahub/issues/1981 - - ipycanvas==0.13.1 - - # sknapp 2023-12-15: we will eventually move prob140 back to datahub in FA24 - # https://jira-secure.berkeley.edu/browse/DH-203 - - prob140==0.4.1.5 - - sympy==1.8 - - # IB 105 / ESPM 125, Fall 2021 - https://github.com/berkeley-dsep-infra/datahub/issues/2696 - - geonomics==1.3.12 - - # https://github.com/berkeley-dsep-infra/datahub/issues/2950 - # Needed to work with a new enough version of httplib2 - - httplib2>=0.22.0 - - # Used by MCB32, but incompatible with ipywidgets 8.x - # - qgrid==1.3.1 - - # Engineering 7 https://github.com/berkeley-dsep-infra/datahub/issues/5337 - - otter-grader==4.4.1 - # pulled in by ottr, if not pinned to 1.16.2, 1.16.3 causes DH-323 - - jupytext==1.16.2 - - # https://github.com/berkeley-dsep-infra/datahub/issues/5497 - - ottr-force-save-labextension==0.1.1 - - # [DH-231] Installing gh-scoped-creds for https://github.com/berkeley-dsep-infra/datahub/issues/5515 - - gh-scoped-creds==4.1 - - # [DH-243] https://github.com/berkeley-dsep-infra/datahub/issues/5571, Legal Studies 123, Spring 24 - - geopandas==0.14.3 - - rtree==1.2.0 - - # [DH-319] https://github.com/berkeley-dsep-infra/datahub/issues/5827, ESPM 157 - - ibis-framework[pandas,duckdb]==9.2.0 - # - jupyterlab_myst==2.4.2 (Causing issues with rendering of the latex output for a CEE course) - - # ATTEMPT TO PUT NEW PACKAGES IN THE CONDA LIST ABOVE FIRST, RATHER THAN PIP diff --git a/deployments/datahub/images/default/infra-requirements.txt b/deployments/datahub/images/default/infra-requirements.txt deleted file mode 100644 index 0fb0bd930..000000000 --- a/deployments/datahub/images/default/infra-requirements.txt +++ /dev/null @@ -1,29 +0,0 @@ -# WARNING: Original source at scripts/infra-packages/requirements.txt -# PLEASE DO NOT EDIT ELSEWHERE -# After editing scripts/infra-packages/requirements.txt, please run -# scripts/infra-packages/sync.bash. - -# This file pins versions of notebook related python packages we want -# across all hubs. This makes sure we don't need to upgrade them -# everwhere one by one. - -# FIXME: Freeze this to get exact versions of all dependencies -notebook==7.0.7 -jupyterlab==4.0.11 -nbgitpuller==1.2.1 -jupyter-resource-usage==1.0.1 -# Matches version in images/hub/Dockerfile -jupyterhub==4.1.6 -appmode==0.8.0 -ipywidgets==8.0.7 -jupyter-tree-download==1.0.1 -git-credential-helpers==0.2 -# Measure popularity of different packages in our hubs -# https://discourse.jupyter.org/t/request-for-implementation-instrument-libraries-actively-used-by-users-on-a-jupyterhub/7994?u=yuvipanda -git+https://github.com/shaneknapp/python-popularity-contest.git@add-error-handling -# RISE is useful for presentations - see https://github.com/berkeley-dsep-infra/datahub/issues/2527 -RISE==5.7.1 -# syncthing for dropbox-like functionality -jupyter-syncthing-proxy==1.0.3 -# jupyter archival tool for easy user downloads -jupyter-archive==3.4.0 diff --git a/deployments/datahub/images/default/install-mambaforge.bash b/deployments/datahub/images/default/install-mambaforge.bash deleted file mode 100755 index 47bf53f1d..000000000 --- a/deployments/datahub/images/default/install-mambaforge.bash +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# This downloads and installs a pinned version of mambaforge -set -ex - -cd $(dirname $0) -MAMBAFORGE_VERSION='24.3.0-0' - -URL="https://github.com/conda-forge/miniforge/releases/download/${MAMBAFORGE_VERSION}/Mambaforge-${MAMBAFORGE_VERSION}-Linux-x86_64.sh" -INSTALLER_PATH=/tmp/mambaforge-installer.sh - -wget --quiet $URL -O ${INSTALLER_PATH} -chmod +x ${INSTALLER_PATH} - -bash ${INSTALLER_PATH} -b -p ${CONDA_DIR} -export PATH="${CONDA_DIR}/bin:$PATH" - -# Do not attempt to auto update conda or dependencies -conda config --system --set auto_update_conda false -conda config --system --set show_channel_urls true - -# empty conda history file, -# which seems to result in some effective pinning of packages in the initial env, -# which we don't intend. -# this file must not be *removed*, however -echo '' > ${CONDA_DIR}/conda-meta/history - -# Clean things out! -conda clean --all -f -y - -# Remove the big installer so we don't increase docker image size too much -rm ${INSTALLER_PATH} - -# Remove the pip cache created as part of installing mambaforge -rm -rf /root/.cache - -chown -R $NB_USER:$NB_USER ${CONDA_DIR} - -conda list -n root diff --git a/deployments/datahub/images/default/install.R b/deployments/datahub/images/default/install.R deleted file mode 100755 index c1b33a09d..000000000 --- a/deployments/datahub/images/default/install.R +++ /dev/null @@ -1,207 +0,0 @@ -#!/usr/bin/env r - -# Install devtools so we can install versioned packages -install.packages("devtools") - -source("/tmp/class-libs.R") - -# install ottr, needs to go first issue #3216 -# install ottr, 1.4.0, issue #5314 -# install ottr, 1.5.0 https://github.com/berkeley-dsep-infra/datahub/issues/5498 -devtools::install_version("ottr", version = "1.5.0", repos = "https://cran.r-project.org", upgrade = "never", quiet = FALSE) - -# dplyr package + backends -# From https://github.com/rocker-org/rocker-versioned2/blob/b8d23396468c5dc73115cce6c5716424d80ffcb0/scripts/install_tidyverse.sh#L30 -dplyr_packages = c( - "dplyr", "1.0.2", - "arrow", "2.0.0", - "dbplyr", "2.0.0", - "DBI", "1.1.0", - "dtplyr", "1.0.1", - "nycflights13", "1.0.2", - "Lahman", "8.0-0", - "RMariaDB", "1.1.0", - "RPostgres", "1.3.0", - "RSQLite", "2.2.1", - "fst", "0.9.4" -) - -# From https://github.com/rocker-org/rocker-versioned2/blob/b8d23396468c5dc73115cce6c5716424d80ffcb0/scripts/install_verse.sh#L82 -publishing_packages = c( - "tinytex", "0.28", - "blogdown", "0.21", - "bookdown", "0.21", - "rticles", "0.18", - "rmdshower", "2.1.1", - "rJava", "0.9-13", - "xaringan", "0.19" -) - -# R packages to be installed that aren't from apt -# Combination of informal requests & rocker image suggestions -# Some of these were already in datahub image -cran_packages = c( - "AER", "1.2-9", - "BH", "1.72.0-3", - "BiocManager", "1.30.10", - "DBI", "1.1.0", - "FNN", "1.1.3", - "IRkernel", "1.1.1", - "Matrix", "1.3-0", - "R.methodsS3", "1.8.1", - "R.oo", "1.24.0", - "R.utils", "2.10.1", - "RCSF", "1.0.2", - "RColorBrewer", "1.1-2", - "RCurl", "1.98-1.2", - "RNetCDF", "2.4-2", - "RandomFields", "3.3.8", - "RandomFieldsUtils", "0.5.3", - "Rcpp", "1.0.5", - "RcppProgress", "0.4.2", - "assertthat", "0.2.1", - "base64enc", "0.1-3", - "bibtex", "0.4.2.3", - "bindrcpp", "0.2.2", - "broom", "0.7.3", - "checkr", "0.5.0", - "clipr", "0.7.1", - "crayon", "1.3.4", - "crosstalk", "1.1.0.1", - "curl", "4.3", - "data.table", "1.13.6", - "dichromat", "2.0-0", - "digest", "0.6.27", - "docopt", "0.7.1", - "e1071", "1.7-4", - "evaluate", "0.14", - "forcats", "0.5.0", - "future", "1.21.0", - "gdalUtils", "2.0.3.2", - "gdtools", "0.2.3", - "geoR", "1.8-1", - "geometry", "0.4.5", - "geosphere", "1.5-10", - "gert", "1.0.2", - "ggplot2", "3.3.3", - "git2r", "0.27.1", - "globals", "0.14.0", - "glue", "1.4.2", - "gstat", "2.0-6", - "haven", "2.3.1", - "hdf5r", "1.3.3", - "here", "1.0.1", - "highr", "0.8", - "hms", "0.5.3", - "htmlwidgets", "1.5.3", - "httpuv", "1.5.4", - "httr", "1.4.2", - "intervals", "0.15.2", - "ivpack", "1.2", - "jsonlite", "1.7.2", - "knitr", "1.30", - "units", "0.6-7", - "leafem", "0.1.3", - "leaflet", "2.0.3", - "leafpop", "0.0.6", - "leafsync", "0.1.0", - "learnr", "0.11.2", - "lfe", "3.0-0", - "linprog", "0.9-2", - "listenv", "0.8.0", - "lpSolve", "5.6.15", - "lubridate", "1.7.9.2", - "lwgeom", "0.2-13", - "magic", "1.5-9", - "manipulateWidget", "0.10.1", - "mapdata", "2.3.0", - "mapproj", "1.2.7", - "maptools", "1.0-2", - "mapview", "2.9.0", - "markdown", "1.1", - "matrixStats", "0.57.0", - "memoise", "1.1.0", - "miniUI", "0.1.1.1", - "modelr", "0.1.8", - "ncdf4", "1.17", - "ncmeta", "0.3.0", - "nlme", "3.1-151", - "openssl", "1.4.3", - "packrat", "0.5.0", - "pander", "0.6.3", - "pbdZMQ", "0.3-4", - "pillar", "1.4.7", - "png", "0.1-7", - "praise", "1.0.0", - "proj4", "1.0-10", - "proto", "1.0.0", - "pryr", "0.1.4", - "rapportools", "1.0", - "raster", "3.4-5", - "rdrobust", "2.0.2", - "readr", "1.4.0", - "readxl", "1.3.1", - "redland", "1.0.17-14", - "rematch", "1.0.1", - "repr", "1.1.0", - "reprex", "0.3.0", - "reshape", "0.8.8", - "reticulate", "1.34.0", - "rgeos", "0.5-5", - "rgl", "0.103.5", - "rjson", "0.2.20", - "rlang", "0.4.10", - "rlas", "1.3.8", - "rlist", "0.4.6.1", - "rmarkdown", "2.6", - "rpart", "4.1-15", - "rprojroot", "2.0.2", - "rsconnect", "0.8.16", - "satellite", "1.0.2", - "selectr", "0.4-2", - "shiny", "1.5.0", - "sp", "1.4-4", - "spacetime", "1.2-3", - "spatialreg", "1.1-5", - "spatstat", "1.64-1", - "spatstat.data", "1.7-0", - "spdep", "1.1-5", - "splancs", "2.01-40", - "stargazer", "5.2.3", - "stars", "0.4-3", - "stringi", "1.5.3", - "stringr", "1.4.0", - "summarytools", "0.9.8", - "svglite", "1.2.3.2", - "systemfonts", "0.3.2", - "testit", "0.12", - "testthat", "3.0.4", - "tibble", "3.0.4", - "tidync", "0.2.4", - "tidyr", "1.1.2", - "tidyverse", "1.3.0", - "tmap", "3.2", - "tmaptools", "3.1", - "tufte", "0.9", - "utf8", "1.1.4", - "uuid", "0.1-4", - "viridis", "0.5.1", - "vroom", "1.3.2", - "whoami", "1.3.0", - "widgetframe", "0.3.1", - "withr", "2.3.0", - "xfun", "0.19", - "xml2", "1.3.2", - "xts", "0.12.1", - "yaml", "2.2.1" - ) - -class_libs_install_version("Base packages", cran_packages) -class_libs_install_version("dplyr packages", dplyr_packages) -class_libs_install_version("publishing packages", publishing_packages) - -# Bioconductor packages present in rocker images -# FIXME: Find a way to version these? -# FIXME: Make sure these are binary installs? -BiocManager::install('rhdf5') -BiocManager::install('Rhdf5lib') diff --git a/deployments/datahub/images/default/r-packages/2023-fall-mba-247.r b/deployments/datahub/images/default/r-packages/2023-fall-mba-247.r deleted file mode 100644 index 660f6184f..000000000 --- a/deployments/datahub/images/default/r-packages/2023-fall-mba-247.r +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env Rscript - -source("/tmp/class-libs.R") - -class_name = "MBA 247 Fall 2023" -class_libs = c( - "caret", "6.0-94", - "arules", "1.7-6", - "arulesViz", "1.5-2", - "h2o", "3.42.0.2", - "tm", "0.7-11", - "SnowballC", "0.7.1", - "wordcloud", "2.6", - "pROC", "1.18.4", - "rpart.plot", "3.1.1", - "randomForest","4.7-1.1", - "xgboost","1.7.5.1", - "factoextra","1.0.7" -) -class_libs_install_version(class_name, class_libs) diff --git a/deployments/datahub/images/default/r-packages/2023-fall-stat-135.r b/deployments/datahub/images/default/r-packages/2023-fall-stat-135.r deleted file mode 100644 index b26c0532c..000000000 --- a/deployments/datahub/images/default/r-packages/2023-fall-stat-135.r +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env Rscript - -# https://github.com/berkeley-dsep-infra/datahub/issues/4907 - -source("/tmp/class-libs.R") - -class_name = "Stat 135 Fall 2023" -class_libs = c( - "mosaicData", "0.20.3" -) -class_libs_install_version(class_name, class_libs) diff --git a/deployments/datahub/images/default/r-packages/2024-sp-polsci-3.r b/deployments/datahub/images/default/r-packages/2024-sp-polsci-3.r deleted file mode 100644 index 1115769ca..000000000 --- a/deployments/datahub/images/default/r-packages/2024-sp-polsci-3.r +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env Rscript -# From https://github.com/berkeley-dsep-infra/datahub/issues/5496 -# pol sci 3, spring 2024 and into the future - -print("Installing packages for POL SCI 3") - -source("/tmp/class-libs.R") - -class_name="POL SCI 3" -class_libs = c( - "estimatr", "1.0.2" -) - -class_libs_install_version(class_name, class_libs) - -print("Done installing packages for POL SCI 3") - diff --git a/deployments/datahub/images/default/r-packages/dlab-ctawg.r b/deployments/datahub/images/default/r-packages/dlab-ctawg.r deleted file mode 100644 index 6a15b9086..000000000 --- a/deployments/datahub/images/default/r-packages/dlab-ctawg.r +++ /dev/null @@ -1,15 +0,0 @@ -# https://github.com/berkeley-dsep-infra/datahub/issues/1942 -print("Installing packages for DLab CTAWG") - -source("/tmp/class-libs.R") -class_name = "D-Lab CTAWG" - -class_libs = c( - "qpdf", "1.1", - # needs libpoppler-cpp-dev - "pdftools", "2.3.1", - # needs libx11-dev - "imager", "0.42.3" -) - -class_libs_install_version(class_name, class_libs) diff --git a/deployments/datahub/images/default/r-packages/econ-140.r b/deployments/datahub/images/default/r-packages/econ-140.r deleted file mode 100755 index 93489dfee..000000000 --- a/deployments/datahub/images/default/r-packages/econ-140.r +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env Rscript -# From https://github.com/berkeley-dsep-infra/datahub/issues/3757 -# econ 140, fall 2022 and into the future - -print("Installing packages for ECON 140") - -source("/tmp/class-libs.R") - -class_name="ECON 140" -class_libs = c( - "ipumsr", "0.5.0" -) - -class_libs_install_version(class_name, class_libs) - -print("Done installing packages for ECON 140") - diff --git a/deployments/datahub/images/default/r-packages/eep-1118.r b/deployments/datahub/images/default/r-packages/eep-1118.r deleted file mode 100755 index 051ba7249..000000000 --- a/deployments/datahub/images/default/r-packages/eep-1118.r +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env Rscript -# From https://github.com/berkeley-dsep-infra/datahub/issues/1103 && https://github.com/berkeley-dsep-infra/datahub/issues/4203 -# eep 1118, Spring 2023 - -print("Installing packages for EEP 1118") - -source("/tmp/class-libs.R") - -class_name="EEP 1118" -class_libs = c( - "car", "3.0-10", - "mfx", "1.2-2", - "psych", "2.2.9", - "pacman", "0.5.1" -) - -class_libs_install_version(class_name, class_libs) - -print("Done installing packages for EEP 1118") diff --git a/deployments/datahub/images/default/rsession.conf b/deployments/datahub/images/default/rsession.conf deleted file mode 100644 index 1f82b6b54..000000000 --- a/deployments/datahub/images/default/rsession.conf +++ /dev/null @@ -1,2 +0,0 @@ -# Use binary packages! -r-cran-repos=https://packagemanager.rstudio.com/all/__linux__/jammy/latest diff --git a/deployments/dlab/hubploy.yaml b/deployments/dlab/hubploy.yaml index a82066120..13bb10c5c 100644 --- a/deployments/dlab/hubploy.yaml +++ b/deployments/dlab/hubploy.yaml @@ -1,12 +1,7 @@ images: images: - - name: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/primary-user-image + - name: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/datahub-user-image:8ae97e7410ae path: ../datahub/images/default - registry: - provider: gcloud - gcloud: - project: ucb-datahub-2018 - service_key: gcr-key.json cluster: provider: gcloud diff --git a/deployments/highschool/hubploy.yaml b/deployments/highschool/hubploy.yaml index a82066120..13bb10c5c 100644 --- a/deployments/highschool/hubploy.yaml +++ b/deployments/highschool/hubploy.yaml @@ -1,12 +1,7 @@ images: images: - - name: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/primary-user-image + - name: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/datahub-user-image:8ae97e7410ae path: ../datahub/images/default - registry: - provider: gcloud - gcloud: - project: ucb-datahub-2018 - service_key: gcr-key.json cluster: provider: gcloud diff --git a/deployments/prob140/hubploy.yaml b/deployments/prob140/hubploy.yaml index a82066120..2d20c6263 100644 --- a/deployments/prob140/hubploy.yaml +++ b/deployments/prob140/hubploy.yaml @@ -1,12 +1,6 @@ images: images: - - name: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/primary-user-image - path: ../datahub/images/default - registry: - provider: gcloud - gcloud: - project: ucb-datahub-2018 - service_key: gcr-key.json + - name: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/datahub-user-image:8ae97e7410ae cluster: provider: gcloud diff --git a/deployments/r/hubploy.yaml b/deployments/r/hubploy.yaml index a82066120..2d20c6263 100644 --- a/deployments/r/hubploy.yaml +++ b/deployments/r/hubploy.yaml @@ -1,12 +1,6 @@ images: images: - - name: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/primary-user-image - path: ../datahub/images/default - registry: - provider: gcloud - gcloud: - project: ucb-datahub-2018 - service_key: gcr-key.json + - name: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/datahub-user-image:8ae97e7410ae cluster: provider: gcloud diff --git a/deployments/template/{{cookiecutter.hub_name}}/hubploy.yaml b/deployments/template/{{cookiecutter.hub_name}}/hubploy.yaml index 2209f7e0e..40a9e4fb6 100644 --- a/deployments/template/{{cookiecutter.hub_name}}/hubploy.yaml +++ b/deployments/template/{{cookiecutter.hub_name}}/hubploy.yaml @@ -1,6 +1,6 @@ images: images: - - name: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/primary-user-image:PLACEHOLDER + - name: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/datahub-user-image:8ae97e7410ae cluster: provider: gcloud diff --git a/deployments/workshop/hubploy.yaml b/deployments/workshop/hubploy.yaml index a82066120..2d20c6263 100644 --- a/deployments/workshop/hubploy.yaml +++ b/deployments/workshop/hubploy.yaml @@ -1,12 +1,6 @@ images: images: - - name: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/primary-user-image - path: ../datahub/images/default - registry: - provider: gcloud - gcloud: - project: ucb-datahub-2018 - service_key: gcr-key.json + - name: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/datahub-user-image:8ae97e7410ae cluster: provider: gcloud diff --git a/requirements.txt b/requirements.txt index 399167dc6..5a3c13fba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ # allow for base image specification in hubploy.yaml -git+https://github.com/berkeley-dsep-infra/hubploy.git@89199c76c92bcb9a035a6ed685a6e2538f0cabe2 +git+https://github.com/shaneknapp/hubploy.git@major-refactor pygithub pyrsistent==0.19.3 attrs==23.1.0 chartpress -jupyter-repo2docker==2024.3.0 +jupyter-repo2docker==2024.07.0 myst-parser chardet requests==2.31.0 diff --git a/scripts/user-image-management/repos.txt b/scripts/user-image-management/repos.txt index 0c0b587d8..7105cf4cd 100644 --- a/scripts/user-image-management/repos.txt +++ b/scripts/user-image-management/repos.txt @@ -6,6 +6,7 @@ git@github.com:berkeley-dsep-infra/data8-user-image.git git@github.com:berkeley-dsep-infra/data100-user-image.git git@github.com:berkeley-dsep-infra/data101-user-image.git git@github.com:berkeley-dsep-infra/data102-user-image.git +git@github.com:berkeley-dsep-infra/datahub-user-image.git git@github.com:berkeley-dsep-infra/dev-primary-image.git git@github.com:berkeley-dsep-infra/dev-secondary-image.git git@github.com:berkeley-dsep-infra/edx-user-image.git