diff --git a/deployments/biology/image/Dockerfile b/deployments/biology/image/Dockerfile index 8e11cbfbb..70680021a 100644 --- a/deployments/biology/image/Dockerfile +++ b/deployments/biology/image/Dockerfile @@ -1,4 +1,4 @@ -FROM buildpack-deps:focal-scm +FROM buildpack-deps:jammy-scm ENV TZ=America/Los_Angeles RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone @@ -29,22 +29,11 @@ RUN apt-get update -qq --yes > /dev/null && \ apt-get install --yes -qq \ libpython2.7 > /dev/null -## libraries required for mothur -## libreadline6 required -#RUN apt-get update -qq --yes > /dev/null && \ -# apt-get install --yes -qq \ -# libreadline6-dev > /dev/null - ## library required for fast-PCA & https://github.com/DReichLab/EIG RUN apt-get update -qq --yes && \ apt-get install --yes --no-install-recommends -qq \ libgsl-dev >/dev/null -## library required for running ccb293 package qiime -#RUN apt-get update -qq --yes > /dev/null && \ -# apt-get install --yes -qq \ -# tzdata > /dev/null - # Install these without 'recommended' packages to keep image smaller. 
# Useful utils that folks sort of take for granted RUN apt-get update -qq --yes && \ @@ -64,25 +53,48 @@ RUN apt-get update -qq --yes && \ RUN echo "${LC_ALL} UTF-8" > /etc/locale.gen && \ locale-gen -RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 -RUN echo "deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/" > /etc/apt/sources.list.d/cran.list +# Needed by RStudio +RUN apt-get update -qq --yes && \ + apt-get install --yes --no-install-recommends -qq \ + psmisc \ + sudo \ + libapparmor1 \ + lsb-release \ + libclang-dev \ + libpq5 > /dev/null +# Needed by many R libraries +# Picked up from https://github.com/rocker-org/rocker/blob/9dc3e458d4e92a8f41ccd75687cd7e316e657cc0/r-rspm/focal/Dockerfile +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libgdal-dev \ + libgeos3.10.2 \ + libproj22 \ + libudunits2-0 \ + libxml2 > /dev/null -# Install R packages -# Our pre-built R packages from rspm are built against system libs in focal -# rstan takes forever to compile from source, and needs libnodejs -# So we install older (10.x) nodejs from apt rather than newer from conda -ENV R_VERSION=4.1.2-1.2004.0 +# Install R. +# These packages must be installed into the base stage since they are in system +# paths rather than /srv. +# Pre-built R packages from rspm are built against system libs in jammy. 
+ENV R_VERSION=4.4.1-1.2204.0 +ENV LITTLER_VERSION=0.3.19-1.2204.0 RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 -RUN echo "deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/" > /etc/apt/sources.list.d/cran.list +RUN echo "deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/" > /etc/apt/sources.list.d/cran.list +RUN curl --silent --location --fail https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc > /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc RUN apt-get update -qq --yes > /dev/null && \ apt-get install --yes -qq \ - r-base-core=${R_VERSION} \ - r-base-dev=${R_VERSION} \ - r-cran-littler=0.3.14-1.2004.0 \ - libglpk-dev \ - libzmq5 \ - nodejs npm > /dev/null + r-base-core=${R_VERSION} \ + r-base-dev=${R_VERSION} \ + littler=${LITTLER_VERSION} \ + libglpk-dev \ + libzmq5 \ + nodejs npm > /dev/null + +ENV RSTUDIO_URL=https://download2.rstudio.org/server/jammy/amd64/rstudio-server-2024.04.2-764-amd64.deb +RUN curl --silent --location --fail ${RSTUDIO_URL} > /tmp/rstudio.deb && \ + apt install --no-install-recommends --yes /tmp/rstudio.deb && \ + rm /tmp/rstudio.deb # Install desktop packages RUN apt-get update -qq --yes > /dev/null && \ @@ -123,30 +135,6 @@ RUN apt-get update -qq --yes > /dev/null && \ WORKDIR /home/jovyan -# Needed by RStudio -RUN apt-get update -qq --yes && \ - apt-get install --yes --no-install-recommends -qq \ - psmisc \ - sudo \ - libapparmor1 \ - lsb-release \ - libclang-dev \ - libpq5 > /dev/null - -ENV RSTUDIO_URL https://download2.rstudio.org/server/bionic/amd64/rstudio-server-2021.09.1-372-amd64.deb -RUN curl --silent --location --fail ${RSTUDIO_URL} > /tmp/rstudio.deb && \ - dpkg -i /tmp/rstudio.deb && \ - rm /tmp/rstudio.deb - -# Needed by many R libraries -# Picked up from https://github.com/rocker-org/rocker/blob/9dc3e458d4e92a8f41ccd75687cd7e316e657cc0/r-rspm/focal/Dockerfile -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - 
libgdal26 \ - libgeos-3.8.0 \ - libproj15 \ - libudunits2-0 \ - libxml2 > /dev/null # R_LIBS_USER is set by default in /etc/R/Renviron, which RStudio loads. # We uncomment the default, and set what we wanna - so it picks up # the packages we install. Without this, RStudio doesn't see the packages diff --git a/deployments/biology/image/Rprofile.site b/deployments/biology/image/Rprofile.site index 46ca03775..961f50b97 100644 --- a/deployments/biology/image/Rprofile.site +++ b/deployments/biology/image/Rprofile.site @@ -1,10 +1,10 @@ # Use RStudio's CRAN mirror to get binary packages # 'latest' just means it has all available versions. # We can specify version numbers in devtools::install_version -options(repos = c(CRAN = "https://packagemanager.rstudio.com/all/__linux__/focal/latest")) +options(repos = c(CRAN = "https://packagemanager.rstudio.com/all/__linux__/jammy/latest")) # RStudio's CRAN mirror needs this to figure out which binary package to serve. # If not set properly, it will just serve up source packages # Quite hilarious, IMO. # See https://docs.rstudio.com/rspm/admin/binaries.html -options(HTTPUserAgent = sprintf("R/%s R (%s)", getRversion(), paste(getRversion(), R.version$platform, R.version$arch, R.version$os))) \ No newline at end of file +options(HTTPUserAgent = sprintf("R/%s R (%s)", getRversion(), paste(getRversion(), R.version$platform, R.version$arch, R.version$os))) diff --git a/deployments/biology/image/rsession.conf b/deployments/biology/image/rsession.conf index a9ab23c21..1f82b6b54 100644 --- a/deployments/biology/image/rsession.conf +++ b/deployments/biology/image/rsession.conf @@ -1,2 +1,2 @@ # Use binary packages! 
-r-cran-repos=https://packagemanager.rstudio.com/all/__linux__/focal/latest +r-cran-repos=https://packagemanager.rstudio.com/all/__linux__/jammy/latest diff --git a/deployments/datahub/config/common.yaml b/deployments/datahub/config/common.yaml index b9bb996f2..8aff4a141 100644 --- a/deployments/datahub/config/common.yaml +++ b/deployments/datahub/config/common.yaml @@ -64,6 +64,20 @@ jupyterhub: - course::1535811::enrollment_type::teacher - course::1535811::enrollment_type::ta + # MBA 247, Fall 2024, https://github.com/berkeley-dsep-infra/datahub/issues/5923 + course-staff-1536576: + # description: Enable course staff to view and access servers. + # # this role provides permissions to... + scopes: + - admin-ui + - list:users!group=course::1536576 + - admin:servers!group=course::1536576 + - access:servers!group=course::1536576 + # # this role will be assigned to... + groups: + - course::1536576::enrollment_type::teacher + - course::1536576::enrollment_type::ta + nodeSelector: hub.jupyter.org/pool-name: core-pool-2024-05-08 initContainers: @@ -200,3 +214,7 @@ jupyterhub: course::1535590: # Data 6, https://github.com/berkeley-dsep-infra/datahub/issues/5847 mem_limit: 2048M mem_guarantee: 2048M + + course::1537045: # ESPM 157, https://github.com/berkeley-dsep-infra/datahub/issues/5914 + mem_limit: 4096M + mem_guarantee: 4096M diff --git a/deployments/datahub/images/default/Dockerfile b/deployments/datahub/images/default/Dockerfile index 49062d34d..520e07473 100644 --- a/deployments/datahub/images/default/Dockerfile +++ b/deployments/datahub/images/default/Dockerfile @@ -50,16 +50,17 @@ RUN install -d -o ${NB_USER} -g ${NB_USER} ${R_LIBS_USER} # These packages must be installed into the base stage since they are in system # paths rather than /srv. # Pre-built R packages from rspm are built against system libs in jammy. 
-ENV R_VERSION=4.3.2-1.2204.0 -ENV LITTLER_VERSION=0.3.18-2.2204.0 +#ENV R_VERSION=4.3.2-1.2204.0 +#ENV LITTLER_VERSION=0.3.18-2.2204.0 +ENV R_VERSION=4.4.1-1.2204.0 +ENV LITTLER_VERSION=0.3.19-1.2204.0 RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 RUN echo "deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/" > /etc/apt/sources.list.d/cran.list RUN curl --silent --location --fail https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc > /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc RUN apt-get update --yes > /dev/null && \ apt-get install --yes -qq r-base-core=${R_VERSION} r-base-dev=${R_VERSION} littler=${LITTLER_VERSION} > /dev/null -#ENV RSTUDIO_URL https://download2.rstudio.org/server/jammy/amd64/rstudio-server-2023.06.0-421-amd64.deb -ENV RSTUDIO_URL https://download2.rstudio.org/server/jammy/amd64/rstudio-server-2023.12.0-369-amd64.deb +ENV RSTUDIO_URL=https://download2.rstudio.org/server/jammy/amd64/rstudio-server-2024.04.2-764-amd64.deb RUN curl --silent --location --fail ${RSTUDIO_URL} > /tmp/rstudio.deb && \ apt install --no-install-recommends --yes /tmp/rstudio.deb && \ rm /tmp/rstudio.deb @@ -188,6 +189,18 @@ RUN /usr/local/sbin/connector-text.bash #COPY connectors/2021-fall-phys-188-288.bash /usr/local/sbin/ #RUN /usr/local/sbin/2021-fall-phys-188-288.bash +#ESPM, FA 24 +# https://github.com/berkeley-dsep-infra/datahub/issues/5827 +ENV VSCODE_EXTENSIONS=${CONDA_DIR}/share/code-server/extensions +USER root +RUN mkdir -p ${VSCODE_EXTENSIONS} && \ + chown -R jovyan:jovyan ${VSCODE_EXTENSIONS} +USER ${NB_USER} +# Install Code Server Jupyter extension +RUN /srv/conda/bin/code-server --extensions-dir ${VSCODE_EXTENSIONS} --install-extension ms-toolsai.jupyter +# Install Code Server Python extension +RUN /srv/conda/bin/code-server --extensions-dir ${VSCODE_EXTENSIONS} --install-extension ms-python.python + # clear out /tmp USER root RUN rm -rf /tmp/* diff --git 
a/deployments/datahub/images/default/Rprofile.site b/deployments/datahub/images/default/Rprofile.site index 9b6af5395..12886857d 100644 --- a/deployments/datahub/images/default/Rprofile.site +++ b/deployments/datahub/images/default/Rprofile.site @@ -16,8 +16,7 @@ options(repos = c(CRAN = "https://packagemanager.rstudio.com/all/__linux__/jammy # below sets it to be binary. This may improve image build times. # If it works, it'd be better to dynamically set the R version as above, and # also the RStudio Server version if possible. -options(HTTPUserAgent = "RStudio Server (2023.12.0.369); R (4.3.2 x86_64-pc-linux-gnu x86_64 linux-gnu)") - +options(HTTPUserAgent = "RStudio Server (2024.04.2.764); R (4.4.1 x86_64-pc-linux-gnu x86_64 linux-gnu)") ## Source extra Rprofile files. # diff --git a/deployments/datahub/images/default/environment.yml b/deployments/datahub/images/default/environment.yml index bda73d41e..9d5aa9aa2 100644 --- a/deployments/datahub/images/default/environment.yml +++ b/deployments/datahub/images/default/environment.yml @@ -72,13 +72,13 @@ dependencies: - music21==8.3.0 # data8; foundation -- datascience==0.17.6 +#- datascience==0.17.6 - jupyter-server-proxy==4.2.0 - jupyter-rsession-proxy==2.2.0 - folium==0.12.1.post1 # cogsci131; spring 2018 -- nose==1.3.7 +#- nose==1.3.7 # modules - beautifulsoup4==4.9.3 @@ -145,38 +145,41 @@ dependencies: # - theano-pymc==1.1.2 # - pymc3==3.11.2 - # eep 153; spring 2019 - requests==2.31.0 - - Pint==0.17 + # Google spreadsheets, Eric Van Dusen / Keeley Takimoto / Modules - gspread-pandas==2.3.0 - gspread==4.0.1 # eps 109; fall 2019 + # sknapp 2024-08-12: keeping this package as i believe it's still useful - ffmpeg-python==0.2.0 # issue #875, global 150Q/pacs 190 - fall 2019 + # sknapp 2024-08-12: keeping this package as i believe it's still useful - wordcloud==1.9.3 # issue #929, SW 282 - fall 2019 + # sknapp 2024-08-12: keeping this package as i believe it's still useful - pyreadstat==1.2.7 # issue 954, EPS24 - 
fall 2019 + # sknapp 2024-08-12: keeping this package as i believe it's still useful - xarray==0.19.0 # issue 1001, Physics 188/288 - fall 2019 - - umap-learn==0.5.1 - - hdbscan==0.8.31 + #- umap-learn==0.5.1 + #- hdbscan==0.8.31 # espm 125/bio 105; fall 2019 # see https://github.com/berkeley-dsep-infra/datahub/issues/1796 - - bitarray==2.3.0 - - nlmpy==1.0.1 + #- bitarray==2.3.0 + #- nlmpy==1.0.1 # physics 188/288 fall, 2019 - - getdist==1.3.1 - - tensorflow-hub==0.12.0 - - tensorflow-probability==0.13.0 + #- getdist==1.3.1 + #- tensorflow-hub==0.12.0 + #- tensorflow-probability==0.13.0 # cs16A/B, spring 2020 - lcapy==0.96 @@ -228,6 +231,7 @@ dependencies: - rtree==1.2.0 # [DH-319] https://github.com/berkeley-dsep-infra/datahub/issues/5827, ESPM 157 - - ibis-framework[pandas]==9.2.0 + - ibis-framework[pandas,duckdb]==9.2.0 + - jupyterlab_myst==2.4.2 # ATTEMPT TO PUT NEW PACKAGES IN THE CONDA LIST ABOVE FIRST, RATHER THAN PIP diff --git a/deployments/datahub/images/default/r-packages/2023-fall-mba-247.r b/deployments/datahub/images/default/r-packages/2023-fall-mba-247.r index 31b6da47c..660f6184f 100644 --- a/deployments/datahub/images/default/r-packages/2023-fall-mba-247.r +++ b/deployments/datahub/images/default/r-packages/2023-fall-mba-247.r @@ -14,6 +14,7 @@ class_libs = c( "pROC", "1.18.4", "rpart.plot", "3.1.1", "randomForest","4.7-1.1", - "xgboost","1.7.5.1" + "xgboost","1.7.5.1", + "factoextra","1.0.7" ) class_libs_install_version(class_name, class_libs) diff --git a/deployments/dev/images/default/Dockerfile b/deployments/dev/images/default/Dockerfile index f4dc81b29..bf9307b03 100644 --- a/deployments/dev/images/default/Dockerfile +++ b/deployments/dev/images/default/Dockerfile @@ -34,8 +34,8 @@ RUN apt-get -qq update --yes && \ # These packages must be installed into the base stage since they are in system # paths rather than /srv. # Pre-built R packages from rspm are built against system libs in jammy. 
-ENV R_VERSION=4.3.2-1.2204.0 -ENV LITTLER_VERSION=0.3.18-2.2204.0 +ENV R_VERSION=4.4.1-1.2204.0 +ENV LITTLER_VERSION=0.3.19-1.2204.0 RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 RUN echo "deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/" > /etc/apt/sources.list.d/cran.list RUN curl --silent --location --fail https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc > /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc @@ -45,7 +45,7 @@ RUN apt-get update -qq --yes > /dev/null && \ r-base-dev=${R_VERSION} \ littler=${LITTLER_VERSION} > /dev/null -ENV RSTUDIO_URL https://download2.rstudio.org/server/jammy/amd64/rstudio-server-2023.06.0-421-amd64.deb +ENV RSTUDIO_URL=https://download2.rstudio.org/server/jammy/amd64/rstudio-server-2024.04.2-764-amd64.deb RUN curl --silent --location --fail ${RSTUDIO_URL} > /tmp/rstudio.deb && \ apt install --no-install-recommends --yes /tmp/rstudio.deb && \ rm /tmp/rstudio.deb diff --git a/deployments/dev/images/default/Rprofile.site b/deployments/dev/images/default/Rprofile.site index 51f9daba8..a75374482 100644 --- a/deployments/dev/images/default/Rprofile.site +++ b/deployments/dev/images/default/Rprofile.site @@ -17,7 +17,7 @@ options( options( HTTPUserAgent = sprintf( "RStudio Server (%s); R (%s)", - "2022.7.1.554", + "2024.04.2.764", paste(getRversion(), R.version$platform, R.version$arch, R.version$os) ) ) diff --git a/deployments/ischool/image/Dockerfile b/deployments/ischool/image/Dockerfile index 2b3e4c1ad..72835b8f0 100644 --- a/deployments/ischool/image/Dockerfile +++ b/deployments/ischool/image/Dockerfile @@ -1,4 +1,4 @@ -FROM rocker/geospatial:4.3.1 +FROM rocker/geospatial:4.4.1 ENV NB_USER rstudio ENV NB_UID 1000 diff --git a/deployments/publichealth/image/Dockerfile b/deployments/publichealth/image/Dockerfile index ded0c4edd..802e5baec 100644 --- a/deployments/publichealth/image/Dockerfile +++ b/deployments/publichealth/image/Dockerfile @@ -1,4 +1,4 
@@ -FROM ghcr.io/rocker-org/geospatial:4.3.2 +FROM ghcr.io/rocker-org/geospatial:4.4.1 ENV NB_USER rstudio ENV NB_UID 1000 diff --git a/deployments/shiny/image/Dockerfile b/deployments/shiny/image/Dockerfile index 941cfc7ec..9d26e61eb 100644 --- a/deployments/shiny/image/Dockerfile +++ b/deployments/shiny/image/Dockerfile @@ -1,4 +1,4 @@ -FROM rocker/geospatial:4.3.2 +FROM rocker/geospatial:4.4.1 ENV NB_USER rstudio ENV NB_UID 1000 @@ -55,7 +55,7 @@ RUN apt-get update > /dev/null && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -ENV SHINY_SERVER_URL https://download3.rstudio.org/ubuntu-18.04/x86_64/shiny-server-1.5.21.1012-amd64.deb +ENV SHINY_SERVER_URL https://download3.rstudio.org/ubuntu-18.04/x86_64/shiny-server-1.5.22.1017-amd64.deb RUN curl --silent --location --fail ${SHINY_SERVER_URL} > /tmp/shiny-server.deb && \ apt install --no-install-recommends --yes /tmp/shiny-server.deb && \ rm /tmp/shiny-server.deb diff --git a/deployments/stat20/image/Dockerfile b/deployments/stat20/image/Dockerfile index 7fd933152..e83591c4a 100644 --- a/deployments/stat20/image/Dockerfile +++ b/deployments/stat20/image/Dockerfile @@ -1,4 +1,4 @@ -FROM rocker/geospatial:4.3.2 +FROM rocker/geospatial:4.4.1 # https://github.com/rocker-org/rocker-versioned2/wiki/geospatial_e06f866673fa ENV NB_USER rstudio diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 26f9a6322..dbc9ba1b1 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -18,7 +18,7 @@ website: - text: "Contributing" href: admins/pre-reqs.qmd - text: "Admin Tasks" - href: admins/howto/preview-local.qmd + href: admins/howto/documentation.qmd - text: "Policy" href: policy/create_policy.qmd page-navigation: true @@ -30,7 +30,7 @@ website: text: Home - section: "Using DataHub" contents: - - users/services.qmd + - users/features.qmd - users/private-repo.qmd - users/hubs.qmd - users/authentication.qmd @@ -43,7 +43,7 @@ website: - admins/credentials.qmd - section: "Common Administrator Tasks" contents: - - 
admins/howto/preview-local.qmd + - admins/howto/documentation.qmd - admins/howto/dns.qmd - admins/howto/core-pool.qmd - admins/howto/new-hub.qmd @@ -59,10 +59,6 @@ website: - admins/howto/clusterswitch.qmd - admins/howto/github-token.qmd - admins/howto/google-sheets.qmd - - section: "Hub Deployments" - contents: - - admins/deployments/datahub.qmd - - admins/deployments/stat159.qmd - section: "Policy" contents: - policy/create_policy.qmd @@ -74,7 +70,8 @@ website: format: html: - theme: book + theme: cosmo + toc: true #title-block-banner: datahub.svg navbar: left: diff --git a/docs/admins/deployments/datahub.qmd b/docs/admins/deployments/datahub.qmd deleted file mode 100644 index f0f4e3aea..000000000 --- a/docs/admins/deployments/datahub.qmd +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: DataHub ---- - -datahub.berkeley.edu provides standard computing environment to many -foundational courses across diverse disciplines. - -## Image - -The datahub image contains both Python and R environments. A user can -create jupyter notebooks utilizing either Python or R, or can run -RStudio using R or Python. - -The image is currently not based on repo2docker. - -## Resources - -A handful of courses have been granted elevated memory limits within the -hub configuration. - -CDSS staff and a small number of instructors have been given -administrative privileges. diff --git a/docs/admins/deployments/index.qmd b/docs/admins/deployments/index.qmd deleted file mode 100644 index d0bc7e663..000000000 --- a/docs/admins/deployments/index.qmd +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: Hub Deployments ---- diff --git a/docs/admins/howto/calendar-scaler.qmd b/docs/admins/howto/calendar-scaler.qmd index 9ffcc6d46..47304c872 100644 --- a/docs/admins/howto/calendar-scaler.qmd +++ b/docs/admins/howto/calendar-scaler.qmd @@ -131,8 +131,7 @@ functions. When you're ready, create a PR. The deployment workflow is as follows: 1. 
Get all authed-up for `chartpress` by performing the - steps listed - [here](https://docs.datahub.berkeley.edu/en/latest/admins/howto/rebuild-hub-image.html#). + [documented steps](rebuild-hub-image.qmd). 2. Run `chartpress --push` from the root `datahub/` directory. If this succeeds, check your `git status` and add @@ -155,8 +154,7 @@ to change/add/update any packages, you'll need to do the following: 5. Check your git status and diffs, and create a pull request if necessary. 6. Get all authed-up for `chartpress` by performing the - steps listed - [here](https://docs.datahub.berkeley.edu/en/latest/admins/howto/rebuild-hub-image.html#). + [documented steps](rebuild-hub-image.qmd). 7. Run `chartpress --push` from the root `datahub/` directory. If this succeeds, check your `git status` and add diff --git a/docs/admins/howto/clusterswitch.qmd b/docs/admins/howto/clusterswitch.qmd index 1a814051d..5d4d54d06 100644 --- a/docs/admins/howto/clusterswitch.qmd +++ b/docs/admins/howto/clusterswitch.qmd @@ -7,8 +7,7 @@ This document describes how to switch an existing hub to a new cluster. The exa You might find it easier to switch to a new cluster if you're running a [very old k8s version](https://cloud.google.com/kubernetes-engine/docs/release-notes), or in lieu of performing a [cluster credential rotation](https://cloud.google.com/kubernetes-engine/docs/how-to/credential-rotation). Sometimes starting from scratch is easier than an iterative and potentially destructive series of operations. ## Create a new cluster -1. Create a new cluster using the specifications here: - https://docs.datahub.berkeley.edu/en/latest/admins/cluster-config.html +1. Create a new cluster using the specified [configuration](../cluster-config.qmd). 2. Set up helm on the cluster according to the instructions here: http://z2jh.jupyter.org/en/latest/setup-helm.html - Make sure the version of helm you're working with matches the version CircleCI is using. 
@@ -47,7 +46,7 @@ configured on the new cluster. Until this is done, `hubploy` and `helm` will fa 4. Change the corresponding entry in `support/requirements.yaml` to `$cert-manager-version` and commit the changes (do not push). ## Create the node-placeholder k8s namespace -The [calendar autoscaler](https://docs.datahub.berkeley.edu/en/latest/admins/howto/calendar-scaler.html) requires the `node-placeholder` namespace. Run the following command to create it: +The [calendar autoscaler](calendar-scaler.qmd) requires the `node-placeholder` namespace. Run the following command to create it: ```bash kubectl create namespace node-placeholder diff --git a/docs/admins/howto/documentation.qmd b/docs/admins/howto/documentation.qmd new file mode 100644 index 000000000..cf392e233 --- /dev/null +++ b/docs/admins/howto/documentation.qmd @@ -0,0 +1,78 @@ +--- +title: Documentation +--- + +## Overview + +Documentation is managed under the `docs/` folder, and is generated with +[Quarto](https://quarto.org/). It is published to this site, +[docs.datahub.berkeley.edu](https://docs.datahub.berkeley.edu), hosted at GitHub Pages. Content is written +in [markdown](https://quarto.org/docs/authoring/markdown-basics.html). + +## GitHub Pages Hosting + +### CNAME + +The hostname *docs.datahub.berkeley.edu* is registered as a CNAME for *berkeley-dsep-infra.github.io* in campus DNS. We also must specify the CNAME in the datahub repo's [GitHub Pages settings](https://github.com/berkeley-dsep-infra/datahub/settings/pages). GitHub will then know to serve up the Pages content of the datahub repo when it receives web requests at berkeley-dsep-infra.github.io. + +GitHub Pages also needs the file `CNAME` to exist in the base of the gh-pages branch. This is why the file exists in `docs/` directory, since content there gets pushed to gh-pages. + +### Action + +The +[GitHub Action workflow](https://github.com/berkeley-dsep-infra/datahub/actions/workflows/quarto-docs.yml) checks merges for paths matching `docs/`. 
If there are matches, it will checkout the repo and use Quarto to build content in the `docs/` directory and publish static content to the `gh-pages` branch. + +GitHub Pages' `pages-build-deployment` action will then bundle up that content and push it to GitHub's web servers. Changes will only be visible after this step has completed. + +::: {.callout-note appearance="simple"} +Our documentation automation has always run on merges to `staging` branch, not `prod`. +::: + +## Local Development + +You can test documentation changes locally by running Quarto on your own device. This can be done by either rendering the content and viewing the static HTML, or by running Quarto in a preview mode. + +### Render Static HTML + +Navigate to the `docs` directory and run `quarto render`. This will build the +entire website in the `_site` directory. You can then open files in your web +browser. + +You can also choose to render individual files, which saves time if you do not +want to render the whole site. Run `quarto render ./path/to/filename.qmd`, and +then open the corresponding HTML file in the `_site` directory. + +### Live Preview + +Navigate to the `docs` directory and run `quarto preview`. This also causes the whole site to render, but then launches a local web server and a browser that connects to that server. Quarto dynamically rebuilds pages that you modify. Quarto considers this the [ideal workflow](https://quarto.org/docs/tools/text-editors.html#workflow) for authoring content. + +### IDE Support + +Applications like RStudio and VS Code support running the live preview method internally. You may prefer starting the editing process from those applications, and letting them manage the preview lifecycle. + +## Style Guide + +These are some conventions we can use to keep the style consistent: + +- Use backticks (\`example\` yields `example`) for filesystem paths, program names, command execution, or anything that should be rendered in monospace font. 
+- Use asterisks (\*example\* yields *example*) for emphasis or for meaningful terms. +- Don't append colons (:) to headings, although they can appear in normal text. +- When including hyperlinks, try using [descriptive, meaningful text](https://www.w3.org/WAI/WCAG22/quickref/?versions=2.1#link-purpose-in-context), where the purpose can be determined from the linked text. Avoid using terms like, "see this link" or "see here" as the latter are worse for web accessibility and usability. +- Include [alt text](https://quarto.org/docs/authoring/figures.html#alt-text) for each image or figure. +- Try to avoid arbitrarily changing file names as this will change URLs. If it makes sense to change a filename, include a redirect to the previous path in the [document front matter](https://quarto.org/docs/authoring/front-matter.html), using a relative path to the HTML, e.g.: + ```yaml + aliases: + - ../../admins/deployments/stat159.html + ``` + +## Previous Format and Hosting + +This website used to be authored in reStructured Text and was published to readthedocs via a [now disabled webhook](https://github.com/berkeley-dsep-infra/datahub/settings/hooks). The hook would periodically fail, even when there were no documentation-related changes, and that would get in the way of our CI. + +Content was ported from RST to Markdown by using pandoc. + +```bash +pandoc -f rst -t markdown -o output.qmd input.rst +``` + +It then had to be manually cleaned up in various ways. diff --git a/docs/admins/howto/new-hub.qmd b/docs/admins/howto/new-hub.qmd index e2781f338..3554547fd 100644 --- a/docs/admins/howto/new-hub.qmd +++ b/docs/admins/howto/new-hub.qmd @@ -1,5 +1,7 @@ --- title: Create a New Hub +aliases: + - ../../en/latest/admins/howto/new-hub.html --- ## Why create a new hub? 
@@ -386,7 +388,7 @@ prometheus-node-exporter Besides setting defaults, we can dynamically change the placeholder counts by either adding new, or editing existing, [calendar -events](https://docs.datahub.berkeley.edu/en/latest/admins/howto/calendar-scaler.html). +events](calendar-scaler.qmd). This is useful for large courses which can have placeholder nodes set aside for predicatable periods of heavy ramp up. diff --git a/docs/admins/howto/preview-local.qmd b/docs/admins/howto/preview-local.qmd deleted file mode 100644 index 0ed0a3a68..000000000 --- a/docs/admins/howto/preview-local.qmd +++ /dev/null @@ -1,18 +0,0 @@ ---- -title: Develop Documentation ---- - -## Live Preview - -Navigate to the `docs` directory and run `quarto preview`. You can view the -documentation in a browser while you make changes. - -## Render Static HTML - -Navigate to the `docs` directory and run `quarto render`. This will build the -entire website in the `_site` directory. You can then open files in your web -browser. - -You can also render individual files, which saves time if you do not want to -render the whole site. Run `quarto render ./path/to/filename.qmd`, and then open -the corresponding HTML file in the _site directory. diff --git a/docs/admins/structure.qmd b/docs/admins/structure.qmd index 3e3a9cafb..5cc715872 100644 --- a/docs/admins/structure.qmd +++ b/docs/admins/structure.qmd @@ -18,7 +18,7 @@ provided to the user. For example, it controls: 2. Libraries installed, and which versions of those are installed 3. Specific config for Jupyter Notebook or IPython -[repo2docker](https://repo2docker.readthedocs.io/en/latest/) is used to +[repo2docker](https://repo2docker.readthedocs.io) is used to build the actual user image, so you can use any of the [supported config files](https://repo2docker.readthedocs.io/en/latest/config_files.html) to customize the image as you wish. @@ -26,16 +26,16 @@ to customize the image as you wish. 
### Hub Config (`config/` and `secrets/`) All our JupyterHubs are based on [Zero to JupyterHub -(z2jh)](http://z2jh.jupyter.org/). z2jh uses configuration files in +(z2jh)](https://z2jh.jupyter.org/). z2jh uses configuration files in [YAML](https://en.wikipedia.org/wiki/YAML) format to specify exactly how the hub is configured. For example, it controls: -1. RAM available per user -2. Admin user lists -3. User storage information -4. Per-class & Per-user RAM overrides (when classes or individuals need - more RAM) -5. Authentication secret keys +1. RAM available per user +2. Admin user lists +3. User storage information +4. Per-class & Per-user RAM overrides (when classes or individuals need + more RAM) +5. Authentication secret keys These files are split between files that are visible to everyone (`config/`) and files that are visible only to a select few illuminati @@ -44,12 +44,12 @@ illuminati. Files are further split into: -1. `common.yaml` - Configuration common to staging and production - instances of this hub. Most config should be here. -2. `staging.yaml` - Configuration specific to the staging instance of - the hub. -3. `prod.yaml` - Configuration specific to the production instance of - the hub. +1. `common.yaml` - Configuration common to staging and production + instances of this hub. Most config should be here. +2. `staging.yaml` - Configuration specific to the staging instance of + the hub. +3. `prod.yaml` - Configuration specific to the production instance of + the hub. ### `hubploy.yaml` @@ -63,12 +63,8 @@ under `secrets/` and referred to from `hubploy.yaml`. ## Documentation -Documentation is under the `docs/` folder, and is generated with the -[sphinx](http://www.sphinx-doc.org/) project. It is written with the -[reStructuredText -(rst)](http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html) -format. Documentation is automatically published to - and -. 
This is performed via a -[webhook](https://github.com/berkeley-dsep-infra/datahub/settings/hooks) -in the github repo. +Documentation is under the `docs/` folder, and is generated with +[Quarto](https://quarto.org/), where content is written in +[markdown](https://quarto.org/docs/authoring/markdown-basics.html). +Documentation is published to <https://docs.datahub.berkeley.edu> via a +[GitHub Action workflow](https://github.com/berkeley-dsep-infra/datahub/actions/workflows/quarto-docs.yml). diff --git a/docs/users/authentication.qmd b/docs/users/authentication.qmd index e24d3aec1..4191959a5 100644 --- a/docs/users/authentication.qmd +++ b/docs/users/authentication.qmd @@ -2,31 +2,28 @@ title: User Authentication --- -UC Berkeley uses a [Canvas](https://www.instructure.com/canvas/) instance, -called [bcourses.berkeley.edu](https://bcourses.berkeley.edu). Almost -all our hubs use this for authentication, although [not all yet](https://github.com/berkeley-dsep-infra/datahub/issues/1574). +Most of our hubs use a [Canvas](https://www.instructure.com/canvas/) instance, +[bcourses.berkeley.edu](https://bcourses.berkeley.edu), for authentication. The hubs not using `CanvasOAuthenticator` are edx (`LTI1Authenticator`), highschool (`GoogleOAuthenticator`), and workshop (`dummy`). -## Who has access? +## Authorization -Anyone who can log in to bcourses can log into our JupyterHubs. This -includes all berkeley affiliates. If you have a working berkeley.edu -email account, you can most likely log in to bcourses, and hence to +Anyone who can log in to bCourses can log into our Canvas-based hubs. This +includes all Berkeley affiliates. If you have a working berkeley.edu +email account, you can most likely log in to bCourses, and hence to our JupyterHubs. Students have access for 9 months after they graduate. If they have an incomplete, they have 13 months of access instead. 
-## Non-berkeley affiliates +## Non-Berkeley affiliates If someone who doesn't have a berkeley.edu account wants to use -the JupyterHubs, they need to get a [CalNet Sponsored Guest -account](https://calnetweb.berkeley.edu/calnet-departments/calnet-sponsored-guests) -This gives people access to [bcourses](https://bcourses.berkeley.edu), -and hence to all the JupyterHubs. +the JupyterHubs, they need to obtain a [CalNet Sponsored Guest +account](https://calnetweb.berkeley.edu/calnet-departments/calnet-sponsored-guests) and get added to a bCourses course. ## Troubleshooting -If you can log in to [bcourses](https://bcourses.berkeley.edu) but +If you can log in to [bCourses](https://bcourses.berkeley.edu) but not to any of the JupyterHubs, please contact us. -If you can not log in to bcourses, please [contact bcourses support](https://dls.berkeley.edu/services/bcourses-0) +If you can not log in to bCourses, please [contact bCourses support](https://dls.berkeley.edu/services/bcourses-0). diff --git a/docs/users/hubs.qmd b/docs/users/hubs.qmd index e7d558ad1..db0de43f4 100644 --- a/docs/users/hubs.qmd +++ b/docs/users/hubs.qmd @@ -1,70 +1,15 @@ --- -title: JupyterHubs in this repository +title: JupyterHub Deployments +listing: + categories: true + contents: + - "hubs/datahub.qmd" + - "hubs/data100.qmd" + - "hubs/data102.qmd" + - "hubs/edx.qmd" + - "hubs/prob140.qmd" + - "hubs/r.qmd" + - "hubs/shiny.qmd" + - "hubs/stat159.qmd" + - "hubs/stat20.qmd" --- - -## DataHub - -[datahub.berkeley.edu](https://datahub.berkeley.edu) is the \'main\' -JupyterHub for use on UC Berkeley campus. It\'s the largest and most -active hub. It has many Python & R packages installed. - -It runs on [Google Cloud Platform](https://cloud.google.com) in the -`ucb-datahub-2018` project. You can see all config for it under -`deployments/datahub`. - -### Classes - -- The big [data8](http://data8.org/) class. 
-- Active [connector - courses](https://data.berkeley.edu/education/connectors) -- [Data Science Modules](https://data.berkeley.edu/education/modules) -- [Astro - 128/256](https://astro.berkeley.edu/course-information/3958209-astronomy-data-science-laboratory) - -This hub is also the \'default\' when folks wanna use a hub for a short -period of time for any reason without super specific requirements. - -## Prob140 Hub - -A hub specifically for [prob140](http://prob140.org/). Some of the admin -users on `hubs/datahub`{.interpreted-text role="ref"} are students in -prob140 - this would allow them to see the work of other prob140 -students. Hence, this hub is separate until JupyterHub gains features -around restricting admin use. - -It runs on [Google Cloud Platform](https://cloud.google.com) in the -`ucb-datahub-2018` project. You can see all config for it under -`deployments/prob140`. - -## Data 100 - -This hub is for [Data 100](http://www.ds100.org/) which has a unique -user and grading environment. It runs on [Google Cloud -Platform](https://cloud.google.com) in the `ucb-datahub-2018` account. -You can see all config for it under `deployments/data100`. - -Data100 also has shared folders between staff (professors and GSIs) and -students. Staff, assuming they have been added as admins in -`config/common.yaml`, can see a `shared` and a `shared-readwrite` -folder. Students can only see the `shared` folder, which is read-only. -Anything that gets put in `shared-readwrite` is automatically viewable -in `shared`, but as read-only files. The purpose of this is to be able -to share large data files instead of having one per student. - -## Data 102 - -Data 102 runs on [Google Cloud Platform](https://cloud.google.com) in the -`ucb-datahub-2018` project. You can see all config for it under -`deployments/data102`. - -## Data8X Hub - -A hub for the [data8x course on -EdX](https://www.edx.org/professional-certificate/berkeleyx-foundations-of-data-science). 
-This hub is open to use by anyone in the world, using [LTI -Authentication](https://github.com/jupyterhub/ltiauthenticator) to -provide login capability from inside EdX. - -It runs on [Google Cloud Platform](https://cloud.google.com) in the -`data8x-scratch` project. You can see all config for it under -`deployments/data8x`. diff --git a/docs/users/hubs/data100.qmd b/docs/users/hubs/data100.qmd new file mode 100644 index 000000000..451502dc0 --- /dev/null +++ b/docs/users/hubs/data100.qmd @@ -0,0 +1,18 @@ +--- +title: Data 100 +categories: + - course + - jupyterlab + - otter-grader + - shared-folders +--- + +This hub is for [Data 100](http://www.ds100.org/) which has a unique +user and grading environment. + +Data100 has shared folders between staff (professors and GSIs) and +students. Course staff can see a `shared` and a `shared-readwrite` +folder. Students can only see the `shared` folder, which is read-only. +Anything that gets put in `shared-readwrite` is automatically viewable +in `shared`, but as read-only files. The purpose of this is to be able +to share large data files instead of having one per student. diff --git a/docs/users/hubs/data102.qmd b/docs/users/hubs/data102.qmd new file mode 100644 index 000000000..326f0442e --- /dev/null +++ b/docs/users/hubs/data102.qmd @@ -0,0 +1,15 @@ +--- +title: Data 102 +categories: + - course + - jupyterlab + - otter-grader + - shared-folders +--- + +This hub is for [Data 102](http://www.ds102.org/) which has a unique +user and grading environment. + +Data 102 runs on [Google Cloud Platform](https://cloud.google.com) in the +`ucb-datahub-2018` project. You can see all config for it under +`deployments/data102`. 
diff --git a/docs/users/hubs/datahub.qmd b/docs/users/hubs/datahub.qmd new file mode 100644 index 000000000..debfd8564 --- /dev/null +++ b/docs/users/hubs/datahub.qmd @@ -0,0 +1,20 @@ +--- +title: DataHub +categories: + - jupyterlab + - r + - vscode +aliases: + - ../../admins/deployments/datahub.html +--- + +[datahub.berkeley.edu](https://datahub.berkeley.edu) is the *main* JupyterHub +for use at UC Berkeley. It is the largest and most active hub, and provides a +standard computing environment to many foundational courses across diverse +disciplines. + +## Image + +The datahub image contains both Python and R environments. A user can +create Jupyter notebooks utilizing either Python or R, or can run +RStudio using R or Python. diff --git a/docs/users/hubs/edx.qmd b/docs/users/hubs/edx.qmd new file mode 100644 index 000000000..7ddb771dc --- /dev/null +++ b/docs/users/hubs/edx.qmd @@ -0,0 +1,15 @@ +--- +title: Data 8X +categories: + - jupyterlab + - ltiauthenticator +--- + +This hub is for the [data8x course on EdX](https://www.edx.org/professional-certificate/berkeleyx-foundations-of-data-science). +It is open to use by anyone in the world, using [LTI +Authentication](https://github.com/jupyterhub/ltiauthenticator) to +provide login capability from inside EdX. + +It runs on [Google Cloud Platform](https://cloud.google.com) in the +`data8x-scratch` project. You can see all config for it under +`deployments/data8x`. diff --git a/docs/users/hubs/prob140.qmd b/docs/users/hubs/prob140.qmd new file mode 100644 index 000000000..c8555f768 --- /dev/null +++ b/docs/users/hubs/prob140.qmd @@ -0,0 +1,12 @@ +--- +title: Prob 140 +categories: + - course + - jupyterlab + - vscode +--- + +Prob 140 hub exists to isolate student files from the main hub. Some students +in this course might be course staff in another course, or vice versa, so we +isolate their home directories through this hub. It uses the same singleuser +docker image as the main hub. 
diff --git a/docs/users/hubs/r.qmd b/docs/users/hubs/r.qmd new file mode 100644 index 000000000..ff2b3943e --- /dev/null +++ b/docs/users/hubs/r.qmd @@ -0,0 +1,10 @@ +--- +title: R +categories: + - jupyterlab + - r + - rstudio + - vscode +--- + +[r.datahub.berkeley.edu](https://r.datahub.berkeley.edu) uses the same user environment as the main datahub, however it launches RStudio by default instead of JupyterLab. As with the main datahub, people can use R or Python in either authoring environment. diff --git a/docs/users/hubs/shiny.qmd b/docs/users/hubs/shiny.qmd new file mode 100644 index 000000000..44c70a70e --- /dev/null +++ b/docs/users/hubs/shiny.qmd @@ -0,0 +1,9 @@ +--- +title: Shiny +categories: + - r + - rstudio + - shiny +--- + +[shiny.datahub.berkeley.edu](https://shiny.datahub.berkeley.edu) contains the Shiny application services and it launches by default instead of JupyterLab or RStudio. diff --git a/docs/admins/deployments/stat159.qmd b/docs/users/hubs/stat159.qmd similarity index 86% rename from docs/admins/deployments/stat159.qmd rename to docs/users/hubs/stat159.qmd index f49ac96ac..ea559dd3a 100644 --- a/docs/admins/deployments/stat159.qmd +++ b/docs/users/hubs/stat159.qmd @@ -1,5 +1,13 @@ --- title: Stat 159 +categories: + - course + - jupyterlab + - rtc + - shared-folders + - vscode +aliases: + - ../../admins/deployments/stat159.html --- stat159.datahub.berkeley.edu is a course-specific hub for Stat 159 as @@ -12,19 +20,12 @@ Notably the image contains support for RTC. As of March 2023, this requires: ```yaml -- altair==4.2.2 -- boken==2.4.3 -- dask==2023.1.1 - jupyter_server==2.2.1 - jupyterlab==3.6.1 - jupyterlab_server==2.19.0 -- tornado==6.2.0 - git+https:// ``` -Some of these are hard requirements and others were necessary to make -conda happy. - ## Configuration Along with the dependencies, the singleuser server is modified to launch @@ -71,10 +72,3 @@ CanvasAuthenticator can remain in charge of managing groups. 
This will be important for the extremely large courses. It will also be beneficial in that resource allocation can be performed more easily through group affiliations and group properties. - -## Historical Information - -The image has been periodically shared with data100 for when Fernando -has taught both. Going forward, it is probably best to keep them -separate and optionally kept in sync. We don\'t want changes in one -course to come as a surprise to the other. diff --git a/docs/users/hubs/stat20.qmd b/docs/users/hubs/stat20.qmd new file mode 100644 index 000000000..eef7d9907 --- /dev/null +++ b/docs/users/hubs/stat20.qmd @@ -0,0 +1,14 @@ +--- +title: Stat 20 +categories: + - course + - jupyterlab + - quarto + - rstudio + - shared-folders + - shiny +--- + +stat20.datahub.berkeley.edu is a course-specific hub for Stat 20 as +designed by Andrew Bray. It uses RStudio as the primary user interface, +and students can use Quarto to author documents and Shiny to create web applications.