From 6415abf76cf972cffe2d20e7ec2263279f7d90c7 Mon Sep 17 00:00:00 2001 From: Dr Griffith Rees Date: Mon, 11 Dec 2023 08:14:54 +0000 Subject: [PATCH] feat: add `docker` config for `RStudio` users with `compose` refactor --- .github/workflows/ci.yaml | 6 +- _quarto.yml | 2 + compose.yml | 23 ++--- compose/docs/Dockerfile | 7 +- compose/{ => jupyter}/Dockerfile | 0 compose/linux-compose.yml | 11 +++ compose/mac-compose.yml | 11 +++ .../three_cities_debiasing_workshop.sh | 25 +++++ python/utils.py | 95 ++++++++++++++++--- 9 files changed, 145 insertions(+), 35 deletions(-) rename compose/{ => jupyter}/Dockerfile (100%) create mode 100644 compose/linux-compose.yml create mode 100644 compose/mac-compose.yml create mode 100644 python/debiasing/three_cities_debiasing_workshop.sh diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d6da476b..0cb2abe7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -15,11 +15,11 @@ env: on: pull_request: - branches: ['main', 'doc-deploy', 'ruth-notebook-for-workshop'] + branches: ['main', 'doc-deploy', 'ruth-notebook-for-workshop', 'r-docker-refactor'] paths-ignore: ['docs/**'] push: - branches: ['main', 'doc-deploy', 'ruth-doc-deploy'] + branches: ['main', 'doc-deploy', 'ruth-notebook-for-workshop', 'r-docker-refactor'] concurrency: group: ${{ github.head_ref || github.run_id }} @@ -122,7 +122,7 @@ jobs: run: | # A potentially quicker build option to try in future, requires running in detatched mode # DOCKER_BUILDKIT=1 docker build --no-cache -f compose/docs/Dockerfile --target builder --tag 'clim-recal-docs' . 
- docker compose build + docker compose build docs docker compose up --detach docker cp $(docker compose ps -q docs):/usr/local/apache2/htdocs/ ${{ env.GH_PAGE_PATH }} diff --git a/_quarto.yml b/_quarto.yml index a5e2b808..97a7148e 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -15,6 +15,8 @@ project: - "docs/pipeline.qmd" - "docs/contributing.md" - "python/README.md" + # - "notebooks/Assessing_bc_data/MethodsAssessment_DecWorkshop.Rmd" + # Requires dataset mounted to run notebook toc: True number-sections: True diff --git a/compose.yml b/compose.yml index eacf7f4e..6d8572ac 100644 --- a/compose.yml +++ b/compose.yml @@ -5,24 +5,21 @@ services: jupyter: build: context: . - dockerfile: ./compose/Dockerfile + dockerfile: ./compose/jupyter/Dockerfile target: clim-recal-base ports: - "8888:8888" - # volumes: - # - climate_data:/mnt/vmfileshare - # - type: bind - # source: /Volumes/vmfileshare - # target: /mnt/vmfileshare + volumes: + - .:/home/jovyan:rw docs: build: context: . dockerfile: ./compose/docs/Dockerfile - # target: clim-recal-docs ports: - "8080:80" - # command: quarto preview --port 8080 + volumes: + - .:/home/jovyan rstudio: build: @@ -30,11 +27,5 @@ services: dockerfile: ./compose/server/Dockerfile ports: - "8787:8787" - -# volumes: -# climate_data: -# driver: local -# driver_opts: -# type: none -# device: /Volumes/vmfileshare -# o: bind + volumes: + - .:/home/rstudio diff --git a/compose/docs/Dockerfile b/compose/docs/Dockerfile index 76b3b680..a840dddc 100644 --- a/compose/docs/Dockerfile +++ b/compose/docs/Dockerfile @@ -4,6 +4,7 @@ FROM ghcr.io/quarto-dev/quarto:${QUARTO_VERSION} AS builder ARG PORT=8080 ARG py_ver=3.9 +ENV DEBIAN_FRONTEND=noninteractive # ARG RIG_VERSION="latest" # ARG R_VERSION="release" @@ -15,14 +16,16 @@ ARG py_ver=3.9 # WORKDIR /app # RUN Rscript -e "renv::restore()" # RUN quarto render . -ADD . /app +COPY . 
/app WORKDIR /app # RUN Rscript -e "renv::restore()" EXPOSE ${PORT}:${PORT} # RUN quarto preview --port ${PORT}:${PORT} -RUN apt-get update && apt-get install -y python${py_ver} python3-pip +RUN apt-get update && apt-get install -y python${py_ver} python3-pip r-base r-base-dev RUN pip3 install quartodoc && quartodoc build +RUN Rscript -e 'install.packages("rmarkdown", repos="https://cloud.r-project.org")' + RUN quarto render FROM httpd:alpine diff --git a/compose/Dockerfile b/compose/jupyter/Dockerfile similarity index 100% rename from compose/Dockerfile rename to compose/jupyter/Dockerfile diff --git a/compose/linux-compose.yml b/compose/linux-compose.yml new file mode 100644 index 00000000..6ce3fbf2 --- /dev/null +++ b/compose/linux-compose.yml @@ -0,0 +1,11 @@ +version: "3.8" + +services: + + jupyter: + volumes: + - /mnt/vmfileshare:/mnt/vmfileshare + + rstudio: + volumes: + - /mnt/vmfileshare:/mnt/vmfileshare diff --git a/compose/mac-compose.yml b/compose/mac-compose.yml new file mode 100644 index 00000000..36b0ffcf --- /dev/null +++ b/compose/mac-compose.yml @@ -0,0 +1,11 @@ +version: "3.8" + +services: + + jupyter: + volumes: + - /Volumes/vmfileshare:/mnt/vmfileshare + + rstudio: + volumes: + - /Volumes/vmfileshare:/mnt/vmfileshare diff --git a/python/debiasing/three_cities_debiasing_workshop.sh b/python/debiasing/three_cities_debiasing_workshop.sh new file mode 100644 index 00000000..28f70eee --- /dev/null +++ b/python/debiasing/three_cities_debiasing_workshop.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +declare -a vars=("tasmax" "rainfall" "tasmin") +declare -a runs=("05" "07" "08" "06") +declare -a cities=("Glasgow") +declare -a methods=("quantile_delta_mapping" "quantile_mapping") +declare -a methods_2=("variance_scaling" "delta_method") + +for run in "${runs[@]}"; do + for city in "${cities[@]}"; do + for var in "${vars[@]}"; do + + python preprocess_data.py --mod /mnt/vmfileshare/ClimateData/Cropped/three.cities/CPM/$city --obs
/mnt/vmfileshare/ClimateData/Cropped/three.cities/Hads.updated360/$city -v $var -r $run --out /mnt/vmfileshare/ClimateData/Cropped/three.cities/Preprocessed/workshop/$city/$run/$var --calib_dates 19801201-20101129 --valid_dates 20101130-20201130 + + for method in "${methods[@]}"; do + python run_cmethods.py --input_data_folder /mnt/vmfileshare/ClimateData/Cropped/three.cities/Preprocessed/workshop/$city/$run/$var --out /mnt/vmfileshare/ClimateData/Debiased/three.cities.cropped/workshop/$city/$run --method $method --v $var -p 32 + done + + for method in "${methods_2[@]}"; do + python run_cmethods.py --input_data_folder /mnt/vmfileshare/ClimateData/Cropped/three.cities/Preprocessed/workshop/$city/$run/$var --out /mnt/vmfileshare/ClimateData/Debiased/three.cities.cropped/workshop/$city/$run --method $method --group time.month --v $var -p 32 + done + + done + done +done diff --git a/python/utils.py b/python/utils.py index e25c1728..e0ea784b 100644 --- a/python/utils.py +++ b/python/utils.py @@ -2,12 +2,14 @@ import subprocess from datetime import date, datetime from pathlib import Path -from typing import Any, Final, Generator, Iterable, Optional, Union +from shutil import rmtree +from typing import Any, Callable, Final, Generator, Iterable, Optional, Union DateType = Union[date, str] DATE_FORMAT_STR: Final[str] = "%Y%m%d" DATE_FORMAT_SPLIT_STR: Final[str] = "-" -RSTUDIO_CODE_COPY_PATH: Path = Path("/home/rstudio/*") +RSTUDIO_DOCKER_USER_PATH: Path = Path("/home/rstudio") +JUPYTER_DOCKER_USER_PATH: Path = Path("/home/jovyan") DEBIAN_HOME_PATH: Path = Path("/home/") @@ -111,41 +113,106 @@ def path_iterdir( def make_user( - name: str, + user: str, password: str, - code_path: Path = RSTUDIO_CODE_COPY_PATH, + code_path: Path = RSTUDIO_DOCKER_USER_PATH, user_home_path: Path = DEBIAN_HOME_PATH, ) -> Path: """Make user account and copy code to that environment. 
Args: - name: user and home folder name + user: user and home folder name password: login password code_path: path to copy code from to user path Example: ```pycon >>> import os - >>> from shutil import rmtree >>> if os.geteuid() != 0: ... pytest.skip('requires root permission to run') >>> user_name: str = 'very_unlinkely_test_user' >>> password: str = 'test_pass' >>> code_path: Path = Path('/home/jovyan') - >>> make_user(user_name, password, code_path=code_path) + >>> make_user(user_name, password, code_path=JUPYTER_DOCKER_USER_PATH) PosixPath('/home/very_unlinkely_test_user') >>> Path(f'/home/{user_name}/python/conftest.py').is_file() True - >>> subprocess.run(f'userdel {user_name}', shell=True) - CompletedProcess(args='userdel very_unlinkely_test_user', returncode=0) - >>> rmtree(f'/home/{user_name}') + >>> rm_user(user_name) + 'very_unlinkely_test_user' ``` """ - home_path: Path = user_home_path / name - subprocess.run(f"useradd {name}", shell=True) - subprocess.run(f"echo {name}:{password} | chpasswd", shell=True) + home_path: Path = user_home_path / user + subprocess.run(f"useradd {user}", shell=True) + subprocess.run(f"echo {user}:{password} | chpasswd", shell=True) subprocess.run(f"mkdir {home_path}", shell=True) subprocess.run(f"cp -r {code_path}/* {home_path}", shell=True) - subprocess.run(f"chown -R {name}:{name} home_path", shell=True) + subprocess.run(f"chown -R {user}:{user} {home_path}", shell=True) return home_path + + +def rm_user(user: str, user_home_path: Path = DEBIAN_HOME_PATH) -> str: + """Remove user and user home folder. + + Args: + user: user and home folder name + user_home_path: parent folder containing the `user` home folder + + Example: + ```pycon + >>> import os + >>> if os.geteuid() != 0: + ...
pytest.skip('requires root permission to run') + >>> user_name: str = 'very_unlinkely_test_user' + >>> password: str = 'test_pass' + >>> make_user(user_name, password, code_path=JUPYTER_DOCKER_USER_PATH) + PosixPath('/home/very_unlinkely_test_user') + >>> rm_user(user_name) + 'very_unlinkely_test_user' + + ``` + """ + subprocess.run(f"userdel {user}", shell=True) + rmtree(user_home_path / user) + return user + + +def make_users( + file_path: Path, user_col: str, password_col: str, file_reader: Callable, **kwargs +) -> Generator[Path, None, None]: + """Load a file of usernames and passwords and pass each row to `make_user`. + + Args: + file_path: path to columned file including user names and passwords per row + user_col: name of column for user names + password_col: name of column for passwords + file_reader: function called with `file_path`, returning an iterable of row records + **kwargs: additional parameters to pass to `make_user` + + Example: + ```pycon + >>> import os + >>> if os.geteuid() != 0: + ... pytest.skip('requires root permission to run') + >>> from pandas import read_excel + >>> code_path: Path = Path('/home/jovyan') + >>> def excel_row_iter(path: Path, **kwargs) -> dict: + ... df: DataFrame = read_excel(path, **kwargs) + ... return df.to_dict(orient="records") + >>> test_accounts_path: Path = Path('tests/test_user_accounts.xlsx') + >>> user_paths: tuple[Path, ...] = tuple(make_users( + ... file_path=test_accounts_path, + ... user_col="User Name", + ... password_col="Password", + ... file_reader=excel_row_iter, + ... code_path=JUPYTER_DOCKER_USER_PATH, + ... )) + >>> [(path / 'python' / 'conftest.py').is_file() for path in user_paths] + [True, True, True, True, True] + >>> [rm_user(user_path.name) for user_path in user_paths] + ['sally', 'george', 'jean', 'felicity', 'frank'] + + ``` + """ + for record in file_reader(file_path): + yield make_user(user=record[user_col], password=record[password_col], **kwargs)