diff --git a/.github/workflows/installer.yml b/.github/workflows/installer.yml index 1f69fe64..04daa9c0 100644 --- a/.github/workflows/installer.yml +++ b/.github/workflows/installer.yml @@ -104,3 +104,4 @@ jobs: env source ${GITHUB_WORKSPACE}/github_action_scripts/set_crate_installer_environment cat ${CRATE_INSTALLER_CRATE_ROOT_HOST_DIR}/config/${CRATE_DOCKER_CRATEWEB_CONFIG_FILENAME} + sudo netstat -pvatn | grep LISTEN diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 0a42047c..9fd3a85e 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -1701,6 +1701,9 @@ Changes resulted in a very large Docker image if the ID of the user creating the image was large. +- Update the installer to provide some example scripts for running + anonymisation, NLP etc under Docker. + https://github.com/ucam-department-of-psychiatry/crate/issues/163 To do ----- diff --git a/installer/example_scripts/anonymise_FULL.sh b/installer/example_scripts/anonymise_FULL.sh new file mode 100755 index 00000000..935d9091 --- /dev/null +++ b/installer/example_scripts/anonymise_FULL.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +# installer/example_scripts/anonymise_FULL.sh + +# ============================================================================== +# +# Copyright (C) 2015, University of Cambridge, Department of Psychiatry. +# Created by Rudolf Cardinal (rnc1001@cam.ac.uk). +# +# This file is part of CRATE. +# +# CRATE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# CRATE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with CRATE. If not, see <https://www.gnu.org/licenses/>. +# +# ============================================================================== + +# Example script to run full anonymisation on the databases specified in the +# anonymisation configuration. + +set -euo pipefail + +THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +# shellcheck source-path=SCRIPTDIR source=set_crate_environment_vars +source "${THISDIR}"/set_crate_environment_vars + +${PYTHON} "${CRATE_HOST_INSTALLER_BASE_DIR}/installer.py" exec "crate_anonymise_multiprocess \ + --nproc ${CRATE_NPROCESSORS} \ + --config ${CRATE_CONTAINER_CONFIG_ANON} \ + --full" \ + 2>&1 \ + | tee "${CRATE_HOST_ANON_LOG}" + +"${THISDIR}"/email_rdbm.sh --subject "Anonymisation finished" --text "FINISHED: anonymisation / full" diff --git a/installer/example_scripts/anonymise_INCREMENTAL.sh b/installer/example_scripts/anonymise_INCREMENTAL.sh new file mode 100755 index 00000000..8a4f667c --- /dev/null +++ b/installer/example_scripts/anonymise_INCREMENTAL.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +# installer/example_scripts/anonymise_INCREMENTAL.sh + +# ============================================================================== +# +# Copyright (C) 2015, University of Cambridge, Department of Psychiatry. +# Created by Rudolf Cardinal (rnc1001@cam.ac.uk). +# +# This file is part of CRATE. +# +# CRATE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# CRATE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with CRATE. If not, see <https://www.gnu.org/licenses/>. +# +# ============================================================================== + +# Example script to run incremental anonymisation on the databases specified in +# the anonymisation configuration. + +set -euo pipefail + +THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +# shellcheck source-path=SCRIPTDIR source=set_crate_environment_vars +source "${THISDIR}"/set_crate_environment_vars + +${PYTHON} "${CRATE_HOST_INSTALLER_BASE_DIR}/installer.py" exec "crate_anonymise_multiprocess \ + --nproc ${CRATE_NPROCESSORS} \ + --config ${CRATE_CONTAINER_CONFIG_ANON} \ + --incremental" \ + 2>&1 \ + | tee "${CRATE_HOST_ANON_LOG}" + +"${THISDIR}"/email_rdbm.sh --subject "Anonymisation finished" --text "FINISHED: anonymisation / incremental" diff --git a/installer/example_scripts/email_rdbm.sh b/installer/example_scripts/email_rdbm.sh new file mode 100755 index 00000000..6c46bbb1 --- /dev/null +++ b/installer/example_scripts/email_rdbm.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash + +# installer/example_scripts/email_rdbm.sh + +# ============================================================================== +# +# Copyright (C) 2015, University of Cambridge, Department of Psychiatry. +# Created by Rudolf Cardinal (rnc1001@cam.ac.uk). +# +# This file is part of CRATE. +# +# CRATE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# CRATE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CRATE. 
If not, see <https://www.gnu.org/licenses/>. +# +# ============================================================================== + +# Example script to email the Research Database Manager. This is configured in +# the CRATE webapp configuration (crateweb_local_settings.py). See the RDBM_* +# and EMAIL_* settings in that file. + + +set -euo pipefail + +THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +# shellcheck source-path=SCRIPTDIR source=set_crate_environment_vars +source "${THISDIR}"/set_crate_environment_vars + +EMAIL_ARGS=(crate_email_rdbm) +for ARG in "$@"; do + EMAIL_ARGS+=("\"${ARG}\"") +done; + + +${PYTHON} "${CRATE_HOST_INSTALLER_BASE_DIR}/installer.py" exec "${EMAIL_ARGS[*]}" diff --git a/installer/example_scripts/generate_draft_data_dict.sh b/installer/example_scripts/generate_draft_data_dict.sh new file mode 100755 index 00000000..673300dd --- /dev/null +++ b/installer/example_scripts/generate_draft_data_dict.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash + +# installer/example_scripts/generate_draft_data_dict.sh + +# ============================================================================== +# +# Copyright (C) 2015, University of Cambridge, Department of Psychiatry. +# Created by Rudolf Cardinal (rnc1001@cam.ac.uk). +# +# This file is part of CRATE. +# +# CRATE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# CRATE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CRATE. If not, see <https://www.gnu.org/licenses/>. 
+# +# ============================================================================== + +# Example script to generate a draft Data Dictionary to use with anonymisation. + + +set -euo pipefail + +THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +# shellcheck source-path=SCRIPTDIR source=set_crate_environment_vars +source "${THISDIR}"/set_crate_environment_vars + +DRAFT_DD=${CRATE_HOST_CONFIG_DIR}/crate_dd_DRAFT.tsv + +${PYTHON} "${CRATE_HOST_INSTALLER_BASE_DIR}/installer.py" exec "crate_anon_draft_dd \ + --config ${CRATE_CONTAINER_CONFIG_ANON} \ + --verbose" \ + 2>&1 \ + 1>${DRAFT_DD} \ + | tee "${CRATE_HOST_DDGEN_LOG}" + +echo If successful, see "${DRAFT_DD}" for the draft data dictionary. +echo A log was kept in "${CRATE_HOST_DDGEN_LOG}" diff --git a/installer/example_scripts/generate_nlp_config.sh b/installer/example_scripts/generate_nlp_config.sh new file mode 100755 index 00000000..11174bd9 --- /dev/null +++ b/installer/example_scripts/generate_nlp_config.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +# installer/example_scripts/generate_nlp_config.sh + +# ============================================================================== +# +# Copyright (C) 2015, University of Cambridge, Department of Psychiatry. +# Created by Rudolf Cardinal (rnc1001@cam.ac.uk). +# +# This file is part of CRATE. +# +# CRATE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# CRATE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CRATE. If not, see <https://www.gnu.org/licenses/>. 
+# +# ============================================================================== + +# Example script to generate a CRATE NLP configuration + +set -euo pipefail + +THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +# shellcheck source-path=SCRIPTDIR source=set_crate_environment_vars +source "${THISDIR}"/set_crate_environment_vars + +${PYTHON} "${CRATE_HOST_INSTALLER_BASE_DIR}/installer.py" exec "crate_nlp \ + --democonfig" diff --git a/installer/example_scripts/load_ons_postcode_database.sh b/installer/example_scripts/load_ons_postcode_database.sh new file mode 100755 index 00000000..f56d5e21 --- /dev/null +++ b/installer/example_scripts/load_ons_postcode_database.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash + +# installer/example_scripts/load_ons_postcode_database.sh + +# ============================================================================== +# +# Copyright (C) 2015, University of Cambridge, Department of Psychiatry. +# Created by Rudolf Cardinal (rnc1001@cam.ac.uk). +# +# This file is part of CRATE. +# +# CRATE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# CRATE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CRATE. If not, see <https://www.gnu.org/licenses/>. 
+# +# ============================================================================== + +# Example script to load the Office for National Statistics Postcode Database +# from spreadsheet files to a database specified by CRATE_ONSPD_URL + +set -euo pipefail + +THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +# shellcheck source-path=SCRIPTDIR source=set_crate_environment_vars +source "${THISDIR}"/set_crate_environment_vars + +# -- REMOVE ONCE CONFIGURED CORRECTLY +echo "Before using this script, please:" +echo "1. Download and extract a copy of ONSPD from e.g. https://geoportal.statistics.gov.uk/search?q=PRD_ONSPD%20NOV_2024 into ${CRATE_HOST_ONSPD_DIR}" +echo "2. Create an empty database and set CRATE_ONSPD_URL in set_crate_environment_vars to point to it." +echo "3. Remove these lines from the script" +exit 0 +# -- REMOVE TO HERE + +${PYTHON} "${CRATE_HOST_INSTALLER_BASE_DIR}/installer.py" exec "crate_postcodes \ + --dir ${CRATE_HOST_ONSPD_DIR}/ \ + --url ${CRATE_ONSPD_URL}" diff --git a/installer/example_scripts/nlp_BIOMARKERS_FULL.sh b/installer/example_scripts/nlp_BIOMARKERS_FULL.sh new file mode 100755 index 00000000..bfb67788 --- /dev/null +++ b/installer/example_scripts/nlp_BIOMARKERS_FULL.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +# installer/example_scripts/nlp_BIOMARKERS_FULL.sh + +# ============================================================================== +# +# Copyright (C) 2015, University of Cambridge, Department of Psychiatry. +# Created by Rudolf Cardinal (rnc1001@cam.ac.uk). +# +# This file is part of CRATE. +# +# CRATE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# CRATE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CRATE. If not, see <https://www.gnu.org/licenses/>. +# +# ============================================================================== + +# Example script to run full "biomarkers" NLP on an anonymised database. + +set -euo pipefail + +THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +# shellcheck source-path=SCRIPTDIR source=set_crate_environment_vars +source "${THISDIR}"/set_crate_environment_vars + +# -- REMOVE ONCE CONFIGURED CORRECTLY +echo "Before using this script, please:" +echo "1. Run ${THISDIR}/generate_nlp_config.sh > ${CRATE_HOST_CONFIG_DIR}/crate_nlp_config.ini" +echo "2. Modify the config file for your setup. See https://crateanon.readthedocs.io/en/latest/nlp/nlp_config.html" +echo "3. Remove these lines from the script" +exit 0 +# -- REMOVE TO HERE + +${PYTHON} "${CRATE_HOST_INSTALLER_BASE_DIR}/installer.py" exec "crate_nlp_multiprocess \ + --nproc ${CRATE_NPROCESSORS} \ + --config ${CRATE_CONTAINER_CONFIG_NLP} \ + --nlpdef crate_biomarkers \ + --full" \ + 2>&1 \ + | tee "${CRATE_HOST_NLP_BIOMARKERS_LOG}" + +"${THISDIR}"/email_rdbm.sh --subject "NLP finished" --text "FINISHED: CRATE biomarkers / full" diff --git a/installer/example_scripts/nlp_BIOMARKERS_INCREMENTAL.sh b/installer/example_scripts/nlp_BIOMARKERS_INCREMENTAL.sh new file mode 100755 index 00000000..a80fc301 --- /dev/null +++ b/installer/example_scripts/nlp_BIOMARKERS_INCREMENTAL.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + + +# installer/example_scripts/nlp_BIOMARKERS_INCREMENTAL.sh + +# ============================================================================== +# +# Copyright (C) 2015, University of Cambridge, Department of Psychiatry. 
+# Created by Rudolf Cardinal (rnc1001@cam.ac.uk). +# +# This file is part of CRATE. +# +# CRATE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# CRATE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CRATE. If not, see <https://www.gnu.org/licenses/>. +# +# ============================================================================== + +# Example script to run incremental "biomarkers" NLP on an anonymised database. + +set -euo pipefail + +THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +# shellcheck source-path=SCRIPTDIR source=set_crate_environment_vars +source "${THISDIR}"/set_crate_environment_vars + +# -- REMOVE ONCE CONFIGURED CORRECTLY +echo "Before using this script, please:" +echo "1. Run ${THISDIR}/generate_nlp_config.sh > ${CRATE_HOST_CONFIG_DIR}/crate_nlp_config.ini" +echo "2. Modify the config file for your setup. See https://crateanon.readthedocs.io/en/latest/nlp/nlp_config.html" +echo "3. 
Remove these lines from the script" +exit 0 +# -- REMOVE TO HERE + +${PYTHON} "${CRATE_HOST_INSTALLER_BASE_DIR}/installer.py" exec "crate_nlp_multiprocess \ + --nproc ${CRATE_NPROCESSORS} \ + --config ${CRATE_CONTAINER_CONFIG_NLP} \ + --nlpdef crate_biomarkers \ + --incremental" \ + 2>&1 \ + | tee "${CRATE_HOST_NLP_BIOMARKERS_LOG}" + +"${THISDIR}"/email_rdbm.sh --subject "NLP finished" --text "FINISHED: CRATE biomarkers / incremental" diff --git a/installer/example_scripts/preprocess_rio.sh b/installer/example_scripts/preprocess_rio.sh new file mode 100755 index 00000000..f58767cb --- /dev/null +++ b/installer/example_scripts/preprocess_rio.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +# installer/example_scripts/preprocess_rio.sh + +# ============================================================================== +# +# Copyright (C) 2015, University of Cambridge, Department of Psychiatry. +# Created by Rudolf Cardinal (rnc1001@cam.ac.uk). +# +# This file is part of CRATE. +# +# CRATE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# CRATE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CRATE. If not, see <https://www.gnu.org/licenses/>. +# +# ============================================================================== + +# Example script to preprocess a RiO database prior to data dictionary +# generation and anonymisation. 
+ +set -euo pipefail + +THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +# shellcheck source-path=SCRIPTDIR source=set_crate_environment_vars +source "${THISDIR}"/set_crate_environment_vars + +# -- REMOVE ONCE CONFIGURED CORRECTLY +echo "Before using this script, please:" +echo "1. Ensure CRATE_SOURCE_RIO_DB_URL in set_crate_environment_vars points to your RiO database and the user has read/write access" +echo "2. Remove these lines from the script" +exit 0 +# -- REMOVE TO HERE + +${PYTHON} "${CRATE_HOST_INSTALLER_BASE_DIR}/installer.py" exec "crate_preprocess_rio \ + --url ${CRATE_SOURCE_RIO_DB_URL} \ + --prognotes_current_only \ + --clindocs_current_only \ + --allergies_current_only \ + --no_audit_info \ + --postcodedb ${CRATE_ONSPD_NAME} \ + --settings_filename ${CRATE_CONTAINER_CONFIG_DIR}/autogen_rio_preprocessor_draft_ddgen_settings.txt" \ + 2>&1 \ + | tee "${CRATE_HOST_PREPROCESS_LOG}" diff --git a/installer/example_scripts/preprocess_systmone.sh b/installer/example_scripts/preprocess_systmone.sh new file mode 100755 index 00000000..b6bbbf78 --- /dev/null +++ b/installer/example_scripts/preprocess_systmone.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +# installer/example_scripts/preprocess_systmone.sh + +# ============================================================================== +# +# Copyright (C) 2015, University of Cambridge, Department of Psychiatry. +# Created by Rudolf Cardinal (rnc1001@cam.ac.uk). +# +# This file is part of CRATE. +# +# CRATE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# CRATE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with CRATE. If not, see <https://www.gnu.org/licenses/>. +# +# ============================================================================== + +# Example script to preprocess a SystmOne database prior to data dictionary +# generation and anonymisation. + + +set -euo pipefail + +THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +# shellcheck source-path=SCRIPTDIR source=set_crate_environment_vars +source "${THISDIR}"/set_crate_environment_vars + +# -- REMOVE ONCE CONFIGURED CORRECTLY +echo "Before using this script, please:" +echo "1. Ensure CRATE_SOURCE_SYSTMONE_DB_URL in set_crate_environment_vars points to your SystmOne database and the user has read/write access" +echo "2. Place a copy of the SystmOne specification in ${CRATE_CONTAINER_SYSTMONE_TPP_SRE_SPEC} (modify this path in set_crate_environment_vars if necessary)" +echo "3. Remove these lines from the script" +exit 0 +# -- REMOVE TO HERE + +${PYTHON} "${CRATE_HOST_INSTALLER_BASE_DIR}/installer.py" exec "crate_preprocess_systmone \ + --url ${CRATE_SOURCE_SYSTMONE_DB_URL} \ + --verbose \ + --systmone_context cpft_dw \ + --postcodedb ${CRATE_ONSPD_NAME}" \ + 2>&1 \ + | tee "${CRATE_HOST_SYSTMONE_PREPROCESS_LOG:-${CRATE_HOST_PREPROCESS_LOG}}" diff --git a/installer/example_scripts/set_crate_environment_vars b/installer/example_scripts/set_crate_environment_vars new file mode 100644 index 00000000..283f459c --- /dev/null +++ b/installer/example_scripts/set_crate_environment_vars @@ -0,0 +1,78 @@ +# =========================================================================== +# CRATE +# =========================================================================== + +# Paths on the Docker container should have the form CRATE_CONTAINER_xxx +# Paths on the Host should have the form CRATE_HOST_xxx + +# Avoid variables starting with CRATE_DOCKER or CRATE_INSTALLER as these are +# used by the installer and Docker setup + +# As seen from the Docker container running CRATE +export 
CRATE_CONTAINER_BASE_DIR=/crate +export CRATE_CONTAINER_FILES_DIR=${CRATE_CONTAINER_BASE_DIR}/files +export CRATE_CONTAINER_LOG_DIR=${CRATE_CONTAINER_FILES_DIR}/logs +export CRATE_CONTAINER_CONFIG_DIR=${CRATE_CONTAINER_BASE_DIR}/cfg + +# Config files +# Anonymisation +export CRATE_CONTAINER_CONFIG_ANON=${CRATE_CONTAINER_CONFIG_DIR}/crate_anon_config.ini + +# NLP +# This is not currently created by the installer: +export CRATE_CONTAINER_CONFIG_NLP=${CRATE_CONTAINER_CONFIG_DIR}/crate_nlp_config.ini + +export CRATE_CONTAINER_SYSTMONE_TPP_SRE_SPEC="${CRATE_CONTAINER_CONFIG_DIR}/systmone_sre_specifications/SpecificationDirectory/Specification v123.csv" + +# Third-party tools +export CRATE_CONTAINER_GATE_DIR=${CRATE_CONTAINER_BASE_DIR}/gate + + +# As seen from the Ubuntu host +export CRATE_HOST_BASE_DIR=@@CRATE_HOST_BASE_DIR@@ +export CRATE_HOST_CONFIG_DIR=@@CRATE_HOST_CONFIG_DIR@@ +export CRATE_HOST_INSTALLER_BASE_DIR=${CRATE_HOST_BASE_DIR}/src/installer +export CRATE_HOST_INSTALLER_VENV=${CRATE_HOST_BASE_DIR}/venv +export CRATE_HOST_ONSPD_DIR=${CRATE_HOST_BASE_DIR}/ons_postcode_database +export PYTHON=${CRATE_HOST_INSTALLER_VENV}/bin/python + + +# Log files +export CRATE_HOST_LOG_DIR=${CRATE_HOST_BASE_DIR}/files/logs +export CRATE_HOST_PREPROCESS_LOG=${CRATE_HOST_LOG_DIR}/preprocess.log +export CRATE_HOST_DDGEN_LOG=${CRATE_HOST_LOG_DIR}/ddgen.log +export CRATE_HOST_ANON_LOG=${CRATE_HOST_LOG_DIR}/anon.log +export CRATE_HOST_NLP_BIOMARKERS_LOG=${CRATE_HOST_LOG_DIR}/nlp_biomarkers.log + + +# Other variables + +# Number of processors to use for various tasks. + +# Some scripts have been seen to fail with duplicate record errors, even with +# MARS_Connection = yes in odbc_user.ini. In this case try setting this to 1. +# Create separate variables for just the failing scripts if necessary. 
+ +GENERIC_NPROCESSORS=24 +export CRATE_NPROCESSORS=${GENERIC_NPROCESSORS} + + +# Database URLs (SQLAlchemy) +# https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls + +# ONS Postcode Database e.g. mssql+pyodbc://@onspd +export CRATE_ONSPD_URL= + +# The database (schema) name of the ONS Postcode Database, as imported by +# CRATE. With SQL Server you have to specify the schema as well as the database +# e.g. onspd.dbo +export CRATE_ONSPD_NAME= + +# RiO source database +export CRATE_SOURCE_RIO_DB_URL= + +# SystmOne source database +export CRATE_SOURCE_SYSTMONE_DB_URL= + +# Set all the environment variables from the Docker setup +source ${CRATE_HOST_CONFIG_DIR}/set_crate_docker_host_envvars diff --git a/installer/installer.py b/installer/installer.py index 9a21fe4f..d34dd0ef 100755 --- a/installer/installer.py +++ b/installer/installer.py @@ -610,6 +610,7 @@ def install(self) -> None: self.create_data_dictionary() if self.should_create_demo_containers(): self.anonymise_demo_data() + self.copy_example_scripts() self.report_status() @@ -1211,6 +1212,9 @@ def create_directories() -> None: crate_files_dir = os.environ.get(DockerEnvVar.FILES_HOST_DIR) Path(crate_files_dir).mkdir(parents=True, exist_ok=True) + crate_logs_dir = os.path.join(crate_files_dir, "logs") + Path(crate_logs_dir).mkdir(parents=True, exist_ok=True) + crate_static_dir = os.environ.get(DockerEnvVar.STATIC_HOST_DIR) Path(crate_static_dir).mkdir(parents=True, exist_ok=True) @@ -1450,6 +1454,29 @@ def anonymise_demo_data(self) -> None: self.info("Anonymising demo data...") self.run_crate_command(["crate_anonymise", "--full"], tty=True) + def copy_example_scripts(self) -> None: + scripts_dir = self.crate_scripts_host_dir() + + if os.path.exists(scripts_dir): + self.info( + "Scripts directory already exists. Not copying examples." 
+ ) + return + + self.info("Copying example scripts...") + shutil.copytree( + self.installer_examples_scripts_host_dir(), scripts_dir + ) + + set_crate_environment_vars = os.path.join( + scripts_dir, "set_crate_environment_vars" + ) + replace_dict = { + "CRATE_HOST_BASE_DIR": self.crate_root_host_dir(), + "CRATE_HOST_CONFIG_DIR": self.getenv(DockerEnvVar.CONFIG_HOST_DIR), + } + self.search_replace_file(set_crate_environment_vars, replace_dict) + def report_status(self) -> None: localhost_url = self.get_crate_server_localhost_url() self.success(f"The CRATE application is running at {localhost_url}") @@ -1658,6 +1685,9 @@ def docker_host_dir(self) -> str: def src_host_dir(self) -> str: return os.path.join(self.installer_host_dir(), os.pardir) + def installer_examples_scripts_host_dir(self) -> str: + return os.path.join(self.installer_host_dir(), "example_scripts") + def installer_host_dir(self) -> str: return os.path.dirname(os.path.realpath(__file__)) @@ -1676,6 +1706,9 @@ def default_gate_bioyodie_resources_host_dir(self) -> str: def crate_root_host_dir(self) -> str: return self.getenv(InstallerEnvVar.CRATE_ROOT_HOST_DIR) + def crate_scripts_host_dir(self) -> str: + return os.path.join(self.crate_root_host_dir(), "scripts") + # ------------------------------------------------------------------------- # Fetching information from the user # -------------------------------------------------------------------------