-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #186 from ucam-department-of-psychiatry/installer-…
…example-scripts Provide some example scripts for running anonymisation, NLP etc under Docker
- Loading branch information
Showing
14 changed files
with
579 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#!/usr/bin/env bash | ||
|
||
# installer/example_scripts/anonymise_FULL.sh | ||
|
||
# ============================================================================== | ||
# | ||
# Copyright (C) 2015, University of Cambridge, Department of Psychiatry. | ||
# Created by Rudolf Cardinal ([email protected]). | ||
# | ||
# This file is part of CRATE. | ||
# | ||
# CRATE is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# CRATE is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with CRATE. If not, see <https://www.gnu.org/licenses/>. | ||
# | ||
# ============================================================================== | ||
|
||
# Example script to run full anonymisation on the databases specified in the | ||
# anonymisation configuration. | ||
|
||
set -euo pipefail

# Absolute directory containing this script, so sibling files are found
# regardless of the caller's working directory.
THISDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)"
# PYTHON and the CRATE_* variables used below are not defined in this script;
# presumably they are provided by the file sourced here.
# shellcheck source-path=SCRIPTDIR source=set_crate_environment_vars
source "${THISDIR}/set_crate_environment_vars"

# Command line passed, as one string, to "installer.py exec".
ANON_COMMAND="crate_anonymise_multiprocess --nproc ${CRATE_NPROCESSORS} --config ${CRATE_CONTAINER_CONFIG_ANON} --full"

# stderr is merged into stdout so that tee both displays and logs everything.
${PYTHON} "${CRATE_HOST_INSTALLER_BASE_DIR}/installer.py" exec "${ANON_COMMAND}" 2>&1 \
    | tee "${CRATE_HOST_ANON_LOG}"

# With "set -e" and pipefail in force, this line runs only if the pipeline
# above exited successfully.
"${THISDIR}/email_rdbm.sh" --subject "Anonymisation finished" --text "FINISHED: anonymisation / full"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#!/usr/bin/env bash | ||
|
||
# installer/example_scripts/anonymise_INCREMENTAL.sh | ||
|
||
# ============================================================================== | ||
# | ||
# Copyright (C) 2015, University of Cambridge, Department of Psychiatry. | ||
# Created by Rudolf Cardinal ([email protected]). | ||
# | ||
# This file is part of CRATE. | ||
# | ||
# CRATE is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# CRATE is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with CRATE. If not, see <https://www.gnu.org/licenses/>. | ||
# | ||
# ============================================================================== | ||
|
||
# Example script to run incremental anonymisation on the databases specified in | ||
# the anonymisation configuration. | ||
|
||
set -euo pipefail

# Absolute directory containing this script, so sibling files are found
# regardless of the caller's working directory.
THISDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)"
# PYTHON and the CRATE_* variables used below are not defined in this script;
# presumably they are provided by the file sourced here.
# shellcheck source-path=SCRIPTDIR source=set_crate_environment_vars
source "${THISDIR}/set_crate_environment_vars"

# Command line passed, as one string, to "installer.py exec".
ANON_COMMAND="crate_anonymise_multiprocess --nproc ${CRATE_NPROCESSORS} --config ${CRATE_CONTAINER_CONFIG_ANON} --incremental"

# stderr is merged into stdout so that tee both displays and logs everything.
${PYTHON} "${CRATE_HOST_INSTALLER_BASE_DIR}/installer.py" exec "${ANON_COMMAND}" 2>&1 \
    | tee "${CRATE_HOST_ANON_LOG}"

# With "set -e" and pipefail in force, this line runs only if the pipeline
# above exited successfully.
"${THISDIR}/email_rdbm.sh" --subject "Anonymisation finished" --text "FINISHED: anonymisation / incremental"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#!/usr/bin/env bash | ||
|
||
# installer/example_scripts/email_rdbm.sh | ||
|
||
# ============================================================================== | ||
# | ||
# Copyright (C) 2015, University of Cambridge, Department of Psychiatry. | ||
# Created by Rudolf Cardinal ([email protected]). | ||
# | ||
# This file is part of CRATE. | ||
# | ||
# CRATE is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# CRATE is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with CRATE. If not, see <https://www.gnu.org/licenses/>. | ||
# | ||
# ============================================================================== | ||
|
||
# Example script to email the Research Database Manager. This is configured in | ||
# the CRATE webapp configuration (crateweb_local_settings.py). See the RDBM_* | ||
# and EMAIL_* settings in that file. | ||
|
||
|
||
set -euo pipefail

# Absolute directory containing this script, so sibling files are found
# regardless of the caller's working directory.
THISDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)"
# shellcheck source-path=SCRIPTDIR source=set_crate_environment_vars
source "${THISDIR}/set_crate_environment_vars"

# Start with the program name, then append every argument given to this
# script wrapped in literal double quotes -- presumably so that each one is
# still a single word when the joined string is parsed again by the receiver.
# NOTE(review): an argument that itself contains a double quote is not
# escaped here -- confirm callers never pass one.
EMAIL_ARGS=(crate_email_rdbm)
while (( $# > 0 )); do
    EMAIL_ARGS+=("\"${1}\"")
    shift
done

# "${EMAIL_ARGS[*]}" joins all elements into the single string that follows
# "exec" on the installer command line.
${PYTHON} "${CRATE_HOST_INSTALLER_BASE_DIR}/installer.py" exec "${EMAIL_ARGS[*]}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#!/usr/bin/env bash | ||
|
||
# installer/example_scripts/generate_draft_data_dict.sh | ||
|
||
# ============================================================================== | ||
# | ||
# Copyright (C) 2015, University of Cambridge, Department of Psychiatry. | ||
# Created by Rudolf Cardinal ([email protected]). | ||
# | ||
# This file is part of CRATE. | ||
# | ||
# CRATE is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# CRATE is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with CRATE. If not, see <https://www.gnu.org/licenses/>. | ||
# | ||
# ============================================================================== | ||
|
||
# Example script to generate a draft Data Dictionary to use with anonymisation. | ||
|
||
|
||
set -euo pipefail

# Absolute directory containing this script, so sibling files are found
# regardless of the caller's working directory.
THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
# shellcheck source-path=SCRIPTDIR source=set_crate_environment_vars
source "${THISDIR}"/set_crate_environment_vars

# Host-side file that receives the generated draft data dictionary.
DRAFT_DD="${CRATE_HOST_CONFIG_DIR}/crate_dd_DRAFT.tsv"

# Redirection order matters here: "2>&1" first points stderr at the pipe that
# feeds tee (the log), and only then does "1>" send stdout (the data
# dictionary itself) to the draft file. The redirect target is quoted so that
# a path containing whitespace does not fail with "ambiguous redirect".
${PYTHON} "${CRATE_HOST_INSTALLER_BASE_DIR}/installer.py" exec "crate_anon_draft_dd --config ${CRATE_CONTAINER_CONFIG_ANON} --verbose" \
    2>&1 \
    1>"${DRAFT_DD}" \
    | tee "${CRATE_HOST_DDGEN_LOG}"

echo "If successful, see ${DRAFT_DD} for the draft data dictionary."
echo "A log was kept in ${CRATE_HOST_DDGEN_LOG}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!/usr/bin/env bash | ||
|
||
# installer/example_scripts/generate_nlp_config.sh | ||
|
||
# ============================================================================== | ||
# | ||
# Copyright (C) 2015, University of Cambridge, Department of Psychiatry. | ||
# Created by Rudolf Cardinal ([email protected]). | ||
# | ||
# This file is part of CRATE. | ||
# | ||
# CRATE is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# CRATE is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with CRATE. If not, see <https://www.gnu.org/licenses/>. | ||
# | ||
# ============================================================================== | ||
|
||
# Example script to generate a CRATE NLP configuration | ||
|
||
set -euo pipefail

# Absolute directory containing this script, so sibling files are found
# regardless of the caller's working directory.
THISDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)"
# shellcheck source-path=SCRIPTDIR source=set_crate_environment_vars
source "${THISDIR}/set_crate_environment_vars"

# Command line passed, as one string, to "installer.py exec". Output is not
# redirected here, so it goes wherever the caller sends this script's output.
NLP_DEMO_COMMAND="crate_nlp --democonfig"

${PYTHON} "${CRATE_HOST_INSTALLER_BASE_DIR}/installer.py" exec "${NLP_DEMO_COMMAND}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#!/usr/bin/env bash | ||
|
||
# installer/example_scripts/load_ons_postcode_database.sh | ||
|
||
# ============================================================================== | ||
# | ||
# Copyright (C) 2015, University of Cambridge, Department of Psychiatry. | ||
# Created by Rudolf Cardinal ([email protected]). | ||
# | ||
# This file is part of CRATE. | ||
# | ||
# CRATE is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# CRATE is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with CRATE. If not, see <https://www.gnu.org/licenses/>. | ||
# | ||
# ============================================================================== | ||
|
||
# Example script to load the Office for National Statistics Postcode Database | ||
# from spreadsheet files to a database specified by CRATE_ONSPD_URL. | ||
|
||
set -euo pipefail

# Absolute directory containing this script, so sibling files are found
# regardless of the caller's working directory.
THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
# shellcheck source-path=SCRIPTDIR source=set_crate_environment_vars
source "${THISDIR}"/set_crate_environment_vars

# Guard: until the lines between the two markers are deleted, the script only
# prints setup instructions and exits without loading anything.
# -- REMOVE ONCE CONFIGURED CORRECTLY
echo "Before using this script, please:"
echo "1. Download and extract a copy of ONSPD from e.g. https://geoportal.statistics.gov.uk/search?q=PRD_ONSPD%20NOV_2024 into ${CRATE_HOST_ONSPD_DIR}"
echo "2. Create an empty database and set CRATE_ONSPD_URL in set_crate_environment_vars to point to it."
echo "3. Remove these lines from the script"
exit 0
# -- REMOVE TO HERE

# NOTE(review): --dir is given a CRATE_HOST_* path, whereas the other example
# scripts pass CRATE_CONTAINER_* paths to commands run via "exec" -- confirm
# this path is valid where crate_postcodes actually runs.
${PYTHON} "${CRATE_HOST_INSTALLER_BASE_DIR}/installer.py" exec "crate_postcodes --dir ${CRATE_HOST_ONSPD_DIR}/ --url ${CRATE_ONSPD_URL}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
#!/usr/bin/env bash | ||
|
||
# installer/example_scripts/nlp_BIOMARKERS_FULL.sh | ||
|
||
# ============================================================================== | ||
# | ||
# Copyright (C) 2015, University of Cambridge, Department of Psychiatry. | ||
# Created by Rudolf Cardinal ([email protected]). | ||
# | ||
# This file is part of CRATE. | ||
# | ||
# CRATE is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# CRATE is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with CRATE. If not, see <https://www.gnu.org/licenses/>. | ||
# | ||
# ============================================================================== | ||
|
||
# Example script to run full "biomarkers" NLP on an anonymised database. | ||
|
||
set -euo pipefail

# Absolute directory containing this script, so sibling files are found
# regardless of the caller's working directory.
THISDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)"
# shellcheck source-path=SCRIPTDIR source=set_crate_environment_vars
source "${THISDIR}/set_crate_environment_vars"

# Guard: until the lines between the two markers are deleted, the script only
# prints setup instructions and exits without running any NLP.
# -- REMOVE ONCE CONFIGURED CORRECTLY
printf '%s\n' \
    "Before using this script, please:" \
    "1. Run ${THISDIR}/generate_nlp_config.sh > ${CRATE_HOST_CONFIG_DIR}/crate_nlp_config.ini" \
    "2. Modify the config file for your setup. See https://crateanon.readthedocs.io/en/latest/nlp/nlp_config.html" \
    "3. Remove these lines from the script"
exit 0
# -- REMOVE TO HERE

# Command line passed, as one string, to "installer.py exec".
NLP_COMMAND="crate_nlp_multiprocess --nproc ${CRATE_NPROCESSORS} --config ${CRATE_CONTAINER_CONFIG_NLP} --nlpdef crate_biomarkers --full"

# stderr is merged into stdout so that tee both displays and logs everything.
${PYTHON} "${CRATE_HOST_INSTALLER_BASE_DIR}/installer.py" exec "${NLP_COMMAND}" 2>&1 \
    | tee "${CRATE_HOST_NLP_BIOMARKERS_LOG}"

# With "set -e" and pipefail in force, this line runs only if the pipeline
# above exited successfully.
"${THISDIR}/email_rdbm.sh" --subject "NLP finished" --text "FINISHED: CRATE biomarkers / full"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#!/usr/bin/env bash | ||
|
||
|
||
# installer/example_scripts/nlp_BIOMARKERS_INCREMENTAL.sh | ||
|
||
# ============================================================================== | ||
# | ||
# Copyright (C) 2015, University of Cambridge, Department of Psychiatry. | ||
# Created by Rudolf Cardinal ([email protected]). | ||
# | ||
# This file is part of CRATE. | ||
# | ||
# CRATE is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# CRATE is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with CRATE. If not, see <https://www.gnu.org/licenses/>. | ||
# | ||
# ============================================================================== | ||
|
||
# Example script to run incremental "biomarkers" NLP on an anonymised database. | ||
|
||
set -euo pipefail

# Absolute directory containing this script, so sibling files are found
# regardless of the caller's working directory.
THISDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)"
# shellcheck source-path=SCRIPTDIR source=set_crate_environment_vars
source "${THISDIR}/set_crate_environment_vars"

# Guard: until the lines between the two markers are deleted, the script only
# prints setup instructions and exits without running any NLP.
# -- REMOVE ONCE CONFIGURED CORRECTLY
printf '%s\n' \
    "Before using this script, please:" \
    "1. Run ${THISDIR}/generate_nlp_config.sh > ${CRATE_HOST_CONFIG_DIR}/crate_nlp_config.ini" \
    "2. Modify the config file for your setup. See https://crateanon.readthedocs.io/en/latest/nlp/nlp_config.html" \
    "3. Remove these lines from the script"
exit 0
# -- REMOVE TO HERE

# Command line passed, as one string, to "installer.py exec".
NLP_COMMAND="crate_nlp_multiprocess --nproc ${CRATE_NPROCESSORS} --config ${CRATE_CONTAINER_CONFIG_NLP} --nlpdef crate_biomarkers --incremental"

# stderr is merged into stdout so that tee both displays and logs everything.
${PYTHON} "${CRATE_HOST_INSTALLER_BASE_DIR}/installer.py" exec "${NLP_COMMAND}" 2>&1 \
    | tee "${CRATE_HOST_NLP_BIOMARKERS_LOG}"

# With "set -e" and pipefail in force, this line runs only if the pipeline
# above exited successfully.
"${THISDIR}/email_rdbm.sh" --subject "NLP finished" --text "FINISHED: CRATE biomarkers / incremental"
Oops, something went wrong.