From 0a4d53ce33a8c7c8ee0ea81f6aaaa85a77123155 Mon Sep 17 00:00:00 2001 From: Rob Kooper Date: Thu, 29 Nov 2018 18:50:29 -0600 Subject: [PATCH 1/7] attributes migration Add a new table that allows for attributes in json form #505 --- .../20181129000515_create_attributes_table.rb | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 db/migrate/20181129000515_create_attributes_table.rb diff --git a/db/migrate/20181129000515_create_attributes_table.rb b/db/migrate/20181129000515_create_attributes_table.rb new file mode 100644 index 000000000..e3493306a --- /dev/null +++ b/db/migrate/20181129000515_create_attributes_table.rb @@ -0,0 +1,27 @@ +class CreateAttributesTable < ActiveRecord::Migration[5.1] + def change + this_hostid = Machine.new.hostid + + create_table :attributes, id: :bigint do |t| + t.string :container_type, null: false + t.integer :container_id, limit: 8, null: false + t.jsonb :value, null: false, default: '{}' + t.timestamps + end + + add_index :attributes, :container_id + add_index :attributes, :value, using: :gin + + reversible do |dir| + dir.up do + execute %{ + SELECT setval('attributes_id_seq', GREATEST(1, 1 + CAST(1e9 * #{this_hostid}::int AS bigint)), FALSE); + ALTER TABLE "attributes" + ALTER COLUMN created_at SET DEFAULT utc_now(), + ALTER COLUMN updated_at SET DEFAULT utc_now(), + ADD CONSTRAINT container_type_id UNIQUE(container_type, container_id); + } + end + end + end +end From c5d006d1bd4c79c803b1874894fb1dbcc3e8e9d5 Mon Sep 17 00:00:00 2001 From: Rob Kooper Date: Thu, 29 Nov 2018 18:53:50 -0600 Subject: [PATCH 2/7] Update CHANGELOG.md --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f242cda6..c980cdde1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,11 @@ section for the next release. For more information about this file see also [Keep a Changelog](http://keepachangelog.com/) . +## [Unreleased] + +### Added + +- #505 : Added a new attributes table. 
This table allows to store through polymorphism any additional information with any row in any table. The value stored is a json document. ## [5.0.4] - 2018-10-11 From 332c84338f61748ba30a0db41b09c102ddfb9b51 Mon Sep 17 00:00:00 2001 From: Rob Kooper Date: Thu, 29 Nov 2018 18:58:01 -0600 Subject: [PATCH 3/7] move load/dump to bety from PEcAN This moves the load/dump scripts from PEcAn to BETY (fixes #597) Added attributes table to dump/load script. --- .gitignore | 1 - CHANGELOG.md | 1 + Dockerfile | 7 +- docker/entrypoint.sh | 6 +- script/dump.bety.sh | 250 ++++++++++++++++++++++ script/load.bety.sh | 489 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 745 insertions(+), 9 deletions(-) create mode 100755 script/dump.bety.sh create mode 100755 script/load.bety.sh diff --git a/.gitignore b/.gitignore index c834d231d..ed4a86610 100644 --- a/.gitignore +++ b/.gitignore @@ -19,7 +19,6 @@ public/maps/* config/initializers/site_keys.rb .Rproj.user public/assets -script/load.bety.sh # Ignore encrypted secrets key file. config/secrets.yml.key diff --git a/CHANGELOG.md b/CHANGELOG.md index c980cdde1..f87344897 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ For more information about this file see also [Keep a Changelog](http://keepacha ### Added - #505 : Added a new attributes table. This table allows to store through polymorphism any additional information with any row in any table. The value stored is a json document. +- #597 : Moved `dump.bety.sh` and `load.bety.sh` from PEcAn to BETY. 
## [5.0.4] - 2018-10-11 diff --git a/Dockerfile b/Dockerfile index 48c079548..964ebf265 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,11 +29,8 @@ COPY / /home/bety COPY /docker/database.yml /home/bety/config/database.yml COPY /docker/config.ru /home/bety/config.ru -# download dump.bety and load.bety scripts and configure app -RUN curl -LOs https://raw.githubusercontent.com/PecanProject/pecan/master/scripts/load.bety.sh \ - && curl -LOs https://raw.githubusercontent.com/PecanProject/pecan/master/scripts/dump.bety.sh \ - && chmod +x load.bety.sh dump.bety.sh \ - && /bin/sed -e '/serve_static_assets/ s/false$/true/' -i config/environments/production.rb +# configure app +RUN /bin/sed -e '/serve_static_assets/ s/false$/true/' -i config/environments/production.rb # arguments that are added at the bottom of BETY ARG BETY_GIT_TAGS="unknown" diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 9089d93d3..3c598558b 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -6,18 +6,18 @@ case $1 in echo "Create new database, initialized from all data." 
psql -h postgres -p 5432 -U postgres -c "CREATE ROLE bety WITH LOGIN CREATEDB NOSUPERUSER NOCREATEROLE PASSWORD 'bety'" psql -h postgres -p 5432 -U postgres -c "CREATE DATABASE bety WITH OWNER bety" - ./load.bety.sh -a "postgres" -d "bety" -p "-h postgres -p 5432" -o bety -c -u -g -m ${LOCAL_SERVER} -r 0 -w https://ebi-forecast.igb.illinois.edu/pecan/dump/all/bety.tar.gz + ./script/load.bety.sh -a "postgres" -d "bety" -p "-h postgres -p 5432" -o bety -c -u -g -m ${LOCAL_SERVER} -r 0 -w https://ebi-forecast.igb.illinois.edu/pecan/dump/all/bety.tar.gz ;; "sync" ) echo "Synchronize with servers ${REMOTE_SERVERS}" for r in ${REMOTE_SERVERS}; do echo "Synchronizing with server ${r}" - ./load.bety.sh -a "postgres" -d "bety" -p "-h postgres -p 5432" -o bety -r ${r} + ./script/load.bety.sh -a "postgres" -d "bety" -p "-h postgres -p 5432" -o bety -r ${r} done ;; "dump" ) echo "Dump data from server ${LOCAL_SERVER}" - ./dump.bety.sh -d "bety" -p "-h postgres -p 5432 -U postgres" -m ${LOCAL_SERVER} -o dump + ./script/dump.bety.sh -d "bety" -p "-h postgres -p 5432 -U postgres" -m ${LOCAL_SERVER} -o dump ;; "migrate" ) echo "Migrate database." diff --git a/script/dump.bety.sh b/script/dump.bety.sh new file mode 100755 index 000000000..a73327510 --- /dev/null +++ b/script/dump.bety.sh @@ -0,0 +1,250 @@ +#!/bin/bash + +# exit on error +set -e + +# ---------------------------------------------------------------------- +# START CONFIGURATION SECTION +# ---------------------------------------------------------------------- + +# name of the dabase to dump +# this script assumes the user running it has access to the database +DATABASE=${DATABASE:-"bety"} + +# psql options +# this allows you to add the user to use as well as any other options +PG_OPT=${PG_OPT-"-U bety"} + +# ID's used in database +# These ID's need to be unique for the sharing to work. If you want +# to share your data, send email to kooper@illinois.edu to claim +# your ID range. 
The master list is maintained at +# https://github.com/PecanProject/bety/wiki/Distributed-BETYdb +# +# 0 - EBI - David LeBauer +# 1 - BU - Mike Dietze +# 2 - Brookhaven - Shawn Serbin +# 3 - Purdue - Jeanne Osnas +# 4 - Virginia Tech - Quinn Thomas +# 99 - VM +MYSITE=${MYSITE:-99} + +# access level requirement +# 0 - private +# 4 - public +LEVEL=${LEVEL:-3} + +# dump unchecked traits and yields +# set this to "YES" to dump all unchecked traits/yields as well +UNCHECKED=${UNCHECKED:-"NO"} + +# keep users +# set this to YES to dump all user information, otherwise it will +# be anonymized +KEEPUSERS=${KEEPUSERS:-"NO"} + +# location where to write the results, this will be a tar file +OUTPUT=${OUTPUT:-"$PWD/dump"} + +# Should the process be quiet +QUIET=${QUIET:-"NO"} + +# Should all the data be dumped +ALLDATA=${ALLDATA:-"NO"} + +# ---------------------------------------------------------------------- +# END CONFIGURATION SECTION +# ---------------------------------------------------------------------- + +# parse command line options +while getopts ad:hkl:m:o:p:qu opt; do + case $opt in + a) + ALLDATA="YES" + ;; + d) + DATABASE=$OPTARG + ;; + h) + echo "$0 [-a] [-d database] [-h] [-k] [-l 0,1,2,3,4] [-m my siteid] [-o folder] [-p psql options] [-u]" + echo " -a dump all records" + echo " -d database, default is bety" + echo " -h this help page" + echo " -k keep users, default is to be anonymized" + echo " -l level of data that can be dumped, default is 3" + echo " -m site id, default is 99 (VM)" + echo " -o output folder where dumped data is written, default is dump" + echo " -p additional psql command line options, default is -U bety" + echo " -q should the export be quiet" + echo " -u should unchecked data be dumped, default is NO" + exit 0 + ;; + k) + KEEPUSERS="YES" + ;; + l) + LEVEL=$OPTARG + ;; + m) + MYSITE=$OPTARG + ;; + o) + OUTPUT=$OPTARG + ;; + p) + PG_OPT=$OPTARG + ;; + q) + QUIET="YES" + ;; + u) + UNCHECKED="YES" + ;; + esac +done + +# Table that contains 
the users, this table will be anonymized +USER_TABLES="users" + +# list of all tables, schema_migrations is ignored since that +# will be imported during creaton +CLEAN_TABLES="attributes benchmark_sets benchmarks" +CLEAN_TABLES="${CLEAN_TABLES} citations covariates cultivars dbfiles" +CLEAN_TABLES="${CLEAN_TABLES} ensembles entities experiments formats inputs" +CLEAN_TABLES="${CLEAN_TABLES} likelihoods machines managements metrics" +CLEAN_TABLES="${CLEAN_TABLES} methods mimetypes models modeltypes" +CLEAN_TABLES="${CLEAN_TABLES} pfts posteriors priors reference_runs" +CLEAN_TABLES="${CLEAN_TABLES} runs sites species treatments" +CLEAN_TABLES="${CLEAN_TABLES} variables workflows" +CLEAN_TABLES="${CLEAN_TABLES} projects sitegroups" + +# tables that have checks that need to be looked at. +CHECK_TABLES="traits yields" + +# tables that have many to many relationships +MANY_TABLES="benchmarks_benchmarks_reference_runs benchmarks_ensembles" +MANY_TABLES="${MANY_TABLES} benchmarks_ensembles_scores benchmarks_metrics benchmark_sets_benchmark_reference_runs" +MANY_TABLES="${MANY_TABLES} citations_sites citations_treatments" +MANY_TABLES="${MANY_TABLES} cultivars_pfts current_posteriors" +MANY_TABLES="${MANY_TABLES} experiments_sites experiments_treatments" +MANY_TABLES="${MANY_TABLES} formats_variables inputs_runs" +MANY_TABLES="${MANY_TABLES} managements_treatments modeltypes_formats" +MANY_TABLES="${MANY_TABLES} pfts_priors pfts_species" +MANY_TABLES="${MANY_TABLES} posterior_samples posteriors_ensembles" +MANY_TABLES="${MANY_TABLES} sitegroups_sites sites_cultivars trait_covariate_associations" + +# tables that should NOT be dumped +IGNORE_TABLES="sessions" +SYSTEM_TABLES="schema_migrations spatial_ref_sys" + +# be quiet if not interactive +if ! 
tty -s ; then + exec 1>/dev/null +fi + +# this value should be constant, do not change +ID_RANGE=1000000000 + +# make output folder +mkdir -p "${OUTPUT}" +DUMPDIR="/tmp/$$" +mkdir -p "${DUMPDIR}" +chmod 777 "${DUMPDIR}" + +# compute range based on MYSITE +if [ "${ALLDATA}" != "YES" ]; then + START_ID=$(( MYSITE * ID_RANGE + 1 )) + LAST_ID=$(( START_ID + ID_RANGE - 1 )) + if [ "${QUIET}" != "YES" ]; then + echo "Dumping all items that have id : [${START_ID} - ${LAST_ID}]" + fi + LIMIT="(id >= ${START_ID} AND id <= ${LAST_ID})" +else + LIMIT="TRUE" + if [ "${QUIET}" != "YES" ]; then + echo "Dumping all items that have id : ALL ITEMS" + fi +fi + +# find current schema version +# following returns a triple: +# - number of migrations +# - largest migration +# - hash of all migrations +MIGRATIONS=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c 'SELECT COUNT(version) FROM schema_migrations' | tr -d ' ' ) +VERSION=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c 'SELECT md5(array_agg(version)::text) FROM (SELECT version FROM schema_migrations ORDER BY version) as v;' | tr -d ' ' ) +LATEST=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c 'SELECT version FROM schema_migrations ORDER BY version DESC LIMIT 1' | tr -d ' ' ) +NOW=$( date -u +"%Y-%m-%dT%H:%M:%SZ" ) +echo "${MIGRATIONS} ${VERSION} ${LATEST} ${NOW}" > "${OUTPUT}/version.txt" + +# dump schema +if [ "${QUIET}" != "YES" ]; then + printf "Dumping %-25s : " "schema" +fi +pg_dump ${PG_OPT} -s "${DATABASE}" -O -x > "${DUMPDIR}/${VERSION}.schema" +if [ "${QUIET}" != "YES" ]; then + echo "DUMPED version ${VERSION} with ${MIGRATIONS}, latest migration is ${LATEST}" +fi + +# dump ruby special table +if [ "${QUIET}" != "YES" ]; then + printf "Dumping %-25s : " "schema_migrations" +fi +ADD=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c "SELECT count(*) FROM schema_migrations;" | tr -d ' ' ) +psql ${PG_OPT} -t -q -d "${DATABASE}" -c "\COPY schema_migrations TO '${DUMPDIR}/schema_migrations.csv' WITH (DELIMITER ' ', NULL '\\N', ESCAPE '\\', 
FORMAT CSV, ENCODING 'UTF-8')" +if [ "${QUIET}" != "YES" ]; then + echo "DUMPED ${ADD}" +fi + +# skip following tables +# - inputs_runs (PEcAn, site specific) +# - posteriors_runs (PEcAn, site specific, is this used?) +# - runs (PEcAn, site specific) +# - workflows (PEcAn, site specific) + +# dump users +if [ "${QUIET}" != "YES" ]; then + printf "Dumping %-25s : " "users" +fi +if [ "${KEEPUSERS}" == "YES" ]; then + psql ${PG_OPT} -t -q -d "${DATABASE}" -c "\COPY (SELECT * FROM ${USER_TABLES} WHERE ${LIMIT}) TO '${DUMPDIR}/users.csv' WITH (DELIMITER ' ', NULL '\\N', ESCAPE '\\', FORMAT CSV, ENCODING 'UTF-8')" +else + psql ${PG_OPT} -t -q -d "${DATABASE}" -c "\COPY (SELECT id, CONCAT('user', id) AS login, CONCAT('user ' , id) AS name, CONCAT('betydb+', id, '@gmail.com') as email, 'Urbana' AS city, 'USA' AS country, '' AS area, '1234567890abcdef' AS crypted_password, 'BU' AS salt, NOW() AS created_at, NOW() AS updated_at, NULL as remember_token, NULL AS remember_token_expires_at, 3 AS access_level, 4 AS page_access_level, NULL AS apikey, 'IL' AS state_prov, '61801' AS postal_code FROM ${USER_TABLES} WHERE ${LIMIT}) TO '${DUMPDIR}/users.csv' WITH (DELIMITER ' ', NULL '\\N', ESCAPE '\\', FORMAT CSV, ENCODING 'UTF-8')" +fi +ADD=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c "SELECT count(*) FROM ${USER_TABLES} WHERE ${LIMIT};" | tr -d ' ' ) +if [ "${QUIET}" != "YES" ]; then + echo "DUMPED ${ADD}" +fi + +# unrestricted tables +for T in ${CLEAN_TABLES} ${MANY_TABLES}; do + if [ "${QUIET}" != "YES" ]; then + printf "Dumping %-25s : " "${T}" + fi + psql ${PG_OPT} -t -q -d "${DATABASE}" -c "\COPY (SELECT * FROM ${T} WHERE ${LIMIT}) TO '${DUMPDIR}/${T}.csv' WITH (DELIMITER ' ', NULL '\\N', ESCAPE '\\', FORMAT CSV, ENCODING 'UTF-8')" + ADD=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c "SELECT count(*) FROM ${T} WHERE ${LIMIT}" | tr -d ' ' ) + if [ "${QUIET}" != "YES" ]; then + echo "DUMPED ${ADD}" + fi +done + +# restricted and unchecked tables +for T in ${CHECK_TABLES}; do + if [ 
"${QUIET}" != "YES" ]; then + printf "Dumping %-25s : " "${T}" + fi + if [ "${UNCHECKED}" == "YES" ]; then + UNCHECKED_QUERY="" + else + UNCHECKED_QUERY="AND checked != -1" + fi + psql ${PG_OPT} -t -q -d "${DATABASE}" -c "\COPY (SELECT * FROM ${T} WHERE ${LIMIT} AND access_level >= ${LEVEL} ${UNCHECKED_QUERY}) TO '${DUMPDIR}/${T}.csv' WITH (DELIMITER ' ', NULL '\\N', ESCAPE '\\', FORMAT CSV, ENCODING 'UTF-8');" + ADD=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c "SELECT count(*) FROM ${T} WHERE ${LIMIT}" | tr -d ' ' ) + if [ "${QUIET}" != "YES" ]; then + echo "DUMPED ${ADD}" + fi +done + +# all done dumping database +tar zcf "${OUTPUT}/bety.tar.gz" -C "${DUMPDIR}" . +rm -rf "${DUMPDIR}" diff --git a/script/load.bety.sh b/script/load.bety.sh new file mode 100755 index 000000000..e0ce757e8 --- /dev/null +++ b/script/load.bety.sh @@ -0,0 +1,489 @@ +#!/bin/bash + +# exit on error +set -e + +# ---------------------------------------------------------------------- +# START CONFIGURATION SECTION +# ---------------------------------------------------------------------- + +# name of the dabase to load +# this script assumes the user running it has access to the database +DATABASE=${DATABASE:-"bety"} + +# owner of the database +# also used to connect to the database for most operations +OWNER=${OWNER:-"bety"} + +# postgres user to use for root level access +PG_USER=${PG_USER:-""} + +# psql options +# this allows you to add any other options +PG_OPT=${PG_OPT:-""} + +# ID's used in database +# These ID's need to be unique for the sharing to work. If you want +# to share your data, send email to kooper@illinois.edu to claim +# your ID range. 
The master list is maintained at +# https://github.com/PecanProject/bety/wiki/Distributed-BETYdb +# +# 0 - EBI - David LeBauer +# 1 - BU - Mike Dietze +# 2 - Brookhaven - Shawn Serbin +# 3 - Purdue - Jeanne Osnas +# 4 - Virginia Tech - Quinn Thomas +# 5 - Wisconsin - Ankur Desai +# 6 - TERRA REF - David LeBauer +# 7 - TERRA test - David LeBauer +# 8 - TERRA MEPP - David LeBauer +# 9 - TERRA TAMU - TBD +# 99 - VM +MYSITE=${MYSITE:-99} +REMOTESITE=${REMOTESITE:-0} +DUMPURL=${DUMPURL:-""} + +# Create the database from scratch +# Set this to YES to create the database, this will remove all existing +# data! +CREATE=${CREATE:-"NO"} + +# Empty database create +# Set this to YES to create an empty database, this will still +# import some rows, such as mimetypes, etc. +EMPTY=${EMPTY:-"NO"} + +# Fix the sequence numbers, this should only be need when creating a +# new database. Set this to YES to initialize the sequence numbers. +FIXSEQUENCE=${FIXSEQUENCE:-"NO"} + +# Keep the tmp folder even if the sync failed? +# Set this to YES to keep the tmp folder, this is helpful for +# debugging the script. The default value is NO and the tmp folder will +# be removed +KEEPTMP=${KEEPTMP:-"NO"} + +# Should the process be quiet +QUIET=${QUIET:-"NO"} + +# Add some default users +# Set this to YES to add carya with password. This will give this user +# admin priviliges. It will also create 16 more users that have specific +# abilities. +USERS=${USERS:-"NO"} + +# create guestuser +# Set this to YES to create guestuser used with BETY. 
+GUESTUSER=${GUESTUSER:-"NO"} + +# additional options for curl +CURL_OPTS=${CURL_OPTS:-""} + +# Log file +LOG=${LOG:-"$PWD/dump/sync.log"} + +# ---------------------------------------------------------------------- +# END CONFIGURATION SECTION +# ---------------------------------------------------------------------- + +# parse command line options +while getopts a:cd:efghkl:m:o:p:qr:tuw: opt; do + case $opt in + a) + PG_USER="$OPTARG" + ;; + c) + CREATE="YES" + ;; + d) + DATABASE="$OPTARG" + ;; + e) + EMPTY="YES" + ;; + f) + FIXSEQUENCE="YES" + ;; + g) + GUESTUSER="YES" + ;; + h) + echo "$0 [-a postgres] [-c] [-d database] [-e] [-f] [-g] [-h] [-l logfile] [-m my siteid] [-o owner] [-p psql options] [-r remote siteid] [-t] [-u]" + echo " -a access database as this user, this is NOT the owner of the database, often this is postgres" + echo " -c create database, THIS WILL ERASE THE CURRENT DATABASE, default is NO" + echo " -d database, default is bety" + echo " -e empty database, default is NO" + echo " -f fix sequence numbers, this should not be needed, default is NO" + echo " -g add guestuser for BETY webpage" + echo " -h this help page" + echo " -k allow for insecure connections when downloading data" + echo " -l location of log file (place this with the dump files)" + echo " -m site id, default is 99 (VM)" + echo " -o owner of the database, default is bety" + echo " -p additional psql command line options, default is empty" + echo " -q should the import be quiet" + echo " -r remote site id, default is 0 (EBI)" + echo " -t keep temp folder, default is NO" + echo " -u create carya users, this will create some default users" + echo " -w use url to fetch data from instead of hardcoded url" + exit 0 + ;; + k) + CURL_OPTS="${CURL_OPTS} --insecure" + ;; + l) + LOG="$OPTARG" + ;; + m) + MYSITE="$OPTARG" + ;; + o) + OWNER="$OPTARG" + ;; + p) + PG_OPT="$OPTARG" + ;; + q) + QUIET="YES" + ;; + r) + REMOTESITE="$OPTARG" + ;; + t) + KEEPTMP="YES" + ;; + u) + USERS="YES" + ;; + 
w) + DUMPURL="$OPTARG" + ;; + esac +done + +# simple sanity check +if [ "${CREATE}" == "YES" -a "${OWNER}" == "" ]; then + echo "Can not create database without owner" + exit 1 +fi +if [ "${MYSITE}" == "${REMOTESITE}" ]; then + echo "Can not have same remotesite as mysite" + exit 1 +fi +if [ "${CREATE}" == "YES" ]; then + FIXSEQUENCE="YES" +fi + +# add right flag to PG_USER +if [ "$PG_USER" != "" ]; then + PG_USER="-U ${PG_USER}" +fi +if [ "$OWNER" != "" ]; then + PG_OWNER="-U ${OWNER}" +fi + +# this seems to be a good option always +PG_OPT="${PG_OPT} -v ON_ERROR_ROLLBACK=on" + +# list of all tables, schema_migrations is ignored since that +# will be imported during creaton + +# list of tables that are part of an empty setup +EMPTY_TABLES="formats machines mimetypes users" + +# list of all tables, schema_migrations is ignored since that +# will be imported during creaton. Order is semi important. +CLEAN_TABLES="attributes benchmark_sets benchmarks" +CLEAN_TABLES="${CLEAN_TABLES} citations covariates cultivars" +CLEAN_TABLES="${CLEAN_TABLES} ensembles entities experiments inputs" +CLEAN_TABLES="${CLEAN_TABLES} likelihoods managements metrics" +CLEAN_TABLES="${CLEAN_TABLES} methods models modeltypes" +CLEAN_TABLES="${CLEAN_TABLES} pfts posteriors priors reference_runs" +CLEAN_TABLES="${CLEAN_TABLES} runs sites species treatments" +CLEAN_TABLES="${CLEAN_TABLES} variables workflows" +CLEAN_TABLES="${CLEAN_TABLES} projects sitegroups" +CLEAN_TABLES="${CLEAN_TABLES} dbfiles" + +# tables that have checks that need to be looked at. 
+CHECK_TABLES="traits yields" + +# tables that have many to many relationships +MANY_TABLES="benchmarks_benchmarks_reference_runs benchmarks_ensembles" +MANY_TABLES="${MANY_TABLES} benchmarks_ensembles_scores benchmarks_metrics benchmark_sets_benchmark_reference_runs" +MANY_TABLES="${MANY_TABLES} citations_sites citations_treatments" +MANY_TABLES="${MANY_TABLES} cultivars_pfts current_posteriors" +MANY_TABLES="${MANY_TABLES} experiments_sites experiments_treatments" +MANY_TABLES="${MANY_TABLES} formats_variables inputs_runs" +MANY_TABLES="${MANY_TABLES} managements_treatments modeltypes_formats" +MANY_TABLES="${MANY_TABLES} pfts_priors pfts_species" +MANY_TABLES="${MANY_TABLES} posterior_samples posteriors_ensembles" +MANY_TABLES="${MANY_TABLES} sitegroups_sites sites_cultivars trait_covariate_associations" + +# tables that should NOT be dumped +IGNORE_TABLES="sessions" +SYSTEM_TABLES="schema_migrations spatial_ref_sys" + +# list where to download data from. This data should come +# from the database. Same as mysite which should come from +# the database as well. +if [ -z "${DUMPURL}" ]; then + if [ "${REMOTESITE}" == "0" ]; then + DUMPURL="https://ebi-forecast.igb.illinois.edu/pecan/dump/bety.tar.gz" + elif [ "${REMOTESITE}" == "1" ]; then + DUMPURL="http://psql-pecan.bu.edu/sync/dump/bety.tar.gz" + elif [ "${REMOTESITE}" == "2" ]; then + DUMPURL="https://modex.bnl.gov/sync/dump/bety.tar.gz" + elif [ "${REMOTESITE}" == "5" ]; then + DUMPURL="http://tree.aos.wisc.edu:6480/sync/dump/bety.tar.gz" + elif [ "${REMOTESITE}" == "6" ]; then + DUMPURL="https://terraref.ncsa.illinois.edu/bety/dump/bety6/bety.tar.gz" + else + echo "Don't know where to get data for site ${REMOTESITE}" + DUMPURL="" + fi +fi + +# this value should be constant, do not change +ID_RANGE=1000000000 + +# before anything is done, check to make sure database exists +if ! 
psql ${PG_OPT} ${PG_USER} -lqt | cut -d \| -f 1 | grep -w "^ *${DATABASE} *$" > /dev/null ; then + echo "Database ${DATABASE} does not exist, please create it:" + echo "(see https://pecan.gitbooks.io/betydb-documentation/content/installing_betydb.html)" + echo " psql ${PG_OPT} ${PG_USER} -c \"CREATE ROLE ${OWNER} WITH LOGIN CREATEDB NOSUPERUSER NOCREATEROLE PASSWORD 'password'\"" + echo " psql ${PG_OPT} ${PG_USER} -c \"CREATE DATABASE ${DATABASE} WITH OWNER ${OWNER}\"" + exit 1 +fi + +# make output folder +DUMPDIR="/tmp/$$" +mkdir "${DUMPDIR}" + +# download dump file and unpack +if [ "${DUMPURL}" != "" ]; then + curl ${CURL_OPTS} -s -L -o "${DUMPDIR}/dump.tar.gz" "${DUMPURL}" + if [ ! -s ${DUMPDIR}/dump.tar.gz ]; then + echo "File downloaded is 0 bytes" + exit 1 + else + tar zxf "${DUMPDIR}/dump.tar.gz" -C "${DUMPDIR}" -m + fi +fi + +# create database if need be, otherwise check version of schema +if [ "${DUMPURL}" != "" ]; then + if [ "${CREATE}" == "YES" ]; then + if [ "${QUIET}" != "YES" ]; then + printf "Loading %-25s : " "schema" + fi + + # create empty public schema + psql ${PG_OPT} ${PG_USER} -q -d "${DATABASE}" -c "DROP SCHEMA public CASCADE;" + psql ${PG_OPT} ${PG_USER} -q -d "${DATABASE}" -c "CREATE SCHEMA public AUTHORIZATION ${OWNER};" + psql ${PG_OPT} ${PG_USER} -q -d "${DATABASE}" -c "CREATE EXTENSION postgis;" + psql ${PG_OPT} ${PG_USER} -q -d "${DATABASE}" -c "GRANT ALL ON ALL TABLES IN SCHEMA public TO ${OWNER};" + + # load the schema + psql ${PG_OPT} -U ${OWNER} -q -d "${DATABASE}" < "${DUMPDIR}"/*.schema + if [ "${QUIET}" != "YES" ]; then + echo "CREATED SCHEMA" + fi + + if [ "${QUIET}" != "YES" ]; then + printf "Loading %-25s : " "schema_migrations" + fi + ADD=$( psql ${PG_OPT} ${PG_OWNER} -t -q -d "${DATABASE}" -c "\COPY schema_migrations FROM '${DUMPDIR}/schema_migrations.csv' WITH (DELIMITER ' ', NULL '\\N', ESCAPE '\\', FORMAT CSV, ENCODING 'UTF-8'); SELECT COUNT(*) FROM schema_migrations;" | tr -d ' ' ) + if [ "${QUIET}" != "YES" ]; then + 
echo "ADDED ${ADD}" + fi + else + if [ "${QUIET}" != "YES" ]; then + printf "Checking %-25s : " "schema" + fi + + # find current schema version + VERSION=$( psql ${PG_OPT} ${PG_OWNER} -t -q -d "${DATABASE}" -c 'SELECT md5(array_agg(version)::text) FROM (SELECT version FROM schema_migrations ORDER BY version) as v;' | tr -d ' ' ) + + if [ ! -e "${DUMPDIR}/${VERSION}.schema" ]; then + echo "EXPECTED SCHEMA version ${VERSION}" + echo "Dump is from a different schema, please fix schema in database." + if [ "$KEEPTMP" == "YES" ]; then + echo "Files are in ${DUMPDIR}" + else + rm -rf "${DUMPDIR}" + fi + if [ -e ${LOG} ]; then + echo `date -u` $REMOTESITE 1 >> $LOG + fi + exit 1 + fi + + if [ "${QUIET}" != "YES" ]; then + echo "MATCHED SCHEMA version ${VERSION}" + fi + fi +fi + +# compute range based on {MY,REMOTE}SITE +MY_START_ID=$(( MYSITE * ID_RANGE + 1 )) +MY_LAST_ID=$(( MY_START_ID + ID_RANGE - 1 )) +REM_START_ID=$(( REMOTESITE * ID_RANGE + 1 )) +REM_LAST_ID=$(( REM_START_ID + ID_RANGE - 1 )) + +# common statement pieces used +REM_WHERE="WHERE (id >= ${REM_START_ID} AND id <= ${REM_LAST_ID})" +MY_WHERE="WHERE (id >= ${MY_START_ID} AND id <= ${MY_LAST_ID})" + +# disable all triggers +for T in ${EMPTY_TABLES} ${CLEAN_TABLES} ${MANY_TABLES}; do + psql ${PG_OPT} ${PG_USER} -q -d "${DATABASE}" -c "ALTER TABLE ${T} DISABLE TRIGGER ALL;" +done + +# create psql process that will be used for all code +PSQL_PIPE_INP=/tmp/psql_inp.$$ +PSQL_PIPE_OUT=/tmp/psql_out.$$ +mkfifo -m 600 $PSQL_PIPE_INP +mkfifo -m 600 $PSQL_PIPE_OUT +psql ${PG_OPT} ${PG_USER} -q --no-align --no-readline --tuples-only -P footer=off -d ${DATABASE} <$PSQL_PIPE_INP >$PSQL_PIPE_OUT & +exec 3>$PSQL_PIPE_INP +exec 4<$PSQL_PIPE_OUT +PSQL_PID=$! +if [ "${QUIET}" != "YES" ]; then + echo "Started psql (pid=$PSQL_PID)" +fi + +# capture EXIT so we can rollback if needed, as well as cleanup +trap ' + if ps -p $PSQL_PID > /dev/null ; then + echo "Process killed, no changes are made to the database." 
+ echo "ROLLBACK;" >&3 + kill $PSQL_PID + cat <&4 + if [ -e ${LOG} ]; then + echo `date -u` $REMOTESITE 2 >> $LOG + fi + fi + rm -f $PSQL_PIPE_INP $PSQL_PIPE_OUT +' EXIT + +# start transaction + +# for all tables +# 1) disable constraints on this table +# 2) remove all rows that have id in range of remote site +# 3) load new data +# 4) set last inserted item in my range +# 5) enable constraints on this table +for T in ${EMPTY_TABLES} ${CLEAN_TABLES} ${CHECK_TABLES} ${MANY_TABLES}; do + # start + echo "BEGIN;" >&3 + echo "ALTER TABLE ${T} DISABLE TRIGGER ALL;" >&3 + + if [ "${DUMPURL}" != "" ]; then + echo "SELECT count(*) FROM ${T} ${REM_WHERE};" >&3 && read DEL <&4 + # TODO what is last index in range we are adding, this will give a better + # indication if rows are added. + echo "DELETE FROM ${T} ${REM_WHERE};" >&3 + echo "SELECT COUNT(*) FROM ${T};" >&3 && read START <&4 + if [[ "${EMPTY}" == "NO" || ${EMPTY_TABLES} == *"$T"* ]]; then + if [ -f "${DUMPDIR}/${T}.csv" ]; then + echo "\COPY ${T} FROM '${DUMPDIR}/${T}.csv' WITH (DELIMITER ' ', NULL '\\N', ESCAPE '\\', FORMAT CSV, ENCODING 'UTF-8')" >&3 + fi + fi + echo "SELECT COUNT(*) FROM ${T};" >&3 && read END <&4 + ADD=$(( END - START )) + DIFF=$(( ADD - DEL )) + if [ "${QUIET}" != "YES" ]; then + if [ "$DEL" != "0" -o "$ADD" != "0" ]; then + if [ "$DIFF" != "0" ]; then + printf "Updated %-25s : %11d (%+d)\n" "${T}" ${ADD} ${DIFF} + else + printf "Updated %-25s : %11d\n" "${T}" ${ADD} + fi + fi + fi + fi + + # fix sequence number + if [ "${FIXSEQUENCE}" == "YES" ]; then + echo "SELECT last_value from ${T}_id_seq;" >&3 && read OLD <&4 + echo "SELECT setval('${T}_id_seq', ${MY_START_ID}, false);" >&3 && read IGN <&4 + echo "SELECT setval('${T}_id_seq', (SELECT MAX(id) FROM ${T} ${MY_WHERE}), true);" >&3 && read IGN <&4 + echo "SELECT last_value from ${T}_id_seq;" >&3 && read NEXT <&4 + if [ "${QUIET}" != "YES" ]; then + if [ "$OLD" != "$NEXT" ]; then + printf "Fixed %-25s : %s\n" "${T}" "${NEXT}" + fi + fi + fi + 
+ # finish off + echo "ALTER TABLE ${T} ENABLE TRIGGER ALL;" >&3 + echo "END;" >&3 +done + +# fix sequence numbers if needed +if [ "${FIXSEQUENCE}" == "YES" ]; then + for T in ${IGNORE_TABLES}; do + echo "SELECT last_value from ${T}_id_seq;" >&3 && read OLD <&4 + echo "SELECT setval('${T}_id_seq', ${MY_START_ID}, false);" >&3 && read IGN <&4 + echo "SELECT setval('${T}_id_seq', (SELECT MAX(id) FROM ${T} ${MY_WHERE}), true);" >&3 && read IGN <&4 + echo "SELECT last_value from ${T}_id_seq;" >&3 && read NEXT <&4 + if [ "${QUIET}" != "YES" ]; then + if [ "$OLD" != "$NEXT" ]; then + printf "Fixed %-25s : %s\n" "${T}" "${NEXT}" + fi + fi + done +fi + +# Add carya and other users if requested. +if [ "${USERS}" == "YES" ]; then + + # add carya user with admin rights + echo "SELECT count(id) FROM users WHERE login='carya';" >&3 && read RESULT <&4 + if [ ${RESULT} -eq 0 ]; then + echo "SELECT nextval('users_id_seq');" >&3 && read ID <&4 + echo "INSERT INTO users (login, name, email, crypted_password, salt, city, state_prov, postal_code, country, area, access_level, page_access_level, created_at, updated_at, apikey, remember_token, remember_token_expires_at) VALUES ('carya', 'carya', 'betydb+${ID}@gmail.com', 'df8428063fb28d75841d719e3447c3f416860bb7', 'carya', 'Urbana', 'IL', '61801', 'USA', '', 1, 1, NOW(), NOW(), NULL, NULL, NULL);" >&3 + if [ "${QUIET}" != "YES" ]; then + echo "Added carya with admin privileges with id=${ID}" + fi + fi + + # add other users with specific rights + for f in 1 2 3 4; do + for g in 1 2 3 4; do + echo "SELECT count(id) FROM users WHERE login='carya${f}${g}';" >&3 && read RESULT <&4 + if [ ${RESULT} -eq 0 ]; then + echo "SELECT nextval('users_id_seq');" >&3 && read ID <&4 + echo "INSERT INTO users (login, name, email, crypted_password, salt, city, state_prov, postal_code, country, area, access_level, page_access_level, created_at, updated_at, apikey, remember_token, remember_token_expires_at) VALUES ('carya${f}${g}', 'carya${f}${g}', 
'betydb+${ID}@gmail.com', 'df8428063fb28d75841d719e3447c3f416860bb7', 'carya', 'Urbana', 'IL', '61801', 'USA', '', $f, $g, NOW(), NOW(), NULL, NULL, NULL);" >&3 + if [ "${QUIET}" != "YES" ]; then + echo "Added carya$f$g with access_level=$f and page_access_level=$g with id=${ID}" + fi + fi + done + done +fi + +# Add guest user +if [ "${GUESTUSER}" == "YES" ]; then + # add guest user + echo "SELECT count(id) FROM users WHERE login='guestuser';" >&3 && read RESULT <&4 + if [ ${RESULT} -eq 0 ]; then + echo "SELECT nextval('users_id_seq');" >&3 && read ID <&4 + echo "INSERT INTO users (login, name, email, crypted_password, salt, city, state_prov, postal_code, country, area, access_level, page_access_level, created_at, updated_at, apikey, remember_token, remember_token_expires_at) VALUES ('guestuser', 'guestuser', 'betydb+${ID}@gmail.com', '994363a949b6486fc7ea54bf40335127f5413318', 'bety', 'Urbana', 'IL', '61801', 'USA', '', 4, 4, NOW(), NOW(), NULL, NULL, NULL);" >&3 + if [ "${QUIET}" != "YES" ]; then + echo "Added guestuser with access_level=4 and page_access_level=4 with id=${ID}" + fi + fi +fi + +# close transaction +if [ -e ${LOG} ]; then + echo `date -u` $REMOTESITE 0 >> $LOG +fi +echo "\quit" >&3 +wait $PSQL_PID + +# all done, cleanup +rm -rf "${DUMPDIR}" From 64f26d18800d662220a9fa49b986dc096225cc46 Mon Sep 17 00:00:00 2001 From: Rob Kooper Date: Fri, 30 Nov 2018 15:31:04 -0600 Subject: [PATCH 4/7] remove greatest --- db/migrate/20181129000515_create_attributes_table.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/migrate/20181129000515_create_attributes_table.rb b/db/migrate/20181129000515_create_attributes_table.rb index e3493306a..373e2c3b4 100644 --- a/db/migrate/20181129000515_create_attributes_table.rb +++ b/db/migrate/20181129000515_create_attributes_table.rb @@ -15,7 +15,7 @@ def change reversible do |dir| dir.up do execute %{ - SELECT setval('attributes_id_seq', GREATEST(1, 1 + CAST(1e9 * #{this_hostid}::int AS bigint)), FALSE); 
+ SELECT setval('attributes_id_seq', 1 + CAST(1e9 * #{this_hostid}::int AS bigint), FALSE); ALTER TABLE "attributes" ALTER COLUMN created_at SET DEFAULT utc_now(), ALTER COLUMN updated_at SET DEFAULT utc_now(), From 1fed2be09a11ac081cdc559d1e65148104bfe82a Mon Sep 17 00:00:00 2001 From: David LeBauer Date: Thu, 13 Dec 2018 21:37:07 -0700 Subject: [PATCH 5/7] Update load.bety.sh --- script/load.bety.sh | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/script/load.bety.sh b/script/load.bety.sh index e0ce757e8..289ad7f5d 100755 --- a/script/load.bety.sh +++ b/script/load.bety.sh @@ -26,18 +26,19 @@ PG_OPT=${PG_OPT:-""} # These ID's need to be unique for the sharing to work. If you want # to share your data, send email to kooper@illinois.edu to claim # your ID range. The master list is maintained at -# https://github.com/PecanProject/bety/wiki/Distributed-BETYdb +# https://github.com/PecanProject/betydb-documentation/blob/master/betydb-system-administration/distributed_betydb.md # -# 0 - EBI - David LeBauer -# 1 - BU - Mike Dietze -# 2 - Brookhaven - Shawn Serbin -# 3 - Purdue - Jeanne Osnas -# 4 - Virginia Tech - Quinn Thomas -# 5 - Wisconsin - Ankur Desai -# 6 - TERRA REF - David LeBauer -# 7 - TERRA test - David LeBauer -# 8 - TERRA MEPP - David LeBauer -# 9 - TERRA TAMU - TBD +# 0 - EBI - David Slater +# 1 - BU - Mike Dietze +# 2 - Brookhaven - Shawn Serbin +# 3 - Purdue - Jeanne Osnas +# 4 - Virginia Tech - Quinn Thomas +# 5 - Wisconsin - Ankur Desai +# 6 - TERRA REF - David LeBauer +# 7 - TERRA test - David LeBauer +# 8 - TERRA MEPP - David LeBauer +# 9 - University of Arizona - David LeBauer +# 10 - Ghent - # 99 - VM MYSITE=${MYSITE:-99} REMOTESITE=${REMOTESITE:-0} From 033f381314c6701491dfa3eef7b2b2acf4a07552 Mon Sep 17 00:00:00 2001 From: David LeBauer Date: Thu, 13 Dec 2018 21:41:14 -0700 Subject: [PATCH 6/7] Update load.bety.sh --- script/load.bety.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/script/load.bety.sh b/script/load.bety.sh index 289ad7f5d..42b04e987 100755 --- a/script/load.bety.sh +++ b/script/load.bety.sh @@ -36,7 +36,7 @@ PG_OPT=${PG_OPT:-""} # 5 - Wisconsin - Ankur Desai # 6 - TERRA REF - David LeBauer # 7 - TERRA test - David LeBauer -# 8 - TERRA MEPP - David LeBauer +# 8 - TERRA MEPP - David Slater # 9 - University of Arizona - David LeBauer # 10 - Ghent - # 99 - VM From 699d79c67a9b0288e900eb4913d210e2460844f1 Mon Sep 17 00:00:00 2001 From: Rob Kooper Date: Mon, 11 Mar 2019 10:18:44 -0500 Subject: [PATCH 7/7] release 5.2 - update CHANGELOG - add release.sh - small fix to allow setting the initialize URL in Docker --- CHANGELOG.md | 3 ++- Dockerfile | 2 ++ docker/entrypoint.sh | 2 +- release.sh | 22 ++++++++++++++++++++++ 4 files changed, 27 insertions(+), 2 deletions(-) create mode 100755 release.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c1b1326d..f3e55ecbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,10 +5,11 @@ section for the next release. For more information about this file see also [Keep a Changelog](http://keepachangelog.com/) . -## [Unreleased] +## [5.2.0] - 2019-03-11 ### Added +- Ability to set the initialize URL in Docker - #505 : Added a new attributes table. This table allows to store through polymorphism any additional information with any row in any table. The value stored is a json document. - #597 : Moved `dump.bety.sh` and `load.bety.sh` from PEcAn to BETY.
diff --git a/Dockerfile b/Dockerfile index 964ebf265..f9aed8faa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,6 +41,8 @@ ARG BETY_GIT_DATE="unknown" # environment variables used ENV LOCAL_SERVER=99 \ REMOTE_SERVERS="0 1 2 5" \ + INITIALIZE_FLAGS="-g -u" \ + INITIALIZE_URL="-w https://ebi-forecast.igb.illinois.edu/pecan/dump/all/bety.tar.gz" \ RAILS_ENV="production" \ RAILS_RELATIVE_URL_ROOT="" \ SECRET_KEY_BASE="ThisIsNotReallySuchAGreatSecret" \ diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 3c598558b..e6d7b315d 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -6,7 +6,7 @@ case $1 in echo "Create new database, initialized from all data." psql -h postgres -p 5432 -U postgres -c "CREATE ROLE bety WITH LOGIN CREATEDB NOSUPERUSER NOCREATEROLE PASSWORD 'bety'" psql -h postgres -p 5432 -U postgres -c "CREATE DATABASE bety WITH OWNER bety" - ./script/load.bety.sh -a "postgres" -d "bety" -p "-h postgres -p 5432" -o bety -c -u -g -m ${LOCAL_SERVER} -r 0 -w https://ebi-forecast.igb.illinois.edu/pecan/dump/all/bety.tar.gz + ./script/load.bety.sh -a "postgres" -d "bety" -p "-h postgres -p 5432" -o bety -c ${INITIALIZE_FLAGS} -m ${LOCAL_SERVER} -r 0 ${INITIALIZE_URL} ;; "sync" ) echo "Synchronize with servers ${REMOTE_SERVERS}" diff --git a/release.sh b/release.sh new file mode 100755 index 000000000..28b16ee33 --- /dev/null +++ b/release.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +VERSION="5.2.0" +#DEBUG=echo + +TAGS="" +TMPVERSION="${VERSION}" +OLDVERSION="" +while [ "$OLDVERSION" != "$TMPVERSION" ]; do + TAGS="${TAGS} ${TMPVERSION}" + OLDVERSION="${TMPVERSION}" + TMPVERSION=$(echo ${OLDVERSION} | sed 's/\.[0-9]*$//') +done + +${DEBUG} docker pull pecan/bety:${VERSION} + +for x in ${TAGS}; do + if [ "$x" == "$VERSION" ]; then continue; fi + + ${DEBUG} docker tag pecan/bety:${VERSION} pecan/bety:$x + ${DEBUG} docker push pecan/bety:$x +done