Skip to content

Commit

Permalink
Merge pull request #642 from PecanProject/release/v5.2.0
Browse files Browse the repository at this point in the history
Release/v5.2.0
  • Loading branch information
robkooper authored Mar 14, 2019
2 parents 822d5ad + 699d79c commit 31de64a
Show file tree
Hide file tree
Showing 8 changed files with 804 additions and 9 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ public/maps/*
config/initializers/site_keys.rb
.Rproj.user
public/assets
script/load.bety.sh

# Ignore encrypted secrets key file.
config/secrets.yml.key
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ section for the next release.

For more information about this file see also [Keep a Changelog](http://keepachangelog.com/) .

## [5.2.0] - 2019-03-11

### Added

- Ability to set the initialize URL in docker
- #505 : Added a new attributes table. Through polymorphism, this table can store additional information for any row in any table. The value stored is a JSON document.
- #597 : Moved `dump.bety.sh` and `load.bety.sh` from PEcAn to BETY.

## [5.1.0] - 2019-01-14

### Fixes
Expand Down
9 changes: 4 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,8 @@ COPY / /home/bety
COPY /docker/database.yml /home/bety/config/database.yml
COPY /docker/config.ru /home/bety/config.ru

# download dump.bety and load.bety scripts and configure app
RUN curl -LOs https://raw.githubusercontent.com/PecanProject/pecan/master/scripts/load.bety.sh \
&& curl -LOs https://raw.githubusercontent.com/PecanProject/pecan/master/scripts/dump.bety.sh \
&& chmod +x load.bety.sh dump.bety.sh \
&& /bin/sed -e '/serve_static_assets/ s/false$/true/' -i config/environments/production.rb
# configure app
RUN /bin/sed -e '/serve_static_assets/ s/false$/true/' -i config/environments/production.rb

# arguments that are added at the bottom of BETY
ARG BETY_GIT_TAGS="unknown"
Expand All @@ -44,6 +41,8 @@ ARG BETY_GIT_DATE="unknown"
# environment variables used
ENV LOCAL_SERVER=99 \
REMOTE_SERVERS="0 1 2 5" \
INITIALIZE_FLAGS="-g -u" \
INITIALIZE_URL="-w https://ebi-forecast.igb.illinois.edu/pecan/dump/all/bety.tar.gz" \
RAILS_ENV="production" \
RAILS_RELATIVE_URL_ROOT="" \
SECRET_KEY_BASE="ThisIsNotReallySuchAGreatSecret" \
Expand Down
27 changes: 27 additions & 0 deletions db/migrate/20181129000515_create_attributes_table.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Creates the polymorphic `attributes` table: a generic key/value store that
# attaches a JSONB document to any row of any other table, addressed by the
# (container_type, container_id) pair.
class CreateAttributesTable < ActiveRecord::Migration[5.1]
  def change
    # Numeric id of this BETY server; presumably used to partition primary-key
    # ranges between distributed servers (matches the 1e9 ID_RANGE convention
    # used by script/dump.bety.sh) — confirm against the Machine model.
    this_hostid = Machine.new.hostid

    create_table :attributes, id: :bigint do |t|
      # Polymorphic reference: name of the owning table/model ...
      t.string :container_type, null: false
      # ... and the id of the owning row (limit: 8 => 64-bit integer).
      t.integer :container_id, limit: 8, null: false
      # Arbitrary JSON payload; defaults to an empty document.
      t.jsonb :value, null: false, default: '{}'
      t.timestamps
    end

    add_index :attributes, :container_id
    # GIN index so the JSONB payload can be queried efficiently.
    add_index :attributes, :value, using: :gin

    # Raw SQL has no automatic rollback, so it only runs on migrate "up";
    # the down direction is handled by create_table being reversed.
    reversible do |dir|
      dir.up do
        execute %{
          SELECT setval('attributes_id_seq', 1 + CAST(1e9 * #{this_hostid}::int AS bigint), FALSE);
          ALTER TABLE "attributes"
            ALTER COLUMN created_at SET DEFAULT utc_now(),
            ALTER COLUMN updated_at SET DEFAULT utc_now(),
            ADD CONSTRAINT container_type_id UNIQUE(container_type, container_id);
        }
      end
    end
  end
end
6 changes: 3 additions & 3 deletions docker/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@ case $1 in
echo "Create new database, initialized from all data."
psql -h postgres -p 5432 -U postgres -c "CREATE ROLE bety WITH LOGIN CREATEDB NOSUPERUSER NOCREATEROLE PASSWORD 'bety'"
psql -h postgres -p 5432 -U postgres -c "CREATE DATABASE bety WITH OWNER bety"
./load.bety.sh -a "postgres" -d "bety" -p "-h postgres -p 5432" -o bety -c -u -g -m ${LOCAL_SERVER} -r 0 -w https://ebi-forecast.igb.illinois.edu/pecan/dump/all/bety.tar.gz
./script/load.bety.sh -a "postgres" -d "bety" -p "-h postgres -p 5432" -o bety -c ${INITIALIZE_FLAGS} -m ${LOCAL_SERVER} -r 0 ${INITIALIZE_URL}
;;
"sync" )
echo "Synchronize with servers ${REMOTE_SERVERS}"
for r in ${REMOTE_SERVERS}; do
echo "Synchronizing with server ${r}"
./load.bety.sh -a "postgres" -d "bety" -p "-h postgres -p 5432" -o bety -r ${r}
./script/load.bety.sh -a "postgres" -d "bety" -p "-h postgres -p 5432" -o bety -r ${r}
done
;;
"dump" )
echo "Dump data from server ${LOCAL_SERVER}"
./dump.bety.sh -d "bety" -p "-h postgres -p 5432 -U postgres" -m ${LOCAL_SERVER} -o dump
./script/dump.bety.sh -d "bety" -p "-h postgres -p 5432 -U postgres" -m ${LOCAL_SERVER} -o dump
;;
"migrate" )
echo "Migrate database."
Expand Down
22 changes: 22 additions & 0 deletions release.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash

# Tag and push the pecan/bety Docker image for a release.
# For VERSION=5.2.0 this tags and pushes 5.2 and 5 (5.2.0 must already
# exist in the registry, it is pulled first).
# Set DEBUG=echo to print the docker commands instead of running them.

# abort immediately if a pull/tag/push fails instead of pushing bad tags
set -e

VERSION="5.2.0"
#DEBUG=echo

# Build the list of tags by repeatedly stripping the trailing ".N"
# component: "5.2.0" -> "5.2.0 5.2 5". The loop stops once sed can no
# longer shorten the string.
TAGS=""
TMPVERSION="${VERSION}"
OLDVERSION=""
while [[ "${OLDVERSION}" != "${TMPVERSION}" ]]; do
  TAGS="${TAGS} ${TMPVERSION}"
  OLDVERSION="${TMPVERSION}"
  TMPVERSION=$(echo "${OLDVERSION}" | sed 's/\.[0-9]*$//')
done

# make sure the image for this release is available locally
# (${DEBUG} is intentionally unquoted: empty -> no-op, "echo" -> dry run)
${DEBUG} docker pull "pecan/bety:${VERSION}"

for x in ${TAGS}; do
  # the full version tag already exists remotely; push only the short tags
  if [[ "$x" == "${VERSION}" ]]; then continue; fi

  ${DEBUG} docker tag "pecan/bety:${VERSION}" "pecan/bety:$x"
  ${DEBUG} docker push "pecan/bety:$x"
done
250 changes: 250 additions & 0 deletions script/dump.bety.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
#!/bin/bash

# Dump (part of) a BETY database into a tar file that can be loaded into
# another BETY instance with script/load.bety.sh. Unless -a is given, only
# rows whose id falls inside this site's id range are dumped.

# exit on error
set -e

# ----------------------------------------------------------------------
# START CONFIGURATION SECTION
# ----------------------------------------------------------------------

# name of the database to dump
# this script assumes the user running it has access to the database
DATABASE=${DATABASE:-"bety"}

# psql options
# this allows you to add the user to use as well as any other options
PG_OPT=${PG_OPT-"-U bety"}

# ID's used in database
# These ID's need to be unique for the sharing to work. If you want
# to share your data, send email to [email protected] to claim
# your ID range. The master list is maintained at
# https://github.com/PecanProject/bety/wiki/Distributed-BETYdb
#
# 0 - EBI - David LeBauer
# 1 - BU - Mike Dietze
# 2 - Brookhaven - Shawn Serbin
# 3 - Purdue - Jeanne Osnas
# 4 - Virginia Tech - Quinn Thomas
# 99 - VM
MYSITE=${MYSITE:-99}

# access level requirement
# 0 - private
# 4 - public
LEVEL=${LEVEL:-3}

# dump unchecked traits and yields
# set this to "YES" to dump all unchecked traits/yields as well
UNCHECKED=${UNCHECKED:-"NO"}

# keep users
# set this to YES to dump all user information, otherwise it will
# be anonymized
KEEPUSERS=${KEEPUSERS:-"NO"}

# location where to write the results, this will be a tar file
OUTPUT=${OUTPUT:-"$PWD/dump"}

# Should the process be quiet
QUIET=${QUIET:-"NO"}

# Should all the data be dumped
ALLDATA=${ALLDATA:-"NO"}

# ----------------------------------------------------------------------
# END CONFIGURATION SECTION
# ----------------------------------------------------------------------

# parse command line options
while getopts ad:hkl:m:o:p:qu opt; do
    case $opt in
    a)
        ALLDATA="YES"
        ;;
    d)
        DATABASE=$OPTARG
        ;;
    h)
        echo "$0 [-a] [-d database] [-h] [-k] [-l 0,1,2,3,4] [-m my siteid] [-o folder] [-p psql options] [-q] [-u]"
        echo " -a dump all records"
        echo " -d database, default is bety"
        echo " -h this help page"
        echo " -k keep users, default is to be anonymized"
        echo " -l level of data that can be dumped, default is 3"
        echo " -m site id, default is 99 (VM)"
        echo " -o output folder where dumped data is written, default is dump"
        echo " -p additional psql command line options, default is -U bety"
        echo " -q should the export be quiet"
        echo " -u should unchecked data be dumped, default is NO"
        exit 0
        ;;
    k)
        KEEPUSERS="YES"
        ;;
    l)
        LEVEL=$OPTARG
        ;;
    m)
        MYSITE=$OPTARG
        ;;
    o)
        OUTPUT=$OPTARG
        ;;
    p)
        PG_OPT=$OPTARG
        ;;
    q)
        QUIET="YES"
        ;;
    u)
        UNCHECKED="YES"
        ;;
    esac
done

# Table that contains the users, this table will be anonymized
USER_TABLES="users"

# list of all tables, schema_migrations is ignored since that
# will be imported during creation
CLEAN_TABLES="attributes benchmark_sets benchmarks"
CLEAN_TABLES="${CLEAN_TABLES} citations covariates cultivars dbfiles"
CLEAN_TABLES="${CLEAN_TABLES} ensembles entities experiments formats inputs"
CLEAN_TABLES="${CLEAN_TABLES} likelihoods machines managements metrics"
CLEAN_TABLES="${CLEAN_TABLES} methods mimetypes models modeltypes"
CLEAN_TABLES="${CLEAN_TABLES} pfts posteriors priors reference_runs"
CLEAN_TABLES="${CLEAN_TABLES} runs sites species treatments"
CLEAN_TABLES="${CLEAN_TABLES} variables workflows"
CLEAN_TABLES="${CLEAN_TABLES} projects sitegroups"

# tables that have checks that need to be looked at.
CHECK_TABLES="traits yields"

# tables that have many to many relationships
MANY_TABLES="benchmarks_benchmarks_reference_runs benchmarks_ensembles"
MANY_TABLES="${MANY_TABLES} benchmarks_ensembles_scores benchmarks_metrics benchmark_sets_benchmark_reference_runs"
MANY_TABLES="${MANY_TABLES} citations_sites citations_treatments"
MANY_TABLES="${MANY_TABLES} cultivars_pfts current_posteriors"
MANY_TABLES="${MANY_TABLES} experiments_sites experiments_treatments"
MANY_TABLES="${MANY_TABLES} formats_variables inputs_runs"
MANY_TABLES="${MANY_TABLES} managements_treatments modeltypes_formats"
MANY_TABLES="${MANY_TABLES} pfts_priors pfts_species"
MANY_TABLES="${MANY_TABLES} posterior_samples posteriors_ensembles"
MANY_TABLES="${MANY_TABLES} sitegroups_sites sites_cultivars trait_covariate_associations"

# tables that should NOT be dumped
IGNORE_TABLES="sessions"
SYSTEM_TABLES="schema_migrations spatial_ref_sys"

# be quiet if not interactive
if ! tty -s ; then
    exec 1>/dev/null
fi

# this value should be constant, do not change
ID_RANGE=1000000000

# make output folder
mkdir -p "${OUTPUT}"

# use an unpredictable temporary folder (safer than a /tmp/$$ name that
# another user could pre-create) and make sure it is removed again on
# exit, even when the script aborts halfway through because of set -e
DUMPDIR=$(mktemp -d)
trap 'rm -rf "${DUMPDIR}"' EXIT
chmod 777 "${DUMPDIR}"

# compute range based on MYSITE
if [ "${ALLDATA}" != "YES" ]; then
    START_ID=$(( MYSITE * ID_RANGE + 1 ))
    LAST_ID=$(( START_ID + ID_RANGE - 1 ))
    if [ "${QUIET}" != "YES" ]; then
        echo "Dumping all items that have id : [${START_ID} - ${LAST_ID}]"
    fi
    LIMIT="(id >= ${START_ID} AND id <= ${LAST_ID})"
else
    LIMIT="TRUE"
    if [ "${QUIET}" != "YES" ]; then
        echo "Dumping all items that have id : ALL ITEMS"
    fi
fi

# find current schema version
# following returns a triple:
# - number of migrations
# - largest migration
# - hash of all migrations
MIGRATIONS=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c 'SELECT COUNT(version) FROM schema_migrations' | tr -d ' ' )
VERSION=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c 'SELECT md5(array_agg(version)::text) FROM (SELECT version FROM schema_migrations ORDER BY version) as v;' | tr -d ' ' )
LATEST=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c 'SELECT version FROM schema_migrations ORDER BY version DESC LIMIT 1' | tr -d ' ' )
NOW=$( date -u +"%Y-%m-%dT%H:%M:%SZ" )
echo "${MIGRATIONS} ${VERSION} ${LATEST} ${NOW}" > "${OUTPUT}/version.txt"

# dump schema
if [ "${QUIET}" != "YES" ]; then
    printf "Dumping %-25s : " "schema"
fi
pg_dump ${PG_OPT} -s "${DATABASE}" -O -x > "${DUMPDIR}/${VERSION}.schema"
if [ "${QUIET}" != "YES" ]; then
    echo "DUMPED version ${VERSION} with ${MIGRATIONS}, latest migration is ${LATEST}"
fi

# dump ruby special table
if [ "${QUIET}" != "YES" ]; then
    printf "Dumping %-25s : " "schema_migrations"
fi
ADD=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c "SELECT count(*) FROM schema_migrations;" | tr -d ' ' )
psql ${PG_OPT} -t -q -d "${DATABASE}" -c "\COPY schema_migrations TO '${DUMPDIR}/schema_migrations.csv' WITH (DELIMITER ' ', NULL '\\N', ESCAPE '\\', FORMAT CSV, ENCODING 'UTF-8')"
if [ "${QUIET}" != "YES" ]; then
    echo "DUMPED ${ADD}"
fi

# skip following tables
# - inputs_runs (PEcAn, site specific)
# - posteriors_runs (PEcAn, site specific, is this used?)
# - runs (PEcAn, site specific)
# - workflows (PEcAn, site specific)

# dump users
if [ "${QUIET}" != "YES" ]; then
    printf "Dumping %-25s : " "users"
fi
if [ "${KEEPUSERS}" == "YES" ]; then
    psql ${PG_OPT} -t -q -d "${DATABASE}" -c "\COPY (SELECT * FROM ${USER_TABLES} WHERE ${LIMIT}) TO '${DUMPDIR}/users.csv' WITH (DELIMITER ' ', NULL '\\N', ESCAPE '\\', FORMAT CSV, ENCODING 'UTF-8')"
else
    psql ${PG_OPT} -t -q -d "${DATABASE}" -c "\COPY (SELECT id, CONCAT('user', id) AS login, CONCAT('user ' , id) AS name, CONCAT('betydb+', id, '@gmail.com') as email, 'Urbana' AS city, 'USA' AS country, '' AS area, '1234567890abcdef' AS crypted_password, 'BU' AS salt, NOW() AS created_at, NOW() AS updated_at, NULL as remember_token, NULL AS remember_token_expires_at, 3 AS access_level, 4 AS page_access_level, NULL AS apikey, 'IL' AS state_prov, '61801' AS postal_code FROM ${USER_TABLES} WHERE ${LIMIT}) TO '${DUMPDIR}/users.csv' WITH (DELIMITER ' ', NULL '\\N', ESCAPE '\\', FORMAT CSV, ENCODING 'UTF-8')"
fi
ADD=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c "SELECT count(*) FROM ${USER_TABLES} WHERE ${LIMIT};" | tr -d ' ' )
if [ "${QUIET}" != "YES" ]; then
    echo "DUMPED ${ADD}"
fi

# unrestricted tables
for T in ${CLEAN_TABLES} ${MANY_TABLES}; do
    if [ "${QUIET}" != "YES" ]; then
        printf "Dumping %-25s : " "${T}"
    fi
    psql ${PG_OPT} -t -q -d "${DATABASE}" -c "\COPY (SELECT * FROM ${T} WHERE ${LIMIT}) TO '${DUMPDIR}/${T}.csv' WITH (DELIMITER ' ', NULL '\\N', ESCAPE '\\', FORMAT CSV, ENCODING 'UTF-8')"
    ADD=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c "SELECT count(*) FROM ${T} WHERE ${LIMIT}" | tr -d ' ' )
    if [ "${QUIET}" != "YES" ]; then
        echo "DUMPED ${ADD}"
    fi
done

# restricted and unchecked tables
for T in ${CHECK_TABLES}; do
    if [ "${QUIET}" != "YES" ]; then
        printf "Dumping %-25s : " "${T}"
    fi
    if [ "${UNCHECKED}" == "YES" ]; then
        UNCHECKED_QUERY=""
    else
        UNCHECKED_QUERY="AND checked != -1"
    fi
    psql ${PG_OPT} -t -q -d "${DATABASE}" -c "\COPY (SELECT * FROM ${T} WHERE ${LIMIT} AND access_level >= ${LEVEL} ${UNCHECKED_QUERY}) TO '${DUMPDIR}/${T}.csv' WITH (DELIMITER ' ', NULL '\\N', ESCAPE '\\', FORMAT CSV, ENCODING 'UTF-8');"
    ADD=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c "SELECT count(*) FROM ${T} WHERE ${LIMIT}" | tr -d ' ' )
    if [ "${QUIET}" != "YES" ]; then
        echo "DUMPED ${ADD}"
    fi
done

# all done dumping database; the temp folder is cleaned up by the EXIT trap
tar zcf "${OUTPUT}/bety.tar.gz" -C "${DUMPDIR}" .
Loading

0 comments on commit 31de64a

Please sign in to comment.