Skip to content

Commit

Permalink
Merge pull request #642 from PecanProject/release/v5.2.0
Browse files Browse the repository at this point in the history
Release/v5.2.0
  • Loading branch information
robkooper authored Mar 14, 2019
2 parents 822d5ad + 699d79c commit 31de64a
Show file tree
Hide file tree
Showing 8 changed files with 804 additions and 9 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ public/maps/*
config/initializers/site_keys.rb
.Rproj.user
public/assets
script/load.bety.sh

# Ignore encrypted secrets key file.
config/secrets.yml.key
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ section for the next release.

For more information about this file see also [Keep a Changelog](http://keepachangelog.com/) .

## [5.2.0] - 2019-03-11

### Added

- Ability to set the initialize URL in docker
- #505 : Added a new attributes table. Through polymorphism, this table can store additional information for any row in any table. The value stored is a JSON document.
- #597 : Moved `dump.bety.sh` and `load.bety.sh` from PEcAn to BETY.

## [5.1.0] - 2019-01-14

### Fixes
Expand Down
9 changes: 4 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,8 @@ COPY / /home/bety
COPY /docker/database.yml /home/bety/config/database.yml
COPY /docker/config.ru /home/bety/config.ru

# download dump.bety and load.bety scripts and configure app
RUN curl -LOs https://raw.githubusercontent.com/PecanProject/pecan/master/scripts/load.bety.sh \
&& curl -LOs https://raw.githubusercontent.com/PecanProject/pecan/master/scripts/dump.bety.sh \
&& chmod +x load.bety.sh dump.bety.sh \
&& /bin/sed -e '/serve_static_assets/ s/false$/true/' -i config/environments/production.rb
# configure app
RUN /bin/sed -e '/serve_static_assets/ s/false$/true/' -i config/environments/production.rb

# arguments that are added at the bottom of BETY
ARG BETY_GIT_TAGS="unknown"
Expand All @@ -44,6 +41,8 @@ ARG BETY_GIT_DATE="unknown"
# environment variables used
ENV LOCAL_SERVER=99 \
REMOTE_SERVERS="0 1 2 5" \
INITIALIZE_FLAGS="-g -u" \
INITIALIZE_URL="-w https://ebi-forecast.igb.illinois.edu/pecan/dump/all/bety.tar.gz" \
RAILS_ENV="production" \
RAILS_RELATIVE_URL_ROOT="" \
SECRET_KEY_BASE="ThisIsNotReallySuchAGreatSecret" \
Expand Down
27 changes: 27 additions & 0 deletions db/migrate/20181129000515_create_attributes_table.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Creates the polymorphic `attributes` table: a generic key/value store that
# attaches a JSONB document to any row of any other table, addressed by the
# (container_type, container_id) pair.
class CreateAttributesTable < ActiveRecord::Migration[5.1]
  def change
    # Numeric id of this BETY server; presumably used to partition primary-key
    # ranges between distributed servers (matches the 1e9 ID_RANGE convention
    # used by script/dump.bety.sh) — confirm against the Machine model.
    this_hostid = Machine.new.hostid

    create_table :attributes, id: :bigint do |t|
      # Polymorphic reference: name of the owning table/model ...
      t.string :container_type, null: false
      # ... and the id of the owning row (limit: 8 => 64-bit integer).
      t.integer :container_id, limit: 8, null: false
      # Arbitrary JSON payload; defaults to an empty document.
      t.jsonb :value, null: false, default: '{}'
      t.timestamps
    end

    add_index :attributes, :container_id
    # GIN index so the JSONB payload can be queried efficiently.
    add_index :attributes, :value, using: :gin

    # Raw SQL has no automatic rollback, so it only runs on migrate "up";
    # the down direction is handled by create_table being reversed.
    reversible do |dir|
      dir.up do
        execute %{
          SELECT setval('attributes_id_seq', 1 + CAST(1e9 * #{this_hostid}::int AS bigint), FALSE);
          ALTER TABLE "attributes"
            ALTER COLUMN created_at SET DEFAULT utc_now(),
            ALTER COLUMN updated_at SET DEFAULT utc_now(),
            ADD CONSTRAINT container_type_id UNIQUE(container_type, container_id);
        }
      end
    end
  end
end
6 changes: 3 additions & 3 deletions docker/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@ case $1 in
echo "Create new database, initialized from all data."
psql -h postgres -p 5432 -U postgres -c "CREATE ROLE bety WITH LOGIN CREATEDB NOSUPERUSER NOCREATEROLE PASSWORD 'bety'"
psql -h postgres -p 5432 -U postgres -c "CREATE DATABASE bety WITH OWNER bety"
./load.bety.sh -a "postgres" -d "bety" -p "-h postgres -p 5432" -o bety -c -u -g -m ${LOCAL_SERVER} -r 0 -w https://ebi-forecast.igb.illinois.edu/pecan/dump/all/bety.tar.gz
./script/load.bety.sh -a "postgres" -d "bety" -p "-h postgres -p 5432" -o bety -c ${INITIALIZE_FLAGS} -m ${LOCAL_SERVER} -r 0 ${INITIALIZE_URL}
;;
"sync" )
echo "Synchronize with servers ${REMOTE_SERVERS}"
for r in ${REMOTE_SERVERS}; do
echo "Synchronizing with server ${r}"
./load.bety.sh -a "postgres" -d "bety" -p "-h postgres -p 5432" -o bety -r ${r}
./script/load.bety.sh -a "postgres" -d "bety" -p "-h postgres -p 5432" -o bety -r ${r}
done
;;
"dump" )
echo "Dump data from server ${LOCAL_SERVER}"
./dump.bety.sh -d "bety" -p "-h postgres -p 5432 -U postgres" -m ${LOCAL_SERVER} -o dump
./script/dump.bety.sh -d "bety" -p "-h postgres -p 5432 -U postgres" -m ${LOCAL_SERVER} -o dump
;;
"migrate" )
echo "Migrate database."
Expand Down
22 changes: 22 additions & 0 deletions release.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash

# Tag and push the pecan/bety Docker image for a release.
# For VERSION=5.2.0 this tags and pushes 5.2 and 5 (5.2.0 must already
# exist in the registry, it is pulled first).
# Set DEBUG=echo to print the docker commands instead of running them.

# abort immediately if a pull/tag/push fails instead of pushing bad tags
set -e

VERSION="5.2.0"
#DEBUG=echo

# Build the list of tags by repeatedly stripping the trailing ".N"
# component: "5.2.0" -> "5.2.0 5.2 5". The loop stops once sed can no
# longer shorten the string.
TAGS=""
TMPVERSION="${VERSION}"
OLDVERSION=""
while [[ "${OLDVERSION}" != "${TMPVERSION}" ]]; do
  TAGS="${TAGS} ${TMPVERSION}"
  OLDVERSION="${TMPVERSION}"
  TMPVERSION=$(echo "${OLDVERSION}" | sed 's/\.[0-9]*$//')
done

# make sure the image for this release is available locally
# (${DEBUG} is intentionally unquoted: empty -> no-op, "echo" -> dry run)
${DEBUG} docker pull "pecan/bety:${VERSION}"

for x in ${TAGS}; do
  # the full version tag already exists remotely; push only the short tags
  if [[ "$x" == "${VERSION}" ]]; then continue; fi

  ${DEBUG} docker tag "pecan/bety:${VERSION}" "pecan/bety:$x"
  ${DEBUG} docker push "pecan/bety:$x"
done
250 changes: 250 additions & 0 deletions script/dump.bety.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
#!/bin/bash

# Dump (part of) a BETY database into a tar file that can be loaded into
# another BETY instance with script/load.bety.sh. Unless -a is given, only
# rows whose id falls inside this site's id range are dumped.

# exit on error
set -e

# ----------------------------------------------------------------------
# START CONFIGURATION SECTION
# ----------------------------------------------------------------------

# name of the database to dump
# this script assumes the user running it has access to the database
DATABASE=${DATABASE:-"bety"}

# psql options
# this allows you to add the user to use as well as any other options
PG_OPT=${PG_OPT-"-U bety"}

# ID's used in database
# These ID's need to be unique for the sharing to work. If you want
# to share your data, send email to [email protected] to claim
# your ID range. The master list is maintained at
# https://github.com/PecanProject/bety/wiki/Distributed-BETYdb
#
# 0 - EBI - David LeBauer
# 1 - BU - Mike Dietze
# 2 - Brookhaven - Shawn Serbin
# 3 - Purdue - Jeanne Osnas
# 4 - Virginia Tech - Quinn Thomas
# 99 - VM
MYSITE=${MYSITE:-99}

# access level requirement
# 0 - private
# 4 - public
LEVEL=${LEVEL:-3}

# dump unchecked traits and yields
# set this to "YES" to dump all unchecked traits/yields as well
UNCHECKED=${UNCHECKED:-"NO"}

# keep users
# set this to YES to dump all user information, otherwise it will
# be anonymized
KEEPUSERS=${KEEPUSERS:-"NO"}

# location where to write the results, this will be a tar file
OUTPUT=${OUTPUT:-"$PWD/dump"}

# Should the process be quiet
QUIET=${QUIET:-"NO"}

# Should all the data be dumped
ALLDATA=${ALLDATA:-"NO"}

# ----------------------------------------------------------------------
# END CONFIGURATION SECTION
# ----------------------------------------------------------------------

# parse command line options
while getopts ad:hkl:m:o:p:qu opt; do
    case $opt in
    a)
        ALLDATA="YES"
        ;;
    d)
        DATABASE=$OPTARG
        ;;
    h)
        echo "$0 [-a] [-d database] [-h] [-k] [-l 0,1,2,3,4] [-m my siteid] [-o folder] [-p psql options] [-q] [-u]"
        echo " -a dump all records"
        echo " -d database, default is bety"
        echo " -h this help page"
        echo " -k keep users, default is to be anonymized"
        echo " -l level of data that can be dumped, default is 3"
        echo " -m site id, default is 99 (VM)"
        echo " -o output folder where dumped data is written, default is dump"
        echo " -p additional psql command line options, default is -U bety"
        echo " -q should the export be quiet"
        echo " -u should unchecked data be dumped, default is NO"
        exit 0
        ;;
    k)
        KEEPUSERS="YES"
        ;;
    l)
        LEVEL=$OPTARG
        ;;
    m)
        MYSITE=$OPTARG
        ;;
    o)
        OUTPUT=$OPTARG
        ;;
    p)
        PG_OPT=$OPTARG
        ;;
    q)
        QUIET="YES"
        ;;
    u)
        UNCHECKED="YES"
        ;;
    esac
done

# Table that contains the users, this table will be anonymized
USER_TABLES="users"

# list of all tables, schema_migrations is ignored since that
# will be imported during creation
CLEAN_TABLES="attributes benchmark_sets benchmarks"
CLEAN_TABLES="${CLEAN_TABLES} citations covariates cultivars dbfiles"
CLEAN_TABLES="${CLEAN_TABLES} ensembles entities experiments formats inputs"
CLEAN_TABLES="${CLEAN_TABLES} likelihoods machines managements metrics"
CLEAN_TABLES="${CLEAN_TABLES} methods mimetypes models modeltypes"
CLEAN_TABLES="${CLEAN_TABLES} pfts posteriors priors reference_runs"
CLEAN_TABLES="${CLEAN_TABLES} runs sites species treatments"
CLEAN_TABLES="${CLEAN_TABLES} variables workflows"
CLEAN_TABLES="${CLEAN_TABLES} projects sitegroups"

# tables that have checks that need to be looked at.
CHECK_TABLES="traits yields"

# tables that have many to many relationships
MANY_TABLES="benchmarks_benchmarks_reference_runs benchmarks_ensembles"
MANY_TABLES="${MANY_TABLES} benchmarks_ensembles_scores benchmarks_metrics benchmark_sets_benchmark_reference_runs"
MANY_TABLES="${MANY_TABLES} citations_sites citations_treatments"
MANY_TABLES="${MANY_TABLES} cultivars_pfts current_posteriors"
MANY_TABLES="${MANY_TABLES} experiments_sites experiments_treatments"
MANY_TABLES="${MANY_TABLES} formats_variables inputs_runs"
MANY_TABLES="${MANY_TABLES} managements_treatments modeltypes_formats"
MANY_TABLES="${MANY_TABLES} pfts_priors pfts_species"
MANY_TABLES="${MANY_TABLES} posterior_samples posteriors_ensembles"
MANY_TABLES="${MANY_TABLES} sitegroups_sites sites_cultivars trait_covariate_associations"

# tables that should NOT be dumped
IGNORE_TABLES="sessions"
SYSTEM_TABLES="schema_migrations spatial_ref_sys"

# be quiet if not interactive
if ! tty -s ; then
    exec 1>/dev/null
fi

# this value should be constant, do not change
ID_RANGE=1000000000

# make output folder
mkdir -p "${OUTPUT}"

# use an unpredictable temporary folder (safer than a /tmp/$$ name that
# another user could pre-create) and make sure it is removed again on
# exit, even when the script aborts halfway through because of set -e
DUMPDIR=$(mktemp -d)
trap 'rm -rf "${DUMPDIR}"' EXIT
chmod 777 "${DUMPDIR}"

# compute range based on MYSITE
if [ "${ALLDATA}" != "YES" ]; then
    START_ID=$(( MYSITE * ID_RANGE + 1 ))
    LAST_ID=$(( START_ID + ID_RANGE - 1 ))
    if [ "${QUIET}" != "YES" ]; then
        echo "Dumping all items that have id : [${START_ID} - ${LAST_ID}]"
    fi
    LIMIT="(id >= ${START_ID} AND id <= ${LAST_ID})"
else
    LIMIT="TRUE"
    if [ "${QUIET}" != "YES" ]; then
        echo "Dumping all items that have id : ALL ITEMS"
    fi
fi

# find current schema version
# following returns a triple:
# - number of migrations
# - largest migration
# - hash of all migrations
MIGRATIONS=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c 'SELECT COUNT(version) FROM schema_migrations' | tr -d ' ' )
VERSION=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c 'SELECT md5(array_agg(version)::text) FROM (SELECT version FROM schema_migrations ORDER BY version) as v;' | tr -d ' ' )
LATEST=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c 'SELECT version FROM schema_migrations ORDER BY version DESC LIMIT 1' | tr -d ' ' )
NOW=$( date -u +"%Y-%m-%dT%H:%M:%SZ" )
echo "${MIGRATIONS} ${VERSION} ${LATEST} ${NOW}" > "${OUTPUT}/version.txt"

# dump schema
if [ "${QUIET}" != "YES" ]; then
    printf "Dumping %-25s : " "schema"
fi
pg_dump ${PG_OPT} -s "${DATABASE}" -O -x > "${DUMPDIR}/${VERSION}.schema"
if [ "${QUIET}" != "YES" ]; then
    echo "DUMPED version ${VERSION} with ${MIGRATIONS}, latest migration is ${LATEST}"
fi

# dump ruby special table
if [ "${QUIET}" != "YES" ]; then
    printf "Dumping %-25s : " "schema_migrations"
fi
ADD=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c "SELECT count(*) FROM schema_migrations;" | tr -d ' ' )
psql ${PG_OPT} -t -q -d "${DATABASE}" -c "\COPY schema_migrations TO '${DUMPDIR}/schema_migrations.csv' WITH (DELIMITER ' ', NULL '\\N', ESCAPE '\\', FORMAT CSV, ENCODING 'UTF-8')"
if [ "${QUIET}" != "YES" ]; then
    echo "DUMPED ${ADD}"
fi

# skip following tables
# - inputs_runs (PEcAn, site specific)
# - posteriors_runs (PEcAn, site specific, is this used?)
# - runs (PEcAn, site specific)
# - workflows (PEcAn, site specific)

# dump users
if [ "${QUIET}" != "YES" ]; then
    printf "Dumping %-25s : " "users"
fi
if [ "${KEEPUSERS}" == "YES" ]; then
    psql ${PG_OPT} -t -q -d "${DATABASE}" -c "\COPY (SELECT * FROM ${USER_TABLES} WHERE ${LIMIT}) TO '${DUMPDIR}/users.csv' WITH (DELIMITER ' ', NULL '\\N', ESCAPE '\\', FORMAT CSV, ENCODING 'UTF-8')"
else
    psql ${PG_OPT} -t -q -d "${DATABASE}" -c "\COPY (SELECT id, CONCAT('user', id) AS login, CONCAT('user ' , id) AS name, CONCAT('betydb+', id, '@gmail.com') as email, 'Urbana' AS city, 'USA' AS country, '' AS area, '1234567890abcdef' AS crypted_password, 'BU' AS salt, NOW() AS created_at, NOW() AS updated_at, NULL as remember_token, NULL AS remember_token_expires_at, 3 AS access_level, 4 AS page_access_level, NULL AS apikey, 'IL' AS state_prov, '61801' AS postal_code FROM ${USER_TABLES} WHERE ${LIMIT}) TO '${DUMPDIR}/users.csv' WITH (DELIMITER ' ', NULL '\\N', ESCAPE '\\', FORMAT CSV, ENCODING 'UTF-8')"
fi
ADD=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c "SELECT count(*) FROM ${USER_TABLES} WHERE ${LIMIT};" | tr -d ' ' )
if [ "${QUIET}" != "YES" ]; then
    echo "DUMPED ${ADD}"
fi

# unrestricted tables
for T in ${CLEAN_TABLES} ${MANY_TABLES}; do
    if [ "${QUIET}" != "YES" ]; then
        printf "Dumping %-25s : " "${T}"
    fi
    psql ${PG_OPT} -t -q -d "${DATABASE}" -c "\COPY (SELECT * FROM ${T} WHERE ${LIMIT}) TO '${DUMPDIR}/${T}.csv' WITH (DELIMITER ' ', NULL '\\N', ESCAPE '\\', FORMAT CSV, ENCODING 'UTF-8')"
    ADD=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c "SELECT count(*) FROM ${T} WHERE ${LIMIT}" | tr -d ' ' )
    if [ "${QUIET}" != "YES" ]; then
        echo "DUMPED ${ADD}"
    fi
done

# restricted and unchecked tables
for T in ${CHECK_TABLES}; do
    if [ "${QUIET}" != "YES" ]; then
        printf "Dumping %-25s : " "${T}"
    fi
    if [ "${UNCHECKED}" == "YES" ]; then
        UNCHECKED_QUERY=""
    else
        UNCHECKED_QUERY="AND checked != -1"
    fi
    psql ${PG_OPT} -t -q -d "${DATABASE}" -c "\COPY (SELECT * FROM ${T} WHERE ${LIMIT} AND access_level >= ${LEVEL} ${UNCHECKED_QUERY}) TO '${DUMPDIR}/${T}.csv' WITH (DELIMITER ' ', NULL '\\N', ESCAPE '\\', FORMAT CSV, ENCODING 'UTF-8');"
    ADD=$( psql ${PG_OPT} -t -q -d "${DATABASE}" -c "SELECT count(*) FROM ${T} WHERE ${LIMIT}" | tr -d ' ' )
    if [ "${QUIET}" != "YES" ]; then
        echo "DUMPED ${ADD}"
    fi
done

# all done dumping database; the temp folder is cleaned up by the EXIT trap
tar zcf "${OUTPUT}/bety.tar.gz" -C "${DUMPDIR}" .
Loading

0 comments on commit 31de64a

Please sign in to comment.