From e139800823aa2205c32162be5fbe5d966884f3c8 Mon Sep 17 00:00:00 2001
From: AB019TC
Date: Wed, 22 Mar 2023 15:34:09 +0200
Subject: [PATCH] Added DateTime configs for `run_enceladus`

---
 scripts/bash/enceladus_env.template.sh |  3 +++
 scripts/bash/run_enceladus.sh          | 15 +++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/scripts/bash/enceladus_env.template.sh b/scripts/bash/enceladus_env.template.sh
index 4582fe506..4a8ca6c6a 100644
--- a/scripts/bash/enceladus_env.template.sh
+++ b/scripts/bash/enceladus_env.template.sh
@@ -65,6 +65,9 @@ CONF_DEFAULT_DRA_MIN_EXECUTORS=0
 CONF_DEFAULT_DRA_ALLOCATION_RATIO=0.5
 CONF_DEFAULT_ADAPTIVE_TARGET_POSTSHUFFLE_INPUT_SIZE=134217728
 
+DEFAULT_PARQUET_DATETIME_READ_MODE="CORRECTED"
+DEFAULT_PARQUET_DATETIME_WRITE_MODE="CORRECTED"
+
 DEFAULT_DEPLOY_MODE="client"
 
 LOG_DIR="/tmp"
diff --git a/scripts/bash/run_enceladus.sh b/scripts/bash/run_enceladus.sh
index f14b02795..d2903eaca 100644
--- a/scripts/bash/run_enceladus.sh
+++ b/scripts/bash/run_enceladus.sh
@@ -40,6 +40,8 @@ DRA_MIN_EXECUTORS="$DEFAULT_DRA_MIN_EXECUTORS"
 DRA_MAX_EXECUTORS="$DEFAULT_DRA_MAX_EXECUTORS"
 DRA_ALLOCATION_RATIO="$DEFAULT_DRA_ALLOCATION_RATIO"
 ADAPTIVE_TARGET_POSTSHUFFLE_INPUT_SIZE="$DEFAULT_ADAPTIVE_TARGET_POSTSHUFFLE_INPUT_SIZE"
+PARQUET_DATETIME_WRITE_MODE="$DEFAULT_PARQUET_DATETIME_WRITE_MODE"
+PARQUET_DATETIME_READ_MODE="$DEFAULT_PARQUET_DATETIME_READ_MODE"
 
 # Command like default for the job
 JAR=${SPARK_JOBS_JAR_OVERRIDE:-$SPARK_JOBS_JAR}
@@ -118,6 +120,14 @@ case $key in
     DRA_EXECUTOR_MEMORY="$2"
     shift 2 # past argument and value
     ;;
+    --parquet-datetime-read-mode)
+    PARQUET_DATETIME_READ_MODE="$2"
+    shift 2 # past argument and value
+    ;;
+    --parquet-datetime-write-mode)
+    PARQUET_DATETIME_WRITE_MODE="$2"
+    shift 2 # past argument and value
+    ;;
     --master)
     MASTER="$2"
     shift 2 # past argument and value
@@ -477,6 +487,11 @@ else
   add_to_cmd_line "--executor-cores" "${EXECUTOR_CORES}"
 fi
 
+add_spark_conf_cmd "spark.sql.parquet.datetimeRebaseModeInRead" "${PARQUET_DATETIME_READ_MODE}"
+add_spark_conf_cmd "spark.sql.parquet.datetimeRebaseModeInWrite" "${PARQUET_DATETIME_WRITE_MODE}"
+add_spark_conf_cmd "spark.sql.parquet.int96RebaseModeInRead" "${PARQUET_DATETIME_READ_MODE}"
+add_spark_conf_cmd "spark.sql.parquet.int96RebaseModeInWrite" "${PARQUET_DATETIME_WRITE_MODE}"
+
 JVM_CONF="spark.driver.extraJavaOptions=-Dstandardized.hdfs.path=$STD_HDFS_PATH \
 -Dspline.mongodb.url=$SPLINE_MONGODB_URL -Dspline.mongodb.name=$SPLINE_MONGODB_NAME -Dhdp.version=$HDP_VERSION \
 $MT_PATTERN $MIN_PARTITION_SIZE $MAX_PARTITION_SIZE"