Skip to content

Commit

Permalink
Added DateTime configs for run_enceladus
Browse files Browse the repository at this point in the history
  • Loading branch information
TebaleloS committed Mar 22, 2023
1 parent 77bc82a commit e139800
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 0 deletions.
3 changes: 3 additions & 0 deletions scripts/bash/enceladus_env.template.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ CONF_DEFAULT_DRA_MIN_EXECUTORS=0
CONF_DEFAULT_DRA_ALLOCATION_RATIO=0.5
CONF_DEFAULT_ADAPTIVE_TARGET_POSTSHUFFLE_INPUT_SIZE=134217728

# Default Parquet datetime rebase modes. run_enceladus.sh uses these as the
# initial values of PARQUET_DATETIME_READ_MODE / PARQUET_DATETIME_WRITE_MODE,
# which it passes to the spark.sql.parquet.datetimeRebaseModeIn{Read,Write}
# and spark.sql.parquet.int96RebaseModeIn{Read,Write} Spark settings.
# NOTE(review): Spark documents EXCEPTION, CORRECTED and LEGACY as the accepted
# values - confirm against the Spark version in use.
DEFAULT_PARQUET_DATETIME_READ_MODE="CORRECTED"
DEFAULT_PARQUET_DATETIME_WRITE_MODE="CORRECTED"

DEFAULT_DEPLOY_MODE="client"

LOG_DIR="/tmp"
Expand Down
15 changes: 15 additions & 0 deletions scripts/bash/run_enceladus.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ DRA_MIN_EXECUTORS="$DEFAULT_DRA_MIN_EXECUTORS"
DRA_MAX_EXECUTORS="$DEFAULT_DRA_MAX_EXECUTORS"
DRA_ALLOCATION_RATIO="$DEFAULT_DRA_ALLOCATION_RATIO"
ADAPTIVE_TARGET_POSTSHUFFLE_INPUT_SIZE="$DEFAULT_ADAPTIVE_TARGET_POSTSHUFFLE_INPUT_SIZE"
# Parquet datetime rebase modes: start from the environment-file defaults and
# may be overridden by --parquet-datetime-write-mode / --parquet-datetime-read-mode.
PARQUET_DATETIME_WRITE_MODE="$DEFAULT_PARQUET_DATETIME_WRITE_MODE"
PARQUET_DATETIME_READ_MODE="$DEFAULT_PARQUET_DATETIME_READ_MODE"

# Command line defaults for the job
# JAR: SPARK_JOBS_JAR_OVERRIDE wins when it is set and non-empty; otherwise
# fall back to SPARK_JOBS_JAR.
JAR=${SPARK_JOBS_JAR_OVERRIDE:-$SPARK_JOBS_JAR}
Expand Down Expand Up @@ -118,6 +120,14 @@ case $key in
DRA_EXECUTOR_MEMORY="$2"
shift 2 # past argument and value
;;
# Override the Parquet datetime rebase mode used when reading; the value is
# taken verbatim from the argument following the flag.
--parquet-datetime-read-mode)
PARQUET_DATETIME_READ_MODE="$2"
shift 2 # past argument and value
;;
# Override the Parquet datetime rebase mode used when writing; the value is
# taken verbatim from the argument following the flag.
--parquet-datetime-write-mode)
PARQUET_DATETIME_WRITE_MODE="$2"
shift 2 # past argument and value
;;
--master)
MASTER="$2"
shift 2 # past argument and value
Expand Down Expand Up @@ -477,6 +487,11 @@ else
add_to_cmd_line "--executor-cores" "${EXECUTOR_CORES}"
fi

# Parquet rebase settings: the single read mode is applied to both the
# datetime and the INT96 "InRead" keys, and the single write mode to both
# "InWrite" keys, so the two timestamp encodings are always configured alike.
# NOTE(review): add_spark_conf_cmd is defined outside this view - confirm how
# it treats an empty value (e.g. an environment file without the new defaults).
add_spark_conf_cmd "spark.sql.parquet.datetimeRebaseModeInRead" "${PARQUET_DATETIME_READ_MODE}"
add_spark_conf_cmd "spark.sql.parquet.datetimeRebaseModeInWrite" "${PARQUET_DATETIME_WRITE_MODE}"
add_spark_conf_cmd "spark.sql.parquet.int96RebaseModeInRead" "${PARQUET_DATETIME_READ_MODE}"
add_spark_conf_cmd "spark.sql.parquet.int96RebaseModeInWrite" "${PARQUET_DATETIME_WRITE_MODE}"

# Build the spark.driver.extraJavaOptions setting as one string: -D system
# properties for the standardized HDFS path, the Spline MongoDB URL and name,
# and the HDP version, followed by MT_PATTERN, MIN_PARTITION_SIZE and
# MAX_PARTITION_SIZE expanded as-is (presumably pre-formatted JVM options set
# earlier in the script - verify). The trailing backslashes inside the double
# quotes join the lines, so the value contains no embedded newlines.
JVM_CONF="spark.driver.extraJavaOptions=-Dstandardized.hdfs.path=$STD_HDFS_PATH \
-Dspline.mongodb.url=$SPLINE_MONGODB_URL -Dspline.mongodb.name=$SPLINE_MONGODB_NAME -Dhdp.version=$HDP_VERSION \
$MT_PATTERN $MIN_PARTITION_SIZE $MAX_PARTITION_SIZE"
Expand Down

0 comments on commit e139800

Please sign in to comment.