From e139800823aa2205c32162be5fbe5d966884f3c8 Mon Sep 17 00:00:00 2001
From: AB019TC <tebalelo.sekhula@absa.africa>
Date: Wed, 22 Mar 2023 15:34:09 +0200
Subject: [PATCH 1/3] Added DateTime configs for `run_enceladus`

---
 scripts/bash/enceladus_env.template.sh |  3 +++
 scripts/bash/run_enceladus.sh          | 15 +++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/scripts/bash/enceladus_env.template.sh b/scripts/bash/enceladus_env.template.sh
index 4582fe506..4a8ca6c6a 100644
--- a/scripts/bash/enceladus_env.template.sh
+++ b/scripts/bash/enceladus_env.template.sh
@@ -65,6 +65,9 @@ CONF_DEFAULT_DRA_MIN_EXECUTORS=0
 CONF_DEFAULT_DRA_ALLOCATION_RATIO=0.5
 CONF_DEFAULT_ADAPTIVE_TARGET_POSTSHUFFLE_INPUT_SIZE=134217728
 
+DEFAULT_PARQUET_DATETIME_READ_MODE="CORRECTED"
+DEFAULT_PARQUET_DATETIME_WRITE_MODE="CORRECTED"
+
 DEFAULT_DEPLOY_MODE="client"
 
 LOG_DIR="/tmp"
diff --git a/scripts/bash/run_enceladus.sh b/scripts/bash/run_enceladus.sh
index f14b02795..d2903eaca 100644
--- a/scripts/bash/run_enceladus.sh
+++ b/scripts/bash/run_enceladus.sh
@@ -40,6 +40,8 @@ DRA_MIN_EXECUTORS="$DEFAULT_DRA_MIN_EXECUTORS"
 DRA_MAX_EXECUTORS="$DEFAULT_DRA_MAX_EXECUTORS"
 DRA_ALLOCATION_RATIO="$DEFAULT_DRA_ALLOCATION_RATIO"
 ADAPTIVE_TARGET_POSTSHUFFLE_INPUT_SIZE="$DEFAULT_ADAPTIVE_TARGET_POSTSHUFFLE_INPUT_SIZE"
+PARQUET_DATETIME_WRITE_MODE="$DEFAULT_PARQUET_DATETIME_WRITE_MODE"
+PARQUET_DATETIME_READ_MODE="$DEFAULT_PARQUET_DATETIME_READ_MODE"
 
 # Command like default for the job
 JAR=${SPARK_JOBS_JAR_OVERRIDE:-$SPARK_JOBS_JAR}
@@ -118,6 +120,14 @@ case $key in
     DRA_EXECUTOR_MEMORY="$2"
     shift 2 # past argument and value
     ;;
+  --parquet-datetime-read-mode)
+    PARQUET_DATETIME_READ_MODE="$2"
+    shift 2 # past argument and value
+    ;;
+  --parquet-datetime-write-mode)
+    PARQUET_DATETIME_WRITE_MODE="$2"
+    shift 2 # past argument and value
+    ;;
   --master)
     MASTER="$2"
     shift 2 # past argument and value
@@ -477,6 +487,11 @@ else
   add_to_cmd_line "--executor-cores" "${EXECUTOR_CORES}"
 fi
 
+add_spark_conf_cmd "spark.sql.parquet.datetimeRebaseModeInRead" "${PARQUET_DATETIME_READ_MODE}"
+add_spark_conf_cmd "spark.sql.parquet.datetimeRebaseModeInWrite" "${PARQUET_DATETIME_WRITE_MODE}"
+add_spark_conf_cmd "spark.sql.parquet.int96RebaseModeInRead" "${PARQUET_DATETIME_READ_MODE}"
+add_spark_conf_cmd "spark.sql.parquet.int96RebaseModeInWrite" "${PARQUET_DATETIME_WRITE_MODE}"
+
 JVM_CONF="spark.driver.extraJavaOptions=-Dstandardized.hdfs.path=$STD_HDFS_PATH \
 -Dspline.mongodb.url=$SPLINE_MONGODB_URL -Dspline.mongodb.name=$SPLINE_MONGODB_NAME -Dhdp.version=$HDP_VERSION \
 $MT_PATTERN $MIN_PARTITION_SIZE $MAX_PARTITION_SIZE"

From 9098681fdfcae2ed1df448b1440f2c038a91c91a Mon Sep 17 00:00:00 2001
From: AB019TC <tebalelo.sekhula@absa.africa>
Date: Wed, 22 Mar 2023 16:26:58 +0200
Subject: [PATCH 2/3] Added DateTime configs for `_run_enceladus.cmd` and help entries

---
 scripts/bash/_print_help.sh    |  2 ++
 scripts/cmd/_run_enceladus.cmd | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/scripts/bash/_print_help.sh b/scripts/bash/_print_help.sh
index 63ca1798e..cc6c36175 100644
--- a/scripts/bash/_print_help.sh
+++ b/scripts/bash/_print_help.sh
@@ -39,6 +39,8 @@ echo "  --executor-memory MEM                    Memory per executor (e.g. 1000M
 echo "  --dra-num-executors NUM                  Same as '--num-executors' but used when DRA is enabled. Use with care! DRA won't scale below this NUM."
 echo "  --dra-executor-cores NUM                 Same as '--executor-memory' but used when DRA is enabled."
 echo "  --dra-executor-memory MEM                Same as '--executor-cores' but used when DRA is enabled."
+echo "  --parquet-datetime-read-mode"
+echo "  --parquet-datetime-write-mode"
 echo "  --master MASTER_URL                      spark://host:port, mesos://host:port, yarn, k8s://https://host:port, or local"
 echo "  --deploy-mode DEPLOY_MODE                Whether to launch the driver program locally (\"client\") or on one of the worker machines inside the cluster (\"cluster\")."
 echo "  --driver-cores NUM                       Number of cores used by the driver, only in cluster mode."
diff --git a/scripts/cmd/_run_enceladus.cmd b/scripts/cmd/_run_enceladus.cmd
index 49b73dd7c..08197d530 100644
--- a/scripts/cmd/_run_enceladus.cmd
+++ b/scripts/cmd/_run_enceladus.cmd
@@ -29,6 +29,8 @@ SET EXECUTOR_CORES=%DEFAULT_EXECUTOR_CORES%
 SET EXECUTOR_MEMORY=%EFAULT_EXECUTOR_MEMORY%
 SET DRA_EXECUTOR_CORES=%DEFAULT_DRA_EXECUTOR_CORES%
 SET DRA_EXECUTOR_MEMORY=%DEFAULT_DRA_EXECUTOR_MEMORY%
+SET PARQUET_DATETIME_READ_MODE=%DEFAULT_PARQUET_DATETIME_READ_MODE%
+SET PARQUET_DATETIME_WRITE_MODE=%DEFAULT_PARQUET_DATETIME_WRITE_MODE%
 SET NUM_EXECUTORS=%DEFAULT_NUM_EXECUTORS%
 SET DRA_NUM_EXECUTORS=
 SET FILES=%ENCELADUS_FILES%
@@ -131,6 +133,18 @@ IF "%1"=="--dra-executor-memory" (
     SHIFT
     GOTO CmdParse
 )
+IF "%1"=="--parquet-datetime-read-mode" (
+    SET PARQUET_DATETIME_READ_MODE=%2
+    SHIFT
+    SHIFT
+    GOTO CmdParse
+)
+IF "%1"=="--parquet-datetime-write-mode" (
+    SET PARQUET_DATETIME_WRITE_MODE=%2
+    SHIFT
+    SHIFT
+    GOTO CmdParse
+)
 IF "%1"=="--master" (
     SET MASTER=%2
     SHIFT
@@ -561,6 +575,11 @@ IF %DRA_ENABLED%==true (
     IF DEFINED EXECUTOR_CORES SET CMD_LINE=%CMD_LINE% --executor-cores %EXECUTOR_CORES%
 )
 
+IF DEFINED PARQUET_DATETIME_READ_MODE SET SPARK_CONF=%SPARK_CONF% --conf spark.sql.parquet.datetimeRebaseModeInRead=%PARQUET_DATETIME_READ_MODE%
+IF DEFINED PARQUET_DATETIME_WRITE_MODE SET SPARK_CONF=%SPARK_CONF% --conf spark.sql.parquet.datetimeRebaseModeInWrite=%PARQUET_DATETIME_WRITE_MODE%
+IF DEFINED PARQUET_DATETIME_READ_MODE SET SPARK_CONF=%SPARK_CONF% --conf spark.sql.parquet.int96RebaseModeInRead=%PARQUET_DATETIME_READ_MODE%
+IF DEFINED PARQUET_DATETIME_WRITE_MODE SET SPARK_CONF=%SPARK_CONF% --conf spark.sql.parquet.int96RebaseModeInWrite=%PARQUET_DATETIME_WRITE_MODE%
+
 SET JVM_CONF=spark.driver.extraJavaOptions=-Dstandardized.hdfs.path=%STD_HDFS_PATH% -Dspline.mongodb.url=%SPLINE_MONGODB_URL% -Dspline.mongodb.name=%SPLINE_MONGODB_NAME% -Dhdp.version=%HDP_VERSION% %MT_PATTERN% %MIN_BLOCK_SIZE% %MAX_BLOCK_SIZE%
 
 SET CMD_LINE=%SPARK_SUBMIT%

From 8df173e1604b57ab7caf42019302689c382a084c Mon Sep 17 00:00:00 2001
From: AB019TC <tebalelo.sekhula@absa.africa>
Date: Thu, 23 Mar 2023 09:14:35 +0200
Subject: [PATCH 3/3] Added DateTime variables descriptions

---
 scripts/bash/_print_help.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/bash/_print_help.sh b/scripts/bash/_print_help.sh
index cc6c36175..7219699a5 100644
--- a/scripts/bash/_print_help.sh
+++ b/scripts/bash/_print_help.sh
@@ -39,8 +39,8 @@ echo "  --executor-memory MEM                    Memory per executor (e.g. 1000M
 echo "  --dra-num-executors NUM                  Same as '--num-executors' but used when DRA is enabled. Use with care! DRA won't scale below this NUM."
 echo "  --dra-executor-cores NUM                 Same as '--executor-memory' but used when DRA is enabled."
 echo "  --dra-executor-memory MEM                Same as '--executor-cores' but used when DRA is enabled."
-echo "  --parquet-datetime-read-mode"
-echo "  --parquet-datetime-write-mode"
+echo "  --parquet-datetime-read-mode MODE        Parquet datetime/INT96 rebase mode used when reading (EXCEPTION, CORRECTED or LEGACY). Default: CORRECTED."
+echo "  --parquet-datetime-write-mode MODE       Parquet datetime/INT96 rebase mode used when writing (EXCEPTION, CORRECTED or LEGACY). Default: CORRECTED."
 echo "  --master MASTER_URL                      spark://host:port, mesos://host:port, yarn, k8s://https://host:port, or local"
 echo "  --deploy-mode DEPLOY_MODE                Whether to launch the driver program locally (\"client\") or on one of the worker machines inside the cluster (\"cluster\")."
 echo "  --driver-cores NUM                       Number of cores used by the driver, only in cluster mode."