From 5886b4dd5de2326c6c82879f42c9c57236a316e6 Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Thu, 18 May 2023 13:11:22 +0200 Subject: [PATCH 01/27] [PUBDEV-9089] Upgrade Hadoop Libraries in Main Standalone Jar (cherry picked from commit a2e9f686e89971ce22757b8f5aff8752d13a1d5b) --- gradle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle.properties b/gradle.properties index eea91798f7a0..c651d7a3094d 100644 --- a/gradle.properties +++ b/gradle.properties @@ -55,7 +55,7 @@ httpClientVersion=4.5.2 defaultParquetVersion=1.12.3 # Default Hadoop client version -defaultHadoopVersion=2.8.4 +defaultHadoopVersion=3.3.5 defaultHdfsDependency=hadoop-hdfs-client # Default Hive version From a1856f8df1be0335efdf3079ce5abb540cbbf6d8 Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Mon, 10 Jul 2023 19:56:39 +0200 Subject: [PATCH 02/27] Add hadoop commons --- h2o-extensions/krbstandalone/build.gradle | 1 + h2o-persist-hdfs/build.gradle | 2 ++ h2o-persist-s3/build.gradle | 1 + 3 files changed, 4 insertions(+) diff --git a/h2o-extensions/krbstandalone/build.gradle b/h2o-extensions/krbstandalone/build.gradle index 9ac573ace6cc..78f882606bfb 100644 --- a/h2o-extensions/krbstandalone/build.gradle +++ b/h2o-extensions/krbstandalone/build.gradle @@ -3,6 +3,7 @@ description = "H2O Kerberos Standalone support" dependencies { api project(":h2o-core") api project(":h2o-persist-hdfs") + compileOnly("org.apache.hadoop:hadoop-common:$defaultHadoopVersion") api("org.apache.hadoop:hadoop-auth:$defaultHadoopVersion") { // Pull all dependencies to allow run directly from IDE or command line transitive = true diff --git a/h2o-persist-hdfs/build.gradle b/h2o-persist-hdfs/build.gradle index 1822c9b6a69e..0483c02bab21 100644 --- a/h2o-persist-hdfs/build.gradle +++ b/h2o-persist-hdfs/build.gradle @@ -8,11 +8,13 @@ configurations { dependencies { api project(":h2o-core") + compileOnly("org.apache.hadoop:hadoop-common:$defaultHadoopVersion") 
api("org.apache.hadoop:$defaultHdfsDependency:$defaultHadoopVersion") { // Pull all dependencies to allow run directly from IDE or command line transitive = true } api("org.apache.hadoop:hadoop-aws:$defaultHadoopVersion") + api("com.nimbusds:nimbus-jose-jwt:9.11.3") testImplementation project(":h2o-test-support") diff --git a/h2o-persist-s3/build.gradle b/h2o-persist-s3/build.gradle index 4fe10af31c97..01fea6eb1520 100644 --- a/h2o-persist-s3/build.gradle +++ b/h2o-persist-s3/build.gradle @@ -7,6 +7,7 @@ configurations { dependencies { api project(":h2o-core") + compileOnly("org.apache.hadoop:hadoop-common:$defaultHadoopVersion") api "com.amazonaws:aws-java-sdk-s3:${awsJavaSdkVersion}" api "com.amazonaws:aws-java-sdk-sts:${awsJavaSdkVersion}" // Required by WebIdentityTokenCredentialsProvider from AWS SDK api "org.apache.httpcomponents:httpclient:${httpClientVersion}" From e9cb550d17e73949ac60c85bbc7181b4e0604218 Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Fri, 14 Jul 2023 20:10:35 +0200 Subject: [PATCH 03/27] Add some transitive dependencies for hdfs --- h2o-assemblies/main/build.gradle | 11 +++++++++++ h2o-assemblies/steam/build.gradle | 17 ++++------------- h2o-persist-hdfs/build.gradle | 8 +++++++- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/h2o-assemblies/main/build.gradle b/h2o-assemblies/main/build.gradle index 611f10dedd30..061269bc9b72 100644 --- a/h2o-assemblies/main/build.gradle +++ b/h2o-assemblies/main/build.gradle @@ -23,6 +23,17 @@ dependencies { api project(":h2o-parquet-parser") api project(":h2o-k8s-int") + api "org.apache.hadoop:hadoop-hdfs-client:${defaultHadoopVersion}" + api("org.apache.hadoop:hadoop-common:${defaultHadoopVersion}") { + exclude group: "com.sun.jersey" + exclude group: "javax.servlet" + exclude group: "org.apache.avro" + exclude group: "org.apache.curator" + exclude group: "org.apache.zookeeper" + exclude group: "org.eclipse.jetty" + exclude group: "org.apache.hadoop.thirdparty", module: 
"hadoop-shaded-protobuf_3_7" + } + constraints { api('com.fasterxml.jackson.core:jackson-databind:2.13.4.2') { because 'Fixes CVE-2022-42003' diff --git a/h2o-assemblies/steam/build.gradle b/h2o-assemblies/steam/build.gradle index 7de32060f6f4..210cb0d94e6c 100644 --- a/h2o-assemblies/steam/build.gradle +++ b/h2o-assemblies/steam/build.gradle @@ -25,15 +25,13 @@ dependencies { api(project(":h2o-persist-s3")) { exclude group: "org.apache.hadoop" } - api(project(":h2o-persist-hdfs")) { - exclude group: "org.apache.hadoop" - } + api(project(":h2o-persist-hdfs")) api(project(":h2o-parquet-parser")) { exclude group: "org.apache.hadoop" } // Force latest version Hadoop with unused components excluded - we need Hadoop for Parquet and S3A export - api "org.apache.hadoop:hadoop-hdfs-client:3.3.5" - api("org.apache.hadoop:hadoop-common:3.3.5") { + api "org.apache.hadoop:hadoop-hdfs-client:${defaultHadoopVersion}" + api("org.apache.hadoop:hadoop-common:${defaultHadoopVersion}") { exclude group: "com.sun.jersey" exclude group: "javax.servlet" exclude group: "org.apache.avro" @@ -42,18 +40,11 @@ dependencies { exclude group: "org.eclipse.jetty" exclude group: "org.apache.hadoop.thirdparty", module: "hadoop-shaded-protobuf_3_7" } - api("org.apache.hadoop:hadoop-aws:3.3.5") { - exclude group: "com.amazonaws", module: "aws-java-sdk-bundle" - } - // aws-java-sdk-dynamodb is required for S3A support, S3A import throws NoClassDefFoundError (AmazonDynamoDBException) - api("com.amazonaws:aws-java-sdk-dynamodb:${awsJavaSdkVersion}") { - transitive = false - } // Upgrade dependencies coming from Hadoop to address vulnerabilities api "org.apache.commons:commons-compress:1.21" // Force specific Parquet version to avoid dependency on vulnerable FasterXML jackson-mapper-asl api "org.apache.parquet:parquet-hadoop:${defaultParquetVersion}" - api("org.apache.hadoop:hadoop-mapreduce-client-core:3.3.5") { + api("org.apache.hadoop:hadoop-mapreduce-client-core:${defaultHadoopVersion}") { transitive 
= false } // Google OAuth force version diff --git a/h2o-persist-hdfs/build.gradle b/h2o-persist-hdfs/build.gradle index 0483c02bab21..a3734a1acee5 100644 --- a/h2o-persist-hdfs/build.gradle +++ b/h2o-persist-hdfs/build.gradle @@ -13,7 +13,13 @@ dependencies { // Pull all dependencies to allow run directly from IDE or command line transitive = true } - api("org.apache.hadoop:hadoop-aws:$defaultHadoopVersion") + api("org.apache.hadoop:hadoop-aws:${defaultHadoopVersion}") { + exclude group: "com.amazonaws", module: "aws-java-sdk-bundle" + } + // aws-java-sdk-dynamodb is required for S3A support, S3A import throws NoClassDefFoundError (AmazonDynamoDBException) + api("com.amazonaws:aws-java-sdk-dynamodb:${awsJavaSdkVersion}") { + transitive = false + } api("com.nimbusds:nimbus-jose-jwt:9.11.3") From 219664eb762943a9c4429768267a5dbffafd51ae Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Tue, 18 Jul 2023 13:37:55 +0200 Subject: [PATCH 04/27] Remove orc dependency for main --- gradle.properties | 5 +---- h2o-assemblies/main/build.gradle | 3 --- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/gradle.properties b/gradle.properties index c651d7a3094d..b03774b35559 100644 --- a/gradle.properties +++ b/gradle.properties @@ -18,9 +18,6 @@ doFindbugs=false # Run animal sniffer to verify compatibility of API with actual Java version doAnimalSniffer=false -# include ORC support inside default h2o.jar. -doIncludeOrc=false - # include MOJO Pipeline support inside default h2o.jar. 
doIncludeMojoPipeline=false @@ -59,7 +56,7 @@ defaultHadoopVersion=3.3.5 defaultHdfsDependency=hadoop-hdfs-client # Default Hive version -defaultHiveExecVersion=1.1.0 +defaultHiveExecVersion=1.2.1 defaultWebserverModule=h2o-jetty-9 # default module to be included in assemblies diff --git a/h2o-assemblies/main/build.gradle b/h2o-assemblies/main/build.gradle index 061269bc9b72..d96fa4baa2b8 100644 --- a/h2o-assemblies/main/build.gradle +++ b/h2o-assemblies/main/build.gradle @@ -17,9 +17,6 @@ dependencies { api project(":h2o-persist-http") api project(":h2o-persist-hdfs") api project(":h2o-ext-krbstandalone") - if (project.hasProperty("doIncludeOrc") && project.doIncludeOrc == "true") { - api project(":h2o-orc-parser") - } api project(":h2o-parquet-parser") api project(":h2o-k8s-int") From 99e1396b83eb25dafb93318d53cbd6063f87a397 Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Wed, 19 Jul 2023 13:09:26 +0200 Subject: [PATCH 05/27] Specify different version of hadoop for orc --- h2o-parsers/h2o-orc-parser/build.gradle | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/h2o-parsers/h2o-orc-parser/build.gradle b/h2o-parsers/h2o-orc-parser/build.gradle index dd992438cf72..6e3df327228f 100644 --- a/h2o-parsers/h2o-orc-parser/build.gradle +++ b/h2o-parsers/h2o-orc-parser/build.gradle @@ -12,8 +12,10 @@ configurations{ } dependencies { - hadoopCommonExclude("org.apache.hadoop:hadoop-common:${defaultHadoopVersion}") - hiveExecExclude("org.apache.hive:hive-exec:$defaultHiveExecVersion"){ + def hadoopVersion="2.8.4" + def hiveExecVersion="1.1.0" + hadoopCommonExclude("org.apache.hadoop:hadoop-common:$hadoopVersion") + hiveExecExclude("org.apache.hive:hive-exec:$hiveExecVersion"){ // this dependency need to be excluded manually as Gradle can't find it in maven central exclude group: 'org.pentaho', module: 'pentaho-aggdesigner-algorithm' exclude group: 'eigenbase', module: 'eigenbase-properties' @@ -30,7 +32,7 @@ dependencies { // Note: What is 
connection between hive-exec version and hadoop-version and orc version? // Note: In this case we are using hive version which is compatible with $defaultHadoopVersion // Note: for newest version it should be replaces by hive-orc - api("org.apache.hive:hive-exec:$defaultHiveExecVersion") { + api("org.apache.hive:hive-exec:$hiveExecVersion") { // we can't use transitive=false so we need to exclude the dependencies manually configurations.hiveExecExclude.getResolvedConfiguration().getResolvedArtifacts().each { if (it.moduleVersion.id.group != "org.apache.hive" && it.moduleVersion.id.module.name != "hive-exec") { @@ -40,7 +42,7 @@ dependencies { exclude group: 'org.pentaho', module: 'pentaho-aggdesigner-algorithm' } // For compilation we need common - api("org.apache.hadoop:hadoop-common:$defaultHadoopVersion") { + api("org.apache.hadoop:hadoop-common:$hadoopVersion") { // we can't use transitive=false so we need to exclude the dependencies manually configurations.hadoopCommonExclude.getResolvedConfiguration().getResolvedArtifacts().each { if (it.moduleVersion.id.group != "org.apache.hadoop" && it.moduleVersion.id.module.name != "hadoop-common") { From 1b13d0fd3f955f198360552b7c7090e385cb4a68 Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Wed, 19 Jul 2023 13:11:20 +0200 Subject: [PATCH 06/27] remove hive exec version spec from hive properties --- gradle.properties | 3 --- 1 file changed, 3 deletions(-) diff --git a/gradle.properties b/gradle.properties index b03774b35559..62c9a1c479ae 100644 --- a/gradle.properties +++ b/gradle.properties @@ -55,9 +55,6 @@ defaultParquetVersion=1.12.3 defaultHadoopVersion=3.3.5 defaultHdfsDependency=hadoop-hdfs-client -# Default Hive version -defaultHiveExecVersion=1.2.1 - defaultWebserverModule=h2o-jetty-9 # default module to be included in assemblies defaultExtWebserverModule=h2o-jetty-9-ext From 16c6c687acc39c57d0d15e7815e92120bb01d604 Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Thu, 20 Jul 2023 12:47:10 +0200 Subject: 
[PATCH 07/27] Change hadoop version in orc tests --- h2o-parsers/h2o-orc-parser/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h2o-parsers/h2o-orc-parser/build.gradle b/h2o-parsers/h2o-orc-parser/build.gradle index 6e3df327228f..4070a0e42e74 100644 --- a/h2o-parsers/h2o-orc-parser/build.gradle +++ b/h2o-parsers/h2o-orc-parser/build.gradle @@ -54,7 +54,7 @@ dependencies { testImplementation project(":h2o-test-support") testRuntimeOnly project(":${defaultWebserverModule}") // We need correct version of MapRe Hadoop to run JUnits - testRuntimeOnly("org.apache.hadoop:hadoop-client:$defaultHadoopVersion") { + testRuntimeOnly("org.apache.hadoop:hadoop-client:$hadoopVersion") { exclude module: "jasper-runtime" exclude module: "jasper-compiler" exclude module: "curator-client" From a9ce21ba22f5627dd42b2c7d966531a97903502f Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Thu, 20 Jul 2023 18:46:59 +0200 Subject: [PATCH 08/27] Fix usage of http client --- h2o-parsers/h2o-orc-parser/build.gradle | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/h2o-parsers/h2o-orc-parser/build.gradle b/h2o-parsers/h2o-orc-parser/build.gradle index 4070a0e42e74..09308d37da14 100644 --- a/h2o-parsers/h2o-orc-parser/build.gradle +++ b/h2o-parsers/h2o-orc-parser/build.gradle @@ -25,8 +25,7 @@ dependencies { api(project(":h2o-persist-hdfs")) { exclude group: 'ai.h2o', module: 'h2o-core' exclude group: 'net.java.dev.jets3t', module: 'jets3t' - exclude group: 'org.apache.hadoop', module: 'hadoop-client' - exclude group: 'org.apache.hadoop', module: 'hadoop-aws' + exclude group: 'org.apache.hadoop' } // Note: What is connection between hive-exec version and hadoop-version and orc version? 
From 4d48215fd8c7d9154809c2934d44b5ce520f4bec Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Fri, 21 Jul 2023 14:48:41 +0200 Subject: [PATCH 09/27] Add libs for s3 persist tests --- h2o-persist-hdfs/build.gradle | 2 ++ .../src/test/java/water/persist/PersistS3HdfsTest.java | 7 +++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/h2o-persist-hdfs/build.gradle b/h2o-persist-hdfs/build.gradle index a3734a1acee5..6420975e036c 100644 --- a/h2o-persist-hdfs/build.gradle +++ b/h2o-persist-hdfs/build.gradle @@ -24,7 +24,9 @@ dependencies { api("com.nimbusds:nimbus-jose-jwt:9.11.3") testImplementation project(":h2o-test-support") + testImplementation "org.apache.hadoop:hadoop-common:$defaultHadoopVersion" testImplementation "com.amazonaws:aws-java-sdk-s3:${awsJavaSdkVersion}" + testImplementation "org.jets3t:jets3t:0.9.7" testRuntimeOnly project(":${defaultWebserverModule}") testRuntimeOnly project(":h2o-persist-s3") } diff --git a/h2o-persist-hdfs/src/test/java/water/persist/PersistS3HdfsTest.java b/h2o-persist-hdfs/src/test/java/water/persist/PersistS3HdfsTest.java index ce44b634defe..e1fa6765602b 100644 --- a/h2o-persist-hdfs/src/test/java/water/persist/PersistS3HdfsTest.java +++ b/h2o-persist-hdfs/src/test/java/water/persist/PersistS3HdfsTest.java @@ -2,11 +2,10 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.s3.S3FileSystem; +import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.jets3t.service.S3Service; import org.jets3t.service.model.S3Object; import org.junit.BeforeClass; -import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; @@ -37,10 +36,10 @@ public void testPubDev5663() throws Exception { // Demonstrates that S3FileSyste PersistHdfs hdfsPersist = (PersistHdfs) H2O.getPM().getPersistForURI(URI.create("hdfs://localhost/")); - String existing = "s3://" + bucket + "/" + key; + String existing = "s3a://" + bucket + "/" + key; 
Path p = new Path(existing); - S3FileSystem fs = (S3FileSystem) FileSystem.get(p.toUri(), PersistHdfs.CONF); + S3AFileSystem fs = (S3AFileSystem) FileSystem.get(p.toUri(), PersistHdfs.CONF); // use crazy reflection to get to the actual S3 Service instance S3Service s3Service = (S3Service) getValue(fs, "store", "h", "proxyDescriptor", "fpp", "proxy", "s3Service"); From a21832521d5c5058dc4887c50594fffafe547c2c Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Fri, 21 Jul 2023 18:12:16 +0200 Subject: [PATCH 10/27] Update tests --- .../java/water/persist/PersistHdfsTest.java | 6 +++-- .../java/water/persist/PersistS3HdfsTest.java | 22 +++---------------- 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/h2o-persist-hdfs/src/test/java/water/persist/PersistHdfsTest.java b/h2o-persist-hdfs/src/test/java/water/persist/PersistHdfsTest.java index 4e6e2153f09b..53fa98fbc67b 100644 --- a/h2o-persist-hdfs/src/test/java/water/persist/PersistHdfsTest.java +++ b/h2o-persist-hdfs/src/test/java/water/persist/PersistHdfsTest.java @@ -23,8 +23,10 @@ public class PersistHdfsTest extends TestUtil { @Parameterized.Parameters(name = "{index}: scheme={0}") - public static Object[] schemes() { - return new Object[] { "s3n", "s3a" }; + public static Object[] schemes() { + return new Object[] { + // "s3n", - s3n is not supported by hadoop-aws 3.0+ + "s3a" }; } @Parameterized.Parameter diff --git a/h2o-persist-hdfs/src/test/java/water/persist/PersistS3HdfsTest.java b/h2o-persist-hdfs/src/test/java/water/persist/PersistS3HdfsTest.java index e1fa6765602b..14481648020f 100644 --- a/h2o-persist-hdfs/src/test/java/water/persist/PersistS3HdfsTest.java +++ b/h2o-persist-hdfs/src/test/java/water/persist/PersistS3HdfsTest.java @@ -3,8 +3,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.S3AFileSystem; -import org.jets3t.service.S3Service; -import org.jets3t.service.model.S3Object; +import 
com.amazonaws.services.s3.model.S3Object; import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; @@ -40,25 +39,10 @@ public void testPubDev5663() throws Exception { // Demonstrates that S3FileSyste Path p = new Path(existing); S3AFileSystem fs = (S3AFileSystem) FileSystem.get(p.toUri(), PersistHdfs.CONF); - // use crazy reflection to get to the actual S3 Service instance - S3Service s3Service = (S3Service) getValue(fs, "store", "h", "proxyDescriptor", "fpp", "proxy", "s3Service"); - - S3Object s3Object = s3Service.getObject(bucket, key); + S3Object s3Object = fs.getAmazonS3ClientForTesting("testPubDev5663").getObject(bucket, key); + assertNotNull(s3Object); // The object exists assertFalse(fs.exists(p)); // But FS says it doesn't => S3 is broken in Hadoop assertFalse(hdfsPersist.exists(existing)); // Our persist gives the same result } - - private Object getValue(Object o, String... fieldNames) { - StringBuilder path = new StringBuilder(o.getClass().getName()); - for (String f : fieldNames) { - path.append('.').append(f); - Object no = ReflectionUtils.getFieldValue(o, f); - if (no == null) - throw new IllegalStateException("Invalid path: " + path.toString() + ", object is instance of " + o.getClass()); - o = no; - } - return o; - } - } From 99b4a6bab50e03a718cf36d8c20835c601bcecb0 Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Mon, 24 Jul 2023 11:16:05 +0200 Subject: [PATCH 11/27] Update tests --- .../src/test/java/water/persist/PersistS3HdfsTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/h2o-persist-hdfs/src/test/java/water/persist/PersistS3HdfsTest.java b/h2o-persist-hdfs/src/test/java/water/persist/PersistS3HdfsTest.java index 14481648020f..b6a746339d81 100644 --- a/h2o-persist-hdfs/src/test/java/water/persist/PersistS3HdfsTest.java +++ b/h2o-persist-hdfs/src/test/java/water/persist/PersistS3HdfsTest.java @@ -42,7 +42,7 @@ public void testPubDev5663() throws Exception { // Demonstrates that S3FileSyste 
S3Object s3Object = fs.getAmazonS3ClientForTesting("testPubDev5663").getObject(bucket, key); assertNotNull(s3Object); // The object exists - assertFalse(fs.exists(p)); // But FS says it doesn't => S3 is broken in Hadoop - assertFalse(hdfsPersist.exists(existing)); // Our persist gives the same result + assert(fs.exists(p)); // But FS says it exists as well. + assert(hdfsPersist.exists(existing)); // Our persist gives the same result } } From 1668d8810296331408c4c9b1b10462feae756598 Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Tue, 25 Jul 2023 11:31:07 +0200 Subject: [PATCH 12/27] add hadoop-common to persist-s3 tests --- h2o-persist-s3/build.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/h2o-persist-s3/build.gradle b/h2o-persist-s3/build.gradle index 01fea6eb1520..9d3b4435bcac 100644 --- a/h2o-persist-s3/build.gradle +++ b/h2o-persist-s3/build.gradle @@ -20,6 +20,7 @@ dependencies { testRuntimeOnly project(":${defaultWebserverModule}") testRuntimeOnly project(":h2o-parquet-parser") testImplementation project(":h2o-persist-hdfs") + testImplementation "org.apache.hadoop:hadoop-common:$defaultHadoopVersion" } apply from: "${rootDir}/gradle/dataCheck.gradle" From b777d3aa3d6837b8c2153b52c0eafcfc7d447486 Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Wed, 26 Jul 2023 13:18:41 +0200 Subject: [PATCH 13/27] Refactor usage of hadoop-mapreduce-client-core --- h2o-assemblies/main/build.gradle | 6 ++++++ h2o-parsers/h2o-parquet-parser/build.gradle | 3 +++ h2o-parsers/h2o-parquet-parser/parquet_dependencies.gradle | 3 --- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/h2o-assemblies/main/build.gradle b/h2o-assemblies/main/build.gradle index d96fa4baa2b8..6431b7d6550d 100644 --- a/h2o-assemblies/main/build.gradle +++ b/h2o-assemblies/main/build.gradle @@ -18,9 +18,15 @@ dependencies { api project(":h2o-persist-hdfs") api project(":h2o-ext-krbstandalone") api project(":h2o-parquet-parser") + 
api("org.apache.hadoop:hadoop-mapreduce-client-core:${defaultHadoopVersion}") { + transitive = false + } api project(":h2o-k8s-int") api "org.apache.hadoop:hadoop-hdfs-client:${defaultHadoopVersion}" + implementation("org.apache.hadoop:hadoop-mapreduce-client-core:${defaultHadoopVersion}") { + transitive = false + } api("org.apache.hadoop:hadoop-common:${defaultHadoopVersion}") { exclude group: "com.sun.jersey" exclude group: "javax.servlet" diff --git a/h2o-parsers/h2o-parquet-parser/build.gradle b/h2o-parsers/h2o-parquet-parser/build.gradle index 79d171bd5b5a..00a41816fd15 100644 --- a/h2o-parsers/h2o-parquet-parser/build.gradle +++ b/h2o-parsers/h2o-parquet-parser/build.gradle @@ -6,6 +6,9 @@ description = "H2O Parquet Parser" dependencies { testImplementation project(":h2o-test-support") testImplementation project(":h2o-parquet-parser-tests") + testImplementation("org.apache.hadoop:hadoop-mapreduce-client-core:${defaultHadoopVersion}") { + transitive = false + } testRuntimeOnly project(":${defaultWebserverModule}") } diff --git a/h2o-parsers/h2o-parquet-parser/parquet_dependencies.gradle b/h2o-parsers/h2o-parquet-parser/parquet_dependencies.gradle index 4abb69601464..3b174f8e10b5 100644 --- a/h2o-parsers/h2o-parquet-parser/parquet_dependencies.gradle +++ b/h2o-parsers/h2o-parquet-parser/parquet_dependencies.gradle @@ -29,7 +29,4 @@ dependencies { } } } - implementation("org.apache.hadoop:hadoop-mapreduce-client-core:${parquetHadoopVersion}") { - transitive = false - } } From 237be031437b56bd1c417d6b5b0b11a57dbcd939 Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Wed, 26 Jul 2023 15:22:58 +0200 Subject: [PATCH 14/27] update usage of hadoop-common --- .../h2o-parquet-v17-compat/build.gradle | 5 ++--- .../parquet_dependencies.gradle | 17 ++--------------- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/h2o-parsers/h2o-parquet-compat/h2o-parquet-v17-compat/build.gradle b/h2o-parsers/h2o-parquet-compat/h2o-parquet-v17-compat/build.gradle index 
1b3b333ef1db..77905c235cd2 100644 --- a/h2o-parsers/h2o-parquet-compat/h2o-parquet-v17-compat/build.gradle +++ b/h2o-parsers/h2o-parquet-compat/h2o-parquet-v17-compat/build.gradle @@ -16,9 +16,7 @@ dependencies { } // Parquet support api("org.apache.parquet:parquet-hadoop:1.7.0") - api("org.apache.hadoop:hadoop-common:${parquetHadoopVersion}") { - transitive = false - } + compileOnly("org.apache.hadoop:hadoop-common:${parquetHadoopVersion}") testImplementation project(":h2o-test-support") testImplementation project(":h2o-parquet-parser-tests") @@ -28,6 +26,7 @@ dependencies { testImplementation("org.apache.hadoop:hadoop-client:${parquetHadoopVersion}") { exclude module: "servlet-api" } + testImplementation("org.apache.hadoop:hadoop-common:${parquetHadoopVersion}") } apply from: "${rootDir}/gradle/dataCheck.gradle" diff --git a/h2o-parsers/h2o-parquet-parser/parquet_dependencies.gradle b/h2o-parsers/h2o-parquet-parser/parquet_dependencies.gradle index 3b174f8e10b5..dbfe5b30a52b 100644 --- a/h2o-parsers/h2o-parquet-parser/parquet_dependencies.gradle +++ b/h2o-parsers/h2o-parquet-parser/parquet_dependencies.gradle @@ -1,14 +1,7 @@ def parquetHadoopVersion = binding.variables.get("hadoopVersion") ? 
binding.variables.get("hadoopVersion") : defaultHadoopVersion -configurations{ - // Configuration used to get all transitive dependencies for org.apache.hadoop:hadoop-common - hadoopCommonExclude -} - dependencies { - hadoopCommonExclude("org.apache.hadoop:hadoop-common:${parquetHadoopVersion}") - api project(":h2o-core") api(project(":h2o-persist-hdfs")) { exclude group: 'ai.h2o', module: 'h2o-core' @@ -21,12 +14,6 @@ dependencies { api("org.apache.parquet:parquet-hadoop:${defaultParquetVersion}") - api("org.apache.hadoop:hadoop-common:${parquetHadoopVersion}") { - // we can't use transitive=false so we need to exclude the dependencies manually - configurations.hadoopCommonExclude.getResolvedConfiguration().getResolvedArtifacts().each { - if (it.moduleVersion.id.group != "org.apache.hadoop" && it.moduleVersion.id.module.name != "hadoop-common") { - exclude group: it.moduleVersion.id.group, module: it.moduleVersion.id.module.name - } - } - } + compileOnly("org.apache.hadoop:hadoop-common:${parquetHadoopVersion}") + testImplementation("org.apache.hadoop:hadoop-common:${parquetHadoopVersion}") } From 6b5bb922220a635a594f1ee7ee13c34f37fd36c1 Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Wed, 26 Jul 2023 20:01:34 +0200 Subject: [PATCH 15/27] try to relocate hadoop libraries --- h2o-assemblies/main/build.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/h2o-assemblies/main/build.gradle b/h2o-assemblies/main/build.gradle index 6431b7d6550d..fc7bca53c18a 100644 --- a/h2o-assemblies/main/build.gradle +++ b/h2o-assemblies/main/build.gradle @@ -57,6 +57,7 @@ shadowJar { if (!project.hasProperty("jacocoCoverage")) { relocate 'org.joda.time', 'ai.h2o.org.joda.time' } + relocate "org.apache.hadoop", "ai.h2o.org.apache.hadoop" exclude 'META-INF/*.DSA' exclude 'META-INF/*.SF' exclude 'synchronize.properties' From 49d27fb1c561bd262b137219134e2744f21ab96e Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Thu, 27 Jul 2023 15:22:47 +0200 Subject: [PATCH 16/27] Revert 
relocation --- h2o-assemblies/main/build.gradle | 2 -- 1 file changed, 2 deletions(-) diff --git a/h2o-assemblies/main/build.gradle b/h2o-assemblies/main/build.gradle index fc7bca53c18a..d79f9e7e702a 100644 --- a/h2o-assemblies/main/build.gradle +++ b/h2o-assemblies/main/build.gradle @@ -34,7 +34,6 @@ dependencies { exclude group: "org.apache.curator" exclude group: "org.apache.zookeeper" exclude group: "org.eclipse.jetty" - exclude group: "org.apache.hadoop.thirdparty", module: "hadoop-shaded-protobuf_3_7" } constraints { @@ -57,7 +56,6 @@ shadowJar { if (!project.hasProperty("jacocoCoverage")) { relocate 'org.joda.time', 'ai.h2o.org.joda.time' } - relocate "org.apache.hadoop", "ai.h2o.org.apache.hadoop" exclude 'META-INF/*.DSA' exclude 'META-INF/*.SF' exclude 'synchronize.properties' From 85720a95e1e365e7df1dc49ea9f4511a99caa95f Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Thu, 27 Jul 2023 20:30:15 +0200 Subject: [PATCH 17/27] Relocated hadoop libs --- h2o-assemblies/main/build.gradle | 3 +++ 1 file changed, 3 insertions(+) diff --git a/h2o-assemblies/main/build.gradle b/h2o-assemblies/main/build.gradle index d79f9e7e702a..b3d2df5f58cd 100644 --- a/h2o-assemblies/main/build.gradle +++ b/h2o-assemblies/main/build.gradle @@ -63,6 +63,9 @@ shadowJar { exclude 'test.properties' exclude 'cockpitlite.properties' exclude 'devpay_products.properties' + + relocate 'org.apache.hadoop', 'ai.h2o.org.apache.hadoop' + manifest { attributes 'Main-Class': 'water.H2OApp' attributes 'Add-Opens': 'java.base/java.lang java.base/java.util java.base/java.lang.reflect' From 9aa940790656ef657299434ace261a79f0c08285 Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Tue, 15 Aug 2023 16:04:08 +0200 Subject: [PATCH 18/27] add hadoop-common.jar on tests --- scripts/jenkins/groovy/hadoopCommands.groovy | 2 +- scripts/jenkins/groovy/kerberosCommands.groovy | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/jenkins/groovy/hadoopCommands.groovy 
b/scripts/jenkins/groovy/hadoopCommands.groovy index 34e440d37fe8..afd90b6332ee 100644 --- a/scripts/jenkins/groovy/hadoopCommands.groovy +++ b/scripts/jenkins/groovy/hadoopCommands.groovy @@ -58,7 +58,7 @@ private GString getCommandHadoop(final stageConfig) { private GString getCommandStandalone(final stageConfig) { def defaultPort = 54321 return """ - java -cp build/h2o.jar:\$(cat /opt/hive-jdbc-cp) water.H2OApp \\ + java -cp build/h2o.jar:\$(cat /opt/hive-jdbc-cp):\$HADOOP_HOME/hadoop-common.jar water.H2OApp \\ -port ${defaultPort} -ip \$(hostname --ip-address) -name \$(date +%s) \\ -jks mykeystore.jks \\ -login_conf ${stageConfig.customData.ldapConfigPathStandalone} -ldap_login \\ diff --git a/scripts/jenkins/groovy/kerberosCommands.groovy b/scripts/jenkins/groovy/kerberosCommands.groovy index 02059a67e59f..8c6f8dadd6a3 100644 --- a/scripts/jenkins/groovy/kerberosCommands.groovy +++ b/scripts/jenkins/groovy/kerberosCommands.groovy @@ -140,7 +140,7 @@ private GString getCommandStandaloneKeytab(final stageConfig, final jar = 'build private GString getCommandStandalone(final stageConfig, final jar = 'build/h2o.jar', final authArgs = '', final hiveArgs = '') { def defaultPort = 54321 return """ - java -cp ${jar}:\$(cat /opt/hive-jdbc-cp):${stageConfig.customData.extraClasspath} water.H2OApp \\ + java -cp ${jar}:\$(cat /opt/hive-jdbc-cp):\$HADOOP_HOME/hadoop-common.jar:${stageConfig.customData.extraClasspath} water.H2OApp \\ -port ${defaultPort} -ip \$(hostname --ip-address) -name \$(date +%s) \\ -jks mykeystore.jks \\ -spnego_login -user_name ${stageConfig.customData.kerberosUserName} \\ From b341fbc55bcf508662eee33c9721d7fb56b45c37 Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Tue, 15 Aug 2023 17:46:42 +0200 Subject: [PATCH 19/27] exclude org.apache.hadoop.net.DNSDomainNameResolver from relocation --- h2o-assemblies/main/build.gradle | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/h2o-assemblies/main/build.gradle 
b/h2o-assemblies/main/build.gradle index b3d2df5f58cd..a04e1f4a85b1 100644 --- a/h2o-assemblies/main/build.gradle +++ b/h2o-assemblies/main/build.gradle @@ -64,7 +64,9 @@ shadowJar { exclude 'cockpitlite.properties' exclude 'devpay_products.properties' - relocate 'org.apache.hadoop', 'ai.h2o.org.apache.hadoop' + relocate ('org.apache.hadoop', 'ai.h2o.org.apache.hadoop') { + exclude 'org.apache.hadoop.net.DNSDomainNameResolver' + } manifest { attributes 'Main-Class': 'water.H2OApp' From db7232078047d344d15d35c7db8fc6be3b7d4cfd Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Tue, 15 Aug 2023 19:20:55 +0200 Subject: [PATCH 20/27] exclude org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback --- h2o-assemblies/main/build.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/h2o-assemblies/main/build.gradle b/h2o-assemblies/main/build.gradle index a04e1f4a85b1..6b8f04e62e23 100644 --- a/h2o-assemblies/main/build.gradle +++ b/h2o-assemblies/main/build.gradle @@ -66,6 +66,7 @@ shadowJar { relocate ('org.apache.hadoop', 'ai.h2o.org.apache.hadoop') { exclude 'org.apache.hadoop.net.DNSDomainNameResolver' + exclude 'org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback' } manifest { From c8d8cf11c398e6cb3526722a02b48ef6189bcfe5 Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Wed, 16 Aug 2023 10:43:09 +0200 Subject: [PATCH 21/27] include hive shims common --- h2o-assemblies/main/build.gradle | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/h2o-assemblies/main/build.gradle b/h2o-assemblies/main/build.gradle index 6b8f04e62e23..ee16ba00109d 100644 --- a/h2o-assemblies/main/build.gradle +++ b/h2o-assemblies/main/build.gradle @@ -35,6 +35,7 @@ dependencies { exclude group: "org.apache.zookeeper" exclude group: "org.eclipse.jetty" } + implementation 'org.apache.hive.shims:hive-shims-common:2.3.9' constraints { api('com.fasterxml.jackson.core:jackson-databind:2.13.4.2') { @@ -63,11 +64,6 @@ shadowJar { exclude 'test.properties' 
exclude 'cockpitlite.properties' exclude 'devpay_products.properties' - - relocate ('org.apache.hadoop', 'ai.h2o.org.apache.hadoop') { - exclude 'org.apache.hadoop.net.DNSDomainNameResolver' - exclude 'org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback' - } manifest { attributes 'Main-Class': 'water.H2OApp' From 3d38927c1df1797dba7f33c15982988764b704b8 Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Wed, 16 Aug 2023 10:51:36 +0200 Subject: [PATCH 22/27] remove hadoop common --- scripts/jenkins/groovy/hadoopCommands.groovy | 2 +- scripts/jenkins/groovy/kerberosCommands.groovy | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/jenkins/groovy/hadoopCommands.groovy b/scripts/jenkins/groovy/hadoopCommands.groovy index afd90b6332ee..34e440d37fe8 100644 --- a/scripts/jenkins/groovy/hadoopCommands.groovy +++ b/scripts/jenkins/groovy/hadoopCommands.groovy @@ -58,7 +58,7 @@ private GString getCommandHadoop(final stageConfig) { private GString getCommandStandalone(final stageConfig) { def defaultPort = 54321 return """ - java -cp build/h2o.jar:\$(cat /opt/hive-jdbc-cp):\$HADOOP_HOME/hadoop-common.jar water.H2OApp \\ + java -cp build/h2o.jar:\$(cat /opt/hive-jdbc-cp) water.H2OApp \\ -port ${defaultPort} -ip \$(hostname --ip-address) -name \$(date +%s) \\ -jks mykeystore.jks \\ -login_conf ${stageConfig.customData.ldapConfigPathStandalone} -ldap_login \\ diff --git a/scripts/jenkins/groovy/kerberosCommands.groovy b/scripts/jenkins/groovy/kerberosCommands.groovy index 8c6f8dadd6a3..02059a67e59f 100644 --- a/scripts/jenkins/groovy/kerberosCommands.groovy +++ b/scripts/jenkins/groovy/kerberosCommands.groovy @@ -140,7 +140,7 @@ private GString getCommandStandaloneKeytab(final stageConfig, final jar = 'build private GString getCommandStandalone(final stageConfig, final jar = 'build/h2o.jar', final authArgs = '', final hiveArgs = '') { def defaultPort = 54321 return """ - java -cp ${jar}:\$(cat 
/opt/hive-jdbc-cp):\$HADOOP_HOME/hadoop-common.jar:${stageConfig.customData.extraClasspath} water.H2OApp \\ + java -cp ${jar}:\$(cat /opt/hive-jdbc-cp):${stageConfig.customData.extraClasspath} water.H2OApp \\ -port ${defaultPort} -ip \$(hostname --ip-address) -name \$(date +%s) \\ -jks mykeystore.jks \\ -spnego_login -user_name ${stageConfig.customData.kerberosUserName} \\ From f55fd26cb568771e50a44d203d245132f941f981 Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Wed, 16 Aug 2023 13:39:11 +0200 Subject: [PATCH 23/27] remove s3n filesystem from hadoop tests --- scripts/jenkins/groovy/defineTestStages.groovy | 6 +++++- scripts/jenkins/groovy/hadoopCommands.groovy | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/jenkins/groovy/defineTestStages.groovy b/scripts/jenkins/groovy/defineTestStages.groovy index e34c0057fe38..5792dab4123c 100644 --- a/scripts/jenkins/groovy/defineTestStages.groovy +++ b/scripts/jenkins/groovy/defineTestStages.groovy @@ -589,7 +589,8 @@ def call(final pipelineContext) { version: distribution.version, commandFactory: 'h2o-3/scripts/jenkins/groovy/hadoopCommands.groovy', ldapConfigPath: ldapConfigPath, - ldapConfigPathStandalone: 'scripts/jenkins/config/ldap-jetty-9.txt' + ldapConfigPathStandalone: 'scripts/jenkins/config/ldap-jetty-9.txt', + bundledS3FileSystems: 's3a,s3n' ], pythonVersion: '3.6', customDockerArgs: [ '--privileged' ], @@ -599,6 +600,7 @@ def call(final pipelineContext) { def standaloneStage = evaluate(stageTemplate.inspect()) standaloneStage.stageName = "${distribution.name.toUpperCase()} ${distribution.version} - STANDALONE" standaloneStage.customData.mode = 'STANDALONE' + standaloneStage.customData.bundledS3FileSystems = 's3a' def onHadoopStage = evaluate(stageTemplate.inspect()) onHadoopStage.stageName = "${distribution.name.toUpperCase()} ${distribution.version} - HADOOP" @@ -672,10 +674,12 @@ def call(final pipelineContext) { def standaloneStage = evaluate(stageTemplate.inspect()) 
standaloneStage.stageName = "${distribution.name.toUpperCase()} ${distribution.version} - STANDALONE" standaloneStage.customData.mode = 'STANDALONE' + standaloneStage.customData.bundledS3FileSystems = 's3a' def standaloneKeytabStage = evaluate(stageTemplate.inspect()) standaloneKeytabStage.stageName = "${distribution.name.toUpperCase()} ${distribution.version} - STANDALONE KEYTAB" standaloneKeytabStage.customData.mode = 'STANDALONE_KEYTAB' + standaloneKeytabStage.customData.bundledS3FileSystems = 's3a' def standaloneDriverKeytabStage = evaluate(stageTemplate.inspect()) standaloneDriverKeytabStage.stageName = "${distribution.name.toUpperCase()} ${distribution.version} - DRIVER KEYTAB" diff --git a/scripts/jenkins/groovy/hadoopCommands.groovy b/scripts/jenkins/groovy/hadoopCommands.groovy index 34e440d37fe8..44e175491745 100644 --- a/scripts/jenkins/groovy/hadoopCommands.groovy +++ b/scripts/jenkins/groovy/hadoopCommands.groovy @@ -81,6 +81,7 @@ private GString getCommandStandalone(final stageConfig) { fi export CLOUD_IP=\$(hostname --ip-address) export CLOUD_PORT=${defaultPort} + export HADOOP_S3_FILESYSTEMS=${stageConfig.customData.bundledS3FileSystems} """ } From 8e0a377eb60ae73ea2ffdbe0fe16d629d7eef4a9 Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Wed, 16 Aug 2023 16:40:32 +0200 Subject: [PATCH 24/27] Upgrade hive shims common --- h2o-assemblies/main/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h2o-assemblies/main/build.gradle b/h2o-assemblies/main/build.gradle index ee16ba00109d..cb946dee6ae9 100644 --- a/h2o-assemblies/main/build.gradle +++ b/h2o-assemblies/main/build.gradle @@ -35,7 +35,7 @@ dependencies { exclude group: "org.apache.zookeeper" exclude group: "org.eclipse.jetty" } - implementation 'org.apache.hive.shims:hive-shims-common:2.3.9' + implementation 'org.apache.hive.shims:hive-shims-common:3.1.3' constraints { api('com.fasterxml.jackson.core:jackson-databind:2.13.4.2') { From
899fa99881f49b5b01ffd2a71259580578ffa63f Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Wed, 16 Aug 2023 19:35:37 +0200 Subject: [PATCH 25/27] Exclude most of the Shim classes --- h2o-assemblies/main/build.gradle | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/h2o-assemblies/main/build.gradle b/h2o-assemblies/main/build.gradle index cb946dee6ae9..dc2741478c44 100644 --- a/h2o-assemblies/main/build.gradle +++ b/h2o-assemblies/main/build.gradle @@ -35,7 +35,7 @@ dependencies { exclude group: "org.apache.zookeeper" exclude group: "org.eclipse.jetty" } - implementation 'org.apache.hive.shims:hive-shims-common:3.1.3' + implementation 'org.apache.hive.shims:hive-shims-common:2.3.9' constraints { api('com.fasterxml.jackson.core:jackson-databind:2.13.4.2') { @@ -64,6 +64,13 @@ shadowJar { exclude 'test.properties' exclude 'cockpitlite.properties' exclude 'devpay_products.properties' + exclude 'org/apache/hadoop/hive/thrift/**/*.*' + exclude 'org/apache/hadoop/hive/io/**/*.*' + exclude 'org/apache/hadoop/hive/upgrade/**/*.*' + exclude 'org/apache/hadoop/hive/shims/Utils.*' + exclude 'org/apache/hadoop/hive/shims/CombineHiveKey.*' + exclude 'org/apache/hadoop/hive/shims/*Shims*.*' + exclude 'org/apache/hadoop/hive/shims/HiveHarFileSystem.*' manifest { attributes 'Main-Class': 'water.H2OApp' From 901caf0a21787c9ff664748e090842c76318b8be Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Thu, 17 Aug 2023 11:52:15 +0200 Subject: [PATCH 26/27] Add some comments to build.gradle about extensions --- h2o-assemblies/main/build.gradle | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/h2o-assemblies/main/build.gradle b/h2o-assemblies/main/build.gradle index dc2741478c44..8351fde0fb65 100644 --- a/h2o-assemblies/main/build.gradle +++ b/h2o-assemblies/main/build.gradle @@ -35,6 +35,8 @@ dependencies { exclude group: "org.apache.zookeeper" exclude group: "org.eclipse.jetty" } + + // Need a newer org.apache.hadoop.hive.shims.ShimLoader to make older
hive JDBC drivers work on Hadoop 3. implementation 'org.apache.hive.shims:hive-shims-common:2.3.9' constraints { @@ -64,6 +66,9 @@ shadowJar { exclude 'test.properties' exclude 'cockpitlite.properties' exclude 'devpay_products.properties' + + // Need a newer org.apache.hadoop.hive.shims.ShimLoader to make older hive JDBC drivers work on Hadoop 3. + // Excluding other classes of org.apache.hive.shims:hive-shims-common. exclude 'org/apache/hadoop/hive/thrift/**/*.*' exclude 'org/apache/hadoop/hive/io/**/*.*' exclude 'org/apache/hadoop/hive/upgrade/**/*.*' From 46cbd20f946534e1d970fd9a3249bb92546868c7 Mon Sep 17 00:00:00 2001 From: Marek Novotny Date: Thu, 17 Aug 2023 15:09:25 +0200 Subject: [PATCH 27/27] Fix S3 tests --- h2o-persist-s3/build.gradle | 3 +++ 1 file changed, 3 insertions(+) diff --git a/h2o-persist-s3/build.gradle b/h2o-persist-s3/build.gradle index 9d3b4435bcac..cdec1d6441ce 100644 --- a/h2o-persist-s3/build.gradle +++ b/h2o-persist-s3/build.gradle @@ -21,6 +21,9 @@ dependencies { testRuntimeOnly project(":h2o-parquet-parser") testImplementation project(":h2o-persist-hdfs") testImplementation "org.apache.hadoop:hadoop-common:$defaultHadoopVersion" + testImplementation("org.apache.hadoop:hadoop-mapreduce-client-core:$defaultHadoopVersion") { + transitive = false + } } apply from: "${rootDir}/gradle/dataCheck.gradle"