From 02d6e4f92ccd4cb61e70edc6a81cfa48be5902df Mon Sep 17 00:00:00 2001 From: Carter Kozak Date: Wed, 25 Sep 2024 15:40:10 -0400 Subject: [PATCH 1/6] Hybrid (G1) GC uses the defeault 200ms MaxGCPauseMillis on JDK-21+ It seems that the heuristics have gone a bit sideways in JDK-21 causing degenerate full gc pauses unnecessarily when we configure `-XX:MaxGCPauseMillis=500`, however setting the default 200ms value resolves this behavior. Note that we deployed the increase from 200ms to 500ms at a time when container cpu shares informed the JDKs processor count, thus gc threads/etc, which is no longer the case. As such, it should be safe (and generally more stable) to use the hardened default values from the jdk. --- .../gradle/dist/service/gc/GcProfile.java | 24 +++++-- .../JavaServiceDistributionPluginTests.groovy | 69 +++++++++++++++++++ 2 files changed, 89 insertions(+), 4 deletions(-) diff --git a/gradle-sls-packaging/src/main/java/com/palantir/gradle/dist/service/gc/GcProfile.java b/gradle-sls-packaging/src/main/java/com/palantir/gradle/dist/service/gc/GcProfile.java index f4dd24f93..3ba3715cd 100644 --- a/gradle-sls-packaging/src/main/java/com/palantir/gradle/dist/service/gc/GcProfile.java +++ b/gradle-sls-packaging/src/main/java/com/palantir/gradle/dist/service/gc/GcProfile.java @@ -22,6 +22,7 @@ import java.io.Serializable; import java.util.Collections; import java.util.List; +import java.util.OptionalInt; import org.gradle.api.JavaVersion; public interface GcProfile extends Serializable { @@ -108,15 +109,30 @@ class Hybrid implements GcProfile { // in high-garbage or low-gc-thread scenarios. In the happy case, increasing the pause target increases // both throughput and latency. In degenerate cases, a low target can cause the garbage collector to // thrash and reduce throughput while increasing latency. - private int maxGCPauseMillis = 500; + private static final int LEGACY_MAX_GC_PAUSE_MILLIS = 500; + private static final int JAVA_21_PLUS_MAX_GC_PAUSE_MILLIS = 200; + + private OptionalInt maxGCPauseMillis = OptionalInt.empty(); @Override - public final List gcJvmOpts(JavaVersion _javaVersion) { - return ImmutableList.of("-XX:+UseG1GC", "-XX:+UseNUMA", "-XX:MaxGCPauseMillis=" + maxGCPauseMillis); + public final List gcJvmOpts(JavaVersion javaVersion) { + return ImmutableList.of( + "-XX:+UseG1GC", "-XX:+UseNUMA", "-XX:MaxGCPauseMillis=" + getMaxGCPauseMillis(javaVersion)); + } + + private int getMaxGCPauseMillis(JavaVersion javaVersion) { + if (maxGCPauseMillis.isPresent()) { + return maxGCPauseMillis.getAsInt(); + } + if (javaVersion.compareTo(JavaVersion.toVersion("21")) >= 0) { + return JAVA_21_PLUS_MAX_GC_PAUSE_MILLIS; + } else { + return LEGACY_MAX_GC_PAUSE_MILLIS; + } } public final void maxGCPauseMillis(int value) { - this.maxGCPauseMillis = value; + this.maxGCPauseMillis = OptionalInt.of(value); } } diff --git a/gradle-sls-packaging/src/test/groovy/com/palantir/gradle/dist/service/JavaServiceDistributionPluginTests.groovy b/gradle-sls-packaging/src/test/groovy/com/palantir/gradle/dist/service/JavaServiceDistributionPluginTests.groovy index 0af2d8ca3..80d2e92da 100644 --- a/gradle-sls-packaging/src/test/groovy/com/palantir/gradle/dist/service/JavaServiceDistributionPluginTests.groovy +++ b/gradle-sls-packaging/src/test/groovy/com/palantir/gradle/dist/service/JavaServiceDistributionPluginTests.groovy @@ -1300,6 +1300,75 @@ class JavaServiceDistributionPluginTests extends GradleIntegrationSpec { actualStaticConfig.jvmOpts.containsAll(['-XX:+UseG1GC', '-XX:+UseNUMA', "-XX:MaxGCPauseMillis=500"]) } + def 'adds maxGCPauseMillis gc profile jvm settings on java 21'() { + given: + buildFile << ''' + plugins { + id 'java' + id 'com.palantir.sls-java-service-distribution' + } + + repositories { + mavenCentral() + } + + version '0.0.1' + + distribution { + serviceName 'service-name' + mainClass 'test.Test' + javaVersion 21 + gc 'hybrid', { + maxGCPauseMillis 1234 + } + } + '''.stripIndent() + + createUntarTask(buildFile) + + when: + runTasks(':untar') + + then: + def actualStaticConfig = OBJECT_MAPPER.readValue( + new File(projectDir, 'dist/service-name-0.0.1/service/bin/launcher-static.yml'), LaunchConfig.LaunchConfigInfo) + actualStaticConfig.jvmOpts.containsAll(['-XX:+UseG1GC', '-XX:+UseNUMA', '-XX:MaxGCPauseMillis=1234']) + } + + def 'gc profile null configuration closure on java 21'() { + // We set a different different default MaxGCPauseMillis in JDK-21 than older JDKs. + given: + buildFile << ''' + plugins { + id 'java' + id 'com.palantir.sls-java-service-distribution' + } + + repositories { + mavenCentral() + } + + version '0.0.1' + + distribution { + serviceName 'service-name' + mainClass 'test.Test' + javaVersion 21 + gc 'hybrid' + } + '''.stripIndent() + + createUntarTask(buildFile) + + when: + runTasks(':untar') + + then: + def actualStaticConfig = OBJECT_MAPPER.readValue( + new File(projectDir, 'dist/service-name-0.0.1/service/bin/launcher-static.yml'), LaunchConfig.LaunchConfigInfo) + actualStaticConfig.jvmOpts.containsAll(['-XX:+UseG1GC', '-XX:+UseNUMA', "-XX:MaxGCPauseMillis=200"]) + } + def 'applies java agents'() { createUntarBuildFile(buildFile) buildFile << """ From 249a09cd3f4da744cbc31bb67f1486352bc318c9 Mon Sep 17 00:00:00 2001 From: svc-changelog Date: Wed, 25 Sep 2024 20:18:44 +0000 Subject: [PATCH 2/6] Add generated changelog entries --- changelog/@unreleased/pr-1706.v2.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 changelog/@unreleased/pr-1706.v2.yml diff --git a/changelog/@unreleased/pr-1706.v2.yml b/changelog/@unreleased/pr-1706.v2.yml new file mode 100644 index 000000000..821617c0a --- /dev/null +++ b/changelog/@unreleased/pr-1706.v2.yml @@ -0,0 +1,5 @@ +type: improvement +improvement: + description: Hybrid (G1) GC uses the defeault 200ms MaxGCPauseMillis on JDK-21+ + links: + - https://github.com/palantir/sls-packaging/pull/1706 From 3e1280d8c24f5018009debeeac9c0e17a18abc5a Mon Sep 17 00:00:00 2001 From: Carter Kozak Date: Wed, 25 Sep 2024 16:29:00 -0400 Subject: [PATCH 3/6] only set MaxGCPauseMillis on jdk-21 when an override is present --- .../gradle/dist/service/gc/GcProfile.java | 20 ++++++++++++------- .../JavaServiceDistributionPluginTests.groovy | 3 ++- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/gradle-sls-packaging/src/main/java/com/palantir/gradle/dist/service/gc/GcProfile.java b/gradle-sls-packaging/src/main/java/com/palantir/gradle/dist/service/gc/GcProfile.java index 3ba3715cd..b98a7eb2a 100644 --- a/gradle-sls-packaging/src/main/java/com/palantir/gradle/dist/service/gc/GcProfile.java +++ b/gradle-sls-packaging/src/main/java/com/palantir/gradle/dist/service/gc/GcProfile.java @@ -103,6 +103,7 @@ public final void newRatio(int newerRatio) { // Match the MaxGCPauseMillis case @SuppressWarnings("AbbreviationAsWordInName") class Hybrid implements GcProfile { + // Applies to jdks older than 21: // We use 500ms by default, up from the JDK default value of 200ms. Using G1, eden space is dynamically // chosen based on the amount of memory which can be collected within the pause time target. // Higher pause target values allow for more eden space, resulting in more stable old generation @@ -110,24 +111,29 @@ class Hybrid implements GcProfile { // both throughput and latency. In degenerate cases, a low target can cause the garbage collector to // thrash and reduce throughput while increasing latency. private static final int LEGACY_MAX_GC_PAUSE_MILLIS = 500; - private static final int JAVA_21_PLUS_MAX_GC_PAUSE_MILLIS = 200; private OptionalInt maxGCPauseMillis = OptionalInt.empty(); @Override public final List gcJvmOpts(JavaVersion javaVersion) { - return ImmutableList.of( - "-XX:+UseG1GC", "-XX:+UseNUMA", "-XX:MaxGCPauseMillis=" + getMaxGCPauseMillis(javaVersion)); + ImmutableList.Builder builder = ImmutableList.builderWithExpectedSize(3) + .add("-XX:+UseG1GC") + .add("-XX:+UseNUMA"); + OptionalInt maxGCPauseMillisValue = getMaxGCPauseMillis(javaVersion); + if (maxGCPauseMillisValue.isPresent()) { + builder.add("-XX:MaxGCPauseMillis=" + maxGCPauseMillisValue.getAsInt()); + } + return builder.build(); } - private int getMaxGCPauseMillis(JavaVersion javaVersion) { + private OptionalInt getMaxGCPauseMillis(JavaVersion javaVersion) { if (maxGCPauseMillis.isPresent()) { - return maxGCPauseMillis.getAsInt(); + return maxGCPauseMillis; } if (javaVersion.compareTo(JavaVersion.toVersion("21")) >= 0) { - return JAVA_21_PLUS_MAX_GC_PAUSE_MILLIS; + return OptionalInt.empty(); } else { - return LEGACY_MAX_GC_PAUSE_MILLIS; + return OptionalInt.of(LEGACY_MAX_GC_PAUSE_MILLIS); } } diff --git a/gradle-sls-packaging/src/test/groovy/com/palantir/gradle/dist/service/JavaServiceDistributionPluginTests.groovy b/gradle-sls-packaging/src/test/groovy/com/palantir/gradle/dist/service/JavaServiceDistributionPluginTests.groovy index 80d2e92da..c72ed1808 100644 --- a/gradle-sls-packaging/src/test/groovy/com/palantir/gradle/dist/service/JavaServiceDistributionPluginTests.groovy +++ b/gradle-sls-packaging/src/test/groovy/com/palantir/gradle/dist/service/JavaServiceDistributionPluginTests.groovy @@ -1366,7 +1366,8 @@ class JavaServiceDistributionPluginTests extends GradleIntegrationSpec { then: def actualStaticConfig = OBJECT_MAPPER.readValue( new File(projectDir, 'dist/service-name-0.0.1/service/bin/launcher-static.yml'), LaunchConfig.LaunchConfigInfo) - actualStaticConfig.jvmOpts.containsAll(['-XX:+UseG1GC', '-XX:+UseNUMA', "-XX:MaxGCPauseMillis=200"]) + actualStaticConfig.jvmOpts.containsAll(['-XX:+UseG1GC', '-XX:+UseNUMA']) + actualStaticConfig.jvmOpts().stream().noneMatch { it.contains("MaxGCPauseMillis") } } def 'applies java agents'() { From 291f0e101ee1f24d8f2035d2c728663c6d0f56d1 Mon Sep 17 00:00:00 2001 From: Carter Kozak Date: Wed, 25 Sep 2024 16:53:00 -0400 Subject: [PATCH 4/6] fix stale comment --- .../dist/service/JavaServiceDistributionPluginTests.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle-sls-packaging/src/test/groovy/com/palantir/gradle/dist/service/JavaServiceDistributionPluginTests.groovy b/gradle-sls-packaging/src/test/groovy/com/palantir/gradle/dist/service/JavaServiceDistributionPluginTests.groovy index c72ed1808..ae4f497f7 100644 --- a/gradle-sls-packaging/src/test/groovy/com/palantir/gradle/dist/service/JavaServiceDistributionPluginTests.groovy +++ b/gradle-sls-packaging/src/test/groovy/com/palantir/gradle/dist/service/JavaServiceDistributionPluginTests.groovy @@ -1336,7 +1336,7 @@ class JavaServiceDistributionPluginTests extends GradleIntegrationSpec { } def 'gc profile null configuration closure on java 21'() { - // We set a different different default MaxGCPauseMillis in JDK-21 than older JDKs. + // We do not declare MaxGCPauseMillis in JDK-21 unless a value is explicitly configured. given: buildFile << ''' plugins { From 6a3a9213b03347c0beda41bafd7a18e85f8c2393 Mon Sep 17 00:00:00 2001 From: Carter Kozak Date: Tue, 1 Oct 2024 17:03:04 -0400 Subject: [PATCH 5/6] rewrite legacy pause millis comment --- .../palantir/gradle/dist/service/gc/GcProfile.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/gradle-sls-packaging/src/main/java/com/palantir/gradle/dist/service/gc/GcProfile.java b/gradle-sls-packaging/src/main/java/com/palantir/gradle/dist/service/gc/GcProfile.java index b98a7eb2a..b958d9625 100644 --- a/gradle-sls-packaging/src/main/java/com/palantir/gradle/dist/service/gc/GcProfile.java +++ b/gradle-sls-packaging/src/main/java/com/palantir/gradle/dist/service/gc/GcProfile.java @@ -103,13 +103,13 @@ public final void newRatio(int newerRatio) { // Match the MaxGCPauseMillis case @SuppressWarnings("AbbreviationAsWordInName") class Hybrid implements GcProfile { - // Applies to jdks older than 21: - // We use 500ms by default, up from the JDK default value of 200ms. Using G1, eden space is dynamically - // chosen based on the amount of memory which can be collected within the pause time target. - // Higher pause target values allow for more eden space, resulting in more stable old generation - // in high-garbage or low-gc-thread scenarios. In the happy case, increasing the pause target increases - // both throughput and latency. In degenerate cases, a low target can cause the garbage collector to - // thrash and reduce throughput while increasing latency. + // For Java versions older than 21 we use 500ms by default, up from the JDK default value of 200ms for + // historical reasons. Java 21 builds seem to interact poorly with this value, causing frequent full + // GC cycles. The 500ms value was chosen at a time when GC thread counts were based on cpu shares + // rather than the underlying physical cores (prior to https://bugs.openjdk.org/browse/JDK-8281181), + // so services tended to thrash in GC to meet the 200ms threshold, promoting data too quickly to old gen. + // This is no longer the case, however we are leaving the Java 17 values where they've been, and making + // changes targeting Java 21 and beyond where we've observed significant improvements. private static final int LEGACY_MAX_GC_PAUSE_MILLIS = 500; private OptionalInt maxGCPauseMillis = OptionalInt.empty(); From 0fd667b91f5872001eedb12515abfe6fc1359bb4 Mon Sep 17 00:00:00 2001 From: Carter Kozak Date: Thu, 17 Oct 2024 15:32:15 -0400 Subject: [PATCH 6/6] explicitly use java 17 in test for old behavior --- .../dist/service/JavaServiceDistributionPluginTests.groovy | 1 + 1 file changed, 1 insertion(+) diff --git a/gradle-sls-packaging/src/test/groovy/com/palantir/gradle/dist/service/JavaServiceDistributionPluginTests.groovy b/gradle-sls-packaging/src/test/groovy/com/palantir/gradle/dist/service/JavaServiceDistributionPluginTests.groovy index ae4f497f7..a933400f5 100644 --- a/gradle-sls-packaging/src/test/groovy/com/palantir/gradle/dist/service/JavaServiceDistributionPluginTests.groovy +++ b/gradle-sls-packaging/src/test/groovy/com/palantir/gradle/dist/service/JavaServiceDistributionPluginTests.groovy @@ -1285,6 +1285,7 @@ class JavaServiceDistributionPluginTests extends GradleIntegrationSpec { distribution { serviceName 'service-name' mainClass 'test.Test' + javaVersion 17 gc 'hybrid' } '''.stripIndent()