From ab43db390c3af27cbf710bde0664f547612c3bdc Mon Sep 17 00:00:00 2001 From: Chris Nokleberg Date: Wed, 25 Oct 2023 17:46:08 +0000 Subject: [PATCH] Use an explicit "UTF-8" character set argument when creating Strings from bytes. The platform default character set is guaranteed to be UTF-8. PiperOrigin-RevId: 576577338 --- java/com/google/re2j/Matcher.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/java/com/google/re2j/Matcher.java b/java/com/google/re2j/Matcher.java index c2d1a7d2..874955c7 100644 --- a/java/com/google/re2j/Matcher.java +++ b/java/com/google/re2j/Matcher.java @@ -7,6 +7,7 @@ package com.google.re2j; import com.google.re2j.MatcherInput.Encoding; +import java.io.UnsupportedEncodingException; import java.util.Map; /** @@ -363,7 +364,11 @@ private boolean genMatch(int startByte, int anchor) { String substring(int start, int end) { // UTF_8 is matched in binary mode. So slice the bytes. if (matcherInput.getEncoding() == Encoding.UTF_8) { - return new String(matcherInput.asBytes(), start, end - start); + try { + return new String(matcherInput.asBytes(), start, end - start, "UTF-8"); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); // Not possible. + } } // This is fast for both StringBuilder and String.