From 700c45f46f9a84b2e7683df2138e130abd209864 Mon Sep 17 00:00:00 2001 From: Chris Nokleberg Date: Wed, 25 Oct 2023 17:46:08 +0000 Subject: [PATCH] Use an explicit "UTF-8" character set argument when creating Strings from bytes. The platform default character set is not guaranteed to be UTF-8. PiperOrigin-RevId: 576577338 --- java/com/google/re2j/Matcher.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/java/com/google/re2j/Matcher.java b/java/com/google/re2j/Matcher.java index c2d1a7d2..5c32b82c 100644 --- a/java/com/google/re2j/Matcher.java +++ b/java/com/google/re2j/Matcher.java @@ -7,6 +7,7 @@ package com.google.re2j; import com.google.re2j.MatcherInput.Encoding; +import java.io.UnsupportedEncodingException; import java.util.Map; /** @@ -362,8 +363,12 @@ private boolean genMatch(int startByte, int anchor) { /** Helper: return substring for [start, end). */ String substring(int start, int end) { // UTF_8 is matched in binary mode. So slice the bytes. - if (matcherInput.getEncoding() == Encoding.UTF_8) { - return new String(matcherInput.asBytes(), start, end - start); + try { + if (matcherInput.getEncoding() == Encoding.UTF_8) { + return new String(matcherInput.asBytes(), start, end - start, "UTF-8"); + } + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); // Not possible. } // This is fast for both StringBuilder and String.