diff --git a/src/main/java/net/openhft/chronicle/wire/JSONWire.java b/src/main/java/net/openhft/chronicle/wire/JSONWire.java
index 758d105674..c9a9adad1e 100644
--- a/src/main/java/net/openhft/chronicle/wire/JSONWire.java
+++ b/src/main/java/net/openhft/chronicle/wire/JSONWire.java
@@ -26,6 +26,7 @@
 import net.openhft.chronicle.core.threads.ThreadLocalHelper;
 import net.openhft.chronicle.core.util.ClassNotFoundRuntimeException;
 import net.openhft.chronicle.core.util.UnresolvedType;
+import net.openhft.chronicle.wire.internal.UnicodeToStringHelper;
 import org.jetbrains.annotations.NotNull;
 import org.jetbrains.annotations.Nullable;
 
@@ -764,6 +765,14 @@ protected StringBuilder readField(@NotNull StringBuilder sb) {
         return super.readField(sb);
     }
 
+    /**
+     * Returns the content of the underlying bytes decoded as UTF-8.
+     */
+    @Override
+    public String toString() {
+        return UnicodeToStringHelper.toUnicodeString(bytes);
+    }
+
     @Override
     @NotNull
     protected StopCharsTester getStrictEscapingEndOfText() {
diff --git a/src/main/java/net/openhft/chronicle/wire/WireType.java b/src/main/java/net/openhft/chronicle/wire/WireType.java
index 3c8910febc..ff2dfcfed4 100644
--- a/src/main/java/net/openhft/chronicle/wire/WireType.java
+++ b/src/main/java/net/openhft/chronicle/wire/WireType.java
@@ -27,6 +27,7 @@
 import net.openhft.chronicle.core.io.ValidatableUtil;
 import net.openhft.chronicle.core.scoped.ScopedResource;
 import net.openhft.chronicle.core.values.*;
+import net.openhft.chronicle.wire.internal.UnicodeToStringHelper;
 import org.jetbrains.annotations.NotNull;
 import org.jetbrains.annotations.Nullable;
 
@@ -34,7 +35,6 @@
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.Serializable;
-import java.lang.reflect.Constructor;
 import java.net.URL;
 import java.util.Map;
 import java.util.Spliterator;
@@ -200,6 +200,12 @@ public Wire apply(@NotNull Bytes bytes) {
         public boolean isText() {
             return true;
         }
+
+        @Override
+        public String asString(Object marshallable) {
+            return asUnicodeString(marshallable);
+        }
+
     },
     JSON_ONLY {
         @NotNull
@@ -212,6 +218,11 @@ public Wire apply(@NotNull Bytes bytes) {
         public boolean isText() {
             return true;
         }
+
+        @Override
+        public String asString(Object marshallable) {
+            return asUnicodeString(marshallable);
+        }
     },
     YAML {
         @SuppressWarnings("deprecation")
@@ -385,6 +396,24 @@ public String asString(Object marshallable) {
         }
     }
 
+    /**
+     * Serialises the given object with this wire type and decodes the written bytes as UTF-8.
+     * Validation is disabled while the object is written and re-enabled afterwards.
+     *
+     * @param marshallable the object to render
+     * @return the serialised form decoded as a UTF-8 string
+     */
+    protected @NotNull String asUnicodeString(Object marshallable) {
+        ValidatableUtil.startValidateDisabled();
+        try (ScopedResource<? extends Bytes<?>> stlBytes = Wires.acquireBytesScoped()) {
+            final Bytes<?> bytes = stlBytes.get();
+            asBytes(marshallable, bytes);
+            return UnicodeToStringHelper.toUnicodeString(bytes);
+        } finally {
+            ValidatableUtil.endValidateDisabled();
+        }
+    }
+
     /**
      * Converts the given marshallable object to a {@link Bytes} buffer.
      * This method uses various strategies to serialize different types of
@@ -395,7 +424,7 @@ public String asString(Object marshallable) {
      * @throws InvalidMarshallableException If the object cannot be serialized properly.
      */
     @NotNull
-    private void asBytes(Object marshallable, Bytes bytes) throws InvalidMarshallableException {
+    protected void asBytes(Object marshallable, Bytes bytes) throws InvalidMarshallableException {
         Wire wire = apply(bytes);
         wire.usePadding(wire.isBinary() && AbstractWire.DEFAULT_USE_PADDING);
         @NotNull final ValueOut valueOut = wire.getValueOut();
diff --git a/src/main/java/net/openhft/chronicle/wire/internal/UnicodeToStringHelper.java b/src/main/java/net/openhft/chronicle/wire/internal/UnicodeToStringHelper.java
new file mode 100644
index 0000000000..2abe86f79a
--- /dev/null
+++ b/src/main/java/net/openhft/chronicle/wire/internal/UnicodeToStringHelper.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2016-2022 chronicle.software
+ *
+ *     https://chronicle.software
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package net.openhft.chronicle.wire.internal;
+
+import net.openhft.chronicle.bytes.Bytes;
+import net.openhft.chronicle.bytes.BytesStore;
+import net.openhft.chronicle.bytes.VanillaBytes;
+import net.openhft.chronicle.bytes.internal.NativeBytesStore;
+import net.openhft.chronicle.core.Memory;
+import net.openhft.chronicle.core.io.ClosedIllegalStateException;
+
+import java.nio.BufferUnderflowException;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Helper class to convert a stream of bytes to a UTF-8 encoded string.
+ * Heavily based on the approach used in {@link VanillaBytes#toString()} but with UTF-8 support.
+ * This is used selectively rather than everywhere because some parts of wire use 8-bit encoding of strings based on
+ * ISO-8859-1.
+ */
+public final class UnicodeToStringHelper {
+
+    private UnicodeToStringHelper() {
+    }
+
+    /**
+     * Represent the bytes store as a UTF-8 string.
+     * Any exception raised while reading is reported as the returned text instead of being thrown.
+     *
+     * @param bytesStore the store whose readable bytes are decoded
+     * @return UTF-8 string representation of the bytes store.
+     */
+    public static String toUnicodeString(BytesStore<?, ?> bytesStore) {
+        try {
+            return bytesStore instanceof NativeBytesStore
+                    ? toStringNativeBytes((NativeBytesStore<?>) bytesStore)
+                    : toStringBytesStore(bytesStore);
+        } catch (Exception e) {
+            // toString-style contract: never throw, describe the failure instead.
+            return e.toString();
+        }
+    }
+
+    /**
+     * Decodes a native store by reading straight from its memory address.
+     *
+     * @param bytesStore the native store to decode
+     * @return the readable bytes decoded as UTF-8
+     */
+    private static String toStringNativeBytes(NativeBytesStore<?> bytesStore) {
+        final Memory memory = bytesStore.memory;
+        // Bound the length by the real capacity up front so no zero padding is ever decoded.
+        final long readable = Math.min(bytesStore.realReadRemaining(), bytesStore.realCapacity());
+        final int length = (int) Math.min(Bytes.MAX_HEAP_CAPACITY, readable);
+        final byte[] bytes = new byte[length];
+        final long address = bytesStore.address + bytesStore.translate(bytesStore.readPosition());
+        for (int i = 0; i < length; i++) {
+            bytes[i] = memory.readByte(address + i);
+        }
+        return new String(bytes, StandardCharsets.UTF_8);
+    }
+
+    /**
+     * Decodes a generic store one byte at a time, starting at its read position.
+     *
+     * @param bytesStore the store to decode
+     * @return the bytes that could be read, decoded as UTF-8
+     * @throws ClosedIllegalStateException presumably if the store has been closed - TODO confirm
+     */
+    private static String toStringBytesStore(BytesStore<?, ?> bytesStore)
+            throws ClosedIllegalStateException {
+        final int length = (int) Math.min(Bytes.MAX_HEAP_CAPACITY, bytesStore.readRemaining());
+        final byte[] bytes = new byte[length];
+        final long start = bytesStore.readPosition();
+        int read = 0;
+        try {
+            while (read < length) {
+                bytes[read] = bytesStore.readByte(start + read);
+                read++;
+            }
+        } catch (BufferUnderflowException ignored) {
+            // Best effort: fall through and decode only the bytes read so far.
+        }
+        return new String(bytes, 0, read, StandardCharsets.UTF_8);
+    }
+
+}
diff --git a/src/test/java/net/openhft/chronicle/wire/JsonWireUnicodeAcceptanceTest.java b/src/test/java/net/openhft/chronicle/wire/JsonWireUnicodeAcceptanceTest.java
new file mode 100644
index 0000000000..af6dd3adad
--- /dev/null
+++ b/src/test/java/net/openhft/chronicle/wire/JsonWireUnicodeAcceptanceTest.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016-2022 chronicle.software
+ *
+ *     https://chronicle.software
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package net.openhft.chronicle.wire;
+
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Verify that unicode characters can be properly represented in JSON output.
+ */
+public class JsonWireUnicodeAcceptanceTest {
+
+    @ParameterizedTest
+    @ValueSource(strings = {"£", "€", "¥", "₹", "ó", "óaóó", "☞☞☞☞☞", "ÊÆÄ"})
+    public void json_verifyAsString(String input) {
+        Map<String, String> map = new HashMap<>();
+        map.put("x", input);
+        assertEquals("{\"x\":\"" + input + "\"}", WireType.JSON.asString(map));
+    }
+
+    @ParameterizedTest
+    @ValueSource(strings = {"£", "€", "¥", "₹", "ó", "óaóó"})
+    public void json_verifyObjectToString(String input) {
+        Map<String, String> map = new HashMap<>();
+        map.put("x", input);
+        WireOut object = new JSONWire().getValueOut().object(map);
+        assertEquals("{\"x\":\"" + input + "\"}", object.toString());
+    }
+
+    @ParameterizedTest
+    @ValueSource(strings = {"£", "€", "¥", "₹", "ó", "óaóó"})
+    public void json_verifyAsText(String input) {
+        Map<String, String> map = new HashMap<>();
+        map.put("x", input);
+        JSONWire jsonWire = new JSONWire();
+        jsonWire.getValueOut().object(map);
+        assertEquals("{\"x\":\"" + input + "\"}", JSONWire.asText(jsonWire));
+    }
+
+}