From bd9df624931caedea712314042b8700417ac22dd Mon Sep 17 00:00:00 2001 From: Leigh McCulloch <351529+leighmcculloch@users.noreply.github.com> Date: Thu, 23 Nov 2023 01:26:51 -0800 Subject: [PATCH 01/11] Render asset codes as strings in JSON --- Makefile | 2 +- src/curr/generated.rs | 37 +---------- src/curr/str.rs | 128 ++++++++++++++++++++++++++++++++++-- tests/str.rs | 147 +++++++++++++++++++++++++++++++++++++++++- 4 files changed, 269 insertions(+), 45 deletions(-) diff --git a/Makefile b/Makefile index 4e00fe21..767c9f7d 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ CARGO_HACK_ARGS=--feature-powerset --exclude-features default --group-features b CARGO_DOC_ARGS?=--open XDRGEN_VERSION=cbff4b31 -XDRGEN_TYPES_CUSTOM_STR_IMPL=PublicKey,AccountId,MuxedAccount,MuxedAccountMed25519,SignerKey,SignerKeyEd25519SignedPayload,NodeId,ScAddress +XDRGEN_TYPES_CUSTOM_STR_IMPL=PublicKey,AccountId,MuxedAccount,MuxedAccountMed25519,SignerKey,SignerKeyEd25519SignedPayload,NodeId,ScAddress,AssetCode,AssetCode4,AssetCode12 all: build test diff --git a/src/curr/generated.rs b/src/curr/generated.rs index 10bd4dc0..79e18d06 100644 --- a/src/curr/generated.rs +++ b/src/curr/generated.rs @@ -10345,23 +10345,6 @@ impl core::fmt::Debug for AssetCode4 { Ok(()) } } -impl core::fmt::Display for AssetCode4 { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - let v = &self.0; - for b in v { - write!(f, "{b:02x}")?; - } - Ok(()) - } -} - -#[cfg(feature = "alloc")] -impl core::str::FromStr for AssetCode4 { - type Err = Error; - fn from_str(s: &str) -> core::result::Result { - hex::decode(s).map_err(|_| Error::InvalidHex)?.try_into() - } -} impl From for [u8; 4] { #[must_use] fn from(x: AssetCode4) -> Self { @@ -10461,23 +10444,6 @@ impl core::fmt::Debug for AssetCode12 { Ok(()) } } -impl core::fmt::Display for AssetCode12 { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - let v = &self.0; - for b in v { - write!(f, "{b:02x}")?; - } - Ok(()) - } -} - -#[cfg(feature = "alloc")] -impl core::str::FromStr for AssetCode12 { - type Err = Error; - fn from_str(s: &str) -> core::result::Result { - hex::decode(s).map_err(|_| Error::InvalidHex)?.try_into() - } -} impl From for [u8; 12] { #[must_use] fn from(x: AssetCode12) -> Self { @@ -10689,8 +10655,7 @@ impl WriteXdr for AssetType { #[cfg_attr(feature = "arbitrary", derive(Arbitrary))] #[cfg_attr( all(feature = "serde", feature = "alloc"), - derive(serde::Serialize, serde::Deserialize), - serde(rename_all = "snake_case") + derive(serde_with::SerializeDisplay, serde_with::DeserializeFromStr) )] #[allow(clippy::large_enum_variant)] pub enum AssetCode { diff --git a/src/curr/str.rs b/src/curr/str.rs index c5ad3e8b..0c95cd4a 100644 --- a/src/curr/str.rs +++ b/src/curr/str.rs @@ -1,10 +1,6 @@ //# Custom string representations of the following types, also used for JSON //# formatting. //# -//# The types that has impls in this file are given to the xdrgen -//# --rust-types-custom-str-impl cli option, so that xdrgen does not generate -//# FromStr and Display impls for them. -//# //# ## Strkey Types (SEP-23) //# - PublicKey //# - AccountId @@ -13,11 +9,16 @@ //# - SignerKey //# - SignerKeyEd25519SignedPayload //# - NodeId +//# +//# ## Asset Codes +//# - AssetCode +//# - AssetCode4 +//# - AssetCode12 #![cfg(feature = "alloc")] use super::{ - AccountId, Error, Hash, MuxedAccount, MuxedAccountMed25519, NodeId, PublicKey, ScAddress, - SignerKey, SignerKeyEd25519SignedPayload, Uint256, + AccountId, AssetCode, AssetCode12, AssetCode4, Error, Hash, MuxedAccount, MuxedAccountMed25519, + NodeId, PublicKey, ScAddress, SignerKey, SignerKeyEd25519SignedPayload, Uint256, }; impl From for Error { @@ -254,3 +255,118 @@ impl core::fmt::Display for ScAddress { Ok(()) } } + +impl core::str::FromStr for AssetCode4 { + type Err = Error; + fn from_str(s: &str) -> core::result::Result { + let b = s.as_bytes(); + let mut code = AssetCode4([0u8; 4]); + if b.len() <= code.0.len() { + code.0[..b.len()].copy_from_slice(&b); + Ok(code) + } else { + Err(Error::Invalid) + } + } +} + +impl core::fmt::Display for AssetCode4 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + if let Some(last_idx) = self.0.iter().rposition(|c| *c != 0) { + write_utf8_lossy_with_nuls(f, &self.0[..=last_idx]) + } else { + Ok(()) + } + } +} + +impl core::str::FromStr for AssetCode12 { + type Err = Error; + fn from_str(s: &str) -> core::result::Result { + let b = s.as_bytes(); + let mut code = AssetCode12([0u8; 12]); + if b.len() <= code.0.len() { + code.0[..b.len()].copy_from_slice(&b); + Ok(code) + } else { + Err(Error::Invalid) + } + } +} + +impl core::fmt::Display for AssetCode12 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + if let Some(last_idx) = self.0.iter().rposition(|c| *c != 0) { + write_utf8_lossy_with_nuls(f, &self.0[..=last_idx]) + } else { + Ok(()) + } + } +} + +impl core::str::FromStr for AssetCode { + type Err = Error; + fn from_str(s: &str) -> core::result::Result { + let b = s.as_bytes(); + if b.len() <= 4 { + Ok(AssetCode::CreditAlphanum4(AssetCode4::from_str(s)?)) + } else if b.len() <= 12 { + Ok(AssetCode::CreditAlphanum12(AssetCode12::from_str(s)?)) + } else { + Err(Error::Invalid) + } + } +} + +impl core::fmt::Display for AssetCode { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + AssetCode::CreditAlphanum4(c) => c.fmt(f), + AssetCode::CreditAlphanum12(c) => c.fmt(f), + } + } +} + +/// Writes a byte slice as a utf8 string, replacing any bytes in invalid utf8 +/// sequences as the nul byte. +/// +/// A modified copy of the Rust stdlib docs examples here: +/// +/// +/// This particular implementation preserves the length of the string written +/// such that exactly one byte is written for every byte in an invalid sequence, +/// by writing a nul (0x00) byte for each. +/// +/// Normally it would be common to write a Unicode Replacement Character +/// (U+FFFD) for lossy coding but doing so would not preserve the length as a +/// single invalid byte would be replaced by two bytes. +pub fn write_utf8_lossy_with_nuls( + f: &mut impl core::fmt::Write, + mut input: &[u8], +) -> core::fmt::Result { + loop { + match core::str::from_utf8(input) { + Ok(valid) => { + write!(f, "{valid}")?; + break; + } + Err(error) => { + let (valid, after_valid) = input.split_at(error.valid_up_to()); + write!(f, "{}", core::str::from_utf8(valid).unwrap())?; + + if let Some(invalid_sequence_length) = error.error_len() { + for _ in 0..invalid_sequence_length { + write!(f, "\0")?; + } + input = &after_valid[invalid_sequence_length..]; + } else { + for _ in 0..after_valid.len() { + write!(f, "\0")?; + } + break; + } + } + } + } + Ok(()) +} diff --git a/tests/str.rs b/tests/str.rs index 57e129f8..2835abbe 100644 --- a/tests/str.rs +++ b/tests/str.rs @@ -4,8 +4,8 @@ use stellar_xdr::curr as stellar_xdr; use stellar_xdr::{ - AccountId, Error, Hash, MuxedAccount, MuxedAccountMed25519, NodeId, PublicKey, ScAddress, - SignerKey, SignerKeyEd25519SignedPayload, Uint256, + AccountId, AssetCode, AssetCode12, AssetCode4, Error, Hash, MuxedAccount, MuxedAccountMed25519, + NodeId, PublicKey, ScAddress, SignerKey, SignerKeyEd25519SignedPayload, Uint256, }; use std::str::FromStr; @@ -398,3 +398,146 @@ fn sc_address_from_str_with_invalid() { ); assert_eq!(v, Err(Error::Invalid)); } + +#[test] +fn asset_code_4_from_str() { + assert_eq!(AssetCode4::from_str(""), Ok(AssetCode4(*b"\0\0\0\0"))); + assert_eq!(AssetCode4::from_str("a"), Ok(AssetCode4(*b"a\0\0\0"))); + assert_eq!(AssetCode4::from_str("ab"), Ok(AssetCode4(*b"ab\0\0"))); + assert_eq!(AssetCode4::from_str("abc"), Ok(AssetCode4(*b"abc\0"))); + assert_eq!(AssetCode4::from_str("abcd"), Ok(AssetCode4(*b"abcd"))); + + assert_eq!(AssetCode4::from_str("abcde"), Err(Error::Invalid)); +} + +#[test] +fn asset_code_4_to_string() { + assert_eq!(AssetCode4(*b"\0\0\0\0").to_string(), ""); + assert_eq!(AssetCode4(*b"a\0\0\0").to_string(), "a"); + assert_eq!(AssetCode4(*b"ab\0\0").to_string(), "ab"); + assert_eq!(AssetCode4(*b"abc\0").to_string(), "abc"); + assert_eq!(AssetCode4(*b"abcd").to_string(), "abcd"); + + // Preserve as much of the code as possible, even if it contains nul bytes. + assert_eq!(AssetCode4(*b"a\0cd").to_string(), "a\0cd"); + + // Replace bytes that are not valid utf8 with the replacement character � and preserve length. + assert_eq!(AssetCode4(*b"a\xc3\x28d").to_string(), "a\0(d"); + assert_eq!(AssetCode4(*b"a\xc3\xc3\x28").to_string(), "a\0\0("); + assert_eq!(AssetCode4(*b"a\xc3\xc3\xc3").to_string(), "a\0\0\0"); +} + +#[test] +#[rustfmt::skip] +fn asset_code_12_from_str() { + assert_eq!(AssetCode12::from_str(""), Ok(AssetCode12(*b"\0\0\0\0\0\0\0\0\0\0\0\0"))); + assert_eq!(AssetCode12::from_str("a"), Ok(AssetCode12(*b"a\0\0\0\0\0\0\0\0\0\0\0"))); + assert_eq!(AssetCode12::from_str("ab"), Ok(AssetCode12(*b"ab\0\0\0\0\0\0\0\0\0\0"))); + assert_eq!(AssetCode12::from_str("abc"), Ok(AssetCode12(*b"abc\0\0\0\0\0\0\0\0\0"))); + assert_eq!(AssetCode12::from_str("abcd"), Ok(AssetCode12(*b"abcd\0\0\0\0\0\0\0\0"))); + assert_eq!(AssetCode12::from_str("abcde"), Ok(AssetCode12(*b"abcde\0\0\0\0\0\0\0"))); + assert_eq!(AssetCode12::from_str("abcdef"), Ok(AssetCode12(*b"abcdef\0\0\0\0\0\0"))); + assert_eq!(AssetCode12::from_str("abcdefg"), Ok(AssetCode12(*b"abcdefg\0\0\0\0\0"))); + assert_eq!(AssetCode12::from_str("abcdefgh"), Ok(AssetCode12(*b"abcdefgh\0\0\0\0"))); + assert_eq!(AssetCode12::from_str("abcdefghi"), Ok(AssetCode12(*b"abcdefghi\0\0\0"))); + assert_eq!(AssetCode12::from_str("abcdefghij"), Ok(AssetCode12(*b"abcdefghij\0\0"))); + assert_eq!(AssetCode12::from_str("abcdefghijk"), Ok(AssetCode12(*b"abcdefghijk\0"))); + assert_eq!(AssetCode12::from_str("abcdefghijkl"), Ok(AssetCode12(*b"abcdefghijkl"))); + + assert_eq!(AssetCode12::from_str("abcdefghijklm"), Err(Error::Invalid)); +} + +#[test] +#[rustfmt::skip] +fn asset_code_12_to_string() { + assert_eq!(AssetCode12(*b"\0\0\0\0\0\0\0\0\0\0\0\0").to_string(), ""); + assert_eq!(AssetCode12(*b"a\0\0\0\0\0\0\0\0\0\0\0").to_string(), "a"); + assert_eq!(AssetCode12(*b"ab\0\0\0\0\0\0\0\0\0\0").to_string(), "ab"); + assert_eq!(AssetCode12(*b"abc\0\0\0\0\0\0\0\0\0").to_string(), "abc"); + assert_eq!(AssetCode12(*b"abcd\0\0\0\0\0\0\0\0").to_string(), "abcd"); + assert_eq!(AssetCode12(*b"abcde\0\0\0\0\0\0\0").to_string(), "abcde"); + assert_eq!(AssetCode12(*b"abcdef\0\0\0\0\0\0").to_string(), "abcdef"); + assert_eq!(AssetCode12(*b"abcdefg\0\0\0\0\0").to_string(), "abcdefg"); + assert_eq!(AssetCode12(*b"abcdefgh\0\0\0\0").to_string(), "abcdefgh"); + assert_eq!(AssetCode12(*b"abcdefghi\0\0\0").to_string(), "abcdefghi"); + assert_eq!(AssetCode12(*b"abcdefghij\0\0").to_string(), "abcdefghij"); + assert_eq!(AssetCode12(*b"abcdefghijk\0").to_string(), "abcdefghijk"); + assert_eq!(AssetCode12(*b"abcdefghijkl").to_string(), "abcdefghijkl"); + + // Preserve as much of the code as possible, even if it contains nul bytes. + assert_eq!(AssetCode12(*b"a\0cd\0\0\0\0\0\0\0\0").to_string(), "a\0cd"); + + // Replace bytes that are not valid utf8 with the replacement character � and preserve length. + assert_eq!(AssetCode12(*b"a\xc3\x28d\0\0\0\0\0\0\0\0").to_string(), "a\0(d"); + assert_eq!(AssetCode12(*b"a\xc3\xc3\x28d\0\0\0\0\0\0\0").to_string(), "a\0\0(d"); +} + +#[test] +#[rustfmt::skip] +fn asset_code_from_str() { + assert_eq!(AssetCode::from_str(""), Ok(AssetCode::CreditAlphanum4(AssetCode4(*b"\0\0\0\0")))); + assert_eq!(AssetCode::from_str("a"), Ok(AssetCode::CreditAlphanum4(AssetCode4(*b"a\0\0\0")))); + assert_eq!(AssetCode::from_str("ab"), Ok(AssetCode::CreditAlphanum4(AssetCode4(*b"ab\0\0")))); + assert_eq!(AssetCode::from_str("abc"), Ok(AssetCode::CreditAlphanum4(AssetCode4(*b"abc\0")))); + assert_eq!(AssetCode::from_str("abcd"), Ok(AssetCode::CreditAlphanum4(AssetCode4(*b"abcd")))); + + assert_eq!(AssetCode::from_str("abcde"), Ok(AssetCode::CreditAlphanum12(AssetCode12(*b"abcde\0\0\0\0\0\0\0")))); + assert_eq!(AssetCode::from_str("abcdef"), Ok(AssetCode::CreditAlphanum12(AssetCode12(*b"abcdef\0\0\0\0\0\0")))); + assert_eq!(AssetCode::from_str("abcdefg"), Ok(AssetCode::CreditAlphanum12(AssetCode12(*b"abcdefg\0\0\0\0\0")))); + assert_eq!(AssetCode::from_str("abcdefgh"), Ok(AssetCode::CreditAlphanum12(AssetCode12(*b"abcdefgh\0\0\0\0")))); + assert_eq!(AssetCode::from_str("abcdefghi"), Ok(AssetCode::CreditAlphanum12(AssetCode12(*b"abcdefghi\0\0\0")))); + assert_eq!(AssetCode::from_str("abcdefghij"), Ok(AssetCode::CreditAlphanum12(AssetCode12(*b"abcdefghij\0\0")))); + assert_eq!(AssetCode::from_str("abcdefghijk"), Ok(AssetCode::CreditAlphanum12(AssetCode12(*b"abcdefghijk\0")))); + assert_eq!(AssetCode::from_str("abcdefghijkl"), Ok(AssetCode::CreditAlphanum12(AssetCode12(*b"abcdefghijkl")))); + + assert_eq!(AssetCode::from_str("abcdefghijklm"), Err(Error::Invalid)); +} + +#[test] +#[rustfmt::skip] +fn asset_code_to_string() { + assert_eq!(AssetCode::CreditAlphanum4(AssetCode4(*b"\0\0\0\0")).to_string(), ""); + assert_eq!(AssetCode::CreditAlphanum4(AssetCode4(*b"a\0\0\0")).to_string(), "a"); + assert_eq!(AssetCode::CreditAlphanum4(AssetCode4(*b"ab\0\0")).to_string(), "ab"); + assert_eq!(AssetCode::CreditAlphanum4(AssetCode4(*b"abc\0")).to_string(), "abc"); + assert_eq!(AssetCode::CreditAlphanum4(AssetCode4(*b"abcd")).to_string(), "abcd"); + + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"\0\0\0\0\0\0\0\0\0\0\0\0")).to_string(), ""); + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"a\0\0\0\0\0\0\0\0\0\0\0")).to_string(), "a"); + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"ab\0\0\0\0\0\0\0\0\0\0")).to_string(), "ab"); + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"abc\0\0\0\0\0\0\0\0\0")).to_string(), "abc"); + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"abcd\0\0\0\0\0\0\0\0")).to_string(), "abcd"); + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"abcde\0\0\0\0\0\0\0")).to_string(), "abcde"); + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"abcdef\0\0\0\0\0\0")).to_string(), "abcdef"); + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"abcdefg\0\0\0\0\0")).to_string(), "abcdefg"); + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"abcdefgh\0\0\0\0")).to_string(), "abcdefgh"); + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"abcdefghi\0\0\0")).to_string(), "abcdefghi"); + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"abcdefghij\0\0")).to_string(), "abcdefghij"); + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"abcdefghijk\0")).to_string(), "abcdefghijk"); + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"abcdefghijkl")).to_string(), "abcdefghijkl"); + + // Preserve as much of the code as possible, even if it contains nul bytes. + assert_eq!(AssetCode::CreditAlphanum4(AssetCode4(*b"a\0cd")).to_string(), "a\0cd"); + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"a\0cd\0\0\0\0\0\0\0\0")).to_string(), "a\0cd"); + + // Replace bytes that are not valid utf8 with the replacement character � and preserve length. + assert_eq!(AssetCode::CreditAlphanum4(AssetCode4(*b"a\xc3\x28d")).to_string(), "a\0(d"); + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"a\xc3\x28d\0\0\0\0\0\0\0\0")).to_string(), "a\0(d"); + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"a\xc3\xc3\x28d\0\0\0\0\0\0\0")).to_string(), "a\0\0(d"); +} + +#[test] +#[rustfmt::skip] +fn asset_code_from_str_to_string_roundtrip_unicode() { + // Round tripped to correct variant based on byte length, not code point length. + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"a\xd9\xaa\xd9\xaa\0\0\0\0\0\0\0")).to_string(), "a٪٪"); + assert_eq!(AssetCode::from_str("a٪٪"), Ok(AssetCode::CreditAlphanum12(AssetCode12(*b"a\xd9\xaa\xd9\xaa\0\0\0\0\0\0\0")))); + + // Round tripped to correct variant based on byte length even when utf8 + // parsing error occurs. To preserve type consistency when round tripping + // the data, the length when parsing errors occur must be consistent with + // the input length, which is why a nul byte is expected instead of a + // Unicode Replacement Character, which would be two bytes. + assert_eq!(AssetCode::CreditAlphanum4(AssetCode4(*b"a\xc3\xc3d")).to_string(), "a\0\0d"); + assert_eq!(AssetCode::from_str("a\0\0d"), Ok(AssetCode::CreditAlphanum4(AssetCode4(*b"a\0\0d")))); +} From 88423579f4b98d987a643f0aa7e276ecf5d47f98 Mon Sep 17 00:00:00 2001 From: Leigh McCulloch <351529+leighmcculloch@users.noreply.github.com> Date: Thu, 23 Nov 2023 01:41:41 -0800 Subject: [PATCH 02/11] fix --- src/curr/str.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/curr/str.rs b/src/curr/str.rs index 0c95cd4a..ac1be2e6 100644 --- a/src/curr/str.rs +++ b/src/curr/str.rs @@ -262,7 +262,7 @@ impl core::str::FromStr for AssetCode4 { let b = s.as_bytes(); let mut code = AssetCode4([0u8; 4]); if b.len() <= code.0.len() { - code.0[..b.len()].copy_from_slice(&b); + code.0[..b.len()].copy_from_slice(b); Ok(code) } else { Err(Error::Invalid) @@ -286,7 +286,7 @@ impl core::str::FromStr for AssetCode12 { let b = s.as_bytes(); let mut code = AssetCode12([0u8; 12]); if b.len() <= code.0.len() { - code.0[..b.len()].copy_from_slice(&b); + code.0[..b.len()].copy_from_slice(b); Ok(code) } else { Err(Error::Invalid) From e355ad268dd54a1a898d9b11c040269964dcc279 Mon Sep 17 00:00:00 2001 From: Leigh McCulloch <351529+leighmcculloch@users.noreply.github.com> Date: Thu, 23 Nov 2023 01:43:02 -0800 Subject: [PATCH 03/11] fix doc comment --- src/curr/str.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/curr/str.rs b/src/curr/str.rs index ac1be2e6..a99f074b 100644 --- a/src/curr/str.rs +++ b/src/curr/str.rs @@ -1,6 +1,10 @@ //# Custom string representations of the following types, also used for JSON //# formatting. //# +//# The types that has impls in this file are given to the xdrgen +//# --rust-types-custom-str-impl cli option, so that xdrgen does not generate +//# FromStr and Display impls for them. +//# //# ## Strkey Types (SEP-23) //# - PublicKey //# - AccountId From 46f90f44f1340a239e974bb0e8541972954329f9 Mon Sep 17 00:00:00 2001 From: Leigh McCulloch <351529+leighmcculloch@users.noreply.github.com> Date: Thu, 23 Nov 2023 01:49:36 -0800 Subject: [PATCH 04/11] fix test --- tests/serde_tx.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/serde_tx.rs b/tests/serde_tx.rs index 9ec4904c..1db87f1a 100644 --- a/tests/serde_tx.rs +++ b/tests/serde_tx.rs @@ -68,7 +68,7 @@ fn test_serde_tx() -> Result<(), Box> { "change_trust": { "line": { "credit_alphanum4": { - "asset_code": "41424344", + "asset_code": "ABCD", "issuer": "GBB5BH2JFIVOHKQK5WHM5XFSE2SPOUFJB3FU4CPZVR3EUVJXZLMHOLOM" } }, From 2e5ac1a3a455b7a0792a101b17b2d79d8377d792 Mon Sep 17 00:00:00 2001 From: Leigh McCulloch <351529+leighmcculloch@users.noreply.github.com> Date: Thu, 30 Nov 2023 14:51:48 -0800 Subject: [PATCH 05/11] Escape asset code strings preserving their values --- Cargo.lock | 5 +++ Cargo.toml | 3 +- src/curr/str.rs | 89 +++++++++++-------------------------------------- tests/str.rs | 32 +++++++++--------- 4 files changed, 43 insertions(+), 86 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b9cd805f..abadb30e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -244,6 +244,10 @@ dependencies = [ "syn 1.0.98", ] +[[package]] +name = "escape-bytes" +version = "0.1.0" + [[package]] name = "fnv" version = "1.0.7" @@ -493,6 +497,7 @@ dependencies = [ "base64 0.13.0", "clap", "crate-git-revision", + "escape-bytes", "hex", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index c2c35dea..265f5d52 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ stellar-strkey = { version = "0.0.8", optional = true } base64 = { version = "0.13.0", optional = true } serde = { version = "1.0.139", features = ["derive"], optional = true } serde_with = { version = "3.0.0", optional = true } +escape-bytes = { version = "0.1.0", path = "../asciiescape", default-features = false, optional = true } hex = { version = "0.4.3", optional = true } arbitrary = {version = "1.1.3", features = ["derive"], optional = true} clap = { version = "4.2.4", default-features = false, features = ["std", "derive", "usage", "help"], optional = true } @@ -35,7 +36,7 @@ serde_json = "1.0.89" [features] default = ["std", "curr"] std = ["alloc"] -alloc = ["dep:hex", "dep:stellar-strkey"] +alloc = ["dep:hex", "dep:stellar-strkey", "dep:escape-bytes"] curr = [] next = [] diff --git a/src/curr/str.rs b/src/curr/str.rs index a99f074b..c00d65bb 100644 --- a/src/curr/str.rs +++ b/src/curr/str.rs @@ -263,59 +263,54 @@ impl core::fmt::Display for ScAddress { impl core::str::FromStr for AssetCode4 { type Err = Error; fn from_str(s: &str) -> core::result::Result { - let b = s.as_bytes(); let mut code = AssetCode4([0u8; 4]); - if b.len() <= code.0.len() { - code.0[..b.len()].copy_from_slice(b); - Ok(code) - } else { - Err(Error::Invalid) - } + escape_bytes::unescape_into(&mut code.0, s.as_bytes()).map_err(|_| Error::Invalid)?; + Ok(code) } } impl core::fmt::Display for AssetCode4 { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { if let Some(last_idx) = self.0.iter().rposition(|c| *c != 0) { - write_utf8_lossy_with_nuls(f, &self.0[..=last_idx]) - } else { - Ok(()) + for b in escape_bytes::Escape::new(&self.0[..=last_idx]) { + write!(f, "{}", b as char)?; + } } + Ok(()) } } impl core::str::FromStr for AssetCode12 { type Err = Error; fn from_str(s: &str) -> core::result::Result { - let b = s.as_bytes(); let mut code = AssetCode12([0u8; 12]); - if b.len() <= code.0.len() { - code.0[..b.len()].copy_from_slice(b); - Ok(code) - } else { - Err(Error::Invalid) - } + escape_bytes::unescape_into(&mut code.0, s.as_bytes()).map_err(|_| Error::Invalid)?; + Ok(code) } } impl core::fmt::Display for AssetCode12 { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { if let Some(last_idx) = self.0.iter().rposition(|c| *c != 0) { - write_utf8_lossy_with_nuls(f, &self.0[..=last_idx]) - } else { - Ok(()) + for b in escape_bytes::Escape::new(&self.0[..=last_idx]) { + write!(f, "{}", b as char)?; + } } + Ok(()) } } impl core::str::FromStr for AssetCode { type Err = Error; fn from_str(s: &str) -> core::result::Result { - let b = s.as_bytes(); - if b.len() <= 4 { - Ok(AssetCode::CreditAlphanum4(AssetCode4::from_str(s)?)) - } else if b.len() <= 12 { - Ok(AssetCode::CreditAlphanum12(AssetCode12::from_str(s)?)) + let mut code = [0u8; 12]; + let n = escape_bytes::unescape_into(&mut code, s.as_bytes()).map_err(|_| Error::Invalid)?; + if n <= 4 { + Ok(AssetCode::CreditAlphanum4(AssetCode4([ + code[0], code[1], code[2], code[3], + ]))) + } else if n <= 12 { + Ok(AssetCode::CreditAlphanum12(AssetCode12(code))) } else { Err(Error::Invalid) } @@ -330,47 +325,3 @@ impl core::fmt::Display for AssetCode { } } } - -/// Writes a byte slice as a utf8 string, replacing any bytes in invalid utf8 -/// sequences as the nul byte. -/// -/// A modified copy of the Rust stdlib docs examples here: -/// -/// -/// This particular implementation preserves the length of the string written -/// such that exactly one byte is written for every byte in an invalid sequence, -/// by writing a nul (0x00) byte for each. -/// -/// Normally it would be common to write a Unicode Replacement Character -/// (U+FFFD) for lossy coding but doing so would not preserve the length as a -/// single invalid byte would be replaced by two bytes. -pub fn write_utf8_lossy_with_nuls( - f: &mut impl core::fmt::Write, - mut input: &[u8], -) -> core::fmt::Result { - loop { - match core::str::from_utf8(input) { - Ok(valid) => { - write!(f, "{valid}")?; - break; - } - Err(error) => { - let (valid, after_valid) = input.split_at(error.valid_up_to()); - write!(f, "{}", core::str::from_utf8(valid).unwrap())?; - - if let Some(invalid_sequence_length) = error.error_len() { - for _ in 0..invalid_sequence_length { - write!(f, "\0")?; - } - input = &after_valid[invalid_sequence_length..]; - } else { - for _ in 0..after_valid.len() { - write!(f, "\0")?; - } - break; - } - } - } - } - Ok(()) -} diff --git a/tests/str.rs b/tests/str.rs index 2835abbe..1ffb2e99 100644 --- a/tests/str.rs +++ b/tests/str.rs @@ -419,12 +419,12 @@ fn asset_code_4_to_string() { assert_eq!(AssetCode4(*b"abcd").to_string(), "abcd"); // Preserve as much of the code as possible, even if it contains nul bytes. - assert_eq!(AssetCode4(*b"a\0cd").to_string(), "a\0cd"); + assert_eq!(AssetCode4(*b"a\0cd").to_string(), r"a\0cd"); // Replace bytes that are not valid utf8 with the replacement character � and preserve length. - assert_eq!(AssetCode4(*b"a\xc3\x28d").to_string(), "a\0(d"); - assert_eq!(AssetCode4(*b"a\xc3\xc3\x28").to_string(), "a\0\0("); - assert_eq!(AssetCode4(*b"a\xc3\xc3\xc3").to_string(), "a\0\0\0"); + assert_eq!(AssetCode4(*b"a\xc3\x28d").to_string(), r"a\xc3(d"); + assert_eq!(AssetCode4(*b"a\xc3\xc3\x28").to_string(), r"a\xc3\xc3("); + assert_eq!(AssetCode4(*b"a\xc3\xc3\xc3").to_string(), r"a\xc3\xc3\xc3"); } #[test] @@ -465,11 +465,11 @@ fn asset_code_12_to_string() { assert_eq!(AssetCode12(*b"abcdefghijkl").to_string(), "abcdefghijkl"); // Preserve as much of the code as possible, even if it contains nul bytes. - assert_eq!(AssetCode12(*b"a\0cd\0\0\0\0\0\0\0\0").to_string(), "a\0cd"); + assert_eq!(AssetCode12(*b"a\0cd\0\0\0\0\0\0\0\0").to_string(), r"a\0cd"); // Replace bytes that are not valid utf8 with the replacement character � and preserve length. - assert_eq!(AssetCode12(*b"a\xc3\x28d\0\0\0\0\0\0\0\0").to_string(), "a\0(d"); - assert_eq!(AssetCode12(*b"a\xc3\xc3\x28d\0\0\0\0\0\0\0").to_string(), "a\0\0(d"); + assert_eq!(AssetCode12(*b"a\xc3\x28d\0\0\0\0\0\0\0\0").to_string(), r"a\xc3(d"); + assert_eq!(AssetCode12(*b"a\xc3\xc3\x28d\0\0\0\0\0\0\0").to_string(), r"a\xc3\xc3(d"); } #[test] @@ -517,27 +517,27 @@ fn asset_code_to_string() { assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"abcdefghijkl")).to_string(), "abcdefghijkl"); // Preserve as much of the code as possible, even if it contains nul bytes. - assert_eq!(AssetCode::CreditAlphanum4(AssetCode4(*b"a\0cd")).to_string(), "a\0cd"); - assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"a\0cd\0\0\0\0\0\0\0\0")).to_string(), "a\0cd"); + assert_eq!(AssetCode::CreditAlphanum4(AssetCode4(*b"a\0cd")).to_string(), r"a\0cd"); + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"a\0cd\0\0\0\0\0\0\0\0")).to_string(), r"a\0cd"); // Replace bytes that are not valid utf8 with the replacement character � and preserve length. - assert_eq!(AssetCode::CreditAlphanum4(AssetCode4(*b"a\xc3\x28d")).to_string(), "a\0(d"); - assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"a\xc3\x28d\0\0\0\0\0\0\0\0")).to_string(), "a\0(d"); - assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"a\xc3\xc3\x28d\0\0\0\0\0\0\0")).to_string(), "a\0\0(d"); + assert_eq!(AssetCode::CreditAlphanum4(AssetCode4(*b"a\xc3\x28d")).to_string(), r"a\xc3(d"); + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"a\xc3\x28d\0\0\0\0\0\0\0\0")).to_string(), r"a\xc3(d"); + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"a\xc3\xc3\x28d\0\0\0\0\0\0\0")).to_string(), r"a\xc3\xc3(d"); } #[test] #[rustfmt::skip] fn asset_code_from_str_to_string_roundtrip_unicode() { // Round tripped to correct variant based on byte length, not code point length. - assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"a\xd9\xaa\xd9\xaa\0\0\0\0\0\0\0")).to_string(), "a٪٪"); - assert_eq!(AssetCode::from_str("a٪٪"), Ok(AssetCode::CreditAlphanum12(AssetCode12(*b"a\xd9\xaa\xd9\xaa\0\0\0\0\0\0\0")))); + assert_eq!(AssetCode::CreditAlphanum12(AssetCode12(*b"a\xd9\xaa\xd9\xaa\0\0\0\0\0\0\0")).to_string(), r"a\xd9\xaa\xd9\xaa"); + assert_eq!(AssetCode::from_str(r"a\xd9\xaa\xd9\xaa"), Ok(AssetCode::CreditAlphanum12(AssetCode12(*b"a\xd9\xaa\xd9\xaa\0\0\0\0\0\0\0")))); // Round tripped to correct variant based on byte length even when utf8 // parsing error occurs. To preserve type consistency when round tripping // the data, the length when parsing errors occur must be consistent with // the input length, which is why a nul byte is expected instead of a // Unicode Replacement Character, which would be two bytes. - assert_eq!(AssetCode::CreditAlphanum4(AssetCode4(*b"a\xc3\xc3d")).to_string(), "a\0\0d"); - assert_eq!(AssetCode::from_str("a\0\0d"), Ok(AssetCode::CreditAlphanum4(AssetCode4(*b"a\0\0d")))); + assert_eq!(AssetCode::CreditAlphanum4(AssetCode4(*b"a\xc3\xc3d")).to_string(), r"a\xc3\xc3d"); + assert_eq!(AssetCode::from_str(r"a\xc3\xc3d"), Ok(AssetCode::CreditAlphanum4(AssetCode4(*b"a\xc3\xc3d")))); } From 0a24627335f26b34d08c29aa6820ab34a2f9a617 Mon Sep 17 00:00:00 2001 From: Leigh McCulloch <351529+leighmcculloch@users.noreply.github.com> Date: Thu, 30 Nov 2023 15:54:35 -0800 Subject: [PATCH 06/11] test --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 265f5d52..18e619b9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,7 @@ stellar-strkey = { version = "0.0.8", optional = true } base64 = { version = "0.13.0", optional = true } serde = { version = "1.0.139", features = ["derive"], optional = true } serde_with = { version = "3.0.0", optional = true } -escape-bytes = { version = "0.1.0", path = "../asciiescape", default-features = false, optional = true } +escape-bytes = { version = "0.1.0", git = "https://github.com/stellar/escape-bytes", rev = "457ee2147e84fcc969228e9b11c8431eb44f0df3", default-features = false, optional = true } hex = { version = "0.4.3", optional = true } arbitrary = {version = "1.1.3", features = ["derive"], optional = true} clap = { version = "4.2.4", default-features = false, features = ["std", "derive", "usage", "help"], optional = true } From 8a50dfc3bbb665ce78527853ef7d4b1bbbf12f9d Mon Sep 17 00:00:00 2001 From: Leigh McCulloch <351529+leighmcculloch@users.noreply.github.com> Date: Thu, 30 Nov 2023 15:58:38 -0800 Subject: [PATCH 07/11] fix --- Cargo.lock | 3 ++- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index abadb30e..d2d1bac0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -246,7 +246,8 @@ dependencies = [ [[package]] name = "escape-bytes" -version = "0.1.0" +version = "0.0.0" +source = "git+https://github.com/stellar/escape-bytes?rev=457ee2147e84fcc969228e9b11c8431eb44f0df3#457ee2147e84fcc969228e9b11c8431eb44f0df3" [[package]] name = "fnv" diff --git a/Cargo.toml b/Cargo.toml index 18e619b9..ba5e6832 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,7 @@ stellar-strkey = { version = "0.0.8", optional = true } base64 = { version = "0.13.0", optional = true } serde = { version = "1.0.139", features = ["derive"], optional = true } serde_with = { version = "3.0.0", optional = true } -escape-bytes = { version = "0.1.0", git = "https://github.com/stellar/escape-bytes", rev = "457ee2147e84fcc969228e9b11c8431eb44f0df3", default-features = false, optional = true } +escape-bytes = { version = "0.0.0", git = "https://github.com/stellar/escape-bytes", rev = "457ee2147e84fcc969228e9b11c8431eb44f0df3", default-features = false, optional = true } hex = { version = "0.4.3", optional = true } arbitrary = {version = "1.1.3", features = ["derive"], optional = true} clap = { version = "4.2.4", default-features = false, features = ["std", "derive", "usage", "help"], optional = true } From d67d2bcc954f3538966aac292eedcc789cb16e11 Mon Sep 17 00:00:00 2001 From: Leigh McCulloch <351529+leighmcculloch@users.noreply.github.com> Date: Thu, 30 Nov 2023 17:41:38 -0800 Subject: [PATCH 08/11] Update xdrgen with StringM escape change --- Cargo.toml | 4 +-- Makefile | 2 +- src/curr/generated.rs | 57 +++++++++++++++-------------------- src/curr/scval_conversions.rs | 2 +- src/next/generated.rs | 57 +++++++++++++++-------------------- src/next/scval_conversions.rs | 2 +- 6 files changed, 53 insertions(+), 71 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ba5e6832..86b005b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,7 @@ stellar-strkey = { version = "0.0.8", optional = true } base64 = { version = "0.13.0", optional = true } serde = { version = "1.0.139", features = ["derive"], optional = true } serde_with = { version = "3.0.0", optional = true } -escape-bytes = { version = "0.0.0", git = "https://github.com/stellar/escape-bytes", rev = "457ee2147e84fcc969228e9b11c8431eb44f0df3", default-features = false, optional = true } +escape-bytes = { version = "0.0.0", git = "https://github.com/stellar/escape-bytes", rev = "457ee2147e84fcc969228e9b11c8431eb44f0df3", default-features = false } hex = { version = "0.4.3", optional = true } arbitrary = {version = "1.1.3", features = ["derive"], optional = true} clap = { version = "4.2.4", default-features = false, features = ["std", "derive", "usage", "help"], optional = true } @@ -36,7 +36,7 @@ serde_json = "1.0.89" [features] default = ["std", "curr"] std = ["alloc"] -alloc = ["dep:hex", "dep:stellar-strkey", "dep:escape-bytes"] +alloc = ["dep:hex", "dep:stellar-strkey", "escape-bytes/alloc"] curr = [] next = [] diff --git a/Makefile b/Makefile index 767c9f7d..1c730c15 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ CARGO_HACK_ARGS=--feature-powerset --exclude-features default --group-features b CARGO_DOC_ARGS?=--open -XDRGEN_VERSION=cbff4b31 +XDRGEN_VERSION=193ddc25bdcde71b860b17828dc3fb1e5655b27e XDRGEN_TYPES_CUSTOM_STR_IMPL=PublicKey,AccountId,MuxedAccount,MuxedAccountMed25519,SignerKey,SignerKeyEd25519SignedPayload,NodeId,ScAddress,AssetCode,AssetCode4,AssetCode12 all: build test diff --git a/src/curr/generated.rs b/src/curr/generated.rs index 79e18d06..63f93055 100644 --- a/src/curr/generated.rs +++ b/src/curr/generated.rs @@ -1703,6 +1703,17 @@ impl WriteXdr for BytesM { // StringM ------------------------------------------------------------------------ +/// A string type that contains arbitrary bytes. +/// +/// Convertible, fallibly, to/from a Rust UTF-8 String using +/// [`TryFrom`]/[`TryInto`]/[`StringM::to_utf8_string`]. +/// +/// Convertible, lossyly, to a Rust UTF-8 String using +/// [`StringM::to_utf8_string_lossy`]. +/// +/// Convertible to/from escaped printable-ASCII using +/// [`Display`]/[`ToString`]/[`FromStr`]. + #[cfg(feature = "alloc")] #[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] #[cfg_attr( @@ -1717,38 +1728,15 @@ pub struct StringM(Vec); #[cfg_attr(feature = "arbitrary", derive(Arbitrary))] pub struct StringM(Vec); -/// `write_utf8_lossy` is a modified copy of the Rust stdlib docs examples here: -/// -fn write_utf8_lossy(f: &mut impl core::fmt::Write, mut input: &[u8]) -> core::fmt::Result { - loop { - match core::str::from_utf8(input) { - Ok(valid) => { - write!(f, "{valid}")?; - break; - } - Err(error) => { - let (valid, after_valid) = input.split_at(error.valid_up_to()); - write!(f, "{}", core::str::from_utf8(valid).unwrap())?; - write!(f, "\u{FFFD}")?; - - if let Some(invalid_sequence_length) = error.error_len() { - input = &after_valid[invalid_sequence_length..]; - } else { - break; - } - } - } - } - Ok(()) -} - impl core::fmt::Display for StringM { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { #[cfg(feature = "alloc")] let v = &self.0; #[cfg(not(feature = "alloc"))] let v = self.0; - write_utf8_lossy(f, v)?; + for b in escape_bytes::Escape::new(v) { + write!(f, "{}", b as char)?; + } Ok(()) } } @@ -1760,7 +1748,9 @@ impl core::fmt::Debug for StringM { #[cfg(not(feature = "alloc"))] let v = self.0; write!(f, "StringM(")?; - write_utf8_lossy(f, v)?; + for b in escape_bytes::Escape::new(v) { + write!(f, "{}", b as char)?; + } write!(f, ")")?; Ok(()) } @@ -1770,7 +1760,8 @@ impl core::fmt::Debug for StringM { impl core::str::FromStr for StringM { type Err = Error; fn from_str(s: &str) -> core::result::Result { - s.try_into() + let b = escape_bytes::unescape(s.as_bytes()).map_err(|_| Error::Invalid)?; + Ok(Self(b)) } } @@ -1818,24 +1809,24 @@ impl StringM { impl StringM { #[cfg(feature = "alloc")] - pub fn to_string(&self) -> Result { + pub fn to_utf8_string(&self) -> Result { self.try_into() } #[cfg(feature = "alloc")] - pub fn into_string(self) -> Result { + pub fn into_utf8_string(self) -> Result { self.try_into() } #[cfg(feature = "alloc")] #[must_use] - pub fn to_string_lossy(&self) -> String { + pub fn to_utf8_string_lossy(&self) -> String { String::from_utf8_lossy(&self.0).into_owned() } #[cfg(feature = "alloc")] #[must_use] - pub fn into_string_lossy(self) -> String { + pub fn into_utf8_string_lossy(self) -> String { String::from_utf8_lossy(&self.0).into_owned() } } @@ -52060,7 +52051,7 @@ impl Type { } #[cfg(feature = "base64")] - pub fn from_xdr_base64(v: TypeVariant, b64: String, limits: Limits) -> Result { + pub fn from_xdr_base64(v: TypeVariant, b64: impl AsRef<[u8]>, limits: Limits) -> Result { let mut b64_reader = Cursor::new(b64); let mut dec = Limited::new( base64::read::DecoderReader::new(&mut b64_reader, base64::STANDARD), diff --git a/src/curr/scval_conversions.rs b/src/curr/scval_conversions.rs index aae4d98f..3eb830ee 100644 --- a/src/curr/scval_conversions.rs +++ b/src/curr/scval_conversions.rs @@ -371,7 +371,7 @@ impl TryFrom for String { if let ScVal::Symbol(s) = v { // TODO: It might be worth distinguishing the error case where this // is an invalid symbol with invalid characters. - Ok(s.0.into_string().map_err(|_| ())?) + Ok(s.0.into_utf8_string().map_err(|_| ())?) } else { Err(()) } diff --git a/src/next/generated.rs b/src/next/generated.rs index 0beb4e5a..5fb9706c 100644 --- a/src/next/generated.rs +++ b/src/next/generated.rs @@ -1703,6 +1703,17 @@ impl WriteXdr for BytesM { // StringM ------------------------------------------------------------------------ +/// A string type that contains arbitrary bytes. +/// +/// Convertible, fallibly, to/from a Rust UTF-8 String using +/// [`TryFrom`]/[`TryInto`]/[`StringM::to_utf8_string`]. +/// +/// Convertible, lossyly, to a Rust UTF-8 String using +/// [`StringM::to_utf8_string_lossy`]. +/// +/// Convertible to/from escaped printable-ASCII using +/// [`Display`]/[`ToString`]/[`FromStr`]. + #[cfg(feature = "alloc")] #[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] #[cfg_attr( @@ -1717,38 +1728,15 @@ pub struct StringM(Vec); #[cfg_attr(feature = "arbitrary", derive(Arbitrary))] pub struct StringM(Vec); -/// `write_utf8_lossy` is a modified copy of the Rust stdlib docs examples here: -/// -fn write_utf8_lossy(f: &mut impl core::fmt::Write, mut input: &[u8]) -> core::fmt::Result { - loop { - match core::str::from_utf8(input) { - Ok(valid) => { - write!(f, "{valid}")?; - break; - } - Err(error) => { - let (valid, after_valid) = input.split_at(error.valid_up_to()); - write!(f, "{}", core::str::from_utf8(valid).unwrap())?; - write!(f, "\u{FFFD}")?; - - if let Some(invalid_sequence_length) = error.error_len() { - input = &after_valid[invalid_sequence_length..]; - } else { - break; - } - } - } - } - Ok(()) -} - impl core::fmt::Display for StringM { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { #[cfg(feature = "alloc")] let v = &self.0; #[cfg(not(feature = "alloc"))] let v = self.0; - write_utf8_lossy(f, v)?; + for b in escape_bytes::Escape::new(v) { + write!(f, "{}", b as char)?; + } Ok(()) } } @@ -1760,7 +1748,9 @@ impl core::fmt::Debug for StringM { #[cfg(not(feature = "alloc"))] let v = self.0; write!(f, "StringM(")?; - write_utf8_lossy(f, v)?; + for b in escape_bytes::Escape::new(v) { + write!(f, "{}", b as char)?; + } write!(f, ")")?; Ok(()) } @@ -1770,7 +1760,8 @@ impl core::fmt::Debug for StringM { impl core::str::FromStr for StringM { type Err = Error; fn from_str(s: &str) -> core::result::Result { - s.try_into() + let b = escape_bytes::unescape(s.as_bytes()).map_err(|_| Error::Invalid)?; + Ok(Self(b)) } } @@ -1818,24 +1809,24 @@ impl StringM { impl StringM { #[cfg(feature = "alloc")] - pub fn to_string(&self) -> Result { + pub fn to_utf8_string(&self) -> Result { self.try_into() } #[cfg(feature = "alloc")] - pub fn into_string(self) -> Result { + pub fn into_utf8_string(self) -> Result { self.try_into() } #[cfg(feature = "alloc")] #[must_use] - pub fn to_string_lossy(&self) -> String { + pub fn to_utf8_string_lossy(&self) -> String { String::from_utf8_lossy(&self.0).into_owned() } #[cfg(feature = "alloc")] #[must_use] - pub fn into_string_lossy(self) -> String { + pub fn into_utf8_string_lossy(self) -> String { String::from_utf8_lossy(&self.0).into_owned() } } @@ -52103,7 +52094,7 @@ impl Type { } #[cfg(feature = "base64")] - pub fn from_xdr_base64(v: TypeVariant, b64: String, limits: Limits) -> Result { + pub fn from_xdr_base64(v: TypeVariant, b64: impl AsRef<[u8]>, limits: Limits) -> Result { let mut b64_reader = Cursor::new(b64); let mut dec = Limited::new( base64::read::DecoderReader::new(&mut b64_reader, base64::STANDARD), diff --git a/src/next/scval_conversions.rs b/src/next/scval_conversions.rs index 7f2fbca8..01267cb1 100644 --- a/src/next/scval_conversions.rs +++ b/src/next/scval_conversions.rs @@ -371,7 +371,7 @@ impl TryFrom for String { if let ScVal::Symbol(s) = v { // TODO: It might be worth distinguishing the error case where this // is an invalid symbol with invalid characters. - Ok(s.0.into_string().map_err(|_| ())?) + Ok(s.0.into_utf8_string().map_err(|_| ())?) } else { Err(()) } From b54a1810e6f96050a2384d97a9214091087724be Mon Sep 17 00:00:00 2001 From: Leigh McCulloch <351529+leighmcculloch@users.noreply.github.com> Date: Thu, 30 Nov 2023 18:28:31 -0800 Subject: [PATCH 09/11] upd tests --- tests/tx_debug_display.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/tx_debug_display.rs b/tests/tx_debug_display.rs index 8c44dc5c..9354747a 100644 --- a/tests/tx_debug_display.rs +++ b/tests/tx_debug_display.rs @@ -64,13 +64,13 @@ fn test_debug_invalid_utf8() -> Result<(), Error> { ), "BytesM(68656c6c6fc328776f726c64)" ); - // StringM replaces the invalid sequence with the Unicode replacement character. + // StringM escapes strings. assert_eq!( format!( "{:?}", <_ as TryInto>::try_into(b"hello\xc3\x28world")? ), - "StringM(hello�(world)" + r"StringM(hello\xc3(world)" ); Ok(()) } @@ -108,13 +108,13 @@ fn test_display_invalid_utf8() -> Result<(), Error> { ), "68656c6c6fc328776f726c64" ); - // StringM replaces the invalid sequence with the Unicode replacement character. + // StringM escapes strings. assert_eq!( format!( "{}", <_ as TryInto>::try_into(b"hello\xc3\x28world")? ), - "hello�(world" + r"hello\xc3(world" ); Ok(()) } From 35a99b82d342c07e946bf99f8fead0a77a993259 Mon Sep 17 00:00:00 2001 From: Leigh McCulloch <351529+leighmcculloch@users.noreply.github.com> Date: Fri, 1 Dec 2023 02:05:31 -0800 Subject: [PATCH 10/11] upd version of escape-bytes --- Cargo.lock | 5 +++-- Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d2d1bac0..f08da150 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -246,8 +246,9 @@ dependencies = [ [[package]] name = "escape-bytes" -version = "0.0.0" -source = "git+https://github.com/stellar/escape-bytes?rev=457ee2147e84fcc969228e9b11c8431eb44f0df3#457ee2147e84fcc969228e9b11c8431eb44f0df3" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3871d161fec5b6fade5fe7afe2e196b86839c5526a8d256c7b1a04dbbe5241a4" [[package]] name = "fnv" diff --git a/Cargo.toml b/Cargo.toml index ba5e6832..69f6fd65 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,7 @@ stellar-strkey = { version = "0.0.8", optional = true } base64 = { version = "0.13.0", optional = true } serde = { version = "1.0.139", features = ["derive"], optional = true } serde_with = { version = "3.0.0", optional = true } -escape-bytes = { version = "0.0.0", git = "https://github.com/stellar/escape-bytes", rev = "457ee2147e84fcc969228e9b11c8431eb44f0df3", default-features = false, optional = true } +escape-bytes = { version = "0.1.0", default-features = false, optional = true } hex = { version = "0.4.3", optional = true } arbitrary = {version = "1.1.3", features = ["derive"], optional = true} clap = { version = "4.2.4", default-features = false, features = ["std", "derive", "usage", "help"], optional = true } From 473d77bd8da6c8f924bd1e1eebdb09f09588459d Mon Sep 17 00:00:00 2001 From: Leigh McCulloch <351529+leighmcculloch@users.noreply.github.com> Date: Fri, 1 Dec 2023 02:08:59 -0800 Subject: [PATCH 11/11] upd hash --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1c730c15..21ebcb93 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ CARGO_HACK_ARGS=--feature-powerset --exclude-features default --group-features b CARGO_DOC_ARGS?=--open -XDRGEN_VERSION=193ddc25bdcde71b860b17828dc3fb1e5655b27e +XDRGEN_VERSION=e90b9ee62a89f346a86ef66f889bcfd8e1a8fbcb XDRGEN_TYPES_CUSTOM_STR_IMPL=PublicKey,AccountId,MuxedAccount,MuxedAccountMed25519,SignerKey,SignerKeyEd25519SignedPayload,NodeId,ScAddress,AssetCode,AssetCode4,AssetCode12 all: build test