From c8b82b4b59a069066e9840e3e98b77eb7d8e6109 Mon Sep 17 00:00:00 2001 From: Semyon Panenkov Date: Mon, 24 Jun 2024 22:49:27 +0200 Subject: [PATCH 1/7] Add `borsh` support --- compact_str/Cargo.toml | 2 + compact_str/src/features/borsh.rs | 78 +++++++++++++++++++++++++++++++ compact_str/src/features/mod.rs | 2 + 3 files changed, 82 insertions(+) create mode 100644 compact_str/src/features/borsh.rs diff --git a/compact_str/Cargo.toml b/compact_str/Cargo.toml index e6940d1d..6e40670f 100644 --- a/compact_str/Cargo.toml +++ b/compact_str/Cargo.toml @@ -16,6 +16,7 @@ default = ["std"] std = [] arbitrary = ["dep:arbitrary"] +borsh = ["dep:borsh"] bytes = ["dep:bytes"] diesel = ["dep:diesel"] markup = ["dep:markup"] @@ -31,6 +32,7 @@ sqlx-sqlite = ["sqlx", "sqlx/sqlite"] [dependencies] arbitrary = { version = "1", optional = true, default-features = false } +borsh = { version = "1", optional = true } bytes = { version = "1", optional = true } diesel = { version = "2", optional = true, default-features = false } markup = { version = "0.13", optional = true, default-features = false } diff --git a/compact_str/src/features/borsh.rs b/compact_str/src/features/borsh.rs new file mode 100644 index 00000000..10e05f70 --- /dev/null +++ b/compact_str/src/features/borsh.rs @@ -0,0 +1,78 @@ +#![cfg_attr(docsrs, doc(cfg(feature = "borsh")))] + +use borsh::io::{ + Error, + ErrorKind, + Read, + Result, + Write, +}; +use borsh::{ + BorshDeserialize, + BorshSerialize, +}; + +use crate::CompactString; + +impl BorshSerialize for CompactString { + fn serialize(&self, writer: &mut W) -> Result<()> { + self.as_str().serialize(writer) + } +} + +impl BorshDeserialize for CompactString { + fn deserialize_reader(reader: &mut R) -> Result { + let len = u32::deserialize_reader(&mut *reader)? as usize; + + // Do not call `Error::new` on OOM as it allocates. + let mut s = CompactString::try_with_capacity(len).map_err(|_| ErrorKind::OutOfMemory)?; + + // SAFETY: The current length is zero so the bytes are uninterpreted + // and don't have to form valid UTF-8 + let buf = unsafe { s.as_mut_bytes() }; + + reader.read_exact(&mut buf[..len])?; + std::str::from_utf8(&buf[..len]).map_err(|err| Error::new(ErrorKind::InvalidData, err))?; + + // SAFETY: The first `len` bytes are initialized and are valid UTF-8 + unsafe { s.set_len(len) }; + + Ok(s) + } +} + +#[cfg(test)] +mod tests { + use alloc::string::String; + + use test_strategy::proptest; + + use crate::CompactString; + + #[test] + fn test_roundtrip() { + const VALUE: &str = "Hello, 🌍!"; + + let bytes_compact = borsh::to_vec(&CompactString::from(VALUE)).unwrap(); + let bytes_control = borsh::to_vec(&String::from(VALUE)).unwrap(); + assert_eq!(&*bytes_compact, &*bytes_control); + + let compact: CompactString = borsh::from_slice(&bytes_compact).unwrap(); + let control: String = borsh::from_slice(&bytes_control).unwrap(); + assert_eq!(compact, VALUE); + assert_eq!(control, VALUE); + } + + #[cfg_attr(miri, ignore)] + #[proptest] + fn proptest_roundtrip(s: String) { + let bytes_compact = borsh::to_vec(&CompactString::from(&s)).unwrap(); + let bytes_control = borsh::to_vec(&String::from(&s)).unwrap(); + assert_eq!(&*bytes_compact, &*bytes_control); + + let compact: CompactString = borsh::from_slice(&bytes_compact).unwrap(); + let control: String = borsh::from_slice(&bytes_control).unwrap(); + assert_eq!(compact, s); + assert_eq!(control, s); + } +} diff --git a/compact_str/src/features/mod.rs b/compact_str/src/features/mod.rs index 7829b25b..a2ac7c05 100644 --- a/compact_str/src/features/mod.rs +++ b/compact_str/src/features/mod.rs @@ -2,6 +2,8 @@ #[cfg(feature = "arbitrary")] mod arbitrary; +#[cfg(feature = "borsh")] +mod borsh; #[cfg(feature = "bytes")] mod bytes; #[cfg(feature = "diesel")] From 0107d1945438fd7a28b0b24ba7712ba001ff3524 Mon Sep 17 00:00:00 2001 From: SmnTin Date: Tue, 25 Jun 2024 00:00:54 +0200 Subject: [PATCH 2/7] Use `core` instead of `std` --- compact_str/src/features/borsh.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compact_str/src/features/borsh.rs b/compact_str/src/features/borsh.rs index 10e05f70..da06b63d 100644 --- a/compact_str/src/features/borsh.rs +++ b/compact_str/src/features/borsh.rs @@ -32,7 +32,7 @@ impl BorshDeserialize for CompactString { let buf = unsafe { s.as_mut_bytes() }; reader.read_exact(&mut buf[..len])?; - std::str::from_utf8(&buf[..len]).map_err(|err| Error::new(ErrorKind::InvalidData, err))?; + core::str::from_utf8(&buf[..len]).map_err(|err| Error::new(ErrorKind::InvalidData, err))?; // SAFETY: The first `len` bytes are initialized and are valid UTF-8 unsafe { s.set_len(len) }; From 5d5d64d8480ee07109c52071950d05576baba9e5 Mon Sep 17 00:00:00 2001 From: SmnTin Date: Tue, 25 Jun 2024 00:38:50 +0200 Subject: [PATCH 3/7] Add a test failing with UB --- compact_str/src/features/borsh.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/compact_str/src/features/borsh.rs b/compact_str/src/features/borsh.rs index da06b63d..b733c97a 100644 --- a/compact_str/src/features/borsh.rs +++ b/compact_str/src/features/borsh.rs @@ -47,6 +47,10 @@ mod tests { use test_strategy::proptest; + use crate::repr::{ + HEAP_MASK, + MAX_SIZE, + }; use crate::CompactString; #[test] @@ -63,6 +67,12 @@ mod tests { assert_eq!(control, VALUE); } + #[test] + fn test_deserialize_invalid_utf8() { + let bytes = borsh::to_vec(&[HEAP_MASK; MAX_SIZE] as &[u8]).unwrap(); + borsh::from_slice::(&bytes).unwrap_err(); + } + #[cfg_attr(miri, ignore)] #[proptest] fn proptest_roundtrip(s: String) { From 1981602538f3b2a93d0063bba5d4ae5c5ff4792a Mon Sep 17 00:00:00 2001 From: SmnTin Date: Tue, 25 Jun 2024 15:11:14 +0200 Subject: [PATCH 4/7] Reimplement without unsafe --- compact_str/Cargo.toml | 2 +- compact_str/src/features/borsh.rs | 34 ++++++++++++++++++------------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/compact_str/Cargo.toml b/compact_str/Cargo.toml index 6e40670f..a0d810d2 100644 --- a/compact_str/Cargo.toml +++ b/compact_str/Cargo.toml @@ -12,7 +12,7 @@ keywords = ["string", "compact", "small", "memory", "mutable"] categories = ["encoding", "parsing", "memory-management", "text-processing"] [features] -default = ["std"] +default = ["std", "borsh"] std = [] arbitrary = ["dep:arbitrary"] diff --git a/compact_str/src/features/borsh.rs b/compact_str/src/features/borsh.rs index b733c97a..39e390db 100644 --- a/compact_str/src/features/borsh.rs +++ b/compact_str/src/features/borsh.rs @@ -1,5 +1,9 @@ #![cfg_attr(docsrs, doc(cfg(feature = "borsh")))] +use alloc::string::String; +use alloc::vec::Vec; +use core::str; + use borsh::io::{ Error, ErrorKind, @@ -12,6 +16,7 @@ use borsh::{ BorshSerialize, }; +use crate::repr::MAX_SIZE; use crate::CompactString; impl BorshSerialize for CompactString { @@ -24,20 +29,21 @@ impl BorshDeserialize for CompactString { fn deserialize_reader(reader: &mut R) -> Result { let len = u32::deserialize_reader(&mut *reader)? as usize; - // Do not call `Error::new` on OOM as it allocates. - let mut s = CompactString::try_with_capacity(len).map_err(|_| ErrorKind::OutOfMemory)?; - - // SAFETY: The current length is zero so the bytes are uninterpreted - // and don't have to form valid UTF-8 - let buf = unsafe { s.as_mut_bytes() }; - - reader.read_exact(&mut buf[..len])?; - core::str::from_utf8(&buf[..len]).map_err(|err| Error::new(ErrorKind::InvalidData, err))?; - - // SAFETY: The first `len` bytes are initialized and are valid UTF-8 - unsafe { s.set_len(len) }; - - Ok(s) + if len <= MAX_SIZE { + let mut buf = [0u8; MAX_SIZE]; + reader.read_exact(&mut buf[..len])?; + let s = str::from_utf8(&buf[..len]) + .map_err(|err| Error::new(ErrorKind::InvalidData, err))?; + Ok(CompactString::from(s)) + } else { + let mut buf = Vec::new(); + buf.try_reserve_exact(len)?; + buf.resize(len, 0u8); + reader.read_exact(&mut buf[..])?; + let s = + String::from_utf8(buf).map_err(|err| Error::new(ErrorKind::InvalidData, err))?; + Ok(CompactString::from(s)) + } } } From 0abadf588868bf4dc1930225d6d9b6b172a3f277 Mon Sep 17 00:00:00 2001 From: Semyon Panenkov Date: Tue, 25 Jun 2024 23:44:23 +0500 Subject: [PATCH 5/7] Use `take` + `read_to_end` Co-authored-by: Jiahao XU <30436523+NobodyXu@users.noreply.github.com> --- compact_str/src/features/borsh.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/compact_str/src/features/borsh.rs b/compact_str/src/features/borsh.rs index 39e390db..46444847 100644 --- a/compact_str/src/features/borsh.rs +++ b/compact_str/src/features/borsh.rs @@ -38,8 +38,7 @@ impl BorshDeserialize for CompactString { } else { let mut buf = Vec::new(); buf.try_reserve_exact(len)?; - buf.resize(len, 0u8); - reader.read_exact(&mut buf[..])?; + reader.take(len as u64).read_to_end(&mut buf)?; let s = String::from_utf8(buf).map_err(|err| Error::new(ErrorKind::InvalidData, err))?; Ok(CompactString::from(s)) From 05e8714c0a5f8c599f93bc1ca084d4d5b0f11bb7 Mon Sep 17 00:00:00 2001 From: SmnTin Date: Tue, 25 Jun 2024 21:01:14 +0200 Subject: [PATCH 6/7] Fix nits --- compact_str/Cargo.toml | 2 +- compact_str/src/features/borsh.rs | 39 +++++++++++++++++-------------- 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/compact_str/Cargo.toml b/compact_str/Cargo.toml index a0d810d2..6e40670f 100644 --- a/compact_str/Cargo.toml +++ b/compact_str/Cargo.toml @@ -12,7 +12,7 @@ keywords = ["string", "compact", "small", "memory", "mutable"] categories = ["encoding", "parsing", "memory-management", "text-processing"] [features] -default = ["std", "borsh"] +default = ["std"] std = [] arbitrary = ["dep:arbitrary"] diff --git a/compact_str/src/features/borsh.rs b/compact_str/src/features/borsh.rs index 46444847..bc2a42bc 100644 --- a/compact_str/src/features/borsh.rs +++ b/compact_str/src/features/borsh.rs @@ -38,7 +38,9 @@ impl BorshDeserialize for CompactString { } else { let mut buf = Vec::new(); buf.try_reserve_exact(len)?; - reader.take(len as u64).read_to_end(&mut buf)?; + if reader.take(len as u64).read_to_end(&mut buf)? < len { + return Err(ErrorKind::UnexpectedEof.into()); + } let s = String::from_utf8(buf).map_err(|err| Error::new(ErrorKind::InvalidData, err))?; Ok(CompactString::from(s)) @@ -58,18 +60,15 @@ mod tests { }; use crate::CompactString; - #[test] - fn test_roundtrip() { - const VALUE: &str = "Hello, 🌍!"; - - let bytes_compact = borsh::to_vec(&CompactString::from(VALUE)).unwrap(); - let bytes_control = borsh::to_vec(&String::from(VALUE)).unwrap(); + fn assert_roundtrip(s: &str) { + let bytes_compact = borsh::to_vec(&CompactString::from(s)).unwrap(); + let bytes_control = borsh::to_vec(&String::from(s)).unwrap(); assert_eq!(&*bytes_compact, &*bytes_control); let compact: CompactString = borsh::from_slice(&bytes_compact).unwrap(); let control: String = borsh::from_slice(&bytes_control).unwrap(); - assert_eq!(compact, VALUE); - assert_eq!(control, VALUE); + assert_eq!(compact, s); + assert_eq!(control, s); } #[test] @@ -78,16 +77,22 @@ mod tests { borsh::from_slice::(&bytes).unwrap_err(); } + #[test] + fn test_deserialize_unexpected_eof() { + let s = core::str::from_utf8(&[b'a'; 55]).unwrap(); + let mut bytes = borsh::to_vec(s).unwrap(); + bytes.pop(); + borsh::from_slice::(&bytes).unwrap_err(); + } + + #[test] + fn test_roundtrip() { + assert_roundtrip("Hello, 🌍!"); + } + #[cfg_attr(miri, ignore)] #[proptest] fn proptest_roundtrip(s: String) { - let bytes_compact = borsh::to_vec(&CompactString::from(&s)).unwrap(); - let bytes_control = borsh::to_vec(&String::from(&s)).unwrap(); - assert_eq!(&*bytes_compact, &*bytes_control); - - let compact: CompactString = borsh::from_slice(&bytes_compact).unwrap(); - let control: String = borsh::from_slice(&bytes_control).unwrap(); - assert_eq!(compact, s); - assert_eq!(control, s); + assert_roundtrip(&s); } } From 114b819ddff7455f09a94286b579c2c4472b2537 Mon Sep 17 00:00:00 2001 From: SmnTin Date: Wed, 26 Jun 2024 22:26:17 +0200 Subject: [PATCH 7/7] Prevent a potential DoS attack --- compact_str/src/features/borsh.rs | 36 ++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/compact_str/src/features/borsh.rs b/compact_str/src/features/borsh.rs index bc2a42bc..6f174879 100644 --- a/compact_str/src/features/borsh.rs +++ b/compact_str/src/features/borsh.rs @@ -36,11 +36,9 @@ impl BorshDeserialize for CompactString { .map_err(|err| Error::new(ErrorKind::InvalidData, err))?; Ok(CompactString::from(s)) } else { - let mut buf = Vec::new(); - buf.try_reserve_exact(len)?; - if reader.take(len as u64).read_to_end(&mut buf)? < len { - return Err(ErrorKind::UnexpectedEof.into()); - } + // We can't just deserialize `Vec` because we have already read the length + // TODO: replace with `read_buf` when (if) it stabilizes + let buf = vec_from_reader(len, reader)?; let s = String::from_utf8(buf).map_err(|err| Error::new(ErrorKind::InvalidData, err))?; Ok(CompactString::from(s)) @@ -48,6 +46,34 @@ impl BorshDeserialize for CompactString { } } +// A copy of hidden `u8::vec_from_reader`(https://docs.rs/borsh/1.5.1/src/borsh/de/mod.rs.html#156-184) +fn vec_from_reader(len: usize, reader: &mut R) -> Result> { + // Avoid OOM by limiting the size of allocation. This makes the read + // less efficient (since we need to loop and reallocate) but it protects + // us from someone sending us [0xff, 0xff, 0xff, 0xff] and forcing us to + // allocate 4GiB of memory. + let mut vec = vec![0u8; len.min(1024 * 1024)]; + let mut pos = 0; + while pos < len { + if pos == vec.len() { + vec.resize(vec.len().saturating_mul(2).min(len), 0) + } + // TODO(mina86): Convert this to read_buf once that stabilises. + match reader.read(&mut vec.as_mut_slice()[pos..])? { + 0 => { + return Err(Error::new( + ErrorKind::InvalidData, + "Unexpected length of input", + )) + } + read => { + pos += read; + } + } + } + Ok(vec) +} + #[cfg(test)] mod tests { use alloc::string::String;