From 02010461c6cdae4478d4acedb12ef4fd052b7717 Mon Sep 17 00:00:00 2001 From: Alexandru Macovei Date: Sun, 14 Feb 2021 18:39:37 +0200 Subject: [PATCH] Simplify iteration, and remove unsafe Use builtin char iteration from core library, instead of manually iterating bytes and then using unsafe to attempt to extract utf-8 chars. This is safer, simpler, and just as fast. --- Cargo.toml | 2 -- src/byte.rs | 54 +++++++++++++----------------- src/byte_unit.rs | 85 +++++++++++++++++++----------------------------- 3 files changed, 56 insertions(+), 85 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7a8ee2b..7423e51 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,8 +17,6 @@ repository = "magiclen/Byte-Unit" branch = "master" [dependencies] -utf8-width = "0.1" - serde = { version = "1", optional = true } [features] diff --git a/src/byte.rs b/src/byte.rs index 6aaeaa3..56f0258 100644 --- a/src/byte.rs +++ b/src/byte.rs @@ -6,9 +6,7 @@ use alloc::string::String; use alloc::fmt::{self, Display, Formatter}; -use crate::{ - get_char_from_bytes, read_xib, AdjustedByte, ByteError, ByteUnit, ValueIncorrectError, -}; +use crate::{read_xib, AdjustedByte, ByteError, ByteUnit, ValueIncorrectError}; #[cfg(feature = "serde")] use crate::serde::ser::{Serialize, Serializer}; @@ -177,16 +175,14 @@ impl Byte { pub fn from_str>(s: S) -> Result { let s = s.as_ref().trim(); - let mut bytes = s.bytes(); + let mut chars = s.chars(); - let mut value = match bytes.next() { + let mut value = match chars.next() { Some(e) => { match e { - b'0'..=b'9' => f64::from(e - b'0'), + '0'..='9' => f64::from(e as u8 - b'0'), _ => { - return Err( - ValueIncorrectError::NotNumber(get_char_from_bytes(e, bytes)).into() - ); + return Err(ValueIncorrectError::NotNumber(e).into()); } } } @@ -194,39 +190,38 @@ impl Byte { }; let e = 'outer: loop { - match bytes.next() { + match chars.next() { Some(e) => { match e { - b'0'..=b'9' => { - value = value * 10.0 + f64::from(e - b'0'); + '0'..='9' => { + value = value * 10.0 + f64::from(e as u8 - b'0'); } - b'.' => { + '.' => { let mut i = 0.1; loop { - match bytes.next() { + match chars.next() { Some(e) => { match e { - b'0'..=b'9' => { - value += f64::from(e - b'0') * i; + '0'..='9' => { + value += f64::from(e as u8 - b'0') * i; i /= 10.0; } _ => { if (i * 10.0) as u8 == 1 { - return Err(ValueIncorrectError::NotNumber( - get_char_from_bytes(e, bytes), - ) - .into()); + return Err( + ValueIncorrectError::NotNumber(e).into() + ); } match e { - b' ' => { + ' ' => { loop { - match bytes.next() { + match chars.next() { Some(e) => { match e { - b' ' => (), + ' ' => (), _ => break 'outer Some(e), } } @@ -241,10 +236,7 @@ impl Byte { } None => { if (i * 10.0) as u8 == 1 { - return Err(ValueIncorrectError::NotNumber( - get_char_from_bytes(e, bytes), - ) - .into()); + return Err(ValueIncorrectError::NotNumber(e).into()); } break 'outer None; @@ -252,12 +244,12 @@ impl Byte { } } } - b' ' => { + ' ' => { loop { - match bytes.next() { + match chars.next() { Some(e) => { match e { - b' ' => (), + ' ' => (), _ => break 'outer Some(e), } } @@ -272,7 +264,7 @@ impl Byte { } }; - let unit = read_xib(e, bytes)?; + let unit = read_xib(e, chars)?; let bytes = get_bytes(value, unit); diff --git a/src/byte_unit.rs b/src/byte_unit.rs index fe5b58c..b22970b 100644 --- a/src/byte_unit.rs +++ b/src/byte_unit.rs @@ -1,7 +1,5 @@ -extern crate utf8_width; - use core::convert::TryFrom; -use core::str::{from_utf8_unchecked, Bytes, FromStr}; +use core::str::{Chars, FromStr}; #[cfg(feature = "serde")] use alloc::string::String; @@ -82,9 +80,9 @@ impl ByteUnit { pub fn from_str>(unit: S) -> Result { let s = unit.as_ref().trim(); - let mut bytes = s.bytes(); + let mut chars = s.chars(); - read_xib(bytes.next(), bytes) + read_xib(chars.next(), chars) } /// Use string slice to represent this `ByteUnit`. @@ -224,32 +222,15 @@ impl FromStr for ByteUnit { } } -#[inline] -pub(crate) fn get_char_from_bytes(e: u8, mut bytes: Bytes) -> char { - let width = unsafe { utf8_width::get_width_assume_valid(e) }; - - let mut char_bytes = [e; 4]; - - if width > 1 { - for e in char_bytes[1..].iter_mut().take(width - 1) { - *e = bytes.next().unwrap(); - } - } - - let char_str = unsafe { from_utf8_unchecked(&char_bytes[..width]) }; - - char::from_str(char_str).unwrap() -} - -pub(crate) fn read_xib(e: Option, mut bytes: Bytes) -> Result { +pub(crate) fn read_xib(e: Option, mut chars: Chars) -> Result { match e { Some(e) => { match e.to_ascii_uppercase() { - b'B' => { - match bytes.next() { + 'B' => { + match chars.next() { Some(e) => { Err(UnitIncorrectError { - character: get_char_from_bytes(e, bytes), + character: e, expected_characters: &[], also_expect_no_character: false, }) @@ -257,52 +238,52 @@ pub(crate) fn read_xib(e: Option, mut bytes: Bytes) -> Result Ok(ByteUnit::B), } } - b'K' => { - if read_ib(bytes)? { + 'K' => { + if read_ib(chars)? { Ok(ByteUnit::KiB) } else { Ok(ByteUnit::KB) } } - b'M' => { - if read_ib(bytes)? { + 'M' => { + if read_ib(chars)? { Ok(ByteUnit::MiB) } else { Ok(ByteUnit::MB) } } - b'G' => { - if read_ib(bytes)? { + 'G' => { + if read_ib(chars)? { Ok(ByteUnit::GiB) } else { Ok(ByteUnit::GB) } } - b'T' => { - if read_ib(bytes)? { + 'T' => { + if read_ib(chars)? { Ok(ByteUnit::TiB) } else { Ok(ByteUnit::TB) } } - b'P' => { - if read_ib(bytes)? { + 'P' => { + if read_ib(chars)? { Ok(ByteUnit::PiB) } else { Ok(ByteUnit::PB) } } #[cfg(feature = "u128")] - b'E' => { - if read_ib(bytes)? { + 'E' => { + if read_ib(chars)? { Ok(ByteUnit::EiB) } else { Ok(ByteUnit::EB) } } #[cfg(feature = "u128")] - b'Z' => { - if read_ib(bytes)? { + 'Z' => { + if read_ib(chars)? { Ok(ByteUnit::ZiB) } else { Ok(ByteUnit::ZB) @@ -312,7 +293,7 @@ pub(crate) fn read_xib(e: Option, mut bytes: Bytes) -> Result, mut bytes: Bytes) -> Result, mut bytes: Bytes) -> Result Result { - match bytes.next() { +fn read_ib(mut chars: Chars) -> Result { + match chars.next() { Some(e) => { match e.to_ascii_uppercase() { - b'I' => { - match bytes.next() { + 'I' => { + match chars.next() { Some(e) => { match e.to_ascii_uppercase() { - b'B' => Ok(true), + 'B' => Ok(true), _ => { Err(UnitIncorrectError { - character: get_char_from_bytes(e, bytes), + character: e, expected_characters: &['B'], also_expect_no_character: false, }) @@ -353,11 +334,11 @@ fn read_ib(mut bytes: Bytes) -> Result { None => Ok(true), } } - b'B' => { - match bytes.next() { + 'B' => { + match chars.next() { Some(e) => { Err(UnitIncorrectError { - character: get_char_from_bytes(e, bytes), + character: e, expected_characters: &[], also_expect_no_character: false, }) @@ -367,7 +348,7 @@ fn read_ib(mut bytes: Bytes) -> Result { } _ => { Err(UnitIncorrectError { - character: get_char_from_bytes(e, bytes), + character: e, expected_characters: &['B', 'i'], also_expect_no_character: false, })