Skip to content

Commit

Permalink
Lift writing to output out of encode_buffer
Browse files Browse the repository at this point in the history
  • Loading branch information
qsantos committed Jun 19, 2024
1 parent 7ba1def commit 41c6486
Showing 1 changed file with 30 additions and 25 deletions.
55 changes: 30 additions & 25 deletions src/encode_unicode.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
use std::io::{BufWriter, Read, Write};
use std::io::{Read, Write};
use std::mem::{transmute, MaybeUninit};

use crate::encode_ascii_mapping::ASCII_TO_QWORD;
use crate::encode_unicode_mapping::from_unicode;

fn encode_buffer(
output: &mut impl Write,
input: &str,
need_separator: &mut bool,
output_buf: &mut [MaybeUninit<u8>],
) -> Result<(), std::io::Error> {
) -> Result<usize, std::io::Error> {
// SAFETY: `output_buf[cur]`
// Accessing the element `cur` of `output_buf` is safe because
// - `cur <= 18 * input_buf.len() + 1` because we increment `cur` by at most 18 for each byte read
Expand Down Expand Up @@ -84,23 +83,7 @@ fn encode_buffer(
cur += len;
}
}
// flush buffer
if cur != 0 {
// SAFETY: transmuting `output_buf[cur - 1]` from `MaybeInit<u8>` to `u8` is safe
// since `cur` starts at 0 and we always write an element before increment `cur`
if unsafe { transmute::<MaybeUninit<u8>, u8>(output_buf[cur - 1]) } == b' ' {
cur -= 1;
*need_separator = true;
} else {
*need_separator = false;
}
// SAFETY: transmuting the `cur` first elements of `output_buf` from `MaybeInit<u8>` to
// `u8` is safe since `cur` starts at 0 and we always write an element before increment
// `cur`
let init: &[u8] = unsafe { transmute(&output_buf[..cur]) };
output.write_all(init)?;
}
Ok(())
Ok(cur)
}

/// Encode characters from a [string slice][&str] into a [String].
Expand All @@ -121,11 +104,17 @@ fn encode_buffer(
/// assert_eq!(morse, "- ..-.. .-.. ..-.. --. .-. .- .--. .... .. .");
/// ```
pub fn encode_string(input: &str) -> String {
let mut writer = BufWriter::new(Vec::new());
let mut output_buf = vec![MaybeUninit::uninit(); input.len() * 18 + 1];
encode_buffer(&mut writer, input, &mut false, &mut output_buf).unwrap();
let vec = writer.into_inner().unwrap();
String::from_utf8(vec).unwrap()
let cur = encode_buffer(input, &mut false, &mut output_buf).unwrap();
output_buf.truncate(cur);
// SAFETY: transmuting the `cur` first elements of `output_buf` from `MaybeInit<u8>` to
// `u8` is safe since `cur` starts at 0 and we always write an element before increment
// `cur`
let mut output_buf: Vec<u8> = unsafe { transmute(output_buf) };
if output_buf.last() == Some(&b' ') {
output_buf.pop();
}
String::from_utf8(output_buf).unwrap()
}

/// Encode Unicode characters from a [Read][std::io::Read] object into a [Write][std::io::Write] object.
Expand Down Expand Up @@ -175,7 +164,23 @@ pub fn encode_stream(input: &mut impl Read, output: &mut impl Write) {
(decoded, bytes_decoded)
}
};
encode_buffer(output, decoded, &mut need_separator, &mut output_buf).unwrap();
let mut cur = encode_buffer(decoded, &mut need_separator, &mut output_buf).unwrap();
// flush buffer
if cur != 0 {
// SAFETY: transmuting `output_buf[cur - 1]` from `MaybeInit<u8>` to `u8` is safe
// since `cur` starts at 0 and we always write an element before increment `cur`
if unsafe { transmute::<MaybeUninit<u8>, u8>(output_buf[cur - 1]) } == b' ' {
cur -= 1;
need_separator = true;
} else {
need_separator = false;
}
// SAFETY: transmuting the `cur` first elements of `output_buf` from `MaybeInit<u8>` to
// `u8` is safe since `cur` starts at 0 and we always write an element before increment
// `cur`
let init: &[u8] = unsafe { transmute(&output_buf[..cur]) };
output.write_all(init).unwrap();
}
input_buf.copy_within(bytes_decoded..bytes_available, 0);
bytes_available -= bytes_decoded;
}
Expand Down

0 comments on commit 41c6486

Please sign in to comment.