Skip to content

Commit

Permalink
Simplify encode_buffer interface using Vec
Browse files Browse the repository at this point in the history
  • Loading branch information
qsantos committed Jun 20, 2024
1 parent 3733e79 commit 76b6247
Showing 1 changed file with 29 additions and 59 deletions.
88 changes: 29 additions & 59 deletions src/encode_unicode.rs
Original file line number Diff line number Diff line change
@@ -1,26 +1,24 @@
use std::io::{Read, Write};
use std::mem::{transmute, MaybeUninit};

use crate::encode_ascii_mapping::ASCII_TO_QWORD;
use crate::encode_unicode_mapping::from_unicode;

fn encode_buffer(input: &str, output_buf: &mut [MaybeUninit<u8>], mut cur: usize) -> usize {
fn encode_buffer(input: &str, output_buf: &mut Vec<u8>) {
// SAFETY: `output_buf[cur]`
// Accessing the element `cur` of `output_buf` is safe because
// - `cur <= 18 * input_buf.len() + 1` because we increment `cur` by at most 18 for each byte read
// - `18 * input_buf.len() + 1 <= output_buf` as check by the `assert!` below
assert!(output_buf.len() > input.len() * 18);
let mut cur = output_buf.len();
output_buf.reserve(input.len() * 18 + cur);
for c in input.chars() {
if c.is_ascii() {
let (bytes, len) = ASCII_TO_QWORD[c as usize];
if len == 0 {
} else if len <= 8 {
if (c == '\t' || c == '\n' || c == '\r')
&& cur > 0
// SAFETY: transmuting `output_buf[cur - 1]` from `MaybeInit<u8>` to `u8` is safe
// since `cur` starts at 0 and we always write an element before increment `cur`
// and see `output_buf[cur]` above
&& unsafe { transmute::<MaybeUninit<u8>, u8>(*output_buf.get_unchecked(cur - 1)) } == b' '
// SAFETY: see `output_buf[cur]` above
&& unsafe { *output_buf.as_ptr().add(cur - 1) } == b' '
{
cur -= 1;
}
Expand All @@ -34,15 +32,9 @@ fn encode_buffer(input: &str, output_buf: &mut [MaybeUninit<u8>], mut cur: usize
} else {
// handle only ASCII character encoded as more than 7 elements + space
assert_eq!(c, '%');
// SAFETY: source and destination derived from references, slices are of the
// correct length (replace with `MaybeUninit::copy_from_slice()` once stabilized).
// and see `output_buf[cur]` above
unsafe {
transmute::<&mut [MaybeUninit<u8>], &mut [u8]>(
&mut output_buf.get_unchecked_mut(cur..cur + 18),
)
}
.copy_from_slice(b"----- -..-. ----- ");
// SAFETY: `output_buf[cur]` above
unsafe { std::slice::from_raw_parts_mut(output_buf.as_mut_ptr().add(cur), 18) }
.copy_from_slice(b"----- -..-. ----- ");
}
cur += len;
} else {
Expand All @@ -52,28 +44,22 @@ fn encode_buffer(input: &str, output_buf: &mut [MaybeUninit<u8>], mut cur: usize
// SAFETY: we flush the buffer after each byte when we are below 18 free bytes
// next chunk; thus, there is at least 8 available bytes for writing after
// `output_buf + cur`.
let buf8 = unsafe { output_buf.get_unchecked_mut(cur..cur + 8) };
let buf8 =
unsafe { std::slice::from_raw_parts_mut(output_buf.as_mut_ptr().add(cur), 8) };
// SAFETY: the slice is exactly 8 bytes long per construction
let bytes8 = unsafe { &*(bytes.as_ptr() as *const [u8; 8]) };
// SAFETY: source and destination derived from references, slices are of the
// correct length (replace with `MaybeUninit::copy_from_slice()` once stabilized).
unsafe { transmute::<&mut [MaybeUninit<u8>], &mut [u8]>(buf8) }
.copy_from_slice(bytes8);
buf8.copy_from_slice(bytes8);
} else {
// SAFETY: source and destination derived from references, slices are of the
// correct length (replace with `MaybeUninit::copy_from_slice()` once stabilized).
// and see `output_buf[cur]` above
unsafe {
transmute::<&mut [MaybeUninit<u8>], &mut [u8]>(
&mut output_buf.get_unchecked_mut(cur..cur + len),
)
}
.copy_from_slice(bytes);
// SAFETY: `output_buf[cur]` above
unsafe { std::slice::from_raw_parts_mut(output_buf.as_mut_ptr().add(cur), len) }
.copy_from_slice(bytes);
}
cur += len;
}
}
cur
// SAFETY: the first `cur` bytes of `output_buf` are initialized because we only increase cur
// after writing to `output_buf`
unsafe { output_buf.set_len(cur) };
}

/// Encode characters from a [string slice][&str] into a [String].
Expand All @@ -94,13 +80,8 @@ fn encode_buffer(input: &str, output_buf: &mut [MaybeUninit<u8>], mut cur: usize
/// assert_eq!(morse, "- ..-.. .-.. ..-.. --. .-. .- .--. .... .. .");
/// ```
pub fn encode_string(input: &str) -> String {
let mut output_buf = vec![MaybeUninit::uninit(); input.len() * 18 + 1];
let cur = encode_buffer(input, &mut output_buf, 0);
output_buf.truncate(cur);
// SAFETY: transmuting the `cur` first elements of `output_buf` from `MaybeInit<u8>` to
// `u8` is safe since `cur` starts at 0 and we always write an element before increment
// `cur`
let mut output_buf: Vec<u8> = unsafe { transmute(output_buf) };
let mut output_buf = Vec::new();
encode_buffer(input, &mut output_buf);
if output_buf.last() == Some(&b' ') {
output_buf.pop();
}
Expand Down Expand Up @@ -135,8 +116,7 @@ pub fn encode_string(input: &str) -> String {
pub fn encode_stream(input: &mut impl Read, output: &mut impl Write) -> Result<(), std::io::Error> {
let mut input_buf = vec![0u8; 1 << 15];
let mut bytes_available = 0;
let mut output_buf = vec![MaybeUninit::uninit(); (18 << 15) + 1];
let mut cur = 0;
let mut output_buf = Vec::new();
loop {
let bytes_read = input.read(&mut input_buf[bytes_available..])?;
if bytes_read == 0 {
Expand All @@ -155,26 +135,16 @@ pub fn encode_stream(input: &mut impl Read, output: &mut impl Write) -> Result<(
(decoded, bytes_decoded)
}
};
cur = encode_buffer(decoded, &mut output_buf, cur);
if cur != 0 {
// SAFETY: transmuting `output_buf[cur - 1]` from `MaybeInit<u8>` to `u8` is safe
// since `cur` starts at 0 and we always write an element before increment `cur`
if unsafe { transmute::<MaybeUninit<u8>, u8>(output_buf[cur - 1]) } == b' ' {
cur -= 1;
// SAFETY: transmuting the `cur` first elements of `output_buf` from `MaybeInit<u8>` to
// `u8` is safe since `cur` starts at 0 and we always write an element before increment
// `cur`
let init: &[u8] = unsafe { transmute(&output_buf[..cur]) };
output.write_all(init)?;
output_buf[0].write(b' ');
cur = 1;
encode_buffer(decoded, &mut output_buf);
if !output_buf.is_empty() {
if output_buf.last() == Some(&b' ') {
output_buf.pop();
output.write_all(&output_buf)?;
output_buf.clear();
output_buf.push(b' ');
} else {
// SAFETY: transmuting the `cur` first elements of `output_buf` from `MaybeInit<u8>` to
// `u8` is safe since `cur` starts at 0 and we always write an element before increment
// `cur`
let init: &[u8] = unsafe { transmute(&output_buf[..cur]) };
output.write_all(init)?;
cur = 0;
output.write_all(&output_buf)?;
output_buf.clear();
}
}
input_buf.copy_within(bytes_decoded..bytes_available, 0);
Expand Down

0 comments on commit 76b6247

Please sign in to comment.