From 4804346dc6a7294bb6d2d8c97ead5e00b21ea36b Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Tue, 12 Nov 2024 11:53:00 +0100 Subject: [PATCH 1/5] Some cleanups --- bzlib.rs | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/bzlib.rs b/bzlib.rs index 98ad76996..66d2c3d0c 100644 --- a/bzlib.rs +++ b/bzlib.rs @@ -482,8 +482,7 @@ const _C_SHORT_SIZE: () = assert!(core::mem::size_of::() == const _C_CHAR_SIZE: () = assert!(core::mem::size_of::() == 1); unsafe extern "C" fn default_bzalloc(_opaque: *mut c_void, items: i32, size: i32) -> *mut c_void { - let v: *mut c_void = malloc((items * size) as usize); - v + malloc((items * size) as usize) } unsafe extern "C" fn default_bzfree(_opaque: *mut c_void, addr: *mut c_void) { if !addr.is_null() { @@ -523,14 +522,12 @@ unsafe fn bzalloc_array(bzalloc: AllocFunc, opaque: *mut c_void, len: usize) let len = i32::try_from(len).ok()?; let width = i32::try_from(mem::size_of::()).ok()?; - let ptr = bzalloc(opaque, len, width); + let ptr = bzalloc(opaque, len, width).cast::(); if ptr.is_null() { return None; } - let ptr = ptr.cast::(); - ptr::write_bytes(ptr, 0, len as usize); Some(ptr) @@ -2643,11 +2640,7 @@ enum OpenMode { FileDescriptor(i32), } -unsafe fn bzopen_or_bzdopen( - path: *const c_char, - open_mode: OpenMode, - mode: *const c_char, -) -> *mut BZFILE { +unsafe fn bzopen_or_bzdopen(path: Option<&CStr>, open_mode: OpenMode, mode: &CStr) -> *mut BZFILE { let mut bzerr = 0; let mut unused: [c_char; BZ_MAX_UNUSED as usize] = [0; BZ_MAX_UNUSED as usize]; @@ -2659,18 +2652,6 @@ unsafe fn bzopen_or_bzdopen( let mut smallMode = false; let mut operation = Operation::Reading; - let mode = if mode.is_null() { - return ptr::null_mut(); - } else { - CStr::from_ptr(mode) - }; - - let path = if path.is_null() { - None - } else { - Some(CStr::from_ptr(path)) - }; - for c in mode.to_bytes() { match c { b'r' => operation = Operation::Reading, @@ -2756,6 +2737,18 @@ unsafe fn bzopen_or_bzdopen( /// [`pointer::as_mut`]: https://doc.rust-lang.org/core/primitive.pointer.html#method.as_mut #[export_name = prefix!(BZ2_bzopen)] pub unsafe extern "C" fn BZ2_bzopen(path: *const c_char, mode: *const c_char) -> *mut BZFILE { + let mode = if mode.is_null() { + return ptr::null_mut(); + } else { + CStr::from_ptr(mode) + }; + + let path = if path.is_null() { + None + } else { + Some(CStr::from_ptr(path)) + }; + bzopen_or_bzdopen(path, OpenMode::Pointer, mode) } @@ -2773,7 +2766,13 @@ pub unsafe extern "C" fn BZ2_bzopen(path: *const c_char, mode: *const c_char) -> /// [`pointer::as_mut`]: https://doc.rust-lang.org/core/primitive.pointer.html#method.as_mut #[export_name = prefix!(BZ2_bzdopen)] pub unsafe extern "C" fn BZ2_bzdopen(fd: c_int, mode: *const c_char) -> *mut BZFILE { - bzopen_or_bzdopen(ptr::null(), OpenMode::FileDescriptor(fd), mode) + let mode = if mode.is_null() { + return ptr::null_mut(); + } else { + CStr::from_ptr(mode) + }; + + bzopen_or_bzdopen(None, OpenMode::FileDescriptor(fd), mode) } /// Reads up to `len` (uncompressed) bytes from the compressed file `b` into the buffer `buf`. From e0732ddd98b3572fd30cd1f92eb9f38aa9f0cfa7 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Tue, 12 Nov 2024 13:56:01 +0100 Subject: [PATCH 2/5] Encapsulate memory allocation and freeing for Arr1, Arr2 and Ftab --- bzlib.rs | 159 ++++++++++++++++++++++++------------------------------- 1 file changed, 68 insertions(+), 91 deletions(-) diff --git a/bzlib.rs b/bzlib.rs index 66d2c3d0c..756de559e 100644 --- a/bzlib.rs +++ b/bzlib.rs @@ -226,24 +226,22 @@ pub(crate) struct Arr1 { } impl Arr1 { - fn new() -> Self { - Self { - ptr: dangling(), - len: 0, - } - } - - /// Safety: ptr and len must satisfy the requirements of [`core::slice::from_raw_parts_mut`]. - unsafe fn from_raw_parts_mut(ptr: *mut u32, len: usize) -> Self { - Self { ptr, len } + unsafe fn alloc(bzalloc: AllocFunc, opaque: *mut c_void, len: usize) -> Option { + let ptr = bzalloc_array(bzalloc, opaque, len)?; + Some(Self { ptr, len }) } - fn into_raw_parts(self) -> (*mut u32, usize) { - (self.ptr, self.len) - } - - fn is_empty(&self) -> bool { - self.len == 0 + unsafe fn dealloc(&mut self, bzfree: FreeFunc, opaque: *mut c_void) { + let this = mem::replace( + self, + Self { + ptr: dangling(), + len: 0, + }, + ); + if this.len != 0 { + bzfree(opaque, this.ptr.cast()) + } } pub(crate) fn mtfv(&mut self) -> &mut [u16] { @@ -261,24 +259,22 @@ pub(crate) struct Arr2 { } impl Arr2 { - fn new() -> Self { - Self { - ptr: dangling(), - len: 0, - } + unsafe fn alloc(bzalloc: AllocFunc, opaque: *mut c_void, len: usize) -> Option { + let ptr = bzalloc_array(bzalloc, opaque, len)?; + Some(Self { ptr, len }) } - /// Safety: ptr must satisfy the requirements of [`core::slice::from_raw_parts_mut`]. - unsafe fn from_raw_parts_mut(ptr: *mut u32, len: usize) -> Self { - Self { ptr, len } - } - - fn into_raw_parts(self) -> (*mut u32, usize) { - (self.ptr, self.len) - } - - fn is_empty(&self) -> bool { - self.len == 0 + unsafe fn dealloc(&mut self, bzfree: FreeFunc, opaque: *mut c_void) { + let this = mem::replace( + self, + Self { + ptr: dangling(), + len: 0, + }, + ); + if this.len != 0 { + bzfree(opaque, this.ptr.cast()) + } } pub(crate) fn eclass(&mut self) -> &mut [u32] { @@ -324,23 +320,27 @@ pub(crate) struct Ftab { } impl Ftab { - /// Safety: ptr must satisfy the requirements of `Option<&mut [u32; FTAB_LEN]>`. - unsafe fn from_ptr(ptr: *mut u32) -> Self { - Self { ptr } + unsafe fn alloc(bzalloc: AllocFunc, opaque: *mut c_void) -> Option { + let ptr = bzalloc_array(bzalloc, opaque, FTAB_LEN)?; + Some(Self { ptr }) } - fn is_null(&self) -> bool { - self.ptr.is_null() + unsafe fn dealloc(&mut self, bzfree: FreeFunc, opaque: *mut c_void) { + let this = mem::replace( + self, + Self { + ptr: ptr::null_mut(), + }, + ); + if !this.ptr.is_null() { + bzfree(opaque, this.ptr.cast()) + } } pub(crate) fn ftab(&mut self) -> &mut [u32; FTAB_LEN] { // NOTE: this panics if the pointer is NULL, that is important! unsafe { self.ptr.cast::<[u32; FTAB_LEN]>().as_mut().unwrap() } } - - fn into_ptr(self) -> *mut u32 { - self.ptr - } } pub(crate) struct DState { @@ -429,7 +429,7 @@ impl DSlice { len: usize, ) -> Option { let ptr = bzalloc_array::(bzalloc, opaque, len)?; - Some(Self::from_raw_parts_mut(ptr, len)) + Some(Self { ptr, len }) } pub(crate) unsafe fn dealloc(&mut self, bzfree: FreeFunc, opaque: *mut c_void) { @@ -439,11 +439,6 @@ impl DSlice { } } - /// Safety: ptr must satisfy the requirements of [`core::slice::from_raw_parts_mut`]. - pub(crate) unsafe fn from_raw_parts_mut(ptr: *mut T, len: usize) -> Self { - Self { ptr, len } - } - pub(crate) fn as_slice(&self) -> &[T] { unsafe { core::slice::from_raw_parts(self.ptr, self.len) } } @@ -577,10 +572,10 @@ unsafe fn BZ2_bzCompressInitHelp( workFactor = 30; } - let bzalloc = (*strm).bzalloc.get_or_insert(default_bzalloc); - let bzfree = (*strm).bzfree.get_or_insert(default_bzfree); + let bzalloc = *(*strm).bzalloc.get_or_insert(default_bzalloc); + let bzfree = *(*strm).bzfree.get_or_insert(default_bzfree); - let Some(s) = bzalloc_array::(*bzalloc, (*strm).opaque, 1) else { + let Some(s) = bzalloc_array::(bzalloc, (*strm).opaque, 1) else { return ReturnCode::BZ_MEM_ERROR; }; @@ -591,44 +586,38 @@ unsafe fn BZ2_bzCompressInitHelp( let n = 100000 * blockSize100k; let arr1_len = n as usize; - let arr1_alloc = bzalloc_array(*bzalloc, (*strm).opaque, arr1_len); + let arr1 = Arr1::alloc(bzalloc, (*strm).opaque, arr1_len); let arr2_len = n as usize + (2 + 12 + 18 + 2); - let arr2_alloc = bzalloc_array(*bzalloc, (*strm).opaque, arr2_len); + let arr2 = Arr2::alloc(bzalloc, (*strm).opaque, arr2_len); - let ftab_alloc = bzalloc_array(*bzalloc, (*strm).opaque, FTAB_LEN); + let ftab = Ftab::alloc(bzalloc, (*strm).opaque); - let (Some(arr1_alloc), Some(arr2_alloc), Some(ftab_alloc)) = - (arr1_alloc, arr2_alloc, ftab_alloc) - else { - if let Some(ptr) = arr1_alloc { - (bzfree)((*strm).opaque, ptr as *mut c_void); + match (arr1, arr2, ftab) { + (Some(arr1), Some(arr2), Some(ftab)) => { + (*s).arr1 = arr1; + (*s).arr2 = arr2; + (*s).ftab = ftab; } + (arr1, arr2, ftab) => { + if let Some(mut arr1) = arr1 { + arr1.dealloc(bzfree, (*strm).opaque); + } - if let Some(ptr) = arr2_alloc { - (bzfree)((*strm).opaque, ptr as *mut c_void); - } + if let Some(mut arr2) = arr2 { + arr2.dealloc(bzfree, (*strm).opaque); + } - if let Some(ptr) = ftab_alloc { - (bzfree)((*strm).opaque, ptr as *mut c_void); - } + if let Some(mut ftab) = ftab { + ftab.dealloc(bzfree, (*strm).opaque); + } - if !s.is_null() { (bzfree)((*strm).opaque, s as *mut c_void); - } - return ReturnCode::BZ_MEM_ERROR; + return ReturnCode::BZ_MEM_ERROR; + } }; - // SAFETY: pointer is non-null and memory was initialized by `bzalloc_array` - unsafe { (*s).arr1 = Arr1::from_raw_parts_mut(arr1_alloc, arr1_len) }; - - // SAFETY: pointer is non-null and memory was initialized by `bzalloc_array` - unsafe { (*s).arr2 = Arr2::from_raw_parts_mut(arr2_alloc, arr2_len) }; - - // SAFETY: pointer is non-null and memory was initialized by `bzalloc_array` - unsafe { (*s).ftab = Ftab::from_ptr(ftab_alloc) }; - (*s).blockNo = 0; (*s).state = State::Output; (*s).mode = Mode::Running; @@ -1003,21 +992,9 @@ pub unsafe extern "C" fn BZ2_bzCompressEnd(strm: *mut bz_stream) -> c_int { return ReturnCode::BZ_PARAM_ERROR as c_int; }; - if !(s.arr1).is_empty() { - let arr1 = mem::replace(&mut s.arr1, Arr1::new()); - let (ptr, _len) = arr1.into_raw_parts(); - (bzfree)(strm.opaque, ptr.cast::()); - } - if !(s.arr2).is_empty() { - let arr2 = mem::replace(&mut s.arr2, Arr2::new()); - let (ptr, _len) = arr2.into_raw_parts(); - (bzfree)(strm.opaque, ptr.cast::()); - } - if !(s.ftab).is_null() { - let ftab = mem::replace(&mut s.ftab, Ftab::from_ptr(ptr::null_mut())); - let ptr = ftab.into_ptr(); - (bzfree)(strm.opaque, ptr.cast::()); - } + s.arr1.dealloc(bzfree, strm.opaque); + s.arr2.dealloc(bzfree, strm.opaque); + s.ftab.dealloc(bzfree, strm.opaque); (bzfree)(strm.opaque, strm.state); strm.state = ptr::null_mut::(); From 2f73d23aabf71f64342bf88f53b535094c28735d Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Tue, 12 Nov 2024 14:02:23 +0100 Subject: [PATCH 3/5] Change the strm fields in EState and DState to usize This avoid self-referential types --- bzlib.rs | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/bzlib.rs b/bzlib.rs index 756de559e..19293e25c 100644 --- a/bzlib.rs +++ b/bzlib.rs @@ -182,7 +182,7 @@ pub(crate) const BZ_N_OVERSHOOT: usize = (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + pub(crate) const FTAB_LEN: usize = u16::MAX as usize + 2; pub(crate) struct EState { - pub strm: *mut bz_stream, + pub strm: usize, // Only for a consistency check pub mode: Mode, pub state: State, pub avail_in_expect: u32, @@ -344,7 +344,7 @@ impl Ftab { } pub(crate) struct DState { - pub strm: *mut bz_stream, + pub strm: usize, // Only for a consistency check pub state: decompress::State, pub state_out_ch: u8, pub state_out_len: i32, @@ -581,7 +581,7 @@ unsafe fn BZ2_bzCompressInitHelp( // this `s.strm` pointer should _NEVER_ be used! it exists just as a consistency check to ensure // that a given state belongs to a given strm. - (*s).strm = strm; + (*s).strm = strm as usize; // FIXME use .addr() once stable let n = 100000 * blockSize100k; @@ -885,7 +885,8 @@ unsafe fn BZ2_bzCompressHelp(strm: &mut bz_stream, action: i32) -> ReturnCode { return ReturnCode::BZ_PARAM_ERROR; }; - if s.strm != strm { + // FIXME use .addr() once stable + if s.strm != strm as *mut _ as usize { return ReturnCode::BZ_PARAM_ERROR; } @@ -984,7 +985,8 @@ pub unsafe extern "C" fn BZ2_bzCompressEnd(strm: *mut bz_stream) -> c_int { return ReturnCode::BZ_PARAM_ERROR as c_int; }; - if s.strm != strm { + // FIXME use .addr() once stable + if s.strm != strm as *mut _ as usize { return ReturnCode::BZ_PARAM_ERROR as c_int; } @@ -1060,7 +1062,7 @@ unsafe fn BZ2_bzDecompressInitHelp( // this `s.strm` pointer should _NEVER_ be used! it exists just as a consistency check to ensure // that a given state belongs to a given strm. - (*s).strm = strm; + (*s).strm = strm as usize; // FIXME use .addr() once stable (*s).state = decompress::State::BZ_X_MAGIC_1; (*s).bsLive = 0; @@ -1590,7 +1592,8 @@ unsafe fn BZ2_bzDecompressHelp(strm: &mut bz_stream) -> ReturnCode { return ReturnCode::BZ_PARAM_ERROR; }; - if s.strm != strm { + // FIXME use .addr() once stable + if s.strm != strm as *mut _ as usize { return ReturnCode::BZ_PARAM_ERROR; } @@ -1683,7 +1686,8 @@ pub unsafe extern "C" fn BZ2_bzDecompressEnd(strm: *mut bz_stream) -> c_int { return ReturnCode::BZ_PARAM_ERROR as c_int; }; - if s.strm != strm { + // FIXME use .addr() once stable + if s.strm != strm as *mut _ as usize { return ReturnCode::BZ_PARAM_ERROR as c_int; } From 9597d5f7ec76dbf57730681162b6e6eaef7e8829 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Tue, 12 Nov 2024 14:17:04 +0100 Subject: [PATCH 4/5] Make a couple of functions safe to call --- bzlib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bzlib.rs b/bzlib.rs index 19293e25c..8a1041025 100644 --- a/bzlib.rs +++ b/bzlib.rs @@ -647,7 +647,7 @@ macro_rules! BZ_UPDATE_CRC { }; } -unsafe fn add_pair_to_block(s: &mut EState) { +fn add_pair_to_block(s: &mut EState) { let ch: u8 = s.state_in_ch as u8; for _ in 0..s.state_in_len { @@ -681,7 +681,7 @@ unsafe fn add_pair_to_block(s: &mut EState) { }; } -unsafe fn flush_rl(s: &mut EState) { +fn flush_rl(s: &mut EState) { if s.state_in_ch < 256 { add_pair_to_block(s); } From 55885621cf00a030eb297b1b20b1c971f9a2c1fc Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Tue, 12 Nov 2024 15:09:15 +0100 Subject: [PATCH 5/5] Rename strm usize fields to strm_addr --- bzlib.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/bzlib.rs b/bzlib.rs index 8a1041025..1b0887415 100644 --- a/bzlib.rs +++ b/bzlib.rs @@ -182,7 +182,7 @@ pub(crate) const BZ_N_OVERSHOOT: usize = (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + pub(crate) const FTAB_LEN: usize = u16::MAX as usize + 2; pub(crate) struct EState { - pub strm: usize, // Only for a consistency check + pub strm_addr: usize, // Only for a consistency check pub mode: Mode, pub state: State, pub avail_in_expect: u32, @@ -344,7 +344,7 @@ impl Ftab { } pub(crate) struct DState { - pub strm: usize, // Only for a consistency check + pub strm_addr: usize, // Only for a consistency check pub state: decompress::State, pub state_out_ch: u8, pub state_out_len: i32, @@ -581,7 +581,7 @@ unsafe fn BZ2_bzCompressInitHelp( // this `s.strm` pointer should _NEVER_ be used! it exists just as a consistency check to ensure // that a given state belongs to a given strm. - (*s).strm = strm as usize; // FIXME use .addr() once stable + (*s).strm_addr = strm as usize; // FIXME use .addr() once stable let n = 100000 * blockSize100k; @@ -886,7 +886,7 @@ unsafe fn BZ2_bzCompressHelp(strm: &mut bz_stream, action: i32) -> ReturnCode { }; // FIXME use .addr() once stable - if s.strm != strm as *mut _ as usize { + if s.strm_addr != strm as *mut _ as usize { return ReturnCode::BZ_PARAM_ERROR; } @@ -986,7 +986,7 @@ pub unsafe extern "C" fn BZ2_bzCompressEnd(strm: *mut bz_stream) -> c_int { }; // FIXME use .addr() once stable - if s.strm != strm as *mut _ as usize { + if s.strm_addr != strm as *mut _ as usize { return ReturnCode::BZ_PARAM_ERROR as c_int; } @@ -1062,7 +1062,7 @@ unsafe fn BZ2_bzDecompressInitHelp( // this `s.strm` pointer should _NEVER_ be used! it exists just as a consistency check to ensure // that a given state belongs to a given strm. - (*s).strm = strm as usize; // FIXME use .addr() once stable + (*s).strm_addr = strm as usize; // FIXME use .addr() once stable (*s).state = decompress::State::BZ_X_MAGIC_1; (*s).bsLive = 0; @@ -1593,7 +1593,7 @@ unsafe fn BZ2_bzDecompressHelp(strm: &mut bz_stream) -> ReturnCode { }; // FIXME use .addr() once stable - if s.strm != strm as *mut _ as usize { + if s.strm_addr != strm as *mut _ as usize { return ReturnCode::BZ_PARAM_ERROR; } @@ -1687,7 +1687,7 @@ pub unsafe extern "C" fn BZ2_bzDecompressEnd(strm: *mut bz_stream) -> c_int { }; // FIXME use .addr() once stable - if s.strm != strm as *mut _ as usize { + if s.strm_addr != strm as *mut _ as usize { return ReturnCode::BZ_PARAM_ERROR as c_int; }