diff --git a/examples/print-content.rs b/examples/print-content.rs index 09d3bd6..fb0fff8 100644 --- a/examples/print-content.rs +++ b/examples/print-content.rs @@ -2,13 +2,88 @@ extern crate dbase; fn main() { let dbf_path = std::env::args().nth(1).expect("Path to file as first arg"); - let mut reader = dbase::Reader::from_path(dbf_path).unwrap(); + let mut reader = dbase::Reader::from_path(&dbf_path).unwrap(); + println!("{}", reader.header().size_of_record); + // let mut records = reader.iter_records().collect::,_>>().unwrap(); + // + // + // let r = records.clone(); + // + // while records.len() < 500_000 { + // let mut tmp = r.clone(); + // records.append(&mut tmp); + // } + // + // let mut writer = dbase::TableWriterBuilder::from_reader(reader).build_with_file_dest("lol.dbf").unwrap(); + // writer.write_records(&records).unwrap(); - for (i, record_result) in reader.iter_records().enumerate() { - println!("Record {}", i); - let record = record_result.unwrap(); - for (name, value) in record { - println!("\tname: {}, value: {:?}", name, value); + let t = std::time::Instant::now(); + let mut records = Vec::with_capacity(reader.header().num_records as usize); + for record in reader.iter_records() { + // let tt = std::time::Instant::now(); + records.push(record.unwrap()); + // println!("time to read one record: {:?}", tt.elapsed()); + } + println!("Time to read via reader: {:?}", t.elapsed()); + + let mut file = dbase::File::open_read_only(&dbf_path).unwrap(); + let t = std::time::Instant::now(); + let mut records = Vec::with_capacity(file.num_records() * file.fields().len()); + let num_fields = file.fields().len(); + let num_records = file.num_records(); + let mut iter = file.records(); + // let mut c = 0u64; + loop { + // let tt = std::time::Instant::now(); + let Some(mut record) = iter.next() else { + break; + }; + // println!("time to read one record: {:?}", tt.elapsed()); + + for i in 0..num_fields { + records.push(record.field(dbase::FieldIndex(i)).unwrap().read()); } } + println!("Time to read via file: {:?}", t.elapsed()); + + let mut file = dbase::File::open_read_only(dbf_path).unwrap(); + let t = std::time::Instant::now(); + let mut records = Vec::with_capacity(file.num_records()); + let mut iter = file.records(); + while let Some(mut record) = iter.next() { + records.push(record.read().unwrap()); + } + println!("Time to read via file: {:?}", t.elapsed()); + + // for (i, record_result) in reader.iter_records().enumerate() { + // println!("Record {}", i); + // let record = record_result.unwrap(); + // for (name, value) in record { + // println!("\tname: {}, value: {:?}", name, value); + // } + // } } + +// +// use std::fs::File; +// use std::io::{BufReader, SeekFrom}; +// use std::io::prelude::*; +// use byteorder::WriteBytesExt; +// +// fn main() -> std::io::Result<()> { +// let mut file = File::options().read(true).write(true).truncate(false).open("foo.txt")?; +// let mut file_copy = BufReader::new(file.try_clone()?); +// +// let mut contents = vec![0u8; 3]; +// file_copy.read_exact(&mut contents)?; +// println!("contents: {:?}", contents); +// +// // file.seek(SeekFrom::Start(2)).unwrap(); +// file.write_u8(33).unwrap(); +// +// // file_copy.seek(SeekFrom::Start(0)).unwrap(); +// file_copy.read_exact(&mut contents).unwrap(); +// println!("contents: {:?}", contents); +// +// Ok(()) +// } diff --git a/src/field/mod.rs b/src/field/mod.rs index f0ce300..8992134 100644 --- a/src/field/mod.rs +++ b/src/field/mod.rs @@ -172,6 +172,13 @@ pub(crate) enum DeletionFlag { } impl DeletionFlag { + pub(crate) const fn to_byte(self) -> u8 { + match self { + Self::NotDeleted => 0x20, + Self::Deleted => 0x2A, + } + } + pub(crate) fn read_from(source: &mut T) -> std::io::Result { let byte = source.read_u8()?; match byte { diff --git a/src/file.rs b/src/file.rs index 30444c1..36ca948 100644 --- a/src/file.rs +++ b/src/file.rs @@ -106,6 +106,16 @@ where .seek(SeekFrom::Start(self.position_in_source())) .map_err(|e| FieldIOError::new(ErrorKind::IoError(e), Some(field_info.clone()))) } + + /// Returns the start position in the record **INCLUDING** + /// the deletion flag + fn position_in_record(&self) -> usize { + DELETION_FLAG_SIZE + + self.file.fields_info[..self.field_index.0] + .iter() + .map(|i| i.field_length as usize) + .sum::() + } } impl<'a, T> FieldRef<'a, T> @@ -114,21 +124,14 @@ where { /// Reads and returns the value pub fn read(&mut self) -> Result { - self.seek_to_beginning() - .map_err(|e| Error::new(e, self.record_index.0))?; - let field_info = &self.file.fields_info[self.field_index.0]; - let buffer = &mut self.file.field_data_buffer[..field_info.field_length as usize]; - self.file.inner.read(buffer).map_err(|e| { - Error::new( - FieldIOError::new(ErrorKind::IoError(e), Some(field_info.clone())), - self.record_index.0, - ) - })?; + let start_pos = self.position_in_record(); + let field_bytes = &mut self.file.record_data_buffer.get_mut() + [start_pos..start_pos + field_info.field_length as usize]; FieldValue::read_from::>, _>( - &buffer, + &field_bytes, &mut None, field_info, &self.file.encoding, @@ -170,14 +173,21 @@ where where ValueType: WritableAsDbaseField, { - self.seek_to_beginning() + self.file.file_position = self + .seek_to_beginning() .map_err(|e| Error::new(e, self.record_index.0))?; let field_info = &self.file.fields_info[self.field_index.0]; - let buffer = &mut self.file.field_data_buffer[..field_info.field_length as usize]; - buffer.fill(0); - let mut cursor = Cursor::new(buffer); + let start_pos = self.position_in_record(); + let field_bytes = &mut self.file.record_data_buffer.get_mut() + [start_pos..start_pos + field_info.field_length as usize]; + field_bytes.fill(0); + + // Note that since we modify the internal buffer, we don't need to re-read the + // record / buffer, meaning if a user writes then reads it should get correct + // value, and we did not re-read from file. + let mut cursor = Cursor::new(field_bytes); value .write_as(field_info, &self.file.encoding, &mut cursor) .map_err(|e| { @@ -188,6 +198,7 @@ where })?; let buffer = cursor.into_inner(); + self.file.inner.write_all(&buffer).map_err(|e| { Error::new( FieldIOError::new(ErrorKind::IoError(e), Some(field_info.clone())), @@ -195,6 +206,8 @@ where ) })?; + self.file.file_position += buffer.len() as u64; + Ok(()) } } @@ -250,7 +263,9 @@ where pub fn seek_before_deletion_flag(&mut self) -> Result { self.file .inner - .seek(SeekFrom::Start(self.position_in_source() - 1)) + .seek(SeekFrom::Start( + self.position_in_source() - DELETION_FLAG_SIZE as u64, + )) .map_err(|e| FieldIOError::new(ErrorKind::IoError(e), None)) } } @@ -264,13 +279,7 @@ where /// - true -> the record is marked as deleted /// - false -> the record is **not** marked as deleted pub fn is_deleted(&mut self) -> Result { - let deletion_flag_pos = self.position_in_source() - DELETION_FLAG_SIZE as u64; - self.file - .inner - .seek(SeekFrom::Start(deletion_flag_pos)) - .map_err(|error| Error::io_error(error, self.index.0))?; - - let deletion_flag = DeletionFlag::read_from(&mut self.file.inner) + let deletion_flag = DeletionFlag::read_from(&mut self.file.record_data_buffer) .map_err(|error| Error::io_error(error, self.index.0))?; Ok(deletion_flag == DeletionFlag::Deleted) @@ -284,11 +293,14 @@ where where R: ReadableRecord, { - self.seek_to_beginning() - .map_err(|error| Error::new(error, self.index.0))?; + // self.seek_to_beginning() + // .map_err(|error| Error::new(error, self.index.0))?; + self.file + .record_data_buffer + .set_position(DELETION_FLAG_SIZE as u64); let mut field_iterator = FieldIterator::<_, Cursor>> { - source: &mut self.file.inner, + source: &mut self.file.record_data_buffer, fields_info: self.file.fields_info.iter().peekable(), memo_reader: &mut None, field_data_buffer: &mut self.file.field_data_buffer, @@ -310,23 +322,36 @@ where where R: WritableRecord, { - self.seek_before_deletion_flag() - .map_err(|error| Error::new(error, self.index.0))?; + self.file.record_data_buffer.get_mut().fill(0); + self.file.record_data_buffer.get_mut()[0] = DeletionFlag::NotDeleted.to_byte(); + self.file.record_data_buffer.set_position(1); let mut field_writer = FieldWriter { - dst: &mut self.file.inner, + dst: &mut self.file.record_data_buffer, fields_info: self.file.fields_info.iter().peekable(), field_buffer: &mut Cursor::new(&mut self.file.field_data_buffer), encoding: &self.file.encoding, }; - field_writer - .write_deletion_flag() - .map_err(|error| Error::io_error(error, self.index.0))?; - record .write_using(&mut field_writer) - .map_err(|error| Error::new(error, self.index.0)) + .map_err(|error| Error::new(error, self.index.0))?; + + self.seek_before_deletion_flag() + .map_err(|error| Error::new(error, self.index.0))?; + + self.file + .inner + .write_all(self.file.record_data_buffer.get_ref()) + .map_err(|error| Error::io_error(error, self.index.0))?; + + // We don't need to update the file's inner position as we re-wrote the whole record + debug_assert_eq!( + self.file.file_position, + self.file.inner.seek(SeekFrom::Current(0)).unwrap() + ); + + Ok(()) } } @@ -336,20 +361,25 @@ pub struct FileRecordIterator<'a, T> { current_record: RecordIndex, } -impl<'a, T> FileRecordIterator<'a, T> { +impl<'a, T> FileRecordIterator<'a, T> +where + T: Seek + Read, +{ // To implement iterator we need the Iterator trait to make use of GATs // which is not the case, to iteration will have to use the while let Some() pattern pub fn next<'s>(&'s mut self) -> Option> { - if self.current_record.0 >= self.file.header.num_records as usize { - None - } else { - let r = RecordRef { - file: &mut self.file, - index: self.current_record, - }; - self.current_record.0 += 1; - Some(r) + let record_ref = self.file.record(self.current_record.0); + if let Some(_) = record_ref { + self.current_record.0 += 1 } + record_ref + // if self.current_record.0 >= self.file.header.num_records as usize { + // None + // } else { + // self.current_record.0 += 1; + // self.file.record(self.current_record.0) + // .expect("internal error tried to get out of bound record") + // } } } @@ -397,10 +427,12 @@ pub struct File { pub(crate) header: Header, pub(crate) fields_info: Vec, pub(crate) encoding: DynEncoding, + record_data_buffer: Cursor>, /// Non-Memo field length is stored on a u8, /// so fields cannot exceed 255 bytes field_data_buffer: [u8; 255], pub(crate) options: ReadingOptions, + file_position: u64, } impl File { @@ -467,14 +499,23 @@ impl File { let field_error = FieldIOError::new(UnsupportedCodePage(header.code_page_mark), None); Error::new(field_error, 0) })?; + + let record_size: usize = DELETION_FLAG_SIZE + + fields_info + .iter() + .map(|i| i.field_length as usize) + .sum::(); + let record_data_buffer = Cursor::new(vec![0u8; record_size]); Ok(Self { inner: source, // memo_reader: None, header, fields_info, encoding, + record_data_buffer, field_data_buffer: [0u8; 255], options: ReadingOptions::default(), + file_position: header.offset_to_first_record as u64, }) } @@ -485,10 +526,30 @@ impl File { if index >= self.header.num_records as usize { None } else { - Some(RecordRef { + let record_ref = RecordRef { + file: self, + index: RecordIndex(index), + }; + let pos = record_ref.position_in_source() - DELETION_FLAG_SIZE as u64; + drop(record_ref); + + if pos != self.file_position { + self.file_position = self + .inner + .seek(SeekFrom::Start(pos)) + .map_err(|e| FieldIOError::new(ErrorKind::IoError(e), None)) + .unwrap() + } + + self.inner + .read_exact(self.record_data_buffer.get_mut()) + .unwrap(); + self.file_position += self.record_data_buffer.get_mut().len() as u64; + let record_ref = RecordRef { file: self, index: RecordIndex(index), - }) + }; + Some(record_ref) } } @@ -506,14 +567,24 @@ impl File { impl File { pub fn create_new(mut dst: T, table_info: TableInfo) -> Result { write_header_parts(&mut dst, &table_info.header, &table_info.fields_info)?; - + let record_size: usize = DELETION_FLAG_SIZE + + table_info + .fields_info + .iter() + .map(|i| i.field_length as usize) + .sum::(); + let record_data_buffer = Cursor::new(vec![0u8; record_size]); + let file_position = table_info.header.offset_to_first_record as u64; + debug_assert_eq!(file_position, dst.stream_position().unwrap()); Ok(Self { inner: dst, header: table_info.header, fields_info: table_info.fields_info, encoding: table_info.encoding, + record_data_buffer, field_data_buffer: [0u8; 255], options: ReadingOptions::default(), + file_position, }) } diff --git a/src/lib.rs b/src/lib.rs index 802dd75..84a5ed3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -288,7 +288,7 @@ mod reading; mod record; mod writing; -pub use file::{FieldRef, File, RecordRef}; +pub use file::{FieldIndex, RecordIndex, FieldRef, File, RecordRef}; #[cfg(feature = "datafusion")] pub use crate::datafusion::{DbaseTable, DbaseTableFactory}; diff --git a/src/writing.rs b/src/writing.rs index 80defb0..6940198 100644 --- a/src/writing.rs +++ b/src/writing.rs @@ -252,7 +252,7 @@ impl TableWriterBuilder { let size_of_record = self .v .iter() - .fold(1u16, |s, info| s + info.field_length as u16); + .fold(0u16, |s, info| s + info.field_length as u16); self.hdr.offset_to_first_record = offset_to_first_record as u16; self.hdr.size_of_record = size_of_record;