Skip to content

Commit

Permalink
Improve dbase::File performance
Browse files Browse the repository at this point in the history
  • Loading branch information
tmontaigu committed Nov 1, 2023
1 parent 7cf1ec9 commit 79be213
Show file tree
Hide file tree
Showing 7 changed files with 337 additions and 103 deletions.
87 changes: 81 additions & 6 deletions examples/print-content.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,88 @@ extern crate dbase;

/// Example / ad-hoc benchmark: takes the path of a dbase file as the first
/// command-line argument, reads its records in several ways and prints how
/// long each approach took.
///
/// NOTE(review): this listing appears to be a rendered diff without +/- markers,
/// so lines from the previous and the new version of `main` seem to be
/// interleaved (two consecutive `let mut reader` bindings, an old print loop
/// that runs straight into the new timing code). Confirm against the real file
/// before trying to compile it.
fn main() {
// Panics with the given message when no path argument was supplied.
let dbf_path = std::env::args().nth(1).expect("Path to file as first arg");
// NOTE(review): this binding moves `dbf_path`, yet `dbf_path` is borrowed again
// on the next line and further down — presumably the pre-change line; verify.
let mut reader = dbase::Reader::from_path(dbf_path).unwrap();
let mut reader = dbase::Reader::from_path(&dbf_path).unwrap();
// Prints the `size_of_record` value stored in the reader's header.
println!("{}", reader.header().size_of_record);
// let mut records = reader.iter_records().collect::<Result<Vec<_>,_>>().unwrap();
//
//
// let r = records.clone();
//
// while records.len() < 500_000 {
// let mut tmp = r.clone();
// records.append(&mut tmp);
// }
//
// let mut writer = dbase::TableWriterBuilder::from_reader(reader).build_with_file_dest("lol.dbf").unwrap();
// writer.write_records(&records).unwrap();

// NOTE(review): the next five lines open two `for` loops that are not closed
// before the timing code starts — presumably the old printing loop from the
// diff (a commented-out copy of it sits at the end of this function); verify.
for (i, record_result) in reader.iter_records().enumerate() {
println!("Record {}", i);
let record = record_result.unwrap();
for (name, value) in record {
println!("\tname: {}, value: {:?}", name, value);
// Pass 1: collect every record yielded by `reader.iter_records()` into a Vec
// and print the elapsed time. Any record error panics through `unwrap()`.
let t = std::time::Instant::now();
let mut records = Vec::with_capacity(reader.header().num_records as usize);
for record in reader.iter_records() {
// let tt = std::time::Instant::now();
records.push(record.unwrap());
// println!("time to read one record: {:?}", tt.elapsed());
}
println!("Time to read via reader: {:?}", t.elapsed());

// Pass 2: open the same path through `dbase::File` and read individual fields.
let mut file = dbase::File::open_read_only(&dbf_path).unwrap();
let t = std::time::Instant::now();
// NOTE(review): capacity is sized for num_records * num_fields, but the loop
// below only pushes the fields at index 0 and 1 of each record; `num_fields`
// and `num_records` are not used anywhere else in the visible code.
let mut records = Vec::with_capacity(file.num_records() * file.fields().len());
let num_fields = file.fields().len();
let num_records = file.num_records();
let mut iter = file.records();
// let mut c = 0u64;
loop {
// let tt = std::time::Instant::now();
// Stop as soon as the record iterator is exhausted.
let Some(mut record) = iter.next() else {
break;
};
// println!("time to read one record: {:?}", tt.elapsed());

// Hard-coded to the first two field indices; the `unwrap()` panics when
// `field(...)` does not yield a field — presumably for tables with fewer
// than two fields; verify against `dbase::File`.
for i in 0..2 {
records.push(record.field(dbase::FieldIndex(i)).unwrap().read());
}
}
println!("Time to read via file: {:?}", t.elapsed());

// Pass 3: reopen the file (this call consumes `dbf_path`) and read each record
// as a whole via `record.read()`.
let mut file = dbase::File::open_read_only(dbf_path).unwrap();
let t = std::time::Instant::now();
let mut records = Vec::with_capacity(file.num_records());
let mut iter = file.records();
while let Some(mut record) = iter.next() {
records.push(record.read().unwrap());
}
// NOTE(review): same label as the per-field pass above, so the two timings
// cannot be told apart in the output.
println!("Time to read via file: {:?}", t.elapsed());

// for (i, record_result) in reader.iter_records().enumerate() {
// println!("Record {}", i);
// let record = record_result.unwrap();
// for (name, value) in record {
// println!("\tname: {}, value: {:?}", name, value);
// }
// }
}

//
// use std::fs::File;
// use std::io::{BufReader, SeekFrom};
// use std::io::prelude::*;
// use byteorder::WriteBytesExt;
//
// fn main() -> std::io::Result<()> {
// let mut file = File::options().read(true).write(true).truncate(false).open("foo.txt")?;
// let mut file_copy = BufReader::new(file.try_clone()?);
//
// let mut contents = vec![0u8; 3];
// file_copy.read_exact(&mut contents)?;
// println!("contents: {:?}", contents);
//
// // file.seek(SeekFrom::Start(2)).unwrap();
// file.write_u8(33).unwrap();
//
// // file_copy.seek(SeekFrom::Start(0)).unwrap();
// file_copy.read_exact(&mut contents).unwrap();
// println!("contents: {:?}", contents);
//
// Ok(())
// }
88 changes: 79 additions & 9 deletions src/field/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use std::convert::TryFrom;
use std::io::{Read, Write};
use std::ops::Index;
use std::slice::SliceIndex;

use byteorder::{ReadBytesExt, WriteBytesExt};

Expand Down Expand Up @@ -155,6 +157,67 @@ impl FieldInfo {
}
}

/// Ordered collection of the [`FieldInfo`] descriptors of a table.
///
/// The descriptors are kept in the order in which they were read.
pub struct FieldsInfo {
    pub(crate) inner: Vec<FieldInfo>,
}

impl FieldsInfo {
    /// Reads `num_fields` consecutive field descriptors from `source`.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by `FieldInfo::read_from`; descriptors
    /// read before the failure are discarded.
    pub(crate) fn read_from<R: Read>(source: &mut R, num_fields: usize) -> Result<Self, ErrorKind> {
        // `num_fields` is already a `usize`, no cast needed.
        let mut fields_info = Vec::<FieldInfo>::with_capacity(num_fields);
        for _ in 0..num_fields {
            fields_info.push(FieldInfo::read_from(source)?);
        }

        Ok(Self { inner: fields_info })
    }

    // TODO FieldIndex ?
    /// Returns the summed `field_length` of every field that comes before `index`.
    ///
    /// `index == self.len()` is accepted and yields the same value as
    /// [`size_of_all_fields`](Self::size_of_all_fields). Returns `None` when
    /// `index` is greater than the number of fields.
    pub(crate) fn field_position_in_record(&self, index: usize) -> Option<usize> {
        self.inner
            .get(..index)
            .map(|preceding| preceding.iter().map(|i| usize::from(i.field_length)).sum())
    }

    /// Returns the sum of the `field_length` of all fields.
    pub(crate) fn size_of_all_fields(&self) -> usize {
        self.inner
            .iter()
            .map(|i| usize::from(i.field_length))
            .sum()
    }

    /// Returns `true` if at least one field has the type `FieldType::Memo`.
    pub(crate) fn at_least_one_field_is_memo(&self) -> bool {
        self.inner
            .iter()
            .any(|f_info| f_info.field_type == FieldType::Memo)
    }

    /// Returns the number of fields.
    pub fn len(&self) -> usize {
        self.inner.len()
    }

    /// Returns `true` if there are no fields.
    pub fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }

    /// Returns an iterator over the field descriptors, in stored order.
    pub fn iter(&self) -> std::slice::Iter<'_, FieldInfo> {
        self.inner.iter()
    }
}

/// Lets a [`FieldsInfo`] be borrowed as a plain slice of [`FieldInfo`].
impl AsRef<[FieldInfo]> for FieldsInfo {
    fn as_ref(&self) -> &[FieldInfo] {
        self.inner.as_slice()
    }
}

/// Indexing a [`FieldsInfo`] behaves exactly like indexing the underlying
/// slice of [`FieldInfo`]: any index type accepted by the slice (single
/// position or range) is accepted here, with the same output type.
impl<I> Index<I> for FieldsInfo
where
    I: SliceIndex<[FieldInfo]>,
{
    type Output = <I as SliceIndex<[FieldInfo]>>::Output;

    fn index(&self, index: I) -> &Self::Output {
        // Delegate to the slice implementation; out-of-range indices panic there.
        Index::index(self.inner.as_slice(), index)
    }
}

impl std::fmt::Display for FieldInfo {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
Expand All @@ -172,21 +235,28 @@ pub(crate) enum DeletionFlag {
}

// Conversions between `DeletionFlag` and its one-byte representation, plus
// helpers to read/write that byte from/to a stream.
//
// NOTE(review): this listing appears to be a rendered diff without +/- markers;
// several lines below look like the pre-change version shown next to their
// replacement. Confirm against the real file.
impl DeletionFlag {
// NOTE(review): the next two lines open a `read_from` that is never closed and
// that is defined again further down — presumably the removed lines; verify.
pub(crate) fn read_from<T: Read>(source: &mut T) -> std::io::Result<Self> {
let byte = source.read_u8()?;
/// Returns the byte that represents this flag: `0x20` for `NotDeleted`,
/// `0x2A` for `Deleted`.
pub(crate) const fn to_byte(self) -> u8 {
match self {
Self::NotDeleted => 0x20,
Self::Deleted => 0x2A,
}
}

/// Builds a flag from its byte representation. Never fails: every byte
/// other than `0x2A` is mapped to `NotDeleted`.
pub(crate) const fn from_byte(byte: u8) -> Self {
match byte {
// NOTE(review): the two `Ok(..)` arms duplicate the two arms after them and do
// not match the `Self` return type — presumably the removed lines; verify.
0x20 => Ok(Self::NotDeleted),
0x2A => Ok(Self::Deleted),
0x20 => Self::NotDeleted,
0x2A => Self::Deleted,
// Silently consider other values as not deleted
// NOTE(review): same duplication for the catch-all arm (`Ok(..)` vs bare value).
_ => Ok(Self::NotDeleted),
_ => Self::NotDeleted,
}
}

/// Reads a single byte from `source` and converts it with `from_byte`.
/// Only the I/O error of the read itself is propagated.
pub(crate) fn read_from<T: Read>(source: &mut T) -> std::io::Result<Self> {
source.read_u8().map(Self::from_byte)
}

/// Writes the byte returned by `to_byte` to `dst`.
pub(crate) fn write_to<T: Write>(self, dst: &mut T) -> std::io::Result<()> {
// NOTE(review): the `match` below writes the same two bytes as the `to_byte`
// call that follows it — presumably the removed body shown next to its
// replacement; verify.
match self {
Self::NotDeleted => dst.write_u8(0x20),
Self::Deleted => dst.write_u8(0x2A),
}
dst.write_u8(self.to_byte())
}
}
/// Flags describing a field
Expand Down
Loading

0 comments on commit 79be213

Please sign in to comment.