Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
tmontaigu committed Nov 1, 2023
1 parent e8a8aa3 commit 9d1bbb8
Show file tree
Hide file tree
Showing 6 changed files with 156 additions and 75 deletions.
2 changes: 1 addition & 1 deletion examples/print-content.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ fn main() {
};
// println!("time to read one record: {:?}", tt.elapsed());

for i in 0..num_fields {
for i in 0..2 {
records.push(record.field(dbase::FieldIndex(i)).unwrap().read());
}
}
Expand Down
81 changes: 72 additions & 9 deletions src/field/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use std::convert::TryFrom;
use std::io::{Read, Write};
use std::ops::Index;
use std::slice::SliceIndex;

use byteorder::{ReadBytesExt, WriteBytesExt};

Expand Down Expand Up @@ -155,6 +157,67 @@ impl FieldInfo {
}
}

/// Ordered list of the [`FieldInfo`] describing the fields of a record.
///
/// Entries are kept in the order they were read (or provided); the offset
/// helpers below sum the lengths of the entries in that order.
pub struct FieldsInfo {
    pub(crate) inner: Vec<FieldInfo>,
}

impl FieldsInfo {
    /// Reads `num_fields` consecutive field descriptors from `source`.
    ///
    /// # Errors
    ///
    /// Stops at the first descriptor that fails to be read and returns
    /// that error.
    pub(crate) fn read_from<R: Read>(source: &mut R, num_fields: usize) -> Result<Self, ErrorKind> {
        // `num_fields` is already a `usize`, no cast is needed.
        let mut fields_info = Vec::<FieldInfo>::with_capacity(num_fields);
        for _ in 0..num_fields {
            fields_info.push(FieldInfo::read_from(source)?);
        }

        Ok(Self { inner: fields_info })
    }

    // TODO FieldIndex ?
    /// Returns the sum of the lengths of all the fields located before the
    /// field at `index`.
    ///
    /// Only field lengths are summed, nothing else is added to the result.
    ///
    /// Returns `None` when `index` is greater than the number of fields.
    /// Note that `index == self.len()` is accepted and yields the same value
    /// as [`Self::size_of_all_fields`].
    pub(crate) fn field_position_in_record(&self, index: usize) -> Option<usize> {
        self.inner
            .get(..index)
            .map(|preceding| preceding.iter().map(|i| i.field_length as usize).sum::<usize>())
    }

    /// Returns the sum of the lengths of all the fields.
    pub(crate) fn size_of_all_fields(&self) -> usize {
        self.inner
            .iter()
            .map(|i| i.field_length as usize)
            .sum::<usize>()
    }

    /// Returns `true` if at least one field has the `FieldType::Memo` type.
    pub(crate) fn at_least_one_field_is_memo(&self) -> bool {
        self.inner
            .iter()
            .any(|f_info| f_info.field_type == FieldType::Memo)
    }

    /// Returns the number of fields.
    pub fn len(&self) -> usize {
        self.inner.len()
    }

    /// Returns `true` when there are no fields.
    pub fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }

    /// Returns an iterator over the fields, in order.
    pub fn iter(&self) -> std::slice::Iter<'_, FieldInfo> {
        self.inner.iter()
    }
}

/// Allows borrowing a `FieldsInfo` as a plain slice of `FieldInfo`.
impl AsRef<[FieldInfo]> for FieldsInfo {
    fn as_ref(&self) -> &[FieldInfo] {
        self.inner.as_slice()
    }
}

/// Forwards indexing to the underlying list of fields, so anything that can
/// index a `[FieldInfo]` (a single position or a range) can be used here.
///
/// As with slice indexing, an out-of-bounds `index` panics.
impl<I: SliceIndex<[FieldInfo]>> Index<I> for FieldsInfo {
    type Output = <I as SliceIndex<[FieldInfo]>>::Output;

    fn index(&self, index: I) -> &Self::Output {
        // `Vec` accepts the same index types as a slice.
        &self.inner[index]
    }
}

impl std::fmt::Display for FieldInfo {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
Expand All @@ -179,21 +242,21 @@ impl DeletionFlag {
}
}

pub(crate) fn read_from<T: Read>(source: &mut T) -> std::io::Result<Self> {
let byte = source.read_u8()?;
pub(crate) const fn from_byte(byte: u8) -> Self {
match byte {
0x20 => Ok(Self::NotDeleted),
0x2A => Ok(Self::Deleted),
0x20 => Self::NotDeleted,
0x2A => Self::Deleted,
// Silently consider other values as not deleted
_ => Ok(Self::NotDeleted),
_ => Self::NotDeleted,
}
}

/// Reads a single byte from `source` and converts it with `Self::from_byte`.
///
/// Any I/O error from reading the byte is returned unchanged.
pub(crate) fn read_from<T: Read>(source: &mut T) -> std::io::Result<Self> {
    let flag_byte = source.read_u8()?;
    Ok(Self::from_byte(flag_byte))
}

pub(crate) fn write_to<T: Write>(self, dst: &mut T) -> std::io::Result<()> {
match self {
Self::NotDeleted => dst.write_u8(0x20),
Self::Deleted => dst.write_u8(0x2A),
}
dst.write_u8(self.to_byte())
}
}
/// Flags describing a field
Expand Down
131 changes: 70 additions & 61 deletions src/file.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use crate::encoding::DynEncoding;
use crate::field::types::TrimOption;
use crate::field::{DeletionFlag, DELETION_FLAG_SIZE};
use crate::field::{DeletionFlag, FieldsInfo, DELETION_FLAG_SIZE};
use crate::header::Header;
use crate::memo::MemoReader;
use crate::reading::{ReadingOptions, BACKLINK_SIZE, TERMINATOR_VALUE};
use crate::writing::{write_header_parts, WritableAsDbaseField};
use crate::ErrorKind::UnsupportedCodePage;
Expand Down Expand Up @@ -85,37 +86,35 @@ impl<'a, T> FieldRef<'a, T> {
.header
.record_position(self.record_index.0)
.unwrap() as u64;
let position_in_record = self.file.fields_info[..self.field_index.0]
.iter()
.map(|i| i.field_length as u64)
.sum::<u64>();
let position_in_record = self.position_in_record();

record_position + position_in_record
record_position + position_in_record as u64
}

/// Returns the offset at which this field starts within its record.
///
/// The offset **includes** the deletion flag: it is `DELETION_FLAG_SIZE`
/// plus the combined length of the fields located before this one.
///
/// # Panics
///
/// Panics if the field index held by this `FieldRef` is not valid for the
/// file's fields.
fn position_in_record(&self) -> usize {
    let offset_among_fields = self
        .file
        .fields_info
        .field_position_in_record(self.field_index.0)
        .expect("internal error, invalid field index in FieldRef");

    DELETION_FLAG_SIZE + offset_among_fields
}
}

impl<'a, T> FieldRef<'a, T>
where
T: Seek,
{
pub(crate) fn seek_to_beginning(&mut self) -> Result<u64, FieldIOError> {
fn seek_to_beginning(&mut self) -> Result<u64, FieldIOError> {
let field_info = &self.file.fields_info[self.field_index.0];

self.file
.inner
.seek(SeekFrom::Start(self.position_in_source()))
.map_err(|e| FieldIOError::new(ErrorKind::IoError(e), Some(field_info.clone())))
}

/// Returns the start position in the record **INCLUDING**
/// the deletion flag
fn position_in_record(&self) -> usize {
DELETION_FLAG_SIZE
+ self.file.fields_info[..self.field_index.0]
.iter()
.map(|i| i.field_length as usize)
.sum::<usize>()
}
}

impl<'a, T> FieldRef<'a, T>
Expand All @@ -130,9 +129,9 @@ where
let field_bytes = &mut self.file.record_data_buffer.get_mut()
[start_pos..start_pos + field_info.field_length as usize];

FieldValue::read_from::<Cursor<Vec<u8>>, _>(
FieldValue::read_from(
&field_bytes,
&mut None,
&mut self.file.memo_reader,
field_info,
&self.file.encoding,
TrimOption::BeginEnd,
Expand Down Expand Up @@ -253,14 +252,7 @@ impl<'a, T> RecordRef<'a, T>
where
T: Seek,
{
pub fn seek_to_beginning(&mut self) -> Result<u64, FieldIOError> {
self.file
.inner
.seek(SeekFrom::Start(self.position_in_source()))
.map_err(|e| FieldIOError::new(ErrorKind::IoError(e), None))
}

pub fn seek_before_deletion_flag(&mut self) -> Result<u64, FieldIOError> {
fn seek_before_deletion_flag(&mut self) -> Result<u64, FieldIOError> {
self.file
.inner
.seek(SeekFrom::Start(
Expand All @@ -278,31 +270,29 @@ where
///
/// - true -> the record is marked as deleted
/// - false -> the record is **not** marked as deleted
pub fn is_deleted(&mut self) -> Result<bool, Error> {
let deletion_flag = DeletionFlag::read_from(&mut self.file.record_data_buffer)
.map_err(|error| Error::io_error(error, self.index.0))?;
pub fn is_deleted(&self) -> Result<bool, Error> {
let deletion_flag = DeletionFlag::from_byte(self.file.record_data_buffer.get_ref()[0]);

Ok(deletion_flag == DeletionFlag::Deleted)
}

/// Reads the record
pub fn read(&mut self) -> Result<crate::Record, Error> {
self.read_as()
}

/// Reads the record as the given type
pub fn read_as<R>(&mut self) -> Result<R, Error>
where
R: ReadableRecord,
{
// self.seek_to_beginning()
// .map_err(|error| Error::new(error, self.index.0))?;

self.file
.record_data_buffer
.set_position(DELETION_FLAG_SIZE as u64);
let mut field_iterator = FieldIterator::<_, Cursor<Vec<u8>>> {
let mut field_iterator = FieldIterator {
source: &mut self.file.record_data_buffer,
fields_info: self.file.fields_info.iter().peekable(),
memo_reader: &mut None,
memo_reader: &mut self.file.memo_reader,
field_data_buffer: &mut self.file.field_data_buffer,
encoding: &self.file.encoding,
options: self.file.options,
Expand Down Expand Up @@ -373,13 +363,6 @@ where
self.current_record.0 += 1
}
record_ref
// if self.current_record.0 >= self.file.header.num_records as usize {
// None
// } else {
// self.current_record.0 += 1;
// self.file.record(self.current_record.0)
// .expect("internal error tried to get out of bound record")
// }
}
}

Expand Down Expand Up @@ -424,24 +407,30 @@ where
/// ```
pub struct File<T> {
pub(crate) inner: T,
memo_reader: Option<MemoReader<T>>,
pub(crate) header: Header,
pub(crate) fields_info: Vec<FieldInfo>,
pub(crate) fields_info: FieldsInfo,
pub(crate) encoding: DynEncoding,
/// Buffer that contains a whole record worth of data
/// It also contains the deletion flag
record_data_buffer: Cursor<Vec<u8>>,
/// Non-Memo field length is stored on a u8,
/// so fields cannot exceed 255 bytes
field_data_buffer: [u8; 255],
pub(crate) options: ReadingOptions,
/// We track the position in the file
/// to avoid calling `seek` when we are reading buffer
/// in order (0, 1, 2, etc)
file_position: u64,
}

impl<T> File<T> {
/// Returns the information about fields present in the records
pub fn fields(&self) -> &[FieldInfo] {
self.fields_info.as_slice()
self.fields_info.as_ref()
}

/// Returns the field infex that corresponds to the given name
/// Returns the field index that corresponds to the given name
pub fn field_index(&self, name: &str) -> Option<FieldIndex> {
self.fields_info
.iter()
Expand All @@ -460,7 +449,7 @@ impl<T> File<T> {
}

impl<T: Read + Seek> File<T> {
/// creates of File using source as the storate space.
/// Creates a `File` using `source` as the storage space.
pub fn open(mut source: T) -> Result<Self, Error> {
let header = Header::read_from(&mut source).map_err(|error| Error::io_error(error, 0))?;

Expand All @@ -475,15 +464,12 @@ impl<T: Read + Seek> File<T> {
let num_fields =
(offset as usize - Header::SIZE - std::mem::size_of::<u8>()) / FieldInfo::SIZE;

let mut fields_info = Vec::<FieldInfo>::with_capacity(num_fields as usize);
for _ in 0..num_fields {
let info = FieldInfo::read_from(&mut source).map_err(|error| Error {
let fields_info =
FieldsInfo::read_from(&mut source, num_fields).map_err(|error| Error {
record_num: 0,
field: None,
kind: error,
})?;
fields_info.push(info);
}

let terminator = source
.read_u8()
Expand All @@ -500,15 +486,13 @@ impl<T: Read + Seek> File<T> {
Error::new(field_error, 0)
})?;

let record_size: usize = DELETION_FLAG_SIZE
+ fields_info
.iter()
.map(|i| i.field_length as usize)
.sum::<usize>();
let record_size: usize = DELETION_FLAG_SIZE + fields_info.size_of_all_fields();
let record_data_buffer = Cursor::new(vec![0u8; record_size]);
// debug_assert_eq!(record_size - DELETION_FLAG_SIZE, header.size_of_record as usize);

Ok(Self {
inner: source,
// memo_reader: None,
memo_reader: None,
header,
fields_info,
encoding,
Expand Down Expand Up @@ -578,8 +562,11 @@ impl<T: Write + Seek> File<T> {
debug_assert_eq!(file_position, dst.stream_position().unwrap());
Ok(Self {
inner: dst,
memo_reader: None,
header: table_info.header,
fields_info: table_info.fields_info,
fields_info: FieldsInfo {
inner: table_info.fields_info,
},
encoding: table_info.encoding,
record_data_buffer,
field_data_buffer: [0u8; 255],
Expand Down Expand Up @@ -664,9 +651,31 @@ impl File<BufReadWriteFile> {

/// Opens an existing dBase file in read only mode
pub fn open_read_only<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
let file = std::fs::File::open(path).map_err(|error| Error::io_error(error, 0))?;

File::open(BufReadWriteFile::new(file).unwrap())
let file = std::fs::File::open(path.as_ref()).map_err(|error| Error::io_error(error, 0))?;

let mut file = File::open(BufReadWriteFile::new(file).unwrap())?;
if file.fields_info.at_least_one_field_is_memo() {
let p = path.as_ref();
let memo_type = file.header.file_type.supported_memo_type();
if let Some(mt) = memo_type {
let memo_path = p.with_extension(mt.extension());

let memo_file = std::fs::File::open(memo_path).map_err(|error| Error {
record_num: 0,
field: None,
kind: ErrorKind::ErrorOpeningMemoFile(error),
})?;

let memo_reader =
BufReadWriteFile::new(memo_file)
.and_then(|memo_file| {
MemoReader::new(mt, memo_file)
}).map_err(|error| Error::io_error(error, 0))?;

file.memo_reader = Some(memo_reader);
}
}
Ok(file)
}

/// Opens an existing dBase file in write only mode
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ mod reading;
mod record;
mod writing;

pub use file::{FieldIndex, RecordIndex, FieldRef, File, RecordRef};
pub use file::{FieldIndex, FieldRef, File, RecordIndex, RecordRef};

#[cfg(feature = "datafusion")]
pub use crate::datafusion::{DbaseTable, DbaseTableFactory};
Expand Down
Loading

0 comments on commit 9d1bbb8

Please sign in to comment.