From c8fe23b18af8c65303a84e4c780ae42c9fae0e38 Mon Sep 17 00:00:00 2001 From: tmontaigu Date: Tue, 24 Oct 2023 21:56:36 +0200 Subject: [PATCH] Fix off by one error in dbase::File dbase::File calculated the position of the record/field assuming the record size by using the header's info which includes the implicit deletion flag size, and dbase::File added the deletion flag size again resulting of bad readings. --- src/datafusion.rs | 11 ++++++++++- src/file.rs | 16 +++++++++------- src/header.rs | 6 ++---- tests/data/stations.dbf | Bin 6264 -> 87623 bytes tests/test_file.rs | 37 +++++++++++++++++++++++++------------ 5 files changed, 46 insertions(+), 24 deletions(-) diff --git a/src/datafusion.rs b/src/datafusion.rs index d862a56..b79d7aa 100644 --- a/src/datafusion.rs +++ b/src/datafusion.rs @@ -436,6 +436,9 @@ mod test { // +-----------------------+ // | Franconia-Springfield | // | Federal Center SW | + // | "Foggy Bottom GWU" | + // | "Farragut West" | + // | "Federal Triangle" | // +-----------------------+ // extract first (and only) RecordBatch from dataframe @@ -448,7 +451,13 @@ mod test { // ensure values match assert_eq!( result[0].column(0).as_ref(), - &StringArray::from(vec!["Franconia-Springfield", "Federal Center SW"]) + &StringArray::from(vec![ + "Franconia-Springfield", + "Federal Center SW", + "Foggy Bottom GWU", + "Farragut West", + "Federal Triangle" + ]) ); Ok(()) } diff --git a/src/file.rs b/src/file.rs index 6be6194..a0ba1c7 100644 --- a/src/file.rs +++ b/src/file.rs @@ -53,7 +53,7 @@ impl<'a, T> FieldRef<'a, T> { .map(|i| i.field_length as u64) .sum::(); - record_position + position_in_record + record_position + position_in_record + DELETION_FLAG_SIZE as u64 } } @@ -206,14 +206,16 @@ where pub fn seek_to_beginning(&mut self) -> Result { self.file .inner - .seek(SeekFrom::Start(self.position_in_source())) + .seek(SeekFrom::Start( + self.position_in_source() + DELETION_FLAG_SIZE as u64, + )) .map_err(|e| FieldIOError::new(ErrorKind::IoError(e), None)) } pub fn seek_before_deletion_flag(&mut self) -> Result { self.file .inner - .seek(SeekFrom::Start(self.position_in_source() - 1)) + .seek(SeekFrom::Start(self.position_in_source())) .map_err(|e| FieldIOError::new(ErrorKind::IoError(e), None)) } } @@ -227,7 +229,7 @@ where /// - true -> the record is marked as deleted /// - false -> the record is **not** marked as deleted pub fn is_deleted(&mut self) -> Result { - let deletion_flag_pos = self.position_in_source() - DELETION_FLAG_SIZE as u64; + let deletion_flag_pos = self.position_in_source(); self.file .inner .seek(SeekFrom::Start(deletion_flag_pos)) @@ -329,7 +331,7 @@ impl<'a, T> FileRecordIterator<'a, T> { /// # fn main() -> Result<(), Box> { /// let mut file = dbase::File::open_read_only("tests/data/stations.dbf")?; /// -/// assert_eq!(file.num_records(), 6); +/// assert_eq!(file.num_records(), 86); /// /// let name_idx = file.field_index("name").unwrap(); /// let marker_color_idx = file.field_index("marker-col").unwrap(); @@ -501,8 +503,8 @@ impl File { ); let end_of_last_record = self.header.offset_to_first_record as u64 - + self.num_records() as u64 - * (DELETION_FLAG_SIZE as u64 + self.header.size_of_record as u64); + + (self.num_records() as u64 * self.header.size_of_record as u64); + self.inner .seek(SeekFrom::Start(end_of_last_record)) .map_err(|error| Error::io_error(error, self.num_records()))?; diff --git a/src/header.rs b/src/header.rs index 3352bf4..efa13b8 100644 --- a/src/header.rs +++ b/src/header.rs @@ -4,7 +4,6 @@ use crate::encoding::DynEncoding; use std::io::{Read, Write}; use crate::field::types::Date; -use crate::field::DELETION_FLAG_SIZE; use crate::memo::MemoFileType; // Used this as source: https://blog.codetitans.pl/post/dbf-and-language-code-page/ @@ -412,9 +411,8 @@ impl Header { if index >= self.num_records as usize { None } else { - let offset = self.offset_to_first_record as usize - + (index * (self.size_of_record as usize + DELETION_FLAG_SIZE)) - + DELETION_FLAG_SIZE; + let offset = + self.offset_to_first_record as usize + (index * self.size_of_record as usize); Some(offset) } } diff --git a/tests/data/stations.dbf b/tests/data/stations.dbf index 0551986131513e65fd5d8b05c6622779b2d22953..2d13f6377f27e225d75a3d6d74b88028ce610c94 100644 GIT binary patch literal 87623 zcmeI5&u-&35XJ-Sp{Kq;PXv0{ld!!M?bT6Yr|mj5;v`0UDHw~Eh)`sfqU?Inm+52n zQgq~agE+}%vzuO`@6*MWEE^t)Kh2Qy@6{h)eRICIxA*tnKda>_1`;Z z)H+_8n9YORd+hUOT|fM}v;NWF|5}}EuMR@=YMP=kNnvf@eka>vfhF{fv{74^)h5Mo za|r0og=-CvxOYVzM(yXpTb)g>qV?s%8duysski$CpWyd)a6c~C&=e+Wr*h+yiE27~ z{PdoEO7IDO&xZ6<#re4{?A+?OR?~M+{Z4<*`*yMDjQ#!y^jVT%oBGkCP*~ zeGR1J?mo@Vg!4CpMYMD6vq3Ej`?+0Or)D~;v8h9}+P%qjEW-O9dV2{zuEs|P-*;+# zn)F7!-+e6feV?X2F8JVcd@<_f(bqKU$k?(ED(Hxzsm#6o`7EzVSb5e>gHrjj2!Y3H~h}VBQ|u#`r3%#DNcJ?u$FGOVfe5 z+q;5dC+Vz>b78f6MS%8>S_#neWfEsT;70Iq=lI~@54@g-CRq)LJy-RSEAW8(@eg$) z1vtDKcSXL?DsOe}JkIes$469plaNBK=ZF)0x!nXG*VG5ExhGNjg>7p!h?0=AMtk%T ze1eZQ=C|^mRG~2Hr)Cqq2e+=zBpqDv!IN~x)_c>0q#=!c0bhSlcU}(@u0O5EgT$ zAtE)@p{b7xKDeoWsxKvE_rf?eB9>6nl#@E5arRQ8fF$g}vf2Qtusb8N} zM%rdUCgaO3BlraWrIP#1|2+&+RX?dt@jvsIUgFmYKEZ#9tS}y zWXLWq_~0SCgQBpF`0+R3PsOJ%#=g;w#t7*l_yiwR;~yC9S0R8U=X|)?vZ~M0TcByV z;H9^Y%}V7uMoG&D4B#X9xEdcE{I}x54_ycku7ScWM42gWv_+9rrB$02)%E-N2 z7X$6>NoX3kM$F9X?ld(%H9i{gTX_f3N&Jp1=>L_Q+K8zSHQOin1RoUqgZ8SczmwZI zM~m0(G(JszdD=(F7Fy^@In3R@DnCIP`Pa9b;1hgM@Q?L! zt$ynY>StVv1{j^~rg#4b%|`VQ~eM~zR7k2mT!5>G3=%8tUS z)A?uKF0n2HH9ju*;2M7#+9WF9Z(8YKQ7_UGe1Z=*=&uDHMPvMhahM%72tL6F1^?7| zXVxjKd^UiJ*4^B1`tfnW2lwM|YVHkXIlqxay*AD(b$>Q-hHna0^!3WRBKWM#I zZ^NdY#@NJY7uGnKdOQT5;Deg_=T@q1YBkh0F7%(-XbffV6MTXX3jQz(`9)LND|KXS zS)~T)LU4|c3qJT9U;OyRT8*NV-$EjOD=Wb#_<+N{6Zs^}F9ADg5PZ1;EX1)L28b`G zE9)=1#+iu1GcDIq<3kR7rDl~)Mh&GJ2Vw{hO?`q73ck$lo2Bs~X44dPs_GDdCi?`R z;DdrcHieyc`S^GxcIA!}e1Z=O{=ShMou4wZD0;i^gZuOwbBE;IaC-(@{eAK&x#Md|Xo>9Q;Gw zB#G3&m7RB^4guZ}e1Z=!%pVf*+{V&+tEJ>#E4m*nONZbS{D-IjIM!9xZKRP-hLx7L zy|V_QL^(+qv-t7fyM0- ze1Z>w`F$Y=QcB-0O_Ocf=8jGM+TA~?w?{a~#|0mJjz81M#6=8{1$fvy4>&sy9Q>2u rwKUK;F}ic#&9oHS_N7GfsbigN_n8{lcqKmmUTS@s`nVb&dvWhSt#qCk delta 23 ecmX@UhV_R5H*+};BO3z)!$O818@Y2N7^MJIbOtQ| diff --git a/tests/test_file.rs b/tests/test_file.rs index 4ec0e26..3f84fa9 100644 --- a/tests/test_file.rs +++ b/tests/test_file.rs @@ -30,7 +30,7 @@ fn copy_to_named_tmp_file(origin: &str) -> std::io::Result Result<(), Box> { let mut file = dbase::File::open_read_only("tests/data/stations.dbf")?; - assert_eq!(file.num_records(), 6); + assert_eq!(file.num_records(), STATIONS_DBG_NUM_RECORDS); let name_idx = file.field_index("name").unwrap(); let marker_color_idx = file.field_index("marker-col").unwrap(); @@ -89,12 +89,14 @@ fn test_file_read_only() -> Result<(), Box> { Ok(()) } +const STATIONS_DBG_NUM_RECORDS: usize = 86; + #[test] fn test_file_read_write() -> Result<(), Box> { let tmp_file = copy_to_tmp_file("tests/data/stations.dbf")?; let mut file = dbase::File::open(tmp_file)?; - assert_eq!(file.num_records(), 6); + assert_eq!(file.num_records(), STATIONS_DBG_NUM_RECORDS); let name_idx = file.field_index("name").unwrap(); let marker_color_idx = file.field_index("marker-col").unwrap(); @@ -222,11 +224,11 @@ fn test_file_append_record() -> Result<(), Box> { { let mut file = dbase::File::open_read_write(tmp_file.path())?; - assert_eq!(file.num_records(), 6); + assert_eq!(file.num_records(), STATIONS_DBG_NUM_RECORDS); file.append_record(&new_record)?; - assert_eq!(file.num_records(), 7); - let record = file.record(6).unwrap().read()?; + assert_eq!(file.num_records(), STATIONS_DBG_NUM_RECORDS + 1); + let record = file.record(STATIONS_DBG_NUM_RECORDS).unwrap().read()?; assert_eq!(record, new_record); } @@ -234,8 +236,8 @@ fn test_file_append_record() -> Result<(), Box> { // Check that after closing the file, if we re-open it, // our appended record is still here let mut file = dbase::File::open_read_write(tmp_file.path())?; - assert_eq!(file.num_records(), 7); - let record = file.record(6).unwrap().read()?; + assert_eq!(file.num_records(), STATIONS_DBG_NUM_RECORDS + 1); + let record = file.record(STATIONS_DBG_NUM_RECORDS).unwrap().read()?; assert_eq!(record, new_record); } @@ -299,11 +301,25 @@ fn test_file_char_trimming() -> Result<(), Box> { } ); + let mut file = dbase::File::open_read_only("tests/data/stations.dbf")?; + let reading = dbase::ReadingOptions::default().character_trim(dbase::TrimOption::End); + file.set_options(reading); + + let expected_trim_end = StationRecord { + name: format!("{}", "Franconia-Springfield",), + marker_col: format!("{}", "#0000ff",), + marker_sym: format!("{}", "rail-metro",), + line: format!("{}", "blue",), + }; + + let record = file.record(1).unwrap().read_as::()?; + assert_eq!(record, expected_trim_end); + let mut file = dbase::File::open_read_only("tests/data/stations.dbf")?; let reading = dbase::ReadingOptions::default().character_trim(dbase::TrimOption::Begin); file.set_options(reading); - let expected = StationRecord { + let expected_trim_begin = StationRecord { name: format!( "{:width$}", "Franconia-Springfield", @@ -325,11 +341,8 @@ fn test_file_char_trimming() -> Result<(), Box> { width = file.fields()[0].length() as usize ), }; - let record = file.record(1).unwrap().read_as::()?; - - assert_eq!(record, expected); - + assert_eq!(record, expected_trim_begin); Ok(()) }