From 82e7b5798787c5dc8225ab31a00dcb9a657438d1 Mon Sep 17 00:00:00 2001 From: Rohan Vanheusden Date: Wed, 7 Jul 2021 18:21:10 -0700 Subject: [PATCH 1/4] Implement handling of mergeCells tag --- src/xlsx.rs | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/src/xlsx.rs b/src/xlsx.rs index f8b2fcd6..85b7aec1 100644 --- a/src/xlsx.rs +++ b/src/xlsx.rs @@ -2,6 +2,7 @@ use std::borrow::Cow; use std::collections::HashMap; use std::io::BufReader; use std::io::{Read, Seek}; +use std::iter; use std::str::FromStr; use log::warn; @@ -12,7 +13,7 @@ use zip::read::{ZipArchive, ZipFile}; use zip::result::ZipError; use crate::vba::VbaProject; -use crate::{Cell, CellErrorType, DataType, Metadata, Range, Reader, Table}; +use crate::{Cell, CellErrorType, CellType, DataType, Metadata, Range, Reader, Table}; type XlsReader<'a> = XmlReader>>; @@ -606,6 +607,7 @@ where { let mut cells = Vec::new(); let mut buf = Vec::new(); + let mut merge_cells = None; 'xml: loop { buf.clear(); match xml.read_event(&mut buf) { @@ -631,7 +633,16 @@ where } b"sheetData" => { read_data(&strings, &formats, &mut xml, &mut cells)?; - break; + } + b"mergeCells" => { + let merge_count: usize = std::str::from_utf8( + get_attribute(e.attributes(), b"count")? + .ok_or(XlsxError::XmlEof("count"))?, + ) + .unwrap_or("0") + .parse()?; + + merge_cells = Some(read_merge_cells(&mut xml, merge_count)?); } _ => (), } @@ -641,7 +652,14 @@ where _ => (), } } - Ok(Range::from_sparse(cells)) + + let mut range = Range::from_sparse(cells); + + if let Some(ref merge_cells) = merge_cells { + write_merge_cells(merge_cells, &mut range); + } + + Ok(range) } impl Reader for Xlsx { @@ -943,6 +961,47 @@ fn read_sheet_data( }) } +fn read_merge_cells( + xml: &mut XlsReader<'_>, + merge_count: usize, +) -> Result, XlsxError> { + let mut buf = Vec::new(); + let mut merge_dimensions = Vec::with_capacity(merge_count); + + loop { + buf.clear(); + + match xml.read_event(&mut buf) { + Ok(Event::Start(ref e)) if e.local_name() == b"mergeCell" => { + let merge_ref = + get_attribute(e.attributes(), b"ref")?.ok_or(XlsxError::XmlEof("ref"))?; + merge_dimensions.push(get_dimension(merge_ref)?); + } + Ok(Event::End(ref e)) if e.local_name() == b"mergeCells" => { + return Ok(merge_dimensions) + } + Ok(Event::Eof) => return Err(XlsxError::XmlEof("mergeCells")), + Err(e) => return Err(XlsxError::Xml(e)), + _ => (), + } + } +} + +fn write_merge_cells(merge_cells: &[Dimensions], range: &mut Range) +where + T: CellType, +{ + for merge_cell in merge_cells { + let start = (merge_cell.start.0 as usize, merge_cell.start.1 as usize); + let end = (merge_cell.end.0 as usize, merge_cell.end.1 as usize); + let source_cell = range[start].clone(); + + for target in (start.0..=end.0).flat_map(|r| iter::repeat(r).zip(start.1..=end.1)) { + range[target].clone_from(&source_cell); + } + } +} + // This tries to detect number formats that are definitely date/time formats. // This is definitely not perfect! fn is_custom_date_format(format: &str) -> bool { From ff59f279b2cc4f7ac2abd9c1649028beb340499b Mon Sep 17 00:00:00 2001 From: Rohan Vanheusden Date: Mon, 12 Jul 2021 14:31:19 -0700 Subject: [PATCH 2/4] Use getter/setter methods rather than Index for writing merge cells --- src/xlsx.rs | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/xlsx.rs b/src/xlsx.rs index 85b7aec1..c9bda04d 100644 --- a/src/xlsx.rs +++ b/src/xlsx.rs @@ -656,7 +656,7 @@ where let mut range = Range::from_sparse(cells); if let Some(ref merge_cells) = merge_cells { - write_merge_cells(merge_cells, &mut range); + write_merge_cells(merge_cells, &mut range)?; } Ok(range) @@ -987,19 +987,27 @@ fn read_merge_cells( } } -fn write_merge_cells(merge_cells: &[Dimensions], range: &mut Range) +fn write_merge_cells(merge_cells: &[Dimensions], range: &mut Range) -> Result<(), XlsxError> where T: CellType, { for merge_cell in merge_cells { - let start = (merge_cell.start.0 as usize, merge_cell.start.1 as usize); - let end = (merge_cell.end.0 as usize, merge_cell.end.1 as usize); - let source_cell = range[start].clone(); + let start = (merge_cell.start.0, merge_cell.start.1); + let end = (merge_cell.end.0, merge_cell.end.1); + let source_cell = range + .get_value(start) + .ok_or_else(|| { + XlsxError::Unexpected("expected start cell of merge range to be present") + })? + .clone(); for target in (start.0..=end.0).flat_map(|r| iter::repeat(r).zip(start.1..=end.1)) { - range[target].clone_from(&source_cell); + // range[target].clone_from(&source_cell); + range.set_value(target, source_cell.clone()); } } + + Ok(()) } // This tries to detect number formats that are definitely date/time formats. From 18a62636aeb353cfface6e80b93b21c91ed2a6be Mon Sep 17 00:00:00 2001 From: Rohan Vanheusden Date: Mon, 12 Jul 2021 14:32:31 -0700 Subject: [PATCH 3/4] Remove extraneous comment --- src/xlsx.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/xlsx.rs b/src/xlsx.rs index c9bda04d..dff3d7f9 100644 --- a/src/xlsx.rs +++ b/src/xlsx.rs @@ -1002,7 +1002,6 @@ where .clone(); for target in (start.0..=end.0).flat_map(|r| iter::repeat(r).zip(start.1..=end.1)) { - // range[target].clone_from(&source_cell); range.set_value(target, source_cell.clone()); } } From db0c882bf9def3aee296d0752ec7b0541f9eac36 Mon Sep 17 00:00:00 2001 From: Rohan Vanheusden Date: Mon, 12 Jul 2021 14:46:29 -0700 Subject: [PATCH 4/4] Remove redundancy in `start` and `end` declarations --- src/xlsx.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/xlsx.rs b/src/xlsx.rs index dff3d7f9..8bc5043c 100644 --- a/src/xlsx.rs +++ b/src/xlsx.rs @@ -992,8 +992,7 @@ where T: CellType, { for merge_cell in merge_cells { - let start = (merge_cell.start.0, merge_cell.start.1); - let end = (merge_cell.end.0, merge_cell.end.1); + let Dimensions { start, end } = *merge_cell; let source_cell = range .get_value(start) .ok_or_else(|| {