Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support inline formatted strings #93

Merged
merged 3 commits into from
Dec 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: tidyxl
Title: Read Untidy Excel Files
Version: 1.0.9
Version: 1.0.10
Authors@R: c(
person("Duncan", "Garmonsway", email = "[email protected]", role = c("aut", "cre")),
person("Hadley", "Wickham", role = c("ctb"), comment = "Author of included readxl fragments"),
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# tidyxl 1.0.10

* Fixed a bug in the support for inline formatted strings, which sometimes
weren't being imported (#92)

# tidyxl 1.0.9

* CRAN fixes
Expand Down
9 changes: 6 additions & 3 deletions src/xlsxcell.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,12 @@ void xlsxcell::cacheValue(
) {
// 'v' for 'value' is either literal (numeric) or an index into a string table
rapidxml::xml_node<>* v = cell->first_node("v");
rapidxml::xml_node<>* is = cell->first_node("is");

std::string vvalue;
if (v != NULL) {
vvalue = v->value();
SET_STRING_ELT(book.content_, i, Rf_mkCharCE(vvalue.c_str(), CE_UTF8));
} else {
book.is_blank_[i] = true;
}

// 't' for 'type' defines the meaning of 'v' for value
Expand All @@ -100,15 +100,18 @@ void xlsxcell::cacheValue(

if (t != NULL && tvalue == "inlineStr") {
book.data_type_[i] = "character";
rapidxml::xml_node<>* is = cell->first_node("is");
if (is != NULL) { // Get the inline string if it's really there
// Parse it as though it's a simple string
std::string inlineString;
parseString(is, inlineString); // value is modified in place
// Also parse it as though it's a formatted string
SET_STRING_ELT(book.character_, i, Rf_mkCharCE(inlineString.c_str(), CE_UTF8));
book.character_formatted_[i] = parseFormattedString(is, book.styles_);
}
return;
} else if (v == NULL) {
// Can't now be an inline string (tested above)
book.is_blank_[i] = true;
book.data_type_[i] = "blank";
return;
} else if (t == NULL || tvalue == "n") {
Expand Down
Binary file added tests/testthat/inline-formatted-string.xlsx
Binary file not shown.
10 changes: 10 additions & 0 deletions tests/testthat/test-inline-strings.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@ test_that("can read sheets with inlineStr", {
expect_equal(x[14], "RQ11610")
})

test_that("can read sheets with formatted inline strings", {
  # Regression test: a cell stored as a formatted inline string must be
  # reported as a non-blank character cell whose formatted value is populated.
  # Original source: http://our.componentone.com/wp-content/uploads/2011/12/TestExcel.xlsx
  # These appear to come from LibreOffice 4.2.7.2.
  cells <- xlsx_cells("inline-formatted-string.xlsx")
  # Row of `cells` checked by this test -- presumably the formatted inline
  # string in the fixture; confirm against the workbook if the fixture changes.
  index <- 64
  expect_equal(cells$is_blank[index], FALSE)
  expect_equal(cells$data_type[index], "character")
  # Use `[[` to extract the element itself.  Single-bracket indexing of a list
  # column always yields a length-one list, which never equals NULL, so a
  # comparison against NULL through `[` could not detect a missing value.
  expect_false(is.null(cells$character_formatted[[index]]))
})

test_that("does not crash on phonetic strings", {
# https://github.com/nacnudus/tidyxl/issues/30
expect_error(xlsx_cells("phonetic.xlsx"), NA)
Expand Down