Skip to content

Commit 4c5724e

Browse files
committed
Fix bug mentioned in stephenberry#1445 (comment)
1 parent 4d4f63c commit 4c5724e

File tree

2 files changed

+117
-88
lines changed

2 files changed

+117
-88
lines changed

include/glaze/csv/read.hpp

+72-80
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ namespace glz
117117
}
118118
}
119119
};
120-
120+
121121
// CSV spec: https://www.ietf.org/rfc/rfc4180.txt
122122
// Quotes are escaped via double quotes
123123

@@ -151,43 +151,34 @@ namespace glz
151151
// Escaped quote
152152
value.push_back('"');
153153
++it;
154-
} else {
154+
}
155+
else {
155156
// Closing quote
156157
break;
157158
}
158-
} else {
159+
}
160+
else {
159161
value.push_back(*it);
160162
++it;
161163
}
162164
}
163165
// After closing quote, expect comma, newline, or end of input
164-
if (it != end) {
165-
if (*it == ',') {
166-
++it; // Skip the comma
167-
} else if (*it == '\n') {
168-
// End of record, do nothing
169-
} else if (it == end) {
170-
// End of input
171-
} else {
172-
// Invalid character after closing quote
173-
ctx.error = error_code::syntax_error;
174-
return;
175-
}
166+
if (it != end && *it != ',' && *it != '\n') {
167+
// Invalid character after closing quote
168+
ctx.error = error_code::syntax_error;
169+
return;
176170
}
177-
} else {
171+
}
172+
else {
178173
// Unquoted field
179174
while (it != end && *it != ',' && *it != '\n') {
180175
value.push_back(*it);
181176
++it;
182177
}
183-
if (it != end && *it == ',') {
184-
++it; // Skip the comma
185-
}
186178
}
187179
}
188180
};
189181

190-
191182
template <bool_t T>
192183
struct from<CSV, T>
193184
{
@@ -220,8 +211,7 @@ namespace glz
220211
template <char delim>
221212
inline void goto_delim(auto&& it, auto&& end) noexcept
222213
{
223-
while (++it != end && *it != delim)
224-
;
214+
while (++it != end && *it != delim);
225215
}
226216

227217
inline auto read_column_wise_keys(auto&& ctx, auto&& it, auto&& end)
@@ -469,87 +459,89 @@ namespace glz
469459
decode_hash_with_size<CSV, T, HashInfo, HashInfo.type>::op(key.data(), end, key.size());
470460

471461
if (index < N) [[likely]] {
472-
visit<N>([&]<size_t I>() {
473-
decltype(auto) member = [&]() -> decltype(auto) {
474-
if constexpr (reflectable<T>) {
475-
return get_member(value, get<I>(to_tuple(value)));
476-
}
477-
else {
478-
return get_member(value, get<I>(reflect<T>::values));
479-
}
480-
}();
481-
482-
using M = std::decay_t<decltype(member)>;
483-
if constexpr (fixed_array_value_t<M> && emplace_backable<M>) {
484-
size_t col = 0;
485-
while (it != end) {
486-
if (col < member.size()) [[likely]] {
487-
read<CSV>::op<Opts>(member[col][csv_index], ctx, it, end);
462+
visit<N>(
463+
[&]<size_t I>() {
464+
decltype(auto) member = [&]() -> decltype(auto) {
465+
if constexpr (reflectable<T>) {
466+
return get_member(value, get<I>(to_tuple(value)));
488467
}
489-
else [[unlikely]] {
490-
read<CSV>::op<Opts>(member.emplace_back()[csv_index], ctx, it, end);
468+
else {
469+
return get_member(value, get<I>(reflect<T>::values));
491470
}
471+
}();
472+
473+
using M = std::decay_t<decltype(member)>;
474+
if constexpr (fixed_array_value_t<M> && emplace_backable<M>) {
475+
size_t col = 0;
476+
while (it != end) {
477+
if (col < member.size()) [[likely]] {
478+
read<CSV>::op<Opts>(member[col][csv_index], ctx, it, end);
479+
}
480+
else [[unlikely]] {
481+
read<CSV>::op<Opts>(member.emplace_back()[csv_index], ctx, it, end);
482+
}
492483

493-
if (*it == '\r') {
494-
++it;
495-
if (*it == '\n') {
484+
if (*it == '\r') {
485+
++it;
486+
if (*it == '\n') {
487+
++it;
488+
break;
489+
}
490+
else [[unlikely]] {
491+
ctx.error = error_code::syntax_error;
492+
return;
493+
}
494+
}
495+
else if (*it == '\n') {
496496
++it;
497497
break;
498498
}
499+
else if (it == end) {
500+
return;
501+
}
502+
503+
if (*it == ',') [[likely]] {
504+
++it;
505+
}
499506
else [[unlikely]] {
500507
ctx.error = error_code::syntax_error;
501508
return;
502509
}
503-
}
504-
else if (*it == '\n') {
505-
++it;
506-
break;
507-
}
508-
else if (it == end) {
509-
return;
510-
}
511510

512-
if (*it == ',') [[likely]] {
513-
++it;
511+
++col;
514512
}
515-
else [[unlikely]] {
516-
ctx.error = error_code::syntax_error;
517-
return;
518-
}
519-
520-
++col;
521513
}
522-
}
523-
else {
524-
while (it != end) {
525-
read<CSV>::op<Opts>(member, ctx, it, end);
514+
else {
515+
while (it != end) {
516+
read<CSV>::op<Opts>(member, ctx, it, end);
526517

527-
if (*it == '\r') {
528-
++it;
529-
if (*it == '\n') {
518+
if (*it == '\r') {
519+
++it;
520+
if (*it == '\n') {
521+
++it;
522+
break;
523+
}
524+
else [[unlikely]] {
525+
ctx.error = error_code::syntax_error;
526+
return;
527+
}
528+
}
529+
else if (*it == '\n') {
530530
++it;
531531
break;
532532
}
533+
534+
if (*it == ',') [[likely]] {
535+
++it;
536+
}
533537
else [[unlikely]] {
534538
ctx.error = error_code::syntax_error;
535539
return;
536540
}
537541
}
538-
else if (*it == '\n') {
539-
++it;
540-
break;
541-
}
542-
543-
if (*it == ',') [[likely]] {
544-
++it;
545-
}
546-
else [[unlikely]] {
547-
ctx.error = error_code::syntax_error;
548-
return;
549-
}
550542
}
551-
}
552-
}, index);
543+
},
544+
index);
553545

554546
if (bool(ctx.error)) [[unlikely]] {
555547
return;

tests/csv_test/csv_test.cpp

+45-8
Original file line numberDiff line numberDiff line change
@@ -589,21 +589,58 @@ suite fish_record = [] {
589589
};
590590
};
591591

592-
struct CurrencyCSV {
593-
std::vector<std::string> Entity;
594-
std::vector<std::string> Currency;
595-
std::vector<std::string> AlphabeticCode;
596-
std::vector<std::string> NumericCode;
597-
std::vector<std::string> MinorUnit;
598-
std::vector<std::string> WithdrawalDate;
592+
struct CurrencyCSV
593+
{
594+
std::vector<std::string> Entity;
595+
std::vector<std::string> Currency;
596+
std::vector<std::string> AlphabeticCode;
597+
std::vector<std::string> NumericCode;
598+
std::vector<std::string> MinorUnit;
599+
std::vector<std::string> WithdrawalDate;
599600
};
600601

601602
suite currency_csv_test = [] {
602-
"currency"_test = [] {
603+
"currency_row"_test = [] {
604+
CurrencyCSV obj{};
605+
std::string buffer{};
606+
// auto ec = glz::read_file_csv(obj, GLZ_TEST_DIRECTORY "/currency.csv", buffer);
607+
// expect(not ec) << glz::format_error(ec, buffer) << '\n';
608+
};
609+
"currency_col"_test = [] {
603610
CurrencyCSV obj{};
604611
std::string buffer{};
605612
auto ec = glz::read_file_csv<glz::colwise>(obj, GLZ_TEST_DIRECTORY "/currency.csv", buffer);
606613
expect(not ec) << glz::format_error(ec, buffer) << '\n';
614+
615+
constexpr auto kExpectedSize = 445;
616+
617+
expect(obj.Entity.size() == kExpectedSize);
618+
expect(obj.Currency.size() == kExpectedSize);
619+
expect(obj.AlphabeticCode.size() == kExpectedSize);
620+
expect(obj.NumericCode.size() == kExpectedSize);
621+
expect(obj.MinorUnit.size() == kExpectedSize);
622+
expect(obj.WithdrawalDate.size() == kExpectedSize);
623+
624+
expect(obj.Entity[0] == "AFGHANISTAN");
625+
expect(obj.Currency[0] == "Afghani");
626+
expect(obj.AlphabeticCode[0] == "AFN");
627+
expect(obj.NumericCode[0] == "971");
628+
expect(obj.MinorUnit[0] == "2");
629+
expect(obj.WithdrawalDate[0] == "");
630+
631+
expect(obj.Entity[29] == "BONAIRE, SINT EUSTATIUS AND SABA");
632+
expect(obj.Currency[29] == "US Dollar");
633+
expect(obj.AlphabeticCode[29] == "USD");
634+
expect(obj.NumericCode[29] == "840");
635+
expect(obj.MinorUnit[29] == "2");
636+
expect(obj.WithdrawalDate[29] == "");
637+
638+
expect(obj.Entity[324] == "EUROPEAN MONETARY CO-OPERATION FUND (EMCF)");
639+
expect(obj.Currency[324] == "European Currency Unit (E.C.U)");
640+
expect(obj.AlphabeticCode[324] == "XEU");
641+
expect(obj.NumericCode[324] == "954");
642+
expect(obj.MinorUnit[324] == "");
643+
expect(obj.WithdrawalDate[324] == "1999-01");
607644
};
608645
};
609646

0 commit comments

Comments
 (0)