From 0dd6fca51ee0611f4ffcbecab18593a596902ef1 Mon Sep 17 00:00:00 2001 From: Matthias Koefferlein Date: Sat, 20 Jul 2024 22:35:54 +0200 Subject: [PATCH] Implemented a fix for #1794 (giant meta info produces invalid GDS) This fix solves two problems: * Too large meta info data * Too many meta info entries The first problem is fixed by splitting the strings that serialize the meta info. The second problem is fixed by introducing prefixed strings that indicate the attribute index within the string, not inside the PROPATTR record. The solution is backward compatible, although old versions will not read all meta info and skip entries that exceed the GDS capacity. Caveat: the produced GDS files may contain duplicate PROPATTR keys. This is not strictly illegal, but some third-party processors may drop such entries. --- .../gds2/db_plugin/dbGDS2ReaderBase.cc | 53 ++++++++- .../gds2/db_plugin/dbGDS2WriterBase.cc | 68 ++++++++--- .../gds2/db_plugin/dbGDS2WriterBase.h | 1 + .../gds2/unit_tests/dbGDS2WriterTests.cc | 109 +++++++++++++++++- 4 files changed, 210 insertions(+), 21 deletions(-) diff --git a/src/plugins/streamers/gds2/db_plugin/dbGDS2ReaderBase.cc b/src/plugins/streamers/gds2/db_plugin/dbGDS2ReaderBase.cc index 6435fd011d..2edd16847d 100644 --- a/src/plugins/streamers/gds2/db_plugin/dbGDS2ReaderBase.cc +++ b/src/plugins/streamers/gds2/db_plugin/dbGDS2ReaderBase.cc @@ -461,6 +461,7 @@ GDS2ReaderBase::read_context_info_cell () if (valid_hook) { std::vector &strings = m_context_info.insert (std::make_pair (cn, std::vector ())).first->second; + std::map > strings_ex; size_t attr = 0; @@ -474,16 +475,60 @@ GDS2ReaderBase::read_context_info_cell () attr = size_t (get_ushort ()); } else if (rec_id == sPROPVALUE) { - if (strings.size () <= attr) { - strings.resize (attr + 1, std::string ()); + const char *str = get_string (); + + // To embed long strings and more than 64k attributes, a separate notation is used: + // "#,

:" + // where is a string index and

is the part index (zero-based). + // For such properties, the PROPATTR value is ignored. This means however, that the + // attribute numbers may not be unique. + // See issue #1794. + + if (str[0] == '#') { + + tl::Extractor ex (str + 1); + size_t n = 0, p = 0; + if (ex.try_read (n) && ex.test (",") && ex.try_read (p) && ex.test (":")) { + if (strings.size () <= n) { + strings.resize (n + 1, std::string ()); + } + std::vector &sv = strings_ex[n]; + if (sv.size () <= p) { + sv.resize (p + 1, std::string ()); + } + sv[p] = ex.get (); + } + + } else { + + if (strings.size () <= attr) { + strings.resize (attr + 1, std::string ()); + } + strings [attr] = str; + } - strings [attr] = get_string (); } else { error (tl::to_string (tr ("ENDEL, PROPATTR or PROPVALUE record expected"))); } - } + } + + // combine the multipart strings (#1794) + for (auto es = strings_ex.begin (); es != strings_ex.end (); ++es) { + if (es->first < strings.size ()) { + std::string &s = strings [es->first]; + s.clear (); + size_t sz = 0; + for (auto i = es->second.begin (); i != es->second.end (); ++i) { + sz += i->size (); + } + s.reserve (sz); + for (auto i = es->second.begin (); i != es->second.end (); ++i) { + s += *i; + } + } + } } else { error (tl::to_string (tr ("Invalid record inside a context info cell"))); diff --git a/src/plugins/streamers/gds2/db_plugin/dbGDS2WriterBase.cc b/src/plugins/streamers/gds2/db_plugin/dbGDS2WriterBase.cc index 2b49a14918..9f1552bc6b 100644 --- a/src/plugins/streamers/gds2/db_plugin/dbGDS2WriterBase.cc +++ b/src/plugins/streamers/gds2/db_plugin/dbGDS2WriterBase.cc @@ -72,6 +72,54 @@ inline int scale (double sf, int value) } } +void +GDS2WriterBase::write_context_string (size_t n, const std::string &s) +{ + // max. size for GDS strings used as payload carrier + size_t chunk_size = 32000; + short max_short = std::numeric_limits::max (); + + if (n > size_t (max_short) || s.size () > chunk_size) { + + // Split strings and use a separate notation: "#,<+p>:..." for the partial + // strings. n is the string index and p the part index (zero based). + // The property number is not defined in that case. There may be properties with + // the same number. See issue #1794. + + size_t nchunks = (s.size () + (chunk_size - 1)) / chunk_size; + while (nchunks > 0) { + + --nchunks; + + std::string partial; + partial.reserve (chunk_size + 100); // approx. + partial += "#"; + partial += tl::to_string (n); + partial += ","; + partial += tl::to_string (nchunks); + partial += ":"; + size_t pos = nchunks * chunk_size; + partial += std::string (s, pos, std::min (s.size (), pos + chunk_size) - pos); + + write_record_size (6); + write_record (sPROPATTR); + write_short (n <= size_t (max_short) ? short (n) : max_short); + + write_string_record (sPROPVALUE, partial); + + } + + } else { + + write_record_size (6); + write_record (sPROPATTR); + write_short (short (n)); + + write_string_record (sPROPVALUE, s); + + } +} + void GDS2WriterBase::write_context_cell (db::Layout &layout, const short *time_data, const std::vector &cells) { @@ -112,15 +160,9 @@ GDS2WriterBase::write_context_cell (db::Layout &layout, const short *time_data, // Hint: write in the reverse order since this way, the reader is more efficient (it knows how many strings // will arrive) for (std::vector ::const_iterator s = context_prop_strings.end (); s != context_prop_strings.begin (); ) { - --s; - - write_record_size (6); - write_record (sPROPATTR); - write_short (short (std::distance (std::vector ::const_iterator (context_prop_strings.begin ()), s))); // = user string - - write_string_record (sPROPVALUE, *s); - + size_t n = std::distance (std::vector ::const_iterator (context_prop_strings.begin ()), s); + write_context_string (n, *s); } } @@ -151,15 +193,9 @@ GDS2WriterBase::write_context_cell (db::Layout &layout, const short *time_data, // Hint: write in the reverse order since this way, the reader is more efficient (it knows how many strings // will arrive) for (std::vector ::const_iterator s = context_prop_strings.end (); s != context_prop_strings.begin (); ) { - --s; - - write_record_size (6); - write_record (sPROPATTR); - write_short (short (std::distance (std::vector ::const_iterator (context_prop_strings.begin ()), s))); // = user string - - write_string_record (sPROPVALUE, *s); - + size_t n = std::distance (std::vector ::const_iterator (context_prop_strings.begin ()), s); + write_context_string (n, *s); } } diff --git a/src/plugins/streamers/gds2/db_plugin/dbGDS2WriterBase.h b/src/plugins/streamers/gds2/db_plugin/dbGDS2WriterBase.h index a70f3e6266..0d058e7fe2 100644 --- a/src/plugins/streamers/gds2/db_plugin/dbGDS2WriterBase.h +++ b/src/plugins/streamers/gds2/db_plugin/dbGDS2WriterBase.h @@ -169,6 +169,7 @@ class DB_PLUGIN_PUBLIC GDS2WriterBase void write_properties (const db::Layout &layout, db::properties_id_type prop_id); void write_context_cell (db::Layout &layout, const short *time_data, const std::vector &cells); + void write_context_string (size_t n, const std::string &s); }; } // namespace db diff --git a/src/plugins/streamers/gds2/unit_tests/dbGDS2WriterTests.cc b/src/plugins/streamers/gds2/unit_tests/dbGDS2WriterTests.cc index a840ef810d..4fce6a91e5 100644 --- a/src/plugins/streamers/gds2/unit_tests/dbGDS2WriterTests.cc +++ b/src/plugins/streamers/gds2/unit_tests/dbGDS2WriterTests.cc @@ -1196,7 +1196,7 @@ TEST(121) } // Meta info -TEST(130a) +TEST(130) { db::Layout layout_org; @@ -1312,6 +1312,113 @@ TEST(130a) EXPECT_EQ (layout_read.meta_info ("b").value.to_string (), "nil"); } +// Giant meta info (issue #1794) +TEST(131) +{ + db::Layout layout_org; + + layout_org.add_cell ("U"); + db::cell_index_type ci = layout_org.add_cell ("X"); + + std::vector ll1; + std::vector ll2; + + for (unsigned int i = 0; i < 100000; ++i) { + ll1.push_back (tl::Variant (i)); + ll2.push_back ("C" + tl::to_string (i)); + } + + layout_org.add_meta_info ("a", db::MetaInfo ("", ll1, true)); + layout_org.add_meta_info ("b", db::MetaInfo ("", "value", true)); + + layout_org.add_meta_info (ci, "a", db::MetaInfo ("", ll2, true)); + layout_org.add_meta_info (ci, "c", db::MetaInfo ("", -1, true)); + + std::string tmp_file = tl::TestBase::tmp_file ("tmp_GDS2Writer_131.gds"); + + { + tl::OutputStream out (tmp_file); + db::SaveLayoutOptions options; + db::Writer writer (options); + writer.write (layout_org, out); + } + + db::Layout layout_read; + + { + tl::InputStream in (tmp_file); + db::Reader reader (in); + reader.read (layout_read); + } + + EXPECT_EQ (layout_read.has_meta_info ("x"), false); + EXPECT_EQ (layout_read.has_meta_info ("a"), true); + EXPECT_EQ (layout_read.meta_info ("x").value.to_string (), "nil"); + EXPECT_EQ (layout_read.meta_info ("a").value == ll1, true); + EXPECT_EQ (layout_read.has_meta_info ("b"), true); + EXPECT_EQ (layout_read.meta_info ("b").value.to_string (), "value"); + + db::cell_index_type ci2 = layout_read.cell_by_name ("X").second; + + EXPECT_EQ (layout_read.meta_info (ci2, "x").value.to_string (), "nil"); + EXPECT_EQ (layout_read.meta_info (ci2, "a").value == ll2, true); + EXPECT_EQ (layout_read.meta_info (ci2, "c").value.to_string (), "-1"); +} + +// Many meta info (issue #1794) +TEST(132) +{ + db::Layout layout_org; + + layout_org.add_cell ("U"); + db::cell_index_type ci = layout_org.add_cell ("X"); + + for (unsigned int i = 0; i < 100000; ++i) { + layout_org.add_meta_info ("a" + tl::to_string (i), db::MetaInfo ("", i, true)); + } + layout_org.add_meta_info ("b", db::MetaInfo ("", "value", true)); + + for (unsigned int i = 0; i < 100000; ++i) { + layout_org.add_meta_info (ci, "a" + tl::to_string (i * 2), db::MetaInfo ("", i * 2, true)); + } + layout_org.add_meta_info (ci, "c", db::MetaInfo ("", -1, true)); + + std::string tmp_file = tl::TestBase::tmp_file ("tmp_GDS2Writer_132.gds"); + + { + tl::OutputStream out (tmp_file); + db::SaveLayoutOptions options; + db::Writer writer (options); + writer.write (layout_org, out); + } + + db::Layout layout_read; + + { + tl::InputStream in (tmp_file); + db::Reader reader (in); + reader.read (layout_read); + } + + EXPECT_EQ (layout_read.has_meta_info ("x"), false); + EXPECT_EQ (layout_read.meta_info ("x").value.to_string (), "nil"); + for (unsigned int i = 0; i < 10; ++i) { + EXPECT_EQ (layout_read.has_meta_info ("a" + tl::to_string (i)), true); + EXPECT_EQ (layout_read.meta_info ("a" + tl::to_string (i)).value.to_string (), tl::Variant (i).to_string ()); + } + EXPECT_EQ (layout_read.has_meta_info ("b"), true); + EXPECT_EQ (layout_read.meta_info ("b").value.to_string (), "value"); + + db::cell_index_type ci2 = layout_read.cell_by_name ("X").second; + + EXPECT_EQ (layout_read.meta_info (ci2, "x").value.to_string (), "nil"); + for (unsigned int i = 0; i < 10; ++i) { + EXPECT_EQ (layout_read.has_meta_info (ci2, "a" + tl::to_string (i * 2)), true); + EXPECT_EQ (layout_read.meta_info (ci2, "a" + tl::to_string (i * 2)).value.to_string (), tl::Variant (i * 2).to_string ()); + } + EXPECT_EQ (layout_read.meta_info (ci2, "c").value.to_string (), "-1"); +} + // Extreme fracturing by max. points TEST(166) {