Skip to content

Commit

Permalink
Update SimpleStringDictionary to improve the performance when adding …
Browse files Browse the repository at this point in the history
…new strings. The string handles are stored in an insertion sorted array, so lookup is now O(log N).
  • Loading branch information
cesaref committed Oct 10, 2024
1 parent 39beafe commit 2573b63
Showing 1 changed file with 66 additions and 22 deletions.
88 changes: 66 additions & 22 deletions containers/choc_Value.h
Original file line number Diff line number Diff line change
Expand Up @@ -428,25 +428,36 @@ class StringDictionary
//==============================================================================
/** A simple implementation of StringDictionary.
This should have good performance for typical-sized dictionaries.
Adding new strings will require O(n) time where n = dictionary size, but
Adding new strings will require O(log n) time where n = dictionary size, but
retrieving the string for a handle is fast with O(1).
*/
struct SimpleStringDictionary : public StringDictionary
{
SimpleStringDictionary() = default;
SimpleStringDictionary (const SimpleStringDictionary& other) : strings (other.strings) {}
SimpleStringDictionary (SimpleStringDictionary&& other) : strings (std::move (other.strings)) {}
SimpleStringDictionary& operator= (const SimpleStringDictionary& other) { strings = other.strings; return *this; }
SimpleStringDictionary& operator= (SimpleStringDictionary&& other) { strings = std::move (other.strings); return *this; }
SimpleStringDictionary (const SimpleStringDictionary& other) : strings (other.strings), stringMap (other.stringMap) {}
SimpleStringDictionary (SimpleStringDictionary&& other) : strings (std::move (other.strings)), stringMap (std::move (other.stringMap)) {}
SimpleStringDictionary& operator= (const SimpleStringDictionary& other) { strings = other.strings; stringMap = other.stringMap; return *this; }
SimpleStringDictionary& operator= (SimpleStringDictionary&& other) { strings = std::move (other.strings); stringMap = std::move (other.stringMap); return *this; }

Handle getHandleForString (std::string_view) override;
std::string_view getStringForHandle (Handle handle) const override;

bool empty() const { return strings.empty(); }
void clear();

size_t getRawDataSize() const { return strings.size(); }
const char* getRawData() const { return strings.data(); }

void setRawData (const void*, size_t);

private:
std::pair<std::vector<uint32_t>::const_iterator, bool> findGreaterThanOrEqual (std::string_view) const;

/// The strings are stored in a single chunk, which can be saved and
/// reloaded if necessary.
/// reloaded if necessary. The stringMap is a sorted vector of handles
/// supporting fast lookup of strings in the map
std::vector<char> strings;
std::vector<uint32_t> stringMap;
};

//==============================================================================
Expand Down Expand Up @@ -2924,11 +2935,10 @@ template <typename OutputStream> void Value::serialise (OutputStream& o) const
{
o.write (getRawData(), value.type.getValueDataSize());

if (auto stringDataSize = static_cast<uint32_t> (dictionary.strings.size()))
if (auto stringDataSize = static_cast<uint32_t> (dictionary.getRawDataSize()))
{
CHOC_ASSERT (dictionary.strings.back() == 0);
Type::SerialisationHelpers::writeVariableLengthInt (o, stringDataSize);
o.write (dictionary.strings.data(), stringDataSize);
o.write (dictionary.getRawData(), stringDataSize);
}
}
}
Expand All @@ -2953,9 +2963,10 @@ inline Value Value::deserialise (InputData& input)
{
auto stringDataSize = Type::SerialisationHelpers::readVariableLengthInt (input);
Type::SerialisationHelpers::expect (stringDataSize <= static_cast<uint32_t> (input.end - input.start));
v.dictionary.strings.resize (stringDataSize);
std::memcpy (v.dictionary.strings.data(), input.start, stringDataSize);
Type::SerialisationHelpers::expect (v.dictionary.strings.back() == 0);
v.dictionary.setRawData (input.start, stringDataSize);
// v.dictionary.strings.resize (stringDataSize);
// std::memcpy (v.dictionary.strings.data(), input.start, stringDataSize);
// Type::SerialisationHelpers::expect (v.dictionary.strings.back() == 0);
}

return v;
Expand Down Expand Up @@ -3213,20 +3224,20 @@ inline SimpleStringDictionary::Handle SimpleStringDictionary::getHandleForString
if (text.empty())
return {};

for (size_t i = 0; i < strings.size(); ++i)
{
std::string_view sv (strings.data() + i);

if (text == sv)
return { static_cast<decltype(Handle::handle)> (i + 1) };
auto i = findGreaterThanOrEqual (text);

i += sv.length();
}
if (i.second)
return { *i.first };

auto newHandle = static_cast<decltype(Handle::handle)> (strings.size() + 1);
strings.reserve (strings.size() + text.length() + 1);

if (strings.size() > 100 && (strings.capacity() < (strings.size() + text.length() + 1)))
strings.reserve (strings.size() + 1000);

strings.insert (strings.end(), text.begin(), text.end());
strings.push_back (0);

stringMap.insert (i.first, newHandle);
return { newHandle };
}

Expand All @@ -3241,8 +3252,41 @@ inline std::string_view SimpleStringDictionary::getStringForHandle (Handle handl
return std::string_view (strings.data() + (handle.handle - 1));
}

inline void SimpleStringDictionary::clear() { strings.clear(); }
inline void SimpleStringDictionary::clear() { strings.clear(); stringMap.clear(); }

inline void SimpleStringDictionary::setRawData (const void* p, size_t n)
{
strings.resize (n);
std::memcpy (strings.data(), p, n);

// Populate string map
for (size_t i = 0; i < strings.size(); ++i)
{
std::string_view sv (strings.data() + i);
auto v = findGreaterThanOrEqual (sv);
stringMap.insert (v.first, static_cast<uint32_t> (i));
i += sv.length();
}
}

inline std::pair<std::vector<uint32_t>::const_iterator, bool> SimpleStringDictionary::findGreaterThanOrEqual (std::string_view v) const
{
bool exactMatch = false;

auto it = std::lower_bound(stringMap.begin(), stringMap.end(), v, [&] (uint32_t i, std::string_view sv) -> bool
{
auto c = sv.compare (getStringForHandle ( { i }));

if (c == 0)
exactMatch = true;

return c > 0;
});

return std::pair (it, exactMatch);
}

} // namespace choc::value


#endif // CHOC_VALUE_POOL_HEADER_INCLUDED

0 comments on commit 2573b63

Please sign in to comment.