Skip to content

Commit

Permalink
20457: Fixes bug where utf-8 spaces and certain language characters c…
Browse files Browse the repository at this point in the history
…ould cause labels not to be stored and loaded properly (#145)
  • Loading branch information
howsohazard authored Jun 3, 2024
1 parent 526d6de commit a10ea21
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 15 deletions.
22 changes: 15 additions & 7 deletions src/Amalgam/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -388,10 +388,19 @@ void Parser::SkipToEndOfIdentifier(bool allow_leading_label_marks)
//eat all characters until one that indicates end of identifier
while(pos < code->size())
{
if(StringManipulation::IsUtf8Whitespace(*code, pos))
break;

auto cur_char = (*code)[pos];
if(cur_char == '\t' || cur_char == '\n' || cur_char == '\v' || cur_char == '\f'
|| cur_char == '\r' || cur_char == ' '
|| cur_char == '#'

if(cur_char == '\\' && pos + 1 < code->size())
{
pos += 2;
continue;
}

//check language characters
if(cur_char == '#'
|| cur_char == '(' || cur_char == ')'
|| cur_char == ';')
break;
Expand Down Expand Up @@ -653,11 +662,10 @@ void Parser::AppendComments(EvaluableNode *n, size_t indentation_depth, bool pre
//if the string contains a character that needs to be escaped for labels, then it will be converted to a quoted string
std::string ConvertLabelToQuotedStringIfNecessary(const std::string &s)
{
bool needs_escape = false;
if(s.empty())
return s;

//check for any characters that need to be escaped
if(s.find_first_of(" \t\"\n\r") != std::string::npos)
needs_escape = true;
bool needs_escape = Parser::HasCharactersBeyondIdentifier(s, true);

if(!needs_escape)
{
Expand Down
22 changes: 15 additions & 7 deletions src/Amalgam/Parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,27 @@ class Parser
}

//returns true if the string needs to be backslashified, has spaces, or has special characters
inline static bool HasCharactersBeyondIdentifier(const std::string &s)
inline static bool HasCharactersBeyondIdentifier(const std::string &s, bool label = false)
{
for(auto c : s)
bool in_label_initial_hashes = label;
for(size_t i = 0; i < s.size(); i++)
{
switch(c)
//can ignore any #'s up front
if(in_label_initial_hashes)
{
if(s[i] == '#')
continue;
in_label_initial_hashes = false;
}

if(StringManipulation::IsUtf8Whitespace(s, i))
return true;

switch(s[i])
{
case '\0':
case '\\':
case '"':
case '\t':
case '\n':
case '\r':
case ' ':
case '(':
case ')':
case '.':
Expand Down
2 changes: 1 addition & 1 deletion src/Amalgam/string/StringManipulation.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace StringManipulation

//returns the width in bytes of the character at position in string s if it is whitespace,
// 0 if it is not whitespace
inline size_t IsUtf8Whitespace(std::string &s, size_t position)
inline size_t IsUtf8Whitespace(const std::string &s, size_t position)
{
auto cur_char = s[position];
if(cur_char == '\t' || cur_char == '\n' || cur_char == '\v' || cur_char == '\f'
Expand Down

0 comments on commit a10ea21

Please sign in to comment.