Skip to content

Commit

Permalink
Fix a small bug in the Turtle parser (ad-freiburg#1394)
Browse files Browse the repository at this point in the history
Fixes ad-freiburg#1392. The problem occurred for typed literals with escaped characters.
  • Loading branch information
joka921 authored Jul 15, 2024
1 parent 67145fa commit 6a6a4b8
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 12 deletions.
19 changes: 10 additions & 9 deletions src/parser/RdfEscaping.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ std::string hexadecimalCharactersToUtf8(std::string_view hex) {
* is not allowed is found.
*/
template <bool acceptOnlyNumericEscapes, bool acceptOnlyBackslashAndNewline,
typename InputIterator, typename OutputIterator>
void unescapeStringAndNumericEscapes(InputIterator beginIterator,
InputIterator endIterator,
typename OutputIterator>
void unescapeStringAndNumericEscapes(std::string_view input,
OutputIterator outputIterator) {
static_assert(!(acceptOnlyNumericEscapes && acceptOnlyBackslashAndNewline));

auto beginIterator = input.begin();
auto endIterator = input.end();
// Append the `character` to the output, but only if newlines/backslashes are
// allowed via the configuration
auto pushNewlineOrBackslash = [&outputIterator](
Expand Down Expand Up @@ -141,8 +141,9 @@ void unescapeStringAndNumericEscapes(InputIterator beginIterator,
}

default:
// should never happen
AD_FAIL();
AD_CONTRACT_CHECK(false,
"Unsupported escape sequence found in RDF literal \"",
input, "\"");
}
beginIterator = nextBackslashIterator + numCharactersFromInput;
}
Expand All @@ -153,7 +154,7 @@ void unescapeStringAndNumericEscapes(InputIterator beginIterator,
std::string unescapeNewlinesAndBackslashes(std::string_view literal) {
std::string result;
RdfEscaping::detail::unescapeStringAndNumericEscapes<false, true>(
literal.begin(), literal.end(), std::back_inserter(result));
literal, std::back_inserter(result));
return result;
}

Expand All @@ -165,7 +166,7 @@ std::string escapeNewlinesAndBackslashes(std::string_view literal) {
// ____________________________________________________________________________
static void literalUnescape(std::string_view input, std::string& res) {
detail::unescapeStringAndNumericEscapes<false, false>(
input.begin(), input.end(), std::back_inserter(res));
input, std::back_inserter(res));
}

// ____________________________________________________________________________
Expand Down Expand Up @@ -223,7 +224,7 @@ static void unescapeIriWithoutBrackets(std::string_view input,
std::string& res) {
// Only numeric escapes are allowed for irirefs.
RdfEscaping::detail::unescapeStringAndNumericEscapes<true, false>(
input.begin(), input.end(), std::back_inserter(res));
input, std::back_inserter(res));
}

// __________________________________________________________________________
Expand Down
4 changes: 2 additions & 2 deletions src/parser/TurtleParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,8 +393,8 @@ template <class T>
TripleComponent TurtleParser<T>::literalAndDatatypeToTripleComponentImpl(
std::string_view normalizedLiteralContent,
const TripleComponent::Iri& typeIri, TurtleParser<T>* parser) {
auto literal =
TripleComponent::Literal::literalWithoutQuotes(normalizedLiteralContent);
auto literal = TripleComponent::Literal::literalWithNormalizedContent(
asNormalizedStringViewUnsafe(normalizedLiteralContent));
std::string_view type = asStringViewUnsafe(typeIri.getContent());

try {
Expand Down
7 changes: 6 additions & 1 deletion test/RdfEscapingTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

#include <gtest/gtest.h>

#include "./util/GTestHelpers.h"
#include "parser/RdfEscaping.h"

using namespace RdfEscaping;

// ___________________________________________________________________________
Expand Down Expand Up @@ -43,6 +43,11 @@ TEST(RdfEscapingTest, normalizedContentFromLiteralOrIri) {
ASSERT_EQ(f("\"bumm\"^^<http://www.mycustomiris.com/sometype>"), "bumm");
}

// The literal below is `"invalid\Escape"`, i.e. it contains the sequence `\E`.
// The test expects `normalizeRDFLiteral` to throw for this input, with an
// exception message that contains "Unsupported escape sequence".
TEST(RdfEscapingTest, invalidEscapeThrows) {
AD_EXPECT_THROW_WITH_MESSAGE(
normalizeRDFLiteral("\"invalid\\Escape\""),
::testing::HasSubstr("Unsupported escape sequence"));
}
// ___________________________________________________________________________
TEST(RdfEscapingTest, escapeForXml) {
ASSERT_EQ(escapeForXml("abc\n\t;"), "abc\n\t;");
Expand Down
4 changes: 4 additions & 0 deletions test/TurtleParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,10 @@ TEST(TurtleParserTest, rdfLiteral) {
std::vector<TripleComponent> expected;
literals.emplace_back(R"("simpleString")");
expected.emplace_back(lit(R"("simpleString")"));
literals.emplace_back(R"("string\"with \\ escapes\n"^^<www.x.de>)");
expected.emplace_back(TripleComponent::Literal::fromEscapedRdfLiteral(
R"("string\"with \\ escapes\n")",
TripleComponent::Iri::fromIriref("<www.x.de>")));
literals.emplace_back(R"("langtag"@en-gb)");
expected.emplace_back(lit(R"("langtag")", "@en-gb"));
literals.emplace_back("\"valueLong\"^^<www.someunknownType/integer>");
Expand Down

0 comments on commit 6a6a4b8

Please sign in to comment.