Skip to content

Commit

Permalink
Implement LiteralOrIri (#1186)
Browse files Browse the repository at this point in the history
We plan for this class to be consistently used across all of QLever to represent values that cannot be folded into an ID directly (e.g. during the parsing, for local vocabs, expressions, etc.).
It is a strong type that stores the contents in a normalized way that makes it easy to work with and reason about.
This commit only introduces this type together with some unit tests; follow-up commits will roll it out across the codebase.
  • Loading branch information
greenBene authored Feb 2, 2024
1 parent bebdadc commit 9813a28
Show file tree
Hide file tree
Showing 15 changed files with 825 additions and 29 deletions.
4 changes: 3 additions & 1 deletion src/engine/Server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#include "util/http/HttpUtils.h"
#include "util/http/websocket/MessageSender.h"

using namespace std::string_literals;

// Shorthand alias so that `Awaitable<T>` can be written in this file instead
// of the fully qualified `Server::Awaitable<T>`.
template <typename T>
using Awaitable = Server::Awaitable<T>;

Expand Down Expand Up @@ -481,7 +483,7 @@ nlohmann::json Server::composeCacheStatsJson() const {
// Error type signalling that a proposed query id is already in use. The
// `what()` message has the form "Query id '<id>' is already in use!".
class QueryAlreadyInUseError : public std::runtime_error {
 public:
  // `proposedQueryId` is the id that could not be used; it is copied into the
  // error message, so the view does not have to outlive the exception.
  explicit QueryAlreadyInUseError(std::string_view proposedQueryId)
      // Build the message via `append`, which accepts a `std::string_view`
      // in standard C++17 (there is no standard `operator+` for
      // `std::string` and `std::string_view` before C++26).
      : std::runtime_error{std::string{"Query id '"}
                               .append(proposedQueryId)
                               .append("' is already in use!")} {}
};

Expand Down
5 changes: 4 additions & 1 deletion src/parser/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ add_library(parser
SelectClause.cpp
GraphPatternOperation.cpp
# The `Variable.cpp` from the subdirectory is linked here because otherwise we get linking errors.
GraphPattern.cpp data/Variable.cpp)
GraphPattern.cpp data/Variable.cpp
Iri.cpp
Literal.cpp
LiteralOrIri.cpp)
qlever_target_link_libraries(parser sparqlParser parserData sparqlExpressions rdfEscaping re2::re2 util engine)

33 changes: 33 additions & 0 deletions src/parser/Iri.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright 2023, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Benedikt Maria Beckermann <[email protected]>

#include "parser/Iri.h"

#include <utility>

#include "util/StringUtils.h"

namespace ad_utility::triple_component {
// __________________________________________
// Take ownership of the already normalized IRI content.
Iri::Iri(NormalizedString iri)
    : iri_(std::move(iri)) {}

// __________________________________________
// Build the IRI by appending the normalized `suffix` to a copy of the
// content of `prefix`.
Iri::Iri(const Iri& prefix, NormalizedStringView suffix)
    : iri_{NormalizedString{prefix.getContent()} + suffix} {}

// __________________________________________
// Return a non-owning view of the stored IRI content.
NormalizedStringView Iri::getContent() const {
  return NormalizedStringView{iri_};
}

// __________________________________________
// Normalize the input via `RdfEscaping::normalizeIriWithBrackets` and wrap
// the result in an `Iri`.
Iri Iri::iriref(std::string_view stringWithBrackets) {
  auto normalized = RdfEscaping::normalizeIriWithBrackets(stringWithBrackets);
  return Iri{std::move(normalized)};
}

// __________________________________________
// Create an IRI from `prefix` followed by `suffix`, where the suffix is
// normalized via `RdfEscaping::normalizeIriWithoutBrackets`.
Iri Iri::prefixed(const Iri& prefix, std::string_view suffix) {
  // `prefix` is a const reference and therefore cannot be moved from, so it
  // is passed on as is.
  return Iri{prefix, RdfEscaping::normalizeIriWithoutBrackets(suffix)};
}

} // namespace ad_utility::triple_component
42 changes: 42 additions & 0 deletions src/parser/Iri.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Copyright 2023, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Benedikt Maria Beckermann <[email protected]>

#pragma once

#include <string_view>

#include "parser/NormalizedString.h"

namespace ad_utility::triple_component {

// Value type for an IRI. The surrounding angle brackets are not part of the
// stored content.
//
// Example: the input "<http://example.org/books/book1>" is stored as
// "http://example.org/books/book1".
class Iri {
 public:
  // Factory: build an IRI from a string that includes the angle brackets.
  static Iri iriref(std::string_view stringWithBrackets);

  // Factory: build an IRI from a prefix IRI and the suffix to append to it.
  static Iri prefixed(const Iri& prefix, std::string_view suffix);

  // The content of the IRI, without leading or trailing angle brackets.
  NormalizedStringView getContent() const;

 private:
  // The IRI content, stored without leading or trailing angle brackets.
  NormalizedString iri_;

  // Construct from already normalized content.
  explicit Iri(NormalizedString iri);

  // Construct from the content of `prefix` followed by `suffix`.
  Iri(const Iri& prefix, NormalizedStringView suffix);
};

} // namespace ad_utility::triple_component
95 changes: 95 additions & 0 deletions src/parser/Literal.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// Copyright 2023, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Benedikt Maria Beckermann <[email protected]>

#include "parser/Literal.h"

#include <utility>
#include <variant>

namespace ad_utility::triple_component {
// __________________________________________
// Construct a literal that has only content; `descriptor_` is left
// default-initialized.
Literal::Literal(NormalizedString content)
    : content_(std::move(content)) {}

// __________________________________________
// Construct a literal whose descriptor is the given datatype IRI.
Literal::Literal(NormalizedString content, Iri datatype)
    : content_(std::move(content)),
      descriptor_(std::move(datatype)) {}

// __________________________________________
// Construct a literal whose descriptor is the given language tag.
Literal::Literal(NormalizedString content, NormalizedString languageTag)
    : content_(std::move(content)),
      descriptor_(std::move(languageTag)) {}

// __________________________________________
bool Literal::hasLanguageTag() const {
return std::holds_alternative<NormalizedString>(descriptor_);
}

// __________________________________________
bool Literal::hasDatatype() const {
return std::holds_alternative<Iri>(descriptor_);
}

// __________________________________________
// Return a non-owning view of the stored literal content.
NormalizedStringView Literal::getContent() const {
  return NormalizedStringView{content_};
}

// __________________________________________
// Return a copy of the datatype IRI. Raises via `AD_THROW` if the descriptor
// does not hold an `Iri`.
Iri Literal::getDatatype() const {
  const Iri* const datatype = std::get_if<Iri>(&descriptor_);
  if (datatype == nullptr) {
    AD_THROW("The literal does not have an explicit datatype.");
  }
  return *datatype;
}

// __________________________________________
// Return a view of the language tag. Raises via `AD_THROW` if the descriptor
// does not hold a `NormalizedString`.
NormalizedStringView Literal::getLanguageTag() const {
  const NormalizedString* const languageTag =
      std::get_if<NormalizedString>(&descriptor_);
  if (languageTag == nullptr) {
    AD_THROW("The literal does not have an explicit language tag.");
  }
  return *languageTag;
}

// __________________________________________
// Normalize the quoted content via `RdfEscaping::normalizeLiteralWithQuotes`
// and create the literal with the (optional) datatype or language tag.
Literal Literal::literalWithQuotes(
    std::string_view rdfContentWithQuotes,
    std::optional<std::variant<Iri, string>> descriptor) {
  NormalizedString content =
      RdfEscaping::normalizeLiteralWithQuotes(rdfContentWithQuotes);

  // `literalWithNormalizedContent` takes the content by value, so move it to
  // avoid copying the normalized string.
  return literalWithNormalizedContent(std::move(content),
                                      std::move(descriptor));
}

// __________________________________________
// Normalize the unquoted content via
// `RdfEscaping::normalizeLiteralWithoutQuotes` and create the literal with
// the (optional) datatype or language tag.
Literal Literal::literalWithoutQuotes(
    std::string_view rdfContentWithoutQuotes,
    std::optional<std::variant<Iri, string>> descriptor) {
  NormalizedString content =
      RdfEscaping::normalizeLiteralWithoutQuotes(rdfContentWithoutQuotes);

  // `literalWithNormalizedContent` takes the content by value, so move it to
  // avoid copying the normalized string.
  return literalWithNormalizedContent(std::move(content),
                                      std::move(descriptor));
}

// __________________________________________
// Create a literal from already normalized content. Depending on
// `descriptor`, the result has no descriptor, a datatype, or a language tag
// (which is normalized via `RdfEscaping::normalizeLanguageTag`).
Literal Literal::literalWithNormalizedContent(
    NormalizedString normalizedRdfContent,
    std::optional<std::variant<Iri, string>> descriptor) {
  // Case 1: no descriptor at all.
  if (!descriptor.has_value()) {
    return Literal(std::move(normalizedRdfContent));
  }

  auto& datatypeOrLanguageTag = descriptor.value();

  // Case 2: the descriptor is a datatype IRI.
  if (Iri* datatype = std::get_if<Iri>(&datatypeOrLanguageTag);
      datatype != nullptr) {
    return Literal{std::move(normalizedRdfContent), std::move(*datatype)};
  }

  // Case 3: the descriptor is a language tag that still has to be normalized.
  return Literal{std::move(normalizedRdfContent),
                 RdfEscaping::normalizeLanguageTag(std::move(
                     std::get<std::string>(datatypeOrLanguageTag)))};
}

} // namespace ad_utility::triple_component
74 changes: 74 additions & 0 deletions src/parser/Literal.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Copyright 2023, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Benedikt Maria Beckermann <[email protected]>

#pragma once

#include <optional>
#include <string>
#include <variant>

#include "parser/Iri.h"
#include "parser/NormalizedString.h"

namespace ad_utility::triple_component {
// A class to hold literal values. A literal consists of its content and an
// optional descriptor, which is either a language tag or a datatype.
class Literal {
 private:
  // Store the string value of the literal without the surrounding quotation
  // marks or trailing descriptor.
  // "Hello World"@en -> Hello World
  NormalizedString content_;

  // `std::monostate` means that the literal has no descriptor.
  using LiteralDescriptorVariant =
      std::variant<std::monostate, NormalizedString, Iri>;

  // Store the optional language tag or the optional datatype if applicable
  // without their prefixes.
  // "Hello World"@en -> en
  // "Hello World"^^test:type -> test:type
  LiteralDescriptorVariant descriptor_;

  // Create a new literal without any descriptor
  explicit Literal(NormalizedString content);

  // Create a new literal with a datatype
  Literal(NormalizedString content, Iri datatype);

  // Create a new literal with a language tag
  Literal(NormalizedString content, NormalizedString languageTag);

  // Similar to `literalWithQuotes`, except the rdfContent is expected to
  // already be normalized. The `descriptor` is either a datatype (`Iri`) or a
  // language tag (`std::string`).
  static Literal literalWithNormalizedContent(
      NormalizedString normalizedRdfContent,
      std::optional<std::variant<Iri, std::string>> descriptor = std::nullopt);

 public:
  // Return true if the literal has an assigned language tag
  bool hasLanguageTag() const;

  // Return true if the literal has an assigned datatype
  bool hasDatatype() const;

  // Return the value of the literal without quotation marks and without any
  // datatype or language tag
  NormalizedStringView getContent() const;

  // Return the language tag of the literal, if available, without leading @
  // character. Throws an exception if the literal has no language tag.
  NormalizedStringView getLanguageTag() const;

  // Return the datatype of the literal, if available, without leading ^^
  // prefix. Throws an exception if the literal has no datatype.
  Iri getDatatype() const;

  // For documentation, see documentation of function
  // LiteralOrIri::literalWithQuotes
  static Literal literalWithQuotes(
      std::string_view rdfContentWithQuotes,
      std::optional<std::variant<Iri, std::string>> descriptor = std::nullopt);

  // For documentation, see documentation of function
  // LiteralOrIri::literalWithoutQuotes
  static Literal literalWithoutQuotes(
      std::string_view rdfContentWithoutQuotes,
      std::optional<std::variant<Iri, std::string>> descriptor = std::nullopt);
};
} // namespace ad_utility::triple_component
107 changes: 107 additions & 0 deletions src/parser/LiteralOrIri.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Copyright 2023, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Benedikt Maria Beckermann <[email protected]>

#include "parser/LiteralOrIri.h"

#include <algorithm>
#include <utility>

namespace ad_utility::triple_component {
// __________________________________________
// Wrap an `Iri`.
LiteralOrIri::LiteralOrIri(Iri iri)
    : data_(std::move(iri)) {}

// __________________________________________
// Wrap a `Literal`.
LiteralOrIri::LiteralOrIri(Literal literal)
    : data_(std::move(literal)) {}

// __________________________________________
// True iff the stored value is an `Iri`.
bool LiteralOrIri::isIri() const {
  return std::get_if<Iri>(&data_) != nullptr;
}

// __________________________________________
// Return the stored `Iri`. Raises via `AD_THROW` if no `Iri` is stored.
const Iri& LiteralOrIri::getIri() const {
  const Iri* const iri = std::get_if<Iri>(&data_);
  if (iri == nullptr) {
    AD_THROW(
        "LiteralOrIri object does not contain an Iri object and thus "
        "cannot return it");
  }
  return *iri;
}

// __________________________________________
// Return the content of the stored `Iri`; `getIri` raises if there is none.
NormalizedStringView LiteralOrIri::getIriContent() const {
  const Iri& iri = getIri();
  return iri.getContent();
}

// __________________________________________
// True iff the stored value is a `Literal`.
bool LiteralOrIri::isLiteral() const {
  return std::get_if<Literal>(&data_) != nullptr;
}

// __________________________________________
// Return the stored `Literal`. Raises via `AD_THROW` if no `Literal` is
// stored.
const Literal& LiteralOrIri::getLiteral() const {
  const Literal* const literal = std::get_if<Literal>(&data_);
  if (literal == nullptr) {
    AD_THROW(
        "LiteralOrIri object does not contain an Literal object and "
        "thus cannot return it");
  }
  return *literal;
}

// __________________________________________
bool LiteralOrIri::hasLanguageTag() const {
return getLiteral().hasLanguageTag();
}

// __________________________________________
bool LiteralOrIri::hasDatatype() const { return getLiteral().hasDatatype(); }

// __________________________________________
// Return the content of the stored `Literal`; `getLiteral` raises if there is
// none.
NormalizedStringView LiteralOrIri::getLiteralContent() const {
  const Literal& literal = getLiteral();
  return literal.getContent();
}

// __________________________________________
// Return the language tag of the stored `Literal`; `getLiteral` raises if
// there is no literal.
NormalizedStringView LiteralOrIri::getLanguageTag() const {
  const Literal& literal = getLiteral();
  return literal.getLanguageTag();
}

// __________________________________________
// Return the datatype of the stored `Literal`; `getLiteral` raises if there
// is no literal.
Iri LiteralOrIri::getDatatype() const {
  const Literal& literal = getLiteral();
  return literal.getDatatype();
}

// __________________________________________
// Return the content of whichever alternative is stored: the literal content
// for a `Literal`, the IRI content for an `Iri`.
NormalizedStringView LiteralOrIri::getContent() const {
  if (isLiteral()) {
    return getLiteralContent();
  }
  if (isIri()) {
    return getIriContent();
  }
  // Neither alternative is stored.
  AD_THROW("LiteralOrIri object contains neither Iri not Literal");
}

// __________________________________________
// Create a `LiteralOrIri` that holds the IRI built by `Iri::iriref`.
LiteralOrIri LiteralOrIri::iriref(const std::string& stringWithBrackets) {
  Iri iri = Iri::iriref(stringWithBrackets);
  return LiteralOrIri{std::move(iri)};
}

// __________________________________________
// Create a `LiteralOrIri` that holds the IRI built by `Iri::prefixed`.
LiteralOrIri LiteralOrIri::prefixedIri(const Iri& prefix,
                                       std::string_view suffix) {
  Iri iri = Iri::prefixed(prefix, suffix);
  return LiteralOrIri{std::move(iri)};
}

// __________________________________________
// Create a `LiteralOrIri` that holds the literal built by
// `Literal::literalWithQuotes`.
LiteralOrIri LiteralOrIri::literalWithQuotes(
    std::string_view rdfContentWithQuotes,
    std::optional<std::variant<Iri, string>> descriptor) {
  Literal literal =
      Literal::literalWithQuotes(rdfContentWithQuotes, std::move(descriptor));
  return LiteralOrIri{std::move(literal)};
}

// __________________________________________
// Create a `LiteralOrIri` that holds the literal built by
// `Literal::literalWithoutQuotes`.
LiteralOrIri LiteralOrIri::literalWithoutQuotes(
    std::string_view rdfContentWithoutQuotes,
    std::optional<std::variant<Iri, string>> descriptor) {
  Literal literal = Literal::literalWithoutQuotes(rdfContentWithoutQuotes,
                                                  std::move(descriptor));
  return LiteralOrIri{std::move(literal)};
}
} // namespace ad_utility::triple_component
Loading

0 comments on commit 9813a28

Please sign in to comment.