-
Notifications
You must be signed in to change notification settings - Fork 61
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
We plan for this class to be used consistently across all of QLever to represent values that cannot be folded into an ID directly (e.g. during parsing, for local vocabs, expressions, etc.). It is a strong type that stores its contents in a normalized way that makes it easy to work with and reason about. This commit only introduces the type together with some unit tests; follow-up commits will roll it out across the codebase.
- Loading branch information
Showing
15 changed files
with
825 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
// Copyright 2023, University of Freiburg, | ||
// Chair of Algorithms and Data Structures. | ||
// Author: Benedikt Maria Beckermann <[email protected]> | ||
|
||
#include "parser/Iri.h" | ||
|
||
#include <utility> | ||
|
||
#include "util/StringUtils.h" | ||
|
||
namespace ad_utility::triple_component { | ||
// __________________________________________ | ||
Iri::Iri(NormalizedString iri) : iri_{std::move(iri)} {} | ||
|
||
// __________________________________________ | ||
Iri::Iri(const Iri& prefix, NormalizedStringView suffix) | ||
: iri_{NormalizedString{prefix.getContent()} + suffix} {}; | ||
|
||
// __________________________________________ | ||
NormalizedStringView Iri::getContent() const { return iri_; } | ||
|
||
// __________________________________________ | ||
Iri Iri::iriref(std::string_view stringWithBrackets) { | ||
return Iri{RdfEscaping::normalizeIriWithBrackets(stringWithBrackets)}; | ||
} | ||
|
||
// __________________________________________ | ||
Iri Iri::prefixed(const Iri& prefix, std::string_view suffix) { | ||
return Iri{std::move(prefix), | ||
RdfEscaping::normalizeIriWithoutBrackets(suffix)}; | ||
} | ||
|
||
} // namespace ad_utility::triple_component |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
// Copyright 2023, University of Freiburg, | ||
// Chair of Algorithms and Data Structures. | ||
// Author: Benedikt Maria Beckermann <[email protected]> | ||
|
||
#pragma once | ||
|
||
#include <string_view> | ||
|
||
#include "parser/NormalizedString.h" | ||
|
||
namespace ad_utility::triple_component { | ||
|
||
// A class to hold IRIs. It does not store the leading or trailing | ||
// angled bracket. | ||
// | ||
// E.g. For the input "<http://example.org/books/book1>", | ||
// only "http://example.org/books/book1" is to be stored in the iri_ variable. | ||
class Iri { | ||
private: | ||
// Store the string value of the IRI without any leading or trailing angled | ||
// brackets. | ||
NormalizedString iri_; | ||
|
||
// Create a new iri object | ||
explicit Iri(NormalizedString iri); | ||
|
||
// Create a new iri using a prefix | ||
Iri(const Iri& prefix, NormalizedStringView suffix); | ||
|
||
public: | ||
// Create a new iri given an iri with brackets | ||
static Iri iriref(std::string_view stringWithBrackets); | ||
|
||
// Create a new iri given a prefix iri and its suffix | ||
static Iri prefixed(const Iri& prefix, std::string_view suffix); | ||
|
||
// Return the string value of the iri object without any leading or trailing | ||
// angled brackets. | ||
NormalizedStringView getContent() const; | ||
}; | ||
|
||
} // namespace ad_utility::triple_component |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
// Copyright 2023, University of Freiburg, | ||
// Chair of Algorithms and Data Structures. | ||
// Author: Benedikt Maria Beckermann <[email protected]> | ||
|
||
#include "parser/Literal.h" | ||
|
||
#include <utility> | ||
#include <variant> | ||
|
||
namespace ad_utility::triple_component { | ||
// __________________________________________ | ||
Literal::Literal(NormalizedString content) : content_{std::move(content)} {} | ||
|
||
// __________________________________________ | ||
Literal::Literal(NormalizedString content, Iri datatype) | ||
: content_{std::move(content)}, descriptor_{std::move(datatype)} {} | ||
|
||
// __________________________________________ | ||
Literal::Literal(NormalizedString content, NormalizedString languageTag) | ||
: content_{std::move(content)}, descriptor_{std::move(languageTag)} {} | ||
|
||
// __________________________________________ | ||
bool Literal::hasLanguageTag() const { | ||
return std::holds_alternative<NormalizedString>(descriptor_); | ||
} | ||
|
||
// __________________________________________ | ||
bool Literal::hasDatatype() const { | ||
return std::holds_alternative<Iri>(descriptor_); | ||
} | ||
|
||
// __________________________________________ | ||
NormalizedStringView Literal::getContent() const { return content_; } | ||
|
||
// __________________________________________ | ||
Iri Literal::getDatatype() const { | ||
if (!hasDatatype()) { | ||
AD_THROW("The literal does not have an explicit datatype."); | ||
} | ||
return std::get<Iri>(descriptor_); | ||
} | ||
|
||
// __________________________________________ | ||
NormalizedStringView Literal::getLanguageTag() const { | ||
if (!hasLanguageTag()) { | ||
AD_THROW("The literal does not have an explicit language tag."); | ||
} | ||
return std::get<NormalizedString>(descriptor_); | ||
} | ||
|
||
// __________________________________________ | ||
Literal Literal::literalWithQuotes( | ||
std::string_view rdfContentWithQuotes, | ||
std::optional<std::variant<Iri, string>> descriptor) { | ||
NormalizedString content = | ||
RdfEscaping::normalizeLiteralWithQuotes(rdfContentWithQuotes); | ||
|
||
return literalWithNormalizedContent(content, std::move(descriptor)); | ||
} | ||
|
||
// __________________________________________ | ||
Literal Literal::literalWithoutQuotes( | ||
std::string_view rdfContentWithoutQuotes, | ||
std::optional<std::variant<Iri, string>> descriptor) { | ||
NormalizedString content = | ||
RdfEscaping::normalizeLiteralWithoutQuotes(rdfContentWithoutQuotes); | ||
|
||
return literalWithNormalizedContent(content, std::move(descriptor)); | ||
} | ||
|
||
// __________________________________________ | ||
Literal Literal::literalWithNormalizedContent( | ||
NormalizedString normalizedRdfContent, | ||
std::optional<std::variant<Iri, string>> descriptor) { | ||
if (!descriptor.has_value()) { | ||
return Literal(std::move(normalizedRdfContent)); | ||
} | ||
|
||
using namespace RdfEscaping; | ||
auto visitLanguageTag = | ||
[&normalizedRdfContent](std::string&& languageTag) -> Literal { | ||
return {std::move(normalizedRdfContent), | ||
normalizeLanguageTag(std::move(languageTag))}; | ||
}; | ||
|
||
auto visitDatatype = [&normalizedRdfContent](Iri&& datatype) -> Literal { | ||
return {std::move(normalizedRdfContent), std::move(datatype)}; | ||
}; | ||
|
||
return std::visit( | ||
ad_utility::OverloadCallOperator{visitDatatype, visitLanguageTag}, | ||
std::move(descriptor.value())); | ||
} | ||
|
||
} // namespace ad_utility::triple_component |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
// Copyright 2023, University of Freiburg, | ||
// Chair of Algorithms and Data Structures. | ||
// Author: Benedikt Maria Beckermann <[email protected]> | ||
|
||
#pragma once | ||
|
||
#include "parser/Iri.h" | ||
#include "parser/NormalizedString.h" | ||
|
||
namespace ad_utility::triple_component { | ||
// A class to hold literal values. | ||
class Literal { | ||
private: | ||
// Store the string value of the literal without the surrounding quotation | ||
// marks or trailing descriptor. | ||
// "Hello World"@en -> Hello World | ||
NormalizedString content_; | ||
|
||
using LiteralDescriptorVariant = | ||
std::variant<std::monostate, NormalizedString, Iri>; | ||
|
||
// Store the optional language tag or the optional datatype if applicable | ||
// without their prefixes. | ||
// "Hello World"@en -> en | ||
// "Hello World"^^test:type -> test:type | ||
LiteralDescriptorVariant descriptor_; | ||
|
||
// Create a new literal without any descriptor | ||
explicit Literal(NormalizedString content); | ||
|
||
// Create a new literal with a datatype | ||
Literal(NormalizedString content, Iri datatype); | ||
|
||
// Create a new literal with a language tag | ||
Literal(NormalizedString content, NormalizedString languageTag); | ||
|
||
// Similar to `literalWithQuotes`, except the rdfContent is expected to | ||
// already be normalized | ||
static Literal literalWithNormalizedContent( | ||
NormalizedString normalizedRdfContent, | ||
std::optional<std::variant<Iri, string>> descriptor = std::nullopt); | ||
|
||
public: | ||
// Return true if the literal has an assigned language tag | ||
bool hasLanguageTag() const; | ||
|
||
// Return true if the literal has an assigned datatype | ||
bool hasDatatype() const; | ||
|
||
// Return the value of the literal without quotation marks and without any | ||
// datatype or language tag | ||
NormalizedStringView getContent() const; | ||
|
||
// Return the language tag of the literal, if available, without leading @ | ||
// character. Throws an exception if the literal has no language tag. | ||
NormalizedStringView getLanguageTag() const; | ||
|
||
// Return the datatype of the literal, if available, without leading ^^ | ||
// prefix. Throws an exception if the literal has no datatype. | ||
Iri getDatatype() const; | ||
|
||
// For documentation, see documentation of function | ||
// LiteralORIri::literalWithQuotes | ||
static Literal literalWithQuotes( | ||
std::string_view rdfContentWithQuotes, | ||
std::optional<std::variant<Iri, string>> descriptor = std::nullopt); | ||
|
||
// For documentation, see documentation of function | ||
// LiteralORIri::literalWithoutQuotes | ||
static Literal literalWithoutQuotes( | ||
std::string_view rdfContentWithoutQuotes, | ||
std::optional<std::variant<Iri, string>> descriptor = std::nullopt); | ||
}; | ||
} // namespace ad_utility::triple_component |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
// Copyright 2023, University of Freiburg, | ||
// Chair of Algorithms and Data Structures. | ||
// Author: Benedikt Maria Beckermann <[email protected]> | ||
|
||
#include "parser/LiteralOrIri.h" | ||
|
||
#include <algorithm> | ||
#include <utility> | ||
|
||
namespace ad_utility::triple_component { | ||
// __________________________________________ | ||
LiteralOrIri::LiteralOrIri(Iri iri) : data_{std::move(iri)} {} | ||
|
||
// __________________________________________ | ||
LiteralOrIri::LiteralOrIri(Literal literal) : data_{std::move(literal)} {} | ||
|
||
// __________________________________________ | ||
bool LiteralOrIri::isIri() const { return std::holds_alternative<Iri>(data_); } | ||
|
||
// __________________________________________ | ||
const Iri& LiteralOrIri::getIri() const { | ||
if (!isIri()) { | ||
AD_THROW( | ||
"LiteralOrIri object does not contain an Iri object and thus " | ||
"cannot return it"); | ||
} | ||
return std::get<Iri>(data_); | ||
} | ||
|
||
// __________________________________________ | ||
NormalizedStringView LiteralOrIri::getIriContent() const { | ||
return getIri().getContent(); | ||
} | ||
|
||
// __________________________________________ | ||
bool LiteralOrIri::isLiteral() const { | ||
return std::holds_alternative<Literal>(data_); | ||
} | ||
|
||
// __________________________________________ | ||
const Literal& LiteralOrIri::getLiteral() const { | ||
if (!isLiteral()) { | ||
AD_THROW( | ||
"LiteralOrIri object does not contain an Literal object and " | ||
"thus cannot return it"); | ||
} | ||
return std::get<Literal>(data_); | ||
} | ||
|
||
// __________________________________________ | ||
bool LiteralOrIri::hasLanguageTag() const { | ||
return getLiteral().hasLanguageTag(); | ||
} | ||
|
||
// __________________________________________ | ||
bool LiteralOrIri::hasDatatype() const { return getLiteral().hasDatatype(); } | ||
|
||
// __________________________________________ | ||
NormalizedStringView LiteralOrIri::getLiteralContent() const { | ||
return getLiteral().getContent(); | ||
} | ||
|
||
// __________________________________________ | ||
NormalizedStringView LiteralOrIri::getLanguageTag() const { | ||
return getLiteral().getLanguageTag(); | ||
} | ||
|
||
// __________________________________________ | ||
Iri LiteralOrIri::getDatatype() const { return getLiteral().getDatatype(); } | ||
|
||
// __________________________________________ | ||
NormalizedStringView LiteralOrIri::getContent() const { | ||
if (isLiteral()) | ||
return getLiteralContent(); | ||
else if (isIri()) | ||
return getIriContent(); | ||
else | ||
AD_THROW("LiteralOrIri object contains neither Iri not Literal"); | ||
} | ||
|
||
// __________________________________________ | ||
LiteralOrIri LiteralOrIri::iriref(const std::string& stringWithBrackets) { | ||
return LiteralOrIri{Iri::iriref(stringWithBrackets)}; | ||
} | ||
|
||
// __________________________________________ | ||
LiteralOrIri LiteralOrIri::prefixedIri(const Iri& prefix, | ||
std::string_view suffix) { | ||
return LiteralOrIri{Iri::prefixed(prefix, suffix)}; | ||
} | ||
|
||
// __________________________________________ | ||
LiteralOrIri LiteralOrIri::literalWithQuotes( | ||
std::string_view rdfContentWithQuotes, | ||
std::optional<std::variant<Iri, string>> descriptor) { | ||
return LiteralOrIri( | ||
Literal::literalWithQuotes(rdfContentWithQuotes, std::move(descriptor))); | ||
} | ||
|
||
// __________________________________________ | ||
LiteralOrIri LiteralOrIri::literalWithoutQuotes( | ||
std::string_view rdfContentWithoutQuotes, | ||
std::optional<std::variant<Iri, string>> descriptor) { | ||
return LiteralOrIri(Literal::literalWithoutQuotes(rdfContentWithoutQuotes, | ||
std::move(descriptor))); | ||
} | ||
} // namespace ad_utility::triple_component |
Oops, something went wrong.