diff --git a/.gitignore b/.gitignore index 25871d0..06736e1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,6 @@ node_modules build/ +conan* +!conanfile.txt +graph_info.json +*Find*.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ae5c08..d0de76a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,15 +6,16 @@ project("Docgen" VERSION 1.0.0 # This will perform memcheck include(CTest) +# enables testing +enable_testing() + # This is to make this library portable to other machines. # This will be used for install. include(GNUInstallDirs) -# enables testing -enable_testing() - -# Set C++17 standard for project target -set(CMAKE_CXX_STANDARD 17) +# Set this such that dependency installation through conan can be found +set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/libs) +message("CMAKE_MODULE_PATH: ${CMAKE_MODULE_PATH}") # Create DocgenConfigVersion.cmake which contains current project version # This is supposed to help with (major) version compatibility. @@ -54,8 +55,17 @@ set(ETERNAL_DIR ${PROJECT_SOURCE_DIR}/libs/eternal) # find json library find_package(nlohmann_json 3.2.0 REQUIRED) +# find google benchmark +find_package(benchmark REQUIRED PATHS ${GBENCH_DIR}/build) + # add libs subdirectory add_subdirectory(${PROJECT_SOURCE_DIR}/libs ${PROJECT_BINARY_DIR}/libs) +# add src subdirectory +add_subdirectory(${PROJECT_SOURCE_DIR}/src ${PROJECT_BINARY_DIR}/src) + +# add benchmark subdirectory +add_subdirectory(${PROJECT_SOURCE_DIR}/benchmark ${PROJECT_BINARY_DIR}/benchmark) + # add test subdirectory add_subdirectory(${PROJECT_SOURCE_DIR}/test ${PROJECT_BINARY_DIR}/test) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt new file mode 100644 index 0000000..d2822fb --- /dev/null +++ b/benchmark/CMakeLists.txt @@ -0,0 +1,25 @@ +add_executable(lexer_benchmark + ${CMAKE_CURRENT_SOURCE_DIR}/core/lex/lexer_benchmark.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/core/lex/lexer_legacy_benchmark.cpp + # Source dependency + $ + ) 
+target_compile_features(lexer_benchmark PRIVATE cxx_std_17) +target_include_directories(lexer_benchmark PRIVATE + ${GBENCH_DIR}/include + ${PROJECT_SOURCE_DIR}/src + ${ETERNAL_DIR}/include + ) +target_link_libraries(lexer_benchmark PRIVATE + benchmark::benchmark + benchmark::benchmark_main + pthread + nlohmann_json::nlohmann_json + ) + +# copy data directory into where lexer_benchmark executable ends up +add_custom_command( + TARGET lexer_benchmark POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_directory + ${CMAKE_CURRENT_SOURCE_DIR}/core/lex/data/ + $/data) diff --git a/benchmark/core/lex/data/data_1.txt b/benchmark/core/lex/data/data_1.txt new file mode 100644 index 0000000..2cb7356 --- /dev/null +++ b/benchmark/core/lex/data/data_1.txt @@ -0,0 +1,113 @@ +#include + +namespace docgen { +namespace core { +namespace lex { + +/////////////////////////////////// +// Lexer Implementation +/////////////////////////////////// + +Lexer::Lexer() + : trie_({ + {"\n", Symbol::NEWLINE}, + {" ", Symbol::WHITESPACE}, + {"\t", Symbol::WHITESPACE}, + {"\v", Symbol::WHITESPACE}, + {"\r", Symbol::WHITESPACE}, + {"\f", Symbol::WHITESPACE}, + {";", Symbol::SEMICOLON}, + {"#", Symbol::HASHTAG}, + {"*", Symbol::STAR}, + {"{", Symbol::OPEN_BRACE}, + {"}", Symbol::CLOSE_BRACE}, + {"///", Symbol::BEGIN_SLINE_COMMENT}, + {"/*!", Symbol::BEGIN_SBLOCK_COMMENT}, + {"//", Symbol::BEGIN_NLINE_COMMENT}, + {"/*", Symbol::BEGIN_NBLOCK_COMMENT}, + {"*/", Symbol::END_BLOCK_COMMENT}, + {"@sdesc", Symbol::SDESC}, + {"@tparam", Symbol::TPARAM}, + {"@param", Symbol::PARAM}, + {"@return", Symbol::RETURN} + }) +{} + +void Lexer::process(char c) +{ + this->update_state(); + + auto it = trie_.get_children().find(c); + + // if transition exists + if (it != trie_.get_children().end()) { + buf_.push_back(c); + trie_.transition(c); + return; + } + + // otherwise, no transition exists + + // if not backtracking + if (!this->is_backtracking()) { + // if trie at root + if (trie_.is_reset()) { + 
text_.push_back(c); + return; + } + text_.append(buf_); + buf_.clear(); + trie_.reset(); + return this->process(c); + } + + // otherwise, currently backtracking + this->backtrack(c); +} + +void Lexer::backtrack(char c) +{ + // tokenize text + this->tokenize_text(); + + // tokenize symbol + for (uint32_t i = 0; i < buf_.size(); ++i) { + trie_.back_transition(); + } + assert(trie_.is_accept()); + auto opt_symbol = trie_.get_symbol(); + assert(static_cast(opt_symbol)); + status_.tokens.emplace(*opt_symbol); + + // move and clear buf_ to temp string for reprocessing + std::string reprocess_str(std::move(buf_)); + reprocess_str.push_back(c); + + // reset + this->reset(); + + // reprocess the rest + for (char c : reprocess_str) { + this->process(c); + } +} + +void Lexer::flush() +{ + this->update_state(); + + if (this->is_backtracking()) { + return this->backtrack(0); + } + + // non-backtracking: no parent is an accepting node + // append buf_ to text_ and tokenize text_ + // reset all other fields + text_.append(buf_); + this->tokenize_text(); + this->reset(); +} + +} // namespace lex +} // namespace core +} // namespace docgen diff --git a/benchmark/core/lex/data/data_2.txt b/benchmark/core/lex/data/data_2.txt new file mode 100644 index 0000000..2df5008 --- /dev/null +++ b/benchmark/core/lex/data/data_2.txt @@ -0,0 +1,211 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace docgen { +namespace core { +namespace lex { + +template +struct Trie +{ +private: + struct TrieNode; // forward declaration + +public: + using pair_t = std::pair; + + // Constructs trie node from a list of pairs of string and symbol. + // The string must be of type std::string_view and it must not be empty. + // The symbol must be of type SymbolType. + Trie(const std::initializer_list&); + + // Delete compiler-generated copy/move ctor/assignment + // This ensures that Trie objects are only (default) constructible. 
+ Trie(const Trie&) =delete; + Trie(Trie&&) =delete; + Trie& operator=(const Trie&) =delete; + Trie& operator=(Trie&&) =delete; + + void transition(char c); + void back_transition(); + bool is_accept() const; + typename TrieNode::children_t& get_children(); + bool is_reset() const; + void reset(); + const std::optional& get_symbol() const; + +private: + + struct TrieNode + { + using children_t = std::unordered_map>; + + // Insert str from current node to update the trie structure. + // The string str is read starting from idx. + void insert(const std::pair&, size_t = 0); + + // Returns if current node is an accepting state. + bool is_accept() const; + + // Returns the optional symbol associated with current node. + // Symbol will be active if is_accept is true. + const std::optional& get_symbol() const; + + children_t& get_children(); + + std::optional> get_parent(); + + private: + + enum class State : bool { + accept, + non_accept + }; + + State state_ = State::non_accept; // indicates accepting node or not + std::optional symbol_; // symbol for accepting node + children_t children_; // current node's children + TrieNode* parent_ptr_; // current node's parent + }; + + TrieNode root_; // root of Trie + std::reference_wrapper curr_node_ = root_; // current node +}; + +//////////////////////////////////////////////////////////////// +// TrieNode Implementation +//////////////////////////////////////////////////////////////// + +template +inline void +Trie::TrieNode::insert(const pair_t& pair, size_t idx) +{ + const auto& str = std::get<0>(pair); + + // if string starting from idx is empty, then accepting state + if (str[idx] == '\0') { + state_ = State::accept; + symbol_ = std::get<1>(pair); + } + + else { + // if no child with str[idx] mapping + if (children_.find(str[idx]) == children_.end()) { + children_.emplace(str[idx], std::make_unique()); + } + auto& child = children_.at(str[idx]); + child->parent_ptr_ = this; + child->insert(pair, idx + 1); + } +} + 
+template +inline bool +Trie::TrieNode::is_accept() const +{ + return state_ == State::accept; +} + +template +inline const std::optional& +Trie::TrieNode::get_symbol() const +{ + return symbol_; +} + +template +inline typename Trie::TrieNode::children_t& +Trie::TrieNode::get_children() +{ + return children_; +} + +template +inline std::optional::TrieNode>> +Trie::TrieNode::get_parent() +{ + if (parent_ptr_) { + return *parent_ptr_; + } + return {}; +} + +//////////////////////////////////////////////////////////////// +// Trie Implementation +//////////////////////////////////////////////////////////////// + +template +inline +Trie::Trie(const std::initializer_list& pairs) + : root_() +{ + for (auto it = pairs.begin(); it != pairs.end(); ++it) { + if (it->first.empty()) { + throw exceptions::control_flow_error("strings must be non-empty"); + } + root_.insert(*it); + } +} + +template +inline void +Trie::transition(char c) +{ + curr_node_ = *(curr_node_.get().get_children().at(c)); +} + +template +inline bool +Trie::is_accept() const +{ + return curr_node_.get().is_accept(); +} + +template +inline typename Trie::TrieNode::children_t& +Trie::get_children() +{ + return curr_node_.get().get_children(); +} + +template +inline bool +Trie::is_reset() const +{ + return &(curr_node_.get()) == &root_; +} + +template +inline void +Trie::reset() +{ + curr_node_ = root_; +} + +template +inline void +Trie::back_transition() +{ + auto&& opt_parent = curr_node_.get().get_parent(); + if (!opt_parent) { + throw exceptions::control_flow_error("Attempt to back transition past the root"); + } + curr_node_ = *opt_parent; +} + +template +inline const std::optional& +Trie::get_symbol() const +{ + return curr_node_.get().get_symbol(); +} + +} // namespace lex +} // namespace core +} // namespace docgen diff --git a/benchmark/core/lex/data/data_3.txt b/benchmark/core/lex/data/data_3.txt new file mode 100644 index 0000000..e7483bb --- /dev/null +++ b/benchmark/core/lex/data/data_3.txt @@ 
-0,0 +1,803 @@ +#include +#include + +namespace docgen { +namespace core { +namespace lex { + +struct lexer_fixture : ::testing::Test +{ +protected: + using status_t = typename Lexer::status_t; + using token_t = typename Lexer::token_t; + using symbol_t = typename Lexer::symbol_t; + + Lexer lexer; + std::optional token; + + void setup_lexer(const char* content) + { + std::string str(content); + for (char c : str) { + lexer.process(c); + } + lexer.flush(); + } +}; + +//////////////////////////////////////////////////////////////////// +// Individual Symbol TESTS +//////////////////////////////////////////////////////////////////// + +// NEWLINE +TEST_F(lexer_fixture, lexer_newline) +{ + static constexpr const char* content = + "somecrazy1492text\nmvn2b" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "somecrazy1492text"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "mvn2b"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE ( ) +TEST_F(lexer_fixture, lexer_whitespace_space) +{ + static constexpr const char* content = + ",m.,m. 
abn" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, ",m.,m."); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abn"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (\t) +TEST_F(lexer_fixture, lexer_whitespace_t) +{ + static constexpr const char* content = + "h0f2n.1\t1234|" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "h0f2n.1"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "1234|"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (\v) +TEST_F(lexer_fixture, lexer_whitespace_v) +{ + static constexpr const char* content = + "hello!\v" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "hello!"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (\r) +TEST_F(lexer_fixture, lexer_whitespace_r) +{ + static constexpr const char* content = + "hello!\rwsdescorrld!!" 
+ ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "hello!"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "wsdescorrld!!"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (\f) +TEST_F(lexer_fixture, lexer_whitespace_f) +{ + static constexpr const char* content = + "hello!\fwsdescorrld!!" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "hello!"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "wsdescorrld!!"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (;) +TEST_F(lexer_fixture, lexer_semicolon) +{ + static constexpr const char* content = + ";wsdescorrld!!" 
+ ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::SEMICOLON); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "wsdescorrld!!"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// BEGIN_SLINE_COMMENT +TEST_F(lexer_fixture, lexer_begin_sline_comment) +{ + static constexpr const char* content = + "abc///" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// BEGIN_SBLOCK_COMMENT +TEST_F(lexer_fixture, lexer_begin_sblock_comment) +{ + static constexpr const char* content = + "abc/*!" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SBLOCK_COMMENT); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// BEGIN_NBLOCK_COMMENT +TEST_F(lexer_fixture, lexer_begin_nblock_comment) +{ + static constexpr const char* content = + "abc/**!" 
+ ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NBLOCK_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::STAR); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "!"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// END_BLOCK_COMMENT +TEST_F(lexer_fixture, lexer_end_block_comment_no_star) +{ + static constexpr const char* content = + "abc*/f" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::END_BLOCK_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "f"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +TEST_F(lexer_fixture, lexer_end_block_comment_star) +{ + static constexpr const char* content = + "abc**/f" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::STAR); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::END_BLOCK_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "f"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// SDESC +TEST_F(lexer_fixture, lexer_sdesc) +{ + static constexpr const char* content = + 
"ssdesc@@sdescf@sdesscf" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "ssdesc@"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::SDESC); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "f@sdesscf"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// TPARAM +TEST_F(lexer_fixture, lexer_tparam) +{ + static constexpr const char* content = + "ssdes@@@@@@tpaar@tpara@m@tparam@tpar" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "ssdes@@@@@@tpaar@tpara@m"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TPARAM); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "@tpar"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// RETURN +TEST_F(lexer_fixture, lexer_return) +{ + static constexpr const char* content = + "@re@@@@@@return@@@@@" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "@re@@@@@"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::RETURN); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "@@@@@"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +//////////////////////////////////////////////////////////////////// +// Mix TESTS +//////////////////////////////////////////////////////////////////// + +// line comment mix +TEST_F(lexer_fixture, lexer_line_comment_4) +{ + static constexpr const char* content = + 
"abc////" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "/"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// line comment mix +TEST_F(lexer_fixture, lexer_line_comment_5) +{ + static constexpr const char* content = + "abc/////" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// line comment mix +TEST_F(lexer_fixture, lexer_line_comment_6) +{ + static constexpr const char* content = + "abc//////" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +TEST_F(lexer_fixture, lexer_test_1_no_special_comment) +{ + static constexpr const char* content = + "#include // some comment\n" + "\n" + "void f();" + ; + + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::HASHTAG); + 
EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "include"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "some"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "void"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "f()"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::SEMICOLON); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +TEST_F(lexer_fixture, lexer_test_2_no_special_comment) +{ + static constexpr const char* content = + "#include \n" + "\n" + " // just a normal comment\n" + "\n" + ; + 
+ setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::HASHTAG); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "include"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "just"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "a"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "normal"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, 
symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +TEST_F(lexer_fixture, lexer_test_1_comment_mix) +{ + static constexpr const char* content = + "// comment\n" + " /// special_comment \n" + ; + + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "special_comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +TEST_F(lexer_fixture, lexer_test_1_tagname_comments) +{ + static constexpr const char* content = + "// @tparam normal comment\n" + "/// @sdescspecial comment \n" + "#define hehe\n" + ; + + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, 
symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TPARAM); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "normal"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::SDESC); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "special"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::HASHTAG); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "define"); + + token = 
lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "hehe"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +} // namespace lex +} // namespace core +} // namespace docgen diff --git a/benchmark/core/lex/data/data_4.txt b/benchmark/core/lex/data/data_4.txt new file mode 100644 index 0000000..374e703 --- /dev/null +++ b/benchmark/core/lex/data/data_4.txt @@ -0,0 +1,71 @@ +jfowfoeijiowjfo///jfoijeonnvmlalfjioejoifjoelkdkfnkejwaoijjfoj12893483u2rjjdkfj + + +; +{jfiojeojahl{jiofejiofkvnkelwojio}} +/// @sdesc some short description............ + + +jfoijeiojokvmjknfirojijjoejfiokjeofjojdjfdfoejoj +JFOIJEIOJOKVMJKNFIROJIJJOEJFIOKJEOFJOJDJFDFOEJOJ +iuhigarufejoafwrhsgijfkajnbvkmfpoefrigirjkfel;df,ldmbkglrf;e,d.v mcd +jfowfoeijiowjfo///jfoijeonnvmlalfjioejoifjoelkdkfnkejwaoijjfoj12893483u2rjjdkfj +jfowfoeijiowjfo///jfoijeonnvmlalfjioejoifjoelkdkfnkejwaoijjfoj12893483u2rjjdkfj +jfowfoeijiowjfo///jfoijeonnvmlalfjioejoifjoelkdkfnkejwaoijjfoj12893483u2rjjdkfj +jfowfoeijiowjfo///jfoijeonnvmlalfjioejoifjoelkdkfnkejwaoijjfoj12893483u2rjjdkfj +jfowfoeijiowjfo///jfoijeonnvmlalfjioejoifjoelkdkfnkejwaoijjfoj12893483u2rjjdkfj + + +; +{jfiojeojahl{jiofejiofkvnkelwojio}} +/// @sdesc some short description............ + + +jfoijeiojokvmjknfirojijjoejfiokjeofjojdjfdfoejoj +JFOIJEIOJOKVMJKNFIROJIJJOEJFIOKJEOFJOJDJFDFOEJOJ +iuhigarufejoafwrhsgijfkajnbvkmfpoefrigirjkfel;df,ldmbkglrf;e,d.v mcd + + +; +{jfiojeojahl{jiofejiofkvnkelwojio}} +/// @sdesc some short description............ 
+ + +jfoijeiojokvmjknfirojijjoejfiokjeofjojdjfdfoejoj +JFOIJEIOJOKVMJKNFIROJIJJOEJFIOKJEOFJOJDJFDFOEJOJ +iuhigarufejoafwrhsgijfkajnbvkmfpoefrigirjkfel;df,ldmbkglrf;e,d.v mcd + + +; +{jfiojeojahl{jiofejiofkvnkelwojio}} +/// @sdesc some short description............ + +/*! !hfowojlk @tparam scdesc fjown @tparam x jeiofhjoeifjdkjofijsoiejfoijeojoirjiohignjknjfbnkjnvkfjoiejioj +* +* +*/ + + +jfoijeiojokvmjknfirojijjoejfiokjeofjojdjfdfoejoj +JFOIJEIOJOKVMJKNFIROJIJJOEJFIOKJEOFJOJDJFDFOEJOJ +iuhigarufejoafwrhsgijfkajnbvkmfpoefrigirjkfel;df,ldmbkglrf;e,d.v mcd + + +; +{jfiojeojahl{jiofejiofkvnkelwojio}} +/// @sdesc some short description............ + + +jfoijeiojokvmjknfirojijjoejfiokjeofjojdjfdfoejoj +JFOIJEIOJOKVMJKNFIROJIJJOEJFIOKJEOFJOJDJFDFOEJOJ +iuhigarufejoafwrhsgijfkajnbvkmfpoefrigirjkfel;df,ldmbkglrf;e,d.v mcd + + +; +{jfiojeojahl{jiofejiofkvnkelwojio}} +/// @sdesc some short description............ + + +jfoijeiojokvmjknfirojijjoejfiokjeofjojdjfdfoejoj +JFOIJEIOJOKVMJKNFIROJIJJOEJFIOKJEOFJOJDJFDFOEJOJ +iuhigarufejoafwrhsgijfkajnbvkmfpoefrigirjkfel;df,ldmbkglrf;e,d.v mcd
diff --git a/benchmark/core/lex/lexer_base_fixture.hpp b/benchmark/core/lex/lexer_base_fixture.hpp
new file mode 100644
index 0000000..51631d6
--- /dev/null
+++ b/benchmark/core/lex/lexer_base_fixture.hpp
@@ -0,0 +1,30 @@
+#pragma once
+#include <benchmark/benchmark.h>
+
+namespace docgen {
+namespace core {
+namespace lex {
+
+// Common base for the lexer benchmarks: names the input files and provides
+// empty SetUp/TearDown hooks. The paths are relative to the working directory;
+// benchmark/CMakeLists.txt copies the data directory to where the
+// lexer_benchmark executable ends up as a post-build step.
+struct lexer_base_fixture : benchmark::Fixture
+{
+    static constexpr const char* data_1_path = "data/data_1.txt";
+    static constexpr const char* data_2_path = "data/data_2.txt";
+    static constexpr const char* data_3_path = "data/data_3.txt";
+    static constexpr const char* data_4_path = "data/data_4.txt";
+
+    // Nothing to prepare or release per benchmark; the parameters are left
+    // unnamed so they do not trigger unused-parameter warnings.
+    void SetUp(const ::benchmark::State&)
+    {}
+
+    void TearDown(const ::benchmark::State&)
+    {}
+};
+
+} // namespace lex
+} // namespace core
+} // namespace docgen
diff --git a/benchmark/core/lex/lexer_benchmark.cpp b/benchmark/core/lex/lexer_benchmark.cpp new file mode 100644
index 0000000..f2cc6d1 --- /dev/null +++ b/benchmark/core/lex/lexer_benchmark.cpp @@ -0,0 +1,68 @@ +#include +#include "lexer_base_fixture.hpp" + +namespace docgen { +namespace core { +namespace lex { + +struct lexer_fixture : lexer_base_fixture +{ + Lexer lexer; +}; + +BENCHMARK_F(lexer_fixture, data_1_test)(benchmark::State& st) +{ + for (auto _ : st) { + std::unique_ptr file(fopen(data_1_path, "r"), + [](FILE* file) {fclose(file);}); + int c = 0; + while ((c = fgetc(file.get())) != EOF) { + lexer.process(c); + } + benchmark::DoNotOptimize(lexer.next_token()); + } +} + +BENCHMARK_F(lexer_fixture, data_2_test)(benchmark::State& st) +{ + for (auto _ : st) { + std::unique_ptr file(fopen(data_2_path, "r"), + [](FILE* file) {fclose(file);}); + int c = 0; + while ((c = fgetc(file.get())) != EOF) { + lexer.process(c); + } + benchmark::DoNotOptimize(lexer.next_token()); + } +} + +BENCHMARK_F(lexer_fixture, data_3_test)(benchmark::State& st) +{ + for (auto _ : st) { + std::unique_ptr file(fopen(data_3_path, "r"), + [](FILE* file) {fclose(file);}); + int c = 0; + while ((c = fgetc(file.get())) != EOF) { + lexer.process(c); + } + benchmark::DoNotOptimize(lexer.next_token()); + } +} + +BENCHMARK_F(lexer_fixture, data_4_test)(benchmark::State& st) +{ + for (auto _ : st) { + std::unique_ptr file(fopen(data_4_path, "r"), + [](FILE* file) {fclose(file);}); + int c = 0; + while ((c = fgetc(file.get())) != EOF) { + lexer.process(c); + } + benchmark::DoNotOptimize(lexer.next_token()); + } +} + +} // namespace lex +} // namespace core +} // namespace docgen + diff --git a/benchmark/core/lex/lexer_legacy_benchmark.cpp b/benchmark/core/lex/lexer_legacy_benchmark.cpp new file mode 100644 index 0000000..34e9848 --- /dev/null +++ b/benchmark/core/lex/lexer_legacy_benchmark.cpp @@ -0,0 +1,59 @@ +#include +#include "lexer_base_fixture.hpp" + +namespace docgen { +namespace core { +namespace lex { +namespace legacy { + +struct lexer_legacy_fixture : lexer_base_fixture +{}; + 
+BENCHMARK_F(lexer_legacy_fixture, data_1_test)(benchmark::State& st) +{ + for (auto _ : st) { + FILE* file = fopen(data_1_path, "r"); + Lexer lexer(file); + lexer.process(); + benchmark::DoNotOptimize(lexer.get_tokens()[0]); + fclose(file); + } +} + +BENCHMARK_F(lexer_legacy_fixture, data_2_test)(benchmark::State& st) +{ + for (auto _ : st) { + FILE* file = fopen(data_2_path, "r"); + Lexer lexer(file); + lexer.process(); + benchmark::DoNotOptimize(lexer.get_tokens()[0]); + fclose(file); + } +} + +BENCHMARK_F(lexer_legacy_fixture, data_3_test)(benchmark::State& st) +{ + for (auto _ : st) { + FILE* file = fopen(data_3_path, "r"); + Lexer lexer(file); + lexer.process(); + benchmark::DoNotOptimize(lexer.get_tokens()[0]); + fclose(file); + } +} + +BENCHMARK_F(lexer_legacy_fixture, data_4_test)(benchmark::State& st) +{ + for (auto _ : st) { + FILE* file = fopen(data_4_path, "r"); + Lexer lexer(file); + lexer.process(); + benchmark::DoNotOptimize(lexer.get_tokens()[0]); + fclose(file); + } +} + +} // namespace legacy +} // namespace lex +} // namespace core +} // namespace docgen diff --git a/configure.sh b/configure.sh index 3b5097a..132c372 100755 --- a/configure.sh +++ b/configure.sh @@ -1,20 +1,65 @@ -#!/bin/sh +#!/bin/bash + +mode=$1 # debug/release mode +shift # shift command-line arguments + # the rest are cmake command-line arguments + +# relative directory where current shell script resides from where shell script was called +PROJECTDIR="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)" +echo "Project directory: $PROJECTDIR" + +cd "$PROJECTDIR" + +# Install nlohmann/json +if [[ "$OSTYPE" == "linux-gnu" ]]; then + if ! command -v conan >/dev/null 2>&1; then # use exit status: an unquoted empty $(...) makes [ ] error out instead of matching "" + echo "config fail: conan not installed" + exit 1 + fi + cd libs + conan install . 
+ cd - +elif [[ "$OSTYPE" == "darwin"* ]]; then + if ! command -v brew >/dev/null 2>&1; then # use exit status: an unquoted empty $(...) makes [ ] error out instead of matching "" + echo "config fail: brew not installed" + exit 1 + fi + brew install nlohmann-json +else + echo "config fail: unrecognizable OS" +fi # If setup.sh was called before if [ -d "libs/benchmark/googletest" ]; then rm -rf libs/benchmark fi +# Initialize submodules if needed +git submodule update --init # Update submodule if needed -git submodule update --remote -# Setup google benchmark and googletest +git submodule update --recursive --remote + +# Setup googletest git clone https://github.com/google/googletest.git libs/benchmark/googletest +# Set google test to specific release tag +cd libs/benchmark/googletest +git fetch --all --tags --prune +git checkout tags/release-1.10.0 -b release-1.10.0 +cd - + +# Setup googlebenchmark cd libs/benchmark -mkdir -p build && cd build +git fetch --all --tags --prune +git checkout tags/v1.5.0 -b v1.5.0 +cd - +# Build and install google benchmark locally +cd libs/benchmark +mkdir -p build && cd build +cmake_flags="-DCMAKE_INSTALL_PREFIX=$PROJECTDIR/libs/benchmark/build" if [ $(command -v ninja) != "" ]; then - cmake ../ -GNinja + cmake ../ -GNinja $cmake_flags "$@" else - cmake ../ + cmake ../ $cmake_flags "$@" fi -cmake --build . -- -j12 +cmake --build . 
--target install -- -j12 diff --git a/libs/benchmark b/libs/benchmark index 5ce2429..daff5fe 160000 --- a/libs/benchmark +++ b/libs/benchmark @@ -1 +1 @@ -Subproject commit 5ce2429af7a8481581896afaa480552cc7584808 +Subproject commit daff5fead3fbe22c6fc58310ca3f49caf117f185 diff --git a/libs/conanfile.txt b/libs/conanfile.txt new file mode 100644 index 0000000..64b1192 --- /dev/null +++ b/libs/conanfile.txt @@ -0,0 +1,5 @@ +[requires] +nlohmann_json/3.7.3 + +[generators] +cmake_find_package diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..4147592 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,9 @@ +# Create object files for lexer +add_library(LEXER_LIB_OBJECTS OBJECT + ${CMAKE_CURRENT_SOURCE_DIR}/core/lex/lexer.cpp + ) +target_compile_features(LEXER_LIB_OBJECTS PRIVATE cxx_std_17) +target_include_directories(LEXER_LIB_OBJECTS PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + ${ETERNAL_DIR}/include + ) diff --git a/src/core/lexer.hpp b/src/core/lex/legacy/lexer.hpp similarity index 73% rename from src/core/lexer.hpp rename to src/core/lex/legacy/lexer.hpp index 56f647e..ec5537e 100644 --- a/src/core/lexer.hpp +++ b/src/core/lex/legacy/lexer.hpp @@ -6,9 +6,9 @@ namespace core { struct Lexer { - using symbol_t = lexer_details::symbol_t; - using file_reader = lexer_details::file_reader; - using status_t = lexer_details::status_t; + using symbol_t = lex::legacy::symbol_t; + using file_reader = lex::legacy::file_reader; + using status_t = lex::legacy::status_t; Lexer(FILE* file) : reader_(file) @@ -18,7 +18,7 @@ struct Lexer void process() { - lexer_details::process(reader_, status_); + lex::legacy::process(reader_, status_); } const status_t::token_arr_t& get_tokens() const diff --git a/src/core/lexer_routines.hpp b/src/core/lex/legacy/lexer_routines.hpp similarity index 97% rename from src/core/lexer_routines.hpp rename to src/core/lex/legacy/lexer_routines.hpp index 06e658f..d6e8dd0 100644 --- a/src/core/lexer_routines.hpp +++ 
b/src/core/lex/legacy/lexer_routines.hpp @@ -1,14 +1,14 @@ #pragma once -#include "token.hpp" -#include "status.hpp" -#include "symbol.hpp" +#include "core/lex/legacy/token.hpp" +#include "core/lex/legacy/status.hpp" +#include "core/lex/legacy/symbol.hpp" #include "io/file_reader.hpp" -#include "tag_set.hpp" -#include +#include "core/tag_set.hpp" namespace docgen { namespace core { -namespace lexer_details { +namespace lex { +namespace legacy { static constexpr size_t DEFAULT_STRING_RESERVE_SIZE = 50; @@ -295,6 +295,7 @@ inline void process(file_reader& reader, status_t& status) status.tokens.emplace_back(token_t::symbol_t::END_OF_FILE); } -} // namespace lexer_details +} // namespace legacy +} // namespace lex } // namespace core } // namespace docgen diff --git a/src/core/status.hpp b/src/core/lex/legacy/status.hpp similarity index 100% rename from src/core/status.hpp rename to src/core/lex/legacy/status.hpp diff --git a/src/core/lex/legacy/symbol.hpp b/src/core/lex/legacy/symbol.hpp new file mode 100644 index 0000000..4eb1126 --- /dev/null +++ b/src/core/lex/legacy/symbol.hpp @@ -0,0 +1,44 @@ +#pragma once +#include +#include +#include + +namespace docgen { +namespace core { +namespace lex { +namespace legacy { + +enum class Symbol { + // single-char tokens + END_OF_FILE, + NEWLINE, + SEMICOLON, + STAR, + OPEN_BRACE, + CLOSE_BRACE, + // string tokens + BEGIN_LINE_COMMENT, + BEGIN_BLOCK_COMMENT, + END_BLOCK_COMMENT, + // special tags + TAGNAME, + // default + TEXT +}; + +// Compile-time mapping of strings to corresponding symbol +static MAPBOX_ETERNAL_CONSTEXPR const auto symbol_map = + mapbox::eternal::map({ + {Symbol::SEMICOLON, ";"}, + {Symbol::STAR, "*"}, + {Symbol::OPEN_BRACE, "{"}, + {Symbol::CLOSE_BRACE, "}"}, + {Symbol::BEGIN_LINE_COMMENT, "///"}, + {Symbol::BEGIN_BLOCK_COMMENT, "/*!"}, + {Symbol::END_BLOCK_COMMENT, "*/"}, + }); + +} // namespace legacy +} // namespace lex +} // namespace core +} // namespace docgen diff --git 
a/src/core/lex/legacy/token.hpp b/src/core/lex/legacy/token.hpp new file mode 100644 index 0000000..72edcaa --- /dev/null +++ b/src/core/lex/legacy/token.hpp @@ -0,0 +1,43 @@ +#pragma once +#include +#include "core/symbol.hpp" + +namespace docgen { +namespace core { +namespace lex { +namespace legacy { + +template +struct Token +{ + using symbol_t = SymbolType; + + Token(symbol_t name, std::string&& content, uint32_t leading_ws_count=0) + : name(name) + , content(std::move(content)) + , leading_ws_count(leading_ws_count) + {} + + Token(symbol_t name) + : Token(name, "") + {} + + // left undefined for SymbolType != Symbol + const char* c_str() const; + + symbol_t name; + std::string content; + uint32_t leading_ws_count; +}; + +template <> +inline const char* Token::c_str() const +{ + return (symbol_map.find(name) != symbol_map.end()) ? + symbol_map.at(name).c_str() : content.c_str(); +} + +} +} // namespace lex +} // namespace core +} // namespace docgen diff --git a/src/core/lex/lexer.cpp b/src/core/lex/lexer.cpp new file mode 100644 index 0000000..2cb7356 --- /dev/null +++ b/src/core/lex/lexer.cpp @@ -0,0 +1,113 @@ +#include + +namespace docgen { +namespace core { +namespace lex { + +/////////////////////////////////// +// Lexer Implementation +/////////////////////////////////// + +Lexer::Lexer() + : trie_({ + {"\n", Symbol::NEWLINE}, + {" ", Symbol::WHITESPACE}, + {"\t", Symbol::WHITESPACE}, + {"\v", Symbol::WHITESPACE}, + {"\r", Symbol::WHITESPACE}, + {"\f", Symbol::WHITESPACE}, + {";", Symbol::SEMICOLON}, + {"#", Symbol::HASHTAG}, + {"*", Symbol::STAR}, + {"{", Symbol::OPEN_BRACE}, + {"}", Symbol::CLOSE_BRACE}, + {"///", Symbol::BEGIN_SLINE_COMMENT}, + {"/*!", Symbol::BEGIN_SBLOCK_COMMENT}, + {"//", Symbol::BEGIN_NLINE_COMMENT}, + {"/*", Symbol::BEGIN_NBLOCK_COMMENT}, + {"*/", Symbol::END_BLOCK_COMMENT}, + {"@sdesc", Symbol::SDESC}, + {"@tparam", Symbol::TPARAM}, + {"@param", Symbol::PARAM}, + {"@return", Symbol::RETURN} + }) +{} + +void Lexer::process(char c) 
+{ + this->update_state(); + + auto it = trie_.get_children().find(c); + + // if transition exists + if (it != trie_.get_children().end()) { + buf_.push_back(c); + trie_.transition(c); + return; + } + + // otherwise, no transition exists + + // if not backtracking + if (!this->is_backtracking()) { + // if trie at root + if (trie_.is_reset()) { + text_.push_back(c); + return; + } + text_.append(buf_); + buf_.clear(); + trie_.reset(); + return this->process(c); + } + + // otherwise, currently backtracking + this->backtrack(c); +} + +void Lexer::backtrack(char c) +{ + // tokenize text + this->tokenize_text(); + + // tokenize symbol + for (uint32_t i = 0; i < buf_.size(); ++i) { + trie_.back_transition(); + } + assert(trie_.is_accept()); + auto opt_symbol = trie_.get_symbol(); + assert(static_cast(opt_symbol)); + status_.tokens.emplace(*opt_symbol); + + // move and clear buf_ to temp string for reprocessing + std::string reprocess_str(std::move(buf_)); + reprocess_str.push_back(c); + + // reset + this->reset(); + + // reprocess the rest + for (char c : reprocess_str) { + this->process(c); + } +} + +void Lexer::flush() +{ + this->update_state(); + + if (this->is_backtracking()) { + return this->backtrack(0); + } + + // non-backtracking: no parent is an accepting node + // append buf_ to text_ and tokenize text_ + // reset all other fields + text_.append(buf_); + this->tokenize_text(); + this->reset(); +} + +} // namespace lex +} // namespace core +} // namespace docgen diff --git a/src/core/lex/lexer.hpp b/src/core/lex/lexer.hpp new file mode 100644 index 0000000..053f75c --- /dev/null +++ b/src/core/lex/lexer.hpp @@ -0,0 +1,100 @@ +#pragma once +#include +#include +#include +#include + +namespace docgen { +namespace core { +namespace lex { + +struct Lexer +{ + using symbol_t = Symbol; + using token_t = Token; + using status_t = Status; + + Lexer(); + + void process(char c); + void flush(); + std::optional next_token(); + +private: + + void tokenize_text(); + bool 
is_backtracking() const; + void set_backtracking(); + void reset_backtracking(); + void backtrack(char c); + void update_state(); + void reset(); + + enum class State : bool { + backtrack, + non_backtrack + }; + + Trie trie_; + std::string text_; + std::string buf_; + State state_ = State::non_backtrack; + status_t status_; +}; + +inline void Lexer::tokenize_text() +{ + if (!text_.empty()) { + status_.tokens.emplace(symbol_t::TEXT, std::move(text_)); + } +} + +inline bool Lexer::is_backtracking() const +{ + return state_ == State::backtrack; +} + +inline void Lexer::set_backtracking() +{ + state_ = State::backtrack; +} + +inline void Lexer::reset_backtracking() +{ + state_ = State::non_backtrack; +} + +inline void Lexer::update_state() +{ + // if current state is accepting + if (trie_.is_accept()) { + if (!this->is_backtracking()) { + this->set_backtracking(); + } + // ignore contents in buffer up until now + // this optimization can be done because we look for longest match + buf_.clear(); + } +} + +inline std::optional Lexer::next_token() +{ + if (!status_.tokens.empty()) { + token_t token = std::move(status_.tokens.front()); + status_.tokens.pop(); + return token; + } + return {}; +} + +inline void Lexer::reset() +{ + text_.clear(); + buf_.clear(); + trie_.reset(); + reset_backtracking(); +} + +} // namespace lex +} // namespace core +} // namespace docgen diff --git a/src/core/lex/status.hpp b/src/core/lex/status.hpp new file mode 100644 index 0000000..2af91f7 --- /dev/null +++ b/src/core/lex/status.hpp @@ -0,0 +1,19 @@ +#pragma once +#include + +namespace docgen { +namespace core { +namespace lex { + +template +struct Status +{ + using token_t = TokenType; + using token_arr_t = std::queue; + + token_arr_t tokens; +}; + +} // namespace lex +} // namespace core +} // namespace docgen diff --git a/src/core/lex/trie.hpp b/src/core/lex/trie.hpp new file mode 100644 index 0000000..2df5008 --- /dev/null +++ b/src/core/lex/trie.hpp @@ -0,0 +1,211 @@ +#pragma once 
+#include +#include +#include +#include +#include +#include +#include + +namespace docgen { +namespace core { +namespace lex { + +template +struct Trie +{ +private: + struct TrieNode; // forward declaration + +public: + using pair_t = std::pair; + + // Constructs trie node from a list of pairs of string and symbol. + // The string must be of type std::string_view and it must not be empty. + // The symbol must be of type SymbolType. + Trie(const std::initializer_list&); + + // Delete compiler-generated copy/move ctor/assignment + // This ensures that Trie objects are only (default) constructible. + Trie(const Trie&) =delete; + Trie(Trie&&) =delete; + Trie& operator=(const Trie&) =delete; + Trie& operator=(Trie&&) =delete; + + void transition(char c); + void back_transition(); + bool is_accept() const; + typename TrieNode::children_t& get_children(); + bool is_reset() const; + void reset(); + const std::optional& get_symbol() const; + +private: + + struct TrieNode + { + using children_t = std::unordered_map>; + + // Insert str from current node to update the trie structure. + // The string str is read starting from idx. + void insert(const std::pair&, size_t = 0); + + // Returns if current node is an accepting state. + bool is_accept() const; + + // Returns the optional symbol associated with current node. + // Symbol will be active if is_accept is true. 
+ const std::optional& get_symbol() const; + + children_t& get_children(); + + std::optional> get_parent(); + + private: + + enum class State : bool { + accept, + non_accept + }; + + State state_ = State::non_accept; // indicates accepting node or not + std::optional symbol_; // symbol for accepting node + children_t children_; // current node's children + TrieNode* parent_ptr_ = nullptr; // current node's parent (null until insert() links a child to its parent) + }; + + TrieNode root_; // root of Trie + std::reference_wrapper curr_node_ = root_; // current node +}; + +//////////////////////////////////////////////////////////////// +// TrieNode Implementation +//////////////////////////////////////////////////////////////// + +template +inline void +Trie::TrieNode::insert(const pair_t& pair, size_t idx) +{ + const auto& str = std::get<0>(pair); + + // if string starting from idx is empty, then accepting state (length check: never index at size()) + if (idx >= str.size()) { + state_ = State::accept; + symbol_ = std::get<1>(pair); + } + + else { + // if no child with str[idx] mapping + if (children_.find(str[idx]) == children_.end()) { + children_.emplace(str[idx], std::make_unique()); + } + auto& child = children_.at(str[idx]); + child->parent_ptr_ = this; + child->insert(pair, idx + 1); + } +} + +template +inline bool +Trie::TrieNode::is_accept() const +{ + return state_ == State::accept; +} + +template +inline const std::optional& +Trie::TrieNode::get_symbol() const +{ + return symbol_; +} + +template +inline typename Trie::TrieNode::children_t& +Trie::TrieNode::get_children() +{ + return children_; +} + +template +inline std::optional::TrieNode>> +Trie::TrieNode::get_parent() +{ + if (parent_ptr_) { + return *parent_ptr_; + } + return {}; +} + +//////////////////////////////////////////////////////////////// +// Trie Implementation +//////////////////////////////////////////////////////////////// + +template +inline +Trie::Trie(const std::initializer_list& pairs) + : root_() +{ + for (auto it = pairs.begin(); it != pairs.end(); ++it) { + if 
(it->first.empty()) { + throw exceptions::control_flow_error("strings must be non-empty"); + } + root_.insert(*it); + } +} + +template +inline void +Trie::transition(char c) +{ + curr_node_ = *(curr_node_.get().get_children().at(c)); +} + +template +inline bool +Trie::is_accept() const +{ + return curr_node_.get().is_accept(); +} + +template +inline typename Trie::TrieNode::children_t& +Trie::get_children() +{ + return curr_node_.get().get_children(); +} + +template +inline bool +Trie::is_reset() const +{ + return &(curr_node_.get()) == &root_; +} + +template +inline void +Trie::reset() +{ + curr_node_ = root_; +} + +template +inline void +Trie::back_transition() +{ + auto&& opt_parent = curr_node_.get().get_parent(); + if (!opt_parent) { + throw exceptions::control_flow_error("Attempt to back transition past the root"); + } + curr_node_ = *opt_parent; +} + +template +inline const std::optional& +Trie::get_symbol() const +{ + return curr_node_.get().get_symbol(); +} + +} // namespace lex +} // namespace core +} // namespace docgen diff --git a/src/core/symbol.hpp b/src/core/symbol.hpp index 6dc5762..0bb7772 100644 --- a/src/core/symbol.hpp +++ b/src/core/symbol.hpp @@ -1,7 +1,5 @@ #pragma once #include -#include -#include namespace docgen { namespace core { @@ -10,16 +8,23 @@ enum class Symbol { // single-char tokens END_OF_FILE, NEWLINE, + WHITESPACE, SEMICOLON, + HASHTAG, STAR, OPEN_BRACE, CLOSE_BRACE, // string tokens - BEGIN_LINE_COMMENT, - BEGIN_BLOCK_COMMENT, + BEGIN_SLINE_COMMENT, + BEGIN_SBLOCK_COMMENT, + BEGIN_NLINE_COMMENT, + BEGIN_NBLOCK_COMMENT, END_BLOCK_COMMENT, // special tags - TAGNAME, + SDESC, + TPARAM, + PARAM, + RETURN, // default TEXT }; @@ -31,8 +36,10 @@ static MAPBOX_ETERNAL_CONSTEXPR const auto symbol_map = {Symbol::STAR, "*"}, {Symbol::OPEN_BRACE, "{"}, {Symbol::CLOSE_BRACE, "}"}, - {Symbol::BEGIN_LINE_COMMENT, "///"}, - {Symbol::BEGIN_BLOCK_COMMENT, "/*!"}, + {Symbol::BEGIN_SLINE_COMMENT, "///"}, + {Symbol::BEGIN_SBLOCK_COMMENT, "/*!"}, + 
{Symbol::BEGIN_NLINE_COMMENT, "//"}, + {Symbol::BEGIN_NBLOCK_COMMENT, "/*"}, {Symbol::END_BLOCK_COMMENT, "*/"}, }); diff --git a/src/core/token.hpp b/src/core/token.hpp index ac83ec0..4ef3798 100644 --- a/src/core/token.hpp +++ b/src/core/token.hpp @@ -1,6 +1,6 @@ #pragma once #include -#include "symbol.hpp" +#include namespace docgen { namespace core { @@ -10,10 +10,9 @@ struct Token { using symbol_t = SymbolType; - Token(symbol_t name, std::string&& content, uint32_t leading_ws_count=0) + Token(symbol_t name, std::string&& content) : name(name) , content(std::move(content)) - , leading_ws_count(leading_ws_count) {} Token(symbol_t name) @@ -25,7 +24,6 @@ struct Token symbol_t name; std::string content; - uint32_t leading_ws_count; }; template <> diff --git a/src/exceptions/exceptions.hpp b/src/exceptions/exceptions.hpp index b4fc786..8dbe010 100644 --- a/src/exceptions/exceptions.hpp +++ b/src/exceptions/exceptions.hpp @@ -2,6 +2,7 @@ #include #include +#include #include #include diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c8abf0f..3f4594f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -3,6 +3,7 @@ # TEST_TARGET is added as a test and an executable named ${TEST_NAME} will be created. 
function(create_test TEST_NAME TEST_TARGET) target_compile_options(${TEST_TARGET} PRIVATE -g -Wall -Werror -Wextra) + target_compile_features(${TEST_TARGET} PRIVATE cxx_std_17) target_include_directories(${TEST_TARGET} PRIVATE ${GTEST_DIR}/include @@ -27,26 +28,30 @@ create_test("exceptions_unittests" exceptions_unittests) # File IO Unit Tests ###################################################### -add_executable(io_unittests - ${CMAKE_CURRENT_SOURCE_DIR}/io/file_reader_unittest.cpp - ) - -create_test("io_unittests" io_unittests) - -# copy data directory into where io_unittests executable ends up -add_custom_command( - TARGET io_unittests POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_directory - ${CMAKE_CURRENT_SOURCE_DIR}/io/io_data/ - $/io_data) +#add_executable(io_unittests +# ${CMAKE_CURRENT_SOURCE_DIR}/io/file_reader_unittest.cpp +# ) +# +#create_test("io_unittests" io_unittests) +# +## copy data directory into where io_unittests executable ends up +#add_custom_command( +# TARGET io_unittests POST_BUILD +# COMMAND ${CMAKE_COMMAND} -E copy_directory +# ${CMAKE_CURRENT_SOURCE_DIR}/io/io_data/ +# $/io_data) ###################################################### -# File IO Unit Tests +# Core Unit Tests ###################################################### add_executable(core_unittests - ${CMAKE_CURRENT_SOURCE_DIR}/core/lexer_routines_unittest.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/core/lexer_unittest.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/core/lex/trie_unittest.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/core/lex/lexer_unittest.cpp + # Source dependency + $ ) create_test("core_unittests" core_unittests) + + diff --git a/test/core/lex/lexer_unittest.cpp b/test/core/lex/lexer_unittest.cpp new file mode 100644 index 0000000..e7483bb --- /dev/null +++ b/test/core/lex/lexer_unittest.cpp @@ -0,0 +1,803 @@ +#include +#include + +namespace docgen { +namespace core { +namespace lex { + +struct lexer_fixture : ::testing::Test +{ +protected: + using status_t = typename Lexer::status_t; + using 
token_t = typename Lexer::token_t; + using symbol_t = typename Lexer::symbol_t; + + Lexer lexer; + std::optional token; + + void setup_lexer(const char* content) + { + std::string str(content); + for (char c : str) { + lexer.process(c); + } + lexer.flush(); + } +}; + +//////////////////////////////////////////////////////////////////// +// Individual Symbol TESTS +//////////////////////////////////////////////////////////////////// + +// NEWLINE +TEST_F(lexer_fixture, lexer_newline) +{ + static constexpr const char* content = + "somecrazy1492text\nmvn2b" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "somecrazy1492text"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "mvn2b"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE ( ) +TEST_F(lexer_fixture, lexer_whitespace_space) +{ + static constexpr const char* content = + ",m.,m. 
abn" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, ",m.,m."); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abn"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (\t) +TEST_F(lexer_fixture, lexer_whitespace_t) +{ + static constexpr const char* content = + "h0f2n.1\t1234|" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "h0f2n.1"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "1234|"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (\v) +TEST_F(lexer_fixture, lexer_whitespace_v) +{ + static constexpr const char* content = + "hello!\v" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "hello!"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (\r) +TEST_F(lexer_fixture, lexer_whitespace_r) +{ + static constexpr const char* content = + "hello!\rwsdescorrld!!" 
+ ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "hello!"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "wsdescorrld!!"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (\f) +TEST_F(lexer_fixture, lexer_whitespace_f) +{ + static constexpr const char* content = + "hello!\fwsdescorrld!!" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "hello!"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "wsdescorrld!!"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (;) +TEST_F(lexer_fixture, lexer_semicolon) +{ + static constexpr const char* content = + ";wsdescorrld!!" 
+ ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::SEMICOLON); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "wsdescorrld!!"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// BEGIN_SLINE_COMMENT +TEST_F(lexer_fixture, lexer_begin_sline_comment) +{ + static constexpr const char* content = + "abc///" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// BEGIN_SBLOCK_COMMENT +TEST_F(lexer_fixture, lexer_begin_sblock_comment) +{ + static constexpr const char* content = + "abc/*!" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SBLOCK_COMMENT); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// BEGIN_NBLOCK_COMMENT +TEST_F(lexer_fixture, lexer_begin_nblock_comment) +{ + static constexpr const char* content = + "abc/**!" 
+ ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NBLOCK_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::STAR); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "!"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// END_BLOCK_COMMENT +TEST_F(lexer_fixture, lexer_end_block_comment_no_star) +{ + static constexpr const char* content = + "abc*/f" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::END_BLOCK_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "f"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +TEST_F(lexer_fixture, lexer_end_block_comment_star) +{ + static constexpr const char* content = + "abc**/f" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::STAR); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::END_BLOCK_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "f"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// SDESC +TEST_F(lexer_fixture, lexer_sdesc) +{ + static constexpr const char* content = + 
"ssdesc@@sdescf@sdesscf" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "ssdesc@"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::SDESC); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "f@sdesscf"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// TPARAM +TEST_F(lexer_fixture, lexer_tparam) +{ + static constexpr const char* content = + "ssdes@@@@@@tpaar@tpara@m@tparam@tpar" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "ssdes@@@@@@tpaar@tpara@m"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TPARAM); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "@tpar"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// RETURN +TEST_F(lexer_fixture, lexer_return) +{ + static constexpr const char* content = + "@re@@@@@@return@@@@@" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "@re@@@@@"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::RETURN); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "@@@@@"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +//////////////////////////////////////////////////////////////////// +// Mix TESTS +//////////////////////////////////////////////////////////////////// + +// line comment mix +TEST_F(lexer_fixture, lexer_line_comment_4) +{ + static constexpr const char* content = + 
"abc////" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "/"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// line comment mix +TEST_F(lexer_fixture, lexer_line_comment_5) +{ + static constexpr const char* content = + "abc/////" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// line comment mix +TEST_F(lexer_fixture, lexer_line_comment_6) +{ + static constexpr const char* content = + "abc//////" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +TEST_F(lexer_fixture, lexer_test_1_no_special_comment) +{ + static constexpr const char* content = + "#include // some comment\n" + "\n" + "void f();" + ; + + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::HASHTAG); + 
EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "include"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "some"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "void"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "f()"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::SEMICOLON); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +TEST_F(lexer_fixture, lexer_test_2_no_special_comment) +{ + static constexpr const char* content = + "#include \n" + "\n" + " // just a normal comment\n" + "\n" + ; + 
+ setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::HASHTAG); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "include"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "just"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "a"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "normal"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, 
symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +TEST_F(lexer_fixture, lexer_test_1_comment_mix) +{ + static constexpr const char* content = + "// comment\n" + " /// special_comment \n" + ; + + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "special_comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +TEST_F(lexer_fixture, lexer_test_1_tagname_comments) +{ + static constexpr const char* content = + "// @tparam normal comment\n" + "/// @sdescspecial comment \n" + "#define hehe\n" + ; + + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, 
symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TPARAM); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "normal"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::SDESC); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "special"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::HASHTAG); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "define"); + + token = 
lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "hehe"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +} // namespace lex +} // namespace core +} // namespace docgen diff --git a/test/core/lex/trie_unittest.cpp b/test/core/lex/trie_unittest.cpp new file mode 100644 index 0000000..7fb1eb9 --- /dev/null +++ b/test/core/lex/trie_unittest.cpp @@ -0,0 +1,319 @@ +#include +#include + +namespace docgen { +namespace core { +namespace lex { + +enum class MockSymbol { + symbol_0, + symbol_1, + symbol_2, + symbol_3, +}; + +struct trie_fixture : ::testing::Test +{ +protected: + using symbol_t = MockSymbol; + using trie_t = Trie; +}; + +//////////////////////////////////////////// +// State TESTS +//////////////////////////////////////////// + +TEST_F(trie_fixture, trie_root) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(trie.is_reset()); + + EXPECT_EQ(trie.get_children().size(), static_cast(1)); + EXPECT_NE(trie.get_children().find('a'), trie.get_children().end()); // found +} + +TEST_F(trie_fixture, trie_transition_child_a) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + trie.transition('a'); + + EXPECT_TRUE(trie.get_symbol()); + EXPECT_EQ(*trie.get_symbol(), symbol_t::symbol_0); + EXPECT_TRUE(trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + EXPECT_EQ(trie.get_children().size(), static_cast(2)); + EXPECT_NE(trie.get_children().find('b'), trie.get_children().end()); // found + EXPECT_NE(trie.get_children().find('c'), 
trie.get_children().end()); // found +} + +TEST_F(trie_fixture, trie_transition_child_b) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + trie.transition('a'); + trie.transition('b'); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + EXPECT_EQ(trie.get_children().size(), static_cast(1)); + EXPECT_NE(trie.get_children().find('c'), trie.get_children().end()); // found +} + +TEST_F(trie_fixture, trie_transition_child_bc) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + trie.transition('a'); + trie.transition('b'); + trie.transition('c'); + + EXPECT_TRUE(trie.get_symbol()); + EXPECT_EQ(*trie.get_symbol(), symbol_t::symbol_1); + EXPECT_TRUE(trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + EXPECT_EQ(trie.get_children().size(), static_cast(0)); +} + +TEST_F(trie_fixture, trie_transition_child_c) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + trie.transition('a'); + trie.transition('c'); + + EXPECT_TRUE(trie.get_symbol()); + EXPECT_EQ(*trie.get_symbol(), symbol_t::symbol_2); + EXPECT_TRUE(trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + EXPECT_EQ(trie.get_children().size(), static_cast(0)); +} + +TEST_F(trie_fixture, trie_reset_root) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + trie.reset(); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(trie.is_reset()); + + EXPECT_EQ(trie.get_children().size(), static_cast(1)); + EXPECT_NE(trie.get_children().find('a'), trie.get_children().end()); // found +} + +TEST_F(trie_fixture, trie_reset_child_a) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + trie.transition('a'); + + 
trie.reset(); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(trie.is_reset()); + + EXPECT_EQ(trie.get_children().size(), static_cast(1)); + EXPECT_NE(trie.get_children().find('a'), trie.get_children().end()); // found +} + +TEST_F(trie_fixture, trie_reset_child_a_b) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + trie.transition('a'); + trie.transition('b'); + + trie.reset(); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(trie.is_reset()); + + EXPECT_EQ(trie.get_children().size(), static_cast(1)); + EXPECT_NE(trie.get_children().find('a'), trie.get_children().end()); // found +} + +TEST_F(trie_fixture, trie_back_transition_root) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + EXPECT_THROW(trie.back_transition(), exceptions::control_flow_error); +} + +TEST_F(trie_fixture, trie_back_transition_child_a) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + trie.transition('a'); + trie.back_transition(); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(trie.is_reset()); + + EXPECT_EQ(trie.get_children().size(), static_cast(1)); + EXPECT_NE(trie.get_children().find('a'), trie.get_children().end()); // found +} + +TEST_F(trie_fixture, trie_back_transition_child_ab) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + trie.transition('a'); + trie.transition('b'); + + // back to child 'a' + trie.back_transition(); + + EXPECT_TRUE(trie.get_symbol()); + EXPECT_TRUE(trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + EXPECT_EQ(trie.get_children().size(), static_cast(2)); + EXPECT_NE(trie.get_children().find('b'), trie.get_children().end()); // found + EXPECT_NE(trie.get_children().find('c'), 
trie.get_children().end()); // found + + // back to root + trie.back_transition(); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(trie.is_reset()); + + EXPECT_EQ(trie.get_children().size(), static_cast(1)); + EXPECT_NE(trie.get_children().find('a'), trie.get_children().end()); // found +} + +//////////////////////////////////////////// +// Structural Checks +//////////////////////////////////////////// + +TEST_F(trie_fixture, trie_off_by_one_prefix) +{ + trie_t trie({ + {"ab", symbol_t::symbol_1}, + {"bab", symbol_t::symbol_1}, + }); + + // check root + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(trie.is_reset()); + + EXPECT_EQ(trie.get_children().size(), static_cast(2)); + EXPECT_NE(trie.get_children().find('a'), trie.get_children().end()); // found + EXPECT_NE(trie.get_children().find('b'), trie.get_children().end()); // found + + // check child 'a' + trie.transition('a'); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + EXPECT_EQ(trie.get_children().size(), static_cast(1)); + EXPECT_NE(trie.get_children().find('b'), trie.get_children().end()); // found + + // check child 'a'->'b' + trie.transition('b'); + + EXPECT_TRUE(trie.get_symbol()); + EXPECT_TRUE(trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + EXPECT_EQ(trie.get_children().size(), static_cast(0)); + + // up to child 'a' + trie.back_transition(); + + // up to root + trie.back_transition(); + + // check child 'b' + trie.transition('b'); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + EXPECT_EQ(trie.get_children().size(), static_cast(1)); + EXPECT_NE(trie.get_children().find('a'), trie.get_children().end()); // found + + // check child 'b'->'a' + trie.transition('a'); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + EXPECT_EQ(trie.get_children().size(), 
static_cast(1)); + EXPECT_NE(trie.get_children().find('b'), trie.get_children().end()); // found + + // check child 'b'->'a'->'b' + trie.transition('b'); + + EXPECT_TRUE(trie.get_symbol()); + EXPECT_TRUE(trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + EXPECT_EQ(trie.get_children().size(), static_cast(0)); +} + +} // namespace lex +} // namespace core +} // namespace docgen diff --git a/test/core/lexer_base_fixture.hpp b/test/core/lexer_base_fixture.hpp deleted file mode 100644 index 79ea86a..0000000 --- a/test/core/lexer_base_fixture.hpp +++ /dev/null @@ -1,44 +0,0 @@ -#pragma once -#include "core/lexer_routines.hpp" -#include - -namespace docgen { -namespace core { - -struct lexer_base_fixture : ::testing::Test -{ -protected: - using status_t = lexer_details::status_t; - using token_t = lexer_details::token_t; - using symbol_t = lexer_details::symbol_t; - - static constexpr const char* filename = ".lexer_routines_unittest.data.txt"; - static constexpr size_t buf_size = 20; - FILE* file; - - lexer_base_fixture() - : file(fopen(filename, "r")) - {} - - ~lexer_base_fixture() - { - fclose(file); - } - - void write_file(const char* content) - { - FILE* fp = fopen(filename, "w"); - fwrite(content, sizeof(char), strlen(content), fp); - fclose(fp); - } - - void check_token(symbol_t actual_sym, symbol_t expected_sym, - const std::string& actual_str, const std::string& expected_str) - { - EXPECT_EQ(actual_sym, expected_sym); - EXPECT_EQ(actual_str, expected_str); - } -}; - -} // namespace core -} // namespace docgen diff --git a/test/core/lexer_routines_unittest.cpp b/test/core/lexer_routines_unittest.cpp deleted file mode 100644 index f32f0e3..0000000 --- a/test/core/lexer_routines_unittest.cpp +++ /dev/null @@ -1,784 +0,0 @@ -#include "lexer_base_fixture.hpp" - -namespace docgen { -namespace core { -namespace lexer_details { - -struct lexer_routines_fixture : lexer_base_fixture -{ -protected: - - template - static bool is_not(char x) - { - return x != c; - } - - 
void read(file_reader& reader, std::string& str) - { - int c = 0; - while ((c = reader.read()) != file_reader::termination) { - str.push_back(c); - } - } - - template - void ignore_until_test(const char* content, const char* expected_str, - char expected_last_char, Condition condition) - { - write_file(content); - file_reader reader(file); - std::string actual; - int last_char = ignore_until(reader, condition); - EXPECT_EQ(last_char, expected_last_char); - read(reader, actual); - EXPECT_EQ(actual, expected_str); - } - - template - void read_until_test(const char* content, const char* expected_str, - char expected_last_char, Condition condition) - { - write_file(content); - file_reader reader(file); - std::string actual; - int last_char = read_until(reader, condition, actual); - EXPECT_EQ(last_char, expected_last_char); - EXPECT_EQ(actual, expected_str); - } - - void trim_test(const char* content, const char* expected) - { - std::string actual(content); - trim(actual); - EXPECT_EQ(actual, expected); - } - - void tokenize_text_check(const std::string& actual, const token_t& token, - const char* expected) - { - check_token(token.name, symbol_t::TEXT, - token.content, expected); - EXPECT_EQ(actual.size(), static_cast(0)); - EXPECT_GT(actual.capacity(), DEFAULT_STRING_RESERVE_SIZE); - } - - void process_char_check(bool res, const status_t& status, - const std::string& actual, const char* expected, - symbol_t expected_symbol) - { - EXPECT_TRUE(res); - EXPECT_EQ(status.tokens.size(), static_cast(2)); - tokenize_text_check(actual, status.tokens[0], expected); - check_token(status.tokens[1].name, expected_symbol, - status.tokens[1].content, ""); - } - -}; - -//////////////////////////////////////////////////////////////////////// -// ignore_until TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, ignore_until_newline) -{ - static constexpr const char* content = - "to ignore here \ndo not ignore" - ; - static 
constexpr const char* expected = - "do not ignore"; - ignore_until_test(content, expected, '\n', is_not<'\n'>); -} - -TEST_F(lexer_routines_fixture, ignore_until_empty_content) -{ - static constexpr const char* content = - "" - ; - static constexpr const char* expected = - ""; - ignore_until_test(content, expected, file_reader::termination, is_not<'a'>); -} - -TEST_F(lexer_routines_fixture, ignore_until_first_char) -{ - static constexpr const char* content = - "hello" - ; - static constexpr const char* expected = - "ello"; - ignore_until_test(content, expected, 'h', is_not<'h'>); -} - -TEST_F(lexer_routines_fixture, ignore_until_last_char) -{ - static constexpr const char* content = - "hello" - ; - static constexpr const char* expected = - ""; - ignore_until_test(content, expected, 'o', is_not<'o'>); -} - -//////////////////////////////////////////////////////////////////////// -// read_until TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, read_until_newline) -{ - static constexpr const char* content = - "very special comment \n not read here" - ; - static constexpr const char* expected = - "very special comment "; - read_until_test(content, expected, '\n', is_not<'\n'>); -} - -TEST_F(lexer_routines_fixture, read_until_two_newline) -{ - static constexpr const char* content = - "very special \ncomment \n" - ; - static constexpr const char* expected = - "very special "; - read_until_test(content, expected, '\n', is_not<'\n'>); -} - -TEST_F(lexer_routines_fixture, read_until_empty) -{ - static constexpr const char* content = - "" - ; - static constexpr const char* expected = - ""; - read_until_test(content, expected, file_reader::termination, is_not<'c'>); -} - -TEST_F(lexer_routines_fixture, read_until_first_char) -{ - static constexpr const char* content = - "very special \ncomment \n" - ; - static constexpr const char* expected = - ""; - read_until_test(content, expected, 'v', is_not<'v'>); -} - 
-TEST_F(lexer_routines_fixture, read_until_last_char) -{ - static constexpr const char* content = - "very special comment #" - ; - static constexpr const char* expected = - "very special comment "; - read_until_test(content, expected, '#', is_not<'#'>); -} - -//////////////////////////////////////////////////////////////////////// -// trim TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, trim_empty) -{ - static constexpr const char* content = - "" - ; - static constexpr const char* expected = - ""; - trim_test(content, expected); -} - -TEST_F(lexer_routines_fixture, trim_only_leading) -{ - static constexpr const char* content = - " \n\t hello\tworld!" - ; - static constexpr const char* expected = - "hello\tworld!"; - trim_test(content, expected); -} - -TEST_F(lexer_routines_fixture, trim_only_trailing) -{ - static constexpr const char* content = - "hello\tworld!\v\r\t\f \n\t " - ; - static constexpr const char* expected = - "hello\tworld!"; - trim_test(content, expected); -} - -TEST_F(lexer_routines_fixture, trim_leading_trailing) -{ - static constexpr const char* content = - "\n \r\t \f hello\tworld!\v\r\t\f \n\t " - ; - static constexpr const char* expected = - "hello\tworld!"; - trim_test(content, expected); -} - -//////////////////////////////////////////////////////////////////////// -// tokenize_text TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, tokenize_text_empty) -{ - static constexpr const char* content = - "" - ; - static constexpr const char* expected = - ""; - - std::string actual(content); - status_t status; - tokenize_text(actual, status); - EXPECT_EQ(status.tokens.size(), static_cast(0)); - EXPECT_EQ(actual, expected); -} - -TEST_F(lexer_routines_fixture, tokenize_text) -{ - static constexpr const char* content = - "\n \r\t \f hello\tworld!\v\r\t\f \n\t " - ; - static constexpr const char* expected = - "hello\tworld!"; 
- - std::string actual(content); - status_t status; - tokenize_text(actual, status); - tokenize_text_check(actual, status.tokens[0], expected); -} - -// this tests whether text is left in a valid state for the next processing -TEST_F(lexer_routines_fixture, tokenize_text_twice) -{ - static constexpr const char* content_1 = - "\n \r\t \f hello\tworld!\v\r\t\f \n\t " - ; - static constexpr const char* expected_1 = - "hello\tworld!"; - - static constexpr const char* content_2 = - "\n this is docgen!\v\f \n\t " - ; - static constexpr const char* expected_2 = - "this is docgen!"; - - std::string actual(content_1); - status_t status; - tokenize_text(actual, status); // actual cleared, status.tokens updated - - // check first token - tokenize_text_check(actual, status.tokens[0], expected_1); - - // push back content of content_2 - for (size_t i = 0; i < strlen(content_2); ++i) { - actual.push_back(content_2[i]); - } - - tokenize_text(actual, status); - - // only 2 tokens - EXPECT_EQ(status.tokens.size(), static_cast(2)); - - // check second token - tokenize_text_check(actual, status.tokens[1], expected_2); - // check content of first token to test if moving worked correctly - check_token(status.tokens[0].name, symbol_t::TEXT, - status.tokens[0].content, expected_1); -} - -//////////////////////////////////////////////////////////////////////// -// process_char TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, process_char_newline) -{ - static constexpr const char* content = - "\t some text " - ; - static constexpr const char* expected = - "some text"; - - std::string actual(content); - status_t status; - bool res = process_char('\n', actual, status); - process_char_check(res, status, actual, expected, symbol_t::NEWLINE); -} - -TEST_F(lexer_routines_fixture, process_char_semicolon) -{ - static constexpr const char* content = - "\v\t some text \r\v\f \v" - ; - static constexpr const char* expected = - "some 
text"; - - std::string actual(content); - status_t status; - bool res = process_char(';', actual, status); - process_char_check(res, status, actual, expected, symbol_t::SEMICOLON); -} - -TEST_F(lexer_routines_fixture, process_char_open_brace) -{ - static constexpr const char* content = - " \v some text \v" - ; - static constexpr const char* expected = - "some text"; - - std::string actual(content); - status_t status; - bool res = process_char('{', actual, status); - process_char_check(res, status, actual, expected, symbol_t::OPEN_BRACE); -} - -TEST_F(lexer_routines_fixture, process_char_close_brace) -{ - static constexpr const char* content = - " \v some text \v" - ; - static constexpr const char* expected = - "some text"; - - std::string actual(content); - status_t status; - bool res = process_char('}', actual, status); - process_char_check(res, status, actual, expected, symbol_t::CLOSE_BRACE); -} - -TEST_F(lexer_routines_fixture, process_char_default) -{ - static constexpr const char* content = - " \v some text \v" - ; - static constexpr const char* expected = content; - - std::string actual(content); - status_t status; - bool res = process_char('a', actual, status); - - EXPECT_FALSE(res); - EXPECT_EQ(actual, expected); - EXPECT_EQ(status.tokens.size(), static_cast(0)); -} - -//////////////////////////////////////////////////////////////////////// -// tokenize_tag_name TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, tokenize_tag_name_sdesc) -{ - static constexpr const char* content = - "sdesc\t " - ; - static constexpr const char* text_content = - "some text"; - - std::string text(text_content); - write_file(content); - file_reader reader(file); - status_t status; // context is none - tokenize_tag_name(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(2)); - check_token(status.tokens[0].name, symbol_t::TEXT, - status.tokens[0].content, text_content); - 
check_token(status.tokens[1].name, symbol_t::TAGNAME, - status.tokens[1].content, "sdesc"); - EXPECT_EQ(reader.peek(), '\t'); -} - -TEST_F(lexer_routines_fixture, tokenize_tag_name_param) -{ - static constexpr const char* content = - "param\n \t " - ; - static constexpr const char* text_content = - "some text"; - - std::string text(text_content); - write_file(content); - file_reader reader(file); - status_t status; // context is none - tokenize_tag_name(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(2)); - check_token(status.tokens[0].name, symbol_t::TEXT, - status.tokens[0].content, text_content); - check_token(status.tokens[1].name, symbol_t::TAGNAME, - status.tokens[1].content, "param"); - EXPECT_EQ(reader.peek(), '\n'); -} - -TEST_F(lexer_routines_fixture, tokenize_tag_name_tparam) -{ - static constexpr const char* content = - "tparam\n \t " - ; - static constexpr const char* text_content = - "some text"; - - std::string text(text_content); - write_file(content); - file_reader reader(file); - status_t status; // context is none - tokenize_tag_name(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(2)); - check_token(status.tokens[0].name, symbol_t::TEXT, - status.tokens[0].content, text_content); - check_token(status.tokens[1].name, symbol_t::TAGNAME, - status.tokens[1].content, "tparam"); - EXPECT_EQ(reader.peek(), '\n'); -} - -TEST_F(lexer_routines_fixture, tokenize_tag_name_invalid) -{ - static constexpr const char* content = - "tparram\n \t " - ; - static constexpr const char* text_content = - "some text"; - - std::string text(text_content); - write_file(content); - file_reader reader(file); - status_t status; // context is none - tokenize_tag_name(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(0)); - EXPECT_EQ(text, std::string(text_content) + "@tparram"); - EXPECT_EQ(reader.peek(), '\n'); -} - -TEST_F(lexer_routines_fixture, tokenize_tag_name_eof) -{ - static constexpr const char* content 
= - "tparam" - ; - static constexpr const char* text_content = - "some text"; - - std::string text(text_content); - write_file(content); - file_reader reader(file); - status_t status; // context is none - tokenize_tag_name(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(2)); - check_token(status.tokens[0].name, symbol_t::TEXT, - status.tokens[0].content, text_content); - check_token(status.tokens[1].name, symbol_t::TAGNAME, - status.tokens[1].content, "tparam"); - EXPECT_EQ(reader.peek(), static_cast(file_reader::termination)); -} - -//////////////////////////////////////////////////////////////////////// -// process_tag_name TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, process_tag_name_valid) -{ - static constexpr const char* content = - "param x\tsome int\n" - ; - static constexpr const char* text_content = - " some existing text... \n"; - static constexpr const char* expected_text = - "some existing text..."; - - write_file(content); - file_reader reader(file); - status_t status; - std::string text(text_content); - bool res = process_tag_name('@', text, reader, status); - - EXPECT_TRUE(res); - EXPECT_EQ(status.tokens.size(), static_cast(2)); - tokenize_text_check(text, status.tokens[0], expected_text); - check_token(status.tokens[1].name, symbol_t::TAGNAME, - status.tokens[1].content, "param"); -} - -TEST_F(lexer_routines_fixture, process_tag_name_invalid) -{ - static constexpr const char* content = - "xparam x\tsome int\n" - ; - static constexpr const char* text_content = - " some existing text... 
\n"; - - write_file(content); - file_reader reader(file); - status_t status; - std::string text(text_content); - bool res = process_tag_name('m', text, reader, status); - - EXPECT_FALSE(res); - EXPECT_EQ(status.tokens.size(), static_cast(0)); -} - -//////////////////////////////////////////////////////////////////////// -// process_line_comment TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, process_line_comment_valid) -{ - static constexpr const char* content = - "/ some special content...\n" - ; - static constexpr const char* text_content = - "\n some text... \t"; - static constexpr const char* expected_text = - "some text..."; - - write_file(content); - file_reader reader(file); - status_t status; - std::string text(text_content); - process_line_comment(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(2)); - tokenize_text_check(text, status.tokens[0], expected_text); - check_token(status.tokens[1].name, symbol_t::BEGIN_LINE_COMMENT, - status.tokens[1].content, ""); -} - -TEST_F(lexer_routines_fixture, process_line_comment_invalid_nospace) -{ - static constexpr const char* content = - "/some special content...\n" - ; - static constexpr const char* text_content = - "\n some text... \t"; - - write_file(content); - file_reader reader(file); - status_t status; - std::string text(text_content); - process_line_comment(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(0)); - EXPECT_EQ(text, text_content); // text unchanged -} - -TEST_F(lexer_routines_fixture, process_line_comment_invalid_noslash) -{ - static constexpr const char* content = - " some special content...\n" - ; - static constexpr const char* text_content = - "\n some text... 
\t"; - - write_file(content); - file_reader reader(file); - status_t status; - std::string text(text_content); - process_line_comment(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(0)); - EXPECT_EQ(text, text_content); // text unchanged -} - -//////////////////////////////////////////////////////////////////////// -// process_block_comment TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, process_block_comment_valid) -{ - static constexpr const char* content = - "! some special content...\n" - ; - static constexpr const char* text_content = - "\n some text... \t"; - static constexpr const char* expected_text = - "some text..."; - - write_file(content); - file_reader reader(file); - status_t status; - std::string text(text_content); - process_block_comment(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(2)); - tokenize_text_check(text, status.tokens[0], expected_text); - check_token(status.tokens[1].name, symbol_t::BEGIN_BLOCK_COMMENT, - status.tokens[1].content, ""); -} - -TEST_F(lexer_routines_fixture, process_block_comment_invalid_nospace) -{ - static constexpr const char* content = - "!some special content...\n" - ; - static constexpr const char* text_content = - "\n some text... \t"; - - write_file(content); - file_reader reader(file); - status_t status; - std::string text(text_content); - process_block_comment(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(0)); - EXPECT_EQ(text, text_content); // text unchanged -} - -TEST_F(lexer_routines_fixture, process_block_comment_invalid_noexclam) -{ - static constexpr const char* content = - " some special content...\n" - ; - static constexpr const char* text_content = - "\n some text... 
\t"; - - write_file(content); - file_reader reader(file); - status_t status; - std::string text(text_content); - process_block_comment(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(0)); - EXPECT_EQ(text, text_content); // text unchanged -} - -//////////////////////////////////////////////////////////////////////// -// process_tags TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, process_string_invalid_comment) -{ - static constexpr const char* content = - "some content...\n " - ; - static constexpr const char* text_content = - " some text... "; - - write_file(content); - file_reader reader(file); - status_t status; // context is none - std::string text(text_content); - bool res = process_string('/', text, reader, status); - - EXPECT_TRUE(res); - EXPECT_EQ(status.tokens.size(), static_cast(0)); - EXPECT_EQ(text, std::string(text_content) + "/s"); -} - -TEST_F(lexer_routines_fixture, process_string_invalid_slash) -{ - static constexpr const char* content = - "some content...\n " - ; - static constexpr const char* text_content = - " some text... "; - - write_file(content); - file_reader reader(file); - status_t status; // context is none - std::string text(text_content); - bool res = process_string('x', text, reader, status); - - EXPECT_FALSE(res); - EXPECT_EQ(status.tokens.size(), static_cast(0)); - EXPECT_EQ(text, text_content); -} - -//////////////////////////////////////////////////////////////////////// -// process TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, process) -{ - static constexpr const char* content = - "#include // for json\n" - "// this is some comment to ignore\n" - "/* this is another comment to ignore \n*/" - "\n" - " /// description... @sdesc some short description\n" - " /*! 
@param x some int\n" - " * that we care about\n" - " */" - "inline f(int x);" - "struct A {const char* p = \"@param\"};" - ; - - write_file(content); - file_reader reader(file); - status_t status; // context is none - process(reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(27)); - check_token(status.tokens[0].name, symbol_t::TEXT, - status.tokens[0].content, "#include "); - check_token(status.tokens[1].name, symbol_t::NEWLINE, - status.tokens[1].content, ""); - check_token(status.tokens[2].name, symbol_t::BEGIN_LINE_COMMENT, - status.tokens[2].content, ""); - check_token(status.tokens[3].name, symbol_t::TEXT, - status.tokens[3].content, "description..."); - EXPECT_EQ(status.tokens[3].leading_ws_count, static_cast(1)); - - check_token(status.tokens[4].name, symbol_t::TAGNAME, - status.tokens[4].content, "sdesc"); - check_token(status.tokens[5].name, symbol_t::TEXT, - status.tokens[5].content, "some short description"); - EXPECT_EQ(status.tokens[5].leading_ws_count, static_cast(2)); - - check_token(status.tokens[6].name, symbol_t::NEWLINE, - status.tokens[6].content, ""); - check_token(status.tokens[7].name, symbol_t::BEGIN_BLOCK_COMMENT, - status.tokens[7].content, ""); - check_token(status.tokens[8].name, symbol_t::TAGNAME, - status.tokens[8].content, "param"); - check_token(status.tokens[9].name, symbol_t::TEXT, - status.tokens[9].content, "x some int"); - EXPECT_EQ(status.tokens[9].leading_ws_count, static_cast(1)); - - check_token(status.tokens[10].name, symbol_t::NEWLINE, - status.tokens[10].content, ""); - check_token(status.tokens[11].name, symbol_t::STAR, - status.tokens[11].content, ""); - check_token(status.tokens[12].name, symbol_t::TEXT, - status.tokens[12].content, "that we care about"); - EXPECT_EQ(status.tokens[12].leading_ws_count, static_cast(1)); - - check_token(status.tokens[13].name, symbol_t::NEWLINE, - status.tokens[13].content, ""); - check_token(status.tokens[14].name, symbol_t::END_BLOCK_COMMENT, - status.tokens[14].content, 
""); - check_token(status.tokens[15].name, symbol_t::TEXT, - status.tokens[15].content, "inline f(int x)"); - check_token(status.tokens[16].name, symbol_t::SEMICOLON, - status.tokens[16].content, ""); - check_token(status.tokens[17].name, symbol_t::TEXT, - status.tokens[17].content, "struct A"); - check_token(status.tokens[18].name, symbol_t::OPEN_BRACE, - status.tokens[18].content, ""); - check_token(status.tokens[19].name, symbol_t::TEXT, - status.tokens[19].content, "const char"); - check_token(status.tokens[20].name, symbol_t::STAR, - status.tokens[20].content, ""); - check_token(status.tokens[21].name, symbol_t::TEXT, - status.tokens[21].content, "p = \""); - EXPECT_EQ(status.tokens[21].leading_ws_count, static_cast(1)); - - check_token(status.tokens[22].name, symbol_t::TAGNAME, - status.tokens[22].content, "param"); - check_token(status.tokens[23].name, symbol_t::TEXT, - status.tokens[23].content, "\""); - check_token(status.tokens[24].name, symbol_t::CLOSE_BRACE, - status.tokens[24].content, ""); - check_token(status.tokens[25].name, symbol_t::SEMICOLON, - status.tokens[25].content, ""); - check_token(status.tokens[26].name, symbol_t::END_OF_FILE, - status.tokens[26].content, ""); -} - -} // namespace lexer_details -} // namespace core -} // namespace docgen diff --git a/test/core/lexer_unittest.cpp b/test/core/lexer_unittest.cpp deleted file mode 100644 index aebfbaa..0000000 --- a/test/core/lexer_unittest.cpp +++ /dev/null @@ -1,279 +0,0 @@ -#include "core/lexer.hpp" -#include "lexer_base_fixture.hpp" - -namespace docgen { -namespace core { - -struct lexer_fixture : lexer_base_fixture -{ -protected: -}; - -TEST_F(lexer_fixture, process_no_comment) -{ - static constexpr const char* content = - "#include \n" - "\n" - " // just a normal comment\n" - "\n" - ; - - write_file(content); - Lexer lexer(file); - lexer.process(); - const auto& tokens = lexer.get_tokens(); - - EXPECT_EQ(tokens.size(), static_cast(5)); - - check_token(tokens[0].name, symbol_t::TEXT, - 
tokens[0].content, "#include "); - check_token(tokens[1].name, symbol_t::NEWLINE, - tokens[1].content, ""); - check_token(tokens[2].name, symbol_t::NEWLINE, - tokens[2].content, ""); - check_token(tokens[3].name, symbol_t::NEWLINE, - tokens[3].content, ""); - check_token(tokens[4].name, symbol_t::END_OF_FILE, - tokens[4].content, ""); -} - -TEST_F(lexer_fixture, process_one_line_comment) -{ - static constexpr const char* content = - "#include \n" - "\n" - " // just a normal comment\n" - " /// a very special comment \n" - "\n" - ; - - write_file(content); - Lexer lexer(file); - lexer.process(); - const auto& tokens = lexer.get_tokens(); - - EXPECT_EQ(tokens.size(), static_cast(8)); - - check_token(tokens[0].name, symbol_t::TEXT, - tokens[0].content, "#include "); - check_token(tokens[1].name, symbol_t::NEWLINE, - tokens[1].content, ""); - check_token(tokens[2].name, symbol_t::NEWLINE, - tokens[2].content, ""); - check_token(tokens[3].name, symbol_t::BEGIN_LINE_COMMENT, - tokens[3].content, ""); - check_token(tokens[4].name, symbol_t::TEXT, - tokens[4].content, "a very special comment"); - EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); - check_token(tokens[5].name, symbol_t::NEWLINE, - tokens[5].content, ""); - check_token(tokens[6].name, symbol_t::NEWLINE, - tokens[6].content, ""); - check_token(tokens[7].name, symbol_t::END_OF_FILE, - tokens[7].content, ""); -} - -TEST_F(lexer_fixture, process_two_line_comment) -{ - static constexpr const char* content = - "#include \n" - "\n" - " // just a normal comment\n" - " /// a very special comment \n" - "\n" - " // just a normal comment\n" - " /// another very special comment \n" - " // just a normal comment\n" - ; - - write_file(content); - Lexer lexer(file); - lexer.process(); - const auto& tokens = lexer.get_tokens(); - - EXPECT_EQ(tokens.size(), static_cast(11)); - - check_token(tokens[0].name, symbol_t::TEXT, - tokens[0].content, "#include "); - check_token(tokens[1].name, symbol_t::NEWLINE, - 
tokens[1].content, ""); - check_token(tokens[2].name, symbol_t::NEWLINE, - tokens[2].content, ""); - check_token(tokens[3].name, symbol_t::BEGIN_LINE_COMMENT, - tokens[3].content, ""); - check_token(tokens[4].name, symbol_t::TEXT, - tokens[4].content, "a very special comment"); - EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); - check_token(tokens[5].name, symbol_t::NEWLINE, - tokens[5].content, ""); - check_token(tokens[6].name, symbol_t::NEWLINE, - tokens[6].content, ""); - check_token(tokens[7].name, symbol_t::BEGIN_LINE_COMMENT, - tokens[7].content, ""); - check_token(tokens[8].name, symbol_t::TEXT, - tokens[8].content, "another very special comment"); - EXPECT_EQ(tokens[8].leading_ws_count, static_cast(1)); - check_token(tokens[9].name, symbol_t::NEWLINE, - tokens[9].content, ""); - check_token(tokens[10].name, symbol_t::END_OF_FILE, - tokens[10].content, ""); -} - -TEST_F(lexer_fixture, process_one_block_comment) -{ - static constexpr const char* content = - "#include \n" - "\n" - " // just a normal comment\n" - " /*! 
a very special comment */\n" - "\n" - ; - - write_file(content); - Lexer lexer(file); - lexer.process(); - const auto& tokens = lexer.get_tokens(); - - EXPECT_EQ(tokens.size(), static_cast(9)); - - check_token(tokens[0].name, symbol_t::TEXT, - tokens[0].content, "#include "); - check_token(tokens[1].name, symbol_t::NEWLINE, - tokens[1].content, ""); - check_token(tokens[2].name, symbol_t::NEWLINE, - tokens[2].content, ""); - check_token(tokens[3].name, symbol_t::BEGIN_BLOCK_COMMENT, - tokens[3].content, ""); - check_token(tokens[4].name, symbol_t::TEXT, - tokens[4].content, "a very special comment"); - EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); - check_token(tokens[5].name, symbol_t::END_BLOCK_COMMENT, - tokens[5].content, ""); - check_token(tokens[6].name, symbol_t::NEWLINE, - tokens[6].content, ""); - check_token(tokens[7].name, symbol_t::NEWLINE, - tokens[7].content, ""); - check_token(tokens[8].name, symbol_t::END_OF_FILE, - tokens[8].content, ""); -} - -TEST_F(lexer_fixture, process_two_block_comment) -{ - static constexpr const char* content = - "#include \n" - "\n" - " // just a normal comment\n" - " /*! a very special comment */\n" - "\n" - " // just a normal comment\n" - " /*! 
another very \n" - " * special comment \n" - "*/" - " /* just a normal comment\n */" - ; - - write_file(content); - Lexer lexer(file); - lexer.process(); - const auto& tokens = lexer.get_tokens(); - - EXPECT_EQ(tokens.size(), static_cast(16)); - - check_token(tokens[0].name, symbol_t::TEXT, - tokens[0].content, "#include "); - check_token(tokens[1].name, symbol_t::NEWLINE, - tokens[1].content, ""); - check_token(tokens[2].name, symbol_t::NEWLINE, - tokens[2].content, ""); - check_token(tokens[3].name, symbol_t::BEGIN_BLOCK_COMMENT, - tokens[3].content, ""); - check_token(tokens[4].name, symbol_t::TEXT, - tokens[4].content, "a very special comment"); - EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); - check_token(tokens[5].name, symbol_t::END_BLOCK_COMMENT, - tokens[5].content, ""); - check_token(tokens[6].name, symbol_t::NEWLINE, - tokens[6].content, ""); - check_token(tokens[7].name, symbol_t::NEWLINE, - tokens[7].content, ""); - check_token(tokens[8].name, symbol_t::BEGIN_BLOCK_COMMENT, - tokens[8].content, ""); - check_token(tokens[9].name, symbol_t::TEXT, - tokens[9].content, "another very"); - EXPECT_EQ(tokens[9].leading_ws_count, static_cast(1)); - check_token(tokens[10].name, symbol_t::NEWLINE, - tokens[10].content, ""); - check_token(tokens[11].name, symbol_t::STAR, - tokens[11].content, ""); - check_token(tokens[12].name, symbol_t::TEXT, - tokens[12].content, "special comment"); - EXPECT_EQ(tokens[12].leading_ws_count, static_cast(1)); - check_token(tokens[13].name, symbol_t::NEWLINE, - tokens[13].content, ""); - check_token(tokens[14].name, symbol_t::END_BLOCK_COMMENT, - tokens[14].content, ""); - check_token(tokens[15].name, symbol_t::END_OF_FILE, - tokens[15].content, ""); -} - -TEST_F(lexer_fixture, process_line_block_comment) -{ - static constexpr const char* content = - "#include \n" - "\n" - " // just a normal comment\n" - " /// a very special comment */\n" - "\n" - " // just a normal comment\n" - " /*! 
another very \n" - " * special comment \n" - "*/" - " /* just a normal comment\n */" - ; - - write_file(content); - Lexer lexer(file); - lexer.process(); - const auto& tokens = lexer.get_tokens(); - - EXPECT_EQ(tokens.size(), static_cast(16)); - - check_token(tokens[0].name, symbol_t::TEXT, - tokens[0].content, "#include "); - check_token(tokens[1].name, symbol_t::NEWLINE, - tokens[1].content, ""); - check_token(tokens[2].name, symbol_t::NEWLINE, - tokens[2].content, ""); - check_token(tokens[3].name, symbol_t::BEGIN_LINE_COMMENT, - tokens[3].content, ""); - check_token(tokens[4].name, symbol_t::TEXT, - tokens[4].content, "a very special comment"); - EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); - check_token(tokens[5].name, symbol_t::END_BLOCK_COMMENT, - tokens[5].content, ""); - check_token(tokens[6].name, symbol_t::NEWLINE, - tokens[6].content, ""); - check_token(tokens[7].name, symbol_t::NEWLINE, - tokens[7].content, ""); - check_token(tokens[8].name, symbol_t::BEGIN_BLOCK_COMMENT, - tokens[8].content, ""); - check_token(tokens[9].name, symbol_t::TEXT, - tokens[9].content, "another very"); - EXPECT_EQ(tokens[9].leading_ws_count, static_cast(1)); - check_token(tokens[10].name, symbol_t::NEWLINE, - tokens[10].content, ""); - check_token(tokens[11].name, symbol_t::STAR, - tokens[11].content, ""); - check_token(tokens[12].name, symbol_t::TEXT, - tokens[12].content, "special comment"); - EXPECT_EQ(tokens[12].leading_ws_count, static_cast(1)); - check_token(tokens[13].name, symbol_t::NEWLINE, - tokens[13].content, ""); - check_token(tokens[14].name, symbol_t::END_BLOCK_COMMENT, - tokens[14].content, ""); - check_token(tokens[15].name, symbol_t::END_OF_FILE, - tokens[15].content, ""); -} - -} // namespace core -} // namespace docgen