From b664de480493604bb32f5a058b5ff0a27a855e98 Mon Sep 17 00:00:00 2001
From: James Yang
Date: Fri, 17 Jan 2020 19:31:51 -0500
Subject: [PATCH 01/23] Add trie implementation and lexer with trie

---
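Design note: the new Lexer is a maximal-munch (longest-match) scanner over a
keyword trie. Each character either advances the trie (buffered as a keyword
candidate) or, on a mismatch, the lexer backtracks to the longest accepting
prefix seen, emits that symbol (preceded by any pending TEXT token), and
reprocesses the leftover characters. A minimal driving loop, for illustration
only -- the file-reading wiring around it is an assumption, not part of this
patch:

    docgen::core::Lexer lexer;
    for (char c : std::string("/// foo\n")) {
        lexer.process(c);      // feed one character at a time
    }
    while (auto token = lexer.next_token()) {
        handle(*token);        // hypothetical consumer of name/content
    }

Known gap: input still buffered when the stream ends (e.g. a trailing keyword)
is not emitted yet; end-of-input handling is addressed later in this series
(see PATCH 05).
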
 src/core/lexer_trie.hpp | 163 ++++++++++++++++++++++++++++++++
 src/core/status.hpp     |   5 +-
 src/core/symbol.hpp     |  15 ++-
 src/core/token.hpp      |   4 +-
 src/core/trie.hpp       | 201 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 378 insertions(+), 10 deletions(-)
 create mode 100644 src/core/lexer_trie.hpp
 create mode 100644 src/core/trie.hpp

diff --git a/src/core/lexer_trie.hpp b/src/core/lexer_trie.hpp
new file mode 100644
index 0000000..4bd9eb5
--- /dev/null
+++ b/src/core/lexer_trie.hpp
@@ -0,0 +1,163 @@
+#pragma once
+#include <core/symbol.hpp>
+#include <core/token.hpp>
+#include <core/status.hpp>
+#include <core/trie.hpp>
+
+namespace docgen {
+namespace core {
+
+struct Lexer
+{
+    using symbol_t = Symbol;
+    using token_t = Token<symbol_t>;
+    using status_t = Status<token_t>;
+
+    Lexer();
+
+    void process(char c);
+    std::optional<token_t> next_token();
+
+private:
+
+    bool is_backtracking() const;
+    void set_backtracking();
+    void reset_backtracking();
+    void backtrack(char c);
+
+    enum class State : bool {
+        backtrack,
+        non_backtrack
+    };
+
+    Trie<symbol_t> trie_;
+    std::string text_;
+    std::string buf_;
+    State state_ = State::non_backtrack;
+    status_t status_;
+};
+
+///////////////////////////////////
+// Lexer Implementation
+///////////////////////////////////
+
+Lexer::Lexer()
+    : trie_({
+            {"\n", Symbol::NEWLINE},
+            {";", Symbol::SEMICOLON},
+            {" ", Symbol::WHITESPACE},
+            {"\t", Symbol::WHITESPACE},
+            {"\v", Symbol::WHITESPACE},
+            {"\r", Symbol::WHITESPACE},
+            {"\f", Symbol::WHITESPACE},
+            {"*", Symbol::STAR},
+            {"{", Symbol::OPEN_BRACE},
+            {"}", Symbol::CLOSE_BRACE},
+            {"///", Symbol::BEGIN_SLINE_COMMENT},
+            {"/*!", Symbol::BEGIN_SBLOCK_COMMENT},
+            {"//", Symbol::BEGIN_NLINE_COMMENT},
+            {"/*", Symbol::BEGIN_NBLOCK_COMMENT},
+            {"*/", Symbol::END_BLOCK_COMMENT},
+            {"@sdesc", Symbol::SDESC}
+        })
+{
+    // TODO: reserve space for status_.tokens?
+}
+
+inline void Lexer::process(char c)
+{
+    // if current state is accepting
+    if (trie_.is_accept()) {
+        if (!this->is_backtracking()) {
+            this->set_backtracking();
+        }
+        // ignore contents in buffer up until now
+        // this optimization can be done because we look for longest match
+        buf_.clear();
+    }
+
+    auto it = trie_.get_children().find(c);
+
+    // if transition exists
+    if (it != trie_.get_children().end()) {
+        buf_.push_back(c);
+        trie_.transition(c);
+        return;
+    }
+
+    // otherwise, no transition exists
+
+    // if not backtracking
+    if (!this->is_backtracking()) {
+        text_.append(buf_);
+        text_.push_back(c);
+        buf_.clear();
+        trie_.reset();
+        return;
+    }
+
+    // otherwise, currently backtracking
+    this->backtrack(c);
+}
+
+inline bool Lexer::is_backtracking() const
+{
+    return state_ == State::backtrack;
+}
+
+inline void Lexer::set_backtracking()
+{
+    state_ = State::backtrack;
+}
+
+inline void Lexer::reset_backtracking()
+{
+    state_ = State::non_backtrack;
+}
+
+inline void Lexer::backtrack(char c)
+{
+    // reset to non-backtracking
+    this->reset_backtracking();
+
+    // tokenize and clear text
+    if (!text_.empty()) {
+        status_.tokens.emplace(symbol_t::TEXT, std::move(text_));
+        text_.clear();
+    }
+
+    // tokenize symbol
+    for (uint32_t i = 0; i < buf_.size(); ++i) {
+        trie_.back_transition();
+    }
+    assert(trie_.is_accept());
+    auto opt_symbol = trie_.get_symbol();
+    assert(static_cast<bool>(opt_symbol));
+    status_.tokens.emplace(*opt_symbol);
+
+    // move and clear buf_ to temp
+    std::string reprocess_str(std::move(buf_));
+    buf_.clear();
+    reprocess_str.push_back(c);
+
+    // reset trie
+    trie_.reset();
+
+    // reprocess the rest
+    for (char c : reprocess_str) {
+        this->process(c);
+    }
+}
+
+inline std::optional<Lexer::token_t> Lexer::next_token()
+{
+    if (!status_.tokens.empty()) {
+        token_t token = std::move(status_.tokens.front());
+        status_.tokens.pop();
+        return token;
+    }
+    return {};
+}
+
+} // namespace core
+} // namespace docgen
diff --git a/src/core/status.hpp b/src/core/status.hpp
index d29dda4..9294ea7 100644
--- a/src/core/status.hpp
+++ b/src/core/status.hpp
@@ -1,5 +1,6 @@
 #pragma once
-#include <vector>
+#include <queue>
+#include <core/token.hpp>
 
 namespace docgen {
 namespace core {
@@ -8,7 +9,7 @@ template <class TokenType>
 struct Status
 {
     using token_t = TokenType;
-    using token_arr_t = std::vector<token_t>;
+    using token_arr_t = std::queue<token_t>;
 
     token_arr_t tokens;
 };
diff --git a/src/core/symbol.hpp b/src/core/symbol.hpp
index 6dc5762..b19e1f7 100644
--- a/src/core/symbol.hpp
+++ b/src/core/symbol.hpp
@@ -10,16 +10,19 @@ enum class Symbol {
     // single-char tokens
     END_OF_FILE,
     NEWLINE,
+    WHITESPACE,
     SEMICOLON,
     STAR,
     OPEN_BRACE,
     CLOSE_BRACE,
     // string tokens
-    BEGIN_LINE_COMMENT,
-    BEGIN_BLOCK_COMMENT,
+    BEGIN_SLINE_COMMENT,
+    BEGIN_SBLOCK_COMMENT,
+    BEGIN_NLINE_COMMENT,
+    BEGIN_NBLOCK_COMMENT,
     END_BLOCK_COMMENT,
     // special tags
-    TAGNAME,
+    SDESC,
     // default
     TEXT
 };
@@ -31,8 +34,10 @@ static MAPBOX_ETERNAL_CONSTEXPR const auto symbol_map =
         {Symbol::STAR, "*"},
         {Symbol::OPEN_BRACE, "{"},
         {Symbol::CLOSE_BRACE, "}"},
-        {Symbol::BEGIN_LINE_COMMENT, "///"},
-        {Symbol::BEGIN_BLOCK_COMMENT, "/*!"},
+        {Symbol::BEGIN_SLINE_COMMENT, "///"},
+        {Symbol::BEGIN_SBLOCK_COMMENT, "/*!"},
+        {Symbol::BEGIN_NLINE_COMMENT, "//"},
+        {Symbol::BEGIN_NBLOCK_COMMENT, "/*"},
         {Symbol::END_BLOCK_COMMENT, "*/"},
     });
diff --git a/src/core/token.hpp b/src/core/token.hpp
index ac83ec0..cc02f37 100644
--- a/src/core/token.hpp
+++ b/src/core/token.hpp
@@ -10,10 +10,9 @@ struct Token
 {
     using symbol_t = SymbolType;
 
-    Token(symbol_t name, std::string&& content, uint32_t leading_ws_count=0)
+    Token(symbol_t name, std::string&& content)
         : name(name)
         , content(std::move(content))
-        , leading_ws_count(leading_ws_count)
     {}
 
     Token(symbol_t name)
@@ -25,7 +24,6 @@ struct Token
 
     symbol_t name;
     std::string content;
-    uint32_t leading_ws_count;
 };
 
 template <>
diff --git a/src/core/trie.hpp b/src/core/trie.hpp
new file mode 100644
index 0000000..71dbb5c
--- /dev/null
+++ b/src/core/trie.hpp
@@ -0,0 +1,201 @@
+#pragma once
+#include <string_view>
+#include <unordered_map>
+#include <optional>
+#include <functional>
+#include <utility>
+#include <exceptions/exceptions.hpp>
+
+namespace docgen {
+namespace core {
+
+template <class SymbolType>
+struct Trie
+{
+private:
+    using pair_t = std::pair<std::string_view, SymbolType>;
+
+public:
+
+    // Constructs trie node from a list of pairs of string and symbol.
+    // The string must be of type std::string_view and it must not be empty.
+    // The symbol must be of type SymbolType.
+    Trie(const std::initializer_list<pair_t>&);
+
+    // Delete compiler-generated copy/move ctor/assignment.
+    // This ensures that Trie objects are only constructible from an
+    // initializer list of (string, symbol) pairs.
+    Trie(const Trie&) =delete;
+    Trie(Trie&&) =delete;
+    Trie& operator=(const Trie&) =delete;
+    Trie& operator=(Trie&&) =delete;
+
+    void transition(char c);
+    void back_transition();
+    bool is_accept() const;
+    auto get_children();
+    bool is_reset() const;
+    void reset();
+    const std::optional<SymbolType>& get_symbol() const;
+
+private:
+
+    struct TrieNode
+    {
+        // Insert str from current node to update the trie structure.
+        // The string str is read starting from idx.
+        void insert(const std::pair<std::string_view, SymbolType>&, size_t = 0);
+
+        // Returns if current node is an accepting state.
+        bool is_accept() const;
+
+        // Returns the optional symbol associated with current node.
+        // Symbol will be active if is_accept is true.
+        const std::optional<SymbolType>& get_symbol() const;
+
+        std::unordered_map<char, TrieNode>& get_children();
+
+        std::optional<std::reference_wrapper<TrieNode>> get_parent();
+
+    private:
+
+        enum class State : bool {
+            accept,
+            non_accept
+        };
+
+        State state_ = State::non_accept;     // indicates accepting node or not
+        std::optional<SymbolType> symbol_;    // symbol for accepting node
+        std::unordered_map<char, TrieNode> children_;  // current node's children
+        TrieNode* parent_ptr_ = nullptr;      // current node's parent (nullptr for root)
+    };
+
+    TrieNode root_;                                       // root of Trie
+    std::reference_wrapper<TrieNode> curr_node_ = root_;  // current node
+};
+
+////////////////////////////////////////////////////////////////
+// TrieNode Implementation
+////////////////////////////////////////////////////////////////
+
+template <class SymbolType>
+inline void
+Trie<SymbolType>::TrieNode::insert(const pair_t& pair, size_t idx)
+{
+    const auto& str = std::get<0>(pair);
+
+    // if the string has been fully consumed, mark current node as accepting
+    // (note: indexing a string_view at its size would be undefined behavior)
+    if (idx == str.size()) {
+        state_ = State::accept;
+        symbol_ = std::get<1>(pair);
+    }
+
+    else {
+        auto& child = children_[str[idx]];
+        child.parent_ptr_ = this;
+        child.insert(pair, idx + 1);
+    }
+}
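+
+// Illustration (sketch, not code): inserting {"//", A}, {"///", B}, {"/*", C}
+// produces the trie below, where (X) marks an accepting node holding symbol X:
+//
+//   root -- '/' -- '/'(A) -- '/'(B)
+//             \
+//              '*'(C)
+//
+// A lookup walks one edge per character; the lexer later backtracks to the
+// deepest accepting node seen, which is the longest match.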
+
+template <class SymbolType>
+inline bool
+Trie<SymbolType>::TrieNode::is_accept() const
+{
+    return state_ == State::accept;
+}
+
+template <class SymbolType>
+inline const std::optional<SymbolType>&
+Trie<SymbolType>::TrieNode::get_symbol() const
+{
+    return symbol_;
+}
+
+template <class SymbolType>
+inline std::unordered_map<char, typename Trie<SymbolType>::TrieNode>&
+Trie<SymbolType>::TrieNode::get_children()
+{
+    return children_;
+}
+
+template <class SymbolType>
+inline std::optional<std::reference_wrapper<typename Trie<SymbolType>::TrieNode>>
+Trie<SymbolType>::TrieNode::get_parent()
+{
+    if (parent_ptr_) {
+        return *parent_ptr_;
+    }
+    return {};
+}
+
+////////////////////////////////////////////////////////////////
+// Trie Implementation
+////////////////////////////////////////////////////////////////
+
+template <class SymbolType>
+inline
+Trie<SymbolType>::Trie(const std::initializer_list<pair_t>& pairs)
+    : root_()
+{
+    for (auto it = pairs.begin(); it != pairs.end(); ++it) {
+        if (it->first.empty()) {
+            throw exceptions::control_flow_error("strings must be non-empty");
+        }
+        root_.insert(*it);
+    }
+}
+
+template <class SymbolType>
+inline void
+Trie<SymbolType>::transition(char c)
+{
+    curr_node_ = curr_node_.get().get_children().at(c);
+}
+
+template <class SymbolType>
+inline bool
+Trie<SymbolType>::is_accept() const
+{
+    return curr_node_.get().is_accept();
+}
+
+template <class SymbolType>
+inline auto
+Trie<SymbolType>::get_children()
+{
+    return curr_node_.get().get_children();
+}
+
+template <class SymbolType>
+inline bool
+Trie<SymbolType>::is_reset() const
+{
+    return &(curr_node_.get()) == &root_;
+}
+
+template <class SymbolType>
+inline void
+Trie<SymbolType>::reset()
+{
+    curr_node_ = root_;
+}
+
+template <class SymbolType>
+inline void
+Trie<SymbolType>::back_transition()
+{
+    auto&& opt_parent = curr_node_.get().get_parent();
+    if (!opt_parent) {
+        throw exceptions::control_flow_error("Attempt to back transition past the root");
+    }
+    curr_node_ = *opt_parent;
+}
+
+template <class SymbolType>
+inline const std::optional<SymbolType>&
+Trie<SymbolType>::get_symbol() const
+{
+    return curr_node_.get().get_symbol();
+}
+
+} // namespace core
+} // namespace docgen

From be0b02bcb81177cf8c4d5014f752317295ba44c8 Mon Sep 17 00:00:00 2001
From: James Yang
Date: Fri, 17 Jan 2020 21:03:48 -0500
Subject: [PATCH 02/23] Remove unnecessary lexer_routines and rename
 lexer_trie to lexer

---
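With both "//" and "///" (and "/*" / "/*!") registered, longest match selects
the more specific symbol; the names presumably read S = special (doc) comment
and N = normal comment. For example, a doc line would lex roughly as follows
(illustrative sketch; whitespace tokens elided):

    // input : "/// @sdesc brief\n"
    // tokens: BEGIN_SLINE_COMMENT, SDESC, TEXT("brief"), NEWLINE

while "// @sdesc brief\n" would start with BEGIN_NLINE_COMMENT instead.
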
 src/core/lexer.hpp          | 170 +++++++++++++++++---
 src/core/lexer_routines.hpp | 300 ------------------------------------
 src/core/lexer_trie.hpp     | 163 --------------------
 src/core/symbol.hpp         |   4 +
 4 files changed, 154 insertions(+), 483 deletions(-)
 delete mode 100644 src/core/lexer_routines.hpp
 delete mode 100644 src/core/lexer_trie.hpp

diff --git a/src/core/lexer.hpp b/src/core/lexer.hpp
index 56f647e..40706ab 100644
--- a/src/core/lexer.hpp
+++ b/src/core/lexer.hpp
@@ -1,37 +1,167 @@
-#pragma once
-#include "lexer_routines.hpp"
+#pragma once
+#include <core/symbol.hpp>
+#include <core/token.hpp>
+#include <core/status.hpp>
+#include <core/trie.hpp>
 
 namespace docgen {
 namespace core {
 
 struct Lexer
 {
-    using symbol_t = lexer_details::symbol_t;
-    using file_reader = lexer_details::file_reader;
-    using status_t = lexer_details::status_t;
+    using symbol_t = Symbol;
+    using token_t = Token<symbol_t>;
+    using status_t = Status<token_t>;
 
-    Lexer(FILE* file)
-        : reader_(file)
-    {
-        status_.tokens.reserve(DEFAULT_TOKEN_ARR_SIZE);
+    Lexer();
+
+    void process(char c);
+    std::optional<token_t> next_token();
+
+private:
+
+    bool is_backtracking() const;
+    void set_backtracking();
+    void reset_backtracking();
+    void backtrack(char c);
+
+    enum class State : bool {
+        backtrack,
+        non_backtrack
+    };
+
+    Trie<symbol_t> trie_;
+    std::string text_;
+    std::string buf_;
+    State state_ = State::non_backtrack;
+    status_t status_;
+};
+
+///////////////////////////////////
+// Lexer Implementation
+///////////////////////////////////
+
+Lexer::Lexer()
+    : trie_({
+            {"\n", Symbol::NEWLINE},
+            {" ", Symbol::WHITESPACE},
+            {"\t", Symbol::WHITESPACE},
+            {"\v", Symbol::WHITESPACE},
+            {"\r", Symbol::WHITESPACE},
+            {"\f", Symbol::WHITESPACE},
+            {";", Symbol::SEMICOLON},
+            {"#", Symbol::HASHTAG},
+            {"*", Symbol::STAR},
+            {"{", Symbol::OPEN_BRACE},
+            {"}", Symbol::CLOSE_BRACE},
+            {"///", Symbol::BEGIN_SLINE_COMMENT},
+            {"/*!", Symbol::BEGIN_SBLOCK_COMMENT},
+            {"//", Symbol::BEGIN_NLINE_COMMENT},
+            {"/*", Symbol::BEGIN_NBLOCK_COMMENT},
+            {"*/", Symbol::END_BLOCK_COMMENT},
+            {"@sdesc", Symbol::SDESC},
+            {"@tparam", Symbol::TPARAM},
+            {"@param", Symbol::PARAM},
+            {"@return", Symbol::RETURN}
+        })
+{
+    // TODO: reserve space for status_.tokens?
+}
+
+inline void Lexer::process(char c)
+{
+    // if current state is accepting
+    if (trie_.is_accept()) {
+        if (!this->is_backtracking()) {
+            this->set_backtracking();
+        }
+        // ignore contents in buffer up until now
+        // this optimization can be done because we look for longest match
+        buf_.clear();
     }
 
-    void process()
-    {
-        lexer_details::process(reader_, status_);
+    auto it = trie_.get_children().find(c);
+
+    // if transition exists
+    if (it != trie_.get_children().end()) {
+        buf_.push_back(c);
+        trie_.transition(c);
+        return;
     }
+
+    // otherwise, no transition exists
 
-    const status_t::token_arr_t& get_tokens() const
-    {
-        return status_.tokens;
+    // if not backtracking
+    if (!this->is_backtracking()) {
+        text_.append(buf_);
+        text_.push_back(c);
+        buf_.clear();
+        trie_.reset();
+        return;
     }
+
+    // otherwise, currently backtracking
+    this->backtrack(c);
+}
 
-private:
-    static constexpr size_t DEFAULT_TOKEN_ARR_SIZE = 50;
+inline bool Lexer::is_backtracking() const
+{
+    return state_ == State::backtrack;
+}
 
-    file_reader reader_;
-    status_t status_; // keeps track of last token value (enum)
-};
+inline void Lexer::set_backtracking()
+{
+    state_ = State::backtrack;
+}
+
+inline void Lexer::reset_backtracking()
+{
+    state_ = State::non_backtrack;
+}
+
+inline void Lexer::backtrack(char c)
+{
+    // reset to non-backtracking
+    this->reset_backtracking();
+
+    // tokenize and clear text
+    if (!text_.empty()) {
+        status_.tokens.emplace(symbol_t::TEXT, std::move(text_));
+        text_.clear();
+    }
+
+    // tokenize symbol
+    for (uint32_t i = 0; i < buf_.size(); ++i) {
+        trie_.back_transition();
+    }
+    assert(trie_.is_accept());
+    auto opt_symbol = trie_.get_symbol();
+    assert(static_cast<bool>(opt_symbol));
+    status_.tokens.emplace(*opt_symbol);
+
+    // move and clear buf_ to temp string for reprocessing
+    std::string reprocess_str(std::move(buf_));
+    buf_.clear();
+    reprocess_str.push_back(c);
+
+    // reset trie
+    trie_.reset();
+
+    // reprocess the rest
+    for (char c : reprocess_str) {
+        this->process(c);
+    }
+}
+
+inline std::optional<Lexer::token_t> Lexer::next_token()
+{
+    if (!status_.tokens.empty()) {
+        token_t token = std::move(status_.tokens.front());
+        status_.tokens.pop();
+        return token;
+    }
+    return {};
+}
 
 } // namespace core
 } // namespace docgen
diff --git a/src/core/lexer_routines.hpp b/src/core/lexer_routines.hpp
deleted file mode 100644
index 06e658f..0000000
--- a/src/core/lexer_routines.hpp
+++ /dev/null
@@ -1,300 +0,0 @@
-#pragma once
-#include "token.hpp"
-#include "status.hpp"
-#include "symbol.hpp"
-#include "io/file_reader.hpp"
-#include "tag_set.hpp"
-#include <cctype>
-
-namespace docgen {
-namespace core {
-namespace lexer_details {
-
-static constexpr size_t DEFAULT_STRING_RESERVE_SIZE = 50;
-
-using file_reader = io::file_reader;
-using symbol_t = Symbol;
-using token_t = Token<symbol_t>;
-using status_t = Status<token_t>;
-
-// Reads and ignores chars until func(c) evaluates to false or reading terminates,
-// where c is the current char read.
-// Returns the last char read that terminated the function.
-template <class Termination>
-inline int ignore_until(file_reader& reader, Termination func)
-{
-    int c = 0;
-    while (((c = reader.read()) != file_reader::termination) && func(c));
-    return c;
-}
-
-// Reads and stores chars until func(c) evaluates to false or reading terminates,
-// where c is the current char read.
-// Returns the last char read that terminated the function.
-template <class Termination>
-inline int read_until(file_reader& reader, Termination func, std::string& line)
-{
-    int c = 0;
-    line.reserve(DEFAULT_STRING_RESERVE_SIZE);
-    while (((c = reader.read()) != file_reader::termination) && func(c)) {
-        line.push_back(c);
-    }
-    return c;
-}
-
-// Trims all leading and trailing whitespaces (one of " \t\n\v\f\r") from line.
-// Line is directly modified.
-// Returns leading whitespace count of original line.
-inline uint32_t trim(std::string& line)
-{
-    static constexpr const char* whitespaces = " \t\n\v\f\r";
-
-    // find first non-whitespace
-    const auto begin = line.find_first_not_of(whitespaces);
-
-    // find last non-whitespace
-    const auto end = line.find_last_not_of(whitespaces);
-
-    // If substring invalid, simply clear line and return length of string.
-    // By symmetry, begin and end will be npos if and only if the string only
-    // consists of whitespaces. In this case, the leading whitespace count is
-    // simply the length of the string.
-    if (begin == std::string::npos && end == std::string::npos) {
-        uint32_t leading_ws_count = line.size();
-        line.clear();
-        return leading_ws_count;
-    }
-
-    // otherwise, replace with substring
-    line = line.substr(begin, end - begin + 1);
-
-    return begin;    // number of leading whitespaces
-}
-
-// Trims text, tokenizes it, clears it, and reserves DEFAULT_STRING_RESERVE_SIZE.
-// (Trimmed) text is only tokenized if it is non-empty.
-inline void tokenize_text(std::string& text, status_t& status)
-{
-    // trim whitespaces from text first
-    uint32_t leading_whitespace_count = trim(text);
-    // tokenize current TEXT only if it is non-empty
-    if (!text.empty()) {
-        status.tokens.emplace_back(symbol_t::TEXT, std::move(text), leading_whitespace_count);
-    }
-    // clear and reserve
-    text.clear();
-    text.reserve(DEFAULT_STRING_RESERVE_SIZE);
-}
-
-// If c is one of single-char special tokens (see symbol.hpp),
-// then text is first tokenized then the single-char special token.
-// The tokens are appended to status.tokens in this order.
-// Otherwise, no operations are performed.
-// Returns true if and only if a single-char special token was created.
-inline bool process_char(int c, std::string& text, status_t& status)
-{
-    switch (c) {
-        case '\n':
-            tokenize_text(text, status);
-            status.tokens.emplace_back(symbol_t::NEWLINE);
-            return true;
-        case ';':
-            tokenize_text(text, status);
-            status.tokens.emplace_back(symbol_t::SEMICOLON);
-            return true;
-        case '{':
-            tokenize_text(text, status);
-            status.tokens.emplace_back(symbol_t::OPEN_BRACE);
-            return true;
-        case '}':
-            tokenize_text(text, status);
-            status.tokens.emplace_back(symbol_t::CLOSE_BRACE);
-            return true;
-        default:
-            return false;
-    }
-}
-
-// If tag name is not a valid one, assume it is simply text.
-// It is expected that the caller immediately read "@" before calling.
-inline void tokenize_tag_name(std::string& text, file_reader& reader, status_t& status)
-{
-    static constexpr const auto is_alpha =
-        [](char x) {return isalpha(x);};
-
-    // parse tag
-    std::string tagname;
-    int c = read_until(reader, is_alpha, tagname);
-    reader.back(c);
-
-    // if valid tag, append text token then token with tag name
-    if (tag_set.find(tagname) != tag_set.end()) {
-        tokenize_text(text, status);
-        status.tokens.emplace_back(symbol_t::TAGNAME, std::move(tagname));
-    }
-
-    // otherwise, assume part of text: append "@" then tag name to text
-    else {
-        text.push_back('@');
-        text.append(tagname);
-    }
-}
-
-// If c is '@', try to tokenize tag name.
-// Behavior is the same as tokenize_tag_name.
-// Returns true if and only if c is '@'.
-inline bool process_tag_name(int c, std::string& text,
-                             file_reader& reader, status_t& status)
-{
-    if (c == '@') {
-        tokenize_tag_name(text, reader, status);
-        return true;
-    }
-    return false;
-}
-
-// It is expected that caller has read the string "//" immediately before calling.
-inline void process_line_comment(std::string& text, file_reader& reader, status_t& status)
-{
-    static constexpr const auto is_not_newline =
-        [](char x) {return x != '\n';};
-
-    int c = reader.read();
-
-    if (c == '/') {
-        c = reader.read();
-        // valid triple-slash comment
-        if (isspace(c)) {
-            tokenize_text(text, status);
-            status.tokens.emplace_back(symbol_t::BEGIN_LINE_COMMENT);
-            reader.back(c);    // in case it's a single-char token
-        }
-        // invalid triple-slash comment
-        else {
-            // no need to read back since c cannot be a whitespace and we ignore anyway
-            ignore_until(reader, is_not_newline);
-        }
-    }
-
-    // invalid triple-slash comment
-    else {
-        reader.back(c);    // the character just read may be '\n'
-        ignore_until(reader, is_not_newline);
-    }
-}
-
-// It is expected that caller has read the string "/*" immediately before calling.
-inline void process_block_comment(std::string& text, file_reader& reader, status_t& status)
-{
-    const auto is_not_end_block =
-        [&](char x) {return (x != '*') || (reader.peek() != '/');};
-
-    int c = reader.read();
-
-    if (c == '!') {
-        c = reader.read();
-        // valid block comment: tokenize text then begin block comment symbol
-        if (isspace(c)) {
-            tokenize_text(text, status);
-            status.tokens.emplace_back(symbol_t::BEGIN_BLOCK_COMMENT);
-            reader.back(c);    // may be special single-char token
-        }
-        // regular block comment: ignore text until end and stop tokenizing
-        else {
-            ignore_until(reader, is_not_end_block);
-            reader.read();    // read the '/'
-        }
-    }
-
-    // regular block comment
-    else {
-        ignore_until(reader, is_not_end_block);    // stops after reading '*' in "*/"
-        reader.read();    // read the '/' after
-    }
-}
-
-// If c is not '/' or '*', then no operation done and returns false.
-// If c is '/', and if it's a possible line comment ("//") then same as process_line_comment;
-// if it's a possible block comment ("/*") then same as process_block_comment;
-// otherwise, text is updated to include all characters read.
-//
-// If c is '*', and if it is the ending of a block comment ("*/"), text tokenized then END_BLOCK_COMMENT;
-// otherwise, text tokenized then STAR.
-//
-// In any case, returns true if first char has been processed.
-inline bool process_string(int c, std::string& text,
-                           file_reader& reader, status_t& status)
-{
-    // possibly beginning of line or block comment
-    if (c == '/') {
-        c = reader.read();
-        if (c == '/') {
-            process_line_comment(text, reader, status);
-        }
-        else if (c == '*') {
-            process_block_comment(text, reader, status);
-        }
-        else {
-            text.push_back('/');
-            text.push_back(c);
-        }
-        return true;
-    }
-
-    // possibly ending block comment or a star that can be ignored in the middle of a block comment
-    else if (c == '*') {
-        c = reader.read();
-        if (c == '/') {
-            tokenize_text(text, status);
-            status.tokens.emplace_back(symbol_t::END_BLOCK_COMMENT);
-        }
-        else {
-            tokenize_text(text, status);
-            status.tokens.emplace_back(symbol_t::STAR);
-            reader.back(c);
-        }
-        return true;
-    }
-
-    return false;
-}
-
-inline void process(file_reader& reader, status_t& status)
-{
-    std::string text;
-    text.reserve(DEFAULT_STRING_RESERVE_SIZE);
-    int c = 0;
-    bool processed = false;
-
-    while ((c = reader.read()) != file_reader::termination) {
-
-        // process special single-char
-        processed = process_char(c, text, status);
-        if (processed) {
-            continue;
-        }
-
-        // process tag name
-        processed = process_tag_name(c, text, reader, status);
-        if (processed) {
-            continue;
-        }
-
-        // process string tokens
-        processed = process_string(c, text, reader, status);
-        if (processed) {
-            continue;
-        }
-
-        // otherwise, no special symbol -> push to text
-        text.push_back(c);
-    }
-
-    // tokenize last text then EOF
-    tokenize_text(text, status);
-    status.tokens.emplace_back(token_t::symbol_t::END_OF_FILE);
-}
-
-} // namespace lexer_details
-} // namespace core
-} // namespace docgen
diff --git a/src/core/lexer_trie.hpp b/src/core/lexer_trie.hpp
deleted file mode 100644
index 4bd9eb5..0000000
--- a/src/core/lexer_trie.hpp
+++ /dev/null
@@ -1,163 +0,0 @@
-#pragma once
-#include <core/symbol.hpp>
-#include <core/token.hpp>
-#include <core/status.hpp>
-#include <core/trie.hpp>
-
-namespace docgen {
-namespace core {
-
-struct Lexer
-{
-    using symbol_t = Symbol;
-    using token_t = Token<symbol_t>;
-    using status_t = Status<token_t>;
-
-    Lexer();
-
-    void process(char c);
-    std::optional<token_t> next_token();
-
-private:
-
-    bool is_backtracking() const;
-    void set_backtracking();
-    void reset_backtracking();
-    void backtrack(char c);
-
-    enum class State : bool {
-        backtrack,
-        non_backtrack
-    };
-
-    Trie<symbol_t> trie_;
-    std::string text_;
-    std::string buf_;
-    State state_ = State::non_backtrack;
-    status_t status_;
-};
-
-///////////////////////////////////
-// Lexer Implementation
-///////////////////////////////////
-
-Lexer::Lexer()
-    : trie_({
-            {"\n", Symbol::NEWLINE},
-            {";", Symbol::SEMICOLON},
-            {" ", Symbol::WHITESPACE},
-            {"\t", Symbol::WHITESPACE},
-            {"\v", Symbol::WHITESPACE},
-            {"\r", Symbol::WHITESPACE},
-            {"\f", Symbol::WHITESPACE},
-            {"*", Symbol::STAR},
-            {"{", Symbol::OPEN_BRACE},
-            {"}", Symbol::CLOSE_BRACE},
-            {"///", Symbol::BEGIN_SLINE_COMMENT},
-            {"/*!", Symbol::BEGIN_SBLOCK_COMMENT},
-            {"//", Symbol::BEGIN_NLINE_COMMENT},
-            {"/*", Symbol::BEGIN_NBLOCK_COMMENT},
-            {"*/", Symbol::END_BLOCK_COMMENT},
-            {"@sdesc", Symbol::SDESC}
-        })
-{
-    // TODO: reserve space for status_.tokens?
-}
-
-inline void Lexer::process(char c)
-{
-    // if current state is accepting
-    if (trie_.is_accept()) {
-        if (!this->is_backtracking()) {
-            this->set_backtracking();
-        }
-        // ignore contents in buffer up until now
-        // this optimization can be done because we look for longest match
-        buf_.clear();
-    }
-
-    auto it = trie_.get_children().find(c);
-
-    // if transition exists
-    if (it != trie_.get_children().end()) {
-        buf_.push_back(c);
-        trie_.transition(c);
-        return;
-    }
-
-    // otherwise, no transition exists
-
-    // if not backtracking
-    if (!this->is_backtracking()) {
-        text_.append(buf_);
-        text_.push_back(c);
-        buf_.clear();
-        trie_.reset();
-        return;
-    }
-
-    // otherwise, currently backtracking
-    this->backtrack(c);
-}
-
-inline bool Lexer::is_backtracking() const
-{
-    return state_ == State::backtrack;
-}
-
-inline void Lexer::set_backtracking()
-{
-    state_ = State::backtrack;
-}
-
-inline void Lexer::reset_backtracking()
-{
-    state_ = State::non_backtrack;
-}
-
-inline void Lexer::backtrack(char c)
-{
-    // reset to non-backtracking
-    this->reset_backtracking();
-
-    // tokenize and clear text
-    if (!text_.empty()) {
-        status_.tokens.emplace(symbol_t::TEXT, std::move(text_));
-        text_.clear();
-    }
-
-    // tokenize symbol
-    for (uint32_t i = 0; i < buf_.size(); ++i) {
-        trie_.back_transition();
-    }
-    assert(trie_.is_accept());
-    auto opt_symbol = trie_.get_symbol();
-    assert(static_cast<bool>(opt_symbol));
-    status_.tokens.emplace(*opt_symbol);
-
-    // move and clear buf_ to temp
-    std::string reprocess_str(std::move(buf_));
-    buf_.clear();
-    reprocess_str.push_back(c);
-
-    // reset trie
-    trie_.reset();
-
-    // reprocess the rest
-    for (char c : reprocess_str) {
-        this->process(c);
-    }
-}
-
-inline std::optional<Lexer::token_t> Lexer::next_token()
-{
-    if (!status_.tokens.empty()) {
-        token_t token = std::move(status_.tokens.front());
-        status_.tokens.pop();
-        return token;
-    }
-    return {};
-}
-
-} // namespace core
-} // namespace docgen
diff --git a/src/core/symbol.hpp b/src/core/symbol.hpp
index b19e1f7..34c1969 100644
--- a/src/core/symbol.hpp
+++ b/src/core/symbol.hpp
@@ -12,6 +12,7 @@ enum class Symbol {
     NEWLINE,
     WHITESPACE,
     SEMICOLON,
+    HASHTAG,
     STAR,
     OPEN_BRACE,
     CLOSE_BRACE,
@@ -23,6 +24,9 @@ enum class Symbol {
     END_BLOCK_COMMENT,
     // special tags
     SDESC,
+    TPARAM,
+    PARAM,
+    RETURN,
     // default
     TEXT
 };

From 5f43218ecd2e358cec0541ed0d1a539db92a2a25 Mon Sep 17 00:00:00 2001
From: James Yang
Date: Fri, 17 Jan 2020 21:05:34 -0500
Subject: [PATCH 03/23] Update new lexer unittest

---
 test/core/lexer_base_fixture.hpp |   3 +-
 test/core/lexer_unittest.cpp     | 654 +++++++++++++++++++------------
 test/core/trie_unittest.cpp      | 102 +++++
 3 files changed, 517 insertions(+), 242 deletions(-)
 create mode 100644 test/core/trie_unittest.cpp

diff --git a/test/core/lexer_base_fixture.hpp b/test/core/lexer_base_fixture.hpp
index 79ea86a..b37b85f 100644
--- a/test/core/lexer_base_fixture.hpp
+++ b/test/core/lexer_base_fixture.hpp
@@ -1,5 +1,4 @@
 #pragma once
-#include "core/lexer_routines.hpp"
 #include <gtest/gtest.h>
 
 namespace docgen {
@@ -8,7 +7,7 @@ namespace core {
 struct lexer_base_fixture : ::testing::Test
 {
 protected:
-    using status_t = lexer_details::status_t;
+    using status_t = status_t;
     using token_t = lexer_details::token_t;
     using symbol_t = lexer_details::symbol_t;
diff --git a/test/core/lexer_unittest.cpp b/test/core/lexer_unittest.cpp
index aebfbaa..643c2f5 100644
--- a/test/core/lexer_unittest.cpp
+++ b/test/core/lexer_unittest.cpp
@@ -1,279 +1,453 @@
-#include "core/lexer.hpp"
-#include "lexer_base_fixture.hpp"
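+// Test-only hack: redefine private to public so the tests can inspect the
+// internals of the headers included below; must precede those includes.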
+#define private public
+
+#include <core/lexer.hpp>
+#include <gtest/gtest.h>
 
 namespace docgen {
 namespace core {
 
-struct lexer_fixture : lexer_base_fixture
+struct lexer_fixture : ::testing::Test
 {
 protected:
+    using status_t = Lexer::status_t;
+    using token_t = Lexer::token_t;
+    using symbol_t = Lexer::symbol_t;
+
+    Lexer lexer;
+
+    void setup_lexer(const char* content)
+    {
+        std::string str(content);
+        for (char c : str) {
+            lexer.process(c);
+        }
+        lexer.process(0);
+    }
+
+    void check_token(symbol_t actual_sym, symbol_t expected_sym,
+                     const std::string& actual_str, const std::string& expected_str)
+    {
+        EXPECT_EQ(actual_sym, expected_sym);
+        EXPECT_EQ(actual_str, expected_str);
+    }
 };
 
-TEST_F(lexer_fixture, process_no_comment)
+TEST_F(lexer_fixture, lexer)
 {
     static constexpr const char* content =
-        "#include <string>\n"
-        "\n"
-        "  // just a normal comment\n"
-        "\n"
+        "#include <string> // some comment\n"
+        "\n"
+        "void f();"
         ;
-    write_file(content);
-    Lexer lexer(file);
-    lexer.process();
-    const auto& tokens = lexer.get_tokens();
-
-    EXPECT_EQ(tokens.size(), static_cast<size_t>(5));
-
-    check_token(tokens[0].name, symbol_t::TEXT,
-                tokens[0].content, "#include <string>");
-    check_token(tokens[1].name, symbol_t::NEWLINE,
-                tokens[1].content, "");
-    check_token(tokens[2].name, symbol_t::NEWLINE,
-                tokens[2].content, "");
-    check_token(tokens[3].name, symbol_t::NEWLINE,
-                tokens[3].content, "");
-    check_token(tokens[4].name, symbol_t::END_OF_FILE,
-                tokens[4].content, "");
-}
+    setup_lexer(content);
 
+    auto token = *lexer.next_token();
+    check_token(token.name, symbol_t::HASHTAG,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::TEXT,
+                token.content, "include");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::WHITESPACE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::TEXT,
+                token.content, "<string>");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::WHITESPACE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::BEGIN_NLINE_COMMENT,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::WHITESPACE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::TEXT,
+                token.content, "some");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::WHITESPACE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::TEXT,
+                token.content, "comment");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::NEWLINE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::NEWLINE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::TEXT,
+                token.content, "void");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::WHITESPACE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::TEXT,
+                token.content, "f()");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::SEMICOLON,
+                token.content, "");
+}
+
-TEST_F(lexer_fixture, process_one_line_comment)
-{
-    static constexpr const char* content =
-        "#include <string>\n"
-        "\n"
-        "  // just a normal comment\n"
-        "  /// a very special comment   \n"
-        "\n"
-        ;
-
-    write_file(content);
-    Lexer lexer(file);
-    lexer.process();
-    const auto& tokens = lexer.get_tokens();
-
-    EXPECT_EQ(tokens.size(), static_cast<size_t>(8));
-
-    check_token(tokens[0].name, symbol_t::TEXT,
-                tokens[0].content, "#include <string>");
-    check_token(tokens[1].name, symbol_t::NEWLINE,
-                tokens[1].content, "");
-    check_token(tokens[2].name, symbol_t::NEWLINE,
-                tokens[2].content, "");
-    check_token(tokens[3].name, symbol_t::BEGIN_LINE_COMMENT,
tokens[3].content, ""); - check_token(tokens[4].name, symbol_t::TEXT, - tokens[4].content, "a very special comment"); - EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); - check_token(tokens[5].name, symbol_t::NEWLINE, - tokens[5].content, ""); - check_token(tokens[6].name, symbol_t::NEWLINE, - tokens[6].content, ""); - check_token(tokens[7].name, symbol_t::BEGIN_LINE_COMMENT, - tokens[7].content, ""); - check_token(tokens[8].name, symbol_t::TEXT, - tokens[8].content, "another very special comment"); - EXPECT_EQ(tokens[8].leading_ws_count, static_cast(1)); - check_token(tokens[9].name, symbol_t::NEWLINE, - tokens[9].content, ""); - check_token(tokens[10].name, symbol_t::END_OF_FILE, - tokens[10].content, ""); -} + token = *lexer.next_token(); + check_token(token.name, symbol_t::TEXT, + token.content, ""); -TEST_F(lexer_fixture, process_one_block_comment) -{ - static constexpr const char* content = - "#include \n" - "\n" - " // just a normal comment\n" - " /*! a very special comment */\n" - "\n" - ; + token = *lexer.next_token(); + check_token(token.name, symbol_t::WHITESPACE, + token.content, ""); + + token = *lexer.next_token(); + check_token(token.name, symbol_t::BEGIN_NLINE_COMMENT, + token.content, ""); + + token = *lexer.next_token(); + check_token(token.name, symbol_t::WHITESPACE, + token.content, ""); + + token = *lexer.next_token(); + check_token(token.name, symbol_t::TEXT, + token.content, "some"); + + token = *lexer.next_token(); + check_token(token.name, symbol_t::WHITESPACE, + token.content, ""); + + token = *lexer.next_token(); + check_token(token.name, symbol_t::TEXT, + token.content, "comment"); + + token = *lexer.next_token(); + check_token(token.name, symbol_t::NEWLINE, + token.content, ""); + + token = *lexer.next_token(); + check_token(token.name, symbol_t::NEWLINE, + token.content, ""); + + token = *lexer.next_token(); + check_token(token.name, symbol_t::TEXT, + token.content, "void"); - write_file(content); - Lexer lexer(file); - lexer.process(); - const auto& tokens = lexer.get_tokens(); - - EXPECT_EQ(tokens.size(), static_cast(9)); - - check_token(tokens[0].name, symbol_t::TEXT, - tokens[0].content, "#include "); - check_token(tokens[1].name, symbol_t::NEWLINE, - tokens[1].content, ""); - check_token(tokens[2].name, symbol_t::NEWLINE, - tokens[2].content, ""); - check_token(tokens[3].name, symbol_t::BEGIN_BLOCK_COMMENT, - tokens[3].content, ""); - check_token(tokens[4].name, symbol_t::TEXT, - tokens[4].content, "a very special comment"); - EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); - check_token(tokens[5].name, symbol_t::END_BLOCK_COMMENT, - tokens[5].content, ""); - check_token(tokens[6].name, symbol_t::NEWLINE, - tokens[6].content, ""); - check_token(tokens[7].name, symbol_t::NEWLINE, - tokens[7].content, ""); - check_token(tokens[8].name, symbol_t::END_OF_FILE, - tokens[8].content, ""); + token = *lexer.next_token(); + check_token(token.name, symbol_t::WHITESPACE, + token.content, ""); + + token = *lexer.next_token(); + check_token(token.name, symbol_t::TEXT, + token.content, "f()"); + + token = *lexer.next_token(); + check_token(token.name, symbol_t::SEMICOLON, + token.content, ""); } -TEST_F(lexer_fixture, process_two_block_comment) +TEST_F(lexer_fixture, process_no_comment) { static constexpr const char* content = "#include \n" "\n" " // just a normal comment\n" - " /*! a very special comment */\n" "\n" - " // just a normal comment\n" - " /*! 
-        "  /*! another very \n"
-        " * special comment   \n"
-        "*/"
-        "  /* just a normal comment\n */"
-        ;
-
-    write_file(content);
-    Lexer lexer(file);
-    lexer.process();
-    const auto& tokens = lexer.get_tokens();
-
-    EXPECT_EQ(tokens.size(), static_cast<size_t>(16));
-
-    check_token(tokens[0].name, symbol_t::TEXT,
-                tokens[0].content, "#include <string>");
-    check_token(tokens[1].name, symbol_t::NEWLINE,
-                tokens[1].content, "");
-    check_token(tokens[2].name, symbol_t::NEWLINE,
-                tokens[2].content, "");
-    check_token(tokens[3].name, symbol_t::BEGIN_BLOCK_COMMENT,
-                tokens[3].content, "");
-    check_token(tokens[4].name, symbol_t::TEXT,
-                tokens[4].content, "a very special comment");
-    EXPECT_EQ(tokens[4].leading_ws_count, static_cast<uint32_t>(1));
-    check_token(tokens[5].name, symbol_t::END_BLOCK_COMMENT,
-                tokens[5].content, "");
-    check_token(tokens[6].name, symbol_t::NEWLINE,
-                tokens[6].content, "");
-    check_token(tokens[7].name, symbol_t::NEWLINE,
-                tokens[7].content, "");
-    check_token(tokens[8].name, symbol_t::BEGIN_BLOCK_COMMENT,
-                tokens[8].content, "");
-    check_token(tokens[9].name, symbol_t::TEXT,
-                tokens[9].content, "another very");
-    EXPECT_EQ(tokens[9].leading_ws_count, static_cast<uint32_t>(1));
-    check_token(tokens[10].name, symbol_t::NEWLINE,
-                tokens[10].content, "");
-    check_token(tokens[11].name, symbol_t::STAR,
-                tokens[11].content, "");
-    check_token(tokens[12].name, symbol_t::TEXT,
-                tokens[12].content, "special comment");
-    EXPECT_EQ(tokens[12].leading_ws_count, static_cast<uint32_t>(1));
-    check_token(tokens[13].name, symbol_t::NEWLINE,
-                tokens[13].content, "");
-    check_token(tokens[14].name, symbol_t::END_BLOCK_COMMENT,
-                tokens[14].content, "");
-    check_token(tokens[15].name, symbol_t::END_OF_FILE,
-                tokens[15].content, "");
-}
-
-TEST_F(lexer_fixture, process_line_block_comment)
-{
-    static constexpr const char* content =
-        "#include <string>\n"
-        "\n"
-        "  // just a normal comment\n"
-        "  /// a very special comment */\n"
-        "\n"
-        "  // just a normal comment\n"
-        "  /*! another very \n"
-        " * special comment   \n"
-        "*/"
-        "  /* just a normal comment\n */"
-        ;
-
-    write_file(content);
-    Lexer lexer(file);
-    lexer.process();
-    const auto& tokens = lexer.get_tokens();
-
-    EXPECT_EQ(tokens.size(), static_cast<size_t>(16));
-
-    check_token(tokens[0].name, symbol_t::TEXT,
-                tokens[0].content, "#include <string>");
-    check_token(tokens[1].name, symbol_t::NEWLINE,
-                tokens[1].content, "");
-    check_token(tokens[2].name, symbol_t::NEWLINE,
-                tokens[2].content, "");
-    check_token(tokens[3].name, symbol_t::BEGIN_LINE_COMMENT,
-                tokens[3].content, "");
-    check_token(tokens[4].name, symbol_t::TEXT,
-                tokens[4].content, "a very special comment");
-    EXPECT_EQ(tokens[4].leading_ws_count, static_cast<uint32_t>(1));
-    check_token(tokens[5].name, symbol_t::END_BLOCK_COMMENT,
-                tokens[5].content, "");
-    check_token(tokens[6].name, symbol_t::NEWLINE,
-                tokens[6].content, "");
-    check_token(tokens[7].name, symbol_t::NEWLINE,
-                tokens[7].content, "");
-    check_token(tokens[8].name, symbol_t::BEGIN_BLOCK_COMMENT,
-                tokens[8].content, "");
-    check_token(tokens[9].name, symbol_t::TEXT,
-                tokens[9].content, "another very");
-    EXPECT_EQ(tokens[9].leading_ws_count, static_cast<uint32_t>(1));
-    check_token(tokens[10].name, symbol_t::NEWLINE,
-                tokens[10].content, "");
-    check_token(tokens[11].name, symbol_t::STAR,
-                tokens[11].content, "");
-    check_token(tokens[12].name, symbol_t::TEXT,
-                tokens[12].content, "special comment");
-    EXPECT_EQ(tokens[12].leading_ws_count, static_cast<uint32_t>(1));
-    check_token(tokens[13].name, symbol_t::NEWLINE,
-                tokens[13].content, "");
-    check_token(tokens[14].name, symbol_t::END_BLOCK_COMMENT,
-                tokens[14].content, "");
-    check_token(tokens[15].name, symbol_t::END_OF_FILE,
-                tokens[15].content, "");
-}
+TEST_F(lexer_fixture, process_no_comment)
+{
+    static constexpr const char* content =
+        "#include <string>\n"
+        "\n"
+        "  // just a normal comment\n"
+        "\n"
+        ;
+    setup_lexer(content);
+
+    auto token = *lexer.next_token();
+    check_token(token.name, symbol_t::HASHTAG,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::TEXT,
+                token.content, "include");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::WHITESPACE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::TEXT,
+                token.content, "<string>");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::NEWLINE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::NEWLINE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::WHITESPACE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::WHITESPACE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::BEGIN_NLINE_COMMENT,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::WHITESPACE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::TEXT,
+                token.content, "just");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::WHITESPACE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::TEXT,
+                token.content, "a");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::WHITESPACE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::TEXT,
+                token.content, "normal");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::WHITESPACE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::TEXT,
+                token.content, "comment");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::NEWLINE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::NEWLINE,
+                token.content, "");
+}
+
+TEST_F(lexer_fixture, process_one_line_comment)
+{
+    static constexpr const char* content =
+        "// comment\n"
+        " /// special_comment \n"
+        ;
+    setup_lexer(content);
+
+    auto token = *lexer.next_token();
+    check_token(token.name, symbol_t::BEGIN_NLINE_COMMENT,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::WHITESPACE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::TEXT,
+                token.content, "comment");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::NEWLINE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::WHITESPACE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::BEGIN_SLINE_COMMENT,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::WHITESPACE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::TEXT,
+                token.content, "special_comment");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::WHITESPACE,
+                token.content, "");
+
+    token = *lexer.next_token();
+    check_token(token.name, symbol_t::NEWLINE,
+                token.content, "");
+
+}
+
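+// The legacy comment-processing tests below still target the old interface
+// (write_file/get_tokens/leading_ws_count); they are kept commented out
+// until they are ported to the new token-stream API.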
+// " // just a normal comment\n" +// ; +// +// write_file(content); +// Lexer lexer(file); +// lexer.process(); +// const auto& tokens = lexer.get_tokens(); +// +// EXPECT_EQ(tokens.size(), static_cast(11)); +// +// check_token(tokens[0].name, symbol_t::TEXT, +// tokens[0].content, "#include "); +// check_token(tokens[1].name, symbol_t::NEWLINE, +// tokens[1].content, ""); +// check_token(tokens[2].name, symbol_t::NEWLINE, +// tokens[2].content, ""); +// check_token(tokens[3].name, symbol_t::BEGIN_LINE_COMMENT, +// tokens[3].content, ""); +// check_token(tokens[4].name, symbol_t::TEXT, +// tokens[4].content, "a very special comment"); +// EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); +// check_token(tokens[5].name, symbol_t::NEWLINE, +// tokens[5].content, ""); +// check_token(tokens[6].name, symbol_t::NEWLINE, +// tokens[6].content, ""); +// check_token(tokens[7].name, symbol_t::BEGIN_LINE_COMMENT, +// tokens[7].content, ""); +// check_token(tokens[8].name, symbol_t::TEXT, +// tokens[8].content, "another very special comment"); +// EXPECT_EQ(tokens[8].leading_ws_count, static_cast(1)); +// check_token(tokens[9].name, symbol_t::NEWLINE, +// tokens[9].content, ""); +// check_token(tokens[10].name, symbol_t::END_OF_FILE, +// tokens[10].content, ""); +//} +// +//TEST_F(lexer_fixture, process_one_block_comment) +//{ +// static constexpr const char* content = +// "#include \n" +// "\n" +// " // just a normal comment\n" +// " /*! a very special comment */\n" +// "\n" +// ; +// +// write_file(content); +// Lexer lexer(file); +// lexer.process(); +// const auto& tokens = lexer.get_tokens(); +// +// EXPECT_EQ(tokens.size(), static_cast(9)); +// +// check_token(tokens[0].name, symbol_t::TEXT, +// tokens[0].content, "#include "); +// check_token(tokens[1].name, symbol_t::NEWLINE, +// tokens[1].content, ""); +// check_token(tokens[2].name, symbol_t::NEWLINE, +// tokens[2].content, ""); +// check_token(tokens[3].name, symbol_t::BEGIN_BLOCK_COMMENT, +// tokens[3].content, ""); +// check_token(tokens[4].name, symbol_t::TEXT, +// tokens[4].content, "a very special comment"); +// EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); +// check_token(tokens[5].name, symbol_t::END_BLOCK_COMMENT, +// tokens[5].content, ""); +// check_token(tokens[6].name, symbol_t::NEWLINE, +// tokens[6].content, ""); +// check_token(tokens[7].name, symbol_t::NEWLINE, +// tokens[7].content, ""); +// check_token(tokens[8].name, symbol_t::END_OF_FILE, +// tokens[8].content, ""); +//} +// +//TEST_F(lexer_fixture, process_two_block_comment) +//{ +// static constexpr const char* content = +// "#include \n" +// "\n" +// " // just a normal comment\n" +// " /*! a very special comment */\n" +// "\n" +// " // just a normal comment\n" +// " /*! 
+//        "  /*! another very \n"
+//        " * special comment   \n"
+//        "*/"
+//        "  /* just a normal comment\n */"
+//        ;
+//
+//    write_file(content);
+//    Lexer lexer(file);
+//    lexer.process();
+//    const auto& tokens = lexer.get_tokens();
+//
+//    EXPECT_EQ(tokens.size(), static_cast<size_t>(16));
+//
+//    check_token(tokens[0].name, symbol_t::TEXT,
+//                tokens[0].content, "#include <string>");
+//    check_token(tokens[1].name, symbol_t::NEWLINE,
+//                tokens[1].content, "");
+//    check_token(tokens[2].name, symbol_t::NEWLINE,
+//                tokens[2].content, "");
+//    check_token(tokens[3].name, symbol_t::BEGIN_BLOCK_COMMENT,
+//                tokens[3].content, "");
+//    check_token(tokens[4].name, symbol_t::TEXT,
+//                tokens[4].content, "a very special comment");
+//    EXPECT_EQ(tokens[4].leading_ws_count, static_cast<uint32_t>(1));
+//    check_token(tokens[5].name, symbol_t::END_BLOCK_COMMENT,
+//                tokens[5].content, "");
+//    check_token(tokens[6].name, symbol_t::NEWLINE,
+//                tokens[6].content, "");
+//    check_token(tokens[7].name, symbol_t::NEWLINE,
+//                tokens[7].content, "");
+//    check_token(tokens[8].name, symbol_t::BEGIN_BLOCK_COMMENT,
+//                tokens[8].content, "");
+//    check_token(tokens[9].name, symbol_t::TEXT,
+//                tokens[9].content, "another very");
+//    EXPECT_EQ(tokens[9].leading_ws_count, static_cast<uint32_t>(1));
+//    check_token(tokens[10].name, symbol_t::NEWLINE,
+//                tokens[10].content, "");
+//    check_token(tokens[11].name, symbol_t::STAR,
+//                tokens[11].content, "");
+//    check_token(tokens[12].name, symbol_t::TEXT,
+//                tokens[12].content, "special comment");
+//    EXPECT_EQ(tokens[12].leading_ws_count, static_cast<uint32_t>(1));
+//    check_token(tokens[13].name, symbol_t::NEWLINE,
+//                tokens[13].content, "");
+//    check_token(tokens[14].name, symbol_t::END_BLOCK_COMMENT,
+//                tokens[14].content, "");
+//    check_token(tokens[15].name, symbol_t::END_OF_FILE,
+//                tokens[15].content, "");
+//}
+//
+//TEST_F(lexer_fixture, process_line_block_comment)
+//{
+//    static constexpr const char* content =
+//        "#include <string>\n"
+//        "\n"
+//        "  // just a normal comment\n"
+//        "  /// a very special comment */\n"
+//        "\n"
+//        "  // just a normal comment\n"
+//        "  /*! another very \n"
+//        " * special comment   \n"
+//        "*/"
+//        "  /* just a normal comment\n */"
+//        ;
+//
+//    write_file(content);
+//    Lexer lexer(file);
+//    lexer.process();
+//    const auto& tokens = lexer.get_tokens();
+//
+//    EXPECT_EQ(tokens.size(), static_cast<size_t>(16));
+//
+//    check_token(tokens[0].name, symbol_t::TEXT,
+//                tokens[0].content, "#include <string>");
+//    check_token(tokens[1].name, symbol_t::NEWLINE,
+//                tokens[1].content, "");
+//    check_token(tokens[2].name, symbol_t::NEWLINE,
+//                tokens[2].content, "");
+//    check_token(tokens[3].name, symbol_t::BEGIN_LINE_COMMENT,
+//                tokens[3].content, "");
+//    check_token(tokens[4].name, symbol_t::TEXT,
+//                tokens[4].content, "a very special comment");
+//    EXPECT_EQ(tokens[4].leading_ws_count, static_cast<uint32_t>(1));
+//    check_token(tokens[5].name, symbol_t::END_BLOCK_COMMENT,
+//                tokens[5].content, "");
+//    check_token(tokens[6].name, symbol_t::NEWLINE,
+//                tokens[6].content, "");
+//    check_token(tokens[7].name, symbol_t::NEWLINE,
+//                tokens[7].content, "");
+//    check_token(tokens[8].name, symbol_t::BEGIN_BLOCK_COMMENT,
+//                tokens[8].content, "");
+//    check_token(tokens[9].name, symbol_t::TEXT,
+//                tokens[9].content, "another very");
+//    EXPECT_EQ(tokens[9].leading_ws_count, static_cast<uint32_t>(1));
+//    check_token(tokens[10].name, symbol_t::NEWLINE,
+//                tokens[10].content, "");
+//    check_token(tokens[11].name, symbol_t::STAR,
+//                tokens[11].content, "");
+//    check_token(tokens[12].name, symbol_t::TEXT,
+//                tokens[12].content, "special comment");
+//    EXPECT_EQ(tokens[12].leading_ws_count, static_cast<uint32_t>(1));
+//    check_token(tokens[13].name, symbol_t::NEWLINE,
+//                tokens[13].content, "");
+//    check_token(tokens[14].name, symbol_t::END_BLOCK_COMMENT,
+//                tokens[14].content, "");
+//    check_token(tokens[15].name, symbol_t::END_OF_FILE,
+//                tokens[15].content, "");
+//}
+
 } // namespace core
 } // namespace docgen
diff --git a/test/core/trie_unittest.cpp b/test/core/trie_unittest.cpp
new file mode 100644
index 0000000..43bf550
--- /dev/null
+++ b/test/core/trie_unittest.cpp
@@ -0,0 +1,102 @@
+#define private public
+
+#include <core/trie.hpp>
+#include <gtest/gtest.h>
+#include <iostream>
+
+namespace docgen {
+namespace core {
+
+enum class MockSymbol {
+    symbol1,
+    symbol2
+};
+
+struct trie_fixture : ::testing::Test
+{
+protected:
+    using symbol_t = MockSymbol;
+    using trie_t = Trie<symbol_t>;
+
+    void print_trie(const trie_t& trie)
+    {
+        print_trie(trie.root_);
+    }
+
+    void print_trie(const trie_t::TrieNode& node)
+    {
+        if (node.is_accept()) {
+            std::cout << "symbol: " << (int) *node.get_symbol() << std::endl;
+        }
+        std::cout << "\nsize: " << node.children_.size() << std::endl;
+        for (auto it = node.children_.begin(); it != node.children_.end(); ++it) {
+            std::cout << it->first << "--" << std::endl;
+            print_trie(it->second);
+            std::cout << "--" << std::endl;
+        }
+    }
+};
+
+TEST_F(trie_fixture, trie_ctor)
+{
+    trie_t trie({
+            {"adf", symbol_t::symbol1},
+            {"asdf", symbol_t::symbol2},
+            {"bscdf", symbol_t::symbol1}
+        });
+
+    auto symbol = trie.get_symbol();
+    EXPECT_FALSE((bool) symbol);
+
+    //trie.transition('a');
+    //EXPECT_FALSE((bool) trie.get_symbol());
+    //trie.transition('d');
+    //EXPECT_FALSE((bool) trie.get_symbol());
+    //trie.transition('b');
+    //EXPECT_FALSE((bool) trie.get_symbol());
+
+    trie.transition('a');
+    EXPECT_FALSE((bool) trie.get_symbol());
+    trie.transition('d');
+    EXPECT_FALSE((bool) trie.get_symbol());
+    trie.transition('f');
+    EXPECT_TRUE((bool) trie.get_symbol());
+
+    trie.back_transition();
+    trie.back_transition();
+    trie.back_transition();
+
+    trie.transition('a');
+    EXPECT_FALSE((bool) trie.get_symbol());
+    trie.transition('d');
+    EXPECT_FALSE((bool) trie.get_symbol());
+    trie.transition('f');
+    EXPECT_TRUE((bool) trie.get_symbol());
+
+    trie.reset();
+
+    trie.transition('a');
+    EXPECT_FALSE((bool) trie.get_symbol());
+    trie.transition('s');
+    EXPECT_FALSE((bool) trie.get_symbol());
+    trie.transition('d');
+    EXPECT_FALSE((bool) trie.get_symbol());
+    trie.transition('f');
+    EXPECT_TRUE((bool) trie.get_symbol());
+
+    trie.reset();
+
+    trie.transition('b');
+    EXPECT_FALSE((bool) trie.get_symbol());
+    trie.transition('s');
+    EXPECT_FALSE((bool) trie.get_symbol());
+    trie.transition('c');
+    EXPECT_FALSE((bool) trie.get_symbol());
+    trie.transition('d');
+    EXPECT_FALSE((bool) trie.get_symbol());
+    trie.transition('f');
+    EXPECT_TRUE((bool) trie.get_symbol());
+}
+
+} // namespace core
+} // namespace docgen

From 10269ca3b6dbfa3aa4b0a67b4d7966a185a24a59 Mon Sep 17 00:00:00 2001
From: James Yang
Date: Fri, 17 Jan 2020 21:06:03 -0500
Subject: [PATCH 04/23] Add cmake changes to build new unittests (no more
 lexer_routines_unittest also)

---
 test/CMakeLists.txt | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index c8abf0f..20bf694 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -41,12 +41,14 @@ add_custom_command(
                    $<TARGET_FILE_DIR:io_unittests>/io_data)
 
 ######################################################
-# File IO Unit Tests
+# Core Unit Tests
 ######################################################
 
 add_executable(core_unittests
-    ${CMAKE_CURRENT_SOURCE_DIR}/core/lexer_routines_unittest.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/core/trie_unittest.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/core/lexer_unittest.cpp
 )
 
 create_test("core_unittests" core_unittests)
+
+

From 99431276efa47bdc3e3a2ac519a886919a5c494b Mon Sep 17 00:00:00 2001
From: James Yang
Date: Fri, 17 Jan 2020 23:31:37 -0500
Subject: [PATCH 05/23] Fix process when string termination given (changed to
 flush)

---
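The test fixture previously pushed a literal 0 through process() to force the
last token out; flush() now makes end-of-input explicit. Sketch of the
intended call pattern (illustrative only):

    Lexer lexer;
    for (char c : input) lexer.process(c);
    lexer.flush();                         // emit whatever is still buffered
    while (auto t = lexer.next_token()) {
        // consume *t
    }
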
 CMakeLists.txt                                    |   3 +
 src/CMakeLists.txt                                |   8 +
 src/core/{lexer.hpp => lexer/lexer.cpp}           | 104 +--
 src/core/lexer/lexer.hpp                          | 100 +++
 src/core/{ => lexer}/status.hpp                   |   3 +-
 src/core/{ => lexer}/trie.hpp                     |   9 +-
 src/core/symbol.hpp                               |   2 -
 src/core/token.hpp                                |   2 +-
 test/CMakeLists.txt                               |   6 +-
 test/core/{ => lexer}/lexer_base_fixture.hpp      |   7 -
 test/core/{ => lexer}/lexer_routines_unittest.cpp |   0
 test/core/lexer/lexer_unittest.cpp                | 620 ++++++++++++++++++
 test/core/lexer/trie_unittest.cpp                 | 336 ++++++++++
 test/core/lexer_unittest.cpp                      | 453 -------------
 test/core/trie_unittest.cpp                       | 102 ---
 15 files changed, 1103 insertions(+), 652 deletions(-)
 create mode 100644 src/CMakeLists.txt
 create mode 100644 src/core/lexer/lexer.hpp
 rename src/core/{lexer.hpp => lexer/lexer.cpp} (52%)
 rename src/core/{ => lexer}/status.hpp (86%)
 rename src/core/{ => lexer}/trie.hpp (96%)
 rename test/core/{ => lexer}/lexer_base_fixture.hpp (74%)
 rename test/core/{ => lexer}/lexer_routines_unittest.cpp (100%)
 create mode 100644 test/core/lexer/lexer_unittest.cpp
 create mode 100644 test/core/lexer/trie_unittest.cpp
 delete mode 100644 test/core/lexer_unittest.cpp
 delete mode 100644 test/core/trie_unittest.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7ae5c08..c1208dd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -57,5 +57,8 @@ find_package(nlohmann_json 3.2.0 REQUIRED)
 # add libs subdirectory
 add_subdirectory(${PROJECT_SOURCE_DIR}/libs ${PROJECT_BINARY_DIR}/libs)
 
+# add src subdirectory
+add_subdirectory(${PROJECT_SOURCE_DIR}/src ${PROJECT_BINARY_DIR}/src)
+
 # add test subdirectory
 add_subdirectory(${PROJECT_SOURCE_DIR}/test ${PROJECT_BINARY_DIR}/test)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..96870d9
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Create object files for lexer
+add_library(LEXER_LIB_OBJECTS OBJECT
+    ${CMAKE_CURRENT_SOURCE_DIR}/core/lexer/lexer.cpp
+    )
+target_include_directories(LEXER_LIB_OBJECTS PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}
+    ${ETERNAL_DIR}/include
+    )
diff --git a/src/core/lexer.hpp b/src/core/lexer/lexer.cpp
similarity index 52%
rename from src/core/lexer.hpp
rename to src/core/lexer/lexer.cpp
index 40706ab..f727e49 100644
--- a/src/core/lexer.hpp
+++ b/src/core/lexer/lexer.cpp
@@ -1,41 +1,8 @@
-#pragma once
-#include <core/symbol.hpp>
-#include <core/token.hpp>
-#include <core/status.hpp>
-#include <core/trie.hpp>
+#include <core/lexer/lexer.hpp>
 
 namespace docgen {
 namespace core {
-
-struct Lexer
-{
-    using symbol_t = Symbol;
-    using token_t = Token<symbol_t>;
-    using status_t = Status<token_t>;
-
-    Lexer();
-
-    void process(char c);
-    std::optional<token_t> next_token();
-
-private:
-
-    bool is_backtracking() const;
-    void set_backtracking();
-    void reset_backtracking();
-    void backtrack(char c);
-
-    enum class State : bool {
-        backtrack,
-        non_backtrack
-    };
-
-    Trie<symbol_t> trie_;
-    std::string text_;
-    std::string buf_;
-    State state_ = State::non_backtrack;
-    status_t status_;
-};
+namespace lexer {
 
 ///////////////////////////////////
 // Lexer Implementation
@@ -64,32 +31,11 @@ namespace core {
         {"@param", Symbol::PARAM},
         {"@return", Symbol::RETURN}
     })
-{
-    // TODO: reserve space for status_.tokens?
-}
+{}
 
-inline void Lexer::process(char c)
+void Lexer::process(char c)
 {
-    // if current state is accepting
-    if (trie_.is_accept()) {
-        if (!this->is_backtracking()) {
-            this->set_backtracking();
-        }
-        // ignore contents in buffer up until now
-        // this optimization can be done because we look for longest match
-        buf_.clear();
-    }
+    this->update_state();
 
     auto it = trie_.get_children().find(c);
 
@@ -104,61 +50,44 @@ void Lexer::process(char c)
     // otherwise, currently backtracking
     this->backtrack(c);
 }
 
-inline bool Lexer::is_backtracking() const
-{
-    return state_ == State::backtrack;
-}
-
-inline void Lexer::set_backtracking()
-{
-    state_ = State::backtrack;
-}
-
-inline void Lexer::reset_backtracking()
-{
-    state_ = State::non_backtrack;
-}
-
-inline void Lexer::backtrack(char c)
+void Lexer::backtrack(char c)
 {
-    // reset to non-backtracking
-    this->reset_backtracking();
-
-    // tokenize and clear text
-    if (!text_.empty()) {
-        status_.tokens.emplace(symbol_t::TEXT, std::move(text_));
-        text_.clear();
-    }
+    // tokenize text
+    this->tokenize_text();
 
     // tokenize symbol
     for (uint32_t i = 0; i < buf_.size(); ++i) {
         trie_.back_transition();
     }
     assert(trie_.is_accept());
     auto opt_symbol = trie_.get_symbol();
     assert(static_cast<bool>(opt_symbol));
     status_.tokens.emplace(*opt_symbol);
 
     // move and clear buf_ to temp string for reprocessing
     std::string reprocess_str(std::move(buf_));
-    buf_.clear();
     reprocess_str.push_back(c);
 
-    // reset trie
-    trie_.reset();
+    // reset
+    this->reset();
 
     // reprocess the rest
     for (char c : reprocess_str) {
         this->process(c);
     }
 }
 
-inline std::optional<Lexer::token_t> Lexer::next_token()
+void Lexer::flush()
 {
-    if (!status_.tokens.empty()) {
-        token_t token = std::move(status_.tokens.front());
-        status_.tokens.pop();
-        return token;
+    this->update_state();
+
+    if (this->is_backtracking()) {
+        return this->backtrack(0);
     }
-    return {};
+
+    // non-backtracking: no parent is an accepting node
+    // append buf_ to text_ and tokenize text_
+    // reset all other fields
+    text_.append(buf_);
+    this->tokenize_text();
+    this->reset();
 }
 
+} // namespace lexer
 } // namespace core
 } // namespace docgen
namespace docgen diff --git a/src/core/lexer/lexer.hpp b/src/core/lexer/lexer.hpp new file mode 100644 index 0000000..03b65fa --- /dev/null +++ b/src/core/lexer/lexer.hpp @@ -0,0 +1,100 @@ +#pragma once +#include +#include +#include +#include + +namespace docgen { +namespace core { +namespace lexer { + +struct Lexer +{ + using symbol_t = Symbol; + using token_t = Token; + using status_t = Status; + + Lexer(); + + void process(char c); + void flush(); + std::optional next_token(); + +private: + + void tokenize_text(); + bool is_backtracking() const; + void set_backtracking(); + void reset_backtracking(); + void backtrack(char c); + void update_state(); + void reset(); + + enum class State : bool { + backtrack, + non_backtrack + }; + + Trie trie_; + std::string text_; + std::string buf_; + State state_ = State::non_backtrack; + status_t status_; +}; + +inline void Lexer::tokenize_text() +{ + if (!text_.empty()) { + status_.tokens.emplace(symbol_t::TEXT, std::move(text_)); + } +} + +inline bool Lexer::is_backtracking() const +{ + return state_ == State::backtrack; +} + +inline void Lexer::set_backtracking() +{ + state_ = State::backtrack; +} + +inline void Lexer::reset_backtracking() +{ + state_ = State::non_backtrack; +} + +inline void Lexer::update_state() +{ + // if current state is accepting + if (trie_.is_accept()) { + if (!this->is_backtracking()) { + this->set_backtracking(); + } + // ignore contents in buffer up until now + // this optimization can be done because we look for longest match + buf_.clear(); + } +} + +inline std::optional Lexer::next_token() +{ + if (!status_.tokens.empty()) { + token_t token = std::move(status_.tokens.front()); + status_.tokens.pop(); + return token; + } + return {}; +} + +inline void Lexer::reset() +{ + text_.clear(); + buf_.clear(); + trie_.reset(); + reset_backtracking(); +} + +} // namespace lexer +} // namespace core +} // namespace docgen diff --git a/src/core/status.hpp b/src/core/lexer/status.hpp similarity index 86% rename from src/core/status.hpp rename to src/core/lexer/status.hpp index 9294ea7..e4af461 100644 --- a/src/core/status.hpp +++ b/src/core/lexer/status.hpp @@ -1,9 +1,9 @@ #pragma once #include -#include namespace docgen { namespace core { +namespace lexer { template struct Status @@ -14,5 +14,6 @@ struct Status token_arr_t tokens; }; +} // namespace lexer } // namespace core } // namespace docgen diff --git a/src/core/trie.hpp b/src/core/lexer/trie.hpp similarity index 96% rename from src/core/trie.hpp rename to src/core/lexer/trie.hpp index 71dbb5c..1fe79f0 100644 --- a/src/core/trie.hpp +++ b/src/core/lexer/trie.hpp @@ -8,14 +8,16 @@ namespace docgen { namespace core { +namespace lexer { template struct Trie { private: - using pair_t = std::pair; + struct TrieNode; // forward declaration public: + using pair_t = std::pair; // Constructs trie node from a list of pairs of string and symbol. // The string must be of type std::string_view and it must not be empty. 
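// A minimal usage sketch (annotation, not part of this diff) of the trie
// interface documented above, assuming a small caller-defined symbol enum:
//
//   enum class Sym { A, AB };
//   Trie<Sym> trie({{"a", Sym::A}, {"ab", Sym::AB}});
//   trie.transition('a');                 // follow the edge labeled 'a'
//   assert(trie.is_accept());             // "a" is a complete pattern
//   trie.transition('b');
//   assert(*trie.get_symbol() == Sym::AB);
//   trie.back_transition();               // climb one level, back to the "a" node
//   trie.reset();                         // jump back to the root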
@@ -32,7 +34,7 @@ struct Trie void transition(char c); void back_transition(); bool is_accept() const; - auto get_children(); + std::unordered_map& get_children(); bool is_reset() const; void reset(); const std::optional& get_symbol() const; @@ -159,7 +161,7 @@ Trie::is_accept() const } template -inline auto +inline std::unordered_map::TrieNode>& Trie::get_children() { return curr_node_.get().get_children(); @@ -197,5 +199,6 @@ Trie::get_symbol() const return curr_node_.get().get_symbol(); } +} // namespace lexer } // namespace core } // namespace docgen diff --git a/src/core/symbol.hpp b/src/core/symbol.hpp index 34c1969..0bb7772 100644 --- a/src/core/symbol.hpp +++ b/src/core/symbol.hpp @@ -1,7 +1,5 @@ #pragma once #include -#include -#include namespace docgen { namespace core { diff --git a/src/core/token.hpp b/src/core/token.hpp index cc02f37..4ef3798 100644 --- a/src/core/token.hpp +++ b/src/core/token.hpp @@ -1,6 +1,6 @@ #pragma once #include -#include "symbol.hpp" +#include namespace docgen { namespace core { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 20bf694..1ddf86a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -45,8 +45,10 @@ add_custom_command( ###################################################### add_executable(core_unittests - ${CMAKE_CURRENT_SOURCE_DIR}/core/trie_unittest.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/core/lexer_unittest.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/core/lexer/trie_unittest.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/core/lexer/lexer_unittest.cpp + # Source dependency + $ ) create_test("core_unittests" core_unittests) diff --git a/test/core/lexer_base_fixture.hpp b/test/core/lexer/lexer_base_fixture.hpp similarity index 74% rename from test/core/lexer_base_fixture.hpp rename to test/core/lexer/lexer_base_fixture.hpp index b37b85f..46ba63a 100644 --- a/test/core/lexer_base_fixture.hpp +++ b/test/core/lexer/lexer_base_fixture.hpp @@ -30,13 +30,6 @@ struct lexer_base_fixture : ::testing::Test fwrite(content, sizeof(char), strlen(content), fp); fclose(fp); } - - void check_token(symbol_t actual_sym, symbol_t expected_sym, - const std::string& actual_str, const std::string& expected_str) - { - EXPECT_EQ(actual_sym, expected_sym); - EXPECT_EQ(actual_str, expected_str); - } }; } // namespace core diff --git a/test/core/lexer_routines_unittest.cpp b/test/core/lexer/lexer_routines_unittest.cpp similarity index 100% rename from test/core/lexer_routines_unittest.cpp rename to test/core/lexer/lexer_routines_unittest.cpp diff --git a/test/core/lexer/lexer_unittest.cpp b/test/core/lexer/lexer_unittest.cpp new file mode 100644 index 0000000..9fead88 --- /dev/null +++ b/test/core/lexer/lexer_unittest.cpp @@ -0,0 +1,620 @@ +#include +#include + +namespace docgen { +namespace core { +namespace lexer { + +struct lexer_fixture : ::testing::Test +{ +protected: + using status_t = typename Lexer::status_t; + using token_t = typename Lexer::token_t; + using symbol_t = typename Lexer::symbol_t; + + Lexer lexer; + std::optional token; + + void setup_lexer(const char* content) + { + std::string str(content); + for (char c : str) { + lexer.process(c); + } + lexer.flush(); + } +}; + +//////////////////////////////////////////////////////////////////// +// Individual Symbol TESTS +//////////////////////////////////////////////////////////////////// + +// NEWLINE +TEST_F(lexer_fixture, lexer_newline) +{ + static constexpr const char* content = + "somecrazy1492text\nmvn2b" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); 
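+ // every character scanned before the first trie match is folded into one
+ // TEXT token; symbol tokens such as NEWLINE below carry empty content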
+ EXPECT_EQ(token->content, "somecrazy1492text"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "mvn2b"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE ( ) +TEST_F(lexer_fixture, lexer_whitespace_space) +{ + static constexpr const char* content = + ",m.,m. abn" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, ",m.,m."); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abn"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (\t) +TEST_F(lexer_fixture, lexer_whitespace_t) +{ + static constexpr const char* content = + "h0f2n.1\t1234|" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "h0f2n.1"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "1234|"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (\v) +TEST_F(lexer_fixture, lexer_whitespace_v) +{ + static constexpr const char* content = + "hello!\v" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "hello!"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (\r) +TEST_F(lexer_fixture, lexer_whitespace_r) +{ + static constexpr const char* content = + "hello!\rwsdescorrld!!" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "hello!"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "wsdescorrld!!"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (\f) +TEST_F(lexer_fixture, lexer_whitespace_f) +{ + static constexpr const char* content = + "hello!\fwsdescorrld!!" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "hello!"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "wsdescorrld!!"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (;) +TEST_F(lexer_fixture, lexer_semicolon) +{ + static constexpr const char* content = + ";wsdescorrld!!" 
+ ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::SEMICOLON); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "wsdescorrld!!"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +//////////////////////////////////////////////////////////////////// +// Mix TESTS +//////////////////////////////////////////////////////////////////// + +TEST_F(lexer_fixture, lexer_test_1) +{ + static constexpr const char* content = + "#include // some comment\n" + "\n" + "void f();" + ; + + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::HASHTAG); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "include"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "some"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "void"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "f()"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::SEMICOLON); + EXPECT_EQ(token->content, ""); +} + +TEST_F(lexer_fixture, process_no_comment) +{ + static constexpr const char* content = + "#include \n" + "\n" + " // just a normal comment\n" + "\n" + ; + + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::HASHTAG); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "include"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + 
EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "just"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "a"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "normal"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); +} + +TEST_F(lexer_fixture, process_one_line_comment) +{ + static constexpr const char* content = + "// comment\n" + " /// special_comment \n" + ; + + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "special_comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); +} + +//TEST_F(lexer_fixture, process_two_line_comment) +//{ +// static constexpr const char* content = +// "#include \n" +// "\n" +// " // just a normal comment\n" +// " /// a very special comment \n" +// "\n" +// " // just a normal comment\n" +// " /// another very special comment \n" +// " // just a normal comment\n" +// ; +// +// write_file(content); +// Lexer lexer(file); +// lexer.process(); +// const auto& tokens = lexer.get_tokens(); +// +// EXPECT_EQ(tokens.size(), static_cast(11)); +// +// check_token(tokens[0]->name, symbol_t::TEXT, +// tokens[0]->content, "#include "); +// check_token(tokens[1]->name, symbol_t::NEWLINE, +// tokens[1]->content, ""); +// check_token(tokens[2]->name, symbol_t::NEWLINE, +// tokens[2]->content, ""); +// check_token(tokens[3]->name, symbol_t::BEGIN_LINE_COMMENT, +// tokens[3]->content, ""); +// check_token(tokens[4]->name, symbol_t::TEXT, +// tokens[4]->content, "a very special comment"); +// 
EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); +// check_token(tokens[5]->name, symbol_t::NEWLINE, +// tokens[5]->content, ""); +// check_token(tokens[6]->name, symbol_t::NEWLINE, +// tokens[6]->content, ""); +// check_token(tokens[7]->name, symbol_t::BEGIN_LINE_COMMENT, +// tokens[7]->content, ""); +// check_token(tokens[8]->name, symbol_t::TEXT, +// tokens[8]->content, "another very special comment"); +// EXPECT_EQ(tokens[8].leading_ws_count, static_cast(1)); +// check_token(tokens[9]->name, symbol_t::NEWLINE, +// tokens[9]->content, ""); +// check_token(tokens[10]->name, symbol_t::END_OF_FILE, +// tokens[10]->content, ""); +//} +// +//TEST_F(lexer_fixture, process_one_block_comment) +//{ +// static constexpr const char* content = +// "#include \n" +// "\n" +// " // just a normal comment\n" +// " /*! a very special comment */\n" +// "\n" +// ; +// +// write_file(content); +// Lexer lexer(file); +// lexer.process(); +// const auto& tokens = lexer.get_tokens(); +// +// EXPECT_EQ(tokens.size(), static_cast(9)); +// +// check_token(tokens[0]->name, symbol_t::TEXT, +// tokens[0]->content, "#include "); +// check_token(tokens[1]->name, symbol_t::NEWLINE, +// tokens[1]->content, ""); +// check_token(tokens[2]->name, symbol_t::NEWLINE, +// tokens[2]->content, ""); +// check_token(tokens[3]->name, symbol_t::BEGIN_BLOCK_COMMENT, +// tokens[3]->content, ""); +// check_token(tokens[4]->name, symbol_t::TEXT, +// tokens[4]->content, "a very special comment"); +// EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); +// check_token(tokens[5]->name, symbol_t::END_BLOCK_COMMENT, +// tokens[5]->content, ""); +// check_token(tokens[6]->name, symbol_t::NEWLINE, +// tokens[6]->content, ""); +// check_token(tokens[7]->name, symbol_t::NEWLINE, +// tokens[7]->content, ""); +// check_token(tokens[8]->name, symbol_t::END_OF_FILE, +// tokens[8]->content, ""); +//} +// +//TEST_F(lexer_fixture, process_two_block_comment) +//{ +// static constexpr const char* content = +// "#include \n" +// "\n" +// " // just a normal comment\n" +// " /*! a very special comment */\n" +// "\n" +// " // just a normal comment\n" +// " /*! 
another very \n" +// " * special comment \n" +// "*/" +// " /* just a normal comment\n */" +// ; +// +// write_file(content); +// Lexer lexer(file); +// lexer.process(); +// const auto& tokens = lexer.get_tokens(); +// +// EXPECT_EQ(tokens.size(), static_cast(16)); +// +// check_token(tokens[0]->name, symbol_t::TEXT, +// tokens[0]->content, "#include "); +// check_token(tokens[1]->name, symbol_t::NEWLINE, +// tokens[1]->content, ""); +// check_token(tokens[2]->name, symbol_t::NEWLINE, +// tokens[2]->content, ""); +// check_token(tokens[3]->name, symbol_t::BEGIN_BLOCK_COMMENT, +// tokens[3]->content, ""); +// check_token(tokens[4]->name, symbol_t::TEXT, +// tokens[4]->content, "a very special comment"); +// EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); +// check_token(tokens[5]->name, symbol_t::END_BLOCK_COMMENT, +// tokens[5]->content, ""); +// check_token(tokens[6]->name, symbol_t::NEWLINE, +// tokens[6]->content, ""); +// check_token(tokens[7]->name, symbol_t::NEWLINE, +// tokens[7]->content, ""); +// check_token(tokens[8]->name, symbol_t::BEGIN_BLOCK_COMMENT, +// tokens[8]->content, ""); +// check_token(tokens[9]->name, symbol_t::TEXT, +// tokens[9]->content, "another very"); +// EXPECT_EQ(tokens[9].leading_ws_count, static_cast(1)); +// check_token(tokens[10]->name, symbol_t::NEWLINE, +// tokens[10]->content, ""); +// check_token(tokens[11]->name, symbol_t::STAR, +// tokens[11]->content, ""); +// check_token(tokens[12]->name, symbol_t::TEXT, +// tokens[12]->content, "special comment"); +// EXPECT_EQ(tokens[12].leading_ws_count, static_cast(1)); +// check_token(tokens[13]->name, symbol_t::NEWLINE, +// tokens[13]->content, ""); +// check_token(tokens[14]->name, symbol_t::END_BLOCK_COMMENT, +// tokens[14]->content, ""); +// check_token(tokens[15]->name, symbol_t::END_OF_FILE, +// tokens[15]->content, ""); +//} +// +//TEST_F(lexer_fixture, process_line_block_comment) +//{ +// static constexpr const char* content = +// "#include \n" +// "\n" +// " // just a normal comment\n" +// " /// a very special comment */\n" +// "\n" +// " // just a normal comment\n" +// " /*! 
another very \n" +// " * special comment \n" +// "*/" +// " /* just a normal comment\n */" +// ; +// +// write_file(content); +// Lexer lexer(file); +// lexer.process(); +// const auto& tokens = lexer.get_tokens(); +// +// EXPECT_EQ(tokens.size(), static_cast(16)); +// +// check_token(tokens[0]->name, symbol_t::TEXT, +// tokens[0]->content, "#include "); +// check_token(tokens[1]->name, symbol_t::NEWLINE, +// tokens[1]->content, ""); +// check_token(tokens[2]->name, symbol_t::NEWLINE, +// tokens[2]->content, ""); +// check_token(tokens[3]->name, symbol_t::BEGIN_LINE_COMMENT, +// tokens[3]->content, ""); +// check_token(tokens[4]->name, symbol_t::TEXT, +// tokens[4]->content, "a very special comment"); +// EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); +// check_token(tokens[5]->name, symbol_t::END_BLOCK_COMMENT, +// tokens[5]->content, ""); +// check_token(tokens[6]->name, symbol_t::NEWLINE, +// tokens[6]->content, ""); +// check_token(tokens[7]->name, symbol_t::NEWLINE, +// tokens[7]->content, ""); +// check_token(tokens[8]->name, symbol_t::BEGIN_BLOCK_COMMENT, +// tokens[8]->content, ""); +// check_token(tokens[9]->name, symbol_t::TEXT, +// tokens[9]->content, "another very"); +// EXPECT_EQ(tokens[9].leading_ws_count, static_cast(1)); +// check_token(tokens[10]->name, symbol_t::NEWLINE, +// tokens[10]->content, ""); +// check_token(tokens[11]->name, symbol_t::STAR, +// tokens[11]->content, ""); +// check_token(tokens[12]->name, symbol_t::TEXT, +// tokens[12]->content, "special comment"); +// EXPECT_EQ(tokens[12].leading_ws_count, static_cast(1)); +// check_token(tokens[13]->name, symbol_t::NEWLINE, +// tokens[13]->content, ""); +// check_token(tokens[14]->name, symbol_t::END_BLOCK_COMMENT, +// tokens[14]->content, ""); +// check_token(tokens[15]->name, symbol_t::END_OF_FILE, +// tokens[15]->content, ""); +//} + +} // namespace lexer +} // namespace core +} // namespace docgen diff --git a/test/core/lexer/trie_unittest.cpp b/test/core/lexer/trie_unittest.cpp new file mode 100644 index 0000000..206a6d7 --- /dev/null +++ b/test/core/lexer/trie_unittest.cpp @@ -0,0 +1,336 @@ +#include +#include + +namespace docgen { +namespace core { +namespace lexer { + +enum class MockSymbol { + symbol_0, + symbol_1, + symbol_2, + symbol_3, +}; + +struct trie_fixture : ::testing::Test +{ +protected: + using symbol_t = MockSymbol; + using trie_t = Trie; +}; + +//////////////////////////////////////////// +// State TESTS +//////////////////////////////////////////// + +TEST_F(trie_fixture, trie_root) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(trie.is_reset()); + + auto& children = trie.get_children(); + EXPECT_EQ(children.size(), static_cast(1)); + EXPECT_NE(children.find('a'), children.end()); // found +} + +TEST_F(trie_fixture, trie_transition_child_a) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + trie.transition('a'); + + EXPECT_TRUE(trie.get_symbol()); + EXPECT_EQ(*trie.get_symbol(), symbol_t::symbol_0); + EXPECT_TRUE(trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + auto& children = trie.get_children(); + EXPECT_EQ(children.size(), static_cast(2)); + EXPECT_NE(children.find('b'), children.end()); // found + EXPECT_NE(children.find('c'), children.end()); // found +} + +TEST_F(trie_fixture, trie_transition_child_b) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + 
{"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + trie.transition('a'); + trie.transition('b'); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + auto& children = trie.get_children(); + EXPECT_EQ(children.size(), static_cast(1)); + EXPECT_NE(children.find('c'), children.end()); // found +} + +TEST_F(trie_fixture, trie_transition_child_bc) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + trie.transition('a'); + trie.transition('b'); + trie.transition('c'); + + EXPECT_TRUE(trie.get_symbol()); + EXPECT_EQ(*trie.get_symbol(), symbol_t::symbol_1); + EXPECT_TRUE(trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + auto& children = trie.get_children(); + EXPECT_EQ(children.size(), static_cast(0)); +} + +TEST_F(trie_fixture, trie_transition_child_c) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + trie.transition('a'); + trie.transition('c'); + + EXPECT_TRUE(trie.get_symbol()); + EXPECT_EQ(*trie.get_symbol(), symbol_t::symbol_2); + EXPECT_TRUE(trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + auto& children = trie.get_children(); + EXPECT_EQ(children.size(), static_cast(0)); +} + +TEST_F(trie_fixture, trie_reset_root) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + trie.reset(); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(trie.is_reset()); + + auto& children = trie.get_children(); + EXPECT_EQ(children.size(), static_cast(1)); + EXPECT_NE(children.find('a'), children.end()); // found +} + +TEST_F(trie_fixture, trie_reset_child_a) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + trie.transition('a'); + + trie.reset(); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(trie.is_reset()); + + auto& children = trie.get_children(); + EXPECT_EQ(children.size(), static_cast(1)); + EXPECT_NE(children.find('a'), children.end()); // found +} + +TEST_F(trie_fixture, trie_reset_child_a_b) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + trie.transition('a'); + trie.transition('b'); + + trie.reset(); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(trie.is_reset()); + + auto& children = trie.get_children(); + EXPECT_EQ(children.size(), static_cast(1)); + EXPECT_NE(children.find('a'), children.end()); // found +} + +TEST_F(trie_fixture, trie_back_transition_root) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + EXPECT_THROW(trie.back_transition(), exceptions::control_flow_error); +} + +TEST_F(trie_fixture, trie_back_transition_child_a) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, + }); + + trie.transition('a'); + trie.back_transition(); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(trie.is_reset()); + + auto& children = trie.get_children(); + EXPECT_EQ(children.size(), static_cast(1)); + EXPECT_NE(children.find('a'), children.end()); // found +} + +TEST_F(trie_fixture, trie_back_transition_child_ab) +{ + trie_t trie({ + {"a", symbol_t::symbol_0}, + {"abc", symbol_t::symbol_1}, + {"ac", symbol_t::symbol_2}, 
+ }); + + trie.transition('a'); + trie.transition('b'); + + // back to child 'a' + trie.back_transition(); + + EXPECT_TRUE(trie.get_symbol()); + EXPECT_TRUE(trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + auto children = trie.get_children(); + EXPECT_EQ(children.size(), static_cast(2)); + EXPECT_NE(children.find('b'), children.end()); // found + EXPECT_NE(children.find('c'), children.end()); // found + + // back to root + trie.back_transition(); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(trie.is_reset()); + + children = trie.get_children(); + EXPECT_EQ(children.size(), static_cast(1)); + EXPECT_NE(children.find('a'), children.end()); // found +} + +//////////////////////////////////////////// +// Structural Checks +//////////////////////////////////////////// + +TEST_F(trie_fixture, trie_off_by_one_prefix) +{ + trie_t trie({ + {"ab", symbol_t::symbol_1}, + {"bab", symbol_t::symbol_1}, + }); + + // check root + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(trie.is_reset()); + + auto children = trie.get_children(); + EXPECT_EQ(children.size(), static_cast(2)); + EXPECT_NE(children.find('a'), children.end()); // found + EXPECT_NE(children.find('b'), children.end()); // found + + // check child 'a' + trie.transition('a'); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + children = trie.get_children(); + EXPECT_EQ(children.size(), static_cast(1)); + EXPECT_NE(children.find('b'), children.end()); // found + + // check child 'a'->'b' + trie.transition('b'); + + EXPECT_TRUE(trie.get_symbol()); + EXPECT_TRUE(trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + children = trie.get_children(); + EXPECT_EQ(children.size(), static_cast(0)); + + // up to child 'a' + trie.back_transition(); + + // up to root + trie.back_transition(); + + // check child 'b' + trie.transition('b'); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + children = trie.get_children(); + EXPECT_EQ(children.size(), static_cast(1)); + EXPECT_NE(children.find('a'), children.end()); // found + + // check child 'b'->'a' + trie.transition('a'); + + EXPECT_TRUE(!trie.get_symbol()); + EXPECT_TRUE(!trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + children = trie.get_children(); + EXPECT_EQ(children.size(), static_cast(1)); + EXPECT_NE(children.find('b'), children.end()); // found + + // check child 'b'->'a'->'b' + trie.transition('b'); + + EXPECT_TRUE(trie.get_symbol()); + EXPECT_TRUE(trie.is_accept()); + EXPECT_TRUE(!trie.is_reset()); + + children = trie.get_children(); + EXPECT_EQ(children.size(), static_cast(0)); +} + +} // namespace lexer +} // namespace core +} // namespace docgen diff --git a/test/core/lexer_unittest.cpp b/test/core/lexer_unittest.cpp deleted file mode 100644 index 643c2f5..0000000 --- a/test/core/lexer_unittest.cpp +++ /dev/null @@ -1,453 +0,0 @@ -#define private public - -#include -#include - -namespace docgen { -namespace core { - -struct lexer_fixture : ::testing::Test -{ -protected: - using status_t = Lexer::status_t; - using token_t = Lexer::token_t; - using symbol_t = Lexer::symbol_t; - - Lexer lexer; - - void setup_lexer(const char* content) - { - std::string str(content); - for (char c : str) { - lexer.process(c); - } - lexer.process(0); - } - - void check_token(symbol_t actual_sym, symbol_t expected_sym, - const std::string& actual_str, const std::string& expected_str) - { - EXPECT_EQ(actual_sym, 
expected_sym); - EXPECT_EQ(actual_str, expected_str); - } -}; - -TEST_F(lexer_fixture, lexer) -{ - static constexpr const char* content = - "#include // some comment\n" - "\n" - "void f();" - ; - - setup_lexer(content); - - auto token = *lexer.next_token(); - check_token(token.name, symbol_t::HASHTAG, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::TEXT, - token.content, "include"); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::WHITESPACE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::TEXT, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::WHITESPACE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::BEGIN_NLINE_COMMENT, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::WHITESPACE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::TEXT, - token.content, "some"); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::WHITESPACE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::TEXT, - token.content, "comment"); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::NEWLINE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::NEWLINE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::TEXT, - token.content, "void"); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::WHITESPACE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::TEXT, - token.content, "f()"); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::SEMICOLON, - token.content, ""); -} - -TEST_F(lexer_fixture, process_no_comment) -{ - static constexpr const char* content = - "#include \n" - "\n" - " // just a normal comment\n" - "\n" - ; - - setup_lexer(content); - - auto token = *lexer.next_token(); - check_token(token.name, symbol_t::HASHTAG, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::TEXT, - token.content, "include"); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::WHITESPACE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::TEXT, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::NEWLINE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::NEWLINE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::WHITESPACE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::WHITESPACE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::BEGIN_NLINE_COMMENT, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::WHITESPACE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::TEXT, - token.content, "just"); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::WHITESPACE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::TEXT, - token.content, "a"); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::WHITESPACE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::TEXT, 
- token.content, "normal"); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::WHITESPACE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::TEXT, - token.content, "comment"); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::NEWLINE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::NEWLINE, - token.content, ""); -} - -TEST_F(lexer_fixture, process_one_line_comment) -{ - static constexpr const char* content = - "// comment\n" - " /// special_comment \n" - ; - - setup_lexer(content); - - auto token = *lexer.next_token(); - check_token(token.name, symbol_t::BEGIN_NLINE_COMMENT, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::WHITESPACE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::TEXT, - token.content, "comment"); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::NEWLINE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::WHITESPACE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::BEGIN_SLINE_COMMENT, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::WHITESPACE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::TEXT, - token.content, "special_comment"); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::WHITESPACE, - token.content, ""); - - token = *lexer.next_token(); - check_token(token.name, symbol_t::NEWLINE, - token.content, ""); - - -} - -//TEST_F(lexer_fixture, process_two_line_comment) -//{ -// static constexpr const char* content = -// "#include \n" -// "\n" -// " // just a normal comment\n" -// " /// a very special comment \n" -// "\n" -// " // just a normal comment\n" -// " /// another very special comment \n" -// " // just a normal comment\n" -// ; -// -// write_file(content); -// Lexer lexer(file); -// lexer.process(); -// const auto& tokens = lexer.get_tokens(); -// -// EXPECT_EQ(tokens.size(), static_cast(11)); -// -// check_token(tokens[0].name, symbol_t::TEXT, -// tokens[0].content, "#include "); -// check_token(tokens[1].name, symbol_t::NEWLINE, -// tokens[1].content, ""); -// check_token(tokens[2].name, symbol_t::NEWLINE, -// tokens[2].content, ""); -// check_token(tokens[3].name, symbol_t::BEGIN_LINE_COMMENT, -// tokens[3].content, ""); -// check_token(tokens[4].name, symbol_t::TEXT, -// tokens[4].content, "a very special comment"); -// EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); -// check_token(tokens[5].name, symbol_t::NEWLINE, -// tokens[5].content, ""); -// check_token(tokens[6].name, symbol_t::NEWLINE, -// tokens[6].content, ""); -// check_token(tokens[7].name, symbol_t::BEGIN_LINE_COMMENT, -// tokens[7].content, ""); -// check_token(tokens[8].name, symbol_t::TEXT, -// tokens[8].content, "another very special comment"); -// EXPECT_EQ(tokens[8].leading_ws_count, static_cast(1)); -// check_token(tokens[9].name, symbol_t::NEWLINE, -// tokens[9].content, ""); -// check_token(tokens[10].name, symbol_t::END_OF_FILE, -// tokens[10].content, ""); -//} -// -//TEST_F(lexer_fixture, process_one_block_comment) -//{ -// static constexpr const char* content = -// "#include \n" -// "\n" -// " // just a normal comment\n" -// " /*! 
a very special comment */\n" -// "\n" -// ; -// -// write_file(content); -// Lexer lexer(file); -// lexer.process(); -// const auto& tokens = lexer.get_tokens(); -// -// EXPECT_EQ(tokens.size(), static_cast(9)); -// -// check_token(tokens[0].name, symbol_t::TEXT, -// tokens[0].content, "#include "); -// check_token(tokens[1].name, symbol_t::NEWLINE, -// tokens[1].content, ""); -// check_token(tokens[2].name, symbol_t::NEWLINE, -// tokens[2].content, ""); -// check_token(tokens[3].name, symbol_t::BEGIN_BLOCK_COMMENT, -// tokens[3].content, ""); -// check_token(tokens[4].name, symbol_t::TEXT, -// tokens[4].content, "a very special comment"); -// EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); -// check_token(tokens[5].name, symbol_t::END_BLOCK_COMMENT, -// tokens[5].content, ""); -// check_token(tokens[6].name, symbol_t::NEWLINE, -// tokens[6].content, ""); -// check_token(tokens[7].name, symbol_t::NEWLINE, -// tokens[7].content, ""); -// check_token(tokens[8].name, symbol_t::END_OF_FILE, -// tokens[8].content, ""); -//} -// -//TEST_F(lexer_fixture, process_two_block_comment) -//{ -// static constexpr const char* content = -// "#include \n" -// "\n" -// " // just a normal comment\n" -// " /*! a very special comment */\n" -// "\n" -// " // just a normal comment\n" -// " /*! another very \n" -// " * special comment \n" -// "*/" -// " /* just a normal comment\n */" -// ; -// -// write_file(content); -// Lexer lexer(file); -// lexer.process(); -// const auto& tokens = lexer.get_tokens(); -// -// EXPECT_EQ(tokens.size(), static_cast(16)); -// -// check_token(tokens[0].name, symbol_t::TEXT, -// tokens[0].content, "#include "); -// check_token(tokens[1].name, symbol_t::NEWLINE, -// tokens[1].content, ""); -// check_token(tokens[2].name, symbol_t::NEWLINE, -// tokens[2].content, ""); -// check_token(tokens[3].name, symbol_t::BEGIN_BLOCK_COMMENT, -// tokens[3].content, ""); -// check_token(tokens[4].name, symbol_t::TEXT, -// tokens[4].content, "a very special comment"); -// EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); -// check_token(tokens[5].name, symbol_t::END_BLOCK_COMMENT, -// tokens[5].content, ""); -// check_token(tokens[6].name, symbol_t::NEWLINE, -// tokens[6].content, ""); -// check_token(tokens[7].name, symbol_t::NEWLINE, -// tokens[7].content, ""); -// check_token(tokens[8].name, symbol_t::BEGIN_BLOCK_COMMENT, -// tokens[8].content, ""); -// check_token(tokens[9].name, symbol_t::TEXT, -// tokens[9].content, "another very"); -// EXPECT_EQ(tokens[9].leading_ws_count, static_cast(1)); -// check_token(tokens[10].name, symbol_t::NEWLINE, -// tokens[10].content, ""); -// check_token(tokens[11].name, symbol_t::STAR, -// tokens[11].content, ""); -// check_token(tokens[12].name, symbol_t::TEXT, -// tokens[12].content, "special comment"); -// EXPECT_EQ(tokens[12].leading_ws_count, static_cast(1)); -// check_token(tokens[13].name, symbol_t::NEWLINE, -// tokens[13].content, ""); -// check_token(tokens[14].name, symbol_t::END_BLOCK_COMMENT, -// tokens[14].content, ""); -// check_token(tokens[15].name, symbol_t::END_OF_FILE, -// tokens[15].content, ""); -//} -// -//TEST_F(lexer_fixture, process_line_block_comment) -//{ -// static constexpr const char* content = -// "#include \n" -// "\n" -// " // just a normal comment\n" -// " /// a very special comment */\n" -// "\n" -// " // just a normal comment\n" -// " /*! 
another very \n" -// " * special comment \n" -// "*/" -// " /* just a normal comment\n */" -// ; -// -// write_file(content); -// Lexer lexer(file); -// lexer.process(); -// const auto& tokens = lexer.get_tokens(); -// -// EXPECT_EQ(tokens.size(), static_cast(16)); -// -// check_token(tokens[0].name, symbol_t::TEXT, -// tokens[0].content, "#include "); -// check_token(tokens[1].name, symbol_t::NEWLINE, -// tokens[1].content, ""); -// check_token(tokens[2].name, symbol_t::NEWLINE, -// tokens[2].content, ""); -// check_token(tokens[3].name, symbol_t::BEGIN_LINE_COMMENT, -// tokens[3].content, ""); -// check_token(tokens[4].name, symbol_t::TEXT, -// tokens[4].content, "a very special comment"); -// EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); -// check_token(tokens[5].name, symbol_t::END_BLOCK_COMMENT, -// tokens[5].content, ""); -// check_token(tokens[6].name, symbol_t::NEWLINE, -// tokens[6].content, ""); -// check_token(tokens[7].name, symbol_t::NEWLINE, -// tokens[7].content, ""); -// check_token(tokens[8].name, symbol_t::BEGIN_BLOCK_COMMENT, -// tokens[8].content, ""); -// check_token(tokens[9].name, symbol_t::TEXT, -// tokens[9].content, "another very"); -// EXPECT_EQ(tokens[9].leading_ws_count, static_cast(1)); -// check_token(tokens[10].name, symbol_t::NEWLINE, -// tokens[10].content, ""); -// check_token(tokens[11].name, symbol_t::STAR, -// tokens[11].content, ""); -// check_token(tokens[12].name, symbol_t::TEXT, -// tokens[12].content, "special comment"); -// EXPECT_EQ(tokens[12].leading_ws_count, static_cast(1)); -// check_token(tokens[13].name, symbol_t::NEWLINE, -// tokens[13].content, ""); -// check_token(tokens[14].name, symbol_t::END_BLOCK_COMMENT, -// tokens[14].content, ""); -// check_token(tokens[15].name, symbol_t::END_OF_FILE, -// tokens[15].content, ""); -//} - -} // namespace core -} // namespace docgen diff --git a/test/core/trie_unittest.cpp b/test/core/trie_unittest.cpp deleted file mode 100644 index 43bf550..0000000 --- a/test/core/trie_unittest.cpp +++ /dev/null @@ -1,102 +0,0 @@ -#define private public - -#include -#include -#include - -namespace docgen { -namespace core { - -enum class MockSymbol { - symbol1, - symbol2 -}; - -struct trie_fixture : ::testing::Test -{ -protected: - using symbol_t = MockSymbol; - using trie_t = Trie; - - void print_trie(const trie_t& trie) - { - print_trie(trie.root_); - } - - void print_trie(const trie_t::TrieNode& node) - { - if (node.is_accept()) { - std::cout << "symbol: " << (int) *node.get_symbol() << std::endl; - } - std::cout << "\nsize: " << node.children_.size() << std::endl; - for (auto it = node.children_.begin(); it != node.children_.end(); ++it) { - std::cout << it->first << "--" << std::endl;; - print_trie(it->second); - std::cout << "--" << std::endl; - } - } -}; - -TEST_F(trie_fixture, trie_ctor) -{ - trie_t trie({ - {"adf", symbol_t::symbol1}, - {"asdf", symbol_t::symbol2}, - {"bscdf", symbol_t::symbol1} - }); - - auto symbol = trie.get_symbol(); - EXPECT_FALSE((bool) symbol); - - //trie.transition('a'); - //EXPECT_FALSE((bool) trie.get_symbol()); - //trie.transition('d'); - //EXPECT_FALSE((bool) trie.get_symbol()); - //trie.transition('b'); - //EXPECT_FALSE((bool) trie.get_symbol()); - - trie.transition('a'); - EXPECT_FALSE((bool) trie.get_symbol()); - trie.transition('d'); - EXPECT_FALSE((bool) trie.get_symbol()); - trie.transition('f'); - EXPECT_TRUE((bool) trie.get_symbol()); - - trie.back_transition(); - trie.back_transition(); - trie.back_transition(); - - trie.transition('a'); - EXPECT_FALSE((bool) 
trie.get_symbol()); - trie.transition('d'); - EXPECT_FALSE((bool) trie.get_symbol()); - trie.transition('f'); - EXPECT_TRUE((bool) trie.get_symbol()); - - trie.reset(); - - trie.transition('a'); - EXPECT_FALSE((bool) trie.get_symbol()); - trie.transition('s'); - EXPECT_FALSE((bool) trie.get_symbol()); - trie.transition('d'); - EXPECT_FALSE((bool) trie.get_symbol()); - trie.transition('f'); - EXPECT_TRUE((bool) trie.get_symbol()); - - trie.reset(); - - trie.transition('b'); - EXPECT_FALSE((bool) trie.get_symbol()); - trie.transition('s'); - EXPECT_FALSE((bool) trie.get_symbol()); - trie.transition('c'); - EXPECT_FALSE((bool) trie.get_symbol()); - trie.transition('d'); - EXPECT_FALSE((bool) trie.get_symbol()); - trie.transition('f'); - EXPECT_TRUE((bool) trie.get_symbol()); -} - -} // namespace core -} // namespace docgen From a18548da111c5a407e7e22a2668c536a5d78ddf6 Mon Sep 17 00:00:00 2001 From: James Yang Date: Sat, 18 Jan 2020 00:06:36 -0500 Subject: [PATCH 06/23] Fix issue with non-backtrack behavior when at root vs. non-root --- src/core/lexer/lexer.cpp | 8 +- test/core/lexer/lexer_unittest.cpp | 224 +++++++++++++++++++++++++++++ 2 files changed, 230 insertions(+), 2 deletions(-) diff --git a/src/core/lexer/lexer.cpp b/src/core/lexer/lexer.cpp index f727e49..82ca5d6 100644 --- a/src/core/lexer/lexer.cpp +++ b/src/core/lexer/lexer.cpp @@ -50,11 +50,15 @@ void Lexer::process(char c) // if not backtracking if (!this->is_backtracking()) { + // if trie at root + if (trie_.is_reset()) { + text_.push_back(c); + return; + } text_.append(buf_); - text_.push_back(c); buf_.clear(); trie_.reset(); - return; + return this->process(c); } // otherwise, currently backtracking diff --git a/test/core/lexer/lexer_unittest.cpp b/test/core/lexer/lexer_unittest.cpp index 9fead88..0a1b465 100644 --- a/test/core/lexer/lexer_unittest.cpp +++ b/test/core/lexer/lexer_unittest.cpp @@ -196,10 +196,234 @@ TEST_F(lexer_fixture, lexer_semicolon) EXPECT_FALSE(static_cast(token)); } +// BEGIN_SLINE_COMMENT +TEST_F(lexer_fixture, lexer_begin_sline_comment) +{ + static constexpr const char* content = + "abc///" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// BEGIN_SBLOCK_COMMENT +TEST_F(lexer_fixture, lexer_begin_sblock_comment) +{ + static constexpr const char* content = + "abc/*!" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SBLOCK_COMMENT); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// BEGIN_NBLOCK_COMMENT +TEST_F(lexer_fixture, lexer_begin_nblock_comment) +{ + static constexpr const char* content = + "abc/**!" 
+ ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NBLOCK_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::STAR); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "!"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// END_BLOCK_COMMENT +TEST_F(lexer_fixture, lexer_end_block_comment_no_star) +{ + static constexpr const char* content = + "abc*/f" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::END_BLOCK_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "f"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +TEST_F(lexer_fixture, lexer_end_block_comment_star) +{ + static constexpr const char* content = + "abc**/f" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::STAR); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::END_BLOCK_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "f"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// SDESC +TEST_F(lexer_fixture, lexer_sdesc) +{ + static constexpr const char* content = + "ssdesc@@sdescf@sdesscf" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "ssdesc@"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::SDESC); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "f@sdesscf"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + //////////////////////////////////////////////////////////////////// // Mix TESTS //////////////////////////////////////////////////////////////////// +// line comment mix +TEST_F(lexer_fixture, lexer_line_comment_4) +{ + static constexpr const char* content = + "abc////" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "/"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// line comment mix +TEST_F(lexer_fixture, lexer_line_comment_5) +{ + static constexpr const char* content = + "abc/////" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + 
EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// line comment mix +TEST_F(lexer_fixture, lexer_line_comment_6) +{ + static constexpr const char* content = + "abc//////" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + TEST_F(lexer_fixture, lexer_test_1) { static constexpr const char* content = From c718ace66169183d0d8436f24584666a3cd6605e Mon Sep 17 00:00:00 2001 From: James Yang Date: Sat, 18 Jan 2020 00:17:34 -0500 Subject: [PATCH 07/23] Add more unittests and integration tests --- test/core/lexer/lexer_unittest.cpp | 367 +++++++++++++---------------- 1 file changed, 163 insertions(+), 204 deletions(-) diff --git a/test/core/lexer/lexer_unittest.cpp b/test/core/lexer/lexer_unittest.cpp index 0a1b465..294cf63 100644 --- a/test/core/lexer/lexer_unittest.cpp +++ b/test/core/lexer/lexer_unittest.cpp @@ -345,6 +345,56 @@ TEST_F(lexer_fixture, lexer_sdesc) EXPECT_FALSE(static_cast(token)); } +// TPARAM +TEST_F(lexer_fixture, lexer_tparam) +{ + static constexpr const char* content = + "ssdes@@@@@@tpaar@tpara@m@tparam@tpar" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "ssdes@@@@@@tpaar@tpara@m"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TPARAM); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "@tpar"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// RETURN +TEST_F(lexer_fixture, lexer_return) +{ + static constexpr const char* content = + "@re@@@@@@return@@@@@" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "@re@@@@@"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::RETURN); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "@@@@@"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + //////////////////////////////////////////////////////////////////// // Mix TESTS //////////////////////////////////////////////////////////////////// @@ -424,7 +474,7 @@ TEST_F(lexer_fixture, lexer_line_comment_6) EXPECT_FALSE(static_cast(token)); } -TEST_F(lexer_fixture, lexer_test_1) +TEST_F(lexer_fixture, lexer_test_1_no_special_comment) { static constexpr const char* content = "#include // some comment\n" @@ -497,9 +547,13 @@ TEST_F(lexer_fixture, lexer_test_1) token = lexer.next_token(); EXPECT_EQ(token->name, symbol_t::SEMICOLON); EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); } 
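// A minimal end-to-end driver sketch (annotation, not part of this diff)
// showing the call pattern every fixture above relies on: stream characters
// through process(), call flush() once the input is exhausted, then drain
// the token queue with next_token():
//
//   Lexer lexer;
//   for (char c : std::string("abc/////")) {
//       lexer.process(c);
//   }
//   lexer.flush();  // emit whatever is still buffered as TEXT or a symbol
//   while (auto token = lexer.next_token()) {
//       // yields TEXT("abc"), then BEGIN_SLINE_COMMENT for "///",
//       // then BEGIN_NLINE_COMMENT for the trailing "//" (longest match wins)
//   }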
-TEST_F(lexer_fixture, process_no_comment) +TEST_F(lexer_fixture, lexer_test_2_no_special_comment) { static constexpr const char* content = "#include \n" @@ -585,9 +639,13 @@ TEST_F(lexer_fixture, process_no_comment) token = lexer.next_token(); EXPECT_EQ(token->name, symbol_t::NEWLINE); EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); } -TEST_F(lexer_fixture, process_one_line_comment) +TEST_F(lexer_fixture, lexer_test_1_comment_mix) { static constexpr const char* content = "// comment\n" @@ -635,209 +693,110 @@ TEST_F(lexer_fixture, process_one_line_comment) token = lexer.next_token(); EXPECT_EQ(token->name, symbol_t::NEWLINE); EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); } -//TEST_F(lexer_fixture, process_two_line_comment) -//{ -// static constexpr const char* content = -// "#include \n" -// "\n" -// " // just a normal comment\n" -// " /// a very special comment \n" -// "\n" -// " // just a normal comment\n" -// " /// another very special comment \n" -// " // just a normal comment\n" -// ; -// -// write_file(content); -// Lexer lexer(file); -// lexer.process(); -// const auto& tokens = lexer.get_tokens(); -// -// EXPECT_EQ(tokens.size(), static_cast(11)); -// -// check_token(tokens[0]->name, symbol_t::TEXT, -// tokens[0]->content, "#include "); -// check_token(tokens[1]->name, symbol_t::NEWLINE, -// tokens[1]->content, ""); -// check_token(tokens[2]->name, symbol_t::NEWLINE, -// tokens[2]->content, ""); -// check_token(tokens[3]->name, symbol_t::BEGIN_LINE_COMMENT, -// tokens[3]->content, ""); -// check_token(tokens[4]->name, symbol_t::TEXT, -// tokens[4]->content, "a very special comment"); -// EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); -// check_token(tokens[5]->name, symbol_t::NEWLINE, -// tokens[5]->content, ""); -// check_token(tokens[6]->name, symbol_t::NEWLINE, -// tokens[6]->content, ""); -// check_token(tokens[7]->name, symbol_t::BEGIN_LINE_COMMENT, -// tokens[7]->content, ""); -// check_token(tokens[8]->name, symbol_t::TEXT, -// tokens[8]->content, "another very special comment"); -// EXPECT_EQ(tokens[8].leading_ws_count, static_cast(1)); -// check_token(tokens[9]->name, symbol_t::NEWLINE, -// tokens[9]->content, ""); -// check_token(tokens[10]->name, symbol_t::END_OF_FILE, -// tokens[10]->content, ""); -//} -// -//TEST_F(lexer_fixture, process_one_block_comment) -//{ -// static constexpr const char* content = -// "#include \n" -// "\n" -// " // just a normal comment\n" -// " /*! 
a very special comment */\n" -// "\n" -// ; -// -// write_file(content); -// Lexer lexer(file); -// lexer.process(); -// const auto& tokens = lexer.get_tokens(); -// -// EXPECT_EQ(tokens.size(), static_cast(9)); -// -// check_token(tokens[0]->name, symbol_t::TEXT, -// tokens[0]->content, "#include "); -// check_token(tokens[1]->name, symbol_t::NEWLINE, -// tokens[1]->content, ""); -// check_token(tokens[2]->name, symbol_t::NEWLINE, -// tokens[2]->content, ""); -// check_token(tokens[3]->name, symbol_t::BEGIN_BLOCK_COMMENT, -// tokens[3]->content, ""); -// check_token(tokens[4]->name, symbol_t::TEXT, -// tokens[4]->content, "a very special comment"); -// EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); -// check_token(tokens[5]->name, symbol_t::END_BLOCK_COMMENT, -// tokens[5]->content, ""); -// check_token(tokens[6]->name, symbol_t::NEWLINE, -// tokens[6]->content, ""); -// check_token(tokens[7]->name, symbol_t::NEWLINE, -// tokens[7]->content, ""); -// check_token(tokens[8]->name, symbol_t::END_OF_FILE, -// tokens[8]->content, ""); -//} -// -//TEST_F(lexer_fixture, process_two_block_comment) -//{ -// static constexpr const char* content = -// "#include \n" -// "\n" -// " // just a normal comment\n" -// " /*! a very special comment */\n" -// "\n" -// " // just a normal comment\n" -// " /*! another very \n" -// " * special comment \n" -// "*/" -// " /* just a normal comment\n */" -// ; -// -// write_file(content); -// Lexer lexer(file); -// lexer.process(); -// const auto& tokens = lexer.get_tokens(); -// -// EXPECT_EQ(tokens.size(), static_cast(16)); -// -// check_token(tokens[0]->name, symbol_t::TEXT, -// tokens[0]->content, "#include "); -// check_token(tokens[1]->name, symbol_t::NEWLINE, -// tokens[1]->content, ""); -// check_token(tokens[2]->name, symbol_t::NEWLINE, -// tokens[2]->content, ""); -// check_token(tokens[3]->name, symbol_t::BEGIN_BLOCK_COMMENT, -// tokens[3]->content, ""); -// check_token(tokens[4]->name, symbol_t::TEXT, -// tokens[4]->content, "a very special comment"); -// EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); -// check_token(tokens[5]->name, symbol_t::END_BLOCK_COMMENT, -// tokens[5]->content, ""); -// check_token(tokens[6]->name, symbol_t::NEWLINE, -// tokens[6]->content, ""); -// check_token(tokens[7]->name, symbol_t::NEWLINE, -// tokens[7]->content, ""); -// check_token(tokens[8]->name, symbol_t::BEGIN_BLOCK_COMMENT, -// tokens[8]->content, ""); -// check_token(tokens[9]->name, symbol_t::TEXT, -// tokens[9]->content, "another very"); -// EXPECT_EQ(tokens[9].leading_ws_count, static_cast(1)); -// check_token(tokens[10]->name, symbol_t::NEWLINE, -// tokens[10]->content, ""); -// check_token(tokens[11]->name, symbol_t::STAR, -// tokens[11]->content, ""); -// check_token(tokens[12]->name, symbol_t::TEXT, -// tokens[12]->content, "special comment"); -// EXPECT_EQ(tokens[12].leading_ws_count, static_cast(1)); -// check_token(tokens[13]->name, symbol_t::NEWLINE, -// tokens[13]->content, ""); -// check_token(tokens[14]->name, symbol_t::END_BLOCK_COMMENT, -// tokens[14]->content, ""); -// check_token(tokens[15]->name, symbol_t::END_OF_FILE, -// tokens[15]->content, ""); -//} -// -//TEST_F(lexer_fixture, process_line_block_comment) -//{ -// static constexpr const char* content = -// "#include \n" -// "\n" -// " // just a normal comment\n" -// " /// a very special comment */\n" -// "\n" -// " // just a normal comment\n" -// " /*! 
another very \n" -// " * special comment \n" -// "*/" -// " /* just a normal comment\n */" -// ; -// -// write_file(content); -// Lexer lexer(file); -// lexer.process(); -// const auto& tokens = lexer.get_tokens(); -// -// EXPECT_EQ(tokens.size(), static_cast(16)); -// -// check_token(tokens[0]->name, symbol_t::TEXT, -// tokens[0]->content, "#include "); -// check_token(tokens[1]->name, symbol_t::NEWLINE, -// tokens[1]->content, ""); -// check_token(tokens[2]->name, symbol_t::NEWLINE, -// tokens[2]->content, ""); -// check_token(tokens[3]->name, symbol_t::BEGIN_LINE_COMMENT, -// tokens[3]->content, ""); -// check_token(tokens[4]->name, symbol_t::TEXT, -// tokens[4]->content, "a very special comment"); -// EXPECT_EQ(tokens[4].leading_ws_count, static_cast(1)); -// check_token(tokens[5]->name, symbol_t::END_BLOCK_COMMENT, -// tokens[5]->content, ""); -// check_token(tokens[6]->name, symbol_t::NEWLINE, -// tokens[6]->content, ""); -// check_token(tokens[7]->name, symbol_t::NEWLINE, -// tokens[7]->content, ""); -// check_token(tokens[8]->name, symbol_t::BEGIN_BLOCK_COMMENT, -// tokens[8]->content, ""); -// check_token(tokens[9]->name, symbol_t::TEXT, -// tokens[9]->content, "another very"); -// EXPECT_EQ(tokens[9].leading_ws_count, static_cast(1)); -// check_token(tokens[10]->name, symbol_t::NEWLINE, -// tokens[10]->content, ""); -// check_token(tokens[11]->name, symbol_t::STAR, -// tokens[11]->content, ""); -// check_token(tokens[12]->name, symbol_t::TEXT, -// tokens[12]->content, "special comment"); -// EXPECT_EQ(tokens[12].leading_ws_count, static_cast(1)); -// check_token(tokens[13]->name, symbol_t::NEWLINE, -// tokens[13]->content, ""); -// check_token(tokens[14]->name, symbol_t::END_BLOCK_COMMENT, -// tokens[14]->content, ""); -// check_token(tokens[15]->name, symbol_t::END_OF_FILE, -// tokens[15]->content, ""); -//} +TEST_F(lexer_fixture, lexer_test_1_tagname_comments) +{ + static constexpr const char* content = + "// @tparam normal comment\n" + "/// @sdescspecial comment \n" + "#define hehe\n" + ; + + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TPARAM); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "normal"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::SDESC); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "special"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = 
lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::HASHTAG); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "define"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "hehe"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} } // namespace lexer } // namespace core From 2498daf49a4b2f1a4ad7ce9b8a78118a3258fa74 Mon Sep 17 00:00:00 2001 From: James Yang Date: Sat, 18 Jan 2020 00:24:36 -0500 Subject: [PATCH 08/23] Rename namespace lexer to lex and change CMake and directory structure --- src/CMakeLists.txt | 2 +- src/core/{lexer => lex}/lexer.cpp | 6 +- src/core/{lexer => lex}/lexer.hpp | 8 +- src/core/{lexer => lex}/status.hpp | 4 +- src/core/{lexer => lex}/trie.hpp | 4 +- test/CMakeLists.txt | 4 +- test/core/{lexer => lex}/lexer_unittest.cpp | 6 +- test/core/{lexer => lex}/trie_unittest.cpp | 6 +- test/core/lexer/lexer_base_fixture.hpp | 36 - test/core/lexer/lexer_routines_unittest.cpp | 784 -------------------- 10 files changed, 20 insertions(+), 840 deletions(-) rename src/core/{lexer => lex}/lexer.cpp (97%) rename src/core/{lexer => lex}/lexer.hpp (94%) rename src/core/{lexer => lex}/status.hpp (86%) rename src/core/{lexer => lex}/trie.hpp (99%) rename test/core/{lexer => lex}/lexer_unittest.cpp (99%) rename test/core/{lexer => lex}/trie_unittest.cpp (99%) delete mode 100644 test/core/lexer/lexer_base_fixture.hpp delete mode 100644 test/core/lexer/lexer_routines_unittest.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 96870d9..f95563a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,6 @@ # Create object files for lexer add_library(LEXER_LIB_OBJECTS OBJECT - ${CMAKE_CURRENT_SOURCE_DIR}/core/lexer/lexer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/core/lex/lexer.cpp ) target_include_directories(LEXER_LIB_OBJECTS PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} diff --git a/src/core/lexer/lexer.cpp b/src/core/lex/lexer.cpp similarity index 97% rename from src/core/lexer/lexer.cpp rename to src/core/lex/lexer.cpp index 82ca5d6..2cb7356 100644 --- a/src/core/lexer/lexer.cpp +++ b/src/core/lex/lexer.cpp @@ -1,8 +1,8 @@ -#include +#include namespace docgen { namespace core { -namespace lexer { +namespace lex { /////////////////////////////////// // Lexer Implementation @@ -108,6 +108,6 @@ void Lexer::flush() this->reset(); } -} // namespace lexer +} // namespace lex } // namespace core } // namespace docgen diff --git a/src/core/lexer/lexer.hpp b/src/core/lex/lexer.hpp similarity index 94% rename from src/core/lexer/lexer.hpp rename to src/core/lex/lexer.hpp index 03b65fa..053f75c 100644 --- a/src/core/lexer/lexer.hpp +++ b/src/core/lex/lexer.hpp @@ -1,12 +1,12 @@ #pragma once -#include -#include +#include +#include #include #include namespace docgen { namespace core { -namespace lexer { +namespace lex { struct Lexer { @@ -95,6 
+95,6 @@ inline void Lexer::reset() reset_backtracking(); } -} // namespace lexer +} // namespace lex } // namespace core } // namespace docgen diff --git a/src/core/lexer/status.hpp b/src/core/lex/status.hpp similarity index 86% rename from src/core/lexer/status.hpp rename to src/core/lex/status.hpp index e4af461..2af91f7 100644 --- a/src/core/lexer/status.hpp +++ b/src/core/lex/status.hpp @@ -3,7 +3,7 @@ namespace docgen { namespace core { -namespace lexer { +namespace lex { template struct Status @@ -14,6 +14,6 @@ struct Status token_arr_t tokens; }; -} // namespace lexer +} // namespace lex } // namespace core } // namespace docgen diff --git a/src/core/lexer/trie.hpp b/src/core/lex/trie.hpp similarity index 99% rename from src/core/lexer/trie.hpp rename to src/core/lex/trie.hpp index 1fe79f0..eabe4ce 100644 --- a/src/core/lexer/trie.hpp +++ b/src/core/lex/trie.hpp @@ -8,7 +8,7 @@ namespace docgen { namespace core { -namespace lexer { +namespace lex { template struct Trie @@ -199,6 +199,6 @@ Trie::get_symbol() const return curr_node_.get().get_symbol(); } -} // namespace lexer +} // namespace lex } // namespace core } // namespace docgen diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1ddf86a..d5ab8e4 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -45,8 +45,8 @@ add_custom_command( ###################################################### add_executable(core_unittests - ${CMAKE_CURRENT_SOURCE_DIR}/core/lexer/trie_unittest.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/core/lexer/lexer_unittest.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/core/lex/trie_unittest.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/core/lex/lexer_unittest.cpp # Source dependency $ ) diff --git a/test/core/lexer/lexer_unittest.cpp b/test/core/lex/lexer_unittest.cpp similarity index 99% rename from test/core/lexer/lexer_unittest.cpp rename to test/core/lex/lexer_unittest.cpp index 294cf63..e7483bb 100644 --- a/test/core/lexer/lexer_unittest.cpp +++ b/test/core/lex/lexer_unittest.cpp @@ -1,9 +1,9 @@ -#include +#include #include namespace docgen { namespace core { -namespace lexer { +namespace lex { struct lexer_fixture : ::testing::Test { @@ -798,6 +798,6 @@ TEST_F(lexer_fixture, lexer_test_1_tagname_comments) EXPECT_FALSE(static_cast(token)); } -} // namespace lexer +} // namespace lex } // namespace core } // namespace docgen diff --git a/test/core/lexer/trie_unittest.cpp b/test/core/lex/trie_unittest.cpp similarity index 99% rename from test/core/lexer/trie_unittest.cpp rename to test/core/lex/trie_unittest.cpp index 206a6d7..bc8d483 100644 --- a/test/core/lexer/trie_unittest.cpp +++ b/test/core/lex/trie_unittest.cpp @@ -1,9 +1,9 @@ -#include +#include #include namespace docgen { namespace core { -namespace lexer { +namespace lex { enum class MockSymbol { symbol_0, @@ -331,6 +331,6 @@ TEST_F(trie_fixture, trie_off_by_one_prefix) EXPECT_EQ(children.size(), static_cast(0)); } -} // namespace lexer +} // namespace lex } // namespace core } // namespace docgen diff --git a/test/core/lexer/lexer_base_fixture.hpp b/test/core/lexer/lexer_base_fixture.hpp deleted file mode 100644 index 46ba63a..0000000 --- a/test/core/lexer/lexer_base_fixture.hpp +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once -#include - -namespace docgen { -namespace core { - -struct lexer_base_fixture : ::testing::Test -{ -protected: - using status_t = status_t; - using token_t = lexer_details::token_t; - using symbol_t = lexer_details::symbol_t; - - static constexpr const char* filename = ".lexer_routines_unittest.data.txt"; - static constexpr size_t buf_size = 
20; - FILE* file; - - lexer_base_fixture() - : file(fopen(filename, "r")) - {} - - ~lexer_base_fixture() - { - fclose(file); - } - - void write_file(const char* content) - { - FILE* fp = fopen(filename, "w"); - fwrite(content, sizeof(char), strlen(content), fp); - fclose(fp); - } -}; - -} // namespace core -} // namespace docgen diff --git a/test/core/lexer/lexer_routines_unittest.cpp b/test/core/lexer/lexer_routines_unittest.cpp deleted file mode 100644 index f32f0e3..0000000 --- a/test/core/lexer/lexer_routines_unittest.cpp +++ /dev/null @@ -1,784 +0,0 @@ -#include "lexer_base_fixture.hpp" - -namespace docgen { -namespace core { -namespace lexer_details { - -struct lexer_routines_fixture : lexer_base_fixture -{ -protected: - - template - static bool is_not(char x) - { - return x != c; - } - - void read(file_reader& reader, std::string& str) - { - int c = 0; - while ((c = reader.read()) != file_reader::termination) { - str.push_back(c); - } - } - - template - void ignore_until_test(const char* content, const char* expected_str, - char expected_last_char, Condition condition) - { - write_file(content); - file_reader reader(file); - std::string actual; - int last_char = ignore_until(reader, condition); - EXPECT_EQ(last_char, expected_last_char); - read(reader, actual); - EXPECT_EQ(actual, expected_str); - } - - template - void read_until_test(const char* content, const char* expected_str, - char expected_last_char, Condition condition) - { - write_file(content); - file_reader reader(file); - std::string actual; - int last_char = read_until(reader, condition, actual); - EXPECT_EQ(last_char, expected_last_char); - EXPECT_EQ(actual, expected_str); - } - - void trim_test(const char* content, const char* expected) - { - std::string actual(content); - trim(actual); - EXPECT_EQ(actual, expected); - } - - void tokenize_text_check(const std::string& actual, const token_t& token, - const char* expected) - { - check_token(token.name, symbol_t::TEXT, - token.content, expected); - EXPECT_EQ(actual.size(), static_cast(0)); - EXPECT_GT(actual.capacity(), DEFAULT_STRING_RESERVE_SIZE); - } - - void process_char_check(bool res, const status_t& status, - const std::string& actual, const char* expected, - symbol_t expected_symbol) - { - EXPECT_TRUE(res); - EXPECT_EQ(status.tokens.size(), static_cast(2)); - tokenize_text_check(actual, status.tokens[0], expected); - check_token(status.tokens[1].name, expected_symbol, - status.tokens[1].content, ""); - } - -}; - -//////////////////////////////////////////////////////////////////////// -// ignore_until TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, ignore_until_newline) -{ - static constexpr const char* content = - "to ignore here \ndo not ignore" - ; - static constexpr const char* expected = - "do not ignore"; - ignore_until_test(content, expected, '\n', is_not<'\n'>); -} - -TEST_F(lexer_routines_fixture, ignore_until_empty_content) -{ - static constexpr const char* content = - "" - ; - static constexpr const char* expected = - ""; - ignore_until_test(content, expected, file_reader::termination, is_not<'a'>); -} - -TEST_F(lexer_routines_fixture, ignore_until_first_char) -{ - static constexpr const char* content = - "hello" - ; - static constexpr const char* expected = - "ello"; - ignore_until_test(content, expected, 'h', is_not<'h'>); -} - -TEST_F(lexer_routines_fixture, ignore_until_last_char) -{ - static constexpr const char* content = - "hello" - ; - static constexpr const char* expected = - 
""; - ignore_until_test(content, expected, 'o', is_not<'o'>); -} - -//////////////////////////////////////////////////////////////////////// -// read_until TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, read_until_newline) -{ - static constexpr const char* content = - "very special comment \n not read here" - ; - static constexpr const char* expected = - "very special comment "; - read_until_test(content, expected, '\n', is_not<'\n'>); -} - -TEST_F(lexer_routines_fixture, read_until_two_newline) -{ - static constexpr const char* content = - "very special \ncomment \n" - ; - static constexpr const char* expected = - "very special "; - read_until_test(content, expected, '\n', is_not<'\n'>); -} - -TEST_F(lexer_routines_fixture, read_until_empty) -{ - static constexpr const char* content = - "" - ; - static constexpr const char* expected = - ""; - read_until_test(content, expected, file_reader::termination, is_not<'c'>); -} - -TEST_F(lexer_routines_fixture, read_until_first_char) -{ - static constexpr const char* content = - "very special \ncomment \n" - ; - static constexpr const char* expected = - ""; - read_until_test(content, expected, 'v', is_not<'v'>); -} - -TEST_F(lexer_routines_fixture, read_until_last_char) -{ - static constexpr const char* content = - "very special comment #" - ; - static constexpr const char* expected = - "very special comment "; - read_until_test(content, expected, '#', is_not<'#'>); -} - -//////////////////////////////////////////////////////////////////////// -// trim TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, trim_empty) -{ - static constexpr const char* content = - "" - ; - static constexpr const char* expected = - ""; - trim_test(content, expected); -} - -TEST_F(lexer_routines_fixture, trim_only_leading) -{ - static constexpr const char* content = - " \n\t hello\tworld!" 
- ; - static constexpr const char* expected = - "hello\tworld!"; - trim_test(content, expected); -} - -TEST_F(lexer_routines_fixture, trim_only_trailing) -{ - static constexpr const char* content = - "hello\tworld!\v\r\t\f \n\t " - ; - static constexpr const char* expected = - "hello\tworld!"; - trim_test(content, expected); -} - -TEST_F(lexer_routines_fixture, trim_leading_trailing) -{ - static constexpr const char* content = - "\n \r\t \f hello\tworld!\v\r\t\f \n\t " - ; - static constexpr const char* expected = - "hello\tworld!"; - trim_test(content, expected); -} - -//////////////////////////////////////////////////////////////////////// -// tokenize_text TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, tokenize_text_empty) -{ - static constexpr const char* content = - "" - ; - static constexpr const char* expected = - ""; - - std::string actual(content); - status_t status; - tokenize_text(actual, status); - EXPECT_EQ(status.tokens.size(), static_cast(0)); - EXPECT_EQ(actual, expected); -} - -TEST_F(lexer_routines_fixture, tokenize_text) -{ - static constexpr const char* content = - "\n \r\t \f hello\tworld!\v\r\t\f \n\t " - ; - static constexpr const char* expected = - "hello\tworld!"; - - std::string actual(content); - status_t status; - tokenize_text(actual, status); - tokenize_text_check(actual, status.tokens[0], expected); -} - -// this tests whether text is left in a valid state for the next processing -TEST_F(lexer_routines_fixture, tokenize_text_twice) -{ - static constexpr const char* content_1 = - "\n \r\t \f hello\tworld!\v\r\t\f \n\t " - ; - static constexpr const char* expected_1 = - "hello\tworld!"; - - static constexpr const char* content_2 = - "\n this is docgen!\v\f \n\t " - ; - static constexpr const char* expected_2 = - "this is docgen!"; - - std::string actual(content_1); - status_t status; - tokenize_text(actual, status); // actual cleared, status.tokens updated - - // check first token - tokenize_text_check(actual, status.tokens[0], expected_1); - - // push back content of content_2 - for (size_t i = 0; i < strlen(content_2); ++i) { - actual.push_back(content_2[i]); - } - - tokenize_text(actual, status); - - // only 2 tokens - EXPECT_EQ(status.tokens.size(), static_cast(2)); - - // check second token - tokenize_text_check(actual, status.tokens[1], expected_2); - // check content of first token to test if moving worked correctly - check_token(status.tokens[0].name, symbol_t::TEXT, - status.tokens[0].content, expected_1); -} - -//////////////////////////////////////////////////////////////////////// -// process_char TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, process_char_newline) -{ - static constexpr const char* content = - "\t some text " - ; - static constexpr const char* expected = - "some text"; - - std::string actual(content); - status_t status; - bool res = process_char('\n', actual, status); - process_char_check(res, status, actual, expected, symbol_t::NEWLINE); -} - -TEST_F(lexer_routines_fixture, process_char_semicolon) -{ - static constexpr const char* content = - "\v\t some text \r\v\f \v" - ; - static constexpr const char* expected = - "some text"; - - std::string actual(content); - status_t status; - bool res = process_char(';', actual, status); - process_char_check(res, status, actual, expected, symbol_t::SEMICOLON); -} - -TEST_F(lexer_routines_fixture, process_char_open_brace) -{ - static constexpr const char* 
content = - " \v some text \v" - ; - static constexpr const char* expected = - "some text"; - - std::string actual(content); - status_t status; - bool res = process_char('{', actual, status); - process_char_check(res, status, actual, expected, symbol_t::OPEN_BRACE); -} - -TEST_F(lexer_routines_fixture, process_char_close_brace) -{ - static constexpr const char* content = - " \v some text \v" - ; - static constexpr const char* expected = - "some text"; - - std::string actual(content); - status_t status; - bool res = process_char('}', actual, status); - process_char_check(res, status, actual, expected, symbol_t::CLOSE_BRACE); -} - -TEST_F(lexer_routines_fixture, process_char_default) -{ - static constexpr const char* content = - " \v some text \v" - ; - static constexpr const char* expected = content; - - std::string actual(content); - status_t status; - bool res = process_char('a', actual, status); - - EXPECT_FALSE(res); - EXPECT_EQ(actual, expected); - EXPECT_EQ(status.tokens.size(), static_cast(0)); -} - -//////////////////////////////////////////////////////////////////////// -// tokenize_tag_name TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, tokenize_tag_name_sdesc) -{ - static constexpr const char* content = - "sdesc\t " - ; - static constexpr const char* text_content = - "some text"; - - std::string text(text_content); - write_file(content); - file_reader reader(file); - status_t status; // context is none - tokenize_tag_name(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(2)); - check_token(status.tokens[0].name, symbol_t::TEXT, - status.tokens[0].content, text_content); - check_token(status.tokens[1].name, symbol_t::TAGNAME, - status.tokens[1].content, "sdesc"); - EXPECT_EQ(reader.peek(), '\t'); -} - -TEST_F(lexer_routines_fixture, tokenize_tag_name_param) -{ - static constexpr const char* content = - "param\n \t " - ; - static constexpr const char* text_content = - "some text"; - - std::string text(text_content); - write_file(content); - file_reader reader(file); - status_t status; // context is none - tokenize_tag_name(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(2)); - check_token(status.tokens[0].name, symbol_t::TEXT, - status.tokens[0].content, text_content); - check_token(status.tokens[1].name, symbol_t::TAGNAME, - status.tokens[1].content, "param"); - EXPECT_EQ(reader.peek(), '\n'); -} - -TEST_F(lexer_routines_fixture, tokenize_tag_name_tparam) -{ - static constexpr const char* content = - "tparam\n \t " - ; - static constexpr const char* text_content = - "some text"; - - std::string text(text_content); - write_file(content); - file_reader reader(file); - status_t status; // context is none - tokenize_tag_name(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(2)); - check_token(status.tokens[0].name, symbol_t::TEXT, - status.tokens[0].content, text_content); - check_token(status.tokens[1].name, symbol_t::TAGNAME, - status.tokens[1].content, "tparam"); - EXPECT_EQ(reader.peek(), '\n'); -} - -TEST_F(lexer_routines_fixture, tokenize_tag_name_invalid) -{ - static constexpr const char* content = - "tparram\n \t " - ; - static constexpr const char* text_content = - "some text"; - - std::string text(text_content); - write_file(content); - file_reader reader(file); - status_t status; // context is none - tokenize_tag_name(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(0)); - EXPECT_EQ(text, std::string(text_content) + 
"@tparram"); - EXPECT_EQ(reader.peek(), '\n'); -} - -TEST_F(lexer_routines_fixture, tokenize_tag_name_eof) -{ - static constexpr const char* content = - "tparam" - ; - static constexpr const char* text_content = - "some text"; - - std::string text(text_content); - write_file(content); - file_reader reader(file); - status_t status; // context is none - tokenize_tag_name(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(2)); - check_token(status.tokens[0].name, symbol_t::TEXT, - status.tokens[0].content, text_content); - check_token(status.tokens[1].name, symbol_t::TAGNAME, - status.tokens[1].content, "tparam"); - EXPECT_EQ(reader.peek(), static_cast(file_reader::termination)); -} - -//////////////////////////////////////////////////////////////////////// -// process_tag_name TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, process_tag_name_valid) -{ - static constexpr const char* content = - "param x\tsome int\n" - ; - static constexpr const char* text_content = - " some existing text... \n"; - static constexpr const char* expected_text = - "some existing text..."; - - write_file(content); - file_reader reader(file); - status_t status; - std::string text(text_content); - bool res = process_tag_name('@', text, reader, status); - - EXPECT_TRUE(res); - EXPECT_EQ(status.tokens.size(), static_cast(2)); - tokenize_text_check(text, status.tokens[0], expected_text); - check_token(status.tokens[1].name, symbol_t::TAGNAME, - status.tokens[1].content, "param"); -} - -TEST_F(lexer_routines_fixture, process_tag_name_invalid) -{ - static constexpr const char* content = - "xparam x\tsome int\n" - ; - static constexpr const char* text_content = - " some existing text... \n"; - - write_file(content); - file_reader reader(file); - status_t status; - std::string text(text_content); - bool res = process_tag_name('m', text, reader, status); - - EXPECT_FALSE(res); - EXPECT_EQ(status.tokens.size(), static_cast(0)); -} - -//////////////////////////////////////////////////////////////////////// -// process_line_comment TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, process_line_comment_valid) -{ - static constexpr const char* content = - "/ some special content...\n" - ; - static constexpr const char* text_content = - "\n some text... \t"; - static constexpr const char* expected_text = - "some text..."; - - write_file(content); - file_reader reader(file); - status_t status; - std::string text(text_content); - process_line_comment(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(2)); - tokenize_text_check(text, status.tokens[0], expected_text); - check_token(status.tokens[1].name, symbol_t::BEGIN_LINE_COMMENT, - status.tokens[1].content, ""); -} - -TEST_F(lexer_routines_fixture, process_line_comment_invalid_nospace) -{ - static constexpr const char* content = - "/some special content...\n" - ; - static constexpr const char* text_content = - "\n some text... \t"; - - write_file(content); - file_reader reader(file); - status_t status; - std::string text(text_content); - process_line_comment(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(0)); - EXPECT_EQ(text, text_content); // text unchanged -} - -TEST_F(lexer_routines_fixture, process_line_comment_invalid_noslash) -{ - static constexpr const char* content = - " some special content...\n" - ; - static constexpr const char* text_content = - "\n some text... 
\t"; - - write_file(content); - file_reader reader(file); - status_t status; - std::string text(text_content); - process_line_comment(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(0)); - EXPECT_EQ(text, text_content); // text unchanged -} - -//////////////////////////////////////////////////////////////////////// -// process_block_comment TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, process_block_comment_valid) -{ - static constexpr const char* content = - "! some special content...\n" - ; - static constexpr const char* text_content = - "\n some text... \t"; - static constexpr const char* expected_text = - "some text..."; - - write_file(content); - file_reader reader(file); - status_t status; - std::string text(text_content); - process_block_comment(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(2)); - tokenize_text_check(text, status.tokens[0], expected_text); - check_token(status.tokens[1].name, symbol_t::BEGIN_BLOCK_COMMENT, - status.tokens[1].content, ""); -} - -TEST_F(lexer_routines_fixture, process_block_comment_invalid_nospace) -{ - static constexpr const char* content = - "!some special content...\n" - ; - static constexpr const char* text_content = - "\n some text... \t"; - - write_file(content); - file_reader reader(file); - status_t status; - std::string text(text_content); - process_block_comment(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(0)); - EXPECT_EQ(text, text_content); // text unchanged -} - -TEST_F(lexer_routines_fixture, process_block_comment_invalid_noexclam) -{ - static constexpr const char* content = - " some special content...\n" - ; - static constexpr const char* text_content = - "\n some text... \t"; - - write_file(content); - file_reader reader(file); - status_t status; - std::string text(text_content); - process_block_comment(text, reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(0)); - EXPECT_EQ(text, text_content); // text unchanged -} - -//////////////////////////////////////////////////////////////////////// -// process_tags TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, process_string_invalid_comment) -{ - static constexpr const char* content = - "some content...\n " - ; - static constexpr const char* text_content = - " some text... "; - - write_file(content); - file_reader reader(file); - status_t status; // context is none - std::string text(text_content); - bool res = process_string('/', text, reader, status); - - EXPECT_TRUE(res); - EXPECT_EQ(status.tokens.size(), static_cast(0)); - EXPECT_EQ(text, std::string(text_content) + "/s"); -} - -TEST_F(lexer_routines_fixture, process_string_invalid_slash) -{ - static constexpr const char* content = - "some content...\n " - ; - static constexpr const char* text_content = - " some text... 
"; - - write_file(content); - file_reader reader(file); - status_t status; // context is none - std::string text(text_content); - bool res = process_string('x', text, reader, status); - - EXPECT_FALSE(res); - EXPECT_EQ(status.tokens.size(), static_cast(0)); - EXPECT_EQ(text, text_content); -} - -//////////////////////////////////////////////////////////////////////// -// process TESTS -//////////////////////////////////////////////////////////////////////// - -TEST_F(lexer_routines_fixture, process) -{ - static constexpr const char* content = - "#include // for json\n" - "// this is some comment to ignore\n" - "/* this is another comment to ignore \n*/" - "\n" - " /// description... @sdesc some short description\n" - " /*! @param x some int\n" - " * that we care about\n" - " */" - "inline f(int x);" - "struct A {const char* p = \"@param\"};" - ; - - write_file(content); - file_reader reader(file); - status_t status; // context is none - process(reader, status); - - EXPECT_EQ(status.tokens.size(), static_cast(27)); - check_token(status.tokens[0].name, symbol_t::TEXT, - status.tokens[0].content, "#include "); - check_token(status.tokens[1].name, symbol_t::NEWLINE, - status.tokens[1].content, ""); - check_token(status.tokens[2].name, symbol_t::BEGIN_LINE_COMMENT, - status.tokens[2].content, ""); - check_token(status.tokens[3].name, symbol_t::TEXT, - status.tokens[3].content, "description..."); - EXPECT_EQ(status.tokens[3].leading_ws_count, static_cast(1)); - - check_token(status.tokens[4].name, symbol_t::TAGNAME, - status.tokens[4].content, "sdesc"); - check_token(status.tokens[5].name, symbol_t::TEXT, - status.tokens[5].content, "some short description"); - EXPECT_EQ(status.tokens[5].leading_ws_count, static_cast(2)); - - check_token(status.tokens[6].name, symbol_t::NEWLINE, - status.tokens[6].content, ""); - check_token(status.tokens[7].name, symbol_t::BEGIN_BLOCK_COMMENT, - status.tokens[7].content, ""); - check_token(status.tokens[8].name, symbol_t::TAGNAME, - status.tokens[8].content, "param"); - check_token(status.tokens[9].name, symbol_t::TEXT, - status.tokens[9].content, "x some int"); - EXPECT_EQ(status.tokens[9].leading_ws_count, static_cast(1)); - - check_token(status.tokens[10].name, symbol_t::NEWLINE, - status.tokens[10].content, ""); - check_token(status.tokens[11].name, symbol_t::STAR, - status.tokens[11].content, ""); - check_token(status.tokens[12].name, symbol_t::TEXT, - status.tokens[12].content, "that we care about"); - EXPECT_EQ(status.tokens[12].leading_ws_count, static_cast(1)); - - check_token(status.tokens[13].name, symbol_t::NEWLINE, - status.tokens[13].content, ""); - check_token(status.tokens[14].name, symbol_t::END_BLOCK_COMMENT, - status.tokens[14].content, ""); - check_token(status.tokens[15].name, symbol_t::TEXT, - status.tokens[15].content, "inline f(int x)"); - check_token(status.tokens[16].name, symbol_t::SEMICOLON, - status.tokens[16].content, ""); - check_token(status.tokens[17].name, symbol_t::TEXT, - status.tokens[17].content, "struct A"); - check_token(status.tokens[18].name, symbol_t::OPEN_BRACE, - status.tokens[18].content, ""); - check_token(status.tokens[19].name, symbol_t::TEXT, - status.tokens[19].content, "const char"); - check_token(status.tokens[20].name, symbol_t::STAR, - status.tokens[20].content, ""); - check_token(status.tokens[21].name, symbol_t::TEXT, - status.tokens[21].content, "p = \""); - EXPECT_EQ(status.tokens[21].leading_ws_count, static_cast(1)); - - check_token(status.tokens[22].name, symbol_t::TAGNAME, - 
status.tokens[22].content, "param"); - check_token(status.tokens[23].name, symbol_t::TEXT, - status.tokens[23].content, "\""); - check_token(status.tokens[24].name, symbol_t::CLOSE_BRACE, - status.tokens[24].content, ""); - check_token(status.tokens[25].name, symbol_t::SEMICOLON, - status.tokens[25].content, ""); - check_token(status.tokens[26].name, symbol_t::END_OF_FILE, - status.tokens[26].content, ""); -} - -} // namespace lexer_details -} // namespace core -} // namespace docgen From 68b59ed3582739e1835066848124b3cdce20c288 Mon Sep 17 00:00:00 2001 From: James Yang Date: Sat, 18 Jan 2020 15:08:01 -0500 Subject: [PATCH 09/23] Modify configuration to fix at release points for libs --- configure.sh | 25 +++++++++++++++++++++++-- libs/benchmark | 2 +- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/configure.sh b/configure.sh index 3b5097a..417a613 100755 --- a/configure.sh +++ b/configure.sh @@ -1,14 +1,35 @@ -#!/bin/sh +#!/bin/bash + +# directory where current shell script resides +PROJECTDIR=$(dirname "$BASH_SOURCE") + +cd "$PROJECTDIR" # If setup.sh was called before if [ -d "libs/benchmark/googletest" ]; then rm -rf libs/benchmark fi +# Initialize submodules if needed +git submodule init # Update submodule if needed git submodule update --remote -# Setup google benchmark and googletest + +# Setup googletest git clone https://github.com/google/googletest.git libs/benchmark/googletest +# Set google test to specific release tag +cd libs/benchmark/googletest +git fetch --all --tags --prune +git checkout tags/release-1.10.0 -b release-1.10.0 +cd - + +# Setup googlebenchmark +cd libs/benchmark +git fetch --all --tags --prune +git checkout tags/v1.5.0 -b v1.5.0 +cd - + +# Build google benchmark cd libs/benchmark mkdir -p build && cd build diff --git a/libs/benchmark b/libs/benchmark index 5ce2429..090faec 160000 --- a/libs/benchmark +++ b/libs/benchmark @@ -1 +1 @@ -Subproject commit 5ce2429af7a8481581896afaa480552cc7584808 +Subproject commit 090faecb454fbd6e6e17a75ef8146acb037118d4 From b242364f4ce8b7c4c5a5a73e0919ce72b378219f Mon Sep 17 00:00:00 2001 From: James Yang Date: Sat, 18 Jan 2020 15:23:54 -0500 Subject: [PATCH 10/23] Use conan to manage nlohmann/json on linux --- .gitignore | 3 +++ conanfile.txt | 2 ++ 2 files changed, 5 insertions(+) create mode 100644 conanfile.txt diff --git a/.gitignore b/.gitignore index 25871d0..4234b19 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ node_modules build/ +conan* +!conanfile.txt +graph_info.json diff --git a/conanfile.txt b/conanfile.txt new file mode 100644 index 0000000..ca65909 --- /dev/null +++ b/conanfile.txt @@ -0,0 +1,2 @@ +[requires] +nlohmann_json/3.7.3 From e6627fb08ba3da76cf507ce0a8cdd7f8474a2401 Mon Sep 17 00:00:00 2001 From: James Yang Date: Sat, 18 Jan 2020 15:30:55 -0500 Subject: [PATCH 11/23] Add configuration for nlohmann json on linux and mac --- configure.sh | 17 +++++++++++++++++ libs/benchmark | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/configure.sh b/configure.sh index 417a613..3e61983 100755 --- a/configure.sh +++ b/configure.sh @@ -5,6 +5,23 @@ PROJECTDIR=$(dirname "$BASH_SOURCE") cd "$PROJECTDIR" +# Install nlohmann/json +if [[ "$OSTYPE" == "linux-gnu" ]]; then + if [ $(command -v conan) == "" ]; then + echo "config fail: conan not installed" + exit 1 + fi + conan install . 
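    # conan install resolves nlohmann_json/3.7.3 from the conanfile.txt
    # at the repository root (added in the previous patch)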
+elif [[ "$OSTYPE" == "darwin"* ]]; then + if [ $(command -v brew) == "" ]; then + echo "config fail: brew not installed" + exit 1 + fi + brew install nlohmann-json +else + echo "config fail: unrecognizable OS" +fi + # If setup.sh was called before if [ -d "libs/benchmark/googletest" ]; then rm -rf libs/benchmark diff --git a/libs/benchmark b/libs/benchmark index 090faec..daff5fe 160000 --- a/libs/benchmark +++ b/libs/benchmark @@ -1 +1 @@ -Subproject commit 090faecb454fbd6e6e17a75ef8146acb037118d4 +Subproject commit daff5fead3fbe22c6fc58310ca3f49caf117f185 From 4a8a1da75136a9410460bb7a93d6c10586bd7d04 Mon Sep 17 00:00:00 2001 From: James Yang Date: Sat, 18 Jan 2020 16:19:41 -0500 Subject: [PATCH 12/23] Move conan to libs and reconfigure --- .gitignore | 1 + CMakeLists.txt | 4 ++++ conanfile.txt | 2 -- configure.sh | 2 ++ libs/conanfile.txt | 5 +++++ 5 files changed, 12 insertions(+), 2 deletions(-) delete mode 100644 conanfile.txt create mode 100644 libs/conanfile.txt diff --git a/.gitignore b/.gitignore index 4234b19..06736e1 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ build/ conan* !conanfile.txt graph_info.json +*Find*.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index c1208dd..697bac1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,10 @@ enable_testing() # Set C++17 standard for project target set(CMAKE_CXX_STANDARD 17) +# Set this such that dependency installation through conan can be found +set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/libs) +message("CMAKE_MODULE_PATH: ${CMAKE_MODULE_PATH}") + # Create DocgenConfigVersion.cmake which contains current project version # This is supposed to help with (major) version compatibility. include(CMakePackageConfigHelpers) diff --git a/conanfile.txt b/conanfile.txt deleted file mode 100644 index ca65909..0000000 --- a/conanfile.txt +++ /dev/null @@ -1,2 +0,0 @@ -[requires] -nlohmann_json/3.7.3 diff --git a/configure.sh b/configure.sh index 3e61983..a641f6c 100755 --- a/configure.sh +++ b/configure.sh @@ -11,7 +11,9 @@ if [[ "$OSTYPE" == "linux-gnu" ]]; then echo "config fail: conan not installed" exit 1 fi + cd libs conan install . 
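    # run from libs/ so conan's cmake_find_package generator drops its
    # Find*.cmake modules there, where CMakeLists.txt now points CMAKE_MODULE_PATH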
+ cd - elif [[ "$OSTYPE" == "darwin"* ]]; then if [ $(command -v brew) == "" ]; then echo "config fail: brew not installed" diff --git a/libs/conanfile.txt b/libs/conanfile.txt new file mode 100644 index 0000000..64b1192 --- /dev/null +++ b/libs/conanfile.txt @@ -0,0 +1,5 @@ +[requires] +nlohmann_json/3.7.3 + +[generators] +cmake_find_package From 58e882430e9ec9789c62ad59d3f7acefaf3d4fca Mon Sep 17 00:00:00 2001 From: James Yang Date: Sat, 18 Jan 2020 16:37:23 -0500 Subject: [PATCH 13/23] Add cstring header for strerror --- src/exceptions/exceptions.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/exceptions/exceptions.hpp b/src/exceptions/exceptions.hpp index b4fc786..8dbe010 100644 --- a/src/exceptions/exceptions.hpp +++ b/src/exceptions/exceptions.hpp @@ -2,6 +2,7 @@ #include #include +#include #include #include From a89bd2f89f52d3f30eb37830ff5e426871c89e7f Mon Sep 17 00:00:00 2001 From: James Yang Date: Sat, 18 Jan 2020 17:24:20 -0500 Subject: [PATCH 14/23] Reimplement TrieNode to have uniqueptr of children --- src/core/lex/trie.hpp | 31 +++++++----- test/core/lex/trie_unittest.cpp | 83 +++++++++++++-------------------- 2 files changed, 52 insertions(+), 62 deletions(-) diff --git a/src/core/lex/trie.hpp b/src/core/lex/trie.hpp index eabe4ce..2df5008 100644 --- a/src/core/lex/trie.hpp +++ b/src/core/lex/trie.hpp @@ -4,6 +4,7 @@ #include #include #include +#include #include namespace docgen { @@ -34,7 +35,7 @@ struct Trie void transition(char c); void back_transition(); bool is_accept() const; - std::unordered_map& get_children(); + typename TrieNode::children_t& get_children(); bool is_reset() const; void reset(); const std::optional& get_symbol() const; @@ -43,6 +44,8 @@ struct Trie struct TrieNode { + using children_t = std::unordered_map>; + // Insert str from current node to update the trie structure. // The string str is read starting from idx. void insert(const std::pair&, size_t = 0); @@ -54,7 +57,7 @@ struct Trie // Symbol will be active if is_accept is true. 
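    // (insert() marks the node terminating a registered string as accepting
    // and stores that string's symbol there, so get_symbol() is meaningful
    // exactly when is_accept() is true.)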
const std::optional& get_symbol() const; - std::unordered_map& get_children(); + children_t& get_children(); std::optional> get_parent(); @@ -65,10 +68,10 @@ struct Trie non_accept }; - State state_ = State::non_accept; // indicates accepting node or not - std::optional symbol_; // symbol for accepting node - std::unordered_map children_; // current node's children - TrieNode* parent_ptr_; // current node's parent + State state_ = State::non_accept; // indicates accepting node or not + std::optional symbol_; // symbol for accepting node + children_t children_; // current node's children + TrieNode* parent_ptr_; // current node's parent }; TrieNode root_; // root of Trie @@ -92,9 +95,13 @@ Trie::TrieNode::insert(const pair_t& pair, size_t idx) } else { - auto& child = children_[str[idx]]; - child.parent_ptr_ = this; - child.insert(pair, idx + 1); + // if no child with str[idx] mapping + if (children_.find(str[idx]) == children_.end()) { + children_.emplace(str[idx], std::make_unique()); + } + auto& child = children_.at(str[idx]); + child->parent_ptr_ = this; + child->insert(pair, idx + 1); } } @@ -113,7 +120,7 @@ Trie::TrieNode::get_symbol() const } template -inline std::unordered_map::TrieNode>& +inline typename Trie::TrieNode::children_t& Trie::TrieNode::get_children() { return children_; @@ -150,7 +157,7 @@ template inline void Trie::transition(char c) { - curr_node_ = curr_node_.get().get_children().at(c); + curr_node_ = *(curr_node_.get().get_children().at(c)); } template @@ -161,7 +168,7 @@ Trie::is_accept() const } template -inline std::unordered_map::TrieNode>& +inline typename Trie::TrieNode::children_t& Trie::get_children() { return curr_node_.get().get_children(); diff --git a/test/core/lex/trie_unittest.cpp b/test/core/lex/trie_unittest.cpp index bc8d483..7fb1eb9 100644 --- a/test/core/lex/trie_unittest.cpp +++ b/test/core/lex/trie_unittest.cpp @@ -35,9 +35,8 @@ TEST_F(trie_fixture, trie_root) EXPECT_TRUE(!trie.is_accept()); EXPECT_TRUE(trie.is_reset()); - auto& children = trie.get_children(); - EXPECT_EQ(children.size(), static_cast(1)); - EXPECT_NE(children.find('a'), children.end()); // found + EXPECT_EQ(trie.get_children().size(), static_cast(1)); + EXPECT_NE(trie.get_children().find('a'), trie.get_children().end()); // found } TEST_F(trie_fixture, trie_transition_child_a) @@ -55,10 +54,9 @@ TEST_F(trie_fixture, trie_transition_child_a) EXPECT_TRUE(trie.is_accept()); EXPECT_TRUE(!trie.is_reset()); - auto& children = trie.get_children(); - EXPECT_EQ(children.size(), static_cast(2)); - EXPECT_NE(children.find('b'), children.end()); // found - EXPECT_NE(children.find('c'), children.end()); // found + EXPECT_EQ(trie.get_children().size(), static_cast(2)); + EXPECT_NE(trie.get_children().find('b'), trie.get_children().end()); // found + EXPECT_NE(trie.get_children().find('c'), trie.get_children().end()); // found } TEST_F(trie_fixture, trie_transition_child_b) @@ -76,9 +74,8 @@ TEST_F(trie_fixture, trie_transition_child_b) EXPECT_TRUE(!trie.is_accept()); EXPECT_TRUE(!trie.is_reset()); - auto& children = trie.get_children(); - EXPECT_EQ(children.size(), static_cast(1)); - EXPECT_NE(children.find('c'), children.end()); // found + EXPECT_EQ(trie.get_children().size(), static_cast(1)); + EXPECT_NE(trie.get_children().find('c'), trie.get_children().end()); // found } TEST_F(trie_fixture, trie_transition_child_bc) @@ -98,8 +95,7 @@ TEST_F(trie_fixture, trie_transition_child_bc) EXPECT_TRUE(trie.is_accept()); EXPECT_TRUE(!trie.is_reset()); - auto& children = trie.get_children(); - 
EXPECT_EQ(children.size(), static_cast(0)); + EXPECT_EQ(trie.get_children().size(), static_cast(0)); } TEST_F(trie_fixture, trie_transition_child_c) @@ -118,8 +114,7 @@ TEST_F(trie_fixture, trie_transition_child_c) EXPECT_TRUE(trie.is_accept()); EXPECT_TRUE(!trie.is_reset()); - auto& children = trie.get_children(); - EXPECT_EQ(children.size(), static_cast(0)); + EXPECT_EQ(trie.get_children().size(), static_cast(0)); } TEST_F(trie_fixture, trie_reset_root) @@ -136,9 +131,8 @@ TEST_F(trie_fixture, trie_reset_root) EXPECT_TRUE(!trie.is_accept()); EXPECT_TRUE(trie.is_reset()); - auto& children = trie.get_children(); - EXPECT_EQ(children.size(), static_cast(1)); - EXPECT_NE(children.find('a'), children.end()); // found + EXPECT_EQ(trie.get_children().size(), static_cast(1)); + EXPECT_NE(trie.get_children().find('a'), trie.get_children().end()); // found } TEST_F(trie_fixture, trie_reset_child_a) @@ -157,9 +151,8 @@ TEST_F(trie_fixture, trie_reset_child_a) EXPECT_TRUE(!trie.is_accept()); EXPECT_TRUE(trie.is_reset()); - auto& children = trie.get_children(); - EXPECT_EQ(children.size(), static_cast(1)); - EXPECT_NE(children.find('a'), children.end()); // found + EXPECT_EQ(trie.get_children().size(), static_cast(1)); + EXPECT_NE(trie.get_children().find('a'), trie.get_children().end()); // found } TEST_F(trie_fixture, trie_reset_child_a_b) @@ -179,9 +172,8 @@ TEST_F(trie_fixture, trie_reset_child_a_b) EXPECT_TRUE(!trie.is_accept()); EXPECT_TRUE(trie.is_reset()); - auto& children = trie.get_children(); - EXPECT_EQ(children.size(), static_cast(1)); - EXPECT_NE(children.find('a'), children.end()); // found + EXPECT_EQ(trie.get_children().size(), static_cast(1)); + EXPECT_NE(trie.get_children().find('a'), trie.get_children().end()); // found } TEST_F(trie_fixture, trie_back_transition_root) @@ -210,9 +202,8 @@ TEST_F(trie_fixture, trie_back_transition_child_a) EXPECT_TRUE(!trie.is_accept()); EXPECT_TRUE(trie.is_reset()); - auto& children = trie.get_children(); - EXPECT_EQ(children.size(), static_cast(1)); - EXPECT_NE(children.find('a'), children.end()); // found + EXPECT_EQ(trie.get_children().size(), static_cast(1)); + EXPECT_NE(trie.get_children().find('a'), trie.get_children().end()); // found } TEST_F(trie_fixture, trie_back_transition_child_ab) @@ -233,10 +224,9 @@ TEST_F(trie_fixture, trie_back_transition_child_ab) EXPECT_TRUE(trie.is_accept()); EXPECT_TRUE(!trie.is_reset()); - auto children = trie.get_children(); - EXPECT_EQ(children.size(), static_cast(2)); - EXPECT_NE(children.find('b'), children.end()); // found - EXPECT_NE(children.find('c'), children.end()); // found + EXPECT_EQ(trie.get_children().size(), static_cast(2)); + EXPECT_NE(trie.get_children().find('b'), trie.get_children().end()); // found + EXPECT_NE(trie.get_children().find('c'), trie.get_children().end()); // found // back to root trie.back_transition(); @@ -245,9 +235,8 @@ TEST_F(trie_fixture, trie_back_transition_child_ab) EXPECT_TRUE(!trie.is_accept()); EXPECT_TRUE(trie.is_reset()); - children = trie.get_children(); - EXPECT_EQ(children.size(), static_cast(1)); - EXPECT_NE(children.find('a'), children.end()); // found + EXPECT_EQ(trie.get_children().size(), static_cast(1)); + EXPECT_NE(trie.get_children().find('a'), trie.get_children().end()); // found } //////////////////////////////////////////// @@ -266,10 +255,9 @@ TEST_F(trie_fixture, trie_off_by_one_prefix) EXPECT_TRUE(!trie.is_accept()); EXPECT_TRUE(trie.is_reset()); - auto children = trie.get_children(); - EXPECT_EQ(children.size(), static_cast(2)); - 
EXPECT_NE(children.find('a'), children.end()); // found - EXPECT_NE(children.find('b'), children.end()); // found + EXPECT_EQ(trie.get_children().size(), static_cast(2)); + EXPECT_NE(trie.get_children().find('a'), trie.get_children().end()); // found + EXPECT_NE(trie.get_children().find('b'), trie.get_children().end()); // found // check child 'a' trie.transition('a'); @@ -278,9 +266,8 @@ TEST_F(trie_fixture, trie_off_by_one_prefix) EXPECT_TRUE(!trie.is_accept()); EXPECT_TRUE(!trie.is_reset()); - children = trie.get_children(); - EXPECT_EQ(children.size(), static_cast(1)); - EXPECT_NE(children.find('b'), children.end()); // found + EXPECT_EQ(trie.get_children().size(), static_cast(1)); + EXPECT_NE(trie.get_children().find('b'), trie.get_children().end()); // found // check child 'a'->'b' trie.transition('b'); @@ -289,8 +276,7 @@ TEST_F(trie_fixture, trie_off_by_one_prefix) EXPECT_TRUE(trie.is_accept()); EXPECT_TRUE(!trie.is_reset()); - children = trie.get_children(); - EXPECT_EQ(children.size(), static_cast(0)); + EXPECT_EQ(trie.get_children().size(), static_cast(0)); // up to child 'a' trie.back_transition(); @@ -305,9 +291,8 @@ TEST_F(trie_fixture, trie_off_by_one_prefix) EXPECT_TRUE(!trie.is_accept()); EXPECT_TRUE(!trie.is_reset()); - children = trie.get_children(); - EXPECT_EQ(children.size(), static_cast(1)); - EXPECT_NE(children.find('a'), children.end()); // found + EXPECT_EQ(trie.get_children().size(), static_cast(1)); + EXPECT_NE(trie.get_children().find('a'), trie.get_children().end()); // found // check child 'b'->'a' trie.transition('a'); @@ -316,9 +301,8 @@ TEST_F(trie_fixture, trie_off_by_one_prefix) EXPECT_TRUE(!trie.is_accept()); EXPECT_TRUE(!trie.is_reset()); - children = trie.get_children(); - EXPECT_EQ(children.size(), static_cast(1)); - EXPECT_NE(children.find('b'), children.end()); // found + EXPECT_EQ(trie.get_children().size(), static_cast(1)); + EXPECT_NE(trie.get_children().find('b'), trie.get_children().end()); // found // check child 'b'->'a'->'b' trie.transition('b'); @@ -327,8 +311,7 @@ TEST_F(trie_fixture, trie_off_by_one_prefix) EXPECT_TRUE(trie.is_accept()); EXPECT_TRUE(!trie.is_reset()); - children = trie.get_children(); - EXPECT_EQ(children.size(), static_cast(0)); + EXPECT_EQ(trie.get_children().size(), static_cast(0)); } } // namespace lex From 954f449a23dbe5c807b3424b67dbbf0dbd02d743 Mon Sep 17 00:00:00 2001 From: James Yang Date: Sat, 18 Jan 2020 17:31:11 -0500 Subject: [PATCH 15/23] Remove io unittests for now --- test/CMakeLists.txt | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d5ab8e4..125e82f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -27,18 +27,18 @@ create_test("exceptions_unittests" exceptions_unittests) # File IO Unit Tests ###################################################### -add_executable(io_unittests - ${CMAKE_CURRENT_SOURCE_DIR}/io/file_reader_unittest.cpp - ) - -create_test("io_unittests" io_unittests) - -# copy data directory into where io_unittests executable ends up -add_custom_command( - TARGET io_unittests POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_directory - ${CMAKE_CURRENT_SOURCE_DIR}/io/io_data/ - $/io_data) +#add_executable(io_unittests +# ${CMAKE_CURRENT_SOURCE_DIR}/io/file_reader_unittest.cpp +# ) +# +#create_test("io_unittests" io_unittests) +# +## copy data directory into where io_unittests executable ends up +#add_custom_command( +# TARGET io_unittests POST_BUILD +# COMMAND ${CMAKE_COMMAND} -E 
copy_directory +# ${CMAKE_CURRENT_SOURCE_DIR}/io/io_data/ +# $/io_data) ###################################################### # Core Unit Tests From 12385f85301376c291c2ba8f0ecbaa9b4713f9a6 Mon Sep 17 00:00:00 2001 From: James Yang Date: Sat, 18 Jan 2020 17:44:58 -0500 Subject: [PATCH 16/23] Update google benchmark --- libs/benchmark | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/benchmark b/libs/benchmark index daff5fe..090faec 160000 --- a/libs/benchmark +++ b/libs/benchmark @@ -1 +1 @@ -Subproject commit daff5fead3fbe22c6fc58310ca3f49caf117f185 +Subproject commit 090faecb454fbd6e6e17a75ef8146acb037118d4 From 782126a063615221eb3e7c7e80e260f4537c517a Mon Sep 17 00:00:00 2001 From: James Yang Date: Sat, 18 Jan 2020 17:45:45 -0500 Subject: [PATCH 17/23] Update google benchmark again --- libs/benchmark | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/benchmark b/libs/benchmark index 090faec..daff5fe 160000 --- a/libs/benchmark +++ b/libs/benchmark @@ -1 +1 @@ -Subproject commit 090faecb454fbd6e6e17a75ef8146acb037118d4 +Subproject commit daff5fead3fbe22c6fc58310ca3f49caf117f185 From 1456ef67fae0295a118359e609c07ff45e722efc Mon Sep 17 00:00:00 2001 From: James Yang Date: Sat, 18 Jan 2020 17:53:57 -0500 Subject: [PATCH 18/23] Update configure to recursively update submodule --- configure.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.sh b/configure.sh index a641f6c..363eecd 100755 --- a/configure.sh +++ b/configure.sh @@ -30,9 +30,9 @@ if [ -d "libs/benchmark/googletest" ]; then fi # Initialize submodules if needed -git submodule init +git submodule update --init # Update submodule if needed -git submodule update --remote +git submodule update --recursive --remote # Setup googletest git clone https://github.com/google/googletest.git libs/benchmark/googletest From 27f7d05b58129286a1d077aba16061a78ccdfea2 Mon Sep 17 00:00:00 2001 From: James Yang Date: Sun, 19 Jan 2020 09:32:04 -0500 Subject: [PATCH 19/23] Add legacy files for benchmarking --- src/core/lex/legacy/lexer.hpp | 37 +++ src/core/lex/legacy/lexer_routines.hpp | 301 +++++++++++++++++++++++++ src/core/lex/legacy/status.hpp | 17 ++ src/core/lex/legacy/symbol.hpp | 44 ++++ src/core/{ => lex/legacy}/token.hpp | 10 +- 5 files changed, 407 insertions(+), 2 deletions(-) create mode 100644 src/core/lex/legacy/lexer.hpp create mode 100644 src/core/lex/legacy/lexer_routines.hpp create mode 100644 src/core/lex/legacy/status.hpp create mode 100644 src/core/lex/legacy/symbol.hpp rename src/core/{ => lex/legacy}/token.hpp (72%) diff --git a/src/core/lex/legacy/lexer.hpp b/src/core/lex/legacy/lexer.hpp new file mode 100644 index 0000000..56f647e --- /dev/null +++ b/src/core/lex/legacy/lexer.hpp @@ -0,0 +1,37 @@ +#pragma once +#include "lexer_routines.hpp" + +namespace docgen { +namespace core { + +struct Lexer +{ + using symbol_t = lexer_details::symbol_t; + using file_reader = lexer_details::file_reader; + using status_t = lexer_details::status_t; + + Lexer(FILE* file) + : reader_(file) + { + status_.tokens.reserve(DEFAULT_TOKEN_ARR_SIZE); + } + + void process() + { + lexer_details::process(reader_, status_); + } + + const status_t::token_arr_t& get_tokens() const + { + return status_.tokens; + } + +private: + static constexpr size_t DEFAULT_TOKEN_ARR_SIZE = 50; + + file_reader reader_; + status_t status_; // keeps track of last token value (enum) +}; + +} // namespace core +} // namespace docgen diff --git a/src/core/lex/legacy/lexer_routines.hpp 
b/src/core/lex/legacy/lexer_routines.hpp
new file mode 100644
index 0000000..d6e8dd0
--- /dev/null
+++ b/src/core/lex/legacy/lexer_routines.hpp
@@ -0,0 +1,301 @@
+#pragma once
+#include "core/lex/legacy/token.hpp"
+#include "core/lex/legacy/status.hpp"
+#include "core/lex/legacy/symbol.hpp"
+#include "io/file_reader.hpp"
+#include "core/tag_set.hpp"
+
+namespace docgen {
+namespace core {
+namespace lex {
+namespace legacy {
+
+static constexpr size_t DEFAULT_STRING_RESERVE_SIZE = 50;
+
+using file_reader = io::file_reader;
+using symbol_t = Symbol;
+using token_t = Token<symbol_t>;
+using status_t = Status<token_t>;
+
+// Reads and ignores chars until func(c) evaluates to false or reading terminates,
+// where c is the current char read.
+// Returns the last char read that terminated the function.
+template <class Termination>
+inline int ignore_until(file_reader& reader, Termination func)
+{
+    int c = 0;
+    while (((c = reader.read()) != file_reader::termination) && func(c));
+    return c;
+}
+
+// Reads and stores chars until func(c) evaluates to false or reading terminates,
+// where c is the current char read.
+// Returns the last char read that terminated the function.
+template <class Termination>
+inline int read_until(file_reader& reader, Termination func, std::string& line)
+{
+    int c = 0;
+    line.reserve(DEFAULT_STRING_RESERVE_SIZE);
+    while (((c = reader.read()) != file_reader::termination) && func(c)) {
+        line.push_back(c);
+    }
+    return c;
+}
+
+// Trims all leading and trailing whitespaces (one of " \t\n\v\f\r") from line.
+// Line is directly modified.
+// Returns leading whitespace count of original line.
+inline uint32_t trim(std::string& line)
+{
+    static constexpr const char* whitespaces = " \t\n\v\f\r";
+
+    // find first non-whitespace
+    const auto begin = line.find_first_not_of(whitespaces);
+
+    // find last non-whitespace
+    const auto end = line.find_last_not_of(whitespaces);
+
+    // If the substring is invalid, simply clear line and return the length of the string.
+    // By symmetry, begin and end will be npos if and only if the string only
+    // consists of whitespaces. In this case, the leading whitespace count is
+    // simply the length of the string.
+    if (begin == std::string::npos && end == std::string::npos) {
+        uint32_t leading_ws_count = line.size();
+        line.clear();
+        return leading_ws_count;
+    }
+
+    // otherwise, replace with substring
+    line = line.substr(begin, end - begin + 1);
+
+    return begin; // number of leading whitespaces
+}
+
+// Trims text, tokenizes it, clears it, and reserves DEFAULT_STRING_RESERVE_SIZE.
+// (Trimmed) text is only tokenized if it is non-empty.
+inline void tokenize_text(std::string& text, status_t& status)
+{
+    // trim whitespaces from text first
+    uint32_t leading_whitespace_count = trim(text);
+    // tokenize current TEXT only if it is non-empty
+    if (!text.empty()) {
+        status.tokens.emplace_back(symbol_t::TEXT, std::move(text), leading_whitespace_count);
+    }
+    // clear and reserve
+    text.clear();
+    text.reserve(DEFAULT_STRING_RESERVE_SIZE);
+}
+
+// If c is one of the single-char special tokens (see symbol.hpp),
+// then text is tokenized first, followed by the single-char special token.
+// The tokens are appended to status.tokens in this order.
+// Otherwise, no operations are performed.
+// Returns true if and only if a single-char special token was created.
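+// Example: with text == "ab", process_char('\n', text, status) appends
+// TEXT("ab") and then NEWLINE to status.tokens and returns true; for a
+// non-special c such as 'x', nothing happens and false is returned.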
+inline bool process_char(int c, std::string& text, status_t& status) +{ + switch (c) { + case '\n': + tokenize_text(text, status); + status.tokens.emplace_back(symbol_t::NEWLINE); + return true; + case ';': + tokenize_text(text, status); + status.tokens.emplace_back(symbol_t::SEMICOLON); + return true; + case '{': + tokenize_text(text, status); + status.tokens.emplace_back(symbol_t::OPEN_BRACE); + return true; + case '}': + tokenize_text(text, status); + status.tokens.emplace_back(symbol_t::CLOSE_BRACE); + return true; + default: + return false; + } +} + +// If tag name is not a valid one, assume it is simply text. +// It is expected that the caller immediately read "@" before calling. +inline void tokenize_tag_name(std::string& text, file_reader& reader, status_t& status) +{ + static constexpr const auto is_alpha = + [](char x) {return isalpha(x);}; + + // parse tag + std::string tagname; + int c = read_until(reader, is_alpha, tagname); + reader.back(c); + + // if valid tag, append text token then token with tag name + if (tag_set.find(tagname) != tag_set.end()) { + tokenize_text(text, status); + status.tokens.emplace_back(symbol_t::TAGNAME, std::move(tagname)); + } + + // otherwise, assume part of text: append "@" then tag name to text + else { + text.push_back('@'); + text.append(tagname); + } +} + +// If c is '@', try to tokenize tag name. +// Behavior is the same as tokenize_tag_name. +// Returns true if and only if c is '@'. +inline bool process_tag_name(int c, std::string& text, + file_reader& reader, status_t& status) +{ + if (c == '@') { + tokenize_tag_name(text, reader, status); + return true; + } + return false; +} + +// It is expected that caller has read the string "//" immediately before calling. +inline void process_line_comment(std::string& text, file_reader& reader, status_t& status) +{ + static constexpr const auto is_not_newline = + [](char x) {return x != '\n';}; + + int c = reader.read(); + + if (c == '/') { + c = reader.read(); + // valid triple-slash comment + if (isspace(c)) { + tokenize_text(text, status); + status.tokens.emplace_back(symbol_t::BEGIN_LINE_COMMENT); + reader.back(c); // in case it's a single-char token + } + // invalid triple-slash comment + else { + // no need to read back since c cannot be a whitespace and we ignore anyway + ignore_until(reader, is_not_newline); + } + } + + // invalid triple-slash comment + else { + reader.back(c); // the character just read may be '\n' + ignore_until(reader, is_not_newline); + } +} + +// It is expected that caller has read the string "/*" immediately before calling. +inline void process_block_comment(std::string& text, file_reader& reader, status_t& status) +{ + const auto is_not_end_block = + [&](char x) {return (x != '*') || (reader.peek() != '/');}; + + int c = reader.read(); + + if (c == '!') { + c = reader.read(); + // valid block comment: tokenize text then begin block comment symbol + if (isspace(c)) { + tokenize_text(text, status); + status.tokens.emplace_back(symbol_t::BEGIN_BLOCK_COMMENT); + reader.back(c); // may be special single-char token + } + // regular block comment: ignore text until end and stop tokenizing + else { + ignore_until(reader, is_not_end_block); + reader.read(); // read the '/' + } + } + + // regular block comment + else { + ignore_until(reader, is_not_end_block); // stops after reading '*' in "*/" + reader.read(); // read the '/' after + } +} + +// If c is not '/' or '*', then no operation done and returns false. 
+// If c is '/', and if it's a possible line comment ("//") then same as process_line_comment; +// if it's a possible block comment ("/*") then same as process_block_comment; +// otherwise, text is updated to include all characters read. +// +// If c is '*', and if it is the ending of a block comment ("*/"), text tokenized then END_BLOCK_COMMENT; +// otherwise, text tokenized then STAR. +// +// In any case, returns true if first char has been processed. +inline bool process_string(int c, std::string& text, + file_reader& reader, status_t& status) +{ + // possibly beginning of line or block comment + if (c == '/') { + c = reader.read(); + if (c == '/') { + process_line_comment(text, reader, status); + } + else if (c == '*') { + process_block_comment(text, reader, status); + } + else { + text.push_back('/'); + text.push_back(c); + } + return true; + } + + // possibly ending block comment or a star that can be ignored in the middle of a block comment + else if (c == '*') { + c = reader.read(); + if (c == '/') { + tokenize_text(text, status); + status.tokens.emplace_back(symbol_t::END_BLOCK_COMMENT); + } + else { + tokenize_text(text, status); + status.tokens.emplace_back(symbol_t::STAR); + reader.back(c); + } + return true; + } + + return false; +} + +inline void process(file_reader& reader, status_t& status) +{ + std::string text; + text.reserve(DEFAULT_STRING_RESERVE_SIZE); + int c = 0; + bool processed = false; + + while ((c = reader.read()) != file_reader::termination) { + + // process special single-char + processed = process_char(c, text, status); + if (processed) { + continue; + } + + // process tag name + processed = process_tag_name(c, text, reader, status); + if (processed) { + continue; + } + + // process string tokens + processed = process_string(c, text, reader, status); + if (processed) { + continue; + } + + // otherwise, no special symbol -> push to text + text.push_back(c); + } + + // tokenize last text then EOF + tokenize_text(text, status); + status.tokens.emplace_back(token_t::symbol_t::END_OF_FILE); +} + +} // namespace legacy +} // namespace lex +} // namespace core +} // namespace docgen diff --git a/src/core/lex/legacy/status.hpp b/src/core/lex/legacy/status.hpp new file mode 100644 index 0000000..d29dda4 --- /dev/null +++ b/src/core/lex/legacy/status.hpp @@ -0,0 +1,17 @@ +#pragma once +#include + +namespace docgen { +namespace core { + +template +struct Status +{ + using token_t = TokenType; + using token_arr_t = std::vector; + + token_arr_t tokens; +}; + +} // namespace core +} // namespace docgen diff --git a/src/core/lex/legacy/symbol.hpp b/src/core/lex/legacy/symbol.hpp new file mode 100644 index 0000000..4eb1126 --- /dev/null +++ b/src/core/lex/legacy/symbol.hpp @@ -0,0 +1,44 @@ +#pragma once +#include +#include +#include + +namespace docgen { +namespace core { +namespace lex { +namespace legacy { + +enum class Symbol { + // single-char tokens + END_OF_FILE, + NEWLINE, + SEMICOLON, + STAR, + OPEN_BRACE, + CLOSE_BRACE, + // string tokens + BEGIN_LINE_COMMENT, + BEGIN_BLOCK_COMMENT, + END_BLOCK_COMMENT, + // special tags + TAGNAME, + // default + TEXT +}; + +// Compile-time mapping of strings to corresponding symbol +static MAPBOX_ETERNAL_CONSTEXPR const auto symbol_map = + mapbox::eternal::map({ + {Symbol::SEMICOLON, ";"}, + {Symbol::STAR, "*"}, + {Symbol::OPEN_BRACE, "{"}, + {Symbol::CLOSE_BRACE, "}"}, + {Symbol::BEGIN_LINE_COMMENT, "///"}, + {Symbol::BEGIN_BLOCK_COMMENT, "/*!"}, + {Symbol::END_BLOCK_COMMENT, "*/"}, + }); + +} // namespace legacy +} // namespace 
lex +} // namespace core +} // namespace docgen diff --git a/src/core/token.hpp b/src/core/lex/legacy/token.hpp similarity index 72% rename from src/core/token.hpp rename to src/core/lex/legacy/token.hpp index 4ef3798..72edcaa 100644 --- a/src/core/token.hpp +++ b/src/core/lex/legacy/token.hpp @@ -1,18 +1,21 @@ #pragma once #include -#include +#include "core/symbol.hpp" namespace docgen { namespace core { +namespace lex { +namespace legacy { template struct Token { using symbol_t = SymbolType; - Token(symbol_t name, std::string&& content) + Token(symbol_t name, std::string&& content, uint32_t leading_ws_count=0) : name(name) , content(std::move(content)) + , leading_ws_count(leading_ws_count) {} Token(symbol_t name) @@ -24,6 +27,7 @@ struct Token symbol_t name; std::string content; + uint32_t leading_ws_count; }; template <> @@ -33,5 +37,7 @@ inline const char* Token::c_str() const symbol_map.at(name).c_str() : content.c_str(); } +} +} // namespace lex } // namespace core } // namespace docgen From 87b49ad76aa52b5102765d590a44398623b60fe8 Mon Sep 17 00:00:00 2001 From: James Yang Date: Sun, 19 Jan 2020 09:35:33 -0500 Subject: [PATCH 20/23] Readd new token.hpp --- src/core/token.hpp | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 src/core/token.hpp diff --git a/src/core/token.hpp b/src/core/token.hpp new file mode 100644 index 0000000..4ef3798 --- /dev/null +++ b/src/core/token.hpp @@ -0,0 +1,37 @@ +#pragma once +#include +#include + +namespace docgen { +namespace core { + +template +struct Token +{ + using symbol_t = SymbolType; + + Token(symbol_t name, std::string&& content) + : name(name) + , content(std::move(content)) + {} + + Token(symbol_t name) + : Token(name, "") + {} + + // left undefined for SymbolType != Symbol + const char* c_str() const; + + symbol_t name; + std::string content; +}; + +template <> +inline const char* Token::c_str() const +{ + return (symbol_map.find(name) != symbol_map.end()) ? + symbol_map.at(name).c_str() : content.c_str(); +} + +} // namespace core +} // namespace docgen From 73210ceb2a9866b750feab8a525e2e59f22b7160 Mon Sep 17 00:00:00 2001 From: James Yang Date: Sun, 19 Jan 2020 10:54:56 -0500 Subject: [PATCH 21/23] Modifying configuration and benchmark in cmake --- CMakeLists.txt | 15 +- benchmark/CMakeLists.txt | 24 + benchmark/core/lex/data/data_1.txt | 113 ++++ benchmark/core/lex/data/data_2.txt | 211 +++++++ benchmark/core/lex/data/data_3.txt | 803 +++++++++++++++++++++++++ benchmark/core/lex/lexer_benchmark.cpp | 40 ++ configure.sh | 15 +- src/CMakeLists.txt | 1 + test/CMakeLists.txt | 1 + 9 files changed, 1210 insertions(+), 13 deletions(-) create mode 100644 benchmark/CMakeLists.txt create mode 100644 benchmark/core/lex/data/data_1.txt create mode 100644 benchmark/core/lex/data/data_2.txt create mode 100644 benchmark/core/lex/data/data_3.txt create mode 100644 benchmark/core/lex/lexer_benchmark.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 697bac1..7944435 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,15 +6,12 @@ project("Docgen" VERSION 1.0.0 # This will perform memcheck include(CTest) -# This is to make this library portable to other machines. -# This will be used for install. -include(GNUInstallDirs) - # enables testing enable_testing() -# Set C++17 standard for project target -set(CMAKE_CXX_STANDARD 17) +# This is to make this library portable to other machines. +# This will be used for install. 
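+# (GNUInstallDirs defines the standard CMAKE_INSTALL_* path variables
+# that install() rules consume.)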
+include(GNUInstallDirs) # Set this such that dependency installation through conan can be found set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/libs) @@ -58,11 +55,17 @@ set(ETERNAL_DIR ${PROJECT_SOURCE_DIR}/libs/eternal) # find json library find_package(nlohmann_json 3.2.0 REQUIRED) +# find google benchmark +find_package(benchmark REQUIRED) + # add libs subdirectory add_subdirectory(${PROJECT_SOURCE_DIR}/libs ${PROJECT_BINARY_DIR}/libs) # add src subdirectory add_subdirectory(${PROJECT_SOURCE_DIR}/src ${PROJECT_BINARY_DIR}/src) +# add benchmark subdirectory +add_subdirectory(${PROJECT_SOURCE_DIR}/benchmark ${PROJECT_BINARY_DIR}/benchmark) + # add test subdirectory add_subdirectory(${PROJECT_SOURCE_DIR}/test ${PROJECT_BINARY_DIR}/test) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt new file mode 100644 index 0000000..fe431a0 --- /dev/null +++ b/benchmark/CMakeLists.txt @@ -0,0 +1,24 @@ +add_executable(lexer_benchmark + ${CMAKE_CURRENT_SOURCE_DIR}/core/lex/lexer_benchmark.cpp + # Source dependency + $ + ) +target_compile_features(lexer_benchmark PRIVATE cxx_std_17) +target_include_directories(lexer_benchmark PRIVATE + ${GBENCH_DIR}/include + ${PROJECT_SOURCE_DIR}/src + ${ETERNAL_DIR}/include + ) +target_link_libraries(lexer_benchmark PRIVATE + benchmark::benchmark + benchmark::benchmark_main + pthread + nlohmann_json::nlohmann_json + ) + +# copy data directory into where lexer_benchmark executable ends up +add_custom_command( + TARGET lexer_benchmark POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_directory + ${CMAKE_CURRENT_SOURCE_DIR}/core/lex/data/ + $/data) diff --git a/benchmark/core/lex/data/data_1.txt b/benchmark/core/lex/data/data_1.txt new file mode 100644 index 0000000..2cb7356 --- /dev/null +++ b/benchmark/core/lex/data/data_1.txt @@ -0,0 +1,113 @@ +#include + +namespace docgen { +namespace core { +namespace lex { + +/////////////////////////////////// +// Lexer Implementation +/////////////////////////////////// + +Lexer::Lexer() + : trie_({ + {"\n", Symbol::NEWLINE}, + {" ", Symbol::WHITESPACE}, + {"\t", Symbol::WHITESPACE}, + {"\v", Symbol::WHITESPACE}, + {"\r", Symbol::WHITESPACE}, + {"\f", Symbol::WHITESPACE}, + {";", Symbol::SEMICOLON}, + {"#", Symbol::HASHTAG}, + {"*", Symbol::STAR}, + {"{", Symbol::OPEN_BRACE}, + {"}", Symbol::CLOSE_BRACE}, + {"///", Symbol::BEGIN_SLINE_COMMENT}, + {"/*!", Symbol::BEGIN_SBLOCK_COMMENT}, + {"//", Symbol::BEGIN_NLINE_COMMENT}, + {"/*", Symbol::BEGIN_NBLOCK_COMMENT}, + {"*/", Symbol::END_BLOCK_COMMENT}, + {"@sdesc", Symbol::SDESC}, + {"@tparam", Symbol::TPARAM}, + {"@param", Symbol::PARAM}, + {"@return", Symbol::RETURN} + }) +{} + +void Lexer::process(char c) +{ + this->update_state(); + + auto it = trie_.get_children().find(c); + + // if transition exists + if (it != trie_.get_children().end()) { + buf_.push_back(c); + trie_.transition(c); + return; + } + + // otherwise, no transition exists + + // if not backtracking + if (!this->is_backtracking()) { + // if trie at root + if (trie_.is_reset()) { + text_.push_back(c); + return; + } + text_.append(buf_); + buf_.clear(); + trie_.reset(); + return this->process(c); + } + + // otherwise, currently backtracking + this->backtrack(c); +} + +void Lexer::backtrack(char c) +{ + // tokenize text + this->tokenize_text(); + + // tokenize symbol + for (uint32_t i = 0; i < buf_.size(); ++i) { + trie_.back_transition(); + } + assert(trie_.is_accept()); + auto opt_symbol = trie_.get_symbol(); + assert(static_cast(opt_symbol)); + status_.tokens.emplace(*opt_symbol); + + // move and clear buf_ 
to temp string for reprocessing + std::string reprocess_str(std::move(buf_)); + reprocess_str.push_back(c); + + // reset + this->reset(); + + // reprocess the rest + for (char c : reprocess_str) { + this->process(c); + } +} + +void Lexer::flush() +{ + this->update_state(); + + if (this->is_backtracking()) { + return this->backtrack(0); + } + + // non-backtracking: no parent is an accepting node + // append buf_ to text_ and tokenize text_ + // reset all other fields + text_.append(buf_); + this->tokenize_text(); + this->reset(); +} + +} // namespace lex +} // namespace core +} // namespace docgen diff --git a/benchmark/core/lex/data/data_2.txt b/benchmark/core/lex/data/data_2.txt new file mode 100644 index 0000000..2df5008 --- /dev/null +++ b/benchmark/core/lex/data/data_2.txt @@ -0,0 +1,211 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace docgen { +namespace core { +namespace lex { + +template +struct Trie +{ +private: + struct TrieNode; // forward declaration + +public: + using pair_t = std::pair; + + // Constructs trie node from a list of pairs of string and symbol. + // The string must be of type std::string_view and it must not be empty. + // The symbol must be of type SymbolType. + Trie(const std::initializer_list&); + + // Delete compiler-generated copy/move ctor/assignment + // This ensures that Trie objects are only (default) constructible. + Trie(const Trie&) =delete; + Trie(Trie&&) =delete; + Trie& operator=(const Trie&) =delete; + Trie& operator=(Trie&&) =delete; + + void transition(char c); + void back_transition(); + bool is_accept() const; + typename TrieNode::children_t& get_children(); + bool is_reset() const; + void reset(); + const std::optional& get_symbol() const; + +private: + + struct TrieNode + { + using children_t = std::unordered_map>; + + // Insert str from current node to update the trie structure. + // The string str is read starting from idx. + void insert(const std::pair&, size_t = 0); + + // Returns if current node is an accepting state. + bool is_accept() const; + + // Returns the optional symbol associated with current node. + // Symbol will be active if is_accept is true. 
+ const std::optional& get_symbol() const; + + children_t& get_children(); + + std::optional> get_parent(); + + private: + + enum class State : bool { + accept, + non_accept + }; + + State state_ = State::non_accept; // indicates accepting node or not + std::optional symbol_; // symbol for accepting node + children_t children_; // current node's children + TrieNode* parent_ptr_; // current node's parent + }; + + TrieNode root_; // root of Trie + std::reference_wrapper curr_node_ = root_; // current node +}; + +//////////////////////////////////////////////////////////////// +// TrieNode Implementation +//////////////////////////////////////////////////////////////// + +template +inline void +Trie::TrieNode::insert(const pair_t& pair, size_t idx) +{ + const auto& str = std::get<0>(pair); + + // if string starting from idx is empty, then accepting state + if (str[idx] == '\0') { + state_ = State::accept; + symbol_ = std::get<1>(pair); + } + + else { + // if no child with str[idx] mapping + if (children_.find(str[idx]) == children_.end()) { + children_.emplace(str[idx], std::make_unique()); + } + auto& child = children_.at(str[idx]); + child->parent_ptr_ = this; + child->insert(pair, idx + 1); + } +} + +template +inline bool +Trie::TrieNode::is_accept() const +{ + return state_ == State::accept; +} + +template +inline const std::optional& +Trie::TrieNode::get_symbol() const +{ + return symbol_; +} + +template +inline typename Trie::TrieNode::children_t& +Trie::TrieNode::get_children() +{ + return children_; +} + +template +inline std::optional::TrieNode>> +Trie::TrieNode::get_parent() +{ + if (parent_ptr_) { + return *parent_ptr_; + } + return {}; +} + +//////////////////////////////////////////////////////////////// +// Trie Implementation +//////////////////////////////////////////////////////////////// + +template +inline +Trie::Trie(const std::initializer_list& pairs) + : root_() +{ + for (auto it = pairs.begin(); it != pairs.end(); ++it) { + if (it->first.empty()) { + throw exceptions::control_flow_error("strings must be non-empty"); + } + root_.insert(*it); + } +} + +template +inline void +Trie::transition(char c) +{ + curr_node_ = *(curr_node_.get().get_children().at(c)); +} + +template +inline bool +Trie::is_accept() const +{ + return curr_node_.get().is_accept(); +} + +template +inline typename Trie::TrieNode::children_t& +Trie::get_children() +{ + return curr_node_.get().get_children(); +} + +template +inline bool +Trie::is_reset() const +{ + return &(curr_node_.get()) == &root_; +} + +template +inline void +Trie::reset() +{ + curr_node_ = root_; +} + +template +inline void +Trie::back_transition() +{ + auto&& opt_parent = curr_node_.get().get_parent(); + if (!opt_parent) { + throw exceptions::control_flow_error("Attempt to back transition past the root"); + } + curr_node_ = *opt_parent; +} + +template +inline const std::optional& +Trie::get_symbol() const +{ + return curr_node_.get().get_symbol(); +} + +} // namespace lex +} // namespace core +} // namespace docgen diff --git a/benchmark/core/lex/data/data_3.txt b/benchmark/core/lex/data/data_3.txt new file mode 100644 index 0000000..e7483bb --- /dev/null +++ b/benchmark/core/lex/data/data_3.txt @@ -0,0 +1,803 @@ +#include +#include + +namespace docgen { +namespace core { +namespace lex { + +struct lexer_fixture : ::testing::Test +{ +protected: + using status_t = typename Lexer::status_t; + using token_t = typename Lexer::token_t; + using symbol_t = typename Lexer::symbol_t; + + Lexer lexer; + std::optional token; + + void 
setup_lexer(const char* content) + { + std::string str(content); + for (char c : str) { + lexer.process(c); + } + lexer.flush(); + } +}; + +//////////////////////////////////////////////////////////////////// +// Individual Symbol TESTS +//////////////////////////////////////////////////////////////////// + +// NEWLINE +TEST_F(lexer_fixture, lexer_newline) +{ + static constexpr const char* content = + "somecrazy1492text\nmvn2b" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "somecrazy1492text"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "mvn2b"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE ( ) +TEST_F(lexer_fixture, lexer_whitespace_space) +{ + static constexpr const char* content = + ",m.,m. abn" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, ",m.,m."); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abn"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (\t) +TEST_F(lexer_fixture, lexer_whitespace_t) +{ + static constexpr const char* content = + "h0f2n.1\t1234|" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "h0f2n.1"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "1234|"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (\v) +TEST_F(lexer_fixture, lexer_whitespace_v) +{ + static constexpr const char* content = + "hello!\v" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "hello!"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (\r) +TEST_F(lexer_fixture, lexer_whitespace_r) +{ + static constexpr const char* content = + "hello!\rwsdescorrld!!" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "hello!"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "wsdescorrld!!"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (\f) +TEST_F(lexer_fixture, lexer_whitespace_f) +{ + static constexpr const char* content = + "hello!\fwsdescorrld!!" 
+ ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "hello!"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "wsdescorrld!!"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// WHITESPACE (;) +TEST_F(lexer_fixture, lexer_semicolon) +{ + static constexpr const char* content = + ";wsdescorrld!!" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::SEMICOLON); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "wsdescorrld!!"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// BEGIN_SLINE_COMMENT +TEST_F(lexer_fixture, lexer_begin_sline_comment) +{ + static constexpr const char* content = + "abc///" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// BEGIN_SBLOCK_COMMENT +TEST_F(lexer_fixture, lexer_begin_sblock_comment) +{ + static constexpr const char* content = + "abc/*!" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SBLOCK_COMMENT); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// BEGIN_NBLOCK_COMMENT +TEST_F(lexer_fixture, lexer_begin_nblock_comment) +{ + static constexpr const char* content = + "abc/**!" 
+ ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NBLOCK_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::STAR); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "!"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// END_BLOCK_COMMENT +TEST_F(lexer_fixture, lexer_end_block_comment_no_star) +{ + static constexpr const char* content = + "abc*/f" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::END_BLOCK_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "f"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +TEST_F(lexer_fixture, lexer_end_block_comment_star) +{ + static constexpr const char* content = + "abc**/f" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::STAR); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::END_BLOCK_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "f"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// SDESC +TEST_F(lexer_fixture, lexer_sdesc) +{ + static constexpr const char* content = + "ssdesc@@sdescf@sdesscf" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "ssdesc@"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::SDESC); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "f@sdesscf"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// TPARAM +TEST_F(lexer_fixture, lexer_tparam) +{ + static constexpr const char* content = + "ssdes@@@@@@tpaar@tpara@m@tparam@tpar" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "ssdes@@@@@@tpaar@tpara@m"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TPARAM); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "@tpar"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// RETURN +TEST_F(lexer_fixture, lexer_return) +{ + static constexpr const char* content = + "@re@@@@@@return@@@@@" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "@re@@@@@"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::RETURN); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, 
symbol_t::TEXT); + EXPECT_EQ(token->content, "@@@@@"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +//////////////////////////////////////////////////////////////////// +// Mix TESTS +//////////////////////////////////////////////////////////////////// + +// line comment mix +TEST_F(lexer_fixture, lexer_line_comment_4) +{ + static constexpr const char* content = + "abc////" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "/"); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// line comment mix +TEST_F(lexer_fixture, lexer_line_comment_5) +{ + static constexpr const char* content = + "abc/////" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +// line comment mix +TEST_F(lexer_fixture, lexer_line_comment_6) +{ + static constexpr const char* content = + "abc//////" + ; + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "abc"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +TEST_F(lexer_fixture, lexer_test_1_no_special_comment) +{ + static constexpr const char* content = + "#include // some comment\n" + "\n" + "void f();" + ; + + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::HASHTAG); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "include"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "some"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, 
symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "void"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "f()"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::SEMICOLON); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +TEST_F(lexer_fixture, lexer_test_2_no_special_comment) +{ + static constexpr const char* content = + "#include \n" + "\n" + " // just a normal comment\n" + "\n" + ; + + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::HASHTAG); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "include"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "just"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "a"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "normal"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +TEST_F(lexer_fixture, lexer_test_1_comment_mix) +{ + static constexpr const char* content = + "// comment\n" + " /// special_comment \n" + ; + + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + 
EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "special_comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +TEST_F(lexer_fixture, lexer_test_1_tagname_comments) +{ + static constexpr const char* content = + "// @tparam normal comment\n" + "/// @sdescspecial comment \n" + "#define hehe\n" + ; + + setup_lexer(content); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_NLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TPARAM); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "normal"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::BEGIN_SLINE_COMMENT); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::SDESC); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "special"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "comment"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::HASHTAG); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "define"); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::WHITESPACE); + EXPECT_EQ(token->content, ""); + + token = lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::TEXT); + EXPECT_EQ(token->content, "hehe"); + + token = 
lexer.next_token(); + EXPECT_EQ(token->name, symbol_t::NEWLINE); + EXPECT_EQ(token->content, ""); + + // check that there are no more tokens + token = lexer.next_token(); + EXPECT_FALSE(static_cast(token)); +} + +} // namespace lex +} // namespace core +} // namespace docgen diff --git a/benchmark/core/lex/lexer_benchmark.cpp b/benchmark/core/lex/lexer_benchmark.cpp new file mode 100644 index 0000000..327e8e3 --- /dev/null +++ b/benchmark/core/lex/lexer_benchmark.cpp @@ -0,0 +1,40 @@ +#include +#include + +namespace docgen { +namespace core { +namespace lex { + +struct lexer_fixture : benchmark::Fixture +{ + static constexpr const char* data_1_path = "data/data_1.txt"; + static constexpr const char* data_2_path = "data/data_2.txt"; + static constexpr const char* data_3_path = "data/data_3.txt"; + + Lexer lexer; + + void SetUp(const ::benchmark::State& state) + { + } + + void TearDown(const ::benchmark::State& state) + { + } +}; + +BENCHMARK_F(lexer_fixture, data_1_test)(benchmark::State& st) +{ + std::unique_ptr file(fopen(data_1_path, "r"), + [](FILE* file) {fclose(file);}); + int c = 0; + for (auto _ : st) { + while ((c = getc(file.get())) != EOF) { + lexer.process(c); + } + } +} + +} // namespace lex +} // namespace core +} // namespace docgen + diff --git a/configure.sh b/configure.sh index 363eecd..bfe444f 100755 --- a/configure.sh +++ b/configure.sh @@ -1,7 +1,8 @@ #!/bin/bash -# directory where current shell script resides -PROJECTDIR=$(dirname "$BASH_SOURCE") +# relative directory where current shell script resides from where shell script was called +PROJECTDIR="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)" +echo "Project directory: $PROJECTDIR" cd "$PROJECTDIR" @@ -48,13 +49,13 @@ git fetch --all --tags --prune git checkout tags/v1.5.0 -b v1.5.0 cd - -# Build google benchmark +# Build and install google benchmark locally cd libs/benchmark mkdir -p build && cd build - +cmake_flags="-DCMAKE_INSTALL_PREFIX=$PROJECTDIR/libs/benchmark/build" if [ $(command -v ninja) != "" ]; then - cmake ../ -GNinja + cmake ../ -GNinja $cmake_flags else - cmake ../ + cmake ../ $cmake_flags fi -cmake --build . -- -j12 +cmake --build . --target install -- -j12 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f95563a..4147592 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,6 +2,7 @@ add_library(LEXER_LIB_OBJECTS OBJECT ${CMAKE_CURRENT_SOURCE_DIR}/core/lex/lexer.cpp ) +target_compile_features(LEXER_LIB_OBJECTS PRIVATE cxx_std_17) target_include_directories(LEXER_LIB_OBJECTS PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${ETERNAL_DIR}/include diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 125e82f..3f4594f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -3,6 +3,7 @@ # TEST_TARGET is added as a test and an executable named ${TEST_NAME} will be created. 
function(create_test TEST_NAME TEST_TARGET) target_compile_options(${TEST_TARGET} PRIVATE -g -Wall -Werror -Wextra) + target_compile_features(${TEST_TARGET} PRIVATE cxx_std_17) target_include_directories(${TEST_TARGET} PRIVATE ${GTEST_DIR}/include From 4cf249393f93e71d9797f27dcf924640b757405a Mon Sep 17 00:00:00 2001 From: James Yang Date: Sun, 19 Jan 2020 14:35:36 -0500 Subject: [PATCH 22/23] Finish benchmarking --- CMakeLists.txt | 2 +- benchmark/CMakeLists.txt | 1 + benchmark/core/lex/data/data_4.txt | 71 +++++++++++++++++++ benchmark/core/lex/lexer_base_fixture.hpp | 24 +++++++ benchmark/core/lex/lexer_benchmark.cpp | 60 +++++++++++----- benchmark/core/lex/lexer_legacy_benchmark.cpp | 59 +++++++++++++++ src/core/lex/legacy/lexer.hpp | 8 +-- 7 files changed, 204 insertions(+), 21 deletions(-) create mode 100644 benchmark/core/lex/data/data_4.txt create mode 100644 benchmark/core/lex/lexer_base_fixture.hpp create mode 100644 benchmark/core/lex/lexer_legacy_benchmark.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 7944435..d0de76a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,7 +56,7 @@ set(ETERNAL_DIR ${PROJECT_SOURCE_DIR}/libs/eternal) find_package(nlohmann_json 3.2.0 REQUIRED) # find google benchmark -find_package(benchmark REQUIRED) +find_package(benchmark REQUIRED PATHS ${GBENCH_DIR}/build) # add libs subdirectory add_subdirectory(${PROJECT_SOURCE_DIR}/libs ${PROJECT_BINARY_DIR}/libs) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index fe431a0..d2822fb 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -1,5 +1,6 @@ add_executable(lexer_benchmark ${CMAKE_CURRENT_SOURCE_DIR}/core/lex/lexer_benchmark.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/core/lex/lexer_legacy_benchmark.cpp # Source dependency $ ) diff --git a/benchmark/core/lex/data/data_4.txt b/benchmark/core/lex/data/data_4.txt new file mode 100644 index 0000000..374e703 --- /dev/null +++ b/benchmark/core/lex/data/data_4.txt @@ -0,0 +1,71 @@ +jfowfoeijiowjfo///jfoijeonnvmlalfjioejoifjoelkdkfnkejwaoijjfoj12893483u2rjjdkfj + + +; +{jfiojeojahl{jiofejiofkvnkelwojio}} +/// @sdesc some short description............ + + +jfoijeiojokvmjknfirojijjoejfiokjeofjojdjfdfoejoj +JFOIJEIOJOKVMJKNFIROJIJJOEJFIOKJEOFJOJDJFDFOEJOJ +iuhigarufejoafwrhsgijfkajnbvkmfpoefrigirjkfel;df,ldmbkglrf;e,d.v mcd +jfowfoeijiowjfo///jfoijeonnvmlalfjioejoifjoelkdkfnkejwaoijjfoj12893483u2rjjdkfj +jfowfoeijiowjfo///jfoijeonnvmlalfjioejoifjoelkdkfnkejwaoijjfoj12893483u2rjjdkfj +jfowfoeijiowjfo///jfoijeonnvmlalfjioejoifjoelkdkfnkejwaoijjfoj12893483u2rjjdkfj +jfowfoeijiowjfo///jfoijeonnvmlalfjioejoifjoelkdkfnkejwaoijjfoj12893483u2rjjdkfj +jfowfoeijiowjfo///jfoijeonnvmlalfjioejoifjoelkdkfnkejwaoijjfoj12893483u2rjjdkfj + + +; +{jfiojeojahl{jiofejiofkvnkelwojio}} +/// @sdesc some short description............ + + +jfoijeiojokvmjknfirojijjoejfiokjeofjojdjfdfoejoj +JFOIJEIOJOKVMJKNFIROJIJJOEJFIOKJEOFJOJDJFDFOEJOJ +iuhigarufejoafwrhsgijfkajnbvkmfpoefrigirjkfel;df,ldmbkglrf;e,d.v mcd + + +; +{jfiojeojahl{jiofejiofkvnkelwojio}} +/// @sdesc some short description............ + + +jfoijeiojokvmjknfirojijjoejfiokjeofjojdjfdfoejoj +JFOIJEIOJOKVMJKNFIROJIJJOEJFIOKJEOFJOJDJFDFOEJOJ +iuhigarufejoafwrhsgijfkajnbvkmfpoefrigirjkfel;df,ldmbkglrf;e,d.v mcd + + +; +{jfiojeojahl{jiofejiofkvnkelwojio}} +/// @sdesc some short description............ + +/*! 
!hfowojlk @tparam scdesc fjown @tparam x jeiofhjoeifjdkjofijsoiejfoijeojoirjiohignjknjfbnkjnvkfjoiejioj
+*
+*
+*/
+
+
+jfoijeiojokvmjknfirojijjoejfiokjeofjojdjfdfoejoj
+JFOIJEIOJOKVMJKNFIROJIJJOEJFIOKJEOFJOJDJFDFOEJOJ
+iuhigarufejoafwrhsgijfkajnbvkmfpoefrigirjkfel;df,ldmbkglrf;e,d.v mcd
+
+
+;
+{jfiojeojahl{jiofejiofkvnkelwojio}}
+/// @sdesc some short description............
+
+
+jfoijeiojokvmjknfirojijjoejfiokjeofjojdjfdfoejoj
+JFOIJEIOJOKVMJKNFIROJIJJOEJFIOKJEOFJOJDJFDFOEJOJ
+iuhigarufejoafwrhsgijfkajnbvkmfpoefrigirjkfel;df,ldmbkglrf;e,d.v mcd
+
+
+;
+{jfiojeojahl{jiofejiofkvnkelwojio}}
+/// @sdesc some short description............
+
+
+jfoijeiojokvmjknfirojijjoejfiokjeofjojdjfdfoejoj
+JFOIJEIOJOKVMJKNFIROJIJJOEJFIOKJEOFJOJDJFDFOEJOJ
+iuhigarufejoafwrhsgijfkajnbvkmfpoefrigirjkfel;df,ldmbkglrf;e,d.v mcd
diff --git a/benchmark/core/lex/lexer_base_fixture.hpp b/benchmark/core/lex/lexer_base_fixture.hpp
new file mode 100644
index 0000000..51631d6
--- /dev/null
+++ b/benchmark/core/lex/lexer_base_fixture.hpp
@@ -0,0 +1,24 @@
+#pragma once
+#include
+
+namespace docgen {
+namespace core {
+namespace lex {
+
+struct lexer_base_fixture : benchmark::Fixture
+{
+    static constexpr const char* data_1_path = "data/data_1.txt";
+    static constexpr const char* data_2_path = "data/data_2.txt";
+    static constexpr const char* data_3_path = "data/data_3.txt";
+    static constexpr const char* data_4_path = "data/data_4.txt";
+
+    void SetUp(const ::benchmark::State& state)
+    {}
+
+    void TearDown(const ::benchmark::State& state)
+    {}
+};
+
+} // namespace lex
+} // namespace core
+} // namespace docgen
diff --git a/benchmark/core/lex/lexer_benchmark.cpp b/benchmark/core/lex/lexer_benchmark.cpp
index 327e8e3..f2cc6d1 100644
--- a/benchmark/core/lex/lexer_benchmark.cpp
+++ b/benchmark/core/lex/lexer_benchmark.cpp
@@ -1,36 +1,64 @@
 #include
-#include
+#include "lexer_base_fixture.hpp"
 
 namespace docgen {
 namespace core {
 namespace lex {
 
-struct lexer_fixture : benchmark::Fixture
+struct lexer_fixture : lexer_base_fixture
 {
-    static constexpr const char* data_1_path = "data/data_1.txt";
-    static constexpr const char* data_2_path = "data/data_2.txt";
-    static constexpr const char* data_3_path = "data/data_3.txt";
-
     Lexer lexer;
+};
 
-    void SetUp(const ::benchmark::State& state)
-    {
+BENCHMARK_F(lexer_fixture, data_1_test)(benchmark::State& st)
+{
+    for (auto _ : st) {
+        std::unique_ptr file(fopen(data_1_path, "r"),
+                [](FILE* file) {fclose(file);});
+        int c = 0;
+        while ((c = fgetc(file.get())) != EOF) {
+            lexer.process(c);
+        }
+        benchmark::DoNotOptimize(lexer.next_token());
+    }
+}
+
+BENCHMARK_F(lexer_fixture, data_2_test)(benchmark::State& st)
+{
+    for (auto _ : st) {
+        std::unique_ptr file(fopen(data_2_path, "r"),
+                [](FILE* file) {fclose(file);});
+        int c = 0;
+        while ((c = fgetc(file.get())) != EOF) {
+            lexer.process(c);
+        }
+        benchmark::DoNotOptimize(lexer.next_token());
     }
+}
 
-    void TearDown(const ::benchmark::State& state)
-    {
+BENCHMARK_F(lexer_fixture, data_3_test)(benchmark::State& st)
+{
+    for (auto _ : st) {
+        std::unique_ptr file(fopen(data_3_path, "r"),
+                [](FILE* file) {fclose(file);});
+        int c = 0;
+        while ((c = fgetc(file.get())) != EOF) {
+            lexer.process(c);
+        }
+        benchmark::DoNotOptimize(lexer.next_token());
     }
-};
+}
 
-BENCHMARK_F(lexer_fixture, data_1_test)(benchmark::State& st)
+BENCHMARK_F(lexer_fixture, data_4_test)(benchmark::State& st)
 {
-    std::unique_ptr file(fopen(data_1_path, "r"),
-            [](FILE* file) {fclose(file);});
-    int c = 0;
     for (auto _ : st) {
-        while ((c = getc(file.get())) != EOF) {
+ std::unique_ptr file(fopen(data_4_path, "r"), + [](FILE* file) {fclose(file);}); + int c = 0; + while ((c = fgetc(file.get())) != EOF) { lexer.process(c); } + benchmark::DoNotOptimize(lexer.next_token()); } } diff --git a/benchmark/core/lex/lexer_legacy_benchmark.cpp b/benchmark/core/lex/lexer_legacy_benchmark.cpp new file mode 100644 index 0000000..34e9848 --- /dev/null +++ b/benchmark/core/lex/lexer_legacy_benchmark.cpp @@ -0,0 +1,59 @@ +#include +#include "lexer_base_fixture.hpp" + +namespace docgen { +namespace core { +namespace lex { +namespace legacy { + +struct lexer_legacy_fixture : lexer_base_fixture +{}; + +BENCHMARK_F(lexer_legacy_fixture, data_1_test)(benchmark::State& st) +{ + for (auto _ : st) { + FILE* file = fopen(data_1_path, "r"); + Lexer lexer(file); + lexer.process(); + benchmark::DoNotOptimize(lexer.get_tokens()[0]); + fclose(file); + } +} + +BENCHMARK_F(lexer_legacy_fixture, data_2_test)(benchmark::State& st) +{ + for (auto _ : st) { + FILE* file = fopen(data_2_path, "r"); + Lexer lexer(file); + lexer.process(); + benchmark::DoNotOptimize(lexer.get_tokens()[0]); + fclose(file); + } +} + +BENCHMARK_F(lexer_legacy_fixture, data_3_test)(benchmark::State& st) +{ + for (auto _ : st) { + FILE* file = fopen(data_3_path, "r"); + Lexer lexer(file); + lexer.process(); + benchmark::DoNotOptimize(lexer.get_tokens()[0]); + fclose(file); + } +} + +BENCHMARK_F(lexer_legacy_fixture, data_4_test)(benchmark::State& st) +{ + for (auto _ : st) { + FILE* file = fopen(data_4_path, "r"); + Lexer lexer(file); + lexer.process(); + benchmark::DoNotOptimize(lexer.get_tokens()[0]); + fclose(file); + } +} + +} // namespace legacy +} // namespace lex +} // namespace core +} // namespace docgen diff --git a/src/core/lex/legacy/lexer.hpp b/src/core/lex/legacy/lexer.hpp index 56f647e..ec5537e 100644 --- a/src/core/lex/legacy/lexer.hpp +++ b/src/core/lex/legacy/lexer.hpp @@ -6,9 +6,9 @@ namespace core { struct Lexer { - using symbol_t = lexer_details::symbol_t; - using file_reader = lexer_details::file_reader; - using status_t = lexer_details::status_t; + using symbol_t = lex::legacy::symbol_t; + using file_reader = lex::legacy::file_reader; + using status_t = lex::legacy::status_t; Lexer(FILE* file) : reader_(file) @@ -18,7 +18,7 @@ struct Lexer void process() { - lexer_details::process(reader_, status_); + lex::legacy::process(reader_, status_); } const status_t::token_arr_t& get_tokens() const From 75a459e011642bb03a45898ebff5194d703d4af2 Mon Sep 17 00:00:00 2001 From: James Yang Date: Sun, 19 Jan 2020 15:09:22 -0500 Subject: [PATCH 23/23] Add cmake command line args passable to configure --- configure.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/configure.sh b/configure.sh index bfe444f..132c372 100755 --- a/configure.sh +++ b/configure.sh @@ -1,5 +1,9 @@ #!/bin/bash +mode=$1 # debug/release mode +shift # shift command-line arguments + # the rest are cmake command-line arguments + # relative directory where current shell script resides from where shell script was called PROJECTDIR="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)" echo "Project directory: $PROJECTDIR" @@ -54,8 +58,8 @@ cd libs/benchmark mkdir -p build && cd build cmake_flags="-DCMAKE_INSTALL_PREFIX=$PROJECTDIR/libs/benchmark/build" if [ $(command -v ninja) != "" ]; then - cmake ../ -GNinja $cmake_flags + cmake ../ -GNinja $cmake_flags "$@" else - cmake ../ $cmake_flags + cmake ../ $cmake_flags "$@" fi cmake --build . --target install -- -j12
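
For reference, the following is a minimal stand-alone driver for the trie-based lexer this series introduces. It is a sketch only: the include path is illustrative (the header first appears as src/core/lexer_trie.hpp in patch 01 and later moves under src/core/lex/), and the driver itself is hypothetical; only the process(char) / next_token() interface it relies on comes from the patches.

// Hypothetical usage sketch; not part of the patch series.
#include <cstdio>
#include "core/lexer_trie.hpp" // illustrative include path (as of patch 01)

int main(int argc, char** argv)
{
    if (argc < 2) {
        std::fprintf(stderr, "usage: lexer_driver <file>\n");
        return 1;
    }

    std::FILE* file = std::fopen(argv[1], "r");
    if (!file) {
        return 1;
    }

    docgen::core::Lexer lexer;
    int c = 0;
    while ((c = std::fgetc(file)) != EOF) {
        lexer.process(static_cast<char>(c)); // feed the lexer one character at a time
    }
    std::fclose(file);

    // Drain the token queue; next_token() returns an empty optional when exhausted.
    // Note: in the patch-01 lexer a symbol still buffered at end of input stays
    // pending, since an explicit flush step only appears later in the series.
    while (auto token = lexer.next_token()) {
        std::printf("%s\n", token->c_str());
    }
    return 0;
}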