forked from chipsalliance/verible
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathflex_lexer_adapter.h
169 lines (146 loc) · 6.18 KB
/
flex_lexer_adapter.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
// Copyright 2017-2020 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// FlexLexerAdapter class adapts Flex-generated lexers to Lexer interface.
//
// Template parameter L must be a Flex-generated lexer (yyFlexLexer).
// The adapter inherits from this generated class to override functions.
//
// Main lexing function yylex() must be defined in a subclass.
//
// Example usage:
// in verilog_lexer.h:
// class verilogFlexLexer; // generated by flex
// class VerilogLexer : public verible::FlexLexerAdapter<verilogFlexLexer> {
// ...
// };
//
// and in verilog.lex:
// %option yyclass="verilog::VerilogLexer"
#ifndef VERIBLE_COMMON_LEXER_FLEX_LEXER_ADAPTER_H_
#define VERIBLE_COMMON_LEXER_FLEX_LEXER_ADAPTER_H_
#include <cstdlib>
#include <iostream>
#include <sstream> // IWYU pragma: keep // for ostringstream
#include <string>
#include "absl/strings/string_view.h"
#include "common/lexer/lexer.h"
#include "common/text/token_info.h"
#include "common/util/logging.h"
namespace verible {
// Holds the input stream that FlexLexerAdapter passes to its flex-generated
// base class.
//
// The "L" base class of FlexLexerAdapter needs to use code_stream_ in its
// constructor, which means that code_stream_ must be initialized first. All
// base classes are initialized before any non-static data members, so to
// achieve that, we need to also put code_stream_ in a base class that is
// ordered before "L" in FlexLexerAdapter's base classes.
class CodeStreamHolder {
protected:
// The stream object conforms to the FlexLexer input interface.
// Even though scanning is done on the stream's internal copy of the input
// string, the byte offsets being tracked can be used to construct
// string_views based on the original string's start address.
// Using the standard istream interface also lets us switch buffers, e.g.
// during preprocessing.
std::istringstream code_stream_;
};
// Adapts a flex-generated scanner class to the Lexer interface.
//
// L is a (flex-generated) yyFlexLexer-like class.  The adapter derives from
// it so that it can override LexerOutput()/LexerError() and call yylex().
// CodeStreamHolder is listed first among the bases so that code_stream_ is
// already constructed when its address is handed to L's constructor.
template <typename L>
class FlexLexerAdapter : private CodeStreamHolder, protected L, public Lexer {
 public:
  // Creates a lexer that scans 'code'.
  // Only a view of 'code' is stored in code_ (the stream gets its own copy),
  // and tokens are expressed as slices of that view.
  explicit FlexLexerAdapter(absl::string_view code)
      : L(&code_stream_),
        code_(code),
        // last_token_ points to the beginning of the code_ buffer
        last_token_(0 /* enum doesn't matter */, code_.substr(0, 0)) {
    // istringstream copies text into its own internal buffer.
    code_stream_.str(std::string(code));
  }

  // Returns the token associated with the last UpdateLocation() call.
  const TokenInfo& GetLastToken() const final { return last_token_; }

  // Returns next token and updates its location.
  // Once at_eof_ has been set, every further call yields the EOF token
  // without invoking the scanner again.
  const TokenInfo& DoNextToken() override {
    if (at_eof_) {
      // Do not call yylex(), because that will result in the fatal error:
      // "fatal flex scanner internal error--end of buffer missed"
      last_token_ = TokenInfo::EOFToken(code_);
    } else {
      // In normal operation, call yylex() to extract the next token.
      last_token_.set_token_enum(this->yylex());
    }
    // yylex has already called UpdateLocation()
    return last_token_;
  }

 protected:
  // Must be called by subclasses to update location of the current token.
  // Advances last_token_ by the length of the text that was just matched.
  void UpdateLocation() { last_token_.AdvanceText(this->YYLeng()); }

  // EOF needs special handling because yyleng is set to include a terminating
  // \0 (NUL) character.  Once EOF is encountered it is also not possible to
  // yyless-rewind the window -- doing so messes up the internal state machine,
  // and causes (flex) errors like:
  //   "fatal flex scanner internal error--end of buffer missed"
  // We advance the token text without spanning the NUL character.
  // This should only be needed in lexer states that need to explicitly
  // handle <<EOF>>.
  void UpdateLocationEOF() {
    last_token_.AdvanceText(this->YYLeng() - 1);
    at_eof_ = true;
  }

  // Restart lexer by pointing to new input stream, and reset all state.
  // 'code' replaces the text being scanned; as in the constructor, only a
  // view of it is kept in code_.
  void Restart(absl::string_view code) override {
    at_eof_ = false;
    code_ = code;
    code_stream_.str(std::string(code_));
    // str() replaces the stream's contents but leaves its state flags alone.
    // Clear any eofbit/failbit left over from an earlier scan so that the new
    // text can be read through this stream.
    code_stream_.clear();
    last_token_ = TokenInfo(0, code_.substr(0, 0));

    // Reset buffer stack.
    // NOTE(review): if yy_buffer_stack_top is the *index* of the top buffer,
    // this threshold leaves two buffers rather than one -- confirm against
    // the generated scanner before changing.
    while (L::yy_buffer_stack_top > 1) {  // Keep bottom buffer only.
      L::yypop_buffer_state();
    }

    // Reset the current buffer to use new stream.
    L::yyrestart(&code_stream_);

    // Reset start condition stack.
    // NOTE(review): same question as above -- presumably a stack pointer of 1
    // means one saved state is still pending; verify that INITIAL is really
    // the active state after this loop.
    while (L::yy_start_stack_ptr > 1) {  // Keep INITIAL state.
      L::yy_pop_state();
    }
  }

  // Overrides yyFlexLexer's implementation to handle unrecognized chars.
  // 'buf' and 'size' describe the rejected text, which is only logged at
  // verbosity level 1.
  void LexerOutput(const char* buf, int size) final {
    VLOG(1) << "LexerOutput: rejected text: \"" << std::string(buf, size)
            << '\"';
    // Update location by the size of the unrecognized sequence.
    // Note, this is a last-resort guard.  The preferred way
    // to handle unrecognized chars is to add wildcard rule
    // at the end of the lexer definition that just calls
    // UpdateLocation().
    last_token_.AdvanceText(size);
    // TODO(fangism): Communicate some sort of error token to the consumer.
  }

  // Overrides yyFlexLexer's implementation to do proper error handling.
  // Prints 'msg' to stderr and terminates the process; never returns.
  void LexerError(const char* msg) final {
    // Terminate the line so the diagnostic is not glued to whatever is
    // printed next.
    std::cerr << "Fatal LexerError: " << msg << '\n';
    std::abort();
  }

 private:
  // A read-only view of the entire text to be scanned.
  absl::string_view code_;

  // Contains the enumeration and the substring slice of the last lexed token.
  TokenInfo last_token_;

  // Kludge: the generated FlexLexer (subclass) doesn't expose a way to
  // determine whether an EOF has already been encountered:
  //   (yy_buffer_stack[yy_buffer_stack_top]->yy_buffer_status
  //       == YY_BUFFER_EOF_PENDING)
  // because yy_buffer_state's implementation is private.
  // Thus, we manually set this bit upon encountering <<EOF>>.
  bool at_eof_ = false;
};
} // namespace verible
#endif // VERIBLE_COMMON_LEXER_FLEX_LEXER_ADAPTER_H_