Skip to content

Commit 6bb617b

Browse files
committed
More flexible tree structure; listeners can now have any lifetime; improved type safety.
1 parent 1188be7 commit 6bb617b

38 files changed

+1490
-1768
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
.idea
22
.vscode
33
/target
4+
/tests/gen/*.tokens
5+
/tests/gen/*.interp
46
**/*.rs.bk
57
Cargo.lock

README.md

+3-2
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,18 @@ For now development is going on in this repository
1717
but eventually it will be merged to main ANTLR4 repo
1818

1919
Currently requires nightly version of rust.
20-
This likely will be the case until `specialization`,`try_blocks` and `unsize` features are stabilized.
20+
This likely will be the case until `unsize` or some kind of `CoerceUnsized` is stabilized.
21+
There are other unstable features in use but only `CoerceUnsized` is essential.
2122

2223
Remaining things before merge:
2324
- API stabilization
2425
- [ ] Rust api guidelines compliance
2526
- [ ] more tests for API because it is quite different from Java
26-
- more generic `PredictionContext`
2727
- generate enum for labeled alternatives without redundant `Error` option
2828
- option to generate fields instead of getters by default
2929
- move useful exports to lib.rs for better documentation
3030
- reexport statics crate and move to once_cell
31+
- support byte level parser
3132

3233
Can be done after merge:
3334
- profiling and performance optimizations

grammars/CSV.g4

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
grammar CSV;
22

33
@tokenfactory{
4-
pub type LocalTokenFactory<'input> = antlr_rust::common_token_factory::ArenaCommonFactory<'input>;
4+
pub type LocalTokenFactory<'input> = antlr_rust::token_factory::ArenaCommonFactory<'input>;
55
}
66

77
csvFile: hdr row+ ;

src/atn.rs

+7-6
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ use crate::dfa::ScopeExt;
88
use crate::interval_set::IntervalSet;
99
use crate::lexer_action::LexerAction;
1010
use crate::ll1_analyzer::LL1Analyzer;
11-
use crate::parser_rule_context::{ParserRuleContext, ParserRuleContextType};
12-
use crate::rule_context::RuleContext;
11+
use crate::parser::ParserNodeType;
12+
use crate::parser_rule_context::ParserRuleContext;
13+
use crate::rule_context::{EmptyContextType, RuleContext};
1314
use crate::token::{TOKEN_EOF, TOKEN_EPSILON};
1415
use crate::token_factory::{CommonTokenFactory, TokenFactory};
1516
use crate::transition::RuleTransition;
@@ -59,7 +60,7 @@ impl ATN {
5960
///rule.
6061
pub fn next_tokens<'a>(&self, s: &'a dyn ATNState) -> &'a IntervalSet {
6162
s.get_next_tokens_within_rule().get_or_init(|| {
62-
self.next_tokens_in_ctx(s, None::<&dyn ParserRuleContext<TF=CommonTokenFactory>>)
63+
self.next_tokens_in_ctx::<EmptyContextType<CommonTokenFactory>>(s, None)
6364
.modify_with(|r| {
6465
r.read_only = true
6566
}
@@ -71,9 +72,9 @@ impl ATN {
7172
/// If `ctx` is null, the set of tokens will not include what can follow
7273
/// the rule surrounding `s`. In other words, the set will be
7374
/// restricted to tokens reachable staying within `s`'s rule.
74-
pub fn next_tokens_in_ctx<'a, TF: TokenFactory<'a> + 'a>(&self, s: &dyn ATNState, _ctx: Option<&(dyn ParserRuleContext<'a, TF=TF> + 'a)>) -> IntervalSet {
75+
pub fn next_tokens_in_ctx<'a, Ctx: ParserNodeType<'a>>(&self, s: &dyn ATNState, _ctx: Option<&Ctx::Type>) -> IntervalSet {
7576
let analyzer = LL1Analyzer::new(self);
76-
analyzer.look(s, None, _ctx)
77+
analyzer.look::<'a, Ctx>(s, None, _ctx)
7778
}
7879

7980
pub(crate) fn add_state(&mut self, state: Box<dyn ATNState>) {
@@ -125,7 +126,7 @@ impl ATN {
125126
/// specified state in the specified context.
126127
/// @throws IllegalArgumentException if the ATN does not contain a state with
127128
/// number {@code stateNumber}
128-
pub fn get_expected_tokens<'a, TF: TokenFactory<'a>>(&self, state_number: isize, _ctx: &ParserRuleContextType<'a, TF>) -> IntervalSet {
129+
pub fn get_expected_tokens<'a, Ctx: ParserNodeType<'a>>(&self, state_number: isize, _ctx: &Rc<Ctx::Type>) -> IntervalSet {
129130
let s = self.states[state_number as usize].as_ref();
130131
let mut following = self.next_tokens(s);
131132
if !following.contains(TOKEN_EPSILON) {

src/atn_state.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ pub trait ATNState: Sync + Send + Debug {
8080
fn get_state_type_id(&self) -> isize;
8181

8282
fn get_state_number(&self) -> usize;
83-
fn set_state_number(&self, stateNumber: isize);
83+
fn set_state_number(&self, state_number: isize);
8484

8585
fn get_transitions(&self) -> &Vec<Box<dyn Transition>>;
8686
fn set_transitions(&self, t: Vec<Box<dyn Transition>>);
@@ -156,7 +156,7 @@ impl ATNState for BaseATNState {
156156
self.state_number
157157
}
158158

159-
fn set_state_number(&self, _stateNumber: isize) {
159+
fn set_state_number(&self, _state_number: isize) {
160160
unimplemented!()
161161
}
162162

src/error_listener.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use crate::recognizer::Recognizer;
1313
use crate::token::Token;
1414

1515
pub trait ErrorListener<T> {
16-
fn syntax_error(&self, /*todo _recognizer: (Option<&dyn Lexer<'_>>,Option<&dyn Parser<'_>>),*/ _offending_symbol: Option<&dyn Token>,
16+
fn syntax_error(&self, _recognizer: &T, _offending_symbol: Option<&dyn Token>,
1717
_line: isize, _column: isize, _msg: &str, _e: Option<&ANTLRError>, ) {}
1818

1919
fn report_ambiguity(&self, _recognizer: &T, _dfa: &DFA, _start_index: isize, _stop_index: isize,
@@ -30,7 +30,7 @@ pub trait ErrorListener<T> {
3030
pub struct ConsoleErrorListener {}
3131

3232
impl<T> ErrorListener<T> for ConsoleErrorListener {
33-
fn syntax_error(&self, _offending_symbol: Option<&dyn Token>,
33+
fn syntax_error(&self, _recognizer: &T, _offending_symbol: Option<&dyn Token>,
3434
line: isize, column: isize, msg: &str, _e: Option<&ANTLRError>) {
3535
eprintln!("line {}:{} {}", line, column, msg);
3636
}
@@ -41,9 +41,9 @@ pub(crate) struct ProxyErrorListener<'a, T> {
4141
}
4242

4343
impl<'a, T> ErrorListener<T> for ProxyErrorListener<'a, T> {
44-
fn syntax_error(&self, offending_symbol: Option<&dyn Token>, line: isize, column: isize, msg: &str, e: Option<&ANTLRError>) {
44+
fn syntax_error(&self, _recognizer: &T, offending_symbol: Option<&dyn Token>, line: isize, column: isize, msg: &str, e: Option<&ANTLRError>) {
4545
for listener in self.delegates.deref() {
46-
listener.syntax_error(offending_symbol, line, column, msg, e)
46+
listener.syntax_error(_recognizer, offending_symbol, line, column, msg, e)
4747
}
4848
}
4949

src/error_strategy.rs

+26-24
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,13 @@ use crate::char_stream::CharStream;
1111
use crate::dfa::ScopeExt;
1212
use crate::errors::{ANTLRError, FailedPredicateError, InputMisMatchError, NoViableAltError, RecognitionError};
1313
use crate::interval_set::IntervalSet;
14-
use crate::parser::Parser;
15-
use crate::parser_rule_context::{ParserRuleContext, ParserRuleContextType};
14+
use crate::parser::{Parser, ParserNodeType};
15+
use crate::parser_rule_context::ParserRuleContext;
16+
use crate::rule_context::{CustomRuleContext, RuleContext};
1617
use crate::token::{OwningToken, Token, TOKEN_DEFAULT_CHANNEL, TOKEN_EOF, TOKEN_EPSILON, TOKEN_INVALID_TYPE};
1718
use crate::token_factory::TokenFactory;
1819
use crate::transition::RuleTransition;
20+
use crate::tree::Tree;
1921
use crate::utils::escape_whitespaces;
2022

2123
/// The interface for defining strategies to deal with syntax errors encountered
@@ -41,16 +43,16 @@ pub trait ErrorStrategy<'a, T: Parser<'a>> {
4143
fn report_match(&mut self, recognizer: &mut T);
4244
}
4345

44-
pub struct DefaultErrorStrategy<'input, TF: TokenFactory<'input> + 'input> {
46+
pub struct DefaultErrorStrategy<'input, Ctx: ParserNodeType<'input>> {
4547
error_recovery_mode: bool,
4648
last_error_index: isize,
4749
last_error_states: Option<IntervalSet>,
4850
next_tokens_state: isize,
49-
next_tokens_ctx: Option<ParserRuleContextType<'input, TF>>,
51+
next_tokens_ctx: Option<Rc<Ctx::Type>>,
5052
}
5153

5254

53-
impl<'input, TF: TokenFactory<'input> + 'input> DefaultErrorStrategy<'input, TF> {
55+
impl<'input, Ctx: ParserNodeType<'input>> DefaultErrorStrategy<'input, Ctx> {
5456
pub fn new() -> Self {
5557
Self {
5658
error_recovery_mode: false,
@@ -61,17 +63,17 @@ impl<'input, TF: TokenFactory<'input> + 'input> DefaultErrorStrategy<'input, TF>
6163
}
6264
}
6365

64-
fn begin_error_condition<T: Parser<'input, TF=TF>>(&mut self, _recognizer: &T) {
66+
fn begin_error_condition<T: Parser<'input, Node=Ctx, TF=Ctx::TF>>(&mut self, _recognizer: &T) {
6567
self.error_recovery_mode = true;
6668
}
6769

68-
fn end_error_condition<T: Parser<'input, TF=TF>>(&mut self, _recognizer: &T) {
70+
fn end_error_condition<T: Parser<'input, Node=Ctx, TF=Ctx::TF>>(&mut self, _recognizer: &T) {
6971
self.error_recovery_mode = false;
7072
self.last_error_index = -1;
7173
self.last_error_states = None;
7274
}
7375

74-
fn report_no_viable_alternative<T: Parser<'input, TF=TF>>(&self, recognizer: &mut T, e: &NoViableAltError) -> String {
76+
fn report_no_viable_alternative<T: Parser<'input, Node=Ctx, TF=Ctx::TF>>(&self, recognizer: &mut T, e: &NoViableAltError) -> String {
7577
let input = if e.start_token.token_type == TOKEN_EOF {
7678
"<EOF>".to_owned()
7779
} else {
@@ -81,19 +83,19 @@ impl<'input, TF: TokenFactory<'input> + 'input> DefaultErrorStrategy<'input, TF>
8183
format!("no viable alternative at input '{}'", input)
8284
}
8385

84-
fn report_input_mismatch<T: Parser<'input, TF=TF>>(&self, recognizer: &T, e: &InputMisMatchError) -> String {
86+
fn report_input_mismatch<T: Parser<'input, Node=Ctx, TF=Ctx::TF>>(&self, recognizer: &T, e: &InputMisMatchError) -> String {
8587
format!("mismatched input {} expecting {}",
8688
self.get_token_error_display(&e.base.offending_token),
8789
e.base.get_expected_tokens(recognizer).to_token_string(recognizer.get_vocabulary()))
8890
}
8991

90-
fn report_failed_predicate<T: Parser<'input, TF=TF>>(&self, recognizer: &T, e: &FailedPredicateError) -> String {
92+
fn report_failed_predicate<T: Parser<'input, Node=Ctx, TF=Ctx::TF>>(&self, recognizer: &T, e: &FailedPredicateError) -> String {
9193
format!("rule {} {}",
9294
recognizer.get_rule_names()[recognizer.get_parser_rule_context().get_rule_index()],
9395
e.base.message)
9496
}
9597

96-
fn report_unwanted_token<T: Parser<'input, TF=TF>>(&mut self, recognizer: &mut T) {
98+
fn report_unwanted_token<T: Parser<'input, Node=Ctx, TF=Ctx::TF>>(&mut self, recognizer: &mut T) {
9799
if self.in_error_recovery_mode(recognizer) {
98100
return;
99101
}
@@ -108,7 +110,7 @@ impl<'input, TF: TokenFactory<'input> + 'input> DefaultErrorStrategy<'input, TF>
108110
recognizer.notify_error_listeners(msg, Some(t), None);
109111
}
110112

111-
fn report_missing_token<T: Parser<'input, TF=TF>>(&mut self, recognizer: &mut T) {
113+
fn report_missing_token<T: Parser<'input, Node=Ctx, TF=Ctx::TF>>(&mut self, recognizer: &mut T) {
112114
if self.in_error_recovery_mode(recognizer) {
113115
return;
114116
}
@@ -126,13 +128,13 @@ impl<'input, TF: TokenFactory<'input> + 'input> DefaultErrorStrategy<'input, TF>
126128
recognizer.notify_error_listeners(msg, Some(t), None);
127129
}
128130

129-
fn single_token_insertion<T: Parser<'input, TF=TF>>(&mut self, recognizer: &mut T) -> bool {
131+
fn single_token_insertion<T: Parser<'input, Node=Ctx, TF=Ctx::TF>>(&mut self, recognizer: &mut T) -> bool {
130132
let current_token = recognizer.get_input_stream_mut().la(1);
131133

132134
let atn = recognizer.get_interpreter().atn();
133135
let current_state = atn.states[recognizer.get_state() as usize].as_ref();
134136
let next = current_state.get_transitions().first().unwrap().get_target();
135-
let expect_at_ll2 = atn.next_tokens_in_ctx(
137+
let expect_at_ll2 = atn.next_tokens_in_ctx::<'input, Ctx>(
136138
atn.states[next].as_ref(),
137139
Some(recognizer.get_parser_rule_context().deref()),
138140
);
@@ -143,7 +145,7 @@ impl<'input, TF: TokenFactory<'input> + 'input> DefaultErrorStrategy<'input, TF>
143145
false
144146
}
145147

146-
fn single_token_deletion<'a, T: Parser<'input, TF=TF>>(&mut self, recognizer: &'a mut T) -> Option<&'a (TF::Tok)> {
148+
fn single_token_deletion<'a, T: Parser<'input, Node=Ctx, TF=Ctx::TF>>(&mut self, recognizer: &'a mut T) -> Option<&'a <T::TF as TokenFactory<'input>>::Tok> {
147149
let next_token_type = recognizer.get_input_stream_mut().la(2);
148150
let expecting = self.get_expected_tokens(recognizer);
149151
// println!("expecting {}", expecting.to_token_string(recognizer.get_vocabulary()));
@@ -157,7 +159,7 @@ impl<'input, TF: TokenFactory<'input> + 'input> DefaultErrorStrategy<'input, TF>
157159
None
158160
}
159161

160-
fn get_missing_symbol<T: Parser<'input, TF=TF>>(&self, recognizer: &mut T) -> TF::Tok {
162+
fn get_missing_symbol<T: Parser<'input, Node=Ctx, TF=Ctx::TF>>(&self, recognizer: &mut T) -> <T::TF as TokenFactory<'input>>::Tok {
161163
let expected = self.get_expected_tokens(recognizer);
162164
let expected_token_type = expected.get_min().unwrap_or(TOKEN_INVALID_TYPE);
163165
let token_text = if expected_token_type == TOKEN_EOF {
@@ -189,7 +191,7 @@ impl<'input, TF: TokenFactory<'input> + 'input> DefaultErrorStrategy<'input, TF>
189191
// .modify_with(|it| it.text = token_text)
190192
}
191193

192-
fn get_expected_tokens<T: Parser<'input, TF=TF>>(&self, recognizer: &T) -> IntervalSet {
194+
fn get_expected_tokens<T: Parser<'input, Node=Ctx, TF=Ctx::TF>>(&self, recognizer: &T) -> IntervalSet {
193195
recognizer.get_expected_tokens()
194196
}
195197

@@ -202,7 +204,7 @@ impl<'input, TF: TokenFactory<'input> + 'input> DefaultErrorStrategy<'input, TF>
202204
format!("'{}'", escape_whitespaces(s, false))
203205
}
204206

205-
fn get_error_recovery_set<T: Parser<'input, TF=TF>>(&self, recognizer: &T) -> IntervalSet {
207+
fn get_error_recovery_set<T: Parser<'input, Node=Ctx, TF=Ctx::TF>>(&self, recognizer: &T) -> IntervalSet {
206208
let atn = recognizer.get_interpreter().atn();
207209
let mut ctx = Some(recognizer.get_parser_rule_context().clone());
208210
let mut recover_set = IntervalSet::new();
@@ -220,7 +222,7 @@ impl<'input, TF: TokenFactory<'input> + 'input> DefaultErrorStrategy<'input, TF>
220222
return recover_set;
221223
}
222224

223-
fn consume_until<T: Parser<'input, TF=TF>>(&mut self, recognizer: &mut T, set: &IntervalSet) {
225+
fn consume_until<T: Parser<'input, Node=Ctx, TF=Ctx::TF>>(&mut self, recognizer: &mut T, set: &IntervalSet) {
224226
let mut ttype = recognizer.get_input_stream_mut().la(1);
225227
while ttype != TOKEN_EOF && !set.contains(ttype) {
226228
recognizer.consume(self);
@@ -229,7 +231,7 @@ impl<'input, TF: TokenFactory<'input> + 'input> DefaultErrorStrategy<'input, TF>
229231
}
230232
}
231233

232-
impl<'a, T: Parser<'a>> ErrorStrategy<'a, T> for DefaultErrorStrategy<'a, T::TF> {
234+
impl<'a, T: Parser<'a>> ErrorStrategy<'a, T> for DefaultErrorStrategy<'a, T::Node> {
233235
fn reset(&mut self, _recognizer: &mut T) {
234236
unimplemented!()
235237
}
@@ -367,12 +369,12 @@ myparser.err_handler = BailErrorStrategy::new();
367369
368370
[`ParserRuleContext.exception`]: todo
369371
*/
370-
pub struct BailErrorStrategy<'input, TF: TokenFactory<'input>>(DefaultErrorStrategy<'input, TF>);
372+
pub struct BailErrorStrategy<'input, Ctx: ParserNodeType<'input>>(DefaultErrorStrategy<'input, Ctx>);
371373

372-
impl<'input, TF: TokenFactory<'input>> BailErrorStrategy<'input, TF> {
374+
impl<'input, Ctx: ParserNodeType<'input>> BailErrorStrategy<'input, Ctx> {
373375
pub fn new() -> Self { Self(DefaultErrorStrategy::new()) }
374376

375-
fn process_error<T: Parser<'input, TF=TF>>(&self, recognizer: &mut T, e: &ANTLRError) -> ANTLRError {
377+
fn process_error<T: Parser<'input, Node=Ctx, TF=Ctx::TF>>(&self, recognizer: &mut T, e: &ANTLRError) -> ANTLRError {
376378
let mut ctx = recognizer.get_parser_rule_context().clone();
377379
let _: Option<()> = try {
378380
loop {
@@ -401,7 +403,7 @@ impl Display for ParseCancelledError {
401403
}
402404
}
403405

404-
impl<'a, T: Parser<'a>> ErrorStrategy<'a, T> for BailErrorStrategy<'a, T::TF> {
406+
impl<'a, T: Parser<'a>> ErrorStrategy<'a, T> for BailErrorStrategy<'a, T::Node> {
405407
fn reset(&mut self, recognizer: &mut T) { self.0.reset(recognizer) }
406408

407409
fn recover_inline(&mut self, recognizer: &mut T) -> Result<<T::TF as TokenFactory<'a>>::Tok, ANTLRError> {

src/errors.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ use std::rc::Rc;
88

99
use crate::atn_simulator::IATNSimulator;
1010
use crate::interval_set::IntervalSet;
11-
use crate::parser::Parser;
12-
use crate::parser_rule_context::{ParserRuleContext, ParserRuleContextType};
11+
use crate::parser::{Parser, ParserNodeType};
12+
use crate::parser_rule_context::ParserRuleContext;
1313
use crate::token::{OwningToken, Token};
1414
use crate::transition::PredicateTransition;
1515
use crate::transition::TransitionType::TRANSITION_PREDICATE;
@@ -19,7 +19,7 @@ use crate::transition::TransitionType::TRANSITION_PREDICATE;
1919
pub enum ANTLRError {
2020
/// Returned from Lexer when it fails to find matching token type for current input
2121
///
22-
/// Usually Lexers contain rule that captures all invalid tokens like:
22+
/// Usually Lexers contain last rule that captures all invalid tokens like:
2323
/// ```text
2424
/// ERROR_TOKEN: . ;
2525
/// ```
@@ -119,7 +119,7 @@ pub struct BaseRecognitionError {
119119
impl BaseRecognitionError {
120120
pub fn get_expected_tokens<'a, T: Parser<'a>>(&self, recognizer: &T) -> IntervalSet {
121121
recognizer.get_interpreter().atn()
122-
.get_expected_tokens(self.offending_state, recognizer.get_parser_rule_context())
122+
.get_expected_tokens::<'a, T::Node>(self.offending_state, recognizer.get_parser_rule_context())
123123
}
124124

125125
fn new<'a, T: Parser<'a>>(recog: &mut T) -> BaseRecognitionError {
@@ -187,7 +187,7 @@ impl InputMisMatchError {
187187
}
188188
}
189189

190-
pub fn with_state<'a, T: Parser<'a>>(recognizer: &mut T, offending_state: isize, ctx: ParserRuleContextType<'a, T::TF>) -> InputMisMatchError {
190+
pub fn with_state<'a, T: Parser<'a>>(recognizer: &mut T, offending_state: isize, ctx: Rc<<T::Node as ParserNodeType<'a>>::Type>) -> InputMisMatchError {
191191
let mut a = Self::new(recognizer);
192192
// a.base.ctx = ctx;
193193
a.base.offending_state = offending_state;

src/interval_set.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ pub struct Interval {
1111
pub b: isize,
1212
}
1313

14+
pub const INVALID: Interval = Interval { a: -1, b: -2 };
15+
1416
impl Interval {
1517
/* stop is not included! */
1618
fn new(a: isize, b: isize) -> Interval {
@@ -21,7 +23,6 @@ impl Interval {
2123
unimplemented!()
2224
}
2325

24-
2526
fn length(&self) -> isize {
2627
self.b - self.a
2728
}

0 commit comments

Comments
 (0)