1
- use std:: fmt;
2
- use std:: sync:: atomic:: { AtomicU32 , Ordering } ;
3
-
1
+ use itertools:: Itertools ;
4
2
use tracing:: instrument;
5
3
6
- use super :: { Byte , Nfa , Ref , nfa} ;
7
- use crate :: Map ;
4
+ use super :: {
5
+ Byte , Nfa , Ref ,
6
+ automaton:: { Automaton , State , Transition } ,
7
+ } ;
8
+ use crate :: { Map , Set } ;
8
9
9
10
#[ derive( PartialEq , Clone , Debug ) ]
10
- pub ( crate ) struct Dfa < R >
11
- where
12
- R : Ref ,
13
- {
14
- pub ( crate ) transitions : Map < State , Transitions < R > > ,
15
- pub ( crate ) start : State ,
16
- pub ( crate ) accepting : State ,
17
- }
18
-
19
- #[ derive( PartialEq , Clone , Debug ) ]
20
- pub ( crate ) struct Transitions < R >
21
- where
22
- R : Ref ,
23
- {
24
- byte_transitions : Map < Byte , State > ,
25
- ref_transitions : Map < R , State > ,
26
- }
27
-
28
- impl < R > Default for Transitions < R >
29
- where
30
- R : Ref ,
31
- {
32
- fn default ( ) -> Self {
33
- Self { byte_transitions : Map :: default ( ) , ref_transitions : Map :: default ( ) }
34
- }
35
- }
36
-
37
- impl < R > Transitions < R >
38
- where
39
- R : Ref ,
40
- {
41
- #[ cfg( test) ]
42
- fn insert ( & mut self , transition : Transition < R > , state : State ) {
43
- match transition {
44
- Transition :: Byte ( b) => {
45
- self . byte_transitions . insert ( b, state) ;
46
- }
47
- Transition :: Ref ( r) => {
48
- self . ref_transitions . insert ( r, state) ;
49
- }
50
- }
51
- }
52
- }
53
-
54
- /// The states in a `Nfa` represent byte offsets.
55
- #[ derive( Hash , Eq , PartialEq , PartialOrd , Ord , Copy , Clone ) ]
56
- pub ( crate ) struct State ( u32 ) ;
57
-
58
- #[ cfg( test) ]
59
- #[ derive( Hash , Eq , PartialEq , Clone , Copy ) ]
60
- pub ( crate ) enum Transition < R >
61
- where
62
- R : Ref ,
63
- {
64
- Byte ( Byte ) ,
65
- Ref ( R ) ,
66
- }
67
-
68
- impl fmt:: Debug for State {
69
- fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
70
- write ! ( f, "S_{}" , self . 0 )
71
- }
72
- }
73
-
74
- #[ cfg( test) ]
75
- impl < R > fmt:: Debug for Transition < R >
76
- where
77
- R : Ref ,
78
- {
79
- fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
80
- match & self {
81
- Self :: Byte ( b) => b. fmt ( f) ,
82
- Self :: Ref ( r) => r. fmt ( f) ,
83
- }
84
- }
85
- }
11
+ pub ( crate ) struct Dfa < R : Ref > (
12
+ // INVARIANT: `Automaton` is a DFA, which means that, for any `state`, each
13
+ // transition in `self.0.transitions[state]` contains exactly one
14
+ // destination state.
15
+ pub ( crate ) Automaton < R > ,
16
+ ) ;
86
17
87
18
impl < R > Dfa < R >
88
19
where
89
20
R : Ref ,
90
21
{
91
22
#[ cfg( test) ]
92
23
pub ( crate ) fn bool ( ) -> Self {
93
- let mut transitions: Map < State , Transitions < R > > = Map :: default ( ) ;
24
+ let mut transitions: Map < State , Map < Transition < R > , Set < State > > > = Map :: default ( ) ;
94
25
let start = State :: new ( ) ;
95
- let accepting = State :: new ( ) ;
26
+ let accept = State :: new ( ) ;
96
27
97
- transitions. entry ( start) . or_default ( ) . insert ( Transition :: Byte ( Byte :: Init ( 0x00 ) ) , accepting) ;
28
+ transitions
29
+ . entry ( start)
30
+ . or_default ( )
31
+ . insert ( Transition :: Byte ( Byte :: Init ( 0x00 ) ) , [ accept] . into_iter ( ) . collect ( ) ) ;
98
32
99
- transitions. entry ( start) . or_default ( ) . insert ( Transition :: Byte ( Byte :: Init ( 0x01 ) ) , accepting) ;
33
+ transitions
34
+ . entry ( start)
35
+ . or_default ( )
36
+ . insert ( Transition :: Byte ( Byte :: Init ( 0x01 ) ) , [ accept] . into_iter ( ) . collect ( ) ) ;
100
37
101
- Self { transitions, start, accepting }
38
+ Dfa ( Automaton { transitions, start, accept } )
102
39
}
103
40
104
41
#[ instrument( level = "debug" ) ]
105
42
pub ( crate ) fn from_nfa ( nfa : Nfa < R > ) -> Self {
106
- let Nfa { transitions : nfa_transitions, start : nfa_start, accepting : nfa_accepting } = nfa;
43
+ // It might already be the case that `nfa` is a DFA. If that's the case,
44
+ // we can avoid reconstructing the DFA.
45
+ let is_dfa = nfa
46
+ . 0
47
+ . transitions
48
+ . iter ( )
49
+ . flat_map ( |( _, transitions) | transitions. iter ( ) )
50
+ . all ( |( _, dsts) | dsts. len ( ) <= 1 ) ;
51
+ if is_dfa {
52
+ return Dfa ( nfa. 0 ) ;
53
+ }
54
+
55
+ let Nfa ( Automaton { transitions : nfa_transitions, start : nfa_start, accept : nfa_accept } ) =
56
+ nfa;
107
57
108
- let mut dfa_transitions: Map < State , Transitions < R > > = Map :: default ( ) ;
109
- let mut nfa_to_dfa: Map < nfa :: State , State > = Map :: default ( ) ;
58
+ let mut dfa_transitions: Map < State , Map < Transition < R > , Set < State > > > = Map :: default ( ) ;
59
+ let mut nfa_to_dfa: Map < State , State > = Map :: default ( ) ;
110
60
let dfa_start = State :: new ( ) ;
111
61
nfa_to_dfa. insert ( nfa_start, dfa_start) ;
112
62
113
63
let mut queue = vec ! [ ( nfa_start, dfa_start) ] ;
114
64
115
65
while let Some ( ( nfa_state, dfa_state) ) = queue. pop ( ) {
116
- if nfa_state == nfa_accepting {
66
+ if nfa_state == nfa_accept {
117
67
continue ;
118
68
}
119
69
120
70
for ( nfa_transition, next_nfa_states) in nfa_transitions[ & nfa_state] . iter ( ) {
71
+ use itertools:: Itertools as _;
72
+
121
73
let dfa_transitions =
122
74
dfa_transitions. entry ( dfa_state) . or_insert_with ( Default :: default) ;
123
75
124
76
let mapped_state = next_nfa_states. iter ( ) . find_map ( |x| nfa_to_dfa. get ( x) . copied ( ) ) ;
125
77
126
- let next_dfa_state = match nfa_transition {
127
- & nfa:: Transition :: Byte ( b) => * dfa_transitions
128
- . byte_transitions
129
- . entry ( b)
130
- . or_insert_with ( || mapped_state. unwrap_or_else ( State :: new) ) ,
131
- & nfa:: Transition :: Ref ( r) => * dfa_transitions
132
- . ref_transitions
133
- . entry ( r)
134
- . or_insert_with ( || mapped_state. unwrap_or_else ( State :: new) ) ,
135
- } ;
78
+ let next_dfa_state = dfa_transitions. entry ( * nfa_transition) . or_insert_with ( || {
79
+ [ mapped_state. unwrap_or_else ( State :: new) ] . into_iter ( ) . collect ( )
80
+ } ) ;
81
+ let next_dfa_state = * next_dfa_state. iter ( ) . exactly_one ( ) . unwrap ( ) ;
136
82
137
83
for & next_nfa_state in next_nfa_states {
138
84
nfa_to_dfa. entry ( next_nfa_state) . or_insert_with ( || {
@@ -143,40 +89,38 @@ where
143
89
}
144
90
}
145
91
146
- let dfa_accepting = nfa_to_dfa[ & nfa_accepting] ;
147
-
148
- Self { transitions : dfa_transitions, start : dfa_start, accepting : dfa_accepting }
149
- }
150
-
151
- pub ( crate ) fn bytes_from ( & self , start : State ) -> Option < & Map < Byte , State > > {
152
- Some ( & self . transitions . get ( & start) ?. byte_transitions )
92
+ let dfa_accept = nfa_to_dfa[ & nfa_accept] ;
93
+ Dfa ( Automaton { transitions : dfa_transitions, start : dfa_start, accept : dfa_accept } )
153
94
}
154
95
155
96
pub ( crate ) fn byte_from ( & self , start : State , byte : Byte ) -> Option < State > {
156
- self . transitions . get ( & start) ?. byte_transitions . get ( & byte) . copied ( )
157
- }
158
-
159
- pub ( crate ) fn refs_from ( & self , start : State ) -> Option < & Map < R , State > > {
160
- Some ( & self . transitions . get ( & start) ?. ref_transitions )
97
+ Some (
98
+ self . 0
99
+ . transitions
100
+ . get ( & start) ?
101
+ . get ( & Transition :: Byte ( byte) ) ?
102
+ . iter ( )
103
+ . copied ( )
104
+ . exactly_one ( )
105
+ . unwrap ( ) ,
106
+ )
161
107
}
162
- }
163
108
164
- impl State {
165
- pub ( crate ) fn new ( ) -> Self {
166
- static COUNTER : AtomicU32 = AtomicU32 :: new ( 0 ) ;
167
- Self ( COUNTER . fetch_add ( 1 , Ordering :: SeqCst ) )
109
+ pub ( crate ) fn iter_bytes_from ( & self , start : State ) -> impl Iterator < Item = ( Byte , State ) > {
110
+ self . 0 . transitions . get ( & start) . into_iter ( ) . flat_map ( |transitions| {
111
+ transitions. iter ( ) . filter_map ( |( t, s) | {
112
+ let s = s. iter ( ) . copied ( ) . exactly_one ( ) . unwrap ( ) ;
113
+ if let Transition :: Byte ( b) = t { Some ( ( * b, s) ) } else { None }
114
+ } )
115
+ } )
168
116
}
169
- }
170
117
171
- #[ cfg( test) ]
172
- impl < R > From < nfa:: Transition < R > > for Transition < R >
173
- where
174
- R : Ref ,
175
- {
176
- fn from ( nfa_transition : nfa:: Transition < R > ) -> Self {
177
- match nfa_transition {
178
- nfa:: Transition :: Byte ( byte) => Transition :: Byte ( byte) ,
179
- nfa:: Transition :: Ref ( r) => Transition :: Ref ( r) ,
180
- }
118
+ pub ( crate ) fn iter_refs_from ( & self , start : State ) -> impl Iterator < Item = ( R , State ) > {
119
+ self . 0 . transitions . get ( & start) . into_iter ( ) . flat_map ( |transitions| {
120
+ transitions. iter ( ) . filter_map ( |( t, s) | {
121
+ let s = s. iter ( ) . copied ( ) . exactly_one ( ) . unwrap ( ) ;
122
+ if let Transition :: Ref ( r) = t { Some ( ( * r, s) ) } else { None }
123
+ } )
124
+ } )
181
125
}
182
126
}
0 commit comments