1
- use std:: fmt;
2
- use std:: sync:: atomic:: { AtomicU32 , Ordering } ;
3
-
1
+ use itertools:: Itertools ;
4
2
use tracing:: instrument;
5
3
6
- use super :: { Byte , Nfa , Ref , nfa} ;
7
- use crate :: Map ;
4
+ use super :: automaton:: { Automaton , State , Transition } ;
5
+ use super :: { Byte , Nfa , Ref } ;
6
+ use crate :: { Map , Set } ;
8
7
9
8
#[ derive( PartialEq , Clone , Debug ) ]
10
- pub ( crate ) struct Dfa < R >
11
- where
12
- R : Ref ,
13
- {
14
- pub ( crate ) transitions : Map < State , Transitions < R > > ,
15
- pub ( crate ) start : State ,
16
- pub ( crate ) accepting : State ,
17
- }
18
-
19
- #[ derive( PartialEq , Clone , Debug ) ]
20
- pub ( crate ) struct Transitions < R >
21
- where
22
- R : Ref ,
23
- {
24
- byte_transitions : Map < Byte , State > ,
25
- ref_transitions : Map < R , State > ,
26
- }
27
-
28
- impl < R > Default for Transitions < R >
29
- where
30
- R : Ref ,
31
- {
32
- fn default ( ) -> Self {
33
- Self { byte_transitions : Map :: default ( ) , ref_transitions : Map :: default ( ) }
34
- }
35
- }
36
-
37
- impl < R > Transitions < R >
38
- where
39
- R : Ref ,
40
- {
41
- #[ cfg( test) ]
42
- fn insert ( & mut self , transition : Transition < R > , state : State ) {
43
- match transition {
44
- Transition :: Byte ( b) => {
45
- self . byte_transitions . insert ( b, state) ;
46
- }
47
- Transition :: Ref ( r) => {
48
- self . ref_transitions . insert ( r, state) ;
49
- }
50
- }
51
- }
52
- }
53
-
54
- /// The states in a `Nfa` represent byte offsets.
55
- #[ derive( Hash , Eq , PartialEq , PartialOrd , Ord , Copy , Clone ) ]
56
- pub ( crate ) struct State ( u32 ) ;
57
-
58
- #[ cfg( test) ]
59
- #[ derive( Hash , Eq , PartialEq , Clone , Copy ) ]
60
- pub ( crate ) enum Transition < R >
61
- where
62
- R : Ref ,
63
- {
64
- Byte ( Byte ) ,
65
- Ref ( R ) ,
66
- }
67
-
68
- impl fmt:: Debug for State {
69
- fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
70
- write ! ( f, "S_{}" , self . 0 )
71
- }
72
- }
73
-
74
- #[ cfg( test) ]
75
- impl < R > fmt:: Debug for Transition < R >
76
- where
77
- R : Ref ,
78
- {
79
- fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
80
- match & self {
81
- Self :: Byte ( b) => b. fmt ( f) ,
82
- Self :: Ref ( r) => r. fmt ( f) ,
83
- }
84
- }
85
- }
9
+ pub ( crate ) struct Dfa < R : Ref > (
10
+ // INVARIANT: `Automaton` is a DFA, which means that, for any `state`, each
11
+ // transition in `self.0.transitions[state]` contains exactly one
12
+ // destination state.
13
+ pub ( crate ) Automaton < R > ,
14
+ ) ;
86
15
87
16
impl < R > Dfa < R >
88
17
where
89
18
R : Ref ,
90
19
{
91
20
/// Builds a two-state DFA for test use: `start` moves to `accept` on the
/// initialized byte `0x00` and on the initialized byte `0x01`.
#[cfg(test)]
pub(crate) fn bool() -> Self {
    let mut transitions: Map<State, Map<Transition<R>, Set<State>>> = Map::default();
    let start = State::new();
    let accept = State::new();

    // Both edges leave `start`, so borrow its transition table once and add
    // the edges in the same order as before (0x00, then 0x01).
    let from_start = transitions.entry(start).or_default();
    for value in [0x00, 0x01] {
        from_start.insert(Transition::Byte(Byte::Init(value)), [accept].into_iter().collect());
    }

    Dfa(Automaton { transitions, start, accept })
}
103
38
104
39
#[ instrument( level = "debug" ) ]
105
40
pub ( crate ) fn from_nfa ( nfa : Nfa < R > ) -> Self {
106
- let Nfa { transitions : nfa_transitions, start : nfa_start, accepting : nfa_accepting } = nfa;
41
+ // It might already be the case that `nfa` is a DFA. If that's the case,
42
+ // we can avoid reconstructing the DFA.
43
+ let is_dfa = nfa
44
+ . 0
45
+ . transitions
46
+ . iter ( )
47
+ . flat_map ( |( _, transitions) | transitions. iter ( ) )
48
+ . all ( |( _, dsts) | dsts. len ( ) <= 1 ) ;
49
+ if is_dfa {
50
+ return Dfa ( nfa. 0 ) ;
51
+ }
52
+
53
+ let Nfa ( Automaton { transitions : nfa_transitions, start : nfa_start, accept : nfa_accept } ) =
54
+ nfa;
107
55
108
- let mut dfa_transitions: Map < State , Transitions < R > > = Map :: default ( ) ;
109
- let mut nfa_to_dfa: Map < nfa :: State , State > = Map :: default ( ) ;
56
+ let mut dfa_transitions: Map < State , Map < Transition < R > , Set < State > > > = Map :: default ( ) ;
57
+ let mut nfa_to_dfa: Map < State , State > = Map :: default ( ) ;
110
58
let dfa_start = State :: new ( ) ;
111
59
nfa_to_dfa. insert ( nfa_start, dfa_start) ;
112
60
113
61
let mut queue = vec ! [ ( nfa_start, dfa_start) ] ;
114
62
115
63
while let Some ( ( nfa_state, dfa_state) ) = queue. pop ( ) {
116
- if nfa_state == nfa_accepting {
64
+ if nfa_state == nfa_accept {
117
65
continue ;
118
66
}
119
67
120
68
for ( nfa_transition, next_nfa_states) in nfa_transitions[ & nfa_state] . iter ( ) {
69
+ use itertools:: Itertools as _;
70
+
121
71
let dfa_transitions =
122
72
dfa_transitions. entry ( dfa_state) . or_insert_with ( Default :: default) ;
123
73
124
74
let mapped_state = next_nfa_states. iter ( ) . find_map ( |x| nfa_to_dfa. get ( x) . copied ( ) ) ;
125
75
126
- let next_dfa_state = match nfa_transition {
127
- & nfa:: Transition :: Byte ( b) => * dfa_transitions
128
- . byte_transitions
129
- . entry ( b)
130
- . or_insert_with ( || mapped_state. unwrap_or_else ( State :: new) ) ,
131
- & nfa:: Transition :: Ref ( r) => * dfa_transitions
132
- . ref_transitions
133
- . entry ( r)
134
- . or_insert_with ( || mapped_state. unwrap_or_else ( State :: new) ) ,
135
- } ;
76
+ let next_dfa_state = dfa_transitions. entry ( * nfa_transition) . or_insert_with ( || {
77
+ [ mapped_state. unwrap_or_else ( State :: new) ] . into_iter ( ) . collect ( )
78
+ } ) ;
79
+ let next_dfa_state = * next_dfa_state. iter ( ) . exactly_one ( ) . unwrap ( ) ;
136
80
137
81
for & next_nfa_state in next_nfa_states {
138
82
nfa_to_dfa. entry ( next_nfa_state) . or_insert_with ( || {
@@ -143,40 +87,38 @@ where
143
87
}
144
88
}
145
89
146
- let dfa_accepting = nfa_to_dfa[ & nfa_accepting] ;
147
-
148
- Self { transitions : dfa_transitions, start : dfa_start, accepting : dfa_accepting }
149
- }
150
-
151
- pub ( crate ) fn bytes_from ( & self , start : State ) -> Option < & Map < Byte , State > > {
152
- Some ( & self . transitions . get ( & start) ?. byte_transitions )
90
+ let dfa_accept = nfa_to_dfa[ & nfa_accept] ;
91
+ Dfa ( Automaton { transitions : dfa_transitions, start : dfa_start, accept : dfa_accept } )
153
92
}
154
93
155
94
pub ( crate ) fn byte_from ( & self , start : State , byte : Byte ) -> Option < State > {
156
- self . transitions . get ( & start) ?. byte_transitions . get ( & byte) . copied ( )
157
- }
158
-
159
- pub ( crate ) fn refs_from ( & self , start : State ) -> Option < & Map < R , State > > {
160
- Some ( & self . transitions . get ( & start) ?. ref_transitions )
95
+ Some (
96
+ self . 0
97
+ . transitions
98
+ . get ( & start) ?
99
+ . get ( & Transition :: Byte ( byte) ) ?
100
+ . iter ( )
101
+ . copied ( )
102
+ . exactly_one ( )
103
+ . unwrap ( ) ,
104
+ )
161
105
}
162
- }
163
106
164
- impl State {
165
- pub ( crate ) fn new ( ) -> Self {
166
- static COUNTER : AtomicU32 = AtomicU32 :: new ( 0 ) ;
167
- Self ( COUNTER . fetch_add ( 1 , Ordering :: SeqCst ) )
107
+ pub ( crate ) fn iter_bytes_from ( & self , start : State ) -> impl Iterator < Item = ( Byte , State ) > {
108
+ self . 0 . transitions . get ( & start) . into_iter ( ) . flat_map ( |transitions| {
109
+ transitions. iter ( ) . filter_map ( |( t, s) | {
110
+ let s = s. iter ( ) . copied ( ) . exactly_one ( ) . unwrap ( ) ;
111
+ if let Transition :: Byte ( b) = t { Some ( ( * b, s) ) } else { None }
112
+ } )
113
+ } )
168
114
}
169
- }
170
115
171
- #[ cfg( test) ]
172
- impl < R > From < nfa:: Transition < R > > for Transition < R >
173
- where
174
- R : Ref ,
175
- {
176
- fn from ( nfa_transition : nfa:: Transition < R > ) -> Self {
177
- match nfa_transition {
178
- nfa:: Transition :: Byte ( byte) => Transition :: Byte ( byte) ,
179
- nfa:: Transition :: Ref ( r) => Transition :: Ref ( r) ,
180
- }
116
+ pub ( crate ) fn iter_refs_from ( & self , start : State ) -> impl Iterator < Item = ( R , State ) > {
117
+ self . 0 . transitions . get ( & start) . into_iter ( ) . flat_map ( |transitions| {
118
+ transitions. iter ( ) . filter_map ( |( t, s) | {
119
+ let s = s. iter ( ) . copied ( ) . exactly_one ( ) . unwrap ( ) ;
120
+ if let Transition :: Ref ( r) = t { Some ( ( * r, s) ) } else { None }
121
+ } )
122
+ } )
181
123
}
182
124
}
0 commit comments