Skip to content

Commit f0be2b1

Browse files
committed
transmutability: Short-circuit NFA->DFA
When an NFA is already a DFA, we short-circuit NFA->DFA conversion, avoiding the need to reconstruct the DFA.
1 parent 4680817 commit f0be2b1

File tree

6 files changed

+214
-280
lines changed

6 files changed

+214
-280
lines changed

compiler/rustc_transmute/Cargo.toml

+1-5
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ edition = "2024"
55

66
[dependencies]
77
# tidy-alphabetical-start
8+
itertools = "0.12"
89
rustc_abi = { path = "../rustc_abi", optional = true }
910
rustc_data_structures = { path = "../rustc_data_structures" }
1011
rustc_hir = { path = "../rustc_hir", optional = true }
@@ -20,8 +21,3 @@ rustc = [
2021
"dep:rustc_middle",
2122
"dep:rustc_span",
2223
]
23-
24-
[dev-dependencies]
25-
# tidy-alphabetical-start
26-
itertools = "0.12"
27-
# tidy-alphabetical-end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
use std::fmt;
2+
use std::sync::atomic::{AtomicU32, Ordering};
3+
4+
use super::{Byte, Ref};
5+
use crate::{Map, Set};
6+
7+
#[derive(PartialEq, Debug, Clone)]
8+
pub(crate) struct Automaton<R>
9+
where
10+
R: Ref,
11+
{
12+
pub(crate) transitions: Map<State, Map<Transition<R>, Set<State>>>,
13+
pub(crate) start: State,
14+
pub(crate) accept: State,
15+
}
16+
17+
/// The states in an `Nfa` represent byte offsets.
18+
#[derive(Hash, Eq, PartialEq, PartialOrd, Ord, Copy, Clone)]
19+
pub(crate) struct State(u32);
20+
21+
/// The transitions between states in an `Nfa` reflect bit validity.
22+
#[derive(Hash, Eq, PartialEq, Clone, Copy)]
23+
pub(crate) enum Transition<R>
24+
where
25+
R: Ref,
26+
{
27+
Byte(Byte),
28+
Ref(R),
29+
}
30+
31+
impl fmt::Debug for State {
32+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
33+
write!(f, "S_{}", self.0)
34+
}
35+
}
36+
37+
impl<R> fmt::Debug for Transition<R>
38+
where
39+
R: Ref,
40+
{
41+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42+
match &self {
43+
Self::Byte(b) => b.fmt(f),
44+
Self::Ref(r) => r.fmt(f),
45+
}
46+
}
47+
}
48+
49+
impl State {
50+
pub(crate) fn new() -> Self {
51+
static COUNTER: AtomicU32 = AtomicU32::new(0);
52+
Self(COUNTER.fetch_add(1, Ordering::SeqCst))
53+
}
54+
}
+70-128
Original file line numberDiff line numberDiff line change
@@ -1,138 +1,82 @@
1-
use std::fmt;
2-
use std::sync::atomic::{AtomicU32, Ordering};
3-
1+
use itertools::Itertools;
42
use tracing::instrument;
53

6-
use super::{Byte, Nfa, Ref, nfa};
7-
use crate::Map;
4+
use super::automaton::{Automaton, State, Transition};
5+
use super::{Byte, Nfa, Ref};
6+
use crate::{Map, Set};
87

98
#[derive(PartialEq, Clone, Debug)]
10-
pub(crate) struct Dfa<R>
11-
where
12-
R: Ref,
13-
{
14-
pub(crate) transitions: Map<State, Transitions<R>>,
15-
pub(crate) start: State,
16-
pub(crate) accepting: State,
17-
}
18-
19-
#[derive(PartialEq, Clone, Debug)]
20-
pub(crate) struct Transitions<R>
21-
where
22-
R: Ref,
23-
{
24-
byte_transitions: Map<Byte, State>,
25-
ref_transitions: Map<R, State>,
26-
}
27-
28-
impl<R> Default for Transitions<R>
29-
where
30-
R: Ref,
31-
{
32-
fn default() -> Self {
33-
Self { byte_transitions: Map::default(), ref_transitions: Map::default() }
34-
}
35-
}
36-
37-
impl<R> Transitions<R>
38-
where
39-
R: Ref,
40-
{
41-
#[cfg(test)]
42-
fn insert(&mut self, transition: Transition<R>, state: State) {
43-
match transition {
44-
Transition::Byte(b) => {
45-
self.byte_transitions.insert(b, state);
46-
}
47-
Transition::Ref(r) => {
48-
self.ref_transitions.insert(r, state);
49-
}
50-
}
51-
}
52-
}
53-
54-
/// The states in a `Nfa` represent byte offsets.
55-
#[derive(Hash, Eq, PartialEq, PartialOrd, Ord, Copy, Clone)]
56-
pub(crate) struct State(u32);
57-
58-
#[cfg(test)]
59-
#[derive(Hash, Eq, PartialEq, Clone, Copy)]
60-
pub(crate) enum Transition<R>
61-
where
62-
R: Ref,
63-
{
64-
Byte(Byte),
65-
Ref(R),
66-
}
67-
68-
impl fmt::Debug for State {
69-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
70-
write!(f, "S_{}", self.0)
71-
}
72-
}
73-
74-
#[cfg(test)]
75-
impl<R> fmt::Debug for Transition<R>
76-
where
77-
R: Ref,
78-
{
79-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
80-
match &self {
81-
Self::Byte(b) => b.fmt(f),
82-
Self::Ref(r) => r.fmt(f),
83-
}
84-
}
85-
}
9+
pub(crate) struct Dfa<R: Ref>(
10+
// INVARIANT: `Automaton` is a DFA, which means that, for any `state`, each
11+
// transition in `self.0.transitions[state]` contains exactly one
12+
// destination state.
13+
pub(crate) Automaton<R>,
14+
);
8615

8716
impl<R> Dfa<R>
8817
where
8918
R: Ref,
9019
{
9120
#[cfg(test)]
9221
pub(crate) fn bool() -> Self {
93-
let mut transitions: Map<State, Transitions<R>> = Map::default();
22+
let mut transitions: Map<State, Map<Transition<R>, Set<State>>> = Map::default();
9423
let start = State::new();
95-
let accepting = State::new();
24+
let accept = State::new();
9625

97-
transitions.entry(start).or_default().insert(Transition::Byte(Byte::Init(0x00)), accepting);
26+
transitions
27+
.entry(start)
28+
.or_default()
29+
.insert(Transition::Byte(Byte::Init(0x00)), [accept].into_iter().collect());
9830

99-
transitions.entry(start).or_default().insert(Transition::Byte(Byte::Init(0x01)), accepting);
31+
transitions
32+
.entry(start)
33+
.or_default()
34+
.insert(Transition::Byte(Byte::Init(0x01)), [accept].into_iter().collect());
10035

101-
Self { transitions, start, accepting }
36+
Dfa(Automaton { transitions, start, accept })
10237
}
10338

10439
#[instrument(level = "debug")]
10540
pub(crate) fn from_nfa(nfa: Nfa<R>) -> Self {
106-
let Nfa { transitions: nfa_transitions, start: nfa_start, accepting: nfa_accepting } = nfa;
41+
// It might already be the case that `nfa` is a DFA. If that's the case,
42+
// we can avoid reconstructing the DFA.
43+
let is_dfa = nfa
44+
.0
45+
.transitions
46+
.iter()
47+
.flat_map(|(_, transitions)| transitions.iter())
48+
.all(|(_, dsts)| dsts.len() <= 1);
49+
if is_dfa {
50+
return Dfa(nfa.0);
51+
}
52+
53+
let Nfa(Automaton { transitions: nfa_transitions, start: nfa_start, accept: nfa_accept }) =
54+
nfa;
10755

108-
let mut dfa_transitions: Map<State, Transitions<R>> = Map::default();
109-
let mut nfa_to_dfa: Map<nfa::State, State> = Map::default();
56+
let mut dfa_transitions: Map<State, Map<Transition<R>, Set<State>>> = Map::default();
57+
let mut nfa_to_dfa: Map<State, State> = Map::default();
11058
let dfa_start = State::new();
11159
nfa_to_dfa.insert(nfa_start, dfa_start);
11260

11361
let mut queue = vec![(nfa_start, dfa_start)];
11462

11563
while let Some((nfa_state, dfa_state)) = queue.pop() {
116-
if nfa_state == nfa_accepting {
64+
if nfa_state == nfa_accept {
11765
continue;
11866
}
11967

12068
for (nfa_transition, next_nfa_states) in nfa_transitions[&nfa_state].iter() {
69+
use itertools::Itertools as _;
70+
12171
let dfa_transitions =
12272
dfa_transitions.entry(dfa_state).or_insert_with(Default::default);
12373

12474
let mapped_state = next_nfa_states.iter().find_map(|x| nfa_to_dfa.get(x).copied());
12575

126-
let next_dfa_state = match nfa_transition {
127-
&nfa::Transition::Byte(b) => *dfa_transitions
128-
.byte_transitions
129-
.entry(b)
130-
.or_insert_with(|| mapped_state.unwrap_or_else(State::new)),
131-
&nfa::Transition::Ref(r) => *dfa_transitions
132-
.ref_transitions
133-
.entry(r)
134-
.or_insert_with(|| mapped_state.unwrap_or_else(State::new)),
135-
};
76+
let next_dfa_state = dfa_transitions.entry(*nfa_transition).or_insert_with(|| {
77+
[mapped_state.unwrap_or_else(State::new)].into_iter().collect()
78+
});
79+
let next_dfa_state = *next_dfa_state.iter().exactly_one().unwrap();
13680

13781
for &next_nfa_state in next_nfa_states {
13882
nfa_to_dfa.entry(next_nfa_state).or_insert_with(|| {
@@ -143,40 +87,38 @@ where
14387
}
14488
}
14589

146-
let dfa_accepting = nfa_to_dfa[&nfa_accepting];
147-
148-
Self { transitions: dfa_transitions, start: dfa_start, accepting: dfa_accepting }
149-
}
150-
151-
pub(crate) fn bytes_from(&self, start: State) -> Option<&Map<Byte, State>> {
152-
Some(&self.transitions.get(&start)?.byte_transitions)
90+
let dfa_accept = nfa_to_dfa[&nfa_accept];
91+
Dfa(Automaton { transitions: dfa_transitions, start: dfa_start, accept: dfa_accept })
15392
}
15493

15594
pub(crate) fn byte_from(&self, start: State, byte: Byte) -> Option<State> {
156-
self.transitions.get(&start)?.byte_transitions.get(&byte).copied()
157-
}
158-
159-
pub(crate) fn refs_from(&self, start: State) -> Option<&Map<R, State>> {
160-
Some(&self.transitions.get(&start)?.ref_transitions)
95+
Some(
96+
self.0
97+
.transitions
98+
.get(&start)?
99+
.get(&Transition::Byte(byte))?
100+
.iter()
101+
.copied()
102+
.exactly_one()
103+
.unwrap(),
104+
)
161105
}
162-
}
163106

164-
impl State {
165-
pub(crate) fn new() -> Self {
166-
static COUNTER: AtomicU32 = AtomicU32::new(0);
167-
Self(COUNTER.fetch_add(1, Ordering::SeqCst))
107+
pub(crate) fn iter_bytes_from(&self, start: State) -> impl Iterator<Item = (Byte, State)> {
108+
self.0.transitions.get(&start).into_iter().flat_map(|transitions| {
109+
transitions.iter().filter_map(|(t, s)| {
110+
let s = s.iter().copied().exactly_one().unwrap();
111+
if let Transition::Byte(b) = t { Some((*b, s)) } else { None }
112+
})
113+
})
168114
}
169-
}
170115

171-
#[cfg(test)]
172-
impl<R> From<nfa::Transition<R>> for Transition<R>
173-
where
174-
R: Ref,
175-
{
176-
fn from(nfa_transition: nfa::Transition<R>) -> Self {
177-
match nfa_transition {
178-
nfa::Transition::Byte(byte) => Transition::Byte(byte),
179-
nfa::Transition::Ref(r) => Transition::Ref(r),
180-
}
116+
pub(crate) fn iter_refs_from(&self, start: State) -> impl Iterator<Item = (R, State)> {
117+
self.0.transitions.get(&start).into_iter().flat_map(|transitions| {
118+
transitions.iter().filter_map(|(t, s)| {
119+
let s = s.iter().copied().exactly_one().unwrap();
120+
if let Transition::Ref(r) = t { Some((*r, s)) } else { None }
121+
})
122+
})
181123
}
182124
}

compiler/rustc_transmute/src/layout/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
use std::fmt::{self, Debug};
22
use std::hash::Hash;
33

4+
pub(crate) mod automaton;
5+
46
pub(crate) mod tree;
57
pub(crate) use tree::Tree;
68

0 commit comments

Comments
 (0)