Skip to content

Commit 648c6c0

Browse files
committed
transmutability: Short-circuit NFA->DFA
When an NFA is already a DFA, we short-circuit NFA->DFA conversion, avoiding the need to reconstruct the DFA.
1 parent b7fbf20 commit 648c6c0

File tree

6 files changed

+218
-281
lines changed

6 files changed

+218
-281
lines changed

compiler/rustc_transmute/Cargo.toml

+1-5
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ edition = "2024"
55

66
[dependencies]
77
# tidy-alphabetical-start
8+
itertools = "0.12"
89
rustc_abi = { path = "../rustc_abi", optional = true }
910
rustc_data_structures = { path = "../rustc_data_structures" }
1011
rustc_hir = { path = "../rustc_hir", optional = true }
@@ -20,8 +21,3 @@ rustc = [
2021
"dep:rustc_middle",
2122
"dep:rustc_span",
2223
]
23-
24-
[dev-dependencies]
25-
# tidy-alphabetical-start
26-
itertools = "0.12"
27-
# tidy-alphabetical-end
compiler/rustc_transmute/src/layout/automaton.rs

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
use std::fmt;
2+
use std::sync::atomic::{AtomicU32, Ordering};
3+
4+
use super::{Byte, Ref};
5+
use crate::{Map, Set};
6+
7+
#[derive(PartialEq, Debug, Clone)]
8+
pub(crate) struct Automaton<R>
9+
where
10+
R: Ref,
11+
{
12+
pub(crate) transitions: Map<State, Map<Transition<R>, Set<State>>>,
13+
pub(crate) start: State,
14+
pub(crate) accept: State,
15+
}
16+
17+
/// The states in an `Nfa` represent byte offsets.
18+
#[derive(Hash, Eq, PartialEq, PartialOrd, Ord, Copy, Clone)]
19+
pub(crate) struct State(u32);
20+
21+
/// The transitions between states in an `Nfa` reflect bit validity.
22+
#[derive(Hash, Eq, PartialEq, Clone, Copy)]
23+
pub(crate) enum Transition<R>
24+
where
25+
R: Ref,
26+
{
27+
Byte(Byte),
28+
Ref(R),
29+
}
30+
31+
impl fmt::Debug for State {
32+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
33+
write!(f, "S_{}", self.0)
34+
}
35+
}
36+
37+
impl<R> fmt::Debug for Transition<R>
38+
where
39+
R: Ref,
40+
{
41+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42+
match &self {
43+
Self::Byte(b) => b.fmt(f),
44+
Self::Ref(r) => r.fmt(f),
45+
}
46+
}
47+
}
48+
49+
impl State {
50+
pub(crate) fn new() -> Self {
51+
static COUNTER: AtomicU32 = AtomicU32::new(0);
52+
Self(COUNTER.fetch_add(1, Ordering::SeqCst))
53+
}
54+
}
compiler/rustc_transmute/src/layout/dfa.rs

+72-128
Original file line numberDiff line numberDiff line change
@@ -1,138 +1,84 @@
1-
use std::fmt;
2-
use std::sync::atomic::{AtomicU32, Ordering};
3-
1+
use itertools::Itertools;
42
use tracing::instrument;
53

6-
use super::{Byte, Nfa, Ref, nfa};
7-
use crate::Map;
4+
use super::{
5+
Byte, Nfa, Ref,
6+
automaton::{Automaton, State, Transition},
7+
};
8+
use crate::{Map, Set};
89

910
#[derive(PartialEq, Clone, Debug)]
10-
pub(crate) struct Dfa<R>
11-
where
12-
R: Ref,
13-
{
14-
pub(crate) transitions: Map<State, Transitions<R>>,
15-
pub(crate) start: State,
16-
pub(crate) accepting: State,
17-
}
18-
19-
#[derive(PartialEq, Clone, Debug)]
20-
pub(crate) struct Transitions<R>
21-
where
22-
R: Ref,
23-
{
24-
byte_transitions: Map<Byte, State>,
25-
ref_transitions: Map<R, State>,
26-
}
27-
28-
impl<R> Default for Transitions<R>
29-
where
30-
R: Ref,
31-
{
32-
fn default() -> Self {
33-
Self { byte_transitions: Map::default(), ref_transitions: Map::default() }
34-
}
35-
}
36-
37-
impl<R> Transitions<R>
38-
where
39-
R: Ref,
40-
{
41-
#[cfg(test)]
42-
fn insert(&mut self, transition: Transition<R>, state: State) {
43-
match transition {
44-
Transition::Byte(b) => {
45-
self.byte_transitions.insert(b, state);
46-
}
47-
Transition::Ref(r) => {
48-
self.ref_transitions.insert(r, state);
49-
}
50-
}
51-
}
52-
}
53-
54-
/// The states in a `Nfa` represent byte offsets.
55-
#[derive(Hash, Eq, PartialEq, PartialOrd, Ord, Copy, Clone)]
56-
pub(crate) struct State(u32);
57-
58-
#[cfg(test)]
59-
#[derive(Hash, Eq, PartialEq, Clone, Copy)]
60-
pub(crate) enum Transition<R>
61-
where
62-
R: Ref,
63-
{
64-
Byte(Byte),
65-
Ref(R),
66-
}
67-
68-
impl fmt::Debug for State {
69-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
70-
write!(f, "S_{}", self.0)
71-
}
72-
}
73-
74-
#[cfg(test)]
75-
impl<R> fmt::Debug for Transition<R>
76-
where
77-
R: Ref,
78-
{
79-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
80-
match &self {
81-
Self::Byte(b) => b.fmt(f),
82-
Self::Ref(r) => r.fmt(f),
83-
}
84-
}
85-
}
11+
pub(crate) struct Dfa<R: Ref>(
12+
// INVARIANT: `Automaton` is a DFA, which means that, for any `state`, each
13+
// transition in `self.0.transitions[state]` contains exactly one
14+
// destination state.
15+
pub(crate) Automaton<R>,
16+
);
8617

8718
impl<R> Dfa<R>
8819
where
8920
R: Ref,
9021
{
9122
#[cfg(test)]
9223
pub(crate) fn bool() -> Self {
93-
let mut transitions: Map<State, Transitions<R>> = Map::default();
24+
let mut transitions: Map<State, Map<Transition<R>, Set<State>>> = Map::default();
9425
let start = State::new();
95-
let accepting = State::new();
26+
let accept = State::new();
9627

97-
transitions.entry(start).or_default().insert(Transition::Byte(Byte::Init(0x00)), accepting);
28+
transitions
29+
.entry(start)
30+
.or_default()
31+
.insert(Transition::Byte(Byte::Init(0x00)), [accept].into_iter().collect());
9832

99-
transitions.entry(start).or_default().insert(Transition::Byte(Byte::Init(0x01)), accepting);
33+
transitions
34+
.entry(start)
35+
.or_default()
36+
.insert(Transition::Byte(Byte::Init(0x01)), [accept].into_iter().collect());
10037

101-
Self { transitions, start, accepting }
38+
Dfa(Automaton { transitions, start, accept })
10239
}
10340

10441
#[instrument(level = "debug")]
10542
pub(crate) fn from_nfa(nfa: Nfa<R>) -> Self {
106-
let Nfa { transitions: nfa_transitions, start: nfa_start, accepting: nfa_accepting } = nfa;
43+
// It might already be the case that `nfa` is a DFA. If that's the case,
44+
// we can avoid reconstructing the DFA.
45+
let is_dfa = nfa
46+
.0
47+
.transitions
48+
.iter()
49+
.flat_map(|(_, transitions)| transitions.iter())
50+
.all(|(_, dsts)| dsts.len() <= 1);
51+
if is_dfa {
52+
return Dfa(nfa.0);
53+
}
54+
55+
let Nfa(Automaton { transitions: nfa_transitions, start: nfa_start, accept: nfa_accept }) =
56+
nfa;
10757

108-
let mut dfa_transitions: Map<State, Transitions<R>> = Map::default();
109-
let mut nfa_to_dfa: Map<nfa::State, State> = Map::default();
58+
let mut dfa_transitions: Map<State, Map<Transition<R>, Set<State>>> = Map::default();
59+
let mut nfa_to_dfa: Map<State, State> = Map::default();
11060
let dfa_start = State::new();
11161
nfa_to_dfa.insert(nfa_start, dfa_start);
11262

11363
let mut queue = vec![(nfa_start, dfa_start)];
11464

11565
while let Some((nfa_state, dfa_state)) = queue.pop() {
116-
if nfa_state == nfa_accepting {
66+
if nfa_state == nfa_accept {
11767
continue;
11868
}
11969

12070
for (nfa_transition, next_nfa_states) in nfa_transitions[&nfa_state].iter() {
71+
use itertools::Itertools as _;
72+
12173
let dfa_transitions =
12274
dfa_transitions.entry(dfa_state).or_insert_with(Default::default);
12375

12476
let mapped_state = next_nfa_states.iter().find_map(|x| nfa_to_dfa.get(x).copied());
12577

126-
let next_dfa_state = match nfa_transition {
127-
&nfa::Transition::Byte(b) => *dfa_transitions
128-
.byte_transitions
129-
.entry(b)
130-
.or_insert_with(|| mapped_state.unwrap_or_else(State::new)),
131-
&nfa::Transition::Ref(r) => *dfa_transitions
132-
.ref_transitions
133-
.entry(r)
134-
.or_insert_with(|| mapped_state.unwrap_or_else(State::new)),
135-
};
78+
let next_dfa_state = dfa_transitions.entry(*nfa_transition).or_insert_with(|| {
79+
[mapped_state.unwrap_or_else(State::new)].into_iter().collect()
80+
});
81+
let next_dfa_state = *next_dfa_state.iter().exactly_one().unwrap();
13682

13783
for &next_nfa_state in next_nfa_states {
13884
nfa_to_dfa.entry(next_nfa_state).or_insert_with(|| {
@@ -143,40 +89,38 @@ where
14389
}
14490
}
14591

146-
let dfa_accepting = nfa_to_dfa[&nfa_accepting];
147-
148-
Self { transitions: dfa_transitions, start: dfa_start, accepting: dfa_accepting }
149-
}
150-
151-
pub(crate) fn bytes_from(&self, start: State) -> Option<&Map<Byte, State>> {
152-
Some(&self.transitions.get(&start)?.byte_transitions)
92+
let dfa_accept = nfa_to_dfa[&nfa_accept];
93+
Dfa(Automaton { transitions: dfa_transitions, start: dfa_start, accept: dfa_accept })
15394
}
15495

15596
pub(crate) fn byte_from(&self, start: State, byte: Byte) -> Option<State> {
156-
self.transitions.get(&start)?.byte_transitions.get(&byte).copied()
157-
}
158-
159-
pub(crate) fn refs_from(&self, start: State) -> Option<&Map<R, State>> {
160-
Some(&self.transitions.get(&start)?.ref_transitions)
97+
Some(
98+
self.0
99+
.transitions
100+
.get(&start)?
101+
.get(&Transition::Byte(byte))?
102+
.iter()
103+
.copied()
104+
.exactly_one()
105+
.unwrap(),
106+
)
161107
}
162-
}
163108

164-
impl State {
165-
pub(crate) fn new() -> Self {
166-
static COUNTER: AtomicU32 = AtomicU32::new(0);
167-
Self(COUNTER.fetch_add(1, Ordering::SeqCst))
109+
pub(crate) fn iter_bytes_from(&self, start: State) -> impl Iterator<Item = (Byte, State)> {
110+
self.0.transitions.get(&start).into_iter().flat_map(|transitions| {
111+
transitions.iter().filter_map(|(t, s)| {
112+
let s = s.iter().copied().exactly_one().unwrap();
113+
if let Transition::Byte(b) = t { Some((*b, s)) } else { None }
114+
})
115+
})
168116
}
169-
}
170117

171-
#[cfg(test)]
172-
impl<R> From<nfa::Transition<R>> for Transition<R>
173-
where
174-
R: Ref,
175-
{
176-
fn from(nfa_transition: nfa::Transition<R>) -> Self {
177-
match nfa_transition {
178-
nfa::Transition::Byte(byte) => Transition::Byte(byte),
179-
nfa::Transition::Ref(r) => Transition::Ref(r),
180-
}
118+
pub(crate) fn iter_refs_from(&self, start: State) -> impl Iterator<Item = (R, State)> {
119+
self.0.transitions.get(&start).into_iter().flat_map(|transitions| {
120+
transitions.iter().filter_map(|(t, s)| {
121+
let s = s.iter().copied().exactly_one().unwrap();
122+
if let Transition::Ref(r) = t { Some((*r, s)) } else { None }
123+
})
124+
})
181125
}
182126
}

compiler/rustc_transmute/src/layout/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
use std::fmt::{self, Debug};
22
use std::hash::Hash;
33

4+
pub(crate) mod automaton;
5+
46
pub(crate) mod tree;
57
pub(crate) use tree::Tree;
68

0 commit comments

Comments
 (0)