Skip to content

Commit 0fec408

Browse files
committed
transmutability: remove NFA intermediate representation
Prior to this commit, the transmutability analysis used an intermediate NFA representation of type layout. We then determinized this representation into a DFA, upon which we ran the core transmutability analysis. Unfortunately, determinizing NFAs is expensive. In this commit, we avoid NFAs entirely by observing that Rust `union`s are the only source of nondeterminism and that it is comparatively cheap to compute the DFA union of DFAs.

We also implement Graphviz DOT debug formatting of DFAs.

Fixes rust-lang/project-safe-transmute#23
Fixes rust-lang/project-safe-transmute#24
1 parent 883f9f7 commit 0fec408

File tree

7 files changed

+226
-290
lines changed

7 files changed

+226
-290
lines changed

Diff for: compiler/rustc_transmute/src/layout/dfa.rs

+181-92
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
11
use std::fmt;
22
use std::sync::atomic::{AtomicU32, Ordering};
33

4-
use tracing::instrument;
5-
6-
use super::{Byte, Nfa, Ref, nfa};
4+
use super::{Byte, Ref, Tree, Uninhabited};
75
use crate::Map;
86

9-
#[derive(PartialEq, Clone, Debug)]
7+
#[derive(PartialEq, Clone)]
108
pub(crate) struct Dfa<R>
119
where
1210
R: Ref,
@@ -34,35 +32,15 @@ where
3432
}
3533
}
3634

37-
impl<R> Transitions<R>
38-
where
39-
R: Ref,
40-
{
41-
#[cfg(test)]
42-
fn insert(&mut self, transition: Transition<R>, state: State) {
43-
match transition {
44-
Transition::Byte(b) => {
45-
self.byte_transitions.insert(b, state);
46-
}
47-
Transition::Ref(r) => {
48-
self.ref_transitions.insert(r, state);
49-
}
50-
}
51-
}
52-
}
53-
5435
/// The states in a `Dfa` represent byte offsets.
5536
#[derive(Hash, Eq, PartialEq, PartialOrd, Ord, Copy, Clone)]
56-
pub(crate) struct State(u32);
37+
pub(crate) struct State(pub(crate) u32);
5738

58-
#[cfg(test)]
59-
#[derive(Hash, Eq, PartialEq, Clone, Copy)]
60-
pub(crate) enum Transition<R>
61-
where
62-
R: Ref,
63-
{
64-
Byte(Byte),
65-
Ref(R),
39+
impl State {
40+
pub(crate) fn new() -> Self {
41+
static COUNTER: AtomicU32 = AtomicU32::new(0);
42+
Self(COUNTER.fetch_add(1, Ordering::SeqCst))
43+
}
6644
}
6745

6846
impl fmt::Debug for State {
@@ -71,19 +49,6 @@ impl fmt::Debug for State {
7149
}
7250
}
7351

74-
#[cfg(test)]
75-
impl<R> fmt::Debug for Transition<R>
76-
where
77-
R: Ref,
78-
{
79-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
80-
match &self {
81-
Self::Byte(b) => b.fmt(f),
82-
Self::Ref(r) => r.fmt(f),
83-
}
84-
}
85-
}
86-
8752
impl<R> Dfa<R>
8853
where
8954
R: Ref,
@@ -94,58 +59,158 @@ where
9459
let start = State::new();
9560
let accepting = State::new();
9661

97-
transitions.entry(start).or_default().insert(Transition::Byte(Byte::Init(0x00)), accepting);
62+
transitions.entry(start).or_default().byte_transitions.insert(Byte::Init(0x00), accepting);
9863

99-
transitions.entry(start).or_default().insert(Transition::Byte(Byte::Init(0x01)), accepting);
64+
transitions.entry(start).or_default().byte_transitions.insert(Byte::Init(0x01), accepting);
10065

10166
Self { transitions, start, accepting }
10267
}
10368

104-
#[instrument(level = "debug")]
105-
pub(crate) fn from_nfa(nfa: Nfa<R>) -> Self {
106-
let Nfa { transitions: nfa_transitions, start: nfa_start, accepting: nfa_accepting } = nfa;
69+
pub(crate) fn unit() -> Self {
70+
let transitions: Map<State, Transitions<R>> = Map::default();
71+
let start = State::new();
72+
let accepting = start;
73+
74+
Self { transitions, start, accepting }
75+
}
10776

108-
let mut dfa_transitions: Map<State, Transitions<R>> = Map::default();
109-
let mut nfa_to_dfa: Map<nfa::State, State> = Map::default();
110-
let dfa_start = State::new();
111-
nfa_to_dfa.insert(nfa_start, dfa_start);
77+
pub(crate) fn from_byte(byte: Byte) -> Self {
78+
let mut transitions: Map<State, Transitions<R>> = Map::default();
79+
let start = State::new();
80+
let accepting = State::new();
11281

113-
let mut queue = vec![(nfa_start, dfa_start)];
82+
transitions.entry(start).or_default().byte_transitions.insert(byte, accepting);
11483

115-
while let Some((nfa_state, dfa_state)) = queue.pop() {
116-
if nfa_state == nfa_accepting {
117-
continue;
118-
}
84+
Self { transitions, start, accepting }
85+
}
86+
87+
pub(crate) fn from_ref(r: R) -> Self {
88+
let mut transitions: Map<State, Transitions<R>> = Map::default();
89+
let start = State::new();
90+
let accepting = State::new();
91+
92+
transitions.entry(start).or_default().ref_transitions.insert(r, accepting);
11993

120-
for (nfa_transition, next_nfa_states) in nfa_transitions[&nfa_state].iter() {
121-
let dfa_transitions =
122-
dfa_transitions.entry(dfa_state).or_insert_with(Default::default);
123-
124-
let mapped_state = next_nfa_states.iter().find_map(|x| nfa_to_dfa.get(x).copied());
125-
126-
let next_dfa_state = match nfa_transition {
127-
&nfa::Transition::Byte(b) => *dfa_transitions
128-
.byte_transitions
129-
.entry(b)
130-
.or_insert_with(|| mapped_state.unwrap_or_else(State::new)),
131-
&nfa::Transition::Ref(r) => *dfa_transitions
132-
.ref_transitions
133-
.entry(r)
134-
.or_insert_with(|| mapped_state.unwrap_or_else(State::new)),
135-
};
136-
137-
for &next_nfa_state in next_nfa_states {
138-
nfa_to_dfa.entry(next_nfa_state).or_insert_with(|| {
139-
queue.push((next_nfa_state, next_dfa_state));
140-
next_dfa_state
141-
});
94+
Self { transitions, start, accepting }
95+
}
96+
97+
pub(crate) fn from_tree(tree: Tree<!, R>) -> Result<Self, Uninhabited> {
98+
Ok(match tree {
99+
Tree::Byte(b) => Self::from_byte(b),
100+
Tree::Ref(r) => Self::from_ref(r),
101+
Tree::Alt(alts) => {
102+
let mut alts = alts.into_iter().map(Self::from_tree).filter_map(Result::ok);
103+
let dfa = alts.next().ok_or(Uninhabited)?;
104+
alts.fold(dfa, |dfa, alt| dfa.union(alt, State::new))
105+
}
106+
Tree::Seq(elts) => {
107+
let mut dfa = Self::unit();
108+
for elt in elts.into_iter().map(Self::from_tree) {
109+
dfa = dfa.concat(elt?);
142110
}
111+
dfa
112+
}
113+
})
114+
}
115+
116+
/// Concatenate two `Dfa`s.
117+
pub(crate) fn concat(self, other: Self) -> Self {
118+
if self.start == self.accepting {
119+
return other;
120+
} else if other.start == other.accepting {
121+
return self;
122+
}
123+
124+
let start = self.start;
125+
let accepting = other.accepting;
126+
127+
let mut transitions: Map<State, Transitions<R>> = self.transitions;
128+
129+
for (source, transition) in other.transitions {
130+
let fix_state = |state| if state == other.start { self.accepting } else { state };
131+
let entry = transitions.entry(fix_state(source)).or_default();
132+
for (edge, destination) in transition.byte_transitions {
133+
entry.byte_transitions.insert(edge, fix_state(destination));
134+
}
135+
for (edge, destination) in transition.ref_transitions {
136+
entry.ref_transitions.insert(edge, fix_state(destination));
143137
}
144138
}
145139

146-
let dfa_accepting = nfa_to_dfa[&nfa_accepting];
140+
Self { transitions, start, accepting }
141+
}
142+
143+
/// Compute the union of two `Dfa`s.
144+
pub(crate) fn union(self, other: Self, mut new_state: impl FnMut() -> State) -> Self {
145+
// We implement `union` by lazily initializing a set of states
146+
// corresponding to the product of states in `self` and `other`, and
147+
// then adding transitions between these states that correspond to where
148+
// they exist between `self` and `other`.
149+
150+
let a = self;
151+
let b = other;
152+
153+
let accepting = new_state();
154+
155+
let mut mapping: Map<(Option<State>, Option<State>), State> = Map::default();
156+
157+
let mut mapped = |(a_state, b_state)| {
158+
if Some(a.accepting) == a_state || Some(b.accepting) == b_state {
159+
// If either `a_state` or `b_state` is accepting, map to a
160+
// common `accepting` state.
161+
accepting
162+
} else {
163+
*mapping.entry((a_state, b_state)).or_insert_with(&mut new_state)
164+
}
165+
};
166+
167+
let start = mapped((Some(a.start), Some(b.start)));
168+
let mut transitions: Map<State, Transitions<R>> = Map::default();
169+
let mut queue = vec![(Some(a.start), Some(b.start))];
170+
let empty_transitions = Transitions::default();
171+
172+
while let Some((a_src, b_src)) = queue.pop() {
173+
let a_transitions =
174+
a_src.and_then(|a_src| a.transitions.get(&a_src)).unwrap_or(&empty_transitions);
175+
let b_transitions =
176+
b_src.and_then(|b_src| b.transitions.get(&b_src)).unwrap_or(&empty_transitions);
177+
178+
let byte_transitions =
179+
a_transitions.byte_transitions.keys().chain(b_transitions.byte_transitions.keys());
180+
181+
for byte_transition in byte_transitions {
182+
let a_dst = a_transitions.byte_transitions.get(byte_transition).copied();
183+
let b_dst = b_transitions.byte_transitions.get(byte_transition).copied();
184+
185+
assert!(a_dst.is_some() || b_dst.is_some());
186+
187+
let src = mapped((a_src, b_src));
188+
let dst = mapped((a_dst, b_dst));
189+
190+
transitions.entry(src).or_default().byte_transitions.insert(*byte_transition, dst);
191+
192+
queue.push((a_dst, b_dst))
193+
}
194+
195+
let ref_transitions =
196+
a_transitions.ref_transitions.keys().chain(b_transitions.ref_transitions.keys());
147197

148-
Self { transitions: dfa_transitions, start: dfa_start, accepting: dfa_accepting }
198+
for ref_transition in ref_transitions {
199+
let a_dst = a_transitions.ref_transitions.get(ref_transition).copied();
200+
let b_dst = b_transitions.ref_transitions.get(ref_transition).copied();
201+
202+
assert!(a_dst.is_some() || b_dst.is_some());
203+
204+
let src = mapped((a_src, b_src));
205+
let dst = mapped((a_dst, b_dst));
206+
207+
transitions.entry(src).or_default().ref_transitions.insert(*ref_transition, dst);
208+
209+
queue.push((a_dst, b_dst))
210+
}
211+
}
212+
213+
Self { transitions, start, accepting }
149214
}
150215

151216
pub(crate) fn bytes_from(&self, start: State) -> Option<&Map<Byte, State>> {
@@ -159,24 +224,48 @@ where
159224
pub(crate) fn refs_from(&self, start: State) -> Option<&Map<R, State>> {
160225
Some(&self.transitions.get(&start)?.ref_transitions)
161226
}
162-
}
163227

164-
impl State {
165-
pub(crate) fn new() -> Self {
166-
static COUNTER: AtomicU32 = AtomicU32::new(0);
167-
Self(COUNTER.fetch_add(1, Ordering::SeqCst))
228+
#[cfg(test)]
229+
pub(crate) fn from_edges<B: Copy + Into<Byte>>(
230+
start: u32,
231+
accept: u32,
232+
edges: &[(u32, B, u32)],
233+
) -> Self {
234+
let start = State(start);
235+
let accepting = State(accept);
236+
let mut transitions: Map<State, Transitions<R>> = Map::default();
237+
238+
for &(src, edge, dst) in edges {
239+
let src = State(src);
240+
let dst = State(dst);
241+
let old = transitions.entry(src).or_default().byte_transitions.insert(edge.into(), dst);
242+
assert!(old.is_none());
243+
}
244+
245+
Self { start, accepting, transitions }
168246
}
169247
}
170248

171-
#[cfg(test)]
172-
impl<R> From<nfa::Transition<R>> for Transition<R>
249+
/// Serialize the DFA using the Graphviz DOT format.
250+
impl<R> fmt::Debug for Dfa<R>
173251
where
174252
R: Ref,
175253
{
176-
fn from(nfa_transition: nfa::Transition<R>) -> Self {
177-
match nfa_transition {
178-
nfa::Transition::Byte(byte) => Transition::Byte(byte),
179-
nfa::Transition::Ref(r) => Transition::Ref(r),
254+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
255+
writeln!(f, "digraph {{")?;
256+
writeln!(f, " {:?} [shape = doublecircle]", self.start)?;
257+
writeln!(f, " {:?} [shape = doublecircle]", self.accepting)?;
258+
259+
for (src, transitions) in self.transitions.iter() {
260+
for (t, dst) in transitions.byte_transitions.iter() {
261+
writeln!(f, " {src:?} -> {dst:?} [label=\"{t:?}\"]")?;
262+
}
263+
264+
for (t, dst) in transitions.ref_transitions.iter() {
265+
writeln!(f, " {src:?} -> {dst:?} [label=\"{t:?}\"]")?;
266+
}
180267
}
268+
269+
writeln!(f, "}}")
181270
}
182271
}

Diff for: compiler/rustc_transmute/src/layout/mod.rs

+7-3
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@ use std::hash::Hash;
44
pub(crate) mod tree;
55
pub(crate) use tree::Tree;
66

7-
pub(crate) mod nfa;
8-
pub(crate) use nfa::Nfa;
9-
107
pub(crate) mod dfa;
118
pub(crate) use dfa::Dfa;
129

@@ -29,6 +26,13 @@ impl fmt::Debug for Byte {
2926
}
3027
}
3128

29+
#[cfg(test)]
30+
impl From<u8> for Byte {
31+
fn from(src: u8) -> Self {
32+
Self::Init(src)
33+
}
34+
}
35+
3236
pub(crate) trait Def: Debug + Hash + Eq + PartialEq + Copy + Clone {
3337
fn has_safety_invariants(&self) -> bool;
3438
}

0 commit comments

Comments
 (0)