Commit 88aa7ad

Move various token stream things from rustc_parse to rustc_ast.
Specifically: `TokenCursor`, `LazyAttrTokenStreamImpl`, `FlatToken`, and `ReplaceRange`. These are all related to token streams, rather than actual parsing. This will facilitate the simplifications in the next commit.

1 parent f552794 · commit 88aa7ad

File tree

3 files changed: +277 -278 lines changed

compiler/rustc_ast/src/tokenstream.rs (+268 -7)
@@ -25,7 +25,8 @@ use rustc_serialize::{Decodable, Encodable};
 use rustc_span::{sym, Span, SpanDecoder, SpanEncoder, Symbol, DUMMY_SP};
 
 use std::borrow::Cow;
-use std::{cmp, fmt, iter};
+use std::ops::Range;
+use std::{cmp, fmt, iter, mem};
 
 /// Part of a `TokenStream`.
 #[derive(Debug, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
@@ -156,12 +157,195 @@ impl<CTX> HashStable<CTX> for LazyAttrTokenStream {
     }
 }
 
-/// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
-/// information about the tokens for attribute targets. This is used
-/// during expansion to perform early cfg-expansion, and to process attributes
-/// during proc-macro invocations.
-#[derive(Clone, Debug, Default, Encodable, Decodable)]
-pub struct AttrTokenStream(pub Lrc<Vec<AttrTokenTree>>);
+/// Indicates a range of tokens that should be replaced by the tokens in the
+/// provided `AttrsTarget`. This is used in two places during token collection:
+///
+/// 1. During the parsing of an AST node that may have a `#[derive]` attribute,
+///    we parse a nested AST node that has `#[cfg]` or `#[cfg_attr]`. In this
+///    case, we use a `ReplaceRange` to replace the entire inner AST node with
+///    `FlatToken::AttrsTarget`, allowing us to perform eager cfg-expansion on
+///    an `AttrTokenStream`.
+///
+/// 2. When we parse an inner attribute while collecting tokens. We remove
+///    inner attributes from the token stream entirely, and instead track them
+///    through the `attrs` field on the AST node. This allows us to easily
+///    manipulate them (for example, removing the first macro inner attribute
+///    to invoke a proc-macro). When we create a `TokenStream`, the inner
+///    attributes get inserted into the proper place in the token stream.
+pub type ReplaceRange = (Range<u32>, Option<AttrsTarget>);
+
+// Produces a `TokenStream` on-demand. Using `cursor_snapshot` and `num_calls`,
+// we can reconstruct the `TokenStream` seen by the callback. This allows us to
+// avoid producing a `TokenStream` if it is never needed - for example, a
+// captured `macro_rules!` argument that is never passed to a proc macro. In
+// practice, token stream creation happens rarely compared to calls to
+// `collect_tokens` (see some statistics in #78736), so we are doing as little
+// up-front work as possible.
+//
+// This also makes `Parser` very cheap to clone, since there is no intermediate
+// collection buffer to clone.
+pub struct LazyAttrTokenStreamImpl {
+    pub start_token: (Token, Spacing),
+    pub cursor_snapshot: TokenCursor,
+    pub num_calls: u32,
+    pub break_last_token: bool,
+    pub replace_ranges: Box<[ReplaceRange]>,
+}
+
+impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
+    fn to_attr_token_stream(&self) -> AttrTokenStream {
+        // The token produced by the final call to `{,inlined_}next` was not
+        // actually consumed by the callback. The combination of chaining the
+        // initial token and using `take` produces the desired result - we
+        // produce an empty `TokenStream` if no calls were made, and omit the
+        // final token otherwise.
+        let mut cursor_snapshot = self.cursor_snapshot.clone();
+        let tokens = iter::once(FlatToken::Token(self.start_token.clone()))
+            .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next())))
+            .take(self.num_calls as usize);
+
+        if self.replace_ranges.is_empty() {
+            make_attr_token_stream(tokens, self.break_last_token)
+        } else {
+            let mut tokens: Vec<_> = tokens.collect();
+            let mut replace_ranges = self.replace_ranges.to_vec();
+            replace_ranges.sort_by_key(|(range, _)| range.start);
+
+            #[cfg(debug_assertions)]
+            {
+                for [(range, tokens), (next_range, next_tokens)] in
+                    replace_ranges.array_windows()
+                {
+                    assert!(
+                        range.end <= next_range.start || range.end >= next_range.end,
+                        "Replace ranges should either be disjoint or nested: \
+                        ({:?}, {:?}) ({:?}, {:?})",
+                        range,
+                        tokens,
+                        next_range,
+                        next_tokens,
+                    );
+                }
+            }
+
+            // Process the replace ranges, starting from the highest start
+            // position and working our way back. If we have tokens like:
+            //
+            // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
+            //
+            // Then we will generate replace ranges for both the `#[cfg(FALSE)]
+            // field: bool` and the entire `#[cfg(FALSE)] struct Foo {
+            // #[cfg(FALSE)] field: bool }`
+            //
+            // By starting processing from the replace range with the greatest
+            // start position, we ensure that any replace range which encloses
+            // another replace range will capture the *replaced* tokens for the
+            // inner range, not the original tokens.
+            for (range, target) in replace_ranges.into_iter().rev() {
+                assert!(!range.is_empty(), "Cannot replace an empty range: {range:?}");
+
+                // Replace the tokens in range with zero or one
+                // `FlatToken::AttrsTarget`s, plus enough `FlatToken::Empty`s
+                // to fill up the rest of the range. This keeps the total
+                // length of `tokens` constant throughout the replacement
+                // process, allowing us to use all of the `ReplaceRanges`
+                // entries without adjusting indices.
+                let target_len = target.is_some() as usize;
+                tokens.splice(
+                    (range.start as usize)..(range.end as usize),
+                    target
+                        .into_iter()
+                        .map(|target| FlatToken::AttrsTarget(target))
+                        .chain(iter::repeat(FlatToken::Empty).take(range.len() - target_len)),
+                );
+            }
+            make_attr_token_stream(tokens.into_iter(), self.break_last_token)
+        }
+    }
+}
+
+/// A helper struct used when building an `AttrTokenStream` from a
+/// `LazyAttrTokenStream`. Both delimiter and non-delimited tokens are stored
+/// as `FlatToken::Token`. A vector of `FlatToken`s is then 'parsed' to build
+/// up an `AttrTokenStream` with nested `AttrTokenTree::Delimited` tokens.
+#[derive(Debug, Clone)]
+enum FlatToken {
+    /// A token. This holds both delimiter (e.g. '{' and '}') and non-delimiter
+    /// tokens.
+    Token((Token, Spacing)),
+    /// Holds the `AttrsTarget` for an AST node. The `AttrsTarget` is inserted
+    /// directly into the constructed `AttrTokenStream` as an
+    /// `AttrTokenTree::AttrsTarget`.
+    AttrsTarget(AttrsTarget),
+    /// A special 'empty' token that is ignored during the conversion to an
+    /// `AttrTokenStream`. This is used to simplify the handling of replace
+    /// ranges.
+    Empty,
+}
+
+/// Converts a flattened iterator of tokens (including open and close delimiter
+/// tokens) into an `AttrTokenStream`, creating an `AttrTokenTree::Delimited`
+/// for each matching pair of open and close delims.
+fn make_attr_token_stream(
+    iter: impl Iterator<Item = FlatToken>,
+    break_last_token: bool,
+) -> AttrTokenStream {
+    #[derive(Debug)]
+    struct FrameData {
+        // This is `None` for the first frame, `Some` for all others.
+        open_delim_sp: Option<(Delimiter, Span, Spacing)>,
+        inner: Vec<AttrTokenTree>,
+    }
+    // The stack always has at least one element. Storing it separately makes for shorter code.
+    let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] };
+    let mut stack_rest = vec![];
+    for flat_token in iter {
+        match flat_token {
+            FlatToken::Token((Token { kind: TokenKind::OpenDelim(delim), span }, spacing)) => {
+                stack_rest.push(mem::replace(
+                    &mut stack_top,
+                    FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] },
+                ));
+            }
+            FlatToken::Token((Token { kind: TokenKind::CloseDelim(delim), span }, spacing)) => {
+                let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap());
+                let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap();
+                assert_eq!(
+                    open_delim, delim,
+                    "Mismatched open/close delims: open={open_delim:?} close={span:?}"
+                );
+                let dspan = DelimSpan::from_pair(open_sp, span);
+                let dspacing = DelimSpacing::new(open_spacing, spacing);
+                let stream = AttrTokenStream::new(frame_data.inner);
+                let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream);
+                stack_top.inner.push(delimited);
+            }
+            FlatToken::Token((token, spacing)) => {
+                stack_top.inner.push(AttrTokenTree::Token(token, spacing))
+            }
+            FlatToken::AttrsTarget(target) => {
+                stack_top.inner.push(AttrTokenTree::AttrsTarget(target))
+            }
+            FlatToken::Empty => {}
+        }
+    }
+
+    if break_last_token {
+        let last_token = stack_top.inner.pop().unwrap();
+        if let AttrTokenTree::Token(last_token, spacing) = last_token {
+            let unglued_first = last_token.kind.break_two_token_op().unwrap().0;
+
+            // An 'unglued' token is always two ASCII characters.
+            let mut first_span = last_token.span.shrink_to_lo();
+            first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1));
+
+            stack_top
+                .inner
+                .push(AttrTokenTree::Token(Token::new(unglued_first, first_span), spacing));
+        } else {
+            panic!("Unexpected last token {last_token:?}")
+        }
+    }
+    AttrTokenStream::new(stack_top.inner)
+}
 
 /// Like `TokenTree`, but for `AttrTokenStream`.
 #[derive(Clone, Debug, Encodable, Decodable)]
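
The `FlatToken::Empty` padding in the loop above is what keeps every recorded `ReplaceRange` valid while outer ranges are still pending. Here is a minimal, self-contained sketch of that invariant, not part of the commit: `Flat`, the sample tokens, and the "inner"/"outer" targets are hypothetical stand-ins for `FlatToken` and `AttrsTarget`.

    // Each splice writes back exactly `range.len()` elements, so the
    // vector's length never changes and later (outer) ranges still index
    // the right slots.
    #[derive(Clone, Debug, PartialEq)]
    enum Flat {
        Tok(char),            // an ordinary token
        Target(&'static str), // stands in for `FlatToken::AttrsTarget`
        Empty,                // padding that is dropped during tree building
    }

    fn main() {
        // Tokens for something like: a { b c } d
        let mut tokens: Vec<Flat> = "a{bc}d".chars().map(Flat::Tok).collect();

        // An inner range (2..4) nested inside an outer one (1..5).
        let mut ranges: Vec<(std::ops::Range<u32>, Option<&'static str>)> =
            vec![(1..5, Some("outer")), (2..4, Some("inner"))];
        ranges.sort_by_key(|(r, _)| r.start);

        // Greatest start position first, as in `to_attr_token_stream`.
        for (range, target) in ranges.into_iter().rev() {
            let target_len = target.is_some() as usize;
            tokens.splice(
                (range.start as usize)..(range.end as usize),
                target
                    .into_iter()
                    .map(Flat::Target)
                    .chain(std::iter::repeat(Flat::Empty).take(range.len() - target_len)),
            );
        }

        // Length is unchanged, and the outer target replaced the
        // already-replaced inner tokens.
        assert_eq!(tokens.len(), 6);
        assert_eq!(tokens[1], Flat::Target("outer"));
        assert_eq!(tokens[5], Flat::Tok('d'));
    }
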
@@ -174,6 +358,13 @@ pub enum AttrTokenTree {
     AttrsTarget(AttrsTarget),
 }
 
+/// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
+/// information about the tokens for attribute targets. This is used
+/// during expansion to perform early cfg-expansion, and to process attributes
+/// during proc-macro invocations.
+#[derive(Clone, Debug, Default, Encodable, Decodable)]
+pub struct AttrTokenStream(pub Lrc<Vec<AttrTokenTree>>);
+
 impl AttrTokenStream {
     pub fn new(tokens: Vec<AttrTokenTree>) -> AttrTokenStream {
         AttrTokenStream(Lrc::new(tokens))
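
The stack discipline in `make_attr_token_stream` is easier to see with the token details stripped away. The following sketch, with a hypothetical `Tree` type in place of `AttrTokenTree` (not the commit's code), applies the same `mem::replace`-based frame handling to plain characters, pairing '(' with ')':

    use std::mem;

    // Simplified tree node, standing in for `AttrTokenTree`.
    #[derive(Debug, PartialEq)]
    enum Tree {
        Leaf(char),
        Delimited(Vec<Tree>),
    }

    // Build nested `Tree`s from a flat stream: each '(' pushes the current
    // frame, each ')' pops one and wraps the collected children.
    fn build(flat: impl Iterator<Item = char>) -> Vec<Tree> {
        // The top frame is stored separately; the stack always has >= 1 frame.
        let mut stack_top: Vec<Tree> = vec![];
        let mut stack_rest: Vec<Vec<Tree>> = vec![];
        for c in flat {
            match c {
                '(' => stack_rest.push(mem::take(&mut stack_top)),
                ')' => {
                    let inner = mem::replace(&mut stack_top, stack_rest.pop().unwrap());
                    stack_top.push(Tree::Delimited(inner));
                }
                _ => stack_top.push(Tree::Leaf(c)),
            }
        }
        assert!(stack_rest.is_empty(), "unbalanced delimiters");
        stack_top
    }

    fn main() {
        let trees = build("a(bc)d".chars());
        assert_eq!(
            trees,
            vec![
                Tree::Leaf('a'),
                Tree::Delimited(vec![Tree::Leaf('b'), Tree::Leaf('c')]),
                Tree::Leaf('d'),
            ]
        );
    }
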
@@ -720,6 +911,75 @@ impl TokenTreeCursor {
     }
 }
 
+/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
+/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
+/// use this type to emit them as a linear sequence. But a linear sequence is
+/// what the parser expects, for the most part.
+#[derive(Clone, Debug)]
+pub struct TokenCursor {
+    // Cursor for the current (innermost) token stream. The delimiters for this
+    // token stream are found in `self.stack.last()`; when that is `None` then
+    // we are in the outermost token stream which never has delimiters.
+    pub tree_cursor: TokenTreeCursor,
+
+    // Token streams surrounding the current one. The delimiters for stack[n]'s
+    // tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
+    // because it's the outermost token stream which never has delimiters.
+    pub stack: Vec<(TokenTreeCursor, DelimSpan, DelimSpacing, Delimiter)>,
+}
+
+impl TokenCursor {
+    pub fn next(&mut self) -> (Token, Spacing) {
+        self.inlined_next()
+    }
+
+    /// This always-inlined version should only be used on hot code paths.
+    #[inline(always)]
+    pub fn inlined_next(&mut self) -> (Token, Spacing) {
+        loop {
+            // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
+            // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
+            // below can be removed.
+            if let Some(tree) = self.tree_cursor.next_ref() {
+                match tree {
+                    &TokenTree::Token(ref token, spacing) => {
+                        debug_assert!(!matches!(
+                            token.kind,
+                            token::OpenDelim(_) | token::CloseDelim(_)
+                        ));
+                        return (token.clone(), spacing);
+                    }
+                    &TokenTree::Delimited(sp, spacing, delim, ref tts) => {
+                        let trees = tts.clone().into_trees();
+                        self.stack.push((
+                            mem::replace(&mut self.tree_cursor, trees),
+                            sp,
+                            spacing,
+                            delim,
+                        ));
+                        if delim != Delimiter::Invisible {
+                            return (Token::new(token::OpenDelim(delim), sp.open), spacing.open);
+                        }
+                        // No open delimiter to return; continue on to the next iteration.
+                    }
+                };
+            } else if let Some((tree_cursor, span, spacing, delim)) = self.stack.pop() {
+                // We have exhausted this token stream. Move back to its parent token stream.
+                self.tree_cursor = tree_cursor;
+                if delim != Delimiter::Invisible {
+                    return (Token::new(token::CloseDelim(delim), span.close), spacing.close);
+                }
+                // No close delimiter to return; continue on to the next iteration.
+            } else {
+                // We have exhausted the outermost token stream. The use of
+                // `Spacing::Alone` is arbitrary and immaterial, because the
+                // `Eof` token's spacing is never used.
+                return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
+            }
+        }
+    }
+}
+
 #[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
 pub struct DelimSpan {
     pub open: Span,
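
`TokenCursor` is essentially the inverse of `make_attr_token_stream`: it walks a tree with an explicit stack, synthesizing an open-delimiter token on the way down into a `Delimited` node and a close-delimiter token on the way back up. A rough, self-contained sketch of the same technique, with a hypothetical `Tree`/`Cursor` pair in place of `TokenTree`/`TokenCursor`:

    // Simplified tree node, standing in for `TokenTree`.
    enum Tree {
        Leaf(char),
        Delimited(Vec<Tree>),
    }

    // A cursor that yields leaves plus synthesized '(' / ')' delimiters.
    struct Cursor {
        // Iterator over the current (innermost) level.
        current: std::vec::IntoIter<Tree>,
        // Suspended iterators for the enclosing levels.
        stack: Vec<std::vec::IntoIter<Tree>>,
    }

    impl Iterator for Cursor {
        type Item = char;
        fn next(&mut self) -> Option<char> {
            // The real `TokenCursor` loops so it can skip invisible
            // delimiters; this sketch returns on the first iteration.
            loop {
                if let Some(tree) = self.current.next() {
                    match tree {
                        Tree::Leaf(c) => return Some(c),
                        Tree::Delimited(inner) => {
                            // Descend: suspend this level, emit the open delim.
                            let prev =
                                std::mem::replace(&mut self.current, inner.into_iter());
                            self.stack.push(prev);
                            return Some('(');
                        }
                    }
                } else if let Some(parent) = self.stack.pop() {
                    // Level exhausted: resume the parent, emit the close delim.
                    self.current = parent;
                    return Some(')');
                } else {
                    return None; // outermost stream exhausted (the `Eof` case)
                }
            }
        }
    }

    fn main() {
        let trees = vec![
            Tree::Leaf('a'),
            Tree::Delimited(vec![Tree::Leaf('b'), Tree::Leaf('c')]),
            Tree::Leaf('d'),
        ];
        let cursor = Cursor { current: trees.into_iter(), stack: vec![] };
        let flat: String = cursor.collect();
        assert_eq!(flat, "a(bc)d");
    }
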
@@ -765,6 +1025,7 @@ mod size_asserts {
     static_assert_size!(AttrTokenStream, 8);
     static_assert_size!(AttrTokenTree, 32);
     static_assert_size!(LazyAttrTokenStream, 8);
+    static_assert_size!(LazyAttrTokenStreamImpl, 96);
     static_assert_size!(Option<LazyAttrTokenStream>, 8); // must be small, used in many AST nodes
     static_assert_size!(TokenStream, 8);
    static_assert_size!(TokenTree, 32);
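
The new `static_assert_size!` line pins `LazyAttrTokenStreamImpl` at 96 bytes, so accidental growth fails the build rather than silently regressing memory use. As a sketch of the general technique (rustc's own macro comes from `rustc_data_structures`; this is not necessarily its exact definition), an array-length mismatch turns the size check into a compile-time error:

    // The two array lengths must match, or the constant fails to typecheck.
    macro_rules! static_assert_size {
        ($ty:ty, $size:expr) => {
            const _: [(); $size] = [(); ::std::mem::size_of::<$ty>()];
        };
    }

    #[allow(dead_code)]
    struct Header {
        tag: u32,
        len: u32,
    }

    // Compiles only if `Header` is exactly 8 bytes.
    static_assert_size!(Header, 8);

    fn main() {}
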
