14 | 14 | //! ownership of the original.
15 | 15 |
16 | 16 | use std::borrow::Cow;
17 | | -use std::{cmp, fmt, iter}; |
| 17 | +use std::ops::Range; |
| 18 | +use std::{cmp, fmt, iter, mem}; |
18 | 19 |
19 | 20 | use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
20 | 21 | use rustc_data_structures::sync::{self, Lrc};
@@ -156,13 +157,226 @@ impl<CTX> HashStable<CTX> for LazyAttrTokenStream {
156 | 157 |     }
157 | 158 | }
158 | 159 |
| 160 | +/// A token range within a `Parser`'s full token stream. |
| 161 | +#[derive(Clone, Debug)] |
| 162 | +pub struct ParserRange(pub Range<u32>); |
| 163 | + |
| 164 | +/// A token range within an individual AST node's (lazy) token stream, i.e. |
| 165 | +/// relative to that node's first token. Distinct from `ParserRange` so the two |
| 166 | +/// kinds of range can't be mixed up. |
| 167 | +#[derive(Clone, Debug)] |
| 168 | +pub struct NodeRange(pub Range<u32>); |
| 169 | + |
| 170 | +/// Indicates a range of tokens that should be replaced by an `AttrsTarget` |
| 171 | +/// (replacement) or be replaced by nothing (deletion). This is used in two |
| 172 | +/// places during token collection. |
| 173 | +/// |
| 174 | +/// 1. Replacement. During the parsing of an AST node that may have a |
| 175 | +///    `#[derive]` attribute, when we parse a nested AST node that has `#[cfg]` |
| 176 | +///    or `#[cfg_attr]`, we replace the entire inner AST node with |
| 177 | +///    `FlatToken::AttrsTarget`. This lets us perform eager cfg-expansion on an |
| 178 | +///    `AttrTokenStream`. |
| 179 | +/// |
| 180 | +/// 2. Deletion. We delete inner attributes from all collected token streams, |
| 181 | +///    and instead track them through the `attrs` field on the AST node. This |
| 182 | +///    lets us manipulate them similarly to outer attributes. When we create a |
| 183 | +///    `TokenStream`, the inner attributes are inserted into the proper place |
| 184 | +///    in the token stream. |
| 185 | +/// |
| 186 | +/// Each replacement starts off in `ParserReplacement` form but is converted to |
| 187 | +/// `NodeReplacement` form when it is attached to a single AST node, via |
| 188 | +/// `LazyAttrTokenStreamImpl`. |
| 189 | +pub type ParserReplacement = (ParserRange, Option<AttrsTarget>); |
| 190 | + |
| 191 | +/// See the comment on `ParserReplacement`. |
| 192 | +pub type NodeReplacement = (NodeRange, Option<AttrsTarget>); |
| 193 | + |
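
To make case 1 in the comment above concrete, here is an illustrative input program (not taken from this patch; the feature name is hypothetical) that triggers a replacement: while collecting tokens for `S`, the nested `field` node carries a `#[cfg]`, so its tokens are swapped for a single `FlatToken::AttrsTarget`, which later allows eager cfg-expansion of the captured stream.

```rust
// Hypothetical input program, for illustration only.
#[derive(Clone)]
struct S {
    // During token collection for `S`, the tokens of this entire field,
    // including its attribute, are replaced by one `FlatToken::AttrsTarget`.
    #[cfg(feature = "x")]
    field: u32,
}
```
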
| 194 | +impl NodeRange { |
| 195 | +    // Converts a range within a parser's tokens to a range within a |
| 196 | +    // node's tokens beginning at `start_pos`. |
| 197 | +    // |
| 198 | +    // For example, imagine a parser with 50 tokens in its token stream, a |
| 199 | +    // function that spans `ParserRange(20..40)` and an inner attribute within |
| 200 | +    // that function that spans `ParserRange(30..35)`. We would find the inner |
| 201 | +    // attribute's range within the function's tokens by subtracting 20, which |
| 202 | +    // is the position of the function's start token. This gives |
| 203 | +    // `NodeRange(10..15)`. |
| 204 | +    pub fn new(ParserRange(parser_range): ParserRange, start_pos: u32) -> NodeRange { |
| 205 | +        assert!(!parser_range.is_empty()); |
| 206 | +        assert!(parser_range.start >= start_pos); |
| 207 | +        NodeRange((parser_range.start - start_pos)..(parser_range.end - start_pos)) |
| 208 | +    } |
| 209 | +} |
| 210 | + |
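
The worked example from the comment above can be written out as a quick check (a sketch only; it assumes `ParserRange` and `NodeRange` from this module are in scope):

```rust
fn main() {
    // A function spans ParserRange(20..40); an inner attribute within it
    // spans ParserRange(30..35). Relative to the function's first token
    // (position 20), the attribute occupies NodeRange(10..15).
    let attr_range = NodeRange::new(ParserRange(30..35), 20);
    assert_eq!(attr_range.0, 10..15);
}
```
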
| 211 | +// From a value of this type we can reconstruct the `TokenStream` seen by the |
| 212 | +// `f` callback passed to a call to `Parser::collect_tokens`, by replaying |
| 213 | +// how the tokens were retrieved. This saves us producing a `TokenStream` if |
| 214 | +// it is never needed, e.g. for a captured `macro_rules!` argument that is |
| 215 | +// never passed to a proc macro. In practice, token stream creation happens |
| 216 | +// rarely compared to calls to `collect_tokens` (see some statistics in |
| 217 | +// #78736), so we do as little up-front work as possible. |
| 218 | +// |
| 219 | +// This also makes `Parser` very cheap to clone, since |
| 220 | +// there is no intermediate collection buffer to clone. |
| 221 | +pub struct LazyAttrTokenStreamImpl { |
| 222 | +    pub start_token: (Token, Spacing), |
| 223 | +    pub cursor_snapshot: TokenCursor, |
| 224 | +    pub num_calls: u32, |
| 225 | +    pub break_last_token: bool, |
| 226 | +    pub node_replacements: Box<[NodeReplacement]>, |
| 227 | +} |
| 228 | + |
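
The fields above implement a replay-instead-of-buffer strategy: record where the callback started (`start_token`, `cursor_snapshot`) and how many tokens it pulled (`num_calls`), and only re-pull them if the stream is actually demanded. A minimal sketch of that idea, with hypothetical character-based stand-ins for the real types:

```rust
// Sketch only: `CharCursor`/`LazySlice` are stand-ins for the real
// `TokenCursor`/`LazyAttrTokenStreamImpl`.
#[derive(Clone)]
struct CharCursor {
    chars: std::str::Chars<'static>,
}

struct LazySlice {
    snapshot: CharCursor, // cursor state from before the callback ran
    num_calls: u32,       // how many items the callback consumed
}

impl LazySlice {
    // Reconstruct the consumed items on demand, instead of having
    // buffered them eagerly during parsing.
    fn replay(&self) -> String {
        let mut cursor = self.snapshot.clone();
        (0..self.num_calls).filter_map(|_| cursor.chars.next()).collect()
    }
}

fn main() {
    let snapshot = CharCursor { chars: "fn main() {}".chars() };
    let lazy = LazySlice { snapshot, num_calls: 2 };
    assert_eq!(lazy.replay(), "fn");
}
```
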
| 229 | +impl ToAttrTokenStream for LazyAttrTokenStreamImpl { |
| 230 | +    fn to_attr_token_stream(&self) -> AttrTokenStream { |
| 231 | +        // The token produced by the final call to `{,inlined_}next` was not |
| 232 | +        // actually consumed by the callback. The combination of chaining the |
| 233 | +        // initial token and using `take` produces the desired result: we |
| 234 | +        // produce an empty `TokenStream` if no calls were made, and omit the |
| 235 | +        // final token otherwise. |
| 236 | +        let mut cursor_snapshot = self.cursor_snapshot.clone(); |
| 237 | +        let tokens = iter::once(FlatToken::Token(self.start_token.clone())) |
| 238 | +            .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next()))) |
| 239 | +            .take(self.num_calls as usize); |
| 240 | + |
| 241 | +        if self.node_replacements.is_empty() { |
| 242 | +            make_attr_token_stream(tokens, self.break_last_token) |
| 243 | +        } else { |
| 244 | +            let mut tokens: Vec<_> = tokens.collect(); |
| 245 | +            let mut node_replacements = self.node_replacements.to_vec(); |
| 246 | +            node_replacements.sort_by_key(|(range, _)| range.0.start); |
| 247 | + |
| 248 | +            #[cfg(debug_assertions)] |
| 249 | +            for [(node_range, tokens), (next_node_range, next_tokens)] in |
| 250 | +                node_replacements.array_windows() |
| 251 | +            { |
| 252 | +                assert!( |
| 253 | +                    node_range.0.end <= next_node_range.0.start, |
| 254 | +                    "Node ranges should be disjoint: ({:?}, {:?}) ({:?}, {:?})", |
| 255 | +                    node_range, |
| 256 | +                    tokens, |
| 257 | +                    next_node_range, |
| 258 | +                    next_tokens, |
| 259 | +                ); |
| 260 | +            } |
| 261 | + |
| 262 | +            // Process the replace ranges. |
| 263 | +            for (node_range, target) in node_replacements.into_iter() { |
| 264 | +                assert!( |
| 265 | +                    !node_range.0.is_empty(), |
| 266 | +                    "Cannot replace an empty node range: {:?}", |
| 267 | +                    node_range.0 |
| 268 | +                ); |
| 269 | + |
| 270 | +                // Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s, plus |
| 271 | +                // enough `FlatToken::Empty`s to fill up the rest of the range. This keeps the |
| 272 | +                // total length of `tokens` constant throughout the replacement process, allowing |
| 273 | +                // us to do all replacements without adjusting indices. |
| 274 | +                let target_len = target.is_some() as usize; |
| 275 | +                tokens.splice( |
| 276 | +                    (node_range.0.start as usize)..(node_range.0.end as usize), |
| 277 | +                    target.into_iter().map(FlatToken::AttrsTarget).chain( |
| 278 | +                        iter::repeat(FlatToken::Empty).take(node_range.0.len() - target_len), |
| 279 | +                    ), |
| 280 | +                ); |
| 281 | +            } |
| 282 | +            make_attr_token_stream(tokens.into_iter(), self.break_last_token) |
| 283 | +        } |
| 284 | +    } |
| 285 | +} |
| 286 | + |
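
The `splice` padding trick in the `else` branch above is easy to demonstrate in isolation. A minimal sketch with strings standing in for `FlatToken`s (illustrative values only): each replacement keeps the vector's length unchanged, so ranges computed before the loop stay valid.

```rust
use std::iter;

fn main() {
    let mut tokens = vec!["t0", "t1", "t2", "t3", "t4"];

    // Replace range 1..4 with one "target" plus enough padding that
    // `tokens.len()` is unchanged and later ranges need no adjustment.
    let range = 1..4;
    let target_len = 1; // stand-in for `target.is_some() as usize`
    tokens.splice(
        range.clone(),
        iter::once("TARGET").chain(iter::repeat("EMPTY").take(range.len() - target_len)),
    );

    assert_eq!(tokens, ["t0", "TARGET", "EMPTY", "EMPTY", "t4"]);
}
```
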
| 287 | +/// A helper struct used when building an `AttrTokenStream` from |
| 288 | +/// a `LazyAttrTokenStream`. Both delimiter and non-delimiter tokens |
| 289 | +/// are stored as `FlatToken::Token`. A vector of `FlatToken`s |
| 290 | +/// is then 'parsed' to build up an `AttrTokenStream` with nested |
| 291 | +/// `AttrTokenTree::Delimited` tokens. |
| 292 | +#[derive(Debug, Clone)] |
| 293 | +pub enum FlatToken { |
| 294 | +    /// A token. This holds both delimiter tokens (e.g. `{` and `}`) |
| 295 | +    /// and non-delimiter tokens. |
| 296 | +    Token((Token, Spacing)), |
| 297 | +    /// Holds the `AttrsTarget` for an AST node. The `AttrsTarget` is inserted |
| 298 | +    /// directly into the constructed `AttrTokenStream` as an |
| 299 | +    /// `AttrTokenTree::AttrsTarget`. |
| 300 | +    AttrsTarget(AttrsTarget), |
| 301 | +    /// A special 'empty' token that is ignored during the conversion |
| 302 | +    /// to an `AttrTokenStream`. This is used to simplify the |
| 303 | +    /// handling of replace ranges. |
| 304 | +    Empty, |
| 305 | +} |
| 306 | + |
159 | 307 | /// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
160 | 308 | /// information about the tokens for attribute targets. This is used
161 | 309 | /// during expansion to perform early cfg-expansion, and to process attributes
162 | 310 | /// during proc-macro invocations.
163 | 311 | #[derive(Clone, Debug, Default, Encodable, Decodable)]
164 | 312 | pub struct AttrTokenStream(pub Lrc<Vec<AttrTokenTree>>);
165 | 313 |
| 314 | +/// Converts a flattened iterator of tokens (including open and close delimiter tokens) into an |
| 315 | +/// `AttrTokenStream`, creating an `AttrTokenTree::Delimited` for each matching pair of open and |
| 316 | +/// close delims. |
| 317 | +fn make_attr_token_stream( |
| 318 | +    iter: impl Iterator<Item = FlatToken>, |
| 319 | +    break_last_token: bool, |
| 320 | +) -> AttrTokenStream { |
| 321 | +    #[derive(Debug)] |
| 322 | +    struct FrameData { |
| 323 | +        // This is `None` for the first frame, `Some` for all others. |
| 324 | +        open_delim_sp: Option<(Delimiter, Span, Spacing)>, |
| 325 | +        inner: Vec<AttrTokenTree>, |
| 326 | +    } |
| 327 | +    // The stack always has at least one element. Storing it separately makes for shorter code. |
| 328 | +    let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] }; |
| 329 | +    let mut stack_rest = vec![]; |
| 330 | +    for flat_token in iter { |
| 331 | +        match flat_token { |
| 332 | +            FlatToken::Token((Token { kind: TokenKind::OpenDelim(delim), span }, spacing)) => { |
| 333 | +                stack_rest.push(mem::replace( |
| 334 | +                    &mut stack_top, |
| 335 | +                    FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] }, |
| 336 | +                )); |
| 337 | +            } |
| 338 | +            FlatToken::Token((Token { kind: TokenKind::CloseDelim(delim), span }, spacing)) => { |
| 339 | +                let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap()); |
| 340 | +                let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap(); |
| 341 | +                assert_eq!( |
| 342 | +                    open_delim, delim, |
| 343 | +                    "Mismatched open/close delims: open={open_delim:?}, close span={span:?}" |
| 344 | +                ); |
| 345 | +                let dspan = DelimSpan::from_pair(open_sp, span); |
| 346 | +                let dspacing = DelimSpacing::new(open_spacing, spacing); |
| 347 | +                let stream = AttrTokenStream::new(frame_data.inner); |
| 348 | +                let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream); |
| 349 | +                stack_top.inner.push(delimited); |
| 350 | +            } |
| 351 | +            FlatToken::Token((token, spacing)) => { |
| 352 | +                stack_top.inner.push(AttrTokenTree::Token(token, spacing)) |
| 353 | +            } |
| 354 | +            FlatToken::AttrsTarget(target) => { |
| 355 | +                stack_top.inner.push(AttrTokenTree::AttrsTarget(target)) |
| 356 | +            } |
| 357 | +            FlatToken::Empty => {} |
| 358 | +        } |
| 359 | +    } |
| 360 | + |
| 361 | +    if break_last_token { |
| 362 | +        let last_token = stack_top.inner.pop().unwrap(); |
| 363 | +        if let AttrTokenTree::Token(last_token, spacing) = last_token { |
| 364 | +            let unglued_first = last_token.kind.break_two_token_op().unwrap().0; |
| 365 | + |
| 366 | +            // An 'unglued' token is always two ASCII characters. |
| 367 | +            let mut first_span = last_token.span.shrink_to_lo(); |
| 368 | +            first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1)); |
| 369 | + |
| 370 | +            stack_top |
| 371 | +                .inner |
| 372 | +                .push(AttrTokenTree::Token(Token::new(unglued_first, first_span), spacing)); |
| 373 | +        } else { |
| 374 | +            panic!("Unexpected last token {last_token:?}") |
| 375 | +        } |
| 376 | +    } |
| 377 | +    AttrTokenStream::new(stack_top.inner) |
| 378 | +} |
| 379 | + |
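
Stripped of spans, spacing, and `AttrsTarget` handling, the frame/stack technique above is ordinary bracket matching. A minimal self-contained sketch with plain characters (hypothetical `Tree` type, not the real API):

```rust
use std::mem;

#[derive(Debug, PartialEq)]
enum Tree {
    Leaf(char),
    Group(Vec<Tree>),
}

// Rebuild a nested tree from a flat stream: '(' opens a new frame and
// ')' closes it, pushing the finished group into the parent frame.
fn build(flat: impl Iterator<Item = char>) -> Vec<Tree> {
    let mut stack_top: Vec<Tree> = vec![];
    let mut stack_rest: Vec<Vec<Tree>> = vec![];
    for c in flat {
        match c {
            '(' => stack_rest.push(mem::take(&mut stack_top)),
            ')' => {
                let inner = mem::replace(&mut stack_top, stack_rest.pop().unwrap());
                stack_top.push(Tree::Group(inner));
            }
            _ => stack_top.push(Tree::Leaf(c)),
        }
    }
    stack_top
}

fn main() {
    assert_eq!(
        build("a(bc)".chars()),
        vec![
            Tree::Leaf('a'),
            Tree::Group(vec![Tree::Leaf('b'), Tree::Leaf('c')]),
        ]
    );
}
```
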
166 | 380 | /// Like `TokenTree`, but for `AttrTokenStream`.
167 | 381 | #[derive(Clone, Debug, Encodable, Decodable)]
168 | 382 | pub enum AttrTokenTree {
@@ -728,6 +942,75 @@ impl TokenTreeCursor {
728 | 942 |     }
729 | 943 | }
730 | 944 |
| 945 | +/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that |
| 946 | +/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b) |
| 947 | +/// use this type to emit them as a linear sequence. But a linear sequence is |
| 948 | +/// what the parser expects, for the most part. |
| 949 | +#[derive(Clone, Debug)] |
| 950 | +pub struct TokenCursor { |
| 951 | +    // Cursor for the current (innermost) token stream. The delimiters for this |
| 952 | +    // token stream are found in `self.stack.last()`; when that is `None` then |
| 953 | +    // we are in the outermost token stream which never has delimiters. |
| 954 | +    pub tree_cursor: TokenTreeCursor, |
| 955 | + |
| 956 | +    // Token streams surrounding the current one. The delimiters for stack[n]'s |
| 957 | +    // tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters |
| 958 | +    // because it's the outermost token stream which never has delimiters. |
| 959 | +    pub stack: Vec<(TokenTreeCursor, DelimSpan, DelimSpacing, Delimiter)>, |
| 960 | +} |
| 961 | + |
| 962 | +impl TokenCursor { |
| 963 | +    pub fn next(&mut self) -> (Token, Spacing) { |
| 964 | +        self.inlined_next() |
| 965 | +    } |
| 966 | + |
| 967 | +    /// This always-inlined version should only be used on hot code paths. |
| 968 | +    #[inline(always)] |
| 969 | +    pub fn inlined_next(&mut self) -> (Token, Spacing) { |
| 970 | +        loop { |
| 971 | +            // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix |
| 972 | +            // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions |
| 973 | +            // below can be removed. |
| 974 | +            if let Some(tree) = self.tree_cursor.next_ref() { |
| 975 | +                match tree { |
| 976 | +                    &TokenTree::Token(ref token, spacing) => { |
| 977 | +                        debug_assert!(!matches!( |
| 978 | +                            token.kind, |
| 979 | +                            token::OpenDelim(_) | token::CloseDelim(_) |
| 980 | +                        )); |
| 981 | +                        return (token.clone(), spacing); |
| 982 | +                    } |
| 983 | +                    &TokenTree::Delimited(sp, spacing, delim, ref tts) => { |
| 984 | +                        let trees = tts.clone().into_trees(); |
| 985 | +                        self.stack.push(( |
| 986 | +                            mem::replace(&mut self.tree_cursor, trees), |
| 987 | +                            sp, |
| 988 | +                            spacing, |
| 989 | +                            delim, |
| 990 | +                        )); |
| 991 | +                        if delim != Delimiter::Invisible { |
| 992 | +                            return (Token::new(token::OpenDelim(delim), sp.open), spacing.open); |
| 993 | +                        } |
| 994 | +                        // No open delimiter to return; continue on to the next iteration. |
| 995 | +                    } |
| 996 | +                }; |
| 997 | +            } else if let Some((tree_cursor, span, spacing, delim)) = self.stack.pop() { |
| 998 | +                // We have exhausted this token stream. Move back to its parent token stream. |
| 999 | +                self.tree_cursor = tree_cursor; |
| 1000 | +                if delim != Delimiter::Invisible { |
| 1001 | +                    return (Token::new(token::CloseDelim(delim), span.close), spacing.close); |
| 1002 | +                } |
| 1003 | +                // No close delimiter to return; continue on to the next iteration. |
| 1004 | +            } else { |
| 1005 | +                // We have exhausted the outermost token stream. The use of |
| 1006 | +                // `Spacing::Alone` is arbitrary and immaterial, because the |
| 1007 | +                // `Eof` token's spacing is never used. |
| 1008 | +                return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone); |
| 1009 | +            } |
| 1010 | +        } |
| 1011 | +    } |
| 1012 | +} |
| 1013 | + |
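
`TokenCursor` walks the opposite direction from `make_attr_token_stream`: it re-linearizes the tree, emitting synthetic open/close tokens as it descends into and climbs out of each `Delimited` node. A sketch of the same shape with hypothetical character-based types (the real code also tracks spans and spacing, and loops so it can skip `Delimiter::Invisible`):

```rust
use std::mem;

enum Tree {
    Leaf(char),
    Group(Vec<Tree>),
}

// Flatten a `Tree` forest, emitting '(' and ')' around each group, using
// an explicit stack of saved parent iterators.
struct Cursor {
    current: std::vec::IntoIter<Tree>,
    stack: Vec<std::vec::IntoIter<Tree>>,
}

impl Iterator for Cursor {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        if let Some(tree) = self.current.next() {
            match tree {
                Tree::Leaf(c) => Some(c),
                Tree::Group(inner) => {
                    // Descend: save the parent cursor, emit the open delim.
                    self.stack.push(mem::replace(&mut self.current, inner.into_iter()));
                    Some('(')
                }
            }
        } else if let Some(parent) = self.stack.pop() {
            // This level is exhausted: climb back up, emit the close delim.
            self.current = parent;
            Some(')')
        } else {
            None // outermost stream exhausted (the `Eof` case above)
        }
    }
}

fn main() {
    let forest = vec![
        Tree::Leaf('a'),
        Tree::Group(vec![Tree::Leaf('b'), Tree::Leaf('c')]),
    ];
    let cursor = Cursor { current: forest.into_iter(), stack: vec![] };
    assert_eq!(cursor.collect::<String>(), "a(bc)");
}
```
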
731 | 1014 | #[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
732 | 1015 | pub struct DelimSpan {
733 | 1016 |     pub open: Span,
@@ -774,6 +1057,7 @@ mod size_asserts {
774 | 1057 |     static_assert_size!(AttrTokenStream, 8);
775 | 1058 |     static_assert_size!(AttrTokenTree, 32);
776 | 1059 |     static_assert_size!(LazyAttrTokenStream, 8);
| 1060 | +    static_assert_size!(LazyAttrTokenStreamImpl, 96); |
777 | 1061 |     static_assert_size!(Option<LazyAttrTokenStream>, 8); // must be small, used in many AST nodes
778 | 1062 |     static_assert_size!(TokenStream, 8);
779 | 1063 |     static_assert_size!(TokenTree, 32);