@@ -25,7 +25,8 @@ use rustc_serialize::{Decodable, Encodable};
use rustc_span::{sym, Span, SpanDecoder, SpanEncoder, Symbol, DUMMY_SP};

use std::borrow::Cow;
-use std::{cmp, fmt, iter};
+use std::ops::Range;
+use std::{cmp, fmt, iter, mem};

/// Part of a `TokenStream`.
#[derive(Debug, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
@@ -156,12 +157,195 @@ impl<CTX> HashStable<CTX> for LazyAttrTokenStream {
    }
}

-/// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
-/// information about the tokens for attribute targets. This is used
-/// during expansion to perform early cfg-expansion, and to process attributes
-/// during proc-macro invocations.
-#[derive(Clone, Debug, Default, Encodable, Decodable)]
-pub struct AttrTokenStream(pub Lrc<Vec<AttrTokenTree>>);
+/// Indicates a range of tokens that should be replaced by the tokens in the
+/// provided `AttrsTarget`. This is used in two places during token collection:
+///
+/// 1. During the parsing of an AST node that may have a `#[derive]` attribute,
+///    we parse a nested AST node that has `#[cfg]` or `#[cfg_attr]`. In this
+///    case, we use a `ReplaceRange` to replace the entire inner AST node with
+///    `FlatToken::AttrsTarget`, allowing us to perform eager cfg-expansion on
+///    an `AttrTokenStream`.
+///
+/// 2. When we parse an inner attribute while collecting tokens. We remove
+///    inner attributes from the token stream entirely, and instead track them
+///    through the `attrs` field on the AST node. This allows us to easily
+///    manipulate them (for example, removing the first macro inner attribute
+///    to invoke a proc-macro). When we create a `TokenStream`, the inner
+///    attributes get inserted into the proper place in the token stream.
+pub type ReplaceRange = (Range<u32>, Option<AttrsTarget>);
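
As a rough illustration of the two cases above (a hypothetical sketch: `char` stands in for real tokens and `&str` for an `AttrsTarget`; neither is the compiler's actual API):

```rust
use std::ops::Range;

// Hypothetical stand-ins: `char` for a token, `&str` for an `AttrsTarget`.
type DemoReplaceRange = (Range<u32>, Option<&'static str>);

fn main() {
    // Case 1: replace the tokens of a nested `#[cfg]`-bearing AST node
    // (say, tokens 3..8) with a single cfg-expanded target.
    let case1: DemoReplaceRange = (3..8, Some("cfg-expanded node"));
    // Case 2: delete the tokens of an inner attribute (tokens 0..3) outright.
    let case2: DemoReplaceRange = (0..3, None);
    println!("{case1:?} {case2:?}");
}
```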
+
+// Produces a `TokenStream` on-demand. Using `cursor_snapshot` and `num_calls`,
+// we can reconstruct the `TokenStream` seen by the callback. This allows us to
+// avoid producing a `TokenStream` if it is never needed - for example, a
+// captured `macro_rules!` argument that is never passed to a proc macro. In
+// practice token stream creation happens rarely compared to calls to
+// `collect_tokens` (see some statistics in #78736), so we are doing as little
+// up-front work as possible.
+//
+// This also makes `Parser` very cheap to clone, since there is no intermediate
+// collection buffer to clone.
+pub struct LazyAttrTokenStreamImpl {
+    pub start_token: (Token, Spacing),
+    pub cursor_snapshot: TokenCursor,
+    pub num_calls: u32,
+    pub break_last_token: bool,
+    pub replace_ranges: Box<[ReplaceRange]>,
+}
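
A minimal sketch of the same idea outside the compiler, with all names hypothetical: record a cheaply clonable snapshot plus a call count, and only replay the tokens if someone actually asks for the stream:

```rust
use std::rc::Rc;

// Hypothetical sketch: replay `num_calls` items from a shared snapshot on
// demand instead of eagerly buffering everything the parser consumed.
#[derive(Clone)]
struct LazyStreamDemo {
    snapshot: Rc<Vec<char>>, // stands in for the cheaply clonable `TokenCursor`
    num_calls: usize,        // how many tokens the collection callback consumed
}

impl LazyStreamDemo {
    // Only pay the cost of building the stream when it is requested.
    fn materialize(&self) -> Vec<char> {
        self.snapshot.iter().copied().take(self.num_calls).collect()
    }
}

fn main() {
    let lazy = LazyStreamDemo { snapshot: Rc::new("fn main".chars().collect()), num_calls: 2 };
    // Cloning is cheap: no intermediate collection buffer is copied.
    let cheap = lazy.clone();
    assert_eq!(cheap.materialize(), vec!['f', 'n']);
}
```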
+
+impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
+    fn to_attr_token_stream(&self) -> AttrTokenStream {
+        // The token produced by the final call to `{,inlined_}next` was not
+        // actually consumed by the callback. The combination of chaining the
+        // initial token and using `take` produces the desired result - we
+        // produce an empty `TokenStream` if no calls were made, and omit the
+        // final token otherwise.
+        let mut cursor_snapshot = self.cursor_snapshot.clone();
+        let tokens = iter::once(FlatToken::Token(self.start_token.clone()))
+            .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next())))
+            .take(self.num_calls as usize);
+
+        if self.replace_ranges.is_empty() {
+            make_attr_token_stream(tokens, self.break_last_token)
+        } else {
+            let mut tokens: Vec<_> = tokens.collect();
+            let mut replace_ranges = self.replace_ranges.to_vec();
+            replace_ranges.sort_by_key(|(range, _)| range.start);
+
+            #[cfg(debug_assertions)]
+            {
+                for [(range, tokens), (next_range, next_tokens)] in replace_ranges.array_windows() {
+                    assert!(
+                        range.end <= next_range.start || range.end >= next_range.end,
+                        "Replace ranges should either be disjoint or nested: \
+                        ({:?}, {:?}) ({:?}, {:?})",
+                        range,
+                        tokens,
+                        next_range,
+                        next_tokens,
+                    );
+                }
+            }
+
+            // Process the replace ranges, starting from the highest start
+            // position and working our way back. If we have tokens like:
+            //
+            // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
+            //
+            // then we will generate replace ranges for both the `#[cfg(FALSE)]
+            // field: bool` and the entire `#[cfg(FALSE)] struct Foo {
+            // #[cfg(FALSE)] field: bool }`.
+            //
+            // By starting processing from the replace range with the greatest
+            // start position, we ensure that any replace range which encloses
+            // another replace range will capture the *replaced* tokens for the
+            // inner range, not the original tokens.
+            for (range, target) in replace_ranges.into_iter().rev() {
+                assert!(!range.is_empty(), "Cannot replace an empty range: {range:?}");
+
+                // Replace the tokens in range with zero or one
+                // `FlatToken::AttrsTarget`s, plus enough `FlatToken::Empty`s
+                // to fill up the rest of the range. This keeps the total
+                // length of `tokens` constant throughout the replacement
+                // process, allowing us to use all of the `ReplaceRange`
+                // entries without adjusting indices.
+                let target_len = target.is_some() as usize;
+                tokens.splice(
+                    (range.start as usize)..(range.end as usize),
+                    target
+                        .into_iter()
+                        .map(|target| FlatToken::AttrsTarget(target))
+                        .chain(iter::repeat(FlatToken::Empty).take(range.len() - target_len)),
+                );
+            }
+            make_attr_token_stream(tokens.into_iter(), self.break_last_token)
+        }
+    }
+}
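
The length-preserving splice can be demonstrated standalone (a hedged sketch: `char` tokens, `'_'` standing in for `FlatToken::Empty`, `'T'` for an `AttrsTarget`):

```rust
use std::iter;
use std::ops::Range;

// Hypothetical demo of the length-preserving replacement: zero or one target
// token plus enough `'_'` padding to fill the range exactly.
fn replace_keeping_len(tokens: &mut Vec<char>, range: Range<usize>, target: Option<char>) {
    let target_len = target.is_some() as usize;
    let pad = range.len() - target_len;
    tokens.splice(range, target.into_iter().chain(iter::repeat('_').take(pad)));
}

fn main() {
    let mut tokens: Vec<char> = "abcdefgh".chars().collect();
    // Process the inner (later-starting) range first, as the real code does...
    replace_keeping_len(&mut tokens, 3..6, Some('T'));
    assert_eq!(tokens.iter().collect::<String>(), "abcT__gh");
    // ...so the enclosing range still indexes correctly and captures the
    // *replaced* inner tokens, because the vector's length never changed.
    replace_keeping_len(&mut tokens, 1..8, None);
    assert_eq!(tokens.iter().collect::<String>(), "a_______");
}
```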
+
+/// A helper enum used when building an `AttrTokenStream` from a
+/// `LazyAttrTokenStream`. Both delimiter and non-delimiter tokens are stored
+/// as `FlatToken::Token`. A vector of `FlatToken`s is then 'parsed' to build
+/// up an `AttrTokenStream` with nested `AttrTokenTree::Delimited` tokens.
+#[derive(Debug, Clone)]
+enum FlatToken {
+    /// A token. This holds both delimiter (e.g. '{' and '}') and non-delimiter
+    /// tokens.
+    Token((Token, Spacing)),
+    /// Holds the `AttrsTarget` for an AST node. The `AttrsTarget` is inserted
+    /// directly into the constructed `AttrTokenStream` as an
+    /// `AttrTokenTree::AttrsTarget`.
+    AttrsTarget(AttrsTarget),
+    /// A special 'empty' token that is ignored during the conversion to an
+    /// `AttrTokenStream`. This is used to simplify the handling of replace
+    /// ranges.
+    Empty,
+}
+
+/// Converts a flattened iterator of tokens (including open and close delimiter
+/// tokens) into an `AttrTokenStream`, creating an `AttrTokenTree::Delimited`
+/// for each matching pair of open and close delims.
+fn make_attr_token_stream(
+    iter: impl Iterator<Item = FlatToken>,
+    break_last_token: bool,
+) -> AttrTokenStream {
+    #[derive(Debug)]
+    struct FrameData {
+        // This is `None` for the first frame, `Some` for all others.
+        open_delim_sp: Option<(Delimiter, Span, Spacing)>,
+        inner: Vec<AttrTokenTree>,
+    }
+    // The stack always has at least one element. Storing it separately makes for shorter code.
+    let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] };
+    let mut stack_rest = vec![];
+    for flat_token in iter {
+        match flat_token {
+            FlatToken::Token((Token { kind: TokenKind::OpenDelim(delim), span }, spacing)) => {
+                stack_rest.push(mem::replace(
+                    &mut stack_top,
+                    FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] },
+                ));
+            }
+            FlatToken::Token((Token { kind: TokenKind::CloseDelim(delim), span }, spacing)) => {
+                let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap());
+                let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap();
+                assert_eq!(
+                    open_delim, delim,
+                    "Mismatched open/close delims: open={open_delim:?} close={span:?}"
+                );
+                let dspan = DelimSpan::from_pair(open_sp, span);
+                let dspacing = DelimSpacing::new(open_spacing, spacing);
+                let stream = AttrTokenStream::new(frame_data.inner);
+                let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream);
+                stack_top.inner.push(delimited);
+            }
+            FlatToken::Token((token, spacing)) => {
+                stack_top.inner.push(AttrTokenTree::Token(token, spacing))
+            }
+            FlatToken::AttrsTarget(target) => {
+                stack_top.inner.push(AttrTokenTree::AttrsTarget(target))
+            }
+            FlatToken::Empty => {}
+        }
+    }
+
+    if break_last_token {
+        let last_token = stack_top.inner.pop().unwrap();
+        if let AttrTokenTree::Token(last_token, spacing) = last_token {
+            let unglued_first = last_token.kind.break_two_token_op().unwrap().0;
+
+            // An 'unglued' token is always two ASCII characters.
+            let mut first_span = last_token.span.shrink_to_lo();
+            first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1));
+
+            stack_top
+                .inner
+                .push(AttrTokenTree::Token(Token::new(unglued_first, first_span), spacing));
+        } else {
+            panic!("Unexpected last token {last_token:?}")
+        }
+    }
+    AttrTokenStream::new(stack_top.inner)
+}
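
The frame-stack discipline is easier to see on plain characters (a hedged sketch in which `(`/`)` play the open/close delimiters and every other character is a leaf):

```rust
// Hypothetical demo of the same `stack_top`/`stack_rest` parse over chars.
#[derive(Debug, PartialEq)]
enum DemoTree {
    Leaf(char),
    Delimited(Vec<DemoTree>),
}

fn build(input: impl Iterator<Item = char>) -> Vec<DemoTree> {
    let mut stack_top: Vec<DemoTree> = Vec::new();
    let mut stack_rest: Vec<Vec<DemoTree>> = Vec::new();
    for c in input {
        match c {
            // An open delimiter suspends the current frame and starts a new one.
            '(' => stack_rest.push(std::mem::take(&mut stack_top)),
            // A close delimiter finishes the current frame and nests it into
            // the resumed parent frame.
            ')' => {
                let inner = std::mem::replace(&mut stack_top, stack_rest.pop().unwrap());
                stack_top.push(DemoTree::Delimited(inner));
            }
            _ => stack_top.push(DemoTree::Leaf(c)),
        }
    }
    stack_top
}

fn main() {
    let trees = build("a(bc)".chars());
    assert_eq!(
        trees,
        vec![
            DemoTree::Leaf('a'),
            DemoTree::Delimited(vec![DemoTree::Leaf('b'), DemoTree::Leaf('c')]),
        ]
    );
}
```

The `break_last_token` arm is not modeled here; in the real function it splits a two-character glued token such as `>>` and keeps only the one-byte first half.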

/// Like `TokenTree`, but for `AttrTokenStream`.
#[derive(Clone, Debug, Encodable, Decodable)]
@@ -174,6 +358,13 @@ pub enum AttrTokenTree {
    AttrsTarget(AttrsTarget),
}

+/// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
+/// information about the tokens for attribute targets. This is used
+/// during expansion to perform early cfg-expansion, and to process attributes
+/// during proc-macro invocations.
+#[derive(Clone, Debug, Default, Encodable, Decodable)]
+pub struct AttrTokenStream(pub Lrc<Vec<AttrTokenTree>>);
+
impl AttrTokenStream {
    pub fn new(tokens: Vec<AttrTokenTree>) -> AttrTokenStream {
        AttrTokenStream(Lrc::new(tokens))
@@ -720,6 +911,75 @@ impl TokenTreeCursor {
    }
}

+/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
+/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
+/// use this type to emit them as a linear sequence. But a linear sequence is
+/// what the parser expects, for the most part.
+#[derive(Clone, Debug)]
+pub struct TokenCursor {
+    // Cursor for the current (innermost) token stream. The delimiters for this
+    // token stream are found in `self.stack.last()`; when that is `None` then
+    // we are in the outermost token stream which never has delimiters.
+    pub tree_cursor: TokenTreeCursor,
+
+    // Token streams surrounding the current one. The delimiters for `stack[n]`'s
+    // tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
+    // because it's the outermost token stream which never has delimiters.
+    pub stack: Vec<(TokenTreeCursor, DelimSpan, DelimSpacing, Delimiter)>,
+}
+
+impl TokenCursor {
+    pub fn next(&mut self) -> (Token, Spacing) {
+        self.inlined_next()
+    }
+
+    /// This always-inlined version should only be used on hot code paths.
+    #[inline(always)]
+    pub fn inlined_next(&mut self) -> (Token, Spacing) {
+        loop {
+            // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
+            // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
+            // below can be removed.
+            if let Some(tree) = self.tree_cursor.next_ref() {
+                match tree {
+                    &TokenTree::Token(ref token, spacing) => {
+                        debug_assert!(!matches!(
+                            token.kind,
+                            token::OpenDelim(_) | token::CloseDelim(_)
+                        ));
+                        return (token.clone(), spacing);
+                    }
+                    &TokenTree::Delimited(sp, spacing, delim, ref tts) => {
+                        let trees = tts.clone().into_trees();
+                        self.stack.push((
+                            mem::replace(&mut self.tree_cursor, trees),
+                            sp,
+                            spacing,
+                            delim,
+                        ));
+                        if delim != Delimiter::Invisible {
+                            return (Token::new(token::OpenDelim(delim), sp.open), spacing.open);
+                        }
+                        // No open delimiter to return; continue on to the next iteration.
+                    }
+                };
+            } else if let Some((tree_cursor, span, spacing, delim)) = self.stack.pop() {
+                // We have exhausted this token stream. Move back to its parent token stream.
+                self.tree_cursor = tree_cursor;
+                if delim != Delimiter::Invisible {
+                    return (Token::new(token::CloseDelim(delim), span.close), spacing.close);
+                }
+                // No close delimiter to return; continue on to the next iteration.
+            } else {
+                // We have exhausted the outermost token stream. The use of
+                // `Spacing::Alone` is arbitrary and immaterial, because the
+                // `Eof` token's spacing is never used.
+                return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
+            }
+        }
+    }
+}
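
A hedged sketch of the same walk in miniature (hypothetical `DemoTree` again): a cursor over the innermost level plus a stack of suspended parent levels, re-emitting delimiters at the boundaries:

```rust
// Hypothetical demo: flatten a nested tree back into a linear sequence,
// emitting '(' when descending into a delimited group and ')' when ascending.
enum DemoTree {
    Leaf(char),
    Delimited(Vec<DemoTree>),
}

struct DemoCursor {
    current: std::vec::IntoIter<DemoTree>,
    stack: Vec<std::vec::IntoIter<DemoTree>>, // suspended parent levels
}

impl DemoCursor {
    fn next(&mut self) -> Option<char> {
        if let Some(tree) = self.current.next() {
            match tree {
                DemoTree::Leaf(c) => Some(c),
                DemoTree::Delimited(inner) => {
                    // Descend: suspend the parent level, emit the open delim.
                    self.stack.push(std::mem::replace(&mut self.current, inner.into_iter()));
                    Some('(')
                }
            }
        } else if let Some(parent) = self.stack.pop() {
            // Ascend: resume the parent level, emit the close delim.
            self.current = parent;
            Some(')')
        } else {
            None // the outermost stream is exhausted (the `Eof` case)
        }
    }
}

fn main() {
    let trees = vec![DemoTree::Leaf('a'), DemoTree::Delimited(vec![DemoTree::Leaf('b')])];
    let mut cursor = DemoCursor { current: trees.into_iter(), stack: Vec::new() };
    let mut out = String::new();
    while let Some(c) = cursor.next() {
        out.push(c);
    }
    assert_eq!(out, "a(b)");
}
```

Unlike the real cursor, this sketch has no `Delimiter::Invisible` case, which is why it needs no retry loop.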
+
#[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
pub struct DelimSpan {
    pub open: Span,
@@ -765,6 +1025,7 @@ mod size_asserts {
    static_assert_size!(AttrTokenStream, 8);
    static_assert_size!(AttrTokenTree, 32);
    static_assert_size!(LazyAttrTokenStream, 8);
+   static_assert_size!(LazyAttrTokenStreamImpl, 96);
    static_assert_size!(Option<LazyAttrTokenStream>, 8); // must be small, used in many AST nodes
    static_assert_size!(TokenStream, 8);
    static_assert_size!(TokenTree, 32);