Skip to content

Commit 4886ec8

Browse files
committed
syntax: Capture a TokenStream when parsing items
This is then later used by `proc_macro` to generate a new `proc_macro::TokenTree` which preserves span information. Unfortunately this isn't a bullet-proof approach, as it doesn't handle the case where there are still other attributes on the item, especially inner attributes. Despite this, the intention here is to solve the primary use case for procedural attributes: those attached to functions as outer attributes, likely bare. In this situation we should now be able to yield a lossless stream of tokens to preserve span information.
1 parent 036300a commit 4886ec8

File tree

10 files changed

+398
-20
lines changed

10 files changed

+398
-20
lines changed

src/libproc_macro/lib.rs

+57-6
Original file line numberDiff line numberDiff line change
@@ -510,15 +510,38 @@ impl TokenTree {
510510
Literal(..) | DocComment(..) => TokenNode::Literal(self::Literal(token)),
511511

512512
Interpolated(ref nt) => {
513-
let mut node = None;
514-
if let Nonterminal::NtItem(ref item) = nt.0 {
515-
if let Some(ref tokens) = item.tokens {
516-
node = Some(TokenNode::Group(Delimiter::None,
517-
TokenStream(tokens.clone())));
513+
// An `Interpolated` token means that we have a `Nonterminal`
514+
// which is often a parsed AST item. At this point we now need
515+
// to convert the parsed AST to an actual token stream, e.g.
516+
// un-parse it basically.
517+
//
518+
// Unfortunately there's not really a great way to do that in a
519+
// guaranteed lossless fashion right now. The fallback here is
520+
// to just stringify the AST node and reparse it, but this loses
521+
// all span information.
522+
//
523+
// As a result, some AST nodes are annotated with the token
524+
// stream they came from. Attempt to extract these lossless
525+
// token streams before we fall back to the stringification.
526+
let mut tokens = None;
527+
528+
match nt.0 {
529+
Nonterminal::NtItem(ref item) => {
530+
tokens = prepend_attrs(&item.attrs, item.tokens.as_ref(), span);
518531
}
532+
Nonterminal::NtTraitItem(ref item) => {
533+
tokens = prepend_attrs(&item.attrs, item.tokens.as_ref(), span);
534+
}
535+
Nonterminal::NtImplItem(ref item) => {
536+
tokens = prepend_attrs(&item.attrs, item.tokens.as_ref(), span);
537+
}
538+
_ => {}
519539
}
520540

521-
node.unwrap_or_else(|| {
541+
tokens.map(|tokens| {
542+
TokenNode::Group(Delimiter::None,
543+
TokenStream(tokens.clone()))
544+
}).unwrap_or_else(|| {
522545
__internal::with_sess(|(sess, _)| {
523546
TokenNode::Group(Delimiter::None, TokenStream(nt.1.force(|| {
524547
// FIXME(jseyfried): Avoid this pretty-print + reparse hack
@@ -592,6 +615,34 @@ impl TokenTree {
592615
}
593616
}
594617

618+
fn prepend_attrs(attrs: &[ast::Attribute],
619+
tokens: Option<&tokenstream::TokenStream>,
620+
span: syntax_pos::Span)
621+
-> Option<tokenstream::TokenStream>
622+
{
623+
let tokens = match tokens {
624+
Some(tokens) => tokens,
625+
None => return None,
626+
};
627+
if attrs.len() == 0 {
628+
return Some(tokens.clone())
629+
}
630+
let mut builder = tokenstream::TokenStreamBuilder::new();
631+
for attr in attrs {
632+
assert_eq!(attr.style, ast::AttrStyle::Outer,
633+
"inner attributes should prevent cached tokens from existing");
634+
let stream = __internal::with_sess(|(sess, _)| {
635+
// FIXME: Avoid this pretty-print + reparse hack as above
636+
let name = "<macro expansion>".to_owned();
637+
let source = pprust::attr_to_string(attr);
638+
parse_stream_from_source_str(name, source, sess, Some(span))
639+
});
640+
builder.push(stream);
641+
}
642+
builder.push(tokens.clone());
643+
Some(builder.build())
644+
}
645+
595646
/// Permanently unstable internal implementation details of this crate. This
596647
/// should not be used.
597648
///

src/libsyntax/ast.rs

+7
Original file line numberDiff line numberDiff line change
@@ -1149,6 +1149,8 @@ pub struct TraitItem {
11491149
pub attrs: Vec<Attribute>,
11501150
pub node: TraitItemKind,
11511151
pub span: Span,
1152+
/// See `Item::tokens` for what this is
1153+
pub tokens: Option<TokenStream>,
11521154
}
11531155

11541156
#[derive(Clone, PartialEq, Eq, RustcEncodable, RustcDecodable, Hash, Debug)]
@@ -1168,6 +1170,8 @@ pub struct ImplItem {
11681170
pub attrs: Vec<Attribute>,
11691171
pub node: ImplItemKind,
11701172
pub span: Span,
1173+
/// See `Item::tokens` for what this is
1174+
pub tokens: Option<TokenStream>,
11711175
}
11721176

11731177
#[derive(Clone, PartialEq, Eq, RustcEncodable, RustcDecodable, Hash, Debug)]
@@ -1817,6 +1821,9 @@ pub struct Item {
18171821
/// available for all items, although over time more and more items should
18181822
/// have this be `Some`. Right now this is primarily used for procedural
18191823
/// macros, notably custom attributes.
1824+
///
1825+
/// Note that the tokens here do not include the outer attributes, but will
1826+
/// include inner attributes.
18201827
pub tokens: Option<TokenStream>,
18211828
}
18221829

src/libsyntax/ext/placeholders.rs

+2
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,13 @@ pub fn placeholder(kind: ExpansionKind, id: ast::NodeId) -> Expansion {
5151
ExpansionKind::TraitItems => Expansion::TraitItems(SmallVector::one(ast::TraitItem {
5252
id: id, span: span, ident: ident, attrs: attrs,
5353
node: ast::TraitItemKind::Macro(mac_placeholder()),
54+
tokens: None,
5455
})),
5556
ExpansionKind::ImplItems => Expansion::ImplItems(SmallVector::one(ast::ImplItem {
5657
id: id, span: span, ident: ident, vis: vis, attrs: attrs,
5758
node: ast::ImplItemKind::Macro(mac_placeholder()),
5859
defaultness: ast::Defaultness::Final,
60+
tokens: None,
5961
})),
6062
ExpansionKind::Pat => Expansion::Pat(P(ast::Pat {
6163
id: id, span: span, node: ast::PatKind::Mac(mac_placeholder()),

src/libsyntax/fold.rs

+8-5
Original file line numberDiff line numberDiff line change
@@ -957,7 +957,8 @@ pub fn noop_fold_trait_item<T: Folder>(i: TraitItem, folder: &mut T)
957957
TraitItemKind::Macro(folder.fold_mac(mac))
958958
}
959959
},
960-
span: folder.new_span(i.span)
960+
span: folder.new_span(i.span),
961+
tokens: i.tokens,
961962
})
962963
}
963964

@@ -980,7 +981,8 @@ pub fn noop_fold_impl_item<T: Folder>(i: ImplItem, folder: &mut T)
980981
ast::ImplItemKind::Type(ty) => ast::ImplItemKind::Type(folder.fold_ty(ty)),
981982
ast::ImplItemKind::Macro(mac) => ast::ImplItemKind::Macro(folder.fold_mac(mac))
982983
},
983-
span: folder.new_span(i.span)
984+
span: folder.new_span(i.span),
985+
tokens: i.tokens,
984986
})
985987
}
986988

@@ -1042,9 +1044,10 @@ pub fn noop_fold_item_simple<T: Folder>(Item {id, ident, attrs, node, vis, span,
10421044
attrs: fold_attrs(attrs, folder),
10431045
node: folder.fold_item_kind(node),
10441046
span: folder.new_span(span),
1045-
tokens: tokens.map(|tokens| {
1046-
folder.fold_tts(tokens.into()).into()
1047-
}),
1047+
1048+
// FIXME: if this is replaced with a call to `folder.fold_tts` it causes
1049+
// an ICE during resolve... odd!
1050+
tokens: tokens,
10481051
}
10491052
}
10501053

src/libsyntax/parse/mod.rs

+8-1
Original file line numberDiff line numberDiff line change
@@ -843,11 +843,18 @@ mod tests {
843843
// check the contents of the tt manually:
844844
#[test] fn parse_fundecl () {
845845
// this test depends on the intern order of "fn" and "i32"
846-
assert_eq!(string_to_item("fn a (b : i32) { b; }".to_string()),
846+
let item = string_to_item("fn a (b : i32) { b; }".to_string()).map(|m| {
847+
m.map(|mut m| {
848+
m.tokens = None;
849+
m
850+
})
851+
});
852+
assert_eq!(item,
847853
Some(
848854
P(ast::Item{ident:Ident::from_str("a"),
849855
attrs:Vec::new(),
850856
id: ast::DUMMY_NODE_ID,
857+
tokens: None,
851858
node: ast::ItemKind::Fn(P(ast::FnDecl {
852859
inputs: vec![ast::Arg{
853860
ty: P(ast::Ty{id: ast::DUMMY_NODE_ID,

src/libsyntax/parse/parser.rs

+125-5
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,30 @@ struct TokenCursorFrame {
216216
open_delim: bool,
217217
tree_cursor: tokenstream::Cursor,
218218
close_delim: bool,
219+
last_token: LastToken,
220+
}
221+
222+
/// This is used in `TokenCursorFrame` above to track tokens that are consumed
223+
/// by the parser, and then that's transitively used to record the tokens that
224+
/// each parse AST item is created with.
225+
///
226+
/// Right now this has two states, either collecting tokens or not collecting
227+
/// tokens. If we're collecting tokens we just save everything off into a local
228+
/// `Vec`. This should eventually though likely save tokens from the original
229+
/// token stream and just use slicing of token streams to avoid creation of a
230+
/// whole new vector.
231+
///
232+
/// The second state is where we're passively not recording tokens, but the last
233+
/// token is still tracked for when we want to start recording tokens. This
234+
/// "last token" means that when we start recording tokens we'll want to ensure
235+
/// that this, the first token, is included in the output.
236+
///
237+
/// You can find some more example usage of this in the `collect_tokens` method
238+
/// on the parser.
239+
#[derive(Clone)]
240+
enum LastToken {
241+
Collecting(Vec<TokenTree>),
242+
Was(Option<TokenTree>),
219243
}
220244

221245
impl TokenCursorFrame {
@@ -226,6 +250,7 @@ impl TokenCursorFrame {
226250
open_delim: delimited.delim == token::NoDelim,
227251
tree_cursor: delimited.stream().into_trees(),
228252
close_delim: delimited.delim == token::NoDelim,
253+
last_token: LastToken::Was(None),
229254
}
230255
}
231256
}
@@ -250,6 +275,11 @@ impl TokenCursor {
250275
return TokenAndSpan { tok: token::Eof, sp: syntax_pos::DUMMY_SP }
251276
};
252277

278+
match self.frame.last_token {
279+
LastToken::Collecting(ref mut v) => v.push(tree.clone()),
280+
LastToken::Was(ref mut t) => *t = Some(tree.clone()),
281+
}
282+
253283
match tree {
254284
TokenTree::Token(sp, tok) => return TokenAndSpan { tok: tok, sp: sp },
255285
TokenTree::Delimited(sp, ref delimited) => {
@@ -1209,7 +1239,20 @@ impl<'a> Parser<'a> {
12091239
/// Parse the items in a trait declaration
12101240
pub fn parse_trait_item(&mut self, at_end: &mut bool) -> PResult<'a, TraitItem> {
12111241
maybe_whole!(self, NtTraitItem, |x| x);
1212-
let mut attrs = self.parse_outer_attributes()?;
1242+
let attrs = self.parse_outer_attributes()?;
1243+
let (mut item, tokens) = self.collect_tokens(|this| {
1244+
this.parse_trait_item_(at_end, attrs)
1245+
})?;
1246+
// See `parse_item` for why this clause is here.
1247+
if !item.attrs.iter().any(|attr| attr.style == AttrStyle::Inner) {
1248+
item.tokens = Some(tokens);
1249+
}
1250+
Ok(item)
1251+
}
1252+
1253+
fn parse_trait_item_(&mut self,
1254+
at_end: &mut bool,
1255+
mut attrs: Vec<Attribute>) -> PResult<'a, TraitItem> {
12131256
let lo = self.span;
12141257

12151258
let (name, node) = if self.eat_keyword(keywords::Type) {
@@ -1304,6 +1347,7 @@ impl<'a> Parser<'a> {
13041347
attrs: attrs,
13051348
node: node,
13061349
span: lo.to(self.prev_span),
1350+
tokens: None,
13071351
})
13081352
}
13091353

@@ -4653,7 +4697,7 @@ impl<'a> Parser<'a> {
46534697
node: node,
46544698
vis: vis,
46554699
span: span,
4656-
tokens: None, // TODO: fill this in
4700+
tokens: None,
46574701
})
46584702
}
46594703

@@ -4709,8 +4753,21 @@ impl<'a> Parser<'a> {
47094753
/// Parse an impl item.
47104754
pub fn parse_impl_item(&mut self, at_end: &mut bool) -> PResult<'a, ImplItem> {
47114755
maybe_whole!(self, NtImplItem, |x| x);
4756+
let attrs = self.parse_outer_attributes()?;
4757+
let (mut item, tokens) = self.collect_tokens(|this| {
4758+
this.parse_impl_item_(at_end, attrs)
4759+
})?;
4760+
4761+
// See `parse_item` for why this clause is here.
4762+
if !item.attrs.iter().any(|attr| attr.style == AttrStyle::Inner) {
4763+
item.tokens = Some(tokens);
4764+
}
4765+
Ok(item)
4766+
}
47124767

4713-
let mut attrs = self.parse_outer_attributes()?;
4768+
fn parse_impl_item_(&mut self,
4769+
at_end: &mut bool,
4770+
mut attrs: Vec<Attribute>) -> PResult<'a, ImplItem> {
47144771
let lo = self.span;
47154772
let vis = self.parse_visibility(false)?;
47164773
let defaultness = self.parse_defaultness()?;
@@ -4742,7 +4799,8 @@ impl<'a> Parser<'a> {
47424799
vis: vis,
47434800
defaultness: defaultness,
47444801
attrs: attrs,
4745-
node: node
4802+
node: node,
4803+
tokens: None,
47464804
})
47474805
}
47484806

@@ -6018,9 +6076,71 @@ impl<'a> Parser<'a> {
60186076
Ok(None)
60196077
}
60206078

6079+
fn collect_tokens<F, R>(&mut self, f: F) -> PResult<'a, (R, TokenStream)>
6080+
where F: FnOnce(&mut Self) -> PResult<'a, R>
6081+
{
6082+
// Record all tokens we parse when parsing this item.
6083+
let mut tokens = Vec::new();
6084+
match self.token_cursor.frame.last_token {
6085+
LastToken::Collecting(_) => {
6086+
panic!("cannot collect tokens recursively yet")
6087+
}
6088+
LastToken::Was(ref mut last) => tokens.extend(last.take()),
6089+
}
6090+
self.token_cursor.frame.last_token = LastToken::Collecting(tokens);
6091+
let prev = self.token_cursor.stack.len();
6092+
let ret = f(self);
6093+
let last_token = if self.token_cursor.stack.len() == prev {
6094+
&mut self.token_cursor.frame.last_token
6095+
} else {
6096+
&mut self.token_cursor.stack[prev].last_token
6097+
};
6098+
let mut tokens = match *last_token {
6099+
LastToken::Collecting(ref mut v) => mem::replace(v, Vec::new()),
6100+
LastToken::Was(_) => panic!("our vector went away?"),
6101+
};
6102+
6103+
// If we're not at EOF our current token wasn't actually consumed by
6104+
// `f`, but it'll still be in our list that we pulled out. In that case
6105+
// put it back.
6106+
if self.token == token::Eof {
6107+
*last_token = LastToken::Was(None);
6108+
} else {
6109+
*last_token = LastToken::Was(tokens.pop());
6110+
}
6111+
6112+
Ok((ret?, tokens.into_iter().collect()))
6113+
}
6114+
60216115
pub fn parse_item(&mut self) -> PResult<'a, Option<P<Item>>> {
60226116
let attrs = self.parse_outer_attributes()?;
6023-
self.parse_item_(attrs, true, false)
6117+
6118+
let (ret, tokens) = self.collect_tokens(|this| {
6119+
this.parse_item_(attrs, true, false)
6120+
})?;
6121+
6122+
// Once we've parsed an item and recorded the tokens we got while
6123+
// parsing we may want to store `tokens` into the item we're about to
6124+
// return. Note, though, that we specifically didn't capture tokens
6125+
// related to outer attributes. The `tokens` field here may later be
6126+
// used with procedural macros to convert this item back into a token
6127+
// stream, but during expansion we may be removing attributes as we go
6128+
// along.
6129+
//
6130+
// If we've got inner attributes then the `tokens` we've got above holds
6131+
// these inner attributes. If an inner attribute is expanded we won't
6132+
// actually remove it from the token stream, so we'll just keep yielding
6133+
// it (bad!). To work around this case for now we just avoid recording
6134+
// `tokens` if we detect any inner attributes. This should help keep
6135+
// expansion correct, but we should fix this bug one day!
6136+
Ok(ret.map(|item| {
6137+
item.map(|mut i| {
6138+
if !i.attrs.iter().any(|attr| attr.style == AttrStyle::Inner) {
6139+
i.tokens = Some(tokens);
6140+
}
6141+
i
6142+
})
6143+
}))
60246144
}
60256145

60266146
fn parse_path_list_items(&mut self) -> PResult<'a, Vec<ast::PathListItem>> {

src/libsyntax_ext/deriving/generic/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,7 @@ impl<'a> TraitDef<'a> {
504504
defaultness: ast::Defaultness::Final,
505505
attrs: Vec::new(),
506506
node: ast::ImplItemKind::Type(type_def.to_ty(cx, self.span, type_ident, generics)),
507+
tokens: None,
507508
}
508509
});
509510

@@ -930,6 +931,7 @@ impl<'a> MethodDef<'a> {
930931
decl: fn_decl,
931932
},
932933
body_block),
934+
tokens: None,
933935
}
934936
}
935937

0 commit comments

Comments
 (0)