Skip to content

Commit 4886ec8

Browse files
committed
syntax: Capture a TokenStream when parsing items
This is then later used by `proc_macro` to generate a new `proc_macro::TokenTree` which preserves span information. Unfortunately this isn't a bullet-proof approach, as it doesn't handle the case where there are still other attributes on the item, especially inner attributes. Despite this, the intention here is to solve the primary use case for procedural attributes: those attached to functions as outer attributes, likely bare. In this situation we should now be able to yield a lossless stream of tokens to preserve span information.
1 parent 036300a commit 4886ec8

File tree

10 files changed

+398
-20
lines changed

10 files changed

+398
-20
lines changed

src/libproc_macro/lib.rs

+57-6
Original file line numberDiff line numberDiff line change
@@ -510,15 +510,38 @@ impl TokenTree {
510510
Literal(..) | DocComment(..) => TokenNode::Literal(self::Literal(token)),
511511

512512
Interpolated(ref nt) => {
513-
let mut node = None;
514-
if let Nonterminal::NtItem(ref item) = nt.0 {
515-
if let Some(ref tokens) = item.tokens {
516-
node = Some(TokenNode::Group(Delimiter::None,
517-
TokenStream(tokens.clone())));
513+
// An `Interpolated` token means that we have a `Nonterminal`
514+
// which is often a parsed AST item. At this point we now need
515+
// to convert the parsed AST to an actual token stream, e.g.
516+
// un-parse it basically.
517+
//
518+
// Unfortunately there's not really a great way to do that in a
519+
// guaranteed lossless fashion right now. The fallback here is
520+
// to just stringify the AST node and reparse it, but this loses
521+
// all span information.
522+
//
523+
// As a result, some AST nodes are annotated with the token
524+
// stream they came from. Attempt to extract these lossless
525+
// token streams before we fall back to the stringification.
526+
let mut tokens = None;
527+
528+
match nt.0 {
529+
Nonterminal::NtItem(ref item) => {
530+
tokens = prepend_attrs(&item.attrs, item.tokens.as_ref(), span);
518531
}
532+
Nonterminal::NtTraitItem(ref item) => {
533+
tokens = prepend_attrs(&item.attrs, item.tokens.as_ref(), span);
534+
}
535+
Nonterminal::NtImplItem(ref item) => {
536+
tokens = prepend_attrs(&item.attrs, item.tokens.as_ref(), span);
537+
}
538+
_ => {}
519539
}
520540

521-
node.unwrap_or_else(|| {
541+
tokens.map(|tokens| {
542+
TokenNode::Group(Delimiter::None,
543+
TokenStream(tokens.clone()))
544+
}).unwrap_or_else(|| {
522545
__internal::with_sess(|(sess, _)| {
523546
TokenNode::Group(Delimiter::None, TokenStream(nt.1.force(|| {
524547
// FIXME(jseyfried): Avoid this pretty-print + reparse hack
@@ -592,6 +615,34 @@ impl TokenTree {
592615
}
593616
}
594617

618+
fn prepend_attrs(attrs: &[ast::Attribute],
619+
tokens: Option<&tokenstream::TokenStream>,
620+
span: syntax_pos::Span)
621+
-> Option<tokenstream::TokenStream>
622+
{
623+
let tokens = match tokens {
624+
Some(tokens) => tokens,
625+
None => return None,
626+
};
627+
if attrs.len() == 0 {
628+
return Some(tokens.clone())
629+
}
630+
let mut builder = tokenstream::TokenStreamBuilder::new();
631+
for attr in attrs {
632+
assert_eq!(attr.style, ast::AttrStyle::Outer,
633+
"inner attributes should prevent cached tokens from existing");
634+
let stream = __internal::with_sess(|(sess, _)| {
635+
// FIXME: Avoid this pretty-print + reparse hack as above
636+
let name = "<macro expansion>".to_owned();
637+
let source = pprust::attr_to_string(attr);
638+
parse_stream_from_source_str(name, source, sess, Some(span))
639+
});
640+
builder.push(stream);
641+
}
642+
builder.push(tokens.clone());
643+
Some(builder.build())
644+
}
645+
595646
/// Permanently unstable internal implementation details of this crate. This
596647
/// should not be used.
597648
///

src/libsyntax/ast.rs

+7
Original file line numberDiff line numberDiff line change
@@ -1149,6 +1149,8 @@ pub struct TraitItem {
11491149
pub attrs: Vec<Attribute>,
11501150
pub node: TraitItemKind,
11511151
pub span: Span,
1152+
/// See `Item::tokens` for what this is
1153+
pub tokens: Option<TokenStream>,
11521154
}
11531155

11541156
#[derive(Clone, PartialEq, Eq, RustcEncodable, RustcDecodable, Hash, Debug)]
@@ -1168,6 +1170,8 @@ pub struct ImplItem {
11681170
pub attrs: Vec<Attribute>,
11691171
pub node: ImplItemKind,
11701172
pub span: Span,
1173+
/// See `Item::tokens` for what this is
1174+
pub tokens: Option<TokenStream>,
11711175
}
11721176

11731177
#[derive(Clone, PartialEq, Eq, RustcEncodable, RustcDecodable, Hash, Debug)]
@@ -1817,6 +1821,9 @@ pub struct Item {
18171821
/// available for all items, although over time more and more items should
18181822
/// have this be `Some`. Right now this is primarily used for procedural
18191823
/// macros, notably custom attributes.
1824+
///
1825+
/// Note that the tokens here do not include the outer attributes, but will
1826+
/// include inner attributes.
18201827
pub tokens: Option<TokenStream>,
18211828
}
18221829

src/libsyntax/ext/placeholders.rs

+2
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,13 @@ pub fn placeholder(kind: ExpansionKind, id: ast::NodeId) -> Expansion {
5151
ExpansionKind::TraitItems => Expansion::TraitItems(SmallVector::one(ast::TraitItem {
5252
id: id, span: span, ident: ident, attrs: attrs,
5353
node: ast::TraitItemKind::Macro(mac_placeholder()),
54+
tokens: None,
5455
})),
5556
ExpansionKind::ImplItems => Expansion::ImplItems(SmallVector::one(ast::ImplItem {
5657
id: id, span: span, ident: ident, vis: vis, attrs: attrs,
5758
node: ast::ImplItemKind::Macro(mac_placeholder()),
5859
defaultness: ast::Defaultness::Final,
60+
tokens: None,
5961
})),
6062
ExpansionKind::Pat => Expansion::Pat(P(ast::Pat {
6163
id: id, span: span, node: ast::PatKind::Mac(mac_placeholder()),

src/libsyntax/fold.rs

+8-5
Original file line numberDiff line numberDiff line change
@@ -957,7 +957,8 @@ pub fn noop_fold_trait_item<T: Folder>(i: TraitItem, folder: &mut T)
957957
TraitItemKind::Macro(folder.fold_mac(mac))
958958
}
959959
},
960-
span: folder.new_span(i.span)
960+
span: folder.new_span(i.span),
961+
tokens: i.tokens,
961962
})
962963
}
963964

@@ -980,7 +981,8 @@ pub fn noop_fold_impl_item<T: Folder>(i: ImplItem, folder: &mut T)
980981
ast::ImplItemKind::Type(ty) => ast::ImplItemKind::Type(folder.fold_ty(ty)),
981982
ast::ImplItemKind::Macro(mac) => ast::ImplItemKind::Macro(folder.fold_mac(mac))
982983
},
983-
span: folder.new_span(i.span)
984+
span: folder.new_span(i.span),
985+
tokens: i.tokens,
984986
})
985987
}
986988

@@ -1042,9 +1044,10 @@ pub fn noop_fold_item_simple<T: Folder>(Item {id, ident, attrs, node, vis, span,
10421044
attrs: fold_attrs(attrs, folder),
10431045
node: folder.fold_item_kind(node),
10441046
span: folder.new_span(span),
1045-
tokens: tokens.map(|tokens| {
1046-
folder.fold_tts(tokens.into()).into()
1047-
}),
1047+
1048+
// FIXME: if this is replaced with a call to `folder.fold_tts` it causes
1049+
// an ICE during resolve... odd!
1050+
tokens: tokens,
10481051
}
10491052
}
10501053

src/libsyntax/parse/mod.rs

+8-1
Original file line numberDiff line numberDiff line change
@@ -843,11 +843,18 @@ mod tests {
843843
// check the contents of the tt manually:
844844
#[test] fn parse_fundecl () {
845845
// this test depends on the intern order of "fn" and "i32"
846-
assert_eq!(string_to_item("fn a (b : i32) { b; }".to_string()),
846+
let item = string_to_item("fn a (b : i32) { b; }".to_string()).map(|m| {
847+
m.map(|mut m| {
848+
m.tokens = None;
849+
m
850+
})
851+
});
852+
assert_eq!(item,
847853
Some(
848854
P(ast::Item{ident:Ident::from_str("a"),
849855
attrs:Vec::new(),
850856
id: ast::DUMMY_NODE_ID,
857+
tokens: None,
851858
node: ast::ItemKind::Fn(P(ast::FnDecl {
852859
inputs: vec![ast::Arg{
853860
ty: P(ast::Ty{id: ast::DUMMY_NODE_ID,

src/libsyntax/parse/parser.rs

+125-5
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,30 @@ struct TokenCursorFrame {
216216
open_delim: bool,
217217
tree_cursor: tokenstream::Cursor,
218218
close_delim: bool,
219+
last_token: LastToken,
220+
}
221+
222+
/// This is used in `TokenCursorFrame` above to track tokens that are consumed
223+
/// by the parser, and then that's transitively used to record the tokens that
224+
/// each parse AST item is created with.
225+
///
226+
/// Right now this has two states, either collecting tokens or not collecting
227+
/// tokens. If we're collecting tokens we just save everything off into a local
228+
/// `Vec`. This should eventually though likely save tokens from the original
229+
/// token stream and just use slicing of token streams to avoid creation of a
230+
/// whole new vector.
231+
///
232+
/// The second state is where we're passively not recording tokens, but the last
233+
/// token is still tracked for when we want to start recording tokens. This
234+
/// "last token" means that when we start recording tokens we'll want to ensure
235+
/// that this, the first token, is included in the output.
236+
///
237+
/// You can find some more example usage of this in the `collect_tokens` method
238+
/// on the parser.
239+
#[derive(Clone)]
240+
enum LastToken {
241+
Collecting(Vec<TokenTree>),
242+
Was(Option<TokenTree>),
219243
}
220244

221245
impl TokenCursorFrame {
@@ -226,6 +250,7 @@ impl TokenCursorFrame {
226250
open_delim: delimited.delim == token::NoDelim,
227251
tree_cursor: delimited.stream().into_trees(),
228252
close_delim: delimited.delim == token::NoDelim,
253+
last_token: LastToken::Was(None),
229254
}
230255
}
231256
}
@@ -250,6 +275,11 @@ impl TokenCursor {
250275
return TokenAndSpan { tok: token::Eof, sp: syntax_pos::DUMMY_SP }
251276
};
252277

278+
match self.frame.last_token {
279+
LastToken::Collecting(ref mut v) => v.push(tree.clone()),
280+
LastToken::Was(ref mut t) => *t = Some(tree.clone()),
281+
}
282+
253283
match tree {
254284
TokenTree::Token(sp, tok) => return TokenAndSpan { tok: tok, sp: sp },
255285
TokenTree::Delimited(sp, ref delimited) => {
@@ -1209,7 +1239,20 @@ impl<'a> Parser<'a> {
12091239
/// Parse the items in a trait declaration
12101240
pub fn parse_trait_item(&mut self, at_end: &mut bool) -> PResult<'a, TraitItem> {
12111241
maybe_whole!(self, NtTraitItem, |x| x);
1212-
let mut attrs = self.parse_outer_attributes()?;
1242+
let attrs = self.parse_outer_attributes()?;
1243+
let (mut item, tokens) = self.collect_tokens(|this| {
1244+
this.parse_trait_item_(at_end, attrs)
1245+
})?;
1246+
// See `parse_item` for why this clause is here.
1247+
if !item.attrs.iter().any(|attr| attr.style == AttrStyle::Inner) {
1248+
item.tokens = Some(tokens);
1249+
}
1250+
Ok(item)
1251+
}
1252+
1253+
fn parse_trait_item_(&mut self,
1254+
at_end: &mut bool,
1255+
mut attrs: Vec<Attribute>) -> PResult<'a, TraitItem> {
12131256
let lo = self.span;
12141257

12151258
let (name, node) = if self.eat_keyword(keywords::Type) {
@@ -1304,6 +1347,7 @@ impl<'a> Parser<'a> {
13041347
attrs: attrs,
13051348
node: node,
13061349
span: lo.to(self.prev_span),
1350+
tokens: None,
13071351
})
13081352
}
13091353

@@ -4653,7 +4697,7 @@ impl<'a> Parser<'a> {
46534697
node: node,
46544698
vis: vis,
46554699
span: span,
4656-
tokens: None, // TODO: fill this in
4700+
tokens: None,
46574701
})
46584702
}
46594703

@@ -4709,8 +4753,21 @@ impl<'a> Parser<'a> {
47094753
/// Parse an impl item.
47104754
pub fn parse_impl_item(&mut self, at_end: &mut bool) -> PResult<'a, ImplItem> {
47114755
maybe_whole!(self, NtImplItem, |x| x);
4756+
let attrs = self.parse_outer_attributes()?;
4757+
let (mut item, tokens) = self.collect_tokens(|this| {
4758+
this.parse_impl_item_(at_end, attrs)
4759+
})?;
4760+
4761+
// See `parse_item` for why this clause is here.
4762+
if !item.attrs.iter().any(|attr| attr.style == AttrStyle::Inner) {
4763+
item.tokens = Some(tokens);
4764+
}
4765+
Ok(item)
4766+
}
47124767

4713-
let mut attrs = self.parse_outer_attributes()?;
4768+
fn parse_impl_item_(&mut self,
4769+
at_end: &mut bool,
4770+
mut attrs: Vec<Attribute>) -> PResult<'a, ImplItem> {
47144771
let lo = self.span;
47154772
let vis = self.parse_visibility(false)?;
47164773
let defaultness = self.parse_defaultness()?;
@@ -4742,7 +4799,8 @@ impl<'a> Parser<'a> {
47424799
vis: vis,
47434800
defaultness: defaultness,
47444801
attrs: attrs,
4745-
node: node
4802+
node: node,
4803+
tokens: None,
47464804
})
47474805
}
47484806

@@ -6018,9 +6076,71 @@ impl<'a> Parser<'a> {
60186076
Ok(None)
60196077
}
60206078

6079+
fn collect_tokens<F, R>(&mut self, f: F) -> PResult<'a, (R, TokenStream)>
6080+
where F: FnOnce(&mut Self) -> PResult<'a, R>
6081+
{
6082+
// Record all tokens we parse when parsing this item.
6083+
let mut tokens = Vec::new();
6084+
match self.token_cursor.frame.last_token {
6085+
LastToken::Collecting(_) => {
6086+
panic!("cannot collect tokens recursively yet")
6087+
}
6088+
LastToken::Was(ref mut last) => tokens.extend(last.take()),
6089+
}
6090+
self.token_cursor.frame.last_token = LastToken::Collecting(tokens);
6091+
let prev = self.token_cursor.stack.len();
6092+
let ret = f(self);
6093+
let last_token = if self.token_cursor.stack.len() == prev {
6094+
&mut self.token_cursor.frame.last_token
6095+
} else {
6096+
&mut self.token_cursor.stack[prev].last_token
6097+
};
6098+
let mut tokens = match *last_token {
6099+
LastToken::Collecting(ref mut v) => mem::replace(v, Vec::new()),
6100+
LastToken::Was(_) => panic!("our vector went away?"),
6101+
};
6102+
6103+
// If we're not at EOF our current token wasn't actually consumed by
6104+
// `f`, but it'll still be in our list that we pulled out. In that case
6105+
// put it back.
6106+
if self.token == token::Eof {
6107+
*last_token = LastToken::Was(None);
6108+
} else {
6109+
*last_token = LastToken::Was(tokens.pop());
6110+
}
6111+
6112+
Ok((ret?, tokens.into_iter().collect()))
6113+
}
6114+
60216115
pub fn parse_item(&mut self) -> PResult<'a, Option<P<Item>>> {
60226116
let attrs = self.parse_outer_attributes()?;
6023-
self.parse_item_(attrs, true, false)
6117+
6118+
let (ret, tokens) = self.collect_tokens(|this| {
6119+
this.parse_item_(attrs, true, false)
6120+
})?;
6121+
6122+
// Once we've parsed an item and recorded the tokens we got while
6123+
// parsing we may want to store `tokens` into the item we're about to
6124+
// return. Note, though, that we specifically didn't capture tokens
6125+
// related to outer attributes. The `tokens` field here may later be
6126+
// used with procedural macros to convert this item back into a token
6127+
// stream, but during expansion we may be removing attributes as we go
6128+
// along.
6129+
//
6130+
// If we've got inner attributes then the `tokens` we've got above holds
6131+
// these inner attributes. If an inner attribute is expanded we won't
6132+
// actually remove it from the token stream, so we'll just keep yielding
6133+
// it (bad!). To work around this case for now we just avoid recording
6134+
// `tokens` if we detect any inner attributes. This should help keep
6135+
// expansion correct, but we should fix this bug one day!
6136+
Ok(ret.map(|item| {
6137+
item.map(|mut i| {
6138+
if !i.attrs.iter().any(|attr| attr.style == AttrStyle::Inner) {
6139+
i.tokens = Some(tokens);
6140+
}
6141+
i
6142+
})
6143+
}))
60246144
}
60256145

60266146
fn parse_path_list_items(&mut self) -> PResult<'a, Vec<ast::PathListItem>> {

src/libsyntax_ext/deriving/generic/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,7 @@ impl<'a> TraitDef<'a> {
504504
defaultness: ast::Defaultness::Final,
505505
attrs: Vec::new(),
506506
node: ast::ImplItemKind::Type(type_def.to_ty(cx, self.span, type_ident, generics)),
507+
tokens: None,
507508
}
508509
});
509510

@@ -930,6 +931,7 @@ impl<'a> MethodDef<'a> {
930931
decl: fn_decl,
931932
},
932933
body_block),
934+
tokens: None,
933935
}
934936
}
935937

0 commit comments

Comments
 (0)