From a47ae57a187eb9537fe22a50692088cc655f357f Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Mon, 8 Jul 2024 14:47:12 +1000 Subject: [PATCH 1/9] Use an `@` pattern to shorten some code. --- compiler/rustc_parse/src/parser/attr_wrapper.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 38f18022e3c58..a627bc89aa02f 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -325,12 +325,9 @@ impl<'a> Parser<'a> { replace_ranges, }); - // If we support tokens at all - if let Some(target_tokens) = ret.tokens_mut() { - if target_tokens.is_none() { - // Store our newly captured tokens into the AST node. - *target_tokens = Some(tokens.clone()); - } + // If we support tokens and don't already have them, store the newly captured tokens. + if let Some(target_tokens @ None) = ret.tokens_mut() { + *target_tokens = Some(tokens.clone()); } let final_attrs = ret.attrs(); From b16201317eefa9054d545dc5237b1bc830448258 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Mon, 8 Jul 2024 17:41:05 +1000 Subject: [PATCH 2/9] Use iterator normally in `make_attr_token_stream`. In a `for` loop, instead of a `while` loop. --- compiler/rustc_parse/src/parser/attr_wrapper.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index a627bc89aa02f..16261feca91d0 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -367,7 +367,7 @@ impl<'a> Parser<'a> { /// `AttrTokenStream`, creating an `AttrTokenTree::Delimited` for each matching pair of open and /// close delims. fn make_attr_token_stream( - mut iter: impl Iterator, + iter: impl Iterator, break_last_token: bool, ) -> AttrTokenStream { #[derive(Debug)] @@ -377,8 +377,7 @@ fn make_attr_token_stream( inner: Vec, } let mut stack = vec![FrameData { open_delim_sp: None, inner: vec![] }]; - let mut token_and_spacing = iter.next(); - while let Some((token, spacing)) = token_and_spacing { + for (token, spacing) in iter { match token { FlatToken::Token(Token { kind: TokenKind::OpenDelim(delim), span }) => { stack @@ -416,7 +415,6 @@ fn make_attr_token_stream( .push(AttrTokenTree::AttrsTarget(target)), FlatToken::Empty => {} } - token_and_spacing = iter.next(); } let mut final_buf = stack.pop().expect("Missing final buf!"); if break_last_token { From a88c4d67d9ea8a9f450638f207be867fa984bfce Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Mon, 8 Jul 2024 18:05:18 +1000 Subject: [PATCH 3/9] Split the stack in `make_attr_token_stream`. It makes for shorter code, and fewer allocations. --- .../rustc_parse/src/parser/attr_wrapper.rs | 43 ++++++++----------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 16261feca91d0..5a4259eb1e76f 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -376,18 +376,19 @@ fn make_attr_token_stream( open_delim_sp: Option<(Delimiter, Span, Spacing)>, inner: Vec, } - let mut stack = vec![FrameData { open_delim_sp: None, inner: vec![] }]; + // The stack always has at least one element. Storing it separately makes for shorter code. 
+ let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] }; + let mut stack_rest = vec![]; for (token, spacing) in iter { match token { FlatToken::Token(Token { kind: TokenKind::OpenDelim(delim), span }) => { - stack - .push(FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] }); + stack_rest.push(mem::replace( + &mut stack_top, + FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] }, + )); } FlatToken::Token(Token { kind: TokenKind::CloseDelim(delim), span }) => { - let frame_data = stack - .pop() - .unwrap_or_else(|| panic!("Token stack was empty for token: {token:?}")); - + let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap()); let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap(); assert_eq!( open_delim, delim, @@ -397,28 +398,18 @@ fn make_attr_token_stream( let dspacing = DelimSpacing::new(open_spacing, spacing); let stream = AttrTokenStream::new(frame_data.inner); let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream); - stack - .last_mut() - .unwrap_or_else(|| panic!("Bottom token frame is missing for token: {token:?}")) - .inner - .push(delimited); + stack_top.inner.push(delimited); + } + FlatToken::Token(token) => stack_top.inner.push(AttrTokenTree::Token(token, spacing)), + FlatToken::AttrsTarget(target) => { + stack_top.inner.push(AttrTokenTree::AttrsTarget(target)) } - FlatToken::Token(token) => stack - .last_mut() - .expect("Bottom token frame is missing!") - .inner - .push(AttrTokenTree::Token(token, spacing)), - FlatToken::AttrsTarget(target) => stack - .last_mut() - .expect("Bottom token frame is missing!") - .inner - .push(AttrTokenTree::AttrsTarget(target)), FlatToken::Empty => {} } } - let mut final_buf = stack.pop().expect("Missing final buf!"); + if break_last_token { - let last_token = final_buf.inner.pop().unwrap(); + let last_token = stack_top.inner.pop().unwrap(); if let AttrTokenTree::Token(last_token, spacing) = last_token { let unglued_first = last_token.kind.break_two_token_op().unwrap().0; @@ -426,14 +417,14 @@ fn make_attr_token_stream( let mut first_span = last_token.span.shrink_to_lo(); first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1)); - final_buf + stack_top .inner .push(AttrTokenTree::Token(Token::new(unglued_first, first_span), spacing)); } else { panic!("Unexpected last token {last_token:?}") } } - AttrTokenStream::new(final_buf.inner) + AttrTokenStream::new(stack_top.inner) } // Some types are used a lot. Make sure they don't unintentionally get bigger. From f5527949f2e819c056551a25d8fab7008facf6da Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Mon, 8 Jul 2024 19:32:21 +1000 Subject: [PATCH 4/9] Move `Spacing` into `FlatToken`. It's only needed for the `FlatToken::Token` variant. This makes things a little more concise. --- .../rustc_parse/src/parser/attr_wrapper.rs | 28 ++++++++----------- compiler/rustc_parse/src/parser/mod.rs | 2 +- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 5a4259eb1e76f..67d2424bb48a3 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -103,11 +103,8 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl { // produce an empty `TokenStream` if no calls were made, and omit the // final token otherwise. 
let mut cursor_snapshot = self.cursor_snapshot.clone(); - let tokens = iter::once((FlatToken::Token(self.start_token.0.clone()), self.start_token.1)) - .chain(iter::repeat_with(|| { - let token = cursor_snapshot.next(); - (FlatToken::Token(token.0), token.1) - })) + let tokens = iter::once(FlatToken::Token(self.start_token.clone())) + .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next()))) .take(self.num_calls as usize); if self.replace_ranges.is_empty() { @@ -156,11 +153,8 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl { (range.start as usize)..(range.end as usize), target .into_iter() - .map(|target| (FlatToken::AttrsTarget(target), Spacing::Alone)) - .chain( - iter::repeat((FlatToken::Empty, Spacing::Alone)) - .take(range.len() - target_len), - ), + .map(|target| FlatToken::AttrsTarget(target)) + .chain(iter::repeat(FlatToken::Empty).take(range.len() - target_len)), ); } make_attr_token_stream(tokens.into_iter(), self.break_last_token) @@ -367,7 +361,7 @@ impl<'a> Parser<'a> { /// `AttrTokenStream`, creating an `AttrTokenTree::Delimited` for each matching pair of open and /// close delims. fn make_attr_token_stream( - iter: impl Iterator, + iter: impl Iterator, break_last_token: bool, ) -> AttrTokenStream { #[derive(Debug)] @@ -379,15 +373,15 @@ fn make_attr_token_stream( // The stack always has at least one element. Storing it separately makes for shorter code. let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] }; let mut stack_rest = vec![]; - for (token, spacing) in iter { - match token { - FlatToken::Token(Token { kind: TokenKind::OpenDelim(delim), span }) => { + for flat_token in iter { + match flat_token { + FlatToken::Token((Token { kind: TokenKind::OpenDelim(delim), span }, spacing)) => { stack_rest.push(mem::replace( &mut stack_top, FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] }, )); } - FlatToken::Token(Token { kind: TokenKind::CloseDelim(delim), span }) => { + FlatToken::Token((Token { kind: TokenKind::CloseDelim(delim), span }, spacing)) => { let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap()); let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap(); assert_eq!( @@ -400,7 +394,9 @@ fn make_attr_token_stream( let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream); stack_top.inner.push(delimited); } - FlatToken::Token(token) => stack_top.inner.push(AttrTokenTree::Token(token, spacing)), + FlatToken::Token((token, spacing)) => { + stack_top.inner.push(AttrTokenTree::Token(token, spacing)) + } FlatToken::AttrsTarget(target) => { stack_top.inner.push(AttrTokenTree::AttrsTarget(target)) } diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 45ca267fe5d1e..1482c665a16b7 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -1607,7 +1607,7 @@ pub(crate) fn make_unclosed_delims_error( enum FlatToken { /// A token - this holds both delimiter (e.g. '{' and '}') /// and non-delimiter tokens - Token(Token), + Token((Token, Spacing)), /// Holds the `AttrsTarget` for an AST node. The `AttrsTarget` is inserted /// directly into the constructed `AttrTokenStream` as an /// `AttrTokenTree::AttrsTarget`. From 8a390bae06dbb5686b581797b8f46cb0353dc255 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 10 Jul 2024 14:41:39 +1000 Subject: [PATCH 5/9] Change empty replace range condition. 
The new condition is equivalent in practice, but it's much more obvious that it would result in an empty range, because the condition lines up with the contents of the iterator. --- .../rustc_parse/src/parser/attr_wrapper.rs | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 67d2424bb48a3..e90568f177c26 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -295,21 +295,22 @@ impl<'a> Parser<'a> { let num_calls = end_pos - start_pos; - // If we have no attributes, then we will never need to - // use any replace ranges. - let replace_ranges: Box<[ReplaceRange]> = if ret.attrs().is_empty() && !self.capture_cfg { - Box::new([]) - } else { - // Grab any replace ranges that occur *inside* the current AST node. - // We will perform the actual replacement when we convert the `LazyAttrTokenStream` - // to an `AttrTokenStream`. - self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end] - .iter() - .cloned() - .chain(inner_attr_replace_ranges.iter().cloned()) - .map(|(range, data)| ((range.start - start_pos)..(range.end - start_pos), data)) - .collect() - }; + // This is hot enough for `deep-vector` that checking the conditions for an empty iterator + // is measurably faster than actually executing the iterator. + let replace_ranges: Box<[ReplaceRange]> = + if replace_ranges_start == replace_ranges_end && inner_attr_replace_ranges.is_empty() { + Box::new([]) + } else { + // Grab any replace ranges that occur *inside* the current AST node. + // We will perform the actual replacement when we convert the `LazyAttrTokenStream` + // to an `AttrTokenStream`. + self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end] + .iter() + .cloned() + .chain(inner_attr_replace_ranges.iter().cloned()) + .map(|(range, data)| ((range.start - start_pos)..(range.end - start_pos), data)) + .collect() + }; let tokens = LazyAttrTokenStream::new(LazyAttrTokenStreamImpl { start_token, From fee152556fda2260a17fa24cea21d18b57c8460d Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 10 Jul 2024 14:51:41 +1000 Subject: [PATCH 6/9] Rework `Attribute::get_tokens`. Returning `Vec` works better for the call sites than returning `TokenStream`. --- compiler/rustc_ast/src/attr/mod.rs | 23 ++++++++++------------- compiler/rustc_ast/src/tokenstream.rs | 9 +++++---- compiler/rustc_expand/src/config.rs | 4 +--- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/compiler/rustc_ast/src/attr/mod.rs b/compiler/rustc_ast/src/attr/mod.rs index 088ae9ba44102..d2c7b1c0753da 100644 --- a/compiler/rustc_ast/src/attr/mod.rs +++ b/compiler/rustc_ast/src/attr/mod.rs @@ -202,21 +202,18 @@ impl Attribute { } } - // Named `get_tokens` to distinguish it from the `::tokens` method. 
- pub fn get_tokens(&self) -> TokenStream { - match &self.kind { - AttrKind::Normal(normal) => TokenStream::new( - normal - .tokens - .as_ref() - .unwrap_or_else(|| panic!("attribute is missing tokens: {self:?}")) - .to_attr_token_stream() - .to_token_trees(), - ), - &AttrKind::DocComment(comment_kind, data) => TokenStream::token_alone( + pub fn token_trees(&self) -> Vec { + match self.kind { + AttrKind::Normal(ref normal) => normal + .tokens + .as_ref() + .unwrap_or_else(|| panic!("attribute is missing tokens: {self:?}")) + .to_attr_token_stream() + .to_token_trees(), + AttrKind::DocComment(comment_kind, data) => vec![TokenTree::token_alone( token::DocComment(comment_kind, self.style, data), self.span, - ), + )], } } } diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index ee068f19332a5..1f2adde2570d7 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -225,11 +225,12 @@ impl AttrTokenStream { // properly implemented - we always synthesize fake tokens, // so we never reach this code. - let mut stream = TokenStream::default(); + let mut tts = vec![]; for inner_attr in inner_attrs { - stream.push_stream(inner_attr.get_tokens()); + tts.extend(inner_attr.token_trees()); } - stream.push_stream(delim_tokens.clone()); + tts.extend(delim_tokens.0.iter().cloned()); + let stream = TokenStream::new(tts); *tree = TokenTree::Delimited(*span, *spacing, *delim, stream); found = true; break; @@ -242,7 +243,7 @@ impl AttrTokenStream { ); } for attr in outer_attrs { - res.extend(attr.get_tokens().0.iter().cloned()); + res.extend(attr.token_trees()); } res.extend(target_tokens); } diff --git a/compiler/rustc_expand/src/config.rs b/compiler/rustc_expand/src/config.rs index 40e16b4511575..8d796500dcc67 100644 --- a/compiler/rustc_expand/src/config.rs +++ b/compiler/rustc_expand/src/config.rs @@ -292,8 +292,6 @@ impl<'a> StripUnconfigured<'a> { attr: &Attribute, (item, item_span): (ast::AttrItem, Span), ) -> Attribute { - let orig_tokens = attr.get_tokens(); - // We are taking an attribute of the form `#[cfg_attr(pred, attr)]` // and producing an attribute of the form `#[attr]`. We // have captured tokens for `attr` itself, but we need to @@ -302,7 +300,7 @@ impl<'a> StripUnconfigured<'a> { // Use the `#` in `#[cfg_attr(pred, attr)]` as the `#` token // for `attr` when we expand it to `#[attr]` - let mut orig_trees = orig_tokens.trees(); + let mut orig_trees = attr.token_trees().into_iter(); let TokenTree::Token(pound_token @ Token { kind: TokenKind::Pound, .. }, _) = orig_trees.next().unwrap().clone() else { From d8b6aa6d0dabc0c102f16f9f9bb35f687a63101c Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 10 Jul 2024 15:11:57 +1000 Subject: [PATCH 7/9] Use `cfg_attr` as a name more. In various functions where the attribute being processed is known to be a `#[cfg_attr(...)]` attribute. I find this a helpful reminder. --- compiler/rustc_expand/src/config.rs | 22 +++++++++++----------- compiler/rustc_parse/src/lib.rs | 6 +++--- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/compiler/rustc_expand/src/config.rs b/compiler/rustc_expand/src/config.rs index 8d796500dcc67..13414f990fba9 100644 --- a/compiler/rustc_expand/src/config.rs +++ b/compiler/rustc_expand/src/config.rs @@ -253,9 +253,9 @@ impl<'a> StripUnconfigured<'a> { /// Gives a compiler warning when the `cfg_attr` contains no attributes and /// is in the original source file. 
Gives a compiler error if the syntax of /// the attribute is incorrect. - pub(crate) fn expand_cfg_attr(&self, attr: &Attribute, recursive: bool) -> Vec { + pub(crate) fn expand_cfg_attr(&self, cfg_attr: &Attribute, recursive: bool) -> Vec { let Some((cfg_predicate, expanded_attrs)) = - rustc_parse::parse_cfg_attr(attr, &self.sess.psess) + rustc_parse::parse_cfg_attr(cfg_attr, &self.sess.psess) else { return vec![]; }; @@ -264,7 +264,7 @@ impl<'a> StripUnconfigured<'a> { if expanded_attrs.is_empty() { self.sess.psess.buffer_lint( rustc_lint_defs::builtin::UNUSED_ATTRIBUTES, - attr.span, + cfg_attr.span, ast::CRATE_NODE_ID, BuiltinLintDiag::CfgAttrNoAttributes, ); @@ -280,16 +280,16 @@ impl<'a> StripUnconfigured<'a> { // `#[cfg_attr(false, cfg_attr(true, some_attr))]`. expanded_attrs .into_iter() - .flat_map(|item| self.process_cfg_attr(&self.expand_cfg_attr_item(attr, item))) + .flat_map(|item| self.process_cfg_attr(&self.expand_cfg_attr_item(cfg_attr, item))) .collect() } else { - expanded_attrs.into_iter().map(|item| self.expand_cfg_attr_item(attr, item)).collect() + expanded_attrs.into_iter().map(|item| self.expand_cfg_attr_item(cfg_attr, item)).collect() } } fn expand_cfg_attr_item( &self, - attr: &Attribute, + cfg_attr: &Attribute, (item, item_span): (ast::AttrItem, Span), ) -> Attribute { // We are taking an attribute of the form `#[cfg_attr(pred, attr)]` @@ -300,11 +300,11 @@ impl<'a> StripUnconfigured<'a> { // Use the `#` in `#[cfg_attr(pred, attr)]` as the `#` token // for `attr` when we expand it to `#[attr]` - let mut orig_trees = attr.token_trees().into_iter(); + let mut orig_trees = cfg_attr.token_trees().into_iter(); let TokenTree::Token(pound_token @ Token { kind: TokenKind::Pound, .. }, _) = orig_trees.next().unwrap().clone() else { - panic!("Bad tokens for attribute {attr:?}"); + panic!("Bad tokens for attribute {cfg_attr:?}"); }; // We don't really have a good span to use for the synthesized `[]` @@ -318,12 +318,12 @@ impl<'a> StripUnconfigured<'a> { .unwrap_or_else(|| panic!("Missing tokens for {item:?}")) .to_attr_token_stream(), ); - let trees = if attr.style == AttrStyle::Inner { + let trees = if cfg_attr.style == AttrStyle::Inner { // For inner attributes, we do the same thing for the `!` in `#![some_attr]` let TokenTree::Token(bang_token @ Token { kind: TokenKind::Not, .. 
}, _) = orig_trees.next().unwrap().clone() else { - panic!("Bad tokens for attribute {attr:?}"); + panic!("Bad tokens for attribute {cfg_attr:?}"); }; vec![ AttrTokenTree::Token(pound_token, Spacing::Joint), @@ -338,7 +338,7 @@ impl<'a> StripUnconfigured<'a> { &self.sess.psess.attr_id_generator, item, tokens, - attr.style, + cfg_attr.style, item_span, ); if attr.has_name(sym::crate_type) { diff --git a/compiler/rustc_parse/src/lib.rs b/compiler/rustc_parse/src/lib.rs index 5522127be83ef..4454747ea0212 100644 --- a/compiler/rustc_parse/src/lib.rs +++ b/compiler/rustc_parse/src/lib.rs @@ -157,14 +157,14 @@ pub fn fake_token_stream_for_crate(psess: &ParseSess, krate: &ast::Crate) -> Tok } pub fn parse_cfg_attr( - attr: &Attribute, + cfg_attr: &Attribute, psess: &ParseSess, ) -> Option<(MetaItem, Vec<(AttrItem, Span)>)> { const CFG_ATTR_GRAMMAR_HELP: &str = "#[cfg_attr(condition, attribute, other_attribute, ...)]"; const CFG_ATTR_NOTE_REF: &str = "for more information, visit \ "; - match attr.get_normal_item().args { + match cfg_attr.get_normal_item().args { ast::AttrArgs::Delimited(ast::DelimArgs { dspan, delim, ref tokens }) if !tokens.is_empty() => { @@ -180,7 +180,7 @@ pub fn parse_cfg_attr( } _ => { psess.dcx().emit_err(errors::MalformedCfgAttr { - span: attr.span, + span: cfg_attr.span, sugg: CFG_ATTR_GRAMMAR_HELP, }); } From d6ebbbfcb203b969f9b6e02998efd9badbe885d6 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 10 Jul 2024 15:47:02 +1000 Subject: [PATCH 8/9] Factor out `AttrsTarget` flattening code. This commit does the following. - Pulls the code out of `AttrTokenStream::to_token_trees` into a new function `attrs_and_tokens_to_token_trees`. - Simplifies `TokenStream::from_ast` by calling the new function. This is nicer than the old way, which created a temporary `AttrTokenStream` containing a single `AttrsTarget` (which required some cloning) just to call `to_token_trees` on it. (It is good to remove this use of `AttrsTarget` which isn't related to `cfg_attr` expansion.) --- compiler/rustc_ast/src/tokenstream.rs | 132 +++++++++++++------------- compiler/rustc_expand/src/config.rs | 5 +- 2 files changed, 72 insertions(+), 65 deletions(-) diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index 1f2adde2570d7..1f69a7dc35666 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -16,7 +16,7 @@ use crate::ast::{AttrStyle, StmtKind}; use crate::ast_traits::{HasAttrs, HasTokens}; use crate::token::{self, Delimiter, Nonterminal, Token, TokenKind}; -use crate::AttrVec; +use crate::{AttrVec, Attribute}; use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; use rustc_data_structures::sync::{self, Lrc}; @@ -179,11 +179,10 @@ impl AttrTokenStream { AttrTokenStream(Lrc::new(tokens)) } - /// Converts this `AttrTokenStream` to a plain `Vec`. - /// During conversion, `AttrTokenTree::AttrsTarget` get 'flattened' - /// back to a `TokenStream` of the form `outer_attr attr_target`. - /// If there are inner attributes, they are inserted into the proper - /// place in the attribute target tokens. + /// Converts this `AttrTokenStream` to a plain `Vec`. During + /// conversion, any `AttrTokenTree::AttrsTarget` gets "flattened" back to a + /// `TokenStream`, as described in the comment on + /// `attrs_and_tokens_to_token_trees`. 
pub fn to_token_trees(&self) -> Vec { let mut res = Vec::with_capacity(self.0.len()); for tree in self.0.iter() { @@ -200,52 +199,7 @@ impl AttrTokenStream { )) } AttrTokenTree::AttrsTarget(target) => { - let idx = target - .attrs - .partition_point(|attr| matches!(attr.style, crate::AttrStyle::Outer)); - let (outer_attrs, inner_attrs) = target.attrs.split_at(idx); - - let mut target_tokens = target.tokens.to_attr_token_stream().to_token_trees(); - if !inner_attrs.is_empty() { - let mut found = false; - // Check the last two trees (to account for a trailing semi) - for tree in target_tokens.iter_mut().rev().take(2) { - if let TokenTree::Delimited(span, spacing, delim, delim_tokens) = tree { - // Inner attributes are only supported on extern blocks, functions, - // impls, and modules. All of these have their inner attributes - // placed at the beginning of the rightmost outermost braced group: - // e.g. fn foo() { #![my_attr] } - // - // Therefore, we can insert them back into the right location - // without needing to do any extra position tracking. - // - // Note: Outline modules are an exception - they can - // have attributes like `#![my_attr]` at the start of a file. - // Support for custom attributes in this position is not - // properly implemented - we always synthesize fake tokens, - // so we never reach this code. - - let mut tts = vec![]; - for inner_attr in inner_attrs { - tts.extend(inner_attr.token_trees()); - } - tts.extend(delim_tokens.0.iter().cloned()); - let stream = TokenStream::new(tts); - *tree = TokenTree::Delimited(*span, *spacing, *delim, stream); - found = true; - break; - } - } - - assert!( - found, - "Failed to find trailing delimited group in: {target_tokens:?}" - ); - } - for attr in outer_attrs { - res.extend(attr.token_trees()); - } - res.extend(target_tokens); + attrs_and_tokens_to_token_trees(&target.attrs, &target.tokens, &mut res); } } } @@ -253,6 +207,64 @@ impl AttrTokenStream { } } +// Converts multiple attributes and the tokens for a target AST node into token trees, and appends +// them to `res`. +// +// Example: if the AST node is "fn f() { blah(); }", then: +// - Simple if no attributes are present, e.g. "fn f() { blah(); }" +// - Simple if only outer attribute are present, e.g. "#[outer1] #[outer2] fn f() { blah(); }" +// - Trickier if inner attributes are present, because they must be moved within the AST node's +// tokens, e.g. "#[outer] fn f() { #![inner] blah() }" +fn attrs_and_tokens_to_token_trees( + attrs: &[Attribute], + target_tokens: &LazyAttrTokenStream, + res: &mut Vec, +) { + let idx = attrs.partition_point(|attr| matches!(attr.style, crate::AttrStyle::Outer)); + let (outer_attrs, inner_attrs) = attrs.split_at(idx); + + // Add outer attribute tokens. + for attr in outer_attrs { + res.extend(attr.token_trees()); + } + + // Add target AST node tokens. + res.extend(target_tokens.to_attr_token_stream().to_token_trees()); + + // Insert inner attribute tokens. + if !inner_attrs.is_empty() { + let mut found = false; + // Check the last two trees (to account for a trailing semi) + for tree in res.iter_mut().rev().take(2) { + if let TokenTree::Delimited(span, spacing, delim, delim_tokens) = tree { + // Inner attributes are only supported on extern blocks, functions, + // impls, and modules. All of these have their inner attributes + // placed at the beginning of the rightmost outermost braced group: + // e.g. 
fn foo() { #![my_attr] } + // + // Therefore, we can insert them back into the right location + // without needing to do any extra position tracking. + // + // Note: Outline modules are an exception - they can + // have attributes like `#![my_attr]` at the start of a file. + // Support for custom attributes in this position is not + // properly implemented - we always synthesize fake tokens, + // so we never reach this code. + let mut tts = vec![]; + for inner_attr in inner_attrs { + tts.extend(inner_attr.token_trees()); + } + tts.extend(delim_tokens.0.iter().cloned()); + let stream = TokenStream::new(tts); + *tree = TokenTree::Delimited(*span, *spacing, *delim, stream); + found = true; + break; + } + } + assert!(found, "Failed to find trailing delimited group in: {res:?}"); + } +} + /// Stores the tokens for an attribute target, along /// with its attributes. /// @@ -438,18 +450,10 @@ impl TokenStream { } pub fn from_ast(node: &(impl HasAttrs + HasTokens + fmt::Debug)) -> TokenStream { - let Some(tokens) = node.tokens() else { - panic!("missing tokens for node: {:?}", node); - }; - let attrs = node.attrs(); - let attr_stream = if attrs.is_empty() { - tokens.to_attr_token_stream() - } else { - let target = - AttrsTarget { attrs: attrs.iter().cloned().collect(), tokens: tokens.clone() }; - AttrTokenStream::new(vec![AttrTokenTree::AttrsTarget(target)]) - }; - TokenStream::new(attr_stream.to_token_trees()) + let tokens = node.tokens().unwrap_or_else(|| panic!("missing tokens for node: {:?}", node)); + let mut tts = vec![]; + attrs_and_tokens_to_token_trees(node.attrs(), tokens, &mut tts); + TokenStream::new(tts) } pub fn from_nonterminal_ast(nt: &Nonterminal) -> TokenStream { diff --git a/compiler/rustc_expand/src/config.rs b/compiler/rustc_expand/src/config.rs index 13414f990fba9..323cea1af4acf 100644 --- a/compiler/rustc_expand/src/config.rs +++ b/compiler/rustc_expand/src/config.rs @@ -283,7 +283,10 @@ impl<'a> StripUnconfigured<'a> { .flat_map(|item| self.process_cfg_attr(&self.expand_cfg_attr_item(cfg_attr, item))) .collect() } else { - expanded_attrs.into_iter().map(|item| self.expand_cfg_attr_item(cfg_attr, item)).collect() + expanded_attrs + .into_iter() + .map(|item| self.expand_cfg_attr_item(cfg_attr, item)) + .collect() } } From 478ba5902629b0a4dda914e81167e6b3b44c3a51 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 10 Jul 2024 16:29:48 +1000 Subject: [PATCH 9/9] Add some comments. Explaining things that took me some time to work out. --- compiler/rustc_ast/src/tokenstream.rs | 5 ++++- compiler/rustc_expand/src/config.rs | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index 1f69a7dc35666..a92ef575777c7 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -269,11 +269,14 @@ fn attrs_and_tokens_to_token_trees( /// with its attributes. /// /// This is constructed during parsing when we need to capture -/// tokens. +/// tokens, for `cfg` and `cfg_attr` attributes. /// /// For example, `#[cfg(FALSE)] struct Foo {}` would /// have an `attrs` field containing the `#[cfg(FALSE)]` attr, /// and a `tokens` field storing the (unparsed) tokens `struct Foo {}` +/// +/// The `cfg`/`cfg_attr` processing occurs in +/// `StripUnconfigured::configure_tokens`. #[derive(Clone, Debug, Encodable, Decodable)] pub struct AttrsTarget { /// Attributes, both outer and inner. 
diff --git a/compiler/rustc_expand/src/config.rs b/compiler/rustc_expand/src/config.rs index 323cea1af4acf..9da4aa84db525 100644 --- a/compiler/rustc_expand/src/config.rs +++ b/compiler/rustc_expand/src/config.rs @@ -187,6 +187,7 @@ impl<'a> StripUnconfigured<'a> { .iter() .filter_map(|tree| match tree.clone() { AttrTokenTree::AttrsTarget(mut target) => { + // Expand any `cfg_attr` attributes. target.attrs.flat_map_in_place(|attr| self.process_cfg_attr(&attr)); if self.in_cfg(&target.attrs) { @@ -195,6 +196,8 @@ impl<'a> StripUnconfigured<'a> { ); Some(AttrTokenTree::AttrsTarget(target)) } else { + // Remove the target if there's a `cfg` attribute and + // the condition isn't satisfied. None } }
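
A few standalone sketches of the idioms this series relies on, for readers who are not steeped in these parts of the compiler. None of the code below is from the rustc tree; every name in it (`Config`, `debug_dump`, `store_tokens`, `Frame`, `collect`) is invented for illustration, and each sketch is only a minimal model of the idea, not the real implementation.

Patches 6 through 9 all deal with how `#[cfg_attr(...)]` attributes are expanded and how `#[cfg(...)]` targets are kept or dropped. At the source level, the behaviour being preserved is the familiar one: when the predicate holds, `cfg_attr(pred, attr)` becomes `#[attr]`; when it does not, the attribute vanishes, and a false `cfg` removes its target entirely.

    // With `debug_assertions` set, the first attribute expands to
    // `#[derive(Debug)]` and `debug_dump` exists; without it, both disappear.
    #[cfg_attr(debug_assertions, derive(Debug))]
    struct Config {
        threshold: u32,
    }

    #[cfg(debug_assertions)]
    fn debug_dump(c: &Config) {
        println!("{c:?} (threshold = {})", c.threshold);
    }

    fn main() {
        let c = Config { threshold: 3 };
        #[cfg(debug_assertions)]
        debug_dump(&c);
        #[cfg(not(debug_assertions))]
        println!("{}", c.threshold);
    }

The next sketch mirrors the `@` pattern from patch 1: `Some(slot @ None)` checks in a single pattern that a mutable slot was provided and is still empty, and binds that slot so it can be filled, the same shape as `if let Some(target_tokens @ None) = ret.tokens_mut()`.

    // Hypothetical stand-in for "store the captured tokens if the node supports
    // tokens and none are stored yet".
    fn store_tokens(target: Option<&mut Option<String>>, tokens: String) {
        if let Some(slot @ None) = target {
            *slot = Some(tokens);
        }
    }

    fn main() {
        let mut captured = None;
        store_tokens(Some(&mut captured), "example tokens".to_string());
        assert_eq!(captured.as_deref(), Some("example tokens"));

        // A slot that is already filled is left alone.
        store_tokens(Some(&mut captured), "ignored".to_string());
        assert_eq!(captured.as_deref(), Some("example tokens"));
    }

The last sketch mirrors the "split the stack" shape from patches 2 and 3: the always-present top frame lives in its own variable and only the enclosing frames go into a `Vec`, with `mem::replace` doing the push and pop, so the loop body never needs `last_mut().unwrap()`.

    use std::mem;

    // Hypothetical, simplified frame; the real `FrameData` also tracks the
    // opening delimiter's span and spacing.
    #[derive(Default)]
    struct Frame {
        inner: Vec<char>,
    }

    // Collects the non-delimiter characters of a balanced `(`/`)` string and
    // reports the maximum nesting depth, using the split-stack shape.
    fn collect(input: &str) -> (Vec<char>, usize) {
        let mut stack_top = Frame::default();
        let mut stack_rest: Vec<Frame> = Vec::new();
        let mut max_depth = 0;
        for c in input.chars() {
            match c {
                '(' => {
                    // Open a frame: the old top becomes part of the rest.
                    stack_rest.push(mem::replace(&mut stack_top, Frame::default()));
                    max_depth = max_depth.max(stack_rest.len());
                }
                ')' => {
                    // Close a frame: restore the previous top and fold the
                    // finished frame's contents into it.
                    let finished = mem::replace(&mut stack_top, stack_rest.pop().unwrap());
                    stack_top.inner.extend(finished.inner);
                }
                other => stack_top.inner.push(other),
            }
        }
        (stack_top.inner, max_depth)
    }

    fn main() {
        let (chars, depth) = collect("a(b(c)d)e");
        assert_eq!(chars.into_iter().collect::<String>(), "abcde");
        assert_eq!(depth, 2);
    }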