Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experimental: Add Derive Proc-Macro Caching #129102

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3666,6 +3666,7 @@ dependencies = [
"rustc_lexer",
"rustc_lint_defs",
"rustc_macros",
"rustc_middle",
"rustc_parse",
"rustc_serialize",
"rustc_session",
Expand Down
99 changes: 98 additions & 1 deletion compiler/rustc_ast/src/tokenstream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@
//! ownership of the original.

use std::borrow::Cow;
use std::hash::Hash;
use std::{cmp, fmt, iter};

use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::sync::{self, Lrc};
use rustc_macros::{Decodable, Encodable, HashStable_Generic};
use rustc_serialize::{Decodable, Encodable};
use rustc_serialize::{Decodable, Encodable, Encoder};
use rustc_span::def_id::{CrateNum, DefIndex};
use rustc_span::{sym, Span, SpanDecoder, SpanEncoder, Symbol, DUMMY_SP};

use crate::ast::{AttrStyle, StmtKind};
Expand Down Expand Up @@ -140,6 +142,11 @@ impl fmt::Debug for LazyAttrTokenStream {

impl<S: SpanEncoder> Encodable<S> for LazyAttrTokenStream {
fn encode(&self, _s: &mut S) {
// FIXME(pr-time): Just a reminder that this exists/was tried out,
// but probably not necessary anymore (see below).
// self.to_attr_token_stream().encode(s)
// We should not need to anymore, now that we `flatten`?
// Yep, that seems to be true! :)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment can be removed now.

panic!("Attempted to encode LazyAttrTokenStream");
}
}
Expand Down Expand Up @@ -296,6 +303,96 @@ pub struct AttrsTarget {
#[derive(Clone, Debug, Default, Encodable, Decodable)]
pub struct TokenStream(pub(crate) Lrc<Vec<TokenTree>>);

struct HashEncoder<H: std::hash::Hasher> {
hasher: H,
}

impl<H: std::hash::Hasher> Encoder for HashEncoder<H> {
fn emit_usize(&mut self, v: usize) {
self.hasher.write_usize(v)
}

fn emit_u128(&mut self, v: u128) {
self.hasher.write_u128(v)
}

fn emit_u64(&mut self, v: u64) {
self.hasher.write_u64(v)
}

fn emit_u32(&mut self, v: u32) {
self.hasher.write_u32(v)
}

fn emit_u16(&mut self, v: u16) {
self.hasher.write_u16(v)
}

fn emit_u8(&mut self, v: u8) {
self.hasher.write_u8(v)
}

fn emit_isize(&mut self, v: isize) {
self.hasher.write_isize(v)
}

fn emit_i128(&mut self, v: i128) {
self.hasher.write_i128(v)
}

fn emit_i64(&mut self, v: i64) {
self.hasher.write_i64(v)
}

fn emit_i32(&mut self, v: i32) {
self.hasher.write_i32(v)
}

fn emit_i16(&mut self, v: i16) {
self.hasher.write_i16(v)
}

fn emit_raw_bytes(&mut self, s: &[u8]) {
self.hasher.write(s)
}
}

impl<H: std::hash::Hasher> SpanEncoder for HashEncoder<H> {
fn encode_span(&mut self, span: Span) {
span.hash(&mut self.hasher)
}

fn encode_symbol(&mut self, symbol: Symbol) {
symbol.hash(&mut self.hasher)
}

fn encode_expn_id(&mut self, expn_id: rustc_span::ExpnId) {
expn_id.hash(&mut self.hasher)
}

fn encode_syntax_context(&mut self, syntax_context: rustc_span::SyntaxContext) {
syntax_context.hash(&mut self.hasher)
}

fn encode_crate_num(&mut self, crate_num: CrateNum) {
crate_num.hash(&mut self.hasher)
}

fn encode_def_index(&mut self, def_index: DefIndex) {
def_index.hash(&mut self.hasher)
}

fn encode_def_id(&mut self, def_id: rustc_span::def_id::DefId) {
def_id.hash(&mut self.hasher)
}
}

impl Hash for TokenStream {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is implementation of unstable hash required for TokenStream?
Unstable hashing shouldn't be used in incremental compilation, and TokenStrem already has a stable hashing implementation because token streams are used in attributes.

(Also the use of (Span)Encoder here is strange, I don't think it's supposed to be used like this.)

fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
Encodable::encode(self, &mut HashEncoder { hasher: state });
}
}

/// Indicates whether a token can join with the following token to form a
/// compound token. Used for conversions to `proc_macro::Spacing`. Also used to
/// guide pretty-printing, which is where the `JointHidden` value (which isn't
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_expand/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ rustc_fluent_macro = { path = "../rustc_fluent_macro" }
rustc_lexer = { path = "../rustc_lexer" }
rustc_lint_defs = { path = "../rustc_lint_defs" }
rustc_macros = { path = "../rustc_macros" }
rustc_middle = { path = "../rustc_middle" }
rustc_parse = { path = "../rustc_parse" }
rustc_serialize = { path = "../rustc_serialize" }
rustc_session = { path = "../rustc_session" }
Expand Down
102 changes: 102 additions & 0 deletions compiler/rustc_expand/src/derive_macro_expansion.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
use std::cell::Cell;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's probably better to merge this file into compiler/rustc_expand/src/proc_macro.rs and put the new content in the bottom.

use std::ptr::{self, NonNull};

use rustc_ast::tokenstream::TokenStream;
use rustc_data_structures::svh::Svh;
use rustc_middle::ty::TyCtxt;
use rustc_span::profiling::SpannedEventArgRecorder;
use rustc_span::LocalExpnId;

use crate::base::ExtCtxt;
use crate::errors;

pub(super) fn provide_derive_macro_expansion<'tcx>(
tcx: TyCtxt<'tcx>,
key: (LocalExpnId, Svh, &'tcx TokenStream),
) -> Result<&'tcx TokenStream, ()> {
let (invoc_id, _macro_crate_hash, input) = key;

let res = with_context(|(ecx, client)| {
let span = invoc_id.expn_data().call_site;
let _timer = ecx.sess.prof.generic_activity_with_arg_recorder(
"expand_derive_proc_macro_inner",
|recorder| {
recorder.record_arg_with_span(ecx.sess.source_map(), ecx.expansion_descr(), span);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ecx.expansion_descr() doesn't need ecx, it only needs invoc_id that is already included into the key.

},
);
let proc_macro_backtrace = ecx.ecfg.proc_macro_backtrace;
let strategy = crate::proc_macro::exec_strategy(ecx);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't need ecx, only session.

let server = crate::proc_macro_server::Rustc::new(ecx);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, Rustc does needs ecx and uses it in nontrivial ways, which may break macro caching.
This is basically a side channel through which data can flow to and from the macro without query system noticing.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most of the logic in Rustc::new doesn't need ecx though, except for storing ecx itself into the field of course.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is why I didn't bother replacing the other uses of ecx before, because I saw that this is tied pretty strongly to ecx. But I can probably take care of the replaceable uses in another commit, since it should be mostly independent I'd think.

let res = match client.run(&strategy, server, input.clone(), proc_macro_backtrace) {
// FIXME(pr-time): without flattened some (weird) tests fail, but no idea if it's correct/enough
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Flattening again?
Isn't it already flattened in compiler/rustc_expand/src/proc_macro.rs?

Ok(stream) => Ok(tcx.arena.alloc(stream.flattened()) as &TokenStream),
Err(e) => {
ecx.dcx().emit_err({
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Anything that can use tcx instead of ecx should use tcx.
sess is available from tcx in particular.

errors::ProcMacroDerivePanicked {
span,
message: e.as_str().map(|message| errors::ProcMacroDerivePanickedHelp {
message: message.into(),
}),
}
});
Err(())
}
};
res
});

res
}

type CLIENT = pm::bridge::client::Client<pm::TokenStream, pm::TokenStream>;

// based on rust/compiler/rustc_middle/src/ty/context/tls.rs
thread_local! {
/// A thread local variable that stores a pointer to the current `CONTEXT`.
static TLV: Cell<(*mut (), Option<CLIENT>)> = const { Cell::new((ptr::null_mut(), None)) };
}

/// Sets `context` as the new current `CONTEXT` for the duration of the function `f`.
#[inline]
pub(crate) fn enter_context<'a, F, R>(context: (&mut ExtCtxt<'a>, CLIENT), f: F) -> R
where
F: FnOnce() -> R,
{
let (ectx, client) = context;
let erased = (ectx as *mut _ as *mut (), Some(client));
TLV.with(|tlv| {
let old = tlv.replace(erased);
let _reset = rustc_data_structures::defer(move || tlv.set(old));
f()
})
}

/// Allows access to the current `CONTEXT`.
/// Panics if there is no `CONTEXT` available.
#[inline]
#[track_caller]
fn with_context<F, R>(f: F) -> R
where
F: for<'a, 'b> FnOnce(&'b mut (&mut ExtCtxt<'a>, CLIENT)) -> R,
{
let (ectx, client_opt) = TLV.get();
let ectx = NonNull::new(ectx).expect("no CONTEXT stored in tls");

// We could get an `CONTEXT` pointer from another thread.
// Ensure that `CONTEXT` is `DynSync`.
// FIXME(pr-time): we should not be able to?
// sync::assert_dyn_sync::<CONTEXT<'_>>();

// prevent double entering, as that would allow creating two `&mut ExtCtxt`s
// FIXME(pr-time): probably use a RefCell instead (which checks this properly)?
TLV.with(|tlv| {
let old = tlv.replace((ptr::null_mut(), None));
let _reset = rustc_data_structures::defer(move || tlv.set(old));
let ectx = {
let mut casted = ectx.cast::<ExtCtxt<'_>>();
unsafe { casted.as_mut() }
};

f(&mut (ectx, client_opt.unwrap()))
})
}
5 changes: 5 additions & 0 deletions compiler/rustc_expand/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,15 @@ mod proc_macro_server;
pub use mbe::macro_rules::compile_declarative_macro;
pub mod base;
pub mod config;
mod derive_macro_expansion;
pub mod expand;
pub mod module;
// FIXME(Nilstrieb) Translate proc_macro diagnostics
#[allow(rustc::untranslatable_diagnostic)]
pub mod proc_macro;

pub fn provide(providers: &mut rustc_middle::util::Providers) {
providers.derive_macro_expansion = derive_macro_expansion::provide_derive_macro_expansion;
}

rustc_fluent_macro::fluent_messages! { "../messages.ftl" }
64 changes: 37 additions & 27 deletions compiler/rustc_expand/src/proc_macro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use rustc_ast as ast;
use rustc_ast::ptr::P;
use rustc_ast::tokenstream::TokenStream;
use rustc_errors::ErrorGuaranteed;
use rustc_middle::ty;
use rustc_parse::parser::{ForceCollect, Parser};
use rustc_session::config::ProcMacroExecutionStrategy;
use rustc_span::profiling::SpannedEventArgRecorder;
Expand Down Expand Up @@ -31,7 +32,7 @@ impl<T> pm::bridge::server::MessagePipe<T> for MessagePipe<T> {
}
}

fn exec_strategy(ecx: &ExtCtxt<'_>) -> impl pm::bridge::server::ExecutionStrategy {
pub fn exec_strategy(ecx: &ExtCtxt<'_>) -> impl pm::bridge::server::ExecutionStrategy {
pm::bridge::server::MaybeCrossThread::<MessagePipe<_>>::new(
ecx.sess.opts.unstable_opts.proc_macro_execution_strategy
== ProcMacroExecutionStrategy::CrossThread,
Expand Down Expand Up @@ -114,6 +115,13 @@ impl MultiItemModifier for DeriveProcMacro {
item: Annotatable,
_is_derive_const: bool,
) -> ExpandResult<Vec<Annotatable>, Annotatable> {
let _timer = ecx.sess.prof.generic_activity_with_arg_recorder(
"expand_derive_proc_macro_outer",
|recorder| {
recorder.record_arg_with_span(ecx.sess.source_map(), ecx.expansion_descr(), span);
},
);

// We need special handling for statement items
// (e.g. `fn foo() { #[derive(Debug)] struct Bar; }`)
let is_stmt = matches!(item, Annotatable::Stmt(..));
Expand All @@ -124,36 +132,38 @@ impl MultiItemModifier for DeriveProcMacro {
// altogether. See #73345.
crate::base::ann_pretty_printing_compatibility_hack(&item, &ecx.sess);
let input = item.to_tokens();
let stream = {
let _timer =
ecx.sess.prof.generic_activity_with_arg_recorder("expand_proc_macro", |recorder| {
recorder.record_arg_with_span(
ecx.sess.source_map(),
ecx.expansion_descr(),
span,
);
});
let proc_macro_backtrace = ecx.ecfg.proc_macro_backtrace;
let strategy = exec_strategy(ecx);
let server = proc_macro_server::Rustc::new(ecx);
match self.client.run(&strategy, server, input, proc_macro_backtrace) {
Ok(stream) => stream,
Err(e) => {
ecx.dcx().emit_err({
errors::ProcMacroDerivePanicked {
span,
message: e.as_str().map(|message| {
errors::ProcMacroDerivePanickedHelp { message: message.into() }
}),
}
});
return ExpandResult::Ready(vec![]);
let res = ty::tls::with(|tcx| {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This shouldn't be necessary, ecx.resolver has tcx, it just needs to be exposed.

// FIXME(pr-time): without flattened some (weird) tests fail, but no idea if it's correct/enough
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In theory flattening should always be correct (proc macro APIs will perform flattening lazily when necessary), but in practice there may be issues with badly written proc macros that pretty-print token streams and then parse the result as a text.

let input = tcx.arena.alloc(input.flattened()) as &TokenStream;
let invoc_id = ecx.current_expansion.id;

// FIXME(pr-time): Just using the crate hash to notice when the proc-macro code has
// changed. How to *correctly* depend on exactly the macro definition?
// I.e., depending on the crate hash is just a HACK (and leaves garbage in the
// incremental compilation dir).
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which garbage?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought that the incremental cache entries with old crate hashes would lead to garbage in the incremental dir. But now that mention it, this seems like it's actually the normal case for incremental comp, so it should probably be covered by whatever mechanisms are already in play. Is that the case?

let macro_def_id = invoc_id.expn_data().macro_def_id.unwrap();
let proc_macro_crate_hash = tcx.crate_hash(macro_def_id.krate);

assert_eq!(invoc_id.expn_data().call_site, span);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Operations on global hygiene data like .expn_data() are relatively expensive and are better done once.


let res = crate::derive_macro_expansion::enter_context((ecx, self.client), move || {
let key = (invoc_id, proc_macro_crate_hash, input);
if tcx.sess.opts.unstable_opts.cache_all_derive_macros {
tcx.derive_macro_expansion(key).cloned()
} else {
crate::derive_macro_expansion::provide_derive_macro_expansion(tcx, key).cloned()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need to setup any global context in this case, we could just pass arguments, the context only needs to be setup around the query call.

}
}
});

res
});
let Ok(output) = res else {
// error will already have been emitted
return ExpandResult::Ready(vec![]);
};

let error_count_before = ecx.dcx().err_count();
let mut parser = Parser::new(&ecx.sess.psess, stream, Some("proc-macro derive"));
let mut parser = Parser::new(&ecx.sess.psess, output, Some("proc-macro derive"));
let mut items = vec![];

loop {
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_interface/src/passes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,7 @@ pub static DEFAULT_QUERY_PROVIDERS: LazyLock<Providers> = LazyLock::new(|| {
providers.resolutions = |tcx, ()| tcx.resolver_for_lowering_raw(()).1;
providers.early_lint_checks = early_lint_checks;
proc_macro_decls::provide(providers);
rustc_expand::provide(providers);
rustc_const_eval::provide(providers);
rustc_middle::hir::provide(providers);
rustc_borrowck::provide(providers);
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_middle/src/arena.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ macro_rules! arena_types {
[decode] specialization_graph: rustc_middle::traits::specialization_graph::Graph,
[] crate_inherent_impls: rustc_middle::ty::CrateInherentImpls,
[] hir_owner_nodes: rustc_hir::OwnerNodes<'tcx>,
[decode] token_stream: rustc_ast::tokenstream::TokenStream,
]);
)
}
Expand Down
6 changes: 6 additions & 0 deletions compiler/rustc_middle/src/query/erase.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use std::intrinsics::transmute_unchecked;
use std::mem::MaybeUninit;

use rustc_ast::tokenstream::TokenStream;

use crate::query::CyclePlaceholder;
use crate::ty::adjustment::CoerceUnsizedInfo;
use crate::ty::{self, Ty};
Expand Down Expand Up @@ -172,6 +174,10 @@ impl EraseType for Result<ty::EarlyBinder<'_, Ty<'_>>, CyclePlaceholder> {
type Result = [u8; size_of::<Result<ty::EarlyBinder<'static, Ty<'_>>, CyclePlaceholder>>()];
}

impl EraseType for Result<&'_ TokenStream, ()> {
type Result = [u8; size_of::<Result<&'static TokenStream, ()>>()];
}

impl<T> EraseType for Option<&'_ T> {
type Result = [u8; size_of::<Option<&'static ()>>()];
}
Expand Down
Loading
Loading