-
Notifications
You must be signed in to change notification settings - Fork 12.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Auto merge of #100996 - m-ou-se:format-args-2, r=estebank
Rewrite and refactor format_args!() builtin macro. This is a near complete rewrite of `compiler/rustc_builtin_macros/src/format.rs`. This gets rid of the massive unmaintainable [`Context` struct](https://github.com/rust-lang/rust/blob/76531befc4b0352247ada67bd225e8cf71ee5686/compiler/rustc_builtin_macros/src/format.rs#L176-L263), and splits the macro expansion into three parts: 1. First, `parse_args` will parse the `(literal, arg, arg, name=arg, name=arg)` syntax, but doesn't parse the template (the literal) itself. 2. Second, `make_format_args` will parse the template, the format options, resolve argument references, produce diagnostics, and turn the whole thing into a `FormatArgs` structure. 3. Finally, `expand_parsed_format_args` will turn that `FormatArgs` structure into the expression that the macro expands to. In other words, the `format_args` builtin macro used to be a hard-to-maintain 'single pass compiler', which I've split into a three-phase compiler with a parser/tokenizer (step 1), semantic analysis (step 2), and backend (step 3). (It's compilers all the way down. ^^) This can serve as a great starting point for #99012, which will only need to change the implementation of 3, while leaving step 1 and 2 unchanged. It also makes rust-lang/compiler-team#541 easier, which could then upgrade the new `FormatArgs` struct to an `ast` node and remove step 3, moving that step to later in the compilation process. It also fixes a few diagnostics bugs. This also [significantly reduces](https://gist.github.com/m-ou-se/b67b2d54172c4837a5ab1b26fa3e5284) the amount of generated code for cases with arguments in non-default order without formatting options, like `"{1} {0}"` or `"{a} {}"`, etc.
- Loading branch information
Showing
16 changed files
with
1,449 additions
and
1,540 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,240 @@ | ||
use rustc_ast::ptr::P; | ||
use rustc_ast::Expr; | ||
use rustc_data_structures::fx::FxHashMap; | ||
use rustc_span::symbol::{Ident, Symbol}; | ||
use rustc_span::Span; | ||
|
||
// Definitions:
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
// └──────────────────────────────────────────────┘
//                    FormatArgs
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
//                                     └─────────┘
//                                      argument
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
//              └───────────────────┘
//                    template
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
//               └────┘└─────────┘└┘
//                     pieces
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
//               └────┘           └┘
//                 literal pieces
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
//                     └─────────┘
//                     placeholder
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
//                      └─┘  └─┘
//                      positions (could be names, numbers, empty, or `*`)
|
||
/// (Parsed) format args. | ||
/// | ||
/// Basically the "AST" for a complete `format_args!()`. | ||
/// | ||
/// E.g., `format_args!("hello {name}");`. | ||
#[derive(Clone, Debug)] | ||
pub struct FormatArgs { | ||
pub span: Span, | ||
pub template: Vec<FormatArgsPiece>, | ||
pub arguments: FormatArguments, | ||
} | ||
|
||
/// A piece of a format template string. | ||
/// | ||
/// E.g. "hello" or "{name}". | ||
#[derive(Clone, Debug)] | ||
pub enum FormatArgsPiece { | ||
Literal(Symbol), | ||
Placeholder(FormatPlaceholder), | ||
} | ||
|
||
/// The arguments to format_args!(). | ||
/// | ||
/// E.g. `1, 2, name="ferris", n=3`, | ||
/// but also implicit captured arguments like `x` in `format_args!("{x}")`. | ||
#[derive(Clone, Debug)] | ||
pub struct FormatArguments { | ||
arguments: Vec<FormatArgument>, | ||
num_unnamed_args: usize, | ||
num_explicit_args: usize, | ||
names: FxHashMap<Symbol, usize>, | ||
} | ||
|
||
impl FormatArguments { | ||
pub fn new() -> Self { | ||
Self { | ||
arguments: Vec::new(), | ||
names: FxHashMap::default(), | ||
num_unnamed_args: 0, | ||
num_explicit_args: 0, | ||
} | ||
} | ||
|
||
pub fn add(&mut self, arg: FormatArgument) -> usize { | ||
let index = self.arguments.len(); | ||
if let Some(name) = arg.kind.ident() { | ||
self.names.insert(name.name, index); | ||
} else if self.names.is_empty() { | ||
// Only count the unnamed args before the first named arg. | ||
// (Any later ones are errors.) | ||
self.num_unnamed_args += 1; | ||
} | ||
if !matches!(arg.kind, FormatArgumentKind::Captured(..)) { | ||
// This is an explicit argument. | ||
// Make sure that all arguments so far are explcit. | ||
assert_eq!( | ||
self.num_explicit_args, | ||
self.arguments.len(), | ||
"captured arguments must be added last" | ||
); | ||
self.num_explicit_args += 1; | ||
} | ||
self.arguments.push(arg); | ||
index | ||
} | ||
|
||
pub fn by_name(&self, name: Symbol) -> Option<(usize, &FormatArgument)> { | ||
let i = *self.names.get(&name)?; | ||
Some((i, &self.arguments[i])) | ||
} | ||
|
||
pub fn by_index(&self, i: usize) -> Option<&FormatArgument> { | ||
(i < self.num_explicit_args).then(|| &self.arguments[i]) | ||
} | ||
|
||
pub fn unnamed_args(&self) -> &[FormatArgument] { | ||
&self.arguments[..self.num_unnamed_args] | ||
} | ||
|
||
pub fn named_args(&self) -> &[FormatArgument] { | ||
&self.arguments[self.num_unnamed_args..self.num_explicit_args] | ||
} | ||
|
||
pub fn explicit_args(&self) -> &[FormatArgument] { | ||
&self.arguments[..self.num_explicit_args] | ||
} | ||
|
||
pub fn into_vec(self) -> Vec<FormatArgument> { | ||
self.arguments | ||
} | ||
} | ||
|
||
#[derive(Clone, Debug)] | ||
pub struct FormatArgument { | ||
pub kind: FormatArgumentKind, | ||
pub expr: P<Expr>, | ||
} | ||
|
||
#[derive(Clone, Debug)] | ||
pub enum FormatArgumentKind { | ||
/// `format_args(…, arg)` | ||
Normal, | ||
/// `format_args(…, arg = 1)` | ||
Named(Ident), | ||
/// `format_args("… {arg} …")` | ||
Captured(Ident), | ||
} | ||
|
||
impl FormatArgumentKind { | ||
pub fn ident(&self) -> Option<Ident> { | ||
match self { | ||
&Self::Normal => None, | ||
&Self::Named(id) => Some(id), | ||
&Self::Captured(id) => Some(id), | ||
} | ||
} | ||
} | ||
|
||
#[derive(Clone, Debug, PartialEq, Eq)] | ||
pub struct FormatPlaceholder { | ||
/// Index into [`FormatArgs::arguments`]. | ||
pub argument: FormatArgPosition, | ||
/// The span inside the format string for the full `{…}` placeholder. | ||
pub span: Option<Span>, | ||
/// `{}`, `{:?}`, or `{:x}`, etc. | ||
pub format_trait: FormatTrait, | ||
/// `{}` or `{:.5}` or `{:-^20}`, etc. | ||
pub format_options: FormatOptions, | ||
} | ||
|
||
#[derive(Clone, Debug, PartialEq, Eq)] | ||
pub struct FormatArgPosition { | ||
/// Which argument this position refers to (Ok), | ||
/// or would've referred to if it existed (Err). | ||
pub index: Result<usize, usize>, | ||
/// What kind of position this is. See [`FormatArgPositionKind`]. | ||
pub kind: FormatArgPositionKind, | ||
/// The span of the name or number. | ||
pub span: Option<Span>, | ||
} | ||
|
||
/// How an argument position was spelled in the template.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum FormatArgPositionKind {
    /// No explicit position given: `{}` or `{:.*}`
    Implicit,
    /// Position given as a number: `{1}` or `{:1$}` or `{:.1$}`
    Number,
    /// Position given as a name: `{a}` or `{:a$}` or `{:.a$}`
    Named,
}
|
||
/// The formatting trait selected by a placeholder.
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
pub enum FormatTrait {
    /// Selected by `{}`
    Display,
    /// Selected by `{:?}`
    Debug,
    /// Selected by `{:e}`
    LowerExp,
    /// Selected by `{:E}`
    UpperExp,
    /// Selected by `{:o}`
    Octal,
    /// Selected by `{:p}`
    Pointer,
    /// Selected by `{:b}`
    Binary,
    /// Selected by `{:x}`
    LowerHex,
    /// Selected by `{:X}`
    UpperHex,
}
|
||
#[derive(Clone, Debug, Default, PartialEq, Eq)] | ||
pub struct FormatOptions { | ||
/// The width. E.g. `{:5}` or `{:width$}`. | ||
pub width: Option<FormatCount>, | ||
/// The precision. E.g. `{:.5}` or `{:.precision$}`. | ||
pub precision: Option<FormatCount>, | ||
/// The alignment. E.g. `{:>}` or `{:<}` or `{:^}`. | ||
pub alignment: Option<FormatAlignment>, | ||
/// The fill character. E.g. the `.` in `{:.>10}`. | ||
pub fill: Option<char>, | ||
/// The `+`, `-`, `0`, `#`, `x?` and `X?` flags. | ||
pub flags: u32, | ||
} | ||
|
||
/// The alignment requested by a placeholder.
///
/// This is a fieldless enum, so it derives `Copy` just like the other
/// fieldless enums in this file (`FormatArgPositionKind`, `FormatTrait`);
/// values can be passed around without an explicit `.clone()`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum FormatAlignment {
    /// `{:<}`
    Left,
    /// `{:>}`
    Right,
    /// `{:^}`
    Center,
}
|
||
#[derive(Clone, Debug, PartialEq, Eq)] | ||
pub enum FormatCount { | ||
/// `{:5}` or `{:.5}` | ||
Literal(usize), | ||
/// `{:.*}`, `{:.5$}`, or `{:a$}`, etc. | ||
Argument(FormatArgPosition), | ||
} |
Oops, something went wrong.