From 28b125b83d9db4094a08b512a956c187bd29a51f Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Thu, 9 May 2019 02:00:29 +0300 Subject: [PATCH 1/7] Turn `ast::Lit` into a struct --- src/librustc/ich/impls_syntax.rs | 6 +++++- src/libsyntax/ast.rs | 6 +++++- src/libsyntax/attr/mod.rs | 12 ++++++------ src/libsyntax/ext/build.rs | 10 +++++----- src/libsyntax/parse/parser.rs | 7 +++---- 5 files changed, 24 insertions(+), 17 deletions(-) diff --git a/src/librustc/ich/impls_syntax.rs b/src/librustc/ich/impls_syntax.rs index 35df43ef25efa..88a2c295a6ef6 100644 --- a/src/librustc/ich/impls_syntax.rs +++ b/src/librustc/ich/impls_syntax.rs @@ -162,7 +162,11 @@ impl_stable_hash_for!(enum ::syntax::ast::LitIntType { Unsuffixed }); -impl_stable_hash_for_spanned!(::syntax::ast::LitKind); +impl_stable_hash_for!(struct ::syntax::ast::Lit { + node, + span +}); + impl_stable_hash_for!(enum ::syntax::ast::LitKind { Str(value, style), Err(value), diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index af2302d24f521..783792cf1977a 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -1351,7 +1351,11 @@ pub enum StrStyle { } /// A literal. -pub type Lit = Spanned; +#[derive(Clone, RustcEncodable, RustcDecodable, Debug, Hash, PartialEq)] +pub struct Lit { + pub node: LitKind, + pub span: Span, +} #[derive(Clone, RustcEncodable, RustcDecodable, Debug, Copy, Hash, PartialEq)] pub enum LitIntType { diff --git a/src/libsyntax/attr/mod.rs b/src/libsyntax/attr/mod.rs index e00f91e395280..e331a26335443 100644 --- a/src/libsyntax/attr/mod.rs +++ b/src/libsyntax/attr/mod.rs @@ -16,7 +16,7 @@ use crate::ast::{AttrId, Attribute, AttrStyle, Name, Ident, Path, PathSegment}; use crate::ast::{MetaItem, MetaItemKind, NestedMetaItem}; use crate::ast::{Lit, LitKind, Expr, ExprKind, Item, Local, Stmt, StmtKind, GenericParam}; use crate::mut_visit::visit_clobber; -use crate::source_map::{BytePos, Spanned, respan, dummy_spanned}; +use crate::source_map::{BytePos, Spanned, dummy_spanned}; use crate::parse::lexer::comments::{doc_comment_style, strip_doc_comment_decoration}; use crate::parse::parser::Parser; use crate::parse::{self, ParseSess, PResult}; @@ -350,11 +350,11 @@ impl Attribute { /* Constructors */ pub fn mk_name_value_item_str(ident: Ident, value: Spanned) -> MetaItem { - let value = respan(value.span, LitKind::Str(value.node, ast::StrStyle::Cooked)); + let value = Lit { node: LitKind::Str(value.node, ast::StrStyle::Cooked), span: value.span }; mk_name_value_item(ident.span.to(value.span), ident, value) } -pub fn mk_name_value_item(span: Span, ident: Ident, value: ast::Lit) -> MetaItem { +pub fn mk_name_value_item(span: Span, ident: Ident, value: Lit) -> MetaItem { MetaItem { path: Path::from_ident(ident), span, node: MetaItemKind::NameValue(value) } } @@ -417,7 +417,7 @@ pub fn mk_spanned_attr_outer(sp: Span, id: AttrId, item: MetaItem) -> Attribute pub fn mk_sugared_doc_attr(id: AttrId, text: Symbol, span: Span) -> Attribute { let style = doc_comment_style(&text.as_str()); - let lit = respan(span, LitKind::Str(text, ast::StrStyle::Cooked)); + let lit = Lit { node: LitKind::Str(text, ast::StrStyle::Cooked), span }; Attribute { id, style, @@ -562,7 +562,7 @@ impl MetaItemKind { tokens.next(); return if let Some(TokenTree::Token(span, token)) = tokens.next() { LitKind::from_token(token) - .map(|lit| MetaItemKind::NameValue(Spanned { node: lit, span: span })) + .map(|node| MetaItemKind::NameValue(Lit { node, span })) } else { None }; @@ -609,7 +609,7 @@ impl NestedMetaItem { if let Some(TokenTree::Token(span, token)) = tokens.peek().cloned() { if let Some(node) = LitKind::from_token(token) { tokens.next(); - return Some(NestedMetaItem::Literal(respan(span, node))); + return Some(NestedMetaItem::Literal(Lit { node, span })); } } diff --git a/src/libsyntax/ext/build.rs b/src/libsyntax/ext/build.rs index 40dd187ed28a7..0fe85361b54b1 100644 --- a/src/libsyntax/ext/build.rs +++ b/src/libsyntax/ext/build.rs @@ -697,8 +697,8 @@ impl<'a> AstBuilder for ExtCtxt<'a> { self.expr_struct(span, self.path_ident(span, id), fields) } - fn expr_lit(&self, sp: Span, lit: ast::LitKind) -> P { - self.expr(sp, ast::ExprKind::Lit(respan(sp, lit))) + fn expr_lit(&self, span: Span, node: ast::LitKind) -> P { + self.expr(span, ast::ExprKind::Lit(ast::Lit { node, span })) } fn expr_usize(&self, span: Span, i: usize) -> P { self.expr_lit(span, ast::LitKind::Int(i as u128, @@ -1164,10 +1164,10 @@ impl<'a> AstBuilder for ExtCtxt<'a> { attr::mk_list_item(sp, Ident::with_empty_ctxt(name).with_span_pos(sp), mis) } - fn meta_name_value(&self, sp: Span, name: ast::Name, value: ast::LitKind) + fn meta_name_value(&self, span: Span, name: ast::Name, node: ast::LitKind) -> ast::MetaItem { - attr::mk_name_value_item(sp, Ident::with_empty_ctxt(name).with_span_pos(sp), - respan(sp, value)) + attr::mk_name_value_item(span, Ident::with_empty_ctxt(name).with_span_pos(span), + ast::Lit { node, span }) } fn item_use(&self, sp: Span, diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index d97d1e2f0f4ee..2b30d2db95e01 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -2140,15 +2140,14 @@ impl<'a> Parser<'a> { /// Matches `lit = true | false | token_lit`. crate fn parse_lit(&mut self) -> PResult<'a, Lit> { let lo = self.span; - let lit = if self.eat_keyword(keywords::True) { + let node = if self.eat_keyword(keywords::True) { LitKind::Bool(true) } else if self.eat_keyword(keywords::False) { LitKind::Bool(false) } else { - let lit = self.parse_lit_token()?; - lit + self.parse_lit_token()? }; - Ok(source_map::Spanned { node: lit, span: lo.to(self.prev_span) }) + Ok(Lit { node, span: lo.to(self.prev_span) }) } /// Matches `'-' lit | lit` (cf. `ast_validation::AstValidator::check_expr_within_pat`). From f2834a403abe78f56d750a302807eab5206bb2c5 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Thu, 9 May 2019 02:17:32 +0300 Subject: [PATCH 2/7] Keep the original token in `ast::Lit` --- src/librustc/hir/mod.rs | 2 +- src/librustc/ich/impls_syntax.rs | 33 ++++++++--------- src/librustdoc/html/highlight.rs | 2 ++ src/libsyntax/ast.rs | 3 ++ src/libsyntax/attr/mod.rs | 62 +++++++++++++++++++------------- src/libsyntax/ext/build.rs | 6 ++-- src/libsyntax/parse/mod.rs | 1 + src/libsyntax/parse/parser.rs | 30 ++++++++-------- src/libsyntax/parse/token.rs | 5 +++ src/libsyntax/print/pprust.rs | 1 + 10 files changed, 86 insertions(+), 59 deletions(-) diff --git a/src/librustc/hir/mod.rs b/src/librustc/hir/mod.rs index 9f7fa6c5557ef..a59322bbe4da9 100644 --- a/src/librustc/hir/mod.rs +++ b/src/librustc/hir/mod.rs @@ -1353,7 +1353,7 @@ pub struct Expr { // `Expr` is used a lot. Make sure it doesn't unintentionally get bigger. #[cfg(target_arch = "x86_64")] -static_assert!(MEM_SIZE_OF_EXPR: std::mem::size_of::() == 72); +static_assert!(MEM_SIZE_OF_EXPR: std::mem::size_of::() == 80); impl Expr { pub fn precedence(&self) -> ExprPrecedence { diff --git a/src/librustc/ich/impls_syntax.rs b/src/librustc/ich/impls_syntax.rs index 88a2c295a6ef6..7ff546b7467da 100644 --- a/src/librustc/ich/impls_syntax.rs +++ b/src/librustc/ich/impls_syntax.rs @@ -164,6 +164,8 @@ impl_stable_hash_for!(enum ::syntax::ast::LitIntType { impl_stable_hash_for!(struct ::syntax::ast::Lit { node, + token, + suffix, span }); @@ -284,6 +286,19 @@ for tokenstream::TokenStream { } } +impl_stable_hash_for!(enum token::Lit { + Bool(val), + Byte(val), + Char(val), + Err(val), + Integer(val), + Float(val), + Str_(val), + ByteStr(val), + StrRaw(val, n), + ByteStrRaw(val, n) +}); + fn hash_token<'a, 'gcx, W: StableHasherResult>( token: &token::Token, hcx: &mut StableHashingContext<'a>, @@ -331,22 +346,8 @@ fn hash_token<'a, 'gcx, W: StableHasherResult>( token::Token::CloseDelim(delim_token) => { std_hash::Hash::hash(&delim_token, hasher); } - token::Token::Literal(ref lit, ref opt_name) => { - mem::discriminant(lit).hash_stable(hcx, hasher); - match *lit { - token::Lit::Byte(val) | - token::Lit::Char(val) | - token::Lit::Err(val) | - token::Lit::Integer(val) | - token::Lit::Float(val) | - token::Lit::Str_(val) | - token::Lit::ByteStr(val) => val.hash_stable(hcx, hasher), - token::Lit::StrRaw(val, n) | - token::Lit::ByteStrRaw(val, n) => { - val.hash_stable(hcx, hasher); - n.hash_stable(hcx, hasher); - } - }; + token::Token::Literal(lit, opt_name) => { + lit.hash_stable(hcx, hasher); opt_name.hash_stable(hcx, hasher); } diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index d66455f91ba1a..5bb06516ac49e 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -318,6 +318,8 @@ impl<'a> Classifier<'a> { // Number literals. token::Integer(..) | token::Float(..) => Class::Number, + + token::Bool(..) => panic!("literal token contains `Lit::Bool`"), } } diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index 783792cf1977a..04bc146e145ec 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -6,6 +6,7 @@ pub use crate::symbol::{Ident, Symbol as Name}; pub use crate::util::parser::ExprPrecedence; use crate::ext::hygiene::{Mark, SyntaxContext}; +use crate::parse::token; use crate::print::pprust; use crate::ptr::P; use crate::source_map::{dummy_spanned, respan, Spanned}; @@ -1354,6 +1355,8 @@ pub enum StrStyle { #[derive(Clone, RustcEncodable, RustcDecodable, Debug, Hash, PartialEq)] pub struct Lit { pub node: LitKind, + pub token: token::Lit, + pub suffix: Option, pub span: Span, } diff --git a/src/libsyntax/attr/mod.rs b/src/libsyntax/attr/mod.rs index e331a26335443..2b87458108336 100644 --- a/src/libsyntax/attr/mod.rs +++ b/src/libsyntax/attr/mod.rs @@ -350,7 +350,9 @@ impl Attribute { /* Constructors */ pub fn mk_name_value_item_str(ident: Ident, value: Spanned) -> MetaItem { - let value = Lit { node: LitKind::Str(value.node, ast::StrStyle::Cooked), span: value.span }; + let node = LitKind::Str(value.node, ast::StrStyle::Cooked); + let (token, suffix) = node.lit_token(); + let value = Lit { node, token, suffix, span: value.span }; mk_name_value_item(ident.span.to(value.span), ident, value) } @@ -417,7 +419,9 @@ pub fn mk_spanned_attr_outer(sp: Span, id: AttrId, item: MetaItem) -> Attribute pub fn mk_sugared_doc_attr(id: AttrId, text: Symbol, span: Span) -> Attribute { let style = doc_comment_style(&text.as_str()); - let lit = Lit { node: LitKind::Str(text, ast::StrStyle::Cooked), span }; + let node = LitKind::Str(text, ast::StrStyle::Cooked); + let (token, suffix) = node.lit_token(); + let lit = Lit { node, token, suffix, span }; Attribute { id, style, @@ -562,7 +566,7 @@ impl MetaItemKind { tokens.next(); return if let Some(TokenTree::Token(span, token)) = tokens.next() { LitKind::from_token(token) - .map(|node| MetaItemKind::NameValue(Lit { node, span })) + .map(|(node, token, suffix)| MetaItemKind::NameValue(Lit { node, token, suffix, span })) } else { None }; @@ -607,9 +611,9 @@ impl NestedMetaItem { where I: Iterator, { if let Some(TokenTree::Token(span, token)) = tokens.peek().cloned() { - if let Some(node) = LitKind::from_token(token) { + if let Some((node, token, suffix)) = LitKind::from_token(token) { tokens.next(); - return Some(NestedMetaItem::Literal(Lit { node, span })); + return Some(NestedMetaItem::Literal(Lit { node, token, suffix, span })); } } @@ -625,28 +629,35 @@ impl Lit { impl LitKind { fn token(&self) -> Token { + match self.lit_token() { + (token::Bool(symbol), _) => Token::Ident(Ident::with_empty_ctxt(symbol), false), + (lit, suffix) => Token::Literal(lit, suffix), + } + } + + pub(crate) fn lit_token(&self) -> (token::Lit, Option) { use std::ascii; match *self { LitKind::Str(string, ast::StrStyle::Cooked) => { let escaped = string.as_str().escape_default().to_string(); - Token::Literal(token::Lit::Str_(Symbol::intern(&escaped)), None) + (token::Lit::Str_(Symbol::intern(&escaped)), None) } LitKind::Str(string, ast::StrStyle::Raw(n)) => { - Token::Literal(token::Lit::StrRaw(string, n), None) + (token::Lit::StrRaw(string, n), None) } LitKind::ByteStr(ref bytes) => { let string = bytes.iter().cloned().flat_map(ascii::escape_default) .map(Into::::into).collect::(); - Token::Literal(token::Lit::ByteStr(Symbol::intern(&string)), None) + (token::Lit::ByteStr(Symbol::intern(&string)), None) } LitKind::Byte(byte) => { let string: String = ascii::escape_default(byte).map(Into::::into).collect(); - Token::Literal(token::Lit::Byte(Symbol::intern(&string)), None) + (token::Lit::Byte(Symbol::intern(&string)), None) } LitKind::Char(ch) => { let string: String = ch.escape_default().map(Into::::into).collect(); - Token::Literal(token::Lit::Char(Symbol::intern(&string)), None) + (token::Lit::Char(Symbol::intern(&string)), None) } LitKind::Int(n, ty) => { let suffix = match ty { @@ -654,38 +665,39 @@ impl LitKind { ast::LitIntType::Signed(ty) => Some(Symbol::intern(ty.ty_to_string())), ast::LitIntType::Unsuffixed => None, }; - Token::Literal(token::Lit::Integer(Symbol::intern(&n.to_string())), suffix) + (token::Lit::Integer(Symbol::intern(&n.to_string())), suffix) } LitKind::Float(symbol, ty) => { - Token::Literal(token::Lit::Float(symbol), Some(Symbol::intern(ty.ty_to_string()))) + (token::Lit::Float(symbol), Some(Symbol::intern(ty.ty_to_string()))) } - LitKind::FloatUnsuffixed(symbol) => Token::Literal(token::Lit::Float(symbol), None), - LitKind::Bool(value) => Token::Ident(Ident::with_empty_ctxt(Symbol::intern(if value { - "true" - } else { - "false" - })), false), - LitKind::Err(val) => Token::Literal(token::Lit::Err(val), None), + LitKind::FloatUnsuffixed(symbol) => (token::Lit::Float(symbol), None), + LitKind::Bool(value) => { + let kw = if value { keywords::True } else { keywords::False }; + (token::Lit::Bool(kw.name()), None) + } + LitKind::Err(val) => (token::Lit::Err(val), None), } } - fn from_token(token: Token) -> Option { + fn from_token(token: Token) -> Option<(LitKind, token::Lit, Option)> { match token { - Token::Ident(ident, false) if ident.name == "true" => Some(LitKind::Bool(true)), - Token::Ident(ident, false) if ident.name == "false" => Some(LitKind::Bool(false)), + Token::Ident(ident, false) if ident.name == keywords::True.name() => + Some((LitKind::Bool(true), token::Bool(ident.name), None)), + Token::Ident(ident, false) if ident.name == keywords::False.name() => + Some((LitKind::Bool(false), token::Bool(ident.name), None)), Token::Interpolated(nt) => match *nt { token::NtExpr(ref v) | token::NtLiteral(ref v) => match v.node { - ExprKind::Lit(ref lit) => Some(lit.node.clone()), + ExprKind::Lit(ref lit) => Some((lit.node.clone(), lit.token, lit.suffix)), _ => None, }, _ => None, }, Token::Literal(lit, suf) => { let (suffix_illegal, result) = parse::lit_token(lit, suf, None); - if suffix_illegal && suf.is_some() { + if result.is_none() || suffix_illegal && suf.is_some() { return None; } - result + Some((result.unwrap(), lit, suf)) } _ => None, } diff --git a/src/libsyntax/ext/build.rs b/src/libsyntax/ext/build.rs index 0fe85361b54b1..0f9977e85038b 100644 --- a/src/libsyntax/ext/build.rs +++ b/src/libsyntax/ext/build.rs @@ -698,7 +698,8 @@ impl<'a> AstBuilder for ExtCtxt<'a> { } fn expr_lit(&self, span: Span, node: ast::LitKind) -> P { - self.expr(span, ast::ExprKind::Lit(ast::Lit { node, span })) + let (token, suffix) = node.lit_token(); + self.expr(span, ast::ExprKind::Lit(ast::Lit { node, token, suffix, span })) } fn expr_usize(&self, span: Span, i: usize) -> P { self.expr_lit(span, ast::LitKind::Int(i as u128, @@ -1166,8 +1167,9 @@ impl<'a> AstBuilder for ExtCtxt<'a> { fn meta_name_value(&self, span: Span, name: ast::Name, node: ast::LitKind) -> ast::MetaItem { + let (token, suffix) = node.lit_token(); attr::mk_name_value_item(span, Ident::with_empty_ctxt(name).with_span_pos(span), - ast::Lit { node, span }) + ast::Lit { node, token, suffix, span }) } fn item_use(&self, sp: Span, diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index be44b964ba5a7..4d4e99009a9d2 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -376,6 +376,7 @@ crate fn lit_token(lit: token::Lit, suf: Option, diag: Option<(Span, &Ha use ast::LitKind; match lit { + token::Bool(_) => panic!("literal token contains `Lit::Bool`"), token::Byte(i) => { let lit_kind = match unescape_byte(&i.as_str()) { Ok(c) => LitKind::Byte(c), diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 2b30d2db95e01..b988cb1447df7 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -2070,11 +2070,11 @@ impl<'a> Parser<'a> { } /// Matches `token_lit = LIT_INTEGER | ...`. - fn parse_lit_token(&mut self) -> PResult<'a, LitKind> { + fn parse_lit_token(&mut self) -> PResult<'a, (LitKind, token::Lit, Option)> { let out = match self.token { token::Interpolated(ref nt) => match **nt { token::NtExpr(ref v) | token::NtLiteral(ref v) => match v.node { - ExprKind::Lit(ref lit) => { lit.node.clone() } + ExprKind::Lit(ref lit) => { (lit.node.clone(), lit.token, lit.suffix) } _ => { return self.unexpected_last(&self.token); } }, _ => { return self.unexpected_last(&self.token); } @@ -2088,19 +2088,19 @@ impl<'a> Parser<'a> { self.expect_no_suffix(sp, &format!("a {}", lit.literal_name()), suf) } - result.unwrap() + (result.unwrap(), lit, suf) } token::Dot if self.look_ahead(1, |t| match t { - token::Literal(parse::token::Lit::Integer(_) , _) => true, + token::Literal(token::Lit::Integer(_) , _) => true, _ => false, }) => { // recover from `let x = .4;` let lo = self.span; self.bump(); if let token::Literal( - parse::token::Lit::Integer(val), + token::Lit::Integer(val), suffix, ) = self.token { - let suffix = suffix.and_then(|s| { + let float_suffix = suffix.and_then(|s| { let s = s.as_str(); if s == "f32" { Some("f32") @@ -2117,14 +2117,14 @@ impl<'a> Parser<'a> { err.span_suggestion( sp, "must have an integer part", - format!("0.{}{}", val, suffix), + format!("0.{}{}", val, float_suffix), Applicability::MachineApplicable, ); err.emit(); - return Ok(match suffix { - "f32" => ast::LitKind::Float(val, ast::FloatTy::F32), - "f64" => ast::LitKind::Float(val, ast::FloatTy::F64), - _ => ast::LitKind::FloatUnsuffixed(val), + return Ok(match float_suffix { + "f32" => (ast::LitKind::Float(val, ast::FloatTy::F32), token::Float(val), suffix), + "f64" => (ast::LitKind::Float(val, ast::FloatTy::F64), token::Float(val), suffix), + _ => (ast::LitKind::FloatUnsuffixed(val), token::Float(val), suffix), }); } else { unreachable!(); @@ -2140,14 +2140,14 @@ impl<'a> Parser<'a> { /// Matches `lit = true | false | token_lit`. crate fn parse_lit(&mut self) -> PResult<'a, Lit> { let lo = self.span; - let node = if self.eat_keyword(keywords::True) { - LitKind::Bool(true) + let (node, token, suffix) = if self.eat_keyword(keywords::True) { + (LitKind::Bool(true), token::Bool(keywords::True.name()), None) } else if self.eat_keyword(keywords::False) { - LitKind::Bool(false) + (LitKind::Bool(false), token::Bool(keywords::False.name()), None) } else { self.parse_lit_token()? }; - Ok(Lit { node, span: lo.to(self.prev_span) }) + Ok(Lit { node, token, suffix, span: lo.to(self.prev_span) }) } /// Matches `'-' lit | lit` (cf. `ast_validation::AstValidator::check_expr_within_pat`). diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index fd7a39c576daa..48a949257ffb3 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -61,6 +61,7 @@ impl DelimToken { #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)] pub enum Lit { + Bool(ast::Name), // AST only, must never appear in a `Token` Byte(ast::Name), Char(ast::Name), Err(ast::Name), @@ -72,9 +73,13 @@ pub enum Lit { ByteStrRaw(ast::Name, u16), /* raw byte str delimited by n hash symbols */ } +#[cfg(target_arch = "x86_64")] +static_assert!(MEM_SIZE_OF_LIT: mem::size_of::() == 8); + impl Lit { crate fn literal_name(&self) -> &'static str { match *self { + Bool(_) => panic!("literal token contains `Lit::Bool`"), Byte(_) => "byte literal", Char(_) => "char literal", Err(_) => "invalid literal", diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs index 682621d40ab65..0e93b857708f7 100644 --- a/src/libsyntax/print/pprust.rs +++ b/src/libsyntax/print/pprust.rs @@ -225,6 +225,7 @@ pub fn token_to_string(tok: &Token) -> String { /* Literals */ token::Literal(lit, suf) => { let mut out = match lit { + token::Bool(_) => panic!("literal token contains `Lit::Bool`"), token::Byte(b) => format!("b'{}'", b), token::Char(c) => format!("'{}'", c), token::Err(c) => format!("'{}'", c), From 751ae5af1a561a93ce61e9971be4b254a57e6576 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Thu, 9 May 2019 18:04:24 +0300 Subject: [PATCH 3/7] Introduce `hir::Lit` not keeping the original token --- src/librustc/hir/lowering.rs | 2 +- src/librustc/hir/mod.rs | 7 ++-- src/librustc/hir/print.rs | 59 ++++++++++++++++++++++++++++++++ src/librustc/ich/impls_syntax.rs | 2 ++ src/librustc_lint/types.rs | 10 +++--- src/librustc_typeck/check/mod.rs | 2 +- src/libsyntax/ast.rs | 2 +- 7 files changed, 74 insertions(+), 10 deletions(-) diff --git a/src/librustc/hir/lowering.rs b/src/librustc/hir/lowering.rs index 7ccfb826e37b7..20a2a1fd49bde 100644 --- a/src/librustc/hir/lowering.rs +++ b/src/librustc/hir/lowering.rs @@ -4100,7 +4100,7 @@ impl<'a> LoweringContext<'a> { let ohs = P(self.lower_expr(ohs)); hir::ExprKind::Unary(op, ohs) } - ExprKind::Lit(ref l) => hir::ExprKind::Lit((*l).clone()), + ExprKind::Lit(ref l) => hir::ExprKind::Lit(respan(l.span, l.node.clone())), ExprKind::Cast(ref expr, ref ty) => { let expr = P(self.lower_expr(expr)); hir::ExprKind::Cast(expr, self.lower_ty(ty, ImplTraitContext::disallowed())) diff --git a/src/librustc/hir/mod.rs b/src/librustc/hir/mod.rs index a59322bbe4da9..d645898692806 100644 --- a/src/librustc/hir/mod.rs +++ b/src/librustc/hir/mod.rs @@ -20,7 +20,7 @@ use syntax_pos::{Span, DUMMY_SP, symbol::InternedString}; use syntax::source_map::Spanned; use rustc_target::spec::abi::Abi; use syntax::ast::{self, CrateSugar, Ident, Name, NodeId, AsmDialect}; -use syntax::ast::{Attribute, Label, Lit, StrStyle, FloatTy, IntTy, UintTy}; +use syntax::ast::{Attribute, Label, LitKind, StrStyle, FloatTy, IntTy, UintTy}; use syntax::attr::{InlineAttr, OptimizeAttr}; use syntax::ext::hygiene::SyntaxContext; use syntax::ptr::P; @@ -1331,6 +1331,9 @@ impl BodyOwnerKind { } } +/// A literal. +pub type Lit = Spanned; + /// A constant (expression) that's not an item or associated item, /// but needs its own `DefId` for type-checking, const-eval, etc. /// These are usually found nested inside types (e.g., array lengths) @@ -1353,7 +1356,7 @@ pub struct Expr { // `Expr` is used a lot. Make sure it doesn't unintentionally get bigger. #[cfg(target_arch = "x86_64")] -static_assert!(MEM_SIZE_OF_EXPR: std::mem::size_of::() == 80); +static_assert!(MEM_SIZE_OF_EXPR: std::mem::size_of::() == 72); impl Expr { pub fn precedence(&self) -> ExprPrecedence { diff --git a/src/librustc/hir/print.rs b/src/librustc/hir/print.rs index c42d8f3cb3c36..b7260abb521ac 100644 --- a/src/librustc/hir/print.rs +++ b/src/librustc/hir/print.rs @@ -15,6 +15,7 @@ use crate::hir; use crate::hir::{PatKind, GenericBound, TraitBoundModifier, RangeEnd}; use crate::hir::{GenericParam, GenericParamKind, GenericArg}; +use std::ascii; use std::borrow::Cow; use std::cell::Cell; use std::io::{self, Write, Read}; @@ -1276,6 +1277,64 @@ impl<'a> State<'a> { self.print_expr_maybe_paren(expr, parser::PREC_PREFIX) } + fn print_literal(&mut self, lit: &hir::Lit) -> io::Result<()> { + self.maybe_print_comment(lit.span.lo())?; + if let Some(ltrl) = self.next_lit(lit.span.lo()) { + return self.writer().word(ltrl.lit.clone()); + } + match lit.node { + hir::LitKind::Str(st, style) => self.print_string(&st.as_str(), style), + hir::LitKind::Err(st) => { + let st = st.as_str().escape_debug().to_string(); + let mut res = String::with_capacity(st.len() + 2); + res.push('\''); + res.push_str(&st); + res.push('\''); + self.writer().word(res) + } + hir::LitKind::Byte(byte) => { + let mut res = String::from("b'"); + res.extend(ascii::escape_default(byte).map(|c| c as char)); + res.push('\''); + self.writer().word(res) + } + hir::LitKind::Char(ch) => { + let mut res = String::from("'"); + res.extend(ch.escape_default()); + res.push('\''); + self.writer().word(res) + } + hir::LitKind::Int(i, t) => { + match t { + ast::LitIntType::Signed(st) => { + self.writer().word(st.val_to_string(i as i128)) + } + ast::LitIntType::Unsigned(ut) => { + self.writer().word(ut.val_to_string(i)) + } + ast::LitIntType::Unsuffixed => { + self.writer().word(i.to_string()) + } + } + } + hir::LitKind::Float(ref f, t) => { + self.writer().word(format!("{}{}", &f, t.ty_to_string())) + } + hir::LitKind::FloatUnsuffixed(ref f) => self.writer().word(f.as_str().to_string()), + hir::LitKind::Bool(val) => { + if val { self.writer().word("true") } else { self.writer().word("false") } + } + hir::LitKind::ByteStr(ref v) => { + let mut escaped: String = String::new(); + for &ch in v.iter() { + escaped.extend(ascii::escape_default(ch) + .map(|c| c as char)); + } + self.writer().word(format!("b\"{}\"", escaped)) + } + } + } + pub fn print_expr(&mut self, expr: &hir::Expr) -> io::Result<()> { self.maybe_print_comment(expr.span.lo())?; self.print_outer_attributes(&expr.attrs)?; diff --git a/src/librustc/ich/impls_syntax.rs b/src/librustc/ich/impls_syntax.rs index 7ff546b7467da..4e5718cc5ef2a 100644 --- a/src/librustc/ich/impls_syntax.rs +++ b/src/librustc/ich/impls_syntax.rs @@ -181,6 +181,8 @@ impl_stable_hash_for!(enum ::syntax::ast::LitKind { Bool(value) }); +impl_stable_hash_for_spanned!(::syntax::ast::LitKind); + impl_stable_hash_for!(enum ::syntax::ast::IntTy { Isize, I8, I16, I32, I64, I128 }); impl_stable_hash_for!(enum ::syntax::ast::UintTy { Usize, U8, U16, U32, U64, U128 }); impl_stable_hash_for!(enum ::syntax::ast::FloatTy { F32, F64 }); diff --git a/src/librustc_lint/types.rs b/src/librustc_lint/types.rs index f4ebfd79fe1db..38b6e2c197939 100644 --- a/src/librustc_lint/types.rs +++ b/src/librustc_lint/types.rs @@ -62,7 +62,7 @@ impl TypeLimits { /// Returns `true` iff the lint was overridden. fn lint_overflowing_range_endpoint<'a, 'tcx>( cx: &LateContext<'a, 'tcx>, - lit: &ast::Lit, + lit: &hir::Lit, lit_val: u128, max: u128, expr: &'tcx hir::Expr, @@ -132,7 +132,7 @@ fn uint_ty_range(uint_ty: ast::UintTy) -> (u128, u128) { } } -fn get_bin_hex_repr(cx: &LateContext<'_, '_>, lit: &ast::Lit) -> Option { +fn get_bin_hex_repr(cx: &LateContext<'_, '_>, lit: &hir::Lit) -> Option { let src = cx.sess().source_map().span_to_snippet(lit.span).ok()?; let firstch = src.chars().next()?; @@ -249,7 +249,7 @@ fn lint_int_literal<'a, 'tcx>( cx: &LateContext<'a, 'tcx>, type_limits: &TypeLimits, e: &'tcx hir::Expr, - lit: &ast::Lit, + lit: &hir::Lit, t: ast::IntTy, v: u128, ) { @@ -301,7 +301,7 @@ fn lint_int_literal<'a, 'tcx>( fn lint_uint_literal<'a, 'tcx>( cx: &LateContext<'a, 'tcx>, e: &'tcx hir::Expr, - lit: &ast::Lit, + lit: &hir::Lit, t: ast::UintTy, ) { let uint_type = if let ast::UintTy::Usize = t { @@ -363,7 +363,7 @@ fn lint_literal<'a, 'tcx>( cx: &LateContext<'a, 'tcx>, type_limits: &TypeLimits, e: &'tcx hir::Expr, - lit: &ast::Lit, + lit: &hir::Lit, ) { match cx.tables.node_type(e.hir_id).sty { ty::Int(t) => { diff --git a/src/librustc_typeck/check/mod.rs b/src/librustc_typeck/check/mod.rs index a32745f27e1a0..dc73ada1506c8 100644 --- a/src/librustc_typeck/check/mod.rs +++ b/src/librustc_typeck/check/mod.rs @@ -3083,7 +3083,7 @@ impl<'a, 'gcx, 'tcx> FnCtxt<'a, 'gcx, 'tcx> { // AST fragment checking fn check_lit(&self, - lit: &ast::Lit, + lit: &hir::Lit, expected: Expectation<'tcx>) -> Ty<'tcx> { diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index 04bc146e145ec..a188f1a936890 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -1352,7 +1352,7 @@ pub enum StrStyle { } /// A literal. -#[derive(Clone, RustcEncodable, RustcDecodable, Debug, Hash, PartialEq)] +#[derive(Clone, RustcEncodable, RustcDecodable, Debug)] pub struct Lit { pub node: LitKind, pub token: token::Lit, From a5b3f33cb90bf991342afa552bcd993e36f80fa7 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Thu, 9 May 2019 19:04:04 +0300 Subject: [PATCH 4/7] Eliminate `comments::Literal` --- src/librustc/hir/print.rs | 45 ++------ src/librustc_driver/pretty.rs | 6 +- src/librustdoc/clean/cfg.rs | 16 +-- src/libsyntax/attr/mod.rs | 7 +- src/libsyntax/parse/lexer/comments.rs | 29 +---- src/libsyntax/parse/parser.rs | 10 +- src/libsyntax/print/pprust.rs | 158 ++++++-------------------- 7 files changed, 61 insertions(+), 210 deletions(-) diff --git a/src/librustc/hir/print.rs b/src/librustc/hir/print.rs index b7260abb521ac..6d48ad94a6677 100644 --- a/src/librustc/hir/print.rs +++ b/src/librustc/hir/print.rs @@ -19,7 +19,6 @@ use std::ascii; use std::borrow::Cow; use std::cell::Cell; use std::io::{self, Write, Read}; -use std::iter::Peekable; use std::vec; pub enum AnnNode<'a> { @@ -77,7 +76,6 @@ pub struct State<'a> { pub s: pp::Printer<'a>, cm: Option<&'a SourceMap>, comments: Option>, - literals: Peekable>, cur_cmnt: usize, boxes: Vec, ann: &'a (dyn PpAnn + 'a), @@ -99,14 +97,6 @@ impl<'a> PrintState<'a> for State<'a> { fn cur_cmnt(&mut self) -> &mut usize { &mut self.cur_cmnt } - - fn cur_lit(&mut self) -> Option<&comments::Literal> { - self.literals.peek() - } - - fn bump_lit(&mut self) -> Option { - self.literals.next() - } } #[allow(non_upper_case_globals)] @@ -117,18 +107,16 @@ pub const default_columns: usize = 78; /// Requires you to pass an input filename and reader so that -/// it can scan the input text for comments and literals to -/// copy forward. +/// it can scan the input text for comments to copy forward. pub fn print_crate<'a>(cm: &'a SourceMap, sess: &ParseSess, krate: &hir::Crate, filename: FileName, input: &mut dyn Read, out: Box, - ann: &'a dyn PpAnn, - is_expanded: bool) + ann: &'a dyn PpAnn) -> io::Result<()> { - let mut s = State::new_from_input(cm, sess, filename, input, out, ann, is_expanded); + let mut s = State::new_from_input(cm, sess, filename, input, out, ann); // When printing the AST, we sometimes need to inject `#[no_std]` here. // Since you can't compile the HIR, it's not necessary. @@ -144,36 +132,21 @@ impl<'a> State<'a> { filename: FileName, input: &mut dyn Read, out: Box, - ann: &'a dyn PpAnn, - is_expanded: bool) + ann: &'a dyn PpAnn) -> State<'a> { - let (cmnts, lits) = comments::gather_comments_and_literals(sess, filename, input); - - State::new(cm, - out, - ann, - Some(cmnts), - // If the code is post expansion, don't use the table of - // literals, since it doesn't correspond with the literals - // in the AST anymore. - if is_expanded { - None - } else { - Some(lits) - }) + let comments = comments::gather_comments(sess, filename, input); + State::new(cm, out, ann, Some(comments)) } pub fn new(cm: &'a SourceMap, out: Box, ann: &'a dyn PpAnn, - comments: Option>, - literals: Option>) + comments: Option>) -> State<'a> { State { s: pp::mk_printer(out, default_columns), cm: Some(cm), comments, - literals: literals.unwrap_or_default().into_iter().peekable(), cur_cmnt: 0, boxes: Vec::new(), ann, @@ -190,7 +163,6 @@ pub fn to_string(ann: &dyn PpAnn, f: F) -> String s: pp::mk_printer(Box::new(&mut wr), default_columns), cm: None, comments: None, - literals: vec![].into_iter().peekable(), cur_cmnt: 0, boxes: Vec::new(), ann, @@ -1279,9 +1251,6 @@ impl<'a> State<'a> { fn print_literal(&mut self, lit: &hir::Lit) -> io::Result<()> { self.maybe_print_comment(lit.span.lo())?; - if let Some(ltrl) = self.next_lit(lit.span.lo()) { - return self.writer().word(ltrl.lit.clone()); - } match lit.node { hir::LitKind::Str(st, style) => self.print_string(&st.as_str(), style), hir::LitKind::Err(st) => { diff --git a/src/librustc_driver/pretty.rs b/src/librustc_driver/pretty.rs index fc8bf0baa99f6..c74ed5ec30c3c 100644 --- a/src/librustc_driver/pretty.rs +++ b/src/librustc_driver/pretty.rs @@ -805,8 +805,7 @@ pub fn print_after_hir_lowering<'tcx>( src_name, &mut rdr, box out, - annotation.pp_ann(), - true) + annotation.pp_ann()) }) } @@ -829,8 +828,7 @@ pub fn print_after_hir_lowering<'tcx>( src_name, &mut rdr, box out, - annotation.pp_ann(), - true); + annotation.pp_ann()); for node_id in uii.all_matching_node_ids(hir_map) { let node = hir_map.get(node_id); pp_state.print_node(node)?; diff --git a/src/librustdoc/clean/cfg.rs b/src/librustdoc/clean/cfg.rs index 69445451503cc..257f02af4cdd8 100644 --- a/src/librustdoc/clean/cfg.rs +++ b/src/librustdoc/clean/cfg.rs @@ -414,10 +414,9 @@ impl<'a> fmt::Display for Html<'a> { mod test { use super::Cfg; - use syntax::symbol::Symbol; - use syntax::ast::*; - use syntax::source_map::dummy_spanned; use syntax_pos::DUMMY_SP; + use syntax::ast::*; + use syntax::symbol::Symbol; use syntax::with_globals; fn word_cfg(s: &str) -> Cfg { @@ -592,12 +591,11 @@ mod test { let mi = dummy_meta_item_word("all"); assert_eq!(Cfg::parse(&mi), Ok(word_cfg("all"))); + let node = LitKind::Str(Symbol::intern("done"), StrStyle::Cooked); + let (token, suffix) = node.lit_token(); let mi = MetaItem { path: Path::from_ident(Ident::from_str("all")), - node: MetaItemKind::NameValue(dummy_spanned(LitKind::Str( - Symbol::intern("done"), - StrStyle::Cooked, - ))), + node: MetaItemKind::NameValue(Lit { node, token, suffix, span: DUMMY_SP }), span: DUMMY_SP, }; assert_eq!(Cfg::parse(&mi), Ok(name_value_cfg("all", "done"))); @@ -627,9 +625,11 @@ mod test { #[test] fn test_parse_err() { with_globals(|| { + let node = LitKind::Bool(false); + let (token, suffix) = node.lit_token(); let mi = MetaItem { path: Path::from_ident(Ident::from_str("foo")), - node: MetaItemKind::NameValue(dummy_spanned(LitKind::Bool(false))), + node: MetaItemKind::NameValue(Lit { node, token, suffix, span: DUMMY_SP }), span: DUMMY_SP, }; assert!(Cfg::parse(&mi).is_err()); diff --git a/src/libsyntax/attr/mod.rs b/src/libsyntax/attr/mod.rs index 2b87458108336..e23c18266519f 100644 --- a/src/libsyntax/attr/mod.rs +++ b/src/libsyntax/attr/mod.rs @@ -565,8 +565,9 @@ impl MetaItemKind { Some(TokenTree::Token(_, token::Eq)) => { tokens.next(); return if let Some(TokenTree::Token(span, token)) = tokens.next() { - LitKind::from_token(token) - .map(|(node, token, suffix)| MetaItemKind::NameValue(Lit { node, token, suffix, span })) + LitKind::from_token(token).map(|(node, token, suffix)| { + MetaItemKind::NameValue(Lit { node, token, suffix, span }) + }) } else { None }; @@ -635,7 +636,7 @@ impl LitKind { } } - pub(crate) fn lit_token(&self) -> (token::Lit, Option) { + pub fn lit_token(&self) -> (token::Lit, Option) { use std::ascii; match *self { diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs index 74fff3324eacf..97d3fc002e9b0 100644 --- a/src/libsyntax/parse/lexer/comments.rs +++ b/src/libsyntax/parse/lexer/comments.rs @@ -3,8 +3,7 @@ pub use CommentStyle::*; use crate::ast; use crate::source_map::SourceMap; use crate::parse::lexer::{is_block_doc_comment, is_pattern_whitespace}; -use crate::parse::lexer::{self, ParseSess, StringReader, TokenAndSpan}; -use crate::print::pprust; +use crate::parse::lexer::{self, ParseSess, StringReader}; use syntax_pos::{BytePos, CharPos, Pos, FileName}; use log::debug; @@ -339,16 +338,9 @@ fn consume_comment(rdr: &mut StringReader<'_>, debug!("<<< consume comment"); } -#[derive(Clone)] -pub struct Literal { - pub lit: String, - pub pos: BytePos, -} - // it appears this function is called only from pprust... that's // probably not a good thing. -pub fn gather_comments_and_literals(sess: &ParseSess, path: FileName, srdr: &mut dyn Read) - -> (Vec, Vec) +pub fn gather_comments(sess: &ParseSess, path: FileName, srdr: &mut dyn Read) -> Vec { let mut src = String::new(); srdr.read_to_string(&mut src).unwrap(); @@ -357,7 +349,6 @@ pub fn gather_comments_and_literals(sess: &ParseSess, path: FileName, srdr: &mut let mut rdr = lexer::StringReader::new_raw(sess, source_file, None); let mut comments: Vec = Vec::new(); - let mut literals: Vec = Vec::new(); let mut code_to_the_left = false; // Only code let mut anything_to_the_left = false; // Code or comments @@ -382,26 +373,12 @@ pub fn gather_comments_and_literals(sess: &ParseSess, path: FileName, srdr: &mut } } - let bstart = rdr.pos; rdr.next_token(); - // discard, and look ahead; we're working with internal state - let TokenAndSpan { tok, sp } = rdr.peek(); - if tok.is_lit() { - rdr.with_str_from(bstart, |s| { - debug!("tok lit: {}", s); - literals.push(Literal { - lit: s.to_string(), - pos: sp.lo(), - }); - }) - } else { - debug!("tok: {}", pprust::token_to_string(&tok)); - } code_to_the_left = true; anything_to_the_left = true; } - (comments, literals) + comments } #[cfg(test)] diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index b988cb1447df7..8c0c2f4b6e28a 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -2121,11 +2121,11 @@ impl<'a> Parser<'a> { Applicability::MachineApplicable, ); err.emit(); - return Ok(match float_suffix { - "f32" => (ast::LitKind::Float(val, ast::FloatTy::F32), token::Float(val), suffix), - "f64" => (ast::LitKind::Float(val, ast::FloatTy::F64), token::Float(val), suffix), - _ => (ast::LitKind::FloatUnsuffixed(val), token::Float(val), suffix), - }); + return Ok((match float_suffix { + "f32" => ast::LitKind::Float(val, ast::FloatTy::F32), + "f64" => ast::LitKind::Float(val, ast::FloatTy::F64), + _ => ast::LitKind::FloatUnsuffixed(val), + }, token::Float(val), suffix)); } else { unreachable!(); }; diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs index 0e93b857708f7..fa16a2b200ff3 100644 --- a/src/libsyntax/print/pprust.rs +++ b/src/libsyntax/print/pprust.rs @@ -20,10 +20,8 @@ use rustc_target::spec::abi::{self, Abi}; use syntax_pos::{self, BytePos}; use syntax_pos::{DUMMY_SP, FileName}; -use std::ascii; use std::borrow::Cow; use std::io::{self, Write, Read}; -use std::iter::Peekable; use std::vec; pub enum AnnNode<'a> { @@ -49,8 +47,7 @@ impl PpAnn for NoAnn {} pub struct State<'a> { pub s: pp::Printer<'a>, cm: Option<&'a SourceMap>, - comments: Option >, - literals: Peekable>, + comments: Option>, cur_cmnt: usize, boxes: Vec, ann: &'a (dyn PpAnn+'a), @@ -62,7 +59,6 @@ fn rust_printer<'a>(writer: Box, ann: &'a dyn PpAnn) -> State<'a> s: pp::mk_printer(writer, DEFAULT_COLUMNS), cm: None, comments: None, - literals: vec![].into_iter().peekable(), cur_cmnt: 0, boxes: Vec::new(), ann, @@ -75,8 +71,7 @@ pub const INDENT_UNIT: usize = 4; pub const DEFAULT_COLUMNS: usize = 78; /// Requires you to pass an input filename and reader so that -/// it can scan the input text for comments and literals to -/// copy forward. +/// it can scan the input text for comments to copy forward. pub fn print_crate<'a>(cm: &'a SourceMap, sess: &ParseSess, krate: &ast::Crate, @@ -118,36 +113,23 @@ impl<'a> State<'a> { out: Box, ann: &'a dyn PpAnn, is_expanded: bool) -> State<'a> { - let (cmnts, lits) = comments::gather_comments_and_literals(sess, filename, input); - - State::new( - cm, - out, - ann, - Some(cmnts), - // If the code is post expansion, don't use the table of - // literals, since it doesn't correspond with the literals - // in the AST anymore. - if is_expanded { None } else { Some(lits) }, - is_expanded - ) + let comments = comments::gather_comments(sess, filename, input); + State::new(cm, out, ann, Some(comments), is_expanded) } pub fn new(cm: &'a SourceMap, out: Box, ann: &'a dyn PpAnn, comments: Option>, - literals: Option>, is_expanded: bool) -> State<'a> { State { s: pp::mk_printer(out, DEFAULT_COLUMNS), cm: Some(cm), comments, - literals: literals.unwrap_or_default().into_iter().peekable(), cur_cmnt: 0, boxes: Vec::new(), ann, - is_expanded: is_expanded + is_expanded, } } } @@ -180,6 +162,31 @@ fn binop_to_string(op: BinOpToken) -> &'static str { } } +fn literal_to_string(lit: token::Lit, suffix: Option) -> String { + let mut out = match lit { + token::Byte(b) => format!("b'{}'", b), + token::Char(c) => format!("'{}'", c), + token::Err(c) => format!("'{}'", c), + token::Bool(c) | + token::Float(c) | + token::Integer(c) => c.to_string(), + token::Str_(s) => format!("\"{}\"", s), + token::StrRaw(s, n) => format!("r{delim}\"{string}\"{delim}", + delim="#".repeat(n as usize), + string=s), + token::ByteStr(v) => format!("b\"{}\"", v), + token::ByteStrRaw(s, n) => format!("br{delim}\"{string}\"{delim}", + delim="#".repeat(n as usize), + string=s), + }; + + if let Some(suffix) = suffix { + out.push_str(&suffix.as_str()) + } + + out +} + pub fn token_to_string(tok: &Token) -> String { match *tok { token::Eq => "=".to_string(), @@ -223,30 +230,7 @@ pub fn token_to_string(tok: &Token) -> String { token::SingleQuote => "'".to_string(), /* Literals */ - token::Literal(lit, suf) => { - let mut out = match lit { - token::Bool(_) => panic!("literal token contains `Lit::Bool`"), - token::Byte(b) => format!("b'{}'", b), - token::Char(c) => format!("'{}'", c), - token::Err(c) => format!("'{}'", c), - token::Float(c) | - token::Integer(c) => c.to_string(), - token::Str_(s) => format!("\"{}\"", s), - token::StrRaw(s, n) => format!("r{delim}\"{string}\"{delim}", - delim="#".repeat(n as usize), - string=s), - token::ByteStr(v) => format!("b\"{}\"", v), - token::ByteStrRaw(s, n) => format!("br{delim}\"{string}\"{delim}", - delim="#".repeat(n as usize), - string=s), - }; - - if let Some(s) = suf { - out.push_str(&s.as_str()) - } - - out - } + token::Literal(lit, suf) => literal_to_string(lit, suf), /* Name components */ token::Ident(s, false) => s.to_string(), @@ -439,8 +423,6 @@ pub trait PrintState<'a> { fn boxes(&mut self) -> &mut Vec; fn comments(&mut self) -> &mut Option>; fn cur_cmnt(&mut self) -> &mut usize; - fn cur_lit(&mut self) -> Option<&comments::Literal>; - fn bump_lit(&mut self) -> Option; fn word_space>>(&mut self, w: S) -> io::Result<()> { self.writer().word(w)?; @@ -505,21 +487,6 @@ pub trait PrintState<'a> { self.end() } - fn next_lit(&mut self, pos: BytePos) -> Option { - while let Some(ltrl) = self.cur_lit().cloned() { - if ltrl.pos > pos { break; } - - // we don't need the value here since we're forced to clone cur_lit - // due to lack of NLL. - self.bump_lit(); - if ltrl.pos == pos { - return Some(ltrl); - } - } - - None - } - fn maybe_print_comment(&mut self, pos: BytePos) -> io::Result<()> { while let Some(ref cmnt) = self.next_comment() { if cmnt.pos < pos { @@ -607,60 +574,7 @@ pub trait PrintState<'a> { fn print_literal(&mut self, lit: &ast::Lit) -> io::Result<()> { self.maybe_print_comment(lit.span.lo())?; - if let Some(ltrl) = self.next_lit(lit.span.lo()) { - return self.writer().word(ltrl.lit.clone()); - } - match lit.node { - ast::LitKind::Str(st, style) => self.print_string(&st.as_str(), style), - ast::LitKind::Err(st) => { - let st = st.as_str().escape_debug().to_string(); - let mut res = String::with_capacity(st.len() + 2); - res.push('\''); - res.push_str(&st); - res.push('\''); - self.writer().word(res) - } - ast::LitKind::Byte(byte) => { - let mut res = String::from("b'"); - res.extend(ascii::escape_default(byte).map(|c| c as char)); - res.push('\''); - self.writer().word(res) - } - ast::LitKind::Char(ch) => { - let mut res = String::from("'"); - res.extend(ch.escape_default()); - res.push('\''); - self.writer().word(res) - } - ast::LitKind::Int(i, t) => { - match t { - ast::LitIntType::Signed(st) => { - self.writer().word(st.val_to_string(i as i128)) - } - ast::LitIntType::Unsigned(ut) => { - self.writer().word(ut.val_to_string(i)) - } - ast::LitIntType::Unsuffixed => { - self.writer().word(i.to_string()) - } - } - } - ast::LitKind::Float(ref f, t) => { - self.writer().word(format!("{}{}", &f, t.ty_to_string())) - } - ast::LitKind::FloatUnsuffixed(ref f) => self.writer().word(f.as_str().to_string()), - ast::LitKind::Bool(val) => { - if val { self.writer().word("true") } else { self.writer().word("false") } - } - ast::LitKind::ByteStr(ref v) => { - let mut escaped: String = String::new(); - for &ch in v.iter() { - escaped.extend(ascii::escape_default(ch) - .map(|c| c as char)); - } - self.writer().word(format!("b\"{}\"", escaped)) - } - } + self.writer().word(literal_to_string(lit.token, lit.suffix)) } fn print_string(&mut self, st: &str, @@ -881,14 +795,6 @@ impl<'a> PrintState<'a> for State<'a> { fn cur_cmnt(&mut self) -> &mut usize { &mut self.cur_cmnt } - - fn cur_lit(&mut self) -> Option<&comments::Literal> { - self.literals.peek() - } - - fn bump_lit(&mut self) -> Option { - self.literals.next() - } } impl<'a> State<'a> { From 8739668438a40712a0bc617bc587d415c8cb42f0 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Fri, 10 May 2019 03:00:51 +0300 Subject: [PATCH 5/7] Simplify conversions between tokens and semantic literals --- src/librustc/hir/print.rs | 56 +---- src/librustdoc/clean/cfg.rs | 24 +- src/libsyntax/attr/mod.rs | 104 +++++---- src/libsyntax/ext/build.rs | 11 +- src/libsyntax/parse/mod.rs | 212 +++++++++++------- src/libsyntax/parse/parser.rs | 153 +++---------- src/libsyntax/parse/token.rs | 7 + src/libsyntax/print/pprust.rs | 2 +- .../ui/malformed/malformed-interpolated.rs | 5 +- .../malformed/malformed-interpolated.stderr | 9 +- 10 files changed, 259 insertions(+), 324 deletions(-) diff --git a/src/librustc/hir/print.rs b/src/librustc/hir/print.rs index 6d48ad94a6677..475bf8d837239 100644 --- a/src/librustc/hir/print.rs +++ b/src/librustc/hir/print.rs @@ -5,7 +5,7 @@ use syntax::parse::ParseSess; use syntax::parse::lexer::comments; use syntax::print::pp::{self, Breaks}; use syntax::print::pp::Breaks::{Consistent, Inconsistent}; -use syntax::print::pprust::PrintState; +use syntax::print::pprust::{self, PrintState}; use syntax::ptr::P; use syntax::symbol::keywords; use syntax::util::parser::{self, AssocOp, Fixity}; @@ -15,7 +15,6 @@ use crate::hir; use crate::hir::{PatKind, GenericBound, TraitBoundModifier, RangeEnd}; use crate::hir::{GenericParam, GenericParamKind, GenericArg}; -use std::ascii; use std::borrow::Cow; use std::cell::Cell; use std::io::{self, Write, Read}; @@ -1251,57 +1250,8 @@ impl<'a> State<'a> { fn print_literal(&mut self, lit: &hir::Lit) -> io::Result<()> { self.maybe_print_comment(lit.span.lo())?; - match lit.node { - hir::LitKind::Str(st, style) => self.print_string(&st.as_str(), style), - hir::LitKind::Err(st) => { - let st = st.as_str().escape_debug().to_string(); - let mut res = String::with_capacity(st.len() + 2); - res.push('\''); - res.push_str(&st); - res.push('\''); - self.writer().word(res) - } - hir::LitKind::Byte(byte) => { - let mut res = String::from("b'"); - res.extend(ascii::escape_default(byte).map(|c| c as char)); - res.push('\''); - self.writer().word(res) - } - hir::LitKind::Char(ch) => { - let mut res = String::from("'"); - res.extend(ch.escape_default()); - res.push('\''); - self.writer().word(res) - } - hir::LitKind::Int(i, t) => { - match t { - ast::LitIntType::Signed(st) => { - self.writer().word(st.val_to_string(i as i128)) - } - ast::LitIntType::Unsigned(ut) => { - self.writer().word(ut.val_to_string(i)) - } - ast::LitIntType::Unsuffixed => { - self.writer().word(i.to_string()) - } - } - } - hir::LitKind::Float(ref f, t) => { - self.writer().word(format!("{}{}", &f, t.ty_to_string())) - } - hir::LitKind::FloatUnsuffixed(ref f) => self.writer().word(f.as_str().to_string()), - hir::LitKind::Bool(val) => { - if val { self.writer().word("true") } else { self.writer().word("false") } - } - hir::LitKind::ByteStr(ref v) => { - let mut escaped: String = String::new(); - for &ch in v.iter() { - escaped.extend(ascii::escape_default(ch) - .map(|c| c as char)); - } - self.writer().word(format!("b\"{}\"", escaped)) - } - } + let (token, suffix) = lit.node.to_lit_token(); + self.writer().word(pprust::literal_to_string(token, suffix)) } pub fn print_expr(&mut self, expr: &hir::Expr) -> io::Result<()> { diff --git a/src/librustdoc/clean/cfg.rs b/src/librustdoc/clean/cfg.rs index 257f02af4cdd8..51fe26b374313 100644 --- a/src/librustdoc/clean/cfg.rs +++ b/src/librustdoc/clean/cfg.rs @@ -591,13 +591,10 @@ mod test { let mi = dummy_meta_item_word("all"); assert_eq!(Cfg::parse(&mi), Ok(word_cfg("all"))); - let node = LitKind::Str(Symbol::intern("done"), StrStyle::Cooked); - let (token, suffix) = node.lit_token(); - let mi = MetaItem { - path: Path::from_ident(Ident::from_str("all")), - node: MetaItemKind::NameValue(Lit { node, token, suffix, span: DUMMY_SP }), - span: DUMMY_SP, - }; + let mi = attr::mk_name_value_item_str( + Ident::from_str("all"), + dummy_spanned(Symbol::intern("done")) + ); assert_eq!(Cfg::parse(&mi), Ok(name_value_cfg("all", "done"))); let mi = dummy_meta_item_list!(all, [a, b]); @@ -625,13 +622,12 @@ mod test { #[test] fn test_parse_err() { with_globals(|| { - let node = LitKind::Bool(false); - let (token, suffix) = node.lit_token(); - let mi = MetaItem { - path: Path::from_ident(Ident::from_str("foo")), - node: MetaItemKind::NameValue(Lit { node, token, suffix, span: DUMMY_SP }), - span: DUMMY_SP, - }; + let mi = attr::mk_name_value_item( + DUMMY_SP, + Ident::from_str("foo"), + LitKind::Bool(false), + DUMMY_SP, + ); assert!(Cfg::parse(&mi).is_err()); let mi = dummy_meta_item_list!(not, [a, b]); diff --git a/src/libsyntax/attr/mod.rs b/src/libsyntax/attr/mod.rs index e23c18266519f..c122e1994e749 100644 --- a/src/libsyntax/attr/mod.rs +++ b/src/libsyntax/attr/mod.rs @@ -14,7 +14,7 @@ pub use StabilityLevel::*; use crate::ast; use crate::ast::{AttrId, Attribute, AttrStyle, Name, Ident, Path, PathSegment}; use crate::ast::{MetaItem, MetaItemKind, NestedMetaItem}; -use crate::ast::{Lit, LitKind, Expr, ExprKind, Item, Local, Stmt, StmtKind, GenericParam}; +use crate::ast::{Lit, LitKind, Expr, Item, Local, Stmt, StmtKind, GenericParam}; use crate::mut_visit::visit_clobber; use crate::source_map::{BytePos, Spanned, dummy_spanned}; use crate::parse::lexer::comments::{doc_comment_style, strip_doc_comment_decoration}; @@ -27,9 +27,11 @@ use crate::ThinVec; use crate::tokenstream::{TokenStream, TokenTree, DelimSpan}; use crate::GLOBALS; +use errors::Handler; use log::debug; use syntax_pos::{FileName, Span}; +use std::ascii; use std::iter; use std::ops::DerefMut; @@ -350,14 +352,13 @@ impl Attribute { /* Constructors */ pub fn mk_name_value_item_str(ident: Ident, value: Spanned) -> MetaItem { - let node = LitKind::Str(value.node, ast::StrStyle::Cooked); - let (token, suffix) = node.lit_token(); - let value = Lit { node, token, suffix, span: value.span }; - mk_name_value_item(ident.span.to(value.span), ident, value) + let lit_kind = LitKind::Str(value.node, ast::StrStyle::Cooked); + mk_name_value_item(ident.span.to(value.span), ident, lit_kind, value.span) } -pub fn mk_name_value_item(span: Span, ident: Ident, value: Lit) -> MetaItem { - MetaItem { path: Path::from_ident(ident), span, node: MetaItemKind::NameValue(value) } +pub fn mk_name_value_item(span: Span, ident: Ident, lit_kind: LitKind, lit_span: Span) -> MetaItem { + let lit = Lit::from_lit_kind(lit_kind, lit_span); + MetaItem { path: Path::from_ident(ident), span, node: MetaItemKind::NameValue(lit) } } pub fn mk_list_item(span: Span, ident: Ident, items: Vec) -> MetaItem { @@ -419,9 +420,8 @@ pub fn mk_spanned_attr_outer(sp: Span, id: AttrId, item: MetaItem) -> Attribute pub fn mk_sugared_doc_attr(id: AttrId, text: Symbol, span: Span) -> Attribute { let style = doc_comment_style(&text.as_str()); - let node = LitKind::Str(text, ast::StrStyle::Cooked); - let (token, suffix) = node.lit_token(); - let lit = Lit { node, token, suffix, span }; + let lit_kind = LitKind::Str(text, ast::StrStyle::Cooked); + let lit = Lit::from_lit_kind(lit_kind, span); Attribute { id, style, @@ -565,9 +565,7 @@ impl MetaItemKind { Some(TokenTree::Token(_, token::Eq)) => { tokens.next(); return if let Some(TokenTree::Token(span, token)) = tokens.next() { - LitKind::from_token(token).map(|(node, token, suffix)| { - MetaItemKind::NameValue(Lit { node, token, suffix, span }) - }) + Lit::from_token(&token, span, None).map(MetaItemKind::NameValue) } else { None }; @@ -612,9 +610,9 @@ impl NestedMetaItem { where I: Iterator, { if let Some(TokenTree::Token(span, token)) = tokens.peek().cloned() { - if let Some((node, token, suffix)) = LitKind::from_token(token) { + if let Some(lit) = Lit::from_token(&token, span, None) { tokens.next(); - return Some(NestedMetaItem::Literal(Lit { node, token, suffix, span })); + return Some(NestedMetaItem::Literal(lit)); } } @@ -624,21 +622,19 @@ impl NestedMetaItem { impl Lit { crate fn tokens(&self) -> TokenStream { - TokenTree::Token(self.span, self.node.token()).into() + let token = match self.token { + token::Bool(symbol) => Token::Ident(Ident::with_empty_ctxt(symbol), false), + token => Token::Literal(token, self.suffix), + }; + TokenTree::Token(self.span, token).into() } } impl LitKind { - fn token(&self) -> Token { - match self.lit_token() { - (token::Bool(symbol), _) => Token::Ident(Ident::with_empty_ctxt(symbol), false), - (lit, suffix) => Token::Literal(lit, suffix), - } - } - - pub fn lit_token(&self) -> (token::Lit, Option) { - use std::ascii; - + /// Attempts to recover a token from semantic literal. + /// This function is used when the original token doesn't exist (e.g. the literal is created + /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). + pub fn to_lit_token(&self) -> (token::Lit, Option) { match *self { LitKind::Str(string, ast::StrStyle::Cooked) => { let escaped = string.as_str().escape_default().to_string(); @@ -679,29 +675,45 @@ impl LitKind { LitKind::Err(val) => (token::Lit::Err(val), None), } } +} - fn from_token(token: Token) -> Option<(LitKind, token::Lit, Option)> { - match token { - Token::Ident(ident, false) if ident.name == keywords::True.name() => - Some((LitKind::Bool(true), token::Bool(ident.name), None)), - Token::Ident(ident, false) if ident.name == keywords::False.name() => - Some((LitKind::Bool(false), token::Bool(ident.name), None)), - Token::Interpolated(nt) => match *nt { - token::NtExpr(ref v) | token::NtLiteral(ref v) => match v.node { - ExprKind::Lit(ref lit) => Some((lit.node.clone(), lit.token, lit.suffix)), - _ => None, - }, - _ => None, - }, - Token::Literal(lit, suf) => { - let (suffix_illegal, result) = parse::lit_token(lit, suf, None); - if result.is_none() || suffix_illegal && suf.is_some() { - return None; +impl Lit { + /// Converts literal token with a suffix into an AST literal. + /// Works speculatively and may return `None` is diagnostic handler is not passed. + /// If diagnostic handler is passed, may return `Some`, + /// possibly after reporting non-fatal errors and recovery, or `None` for irrecoverable errors. + crate fn from_token( + token: &token::Token, + span: Span, + diag: Option<(Span, &Handler)>, + ) -> Option { + let (token, suffix) = match *token { + token::Ident(ident, false) if ident.name == keywords::True.name() || + ident.name == keywords::False.name() => + (token::Bool(ident.name), None), + token::Literal(token, suffix) => + (token, suffix), + token::Interpolated(ref nt) => { + if let token::NtExpr(expr) | token::NtLiteral(expr) = &**nt { + if let ast::ExprKind::Lit(lit) = &expr.node { + return Some(lit.clone()); + } } - Some((result.unwrap(), lit, suf)) + return None; } - _ => None, - } + _ => return None, + }; + + let node = LitKind::from_lit_token(token, suffix, diag)?; + Some(Lit { node, token, suffix, span }) + } + + /// Attempts to recover an AST literal from semantic literal. + /// This function is used when the original token doesn't exist (e.g. the literal is created + /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). + pub fn from_lit_kind(node: LitKind, span: Span) -> Lit { + let (token, suffix) = node.to_lit_token(); + Lit { node, token, suffix, span } } } diff --git a/src/libsyntax/ext/build.rs b/src/libsyntax/ext/build.rs index 0f9977e85038b..d24106f697e19 100644 --- a/src/libsyntax/ext/build.rs +++ b/src/libsyntax/ext/build.rs @@ -697,9 +697,9 @@ impl<'a> AstBuilder for ExtCtxt<'a> { self.expr_struct(span, self.path_ident(span, id), fields) } - fn expr_lit(&self, span: Span, node: ast::LitKind) -> P { - let (token, suffix) = node.lit_token(); - self.expr(span, ast::ExprKind::Lit(ast::Lit { node, token, suffix, span })) + fn expr_lit(&self, span: Span, lit_kind: ast::LitKind) -> P { + let lit = ast::Lit::from_lit_kind(lit_kind, span); + self.expr(span, ast::ExprKind::Lit(lit)) } fn expr_usize(&self, span: Span, i: usize) -> P { self.expr_lit(span, ast::LitKind::Int(i as u128, @@ -1165,11 +1165,10 @@ impl<'a> AstBuilder for ExtCtxt<'a> { attr::mk_list_item(sp, Ident::with_empty_ctxt(name).with_span_pos(sp), mis) } - fn meta_name_value(&self, span: Span, name: ast::Name, node: ast::LitKind) + fn meta_name_value(&self, span: Span, name: ast::Name, lit_kind: ast::LitKind) -> ast::MetaItem { - let (token, suffix) = node.lit_token(); attr::mk_name_value_item(span, Ident::with_empty_ctxt(name).with_span_pos(span), - ast::Lit { node, token, suffix, span }) + lit_kind, span) } fn item_use(&self, sp: Span, diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index 4d4e99009a9d2..868b344c06584 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -1,11 +1,11 @@ //! The main parser interface. -use crate::ast::{self, CrateConfig, NodeId}; +use crate::ast::{self, CrateConfig, LitKind, NodeId}; use crate::early_buffered_lints::{BufferedEarlyLint, BufferedEarlyLintId}; use crate::source_map::{SourceMap, FilePathMapping}; use crate::feature_gate::UnstableFeatures; use crate::parse::parser::Parser; -use crate::symbol::Symbol; +use crate::symbol::{keywords, Symbol}; use crate::syntax::parse::parser::emit_unclosed_delims; use crate::tokenstream::{TokenStream, TokenTree}; use crate::diagnostics::plugin::ErrorMap; @@ -371,97 +371,151 @@ macro_rules! err { } } -crate fn lit_token(lit: token::Lit, suf: Option, diag: Option<(Span, &Handler)>) - -> (bool /* suffix illegal? */, Option) { - use ast::LitKind; - - match lit { - token::Bool(_) => panic!("literal token contains `Lit::Bool`"), - token::Byte(i) => { - let lit_kind = match unescape_byte(&i.as_str()) { - Ok(c) => LitKind::Byte(c), - Err(_) => LitKind::Err(i), - }; - (true, Some(lit_kind)) - }, - token::Char(i) => { - let lit_kind = match unescape_char(&i.as_str()) { - Ok(c) => LitKind::Char(c), - Err(_) => LitKind::Err(i), +crate fn expect_no_suffix(sp: Span, diag: &Handler, kind: &str, suffix: Option) { + match suffix { + None => {/* everything ok */} + Some(suf) => { + let text = suf.as_str(); + if text.is_empty() { + diag.span_bug(sp, "found empty literal suffix in Some") + } + let mut err = if kind == "a tuple index" && + ["i32", "u32", "isize", "usize"].contains(&text.to_string().as_str()) + { + // #59553: warn instead of reject out of hand to allow the fix to percolate + // through the ecosystem when people fix their macros + let mut err = diag.struct_span_warn( + sp, + &format!("suffixes on {} are invalid", kind), + ); + err.note(&format!( + "`{}` is *temporarily* accepted on tuple index fields as it was \ + incorrectly accepted on stable for a few releases", + text, + )); + err.help( + "on proc macros, you'll want to use `syn::Index::from` or \ + `proc_macro::Literal::*_unsuffixed` for code that will desugar \ + to tuple field access", + ); + err.note( + "for more context, see https://github.com/rust-lang/rust/issues/60210", + ); + err + } else { + diag.struct_span_err(sp, &format!("suffixes on {} are invalid", kind)) }; - (true, Some(lit_kind)) - }, - token::Err(i) => (true, Some(LitKind::Err(i))), - - // There are some valid suffixes for integer and float literals, - // so all the handling is done internally. - token::Integer(s) => (false, integer_lit(&s.as_str(), suf, diag)), - token::Float(s) => (false, float_lit(&s.as_str(), suf, diag)), - - token::Str_(mut sym) => { - // If there are no characters requiring special treatment we can - // reuse the symbol from the Token. Otherwise, we must generate a - // new symbol because the string in the LitKind is different to the - // string in the Token. - let mut has_error = false; - let s = &sym.as_str(); - if s.as_bytes().iter().any(|&c| c == b'\\' || c == b'\r') { - let mut buf = String::with_capacity(s.len()); - unescape_str(s, &mut |_, unescaped_char| { - match unescaped_char { + err.span_label(sp, format!("invalid suffix `{}`", text)); + err.emit(); + } + } +} + +impl LitKind { + /// Converts literal token with a suffix into a semantic literal. + /// Works speculatively and may return `None` is diagnostic handler is not passed. + /// If diagnostic handler is passed, always returns `Some`, + /// possibly after reporting non-fatal errors and recovery. + crate fn from_lit_token( + lit: token::Lit, + suf: Option, + diag: Option<(Span, &Handler)> + ) -> Option { + if suf.is_some() && !lit.may_have_suffix() { + err!(diag, |span, diag| { + expect_no_suffix(span, diag, &format!("a {}", lit.literal_name()), suf) + }); + } + + Some(match lit { + token::Bool(i) => { + assert!(i == keywords::True.name() || i == keywords::False.name()); + LitKind::Bool(i == keywords::True.name()) + } + token::Byte(i) => { + match unescape_byte(&i.as_str()) { + Ok(c) => LitKind::Byte(c), + Err(_) => LitKind::Err(i), + } + }, + token::Char(i) => { + match unescape_char(&i.as_str()) { + Ok(c) => LitKind::Char(c), + Err(_) => LitKind::Err(i), + } + }, + token::Err(i) => LitKind::Err(i), + + // There are some valid suffixes for integer and float literals, + // so all the handling is done internally. + token::Integer(s) => return integer_lit(&s.as_str(), suf, diag), + token::Float(s) => return float_lit(&s.as_str(), suf, diag), + + token::Str_(mut sym) => { + // If there are no characters requiring special treatment we can + // reuse the symbol from the Token. Otherwise, we must generate a + // new symbol because the string in the LitKind is different to the + // string in the Token. + let mut has_error = false; + let s = &sym.as_str(); + if s.as_bytes().iter().any(|&c| c == b'\\' || c == b'\r') { + let mut buf = String::with_capacity(s.len()); + unescape_str(s, &mut |_, unescaped_char| { + match unescaped_char { + Ok(c) => buf.push(c), + Err(_) => has_error = true, + } + }); + if has_error { + return Some(LitKind::Err(sym)); + } + sym = Symbol::intern(&buf) + } + + LitKind::Str(sym, ast::StrStyle::Cooked) + } + token::StrRaw(mut sym, n) => { + // Ditto. + let s = &sym.as_str(); + if s.contains('\r') { + sym = Symbol::intern(&raw_str_lit(s)); + } + LitKind::Str(sym, ast::StrStyle::Raw(n)) + } + token::ByteStr(i) => { + let s = &i.as_str(); + let mut buf = Vec::with_capacity(s.len()); + let mut has_error = false; + unescape_byte_str(s, &mut |_, unescaped_byte| { + match unescaped_byte { Ok(c) => buf.push(c), Err(_) => has_error = true, } }); if has_error { - return (true, Some(LitKind::Err(sym))); + return Some(LitKind::Err(i)); } - sym = Symbol::intern(&buf) + buf.shrink_to_fit(); + LitKind::ByteStr(Lrc::new(buf)) } - - (true, Some(LitKind::Str(sym, ast::StrStyle::Cooked))) - } - token::StrRaw(mut sym, n) => { - // Ditto. - let s = &sym.as_str(); - if s.contains('\r') { - sym = Symbol::intern(&raw_str_lit(s)); + token::ByteStrRaw(i, _) => { + LitKind::ByteStr(Lrc::new(i.to_string().into_bytes())) } - (true, Some(LitKind::Str(sym, ast::StrStyle::Raw(n)))) - } - token::ByteStr(i) => { - let s = &i.as_str(); - let mut buf = Vec::with_capacity(s.len()); - let mut has_error = false; - unescape_byte_str(s, &mut |_, unescaped_byte| { - match unescaped_byte { - Ok(c) => buf.push(c), - Err(_) => has_error = true, - } - }); - if has_error { - return (true, Some(LitKind::Err(i))); - } - buf.shrink_to_fit(); - (true, Some(LitKind::ByteStr(Lrc::new(buf)))) - } - token::ByteStrRaw(i, _) => { - (true, Some(LitKind::ByteStr(Lrc::new(i.to_string().into_bytes())))) - } + }) } } fn filtered_float_lit(data: Symbol, suffix: Option, diag: Option<(Span, &Handler)>) - -> Option { + -> Option { debug!("filtered_float_lit: {}, {:?}", data, suffix); let suffix = match suffix { Some(suffix) => suffix, - None => return Some(ast::LitKind::FloatUnsuffixed(data)), + None => return Some(LitKind::FloatUnsuffixed(data)), }; Some(match &*suffix.as_str() { - "f32" => ast::LitKind::Float(data, ast::FloatTy::F32), - "f64" => ast::LitKind::Float(data, ast::FloatTy::F64), + "f32" => LitKind::Float(data, ast::FloatTy::F32), + "f64" => LitKind::Float(data, ast::FloatTy::F64), suf => { err!(diag, |span, diag| { if suf.len() >= 2 && looks_like_width_suffix(&['f'], suf) { @@ -477,12 +531,12 @@ fn filtered_float_lit(data: Symbol, suffix: Option, diag: Option<(Span, } }); - ast::LitKind::FloatUnsuffixed(data) + LitKind::FloatUnsuffixed(data) } }) } fn float_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) - -> Option { + -> Option { debug!("float_lit: {:?}, {:?}", s, suffix); // FIXME #2252: bounds checking float literals is deferred until trans @@ -499,7 +553,7 @@ fn float_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) } fn integer_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) - -> Option { + -> Option { // s can only be ascii, byte indexing is fine // Strip underscores without allocating a new String unless necessary. @@ -595,7 +649,7 @@ fn integer_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) string was {:?}, the original suffix was {:?}", ty, base, s, orig, suffix); Some(match u128::from_str_radix(s, base) { - Ok(r) => ast::LitKind::Int(r, ty), + Ok(r) => LitKind::Int(r, ty), Err(_) => { // small bases are lexed as if they were base 10, e.g, the string // might be `0b10201`. This will cause the conversion above to fail, @@ -608,7 +662,7 @@ fn integer_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) if !already_errored { err!(diag, |span, diag| diag.span_err(span, "int literal is too large")); } - ast::LitKind::Int(0, ty) + LitKind::Int(0, ty) } }) } diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 8c0c2f4b6e28a..b81f7be9c2c14 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -15,7 +15,7 @@ use crate::ast::{ForeignItem, ForeignItemKind, FunctionRetTy}; use crate::ast::{GenericParam, GenericParamKind}; use crate::ast::GenericArg; use crate::ast::{Ident, ImplItem, IsAsync, IsAuto, Item, ItemKind}; -use crate::ast::{Label, Lifetime, Lit, LitKind}; +use crate::ast::{Label, Lifetime, Lit}; use crate::ast::{Local, LocalSource}; use crate::ast::MacStmtStyle; use crate::ast::{Mac, Mac_, MacDelimiter}; @@ -46,7 +46,7 @@ use crate::ptr::P; use crate::parse::PResult; use crate::ThinVec; use crate::tokenstream::{self, DelimSpan, TokenTree, TokenStream, TreeAndJoint}; -use crate::symbol::{Symbol, keywords}; +use crate::symbol::{keywords, Symbol}; use errors::{Applicability, DiagnosticBuilder, DiagnosticId, FatalError}; use rustc_target::spec::abi::{self, Abi}; @@ -1109,43 +1109,7 @@ impl<'a> Parser<'a> { } fn expect_no_suffix(&self, sp: Span, kind: &str, suffix: Option) { - match suffix { - None => {/* everything ok */} - Some(suf) => { - let text = suf.as_str(); - if text.is_empty() { - self.span_bug(sp, "found empty literal suffix in Some") - } - let mut err = if kind == "a tuple index" && - ["i32", "u32", "isize", "usize"].contains(&text.to_string().as_str()) - { - // #59553: warn instead of reject out of hand to allow the fix to percolate - // through the ecosystem when people fix their macros - let mut err = self.struct_span_warn( - sp, - &format!("suffixes on {} are invalid", kind), - ); - err.note(&format!( - "`{}` is *temporarily* accepted on tuple index fields as it was \ - incorrectly accepted on stable for a few releases", - text, - )); - err.help( - "on proc macros, you'll want to use `syn::Index::from` or \ - `proc_macro::Literal::*_unsuffixed` for code that will desugar \ - to tuple field access", - ); - err.note( - "for more context, see https://github.com/rust-lang/rust/issues/60210", - ); - err - } else { - self.struct_span_err(sp, &format!("suffixes on {} are invalid", kind)) - }; - err.span_label(sp, format!("invalid suffix `{}`", text)); - err.emit(); - } - } + parse::expect_no_suffix(sp, &self.sess.span_diagnostic, kind, suffix) } /// Attempts to consume a `<`. If `<<` is seen, replaces it with a single @@ -1452,9 +1416,6 @@ impl<'a> Parser<'a> { crate fn struct_span_err>(&self, sp: S, m: &str) -> DiagnosticBuilder<'a> { self.sess.span_diagnostic.struct_span_err(sp, m) } - fn struct_span_warn>(&self, sp: S, m: &str) -> DiagnosticBuilder<'a> { - self.sess.span_diagnostic.struct_span_warn(sp, m) - } crate fn span_bug>(&self, sp: S, m: &str) -> ! { self.sess.span_diagnostic.span_bug(sp, m) } @@ -2069,85 +2030,45 @@ impl<'a> Parser<'a> { } } - /// Matches `token_lit = LIT_INTEGER | ...`. - fn parse_lit_token(&mut self) -> PResult<'a, (LitKind, token::Lit, Option)> { - let out = match self.token { - token::Interpolated(ref nt) => match **nt { - token::NtExpr(ref v) | token::NtLiteral(ref v) => match v.node { - ExprKind::Lit(ref lit) => { (lit.node.clone(), lit.token, lit.suffix) } - _ => { return self.unexpected_last(&self.token); } - }, - _ => { return self.unexpected_last(&self.token); } - }, - token::Literal(lit, suf) => { - let diag = Some((self.span, &self.sess.span_diagnostic)); - let (suffix_illegal, result) = parse::lit_token(lit, suf, diag); - - if suffix_illegal { - let sp = self.span; - self.expect_no_suffix(sp, &format!("a {}", lit.literal_name()), suf) + /// Matches `lit = true | false | token_lit`. + crate fn parse_lit(&mut self) -> PResult<'a, Lit> { + let diag = Some((self.span, &self.sess.span_diagnostic)); + if let Some(lit) = Lit::from_token(&self.token, self.span, diag) { + self.bump(); + return Ok(lit); + } else if self.token == token::Dot { + // Recover `.4` as `0.4`. + let recovered = self.look_ahead(1, |t| { + if let token::Literal(token::Integer(val), suf) = *t { + let next_span = self.look_ahead_span(1); + if self.span.hi() == next_span.lo() { + let sym = String::from("0.") + &val.as_str(); + let token = token::Literal(token::Float(Symbol::intern(&sym)), suf); + return Some((token, self.span.to(next_span))); + } } - - (result.unwrap(), lit, suf) - } - token::Dot if self.look_ahead(1, |t| match t { - token::Literal(token::Lit::Integer(_) , _) => true, - _ => false, - }) => { // recover from `let x = .4;` - let lo = self.span; - self.bump(); - if let token::Literal( - token::Lit::Integer(val), - suffix, - ) = self.token { - let float_suffix = suffix.and_then(|s| { - let s = s.as_str(); - if s == "f32" { - Some("f32") - } else if s == "f64" { - Some("f64") - } else { - None - } - }).unwrap_or(""); - self.bump(); - let sp = lo.to(self.prev_span); - let mut err = self.diagnostic() - .struct_span_err(sp, "float literals must have an integer part"); - err.span_suggestion( - sp, + None + }); + if let Some((token, span)) = recovered { + self.diagnostic() + .struct_span_err(span, "float literals must have an integer part") + .span_suggestion( + span, "must have an integer part", - format!("0.{}{}", val, float_suffix), + pprust::token_to_string(&token), Applicability::MachineApplicable, - ); - err.emit(); - return Ok((match float_suffix { - "f32" => ast::LitKind::Float(val, ast::FloatTy::F32), - "f64" => ast::LitKind::Float(val, ast::FloatTy::F64), - _ => ast::LitKind::FloatUnsuffixed(val), - }, token::Float(val), suffix)); - } else { - unreachable!(); - }; + ) + .emit(); + let diag = Some((span, &self.sess.span_diagnostic)); + if let Some(lit) = Lit::from_token(&token, span, diag) { + self.bump(); + self.bump(); + return Ok(lit); + } } - _ => { return self.unexpected_last(&self.token); } - }; - - self.bump(); - Ok(out) - } + } - /// Matches `lit = true | false | token_lit`. - crate fn parse_lit(&mut self) -> PResult<'a, Lit> { - let lo = self.span; - let (node, token, suffix) = if self.eat_keyword(keywords::True) { - (LitKind::Bool(true), token::Bool(keywords::True.name()), None) - } else if self.eat_keyword(keywords::False) { - (LitKind::Bool(false), token::Bool(keywords::False.name()), None) - } else { - self.parse_lit_token()? - }; - Ok(Lit { node, token, suffix, span: lo.to(self.prev_span) }) + self.unexpected_last(&self.token) } /// Matches `'-' lit | lit` (cf. `ast_validation::AstValidator::check_expr_within_pat`). diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index 48a949257ffb3..0c2ea70aa20c8 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -90,6 +90,13 @@ impl Lit { } } + crate fn may_have_suffix(&self) -> bool { + match *self { + Integer(..) | Float(..) => true, + _ => false, + } + } + // See comments in `Nonterminal::to_tokenstream` for why we care about // *probably* equal here rather than actual equality fn probably_equal_for_proc_macro(&self, other: &Lit) -> bool { diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs index fa16a2b200ff3..0e8ac6c35b9bf 100644 --- a/src/libsyntax/print/pprust.rs +++ b/src/libsyntax/print/pprust.rs @@ -162,7 +162,7 @@ fn binop_to_string(op: BinOpToken) -> &'static str { } } -fn literal_to_string(lit: token::Lit, suffix: Option) -> String { +pub fn literal_to_string(lit: token::Lit, suffix: Option) -> String { let mut out = match lit { token::Byte(b) => format!("b'{}'", b), token::Char(c) => format!("'{}'", c), diff --git a/src/test/ui/malformed/malformed-interpolated.rs b/src/test/ui/malformed/malformed-interpolated.rs index e452435968bac..7c4ca3c017e7b 100644 --- a/src/test/ui/malformed/malformed-interpolated.rs +++ b/src/test/ui/malformed/malformed-interpolated.rs @@ -2,8 +2,7 @@ macro_rules! check { ($expr: expr) => ( - #[my_attr = $expr] //~ ERROR suffixed literals are not allowed in attributes - //~| ERROR unexpected token: `-0` + #[my_attr = $expr] //~ ERROR unexpected token: `-0` //~| ERROR unexpected token: `0 + 0` use main as _; ); @@ -11,7 +10,7 @@ macro_rules! check { check!("0"); // OK check!(0); // OK -check!(0u8); // ERROR, see above +check!(0u8); //~ ERROR suffixed literals are not allowed in attributes check!(-0); // ERROR, see above check!(0 + 0); // ERROR, see above diff --git a/src/test/ui/malformed/malformed-interpolated.stderr b/src/test/ui/malformed/malformed-interpolated.stderr index efeede0148dac..bc2146e409d47 100644 --- a/src/test/ui/malformed/malformed-interpolated.stderr +++ b/src/test/ui/malformed/malformed-interpolated.stderr @@ -1,11 +1,8 @@ error: suffixed literals are not allowed in attributes - --> $DIR/malformed-interpolated.rs:5:21 + --> $DIR/malformed-interpolated.rs:13:8 | -LL | #[my_attr = $expr] - | ^^^^^ -... -LL | check!(0u8); // ERROR, see above - | ------------ in this macro invocation +LL | check!(0u8); + | ^^^ | = help: instead of using a suffixed literal (1u8, 1.0f32, etc.), use an unsuffixed version (1, 1.0, etc.). From 3f064cae3d9d0d33951a44c30d83696563244572 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sat, 11 May 2019 02:31:34 +0300 Subject: [PATCH 6/7] Move literal parsing code into a separate file Remove some dead code --- src/libsyntax/attr/mod.rs | 99 ---- src/libsyntax/parse/classify.rs | 13 - src/libsyntax/parse/lexer/mod.rs | 19 +- src/libsyntax/parse/literal.rs | 487 ++++++++++++++++++ src/libsyntax/parse/mod.rs | 352 +------------ src/libsyntax/parse/parser.rs | 56 +- src/test/ui/attr-eq-token-tree.stderr | 4 +- ...sive_range_pattern_syntax_collision.stderr | 4 +- ...ive_range_pattern_syntax_collision2.stderr | 4 +- src/test/ui/macros/macro-attribute.stderr | 4 +- .../malformed/malformed-interpolated.stderr | 8 +- src/test/ui/parser/attr-bad-meta-2.stderr | 4 +- src/test/ui/parser/pat-tuple-5.stderr | 4 +- 13 files changed, 521 insertions(+), 537 deletions(-) create mode 100644 src/libsyntax/parse/literal.rs diff --git a/src/libsyntax/attr/mod.rs b/src/libsyntax/attr/mod.rs index c122e1994e749..07e4bbf78ffdf 100644 --- a/src/libsyntax/attr/mod.rs +++ b/src/libsyntax/attr/mod.rs @@ -27,11 +27,9 @@ use crate::ThinVec; use crate::tokenstream::{TokenStream, TokenTree, DelimSpan}; use crate::GLOBALS; -use errors::Handler; use log::debug; use syntax_pos::{FileName, Span}; -use std::ascii; use std::iter; use std::ops::DerefMut; @@ -620,103 +618,6 @@ impl NestedMetaItem { } } -impl Lit { - crate fn tokens(&self) -> TokenStream { - let token = match self.token { - token::Bool(symbol) => Token::Ident(Ident::with_empty_ctxt(symbol), false), - token => Token::Literal(token, self.suffix), - }; - TokenTree::Token(self.span, token).into() - } -} - -impl LitKind { - /// Attempts to recover a token from semantic literal. - /// This function is used when the original token doesn't exist (e.g. the literal is created - /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). - pub fn to_lit_token(&self) -> (token::Lit, Option) { - match *self { - LitKind::Str(string, ast::StrStyle::Cooked) => { - let escaped = string.as_str().escape_default().to_string(); - (token::Lit::Str_(Symbol::intern(&escaped)), None) - } - LitKind::Str(string, ast::StrStyle::Raw(n)) => { - (token::Lit::StrRaw(string, n), None) - } - LitKind::ByteStr(ref bytes) => { - let string = bytes.iter().cloned().flat_map(ascii::escape_default) - .map(Into::::into).collect::(); - (token::Lit::ByteStr(Symbol::intern(&string)), None) - } - LitKind::Byte(byte) => { - let string: String = ascii::escape_default(byte).map(Into::::into).collect(); - (token::Lit::Byte(Symbol::intern(&string)), None) - } - LitKind::Char(ch) => { - let string: String = ch.escape_default().map(Into::::into).collect(); - (token::Lit::Char(Symbol::intern(&string)), None) - } - LitKind::Int(n, ty) => { - let suffix = match ty { - ast::LitIntType::Unsigned(ty) => Some(Symbol::intern(ty.ty_to_string())), - ast::LitIntType::Signed(ty) => Some(Symbol::intern(ty.ty_to_string())), - ast::LitIntType::Unsuffixed => None, - }; - (token::Lit::Integer(Symbol::intern(&n.to_string())), suffix) - } - LitKind::Float(symbol, ty) => { - (token::Lit::Float(symbol), Some(Symbol::intern(ty.ty_to_string()))) - } - LitKind::FloatUnsuffixed(symbol) => (token::Lit::Float(symbol), None), - LitKind::Bool(value) => { - let kw = if value { keywords::True } else { keywords::False }; - (token::Lit::Bool(kw.name()), None) - } - LitKind::Err(val) => (token::Lit::Err(val), None), - } - } -} - -impl Lit { - /// Converts literal token with a suffix into an AST literal. - /// Works speculatively and may return `None` is diagnostic handler is not passed. - /// If diagnostic handler is passed, may return `Some`, - /// possibly after reporting non-fatal errors and recovery, or `None` for irrecoverable errors. - crate fn from_token( - token: &token::Token, - span: Span, - diag: Option<(Span, &Handler)>, - ) -> Option { - let (token, suffix) = match *token { - token::Ident(ident, false) if ident.name == keywords::True.name() || - ident.name == keywords::False.name() => - (token::Bool(ident.name), None), - token::Literal(token, suffix) => - (token, suffix), - token::Interpolated(ref nt) => { - if let token::NtExpr(expr) | token::NtLiteral(expr) = &**nt { - if let ast::ExprKind::Lit(lit) = &expr.node { - return Some(lit.clone()); - } - } - return None; - } - _ => return None, - }; - - let node = LitKind::from_lit_token(token, suffix, diag)?; - Some(Lit { node, token, suffix, span }) - } - - /// Attempts to recover an AST literal from semantic literal. - /// This function is used when the original token doesn't exist (e.g. the literal is created - /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). - pub fn from_lit_kind(node: LitKind, span: Span) -> Lit { - let (token, suffix) = node.to_lit_token(); - Lit { node, token, suffix, span } - } -} - pub trait HasAttrs: Sized { fn attrs(&self) -> &[ast::Attribute]; fn visit_attrs)>(&mut self, f: F); diff --git a/src/libsyntax/parse/classify.rs b/src/libsyntax/parse/classify.rs index b4103440e3577..dfd6f451c28d7 100644 --- a/src/libsyntax/parse/classify.rs +++ b/src/libsyntax/parse/classify.rs @@ -25,16 +25,3 @@ pub fn expr_requires_semi_to_be_stmt(e: &ast::Expr) -> bool { _ => true, } } - -/// this statement requires a semicolon after it. -/// note that in one case (`stmt_semi`), we've already -/// seen the semicolon, and thus don't need another. -pub fn stmt_ends_with_semi(stmt: &ast::StmtKind) -> bool { - match *stmt { - ast::StmtKind::Local(_) => true, - ast::StmtKind::Expr(ref e) => expr_requires_semi_to_be_stmt(e), - ast::StmtKind::Item(_) | - ast::StmtKind::Semi(..) | - ast::StmtKind::Mac(..) => false, - } -} diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 2882acb0e780c..e76605cde32ab 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -262,18 +262,6 @@ impl<'a> StringReader<'a> { } } - pub fn new(sess: &'a ParseSess, - source_file: Lrc, - override_span: Option) -> Self { - let mut sr = StringReader::new_raw(sess, source_file, override_span); - if sr.advance_token().is_err() { - sr.emit_fatal_errors(); - FatalError.raise(); - } - - sr - } - pub fn new_or_buffered_errs(sess: &'a ParseSess, source_file: Lrc, override_span: Option) -> Result> { @@ -1627,7 +1615,12 @@ mod tests { teststr: String) -> StringReader<'a> { let sf = sm.new_source_file(PathBuf::from(teststr.clone()).into(), teststr); - StringReader::new(sess, sf, None) + let mut sr = StringReader::new_raw(sess, sf, None); + if sr.advance_token().is_err() { + sr.emit_fatal_errors(); + FatalError.raise(); + } + sr } #[test] diff --git a/src/libsyntax/parse/literal.rs b/src/libsyntax/parse/literal.rs new file mode 100644 index 0000000000000..2c7ba13fbef82 --- /dev/null +++ b/src/libsyntax/parse/literal.rs @@ -0,0 +1,487 @@ +//! Code related to parsing literals. + +use crate::ast::{self, Ident, Lit, LitKind}; +use crate::parse::parser::Parser; +use crate::parse::PResult; +use crate::parse::token::{self, Token}; +use crate::parse::unescape::{unescape_str, unescape_char, unescape_byte_str, unescape_byte}; +use crate::print::pprust; +use crate::symbol::{keywords, Symbol}; +use crate::tokenstream::{TokenStream, TokenTree}; + +use errors::{Applicability, Handler}; +use log::debug; +use rustc_data_structures::sync::Lrc; +use syntax_pos::Span; + +use std::ascii; + +macro_rules! err { + ($opt_diag:expr, |$span:ident, $diag:ident| $($body:tt)*) => { + match $opt_diag { + Some(($span, $diag)) => { $($body)* } + None => return None, + } + } +} + +impl LitKind { + /// Converts literal token with a suffix into a semantic literal. + /// Works speculatively and may return `None` is diagnostic handler is not passed. + /// If diagnostic handler is passed, always returns `Some`, + /// possibly after reporting non-fatal errors and recovery. + fn from_lit_token( + lit: token::Lit, + suf: Option, + diag: Option<(Span, &Handler)> + ) -> Option { + if suf.is_some() && !lit.may_have_suffix() { + err!(diag, |span, diag| { + expect_no_suffix(span, diag, &format!("a {}", lit.literal_name()), suf) + }); + } + + Some(match lit { + token::Bool(i) => { + assert!(i == keywords::True.name() || i == keywords::False.name()); + LitKind::Bool(i == keywords::True.name()) + } + token::Byte(i) => { + match unescape_byte(&i.as_str()) { + Ok(c) => LitKind::Byte(c), + Err(_) => LitKind::Err(i), + } + }, + token::Char(i) => { + match unescape_char(&i.as_str()) { + Ok(c) => LitKind::Char(c), + Err(_) => LitKind::Err(i), + } + }, + token::Err(i) => LitKind::Err(i), + + // There are some valid suffixes for integer and float literals, + // so all the handling is done internally. + token::Integer(s) => return integer_lit(&s.as_str(), suf, diag), + token::Float(s) => return float_lit(&s.as_str(), suf, diag), + + token::Str_(mut sym) => { + // If there are no characters requiring special treatment we can + // reuse the symbol from the Token. Otherwise, we must generate a + // new symbol because the string in the LitKind is different to the + // string in the Token. + let mut has_error = false; + let s = &sym.as_str(); + if s.as_bytes().iter().any(|&c| c == b'\\' || c == b'\r') { + let mut buf = String::with_capacity(s.len()); + unescape_str(s, &mut |_, unescaped_char| { + match unescaped_char { + Ok(c) => buf.push(c), + Err(_) => has_error = true, + } + }); + if has_error { + return Some(LitKind::Err(sym)); + } + sym = Symbol::intern(&buf) + } + + LitKind::Str(sym, ast::StrStyle::Cooked) + } + token::StrRaw(mut sym, n) => { + // Ditto. + let s = &sym.as_str(); + if s.contains('\r') { + sym = Symbol::intern(&raw_str_lit(s)); + } + LitKind::Str(sym, ast::StrStyle::Raw(n)) + } + token::ByteStr(i) => { + let s = &i.as_str(); + let mut buf = Vec::with_capacity(s.len()); + let mut has_error = false; + unescape_byte_str(s, &mut |_, unescaped_byte| { + match unescaped_byte { + Ok(c) => buf.push(c), + Err(_) => has_error = true, + } + }); + if has_error { + return Some(LitKind::Err(i)); + } + buf.shrink_to_fit(); + LitKind::ByteStr(Lrc::new(buf)) + } + token::ByteStrRaw(i, _) => { + LitKind::ByteStr(Lrc::new(i.to_string().into_bytes())) + } + }) + } + + /// Attempts to recover a token from semantic literal. + /// This function is used when the original token doesn't exist (e.g. the literal is created + /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). + pub fn to_lit_token(&self) -> (token::Lit, Option) { + match *self { + LitKind::Str(string, ast::StrStyle::Cooked) => { + let escaped = string.as_str().escape_default().to_string(); + (token::Lit::Str_(Symbol::intern(&escaped)), None) + } + LitKind::Str(string, ast::StrStyle::Raw(n)) => { + (token::Lit::StrRaw(string, n), None) + } + LitKind::ByteStr(ref bytes) => { + let string = bytes.iter().cloned().flat_map(ascii::escape_default) + .map(Into::::into).collect::(); + (token::Lit::ByteStr(Symbol::intern(&string)), None) + } + LitKind::Byte(byte) => { + let string: String = ascii::escape_default(byte).map(Into::::into).collect(); + (token::Lit::Byte(Symbol::intern(&string)), None) + } + LitKind::Char(ch) => { + let string: String = ch.escape_default().map(Into::::into).collect(); + (token::Lit::Char(Symbol::intern(&string)), None) + } + LitKind::Int(n, ty) => { + let suffix = match ty { + ast::LitIntType::Unsigned(ty) => Some(Symbol::intern(ty.ty_to_string())), + ast::LitIntType::Signed(ty) => Some(Symbol::intern(ty.ty_to_string())), + ast::LitIntType::Unsuffixed => None, + }; + (token::Lit::Integer(Symbol::intern(&n.to_string())), suffix) + } + LitKind::Float(symbol, ty) => { + (token::Lit::Float(symbol), Some(Symbol::intern(ty.ty_to_string()))) + } + LitKind::FloatUnsuffixed(symbol) => (token::Lit::Float(symbol), None), + LitKind::Bool(value) => { + let kw = if value { keywords::True } else { keywords::False }; + (token::Lit::Bool(kw.name()), None) + } + LitKind::Err(val) => (token::Lit::Err(val), None), + } + } +} + +impl Lit { + /// Converts literal token with a suffix into an AST literal. + /// Works speculatively and may return `None` is diagnostic handler is not passed. + /// If diagnostic handler is passed, may return `Some`, + /// possibly after reporting non-fatal errors and recovery, or `None` for irrecoverable errors. + crate fn from_token( + token: &token::Token, + span: Span, + diag: Option<(Span, &Handler)>, + ) -> Option { + let (token, suffix) = match *token { + token::Ident(ident, false) if ident.name == keywords::True.name() || + ident.name == keywords::False.name() => + (token::Bool(ident.name), None), + token::Literal(token, suffix) => + (token, suffix), + token::Interpolated(ref nt) => { + if let token::NtExpr(expr) | token::NtLiteral(expr) = &**nt { + if let ast::ExprKind::Lit(lit) = &expr.node { + return Some(lit.clone()); + } + } + return None; + } + _ => return None, + }; + + let node = LitKind::from_lit_token(token, suffix, diag)?; + Some(Lit { node, token, suffix, span }) + } + + /// Attempts to recover an AST literal from semantic literal. + /// This function is used when the original token doesn't exist (e.g. the literal is created + /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). + pub fn from_lit_kind(node: LitKind, span: Span) -> Lit { + let (token, suffix) = node.to_lit_token(); + Lit { node, token, suffix, span } + } + + /// Losslessly convert an AST literal into a token stream. + crate fn tokens(&self) -> TokenStream { + let token = match self.token { + token::Bool(symbol) => Token::Ident(Ident::with_empty_ctxt(symbol), false), + token => Token::Literal(token, self.suffix), + }; + TokenTree::Token(self.span, token).into() + } +} + +impl<'a> Parser<'a> { + /// Matches `lit = true | false | token_lit`. + crate fn parse_lit(&mut self) -> PResult<'a, Lit> { + let diag = Some((self.span, &self.sess.span_diagnostic)); + if let Some(lit) = Lit::from_token(&self.token, self.span, diag) { + self.bump(); + return Ok(lit); + } else if self.token == token::Dot { + // Recover `.4` as `0.4`. + let recovered = self.look_ahead(1, |t| { + if let token::Literal(token::Integer(val), suf) = *t { + let next_span = self.look_ahead_span(1); + if self.span.hi() == next_span.lo() { + let sym = String::from("0.") + &val.as_str(); + let token = token::Literal(token::Float(Symbol::intern(&sym)), suf); + return Some((token, self.span.to(next_span))); + } + } + None + }); + if let Some((token, span)) = recovered { + self.diagnostic() + .struct_span_err(span, "float literals must have an integer part") + .span_suggestion( + span, + "must have an integer part", + pprust::token_to_string(&token), + Applicability::MachineApplicable, + ) + .emit(); + let diag = Some((span, &self.sess.span_diagnostic)); + if let Some(lit) = Lit::from_token(&token, span, diag) { + self.bump(); + self.bump(); + return Ok(lit); + } + } + } + + Err(self.span_fatal(self.span, &format!("unexpected token: {}", self.this_token_descr()))) + } +} + +crate fn expect_no_suffix(sp: Span, diag: &Handler, kind: &str, suffix: Option) { + match suffix { + None => {/* everything ok */} + Some(suf) => { + let text = suf.as_str(); + if text.is_empty() { + diag.span_bug(sp, "found empty literal suffix in Some") + } + let mut err = if kind == "a tuple index" && + ["i32", "u32", "isize", "usize"].contains(&text.to_string().as_str()) + { + // #59553: warn instead of reject out of hand to allow the fix to percolate + // through the ecosystem when people fix their macros + let mut err = diag.struct_span_warn( + sp, + &format!("suffixes on {} are invalid", kind), + ); + err.note(&format!( + "`{}` is *temporarily* accepted on tuple index fields as it was \ + incorrectly accepted on stable for a few releases", + text, + )); + err.help( + "on proc macros, you'll want to use `syn::Index::from` or \ + `proc_macro::Literal::*_unsuffixed` for code that will desugar \ + to tuple field access", + ); + err.note( + "for more context, see https://github.com/rust-lang/rust/issues/60210", + ); + err + } else { + diag.struct_span_err(sp, &format!("suffixes on {} are invalid", kind)) + }; + err.span_label(sp, format!("invalid suffix `{}`", text)); + err.emit(); + } + } +} + +/// Parses a string representing a raw string literal into its final form. The +/// only operation this does is convert embedded CRLF into a single LF. +fn raw_str_lit(lit: &str) -> String { + debug!("raw_str_lit: given {}", lit.escape_default()); + let mut res = String::with_capacity(lit.len()); + + let mut chars = lit.chars().peekable(); + while let Some(c) = chars.next() { + if c == '\r' { + if *chars.peek().unwrap() != '\n' { + panic!("lexer accepted bare CR"); + } + chars.next(); + res.push('\n'); + } else { + res.push(c); + } + } + + res.shrink_to_fit(); + res +} + +// check if `s` looks like i32 or u1234 etc. +fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { + s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) +} + +fn filtered_float_lit(data: Symbol, suffix: Option, diag: Option<(Span, &Handler)>) + -> Option { + debug!("filtered_float_lit: {}, {:?}", data, suffix); + let suffix = match suffix { + Some(suffix) => suffix, + None => return Some(LitKind::FloatUnsuffixed(data)), + }; + + Some(match &*suffix.as_str() { + "f32" => LitKind::Float(data, ast::FloatTy::F32), + "f64" => LitKind::Float(data, ast::FloatTy::F64), + suf => { + err!(diag, |span, diag| { + if suf.len() >= 2 && looks_like_width_suffix(&['f'], suf) { + // if it looks like a width, lets try to be helpful. + let msg = format!("invalid width `{}` for float literal", &suf[1..]); + diag.struct_span_err(span, &msg).help("valid widths are 32 and 64").emit() + } else { + let msg = format!("invalid suffix `{}` for float literal", suf); + diag.struct_span_err(span, &msg) + .span_label(span, format!("invalid suffix `{}`", suf)) + .help("valid suffixes are `f32` and `f64`") + .emit(); + } + }); + + LitKind::FloatUnsuffixed(data) + } + }) +} +fn float_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) + -> Option { + debug!("float_lit: {:?}, {:?}", s, suffix); + // FIXME #2252: bounds checking float literals is deferred until trans + + // Strip underscores without allocating a new String unless necessary. + let s2; + let s = if s.chars().any(|c| c == '_') { + s2 = s.chars().filter(|&c| c != '_').collect::(); + &s2 + } else { + s + }; + + filtered_float_lit(Symbol::intern(s), suffix, diag) +} + +fn integer_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) + -> Option { + // s can only be ascii, byte indexing is fine + + // Strip underscores without allocating a new String unless necessary. + let s2; + let mut s = if s.chars().any(|c| c == '_') { + s2 = s.chars().filter(|&c| c != '_').collect::(); + &s2 + } else { + s + }; + + debug!("integer_lit: {}, {:?}", s, suffix); + + let mut base = 10; + let orig = s; + let mut ty = ast::LitIntType::Unsuffixed; + + if s.starts_with('0') && s.len() > 1 { + match s.as_bytes()[1] { + b'x' => base = 16, + b'o' => base = 8, + b'b' => base = 2, + _ => { } + } + } + + // 1f64 and 2f32 etc. are valid float literals. + if let Some(suf) = suffix { + if looks_like_width_suffix(&['f'], &suf.as_str()) { + let err = match base { + 16 => Some("hexadecimal float literal is not supported"), + 8 => Some("octal float literal is not supported"), + 2 => Some("binary float literal is not supported"), + _ => None, + }; + if let Some(err) = err { + err!(diag, |span, diag| { + diag.struct_span_err(span, err) + .span_label(span, "not supported") + .emit(); + }); + } + return filtered_float_lit(Symbol::intern(s), Some(suf), diag) + } + } + + if base != 10 { + s = &s[2..]; + } + + if let Some(suf) = suffix { + if suf.as_str().is_empty() { + err!(diag, |span, diag| diag.span_bug(span, "found empty literal suffix in Some")); + } + ty = match &*suf.as_str() { + "isize" => ast::LitIntType::Signed(ast::IntTy::Isize), + "i8" => ast::LitIntType::Signed(ast::IntTy::I8), + "i16" => ast::LitIntType::Signed(ast::IntTy::I16), + "i32" => ast::LitIntType::Signed(ast::IntTy::I32), + "i64" => ast::LitIntType::Signed(ast::IntTy::I64), + "i128" => ast::LitIntType::Signed(ast::IntTy::I128), + "usize" => ast::LitIntType::Unsigned(ast::UintTy::Usize), + "u8" => ast::LitIntType::Unsigned(ast::UintTy::U8), + "u16" => ast::LitIntType::Unsigned(ast::UintTy::U16), + "u32" => ast::LitIntType::Unsigned(ast::UintTy::U32), + "u64" => ast::LitIntType::Unsigned(ast::UintTy::U64), + "u128" => ast::LitIntType::Unsigned(ast::UintTy::U128), + suf => { + // i and u look like widths, so lets + // give an error message along those lines + err!(diag, |span, diag| { + if looks_like_width_suffix(&['i', 'u'], suf) { + let msg = format!("invalid width `{}` for integer literal", &suf[1..]); + diag.struct_span_err(span, &msg) + .help("valid widths are 8, 16, 32, 64 and 128") + .emit(); + } else { + let msg = format!("invalid suffix `{}` for numeric literal", suf); + diag.struct_span_err(span, &msg) + .span_label(span, format!("invalid suffix `{}`", suf)) + .help("the suffix must be one of the integral types \ + (`u32`, `isize`, etc)") + .emit(); + } + }); + + ty + } + } + } + + debug!("integer_lit: the type is {:?}, base {:?}, the new string is {:?}, the original \ + string was {:?}, the original suffix was {:?}", ty, base, s, orig, suffix); + + Some(match u128::from_str_radix(s, base) { + Ok(r) => LitKind::Int(r, ty), + Err(_) => { + // small bases are lexed as if they were base 10, e.g, the string + // might be `0b10201`. This will cause the conversion above to fail, + // but these cases have errors in the lexer: we don't want to emit + // two errors, and we especially don't want to emit this error since + // it isn't necessarily true. + let already_errored = base < 10 && + s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base)); + + if !already_errored { + err!(diag, |span, diag| diag.span_err(span, "int literal is too large")); + } + LitKind::Int(0, ty) + } + }) +} diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index 868b344c06584..526143b28755f 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -1,11 +1,10 @@ //! The main parser interface. -use crate::ast::{self, CrateConfig, LitKind, NodeId}; +use crate::ast::{self, CrateConfig, NodeId}; use crate::early_buffered_lints::{BufferedEarlyLint, BufferedEarlyLintId}; use crate::source_map::{SourceMap, FilePathMapping}; use crate::feature_gate::UnstableFeatures; use crate::parse::parser::Parser; -use crate::symbol::{keywords, Symbol}; use crate::syntax::parse::parser::emit_unclosed_delims; use crate::tokenstream::{TokenStream, TokenTree}; use crate::diagnostics::plugin::ErrorMap; @@ -14,7 +13,6 @@ use crate::print::pprust::token_to_string; use errors::{Applicability, FatalError, Level, Handler, ColorConfig, Diagnostic, DiagnosticBuilder}; use rustc_data_structures::sync::{Lrc, Lock}; use syntax_pos::{Span, SourceFile, FileName, MultiSpan}; -use log::debug; use rustc_data_structures::fx::{FxHashSet, FxHashMap}; use std::borrow::Cow; @@ -25,18 +23,15 @@ pub type PResult<'a, T> = Result>; #[macro_use] pub mod parser; - +pub mod attr; pub mod lexer; pub mod token; -pub mod attr; -pub mod diagnostics; - -pub mod classify; - -pub(crate) mod unescape; -use unescape::{unescape_str, unescape_char, unescape_byte_str, unescape_byte}; -pub(crate) mod unescape_error_reporting; +crate mod classify; +crate mod diagnostics; +crate mod literal; +crate mod unescape; +crate mod unescape_error_reporting; /// Info about a parsing session. pub struct ParseSess { @@ -334,339 +329,6 @@ pub fn stream_to_parser(sess: &ParseSess, stream: TokenStream) -> Parser<'_> { Parser::new(sess, stream, None, true, false) } -/// Parses a string representing a raw string literal into its final form. The -/// only operation this does is convert embedded CRLF into a single LF. -fn raw_str_lit(lit: &str) -> String { - debug!("raw_str_lit: given {}", lit.escape_default()); - let mut res = String::with_capacity(lit.len()); - - let mut chars = lit.chars().peekable(); - while let Some(c) = chars.next() { - if c == '\r' { - if *chars.peek().unwrap() != '\n' { - panic!("lexer accepted bare CR"); - } - chars.next(); - res.push('\n'); - } else { - res.push(c); - } - } - - res.shrink_to_fit(); - res -} - -// check if `s` looks like i32 or u1234 etc. -fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { - s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) -} - -macro_rules! err { - ($opt_diag:expr, |$span:ident, $diag:ident| $($body:tt)*) => { - match $opt_diag { - Some(($span, $diag)) => { $($body)* } - None => return None, - } - } -} - -crate fn expect_no_suffix(sp: Span, diag: &Handler, kind: &str, suffix: Option) { - match suffix { - None => {/* everything ok */} - Some(suf) => { - let text = suf.as_str(); - if text.is_empty() { - diag.span_bug(sp, "found empty literal suffix in Some") - } - let mut err = if kind == "a tuple index" && - ["i32", "u32", "isize", "usize"].contains(&text.to_string().as_str()) - { - // #59553: warn instead of reject out of hand to allow the fix to percolate - // through the ecosystem when people fix their macros - let mut err = diag.struct_span_warn( - sp, - &format!("suffixes on {} are invalid", kind), - ); - err.note(&format!( - "`{}` is *temporarily* accepted on tuple index fields as it was \ - incorrectly accepted on stable for a few releases", - text, - )); - err.help( - "on proc macros, you'll want to use `syn::Index::from` or \ - `proc_macro::Literal::*_unsuffixed` for code that will desugar \ - to tuple field access", - ); - err.note( - "for more context, see https://github.com/rust-lang/rust/issues/60210", - ); - err - } else { - diag.struct_span_err(sp, &format!("suffixes on {} are invalid", kind)) - }; - err.span_label(sp, format!("invalid suffix `{}`", text)); - err.emit(); - } - } -} - -impl LitKind { - /// Converts literal token with a suffix into a semantic literal. - /// Works speculatively and may return `None` is diagnostic handler is not passed. - /// If diagnostic handler is passed, always returns `Some`, - /// possibly after reporting non-fatal errors and recovery. - crate fn from_lit_token( - lit: token::Lit, - suf: Option, - diag: Option<(Span, &Handler)> - ) -> Option { - if suf.is_some() && !lit.may_have_suffix() { - err!(diag, |span, diag| { - expect_no_suffix(span, diag, &format!("a {}", lit.literal_name()), suf) - }); - } - - Some(match lit { - token::Bool(i) => { - assert!(i == keywords::True.name() || i == keywords::False.name()); - LitKind::Bool(i == keywords::True.name()) - } - token::Byte(i) => { - match unescape_byte(&i.as_str()) { - Ok(c) => LitKind::Byte(c), - Err(_) => LitKind::Err(i), - } - }, - token::Char(i) => { - match unescape_char(&i.as_str()) { - Ok(c) => LitKind::Char(c), - Err(_) => LitKind::Err(i), - } - }, - token::Err(i) => LitKind::Err(i), - - // There are some valid suffixes for integer and float literals, - // so all the handling is done internally. - token::Integer(s) => return integer_lit(&s.as_str(), suf, diag), - token::Float(s) => return float_lit(&s.as_str(), suf, diag), - - token::Str_(mut sym) => { - // If there are no characters requiring special treatment we can - // reuse the symbol from the Token. Otherwise, we must generate a - // new symbol because the string in the LitKind is different to the - // string in the Token. - let mut has_error = false; - let s = &sym.as_str(); - if s.as_bytes().iter().any(|&c| c == b'\\' || c == b'\r') { - let mut buf = String::with_capacity(s.len()); - unescape_str(s, &mut |_, unescaped_char| { - match unescaped_char { - Ok(c) => buf.push(c), - Err(_) => has_error = true, - } - }); - if has_error { - return Some(LitKind::Err(sym)); - } - sym = Symbol::intern(&buf) - } - - LitKind::Str(sym, ast::StrStyle::Cooked) - } - token::StrRaw(mut sym, n) => { - // Ditto. - let s = &sym.as_str(); - if s.contains('\r') { - sym = Symbol::intern(&raw_str_lit(s)); - } - LitKind::Str(sym, ast::StrStyle::Raw(n)) - } - token::ByteStr(i) => { - let s = &i.as_str(); - let mut buf = Vec::with_capacity(s.len()); - let mut has_error = false; - unescape_byte_str(s, &mut |_, unescaped_byte| { - match unescaped_byte { - Ok(c) => buf.push(c), - Err(_) => has_error = true, - } - }); - if has_error { - return Some(LitKind::Err(i)); - } - buf.shrink_to_fit(); - LitKind::ByteStr(Lrc::new(buf)) - } - token::ByteStrRaw(i, _) => { - LitKind::ByteStr(Lrc::new(i.to_string().into_bytes())) - } - }) - } -} - -fn filtered_float_lit(data: Symbol, suffix: Option, diag: Option<(Span, &Handler)>) - -> Option { - debug!("filtered_float_lit: {}, {:?}", data, suffix); - let suffix = match suffix { - Some(suffix) => suffix, - None => return Some(LitKind::FloatUnsuffixed(data)), - }; - - Some(match &*suffix.as_str() { - "f32" => LitKind::Float(data, ast::FloatTy::F32), - "f64" => LitKind::Float(data, ast::FloatTy::F64), - suf => { - err!(diag, |span, diag| { - if suf.len() >= 2 && looks_like_width_suffix(&['f'], suf) { - // if it looks like a width, lets try to be helpful. - let msg = format!("invalid width `{}` for float literal", &suf[1..]); - diag.struct_span_err(span, &msg).help("valid widths are 32 and 64").emit() - } else { - let msg = format!("invalid suffix `{}` for float literal", suf); - diag.struct_span_err(span, &msg) - .span_label(span, format!("invalid suffix `{}`", suf)) - .help("valid suffixes are `f32` and `f64`") - .emit(); - } - }); - - LitKind::FloatUnsuffixed(data) - } - }) -} -fn float_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) - -> Option { - debug!("float_lit: {:?}, {:?}", s, suffix); - // FIXME #2252: bounds checking float literals is deferred until trans - - // Strip underscores without allocating a new String unless necessary. - let s2; - let s = if s.chars().any(|c| c == '_') { - s2 = s.chars().filter(|&c| c != '_').collect::(); - &s2 - } else { - s - }; - - filtered_float_lit(Symbol::intern(s), suffix, diag) -} - -fn integer_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) - -> Option { - // s can only be ascii, byte indexing is fine - - // Strip underscores without allocating a new String unless necessary. - let s2; - let mut s = if s.chars().any(|c| c == '_') { - s2 = s.chars().filter(|&c| c != '_').collect::(); - &s2 - } else { - s - }; - - debug!("integer_lit: {}, {:?}", s, suffix); - - let mut base = 10; - let orig = s; - let mut ty = ast::LitIntType::Unsuffixed; - - if s.starts_with('0') && s.len() > 1 { - match s.as_bytes()[1] { - b'x' => base = 16, - b'o' => base = 8, - b'b' => base = 2, - _ => { } - } - } - - // 1f64 and 2f32 etc. are valid float literals. - if let Some(suf) = suffix { - if looks_like_width_suffix(&['f'], &suf.as_str()) { - let err = match base { - 16 => Some("hexadecimal float literal is not supported"), - 8 => Some("octal float literal is not supported"), - 2 => Some("binary float literal is not supported"), - _ => None, - }; - if let Some(err) = err { - err!(diag, |span, diag| { - diag.struct_span_err(span, err) - .span_label(span, "not supported") - .emit(); - }); - } - return filtered_float_lit(Symbol::intern(s), Some(suf), diag) - } - } - - if base != 10 { - s = &s[2..]; - } - - if let Some(suf) = suffix { - if suf.as_str().is_empty() { - err!(diag, |span, diag| diag.span_bug(span, "found empty literal suffix in Some")); - } - ty = match &*suf.as_str() { - "isize" => ast::LitIntType::Signed(ast::IntTy::Isize), - "i8" => ast::LitIntType::Signed(ast::IntTy::I8), - "i16" => ast::LitIntType::Signed(ast::IntTy::I16), - "i32" => ast::LitIntType::Signed(ast::IntTy::I32), - "i64" => ast::LitIntType::Signed(ast::IntTy::I64), - "i128" => ast::LitIntType::Signed(ast::IntTy::I128), - "usize" => ast::LitIntType::Unsigned(ast::UintTy::Usize), - "u8" => ast::LitIntType::Unsigned(ast::UintTy::U8), - "u16" => ast::LitIntType::Unsigned(ast::UintTy::U16), - "u32" => ast::LitIntType::Unsigned(ast::UintTy::U32), - "u64" => ast::LitIntType::Unsigned(ast::UintTy::U64), - "u128" => ast::LitIntType::Unsigned(ast::UintTy::U128), - suf => { - // i and u look like widths, so lets - // give an error message along those lines - err!(diag, |span, diag| { - if looks_like_width_suffix(&['i', 'u'], suf) { - let msg = format!("invalid width `{}` for integer literal", &suf[1..]); - diag.struct_span_err(span, &msg) - .help("valid widths are 8, 16, 32, 64 and 128") - .emit(); - } else { - let msg = format!("invalid suffix `{}` for numeric literal", suf); - diag.struct_span_err(span, &msg) - .span_label(span, format!("invalid suffix `{}`", suf)) - .help("the suffix must be one of the integral types \ - (`u32`, `isize`, etc)") - .emit(); - } - }); - - ty - } - } - } - - debug!("integer_lit: the type is {:?}, base {:?}, the new string is {:?}, the original \ - string was {:?}, the original suffix was {:?}", ty, base, s, orig, suffix); - - Some(match u128::from_str_radix(s, base) { - Ok(r) => LitKind::Int(r, ty), - Err(_) => { - // small bases are lexed as if they were base 10, e.g, the string - // might be `0b10201`. This will cause the conversion above to fail, - // but these cases have errors in the lexer: we don't want to emit - // two errors, and we especially don't want to emit this error since - // it isn't necessarily true. - let already_errored = base < 10 && - s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base)); - - if !already_errored { - err!(diag, |span, diag| diag.span_err(span, "int literal is too large")); - } - LitKind::Int(0, ty) - } - }) -} - /// A sequence separator. pub struct SeqSep { /// The seperator token. diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index b81f7be9c2c14..f95981680b940 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -15,7 +15,7 @@ use crate::ast::{ForeignItem, ForeignItemKind, FunctionRetTy}; use crate::ast::{GenericParam, GenericParamKind}; use crate::ast::GenericArg; use crate::ast::{Ident, ImplItem, IsAsync, IsAuto, Item, ItemKind}; -use crate::ast::{Label, Lifetime, Lit}; +use crate::ast::{Label, Lifetime}; use crate::ast::{Local, LocalSource}; use crate::ast::MacStmtStyle; use crate::ast::{Mac, Mac_, MacDelimiter}; @@ -35,7 +35,7 @@ use crate::ast::{RangeEnd, RangeSyntax}; use crate::{ast, attr}; use crate::ext::base::DummyResult; use crate::source_map::{self, SourceMap, Spanned, respan}; -use crate::parse::{self, SeqSep, classify, token}; +use crate::parse::{SeqSep, classify, literal, token}; use crate::parse::lexer::{TokenAndSpan, UnmatchedBrace}; use crate::parse::lexer::comments::{doc_comment_style, strip_doc_comment_decoration}; use crate::parse::token::DelimToken; @@ -613,7 +613,7 @@ impl<'a> Parser<'a> { }) } - fn this_token_descr(&self) -> String { + crate fn this_token_descr(&self) -> String { if let Some(prefix) = self.token_descr() { format!("{} `{}`", prefix, self.this_token_to_string()) } else { @@ -621,11 +621,6 @@ impl<'a> Parser<'a> { } } - fn unexpected_last(&self, t: &token::Token) -> PResult<'a, T> { - let token_str = pprust::token_to_string(t); - Err(self.span_fatal(self.prev_span, &format!("unexpected token: `{}`", token_str))) - } - crate fn unexpected(&mut self) -> PResult<'a, T> { match self.expect_one_of(&[], &[]) { Err(e) => Err(e), @@ -1109,7 +1104,7 @@ impl<'a> Parser<'a> { } fn expect_no_suffix(&self, sp: Span, kind: &str, suffix: Option) { - parse::expect_no_suffix(sp, &self.sess.span_diagnostic, kind, suffix) + literal::expect_no_suffix(sp, &self.sess.span_diagnostic, kind, suffix) } /// Attempts to consume a `<`. If `<<` is seen, replaces it with a single @@ -1387,7 +1382,7 @@ impl<'a> Parser<'a> { }) } - fn look_ahead_span(&self, dist: usize) -> Span { + crate fn look_ahead_span(&self, dist: usize) -> Span { if dist == 0 { return self.span } @@ -2030,47 +2025,6 @@ impl<'a> Parser<'a> { } } - /// Matches `lit = true | false | token_lit`. - crate fn parse_lit(&mut self) -> PResult<'a, Lit> { - let diag = Some((self.span, &self.sess.span_diagnostic)); - if let Some(lit) = Lit::from_token(&self.token, self.span, diag) { - self.bump(); - return Ok(lit); - } else if self.token == token::Dot { - // Recover `.4` as `0.4`. - let recovered = self.look_ahead(1, |t| { - if let token::Literal(token::Integer(val), suf) = *t { - let next_span = self.look_ahead_span(1); - if self.span.hi() == next_span.lo() { - let sym = String::from("0.") + &val.as_str(); - let token = token::Literal(token::Float(Symbol::intern(&sym)), suf); - return Some((token, self.span.to(next_span))); - } - } - None - }); - if let Some((token, span)) = recovered { - self.diagnostic() - .struct_span_err(span, "float literals must have an integer part") - .span_suggestion( - span, - "must have an integer part", - pprust::token_to_string(&token), - Applicability::MachineApplicable, - ) - .emit(); - let diag = Some((span, &self.sess.span_diagnostic)); - if let Some(lit) = Lit::from_token(&token, span, diag) { - self.bump(); - self.bump(); - return Ok(lit); - } - } - } - - self.unexpected_last(&self.token) - } - /// Matches `'-' lit | lit` (cf. `ast_validation::AstValidator::check_expr_within_pat`). crate fn parse_literal_maybe_minus(&mut self) -> PResult<'a, P> { maybe_whole_expr!(self); diff --git a/src/test/ui/attr-eq-token-tree.stderr b/src/test/ui/attr-eq-token-tree.stderr index aae25b2721e4d..571779dfa1ae7 100644 --- a/src/test/ui/attr-eq-token-tree.stderr +++ b/src/test/ui/attr-eq-token-tree.stderr @@ -1,8 +1,8 @@ error: unexpected token: `!` - --> $DIR/attr-eq-token-tree.rs:3:11 + --> $DIR/attr-eq-token-tree.rs:3:13 | LL | #[my_attr = !] - | ^ + | ^ error: aborting due to previous error diff --git a/src/test/ui/exclusive-range/exclusive_range_pattern_syntax_collision.stderr b/src/test/ui/exclusive-range/exclusive_range_pattern_syntax_collision.stderr index 03867a8e43b10..359725a41c105 100644 --- a/src/test/ui/exclusive-range/exclusive_range_pattern_syntax_collision.stderr +++ b/src/test/ui/exclusive-range/exclusive_range_pattern_syntax_collision.stderr @@ -1,8 +1,8 @@ error: unexpected token: `,` - --> $DIR/exclusive_range_pattern_syntax_collision.rs:5:15 + --> $DIR/exclusive_range_pattern_syntax_collision.rs:5:17 | LL | [_, 99.., _] => {}, - | ^^ + | ^ error: aborting due to previous error diff --git a/src/test/ui/exclusive-range/exclusive_range_pattern_syntax_collision2.stderr b/src/test/ui/exclusive-range/exclusive_range_pattern_syntax_collision2.stderr index 5ac435bf011e4..8f849d7b3f87c 100644 --- a/src/test/ui/exclusive-range/exclusive_range_pattern_syntax_collision2.stderr +++ b/src/test/ui/exclusive-range/exclusive_range_pattern_syntax_collision2.stderr @@ -1,8 +1,8 @@ error: unexpected token: `]` - --> $DIR/exclusive_range_pattern_syntax_collision2.rs:5:15 + --> $DIR/exclusive_range_pattern_syntax_collision2.rs:5:17 | LL | [_, 99..] => {}, - | ^^ + | ^ error: aborting due to previous error diff --git a/src/test/ui/macros/macro-attribute.stderr b/src/test/ui/macros/macro-attribute.stderr index aa1cd94b0c638..d28ce25341d3a 100644 --- a/src/test/ui/macros/macro-attribute.stderr +++ b/src/test/ui/macros/macro-attribute.stderr @@ -1,8 +1,8 @@ error: unexpected token: `$` - --> $DIR/macro-attribute.rs:1:7 + --> $DIR/macro-attribute.rs:1:9 | LL | #[doc = $not_there] - | ^ + | ^ error: aborting due to previous error diff --git a/src/test/ui/malformed/malformed-interpolated.stderr b/src/test/ui/malformed/malformed-interpolated.stderr index bc2146e409d47..e805416172bab 100644 --- a/src/test/ui/malformed/malformed-interpolated.stderr +++ b/src/test/ui/malformed/malformed-interpolated.stderr @@ -7,19 +7,19 @@ LL | check!(0u8); = help: instead of using a suffixed literal (1u8, 1.0f32, etc.), use an unsuffixed version (1, 1.0, etc.). error: unexpected token: `-0` - --> $DIR/malformed-interpolated.rs:5:19 + --> $DIR/malformed-interpolated.rs:5:21 | LL | #[my_attr = $expr] - | ^ + | ^^^^^ ... LL | check!(-0); // ERROR, see above | ----------- in this macro invocation error: unexpected token: `0 + 0` - --> $DIR/malformed-interpolated.rs:5:19 + --> $DIR/malformed-interpolated.rs:5:21 | LL | #[my_attr = $expr] - | ^ + | ^^^^^ ... LL | check!(0 + 0); // ERROR, see above | -------------- in this macro invocation diff --git a/src/test/ui/parser/attr-bad-meta-2.stderr b/src/test/ui/parser/attr-bad-meta-2.stderr index ffbfc583e8a75..2d772dae69125 100644 --- a/src/test/ui/parser/attr-bad-meta-2.stderr +++ b/src/test/ui/parser/attr-bad-meta-2.stderr @@ -1,8 +1,8 @@ error: unexpected token: `]` - --> $DIR/attr-bad-meta-2.rs:1:8 + --> $DIR/attr-bad-meta-2.rs:1:9 | LL | #[path =] - | ^ + | ^ error: aborting due to previous error diff --git a/src/test/ui/parser/pat-tuple-5.stderr b/src/test/ui/parser/pat-tuple-5.stderr index 61ae40b355d38..f9832214c6800 100644 --- a/src/test/ui/parser/pat-tuple-5.stderr +++ b/src/test/ui/parser/pat-tuple-5.stderr @@ -1,8 +1,8 @@ error: unexpected token: `)` - --> $DIR/pat-tuple-5.rs:3:14 + --> $DIR/pat-tuple-5.rs:3:16 | LL | (pat ..) => {} - | ^^ + | ^ error: aborting due to previous error From 83ed781c017632d48746553bdb2bf3d1633d5ca4 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sat, 11 May 2019 16:03:27 +0300 Subject: [PATCH 7/7] Address comments + Fix tests --- src/librustdoc/clean/cfg.rs | 2 ++ src/libsyntax/ast.rs | 9 +++++++-- src/libsyntax/parse/literal.rs | 4 ++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/librustdoc/clean/cfg.rs b/src/librustdoc/clean/cfg.rs index 51fe26b374313..b96ac19c1ea61 100644 --- a/src/librustdoc/clean/cfg.rs +++ b/src/librustdoc/clean/cfg.rs @@ -416,6 +416,8 @@ mod test { use syntax_pos::DUMMY_SP; use syntax::ast::*; + use syntax::attr; + use syntax::source_map::dummy_spanned; use syntax::symbol::Symbol; use syntax::with_globals; diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index a188f1a936890..aa176c8925883 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -1351,12 +1351,17 @@ pub enum StrStyle { Raw(u16), } -/// A literal. +/// An AST literal. #[derive(Clone, RustcEncodable, RustcDecodable, Debug)] pub struct Lit { - pub node: LitKind, + /// The original literal token as written in source code. pub token: token::Lit, + /// The original literal suffix as written in source code. pub suffix: Option, + /// The "semantic" representation of the literal lowered from the original tokens. + /// Strings are unescaped, hexadecimal forms are eliminated, etc. + /// FIXME: Remove this and only create the semantic representation during lowering to HIR. + pub node: LitKind, pub span: Span, } diff --git a/src/libsyntax/parse/literal.rs b/src/libsyntax/parse/literal.rs index 2c7ba13fbef82..53195421ddcee 100644 --- a/src/libsyntax/parse/literal.rs +++ b/src/libsyntax/parse/literal.rs @@ -27,7 +27,7 @@ macro_rules! err { impl LitKind { /// Converts literal token with a suffix into a semantic literal. - /// Works speculatively and may return `None` is diagnostic handler is not passed. + /// Works speculatively and may return `None` if diagnostic handler is not passed. /// If diagnostic handler is passed, always returns `Some`, /// possibly after reporting non-fatal errors and recovery. fn from_lit_token( @@ -166,7 +166,7 @@ impl LitKind { impl Lit { /// Converts literal token with a suffix into an AST literal. - /// Works speculatively and may return `None` is diagnostic handler is not passed. + /// Works speculatively and may return `None` if diagnostic handler is not passed. /// If diagnostic handler is passed, may return `Some`, /// possibly after reporting non-fatal errors and recovery, or `None` for irrecoverable errors. crate fn from_token(