Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle Spans for byte and raw strings and add more detail #81307

Merged
merged 1 commit into from
Feb 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 20 additions & 10 deletions compiler/rustc_parse/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ mod tokentrees;
mod unescape_error_reporting;
mod unicode_chars;

use unescape_error_reporting::{emit_unescape_error, push_escaped_char};
use unescape_error_reporting::{emit_unescape_error, escaped_char};

#[derive(Clone, Debug)]
pub struct UnmatchedBrace {
Expand Down Expand Up @@ -122,11 +122,9 @@ impl<'a> StringReader<'a> {
m: &str,
c: char,
) -> DiagnosticBuilder<'a> {
let mut m = m.to_string();
m.push_str(": ");
push_escaped_char(&mut m, c);

self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
self.sess
.span_diagnostic
.struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
}

/// Turns simple `rustc_lexer::TokenKind` enum into a rich
Expand Down Expand Up @@ -421,7 +419,7 @@ impl<'a> StringReader<'a> {
let content_start = start + BytePos(prefix_len);
let content_end = suffix_start - BytePos(postfix_len);
let id = self.symbol_from_to(content_start, content_end);
self.validate_literal_escape(mode, content_start, content_end);
self.validate_literal_escape(mode, content_start, content_end, prefix_len, postfix_len);
(lit_kind, id)
}

Expand Down Expand Up @@ -525,17 +523,29 @@ impl<'a> StringReader<'a> {
.raise();
}

fn validate_literal_escape(&self, mode: Mode, content_start: BytePos, content_end: BytePos) {
fn validate_literal_escape(
&self,
mode: Mode,
content_start: BytePos,
content_end: BytePos,
prefix_len: u32,
postfix_len: u32,
) {
let lit_content = self.str_from_to(content_start, content_end);
unescape::unescape_literal(lit_content, mode, &mut |range, result| {
// Here we only check for errors. The actual unescaping is done later.
if let Err(err) = result {
let span_with_quotes =
self.mk_sp(content_start - BytePos(1), content_end + BytePos(1));
let span_with_quotes = self
.mk_sp(content_start - BytePos(prefix_len), content_end + BytePos(postfix_len));
let (start, end) = (range.start as u32, range.end as u32);
let lo = content_start + BytePos(start);
let hi = lo + BytePos(end - start);
let span = self.mk_sp(lo, hi);
emit_unescape_error(
&self.sess.span_diagnostic,
lit_content,
span_with_quotes,
span,
mode,
range,
err,
Expand Down
186 changes: 116 additions & 70 deletions compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ pub(crate) fn emit_unescape_error(
lit: &str,
// full span of the literal, including quotes
span_with_quotes: Span,
// interior span of the literal, without quotes
span: Span,
mode: Mode,
// range of the error inside `lit`
range: Range<usize>,
Expand All @@ -26,13 +28,6 @@ pub(crate) fn emit_unescape_error(
range,
error
);
let span = {
let Range { start, end } = range;
let (start, end) = (start as u32, end as u32);
let lo = span_with_quotes.lo() + BytePos(start + 1);
let hi = lo + BytePos(end - start);
span_with_quotes.with_lo(lo).with_hi(hi)
};
let last_char = || {
let c = lit[range.clone()].chars().rev().next().unwrap();
let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32));
Expand All @@ -42,20 +37,22 @@ pub(crate) fn emit_unescape_error(
EscapeError::LoneSurrogateUnicodeEscape => {
handler
.struct_span_err(span, "invalid unicode character escape")
.span_label(span, "invalid escape")
.help("unicode escape must not be a surrogate")
.emit();
}
EscapeError::OutOfRangeUnicodeEscape => {
handler
.struct_span_err(span, "invalid unicode character escape")
.span_label(span, "invalid escape")
.help("unicode escape must be at most 10FFFF")
.emit();
}
EscapeError::MoreThanOneChar => {
let msg = if mode.is_bytes() {
"if you meant to write a byte string literal, use double quotes"
let (prefix, msg) = if mode.is_bytes() {
("b", "if you meant to write a byte string literal, use double quotes")
} else {
"if you meant to write a `str` literal, use double quotes"
("", "if you meant to write a `str` literal, use double quotes")
};

handler
Expand All @@ -66,31 +63,44 @@ pub(crate) fn emit_unescape_error(
.span_suggestion(
span_with_quotes,
msg,
format!("\"{}\"", lit),
format!("{}\"{}\"", prefix, lit),
Applicability::MachineApplicable,
)
.emit();
}
EscapeError::EscapeOnlyChar => {
let (c, _span) = last_char();
let (c, char_span) = last_char();

let mut msg = if mode.is_bytes() {
"byte constant must be escaped: "
let msg = if mode.is_bytes() {
"byte constant must be escaped"
} else {
"character constant must be escaped: "
}
.to_string();
push_escaped_char(&mut msg, c);

handler.span_err(span, msg.as_str())
"character constant must be escaped"
};
handler
.struct_span_err(span, &format!("{}: `{}`", msg, escaped_char(c)))
.span_suggestion(
char_span,
"escape the character",
c.escape_default().to_string(),
Applicability::MachineApplicable,
)
.emit()
}
EscapeError::BareCarriageReturn => {
let msg = if mode.in_double_quotes() {
"bare CR not allowed in string, use \\r instead"
"bare CR not allowed in string, use `\\r` instead"
} else {
"character constant must be escaped: \\r"
"character constant must be escaped: `\\r`"
};
handler.span_err(span, msg);
handler
.struct_span_err(span, msg)
.span_suggestion(
span,
"escape the character",
"\\r".to_string(),
Applicability::MachineApplicable,
)
.emit();
}
EscapeError::BareCarriageReturnInRawString => {
assert!(mode.in_double_quotes());
Expand All @@ -102,21 +112,22 @@ pub(crate) fn emit_unescape_error(

let label =
if mode.is_bytes() { "unknown byte escape" } else { "unknown character escape" };
let mut msg = label.to_string();
msg.push_str(": ");
push_escaped_char(&mut msg, c);

let mut diag = handler.struct_span_err(span, msg.as_str());
let ec = escaped_char(c);
let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec));
diag.span_label(span, label);
if c == '{' || c == '}' && !mode.is_bytes() {
diag.help(
"if used in a formatting string, \
curly braces are escaped with `{{` and `}}`",
"if used in a formatting string, curly braces are escaped with `{{` and `}}`",
);
} else if c == '\r' {
diag.help(
"this is an isolated carriage return; \
consider checking your editor and version control settings",
"this is an isolated carriage return; consider checking your editor and \
version control settings",
);
} else {
diag.help(
"for more information, visit \
<https://static.rust-lang.org/doc/master/reference.html#literals>",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In which cases we link to the reference from error messages?
In theory, there's a reference page for every error, but in practice the diagnostics are already too noisy, so perhaps it's not a good idea to add such links just in case, without a strong specific reason.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The case where this one triggers when using a "fake" escape, like \y. We could enumerate all the possible ones that are valid, but feared that would be too verbose. Every other case handled in this document provides enough information inline. I'm more worried about the link being stale. The reason I'm adding it is because these escapes are hard to search for.

);
}
diag.emit();
Expand All @@ -127,45 +138,70 @@ pub(crate) fn emit_unescape_error(
EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
let (c, span) = last_char();

let mut msg = if error == EscapeError::InvalidCharInHexEscape {
"invalid character in numeric character escape: "
let msg = if error == EscapeError::InvalidCharInHexEscape {
"invalid character in numeric character escape"
} else {
"invalid character in unicode escape: "
}
.to_string();
push_escaped_char(&mut msg, c);
"invalid character in unicode escape"
};
let c = escaped_char(c);

handler.span_err(span, msg.as_str())
handler
.struct_span_err(span, &format!("{}: `{}`", msg, c))
.span_label(span, msg)
.emit();
}
EscapeError::NonAsciiCharInByte => {
assert!(mode.is_bytes());
let (_c, span) = last_char();
handler.span_err(
span,
"byte constant must be ASCII. \
Use a \\xHH escape for a non-ASCII byte",
)
let (c, span) = last_char();
handler
.struct_span_err(span, "non-ASCII character in byte constant")
.span_label(span, "byte constant must be ASCII")
.span_suggestion(
span,
"use a \\xHH escape for a non-ASCII byte",
format!("\\x{:X}", c as u32),
Applicability::MachineApplicable,
)
.emit();
}
EscapeError::NonAsciiCharInByteString => {
assert!(mode.is_bytes());
let (_c, span) = last_char();
handler.span_err(span, "raw byte string must be ASCII")
handler
.struct_span_err(span, "raw byte string must be ASCII")
.span_label(span, "must be ASCII")
.emit();
}
EscapeError::OutOfRangeHexEscape => {
handler
.struct_span_err(span, "out of range hex escape")
.span_label(span, "must be a character in the range [\\x00-\\x7f]")
.emit();
}
EscapeError::OutOfRangeHexEscape => handler.span_err(
span,
"this form of character escape may only be used \
with characters in the range [\\x00-\\x7f]",
),
EscapeError::LeadingUnderscoreUnicodeEscape => {
let (_c, span) = last_char();
handler.span_err(span, "invalid start of unicode escape")
let (c, span) = last_char();
let msg = "invalid start of unicode escape";
handler
.struct_span_err(span, &format!("{}: `{}`", msg, c))
.span_label(span, msg)
.emit();
}
EscapeError::OverlongUnicodeEscape => {
handler.span_err(span, "overlong unicode escape (must have at most 6 hex digits)")
}
EscapeError::UnclosedUnicodeEscape => {
handler.span_err(span, "unterminated unicode escape (needed a `}`)")
handler
.struct_span_err(span, "overlong unicode escape")
.span_label(span, "must have at most 6 hex digits")
.emit();
}
EscapeError::UnclosedUnicodeEscape => handler
.struct_span_err(span, "unterminated unicode escape")
.span_label(span, "missing a closing `}`")
.span_suggestion_verbose(
span.shrink_to_hi(),
"terminate the unicode escape",
"}".to_string(),
Applicability::MaybeIncorrect,
)
.emit(),
EscapeError::NoBraceInUnicodeEscape => {
let msg = "incorrect unicode escape sequence";
let mut diag = handler.struct_span_err(span, msg);
Expand Down Expand Up @@ -195,28 +231,38 @@ pub(crate) fn emit_unescape_error(

diag.emit();
}
EscapeError::UnicodeEscapeInByte => handler.span_err(
span,
"unicode escape sequences cannot be used \
as a byte or in a byte string",
),
EscapeError::UnicodeEscapeInByte => {
let msg = "unicode escape in byte string";
handler
.struct_span_err(span, msg)
.span_label(span, msg)
.help("unicode escape sequences cannot be used as a byte or in a byte string")
.emit();
}
EscapeError::EmptyUnicodeEscape => {
handler.span_err(span, "empty unicode escape (must have at least 1 hex digit)")
handler
.struct_span_err(span, "empty unicode escape")
.span_label(span, "this escape must have at least 1 hex digit")
.emit();
}
EscapeError::ZeroChars => {
let msg = "empty character literal";
handler.struct_span_err(span, msg).span_label(span, msg).emit()
}
EscapeError::LoneSlash => {
let msg = "invalid trailing slash in literal";
handler.struct_span_err(span, msg).span_label(span, msg).emit();
}
EscapeError::ZeroChars => handler.span_err(span, "empty character literal"),
EscapeError::LoneSlash => handler.span_err(span, "invalid trailing slash in literal"),
}
}

/// Pushes a character to a message string for error reporting
pub(crate) fn push_escaped_char(msg: &mut String, c: char) {
pub(crate) fn escaped_char(c: char) -> String {
match c {
'\u{20}'..='\u{7e}' => {
// Don't escape \, ' or " for user-facing messages
msg.push(c);
}
_ => {
msg.extend(c.escape_default());
c.to_string()
}
_ => c.escape_default().to_string(),
}
}
2 changes: 1 addition & 1 deletion src/test/ui/attributes/key-value-non-ascii.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#![feature(rustc_attrs)]

#[rustc_dummy = b"ffi.rs"] //~ ERROR byte constant must be ASCII
#[rustc_dummy = b"ffi.rs"] //~ ERROR non-ASCII character in byte constant
fn main() {}
5 changes: 4 additions & 1 deletion src/test/ui/attributes/key-value-non-ascii.stderr
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
error: byte constant must be ASCII. Use a \xHH escape for a non-ASCII byte
error: non-ASCII character in byte constant
--> $DIR/key-value-non-ascii.rs:3:19
|
LL | #[rustc_dummy = b"ffi.rs"]
| ^
| |
| byte constant must be ASCII
| help: use a \xHH escape for a non-ASCII byte: `\xFB03`

error: aborting due to previous error

6 changes: 3 additions & 3 deletions src/test/ui/parser/ascii-only-character-escape.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
fn main() {
let x = "\x80"; //~ ERROR may only be used
let y = "\xff"; //~ ERROR may only be used
let z = "\xe2"; //~ ERROR may only be used
let x = "\x80"; //~ ERROR out of range hex escape
let y = "\xff"; //~ ERROR out of range hex escape
let z = "\xe2"; //~ ERROR out of range hex escape
let a = b"\x00e2"; // ok because byte literal
}
Loading