Skip to content

Commit

Permalink
Add conversion of \r\n to \n in *raw* (byte) string literals
Browse files Browse the repository at this point in the history
  • Loading branch information
LukasKalbertodt committed Jun 9, 2021
1 parent b07aedb commit b0a62f3
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 18 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.


## [Unreleased]
### Chaned
### Changed
- Fixed (byte) string literal parsing by:
- Correctly handling "string continue" sequences
- Correctly converting `\r\n` into `\n`
Expand Down
4 changes: 2 additions & 2 deletions src/bytestr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,10 @@ impl<B: Buffer> ByteStringLit<B> {
/// Precondition: input has to start with either `b"` or `br`.
pub(crate) fn parse_impl(input: B) -> Result<Self, ParseError> {
if input.starts_with(r"br") {
let num_hashes = scan_raw_string::<u8>(&input, 2)?;
let (value, num_hashes) = scan_raw_string::<u8>(&input, 2)?;
Ok(Self {
raw: input,
value: None,
value: value.map(|s| s.into_bytes()),
num_hashes: Some(num_hashes),
})
} else {
Expand Down
9 changes: 9 additions & 0 deletions src/bytestr/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,15 @@ fn crlf_newlines() {

let lit = ByteStringLit::parse("b\"foo\r\n\"").expect("failed to parse");
assert_eq!(lit.value(), b"foo\n");

let lit = ByteStringLit::parse("br\"foo\r\nbar\"").expect("failed to parse");
assert_eq!(lit.value(), b"foo\nbar");

let lit = ByteStringLit::parse("br#\"\r\nbar\"#").expect("failed to parse");
assert_eq!(lit.value(), b"\nbar");

let lit = ByteStringLit::parse("br##\"foo\r\n\"##").expect("failed to parse");
assert_eq!(lit.value(), b"foo\n");
}

#[test]
Expand Down
60 changes: 47 additions & 13 deletions src/escape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ pub(crate) fn unescape_string<E: Escapee>(
return Err(perr(None, UnterminatedString));
}

// `value` is only empty there was no escape in the input string
// `value` is only empty if there was no escape in the input string
// (with the special case of the input being empty). This means the
// string value basically equals the input, so we store `None`.
let value = if value.is_empty() {
Expand All @@ -177,12 +177,13 @@ pub(crate) fn unescape_string<E: Escapee>(
Ok(value)
}

/// Reads and checks a raw (byte) string literal. Returns the number of hashes
/// used by the literal.
/// Reads and checks a raw (byte) string literal, converting `\r\n` sequences to
/// just `\n` sequences. Returns an optional new string (if the input contained
/// any `\r\n`) and the number of hashes used by the literal.
pub(crate) fn scan_raw_string<E: Escapee>(
input: &str,
offset: usize,
) -> Result<u32, ParseError> {
) -> Result<(Option<String>, u32), ParseError> {
// Raw string literal
let num_hashes = input[offset..].bytes().position(|b| b != b'#')
.ok_or(perr(None, InvalidLiteral))?;
Expand All @@ -194,28 +195,61 @@ pub(crate) fn scan_raw_string<E: Escapee>(
let hashes = &input[offset..num_hashes + offset];

let mut closing_quote_pos = None;
for (i, b) in input[start_inner..].bytes().enumerate() {
if b == b'"' && input[start_inner + i + 1..].starts_with(hashes) {
closing_quote_pos = Some(i + start_inner);
let mut i = start_inner;
let mut end_last_escape = start_inner;
let mut value = String::new();
while i < input.len() {
let b = input.as_bytes()[i];
if b == b'"' && input[i + 1..].starts_with(hashes) {
closing_quote_pos = Some(i);
break;
}

if E::SUPPORTS_UNICODE {
if b == b'\r' && input.as_bytes().get(start_inner + i + 1) != Some(&b'\n') {
return Err(perr(i + start_inner, IsolatedCr));
if b == b'\r' {
// Convert `\r\n` into `\n`. This is currently not well documented
// in the Rust reference, but is done even for raw strings. That's
// because rustc simply converts all line endings when reading
// source files.
if input.as_bytes().get(i + 1) == Some(&b'\n') {
value.push_str(&input[end_last_escape..i]);
value.push('\n');
i += 2;
end_last_escape = i;
continue;
} else if E::SUPPORTS_UNICODE {
// If no \n follows the \r and we are scanning a raw string
// (not raw byte string), we error.
return Err(perr(i, IsolatedCr))
}
} else {
}

if !E::SUPPORTS_UNICODE {
if !b.is_ascii() {
return Err(perr(i + start_inner, NonAsciiInByteLiteral));
return Err(perr(i, NonAsciiInByteLiteral));
}
}

i += 1;
}

let closing_quote_pos = closing_quote_pos
.ok_or(perr(None, UnterminatedRawString))?;

if closing_quote_pos + num_hashes != input.len() - 1 {
return Err(perr(closing_quote_pos + num_hashes + 1..input.len(), UnexpectedChar));
}

Ok(num_hashes as u32)
// `value` is only empty if there was no \r\n in the input string (with the
// special case of the input being empty). This means the string value
// equals the input, so we store `None`.
let value = if value.is_empty() {
None
} else {
// There was an \r\n in the string, so we need to push the remaining
// unescaped part of the string still.
value.push_str(&input[end_last_escape..closing_quote_pos]);
Some(value)
};

Ok((value, num_hashes as u32))
}
4 changes: 2 additions & 2 deletions src/string/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,10 @@ impl<B: Buffer> StringLit<B> {
/// Precondition: input has to start with either `"` or `r`.
pub(crate) fn parse_impl(input: B) -> Result<Self, ParseError> {
if input.starts_with('r') {
let num_hashes = scan_raw_string::<char>(&input, 1)?;
let (value, num_hashes) = scan_raw_string::<char>(&input, 1)?;
Ok(Self {
raw: input,
value: None,
value,
num_hashes: Some(num_hashes),
})
} else {
Expand Down
9 changes: 9 additions & 0 deletions src/string/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,15 @@ fn crlf_newlines() {

let lit = StringLit::parse("\"лиса\r\n\"").expect("failed to parse");
assert_eq!(lit.value(), "лиса\n");

let lit = StringLit::parse("r\"foo\r\nbar\"").expect("failed to parse");
assert_eq!(lit.value(), "foo\nbar");

let lit = StringLit::parse("r#\"\r\nbar\"#").expect("failed to parse");
assert_eq!(lit.value(), "\nbar");

let lit = StringLit::parse("r##\"лиса\r\n\"##").expect("failed to parse");
assert_eq!(lit.value(), "лиса\n");
}

#[test]
Expand Down

0 comments on commit b0a62f3

Please sign in to comment.