Skip to content

Commit

Permalink
Fix parsing of code without trailing newlines
Browse files Browse the repository at this point in the history
When the input doesn't have a trailing newline, but the last line had
exactly as many bytes as the current indentation level, the
tokenizer didn't emit a fake newline, causing parse errors (the grammar
expects newlines to conform to the Python spec).

I don't see any reason for fake newlines to be omitted in these cases,
so this PR removes that condition from the tokenizer.

Reported in #930.
  • Loading branch information
zsol committed May 28, 2023
1 parent 59aeceb commit 07a168b
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 4 deletions.
16 changes: 16 additions & 0 deletions native/libcst/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,22 @@ mod test {
parse_module("def g(a, b): ...", None).expect("parse error");
}

// Regression test for sources that do not end in a newline (reported in #930).
// Every snippet in the list must parse; the first failure panics with the
// offending snippet and the parser's error.
#[test]
fn test_single_statement_with_no_newline() {
for src in &[
// NOTE(review): the next two entries look identical in this listing; they may
// have differed only in whitespace before it was flattened — confirm.
"(\n \\\n)",
"(\n \\\n)",
"(\n '''\n''')",
"del _",
"if _:\n '''\n)'''",
"if _:\n ('''\n''')",
"if _:\n '''\n '''",
"if _:\n '''\n ''' ",
] {
// `unwrap_or_else` (rather than `expect`) so the panic message can name the snippet.
parse_module(src, None).unwrap_or_else(|e| panic!("'{}' doesn't parse: {}", src, e));
}
}

#[test]
fn bol_offset_first_line() {
assert_eq!(0, bol_offset("hello", 1));
Expand Down
5 changes: 1 addition & 4 deletions native/libcst/src/tokenizer/core/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -334,10 +334,7 @@ impl<'t> TokState<'t> {
return match self.text_pos.peek() {
// Check for EOF now
None => {
if self.missing_nl_before_eof
&& self.text_pos.byte_column_number() != self.bol_width
&& !self.blank_line
{
if self.missing_nl_before_eof && !self.blank_line {
self.at_bol = true;
self.missing_nl_before_eof = false;
Ok(TokType::Newline)
Expand Down
13 changes: 13 additions & 0 deletions native/libcst/src/tokenizer/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,19 @@ fn test_fake_newline() {
);
}

// The input ends with `)` and has no trailing newline; the expected token stream
// still contains a Newline token (with empty text) immediately before EndMarker.
#[test]
fn test_fake_newline_when_at_bol() {
assert_eq!(
tokenize_with_end_marker("(\n \\\n)", &default_config()),
Ok(vec![
(TokType::Op, "("),
(TokType::Op, ")"),
// Empty text: this newline has no corresponding characters in the input.
(TokType::Newline, ""),
(TokType::EndMarker, "")
])
)
}

#[test]
fn test_no_fake_newline_for_empty_input() {
assert_eq!(
Expand Down

0 comments on commit 07a168b

Please sign in to comment.