Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(es/lexer): Fix lexing of \r\n in JSX #9112

Merged
merged 9 commits into from
Jun 29, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 21 additions & 12 deletions crates/swc_ecma_parser/src/lexer/jsx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ impl<'a> Lexer<'a> {
pub(super) fn read_jsx_token(&mut self) -> LexResult<Option<Token>> {
debug_assert!(self.syntax.jsx());

let start = self.input.cur_pos();
let mut chunk_start = self.input.cur_pos();
let mut out = String::new();
let mut value = String::new();

loop {
let cur = match self.input.cur() {
Expand Down Expand Up @@ -42,22 +43,30 @@ impl<'a> Lexer<'a> {
return self.read_token();
}

let raw = if out.is_empty() {
// Fast path: We don't need to allocate
let value = if value.is_empty() {
// Fast path: We don't need to allocate extra buffer for value
let s = unsafe {
// Safety: We already checked for the range
self.input.slice(chunk_start, cur_pos)
};
self.atoms.atom(s)
} else {
out.push_str(unsafe {
value.push_str(unsafe {
// Safety: We already checked for the range
self.input.slice(chunk_start, cur_pos)
});
self.atoms.atom(out)
self.atoms.atom(value)
};

let raw = {
let s = unsafe {
// Safety: We already checked for the range
self.input.slice(start, cur_pos)
};
self.atoms.atom(s)
};

return Ok(Some(Token::JSXText { raw }));
return Ok(Some(Token::JSXText { raw, value }));
}
'>' => {
self.emit_error(
Expand All @@ -84,28 +93,28 @@ impl<'a> Lexer<'a> {
}
}
'&' => {
out.push_str(unsafe {
value.push_str(unsafe {
// Safety: We already checked for the range
self.input.slice(chunk_start, cur_pos)
});

let jsx_entity = self.read_jsx_entity()?;

out.push(jsx_entity.0);
value.push(jsx_entity.0);
chunk_start = self.input.cur_pos();
}

_ => {
if cur.is_line_terminator() {
out.push_str(unsafe {
value.push_str(unsafe {
// Safety: We already checked for the range
self.input.slice(chunk_start, cur_pos)
});
match self.read_jsx_new_line(true)? {
Either::Left(s) => out.push_str(s),
Either::Right(c) => out.push(c),
Either::Left(s) => value.push_str(s),
Either::Right(c) => value.push(c),
}
chunk_start = cur_pos;
chunk_start = self.input.cur_pos();
} else {
unsafe {
// Safety: cur() was Some(c)
Expand Down
96 changes: 83 additions & 13 deletions crates/swc_ecma_parser/src/lexer/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1021,7 +1021,10 @@ fn jsx_02() {
Token::JSXTagStart,
Token::JSXName { name: "a".into() },
Token::JSXTagEnd,
Token::JSXText { raw: "foo".into() },
Token::JSXText {
raw: "foo".into(),
value: "foo".into()
},
Token::JSXTagStart,
tok!('/'),
Token::JSXName { name: "a".into() },
Expand Down Expand Up @@ -1205,7 +1208,10 @@ fn issue_299_01() {
raw: "'\\ '".into(),
},
Token::JSXTagEnd,
JSXText { raw: "ABC".into() },
JSXText {
raw: "ABC".into(),
value: "ABC".into()
},
JSXTagStart,
tok!('/'),
JSXName {
Expand Down Expand Up @@ -1239,7 +1245,10 @@ fn issue_299_02() {
raw: "'\\\\'".into(),
},
Token::JSXTagEnd,
JSXText { raw: "ABC".into() },
JSXText {
raw: "ABC".into(),
value: "ABC".into()
},
JSXTagStart,
tok!('/'),
JSXName {
Expand Down Expand Up @@ -1273,7 +1282,10 @@ fn jsx_string_1() {
raw: "'abc'".into(),
},
Token::JSXTagEnd,
JSXText { raw: "ABC".into() },
JSXText {
raw: "ABC".into(),
value: "ABC".into()
},
JSXTagStart,
tok!('/'),
JSXName {
Expand Down Expand Up @@ -1307,7 +1319,10 @@ fn jsx_string_2() {
raw: "\"abc\"".into(),
},
Token::JSXTagEnd,
JSXText { raw: "ABC".into() },
JSXText {
raw: "ABC".into(),
value: "ABC".into()
},
JSXTagStart,
tok!('/'),
JSXName {
Expand Down Expand Up @@ -1341,7 +1356,10 @@ fn jsx_string_3() {
raw: "'\n'".into(),
},
Token::JSXTagEnd,
JSXText { raw: "ABC".into() },
JSXText {
raw: "ABC".into(),
value: "ABC".into()
},
JSXTagStart,
tok!('/'),
JSXName {
Expand Down Expand Up @@ -1375,7 +1393,10 @@ fn jsx_string_4() {
raw: "'&sup3;'".into(),
},
Token::JSXTagEnd,
JSXText { raw: "ABC".into() },
JSXText {
raw: "ABC".into(),
value: "ABC".into()
},
JSXTagStart,
tok!('/'),
JSXName {
Expand Down Expand Up @@ -1409,7 +1430,10 @@ fn jsx_string_5() {
raw: "'&#42;'".into(),
},
Token::JSXTagEnd,
JSXText { raw: "ABC".into() },
JSXText {
raw: "ABC".into(),
value: "ABC".into()
},
JSXTagStart,
tok!('/'),
JSXName {
Expand Down Expand Up @@ -1443,7 +1467,10 @@ fn jsx_string_6() {
raw: "'&#x23;'".into(),
},
Token::JSXTagEnd,
JSXText { raw: "ABC".into() },
JSXText {
raw: "ABC".into(),
value: "ABC".into()
},
JSXTagStart,
tok!('/'),
JSXName {
Expand Down Expand Up @@ -1477,7 +1504,10 @@ fn jsx_string_7() {
raw: "'&'".into(),
},
Token::JSXTagEnd,
JSXText { raw: "ABC".into() },
JSXText {
raw: "ABC".into(),
value: "ABC".into()
},
JSXTagStart,
tok!('/'),
JSXName {
Expand Down Expand Up @@ -1511,7 +1541,10 @@ fn jsx_string_8() {
raw: "'&;'".into(),
},
Token::JSXTagEnd,
JSXText { raw: "ABC".into() },
JSXText {
raw: "ABC".into(),
value: "ABC".into()
},
JSXTagStart,
tok!('/'),
JSXName {
Expand Down Expand Up @@ -1545,7 +1578,10 @@ fn jsx_string_9() {
raw: "'&&'".into(),
},
Token::JSXTagEnd,
JSXText { raw: "ABC".into() },
JSXText {
raw: "ABC".into(),
value: "ABC".into()
},
JSXTagStart,
tok!('/'),
JSXName {
Expand Down Expand Up @@ -1599,7 +1635,10 @@ fn issue_481() {
name: "span".into()
},
Token::JSXTagEnd,
JSXText { raw: " ".into() },
JSXText {
raw: " ".into(),
value: " ".into()
},
LBrace,
Word(Word::Ident("foo".into())),
RBrace,
Expand Down Expand Up @@ -2159,3 +2198,34 @@ class C {
assert_eq!(errors.len(), 4);
assert!(errors.iter().all(|e| e.kind() == &SyntaxError::TS1185));
}

#[test]
fn issue_9106() {
    // Regression test for swc issue #9106: a `\r\n` sequence inside JSXText
    // must be normalized to `\n` in `value`, while `raw` keeps the original
    // source bytes untouched.
    let syntax = crate::Syntax::Es(crate::EsSyntax {
        jsx: true,
        ..Default::default()
    });

    let expected = vec![
        Token::JSXTagStart,
        Token::JSXName {
            name: "Page".into(),
        },
        Token::JSXTagEnd,
        Token::JSXText {
            raw: "\n\r\nABC".into(),
            value: "\n\nABC".into(),
        },
        Token::JSXTagStart,
        tok!('/'),
        Token::JSXName {
            name: "Page".into(),
        },
        Token::JSXTagEnd,
        Token::Semi,
    ];

    assert_eq!(lex_tokens(syntax, "<Page>\n\r\nABC</Page>;"), expected);
}
7 changes: 1 addition & 6 deletions crates/swc_ecma_parser/src/parser/jsx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -428,12 +428,7 @@ impl<I: Tokens> Parser<I> {
let token = bump!(self);
let span = self.input.prev_span();
match token {
Token::JSXText { raw } => Ok(JSXText {
span,
// TODO
value: raw.clone(),
raw,
}),
Token::JSXText { raw, value } => Ok(JSXText { span, value, raw }),
_ => unreachable!(),
}
}
Expand Down
19 changes: 10 additions & 9 deletions crates/swc_ecma_parser/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use std::{
};

use num_bigint::BigInt as BigIntValue;
use swc_atoms::{atom, Atom, AtomStore, JsWord};
use swc_atoms::{atom, Atom, AtomStore};
use swc_common::{Span, Spanned};
use swc_ecma_ast::{AssignOp, BinaryOp};

Expand Down Expand Up @@ -278,7 +278,7 @@ pub enum Token {

/// String literal. Span of this token contains quote.
Str {
value: JsWord,
value: Atom,
raw: Atom,
},

Expand All @@ -297,9 +297,10 @@ pub enum Token {
},

JSXName {
name: JsWord,
name: Atom,
},
JSXText {
value: Atom,
raw: Atom,
},
JSXTagStart,
Expand Down Expand Up @@ -499,7 +500,7 @@ pub enum Word {
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum IdentLike {
Known(KnownIdent),
Other(JsWord),
Other(Atom),
}

impl From<&'_ str> for IdentLike {
Expand Down Expand Up @@ -606,7 +607,7 @@ impl From<Keyword> for Word {
}
}

impl From<Word> for JsWord {
impl From<Word> for Atom {
fn from(w: Word) -> Self {
match w {
Word::Keyword(k) => match k {
Expand Down Expand Up @@ -687,7 +688,7 @@ impl Debug for Word {
match *self {
Word::Ident(ref s) => Display::fmt(s, f),
_ => {
let s: JsWord = self.clone().into();
let s: Atom = self.clone().into();
Display::fmt(&s, f)
}
}
Expand Down Expand Up @@ -716,7 +717,7 @@ macro_rules! declare_keyword {
$name:ident => $value:tt,
)*) => {
impl Keyword {
pub(crate) fn into_js_word(self) -> JsWord {
pub(crate) fn into_js_word(self) -> Atom {
match self {
$(Keyword::$name => atom!($value),)*
}
Expand Down Expand Up @@ -932,7 +933,7 @@ impl TokenKind {
}

impl Word {
pub(crate) fn cow(&self) -> Cow<JsWord> {
pub(crate) fn cow(&self) -> Cow<Atom> {
match self {
Word::Keyword(k) => Cow::Owned(k.into_js_word()),
Word::Ident(IdentLike::Known(w)) => Cow::Owned((*w).into()),
Expand Down Expand Up @@ -980,7 +981,7 @@ impl Debug for Token {
Num { value, raw, .. } => write!(f, "numeric literal ({}, {})", value, raw)?,
BigInt { value, raw } => write!(f, "bigint literal ({}, {})", value, raw)?,
JSXName { name } => write!(f, "jsx name ({})", name)?,
JSXText { raw } => write!(f, "jsx text ({})", raw)?,
JSXText { raw, .. } => write!(f, "jsx text ({})", raw)?,
JSXTagStart => write!(f, "< (jsx tag start)")?,
JSXTagEnd => write!(f, "> (jsx tag end)")?,
Shebang(_) => write!(f, "#!")?,
Expand Down
4 changes: 2 additions & 2 deletions crates/swc_ecma_parser/tests/jsx/basic/7/input.js.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@
"end": 42,
"ctxt": 0
},
"value": "\n\nbar\n\nbaz\n\n",
"raw": "\n\nbar\n\nbaz\n\n"
"value": "\nbar\nbaz\n",
"raw": "\nbar\nbaz\n"
}
],
"closing": {
Expand Down
Loading
Loading