Skip to content

Commit

Permalink
Moved parsing of paragraph and line separators as valid string charac…
Browse files Browse the repository at this point in the history
…ters in ES2019 parser
  • Loading branch information
mck89 committed Dec 16, 2018
1 parent f5ed773 commit 19feb1a
Show file tree
Hide file tree
Showing 6 changed files with 110 additions and 69 deletions.
17 changes: 17 additions & 0 deletions lib/Peast/Syntax/ES2019/Scanner.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,28 @@
*/
namespace Peast\Syntax\ES2019;

use \Peast\Syntax\Utils;

/**
* ES2019 scanner.
*
* @author Marco Marchiò <[email protected]>
*/
class Scanner extends \Peast\Syntax\ES2018\Scanner
{
    /**
     * Builds the scanner through the ES2018 constructor, then removes the
     * line separator and the paragraph separator from the sequences that
     * stop a string
     *
     * @param string $source   Source code
     * @param string $encoding Source code encoding, if not specified it
     *                         will assume UTF-8
     */
    function __construct($source, $encoding = null)
    {
        parent::__construct($source, $encoding);

        //U+2028 (line separator) and U+2029 (paragraph separator) are
        //dropped from the strings stops LSM, in this order
        foreach (array(0x2028, 0x2029) as $codePoint) {
            $separator = Utils::unicodeToUtf8($codePoint);
            $this->stringsStopsLSM->remove($separator);
        }
    }
}
32 changes: 23 additions & 9 deletions lib/Peast/Syntax/LSM.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,21 @@ class LSM
protected $map = array();

/**
* Scanner instance
* Encoding handle flag
*
* @var Scanner
* @var bool
*/
protected $scanner;
protected $handleEncoding = false;

/**
* Class constructor
*
* @param array $sequences Allowed characters sequences
* @param array $sequences Allowed characters sequences
* @param bool $handleEncoding True to handle encoding when matching
*/
function __construct($sequences)
function __construct($sequences, $handleEncoding = false)
{
$this->handleEncoding = $handleEncoding;
foreach ($sequences as $s) {
$this->add($s);
}
Expand All @@ -52,8 +54,14 @@ function __construct($sequences)
*/
public function add($sequence)
{
$first = $sequence[0];
$len = strlen($sequence);
if ($this->handleEncoding) {
$s = Utils::stringToUTF8Array($sequence);
$first = $s[0];
$len = count($s);
} else {
$first = $sequence[0];
$len = strlen($sequence);
}
if (!isset($this->map[$first])) {
$this->map[$first] = array(
"maxLen" => $len,
Expand All @@ -75,14 +83,20 @@ public function add($sequence)
*/
public function remove($sequence)
{
$first = $sequence[0];
if ($this->handleEncoding) {
$s = Utils::stringToUTF8Array($sequence);
$first = $s[0];
} else {
$first = $sequence[0];
}
if (isset($this->map[$first])) {
$len = $this->handleEncoding ? count($s) : strlen($sequence);
$this->map[$first]["map"] = array_diff(
$this->map[$first]["map"], array($sequence)
);
if (!count($this->map[$first]["map"])) {
unset($this->map[$first]);
} elseif ($this->map[$first]["maxLen"] === strlen($sequence)) {
} elseif ($this->map[$first]["maxLen"] === $len) {
// Recalculate the max length if necessary
foreach ($this->map[$first]["map"] as $m) {
$this->map[$first]["maxLen"] = max(
Expand Down
15 changes: 2 additions & 13 deletions lib/Peast/Syntax/Scanner.php
Original file line number Diff line number Diff line change
Expand Up @@ -290,9 +290,7 @@ function __construct($source, $encoding = null)

//Instead of using mb_substr for each character, split the source
//into an array of UTF8 characters for performance reasons
$this->source = $source === "" ?
array() :
preg_split('//u', $source, null, PREG_SPLIT_NO_EMPTY);
$this->source = Utils::stringToUTF8Array($source);
$this->length = count($this->source);

//Convert character codes to UTF8 characters in whitespaces and line
Expand All @@ -312,16 +310,7 @@ function __construct($source, $encoding = null)
$this->punctutatorsLSM = new LSM($this->punctutators);

//Create a LSM for strings stops
$this->stringsStopsLSM = new LSM(
array_diff(
$this->lineTerminators,
//Paragraph and line separators are allowed in strings
array(
Utils::unicodeToUtf8(0x2028),
Utils::unicodeToUtf8(0x2029)
)
)
);
$this->stringsStopsLSM = new LSM($this->lineTerminators, true);

$this->linesSplitter = "/" .
implode("|", $this->lineTerminators) .
Expand Down
14 changes: 14 additions & 0 deletions lib/Peast/Syntax/Utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,20 @@
*/
class Utils
{
/**
 * Converts a string to an array of UTF-8 characters
 *
 * An empty string yields an empty array. Any other string is split on
 * every character boundary using a UTF-8 aware regular expression, so
 * multi-byte characters are kept whole.
 *
 * NOTE: preg_split returns false on failure (for example when the
 * input is not valid UTF-8); that value is passed back unchanged.
 *
 * @param string $str String to convert
 *
 * @return array
 */
static public function stringToUTF8Array($str)
{
    if ($str === "") {
        return array();
    }
    //Use -1, the documented "no limit" value, instead of null: passing
    //null to the integer $limit parameter is deprecated as of PHP 8.1
    return preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
}

/**
* Converts a Unicode code point to UTF-8
*
Expand Down
86 changes: 39 additions & 47 deletions test/Peast/Syntax/ES2015/ES2015Test.php
Original file line number Diff line number Diff line change
Expand Up @@ -143,60 +143,52 @@ public function testKeywordIdentifier($code, $valid, $validStrictMode)
}
}

public function escapeSequencesProvider()
public function stringCharsProvider()
{
return array(
array("'\\x'"),
array("'\\x1'"),
array("'\\x1G'"),
array("'\\u'"),
array("'\\u1'"),
array("'\\u11'"),
array("'\\u111'"),
array("'\\uG'"),
array("'\\u1G'"),
array("'\\u11G'"),
array("'\\u111G'"),
array("'\\u{}'"),
array("'\\u{'"),
array("'\\u{12'"),
array("'\\u{G}'"),
array("'\\u{1G}'"),
array("'\\u{1G1}'"),
array("'\\u{G1}'"),
array("'\\u{{'"),
array("\\x", false),
array("\\x1", false),
array("\\x1G", false),
array("\\u", false),
array("\\u1", false),
array("\\u11", false),
array("\\u111", false),
array("\\uG", false),
array("\\u1G", false),
array("\\u11G", false),
array("\\u111G", false),
array("\\u{}", false),
array("\\u{", false),
array("\\u{12", false),
array("\\u{G}", false),
array("\\u{1G}", false),
array("\\u{1G1}", false),
array("\\u{G1}", false),
array("\\u{{", false),
array("\n", false),
array("\r", false),
array(\Peast\Syntax\Utils::unicodeToUtf8(0x2028), false),
array(\Peast\Syntax\Utils::unicodeToUtf8(0x2029), false),
array("\\\n", true),
array("\\\r", true),
array("\\\r\n", true),
array("\\" . \Peast\Syntax\Utils::unicodeToUtf8(0x2028), true),
array("\\" . \Peast\Syntax\Utils::unicodeToUtf8(0x2029), true)
);
}

/**
* @dataProvider escapeSequencesProvider
* @expectedException \Peast\Syntax\Exception
*/
public function testInvalidescapeSequences($code)
{
\Peast\Peast::{$this->parser}($code)->parse();
}

public function validStringsProvider()
{
return array(
array("\\\n"), //LF
array("\\\r"), //CR
array("\\\r\n"), //CR+LF
array(\Peast\Syntax\Utils::unicodeToUtf8(0x2028)), //LineSeparator
array(\Peast\Syntax\Utils::unicodeToUtf8(0x2029)), //ParagraphSeparator
);
}

/**
* @dataProvider validStringsProvider
* @dataProvider stringCharsProvider
*/
public function testValidStrings($code)
public function testStringsParsing($chars, $valid)
{
$code = "'$code'";
$tree = \Peast\Peast::{$this->parser}($code)->parse();
$items = $tree->getBody();
$str = $items[0]->getExpression()->getRaw();
$this->assertSame($code, $str);
$code = "'$chars'";
$validResult = true;
try {
\Peast\Peast::{$this->parser}($code)->parse();
} catch (\Exception $ex) {
$validResult = false;
}
$this->assertSame($valid, $validResult);
}
}
15 changes: 15 additions & 0 deletions test/Peast/Syntax/ES2019/ES2019Test.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,19 @@ protected function getTestVersions()
{
return array("ES2015", "ES2016", "ES2017", "ES2018", "ES2019");
}

/**
 * Returns the parent provider's data set with the line separator
 * (U+2028) and the paragraph separator (U+2029) flagged as valid.
 *
 * Each entry is an array whose first element is the characters to test
 * and whose second element is the expected validity flag. Only entries
 * whose first element is exactly one of the two separators are changed.
 *
 * @return array
 */
public function stringCharsProvider()
{
    $chars = parent::stringCharsProvider();
    $validChars = array(
        \Peast\Syntax\Utils::unicodeToUtf8(0x2028),
        \Peast\Syntax\Utils::unicodeToUtf8(0x2029)
    );
    foreach ($chars as $k => $v) {
        //Strict matching avoids any type juggling when comparing the
        //tested characters with the separators
        if (in_array($v[0], $validChars, true)) {
            $chars[$k][1] = true;
        }
    }
    return $chars;
}
}

0 comments on commit 19feb1a

Please sign in to comment.