Skip to content

Commit

Permalink
Added support for surrogate pairs in strings and templates. Fixes #37
Browse files Browse the repository at this point in the history
  • Loading branch information
mck89 committed Jul 24, 2021
1 parent ad912d4 commit cd50aa9
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 1 deletion.
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
},
"extra": {
"branch-alias": {
"dev-master": "1.13.2-dev"
"dev-master": "1.13.3-dev"
}
}
}
3 changes: 3 additions & 0 deletions doc/changelog.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
Changelog
==========

#### 1.13.3
* Added support for surrogate pairs in strings and templates

#### 1.13.2
* Fixed bug when parsing spread operator inside objects returned by arrow functions

Expand Down
37 changes: 37 additions & 0 deletions lib/Peast/Syntax/Utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,23 @@ protected static function getLineTerminators()
}
return self::$lineTerminatorsCache;
}

/**
 * Combines the two halves of a UTF-16 surrogate pair into one code point
 * and returns its UTF-8 encoding.
 *
 * Both arguments are hexadecimal strings without any prefix or braces
 * (for example "D83D" and "DE00"). Only the low 10 bits of each value are
 * used; no check is made that the values really are surrogates.
 *
 * @param string $first  Hex digits of the first (high) surrogate
 * @param string $second Hex digits of the second (low) surrogate
 *
 * @return string
 *
 * @codeCoverageIgnore
 */
static public function surrogatePairToUtf8($first, $second)
{
    //From: https://stackoverflow.com/questions/39226593/how-to-convert-utf16-surrogate-pairs-to-equivalent-hex-codepoint-in-php
    // Each surrogate carries 10 payload bits: the first holds the upper
    // half, the second the lower half of the 20-bit offset.
    $upperBits = hexdec($first) & 0x3ff;
    $lowerBits = hexdec($second) & 0x3ff;
    $offset = ($upperBits << 10) | $lowerBits;
    // The offset is relative to the start of the supplementary planes.
    $codePoint = $offset + 0x10000;
    return self::unicodeToUtf8($codePoint);
}

/**
* This function takes a string as it appears in the source code and returns
Expand All @@ -114,10 +131,22 @@ static public function unquoteLiteralString($str)
//Remove quotes
$str = substr($str, 1, -1);

//Return immediately if the escape character is missing
if (strpos($str, "\\") === false) {
return $str;
}

$lineTerminators = self::getLineTerminators();

//Surrogate pairs regex
$surrogatePairsReg = sprintf(
'u(?:%1$s|\{%1$s\})\\\\u(?:%2$s|\{%2$s\})',
"[dD][89abAB][0-9a-fA-F]{2}", "[dD][c-fC-F][0-9a-fA-F]{2}"
);

//Handle escapes
$patterns = array(
$surrogatePairsReg,
"u\{[a-fA-F0-9]+\}",
"u[a-fA-F0-9]{4}",
"x[a-fA-F0-9]{2}",
Expand All @@ -144,6 +173,14 @@ static public function unquoteLiteralString($str)
if (strlen($m[1]) === 1) {
return "\\$type";
}
// Surrogate pair
if ($type === "u" && strpos($m[1], "\\") !== false) {
$points = explode("\\", $m[1]);
return Utils::surrogatePairToUtf8(
str_replace(array("{", "}"), "", $points[0]),
str_replace(array("{", "}"), "", $points[1])
);
}
// \uFFFF, \u{FFFF}, \xFF
$code = substr($m[1], 1);
$code = str_replace(array("{", "}"), "", $code);
Expand Down
37 changes: 37 additions & 0 deletions test/Peast/Syntax/ES2015/ES2015Test.php
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
<?php
namespace Peast\test\Syntax\ES2015;

use Peast\Syntax\Utils;

class ES2015Test extends \Peast\test\TestParser
{
protected $parser = "ES2015";
Expand Down Expand Up @@ -122,4 +124,39 @@ public function testStringsParsing($chars, $valid)
}
$this->assertSame($valid, $validResult);
}

/**
 * Data provider for testSurrogatePairs.
 *
 * Produces every combination of quote character (double quote, single
 * quote, backtick) with a 0/1 flag for the first escape and a 0/1 flag
 * for the second escape, twelve rows in total.
 *
 * @return array
 */
public function surrogatePairsProvider()
{
    $flags = array(0, 1);
    $cases = array();
    foreach (array('"', "'", "`") as $quote) {
        foreach ($flags as $firstFlag) {
            foreach ($flags as $secondFlag) {
                $cases[] = array($quote, $firstFlag, $secondFlag);
            }
        }
    }
    return $cases;
}

/**
 * Verifies that an escaped surrogate pair in a string or template literal
 * is decoded into the single character U+1F600.
 *
 * The source under test is the pair D83D / DE00 written as unicode
 * escapes, each one optionally using the braced escape form, wrapped in
 * the given quote character.
 *
 * @param string $char         Delimiter of the literal: ", ' or `
 * @param int    $firstBraces  Truthy to write the first escape with braces
 * @param int    $secondBraces Truthy to write the second escape with braces
 *
 * @dataProvider surrogatePairsProvider
 */
public function testSurrogatePairs($char, $firstBraces, $secondBraces)
{
    // Build the literal source code, e.g. "\uD83D\u{DE00}"
    $test = $char;
    foreach (array("D83D" => $firstBraces, "DE00" => $secondBraces) as $point => $braces) {
        $test .= '\u' . ($braces ? "{" : "") . $point . ($braces ? "}" : "");
    }
    $test .= $char;

    $expected = Utils::unicodeToUtf8(0x1F600);

    $body = \Peast\Peast::{$this->parser}($test)->parse()->getBody();
    $expression = $body[0]->getExpression();
    if ($char === "`") {
        // Template literal: the decoded text lives on the first quasi
        $quasis = $expression->getQuasis();
        $actual = $quasis[0]->getValue();
    } else {
        $actual = $expression->getValue();
    }

    // Expected value first, so failure messages label the values correctly
    $this->assertSame($expected, $actual);
}
}

0 comments on commit cd50aa9

Please sign in to comment.