From cab76b7d5797480e734c62a2a0217221dc3ed83b Mon Sep 17 00:00:00 2001 From: antoineB Date: Sun, 29 Oct 2023 01:47:20 +0200 Subject: [PATCH] feat: Add more string literal formats (from postgres) and number formats Add bit string literal https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-BIT-STRINGS Add unicode string https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-UESCAPE Add exponent in integers and decimals https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS-NUMERIC Add hexadecimal, binary and octal formats Add '_' as separator of digits Add string casting (e.g. REAL '1.23') --- grammar.js | 18 +++- test/corpus/literals.txt | 178 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 193 insertions(+), 3 deletions(-) create mode 100644 test/corpus/literals.txt diff --git a/grammar.js b/grammar.js index 69d0a1d4..55284386 100644 --- a/grammar.js +++ b/grammar.js @@ -3106,6 +3106,8 @@ module.exports = grammar({ $._integer, $._decimal_number, $._literal_string, + $._bit_string, + $._string_casting, $.keyword_true, $.keyword_false, $.keyword_null, @@ -3114,7 +3116,7 @@ module.exports = grammar({ _double_quote_string: _ => /"[^"]*"/, // The norm specify that between two consecutive string must be a return, // but this is good enough. 
- _single_quote_string: _ => repeat1(/'([^']|'')*'/), + _single_quote_string: _ => seq(/([uU]&)?'([^']|'')*'/, repeat(/'([^']|'')*'/)), _literal_string: $ => prec( 1, choice( @@ -3124,8 +3126,18 @@ module.exports = grammar({ ), ), _natural_number: _ => /\d+/, - _integer: $ => seq(optional("-"), $._natural_number), - _decimal_number: $ => seq(optional("-"), /(\d*[.]\d+)|(\d+[.])/), + _integer: $ => seq( + optional(choice("-", "+")), + /(0[xX][0-9A-Fa-f]+(_[0-9A-Fa-f]+)*)|(0[oO][0-7]+(_[0-7]+)*)|(0[bB][01]+(_[01]+)*)|(\d+(_\d+)*(e[+-]?\d+(_\d+)*)?)/ + ), + _decimal_number: $ => seq( + optional( + choice("-", "+")), + /((\d+(_\d+)*)?[.]\d+(_\d+)*(e[+-]?\d+(_\d+)*)?)|(\d+(_\d+)*[.](e[+-]?\d+(_\d+)*)?)/ + ), + _bit_string: $ => seq(/[bBxX]'([^']|'')*'/, repeat(/'([^']|'')*'/)), + // The identifier should be followed by a string (no parenthesis allowed) + _string_casting: $ => seq($.identifier, $._single_quote_string), bang: _ => '!', diff --git a/test/corpus/literals.txt b/test/corpus/literals.txt new file mode 100644 index 00000000..05a17f8c --- /dev/null +++ b/test/corpus/literals.txt @@ -0,0 +1,178 @@ +================================================================================ +Hexadecimal number +================================================================================ + +SELECT 0xAA; + +-------------------------------------------------------------------------------- + +(program + (statement + (select + (keyword_select) + (select_expression + (term + (literal)))))) + +================================================================================ +binary number +================================================================================ + +SELECT 0b1010; + +-------------------------------------------------------------------------------- + +(program + (statement + (select + (keyword_select) + (select_expression + (term + (literal)))))) + +================================================================================ +big integer with _ separators 
+================================================================================ + +SELECT 1_000_000_000; + +-------------------------------------------------------------------------------- + +(program + (statement + (select + (keyword_select) + (select_expression + (term + (literal)))))) + +================================================================================ +integer with exponent with _ separators +================================================================================ + +SELECT 12e1_000; + +-------------------------------------------------------------------------------- + +(program + (statement + (select + (keyword_select) + (select_expression + (term + (literal)))))) + +================================================================================ +decimal with exponent +================================================================================ + +SELECT .2e-1_000; + +-------------------------------------------------------------------------------- + +(program + (statement + (select + (keyword_select) + (select_expression + (term + (literal)))))) + +================================================================================ +positive number +================================================================================ + +SELECT +1; + +-------------------------------------------------------------------------------- + +(program + (statement + (select + (keyword_select) + (select_expression + (term + (literal)))))) + +================================================================================ +bit string +================================================================================ + +SELECT b'1010'; + +-------------------------------------------------------------------------------- + +(program + (statement + (select + (keyword_select) + (select_expression + (term + (literal)))))) + +================================================================================ +bit string (hexa) 
+================================================================================ + +SELECT x'AF'; + +-------------------------------------------------------------------------------- + +(program + (statement + (select + (keyword_select) + (select_expression + (term + (literal)))))) + +================================================================================ +multi line string +================================================================================ + +SELECT 'hello ' +'world'; + +-------------------------------------------------------------------------------- + +(program + (statement + (select + (keyword_select) + (select_expression + (term + (literal)))))) + +================================================================================ +unicode string +================================================================================ + +SELECT u&'\041f\0440\0438\0432\0456\0442 ' +'\0421\0432\0456\0442'; + +-------------------------------------------------------------------------------- + +(program + (statement + (select + (keyword_select) + (select_expression + (term + (literal)))))) + +================================================================================ +string casting +================================================================================ + +SELECT int '123'; + +-------------------------------------------------------------------------------- + +(program + (statement + (select + (keyword_select) + (select_expression + (term + (literal + (identifier)))))))