Skip to content

Commit

Permalink
Perlito5 - misc/perl5_parser features
Browse files Browse the repository at this point in the history
  • Loading branch information
fglock committed Jul 7, 2024
1 parent 085d8ad commit 56c38b5
Showing 1 changed file with 30 additions and 49 deletions.
79 changes: 30 additions & 49 deletions misc/perl5_parser/perl5_parser.pl
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ sub tokenize {
elsif ( $char =~ /[a-zA-Z_]/ ) {
$state = IDENTIFIER();
}
elsif ( $char =~ /\d/ ) {
elsif ( $char =~ /[0-9]/ ) {
$state = NUMBER();
}
elsif ( exists $OPERATORS{$char} ) {
Expand All @@ -137,7 +137,7 @@ sub tokenize {
}
elsif ( $state == WHITESPACE() ) {
if ( $char !~ /\s/ || $char eq "\n" ) {
push @tokens, [ WHITESPACE(), $buffer, 0 ];
push @tokens, [ WHITESPACE(), $buffer ];
$buffer = '';
$state = START();
redo FSM;
Expand All @@ -158,7 +158,7 @@ sub tokenize {
}
}
elsif ( $state == NUMBER() ) {
if ( $char !~ /\d/ ) {
if ( $char !~ /[0-9]/ ) {
push @tokens, [ NUMBER(), $buffer ];
$buffer = '';
$state = START();
Expand Down Expand Up @@ -263,8 +263,9 @@ sub tokenize {
my %POSTFIX = (
'--' => 1,
'++' => 1,
## ',' => 1,
);

# default associativity is LEFT
my %ASSOC_RIGHT = (
'**' => 1,
'=' => 1,
Expand Down Expand Up @@ -333,6 +334,7 @@ sub parse_precedence_expression {
$pos++;
$pos = parse_optional_whitespace( $tokens, $pos )->{next};

# Handle postfix () [] {}
if ( $type == PAREN_OPEN() || $type == CURLY_OPEN() || $type == SQUARE_OPEN() ) {
my $right_expr = parse_term( $tokens, $op_pos );
if ( $right_expr->{FAIL} ) {
Expand Down Expand Up @@ -452,15 +454,10 @@ sub parse_variable {
sub parse_number {
my ( $tokens, $index ) = @_;
my $pos = $index;
if ( $tokens->[$pos][0] == MINUS() ) {
$pos++; # -
}
if ( $tokens->[$pos][0] == DOT() ) {
$pos++; # .
if ( $tokens->[$pos][0] == NUMBER() ) {

# .123
$pos++;
$pos++; # .123
}
else {
return parse_fail( $tokens, $index );
Expand All @@ -469,52 +466,34 @@ sub parse_number {
elsif ( $tokens->[$pos][0] == NUMBER() ) {
$pos++; # 123
if ( $tokens->[$pos][0] == DOT() ) {

# 123.
$pos++;
$pos++; # 123.
if ( $tokens->[$pos][0] == NUMBER() ) {

# 123.456
$pos++;
$pos++; # 123.456
}
}
}
else {
return parse_fail( $tokens, $index );
}

if ( $tokens->[$pos][0] == IDENTIFIER() && $tokens->[$pos][1] =~ /^e(\d*)$/i ) {
if ( $tokens->[$pos][0] == IDENTIFIER() && $tokens->[$pos][1] =~ /^e([0-9]*)$/i ) {
if ($1) {

# E10
$pos++;
$pos++; # E10
}
else {

# 123E-10
$pos++;

$pos++; # 123E-10
if ( $tokens->[$pos][0] == MINUS() ) {

# -
$pos++;
$pos++; # -
}
if ( $tokens->[$pos][0] == NUMBER() ) {

# 123
$pos++;
$pos++; # 123
}
else {
return parse_fail( $tokens, $index );
}
}

# return { NUMBER => 1, index => $index, value => [ map { $tokens->[$_] } $index .. $pos -1 ] };
return { type => 'NUMBER', index => $index, value => join( '', map { $tokens->[$_][1] } $index .. $pos - 1 ), next => $pos };
}
else {
return { type => 'NUMBER', index => $index, value => join( '', map { $tokens->[$_][1] } $index .. $pos - 1 ), next => $pos };
}
return { type => 'NUMBER', index => $index, value => join( '', map { $tokens->[$_][1] } $index .. $pos - 1 ), next => $pos };
}

my %escape_sequence = qw/ a 7 b 8 e 27 f 12 n 10 r 13 t 9 /;
Expand Down Expand Up @@ -803,16 +782,22 @@ sub parse_statement {
return $ast;
}

sub token_as_string {
my ( $type, $value, $attr ) = @_;
return "" if !defined $value;
$value = "newline" if $value eq "\n";
$attr = $TokenName{ $attr // "" } // "";
return "$TokenName{$type}: \t'$value' \t $attr\n";
}

sub main {
binmode( STDOUT, ":utf8" );
my $perl_code = join( '', <DATA> );
my $tokens = tokenize($perl_code);
## for my $token (@$tokens) {
## my ( $type, $value, $attr ) = @$token;
## $value = "newline" if $value eq "\n";
## $attr = $AttrName{$attr // ""} // "";
## print "$TokenName{$type}: \t'$value' \t $attr\n";
## }

# for my $token (@$tokens) {
# print token_as_string(@$token);
# }
my $index = 0;
while ( $index < @$tokens ) {
$index = parse_optional_whitespace( $tokens, $index )->{next};
Expand All @@ -823,11 +808,7 @@ sub main {
$index = $ast->{next};
}
else {
my ( $type, $value, $attr ) = @{ $tokens->[$index] };
last if !defined $value;
$value = "newline" if $value eq "\n";
$attr = $TokenName{ $attr // "" } // "";
print "$TokenName{$type}: \t'$value' \t $attr\n";
print token_as_string( @{ $tokens->[$index] } );
$index++;
}
}
Expand All @@ -844,7 +825,6 @@ sub main {
}
qw( abc def \n &.= € );
2*3+5*6 or 0;
1E10 + -1E-10;
{ , , a => 3 + 1, , c => 4 , , };
,,,;
(
Expand Down Expand Up @@ -888,4 +868,5 @@ sub main {
q< abd >;
{ q => 123 };
qq< abd [$v] >;
(-123, -123.56,
1E10 + -1E-10 );

0 comments on commit 56c38b5

Please sign in to comment.