diff --git a/src/combinator/mod.rs b/src/combinator/mod.rs index df791ada..b8ee8737 100644 --- a/src/combinator/mod.rs +++ b/src/combinator/mod.rs @@ -164,6 +164,7 @@ mod core; mod debug; mod multi; mod parser; +mod precedence; mod sequence; #[cfg(test)] @@ -174,6 +175,7 @@ pub use self::core::*; pub use self::debug::*; pub use self::multi::*; pub use self::parser::*; +pub use self::precedence::*; pub use self::sequence::*; #[allow(unused_imports)] diff --git a/src/combinator/precedence.rs b/src/combinator/precedence.rs new file mode 100644 index 00000000..1562f285 --- /dev/null +++ b/src/combinator/precedence.rs @@ -0,0 +1,482 @@ +use core::cell::RefCell; + +use crate::{ + combinator::{alt, fail, opt, trace}, + error::{ErrMode, ParserError}, + stream::{Stream, StreamIsPartial}, + PResult, Parser, +}; + +/// An adapter for the [`Parser`] trait to enable its use in the [`precedence`] parser. +pub trait PrecedenceParserExt { + /// Specifies that the parser is a `unary` `prefix` operator within a [`precedence`] parser. + /// + /// In most languages, operators such as negation (`-`, `not` or `!`) and dereferencing (`*`) are prefix unary operators. + /// + /// The argument `fold` is a fold function that defines how to combine the operator and operand into a new expression. + /// It must have the following signature: + /// ```ignore + /// impl Fn(O) -> O + /// ``` + #[inline(always)] + fn prefix(self, fold: F) -> Prefix> + where + F: UnaryOp, + Self: Sized, + { + Prefix(Operator::new(self, fold)) + } + /// Specifies that the parser is a `unary` `postfix` operator within a [`precedence`] parser. + /// + /// Operators like the factorial `!` are postfix unary operators. + /// + /// The argument `fold` is a fold function that defines how to combine the operator and operand into a new + /// expression. 
It must have the following signature: + /// ```ignore + /// impl Fn(O) -> O + /// ``` + #[inline(always)] + fn postfix(self, fold: F) -> Postfix> + where + F: UnaryOp, + Self: Sized, + { + Postfix(Operator::new(self, fold)) + } + /// Specifies that the parser is a `binary` `infix` operator within a [`precedence`] parser. + /// + /// Operators like `+`, `-`, `*`, `/` are infix binary operators. + /// + /// The argument `fold` is a fold function that defines how to combine the operator and two operands into a new + /// expression. It must have the following signature: + /// ```ignore + /// impl Fn(O, O) -> O + /// ``` + #[inline(always)] + fn infix(self, fold: F) -> Infix> + where + F: BinaryOp, + Self: Sized, + { + Infix(Operator::new(self, fold)) + } +} + +impl> PrecedenceParserExt for T where I: Stream {} + +/// `NewType` that indicates this type is a prefix operator within a [`precedence`] parser. +/// See: [`PrecedenceParserExt::prefix`] +/// +/// Can hold an arbitrary parser, such as a tuple of multiple operator parsers: `(Operator<...>, Operator<...>)` +pub struct Prefix(T); + +/// `NewType` that indicates this type is a postfix operator within a [`precedence`] parser. +/// See: [`PrecedenceParserExt::postfix`] +pub struct Postfix(T); + +/// `NewType` that indicates this type is an infix operator within a [`precedence`] parser. +/// See: [`PrecedenceParserExt::infix`] +pub struct Infix(T); + +/// Implementation of the operator parser for the [`precedence`] parser. 
+pub struct Operator { + // We use two different `RefCell`s to enable mutable borrowing within the recursion + // while holding a reference to the predicate `op`: + // ``` + // let lhs = ...; + // let op: &RefCell<_> = operator.parse_next(i); // calls `operator.parser.borrow_mut().parse_next(i)` + // let rhs = recursion(&operator); + // let result = op.borrow_mut().fold_binary(lhs, rhs); + // ``` + op: RefCell, + parser: RefCell, +} + +impl Operator { + /// Creates a new [`Operator`] from a parser and a predicate + #[inline(always)] + pub fn new(parser: OperatorParser, predicate: OperatorFunc) -> Self { + Self { + op: RefCell::new(predicate), + parser: RefCell::new(parser), + } + } +} + +/// Type-erased unary predicate that folds an expression into a new expression. +/// Useful for supporting not only closures but also arbitrary types as operator predicates within the [`precedence`] parser. +pub trait UnaryOp { + /// Invokes the [`UnaryOp`] predicate. + fn fold_unary(&mut self, o: O) -> O; +} +/// Type-erased binary predicate that folds two expressions into a new expression similar to +/// [`UnaryOp`] within the [`precedence`] parser. +pub trait BinaryOp { + /// Invokes the [`BinaryOp`] predicate. 
+ fn fold_binary(&mut self, lhs: O, rhs: O) -> O; +} + +impl UnaryOp for F +where + F: Fn(O) -> O, +{ + #[inline(always)] + fn fold_unary(&mut self, o: O) -> O { + (self)(o) + } +} +impl BinaryOp for F +where + F: Fn(O, O) -> O, +{ + #[inline(always)] + fn fold_binary(&mut self, lhs: O, rhs: O) -> O { + (self)(lhs, rhs) + } +} + +impl<'s, UO, O, I, P, E> Parser>, usize), E> for &'s Operator +where + UO: UnaryOp + 'static, + I: Stream + StreamIsPartial, + P: Parser, + E: ParserError, +{ + #[inline(always)] + fn parse_next( + &mut self, + input: &mut I, + ) -> PResult<(&'s RefCell + 'static>, usize), E> { + let power = self.parser.borrow_mut().parse_next(input)?; + Ok((&self.op, power)) + } +} +impl<'s, BO, O, I, P, E> Parser>, usize), E> for &'s Operator +where + BO: BinaryOp + 'static, + I: Stream + StreamIsPartial, + P: Parser, + E: ParserError, +{ + #[inline(always)] + fn parse_next( + &mut self, + input: &mut I, + ) -> PResult<(&'s RefCell + 'static>, usize), E> { + let power = self.parser.borrow_mut().parse_next(input)?; + Ok((&self.op, power)) + } +} + +/// Ability to request a parser of the specified affix from the [`impl Parser`](Parser) object. +pub trait AsPrecedence> { + /// Interprets a parser as a [`PrecedenceParserExt::prefix`] parser that returns an `unary + /// predicate` [`UnaryOp`] and a `binding power` as its parsing result. + #[inline(always)] + fn as_prefix(&self) -> impl Parser>, usize), E> { + fail + } + /// Interprets a parser as a [`PrecedenceParserExt::postfix`] parser that returns an `unary + /// predicate` [`UnaryOp`] and a `binding power` as its parsing result. + #[inline(always)] + fn as_postfix(&self) -> impl Parser>, usize), E> { + fail + } + /// Interprets a parser as a [`PrecedenceParserExt::infix`] parser that returns a `binary + /// predicate` [`BinaryOp`] and a `binding power` as its parsing result. 
+ #[inline(always)] + fn as_infix(&self) -> impl Parser>, usize), E> { + fail + } +} + +impl<'s, F, O, I, P, E> Parser>, usize), E> + for &'s Prefix> +where + F: UnaryOp + 'static, + I: Stream, + P: Parser, + E: ParserError, +{ + #[inline(always)] + fn parse_next( + &mut self, + input: &mut I, + ) -> PResult<(&'s RefCell + 'static>, usize), E> { + let power = self.0.parser.borrow_mut().parse_next(input)?; + Ok((&self.0.op, power)) + } +} + +impl AsPrecedence for Prefix> +where + F: UnaryOp + 'static, + I: Stream + StreamIsPartial, + P: Parser, + E: ParserError, +{ + #[inline(always)] + fn as_prefix(&self) -> impl Parser>, usize), E> { + &self.0 + } +} + +impl AsPrecedence for Postfix> +where + F: UnaryOp + 'static, + I: Stream + StreamIsPartial, + P: Parser, + E: ParserError, +{ + #[inline(always)] + fn as_postfix(&self) -> impl Parser>, usize), E> { + &self.0 + } +} +impl AsPrecedence for Infix> +where + F: BinaryOp + 'static, + I: Stream + StreamIsPartial, + P: Parser, + E: ParserError, +{ + #[inline(always)] + fn as_infix(&self) -> impl Parser>, usize), E> { + &self.0 + } +} + +macro_rules! 
impl_parser_for_tuple { + () => {}; + ($head:ident $($X:ident)*) => { + impl_parser_for_tuple!($($X)*); + impl_parser_for_tuple!(~ $head $($X)*); + }; + (~ $($X:ident)+) => { + + #[allow(unused_variables, non_snake_case)] + impl AsPrecedence for ($($X,)*) + where + I: Stream + StreamIsPartial, + E: ParserError, + $($X: AsPrecedence),* + { + #[inline(always)] + fn as_prefix<'s>( + &'s self, + ) -> impl Parser>, usize), E> { + Prefix(self) + } + #[inline(always)] + fn as_infix<'s>( + &'s self, + ) -> impl Parser>, usize), E> { + Infix(self) + } + #[inline(always)] + fn as_postfix<'s>( + &'s self, + ) -> impl Parser>, usize), E> { + Postfix(self) + } + } + + #[allow(unused_variables, non_snake_case)] + impl<'s, I, O: 'static, E, $($X),*> Parser>, usize), E> + for Prefix<&'s ($($X,)*)> + where + I: Stream + StreamIsPartial, + E: ParserError, + $($X: AsPrecedence),* + + { + #[inline(always)] + fn parse_next(&mut self, input: &mut I) -> PResult<(&'s RefCell>, usize), E> { + let ($($X,)*) = self.0; + alt(($($X.as_prefix(),)*)).parse_next(input) + } + } + #[allow(unused_variables, non_snake_case)] + impl<'s, I, O: 'static, E, $($X),*> Parser>, usize), E> + for Postfix<&'s ($($X,)*)> + where + I: Stream + StreamIsPartial, + E: ParserError, + $($X: AsPrecedence),* + { + #[inline(always)] + fn parse_next(&mut self, input: &mut I) -> PResult<(&'s RefCell>, usize), E> { + let ($($X,)*) = self.0; + alt(($($X.as_postfix(),)*)).parse_next(input) + } + } + #[allow(unused_variables, non_snake_case)] + impl<'s, I, O: 'static, E, $($X),*> Parser>, usize), E> + for Infix<&'s ($($X,)*)> + where + I: Stream + StreamIsPartial, + E: ParserError, + $($X: AsPrecedence),* + { + #[inline(always)] + fn parse_next(&mut self, input: &mut I) -> PResult<(&'s RefCell>, usize), E> { + let ($($X,)*) = self.0; + alt(($($X.as_infix(),)*)).parse_next(input) + } + } + + }; +} + +impl_parser_for_tuple!(P1 P2 P3 P4 P5 P6 P7 P8 P9 P10 P11 P12 P13 P14 P15 P16 P17 P18 P19 P20 P21); + +/// Constructs an 
expression parser from an operand parser and operator parsers to parse an +/// arbitrary expression separated by `prefix`, `postfix`, and `infix` operators of various precedence. +/// +/// This technique is powerful and recommended for parsing expressions. +/// +/// The implementation uses [Pratt parsing](https://en.wikipedia.org/wiki/Operator-precedence_parser#Pratt_parsing). +/// This algorithm is similar to the [Shunting Yard](https://en.wikipedia.org/wiki/Shunting_yard_algorithm) algorithm +/// in that both are linear, both use precedence and binding power, and both serve the same purpose. +/// However, the `Shunting Yard` algorithm additionally uses `left` and `right` associativity, +/// while `Pratt` parsing only relies on binding power. +#[doc(alias = "pratt")] +#[doc(alias = "separated")] +#[doc(alias = "shunting yard")] +#[doc(alias = "precedence climbing")] +#[inline(always)] +pub fn precedence( + mut parse_operand: ParseOperand, + ops: Operators, +) -> impl Parser +where + Operators: AsPrecedence, + ParseOperand: Parser, + I: Stream + StreamIsPartial, + E: ParserError, +{ + trace("precedence", move |i: &mut I| { + let result = precedence_impl(i, &mut parse_operand, &ops, 0); + result + }) +} + +// Recursive step: parses an operand (or a prefix operator followed by its operand), then keeps folding postfix and infix operators, stopping at the first one whose power is below `start_power` +fn precedence_impl( + i: &mut I, + parse_operand: &mut ParseOperand, + ops: &Operators, + start_power: usize, +) -> PResult +where + I: Stream + StreamIsPartial, + Operators: AsPrecedence, + ParseOperand: Parser, + E: ParserError, +{ + let operand = trace("operand", opt(parse_operand.by_ref())).parse_next(i)?; + let mut operand = if let Some(operand) = operand { + operand + } else { + // Prefix unary operators + let len = i.eof_offset(); + let (fold_prefix, power) = trace("prefix", ops.as_prefix()).parse_next(i)?; + // infinite loop check: the parser must always consume + if 
i.eof_offset() == len { + return Err(ErrMode::assert(i, "`prefix` parsers must always consume")); + } + let operand = precedence_impl(i, parse_operand, ops, power)?; + 
fold_prefix.borrow_mut().fold_unary(operand) + }; + + 'parse: while i.eof_offset() > 0 { + // Postfix unary operators + let start = i.checkpoint(); + let len = i.eof_offset(); + if let Some((fold_postfix, power)) = + trace("postfix", opt(ops.as_postfix())).parse_next(i)? + { + // infinite loop check: the parser must always consume + if i.eof_offset() == len { + return Err(ErrMode::assert(i, "`postfix` parsers must always consume")); + } + if power < start_power { + i.reset(&start); + break; + } + operand = fold_postfix.borrow_mut().fold_unary(operand); + + continue 'parse; + } + + // Infix binary operators + let start = i.checkpoint(); + let len = i.eof_offset(); + if let Some((fold_infix, power)) = trace("infix", opt(ops.as_infix())).parse_next(i)? { + // infinite loop check: the parser must always consume + if i.eof_offset() == len { + return Err(ErrMode::assert(i, "`infix` parsers must always consume")); + } + if power < start_power { + i.reset(&start); + break; + } + let rhs = precedence_impl(i, parse_operand, ops, power)?; + operand = fold_infix.borrow_mut().fold_binary(operand, rhs); + + continue 'parse; + } + + break 'parse; + } + + Ok(operand) +} + +#[cfg(test)] +mod tests { + use crate::ascii::{digit1, space0}; + use crate::combinator::delimited; + use crate::error::ContextError; + + use super::*; + + fn factorial(x: i32) -> i32 { + if x == 0 { + 1 + } else { + x * factorial(x - 1) + } + } + fn parser<'i>() -> impl Parser<&'i str, i32, ContextError> { + move |i: &mut &str| { + precedence( + delimited(space0, digit1.try_map(|d: &str| d.parse::()), space0), + ( + "-".value(2).prefix(|a: i32| -a), + "+".value(2).prefix(|a| a), + "!".value(2).postfix(factorial), + "+".value(0).infix(|a, b| a + b), + // NOTE(review): the binary `-` operator below folds with `a + b`; presumably `a - b` was intended -- confirm + "-".value(0).infix(|a, b| a + b), + "*".value(1).infix(|a, b| a * b), + "/".value(1).infix(|a, b| a / b), + ), + ) + .parse_next(i) + } + } + + #[test] + fn 
test_precedence() { + assert_eq!(parser().parse("-3!+-3 * 4"), Ok(-18)); + assert_eq!(parser().parse("+2 + 3 
* 4"), Ok(14)); + assert_eq!(parser().parse("2 * 3+4"), Ok(10)); + } + #[test] + fn test_unary() { + assert_eq!(parser().parse("-2"), Ok(-2)); + assert_eq!(parser().parse("4!"), Ok(24)); + assert_eq!(parser().parse("2 + 4!"), Ok(26)); + assert_eq!(parser().parse("-2 + 2"), Ok(0)); + } +}