Skip to content

Commit

Permalink
Merge pull request #40 from epage/parser
Browse files Browse the repository at this point in the history
feat: Move non-grammar combinators to `Parser`
  • Loading branch information
epage authored Dec 16, 2022
2 parents 055823b + 02d187d commit 1b0b9aa
Show file tree
Hide file tree
Showing 28 changed files with 951 additions and 312 deletions.
8 changes: 2 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,10 @@ error prone plumbing.
[Hexadecimal color](https://developer.mozilla.org/en-US/docs/Web/CSS/color) parser:

```rust
extern crate nom;
use nom::prelude::*;
use nom::{
IResult,
bytes::complete::{tag, take_while_m_n},
combinator::map_res,
sequence::tuple
};

Expand All @@ -64,10 +63,7 @@ fn is_hex_digit(c: char) -> bool {
}

fn hex_primary(input: &str) -> IResult<&str, u8> {
map_res(
take_while_m_n(2, 2, is_hex_digit),
from_hex
)(input)
take_while_m_n(2, 2, is_hex_digit).map_res(from_hex).parse(input)
}

fn hex_color(input: &str) -> IResult<&str, Color> {
Expand Down
65 changes: 31 additions & 34 deletions examples/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ use nom::{
branch::alt,
bytes::{escaped, tag, take_while},
character::{alphanumeric1 as alphanumeric, char, f64, one_of},
combinator::{cut, opt, value},
error::{context, convert_error, ContextError, ErrorKind, ParseError, VerboseError},
combinator::{cut, opt},
error::{convert_error, ContextError, ErrorKind, ParseError, VerboseError},
multi::separated_list0,
sequence::{delimited, preceded, separated_pair, terminated},
Err, IResult,
Expand Down Expand Up @@ -65,19 +65,19 @@ fn parse_str<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str
fn boolean<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, bool, E> {
// This is a parser that returns `true` if it sees the string "true", and
// an error otherwise
let parse_true = value(true, tag("true"));
let parse_true = tag("true").value(true);

// This is a parser that returns `false` if it sees the string "false", and
// an error otherwise
let parse_false = value(false, tag("false"));
let parse_false = tag("false").value(false);

// `alt` combines the two parsers. It returns the result of the first
// successful parser, or an error
alt((parse_true, parse_false))(input)
}

fn null<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, (), E> {
value((), tag("null"))(input)
tag("null").value(()).parse(input)
}

/// this parser combines the previous `parse_str` parser, that recognizes the
Expand All @@ -94,10 +94,9 @@ fn null<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, (), E> {
fn string<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
i: &'a str,
) -> IResult<&'a str, &'a str, E> {
context(
"string",
preceded(char('\"'), cut(terminated(parse_str, char('\"')))),
)(i)
preceded(char('\"'), cut(terminated(parse_str, char('\"'))))
.context("string")
.parse(i)
}

/// some combinators, like `separated_list0` or `many0`, will call a parser repeatedly,
Expand All @@ -107,16 +106,15 @@ fn string<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
fn array<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
i: &'a str,
) -> IResult<&'a str, Vec<JsonValue>, E> {
context(
"array",
preceded(
char('['),
cut(terminated(
separated_list0(preceded(sp, char(',')), json_value),
preceded(sp, char(']')),
)),
),
)(i)
preceded(
char('['),
cut(terminated(
separated_list0(preceded(sp, char(',')), json_value),
preceded(sp, char(']')),
)),
)
.context("array")
.parse(i)
}

fn key_value<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
Expand All @@ -132,21 +130,20 @@ fn key_value<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
fn hash<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
i: &'a str,
) -> IResult<&'a str, HashMap<String, JsonValue>, E> {
context(
"map",
preceded(
char('{'),
cut(terminated(
separated_list0(preceded(sp, char(',')), key_value).map(|tuple_vec| {
tuple_vec
.into_iter()
.map(|(k, v)| (String::from(k), v))
.collect()
}),
preceded(sp, char('}')),
)),
),
)(i)
preceded(
char('{'),
cut(terminated(
separated_list0(preceded(sp, char(',')), key_value).map(|tuple_vec| {
tuple_vec
.into_iter()
.map(|(k, v)| (String::from(k), v))
.collect()
}),
preceded(sp, char('}')),
)),
)
.context("map")
.parse(i)
}

/// here, we apply the space parser before trying to parse a value
Expand Down
47 changes: 22 additions & 25 deletions examples/json_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use nom::{
bytes::{escaped, tag, take_while},
character::{alphanumeric1 as alphanumeric, char, f64, one_of},
combinator::cut,
error::{context, ParseError},
error::ParseError,
multi::separated_list0,
sequence::{preceded, separated_pair, terminated},
IResult,
Expand Down Expand Up @@ -226,44 +226,41 @@ fn parse_str<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str
}

fn string<'a>(i: &'a str) -> IResult<&'a str, &'a str> {
context(
"string",
preceded(char('\"'), cut(terminated(parse_str, char('\"')))),
)(i)
preceded(char('\"'), cut(terminated(parse_str, char('\"'))))
.context("string")
.parse(i)
}

/// Parses a boolean literal from the start of `input`.
///
/// Recognizes the exact text `false` or `true` and yields the matching
/// `bool`; any other input produces the error reported by `alt`.
fn boolean<'a>(input: &'a str) -> IResult<&'a str, bool> {
// Each `tag` yields the matched text, which `map` discards in favour of the
// corresponding `bool`. `alt` tries "false" first, then "true".
alt((tag("false").map(|_| false), tag("true").map(|_| true)))(input)
}

fn array<'a>(i: &'a str) -> IResult<&'a str, ()> {
context(
"array",
preceded(
char('['),
cut(terminated(
separated_list0(preceded(sp, char(',')), value).map(|_| ()),
preceded(sp, char(']')),
)),
),
)(i)
preceded(
char('['),
cut(terminated(
separated_list0(preceded(sp, char(',')), value).map(|_| ()),
preceded(sp, char(']')),
)),
)
.context("array")
.parse(i)
}

/// Parses a single `key : value` entry of a map.
///
/// The key is parsed with `string`, the separator is a `:` character, and the
/// right-hand side is parsed with `value`. `sp` runs before the key and before
/// the colon (presumably it skips whitespace; it is defined outside this view,
/// so confirm there).
///
/// On success the result is the key slice paired with `()`, since `value`
/// itself yields `()`.
fn key_value<'a>(i: &'a str) -> IResult<&'a str, (&'a str, ())> {
// The separator is wrapped in `cut`: once a key has been read, a missing `:`
// is treated as invalid input instead of a recoverable mismatch.
separated_pair(preceded(sp, string), cut(preceded(sp, char(':'))), value)(i)
}

fn hash<'a>(i: &'a str) -> IResult<&'a str, ()> {
context(
"map",
preceded(
char('{'),
cut(terminated(
separated_list0(preceded(sp, char(',')), key_value).map(|_| ()),
preceded(sp, char('}')),
)),
),
)(i)
preceded(
char('{'),
cut(terminated(
separated_list0(preceded(sp, char(',')), key_value).map(|_| ()),
preceded(sp, char('}')),
)),
)
.context("map")
.parse(i)
}

fn value<'a>(i: &'a str) -> IResult<&'a str, ()> {
Expand Down
56 changes: 27 additions & 29 deletions examples/s_expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ use nom::{
branch::alt,
bytes::tag,
character::{alpha1, char, digit1, multispace0, multispace1, one_of},
combinator::{cut, map_res, opt},
error::{context, VerboseError},
combinator::{cut, opt},
error::VerboseError,
multi::many0,
sequence::{delimited, preceded, terminated, tuple},
IResult, Parser,
Expand Down Expand Up @@ -110,7 +110,8 @@ fn parse_bool<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> {
/// Put plainly: `preceded(tag(":"), cut(alpha1))` means that once we see the `:`
/// character, we have to see one or more alphabetic characters or the input is invalid.
fn parse_keyword<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> {
context("keyword", preceded(tag(":"), cut(alpha1)))
preceded(tag(":"), cut(alpha1))
.context("keyword")
.map(|sym_str: &str| Atom::Keyword(sym_str.to_string()))
.parse(i)
}
Expand All @@ -119,9 +120,7 @@ fn parse_keyword<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>
/// of digits but ending the program if it doesn't fit into an i32.
fn parse_num<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> {
alt((
map_res(digit1, |digit_str: &str| {
digit_str.parse::<i32>().map(Atom::Num)
}),
digit1.map_res(|digit_str: &str| digit_str.parse::<i32>().map(Atom::Num)),
preceded(tag("-"), digit1)
.map(|digit_str: &str| Atom::Num(-1 * digit_str.parse::<i32>().unwrap())),
))(i)
Expand Down Expand Up @@ -157,7 +156,7 @@ where
delimited(
char('('),
preceded(multispace0, inner),
context("closing paren", cut(preceded(multispace0, char(')')))),
cut(preceded(multispace0, char(')'))).context("closing paren"),
)
}

Expand All @@ -184,27 +183,25 @@ fn parse_application<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a
/// In fact, we define our parser as if `Expr::If` was defined with an Option in it,
/// we have the `opt` combinator which fits very nicely here.
fn parse_if<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
let if_inner = context(
"if expression",
preceded(
// here to avoid ambiguity with other names starting with `if`, if we added
// variables to our language, we say that if must be terminated by at least
// one whitespace character
terminated(tag("if"), multispace1),
cut(tuple((parse_expr, parse_expr, opt(parse_expr)))),
)
.map(|(pred, true_branch, maybe_false_branch)| {
if let Some(false_branch) = maybe_false_branch {
Expr::IfElse(
Box::new(pred),
Box::new(true_branch),
Box::new(false_branch),
)
} else {
Expr::If(Box::new(pred), Box::new(true_branch))
}
}),
);
let if_inner = preceded(
// here to avoid ambiguity with other names starting with `if`, if we added
// variables to our language, we say that if must be terminated by at least
// one whitespace character
terminated(tag("if"), multispace1),
cut(tuple((parse_expr, parse_expr, opt(parse_expr)))),
)
.map(|(pred, true_branch, maybe_false_branch)| {
if let Some(false_branch) = maybe_false_branch {
Expr::IfElse(
Box::new(pred),
Box::new(true_branch),
Box::new(false_branch),
)
} else {
Expr::If(Box::new(pred), Box::new(true_branch))
}
})
.context("if expression");
s_exp(if_inner)(i)
}

Expand All @@ -217,7 +214,8 @@ fn parse_quote<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>>
// this should look very straight-forward after all we've done:
// we find the `'` (quote) character, use cut to say that we're unambiguously
// looking for an s-expression of 0 or more expressions, and then parse them
context("quote", preceded(tag("'"), cut(s_exp(many0(parse_expr)))))
preceded(tag("'"), cut(s_exp(many0(parse_expr))))
.context("quote")
.map(|exprs| Expr::Quote(exprs))
.parse(i)
}
Expand Down
27 changes: 14 additions & 13 deletions examples/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
use nom::branch::alt;
use nom::bytes::{is_not, take_while_m_n};
use nom::character::{char, multispace1};
use nom::combinator::{map_opt, map_res, value, verify};
use nom::error::{FromExternalError, ParseError};
use nom::multi::fold_many0;
use nom::prelude::*;
Expand Down Expand Up @@ -49,13 +48,15 @@ where
// `map_res` takes the result of a parser and applies a function that returns
// a Result. In this case we take the hex bytes from parse_hex and attempt to
// convert them to a u32.
let parse_u32 = map_res(parse_delimited_hex, move |hex| u32::from_str_radix(hex, 16));
let parse_u32 = parse_delimited_hex.map_res(move |hex| u32::from_str_radix(hex, 16));

// map_opt is like map_res, but it takes an Option instead of a Result. If
// the function returns None, map_opt returns an error. In this case, because
// not all u32 values are valid unicode code points, we have to fallibly
// convert to char with from_u32.
map_opt(parse_u32, |value| std::char::from_u32(value))(input)
parse_u32
.map_opt(|value| std::char::from_u32(value))
.parse(input)
}

/// Parse an escaped character: \n, \t, \r, \u{00AC}, etc.
Expand All @@ -73,14 +74,14 @@ where
// parser (the second argument) succeeds. In these cases, it looks for
// the marker characters (n, r, t, etc) and returns the matching
// character (\n, \r, \t, etc).
value('\n', char('n')),
value('\r', char('r')),
value('\t', char('t')),
value('\u{08}', char('b')),
value('\u{0C}', char('f')),
value('\\', char('\\')),
value('/', char('/')),
value('"', char('"')),
char('n').value('\n'),
char('r').value('\r'),
char('t').value('\t'),
char('b').value('\u{08}'),
char('f').value('\u{0C}'),
char('\\').value('\\'),
char('/').value('/'),
char('"').value('"'),
)),
)(input)
}
Expand All @@ -103,7 +104,7 @@ fn parse_literal<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str,
// the parser. The verification function accepts the output only if it
// returns true. In this case, we want to ensure that the output of is_not
// is non-empty.
verify(not_quote_slash, |s: &str| !s.is_empty())(input)
not_quote_slash.verify(|s: &str| !s.is_empty()).parse(input)
}

/// A string fragment contains a fragment of a string being parsed: either
Expand All @@ -127,7 +128,7 @@ where
// of that parser.
parse_literal.map(StringFragment::Literal),
parse_escaped_char.map(StringFragment::EscapedChar),
value(StringFragment::EscapedWS, parse_escaped_whitespace),
parse_escaped_whitespace.value(StringFragment::EscapedWS),
))(input)
}

Expand Down
Loading

0 comments on commit 1b0b9aa

Please sign in to comment.