Skip to content

Commit

Permalink
Generic MathML Errors using Parser Lookahead (some endpoints updated) (
Browse files Browse the repository at this point in the history
…#386)

### Changes

- Generic MathML Parser Error update:

- Added tag level errors to `generic_mathml.rs` parser: `<mi>`, `<mn>`,
`<msup>`, `<msub>`, `<msqrt>`, `<mfrac>`, `<mrow>`, `<munder>`,
`<mover>`, `<msubsup>`, `<mtext>`, `<mstyle>`, `<mspace>`, `<mo>`.
  - `/mathml/ast-graph` endpoint now shows these errors.

- First Order ODE Parser Error update:
- Updated `ParseError` messages using the `context` combinator, removing
the previous macro usage.
- The generic MathML errors were excluded because this parser uses
`interpreted_mathml.rs`, which doesn't encounter those errors at the
`math_expression` level.

- `/pmml/equations-to-amr` and `/latex/equations-to-amr` now pass these
errors through from `skema-rs`.

### Notes

- Lookahead Algorithm:

- Solved the problem of adding tag level parse errors by implementing a
lookahead in the parser.
- In `math_expression`, instead of using `alt` for multiple branches of
parsers, the following steps were adopted:
1. Grab the content of the next tag.
2. If it is an open tag, call the appropriate parser. If that parser
fails, we can immediately stop execution with
[`cut`](https://tikv.github.io/doc/nom/combinator/fn.cut.html) because
the lookahead has already told us which element to expect.
3. If it is a close tag, return an `Error` instead of a `Failure`.
A `Failure` cuts the execution, whereas an `Error` allows the parent
combinator to continue applying parsers to the remaining input.
- This approach enables `many0` and other combinators to work as
expected. When `many0` runs out of math expressions to match (i.e. it
encounters a close tag), we return an `Error`, allowing the parent
combinator to continue. As long as we know there is an expression to
match (an open tag), we can guarantee that if the internal parser
(for `<mi>`, `<mo>`, etc.) fails, it was due to bad input.

### Testing

- `cargo test` and `cargo clippy` passing.
  • Loading branch information
Adi-UA authored Aug 4, 2023
1 parent e8b2ac7 commit 1198e53
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 65 deletions.
19 changes: 10 additions & 9 deletions skema/rest/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@


from typing import List
from skema.rest.proxies import SKEMA_RS_ADDESS
from skema.rest import schema, utils, comments_proxy
from skema.program_analysis.comments import MultiFileCodeComments
from skema.img2mml import eqn2mml
from skema.skema_py import server as code2fn

import requests
from fastapi import APIRouter, File, UploadFile
from starlette.responses import JSONResponse
import requests

from skema.img2mml import eqn2mml
from skema.program_analysis.comments import MultiFileCodeComments
from skema.rest import comments_proxy, schema, utils
from skema.rest.proxies import SKEMA_RS_ADDESS
from skema.skema_py import server as code2fn

router = APIRouter()

Expand Down Expand Up @@ -89,7 +90,7 @@ async def equations_to_amr(data: schema.EquationLatexToAMR):
return JSONResponse(
status_code=400,
content={
"error": f"MORAE PUT /mathml/amr failed to process payload",
"error": f"MORAE PUT /mathml/amr failed to process payload with error {res.text}",
"payload": payload,
},
)
Expand All @@ -99,14 +100,13 @@ async def equations_to_amr(data: schema.EquationLatexToAMR):
# pmml -> amr
@router.post("/pmml/equations-to-amr", summary="Equations pMML → AMR")
async def equations_to_amr(data: schema.MmlToAMR):

payload = {"mathml": data.equations, "model": data.model}
res = requests.put(f"{SKEMA_RS_ADDESS}/mathml/amr", json=payload)
if res.status_code != 200:
return JSONResponse(
status_code=400,
content={
"error": f"MORAE PUT /mathml/amr failed to process payload",
"error": f"MORAE PUT /mathml/amr failed to process payload with error {res.text}",
"payload": payload,
},
)
Expand Down Expand Up @@ -147,6 +147,7 @@ async def code_snippets_to_rn_amr(system: code2fn.System):
return res.json()
"""


# zip archive -> fn -> petrinet amr
@router.post(
"/code/codebase-to-pn-amr", summary="Code repo (zip archive) → PetriNet AMR"
Expand Down
57 changes: 30 additions & 27 deletions skema/skema-rs/mathml/src/parsers/first_order_ode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@ use crate::{
Ci, MathExpression, Type,
},
parsers::{
generic_mathml::{
append_msg_to_parse_err, attribute, equals, etag, stag, ws, IResult, Span,
},
generic_mathml::{attribute, equals, etag, stag, ws, IResult, Span},
interpreted_mathml::{
ci_univariate_func, ci_unknown, first_order_derivative_leibniz_notation,
math_expression, newtonian_derivative, operator,
Expand All @@ -20,6 +18,7 @@ use crate::{

use derive_new::new;

use nom::error::context;
use nom::{
branch::alt,
bytes::complete::tag,
Expand Down Expand Up @@ -52,36 +51,40 @@ pub struct FirstOrderODE {

/// Parse a first order ODE with a single derivative term on the LHS.
pub fn first_order_ode(input: Span) -> IResult<FirstOrderODE> {
let (s, _) = stag!("math")(input)
.map_err(|err| append_msg_to_parse_err!(err, "MISSING STARTING <math> TAG."))?;
let (s, _) = context("MISSING STARTING <math> TAG.", stag!("math"))(input)?;

// Recognize LHS derivative
let (s, (_, ci)) = alt((
first_order_derivative_leibniz_notation,
newtonian_derivative,
))(s)
.map_err(|err| append_msg_to_parse_err!(err, "INVALID DERIVATIVE ON LHS."))?;
let (s, (_, ci)) = context(
"INVALID LHS DERIVATIVE.",
alt((
first_order_derivative_leibniz_notation,
newtonian_derivative,
)),
)(s)?;

// Recognize equals sign
let (s, _) = delimited(stag!("mo"), equals, etag!("mo"))(s)
.map_err(|err| append_msg_to_parse_err!(err, "MISSING EQUALS SIGN."))?;
let (s, _) = context(
"MISSING EQUALS SIGN.",
delimited(stag!("mo"), equals, etag!("mo")),
)(s)?;

// Recognize other tokens
let (s, remaining_tokens) = many1(alt((
map(ci_univariate_func, MathExpression::Ci),
map(ci_unknown, |Ci { content, .. }| {
MathExpression::Ci(Ci {
r#type: Some(Type::Function),
content,
})
}),
map(operator, MathExpression::Mo),
math_expression,
)))(s)
.map_err(|err| append_msg_to_parse_err!(err, "COULD NOT PARSE RHS."))?;

let (s, _) = etag!("math")(s)
.map_err(|err| append_msg_to_parse_err!(err, "MISSING ENDING </math> tag."))?;
let (s, remaining_tokens) = context(
"INVALID RHS.",
many1(alt((
map(ci_univariate_func, MathExpression::Ci),
map(ci_unknown, |Ci { content, .. }| {
MathExpression::Ci(Ci {
r#type: Some(Type::Function),
content,
})
}),
map(operator, MathExpression::Mo),
math_expression,
))),
)(s)?;

let (s, _) = context("INVALID ENDING MATH TAG", etag!("math"))(s)?;

let ode = FirstOrderODE {
lhs_var: ci,
Expand Down
77 changes: 48 additions & 29 deletions skema/skema-rs/mathml/src/parsers/generic_mathml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@ use nom::{
branch::alt,
bytes::complete::{tag, take_until},
character::complete::{alphanumeric1, multispace0, not_line_ending},
combinator::{map, map_parser, opt, recognize, value},
combinator::{cut, map, map_parser, opt, peek, recognize, value},
multi::many0,
sequence::{delimited, pair, preceded, separated_pair, tuple},
};
use nom::{character::complete::char as nom_char, error::context};

use nom_locate::LocatedSpan;
use std::str::FromStr;

Expand Down Expand Up @@ -94,16 +96,6 @@ pub fn attribute(input: Span) -> IResult<(&str, &str)> {
Ok((s, (&key, &value)))
}

#[macro_export]
macro_rules! append_msg_to_parse_err {
($mapped_err:expr, $msg: expr) => {{
$mapped_err.map(|mut my_err| {
my_err.append_message($msg);
return my_err;
})
}};
}

#[macro_export]
macro_rules! stag {
($tag:expr) => {{
Expand Down Expand Up @@ -327,23 +319,51 @@ fn mo_line(input: Span) -> IResult<MathExpression> {

/// Math expressions
pub fn math_expression(input: Span) -> IResult<MathExpression> {
ws(alt((
map(mi, MathExpression::Mi),
mn,
msup,
msub,
msqrt,
mfrac,
map(mrow, MathExpression::Mrow),
munder,
mover,
msubsup,
mtext,
mstyle,
mspace,
mo_line,
mo,
)))(input)
// Lookahead for next open tag
let tag_name = peek(delimited(
multispace0,
delimited(
nom_char('<'),
take_until(">"),
alt((tag(">"), tag("/>"))), // Matches both self-closing and regular tags
),
multispace0,
))(input)
.map(|(_, tag_name)| {
let tag_name_string = tag_name.to_string();
let mut split_tag_name = tag_name_string.split_whitespace(); // We only want the tag name and no attributes
split_tag_name.next().unwrap().to_string()
})?;

if tag_name.contains('/') {
// Found a closing tag! This means no more math expressions, but is not wrong.
// We want the parent combinator to still continue to try and parse the remaining input
mn(input)
} else {
match tag_name.as_str() {
"mi" => context("FAILED TO PARSE <mi>", cut(ws(map(mi, MathExpression::Mi))))(input),
"mn" => context("FAILED TO PARSE <mn>", cut(ws(mn)))(input),
"msup" => context("FAILED TO PARSE <msup>", cut(ws(msup)))(input),
"msub" => context("FAILED TO PARSE <msub>", cut(ws(msub)))(input),
"msqrt" => context("FAILED TO PARSE <msqrt>", cut(ws(msqrt)))(input),
"mfrac" => context("FAILED TO PARSE <mfrac>", cut(ws(mfrac)))(input),
"mrow" => context(
"FAILED TO PARSE <mrow>",
cut(map(mrow, MathExpression::Mrow)),
)(input),
"munder" => context("FAILED TO PARSE <munder>", cut(ws(munder)))(input),
"mover" => context("FAILED TO PARSE <mover>", cut(ws(mover)))(input),
"msubsup" => context("FAILED TO PARSE <msubsup>", cut(ws(msubsup)))(input),
"mtext" => context("FAILED TO PARSE <mtext>", cut(ws(mtext)))(input),
"mstyle" => context("FAILED TO PARSE <mstyle>", cut(ws(mstyle)))(input),
"mspace" => context("FAILED TO PARSE <mspace>", cut(ws(mspace)))(input),
"mo" => context("FAILED TO PARSE <mo>", cut(ws(alt((mo, mo_line)))))(input),
_ => {
println!("Something went wrong. We grabbed a {} tag", tag_name);
context("SOMETHING WENT WRONG. WE SHOULDN'T BE HERE.", cut(mn))(input)
}
}
}
}

/// testing MathML documents
Expand Down Expand Up @@ -571,7 +591,6 @@ fn test_mathml_parser() {
}

// Exporting macros
pub(crate) use append_msg_to_parse_err;
pub(crate) use elem2;
pub(crate) use elem_many0;
pub(crate) use etag;
Expand Down

0 comments on commit 1198e53

Please sign in to comment.