Skip to content

Commit

Permalink
Full pattern matching (#1820)
Browse files Browse the repository at this point in the history
* Extend match expressions to full patterns

This commit brings together many previous chunks of work to finally make
match expressions accept generic patterns, and not just enum tags.

Pattern compilation was already implemented. Besides changing the
parsing rule for "match" and change the implementation to compile the
pattern and applies the result to the argument, the bulk of the work has
been to correctly typecheck arbitrary structural patterns.

Refer to the comments for the "Match" case in the typechecker for more
details about the algorithm.

* Fix failing test, reword code documentation

* Fix undue unbound identifier when typechecking match

* Add tests for full pattern matching

* Fix null error in pattern compilation

The compilation of record pattern was missing a null check which would
trigger a dynamic type error when some matches aren't exhaustive. This
commit fixes the issue by adding the missing null check.

* Fix clippy warnings

* Limited rewording of some code comments

* Post-rebase fixup of snapshot tests

* Apply suggestions from code review

Co-authored-by: Viktor Kleen <[email protected]>

---------

Co-authored-by: Viktor Kleen <[email protected]>
  • Loading branch information
yannham and vkleen authored Feb 19, 2024
1 parent 0b2f68e commit 6f38f16
Show file tree
Hide file tree
Showing 28 changed files with 815 additions and 337 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,27 @@
source: cli/tests/snapshot/main.rs
expression: err
---
error: values of type `[| 'z; _erows_a |]` are not guaranteed to be compatible with polymorphic enum tail `[| ; r |]`
┌─ [INPUTS_PATH]/errors/enum_forall_parametricity_violation.ncl:4:26
error: function types mismatch
┌─ <unknown> (generated by evaluation):1:16
1 │ [| 'x; r |] -> [| 'y; r |]
----------- this part of the expected type
┌─ <unknown> (generated by evaluation):1:16
1 │ [| 'x; r |] -> [| 'y, 'z; _erows_a |]
│ ---------------------- does not match this part of the inferred type
┌─ [INPUTS_PATH]/errors/enum_forall_parametricity_violation.ncl:4:3
4 │ match { 'x => 'y, _ => 'z }
^^ this expression
^^^^^^^^^^^^^^^^^^^^^^^^^^^ this expression
= Type variables introduced in a `forall` range over all possible types.
= Expected an expression of type `[| 'x; r |] -> [| 'y; r |]`
= Found an expression of type `[| 'x; r |] -> [| 'y, 'z; _erows_a |]`
= Could not match the two function types

error: while matching function types: values of type `[| 'z; _erows_a |]` are not guaranteed to be compatible with polymorphic enum tail `[| ; r |]`
= Type variables introduced in a `forall` range over all possible types.
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,17 @@
source: cli/tests/snapshot/main.rs
expression: err
---
error: non-exhaustive pattern matching
error: unmatched pattern
┌─ [INPUTS_PATH]/errors/non_exhaustive_match.ncl:7:9
6let x = if true then 'a else 'b in
-- this value doesn't match any branch
7let f = match {
│ ╭─────────^
8 │ │ 'c => "hello",
9 │ │ 'd => "adios",
10 │ │ }
│ ╰─^ in this match expression
= This match expression isn't exhaustive, matching only the following pattern(s): `'c, 'd`
= But it has been applied to an argument which doesn't match any of those patterns
· │
13f x
- this value doesn't match any branch


61 changes: 16 additions & 45 deletions core/src/eval/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,10 @@ use crate::{
term::{
array::ArrayAttrs,
make as mk_term,
pattern::compile::Compile,
record::{Field, RecordData},
BinaryOp, BindingType, LetAttrs, RecordOpKind, RichTerm, RuntimeContract, StrChunk, Term,
UnaryOp,
BinaryOp, BindingType, LetAttrs, MatchData, RecordOpKind, RichTerm, RuntimeContract,
StrChunk, Term, UnaryOp,
},
};

Expand Down Expand Up @@ -884,46 +885,15 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
// found (let's call it `arg`), we evaluate `%match% arg cases default`, where
// `%match%` is the primitive operation `UnaryOp::Match` taking care of forcing the
// argument `arg` and doing the actual matching operation.
Term::Match { cases, default } if !has_cont_on_stack => {
Term::Match(data) if !has_cont_on_stack => {
if let Some((arg, pos_app)) = self.stack.pop_arg(&self.cache) {
// Setting the stack to be as if we would have evaluated an application
// `_ cases default`, where `default` is optional, and `_` is not relevant.

let has_default = default.is_some();

if let Some(default) = default {
self.stack.push_arg(
Closure {
body: default,
env: env.clone(),
},
pos,
);
Closure {
body: data.compile(arg.body.closurize(&mut self.cache, arg.env), pos),
env,
}

self.stack.push_arg(
Closure {
body: RichTerm::new(
Term::Record(RecordData::with_field_values(cases)),
pos,
),
env: env.clone(),
},
pos,
);

// Now evaluating `%match% arg`, the left-most part of the application
// `%match% arg cases default`, which is in fact a primop application.
self.stack.push_op_cont(
OperationCont::Op1(UnaryOp::Match { has_default }, pos_app),
self.call_stack.len(),
pos,
);

arg
} else {
return Ok(Closure {
body: RichTerm::new(Term::Match { cases, default }, pos),
body: RichTerm::new(Term::Match(data), pos),
env,
});
}
Expand Down Expand Up @@ -1149,20 +1119,21 @@ pub fn subst<C: Cache>(

RichTerm::new(Term::App(t1, t2), pos)
}
Term::Match {cases, default} => {
Term::Match(data) => {
let default =
default.map(|d| subst(cache, d, initial_env, env));
let cases = cases
data.default.map(|d| subst(cache, d, initial_env, env));

let branches = data.branches
.into_iter()
.map(|(id, t)| {
.map(|(pat, branch)| {
(
id,
subst(cache, t, initial_env, env),
pat,
subst(cache, branch, initial_env, env),
)
})
.collect();

RichTerm::new(Term::Match {cases, default}, pos)
RichTerm::new(Term::Match(MatchData { branches, default}), pos)
}
Term::Op1(op, t) => {
let t = subst(cache, t, initial_env, env);
Expand Down
7 changes: 6 additions & 1 deletion core/src/eval/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,8 +330,12 @@ fn substitution() {

let t = parse("match{'x => [1, glob1], 'y => loc2, 'z => {id = true, other = glob3}} loc1")
.unwrap();

// parse() removes the position information from terms, but it currently doesn't remove it from
// patterns. For the time being, instead of comparing the rich terms directly, we compare their
// pretty printing, which should be enough for this test.
assert_eq!(
subst(&eval_cache, t, &initial_env, &env),
subst(&eval_cache, t, &initial_env, &env).to_string(),
parse(
"match {\
'x => [1, 1], \
Expand All @@ -340,5 +344,6 @@ fn substitution() {
} true"
)
.unwrap()
.to_string()
);
}
45 changes: 28 additions & 17 deletions core/src/parser/grammar.lalrpop
Original file line number Diff line number Diff line change
Expand Up @@ -291,26 +291,29 @@ Applicative: UniTerm = {
<op: BOpPre> <t1: AsTerm<Atom>> <t2: AsTerm<Atom>>
=> UniTerm::from(mk_term::op2(op, t1, t2)),
NOpPre<AsTerm<Atom>>,
"match" "{" <cases: (MatchCase ",")*> <last: MatchCase?> "}" => {
let mut acc = IndexMap::with_capacity(cases.len());
"match" "{" <branches: (MatchCase ",")*> <last: MatchCase?> "}" => {
let mut default = None;

for case in cases.into_iter().map(|x| x.0).chain(last.into_iter()) {
match case {
MatchCase::Normal(id, t) => acc.insert(id, t),
// If there are multiple default cases, the last one silently
// erases the others. We should have a dedicated error for that
MatchCase::Default(t) => default.replace(t),
};
}
let branches = branches
.into_iter()
.map(|(case, _comma)| case)
.chain(last)
.filter_map(|case| match case {
MatchCase::Normal(pat, branch) => Some((pat, branch)),
MatchCase::Default(default_branch) => {
default = Some(default_branch);
None
}
})
.collect();

UniTerm::from(
Term::Match {
cases: acc,
Term::Match(MatchData {
branches,
default,
}
})
)
},
}
};

// The parametrized array type.
Expand Down Expand Up @@ -817,10 +820,18 @@ UOp: UnaryOp = {
"enum_get_tag" => UnaryOp::EnumGetTag(),
}

// It might seem silly that a match case can always be the catch-all case
// `_ => <exp>`. It would be better to separate between a normal match case and
// a rule for the catch-call. However, it's then surprisingly annoying to
// express the rule for "match" such that it's both non-ambiguous and allow an
// optional trailing comma ",".
//
// In the end, it was simpler to just allow the catch-all case to appear
// anywhere, and then to raise an error in the action code of the "match" rule.
MatchCase: MatchCase = {
<id: EnumTag> "=>" <t: Term> => MatchCase::Normal(id, t),
"_" "=>" <t: Term> => MatchCase::Default(<>),
}
<pat: Pattern> "=>" <t: Term> => MatchCase::Normal(pat, t),
"_" "=>" <Term> => MatchCase::Default(<>),
};

// Infix operators by precedence levels. Lowest levels take precedence over
// highest ones.
Expand Down
22 changes: 1 addition & 21 deletions core/src/parser/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::term::Term::*;
use crate::term::{make as mk_term, Term};
use crate::term::{record, BinaryOp, RichTerm, StrChunk, UnaryOp};

use crate::{mk_app, mk_match};
use crate::mk_app;
use assert_matches::assert_matches;
use codespan::Files;

Expand Down Expand Up @@ -219,26 +219,6 @@ fn enum_terms() {
"'\"this works!\"",
Enum(LocIdent::from("this works!")).into(),
),
(
"match with raw tags",
"match { 'foo => true, 'bar => false, _ => 456, } 123",
mk_app!(
mk_match!(("foo", Bool(true)), ("bar", Bool(false)) ; mk_term::integer(456)),
mk_term::integer(123)
),
),
(
"match with string tags",
"match { '\"one:two\" => true, '\"three four\" => false, _ => 13 } 1",
mk_app!(
mk_match!(
("one:two", Bool(true)),
("three four", Bool(false))
; mk_term::integer(13)
),
mk_term::integer(1)
),
),
];

for (name, input, expected) in success_cases {
Expand Down
2 changes: 1 addition & 1 deletion core/src/parser/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ pub enum StringEndDelimiter {
/// Distinguish between a normal case `id => exp` and a default case `_ => exp`.
#[derive(Clone, Debug)]
pub enum MatchCase {
Normal(LocIdent, RichTerm),
Normal(Pattern, RichTerm),
Default(RichTerm),
}

Expand Down
15 changes: 8 additions & 7 deletions core/src/pretty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -840,21 +840,22 @@ where
.append(")"),
Record(record_data) => allocator.record(record_data, &[]),
RecRecord(record_data, dyn_fields, _) => allocator.record(record_data, dyn_fields),
Match { cases, default } => docs![
Match(data) => docs![
allocator,
"match ",
docs![
allocator,
allocator.line(),
allocator.intersperse(
sorted_map(cases)
data.branches
.iter()
.map(|&(id, t)| (format!("'{}", ident_quoted(id)), t))
.chain(default.iter().map(|d| ("_".to_owned(), d)))
.map(|(pattern, t)| docs![
.map(|(pat, t)| (pat.pretty(allocator), t))
.chain(data.default.iter().map(|d| (allocator.text("_"), d)))
.map(|(lhs, t)| docs![
allocator,
pattern,
" =>",
lhs,
allocator.space(),
"=>",
allocator.line(),
t,
","
Expand Down
Loading

0 comments on commit 6f38f16

Please sign in to comment.