From 6eaf2f28b17a03a0a4143a84842ae0eebdf57e6e Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Thu, 30 Jan 2020 17:38:22 -0500 Subject: [PATCH] syntax: fix flag scoping issue This fixes a rather nasty bug where flags set inside a group were being applies to expressions outside the group. e.g., In the simplest case, `((?i)a)b)` would match `aB`, even though the case insensitive flag _shouldn't_ be applied to `b`. The issue here was that we were actually going out of our way to reset the flags when a group is popped only _some_ of the time. Namely, when flags were set via `(?i:a)b` syntax. Instead, flags should be reset to their previous state _every_ time a group is popped in the translator. The fix here is pretty simple. When we open a group, if the group itself does not have any flags, then we simply record the current state of the flags instead of trying to replace the current flags. Then, when we pop the group, we are guaranteed to obtain the old flags, at which point, we reset them. Fixes #640 --- regex-syntax/src/hir/translate.rs | 37 ++++++++++++++++++++++--------- tests/regression.rs | 11 +++++++++ 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/regex-syntax/src/hir/translate.rs b/regex-syntax/src/hir/translate.rs index 3db8796140..2469890684 100644 --- a/regex-syntax/src/hir/translate.rs +++ b/regex-syntax/src/hir/translate.rs @@ -159,18 +159,19 @@ enum HirFrame { /// indicated by parentheses (including non-capturing groups). It is popped /// upon leaving a group. Group { - /// The old active flags, if any, when this group was opened. + /// The old active flags when this group was opened. /// /// If this group sets flags, then the new active flags are set to the /// result of merging the old flags with the flags introduced by this - /// group. + /// group. If the group doesn't set any flags, then this is simply + /// equivalent to whatever flags were set when the group was opened. /// /// When this group is popped, the active flags should be restored to /// the flags set here. /// /// The "active" flags correspond to whatever flags are set in the /// Translator. - old_flags: Option, + old_flags: Flags, }, /// This is pushed whenever a concatenation is observed. After visiting /// every sub-expression in the concatenation, the translator's stack is @@ -219,8 +220,8 @@ impl HirFrame { /// Assert that the current stack frame is a group indicator and return /// its corresponding flags (the flags that were active at the time the - /// group was entered) if they exist. - fn unwrap_group(self) -> Option { + /// group was entered). + fn unwrap_group(self) -> Flags { match self { HirFrame::Group { old_flags } => old_flags, _ => { @@ -252,8 +253,11 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { } } Ast::Group(ref x) => { - let old_flags = x.flags().map(|ast| self.set_flags(ast)); - self.push(HirFrame::Group { old_flags: old_flags }); + let old_flags = x + .flags() + .map(|ast| self.set_flags(ast)) + .unwrap_or_else(|| self.flags()); + self.push(HirFrame::Group { old_flags }); } Ast::Concat(ref x) if x.asts.is_empty() => {} Ast::Concat(_) => { @@ -350,9 +354,8 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { } Ast::Group(ref x) => { let expr = self.pop().unwrap().unwrap_expr(); - if let Some(flags) = self.pop().unwrap().unwrap_group() { - self.trans().flags.set(flags); - } + let old_flags = self.pop().unwrap().unwrap_group(); + self.trans().flags.set(old_flags); self.push(HirFrame::Expr(self.hir_group(x, expr))); } Ast::Concat(_) => { @@ -1641,6 +1644,20 @@ mod tests { hir_lit("β"), ]) ); + assert_eq!( + t("(?:(?i-u)a)b"), + hir_cat(vec![ + hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])), + hir_lit("b"), + ]) + ); + assert_eq!( + t("((?i-u)a)b"), + hir_cat(vec![ + hir_group(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])), + hir_lit("b"), + ]) + ); #[cfg(feature = "unicode-case")] assert_eq!( t("(?i)(?-i:a)a"), diff --git a/tests/regression.rs b/tests/regression.rs index 4ab8c73406..686fe35f1f 100644 --- a/tests/regression.rs +++ b/tests/regression.rs @@ -199,3 +199,14 @@ fn regression_nfa_stops1() { let re = ::regex::bytes::Regex::new(r"\bs(?:[ab])").unwrap(); assert_eq!(0, re.find_iter(b"s\xE4").count()); } + +// See: https://github.com/rust-lang/regex/issues/640 +#[cfg(feature = "unicode-case")] +matiter!( + flags_are_unset, + r"((?i)foo)|Bar", + "foo Foo bar Bar", + (0, 3), + (4, 7), + (12, 15) +);