diff --git a/lib/src/compiler/ir/ast2ir.rs b/lib/src/compiler/ir/ast2ir.rs index 41f83495..6a73a631 100644 --- a/lib/src/compiler/ir/ast2ir.rs +++ b/lib/src/compiler/ir/ast2ir.rs @@ -1016,7 +1016,26 @@ fn for_in_expr_from_ast( let iterable = iterable_from_ast(ctx, &for_in.iterable)?; let expected_vars = match &iterable { - Iterable::Range(_) => vec![TypeValue::Integer(Value::Unknown)], + Iterable::Range(range) => { + // Raise warning when the for loop iterates over a range where + // the upper bound is based in `filesize`. These loops are + // potentially slow. + if range.lower_bound.type_value().is_const() + && range + .upper_bound + .dfs_find(|node| matches!(node, Expr::Filesize)) + .is_some() + { + ctx.warnings.add(|| { + warnings::PotentiallySlowLoop::build( + ctx.report_builder, + for_in.iterable.span().into(), + ) + }) + } + + vec![TypeValue::Integer(Value::Unknown)] + } Iterable::ExprTuple(expressions) => { // All expressions in the tuple have the same type, we can use // the type of the first item in the tuple as the type of the diff --git a/lib/src/compiler/ir/dfs.rs b/lib/src/compiler/ir/dfs.rs index 2c2bcbb8..906532b9 100644 --- a/lib/src/compiler/ir/dfs.rs +++ b/lib/src/compiler/ir/dfs.rs @@ -234,7 +234,7 @@ mod test { ]), ]); - let mut dfs = expr.depth_first_search(); + let mut dfs = expr.dfs_iter(); assert!(matches!(dfs.next(), Some(Event::Enter(&Expr::Add { .. })))); assert!(matches!(dfs.next(), Some(Event::Enter(&Expr::Const(_))))); @@ -248,14 +248,14 @@ mod test { assert!(matches!(dfs.next(), Some(Event::Leave(&Expr::Add { .. })))); assert!(dfs.next().is_none()); - let mut dfs = expr.depth_first_search(); + let mut dfs = expr.dfs_iter(); assert!(matches!(dfs.next(), Some(Event::Enter(&Expr::Add { .. })))); dfs.prune(); assert!(matches!(dfs.next(), Some(Event::Leave(&Expr::Add { .. })))); assert!(dfs.next().is_none()); - let mut dfs = expr.depth_first_search(); + let mut dfs = expr.dfs_iter(); assert!(matches!(dfs.next(), Some(Event::Enter(&Expr::Add { .. })))); assert!(matches!(dfs.next(), Some(Event::Enter(&Expr::Const(_))))); @@ -266,7 +266,7 @@ mod test { assert!(matches!(dfs.next(), Some(Event::Leave(&Expr::Add { .. })))); assert!(dfs.next().is_none()); - let mut dfs = expr.depth_first_search(); + let mut dfs = expr.dfs_iter(); assert!(matches!(dfs.next(), Some(Event::Enter(&Expr::Add { .. })))); assert!(matches!(dfs.next(), Some(Event::Enter(&Expr::Const(_))))); diff --git a/lib/src/compiler/ir/mod.rs b/lib/src/compiler/ir/mod.rs index 7708c324..c454da5d 100644 --- a/lib/src/compiler/ir/mod.rs +++ b/lib/src/compiler/ir/mod.rs @@ -821,10 +821,30 @@ impl Expr { /// Returns an iterator that does a DFS traversal of the IR tree. /// /// See [`DepthFirstSearch`] for details. - pub fn depth_first_search(&self) -> DepthFirstSearch { + pub fn dfs_iter(&self) -> DepthFirstSearch { DepthFirstSearch::new(self) } + /// Finds the first expression in DFS order that matches the given + /// predicate. + /// + /// This function performs a DFS traversal starting at the current + /// expression and returns the first expression that matches the + /// predicate. + pub fn dfs_find

(&self, predicate: P) -> Option<&Expr> + where + P: Fn(&Expr) -> bool, + { + for evt in self.dfs_iter() { + if let Event::Enter(expr) = evt { + if predicate(expr) { + return Some(expr); + } + } + } + None + } + /// Returns the type of this expression. pub fn ty(&self) -> Type { match self { @@ -1120,7 +1140,7 @@ impl Debug for Expr { if index.is_some() { " INDEX" } else { "" } }; - for event in self.depth_first_search() { + for event in self.dfs_iter() { match event { Event::Leave(_) => level -= 1, Event::Enter(expr) => { diff --git a/lib/src/compiler/tests/testdata/warnings/31.in b/lib/src/compiler/tests/testdata/warnings/31.in new file mode 100644 index 00000000..eb876baa --- /dev/null +++ b/lib/src/compiler/tests/testdata/warnings/31.in @@ -0,0 +1,6 @@ +rule test_1 { condition: for any i in (0..filesize) : ( int32(i) == 0 ) } + +rule test_2 { condition: for any i in (0..filesize-1) : ( int32(i) == 0 ) } + +// No warning +rule test_3 { condition: for any i in (filesize-10..filesize) : ( int32(i) == 0 ) } \ No newline at end of file diff --git a/lib/src/compiler/tests/testdata/warnings/31.out b/lib/src/compiler/tests/testdata/warnings/31.out new file mode 100644 index 00000000..75cf074c --- /dev/null +++ b/lib/src/compiler/tests/testdata/warnings/31.out @@ -0,0 +1,12 @@ +warning[potentially_slow_loop]: potentially slow loop + --> line:1:39 + | +1 | rule test_1 { condition: for any i in (0..filesize) : ( int32(i) == 0 ) } + | ------------- this range can be very large + | +warning[potentially_slow_loop]: potentially slow loop + --> line:3:39 + | +3 | rule test_2 { condition: for any i in (0..filesize-1) : ( int32(i) == 0 ) } + | --------------- this range can be very large + | diff --git a/lib/src/compiler/warnings.rs b/lib/src/compiler/warnings.rs index 835297e0..6c673bf2 100644 --- a/lib/src/compiler/warnings.rs +++ b/lib/src/compiler/warnings.rs @@ -18,6 +18,7 @@ use crate::compiler::report::{Level, Report, ReportBuilder, CodeLoc, Label}; #[serde(tag = "type")] pub enum Warning { ConsecutiveJumps(Box), + PotentiallySlowLoop(Box), PotentiallyUnsatisfiableExpression(Box), InvariantBooleanExpression(Box), NonBooleanAsBoolean(Box), @@ -70,6 +71,38 @@ impl ConsecutiveJumps { } } +/// A rule contains a loop that could be very slow. +/// +/// This warning indicates that a rule contains a `for` loop that may be very +/// slow because it iterates over a range with an upper bound that depends on +/// `filesize`. For very large files this may mean hundreds of millions of +/// iterations. +/// +/// # Example +/// +/// ```text +/// warning[potentially_slow_loop]: potentially slow loop +/// --> test.yar:1:34 +/// | +/// 1 | rule t { condition: for any i in (0..filesize-1) : ( int32(i) == 0xcafebabe ) } +/// | --------------- this range can be very large +/// | +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "potentially_slow_loop", + title = "potentially slow loop", +)] +#[label( +"this range can be very large", + loc +)] +pub struct PotentiallySlowLoop { + report: Report, + loc: CodeLoc, +} + /// A boolean expression may be impossible to match. /// /// For instance, the condition `2 of ($a, $b) at 0` is impossible