Skip to content

Commit

Permalink
feat: add warning for potentially slow loops
Browse files Browse the repository at this point in the history
When a for loop iterates over a range that goes from 0 (or some other constant) to `filesize` (or some other expression that uses `filesize`) it may be very slow for large files.

A loop that iterates over every byte in the file is not a very good idea.
  • Loading branch information
plusvic committed Sep 2, 2024
1 parent 40e9d17 commit 38ddfb1
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 7 deletions.
21 changes: 20 additions & 1 deletion lib/src/compiler/ir/ast2ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1016,7 +1016,26 @@ fn for_in_expr_from_ast(
let iterable = iterable_from_ast(ctx, &for_in.iterable)?;

let expected_vars = match &iterable {
Iterable::Range(_) => vec![TypeValue::Integer(Value::Unknown)],
Iterable::Range(range) => {
// Raise warning when the for loop iterates over a range where
// the upper bound is based in `filesize`. These loops are
// potentially slow.
if range.lower_bound.type_value().is_const()
&& range
.upper_bound
.dfs_find(|node| matches!(node, Expr::Filesize))
.is_some()
{
ctx.warnings.add(|| {
warnings::PotentiallySlowLoop::build(
ctx.report_builder,
for_in.iterable.span().into(),
)
})
}

vec![TypeValue::Integer(Value::Unknown)]
}
Iterable::ExprTuple(expressions) => {
// All expressions in the tuple have the same type, we can use
// the type of the first item in the tuple as the type of the
Expand Down
8 changes: 4 additions & 4 deletions lib/src/compiler/ir/dfs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ mod test {
]),
]);

let mut dfs = expr.depth_first_search();
let mut dfs = expr.dfs_iter();

assert!(matches!(dfs.next(), Some(Event::Enter(&Expr::Add { .. }))));
assert!(matches!(dfs.next(), Some(Event::Enter(&Expr::Const(_)))));
Expand All @@ -248,14 +248,14 @@ mod test {
assert!(matches!(dfs.next(), Some(Event::Leave(&Expr::Add { .. }))));
assert!(dfs.next().is_none());

let mut dfs = expr.depth_first_search();
let mut dfs = expr.dfs_iter();

assert!(matches!(dfs.next(), Some(Event::Enter(&Expr::Add { .. }))));
dfs.prune();
assert!(matches!(dfs.next(), Some(Event::Leave(&Expr::Add { .. }))));
assert!(dfs.next().is_none());

let mut dfs = expr.depth_first_search();
let mut dfs = expr.dfs_iter();

assert!(matches!(dfs.next(), Some(Event::Enter(&Expr::Add { .. }))));
assert!(matches!(dfs.next(), Some(Event::Enter(&Expr::Const(_)))));
Expand All @@ -266,7 +266,7 @@ mod test {
assert!(matches!(dfs.next(), Some(Event::Leave(&Expr::Add { .. }))));
assert!(dfs.next().is_none());

let mut dfs = expr.depth_first_search();
let mut dfs = expr.dfs_iter();

assert!(matches!(dfs.next(), Some(Event::Enter(&Expr::Add { .. }))));
assert!(matches!(dfs.next(), Some(Event::Enter(&Expr::Const(_)))));
Expand Down
24 changes: 22 additions & 2 deletions lib/src/compiler/ir/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -821,10 +821,30 @@ impl Expr {
/// Returns an iterator that does a DFS traversal of the IR tree.
///
/// See [`DepthFirstSearch`] for details.
pub fn depth_first_search(&self) -> DepthFirstSearch {
pub fn dfs_iter(&self) -> DepthFirstSearch {
DepthFirstSearch::new(self)
}

/// Finds the first expression in DFS order that matches the given
/// predicate.
///
/// This function performs a DFS traversal starting at the current
/// expression and returns the first expression that matches the
/// predicate.
pub fn dfs_find<P>(&self, predicate: P) -> Option<&Expr>
where
P: Fn(&Expr) -> bool,
{
for evt in self.dfs_iter() {
if let Event::Enter(expr) = evt {
if predicate(expr) {
return Some(expr);
}
}
}
None
}

/// Returns the type of this expression.
pub fn ty(&self) -> Type {
match self {
Expand Down Expand Up @@ -1120,7 +1140,7 @@ impl Debug for Expr {
if index.is_some() { " INDEX" } else { "" }
};

for event in self.depth_first_search() {
for event in self.dfs_iter() {
match event {
Event::Leave(_) => level -= 1,
Event::Enter(expr) => {
Expand Down
6 changes: 6 additions & 0 deletions lib/src/compiler/tests/testdata/warnings/31.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
rule test_1 { condition: for any i in (0..filesize) : ( int32(i) == 0 ) }

rule test_2 { condition: for any i in (0..filesize-1) : ( int32(i) == 0 ) }

// No warning
rule test_3 { condition: for any i in (filesize-10..filesize) : ( int32(i) == 0 ) }
12 changes: 12 additions & 0 deletions lib/src/compiler/tests/testdata/warnings/31.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
warning[potentially_slow_loop]: potentially slow loop
--> line:1:39
|
1 | rule test_1 { condition: for any i in (0..filesize) : ( int32(i) == 0 ) }
| ------------- this range can be very large
|
warning[potentially_slow_loop]: potentially slow loop
--> line:3:39
|
3 | rule test_2 { condition: for any i in (0..filesize-1) : ( int32(i) == 0 ) }
| --------------- this range can be very large
|
33 changes: 33 additions & 0 deletions lib/src/compiler/warnings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ use crate::compiler::report::{Level, Report, ReportBuilder, CodeLoc, Label};
#[serde(tag = "type")]
pub enum Warning {
ConsecutiveJumps(Box<ConsecutiveJumps>),
PotentiallySlowLoop(Box<PotentiallySlowLoop>),
PotentiallyUnsatisfiableExpression(Box<PotentiallyUnsatisfiableExpression>),
InvariantBooleanExpression(Box<InvariantBooleanExpression>),
NonBooleanAsBoolean(Box<NonBooleanAsBoolean>),
Expand Down Expand Up @@ -70,6 +71,38 @@ impl ConsecutiveJumps {
}
}

/// A rule contains a loop that could be very slow.
///
/// This warning indicates that a rule contains a `for` loop that may be very
/// slow because it iterates over a range with an upper bound that depends on
/// `filesize`. For very large files this may mean hundreds of millions of
/// iterations.
///
/// # Example
///
/// ```text
/// warning[potentially_slow_loop]: potentially slow loop
/// --> test.yar:1:34
/// |
/// 1 | rule t { condition: for any i in (0..filesize-1) : ( int32(i) == 0xcafebabe ) }
/// | --------------- this range can be very large
/// |
/// ```
#[derive(ErrorStruct, Debug, PartialEq, Eq)]
#[associated_enum(Warning)]
#[warning(
code = "potentially_slow_loop",
title = "potentially slow loop",
)]
#[label(
"this range can be very large",
loc
)]
pub struct PotentiallySlowLoop {
report: Report,
loc: CodeLoc,
}

/// A boolean expression may be impossible to match.
///
/// For instance, the condition `2 of ($a, $b) at 0` is impossible
Expand Down

0 comments on commit 38ddfb1

Please sign in to comment.