Skip to content

Commit

Permalink
update sqlsmith to generate more agg calls with distinct and order by
Browse files Browse the repository at this point in the history
Signed-off-by: Richard Chien <[email protected]>
  • Loading branch information
stdrc committed Jul 10, 2023
1 parent 0ce444f commit 000cbc1
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 48 deletions.
50 changes: 14 additions & 36 deletions src/tests/sqlsmith/src/sql_gen/agg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,12 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
.map(|t| self.gen_expr(t, context))
.collect();

let distinct = self.flip_coin() && self.is_distinct_allowed && !exprs.is_empty();
// see `Binder::bind_normal_agg`
let distinct_allowed = func.func != AggKind::ApproxCountDistinct
&& !exprs.is_empty()
&& exprs.iter().skip(1).all(|e| matches!(e, Expr::Value(_)));
let distinct = distinct_allowed && self.flip_coin();

let filter = if self.flip_coin() {
let context = SqlGeneratorContext::new_with_can_agg(false);
// ENABLE: https://github.com/risingwavelabs/risingwave/issues/4762
Expand All @@ -52,11 +57,14 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
None
};

// Only can generate ORDER BY if distinct_allowed is banned globally in the generator.
// This avoids ORDER BY + Distinct aggregate from being generated.
// See https://github.com/risingwavelabs/risingwave/issues/9860.
let order_by = if self.flip_coin() && !distinct && !self.is_distinct_allowed {
self.gen_order_by()
let order_by = if self.flip_coin() {
if distinct {
// can only generate order by clause with exprs in argument list, see
// `Binder::bind_normal_agg`
self.gen_order_by_within(&exprs)
} else {
self.gen_order_by()
}
} else {
vec![]
};
Expand All @@ -75,21 +83,6 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
) -> Option<Expr> {
use AggKind as A;
match func {
A::StringAgg => {
// distinct and non_distinct_string_agg are incompatible according to
// https://github.com/risingwavelabs/risingwave/blob/a703dc7d725aa995fecbaedc4e9569bc9f6ca5ba/src/frontend/src/optimizer/plan_node/logical_agg.rs#L394
if self.is_distinct_allowed && !distinct {
None
} else {
Some(Expr::Function(make_agg_func(
"string_agg",
exprs,
distinct,
filter,
order_by,
)))
}
}
kind @ (A::FirstValue | A::LastValue) => {
if order_by.is_empty() {
// `first/last_value` only works when ORDER BY is provided
Expand All @@ -104,21 +97,6 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
)))
}
}
A::ApproxCountDistinct => {
if self.is_distinct_allowed {
None
} else {
// It does not make sense to have `distinct`.
// That requires precision, which `approx_count_distinct` does not provide.
Some(Expr::Function(make_agg_func(
"approx_count_distinct",
exprs,
false,
filter,
order_by,
)))
}
}
other => Some(Expr::Function(make_agg_func(
&other.to_string(),
exprs,
Expand Down
38 changes: 35 additions & 3 deletions src/tests/sqlsmith/src/sql_gen/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,16 +231,48 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
}

pub(crate) fn gen_order_by(&mut self) -> Vec<OrderByExpr> {
if self.bound_columns.is_empty() || !self.is_distinct_allowed {
if self.bound_columns.is_empty() {
return vec![];
}
let mut order_by = vec![];
while self.flip_coin() {
let column = self.bound_columns.choose(&mut self.rng).unwrap();
order_by.push(OrderByExpr {
expr: Expr::Identifier(Ident::new_unchecked(&column.name)),
asc: Some(self.rng.gen_bool(0.5)),
nulls_first: None,
asc: if self.rng.gen_bool(0.3) {
None
} else {
Some(self.rng.gen_bool(0.5))
},
nulls_first: if self.rng.gen_bool(0.3) {
None
} else {
Some(self.rng.gen_bool(0.5))
},
})
}
order_by
}

pub(crate) fn gen_order_by_within(&mut self, exprs: &[Expr]) -> Vec<OrderByExpr> {
if exprs.is_empty() {
return vec![];
}
let mut order_by = vec![];
while self.flip_coin() {
let expr = exprs.choose(&mut self.rng).unwrap();
order_by.push(OrderByExpr {
expr: expr.clone(),
asc: if self.rng.gen_bool(0.3) {
None
} else {
Some(self.rng.gen_bool(0.5))
},
nulls_first: if self.rng.gen_bool(0.3) {
None
} else {
Some(self.rng.gen_bool(0.5))
},
})
}
order_by
Expand Down
9 changes: 0 additions & 9 deletions src/tests/sqlsmith/src/sql_gen/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,12 +134,6 @@ pub(crate) struct SqlGenerator<'a, R: Rng> {
/// Relation ID used to generate table names and aliases
relation_id: u32,

/// is_distinct_allowed - Distinct and Orderby/Approx.. cannot be generated together among agg
/// having and
/// When this variable is true, it means distinct only
/// When this variable is false, it means orderby and approx only.
is_distinct_allowed: bool,

/// Relations bound in generated query.
/// We might not read from all tables.
bound_relations: Vec<Table>,
Expand All @@ -165,12 +159,10 @@ pub(crate) struct SqlGenerator<'a, R: Rng> {
/// Generators
impl<'a, R: Rng> SqlGenerator<'a, R> {
pub(crate) fn new(rng: &'a mut R, tables: Vec<Table>) -> Self {
let is_distinct_allowed = rng.gen_bool(0.5);
SqlGenerator {
tables,
rng,
relation_id: 0,
is_distinct_allowed,
bound_relations: vec![],
bound_columns: vec![],
is_mview: false,
Expand All @@ -184,7 +176,6 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
tables,
rng,
relation_id: 0,
is_distinct_allowed: false,
bound_relations: vec![],
bound_columns: vec![],
is_mview: true,
Expand Down

0 comments on commit 000cbc1

Please sign in to comment.