Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(fuzz): add validator for inserted rows #3932

Merged
merged 3 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions tests-fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ unstable = ["nix"]
[dependencies]
arbitrary = { version = "1.3.0", features = ["derive"] }
async-trait = { workspace = true }
chrono = { workspace = true }
common-error = { workspace = true }
common-macro = { workspace = true }
common-query = { workspace = true }
Expand Down
6 changes: 5 additions & 1 deletion tests-fuzz/src/generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@ pub mod select_expr;
use std::fmt;

use datatypes::data_type::ConcreteDataType;
use datatypes::value::Value;
use rand::Rng;

use crate::error::Error;
use crate::ir::create_expr::ColumnOption;
use crate::ir::{AlterTableExpr, CreateTableExpr};
use crate::ir::{AlterTableExpr, CreateTableExpr, Ident};

/// Boxed [`Generator`] trait object that produces [`CreateTableExpr`] values and
/// reports failures as [`Error`]. The `Sync + Send` bounds are part of the alias,
/// so only generators satisfying both can be stored in it.
pub type CreateTableExprGenerator<R> =
Box<dyn Generator<CreateTableExpr, R, Error = Error> + Sync + Send>;
Expand All @@ -36,6 +37,9 @@ pub type ColumnOptionGenerator<R> = Box<dyn Fn(&mut R, &ConcreteDataType) -> Vec

/// Boxed [`Random`] trait object specialised to [`ConcreteDataType`] for the RNG type `R`.
pub type ConcreteDataTypeGenerator<R> = Box<dyn Random<ConcreteDataType, R>>;

/// Boxed function that produces a [`Value`] for a given [`ConcreteDataType`].
///
/// The arguments are, in order: the random number generator, the data type of the
/// value to produce, and an optional [`Random`] source of [`Ident`]s that the
/// function may draw on.
pub type ValueGenerator<R> =
Box<dyn Fn(&mut R, &ConcreteDataType, Option<&dyn Random<Ident, R>>) -> Value>;

pub trait Generator<T, R: Rng> {
type Error: Sync + Send + fmt::Debug;

Expand Down
13 changes: 6 additions & 7 deletions tests-fuzz/src/generator/insert_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use rand::Rng;
use crate::context::TableContextRef;
use crate::error::{Error, Result};
use crate::fake::WordGenerator;
use crate::generator::{Generator, Random};
use crate::generator::{Generator, Random, ValueGenerator};
use crate::ir::insert_expr::{InsertIntoExpr, RowValue};
use crate::ir::{generate_random_value, Ident};

Expand All @@ -37,6 +37,8 @@ pub struct InsertExprGenerator<R: Rng + 'static> {
rows: usize,
#[builder(default = "Box::new(WordGenerator)")]
word_generator: Box<dyn Random<Ident, R>>,
#[builder(default = "Box::new(generate_random_value)")]
value_generator: ValueGenerator<R>,
#[builder(default)]
_phantom: PhantomData<R>,
}
Expand Down Expand Up @@ -81,7 +83,7 @@ impl<R: Rng + 'static> Generator<InsertIntoExpr, R> for InsertExprGenerator<R> {
continue;
}

row.push(RowValue::Value(generate_random_value(
row.push(RowValue::Value((self.value_generator)(
rng,
&column.column_type,
Some(self.word_generator.as_ref()),
Expand All @@ -93,11 +95,8 @@ impl<R: Rng + 'static> Generator<InsertIntoExpr, R> for InsertExprGenerator<R> {

Ok(InsertIntoExpr {
table_name: self.table_ctx.name.to_string(),
columns: if self.omit_column_list {
vec![]
} else {
values_columns
},
omit_column_list: self.omit_column_list,
columns: values_columns,
values_list,
})
}
Expand Down
76 changes: 76 additions & 0 deletions tests-fuzz/src/ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,21 @@ lazy_static! {
];
pub static ref STRING_DATA_TYPES: Vec<ConcreteDataType> =
vec![ConcreteDataType::string_datatype()];
/// Timestamp data types with second, millisecond or microsecond precision,
/// i.e. the timestamp types that stay within MySQL's fractional-second limit.
pub static ref MYSQL_TS_DATA_TYPES: Vec<ConcreteDataType> = vec![
// MySQL only permits fractional seconds with up to microseconds (6 digits) precision.
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_second_datatype(),
];
}

impl_random!(ConcreteDataType, ColumnTypeGenerator, DATA_TYPES);
impl_random!(ConcreteDataType, TsColumnTypeGenerator, TS_DATA_TYPES);
impl_random!(
ConcreteDataType,
MySQLTsColumnTypeGenerator,
MYSQL_TS_DATA_TYPES
);
impl_random!(
ConcreteDataType,
PartibleColumnTypeGenerator,
Expand All @@ -82,6 +93,7 @@ impl_random!(

/// Unit struct registered through `impl_random!` for [`ConcreteDataType`] with `DATA_TYPES`.
pub struct ColumnTypeGenerator;
/// Unit struct registered through `impl_random!` for [`ConcreteDataType`] with `TS_DATA_TYPES`.
pub struct TsColumnTypeGenerator;
/// Unit struct registered through `impl_random!` for [`ConcreteDataType`] with
/// `MYSQL_TS_DATA_TYPES` (timestamp types of at most microsecond precision).
pub struct MySQLTsColumnTypeGenerator;
/// Unit struct registered through `impl_random!` for [`ConcreteDataType`].
/// NOTE(review): presumably limited to types usable as partition columns — confirm.
pub struct PartibleColumnTypeGenerator;
/// NOTE(review): presumably registered through `impl_random!` with `STRING_DATA_TYPES` — confirm.
pub struct StringColumnTypeGenerator;

Expand Down Expand Up @@ -110,6 +122,31 @@ pub fn generate_random_value<R: Rng>(
}
}

/// Produces a random [Value] of the requested `datatype` for use with MySQL.
///
/// Scalar types are drawn directly from `rng`. Strings come from `random_str`
/// when one is supplied and otherwise consist of a single random `char`.
/// Timestamps are delegated to `generate_random_timestamp_for_mysql`.
///
/// # Panics
///
/// Panics (via `unimplemented!`) for any data type without an arm below.
pub fn generate_random_value_for_mysql<R: Rng>(
    rng: &mut R,
    datatype: &ConcreteDataType,
    random_str: Option<&dyn Random<Ident, R>>,
) -> Value {
    match datatype {
        ConcreteDataType::Boolean(_) => Value::from(rng.gen::<bool>()),
        ConcreteDataType::Int16(_) => Value::from(rng.gen::<i16>()),
        ConcreteDataType::Int32(_) => Value::from(rng.gen::<i32>()),
        ConcreteDataType::Int64(_) => Value::from(rng.gen::<i64>()),
        ConcreteDataType::Float32(_) => Value::from(rng.gen::<f32>()),
        ConcreteDataType::Float64(_) => Value::from(rng.gen::<f64>()),
        ConcreteDataType::String(_) => {
            if let Some(word_source) = random_str {
                Value::from(word_source.gen(rng).value)
            } else {
                Value::from(rng.gen::<char>().to_string())
            }
        }
        ConcreteDataType::Date(_) => generate_random_date(rng),
        ConcreteDataType::DateTime(_) => generate_random_datetime(rng),
        ConcreteDataType::Timestamp(ts_type) => {
            generate_random_timestamp_for_mysql(rng, *ts_type)
        }

        _ => unimplemented!("unsupported type: {datatype}"),
    }
}

fn generate_random_timestamp<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
let v = match ts_type {
TimestampType::Second(_) => {
Expand Down Expand Up @@ -140,6 +177,37 @@ fn generate_random_timestamp<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Val
Value::from(v)
}

/// Draws a random timestamp of the requested precision for use with MySQL.
///
/// MySQL supports timestamp from '1970-01-01 00:00:01.000000' to
/// '2038-01-19 03:14:07.499999'; each arm samples the inclusive range between
/// those two instants, expressed in the unit of `ts_type`.
fn generate_random_timestamp_for_mysql<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
    let timestamp = match ts_type {
        TimestampType::Second(_) => Timestamp::new_second(rng.gen_range(1..=2_147_483_647)),
        TimestampType::Millisecond(_) => {
            Timestamp::new_millisecond(rng.gen_range(1000..=2_147_483_647_499))
        }
        TimestampType::Microsecond(_) => {
            Timestamp::new_microsecond(rng.gen_range(1_000_000..=2_147_483_647_499_999))
        }
        TimestampType::Nanosecond(_) => {
            Timestamp::new_nanosecond(rng.gen_range(1_000_000_000..=2_147_483_647_499_999_000))
        }
    };
    Value::from(timestamp)
}

fn generate_random_datetime<R: Rng>(rng: &mut R) -> Value {
let min = i64::from(Timestamp::MIN_MILLISECOND);
let max = i64::from(Timestamp::MAX_MILLISECOND);
Expand Down Expand Up @@ -258,6 +326,14 @@ impl Column {
)
})
}

/// Returns the value carried by the first `ColumnOption::DefaultValue` among
/// this column's options, or `None` when no such option is present.
pub fn default_value(&self) -> Option<&Value> {
    for option in self.options.iter() {
        if let ColumnOption::DefaultValue(default) = option {
            return Some(default);
        }
    }
    None
}
}

/// Returns droppable columns. i.e., non-primary key columns, non-ts columns.
Expand Down
48 changes: 47 additions & 1 deletion tests-fuzz/src/ir/insert_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,36 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt::Display;
use std::fmt::{Debug, Display};

use datatypes::value::Value;

use crate::ir::Column;

/// Intermediate representation of an `INSERT INTO` statement.
pub struct InsertIntoExpr {
/// Name of the table the rows are inserted into.
pub table_name: String,
/// Whether the column list should be left out when the statement is rendered.
pub omit_column_list: bool,
/// Columns the inserted rows provide values for.
/// NOTE(review): presumably in the same order as the entries of each row — confirm.
pub columns: Vec<Column>,
/// The rows to insert, one [`RowValues`] per row.
pub values_list: Vec<RowValues>,
}

/// The cells of a single row, as a list of [`RowValue`]s.
pub type RowValues = Vec<RowValue>;

/// A single cell of an inserted row.
#[derive(PartialEq, PartialOrd, Clone)]
pub enum RowValue {
/// A concrete [`Value`].
Value(Value),
/// No explicit value; formatted as the `DEFAULT` keyword.
Default,
}

impl RowValue {
    /// Compares the inner values of two [`RowValue::Value`] cells.
    ///
    /// Returns the result of `partial_cmp` on the two wrapped [`Value`]s.
    ///
    /// # Panics
    ///
    /// Panics if either operand is not a [`RowValue::Value`].
    pub fn cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        if let (RowValue::Value(lhs), RowValue::Value(rhs)) = (self, other) {
            return lhs.partial_cmp(rhs);
        }
        panic!("Invalid comparison: {:?} and {:?}", self, other)
    }
}

impl Display for RowValue {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Expand All @@ -46,3 +57,38 @@ impl Display for RowValue {
}
}
}

impl Debug for RowValue {
    /// Formats the cell as a SQL-like literal: `DEFAULT`, `NULL`, a single-quoted
    /// string/timestamp/datetime/date, or the value's plain `Display` output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            RowValue::Default => write!(f, "DEFAULT"),
            RowValue::Value(Value::Null) => write!(f, "NULL"),
            // Textual and temporal values are wrapped in single quotes.
            RowValue::Value(
                quoted @ (Value::String(_)
                | Value::Timestamp(_)
                | Value::DateTime(_)
                | Value::Date(_)),
            ) => write!(f, "'{}'", quoted),
            RowValue::Value(plain) => write!(f, "{}", plain),
        }
    }
}

#[cfg(test)]
mod tests {
    use common_time::Timestamp;
    use datatypes::value::Value;

    use crate::ir::insert_expr::RowValue;

    /// A timestamp far in the past must compare as less than one far in the future.
    #[test]
    fn test_value_cmp() {
        // Parses a UTC timestamp literal and wraps it as a row cell.
        let cell = |text: &str| {
            RowValue::Value(Value::Timestamp(Timestamp::from_str_utc(text).unwrap()))
        };
        let earlier = cell("-39988-01-31 01:21:12.848697+0000");
        let later = cell("+12970-09-22 08:40:58.392839+0000");
        assert_eq!(earlier.cmp(&later), Some(std::cmp::Ordering::Less));
    }
}
2 changes: 1 addition & 1 deletion tests-fuzz/src/translator/mysql/insert_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ impl DslTranslator<InsertIntoExpr, String> for InsertIntoExprTranslator {

impl InsertIntoExprTranslator {
fn format_columns(input: &InsertIntoExpr) -> String {
if input.columns.is_empty() {
if input.omit_column_list {
"".to_string()
} else {
let list = input
Expand Down
1 change: 1 addition & 0 deletions tests-fuzz/src/validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@
// limitations under the License.

pub mod column;
pub mod row;
Loading
Loading