Skip to content

Commit

Permalink
feat: add respective json_is UDFs for JSON type (#4726)
Browse files Browse the repository at this point in the history
* feat: add respective json_is UDFs

* refactor: rename to_json to parse_json

* chore: happy clippy

* chore: some rename

* fix: small fixes
  • Loading branch information
CookiePieWw committed Sep 18, 2024
1 parent 50b3bb4 commit f73fb82
Show file tree
Hide file tree
Showing 12 changed files with 688 additions and 144 deletions.
18 changes: 15 additions & 3 deletions src/common/function/src/scalars/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,16 @@

use std::sync::Arc;
mod json_get;
mod json_is;
mod json_to_string;
mod to_json;
mod parse_json;

use json_get::{JsonGetBool, JsonGetFloat, JsonGetInt, JsonGetString};
use json_is::{
JsonIsArray, JsonIsBool, JsonIsFloat, JsonIsInt, JsonIsNull, JsonIsObject, JsonIsString,
};
use json_to_string::JsonToStringFunction;
use to_json::ToJsonFunction;
use parse_json::ParseJsonFunction;

use crate::function_registry::FunctionRegistry;

Expand All @@ -28,11 +32,19 @@ pub(crate) struct JsonFunction;
impl JsonFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(JsonToStringFunction));
registry.register(Arc::new(ToJsonFunction));
registry.register(Arc::new(ParseJsonFunction));

registry.register(Arc::new(JsonGetInt));
registry.register(Arc::new(JsonGetFloat));
registry.register(Arc::new(JsonGetString));
registry.register(Arc::new(JsonGetBool));

registry.register(Arc::new(JsonIsNull));
registry.register(Arc::new(JsonIsInt));
registry.register(Arc::new(JsonIsFloat));
registry.register(Arc::new(JsonIsString));
registry.register(Arc::new(JsonIsBool));
registry.register(Arc::new(JsonIsArray));
registry.register(Arc::new(JsonIsObject));
}
}
2 changes: 1 addition & 1 deletion src/common/function/src/scalars/json/json_get.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ fn get_json_by_path(json: &[u8], path: &str) -> Option<Vec<u8>> {
/// If the path does not exist or the value is not the type specified, return `NULL`.
macro_rules! json_get {
// e.g. name = JsonGetInt, type = Int64, rust_type = i64, doc = "Get the value from the JSONB by the given path and return it as an integer."
($name: ident, $type: ident, $rust_type: ident, $doc:expr) => {
($name:ident, $type:ident, $rust_type:ident, $doc:expr) => {
paste::paste! {
#[doc = $doc]
#[derive(Clone, Debug, Default)]
Expand Down
215 changes: 215 additions & 0 deletions src/common/function/src/scalars/json/json_is.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt::{self, Display};

use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::Signature;
use datafusion::logical_expr::Volatility;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::VectorRef;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
use snafu::ensure;

use crate::function::{Function, FunctionContext};

/// Checks if the input is a JSON object of the given type.
macro_rules! json_is {
($name:ident, $json_type:ident, $doc:expr) => {
paste::paste! {
#[derive(Clone, Debug, Default)]
pub struct $name;

impl Function for $name {
fn name(&self) -> &str {
stringify!([<$name:snake>])
}

fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::boolean_datatype())
}

fn signature(&self) -> Signature {
Signature::exact(vec![ConcreteDataType::json_datatype()], Volatility::Immutable)
}

fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 1,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly one, have: {}",
columns.len()
),
}
);

let jsons = &columns[0];
let size = jsons.len();
let datatype = jsons.data_type();
let mut results = BooleanVectorBuilder::with_capacity(size);

match datatype {
// JSON data type uses binary vector
ConcreteDataType::Binary(_) => {
for i in 0..size {
let json = jsons.get_ref(i);
let json = json.as_binary();
let result = match json {
Ok(Some(json)) => {
Some(jsonb::[<is_ $json_type>](json))
}
_ => None,
};
results.push(result);
}
}
_ => {
return UnsupportedInputDataTypeSnafu {
function: stringify!([<$name:snake>]),
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail();
}
}

Ok(results.to_vector())
}
}

impl Display for $name {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", stringify!([<$name:snake>]).to_ascii_uppercase())
}
}
}
}
}

json_is!(JsonIsNull, null, "Checks if the input JSONB is null");
json_is!(
JsonIsBool,
boolean,
"Checks if the input JSONB is a boolean type JSON value"
);
json_is!(
JsonIsInt,
i64,
"Checks if the input JSONB is a integer type JSON value"
);
json_is!(
JsonIsFloat,
number,
"Checks if the input JSONB is a JSON float"
);
json_is!(
JsonIsString,
string,
"Checks if the input JSONB is a JSON string"
);
json_is!(
JsonIsArray,
array,
"Checks if the input JSONB is a JSON array"
);
json_is!(
JsonIsObject,
object,
"Checks if the input JSONB is a JSON object"
);

#[cfg(test)]
mod tests {
use std::sync::Arc;

use datatypes::scalars::ScalarVector;
use datatypes::vectors::BinaryVector;

use super::*;

#[test]
fn test_json_is_functions() {
let json_is_functions: [&dyn Function; 6] = [
&JsonIsBool,
&JsonIsInt,
&JsonIsFloat,
&JsonIsString,
&JsonIsArray,
&JsonIsObject,
];
let expected_names = [
"json_is_bool",
"json_is_int",
"json_is_float",
"json_is_string",
"json_is_array",
"json_is_object",
];
for (func, expected_name) in json_is_functions.iter().zip(expected_names.iter()) {
assert_eq!(func.name(), *expected_name);
assert_eq!(
func.return_type(&[ConcreteDataType::json_datatype()])
.unwrap(),
ConcreteDataType::boolean_datatype()
);
assert_eq!(
func.signature(),
Signature::exact(
vec![ConcreteDataType::json_datatype()],
Volatility::Immutable
)
);
}

let json_strings = [
r#"true"#,
r#"1"#,
r#"1.0"#,
r#""The pig fly through a castle, and has been attracted by the princess.""#,
r#"[1, 2]"#,
r#"{"a": 1}"#,
];
let expected_results = [
[true, false, false, false, false, false],
[false, true, false, false, false, false],
// Integers are also floats
[false, true, true, false, false, false],
[false, false, false, true, false, false],
[false, false, false, false, true, false],
[false, false, false, false, false, true],
];

let jsonbs = json_strings
.iter()
.map(|s| {
let value = jsonb::parse_value(s.as_bytes()).unwrap();
value.to_vec()
})
.collect::<Vec<_>>();
let json_vector = BinaryVector::from_vec(jsonbs);
let args: Vec<VectorRef> = vec![Arc::new(json_vector)];

for (func, expected_result) in json_is_functions.iter().zip(expected_results.iter()) {
let vector = func.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(vector.len(), json_strings.len());

for (i, expected) in expected_result.iter().enumerate() {
let result = vector.get_ref(i);
let result = result.as_boolean().unwrap().unwrap();
assert_eq!(result, *expected);
}
}
}
}
2 changes: 1 addition & 1 deletion src/common/function/src/scalars/json/json_to_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ mod tests {
use super::*;

#[test]
fn test_get_by_path_function() {
fn test_json_to_string_function() {
let json_to_string = JsonToStringFunction;

assert_eq!("json_to_string", json_to_string.name());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ use crate::function::{Function, FunctionContext};

/// Parses the `String` into `JSONB`.
#[derive(Clone, Debug, Default)]
pub struct ToJsonFunction;
pub struct ParseJsonFunction;

const NAME: &str = "to_json";
const NAME: &str = "parse_json";

impl Function for ToJsonFunction {
impl Function for ParseJsonFunction {
fn name(&self) -> &str {
NAME
}
Expand Down Expand Up @@ -101,9 +101,9 @@ impl Function for ToJsonFunction {
}
}

impl Display for ToJsonFunction {
impl Display for ParseJsonFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "TO_JSON")
write!(f, "PARSE_JSON")
}
}

Expand All @@ -119,17 +119,17 @@ mod tests {

#[test]
fn test_get_by_path_function() {
let to_json = ToJsonFunction;
let parse_json = ParseJsonFunction;

assert_eq!("to_json", to_json.name());
assert_eq!("parse_json", parse_json.name());
assert_eq!(
ConcreteDataType::json_datatype(),
to_json
parse_json
.return_type(&[ConcreteDataType::json_datatype()])
.unwrap()
);

assert!(matches!(to_json.signature(),
assert!(matches!(parse_json.signature(),
Signature {
type_signature: TypeSignature::Exact(valid_types),
volatility: Volatility::Immutable
Expand All @@ -152,13 +152,12 @@ mod tests {

let json_string_vector = StringVector::from_vec(json_strings.to_vec());
let args: Vec<VectorRef> = vec![Arc::new(json_string_vector)];
let vector = to_json.eval(FunctionContext::default(), &args).unwrap();
let vector = parse_json.eval(FunctionContext::default(), &args).unwrap();

assert_eq!(3, vector.len());
for (i, gt) in jsonbs.iter().enumerate() {
let result = vector.get_ref(i);
let result = result.as_binary().unwrap().unwrap();
// remove whitespaces
assert_eq!(gt, result);
}
}
Expand Down
56 changes: 0 additions & 56 deletions tests/cases/standalone/common/function/json.sql

This file was deleted.

Loading

0 comments on commit f73fb82

Please sign in to comment.