Skip to content

Commit

Permalink
Add FILLNULL command in PPL (opensearch-project#3032)
Browse files Browse the repository at this point in the history
* Add FILLNULL command in PPL

Signed-off-by: Norman Jordan <[email protected]>
  • Loading branch information
normanj-bitquill committed Oct 16, 2024
1 parent e838e46 commit bbe955a
Show file tree
Hide file tree
Showing 12 changed files with 374 additions and 0 deletions.
24 changes: 24 additions & 0 deletions core/src/main/java/org/opensearch/sql/analysis/Analyzer.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import org.opensearch.sql.ast.tree.Dedupe;
import org.opensearch.sql.ast.tree.Eval;
import org.opensearch.sql.ast.tree.FetchCursor;
import org.opensearch.sql.ast.tree.FillNull;
import org.opensearch.sql.ast.tree.Filter;
import org.opensearch.sql.ast.tree.Head;
import org.opensearch.sql.ast.tree.Kmeans;
Expand Down Expand Up @@ -558,6 +559,29 @@ public LogicalPlan visitAD(AD node, AnalysisContext context) {
return new LogicalAD(child, options);
}

/** Build {@link LogicalAD} for fillnull command. */
@Override
public LogicalPlan visitFillNull(final FillNull node, final AnalysisContext context) {
LogicalPlan child = node.getChild().get(0).accept(this, context);

ImmutableList.Builder<Pair<ReferenceExpression, Expression>> expressionsBuilder =
new Builder<>();
for (FillNull.NullableFieldFill fieldFill : node.getNullableFieldFills()) {
Expression fieldExpr =
expressionAnalyzer.analyze(fieldFill.getNullableFieldReference(), context);
ReferenceExpression ref =
DSL.ref(fieldFill.getNullableFieldReference().getField().toString(), fieldExpr.type());
FunctionExpression ifNullFunction =
DSL.ifnull(ref, expressionAnalyzer.analyze(fieldFill.getReplaceNullWithMe(), context));
expressionsBuilder.add(new ImmutablePair<>(ref, ifNullFunction));
TypeEnvironment typeEnvironment = context.peek();
// define the new reference in type env.
typeEnvironment.define(ref);
}

return new LogicalEval(child, expressionsBuilder.build());
}

/** Build {@link LogicalML} for ml command. */
@Override
public LogicalPlan visitML(ML node, AnalysisContext context) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import org.opensearch.sql.ast.tree.Dedupe;
import org.opensearch.sql.ast.tree.Eval;
import org.opensearch.sql.ast.tree.FetchCursor;
import org.opensearch.sql.ast.tree.FillNull;
import org.opensearch.sql.ast.tree.Filter;
import org.opensearch.sql.ast.tree.Head;
import org.opensearch.sql.ast.tree.Kmeans;
Expand Down Expand Up @@ -312,4 +313,8 @@ public T visitFetchCursor(FetchCursor cursor, C context) {
public T visitCloseCursor(CloseCursor closeCursor, C context) {
return visitChildren(closeCursor, context);
}

public T visitFillNull(FillNull fillNull, C context) {
return visitChildren(fillNull, context);
}
}
21 changes: 21 additions & 0 deletions core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@

package org.opensearch.sql.ast.dsl;

import com.google.common.collect.ImmutableList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import lombok.experimental.UtilityClass;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.opensearch.sql.ast.expression.AggregateFunction;
import org.opensearch.sql.ast.expression.Alias;
Expand Down Expand Up @@ -46,6 +48,7 @@
import org.opensearch.sql.ast.tree.Aggregation;
import org.opensearch.sql.ast.tree.Dedupe;
import org.opensearch.sql.ast.tree.Eval;
import org.opensearch.sql.ast.tree.FillNull;
import org.opensearch.sql.ast.tree.Filter;
import org.opensearch.sql.ast.tree.Head;
import org.opensearch.sql.ast.tree.Limit;
Expand Down Expand Up @@ -471,4 +474,22 @@ public static Parse parse(
java.util.Map<String, Literal> arguments) {
return new Parse(parseMethod, sourceField, pattern, arguments, input);
}

public static FillNull fillNull(UnresolvedExpression replaceNullWithMe, Field... fields) {
return new FillNull(
FillNull.ContainNullableFieldFill.ofSameValue(
replaceNullWithMe, ImmutableList.copyOf(fields)));
}

public static FillNull fillNull(
List<ImmutablePair<Field, UnresolvedExpression>> fieldAndReplacements) {
ImmutableList.Builder<FillNull.NullableFieldFill> replacementsBuilder = ImmutableList.builder();
for (ImmutablePair<Field, UnresolvedExpression> fieldAndReplacement : fieldAndReplacements) {
replacementsBuilder.add(
new FillNull.NullableFieldFill(
fieldAndReplacement.getLeft(), fieldAndReplacement.getRight()));
}
return new FillNull(
FillNull.ContainNullableFieldFill.ofVariousValue(replacementsBuilder.build()));
}
}
88 changes: 88 additions & 0 deletions core/src/main/java/org/opensearch/sql/ast/tree/FillNull.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.ast.tree;

import java.util.List;
import java.util.Objects;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.Node;
import org.opensearch.sql.ast.expression.Field;
import org.opensearch.sql.ast.expression.UnresolvedExpression;

@RequiredArgsConstructor
@AllArgsConstructor
public class FillNull extends UnresolvedPlan {

@Getter
@RequiredArgsConstructor
public static class NullableFieldFill {
@NonNull private final Field nullableFieldReference;
@NonNull private final UnresolvedExpression replaceNullWithMe;
}

public interface ContainNullableFieldFill {
List<NullableFieldFill> getNullFieldFill();

static ContainNullableFieldFill ofVariousValue(List<NullableFieldFill> replacements) {
return new VariousValueNullFill(replacements);
}

static ContainNullableFieldFill ofSameValue(
UnresolvedExpression replaceNullWithMe, List<Field> nullableFieldReferences) {
return new SameValueNullFill(replaceNullWithMe, nullableFieldReferences);
}
}

private static class SameValueNullFill implements ContainNullableFieldFill {
@Getter(onMethod_ = @Override)
private final List<NullableFieldFill> nullFieldFill;

public SameValueNullFill(
UnresolvedExpression replaceNullWithMe, List<Field> nullableFieldReferences) {
Objects.requireNonNull(replaceNullWithMe, "Null replacement is required");
this.nullFieldFill =
Objects.requireNonNull(nullableFieldReferences, "Nullable field reference is required")
.stream()
.map(nullableReference -> new NullableFieldFill(nullableReference, replaceNullWithMe))
.toList();
}
}

@RequiredArgsConstructor
private static class VariousValueNullFill implements ContainNullableFieldFill {
@NonNull
@Getter(onMethod_ = @Override)
private final List<NullableFieldFill> nullFieldFill;
}

private UnresolvedPlan child;

@NonNull private final ContainNullableFieldFill containNullableFieldFill;

public List<NullableFieldFill> getNullableFieldFills() {
return containNullableFieldFill.getNullFieldFill();
}

@Override
public UnresolvedPlan attach(UnresolvedPlan child) {
this.child = child;
return this;
}

@Override
public List<? extends Node> getChild() {
return child == null ? List.of() : List.of(child);
}

@Override
public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
return nodeVisitor.visitFillNull(this, context);
}
}
44 changes: 44 additions & 0 deletions core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
import org.opensearch.sql.ast.dsl.AstDSL;
import org.opensearch.sql.ast.expression.Argument;
import org.opensearch.sql.ast.expression.DataType;
import org.opensearch.sql.ast.expression.Field;
import org.opensearch.sql.ast.expression.HighlightFunction;
import org.opensearch.sql.ast.expression.Literal;
import org.opensearch.sql.ast.expression.ParseMethod;
Expand All @@ -81,6 +82,7 @@
import org.opensearch.sql.ast.tree.AD;
import org.opensearch.sql.ast.tree.CloseCursor;
import org.opensearch.sql.ast.tree.FetchCursor;
import org.opensearch.sql.ast.tree.FillNull;
import org.opensearch.sql.ast.tree.Kmeans;
import org.opensearch.sql.ast.tree.ML;
import org.opensearch.sql.ast.tree.Paginate;
Expand Down Expand Up @@ -1437,6 +1439,48 @@ public void kmeanns_relation() {
new Kmeans(AstDSL.relation("schema"), argumentMap));
}

@Test
public void fillnull_same_value() {
assertAnalyzeEqual(
LogicalPlanDSL.eval(
LogicalPlanDSL.relation("schema", table),
ImmutablePair.of(
DSL.ref("integer_value", INTEGER),
DSL.ifnull(DSL.ref("integer_value", INTEGER), DSL.literal(0))),
ImmutablePair.of(
DSL.ref("int_null_value", INTEGER),
DSL.ifnull(DSL.ref("int_null_value", INTEGER), DSL.literal(0)))),
new FillNull(
AstDSL.relation("schema"),
FillNull.ContainNullableFieldFill.ofSameValue(
AstDSL.intLiteral(0),
ImmutableList.<Field>builder()
.add(AstDSL.field("integer_value"))
.add(AstDSL.field("int_null_value"))
.build())));
}

@Test
public void fillnull_various_values() {
assertAnalyzeEqual(
LogicalPlanDSL.eval(
LogicalPlanDSL.relation("schema", table),
ImmutablePair.of(
DSL.ref("integer_value", INTEGER),
DSL.ifnull(DSL.ref("integer_value", INTEGER), DSL.literal(0))),
ImmutablePair.of(
DSL.ref("int_null_value", INTEGER),
DSL.ifnull(DSL.ref("int_null_value", INTEGER), DSL.literal(1)))),
new FillNull(
AstDSL.relation("schema"),
FillNull.ContainNullableFieldFill.ofVariousValue(
ImmutableList.of(
new FillNull.NullableFieldFill(
AstDSL.field("integer_value"), AstDSL.intLiteral(0)),
new FillNull.NullableFieldFill(
AstDSL.field("int_null_value"), AstDSL.intLiteral(1))))));
}

@Test
public void ad_batchRCF_relation() {
Map<String, Literal> argumentMap =
Expand Down
67 changes: 67 additions & 0 deletions docs/user/ppl/cmd/fillnull.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
=============
fillnull
=============

.. rubric:: Table of contents

.. contents::
:local:
:depth: 2


Description
============
| The ``fillnull`` command replaces null values for one or more fields.

Syntax
============
fillnull "with" <expression> <field> ["," <field> ]...

* field: mandatory. Name of an existing field that was piped into ``fillnull``. Null values for all specified fields are replaced with the value of expression.
* expression: mandatory. Any expression support by the system. The expression value type must match the type of field.

fillnull "using" <field> "=" <expression> ["," <field> "=" <expression> ]...

* field: mandatory. Name of an existing field that was piped into ``fillnull``.
* expression: mandatory. Any expression support by the system. The expression value type must match the type of field.

Example 1: Replace null values with the same value for multiple fields
======================================================================

The example show to replace null values for email and host with "<not found>".

PPL query::

os> source=accounts | fields email, host | fillnull with '<not found>' email, host ;
fetched rows / total rows = 4/4
+-----------------------+------------+
| email | host |
|-----------------------+------------|
| [email protected] | pyrami.com |
| [email protected] | netagy.com |
| <not found> | |
| [email protected] | boink.com |
+-----------------------+------------+

Example 2: Replace null values for multiple fields with different values
========================================================================

The example show to replace null values for email with "<not found>" and null values for host with "<no host>".

PPL query::

os> source=accounts | fields email, host | fillnull using email = '<not found>', host = '<no host>' ;
fetched rows / total rows = 4/4
+-----------------------+------------+
| email | host |
|-----------------------+------------|
| [email protected] | pyrami.com |
| [email protected] | netagy.com |
| <not found> | |
| [email protected] | boink.com |
+-----------------------+------------+

Limitation
==========
The ``fillnull`` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node.
3 changes: 3 additions & 0 deletions ppl/src/main/antlr/OpenSearchPPLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ NEW_FIELD: 'NEW_FIELD';
KMEANS: 'KMEANS';
AD: 'AD';
ML: 'ML';
FILLNULL: 'FILLNULL';

// COMMAND ASSIST KEYWORDS
AS: 'AS';
Expand All @@ -44,6 +45,8 @@ INDEX: 'INDEX';
D: 'D';
DESC: 'DESC';
DATASOURCES: 'DATASOURCES';
USING: 'USING';
WITH: 'WITH';

// CLAUSE KEYWORDS
SORTBY: 'SORTBY';
Expand Down
22 changes: 22 additions & 0 deletions ppl/src/main/antlr/OpenSearchPPLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ commands
| kmeansCommand
| adCommand
| mlCommand
| fillnullCommand
;

searchCommand
Expand Down Expand Up @@ -127,6 +128,27 @@ patternsMethod
| REGEX
;

fillnullCommand
: FILLNULL (fillNullWithTheSameValue
| fillNullWithFieldVariousValues)
;

fillNullWithTheSameValue
: WITH nullReplacement IN nullableField (COMMA nullableField)*
;

fillNullWithFieldVariousValues
: USING nullableField EQUAL nullReplacement (COMMA nullableField EQUAL nullReplacement)*
;

nullableField
: fieldExpression
;

nullReplacement
: expression
;

kmeansCommand
: KMEANS (kmeansParameter)*
;
Expand Down
Loading

0 comments on commit bbe955a

Please sign in to comment.