Skip to content

Commit

Permalink
feat: Support BYTES and time types for TopKDistinct, Greatest, and Least (#9202)
Browse files Browse the repository at this point in the history

* feat: enable TopKDistinct for time & bytes data types
- Add support for bytes, time, timestamp, and date arguments to TopKDistinct
- See #9125

* feat: enable Greatest/Least for time & bytes data types
- Add support for bytes, time, timestamp, and date arguments to Greatest and Least
- See #9125

* fix: test greatest/least w/ null arrays, not array containing null element
- The cast to the type of array element makes the compiler implicitly create an array around the variable arguments.
- So an array was being created with a null element.
- shouldHandleNullArrays() is intended to test a null array, not an array with nulls.

* style: add previously missing licenses for greatest & least test files
  • Loading branch information
reneesoika authored Jun 16, 2022
1 parent 2e1c457 commit 6824f23
Show file tree
Hide file tree
Showing 48 changed files with 5,871 additions and 68 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,11 @@

package io.confluent.ksql.function.udaf.topkdistinct;

import com.google.common.collect.ImmutableList;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import io.confluent.ksql.function.AggregateFunctionFactory;
import io.confluent.ksql.function.AggregateFunctionInitArguments;
import io.confluent.ksql.function.KsqlAggregateFunction;
import io.confluent.ksql.function.types.ParamType;
import io.confluent.ksql.function.types.ParamTypes;
import io.confluent.ksql.schema.ksql.SchemaConverters;
import io.confluent.ksql.schema.ksql.SqlArgument;
import io.confluent.ksql.schema.ksql.types.SqlType;
Expand All @@ -32,23 +30,14 @@ public class TopkDistinctAggFunctionFactory extends AggregateFunctionFactory {

// Function name handed to the superclass constructor and to each TopkDistinctKudaf created here.
private static final String NAME = "TOPKDISTINCT";

// Accepted argument signatures: each entry is a one-element parameter list.
private static final ImmutableList<List<ParamType>> SUPPORTED_TYPES =
    ImmutableList.<List<ParamType>>of(
        ImmutableList.of(ParamTypes.INTEGER),
        ImmutableList.of(ParamTypes.LONG),
        ImmutableList.of(ParamTypes.DOUBLE),
        ImmutableList.of(ParamTypes.STRING),
        ImmutableList.of(ParamTypes.DECIMAL)
    );

/**
 * Creates the factory, passing {@code NAME} to the superclass constructor.
 */
public TopkDistinctAggFunctionFactory() {
super(NAME);
}

// Shared, pre-built init arguments.
// NOTE(review): the meaning of the (0, 1) constructor arguments is not visible in this
// excerpt - confirm against AggregateFunctionInitArguments before changing them.
private static final AggregateFunctionInitArguments DEFAULT_INIT_ARGS =
new AggregateFunctionInitArguments(0, 1);

@SuppressWarnings("unchecked")
@SuppressWarnings({"unchecked", "CyclomaticComplexity"})
@Override
public KsqlAggregateFunction createAggregateFunction(
final List<SqlArgument> argTypeList,
Expand All @@ -64,6 +53,10 @@ public KsqlAggregateFunction createAggregateFunction(
case DOUBLE:
case STRING:
case DECIMAL:
case BYTES:
case DATE:
case TIME:
case TIMESTAMP:
return new TopkDistinctKudaf(
NAME, initArgs.udafIndex(), tkValFromArg, argSchema,
SchemaConverters.sqlToFunctionConverter().toFunctionType(argSchema),
Expand All @@ -80,9 +73,9 @@ public KsqlAggregateFunction createAggregateFunction(
}

/**
 * Returns the argument signatures this factory accepts.
 *
 * <p>The list returned is {@code COMPARABLE_ARGS}, which is declared outside this class body
 * excerpt (presumably inherited from {@code AggregateFunctionFactory} - confirm). The
 * FindBugs suppression is justified by that list being an {@code ImmutableList}.
 *
 * @return the supported parameter lists, one inner list per accepted signature
 */
@Override
@SuppressFBWarnings(value = "EI_EXPOSE_REP", justification = "COMPARABLE_ARGS is ImmutableList")
public List<List<ParamType>> supportedArgs() {
  return COMPARABLE_ARGS;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@
import io.confluent.ksql.util.DecimalUtil;
import io.confluent.ksql.util.KsqlConstants;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.sql.Date;
import java.sql.Time;
import java.sql.Timestamp;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
Expand Down Expand Up @@ -70,13 +74,50 @@ public Double greatest(@UdfParameter final Double val, @UdfParameter final Doubl

/**
 * Returns the largest non-null string among {@code val} and {@code vals}.
 *
 * @param val the first candidate; may be null, in which case it is ignored
 * @param vals further candidates; null elements are ignored
 * @return the maximum by {@link String#compareTo}, or null when {@code vals} itself is null
 *     or every candidate is null
 */
@Udf
public String greatest(@UdfParameter final String val, @UdfParameter final String... vals) {
  // A null varargs array short-circuits to null, regardless of val.
  if (vals == null) {
    return null;
  }

  return Streams.concat(Stream.of(val), Arrays.stream(vals))
      .filter(Objects::nonNull)
      .max(Comparator.naturalOrder())
      .orElse(null);
}

/**
 * Returns the largest non-null buffer among {@code val} and {@code vals}.
 *
 * @param val the first candidate; may be null, in which case it is ignored
 * @param vals further candidates; null elements are ignored
 * @return the maximum by {@link ByteBuffer#compareTo}, or null when {@code vals} itself is
 *     null or every candidate is null
 */
@Udf
public ByteBuffer greatest(@UdfParameter final ByteBuffer val,
    @UdfParameter final ByteBuffer... vals) {
  // A null varargs array short-circuits to null, regardless of val.
  if (vals == null) {
    return null;
  }

  return Streams.concat(Stream.of(val), Arrays.stream(vals))
      .filter(Objects::nonNull)
      .max(Comparator.naturalOrder())
      .orElse(null);
}

/**
 * Returns the latest non-null date among {@code val} and {@code vals}.
 *
 * @param val the first candidate; may be null, in which case it is ignored
 * @param vals further candidates; null elements are ignored
 * @return the maximum by the dates' natural ordering, or null when {@code vals} itself is
 *     null or every candidate is null
 */
@Udf
public Date greatest(@UdfParameter final Date val, @UdfParameter final Date... vals) {
  // A null varargs array short-circuits to null, regardless of val.
  if (vals == null) {
    return null;
  }

  return Streams.concat(Stream.of(val), Arrays.stream(vals))
      .filter(Objects::nonNull)
      .max(Comparator.naturalOrder())
      .orElse(null);
}

/**
 * Returns the latest non-null time among {@code val} and {@code vals}.
 *
 * @param val the first candidate; may be null, in which case it is ignored
 * @param vals further candidates; null elements are ignored
 * @return the maximum by the times' natural ordering, or null when {@code vals} itself is
 *     null or every candidate is null
 */
@Udf
public Time greatest(@UdfParameter final Time val, @UdfParameter final Time... vals) {
  // A null varargs array short-circuits to null, regardless of val.
  if (vals == null) {
    return null;
  }

  return Streams.concat(Stream.of(val), Arrays.stream(vals))
      .filter(Objects::nonNull)
      .max(Comparator.naturalOrder())
      .orElse(null);
}

/**
 * Returns the latest non-null timestamp among {@code val} and {@code vals}.
 *
 * @param val the first candidate; may be null, in which case it is ignored
 * @param vals further candidates; null elements are ignored
 * @return the maximum by the timestamps' natural ordering, or null when {@code vals} itself
 *     is null or every candidate is null
 */
@Udf
public Timestamp greatest(@UdfParameter final Timestamp val,
    @UdfParameter final Timestamp... vals) {
  // A null varargs array short-circuits to null, regardless of val.
  if (vals == null) {
    return null;
  }

  return Streams.concat(Stream.of(val), Arrays.stream(vals))
      .filter(Objects::nonNull)
      .max(Comparator.naturalOrder())
      .orElse(null);
}

@Udf(schemaProvider = "greatestDecimalProvider")
public BigDecimal greatest(@UdfParameter final BigDecimal val,
@UdfParameter final BigDecimal... vals) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@
import io.confluent.ksql.util.DecimalUtil;
import io.confluent.ksql.util.KsqlConstants;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.sql.Date;
import java.sql.Time;
import java.sql.Timestamp;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
Expand Down Expand Up @@ -76,6 +80,43 @@ public String least(@UdfParameter final String val, @UdfParameter final String..
.orElse(null);
}

/**
 * Returns the smallest non-null buffer among {@code val} and {@code vals}.
 *
 * @param val the first candidate; may be null, in which case it is ignored
 * @param vals further candidates; null elements are ignored
 * @return the minimum by {@link ByteBuffer#compareTo}, or null when {@code vals} itself is
 *     null or every candidate is null
 */
@Udf
public ByteBuffer least(@UdfParameter final ByteBuffer val,
    @UdfParameter final ByteBuffer... vals) {
  // A null varargs array short-circuits to null, regardless of val.
  if (vals == null) {
    return null;
  }

  return Streams.concat(Stream.of(val), Arrays.stream(vals))
      .filter(Objects::nonNull)
      .min(Comparator.naturalOrder())
      .orElse(null);
}

/**
 * Returns the earliest non-null date among {@code val} and {@code vals}.
 *
 * @param val the first candidate; may be null, in which case it is ignored
 * @param vals further candidates; null elements are ignored
 * @return the minimum by the dates' natural ordering, or null when {@code vals} itself is
 *     null or every candidate is null
 */
@Udf
public Date least(@UdfParameter final Date val, @UdfParameter final Date... vals) {
  // A null varargs array short-circuits to null, regardless of val.
  if (vals == null) {
    return null;
  }

  return Streams.concat(Stream.of(val), Arrays.stream(vals))
      .filter(Objects::nonNull)
      .min(Comparator.naturalOrder())
      .orElse(null);
}

/**
 * Returns the earliest non-null time among {@code val} and {@code vals}.
 *
 * @param val the first candidate; may be null, in which case it is ignored
 * @param vals further candidates; null elements are ignored
 * @return the minimum by the times' natural ordering, or null when {@code vals} itself is
 *     null or every candidate is null
 */
@Udf
public Time least(@UdfParameter final Time val, @UdfParameter final Time... vals) {
  // A null varargs array short-circuits to null, regardless of val.
  if (vals == null) {
    return null;
  }

  return Streams.concat(Stream.of(val), Arrays.stream(vals))
      .filter(Objects::nonNull)
      .min(Comparator.naturalOrder())
      .orElse(null);
}

/**
 * Returns the earliest non-null timestamp among {@code val} and {@code vals}.
 *
 * @param val the first candidate; may be null, in which case it is ignored
 * @param vals further candidates; null elements are ignored
 * @return the minimum by the timestamps' natural ordering, or null when {@code vals} itself
 *     is null or every candidate is null
 */
@Udf
public Timestamp least(@UdfParameter final Timestamp val,
    @UdfParameter final Timestamp... vals) {
  // A null varargs array short-circuits to null, regardless of val.
  if (vals == null) {
    return null;
  }

  return Streams.concat(Stream.of(val), Arrays.stream(vals))
      .filter(Objects::nonNull)
      .min(Comparator.naturalOrder())
      .orElse(null);
}

@Udf(schemaProvider = "leastDecimalProvider")
public BigDecimal least(@UdfParameter final BigDecimal val,
@UdfParameter final BigDecimal... vals) {
Expand All @@ -90,10 +131,10 @@ public BigDecimal least(@UdfParameter final BigDecimal val,
public SqlType leastDecimalProvider(final List<SqlArgument> params) {

return params.stream()
.filter(s -> s.getSqlType().isPresent())
.map(SqlArgument::getSqlTypeOrThrow)
.reduce(DecimalUtil::widen)
.orElse(null);
.filter(s -> s.getSqlType().isPresent())
.map(SqlArgument::getSqlTypeOrThrow)
.reduce(DecimalUtil::widen)
.orElse(null);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/*
* Copyright 2022 Confluent Inc.
*
* Licensed under the Confluent Community License; you may not use this file
* except in compliance with the License. You may obtain a copy of the License at
*
* http://www.confluent.io/confluent-community-license
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/

package io.confluent.ksql.function.udaf.topkdistinct;

import com.google.common.collect.ImmutableList;
import io.confluent.ksql.function.udf.string.ToBytes;
import io.confluent.ksql.schema.ksql.types.SqlTypes;
import io.confluent.ksql.util.BytesUtils;
import org.junit.Before;
import org.junit.Test;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.MatcherAssert.assertThat;

/**
 * Tests {@link TopkDistinctKudaf} over BYTES values with k = 3.
 *
 * <p>Inputs are single-letter strings converted to buffers through the {@link ToBytes} UDF
 * using ASCII encoding.
 */
public class BytesTopKDistinctKudafTest {

  // Thirteen inputs containing repeats; the three largest distinct letters are I, H and G.
  private final List<String> inputLetters = ImmutableList.of(
      "A", "D", "F", "A", "G", "H", "B", "H", "I", "E", "C", "H", "I");

  private final TopkDistinctKudaf<ByteBuffer> udaf
      = TopKDistinctTestUtils.getTopKDistinctKudaf(3, SqlTypes.BYTES);

  private ToBytes toBytesUDF;

  @Before
  public void setUp() {
    toBytesUDF = new ToBytes();
  }

  @Test
  public void shouldAggregateTopK() {
    List<ByteBuffer> aggregate = new ArrayList<>();
    for (final String letter : inputLetters) {
      aggregate = udaf.aggregate(toBytes(letter), aggregate);
    }

    final List<ByteBuffer> expected = toBytes(ImmutableList.of("I", "H", "G"));
    assertThat("Invalid results.", aggregate, equalTo(expected));
  }

  @Test
  public void shouldAggregateTopKWithLessThanKValues() {
    final List<ByteBuffer> aggregate = udaf.aggregate(toBytes("I"), new ArrayList<>());

    final List<ByteBuffer> expected = toBytes(ImmutableList.of("I"));
    assertThat("Invalid results.", aggregate, equalTo(expected));
  }

  @Test
  public void shouldMergeTopK() {
    final List<ByteBuffer> left = toBytes(ImmutableList.of("D", "B", "A"));
    final List<ByteBuffer> right = toBytes(ImmutableList.of("E", "D", "C"));
    final List<ByteBuffer> expected = toBytes(ImmutableList.of("E", "D", "C"));

    assertThat("Invalid results.", udaf.getMerger().apply(null, left, right),
        equalTo(expected));
  }

  // Despite the name, no null elements are involved: both sides hold fewer than k values.
  @Test
  public void shouldMergeTopKWithNulls() {
    final List<ByteBuffer> left = toBytes(ImmutableList.of("B", "A"));
    final List<ByteBuffer> right = toBytes(ImmutableList.of("C"));
    final List<ByteBuffer> expected = toBytes(ImmutableList.of("C", "B", "A"));

    assertThat("Invalid results.", udaf.getMerger().apply(null, left, right),
        equalTo(expected));
  }

  // Short inputs sharing the value "B", which must appear only once in the result.
  @Test
  public void shouldMergeTopKWithNullsDuplicates() {
    final List<ByteBuffer> left = toBytes(ImmutableList.of("B", "A"));
    final List<ByteBuffer> right = toBytes(ImmutableList.of("C", "B"));
    final List<ByteBuffer> expected = toBytes(ImmutableList.of("C", "B", "A"));

    assertThat("Invalid results.", udaf.getMerger().apply(null, left, right),
        equalTo(expected));
  }

  // Both sides hold the same single value; the merged result holds it once.
  @Test
  public void shouldMergeTopKWithMoreNulls() {
    final List<ByteBuffer> left = toBytes(ImmutableList.of("A"));
    final List<ByteBuffer> right = toBytes(ImmutableList.of("A"));
    final List<ByteBuffer> expected = toBytes(ImmutableList.of("A"));

    assertThat("Invalid results.", udaf.getMerger().apply(null, left, right),
        equalTo(expected));
  }

  /** Converts one string to a buffer via the ToBytes UDF with ASCII encoding. */
  private ByteBuffer toBytes(final String val) {
    final String encoding = BytesUtils.Encoding.ASCII.toString();
    return toBytesUDF.toBytes(val, encoding);
  }

  /** Converts each string in order, preserving list positions. */
  private List<ByteBuffer> toBytes(final List<String> vals) {
    return vals.stream()
        .map(this::toBytes)
        .collect(Collectors.toList());
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Copyright 2022 Confluent Inc.
*
* Licensed under the Confluent Community License; you may not use this file
* except in compliance with the License. You may obtain a copy of the License at
*
* http://www.confluent.io/confluent-community-license
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/

package io.confluent.ksql.function.udaf.topkdistinct;

import com.google.common.collect.ImmutableList;
import io.confluent.ksql.schema.ksql.types.SqlTypes;
import org.junit.Test;

import java.sql.Date;
import java.util.ArrayList;
import java.util.List;

import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.MatcherAssert.assertThat;

public class DateTopKDistinctKudafTest {

private final List<Date> valuesArray = ImmutableList.of(new Date(10), new Date(30), new Date(45),
new Date(10), new Date(50), new Date(60), new Date(20), new Date(60), new Date(80),
new Date(35), new Date(25), new Date(60), new Date(80));
private final TopkDistinctKudaf<Date> dateTopkDistinctKudaf
= TopKDistinctTestUtils.getTopKDistinctKudaf(3, SqlTypes.DATE);

@Test
public void shouldAggregateTopK() {
List<Date> currentVal = new ArrayList<>();
for (final Date d : valuesArray) {
currentVal = dateTopkDistinctKudaf.aggregate(d, currentVal);
}

assertThat("Invalid results.", currentVal,
equalTo(ImmutableList.of(new Date(80), new Date(60), new Date(50))));
}

@Test
public void shouldAggregateTopKWithLessThanKValues() {
List<Date> currentVal = new ArrayList<>();
currentVal = dateTopkDistinctKudaf.aggregate(new Date(80), currentVal);

assertThat("Invalid results.", currentVal, equalTo(ImmutableList.of(new Date(80))));
}

@Test
public void shouldMergeTopK() {
final List<Date> array1 = ImmutableList.of(new Date(50), new Date(45), new Date(25));
final List<Date> array2 = ImmutableList.of(new Date(60), new Date(50), new Date(48));

assertThat("Invalid results.", dateTopkDistinctKudaf.getMerger().apply(null, array1, array2),
equalTo(ImmutableList.of(new Date(60), new Date(50), new Date(48))));
}

@Test
public void shouldMergeTopKWithNulls() {
final List<Date> array1 = ImmutableList.of(new Date(50), new Date(45));
final List<Date> array2 = ImmutableList.of(new Date(60));

assertThat("Invalid results.", dateTopkDistinctKudaf.getMerger().apply(null, array1, array2),
equalTo(ImmutableList.of(new Date(60), new Date(50), new Date(45))));
}

@Test
public void shouldMergeTopKWithNullsDuplicates() {
final List<Date> array1 = ImmutableList.of(new Date(50), new Date(45));
final List<Date> array2 = ImmutableList.of(new Date(60), new Date(50));

assertThat("Invalid results.", dateTopkDistinctKudaf.getMerger().apply(null, array1, array2),
equalTo(ImmutableList.of(new Date(60), new Date(50), new Date(45))));
}

@Test
public void shouldMergeTopKWithMoreNulls() {
final List<Date> array1 = ImmutableList.of(new Date(60));
final List<Date> array2 = ImmutableList.of(new Date(60));

assertThat("Invalid results.", dateTopkDistinctKudaf.getMerger().apply(null, array1, array2),
equalTo(ImmutableList.of(new Date(60))));
}
}
Loading

0 comments on commit 6824f23

Please sign in to comment.