Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: COLLECT_LIST can now be applied to tables #3093

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

import com.google.common.collect.Lists;
import io.confluent.ksql.function.udaf.TableUdaf;
import io.confluent.ksql.function.udaf.Udaf;
import io.confluent.ksql.function.udaf.UdafDescription;
import io.confluent.ksql.function.udaf.UdafFactory;
import java.util.List;
Expand Down Expand Up @@ -69,27 +68,27 @@ public List<T> undo(final T valueToUndo, final List<T> aggregateValue) {
}

@UdafFactory(description = "collect values of a Bigint field into a single Array")
public static Udaf<Long, List<Long>> createCollectListLong() {
public static TableUdaf<Long, List<Long>> createCollectListLong() {
return listCollector();
}

@UdafFactory(description = "collect values of an Integer field into a single Array")
public static Udaf<Integer, List<Integer>> createCollectListInt() {
public static TableUdaf<Integer, List<Integer>> createCollectListInt() {
return listCollector();
}

@UdafFactory(description = "collect values of a Double field into a single Array")
public static Udaf<Double, List<Double>> createCollectListDouble() {
public static TableUdaf<Double, List<Double>> createCollectListDouble() {
return listCollector();
}

@UdafFactory(description = "collect values of a String/Varchar field into a single Array")
public static Udaf<String, List<String>> createCollectListString() {
public static TableUdaf<String, List<String>> createCollectListString() {
return listCollector();
}

@UdafFactory(description = "collect values of a Boolean field into a single Array")
public static Udaf<Boolean, List<Boolean>> createCollectListBool() {
public static TableUdaf<Boolean, List<Boolean>> createCollectListBool() {
return listCollector();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,16 @@
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.not;
import static org.junit.Assert.assertThat;
import io.confluent.ksql.function.udaf.Udaf;

import io.confluent.ksql.function.udaf.TableUdaf;
import java.util.List;
import org.junit.Test;

public class CollectListUdafTest {

@Test
public void shouldCollectInts() {
final Udaf<Integer, List<Integer>> udaf = CollectListUdaf.createCollectListInt();
final TableUdaf<Integer, List<Integer>> udaf = CollectListUdaf.createCollectListInt();
final Integer[] values = new Integer[] {3, 4, 5, 3};
List<Integer> runningList = udaf.initialize();
for (final Integer i : values) {
Expand All @@ -39,7 +40,7 @@ public void shouldCollectInts() {

@Test
public void shouldMergeIntLists() {
final Udaf<Integer, List<Integer>> udaf = CollectListUdaf.createCollectListInt();
final TableUdaf<Integer, List<Integer>> udaf = CollectListUdaf.createCollectListInt();

List<Integer> lhs = udaf.initialize();
final Integer[] lhsValues = new Integer[] {1, 2, null, 3};
Expand All @@ -61,7 +62,7 @@ public void shouldMergeIntLists() {

@Test
public void shouldRespectSizeLimit() {
final Udaf<Integer, List<Integer>> udaf = CollectListUdaf.createCollectListInt();
final TableUdaf<Integer, List<Integer>> udaf = CollectListUdaf.createCollectListInt();
List<Integer> runningList = udaf.initialize();
for (int i = 1; i < 2500; i++) {
runningList = udaf.aggregate(i, runningList);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,117 @@
{"topic": "S2", "key": 0, "value": {"ID":0,"COLLECTED":[true,false]}},
{"topic": "S2", "key": 0, "value": {"ID":0,"COLLECTED":[true,false,true]}}
]
},
{
"name": "collect_list int table",
"format": ["AVRO", "JSON"],
"statements": [
"CREATE TABLE TEST (ID bigint, VALUE integer) WITH (kafka_topic='test_topic',value_format='{FORMAT}', key='ID');",
"CREATE TABLE S2 as SELECT id, collect_list(value) as collected FROM test group by id;"
],
"inputs": [
{"topic": "test_topic", "key": 0, "value": {"ID": 0, "VALUE": 0}},
{"topic": "test_topic", "key": 0, "value": {"ID": 0, "VALUE": 100}},
{"topic": "test_topic", "key": 100, "value": {"ID": 100, "VALUE": 500}},
{"topic": "test_topic", "key": 100, "value": {"ID": 100, "VALUE": 100}}
],
"outputs": [
{"topic": "S2", "key": 0, "value": {"ID": 0, "COLLECTED": [0]}},
{"topic": "S2", "key": 0, "value": {"ID": 0, "COLLECTED": []}},
{"topic": "S2", "key": 0, "value": {"ID": 0, "COLLECTED": [100]}},
{"topic": "S2", "key": 100, "value": {"ID": 100, "COLLECTED": [500]}},
{"topic": "S2", "key": 100, "value": {"ID": 100, "COLLECTED": []}},
{"topic": "S2", "key": 100, "value": {"ID": 100, "COLLECTED": [100]}}
]
},
{
"name": "collect_list long table",
"format": ["AVRO", "JSON"],
"statements": [
"CREATE TABLE TEST (ID bigint, VALUE bigint) WITH (kafka_topic='test_topic', value_format='{FORMAT}', key='ID');",
"CREATE TABLE S2 as SELECT id, collect_list(value) as collected FROM test group by id;"
],
"inputs": [
{"topic": "test_topic", "key": 0, "value": {"ID": 0, "VALUE": 2147483648}},
{"topic": "test_topic", "key": 0, "value": {"ID": 0, "VALUE": 100}},
{"topic": "test_topic", "key": 100, "value": {"ID": 100, "VALUE": 500}},
{"topic": "test_topic", "key": 100, "value": {"ID": 100, "VALUE": 100}}
],
"outputs": [
{"topic": "S2", "key": 0, "value": {"ID": 0, "COLLECTED": [2147483648]}},
{"topic": "S2", "key": 0, "value": {"ID": 0, "COLLECTED": []}},
{"topic": "S2", "key": 0, "value": {"ID": 0, "COLLECTED": [100]}},
{"topic": "S2", "key": 100, "value": {"ID": 100, "COLLECTED": [500]}},
{"topic": "S2", "key": 100, "value": {"ID": 100, "COLLECTED": []}},
{"topic": "S2", "key": 100, "value": {"ID": 100, "COLLECTED": [100]}}
]
},
{
"name": "collect_list double table",
"format": ["AVRO", "JSON"],
"statements": [
"CREATE TABLE TEST (ID bigint, VALUE double) WITH (kafka_topic='test_topic', value_format='{FORMAT}', key='ID');",
"CREATE TABLE S2 as SELECT id, collect_list(value) as collected FROM test group by id;"
],
"inputs": [
{"topic": "test_topic", "key": 0, "value": {"ID": 0, "VALUE": 5.4}},
{"topic": "test_topic", "key": 0, "value": {"ID": 0, "VALUE": 100.1}},
{"topic": "test_topic", "key": 100, "value": {"ID": 100, "VALUE": 500.9}},
{"topic": "test_topic", "key": 100, "value": {"ID": 100, "VALUE": 300.8}}
],
"outputs": [
{"topic": "S2", "key": 0, "value": {"ID": 0, "COLLECTED": [5.4]}},
{"topic": "S2", "key": 0, "value": {"ID": 0, "COLLECTED": []}},
{"topic": "S2", "key": 0, "value": {"ID": 0, "COLLECTED": [100.1]}},
{"topic": "S2", "key": 100, "value": {"ID": 100, "COLLECTED": [500.9]}},
{"topic": "S2", "key": 100, "value": {"ID": 100, "COLLECTED": []}},
{"topic": "S2", "key": 100, "value": {"ID": 100, "COLLECTED": [300.8]}}
]
},
{
"name": "collect_list string table",
"format": ["AVRO", "JSON"],
"statements": [
"CREATE TABLE TEST (ID bigint, VALUE varchar) WITH (kafka_topic='test_topic', value_format='{FORMAT}', key='ID');",
"CREATE TABLE S2 as SELECT id, collect_list(value) as collected FROM test group by id;"
],
"inputs": [
{"topic": "test_topic", "key": 0, "value": {"ID": 0, "VALUE": "foo"}},
{"topic": "test_topic", "key": 100, "value": {"ID": 100, "VALUE": "baz"}},
{"topic": "test_topic", "key": 0, "value": {"ID": 0, "VALUE": "bar"}},
{"topic": "test_topic", "key": 100, "value": {"ID": 100, "VALUE": "baz"}},
{"topic": "test_topic", "key": 100, "value": {"ID": 100, "VALUE": "foo"}}
],
"outputs": [
{"topic": "S2", "key": 0, "value": {"ID": 0, "COLLECTED": ["foo"]}},
{"topic": "S2", "key": 100, "value": {"ID": 100, "COLLECTED": ["baz"]}},
{"topic": "S2", "key": 0, "value": {"ID": 0, "COLLECTED": []}},
{"topic": "S2", "key": 0, "value": {"ID": 0, "COLLECTED": ["bar"]}},
{"topic": "S2", "key": 100, "value": {"ID": 100, "COLLECTED": []}},
{"topic": "S2", "key": 100, "value": {"ID": 100, "COLLECTED": ["baz"]}},
{"topic": "S2", "key": 100, "value": {"ID": 100, "COLLECTED": []}},
{"topic": "S2", "key": 100, "value": {"ID": 100, "COLLECTED": ["foo"]}}
]
},
{
"name": "collect_list bool map table",
"format": ["JSON"],
"statements": [
"CREATE TABLE TEST (ID bigint, NAME varchar, VALUE map<varchar, boolean>) WITH (kafka_topic='test_topic', value_format='{FORMAT}', key='ID');",
"CREATE TABLE S2 as SELECT id, collect_list(value['key1']) AS collected FROM test group by id;"
],
"inputs": [
{"topic": "test_topic", "key": 0, "value": {"id": 0, "name": "zero", "value": {"key1":true, "key2":false}}},
{"topic": "test_topic", "key": 0, "value": {"id": 0, "name": "zero", "value": {"key1":false, "key2":true}}},
{"topic": "test_topic", "key": 0, "value": {"id": 0, "name": "zero", "value": {"key1":true, "key2":true}}}
],
"outputs": [
{"topic": "S2", "key": 0, "value": {"ID":0,"COLLECTED":[true]}},
{"topic": "S2", "key": 0, "value": {"ID":0,"COLLECTED":[]}},
{"topic": "S2", "key": 0, "value": {"ID":0,"COLLECTED":[false]}},
{"topic": "S2", "key": 0, "value": {"ID":0,"COLLECTED":[]}},
{"topic": "S2", "key": 0, "value": {"ID":0,"COLLECTED":[true]}}
]
}
]
}