diff --git a/docs/asciidoc/modules/ROOT/pages/overview/apoc.agg/apoc.agg.multiStats.adoc b/docs/asciidoc/modules/ROOT/pages/overview/apoc.agg/apoc.agg.multiStats.adoc new file mode 100644 index 0000000000..929484fd20 --- /dev/null +++ b/docs/asciidoc/modules/ROOT/pages/overview/apoc.agg/apoc.agg.multiStats.adoc @@ -0,0 +1,128 @@ + += apoc.agg.multiStats +:description: This section contains reference documentation for the apoc.agg.multiStats function. + +label:function[] label:apoc-extended[] + +[.emphasis] +apoc.agg.multiStats(nodeOrRel, keys) - Return a multi-dimensional aggregation + +== Signature + +[source] +---- +apoc.agg.multiStats(value :: NODE | RELATIONSHIP, keys :: LIST OF STRING) :: (MAP?) +---- + +== Input parameters +[.procedures, opts=header] +|=== +| Name | Type | Default +|value|NODE \| RELATIONSHIP|null +|=== + + +[[usage-apoc.data.email]] +== Usage Examples + +Given this dataset: +[source,cypher] +---- +CREATE (:Person { louvain: 596, neo4jImportId: "18349390", wcc: 48, lpa: 598, name: "aaa", another: 548}), + (:Person { louvain: 596, neo4jImportId: "18349390", wcc: 48, lpa: 598, name: "eee", another: 549}), + (:Person { louvain: 596, neo4jImportId: "18349390", wcc: 48, lpa: 598, name: "eee", another: 549}), + (:Person { louvain: 597, neo4jImportId: "18349391", wcc: 48, lpa: 598, name: "eee", another: 549}), + (:Person { louvain: 597, neo4jImportId: "18349392", wcc: 47, lpa: 596, name: "iii", another: 549}), + (:Person { louvain: 597, neo4jImportId: "18349393", wcc: 47, lpa: 596, name: "iii", another: 549}), + (:Person { louvain: 597, neo4jImportId: "18349394", wcc: 47, lpa: 596, name: "iii", another: 549}), + (:Person { louvain: 597, neo4jImportId: "18349393", wcc: 47, lpa: 596, name: "iii", another: 10}), + (:Person { louvain: 597, neo4jImportId: "18349394", wcc: 47, lpa: 596, name: "iii", another: 10}) +---- + + +We can create an optimized multiple aggregation based on the property key, +similar to this one: +[source,cypher] +---- +MATCH (p:Person) +WITH p +CALL { + WITH p + MATCH (n:Person {louvain: p.louvain}) + RETURN sum(p.louvain) AS sumLouvain, avg(p.louvain) AS avgLouvain, count(p.louvain) AS countLouvain +} +CALL { + WITH p + MATCH (n:Person {wcc: p.wcc}) + RETURN sum(p.wcc) AS sumWcc, avg(p.wcc) AS avgWcc, count(p.wcc) AS countWcc +} +CALL { + WITH p + MATCH (n:Person {another: p.another}) + RETURN sum(p.another) AS sumAnother, avg(p.another) AS avgAnother, count(p.another) AS countAnother +} +CALL { + WITH p + MATCH (lpa:Person {lpa: p.lpa}) + RETURN sum(p.lpa) AS sumLpa, avg(p.lpa) AS avgLpa, count(p.lpa) AS countLpa +} +RETURN p.name, + sumLouvain, avgLouvain, countLouvain, + sumWcc, avgWcc, countWcc, + sumAnother, avgAnother, countAnother, + sumLpa, avgLpa, countLpa +---- + + +executing the following query: +[source,cypher] +---- +MATCH (p:Person) +RETURN apoc.agg.multiStats(p, ["lpa","wcc","louvain", "another"]) as output +---- + + +.Results +[opts="header"] +|=== +| output +a| +[source,json] +---- +{ + "louvain" :{"596" :{"avg" :596.0, "count" :3, "sum" :1788}, "597" :{"avg" :597.0, "count" :6, "sum" :3582}}, + "wcc" :{"47" :{"avg" :47.0, "count" :5, "sum" :235}, "48" :{"avg" :48.0, "count" :4, "sum" :192}}, + "another" :{"548" :{"avg" :548.0, "count" :1, "sum" :548}, "549" :{"avg" :549.0, "count" :6, "sum" :3294}, "10" :{"avg" :10.0, "count" :2, "sum" :20}}, + "lpa" :{"596" :{"avg" :596.0, "count" :5, "sum" :2980}, "598" :{"avg" :598.0, "count" :4, "sum" :2392}} +} +---- +|=== + +which can be used, for example, to return a result similar to the Cypher one in this way: + +[source,cypher] +---- +MATCH (p:Person) +WITH apoc.agg.multiStats(p, ["lpa","wcc","louvain", "another"]) as data +MATCH (p:Person) +RETURN p.name, + data.wcc[toString(p.wcc)].avg AS avgWcc, + data.louvain[toString(p.louvain)].avg AS avgLouvain, + data.lpa[toString(p.lpa)].avg AS avgLpa +---- + + +.Results +[opts="header"] +|=== +| avgWcc | avgLouvain | avgLpa +| 48.0 | 596.0 | 598.0 +| 48.0 | 596.0 | 598.0 +| 48.0 | 596.0 | 598.0 +| 47.0 | 597.0 | 596.0 +| 47.0 | 597.0 | 596.0 +| 47.0 | 597.0 | 596.0 +| 47.0 | 597.0 | 596.0 +| 47.0 | 597.0 | 596.0 +|=== + diff --git a/docs/asciidoc/modules/ROOT/pages/overview/apoc.agg/index.adoc b/docs/asciidoc/modules/ROOT/pages/overview/apoc.agg/index.adoc index 9cec8f26cb..26d934a40e 100644 --- a/docs/asciidoc/modules/ROOT/pages/overview/apoc.agg/index.adoc +++ b/docs/asciidoc/modules/ROOT/pages/overview/apoc.agg/index.adoc @@ -14,5 +14,11 @@ Returns index of the `element` that match the given `value` Returns index of the `element` that match the given `predicate` |label:procedure[] + + +|xref::overview/apoc.agg/apoc.agg.multiStats.adoc[apoc.agg.multiStats icon:book[]] + +apoc.agg.multiStats(nodeOrRel, keys) - Return a multi-dimensional aggregation +|label:function[] |=== diff --git a/docs/asciidoc/modules/ROOT/partials/generated-documentation/documentation.adoc b/docs/asciidoc/modules/ROOT/partials/generated-documentation/documentation.adoc index dbacdf391e..d25311d2be 100644 --- a/docs/asciidoc/modules/ROOT/partials/generated-documentation/documentation.adoc +++ b/docs/asciidoc/modules/ROOT/partials/generated-documentation/documentation.adoc @@ -19,6 +19,19 @@ Returns index of the `element` that match the given `predicate` |=== +[discrete] +== xref::overview/apoc.agg/index.adoc[] + +[.procedures, opts=header, cols='5a,1a'] +|=== +| Qualified Name | Type +|xref::overview/apoc.agg/apoc.agg.multiStats.adoc[apoc.agg.multiStats icon:book[]] + +apoc.agg.multiStats(nodeOrRel, keys) - Return a multi-dimensional aggregation +|label:procedure[] +|=== + + [discrete] == xref::overview/apoc.bolt/index.adoc[] diff --git a/docs/asciidoc/modules/ROOT/partials/generated-documentation/nav.adoc b/docs/asciidoc/modules/ROOT/partials/generated-documentation/nav.adoc index 02f07fec92..ae165734cb 100644 --- a/docs/asciidoc/modules/ROOT/partials/generated-documentation/nav.adoc +++ b/docs/asciidoc/modules/ROOT/partials/generated-documentation/nav.adoc @@ -5,6 +5,7 @@ This file is generated by DocsTest, so don't change it! ** xref::overview/apoc.agg/index.adoc[] *** xref::overview/apoc.agg/apoc.agg.row.adoc[] *** xref::overview/apoc.agg/apoc.agg.position.adoc[] +*** xref::overview/apoc.agg/apoc.agg.multiStats.adoc[] ** xref::overview/apoc.bolt/index.adoc[] *** xref::overview/apoc.bolt/apoc.bolt.execute.adoc[] *** xref::overview/apoc.bolt/apoc.bolt.load.adoc[] diff --git a/extended/src/main/java/apoc/agg/MultiStats.java b/extended/src/main/java/apoc/agg/MultiStats.java new file mode 100644 index 0000000000..49ee004405 --- /dev/null +++ b/extended/src/main/java/apoc/agg/MultiStats.java @@ -0,0 +1,80 @@ +package apoc.agg; + +import apoc.Extended; +import org.neo4j.graphdb.Entity; +import org.neo4j.procedure.Description; +import org.neo4j.procedure.Name; +import org.neo4j.procedure.UserAggregationFunction; +import org.neo4j.procedure.UserAggregationResult; +import org.neo4j.procedure.UserAggregationUpdate; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +@Extended +public class MultiStats { + + @UserAggregationFunction("apoc.agg.multiStats") + @Description("Return a multi-dimensional aggregation") + public MultiStatsFunction multiStats() { + return new MultiStatsFunction(); + } + + public static class MultiStatsFunction { + + private final Map>> result = new HashMap<>(); + + @UserAggregationUpdate + public void aggregate( + @Name("value") Object value, + @Name(value = "keys") List keys) { + Entity entity = (Entity) value; + + // for each prop + keys.forEach(key -> { + if (entity.hasProperty(key)) { + Object property = entity.getProperty(key); + + result.compute(key, (ignored, v) -> { + Map> map = Objects.requireNonNullElseGet(v, HashMap::new); + + map.compute(property.toString(), (propKey, propVal) -> { + + Map propMap = Objects.requireNonNullElseGet(propVal, HashMap::new); + + Number count = propMap.compute("count", + ((subKey, subVal) -> subVal == null ? 1 : subVal.longValue() + 1) ); + + if (property instanceof Number numberProp) { + Number sum = propMap.compute("sum", + ((subKey, subVal) -> { + if (subVal == null) return numberProp; + if (subVal instanceof Long long1 && numberProp instanceof Long long2) { + return long1 + long2; + } + return subVal.doubleValue() + numberProp.doubleValue(); + })); + + propMap.compute("avg", + ((subKey, subVal) -> subVal == null ? numberProp.doubleValue() : sum.doubleValue() / count.doubleValue() )); + } + + return propMap; + }); + + return map; + }); + } + }); + } + + + @UserAggregationResult + // apoc.agg.multiStats([key1,key2,key3]) -> Map> + public Map>> result() { + return result; + } + } +} diff --git a/extended/src/main/resources/extended.txt b/extended/src/main/resources/extended.txt index d344975395..3d2353aee3 100644 --- a/extended/src/main/resources/extended.txt +++ b/extended/src/main/resources/extended.txt @@ -1,5 +1,6 @@ apoc.agg.position apoc.agg.row +apoc.agg.multiStats apoc.algo.aStarWithPoint apoc.bolt.execute apoc.bolt.load diff --git a/extended/src/test/java/apoc/agg/MultiStatsTest.java b/extended/src/test/java/apoc/agg/MultiStatsTest.java new file mode 100644 index 0000000000..c9e167a750 --- /dev/null +++ b/extended/src/test/java/apoc/agg/MultiStatsTest.java @@ -0,0 +1,100 @@ +package apoc.agg; + +import apoc.map.Maps; +import apoc.util.TestUtil; +import apoc.util.collection.Iterators; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.neo4j.test.rule.DbmsRule; +import org.neo4j.test.rule.ImpermanentDbmsRule; + +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class MultiStatsTest { + + @ClassRule + public static DbmsRule db = new ImpermanentDbmsRule(); + + @BeforeClass + public static void setUp() { + TestUtil.registerProcedure(db, Maps.class, MultiStats.class); + + db.executeTransactionally(""" + CREATE (:Person { louvain: 596, neo4jImportId: "18349390", wcc: 48, lpa: 598, name: "aaa", another: 548}), + (:Person { louvain: 596, neo4jImportId: "18349390", wcc: 48, lpa: 598, name: "eee", another: 549}), + (:Person { louvain: 596, neo4jImportId: "18349390", wcc: 48, lpa: 598, name: "eee", another: 549}), + (:Person { louvain: 597, neo4jImportId: "18349391", wcc: 48, lpa: 598, name: "eee", another: 549}), + (:Person { louvain: 597, neo4jImportId: "18349392", wcc: 47, lpa: 596, name: "iii", another: 549}), + (:Person { louvain: 597, neo4jImportId: "18349393", wcc: 47, lpa: 596, name: "iii", another: 549}), + (:Person { louvain: 597, neo4jImportId: "18349394", wcc: 47, lpa: 596, name: "iii", another: 549}), + (:Person { louvain: 597, neo4jImportId: "18349393", wcc: 47, lpa: 596, name: "iii", another: 10}), + (:Person { louvain: 597, neo4jImportId: "18349394", wcc: 47, lpa: 596, name: "iii", another: 10})"""); + } + + @AfterClass + public static void tearDown() { + db.shutdown(); + } + + // similar to https://community.neo4j.com/t/listing-the-community-size-of-different-community-detection-algorithms-already-calculated/42895 + @Test + public void testMultiStatsComparedWithCypherMultiAggregation() { + List multiAggregationResult = db.executeTransactionally(""" + MATCH (p:Person) + WITH p + CALL { + WITH p + MATCH (n:Person {louvain: p.louvain}) + RETURN sum(p.louvain) AS sumLouvain, avg(p.louvain) AS avgLouvain, count(p.louvain) AS countLouvain + } + CALL { + WITH p + MATCH (n:Person {wcc: p.wcc}) + RETURN sum(p.wcc) AS sumWcc, avg(p.wcc) AS avgWcc, count(p.wcc) AS countWcc + } + CALL { + WITH p + MATCH (n:Person {another: p.another}) + RETURN sum(p.another) AS sumAnother, avg(p.another) AS avgAnother, count(p.another) AS countAnother + } + CALL { + WITH p + MATCH (lpa:Person {lpa: p.lpa}) + RETURN sum(p.lpa) AS sumLpa, avg(p.lpa) AS avgLpa, count(p.lpa) AS countLpa + } + RETURN p.name, + sumLouvain, avgLouvain, countLouvain, + sumWcc, avgWcc, countWcc, + sumAnother, avgAnother, countAnother, + sumLpa, avgLpa, countLpa""", Map.of(), + Iterators::asList); + + List multiStatsResult = db.executeTransactionally(""" + match (p:Person) + with apoc.agg.multiStats(p, ["lpa","wcc","louvain", "another"]) as data + match (p:Person) + return p.name, + data.wcc[toString(p.wcc)].avg AS avgWcc, + data.louvain[toString(p.louvain)].avg AS avgLouvain, + data.lpa[toString(p.lpa)].avg AS avgLpa, + data.another[toString(p.another)].avg AS avgAnother, + data.another[toString(p.another)].count AS countAnother, + data.wcc[toString(p.wcc)].count AS countWcc, + data.louvain[toString(p.louvain)].count AS countLouvain, + data.lpa[toString(p.lpa)].count AS countLpa, + data.another[toString(p.another)].sum AS sumAnother, + data.wcc[toString(p.wcc)].sum AS sumWcc, + data.louvain[toString(p.louvain)].sum AS sumLouvain, + data.lpa[toString(p.lpa)].sum AS sumLpa + """, Map.of(), Iterators::asList); + + assertEquals(multiAggregationResult, multiStatsResult); + + } + +}