-
Notifications
You must be signed in to change notification settings - Fork 493
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fixes #2182: multi-dimensional aggregation + aggregation functions fo…
…r community sizes (#4049) * Fixes #2182: multi-dimensional aggregation + aggregation functions for community sizes * removed unused imports * updated extended.txt
- Loading branch information
Showing
7 changed files
with
329 additions
and
0 deletions.
There are no files selected for viewing
128 changes: 128 additions & 0 deletions
128
docs/asciidoc/modules/ROOT/pages/overview/apoc.agg/apoc.agg.multiStats.adoc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
|
||
= apoc.agg.multiStats | ||
:description: This section contains reference documentation for the apoc.agg.multiStats function. | ||
|
||
label:function[] label:apoc-extended[] | ||
|
||
[.emphasis] | ||
apoc.agg.multiStats(nodeOrRel, keys) - Return a multi-dimensional aggregation | ||
|
||
== Signature | ||
|
||
[source] | ||
---- | ||
apoc.agg.multiStats(value :: NODE | RELATIONSHIP, keys :: LIST OF STRING) :: (MAP?) | ||
---- | ||
|
||
== Input parameters | ||
[.procedures, opts=header] | ||
|=== | ||
| Name | Type | Default | ||
|value|NODE \| RELATIONSHIP|null | ||
|=== | ||
|
||
|
||
[[usage-apoc.data.email]] | ||
== Usage Examples | ||
|
||
Given this dataset: | ||
[source,cypher] | ||
---- | ||
CREATE (:Person { louvain: 596, neo4jImportId: "18349390", wcc: 48, lpa: 598, name: "aaa", another: 548}), | ||
(:Person { louvain: 596, neo4jImportId: "18349390", wcc: 48, lpa: 598, name: "eee", another: 549}), | ||
(:Person { louvain: 596, neo4jImportId: "18349390", wcc: 48, lpa: 598, name: "eee", another: 549}), | ||
(:Person { louvain: 597, neo4jImportId: "18349391", wcc: 48, lpa: 598, name: "eee", another: 549}), | ||
(:Person { louvain: 597, neo4jImportId: "18349392", wcc: 47, lpa: 596, name: "iii", another: 549}), | ||
(:Person { louvain: 597, neo4jImportId: "18349393", wcc: 47, lpa: 596, name: "iii", another: 549}), | ||
(:Person { louvain: 597, neo4jImportId: "18349394", wcc: 47, lpa: 596, name: "iii", another: 549}), | ||
(:Person { louvain: 597, neo4jImportId: "18349393", wcc: 47, lpa: 596, name: "iii", another: 10}), | ||
(:Person { louvain: 597, neo4jImportId: "18349394", wcc: 47, lpa: 596, name: "iii", another: 10}) | ||
---- | ||
|
||
|
||
We can create an optimized multiple aggregation based on the property key, | ||
similar to this one: | ||
[source,cypher] | ||
---- | ||
MATCH (p:Person) | ||
WITH p | ||
CALL { | ||
WITH p | ||
MATCH (n:Person {louvain: p.louvain}) | ||
RETURN sum(p.louvain) AS sumLouvain, avg(p.louvain) AS avgLouvain, count(p.louvain) AS countLouvain | ||
} | ||
CALL { | ||
WITH p | ||
MATCH (n:Person {wcc: p.wcc}) | ||
RETURN sum(p.wcc) AS sumWcc, avg(p.wcc) AS avgWcc, count(p.wcc) AS countWcc | ||
} | ||
CALL { | ||
WITH p | ||
MATCH (n:Person {another: p.another}) | ||
RETURN sum(p.another) AS sumAnother, avg(p.another) AS avgAnother, count(p.another) AS countAnother | ||
} | ||
CALL { | ||
WITH p | ||
MATCH (lpa:Person {lpa: p.lpa}) | ||
RETURN sum(p.lpa) AS sumLpa, avg(p.lpa) AS avgLpa, count(p.lpa) AS countLpa | ||
} | ||
RETURN p.name, | ||
sumLouvain, avgLouvain, countLouvain, | ||
sumWcc, avgWcc, countWcc, | ||
sumAnother, avgAnother, countAnother, | ||
sumLpa, avgLpa, countLpa | ||
---- | ||
|
||
|
||
executing the following query: | ||
[source,cypher] | ||
---- | ||
MATCH (p:Person) | ||
RETURN apoc.agg.multiStats(p, ["lpa","wcc","louvain", "another"]) as output | ||
---- | ||
|
||
|
||
.Results | ||
[opts="header"] | ||
|=== | ||
| output | ||
a| | ||
[source,json] | ||
---- | ||
{ | ||
"louvain" :{"596" :{"avg" :596.0, "count" :3, "sum" :1788}, "597" :{"avg" :597.0, "count" :6, "sum" :3582}}, | ||
"wcc" :{"47" :{"avg" :47.0, "count" :5, "sum" :235}, "48" :{"avg" :48.0, "count" :4, "sum" :192}}, | ||
"another" :{"548" :{"avg" :548.0, "count" :1, "sum" :548}, "549" :{"avg" :549.0, "count" :6, "sum" :3294}, "10" :{"avg" :10.0, "count" :2, "sum" :20}}, | ||
"lpa" :{"596" :{"avg" :596.0, "count" :5, "sum" :2980}, "598" :{"avg" :598.0, "count" :4, "sum" :2392}} | ||
} | ||
---- | ||
|=== | ||
|
||
which can be used, for example, to return a result similar to the Cypher one in this way: | ||
|
||
[source,cypher] | ||
---- | ||
MATCH (p:Person) | ||
WITH apoc.agg.multiStats(p, ["lpa","wcc","louvain", "another"]) as data | ||
MATCH (p:Person) | ||
RETURN p.name, | ||
data.wcc[toString(p.wcc)].avg AS avgWcc, | ||
data.louvain[toString(p.louvain)].avg AS avgLouvain, | ||
data.lpa[toString(p.lpa)].avg AS avgLpa | ||
---- | ||
|
||
|
||
.Results | ||
[opts="header"] | ||
|=== | ||
| avgWcc | avgLouvain | avgLpa | ||
| 48.0 | 596.0 | 598.0 | ||
| 48.0 | 596.0 | 598.0 | ||
| 48.0 | 596.0 | 598.0 | ||
| 47.0 | 597.0 | 596.0 | ||
| 47.0 | 597.0 | 596.0 | ||
| 47.0 | 597.0 | 596.0 | ||
| 47.0 | 597.0 | 596.0 | ||
| 47.0 | 597.0 | 596.0 | ||
|=== | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
package apoc.agg; | ||
|
||
import apoc.Extended; | ||
import org.neo4j.graphdb.Entity; | ||
import org.neo4j.procedure.Description; | ||
import org.neo4j.procedure.Name; | ||
import org.neo4j.procedure.UserAggregationFunction; | ||
import org.neo4j.procedure.UserAggregationResult; | ||
import org.neo4j.procedure.UserAggregationUpdate; | ||
|
||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Objects; | ||
|
||
@Extended | ||
public class MultiStats { | ||
|
||
@UserAggregationFunction("apoc.agg.multiStats") | ||
@Description("Return a multi-dimensional aggregation") | ||
public MultiStatsFunction multiStats() { | ||
return new MultiStatsFunction(); | ||
} | ||
|
||
public static class MultiStatsFunction { | ||
|
||
private final Map<String, Map<String, Map<String, Number>>> result = new HashMap<>(); | ||
|
||
@UserAggregationUpdate | ||
public void aggregate( | ||
@Name("value") Object value, | ||
@Name(value = "keys") List<String> keys) { | ||
Entity entity = (Entity) value; | ||
|
||
// for each prop | ||
keys.forEach(key -> { | ||
if (entity.hasProperty(key)) { | ||
Object property = entity.getProperty(key); | ||
|
||
result.compute(key, (ignored, v) -> { | ||
Map<String, Map<String, Number>> map = Objects.requireNonNullElseGet(v, HashMap::new); | ||
|
||
map.compute(property.toString(), (propKey, propVal) -> { | ||
|
||
Map<String, Number> propMap = Objects.requireNonNullElseGet(propVal, HashMap::new); | ||
|
||
Number count = propMap.compute("count", | ||
((subKey, subVal) -> subVal == null ? 1 : subVal.longValue() + 1) ); | ||
|
||
if (property instanceof Number numberProp) { | ||
Number sum = propMap.compute("sum", | ||
((subKey, subVal) -> { | ||
if (subVal == null) return numberProp; | ||
if (subVal instanceof Long long1 && numberProp instanceof Long long2) { | ||
return long1 + long2; | ||
} | ||
return subVal.doubleValue() + numberProp.doubleValue(); | ||
})); | ||
|
||
propMap.compute("avg", | ||
((subKey, subVal) -> subVal == null ? numberProp.doubleValue() : sum.doubleValue() / count.doubleValue() )); | ||
} | ||
|
||
return propMap; | ||
}); | ||
|
||
return map; | ||
}); | ||
} | ||
}); | ||
} | ||
|
||
|
||
@UserAggregationResult | ||
// apoc.agg.multiStats([key1,key2,key3]) -> Map<Key,Map<agg="sum,count,avg", number>> | ||
public Map<String, Map<String, Map<String, Number>>> result() { | ||
return result; | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
apoc.agg.position | ||
apoc.agg.row | ||
apoc.agg.multiStats | ||
apoc.algo.aStarWithPoint | ||
apoc.bolt.execute | ||
apoc.bolt.load | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
package apoc.agg; | ||
|
||
import apoc.map.Maps; | ||
import apoc.util.TestUtil; | ||
import apoc.util.collection.Iterators; | ||
import org.junit.AfterClass; | ||
import org.junit.BeforeClass; | ||
import org.junit.ClassRule; | ||
import org.junit.Test; | ||
import org.neo4j.test.rule.DbmsRule; | ||
import org.neo4j.test.rule.ImpermanentDbmsRule; | ||
|
||
import java.util.List; | ||
import java.util.Map; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
|
||
public class MultiStatsTest { | ||
|
||
@ClassRule | ||
public static DbmsRule db = new ImpermanentDbmsRule(); | ||
|
||
@BeforeClass | ||
public static void setUp() { | ||
TestUtil.registerProcedure(db, Maps.class, MultiStats.class); | ||
|
||
db.executeTransactionally(""" | ||
CREATE (:Person { louvain: 596, neo4jImportId: "18349390", wcc: 48, lpa: 598, name: "aaa", another: 548}), | ||
(:Person { louvain: 596, neo4jImportId: "18349390", wcc: 48, lpa: 598, name: "eee", another: 549}), | ||
(:Person { louvain: 596, neo4jImportId: "18349390", wcc: 48, lpa: 598, name: "eee", another: 549}), | ||
(:Person { louvain: 597, neo4jImportId: "18349391", wcc: 48, lpa: 598, name: "eee", another: 549}), | ||
(:Person { louvain: 597, neo4jImportId: "18349392", wcc: 47, lpa: 596, name: "iii", another: 549}), | ||
(:Person { louvain: 597, neo4jImportId: "18349393", wcc: 47, lpa: 596, name: "iii", another: 549}), | ||
(:Person { louvain: 597, neo4jImportId: "18349394", wcc: 47, lpa: 596, name: "iii", another: 549}), | ||
(:Person { louvain: 597, neo4jImportId: "18349393", wcc: 47, lpa: 596, name: "iii", another: 10}), | ||
(:Person { louvain: 597, neo4jImportId: "18349394", wcc: 47, lpa: 596, name: "iii", another: 10})"""); | ||
} | ||
|
||
@AfterClass | ||
public static void tearDown() { | ||
db.shutdown(); | ||
} | ||
|
||
// similar to https://community.neo4j.com/t/listing-the-community-size-of-different-community-detection-algorithms-already-calculated/42895 | ||
@Test | ||
public void testMultiStatsComparedWithCypherMultiAggregation() { | ||
List multiAggregationResult = db.executeTransactionally(""" | ||
MATCH (p:Person) | ||
WITH p | ||
CALL { | ||
WITH p | ||
MATCH (n:Person {louvain: p.louvain}) | ||
RETURN sum(p.louvain) AS sumLouvain, avg(p.louvain) AS avgLouvain, count(p.louvain) AS countLouvain | ||
} | ||
CALL { | ||
WITH p | ||
MATCH (n:Person {wcc: p.wcc}) | ||
RETURN sum(p.wcc) AS sumWcc, avg(p.wcc) AS avgWcc, count(p.wcc) AS countWcc | ||
} | ||
CALL { | ||
WITH p | ||
MATCH (n:Person {another: p.another}) | ||
RETURN sum(p.another) AS sumAnother, avg(p.another) AS avgAnother, count(p.another) AS countAnother | ||
} | ||
CALL { | ||
WITH p | ||
MATCH (lpa:Person {lpa: p.lpa}) | ||
RETURN sum(p.lpa) AS sumLpa, avg(p.lpa) AS avgLpa, count(p.lpa) AS countLpa | ||
} | ||
RETURN p.name, | ||
sumLouvain, avgLouvain, countLouvain, | ||
sumWcc, avgWcc, countWcc, | ||
sumAnother, avgAnother, countAnother, | ||
sumLpa, avgLpa, countLpa""", Map.of(), | ||
Iterators::asList); | ||
|
||
List multiStatsResult = db.executeTransactionally(""" | ||
match (p:Person) | ||
with apoc.agg.multiStats(p, ["lpa","wcc","louvain", "another"]) as data | ||
match (p:Person) | ||
return p.name, | ||
data.wcc[toString(p.wcc)].avg AS avgWcc, | ||
data.louvain[toString(p.louvain)].avg AS avgLouvain, | ||
data.lpa[toString(p.lpa)].avg AS avgLpa, | ||
data.another[toString(p.another)].avg AS avgAnother, | ||
data.another[toString(p.another)].count AS countAnother, | ||
data.wcc[toString(p.wcc)].count AS countWcc, | ||
data.louvain[toString(p.louvain)].count AS countLouvain, | ||
data.lpa[toString(p.lpa)].count AS countLpa, | ||
data.another[toString(p.another)].sum AS sumAnother, | ||
data.wcc[toString(p.wcc)].sum AS sumWcc, | ||
data.louvain[toString(p.louvain)].sum AS sumLouvain, | ||
data.lpa[toString(p.lpa)].sum AS sumLpa | ||
""", Map.of(), Iterators::asList); | ||
|
||
assertEquals(multiAggregationResult, multiStatsResult); | ||
|
||
} | ||
|
||
} |