Skip to content

Commit

Permalink
Added new functions to apoc.coll - containsDuplicates(), duplicates()…
Browse files Browse the repository at this point in the history
…, duplicatesWithCount(), and occurrences() (#304)

* Added new functions to apoc.coll - containsDuplicates(), duplicates(), duplicatesWithCount(), and occurrences()

* Undid accidental whitespace edit
  • Loading branch information
InverseFalcon authored and jexp committed Mar 7, 2017
1 parent 2ef003e commit 268979d
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 4 deletions.
4 changes: 4 additions & 0 deletions docs/overview.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,10 @@ Sometimes type information gets lost, these functions help you to coerce an "Any
| apoc.coll.shuffle(coll) | returns the shuffled list
| apoc.coll.randomItem(coll) | returns a random item from the list
| apoc.coll.randomItems(coll, itemCount, allowRepick: false) | returns a list of `itemCount` random items from the list, optionally allowing picked elements to be picked again
| apoc.coll.containsDuplicates(coll) | returns true if a collection contains duplicate elements
| apoc.coll.duplicates(coll) | returns a list of duplicate items in the collection
| apoc.coll.duplicatesWithCount(coll) | returns a list of duplicate items in the collection and their count, keyed by `item` and `count` (e.g., `[{item: xyz, count:2}, {item:zyx, count:5}]`)
| apoc.coll.occurrences(coll, item) | returns the count of the given item in the collection
|===

=== Lookup Functions
Expand Down
65 changes: 65 additions & 0 deletions src/main/java/apoc/coll/Coll.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package apoc.coll;

import org.neo4j.kernel.impl.util.statistics.IntCounter;
import org.neo4j.procedure.*;
import apoc.result.*;
import org.neo4j.graphdb.GraphDatabaseService;
Expand Down Expand Up @@ -300,4 +301,68 @@ public List<Object> randomItems(@Name("coll") List<Object> coll, @Name("itemCoun

return randomItems;
}
/**
 * Reports whether the given list holds at least one repeated element.
 * Elements are compared by {@code equals}/{@code hashCode}.
 *
 * @param coll the list to inspect; may be {@code null}
 * @return {@code true} if some element occurs more than once; {@code false}
 *         otherwise, including for a {@code null}, empty or single-element list
 */
@UserFunction
@Description("apoc.coll.containsDuplicates(coll) - returns true if a collection contains duplicate elements")
public boolean containsDuplicates(@Name("coll") List<Object> coll) {
    // A null list would otherwise fail with a NullPointerException; lists with
    // fewer than two elements cannot contain a duplicate.
    if (coll == null || coll.size() <= 1) {
        return false;
    }

    Set<Object> seen = new HashSet<>(coll.size());
    for (Object obj : coll) {
        // Set.add returns false when the element is already present,
        // so we can stop at the first repeat instead of copying the whole list.
        if (!seen.add(obj)) {
            return true;
        }
    }
    return false;
}

/**
 * Collects the elements that occur more than once in the given list.
 * Each duplicated element appears exactly once in the result, in the order
 * in which its first repetition was encountered.
 *
 * @param coll the list to inspect; may be {@code null}
 * @return a new list of the duplicated elements; empty when there are none
 *         or when {@code coll} is {@code null}
 */
@UserFunction
@Description("apoc.coll.duplicates(coll) - returns a list of duplicate items in the collection")
public List<Object> duplicates(@Name("coll") List<Object> coll) {
    // A null list would otherwise fail with a NullPointerException; lists with
    // fewer than two elements cannot contain a duplicate.
    if (coll == null || coll.size() <= 1) {
        return new ArrayList<>();
    }

    Set<Object> seen = new HashSet<>(coll.size());
    // LinkedHashSet keeps each duplicate once while preserving encounter order.
    Set<Object> duplicates = new LinkedHashSet<>();

    for (Object obj : coll) {
        // Set.add returns false when the element was already seen.
        if (!seen.add(obj)) {
            duplicates.add(obj);
        }
    }

    return new ArrayList<>(duplicates);
}

/**
 * Collects the elements that occur more than once in the given list together
 * with how often each occurs. Every result entry is a map with the keys
 * {@code item} (the duplicated element) and {@code count} (its number of
 * occurrences as a {@code Long}). Entries are ordered by the first occurrence
 * of each element in the input.
 *
 * @param coll the list to inspect; may be {@code null}
 * @return a new list of {@code {item, count}} maps; empty when there are no
 *         duplicates or when {@code coll} is {@code null}
 */
@UserFunction
@Description("apoc.coll.duplicatesWithCount(coll) - returns a list of duplicate items in the collection and their count, keyed by `item` and `count` (e.g., `[{item: xyz, count:2}, {item:zyx, count:5}]`)")
public List<Map<String, Object>> duplicatesWithCount(@Name("coll") List<Object> coll) {
    List<Map<String, Object>> resultList = new ArrayList<>();

    // A null list would otherwise fail with a NullPointerException; lists with
    // fewer than two elements cannot contain a duplicate.
    if (coll == null || coll.size() <= 1) {
        return resultList;
    }

    // Counted bag: element -> number of occurrences, in first-seen order.
    // Plain Long values with Map.merge avoid relying on an internal kernel counter class.
    Map<Object, Long> counts = new LinkedHashMap<>(coll.size());
    for (Object obj : coll) {
        counts.merge(obj, 1L, Long::sum);
    }

    for (Map.Entry<Object, Long> counted : counts.entrySet()) {
        Long count = counted.getValue();
        if (count > 1) {
            Map<String, Object> entry = new LinkedHashMap<>(2);
            entry.put("item", counted.getKey());
            entry.put("count", count);
            resultList.add(entry);
        }
    }

    return resultList;
}

/**
 * Counts how many elements of the given list are equal to {@code item}.
 * Equality is decided by {@code item.equals(element)}; a {@code null} item
 * matches {@code null} elements.
 *
 * @param coll the list to search; may be {@code null}
 * @param item the value to count; may be {@code null}
 * @return the number of matching elements, {@code 0} when {@code coll} is
 *         {@code null} or empty
 */
@UserFunction
@Description("apoc.coll.occurrences(coll, item) - returns the count of the given item in the collection")
public long occurrences(@Name("coll") List<Object> coll, @Name("item") Object item) {
    // A null list would otherwise fail with a NullPointerException.
    if (coll == null || coll.isEmpty()) {
        return 0;
    }

    long occurrences = 0;

    for (Object obj : coll) {
        // Null-safe comparison: calling item.equals(...) on a null item would throw.
        boolean matches = (item == null) ? (obj == null) : item.equals(obj);
        if (matches) {
            occurrences++;
        }
    }

    return occurrences;
}
}
56 changes: 52 additions & 4 deletions src/test/java/apoc/coll/CollTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,7 @@
import org.neo4j.graphdb.Node;
import org.neo4j.test.TestGraphDatabaseFactory;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;

import static apoc.util.TestUtil.testCall;
import static apoc.util.TestUtil.testResult;
Expand Down Expand Up @@ -345,4 +342,55 @@ public void testRandomItemsLargerThanOriginalAllowingRepick() throws Exception {
assertTrue(original.containsAll(result));
});
}

// Verifies apoc.coll.containsDuplicates for a list of distinct values and a list with a repeat.
@Test
public void testContainsDuplicates() throws Exception {
    // All elements are distinct, so no duplicates are reported.
    final String distinctQuery = "RETURN apoc.coll.containsDuplicates([1,2,3,9,7,5]) AS value";
    testCall(db, distinctQuery, row -> assertEquals(false, row.get("value")));

    // The value 1 appears twice, so duplicates are reported.
    final String repeatedQuery = "RETURN apoc.coll.containsDuplicates([1,2,1,5,4]) AS value";
    testCall(db, repeatedQuery, row -> assertEquals(true, row.get("value")));
}

// Verifies apoc.coll.duplicates returns each repeated value once, in order of first repetition.
@Test
public void testDuplicates() throws Exception {
    final String query = "RETURN apoc.coll.duplicates([1,2,1,3,2,5,2,3,1,2]) as value";
    testCall(db, query, result -> {
        // 5 occurs only once and therefore must not be part of the result.
        assertEquals(asList(1L, 2L, 3L), result.get("value"));
    });
}

// Verifies apoc.coll.duplicatesWithCount reports every repeated value with its occurrence count.
@Test
public void testDuplicatesWithCount() throws Exception {
    final String query = "RETURN apoc.coll.duplicatesWithCount([1,2,1,3,2,5,2,3,1,2]) as value";
    testCall(db, query, row -> {
        // Expected duplicated item -> number of occurrences in the input list.
        Map<Long, Long> expectedCounts = new HashMap<>(3);
        expectedCounts.put(1L, 3L);
        expectedCounts.put(2L, 4L);
        expectedCounts.put(3L, 2L);

        List<Map<String, Object>> entries = (List<Map<String, Object>>) row.get("value");
        assertEquals(3, entries.size());

        // Items actually seen in the result, to confirm none is missing or repeated.
        Set<Long> seenItems = new HashSet<>(3);

        for (Map<String, Object> entry : entries) {
            Object item = entry.get("item");
            Long count = (Long) entry.get("count");
            seenItems.add((Long) item);
            assertTrue(expectedCounts.containsKey(item));
            assertEquals(expectedCounts.get(item), count);
        }

        assertEquals(expectedCounts.keySet(), seenItems);
    });
}

// Verifies apoc.coll.occurrences for present items, an absent item, and an empty list.
@Test
public void testOccurrences() throws Exception {
    // Each pair is {item to look for, expected number of occurrences} in the fixed list below.
    final long[][] cases = {
            {1, 3},
            {2, 4},
            {3, 2},
            {5, 1},
            {-5, 0},
    };

    for (long[] pair : cases) {
        String query = "RETURN apoc.coll.occurrences([1,2,1,3,2,5,2,3,1,2], " + pair[0] + ") as value";
        testCall(db, query, row -> assertEquals(pair[1], row.get("value")));
    }

    // An empty list never contains the item.
    testCall(db, "RETURN apoc.coll.occurrences([], 5) as value",
            row -> assertEquals(0L, row.get("value")));
}
}

0 comments on commit 268979d

Please sign in to comment.