Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More robust multiexpression filtering #206

Merged
merged 22 commits into from
May 5, 2022
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import java.util.function.Predicate;
import java.util.function.UnaryOperator;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* A list of {@link Expression Expressions} to evaluate on input elements.
Expand All @@ -33,6 +35,7 @@
*/
public record MultiExpression<T> (List<Entry<T>> expressions) {

private static final Logger LOGGER = LoggerFactory.getLogger(MultiExpression.class);
private static final Comparator<WithId> BY_ID = Comparator.comparingInt(WithId::id);

public static <T> MultiExpression<T> of(List<Entry<T>> expressions) {
Expand Down Expand Up @@ -62,34 +65,36 @@ private static <T> void visitExpressions(SourceFeature input, List<Match<T>> res
}
}

/** Calls {@code acceptKey} for every tag that could possibly cause {@code exp} to match an input element. */
private static void getRelevantKeys(Expression exp, Consumer<String> acceptKey) {
if (exp instanceof Expression.And and) {
and.children().forEach(child -> getRelevantKeys(child, acceptKey));
} else if (exp instanceof Expression.Or or) {
or.children().forEach(child -> getRelevantKeys(child, acceptKey));
} else if (exp instanceof Expression.Not not) {
getRelevantMissingKeys(not.child(), acceptKey);
} else if (exp instanceof Expression.MatchField field) {
acceptKey.accept(field.field());
} else if (exp instanceof Expression.MatchAny any) {
acceptKey.accept(any.field());
private static boolean mustAlwaysEvaluate(Expression expression) {
if (expression instanceof Expression.Or or) {
return or.children().stream().anyMatch(MultiExpression::mustAlwaysEvaluate);
} else if (expression instanceof Expression.And and) {
return and.children().stream().allMatch(MultiExpression::mustAlwaysEvaluate);
} else if (expression instanceof Expression.Not not) {
return !mustAlwaysEvaluate(not.child());
} else if (expression instanceof Expression.MatchAny any && any.matchWhenMissing()) {
return true;
} else {
return TRUE.equals(expression);
}
}

/**
* Calls {@code acceptKey} for every tag that, when missing, could possibly cause {@code exp} to match an input
* element.
*/
private static void getRelevantMissingKeys(Expression exp, Consumer<String> acceptKey) {
if (exp instanceof Expression.And and) {
and.children().forEach(child -> getRelevantMissingKeys(child, acceptKey));
} else if (exp instanceof Expression.Or or) {
or.children().forEach(child -> getRelevantMissingKeys(child, acceptKey));
} else if (exp instanceof Expression.Not not) {
getRelevantKeys(not.child(), acceptKey);
} else if (exp instanceof Expression.MatchAny any && any.matchWhenMissing()) {
acceptKey.accept(any.field());
/** Calls {@code acceptKey} for every tag that could possibly cause {@code exp} to match an input element. */
private static void getRelevantKeys(Expression exp, Consumer<String> acceptKey) {
if (!mustAlwaysEvaluate(exp)) {
if (exp instanceof Expression.And and) {
and.children().forEach(child -> getRelevantKeys(child, acceptKey));
} else if (exp instanceof Expression.Or or) {
or.children().forEach(child -> getRelevantKeys(child, acceptKey));
} else if (exp instanceof Expression.Not not) {
if (not.child()instanceof Expression.MatchAny any && any.matchWhenMissing()) {
acceptKey.accept(any.field());
}
} else if (exp instanceof Expression.MatchField field) {
acceptKey.accept(field.field());
} else if (exp instanceof Expression.MatchAny any && !any.matchWhenMissing()) {
acceptKey.accept(any.field());
}
}
}

Expand Down Expand Up @@ -220,38 +225,38 @@ private static class KeyIndex<T> implements Index<T> {
private final Map<String, List<EntryWithId<T>>> keyToExpressionsMap;
// same as keyToExpressionsMap but as a list (optimized for iteration when # source feature keys > # tags we care about)
private final List<Map.Entry<String, List<EntryWithId<T>>>> keyToExpressionsList;
// expressions that should match when certain tags are *not* present on an input element
private final List<Map.Entry<String, List<EntryWithId<T>>>> missingKeyToExpressionList;
// expressions that match a constant true input element
private final List<EntryWithId<T>> constantTrueExpressionList;
// expressions that must always be evaluated on each input element
private final List<EntryWithId<T>> alwaysEvaluateExpressionList;

private KeyIndex(MultiExpression<T> expressions) {
int id = 1;
// build the indexes
Map<String, Set<EntryWithId<T>>> keyToExpressions = new HashMap<>();
Map<String, Set<EntryWithId<T>>> missingKeyToExpressions = new HashMap<>();
List<EntryWithId<T>> constants = new ArrayList<>();
List<EntryWithId<T>> always = new ArrayList<>();

for (var entry : expressions.expressions) {
Expression expression = entry.expression;
EntryWithId<T> expressionValue = new EntryWithId<>(entry.result, expression, id++);
getRelevantKeys(expression,
key -> keyToExpressions.computeIfAbsent(key, k -> new HashSet<>()).add(expressionValue));
getRelevantMissingKeys(expression,
key -> missingKeyToExpressions.computeIfAbsent(key, k -> new HashSet<>()).add(expressionValue));
if (expression.equals(TRUE)) {
constants.add(expressionValue);
if (mustAlwaysEvaluate(expression)) {
always.add(expressionValue);
} else {
getRelevantKeys(expression,
key -> keyToExpressions.computeIfAbsent(key, k -> new HashSet<>()).add(expressionValue));
}
}
// create immutable copies for fast iteration at matching time
constantTrueExpressionList = List.copyOf(constants);
if (!always.isEmpty()) {
LOGGER.warn("{} expressions will be evaluated for every element:", always.size());
for (var expression : always) {
LOGGER.warn(" {}", expression.expression.generateJavaCode());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix should be to surround the warn call with if(LOGGER.isWarnEnabled()) {

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are a few cases like this that I think I want to tackle in one pass (and leave highlighted so I don't miss one). I'd prefer to go the route of a logger framework that takes a lambda that only gets invoked if that level is needed. There's also some other logging issues to clear up like core should just depend on slf4j and the runnable thing (dist module or a project that depends on this) should bind slf4j to whatever implementation it wants.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also slf4j v2 api will support lambdas. I'd like to use that but it's been in alpha stage since 2019...? https://mvnrepository.com/artifact/org.slf4j/slf4j-api

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a logging framework, how bad could alpha be? 🤔

}
}
alwaysEvaluateExpressionList = List.copyOf(always);
keyToExpressionsMap = keyToExpressions.entrySet().stream().collect(Collectors.toUnmodifiableMap(
Map.Entry::getKey,
entry -> entry.getValue().stream().toList()
));
keyToExpressionsList = List.copyOf(keyToExpressionsMap.entrySet());
missingKeyToExpressionList = missingKeyToExpressions.entrySet().stream()
.map(entry -> Map.entry(entry.getKey(), entry.getValue().stream().toList())).toList();
numExpressions = id;
}

Expand All @@ -260,14 +265,7 @@ private KeyIndex(MultiExpression<T> expressions) {
public List<Match<T>> getMatchesWithTriggers(SourceFeature input) {
List<Match<T>> result = new ArrayList<>();
boolean[] visited = new boolean[numExpressions];
for (var entry : constantTrueExpressionList) {
result.add(new Match<>(entry.result, List.of(), entry.id));
}
for (var entry : missingKeyToExpressionList) {
if (!input.hasTag(entry.getKey())) {
visitExpressions(input, result, visited, entry.getValue());
}
}
visitExpressions(input, result, visited, alwaysEvaluateExpressionList);
Map<String, Object> tags = input.tags();
if (tags.size() < keyToExpressionsMap.size()) {
for (String inputKey : tags.keySet()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,35 @@ void testAnd() {
assertSameElements(List.of(), index.getMatches(featureWithTags()));
}

@Test
void testAndConstant() {
var index = MultiExpression.of(List.of(
entry("a", and(
Expression.TRUE,
matchAny("key1", "val1")
))
)).index();
assertSameElements(List.of("a"), index.getMatches(featureWithTags("key1", "val1", "key2", "val2")));
assertSameElements(List.of(), index.getMatches(featureWithTags("key1", "no", "key2", "val2")));
assertSameElements(List.of("a"), index.getMatches(featureWithTags("key1", "val1", "key2", "no")));
assertSameElements(List.of("a"), index.getMatches(featureWithTags("key1", "val1")));
assertSameElements(List.of(), index.getMatches(featureWithTags("key2", "val2")));
assertSameElements(List.of(), index.getMatches(featureWithTags()));

index = MultiExpression.of(List.of(
entry("a", and(
Expression.FALSE,
matchAny("key1", "val1")
))
)).index();
assertSameElements(List.of(), index.getMatches(featureWithTags("key1", "val1", "key2", "val2")));
assertSameElements(List.of(), index.getMatches(featureWithTags("key1", "no", "key2", "val2")));
assertSameElements(List.of(), index.getMatches(featureWithTags("key1", "val1", "key2", "no")));
assertSameElements(List.of(), index.getMatches(featureWithTags("key1", "val1")));
assertSameElements(List.of(), index.getMatches(featureWithTags("key2", "val2")));
assertSameElements(List.of(), index.getMatches(featureWithTags()));
}

@Test
void testOr() {
var index = MultiExpression.of(List.of(
Expand All @@ -281,6 +310,35 @@ void testOr() {
assertSameElements(List.of(), index.getMatches(featureWithTags()));
}

@Test
void testOrConstant() {
var index = MultiExpression.of(List.of(
entry("a", or(
Expression.TRUE,
matchAny("key1", "val1")
))
)).index();
assertSameElements(List.of("a"), index.getMatches(featureWithTags("key1", "val1", "key2", "val2")));
assertSameElements(List.of("a"), index.getMatches(featureWithTags("key1", "no", "key2", "val2")));
assertSameElements(List.of("a"), index.getMatches(featureWithTags("key1", "val1", "key2", "no")));
assertSameElements(List.of("a"), index.getMatches(featureWithTags("key1", "val1")));
assertSameElements(List.of("a"), index.getMatches(featureWithTags("key2", "val2")));
assertSameElements(List.of("a"), index.getMatches(featureWithTags()));

index = MultiExpression.of(List.of(
entry("a", or(
Expression.FALSE,
matchAny("key1", "val1")
))
)).index();
assertSameElements(List.of("a"), index.getMatches(featureWithTags("key1", "val1", "key2", "val2")));
assertSameElements(List.of(), index.getMatches(featureWithTags("key1", "no", "key2", "val2")));
assertSameElements(List.of("a"), index.getMatches(featureWithTags("key1", "val1", "key2", "no")));
assertSameElements(List.of("a"), index.getMatches(featureWithTags("key1", "val1")));
assertSameElements(List.of(), index.getMatches(featureWithTags("key2", "val2")));
assertSameElements(List.of(), index.getMatches(featureWithTags()));
}

@Test
void testNot() {
var index = MultiExpression.of(List.of(
Expand All @@ -298,6 +356,41 @@ void testNot() {
assertSameElements(List.of(), index.getMatches(featureWithTags()));
}

@Test
void testNor() {
var index = MultiExpression.of(List.of(
entry("a", not(or(
matchAny("key1", "val1"),
matchAny("key2", "val2")
)))
)).index();
assertSameElements(List.of(), index.getMatches(featureWithTags("key1", "val1", "key2", "val2")));
assertSameElements(List.of(), index.getMatches(featureWithTags("key1", "val1", "key2", "val2", "key3", "val3")));
assertSameElements(List.of(), index.getMatches(featureWithTags("key1", "no", "key2", "val2")));
assertSameElements(List.of(), index.getMatches(featureWithTags("key1", "val1", "key2", "no")));
assertSameElements(List.of(), index.getMatches(featureWithTags("key1", "val1")));
assertSameElements(List.of(), index.getMatches(featureWithTags("key2", "val2")));
assertSameElements(List.of("a"), index.getMatches(featureWithTags()));
assertSameElements(List.of("a"), index.getMatches(featureWithTags("key1", "no", "key2", "no")));
}

@Test
void testNand() {
var index = MultiExpression.of(List.of(
entry("a", not(and(
matchAny("key1", "val1"),
matchAny("key2", "val2")
)))
)).index();
assertSameElements(List.of(), index.getMatches(featureWithTags("key1", "val1", "key2", "val2")));
assertSameElements(List.of(), index.getMatches(featureWithTags("key1", "val1", "key2", "val2", "key3", "val3")));
assertSameElements(List.of("a"), index.getMatches(featureWithTags()));
assertSameElements(List.of("a"), index.getMatches(featureWithTags("key1", "val1")));
assertSameElements(List.of("a"), index.getMatches(featureWithTags("key2", "val2")));
assertSameElements(List.of("a"), index.getMatches(featureWithTags("key1", "val1", "key2", "no")));
assertSameElements(List.of("a"), index.getMatches(featureWithTags("key1", "no", "key2", "val2")));
}

@Test
void testXor() {
var index = MultiExpression.of(List.of(
Expand Down