// Source: planetiler/planetiler-core/src/main/java/com/onthegomap/planetiler/expression/MultiExpression.java
// 371 lines, 14 KiB, Java

package com.onthegomap.planetiler.expression;
import static com.onthegomap.planetiler.expression.Expression.FALSE;
import static com.onthegomap.planetiler.expression.Expression.TRUE;
import static com.onthegomap.planetiler.expression.Expression.matchType;
import com.onthegomap.planetiler.reader.WithGeometryType;
import com.onthegomap.planetiler.reader.WithTags;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.function.UnaryOperator;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A list of {@link Expression Expressions} to evaluate on input elements.
* <p>
* {@link #index()} returns an optimized {@link Index} that evaluates the minimal set of expressions on the keys present
* on the element.
* <p>
* {@link Index#getMatches(WithTags)} returns the data values associated with the expressions that match an input
* element.
*
* @param <T> type of data value associated with each expression
*/
public record MultiExpression<T> (List<Entry<T>> expressions) implements Simplifiable<MultiExpression<T>> {
// logger used by KeyIndex to warn about expressions that must be evaluated on every element
private static final Logger LOGGER = LoggerFactory.getLogger(MultiExpression.class);
// orders anything carrying an id (e.g. matches) by ascending id
private static final Comparator<WithId> BY_ID = Comparator.comparingInt(WithId::id);
/** Creates a multi-expression that wraps the given list of expression/result entries. */
public static <T> MultiExpression<T> of(List<Entry<T>> expressions) {
  MultiExpression<T> multiExpression = new MultiExpression<>(expressions);
  return multiExpression;
}
/** Pairs {@code result} with the {@code expression} it is associated with. */
public static <T> Entry<T> entry(T result, Expression expression) {
  Entry<T> pair = new Entry<>(result, expression);
  return pair;
}
/**
 * Returns true if {@code expression} cannot be tied to the presence of a particular key on the input, so it has to
 * be evaluated for every element.
 * <ul>
 * <li>an "or" must always be evaluated if any child must
 * <li>an "and" must always be evaluated only if every child must
 * <li>a "not" inverts the answer of its child
 * <li>a "match any" must always be evaluated exactly when it matches missing values
 * <li>a "match field" and the constant {@code FALSE} never need to be
 * <li>every other expression must always be evaluated
 * </ul>
 */
private static boolean mustAlwaysEvaluate(Expression expression) {
  if (expression instanceof Expression.Or or) {
    for (Expression child : or.children()) {
      if (mustAlwaysEvaluate(child)) {
        return true;
      }
    }
    return false;
  }
  if (expression instanceof Expression.And and) {
    for (Expression child : and.children()) {
      if (!mustAlwaysEvaluate(child)) {
        return false;
      }
    }
    return true;
  }
  if (expression instanceof Expression.Not not) {
    return !mustAlwaysEvaluate(not.child());
  }
  if (expression instanceof Expression.MatchAny any) {
    return any.matchWhenMissing();
  }
  if (expression instanceof Expression.MatchField) {
    return false;
  }
  return !FALSE.equals(expression);
}
/**
 * Reports to {@code acceptKey} every tag key that could possibly cause {@code exp} to match an input element.
 * <p>
 * Sub-expressions that must always be evaluated contribute no keys: either the whole expression must always be
 * evaluated, or another part of the expression already limits the elements it needs to be evaluated on.
 */
private static void getRelevantKeys(Expression exp, Consumer<String> acceptKey) {
  if (mustAlwaysEvaluate(exp)) {
    return;
  }
  if (exp instanceof Expression.And and) {
    for (Expression child : and.children()) {
      getRelevantKeys(child, acceptKey);
    }
  } else if (exp instanceof Expression.Or or) {
    for (Expression child : or.children()) {
      getRelevantKeys(child, acceptKey);
    }
  } else if (exp instanceof Expression.MatchField field) {
    acceptKey.accept(field.field());
  } else if (exp instanceof Expression.MatchAny any && !any.matchWhenMissing()) {
    acceptKey.accept(any.field());
  }
  // "not" is deliberately not handled: not(matchAny("field", "")) should track "field" as a relevant key, but
  // that gets simplified to matchField("field") before reaching this point
}
/** Builds an optimized index for matching {@link #expressions()} against input elements, without logging warnings. */
public Index<T> index() {
  final boolean warn = false;
  return index(warn);
}
/**
 * Builds the same index as {@link #index()}, and additionally logs a warning listing the degenerate expressions that
 * must be evaluated on every input.
 */
public Index<T> indexAndWarn() {
  final boolean warn = true;
  return index(warn);
}
/**
 * Picks the index implementation for this multi-expression: an empty index when there are no expressions, a
 * geometry-type index when any expression contains a geometry type test, and otherwise a key index built from the
 * simplified expressions.
 *
 * @param warn whether to log expressions that must be evaluated on every element
 */
private Index<T> index(boolean warn) {
  if (expressions.isEmpty()) {
    return new EmptyIndex<>();
  }
  for (Entry<T> entry : expressions) {
    if (entry.expression().contains(exp -> exp instanceof Expression.MatchType)) {
      // at least one expression cares about geometry type
      return new GeometryTypeIndex<>(this, warn);
    }
  }
  return new KeyIndex<>(simplify(), warn);
}
/**
 * Returns a copy of this multi-expression where each entry's expression is replaced by the simplified result of
 * applying {@code mapper} to it. Entries whose new expression is the {@code FALSE} constant are left out of the
 * copy.
 */
public MultiExpression<T> map(UnaryOperator<Expression> mapper) {
  Function<Entry<T>, Entry<T>> rewrite =
    original -> new Entry<>(original.result(), mapper.apply(original.expression()).simplify());
  // reference comparison against the shared FALSE constant
  Predicate<Entry<T>> isNotFalse = rewritten -> rewritten.expression() != Expression.FALSE;
  List<Entry<T>> remaining = expressions.stream()
    .map(rewrite)
    .filter(isNotFalse)
    .toList();
  return new MultiExpression<>(remaining);
}
/**
 * Returns a copy of this multi-expression in which every sub-expression accepted by {@code test} has been swapped
 * for {@code b}.
 */
public MultiExpression<T> replace(Predicate<Expression> test, Expression b) {
  UnaryOperator<Expression> substitute = expression -> expression.replace(test, b);
  return map(substitute);
}
/**
 * Returns a copy of this multi-expression in which every sub-expression equal to {@code a} has been swapped for
 * {@code b}.
 */
public MultiExpression<T> replace(Expression a, Expression b) {
  UnaryOperator<Expression> substitute = expression -> expression.replace(a, b);
  return map(substitute);
}
/** Performs one simplification pass: returns a copy of this multi-expression with every expression simplified. */
@Override
public MultiExpression<T> simplifyOnce() {
  UnaryOperator<Expression> simplifyEach = expression -> expression.simplify();
  return map(simplifyEach);
}
/**
 * Returns a copy of this multi-expression that keeps only the entries whose data value passes {@code accept};
 * entries whose data value is rejected by {@code accept} are dropped.
 */
public MultiExpression<T> filterResults(Predicate<T> accept) {
  Predicate<Entry<T>> keep = candidate -> accept.test(candidate.result());
  List<Entry<T>> retained = expressions.stream()
    .filter(keep)
    .toList();
  return new MultiExpression<>(retained);
}
/**
 * Returns a copy of this multi-expression where each data value is replaced by the result of applying {@code fn} to
 * it; the expressions themselves are carried over unchanged.
 */
public <U> MultiExpression<U> mapResults(Function<T, U> fn) {
  Function<Entry<T>, Entry<U>> convert =
    original -> new Entry<>(fn.apply(original.result()), original.expression());
  List<Entry<U>> converted = expressions.stream()
    .map(convert)
    .toList();
  return new MultiExpression<>(converted);
}
/**
 * An optimized index for finding which expressions match an input element.
 *
 * @param <O> type of data value associated with each expression
 */
public interface Index<O> {

  /** Returns a {@link Match} for every expression that matches {@code input}. */
  List<Match<O>> getMatchesWithTriggers(WithTags input);

  /** Returns just the data values of all expressions that match {@code input}. */
  default List<O> getMatches(WithTags input) {
    List<Match<O>> matches = getMatchesWithTriggers(input);
    return matches.stream().map(Match::match).toList();
  }

  /**
   * Returns the data value of the first expression that matches {@code input}, or {@code defaultValue} when nothing
   * matches.
   */
  default O getOrElse(WithTags input, O defaultValue) {
    List<O> matches = getMatches(input);
    if (matches.isEmpty()) {
      return defaultValue;
    }
    return matches.get(0);
  }

  /**
   * Returns the data value of the first expression that matches a feature with {@code tags}, or {@code defaultValue}
   * when nothing matches.
   */
  default O getOrElse(Map<String, Object> tags, O defaultValue) {
    WithTags input = WithTags.from(tags);
    List<O> matches = getMatches(input);
    if (matches.isEmpty()) {
      return defaultValue;
    }
    return matches.get(0);
  }

  /** Returns true if at least one expression matches {@code input}. */
  default boolean matches(WithTags input) {
    List<Match<O>> matches = getMatchesWithTriggers(input);
    return !matches.isEmpty();
  }

  /** Returns whether this index is empty; {@code false} unless an implementation overrides it. */
  default boolean isEmpty() {
    return false;
  }
}
/** Something that carries a numeric id, so that it can be ordered with {@code BY_ID}. */
private interface WithId {
/** Returns the numeric id of this item. */
int id();
}
/** Index returned for a multi-expression with no expressions: it never matches anything. */
private static class EmptyIndex<T> implements Index<T> {

  /** Always {@code true}. */
  @Override
  public boolean isEmpty() {
    return true;
  }

  /** Always returns an empty, immutable list regardless of {@code input}. */
  @Override
  public List<Match<T>> getMatchesWithTriggers(WithTags input) {
    return List.of();
  }
}
/** Index that limits the search space of expressions based on keys present on an input element. */
private static class KeyIndex<T> implements Index<T> {

  // length of the per-lookup "visited" array: ids start at 1, so this is the largest assigned id plus one
  private final int numExpressions;
  // index from source feature tag key to the expressions that include it so that
  // we can limit the number of expressions we need to evaluate for each input,
  // improves matching performance by ~5x
  private final Map<String, List<EntryWithId<T>>> keyToExpressionsMap;
  // same as keyToExpressionsMap but as a list (optimized for iteration when # source feature keys > # tags we care about)
  private final List<Map.Entry<String, List<EntryWithId<T>>>> keyToExpressionsList;
  // expressions that must always be evaluated on each input element
  private final List<EntryWithId<T>> alwaysEvaluateExpressionList;

  /**
   * Assigns an increasing id to every entry of {@code expressions} and files it either under each of its relevant
   * keys or in the "always evaluate" list.
   *
   * @param warn whether to log the expressions that end up in the "always evaluate" list
   */
  private KeyIndex(MultiExpression<T> expressions, boolean warn) {
    Map<String, Set<EntryWithId<T>>> byKey = new HashMap<>();
    List<EntryWithId<T>> unconditional = new ArrayList<>();
    int nextId = 1;
    for (Entry<T> entry : expressions.expressions()) {
      EntryWithId<T> indexed = new EntryWithId<>(entry.result(), entry.expression(), nextId++);
      if (mustAlwaysEvaluate(indexed.expression())) {
        unconditional.add(indexed);
      } else {
        getRelevantKeys(indexed.expression(),
          key -> byKey.computeIfAbsent(key, unused -> new HashSet<>()).add(indexed));
      }
    }

    if (warn && !unconditional.isEmpty()) {
      LOGGER.warn("{} expressions will be evaluated for every element:", unconditional.size());
      for (EntryWithId<T> alwaysEvaluated : unconditional) {
        LOGGER.warn(" {}: {}", alwaysEvaluated.result(), alwaysEvaluated.expression());
      }
    }

    // create immutable copies for fast iteration at matching time
    numExpressions = nextId;
    alwaysEvaluateExpressionList = List.copyOf(unconditional);
    keyToExpressionsMap = byKey.entrySet().stream().collect(Collectors.toUnmodifiableMap(
      Map.Entry::getKey,
      grouped -> grouped.getValue().stream().toList()
    ));
    keyToExpressionsList = List.copyOf(keyToExpressionsMap.entrySet());
  }

  /**
   * Evaluates each of {@code expressions} against {@code input} unless {@code visited} says it was already
   * evaluated, appending a match to {@code result} for every expression that evaluates to true. A {@code null} list
   * is treated as empty.
   */
  private static <T> void visitExpressions(WithTags input, List<Match<T>> result,
    boolean[] visited, List<EntryWithId<T>> expressions) {
    if (expressions == null) {
      return;
    }
    for (EntryWithId<T> candidate : expressions) {
      int id = candidate.id();
      if (visited[id]) {
        continue;
      }
      visited[id] = true;
      List<String> matchKeys = new ArrayList<>();
      if (candidate.expression().evaluate(input, matchKeys)) {
        result.add(new Match<>(candidate.result(), matchKeys, id));
      }
    }
  }

  /**
   * Returns the matches for {@code input}, sorted by id. The "always evaluate" expressions are evaluated first, then
   * the expressions filed under keys that are present in the tags of {@code input}.
   */
  @Override
  public List<Match<T>> getMatchesWithTriggers(WithTags input) {
    boolean[] visited = new boolean[numExpressions];
    List<Match<T>> matches = new ArrayList<>();
    visitExpressions(input, matches, visited, alwaysEvaluateExpressionList);

    Map<String, Object> tags = input.tags();
    boolean fewerTagsThanIndexedKeys = tags.size() < keyToExpressionsMap.size();
    if (fewerTagsThanIndexedKeys) {
      // iterate over the input's tags and look each one up in the index
      for (String inputKey : tags.keySet()) {
        visitExpressions(input, matches, visited, keyToExpressionsMap.get(inputKey));
      }
    } else {
      // iterate over the indexed keys and check whether the input has each one
      for (Map.Entry<String, List<EntryWithId<T>>> indexed : keyToExpressionsList) {
        if (tags.containsKey(indexed.getKey())) {
          visitExpressions(input, matches, visited, indexed.getValue());
        }
      }
    }

    matches.sort(BY_ID);
    return matches;
  }
}
/** Index that limits the search space of expressions based on geometry type of an input element. */
private static class GeometryTypeIndex<T> implements Index<T> {
private final KeyIndex<T> pointIndex;
private final KeyIndex<T> lineIndex;
private final KeyIndex<T> polygonIndex;
private final KeyIndex<T> otherIndex;
private GeometryTypeIndex(MultiExpression<T> expressions, boolean warn) {
// build an index per type then search in each of those indexes based on the geometry type of each input element
// this narrows the search space substantially, improving matching performance
pointIndex = indexForType(expressions, Expression.POINT_TYPE, warn);
lineIndex = indexForType(expressions, Expression.LINESTRING_TYPE, warn);
polygonIndex = indexForType(expressions, Expression.POLYGON_TYPE, warn);
otherIndex = indexForType(expressions, Expression.UNKNOWN_GEOMETRY_TYPE, warn);
}
private KeyIndex<T> indexForType(MultiExpression<T> expressions, String type, boolean warn) {
return new KeyIndex<>(
expressions
.replace(matchType(type), TRUE)
.replace(e -> e instanceof Expression.MatchType, FALSE)
.simplify(),
warn
);
}
/**
* Returns all data values associated with expressions that match an input element, along with the tag keys that
* caused the match.
*/
public List<Match<T>> getMatchesWithTriggers(WithTags input) {
List<Match<T>> result;
if (input instanceof WithGeometryType withGeometryType) {
if (withGeometryType.isPoint()) {
result = pointIndex.getMatchesWithTriggers(input);
} else if (withGeometryType.canBeLine()) {
result = lineIndex.getMatchesWithTriggers(input);
// closed ways can be lines or polygons, unless area=yes or no
if (withGeometryType.canBePolygon()) {
result.addAll(polygonIndex.getMatchesWithTriggers(input));
}
} else if (withGeometryType.canBePolygon()) {
result = polygonIndex.getMatchesWithTriggers(input);
} else {
result = otherIndex.getMatchesWithTriggers(input);
}
} else {
result = otherIndex.getMatchesWithTriggers(input);
}
result.sort(BY_ID);
return result;
}
}
/**
 * An expression/value pair with an ID that is used to record whether the expression was already evaluated during a
 * lookup.
 *
 * @param result     data value to return when {@code expression} matches
 * @param expression expression to evaluate on input elements
 * @param id         ID assigned when the index is built; used as the position in the "visited" array
 */
private record EntryWithId<T> (T result, Expression expression, @Override int id) implements WithId {}
/**
 * An {@code expression} to evaluate on input elements and {@code result} value to return when the element matches.
 *
 * @param result     data value associated with {@code expression}
 * @param expression expression to evaluate on input elements
 */
public record Entry<T> (T result, Expression expression) {}
/**
 * The result when an expression matches, along with the input element tag {@code keys} that triggered the match.
 *
 * @param match data value associated with the expression that matched
 * @param keys  keys collected while evaluating the expression on the input element
 * @param id    ID of the indexed expression that produced this match; matches are sorted by it
 */
public record Match<T> (T match, List<String> keys, @Override int id) implements WithId {}
}