kopia lustrzana https://github.com/onthegomap/planetiler
Allow more than 256 attribute keys (#275)
rodzic
c58696ed69
commit
3fd094ddae
|
@ -56,22 +56,18 @@ public final class FeatureGroup implements Iterable<FeatureGroup.TileFeatures>,
|
|||
private static final Logger LOGGER = LoggerFactory.getLogger(FeatureGroup.class);
|
||||
private final FeatureSort sorter;
|
||||
private final Profile profile;
|
||||
private final CommonStringEncoder commonStrings;
|
||||
private final CommonStringEncoder.AsByte commonLayerStrings = new CommonStringEncoder.AsByte();
|
||||
private final CommonStringEncoder commonValueStrings = new CommonStringEncoder(100_000);
|
||||
private final Stats stats;
|
||||
private final LayerStats layerStats = new LayerStats();
|
||||
private volatile boolean prepared = false;
|
||||
|
||||
FeatureGroup(FeatureSort sorter, Profile profile, CommonStringEncoder commonStrings, Stats stats) {
|
||||
FeatureGroup(FeatureSort sorter, Profile profile, Stats stats) {
|
||||
this.sorter = sorter;
|
||||
this.profile = profile;
|
||||
this.commonStrings = commonStrings;
|
||||
this.stats = stats;
|
||||
}
|
||||
|
||||
FeatureGroup(FeatureSort sorter, Profile profile, Stats stats) {
|
||||
this(sorter, profile, new CommonStringEncoder(), stats);
|
||||
}
|
||||
|
||||
/** Returns a feature grouper that stores all feature in-memory. Only suitable for toy use-cases like unit tests. */
|
||||
public static FeatureGroup newInMemoryFeatureGroup(Profile profile, Stats stats) {
|
||||
return new FeatureGroup(FeatureSort.newInMemory(), profile, stats);
|
||||
|
@ -190,7 +186,7 @@ public final class FeatureGroup implements Iterable<FeatureGroup.TileFeatures>,
|
|||
|
||||
private long encodeKey(RenderedFeature feature) {
|
||||
var vectorTileFeature = feature.vectorTileFeature();
|
||||
byte encodedLayer = commonStrings.encode(vectorTileFeature.layer());
|
||||
byte encodedLayer = commonLayerStrings.encode(vectorTileFeature.layer());
|
||||
return encodeKey(
|
||||
feature.tile().encoded(),
|
||||
encodedLayer,
|
||||
|
@ -214,7 +210,7 @@ public final class FeatureGroup implements Iterable<FeatureGroup.TileFeatures>,
|
|||
packer.packMapHeader((int) attrs.values().stream().filter(Objects::nonNull).count());
|
||||
for (Map.Entry<String, Object> entry : attrs.entrySet()) {
|
||||
if (entry.getValue() != null) {
|
||||
packer.packByte(commonStrings.encode(entry.getKey()));
|
||||
packer.packInt(commonValueStrings.encode(entry.getKey()));
|
||||
Object value = entry.getValue();
|
||||
if (value instanceof String string) {
|
||||
packer.packValue(ValueFactory.newString(string));
|
||||
|
@ -427,7 +423,7 @@ public final class FeatureGroup implements Iterable<FeatureGroup.TileFeatures>,
|
|||
int mapSize = unpacker.unpackMapHeader();
|
||||
Map<String, Object> attrs = new HashMap<>(mapSize);
|
||||
for (int i = 0; i < mapSize; i++) {
|
||||
String key = commonStrings.decode(unpacker.unpackByte());
|
||||
String key = commonValueStrings.decode(unpacker.unpackInt());
|
||||
Value v = unpacker.unpackValue();
|
||||
if (v.isStringValue()) {
|
||||
attrs.put(key, v.asStringValue().asString());
|
||||
|
@ -444,7 +440,7 @@ public final class FeatureGroup implements Iterable<FeatureGroup.TileFeatures>,
|
|||
for (int i = 0; i < commandSize; i++) {
|
||||
commands[i] = unpacker.unpackInt();
|
||||
}
|
||||
String layer = commonStrings.decode(extractLayerIdFromKey(entry.key()));
|
||||
String layer = commonLayerStrings.decode(extractLayerIdFromKey(entry.key()));
|
||||
return new VectorTile.Feature(
|
||||
layer,
|
||||
id,
|
||||
|
|
|
@ -1,27 +1,35 @@
|
|||
package com.onthegomap.planetiler.util;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import javax.annotation.concurrent.ThreadSafe;
|
||||
|
||||
/**
|
||||
* A utility for compressing up to 250 commonly-used strings (i.e. layer name, tag attributes) into a single byte.
|
||||
* A utility for compressing commonly-used strings (i.e. layer name, tag attributes).
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class CommonStringEncoder {
|
||||
|
||||
private final ConcurrentMap<String, Byte> stringToId = new ConcurrentHashMap<>(255);
|
||||
private final String[] idToString = new String[255];
|
||||
private final AtomicInteger layerId = new AtomicInteger(0);
|
||||
private final int maxStrings;
|
||||
|
||||
private final Map<String, Integer> stringToId;
|
||||
private final String[] idToString;
|
||||
private final AtomicInteger stringId = new AtomicInteger(0);
|
||||
|
||||
public CommonStringEncoder(int maxStrings) {
|
||||
this.maxStrings = maxStrings;
|
||||
stringToId = new ConcurrentHashMap<>(maxStrings);
|
||||
idToString = new String[maxStrings];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the string for {@code id}.
|
||||
*
|
||||
* @throws IllegalArgumentException if there is no value for {@code id}.
|
||||
*/
|
||||
public String decode(byte id) {
|
||||
String str = idToString[id & 0xff];
|
||||
public String decode(int id) {
|
||||
String str = idToString[id];
|
||||
if (str == null) {
|
||||
throw new IllegalArgumentException("No string for " + id);
|
||||
}
|
||||
|
@ -29,26 +37,41 @@ public class CommonStringEncoder {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns a byte value to each unique string passed in.
|
||||
* Returns a int value to each unique string passed in.
|
||||
*
|
||||
* @param string the string to store
|
||||
* @return a byte that can be converted back to a string by {@link #decode(byte)}.
|
||||
* @return an int that can be converted back to a string by {@link #decode(int)}.
|
||||
* @throws IllegalArgumentException if called for too many values
|
||||
*/
|
||||
public byte encode(String string) {
|
||||
public int encode(String string) {
|
||||
// optimization to avoid more expensive computeIfAbsent call for the majority case when concurrent hash map already
|
||||
// contains the value.
|
||||
Byte result = stringToId.get(string);
|
||||
Integer result = stringToId.get(string);
|
||||
if (result == null) {
|
||||
result = stringToId.computeIfAbsent(string, s -> {
|
||||
int id = layerId.getAndIncrement();
|
||||
if (id > 250) {
|
||||
throw new IllegalArgumentException("Too many string keys when inserting " + string);
|
||||
int id = stringId.getAndIncrement();
|
||||
if (id >= maxStrings) {
|
||||
throw new IllegalArgumentException("Too many strings");
|
||||
}
|
||||
idToString[id] = string;
|
||||
return (byte) id;
|
||||
return id;
|
||||
});
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Variant of CommonStringEncoder based on byte rather than int for string indexing.
|
||||
*/
|
||||
public static class AsByte {
|
||||
private final CommonStringEncoder encoder = new CommonStringEncoder(256);
|
||||
|
||||
public String decode(byte id) {
|
||||
return encoder.decode(id & 0xff);
|
||||
}
|
||||
|
||||
public byte encode(String string) {
|
||||
return (byte) encoder.encode(string);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -13,6 +13,7 @@ import com.onthegomap.planetiler.reader.SimpleFeature;
|
|||
import com.onthegomap.planetiler.stats.Stats;
|
||||
import com.onthegomap.planetiler.util.ZoomFunction;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
@ -582,4 +583,32 @@ class FeatureCollectorTest {
|
|||
|
||||
assertFalse(iter.hasNext());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testManyAttr() {
|
||||
|
||||
Map<String, Object> tags = new HashMap<>();
|
||||
|
||||
for (int i = 0; i < 500; i++) {
|
||||
tags.put("key" + i, "val" + i);
|
||||
}
|
||||
|
||||
var collector = factory.get(newReaderFeature(newPoint(0, 0), tags));
|
||||
var point = collector.point("layername");
|
||||
|
||||
for (int i = 0; i < 500; i++) {
|
||||
point.setAttr("key" + i, tags.get("key" + i));
|
||||
}
|
||||
|
||||
assertFeatures(13, List.of(
|
||||
Map.of(
|
||||
"key0", "val0",
|
||||
"key10", "val10",
|
||||
"key100", "val100",
|
||||
"key256", "val256",
|
||||
"key499", "val499"
|
||||
)
|
||||
), collector);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -7,26 +7,48 @@ import org.junit.jupiter.api.Test;
|
|||
|
||||
class CommonStringEncoderTest {
|
||||
|
||||
private final CommonStringEncoder commonStringEncoder = new CommonStringEncoder();
|
||||
private final CommonStringEncoder commonStringEncoderInteger = new CommonStringEncoder(100_000);
|
||||
private final CommonStringEncoder.AsByte commonStringEncoderByte = new CommonStringEncoder.AsByte();
|
||||
|
||||
@Test
|
||||
void testRoundTrip() {
|
||||
byte a = commonStringEncoder.encode("a");
|
||||
byte b = commonStringEncoder.encode("b");
|
||||
assertEquals("a", commonStringEncoder.decode(a));
|
||||
assertEquals(a, commonStringEncoder.encode("a"));
|
||||
assertEquals("b", commonStringEncoder.decode(b));
|
||||
assertThrows(IllegalArgumentException.class, () -> commonStringEncoder.decode((byte) (b + 1)));
|
||||
void testRoundTripByte() {
|
||||
byte a = commonStringEncoderByte.encode("a");
|
||||
byte b = commonStringEncoderByte.encode("b");
|
||||
assertEquals("a", commonStringEncoderByte.decode(a));
|
||||
assertEquals(a, commonStringEncoderByte.encode("a"));
|
||||
assertEquals("b", commonStringEncoderByte.decode(b));
|
||||
assertThrows(IllegalArgumentException.class, () -> commonStringEncoderByte.decode((byte) (b + 1)));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testLimitsTo250() {
|
||||
for (int i = 0; i <= 250; i++) {
|
||||
void testRoundTripInteger() {
|
||||
int a = commonStringEncoderInteger.encode("a");
|
||||
int b = commonStringEncoderInteger.encode("b");
|
||||
assertEquals("a", commonStringEncoderInteger.decode(a));
|
||||
assertEquals(a, commonStringEncoderInteger.encode("a"));
|
||||
assertEquals("b", commonStringEncoderInteger.decode(b));
|
||||
assertThrows(IllegalArgumentException.class, () -> commonStringEncoderInteger.decode(b + 1));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testByteLimitsToMax() {
|
||||
for (int i = 0; i <= 255; i++) {
|
||||
String string = Integer.toString(i);
|
||||
byte encoded = commonStringEncoder.encode(Integer.toString(i));
|
||||
String decoded = commonStringEncoder.decode(encoded);
|
||||
byte encoded = commonStringEncoderByte.encode(string);
|
||||
String decoded = commonStringEncoderByte.decode(encoded);
|
||||
assertEquals(string, decoded);
|
||||
}
|
||||
assertThrows(IllegalArgumentException.class, () -> commonStringEncoder.encode("too many"));
|
||||
assertThrows(IllegalArgumentException.class, () -> commonStringEncoderByte.encode("too many"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testIntDoesNotLimitTo250() {
|
||||
for (int i = 0; i < 100_000; i++) {
|
||||
String string = Integer.toString(i);
|
||||
int encoded = commonStringEncoderInteger.encode(string);
|
||||
String decoded = commonStringEncoderInteger.decode(encoded);
|
||||
assertEquals(string, decoded);
|
||||
}
|
||||
assertThrows(IllegalArgumentException.class, () -> commonStringEncoderInteger.encode("too many"));
|
||||
}
|
||||
}
|
||||
|
|
Ładowanie…
Reference in New Issue