Allow more than 256 attribute keys (#275)

pull/279/head
Brian Sperlongano 2022-06-20 05:37:40 -04:00 zatwierdzone przez GitHub
rodzic c58696ed69
commit 3fd094ddae
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
4 zmienionych plików z 109 dodań i 39 usunięć

Wyświetl plik

@ -56,22 +56,18 @@ public final class FeatureGroup implements Iterable<FeatureGroup.TileFeatures>,
private static final Logger LOGGER = LoggerFactory.getLogger(FeatureGroup.class);
private final FeatureSort sorter;
private final Profile profile;
private final CommonStringEncoder commonStrings;
private final CommonStringEncoder.AsByte commonLayerStrings = new CommonStringEncoder.AsByte();
private final CommonStringEncoder commonValueStrings = new CommonStringEncoder(100_000);
private final Stats stats;
private final LayerStats layerStats = new LayerStats();
private volatile boolean prepared = false;
FeatureGroup(FeatureSort sorter, Profile profile, CommonStringEncoder commonStrings, Stats stats) {
FeatureGroup(FeatureSort sorter, Profile profile, Stats stats) {
this.sorter = sorter;
this.profile = profile;
this.commonStrings = commonStrings;
this.stats = stats;
}
FeatureGroup(FeatureSort sorter, Profile profile, Stats stats) {
this(sorter, profile, new CommonStringEncoder(), stats);
}
/** Returns a feature grouper that stores all features in memory. Only suitable for toy use-cases like unit tests. */
public static FeatureGroup newInMemoryFeatureGroup(Profile profile, Stats stats) {
return new FeatureGroup(FeatureSort.newInMemory(), profile, stats);
@ -190,7 +186,7 @@ public final class FeatureGroup implements Iterable<FeatureGroup.TileFeatures>,
private long encodeKey(RenderedFeature feature) {
var vectorTileFeature = feature.vectorTileFeature();
byte encodedLayer = commonStrings.encode(vectorTileFeature.layer());
byte encodedLayer = commonLayerStrings.encode(vectorTileFeature.layer());
return encodeKey(
feature.tile().encoded(),
encodedLayer,
@ -214,7 +210,7 @@ public final class FeatureGroup implements Iterable<FeatureGroup.TileFeatures>,
packer.packMapHeader((int) attrs.values().stream().filter(Objects::nonNull).count());
for (Map.Entry<String, Object> entry : attrs.entrySet()) {
if (entry.getValue() != null) {
packer.packByte(commonStrings.encode(entry.getKey()));
packer.packInt(commonValueStrings.encode(entry.getKey()));
Object value = entry.getValue();
if (value instanceof String string) {
packer.packValue(ValueFactory.newString(string));
@ -427,7 +423,7 @@ public final class FeatureGroup implements Iterable<FeatureGroup.TileFeatures>,
int mapSize = unpacker.unpackMapHeader();
Map<String, Object> attrs = new HashMap<>(mapSize);
for (int i = 0; i < mapSize; i++) {
String key = commonStrings.decode(unpacker.unpackByte());
String key = commonValueStrings.decode(unpacker.unpackInt());
Value v = unpacker.unpackValue();
if (v.isStringValue()) {
attrs.put(key, v.asStringValue().asString());
@ -444,7 +440,7 @@ public final class FeatureGroup implements Iterable<FeatureGroup.TileFeatures>,
for (int i = 0; i < commandSize; i++) {
commands[i] = unpacker.unpackInt();
}
String layer = commonStrings.decode(extractLayerIdFromKey(entry.key()));
String layer = commonLayerStrings.decode(extractLayerIdFromKey(entry.key()));
return new VectorTile.Feature(
layer,
id,

Wyświetl plik

@ -1,27 +1,35 @@
package com.onthegomap.planetiler.util;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;
import javax.annotation.concurrent.ThreadSafe;
/**
* A utility for compressing up to 250 commonly-used strings (i.e. layer name, tag attributes) into a single byte.
* A utility for compressing commonly-used strings (e.g. layer names, tag attributes).
*/
@ThreadSafe
public class CommonStringEncoder {
private final ConcurrentMap<String, Byte> stringToId = new ConcurrentHashMap<>(255);
private final String[] idToString = new String[255];
private final AtomicInteger layerId = new AtomicInteger(0);
private final int maxStrings;
private final Map<String, Integer> stringToId;
private final String[] idToString;
private final AtomicInteger stringId = new AtomicInteger(0);
/**
 * Creates an encoder that can hand out ids for at most {@code maxStrings} distinct strings.
 *
 * @param maxStrings size of the id-to-string table, also used as the initial capacity of the string-to-id map
 */
public CommonStringEncoder(int maxStrings) {
  this.maxStrings = maxStrings;
  // Both lookup structures are sized up front from the same limit.
  this.stringToId = new ConcurrentHashMap<>(maxStrings);
  this.idToString = new String[maxStrings];
}
/**
* Returns the string for {@code id}.
*
* @throws IllegalArgumentException if there is no value for {@code id}.
*/
public String decode(byte id) {
String str = idToString[id & 0xff];
public String decode(int id) {
String str = idToString[id];
if (str == null) {
throw new IllegalArgumentException("No string for " + id);
}
@ -29,26 +37,41 @@ public class CommonStringEncoder {
}
/**
* Returns a byte value to each unique string passed in.
* Returns an int value for each unique string passed in.
*
* @param string the string to store
* @return a byte that can be converted back to a string by {@link #decode(byte)}.
* @return an int that can be converted back to a string by {@link #decode(int)}.
* @throws IllegalArgumentException if called for too many values
*/
public byte encode(String string) {
public int encode(String string) {
// optimization to avoid more expensive computeIfAbsent call for the majority case when concurrent hash map already
// contains the value.
Byte result = stringToId.get(string);
Integer result = stringToId.get(string);
if (result == null) {
result = stringToId.computeIfAbsent(string, s -> {
int id = layerId.getAndIncrement();
if (id > 250) {
throw new IllegalArgumentException("Too many string keys when inserting " + string);
int id = stringId.getAndIncrement();
if (id >= maxStrings) {
throw new IllegalArgumentException("Too many strings");
}
idToString[id] = string;
return (byte) id;
return id;
});
}
return result;
}
/**
 * Byte-indexed variant of {@link CommonStringEncoder}: ids are exposed as a single {@code byte} rather than an
 * {@code int}, backed by an encoder limited to 256 strings.
 */
public static class AsByte {

  private final CommonStringEncoder delegate = new CommonStringEncoder(256);

  /**
   * Returns the string previously assigned to {@code id}.
   *
   * @param id a byte produced by {@link #encode(String)}; interpreted as an unsigned value in the range 0-255
   * @return the string registered under that id
   */
  public String decode(byte id) {
    // Mask off sign extension so that bytes with the high bit set map back to ids 128-255.
    int unsignedId = id & 0xff;
    return delegate.decode(unsignedId);
  }

  /**
   * Returns the byte id for {@code string}, narrowing the int id handed out by the wrapped encoder.
   *
   * @param string the string to store
   * @return a byte that can be converted back to the string by {@link #decode(byte)}
   */
  public byte encode(String string) {
    int wideId = delegate.encode(string);
    return (byte) wideId;
  }
}
}

Wyświetl plik

@ -13,6 +13,7 @@ import com.onthegomap.planetiler.reader.SimpleFeature;
import com.onthegomap.planetiler.stats.Stats;
import com.onthegomap.planetiler.util.ZoomFunction;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.StreamSupport;
@ -582,4 +583,32 @@ class FeatureCollectorTest {
assertFalse(iter.hasNext());
}
@Test
void testManyAttr() {
  // 500 distinct keys: more than a single byte can index.
  final int attrCount = 500;
  Map<String, Object> tags = new HashMap<>();
  for (int n = 0; n < attrCount; n++) {
    tags.put("key" + n, "val" + n);
  }

  var collector = factory.get(newReaderFeature(newPoint(0, 0), tags));
  var point = collector.point("layername");
  // Copy every source tag onto the output feature as an attribute.
  for (int n = 0; n < attrCount; n++) {
    String key = "key" + n;
    point.setAttr(key, tags.get(key));
  }

  // Sample of entries expected on the emitted feature, including keys on both sides of index 255.
  Map<String, Object> expectedSample = Map.of(
    "key0", "val0",
    "key10", "val10",
    "key100", "val100",
    "key256", "val256",
    "key499", "val499"
  );
  assertFeatures(13, List.of(expectedSample), collector);
}
}

Wyświetl plik

@ -7,26 +7,48 @@ import org.junit.jupiter.api.Test;
class CommonStringEncoderTest {
private final CommonStringEncoder commonStringEncoder = new CommonStringEncoder();
private final CommonStringEncoder commonStringEncoderInteger = new CommonStringEncoder(100_000);
private final CommonStringEncoder.AsByte commonStringEncoderByte = new CommonStringEncoder.AsByte();
@Test
void testRoundTrip() {
byte a = commonStringEncoder.encode("a");
byte b = commonStringEncoder.encode("b");
assertEquals("a", commonStringEncoder.decode(a));
assertEquals(a, commonStringEncoder.encode("a"));
assertEquals("b", commonStringEncoder.decode(b));
assertThrows(IllegalArgumentException.class, () -> commonStringEncoder.decode((byte) (b + 1)));
void testRoundTripByte() {
  // Encode two strings, then check decode/encode agree and that an id never handed out is rejected.
  var encoder = commonStringEncoderByte;
  byte idA = encoder.encode("a");
  byte idB = encoder.encode("b");
  byte unassignedId = (byte) (idB + 1);

  assertEquals("a", encoder.decode(idA));
  assertEquals(idA, encoder.encode("a"));
  assertEquals("b", encoder.decode(idB));
  assertThrows(IllegalArgumentException.class, () -> encoder.decode(unassignedId));
}
@Test
void testLimitsTo250() {
for (int i = 0; i <= 250; i++) {
void testRoundTripInteger() {
  // Same round-trip check as the byte variant, using int ids.
  var encoder = commonStringEncoderInteger;
  int idA = encoder.encode("a");
  int idB = encoder.encode("b");
  int unassignedId = idB + 1;

  assertEquals("a", encoder.decode(idA));
  assertEquals(idA, encoder.encode("a"));
  assertEquals("b", encoder.decode(idB));
  assertThrows(IllegalArgumentException.class, () -> encoder.decode(unassignedId));
}
@Test
void testByteLimitsToMax() {
for (int i = 0; i <= 255; i++) {
String string = Integer.toString(i);
byte encoded = commonStringEncoder.encode(Integer.toString(i));
String decoded = commonStringEncoder.decode(encoded);
byte encoded = commonStringEncoderByte.encode(string);
String decoded = commonStringEncoderByte.decode(encoded);
assertEquals(string, decoded);
}
assertThrows(IllegalArgumentException.class, () -> commonStringEncoder.encode("too many"));
assertThrows(IllegalArgumentException.class, () -> commonStringEncoderByte.encode("too many"));
}
@Test
void testIntDoesNotLimitTo250() {
  // Fill the encoder with 100_000 distinct strings, checking each one round-trips.
  for (int n = 0; n < 100_000; n++) {
    String original = String.valueOf(n);
    int id = commonStringEncoderInteger.encode(original);
    assertEquals(original, commonStringEncoderInteger.decode(id));
  }
  // One more distinct string must be rejected.
  assertThrows(IllegalArgumentException.class, () -> commonStringEncoderInteger.encode("too many"));
}
}