From f69882addbf99b600b06cf5e2a30f9d39008b339 Mon Sep 17 00:00:00 2001 From: Michael Barry Date: Tue, 2 Apr 2024 20:34:42 -0400 Subject: [PATCH] Fix tile size stats memory leak (#861) --- .../planetiler/archive/TileArchiveWriter.java | 3 +- .../planetiler/util/TileSizeStats.java | 70 ++++++++++--------- .../planetiler/util/TileSizeStatsTest.java | 4 +- 3 files changed, 42 insertions(+), 35 deletions(-) diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveWriter.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveWriter.java index bba4bd63..1e224854 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveWriter.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/archive/TileArchiveWriter.java @@ -266,6 +266,7 @@ public class TileArchiveWriter { boolean lastIsFill = false; List lastLayerStats = null; boolean skipFilled = config.skipFilledTiles(); + var layerStatsSerializer = TileSizeStats.newThreadLocalSerializer(); var tileStatsUpdater = tileStats.threadLocalUpdater(); var layerAttrStatsUpdater = layerAttrStats.handlerForThread(); @@ -320,7 +321,7 @@ public class TileArchiveWriter { if ((!skipFilled || !lastIsFill) && bytes != null) { tileStatsUpdater.recordTile(tileFeatures.tileCoord(), bytes.length, layerStats); List layerStatsRows = config.outputLayerStats() ? - TileSizeStats.formatOutputRows(tileFeatures.tileCoord(), bytes.length, layerStats) : + layerStatsSerializer.formatOutputRows(tileFeatures.tileCoord(), bytes.length, layerStats) : List.of(); result.add( new TileEncodingResult( diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileSizeStats.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileSizeStats.java index cb3f6d87..28df236c 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileSizeStats.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileSizeStats.java @@ -38,7 +38,7 @@ import vector_tile.VectorTileProto; * Utilities for extracting tile and layer size summaries from encoded vector tiles. *

* {@link #computeTileStats(VectorTileProto.Tile)} extracts statistics about each layer in a tile and - * {@link #formatOutputRows(TileCoord, int, List)} formats them as row of a TSV file to write. + * {@link TsvSerializer} formats them as row of a TSV file to write. *

* To generate a tsv.gz file with stats for each tile, you can add {@code --output-layerstats} option when generating an * archive, or run the following an existing archive: @@ -52,13 +52,11 @@ import vector_tile.VectorTileProto; public class TileSizeStats { private static final int BATCH_SIZE = 1_000; - private static final CsvMapper MAPPER = new CsvMapper(); - private static final CsvSchema SCHEMA = MAPPER + private static final CsvSchema SCHEMA = new CsvMapper() .schemaFor(OutputRow.class) .withoutHeader() .withColumnSeparator('\t') .withLineSeparator("\n"); - private static final ObjectWriter WRITER = MAPPER.writer(SCHEMA); /** Returns the default path that a layerstats file should go relative to an existing archive. */ public static Path getDefaultLayerstatsPath(Path archive) { @@ -120,6 +118,7 @@ public class TileSizeStats { List layerStats = null; var updater = tileStats.threadLocalUpdater(); + var layerStatsSerializer = TileSizeStats.newThreadLocalSerializer(); for (var batch : prev) { List lines = new ArrayList<>(batch.tiles.size()); for (var tile : batch.tiles) { @@ -130,7 +129,7 @@ public class TileSizeStats { layerStats = computeTileStats(decoded); } updater.recordTile(tile.coord(), zipped.length, layerStats); - lines.addAll(TileSizeStats.formatOutputRows(tile.coord(), zipped.length, layerStats)); + lines.addAll(layerStatsSerializer.formatOutputRows(tile.coord(), zipped.length, layerStats)); } batch.stats.complete(lines); } @@ -161,28 +160,32 @@ public class TileSizeStats { stats.printSummary(); } - /** Returns the TSV rows to output for all the layers in a tile. */ - public static List formatOutputRows(TileCoord tileCoord, int archivedBytes, List layerStats) - throws IOException { - int hilbert = tileCoord.hilbertEncoded(); - List result = new ArrayList<>(layerStats.size()); - for (var layer : layerStats) { - result.add(lineToString(new OutputRow( - tileCoord.z(), - tileCoord.x(), - tileCoord.y(), - hilbert, - archivedBytes, - layer.layer, - layer.layerBytes, - layer.layerFeatures, - layer.layerGeometries, - layer.layerAttrBytes, - layer.layerAttrKeys, - layer.layerAttrValues - ))); - } - return result; + /** Returns a {@link TsvSerializer} that can be used by a single thread to convert to CSV rows. */ + public static TsvSerializer newThreadLocalSerializer() { + // CsvMapper is not entirely thread safe, and can end up with a BufferRecycler memory leak when writeValueAsString + // is called billions of times from multiple threads, so we generate a new instance per serializing thread + ObjectWriter writer = new CsvMapper().writer(SCHEMA); + return (tileCoord, archivedBytes, layerStats) -> { + int hilbert = tileCoord.hilbertEncoded(); + List result = new ArrayList<>(layerStats.size()); + for (var layer : layerStats) { + result.add(writer.writeValueAsString(new OutputRow( + tileCoord.z(), + tileCoord.x(), + tileCoord.y(), + hilbert, + archivedBytes, + layer.layer, + layer.layerBytes, + layer.layerFeatures, + layer.layerGeometries, + layer.layerAttrBytes, + layer.layerAttrKeys, + layer.layerAttrValues + ))); + } + return result; + }; } /** @@ -195,11 +198,6 @@ public class TileSizeStats { StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.CREATE)))); } - /** Returns {@code output} encoded as a TSV row string. */ - public static String lineToString(OutputRow output) throws IOException { - return WRITER.writeValueAsString(output); - } - /** Returns the header row for the output TSV file. */ public static String headerRow() { return String.join( @@ -240,6 +238,14 @@ public class TileSizeStats { return result; } + @FunctionalInterface + public interface TsvSerializer { + + /** Returns the TSV rows to output for all the layers in a tile. */ + List formatOutputRows(TileCoord tileCoord, int archivedBytes, List layerStats) + throws IOException; + } + /** Model for the data contained in each row in the TSV. */ @JsonPropertyOrder({ "z", diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileSizeStatsTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileSizeStatsTest.java index 538af3df..c8ec574f 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileSizeStatsTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileSizeStatsTest.java @@ -37,7 +37,7 @@ class TileSizeStatsTest { assertEquals(2, entry1.layerAttrKeys()); assertEquals(2, entry1.layerAttrValues()); - var formatted = TileSizeStats.formatOutputRows(TileCoord.ofXYZ(1, 2, 3), 999, stats); + var formatted = TileSizeStats.newThreadLocalSerializer().formatOutputRows(TileCoord.ofXYZ(1, 2, 3), 999, stats); assertEquals( """ z x y hilbert archived_tile_bytes layer layer_bytes layer_features layer_geometries layer_attr_bytes layer_attr_keys layer_attr_values @@ -86,7 +86,7 @@ class TileSizeStatsTest { assertEquals("b", entry2.layer()); assertEquals(1, entry2.layerFeatures()); - var formatted = TileSizeStats.formatOutputRows(TileCoord.ofXYZ(1, 2, 3), 999, stats); + var formatted = TileSizeStats.newThreadLocalSerializer().formatOutputRows(TileCoord.ofXYZ(1, 2, 3), 999, stats); assertEquals( """ z x y hilbert archived_tile_bytes layer layer_bytes layer_features layer_geometries layer_attr_bytes layer_attr_keys layer_attr_values