diff --git a/planetiler-basemap/src/test/java/com/onthegomap/planetiler/basemap/util/VerifyMonacoTest.java b/planetiler-basemap/src/test/java/com/onthegomap/planetiler/basemap/util/VerifyMonacoTest.java index e74ee6a2..ea2aad90 100644 --- a/planetiler-basemap/src/test/java/com/onthegomap/planetiler/basemap/util/VerifyMonacoTest.java +++ b/planetiler-basemap/src/test/java/com/onthegomap/planetiler/basemap/util/VerifyMonacoTest.java @@ -7,9 +7,11 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import com.onthegomap.planetiler.VectorTile; import com.onthegomap.planetiler.geo.TileCoord; import com.onthegomap.planetiler.mbtiles.Mbtiles; +import com.onthegomap.planetiler.mbtiles.TileEncodingResult; import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.OptionalInt; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -51,7 +53,7 @@ class VerifyMonacoTest { VectorTile.encodeGeometry(point(0, 0)), Map.of() ))); - writer.write(TileCoord.ofXYZ(0, 0, 0), gzip(tile.encode())); + writer.write(new TileEncodingResult(TileCoord.ofXYZ(0, 0, 0), gzip(tile.encode()), OptionalInt.empty())); } assertInvalid(mbtiles); } diff --git a/planetiler-benchmarks/src/main/java/com/onthegomap/planetiler/benchmarks/BenchmarkMbtilesRead.java b/planetiler-benchmarks/src/main/java/com/onthegomap/planetiler/benchmarks/BenchmarkMbtilesRead.java new file mode 100644 index 00000000..b9255ffa --- /dev/null +++ b/planetiler-benchmarks/src/main/java/com/onthegomap/planetiler/benchmarks/BenchmarkMbtilesRead.java @@ -0,0 +1,124 @@ +package com.onthegomap.planetiler.benchmarks; + +import com.google.common.base.Stopwatch; +import com.onthegomap.planetiler.config.Arguments; +import com.onthegomap.planetiler.geo.TileCoord; +import com.onthegomap.planetiler.mbtiles.Mbtiles; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import 
java.time.Duration; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class BenchmarkMbtilesRead { + + private static final Logger LOGGER = LoggerFactory.getLogger(BenchmarkMbtilesRead.class); + + private static final String SELECT_RANDOM_COORDS = + "select tile_column, tile_row, zoom_level from tiles order by random() limit ?"; + + public static void main(String[] args) throws Exception { + + Arguments arguments = Arguments.fromArgs(args); + int repetitions = arguments.getInteger("bench_repetitions", "number of repetitions", 10); + int nrTileReads = arguments.getInteger("bench_nr_tile_reads", "number of tiles to read", 500_000); + int preWarms = arguments.getInteger("bench_pre_warms", "number of pre warm runs", 3); + + List mbtilesPaths = arguments.getList("bench_mbtiles", "the mbtiles file to read from", List.of()).stream() + .map(Paths::get).toList(); + + + if (mbtilesPaths.isEmpty()) { + throw new IllegalArgumentException("pass one or many paths to the same mbtiles file"); + } + + mbtilesPaths.stream().forEach(p -> { + if (!Files.exists(p) || !Files.isRegularFile(p)) { + throw new IllegalArgumentException("%s does not exist or is not a regular file".formatted(p)); + } + }); + + List randomCoordsToFetchPerRepetition = new LinkedList<>(); + + do { + try (var db = Mbtiles.newReadOnlyDatabase(mbtilesPaths.get(0))) { + try (var statement = db.connection().prepareStatement(SELECT_RANDOM_COORDS)) { + statement.setInt(1, nrTileReads - randomCoordsToFetchPerRepetition.size()); + var rs = statement.executeQuery(); + while (rs.next()) { + int x = rs.getInt("tile_column"); + int y = rs.getInt("tile_row"); + int z = rs.getInt("zoom_level"); + randomCoordsToFetchPerRepetition.add(TileCoord.ofXYZ(x, (1 << z) - 1 - y, z)); + } + } + } + } while (randomCoordsToFetchPerRepetition.size() < nrTileReads); + + Map avgReadOperationsPerSecondPerDb = new HashMap<>(); + for (Path dbPath : 
mbtilesPaths) { + List results = new LinkedList<>(); + + LOGGER.info("working on {}", dbPath); + + for (int preWarm = 0; preWarm < preWarms; preWarm++) { + readEachTile(randomCoordsToFetchPerRepetition, dbPath); + } + + for (int rep = 0; rep < repetitions; rep++) { + results.add(readEachTile(randomCoordsToFetchPerRepetition, dbPath)); + } + var readOperationsPerSecondStats = + results.stream().mapToDouble(ReadResult::readOperationsPerSecond).summaryStatistics(); + LOGGER.info("readOperationsPerSecondStats: {}", readOperationsPerSecondStats); + + avgReadOperationsPerSecondPerDb.put(dbPath, readOperationsPerSecondStats.getAverage()); + } + + List keysSorted = avgReadOperationsPerSecondPerDb.entrySet().stream() + .sorted((o1, o2) -> o1.getValue().compareTo(o2.getValue())) + .map(Map.Entry::getKey) + .toList(); + + LOGGER.info("diffs"); + for (int i = 0; i < keysSorted.size() - 1; i++) { + for (int j = i + 1; j < keysSorted.size(); j++) { + Path db0 = keysSorted.get(i); + double avg0 = avgReadOperationsPerSecondPerDb.get(db0); + Path db1 = keysSorted.get(j); + double avg1 = avgReadOperationsPerSecondPerDb.get(db1); + + double diff = avg1 * 100 / avg0 - 100; + + LOGGER.info("\"{}\" to \"{}\": avg read operations per second improved by {}%", db0, db1, diff); + } + } + } + + private static ReadResult readEachTile(List coordsToFetch, Path dbPath) throws IOException { + try (var db = Mbtiles.newReadOnlyDatabase(dbPath)) { + db.getTile(0, 0, 0); // trigger prepared statement creation + var totalSw = Stopwatch.createStarted(); + for (var coordToFetch : coordsToFetch) { + if (db.getTile(coordToFetch) == null) { + throw new IllegalStateException("%s should exist in %s".formatted(coordToFetch, dbPath)); + } + } + totalSw.stop(); + return new ReadResult(totalSw.elapsed(), coordsToFetch.size()); + } + } + + private record ReadResult(Duration duration, int coordsFetchedCount) { + double readOperationsPerSecond() { + double secondsFractional = duration.toNanos() / 1E9; + return 
coordsFetchedCount / secondsFractional; + } + } +} diff --git a/planetiler-benchmarks/src/main/java/com/onthegomap/planetiler/benchmarks/BenchmarkMbtilesWriter.java b/planetiler-benchmarks/src/main/java/com/onthegomap/planetiler/benchmarks/BenchmarkMbtilesWriter.java new file mode 100644 index 00000000..4794f796 --- /dev/null +++ b/planetiler-benchmarks/src/main/java/com/onthegomap/planetiler/benchmarks/BenchmarkMbtilesWriter.java @@ -0,0 +1,142 @@ +package com.onthegomap.planetiler.benchmarks; + +import com.google.common.base.Stopwatch; +import com.onthegomap.planetiler.config.Arguments; +import com.onthegomap.planetiler.config.PlanetilerConfig; +import com.onthegomap.planetiler.geo.TileCoord; +import com.onthegomap.planetiler.mbtiles.Mbtiles; +import com.onthegomap.planetiler.mbtiles.Mbtiles.BatchedTileWriter; +import com.onthegomap.planetiler.mbtiles.TileEncodingResult; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.DoubleSummaryStatistics; +import java.util.OptionalInt; +import java.util.Random; +import java.util.concurrent.TimeUnit; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class BenchmarkMbtilesWriter { + + private static final Logger LOGGER = LoggerFactory.getLogger(BenchmarkMbtilesWriter.class); + + public static void main(String[] args) throws IOException { + + Arguments arguments = Arguments.fromArgs(args); + + int tilesToWrite = arguments.getInteger("bench_tiles_to_write", "number of tiles to write", 1_000_000); + int repetitions = arguments.getInteger("bench_repetitions", "number of repetitions", 10); + /* + * select count(distinct(tile_data_id)) * 100.0 / count(*) from tiles_shallow + * => ~8% (Australia) + */ + int distinctTilesInPercent = arguments.getInteger("bench_distinct_tiles", "distinct tiles in percent", 10); + /* + * select avg(length(tile_data)) + * from (select tile_data_id from tiles_shallow group by tile_data_id having count(*) = 1) 
as x + * join tiles_data using(tile_data_id) + * => ~785 (Australia) + */ + int distinctTileDataSize = + arguments.getInteger("bench_distinct_tile_data_size", "distinct tile data size in bytes", 800); + /* + * select avg(length(tile_data)) + * from (select tile_data_id from tiles_shallow group by tile_data_id having count(*) > 1) as x + * join tiles_shallow using(tile_data_id) + * join tiles_data using(tile_data_id) + * => ~93 (Australia) + */ + int dupeTileDataSize = arguments.getInteger("bench_dupe_tile_data_size", "dupe tile data size in bytes", 100); + /* + * select count(*) * 100.0 / sum(usage_count) + * from (select tile_data_id, count(*) as usage_count from tiles_shallow group by tile_data_id having count(*) > 1) + * => ~0.17% (Australia) + */ + int dupeSpreadInPercent = arguments.getInteger("bench_dupe_spread", "dupe spread in percent", 10); + + byte[] distinctTileData = createFilledByteArray(distinctTileDataSize); + byte[] dupeTileData = createFilledByteArray(dupeTileDataSize); + + PlanetilerConfig config = PlanetilerConfig.from(arguments); + + DoubleSummaryStatistics tileWritesPerSecondsStats = new DoubleSummaryStatistics(); + + for (int repetition = 0; repetition < repetitions; repetition++) { + + Path outputPath = getTempOutputPath(); + try (var mbtiles = Mbtiles.newWriteToFileDatabase(outputPath, config.compactDb())) { + + mbtiles.createTables(); + if (!config.deferIndexCreation()) { + mbtiles.addTileIndex(); + } + + try (var writer = mbtiles.newBatchedTileWriter()) { + Stopwatch sw = Stopwatch.createStarted(); + writeTiles(writer, tilesToWrite, distinctTilesInPercent, distinctTileData, dupeTileData, dupeSpreadInPercent); + sw.stop(); + double secondsFractional = sw.elapsed(TimeUnit.NANOSECONDS) / 1E9; + double tileWritesPerSecond = tilesToWrite / secondsFractional; + tileWritesPerSecondsStats.accept(tileWritesPerSecond); + } + + } finally { + Files.delete(outputPath); + } + } + + LOGGER.info("tileWritesPerSecondsStats: {}", tileWritesPerSecondsStats); 
+ } + + + private static void writeTiles(BatchedTileWriter writer, int tilesToWrite, int distinctTilesInPercent, + byte[] distinctTileData, byte[] dupeTileData, int dupeSpreadInPercent) { + + int dupesToWrite = (int) Math.round(tilesToWrite * (100 - distinctTilesInPercent) / 100.0); + int dupeHashMod = (int) Math.round(dupesToWrite * dupeSpreadInPercent / 100.0); + int tilesWritten = 0; + int dupeCounter = 0; + for (int z = 0; z <= 14; z++) { + int maxCoord = 1 << z; + for (int x = 0; x < maxCoord; x++) { + for (int y = 0; y < maxCoord; y++) { + + TileCoord coord = TileCoord.ofXYZ(x, y, z); + TileEncodingResult toWrite; + if (tilesWritten % 100 < distinctTilesInPercent) { + toWrite = new TileEncodingResult(coord, distinctTileData, OptionalInt.empty()); + } else { + ++dupeCounter; + int hash = dupeHashMod == 0 ? 0 : dupeCounter % dupeHashMod; + toWrite = new TileEncodingResult(coord, dupeTileData, OptionalInt.of(hash)); + } + + writer.write(toWrite); + + if (++tilesWritten >= tilesToWrite) { + return; + } + } + } + } + } + + private static Path getTempOutputPath() { + File f; + try { + f = File.createTempFile("planetiler", ".mbtiles"); + } catch (IOException e) { + throw new IllegalStateException(e); + } + f.deleteOnExit(); + return f.toPath(); + } + + private static byte[] createFilledByteArray(int len) { + byte[] data = new byte[len]; + new Random(0).nextBytes(data); + return data; + } +} diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java index 74bd9def..1cda4e96 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureGroup.java @@ -12,6 +12,7 @@ import com.onthegomap.planetiler.stats.Stats; import com.onthegomap.planetiler.util.CloseableConusmer; import com.onthegomap.planetiler.util.CommonStringEncoder; import 
com.onthegomap.planetiler.util.DiskBacked; +import com.onthegomap.planetiler.util.Hashing; import com.onthegomap.planetiler.util.LayerStats; import java.io.Closeable; import java.io.IOException; @@ -341,6 +342,22 @@ public final class FeatureGroup implements Iterable, return tileCoord; } + /** + * Generates a hash over the feature's relevant data: layer, geometry, and attributes. The coordinates are + * not part of the hash. + *

+ * Used as an optimization to avoid writing the same (ocean) tiles over and over again. + */ + public int generateContentHash() { + int hash = Hashing.FNV1_32_INIT; + for (var feature : entries) { + byte layerId = extractLayerIdFromKey(feature.key()); + hash = Hashing.fnv1a32(hash, layerId); + hash = Hashing.fnv1a32(hash, feature.value()); + } + return hash; + } + /** * Returns true if {@code other} contains features with identical layer, geometry, and attributes, as this tile - * even if the tiles have separate coordinates. @@ -363,6 +380,7 @@ public final class FeatureGroup implements Iterable, return true; } + private VectorTile.Feature decodeVectorTileFeature(SortableFeature entry) { try (MessageUnpacker unpacker = MessagePack.newDefaultUnpacker(entry.value())) { long group; diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java index 67bf2878..dbf45b0d 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java @@ -39,7 +39,8 @@ public record PlanetilerConfig( double minFeatureSizeBelowMaxZoom, double simplifyToleranceAtMaxZoom, double simplifyToleranceBelowMaxZoom, - boolean osmLazyReads + boolean osmLazyReads, + boolean compactDb ) { public static final int MIN_MINZOOM = 0; @@ -135,6 +136,9 @@ public record PlanetilerConfig( 0.1d), arguments.getBoolean("osm_lazy_reads", "Read OSM blocks from disk in worker threads", + false), + arguments.getBoolean("compact_db", + "Reduce the DB size by separating and deduping the tile data", false) ); } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/mbtiles/Compare.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/mbtiles/Compare.java new file mode 100644 index 00000000..964d231f --- /dev/null +++ 
b/planetiler-core/src/main/java/com/onthegomap/planetiler/mbtiles/Compare.java @@ -0,0 +1,175 @@ +package com.onthegomap.planetiler.mbtiles; + +import static com.onthegomap.planetiler.VectorTile.decode; +import static com.onthegomap.planetiler.util.Gzip.gunzip; + +import com.google.common.collect.Sets; +import com.onthegomap.planetiler.VectorTile; +import com.onthegomap.planetiler.config.Arguments; +import com.onthegomap.planetiler.geo.GeometryException; +import com.onthegomap.planetiler.geo.TileCoord; +import java.nio.file.Path; +import java.sql.SQLException; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import org.locationtech.jts.geom.Geometry; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A utility to compare two mbtiles files. + *

+ * See {@link VectorTileFeatureForCmp} for the comparison rules. The results planetiler produces are not necessarily stable, + * so a few features may occasionally differ within a tile. POI coordinates may also differ slightly. This + * should get fixed in https://github.com/onthegomap/planetiler/issues/215 + *

+ * => The tool helps to see if two mbtiles files are (mostly) identical. + * + */ +public class Compare { + + private static final Logger LOGGER = LoggerFactory.getLogger(Compare.class); + + public static void main(String[] args) throws Exception { + + + Arguments arguments = Arguments.fromArgs(args); + String dbPath0 = arguments.getString("bench_mbtiles0", "the first mbtiles file", null); + String dbPath1 = arguments.getString("bench_mbtiles1", "the second mbtiles file", null); + boolean failOnFeatureDiff = arguments.getBoolean("bench_fail_on_feature_diff", "fail on feature diff", false); + + try ( + var db0 = Mbtiles.newReadOnlyDatabase(Path.of(dbPath0)); + var db1 = Mbtiles.newReadOnlyDatabase(Path.of(dbPath1)) + ) { + long tilesCount0 = getTilesCount(db0); + long tilesCount1 = getTilesCount(db1); + if (tilesCount0 != tilesCount1) { + throw new IllegalArgumentException( + "expected tiles count to be equal but tilesCount0=%d tilesCount1=%d".formatted(tilesCount0, tilesCount1) + ); + } + + int lastPercentage = -1; + long processedTileCounter = 0; + long tilesWithDiffs = 0; + try (var statement = db0.connection().prepareStatement("select tile_column, tile_row, zoom_level from tiles")) { + var rs = statement.executeQuery(); + while (rs.next()) { + processedTileCounter++; + int x = rs.getInt("tile_column"); + int y = rs.getInt("tile_row"); + int z = rs.getInt("zoom_level"); + TileCoord coord = TileCoord.ofXYZ(x, (1 << z) - 1 - y, z); + + int currentPercentage = (int) (processedTileCounter * 100 / tilesCount0); + if (lastPercentage != currentPercentage) { + LOGGER.info("processed {}%", currentPercentage); + } + lastPercentage = currentPercentage; + + var features0 = decode(gunzip(db0.getTile(coord))) + .stream() + .map(VectorTileFeatureForCmp::fromActualFeature) + .collect(Collectors.toSet()); + var features1 = decode(gunzip(db1.getTile(coord))) + .stream() + .map(VectorTileFeatureForCmp::fromActualFeature) + .collect(Collectors.toSet()); + + if 
(!features0.equals(features1)) { + ++tilesWithDiffs; + + var tilesDifferException = new TilesDifferException(coord, features0, features1); + if (failOnFeatureDiff) { + throw tilesDifferException; + } else { + LOGGER.warn(tilesDifferException.getMessage()); + } + } + } + } + + LOGGER.info("totalTiles={} tilesWithDiffs={}", processedTileCounter, tilesWithDiffs); + } + } + + private static long getTilesCount(Mbtiles db) throws SQLException { + try (var statement = db.connection().createStatement()) { + var rs = statement.executeQuery("select count(*) from tiles_shallow"); + rs.next(); + return rs.getLong(1); + } catch (SQLException ignored) { /* presumably no tiles_shallow table (non-compact db): count the tiles table instead */ + try (var statement = db.connection().createStatement()) { + var rs = statement.executeQuery("select count(*) from tiles"); + rs.next(); + return rs.getLong(1); + } + } + } + + /** + * Wrapper around {@link VectorTile.Feature} to compare vector tiles. + *

    + *
  • {@link VectorTile.Feature#id()} won't be compared + *
  • {@link VectorTile.Feature#layer()} will be compared + *
  • {@link VectorTile.Feature#geometry()} gets normalized for comparing + *
  • {@link VectorTile.Feature#attrs()} will be compared except for the rank attribute since the value produced by + * planetiler is not stable and differs on every run (at least for parks) + *
  • {@link VectorTile.Feature#group()} will be compared + *
+ */ + private record VectorTileFeatureForCmp( + String layer, + Geometry normalizedGeometry, + Map attrs, + long group + ) { + static VectorTileFeatureForCmp fromActualFeature(VectorTile.Feature f) { + try { + var attrs = new HashMap<>(f.attrs()); + attrs.remove("rank"); + return new VectorTileFeatureForCmp(f.layer(), f.geometry().decode().norm(), attrs, f.group()); + } catch (GeometryException e) { + throw new IllegalStateException(e); + } + } + } + + private static class TilesDifferException extends RuntimeException { + + private static final long serialVersionUID = 1L; + + public TilesDifferException(TileCoord coord, Set features0, + Set features1) { + super(generateMessage(coord, features0, features1)); + } + + private static String generateMessage(TileCoord coord, Set features0, + Set features1) { + boolean featureCountMatches = features0.size() == features1.size(); + return """ + <<< + feature diff on coord %s - featureCountMatches: %b (%d vs %d) + + additional in db0 + --- + %s + + additional in db1 + --- + %s + >>> + """.formatted( + coord, featureCountMatches, features0.size(), features1.size(), + getDiffJoined(features0, features1, "\n"), + getDiffJoined(features1, features0, "\n")); + } + + private static String getDiffJoined(Set s0, Set s1, String delimiter) { + return Sets.difference(s0, s1).stream().map(Object::toString).collect(Collectors.joining(delimiter)); + } + } +} diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/mbtiles/Mbtiles.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/mbtiles/Mbtiles.java index f05df7e3..a5e173e2 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/mbtiles/Mbtiles.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/mbtiles/Mbtiles.java @@ -2,6 +2,7 @@ package com.onthegomap.planetiler.mbtiles; import static com.fasterxml.jackson.annotation.JsonInclude.Include.NON_ABSENT; +import com.carrotsearch.hppc.IntIntHashMap; import 
com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; @@ -20,15 +21,18 @@ import java.sql.Statement; import java.text.NumberFormat; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.OptionalInt; import java.util.TreeMap; import java.util.stream.Collectors; import java.util.stream.DoubleStream; +import java.util.stream.IntStream; import org.locationtech.jts.geom.Coordinate; import org.locationtech.jts.geom.Envelope; import org.slf4j.Logger; @@ -57,6 +61,23 @@ public final class Mbtiles implements Closeable { ); private static final String TILES_COL_DATA = "tile_data"; + private static final String TILES_DATA_TABLE = "tiles_data"; + private static final String TILES_DATA_COL_DATA_ID = "tile_data_id"; + private static final String TILES_DATA_COL_DATA = "tile_data"; + + private static final String TILES_SHALLOW_TABLE = "tiles_shallow"; + private static final String TILES_SHALLOW_COL_X = TILES_COL_X; + private static final String TILES_SHALLOW_COL_Y = TILES_COL_Y; + private static final String TILES_SHALLOW_COL_Z = TILES_COL_Z; + private static final String TILES_SHALLOW_COL_DATA_ID = TILES_DATA_COL_DATA_ID; + private static final String ADD_TILES_SHALLOW_INDEX_SQL = + "create unique index tiles_shallow_index on %s (%s, %s, %s)".formatted( + TILES_SHALLOW_TABLE, + TILES_SHALLOW_COL_Z, + TILES_SHALLOW_COL_X, + TILES_SHALLOW_COL_Y + ); + private static final String METADATA_TABLE = "metadata"; private static final String METADATA_COL_NAME = "name"; private static final String METADATA_COL_VALUE = "value"; @@ -77,24 +98,31 @@ public final class Mbtiles implements Closeable { private final Connection connection; private PreparedStatement getTileStatement = null; + private 
final boolean compactDb; - public Mbtiles(Connection connection) { + private Mbtiles(Connection connection, boolean compactDb) { this.connection = connection; + this.compactDb = compactDb; } /** Returns a new mbtiles file that won't get written to disk. Useful for toy use-cases like unit tests. */ - public static Mbtiles newInMemoryDatabase() { + public static Mbtiles newInMemoryDatabase(boolean compactDb) { try { SQLiteConfig config = new SQLiteConfig(); config.setApplicationId(MBTILES_APPLICATION_ID); - return new Mbtiles(DriverManager.getConnection("jdbc:sqlite::memory:", config.toProperties())); + return new Mbtiles(DriverManager.getConnection("jdbc:sqlite::memory:", config.toProperties()), compactDb); } catch (SQLException throwables) { throw new IllegalStateException("Unable to create in-memory database", throwables); } } + /** Returns a new in-memory mbtiles file with {@code compactDb} disabled; see {@link #newInMemoryDatabase(boolean)}. */ + public static Mbtiles newInMemoryDatabase() { + return newInMemoryDatabase(false); + } + /** Returns a new connection to an mbtiles file optimized for fast bulk writes. 
*/ - public static Mbtiles newWriteToFileDatabase(Path path) { + public static Mbtiles newWriteToFileDatabase(Path path, boolean compactDb) { try { SQLiteConfig config = new SQLiteConfig(); config.setJournalMode(SQLiteConfig.JournalMode.OFF); @@ -103,7 +131,8 @@ public final class Mbtiles implements Closeable { config.setLockingMode(SQLiteConfig.LockingMode.EXCLUSIVE); config.setTempStore(SQLiteConfig.TempStore.MEMORY); config.setApplicationId(MBTILES_APPLICATION_ID); - return new Mbtiles(DriverManager.getConnection("jdbc:sqlite:" + path.toAbsolutePath(), config.toProperties())); + return new Mbtiles(DriverManager.getConnection("jdbc:sqlite:" + path.toAbsolutePath(), config.toProperties()), + compactDb); } catch (SQLException throwables) { throw new IllegalArgumentException("Unable to open " + path, throwables); } @@ -121,7 +150,7 @@ public final class Mbtiles implements Closeable { // config.setOpenMode(SQLiteOpenMode.NOMUTEX); Connection connection = DriverManager .getConnection("jdbc:sqlite:" + path.toAbsolutePath(), config.toProperties()); - return new Mbtiles(connection); + return new Mbtiles(connection, false /* in read-only mode, it's irrelevant if compact or not */); } catch (SQLException throwables) { throw new IllegalArgumentException("Unable to open " + path, throwables); } @@ -136,29 +165,81 @@ public final class Mbtiles implements Closeable { } } - private Mbtiles execute(String... queries) { + private Mbtiles execute(Collection queries) { for (String query : queries) { try (var statement = connection.createStatement()) { - LOGGER.debug("Execute mbtiles: " + query); + LOGGER.debug("Execute mbtiles: {}", query); statement.execute(query); } catch (SQLException throwables) { - throw new IllegalStateException("Error executing queries " + Arrays.toString(queries), throwables); + throw new IllegalStateException("Error executing queries " + String.join(",", queries), throwables); } } return this; } + private Mbtiles execute(String... 
queries) { + return execute(Arrays.asList(queries)); + } + public Mbtiles addTileIndex() { - return execute(ADD_TILE_INDEX_SQL); + if (compactDb) { + return execute(ADD_TILES_SHALLOW_INDEX_SQL); + } else { + return execute(ADD_TILE_INDEX_SQL); + } } public Mbtiles createTables() { - return execute( - "create table " + METADATA_TABLE + " (" + METADATA_COL_NAME + " text, " + METADATA_COL_VALUE + " text);", - "create unique index name on " + METADATA_TABLE + " (" + METADATA_COL_NAME + ");", - "create table " + TILES_TABLE + " (" + TILES_COL_Z + " integer, " + TILES_COL_X + " integer, " + TILES_COL_Y + - ", " + TILES_COL_DATA + " blob);" - ); + + List ddlStatements = new ArrayList<>(); + + ddlStatements + .add("create table " + METADATA_TABLE + " (" + METADATA_COL_NAME + " text, " + METADATA_COL_VALUE + " text);"); + ddlStatements + .add("create unique index name on " + METADATA_TABLE + " (" + METADATA_COL_NAME + ");"); + + if (compactDb) { + ddlStatements + .add(""" + create table %s ( + %s integer, + %s integer, + %s integer, + %s integer + ) + """.formatted(TILES_SHALLOW_TABLE, + TILES_SHALLOW_COL_Z, TILES_SHALLOW_COL_X, TILES_SHALLOW_COL_Y, TILES_SHALLOW_COL_DATA_ID)); + ddlStatements.add(""" + create table %s ( + %s integer primary key, + %s blob + ) + """.formatted(TILES_DATA_TABLE, TILES_DATA_COL_DATA_ID, TILES_DATA_COL_DATA)); + ddlStatements.add(""" + create view %s AS + select + %s.%s as %s, + %s.%s as %s, + %s.%s as %s, + %s.%s as %s + from %s + join %s on %s.%s = %s.%s + """.formatted( + TILES_TABLE, + TILES_SHALLOW_TABLE, TILES_SHALLOW_COL_Z, TILES_COL_Z, + TILES_SHALLOW_TABLE, TILES_SHALLOW_COL_X, TILES_COL_X, + TILES_SHALLOW_TABLE, TILES_SHALLOW_COL_Y, TILES_COL_Y, + TILES_DATA_TABLE, TILES_DATA_COL_DATA, TILES_COL_DATA, + TILES_SHALLOW_TABLE, + TILES_DATA_TABLE, TILES_SHALLOW_TABLE, TILES_SHALLOW_COL_DATA_ID, TILES_DATA_TABLE, TILES_DATA_COL_DATA_ID + )); + } else { + ddlStatements.add("create table " + TILES_TABLE + " (" + TILES_COL_Z + " integer, " + 
TILES_COL_X + " integer, " + + TILES_COL_Y + ", " + TILES_COL_DATA + " blob);"); + } + + + return execute(ddlStatements); } public Mbtiles vacuumAnalyze() { @@ -168,9 +249,13 @@ public final class Mbtiles implements Closeable { ); } - /** Returns a writer that queues up inserts into the tile database into large batches before executing them. */ + /** Returns a writer that queues up inserts into the tile database(s) into large batches before executing them. */ public BatchedTileWriter newBatchedTileWriter() { - return new BatchedTileWriter(); + if (compactDb) { + return new BatchedCompactTileWriter(); + } else { + return new BatchedNonCompactTileWriter(); + } } /** Returns the contents of the metadata table. */ @@ -183,10 +268,8 @@ public final class Mbtiles implements Closeable { try { getTileStatement = connection.prepareStatement(""" SELECT tile_data FROM %s - WHERE tile_column=? - AND tile_row=? - AND zoom_level=? - """.formatted(TILES_TABLE)); + WHERE %s=? AND %s=? AND %s=? + """.formatted(TILES_TABLE, TILES_COL_X, TILES_COL_Y, TILES_COL_Z)); } catch (SQLException throwables) { throw new IllegalStateException(throwables); } @@ -205,7 +288,7 @@ public final class Mbtiles implements Closeable { stmt.setInt(2, (1 << z) - 1 - y); stmt.setInt(3, z); try (ResultSet rs = stmt.executeQuery()) { - return rs.next() ? rs.getBytes("tile_data") : null; + return rs.next() ? 
rs.getBytes(TILES_COL_DATA) : null; } } catch (SQLException throwables) { throw new IllegalStateException("Could not get tile", throwables); @@ -215,11 +298,13 @@ public final class Mbtiles implements Closeable { public List getAllTileCoords() { List result = new ArrayList<>(); try (Statement statement = connection.createStatement()) { - ResultSet rs = statement.executeQuery("select zoom_level, tile_column, tile_row, tile_data from tiles"); + ResultSet rs = statement.executeQuery( + "select %s, %s, %s, %s from %s".formatted(TILES_COL_Z, TILES_COL_X, TILES_COL_Y, TILES_COL_DATA, TILES_TABLE) + ); while (rs.next()) { - int z = rs.getInt("zoom_level"); - int rawy = rs.getInt("tile_row"); - int x = rs.getInt("tile_column"); + int z = rs.getInt(TILES_COL_Z); + int rawy = rs.getInt(TILES_COL_Y); + int x = rs.getInt(TILES_COL_X); result.add(TileCoord.ofXYZ(x, (1 << z) - 1 - rawy, z)); } } catch (SQLException throwables) { @@ -312,7 +397,7 @@ public final class Mbtiles implements Closeable { } } - /** Contents of a row of the tiles table. */ + /** Contents of a row of the tiles table, or in case of compact mode in the tiles view. */ public record TileEntry(TileCoord tile, byte[] bytes) implements Comparable { @Override @@ -353,64 +438,92 @@ public final class Mbtiles implements Closeable { } } - /** - * A high-throughput writer that accepts new tiles and queues up the writes to execute them in fewer large-batches. - */ - public class BatchedTileWriter implements AutoCloseable { + /** Contents of a row of the tiles_shallow table. 
*/ + private record TileShallowEntry(TileCoord coord, int tileDataId) {} - // max number of parameters in a prepared statements is 999 - private static final int BATCH_SIZE = 999 / 4; - private final List batch; - private final PreparedStatement batchStatement; - private final int batchLimit; - - private BatchedTileWriter() { - batchLimit = BATCH_SIZE; - batch = new ArrayList<>(batchLimit); - batchStatement = createBatchStatement(batchLimit); + /** Contents of a row of the tiles_data table. */ + private record TileDataEntry(int tileDataId, byte[] tileData) { + @Override + public String toString() { + return "TileDataEntry [tileDataId=" + tileDataId + ", tileData=" + Arrays.toString(tileData) + "]"; } - @SuppressWarnings("java:S2077") - private PreparedStatement createBatchStatement(int size) { - List groups = new ArrayList<>(); - for (int i = 0; i < size; i++) { - groups.add("(?,?,?,?)"); + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + Arrays.hashCode(tileData); + result = prime * result + Objects.hash(tileDataId); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; } - try { - return connection.prepareStatement(""" - INSERT INTO %s (%s, %s, %s, %s) VALUES %s; - """.formatted( - TILES_TABLE, - TILES_COL_Z, TILES_COL_X, TILES_COL_Y, - TILES_COL_DATA, - String.join(", ", groups) - )); - } catch (SQLException throwables) { - throw new IllegalStateException("Could not create prepared statement", throwables); + if (!(obj instanceof TileDataEntry)) { + return false; } + TileDataEntry other = (TileDataEntry) obj; + return Arrays.equals(tileData, other.tileData) && tileDataId == other.tileDataId; + } + } + + private abstract class BatchedTableWriterBase implements AutoCloseable { + + private static final int MAX_PARAMETERS_IN_PREPARED_STATEMENT = 999; + private final List batch; + private final PreparedStatement batchStatement; + private final int 
batchLimit; + private final String insertStmtTableName; + private final boolean insertStmtInsertIgnore; + private final String insertStmtValuesPlaceHolder; + private final String insertStmtColumnsCsv; + + + protected BatchedTableWriterBase(String tableName, List columns, boolean insertIgnore) { + batchLimit = MAX_PARAMETERS_IN_PREPARED_STATEMENT / columns.size(); + batch = new ArrayList<>(batchLimit); + insertStmtTableName = tableName; + insertStmtInsertIgnore = insertIgnore; + insertStmtValuesPlaceHolder = columns.stream().map(c -> "?").collect(Collectors.joining(",", "(", ")")); + insertStmtColumnsCsv = columns.stream().collect(Collectors.joining(",")); + batchStatement = createBatchInsertPreparedStatement(batchLimit); } /** Queue-up a write or flush to disk if enough are waiting. */ - public void write(TileCoord tile, byte[] data) { - batch.add(new TileEntry(tile, data)); + void write(T item) { + batch.add(item); if (batch.size() >= batchLimit) { flush(batchStatement); } } + protected abstract int setParamsInStatementForItem(int positionOffset, PreparedStatement statement, T item) + throws SQLException; + + private PreparedStatement createBatchInsertPreparedStatement(int size) { + + final String sql = "INSERT %s INTO %s (%s) VALUES %s;".formatted( + insertStmtInsertIgnore ? 
"OR IGNORE" : "", + insertStmtTableName, + insertStmtColumnsCsv, + IntStream.range(0, size).mapToObj(i -> insertStmtValuesPlaceHolder).collect(Collectors.joining(", ")) + ); + + try { + return connection.prepareStatement(sql); + } catch (SQLException throwables) { + throw new IllegalStateException("Could not create prepared statement", throwables); + } + } + private void flush(PreparedStatement statement) { try { int pos = 1; - for (TileEntry tile : batch) { - TileCoord coord = tile.tile(); - int x = coord.x(); - int y = coord.y(); - int z = coord.z(); - statement.setInt(pos++, z); - statement.setInt(pos++, x); - // flip Y - statement.setInt(pos++, (1 << z) - 1 - y); - statement.setBytes(pos++, tile.bytes()); + for (T item : batch) { + pos = setParamsInStatementForItem(pos, statement, item); } statement.execute(); batch.clear(); @@ -421,17 +534,160 @@ public final class Mbtiles implements Closeable { @Override public void close() { - try { - if (batch.size() > 0) { - try (var lastBatch = createBatchStatement(batch.size())) { - flush(lastBatch); - } + if (!batch.isEmpty()) { + try (var lastBatch = createBatchInsertPreparedStatement(batch.size())) { + flush(lastBatch); + } catch (SQLException throwables) { + throw new IllegalStateException("Error flushing batch", throwables); } + } + try { batchStatement.close(); } catch (SQLException throwables) { LOGGER.warn("Error closing prepared statement", throwables); } } + + } + + + private class BatchedTileTableWriter extends BatchedTableWriterBase { + + private static final List COLUMNS = List.of(TILES_COL_Z, TILES_COL_X, TILES_COL_Y, TILES_COL_DATA); + + BatchedTileTableWriter() { + super(TILES_TABLE, COLUMNS, false); + } + + @Override + protected int setParamsInStatementForItem(int positionOffset, PreparedStatement statement, TileEntry tile) + throws SQLException { + + TileCoord coord = tile.tile(); + int x = coord.x(); + int y = coord.y(); + int z = coord.z(); + statement.setInt(positionOffset++, z); + 
statement.setInt(positionOffset++, x); + // flip Y + statement.setInt(positionOffset++, (1 << z) - 1 - y); + statement.setBytes(positionOffset++, tile.bytes()); + return positionOffset; + } + } + + private class BatchedTileShallowTableWriter extends BatchedTableWriterBase { + + private static final List COLUMNS = + List.of(TILES_SHALLOW_COL_Z, TILES_SHALLOW_COL_X, TILES_SHALLOW_COL_Y, TILES_SHALLOW_COL_DATA_ID); + + BatchedTileShallowTableWriter() { + super(TILES_SHALLOW_TABLE, COLUMNS, false); + } + + @Override + protected int setParamsInStatementForItem(int positionOffset, PreparedStatement statement, TileShallowEntry item) + throws SQLException { + + TileCoord coord = item.coord(); + int x = coord.x(); + int y = coord.y(); + int z = coord.z(); + statement.setInt(positionOffset++, z); + statement.setInt(positionOffset++, x); + // flip Y + statement.setInt(positionOffset++, (1 << z) - 1 - y); + statement.setInt(positionOffset++, item.tileDataId()); + + return positionOffset; + } + } + + private class BatchedTileDataTableWriter extends BatchedTableWriterBase { + + private static final List COLUMNS = List.of(TILES_DATA_COL_DATA_ID, TILES_DATA_COL_DATA); + + BatchedTileDataTableWriter() { + super(TILES_DATA_TABLE, COLUMNS, true); + } + + @Override + protected int setParamsInStatementForItem(int positionOffset, PreparedStatement statement, TileDataEntry item) + throws SQLException { + + statement.setInt(positionOffset++, item.tileDataId()); + statement.setBytes(positionOffset++, item.tileData()); + + return positionOffset; + } + } + + + /** + * A high-throughput writer that accepts new tiles and queues up the writes to execute them in fewer large-batches. 
+ */ + public interface BatchedTileWriter extends AutoCloseable { + void write(TileEncodingResult encodingResult); + + @Override + void close(); + } + + private class BatchedNonCompactTileWriter implements BatchedTileWriter { + + private final BatchedTileTableWriter tableWriter = new BatchedTileTableWriter(); + + @Override + public void write(TileEncodingResult encodingResult) { + tableWriter.write(new TileEntry(encodingResult.coord(), encodingResult.tileData())); + } + + @Override + public void close() { + tableWriter.close(); + } + + } + + private class BatchedCompactTileWriter implements BatchedTileWriter { + + private final BatchedTileShallowTableWriter batchedTileShallowTableWriter = new BatchedTileShallowTableWriter(); + private final BatchedTileDataTableWriter batchedTileDataTableWriter = new BatchedTileDataTableWriter(); + private final IntIntHashMap tileDataIdByHash = new IntIntHashMap(1_000); + + private int tileDataIdCounter = 1; + + @Override + public void write(TileEncodingResult encodingResult) { + int tileDataId; + boolean writeData; + OptionalInt tileDataHashOpt = encodingResult.tileDataHash(); + + if (tileDataHashOpt.isPresent()) { + int tileDataHash = tileDataHashOpt.getAsInt(); + if (tileDataIdByHash.containsKey(tileDataHash)) { + tileDataId = tileDataIdByHash.get(tileDataHash); + writeData = false; + } else { + tileDataId = tileDataIdCounter++; + tileDataIdByHash.put(tileDataHash, tileDataId); + writeData = true; + } + } else { + tileDataId = tileDataIdCounter++; + writeData = true; + } + if (writeData) { + batchedTileDataTableWriter.write(new TileDataEntry(tileDataId, encodingResult.tileData())); + } + batchedTileShallowTableWriter.write(new TileShallowEntry(encodingResult.coord(), tileDataId)); + } + + @Override + public void close() { + batchedTileShallowTableWriter.close(); + batchedTileDataTableWriter.close(); + } } @@ -450,7 +706,7 @@ public final class Mbtiles implements Closeable { public Metadata setMetadata(String name, Object value) { 
if (value != null) { - LOGGER.debug("Set mbtiles metadata: " + name + "=" + value); + LOGGER.debug("Set mbtiles metadata: {}={}", name, value); try ( PreparedStatement statement = connection.prepareStatement( "INSERT INTO " + METADATA_TABLE + " (" + METADATA_COL_NAME + "," + METADATA_COL_VALUE + ") VALUES(?, ?);") diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/mbtiles/MbtilesWriter.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/mbtiles/MbtilesWriter.java index b04e2685..39e746bf 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/mbtiles/MbtilesWriter.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/mbtiles/MbtilesWriter.java @@ -26,6 +26,7 @@ import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.OptionalInt; import java.util.Queue; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; @@ -47,6 +48,7 @@ public class MbtilesWriter { private static final Logger LOGGER = LoggerFactory.getLogger(MbtilesWriter.class); private static final long MAX_FEATURES_PER_BATCH = 10_000; private static final long MAX_TILES_PER_BATCH = 1_000; + private static final int MAX_FEATURES_HASHING_THRESHOLD = 5; private final Counter.Readable featuresProcessed; private final Counter memoizedTiles; private final Mbtiles db; @@ -89,7 +91,7 @@ public class MbtilesWriter { /** Reads all {@code features}, encodes them in parallel, and writes to {@code outputPath}. 
*/ public static void writeOutput(FeatureGroup features, Path outputPath, MbtilesMetadata mbtilesMetadata, PlanetilerConfig config, Stats stats) { - try (Mbtiles output = Mbtiles.newWriteToFileDatabase(outputPath)) { + try (Mbtiles output = Mbtiles.newWriteToFileDatabase(outputPath, config.compactDb())) { writeOutput(features, output, () -> FileUtils.fileSize(outputPath), mbtilesMetadata, config, stats); } catch (IOException e) { throw new IllegalStateException("Unable to write to " + outputPath, e); @@ -231,18 +233,22 @@ public class MbtilesWriter { * recomputing if the input hasn't changed. */ byte[] lastBytes = null, lastEncoded = null; + Integer lastTileDataHash = null; + boolean compactDb = config.compactDb(); for (TileBatch batch : prev) { - Queue result = new ArrayDeque<>(batch.size()); + Queue result = new ArrayDeque<>(batch.size()); FeatureGroup.TileFeatures last = null; // each batch contains tile ordered by z asc, x asc, y desc for (int i = 0; i < batch.in.size(); i++) { FeatureGroup.TileFeatures tileFeatures = batch.in.get(i); featuresProcessed.incBy(tileFeatures.getNumFeaturesProcessed()); byte[] bytes, encoded; + Integer tileDataHash; if (tileFeatures.hasSameContents(last)) { bytes = lastBytes; encoded = lastEncoded; + tileDataHash = lastTileDataHash; memoizedTiles.inc(); } else { VectorTile en = tileFeatures.getVectorTileEncoder(); @@ -256,12 +262,21 @@ public class MbtilesWriter { tileFeatures.tileCoord(), encoded.length / 1024); } + if (compactDb && tileFeatures.getNumFeaturesToEmit() < MAX_FEATURES_HASHING_THRESHOLD) { + tileDataHash = tileFeatures.generateContentHash(); + } else { + tileDataHash = null; + } + lastTileDataHash = tileDataHash; } int zoom = tileFeatures.tileCoord().z(); int encodedLength = encoded == null ? 
0 : encoded.length; totalTileSizesByZoom[zoom].incBy(encodedLength); maxTileSizesByZoom[zoom].accumulate(encodedLength); - result.add(new Mbtiles.TileEntry(tileFeatures.tileCoord(), bytes)); + result.add( + new TileEncodingResult(tileFeatures.tileCoord(), bytes, + tileDataHash == null ? OptionalInt.empty() : OptionalInt.of(tileDataHash)) + ); } // hand result off to writer batch.out.complete(result); @@ -292,15 +307,15 @@ public class MbtilesWriter { TileCoord lastTile = null; Timer time = null; int currentZ = Integer.MIN_VALUE; - try (var batchedWriter = db.newBatchedTileWriter()) { + try (var batchedTileWriter = db.newBatchedTileWriter()) { for (TileBatch batch : tileBatches) { - Queue tiles = batch.out.get(); - Mbtiles.TileEntry tile; - while ((tile = tiles.poll()) != null) { - TileCoord tileCoord = tile.tile(); + Queue encodedTiles = batch.out.get(); + TileEncodingResult encodedTile; + while ((encodedTile = encodedTiles.poll()) != null) { + TileCoord tileCoord = encodedTile.coord(); assert lastTile == null || lastTile.compareTo(tileCoord) < 0 : "Tiles out of order %s before %s" .formatted(lastTile, tileCoord); - lastTile = tile.tile(); + lastTile = encodedTile.coord(); int z = tileCoord.z(); if (z != currentZ) { if (time == null) { @@ -311,8 +326,9 @@ public class MbtilesWriter { time = Timer.start(); currentZ = z; } - batchedWriter.write(tile.tile(), tile.bytes()); - stats.wroteTile(z, tile.bytes().length); + batchedTileWriter.write(encodedTile); + + stats.wroteTile(z, encodedTile.tileData() == null ? 
0 : encodedTile.tileData().length); tilesByZoom[z].inc(); } lastTileWritten.set(lastTile); @@ -368,7 +384,7 @@ public class MbtilesWriter { */ private record TileBatch( List in, - CompletableFuture> out + CompletableFuture> out ) { TileBatch() { diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/mbtiles/TileEncodingResult.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/mbtiles/TileEncodingResult.java new file mode 100644 index 00000000..66a6f532 --- /dev/null +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/mbtiles/TileEncodingResult.java @@ -0,0 +1,43 @@ +package com.onthegomap.planetiler.mbtiles; + +import com.onthegomap.planetiler.geo.TileCoord; +import java.util.Arrays; +import java.util.Objects; +import java.util.OptionalInt; + +public record TileEncodingResult( + TileCoord coord, + byte[] tileData, + /** will always be empty in non-compact mode and might also be empty in compact mode */ + OptionalInt tileDataHash +) { + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + Arrays.hashCode(tileData); + result = prime * result + Objects.hash(coord, tileDataHash); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof TileEncodingResult)) { + return false; + } + TileEncodingResult other = (TileEncodingResult) obj; + return Objects.equals(coord, other.coord) && Arrays.equals(tileData, other.tileData) && + Objects.equals(tileDataHash, other.tileDataHash); + } + + @Override + public String toString() { + return "TileEncodingResult [coord=" + coord + ", tileData=" + Arrays.toString(tileData) + ", tileDataHash=" + + tileDataHash + "]"; + } + +} diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Hashing.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Hashing.java new file mode 100644 index 00000000..dbfabcaa --- /dev/null +++ 
b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Hashing.java @@ -0,0 +1,46 @@ +package com.onthegomap.planetiler.util; + +/** + * Static hash functions and hashing utilities. + * + */ +public final class Hashing { + + /** + * Initial hash for the FNV-1 and FNV-1a 32-bit hash function. + */ + public static final int FNV1_32_INIT = 0x811c9dc5; + private static final int FNV1_PRIME_32 = 16777619; + + private Hashing() {} + + /** + * Computes the hash using the FNV-1a 32-bit hash function, starting with the initial hash. + *

+ * The hash generation must always start with {@link #FNV1_32_INIT} as initial hash but this version comes in handy + * when generating the hash for multiple bytes consecutively in a loop. + * + * @param initHash the initial hash + * @param data the data to generate the hash for + * @return the generated hash + */ + public static int fnv1a32(int initHash, byte... data) { + int hash = initHash; + for (byte datum : data) { + hash ^= (datum & 0xff); + hash *= FNV1_PRIME_32; + } + return hash; + } + + /** + * Computes the hash using the FNV-1a 32-bit hash function. + * + * @param data the data to generate the hash for + * @return the hash + */ + public static int fnv1a32(byte... data) { + return fnv1a32(FNV1_32_INIT, data); + } + +} diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java index 2205295a..b9b2c599 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java @@ -3,6 +3,7 @@ package com.onthegomap.planetiler; import static com.onthegomap.planetiler.TestUtils.*; import static org.junit.jupiter.api.Assertions.*; +import com.onthegomap.planetiler.TestUtils.OsmXml; import com.onthegomap.planetiler.collection.FeatureGroup; import com.onthegomap.planetiler.collection.LongLongMap; import com.onthegomap.planetiler.collection.LongLongMultimap; @@ -47,6 +48,7 @@ import org.junit.jupiter.params.provider.ValueSource; import org.locationtech.jts.geom.Coordinate; import org.locationtech.jts.geom.Geometry; import org.locationtech.jts.geom.MultiPolygon; +import org.locationtech.jts.geom.Polygon; import org.locationtech.jts.io.InputStreamInStream; import org.locationtech.jts.io.WKBReader; import org.slf4j.Logger; @@ -69,8 +71,19 @@ class PlanetilerTests { private static final double Z13_WIDTH = 1d / Z13_TILES; private static final int Z12_TILES = 1 << 
12; private static final int Z4_TILES = 1 << 4; + private static final Polygon WORLD_POLYGON = newPolygon( + worldCoordinateList( + Z14_WIDTH / 2, Z14_WIDTH / 2, + 1 - Z14_WIDTH / 2, Z14_WIDTH / 2, + 1 - Z14_WIDTH / 2, 1 - Z14_WIDTH / 2, + Z14_WIDTH / 2, 1 - Z14_WIDTH / 2, + Z14_WIDTH / 2, Z14_WIDTH / 2 + ), + List.of() + ); private final Stats stats = Stats.inMemory(); + private static T with(T elem, Consumer fn) { fn.accept(elem); return elem; @@ -121,12 +134,13 @@ class PlanetilerTests { FeatureGroup featureGroup = FeatureGroup.newInMemoryFeatureGroup(profile, stats); runner.run(featureGroup, profile, config); featureGroup.prepare(); - try (Mbtiles db = Mbtiles.newInMemoryDatabase()) { + try (Mbtiles db = Mbtiles.newInMemoryDatabase(config.compactDb())) { MbtilesWriter.writeOutput(featureGroup, db, () -> 0L, new MbtilesMetadata(profile, config.arguments()), config, stats); var tileMap = TestUtils.getTileMap(db); tileMap.values().forEach(fs -> fs.forEach(f -> f.geometry().validate())); - return new PlanetilerResults(tileMap, db.metadata().getAll()); + int tileDataCount = config.compactDb() ? 
TestUtils.getTilesDataCount(db) : 0; + return new PlanetilerResults(tileMap, db.metadata().getAll(), tileDataCount); } } @@ -643,21 +657,10 @@ class PlanetilerTests { @Test void testFullWorldPolygon() throws Exception { - List outerPoints = worldCoordinateList( - Z14_WIDTH / 2, Z14_WIDTH / 2, - 1 - Z14_WIDTH / 2, Z14_WIDTH / 2, - 1 - Z14_WIDTH / 2, 1 - Z14_WIDTH / 2, - Z14_WIDTH / 2, 1 - Z14_WIDTH / 2, - Z14_WIDTH / 2, Z14_WIDTH / 2 - ); - var results = runWithReaderFeatures( Map.of("threads", "1"), List.of( - newReaderFeature(newPolygon( - outerPoints, - List.of() - ), Map.of()) + newReaderFeature(WORLD_POLYGON, Map.of()) ), (in, features) -> features.polygon("layer") .setZoomRange(0, 6) @@ -1470,7 +1473,7 @@ class PlanetilerTests { } private record PlanetilerResults( - Map> tiles, Map metadata + Map> tiles, Map metadata, int tileDataCount ) {} private record TestProfile( @@ -1695,4 +1698,33 @@ class PlanetilerTests { assertSubmap(Map.of(), results.tiles); } + + + private PlanetilerResults runForCompactTest(boolean compactDbEnabled) throws Exception { + + return runWithReaderFeatures( + Map.of("threads", "1", "compact-db", Boolean.toString(compactDbEnabled)), + List.of( + newReaderFeature(WORLD_POLYGON, Map.of()) + ), + (in, features) -> features.polygon("layer") + .setZoomRange(0, 2) + .setBufferPixels(0) + ); + } + + @Test + void testCompactDb() throws Exception { + + var compactResult = runForCompactTest(true); + var nonCompactResult = runForCompactTest(false); + + assertEquals(nonCompactResult.tiles, compactResult.tiles); + assertTrue( + compactResult.tileDataCount() < compactResult.tiles.size(), + "tileDataCount=%s should be less than tileCount=%s".formatted( + compactResult.tileDataCount(), compactResult.tiles.size() + ) + ); + } } diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/TestUtils.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/TestUtils.java index 5fc95c22..7ec51106 100644 --- 
a/planetiler-core/src/test/java/com/onthegomap/planetiler/TestUtils.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/TestUtils.java @@ -230,6 +230,23 @@ public class TestUtils { return result; } + public static int getTilesDataCount(Mbtiles db) throws SQLException { + String tableToCountFrom = isCompactDb(db) ? "tiles_data" : "tiles"; + try (Statement statement = db.connection().createStatement()) { + ResultSet rs = statement.executeQuery("select count(*) from %s".formatted(tableToCountFrom)); + rs.next(); + return rs.getInt(1); + } + } + + public static boolean isCompactDb(Mbtiles db) throws SQLException { + try (Statement statement = db.connection().createStatement()) { + ResultSet rs = statement.executeQuery("select count(*) from sqlite_master where type='view' and name='tiles'"); + rs.next(); + return rs.getInt(1) > 0; + } + } + public static > void assertSubmap(Map expectedSubmap, Map actual) { assertSubmap(expectedSubmap, actual, ""); } diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/collection/FeatureGroupTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/collection/FeatureGroupTest.java index 1bed8479..d302a6e5 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/collection/FeatureGroupTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/collection/FeatureGroupTest.java @@ -4,6 +4,7 @@ import static com.onthegomap.planetiler.TestUtils.decodeSilently; import static com.onthegomap.planetiler.TestUtils.newPoint; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.DynamicTest.dynamicTest; @@ -20,10 +21,15 @@ import java.util.List; import java.util.Map; import java.util.Optional; import java.util.TreeMap; +import java.util.stream.Stream; import 
org.junit.jupiter.api.DynamicTest; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestFactory; +import org.junit.jupiter.api.extension.ExtensionContext; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.ArgumentsProvider; +import org.junit.jupiter.params.provider.ArgumentsSource; import org.junit.jupiter.params.provider.CsvSource; import org.locationtech.jts.geom.Geometry; @@ -70,6 +76,11 @@ class FeatureGroupTest { featureWriter.accept(features.newRenderedFeatureEncoder().apply(feature)); } + private void put(PuTileArgs args) { + putWithIdGroupAndSortKey(args.id(), args.tile(), args.layer(), args.attrs(), args.geom(), args.sortKey(), + args.hasGroup(), args.group(), args.limit()); + } + private Map>> getFeatures() { Map>> map = new TreeMap<>(); for (FeatureGroup.TileFeatures tile : features) { @@ -287,70 +298,32 @@ class FeatureGroupTest { ); } - @Test - void testHasSameFeatures() { - // should be the "same" even though sort-key is different - putWithIdGroupAndSortKey( - 1, 1, "layer", Map.of("id", 1), newPoint(1, 2), 1, true, 2, 3 - ); - putWithIdGroupAndSortKey( - 1, 2, "layer", Map.of("id", 1), newPoint(1, 2), 2, true, 2, 3 - ); + @ParameterizedTest(name = "{0}") + @ArgumentsSource(SameFeatureGroupTestArgs.class) + void testHasSameContents(String testName, boolean expectSame, PuTileArgs args0, PuTileArgs args1) { + put(args0); + put(args1); sorter.sort(); var iter = features.iterator(); - assertTrue(iter.next().hasSameContents(iter.next())); + var tile0 = iter.next(); + var tile1 = iter.next(); + assertEquals(expectSame, tile0.hasSameContents(tile1)); } - @Test - void testDoesNotHaveSameFeaturesWhenGeometryChanges() { - putWithIdGroupAndSortKey( - 1, 1, "layer", Map.of("id", 1), newPoint(1, 2), 1, true, 2, 3 - ); - putWithIdGroupAndSortKey( - 1, 2, "layer", Map.of("id", 1), newPoint(1, 3), 1, true, 2, 3 - ); + @ParameterizedTest(name = "{0}") + 
@ArgumentsSource(SameFeatureGroupTestArgs.class) + void testGenerateContentHash(String testName, boolean expectSame, PuTileArgs args0, PuTileArgs args1) { + put(args0); + put(args1); sorter.sort(); var iter = features.iterator(); - assertFalse(iter.next().hasSameContents(iter.next())); - } - - @Test - void testDoesNotHaveSameFeaturesWhenAttrsChange() { - putWithIdGroupAndSortKey( - 1, 1, "layer", Map.of("id", 1), newPoint(1, 2), 1, true, 2, 3 - ); - putWithIdGroupAndSortKey( - 1, 2, "layer", Map.of("id", 2), newPoint(1, 2), 1, true, 2, 3 - ); - sorter.sort(); - var iter = features.iterator(); - assertFalse(iter.next().hasSameContents(iter.next())); - } - - @Test - void testDoesNotHaveSameFeaturesWhenLayerChanges() { - putWithIdGroupAndSortKey( - 1, 1, "layer", Map.of("id", 1), newPoint(1, 2), 1, true, 2, 3 - ); - putWithIdGroupAndSortKey( - 1, 2, "layer2", Map.of("id", 1), newPoint(1, 2), 1, true, 2, 3 - ); - sorter.sort(); - var iter = features.iterator(); - assertFalse(iter.next().hasSameContents(iter.next())); - } - - @Test - void testDoesNotHaveSameFeaturesWhenIdChanges() { - putWithIdGroupAndSortKey( - 1, 1, "layer", Map.of("id", 1), newPoint(1, 2), 1, true, 2, 3 - ); - putWithIdGroupAndSortKey( - 2, 2, "layer", Map.of("id", 1), newPoint(1, 2), 1, true, 2, 3 - ); - sorter.sort(); - var iter = features.iterator(); - assertFalse(iter.next().hasSameContents(iter.next())); + var tile0 = iter.next(); + var tile1 = iter.next(); + if (expectSame) { + assertEquals(tile0.generateContentHash(), tile1.generateContentHash()); + } else { + assertNotEquals(tile0.generateContentHash(), tile1.generateContentHash()); + } } @ParameterizedTest @@ -366,4 +339,46 @@ class FeatureGroupTest { assertEquals(geomType, FeatureGroup.decodeGeomType(encoded)); assertEquals(scale, FeatureGroup.decodeScale(encoded)); } + + private static class SameFeatureGroupTestArgs implements ArgumentsProvider { + + @Override + public Stream provideArguments(ExtensionContext context) throws Exception { + 
return Stream.of( + argsOf( + "same despite diff sort key", true, + new PuTileArgs(1, 1, "layer", Map.of("id", 1), newPoint(1, 2), 1, true, 2, 3), + new PuTileArgs(1, 2, "layer", Map.of("id", 1), newPoint(1, 2), 2, true, 2, 3) + ), + argsOf( + "diff when geometry changes", false, + new PuTileArgs(1, 1, "layer", Map.of("id", 1), newPoint(1, 2), 1, true, 2, 3), + new PuTileArgs(1, 2, "layer", Map.of("id", 1), newPoint(1, 3), 1, true, 2, 3) + ), + argsOf( + "diff when attrs changes", false, + new PuTileArgs(1, 1, "layer", Map.of("id", 1), newPoint(1, 2), 1, true, 2, 3), + new PuTileArgs(1, 2, "layer", Map.of("id", 2), newPoint(1, 2), 1, true, 2, 3) + ), + argsOf( + "diff when layer changes", false, + new PuTileArgs(1, 1, "layer", Map.of("id", 1), newPoint(1, 2), 1, true, 2, 3), + new PuTileArgs(1, 2, "layer2", Map.of("id", 1), newPoint(1, 2), 1, true, 2, 3) + ), + argsOf( + "diff when id changes", false, + new PuTileArgs(1, 1, "layer", Map.of("id", 1), newPoint(1, 2), 1, true, 2, 3), + new PuTileArgs(2, 2, "layer", Map.of("id", 1), newPoint(1, 2), 1, true, 2, 3) + ) + ); + } + + private static Arguments argsOf(String testName, boolean expectSame, PuTileArgs args0, + PuTileArgs args1) { + return Arguments.of(testName, expectSame, args0, args1); + } + } + + private static record PuTileArgs(long id, int tile, String layer, Map attrs, Geometry geom, + int sortKey, boolean hasGroup, long group, int limit) {} } diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/mbtiles/MbtilesTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/mbtiles/MbtilesTest.java index 15658ce9..03d7533c 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/mbtiles/MbtilesTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/mbtiles/MbtilesTest.java @@ -5,13 +5,16 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static 
org.junit.jupiter.api.Assertions.assertNull; +import com.google.common.math.IntMath; import com.onthegomap.planetiler.TestUtils; import com.onthegomap.planetiler.geo.GeoUtils; import com.onthegomap.planetiler.geo.TileCoord; import java.io.IOException; +import java.math.RoundingMode; import java.sql.SQLException; import java.util.HashSet; import java.util.Map; +import java.util.OptionalInt; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; @@ -23,11 +26,17 @@ import org.locationtech.jts.geom.Envelope; class MbtilesTest { - private static final int BATCH = 999 / 4; + private static final int MAX_PARAMETERS_IN_PREPARED_STATEMENT = 999; + private static final int TILES_BATCH = MAX_PARAMETERS_IN_PREPARED_STATEMENT / 4; + private static final int TILES_SHALLOW_BATCH = MAX_PARAMETERS_IN_PREPARED_STATEMENT / 4; + private static final int TILES_DATA_BATCH = MAX_PARAMETERS_IN_PREPARED_STATEMENT / 2; - void testWriteTiles(int howMany, boolean deferIndexCreation, boolean optimize) - throws IOException, SQLException { - try (Mbtiles db = Mbtiles.newInMemoryDatabase()) { + + private static final + + void testWriteTiles(int howMany, boolean deferIndexCreation, boolean optimize, boolean compactDb) + throws IOException, SQLException { + try (Mbtiles db = Mbtiles.newInMemoryDatabase(compactDb)) { db.createTables(); if (!deferIndexCreation) { db.addTileIndex(); @@ -36,13 +45,15 @@ class MbtilesTest { Set expected = new TreeSet<>(); try (var writer = db.newBatchedTileWriter()) { for (int i = 0; i < howMany; i++) { + var dataHash = i - (i % 2); + var dataBase = howMany + dataHash; var entry = new Mbtiles.TileEntry(TileCoord.ofXYZ(i, i + 1, 14), new byte[]{ - (byte) howMany, - (byte) (howMany >> 8), - (byte) (howMany >> 16), - (byte) (howMany >> 24) + (byte) dataBase, + (byte) (dataBase >> 8), + (byte) (dataBase >> 16), + (byte) (dataBase >> 24) }); - writer.write(entry.tile(), entry.bytes()); + writer.write(new TileEncodingResult(entry.tile(), entry.bytes(), 
OptionalInt.of(dataHash))); expected.add(entry); } } @@ -62,23 +73,34 @@ class MbtilesTest { byte[] data = db.getTile(tile.x(), tile.y(), tile.z()); assertArrayEquals(expectedEntry.bytes(), data); } + assertEquals(compactDb, TestUtils.isCompactDb(db)); + if (compactDb) { + assertEquals(IntMath.divide(howMany, 2, RoundingMode.CEILING), TestUtils.getTilesDataCount(db)); + } } } @ParameterizedTest - @ValueSource(ints = {0, 1, BATCH, BATCH + 1, 2 * BATCH, 2 * BATCH + 1}) - void testWriteTilesDifferentSize(int howMany) throws IOException, SQLException { - testWriteTiles(howMany, false, false); + @ValueSource(ints = {0, 1, TILES_BATCH, TILES_BATCH + 1, 2 * TILES_BATCH, 2 * TILES_BATCH + 1}) + void testWriteTilesDifferentSizeInNonCompactMode(int howMany) throws IOException, SQLException { + testWriteTiles(howMany, false, false, false); + } + + @ParameterizedTest + @ValueSource(ints = {0, 1, TILES_DATA_BATCH, TILES_DATA_BATCH + 1, 2 * TILES_DATA_BATCH, 2 * TILES_DATA_BATCH + 1, + TILES_SHALLOW_BATCH, TILES_SHALLOW_BATCH + 1, 2 * TILES_SHALLOW_BATCH, 2 * TILES_SHALLOW_BATCH + 1}) + void testWriteTilesDifferentSizeInCompactMode(int howMany) throws IOException, SQLException { + testWriteTiles(howMany, false, false, true); } @Test void testDeferIndexCreation() throws IOException, SQLException { - testWriteTiles(10, true, false); + testWriteTiles(10, true, false, false); } @Test void testVacuumAnalyze() throws IOException, SQLException { - testWriteTiles(10, false, true); + testWriteTiles(10, false, true, false); } @Test diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/mbtiles/VerifyTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/mbtiles/VerifyTest.java index 770cfdad..a143a246 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/mbtiles/VerifyTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/mbtiles/VerifyTest.java @@ -11,6 +11,7 @@ import com.onthegomap.planetiler.geo.TileCoord; import 
java.io.IOException; import java.util.List; import java.util.Map; +import java.util.OptionalInt; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -52,7 +53,7 @@ class VerifyTest { VectorTile.encodeGeometry(point(0, 0)), Map.of() ))); - writer.write(TileCoord.ofXYZ(0, 0, 0), gzip(tile.encode())); + writer.write(new TileEncodingResult(TileCoord.ofXYZ(0, 0, 0), gzip(tile.encode()), OptionalInt.empty())); } assertValid(mbtiles); } @@ -76,7 +77,7 @@ class VerifyTest { )), Map.of() ))); - writer.write(TileCoord.ofXYZ(0, 0, 0), gzip(tile.encode())); + writer.write(new TileEncodingResult(TileCoord.ofXYZ(0, 0, 0), gzip(tile.encode()), OptionalInt.empty())); } assertInvalid(mbtiles); } diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/HashingTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/HashingTest.java new file mode 100644 index 00000000..55588612 --- /dev/null +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/HashingTest.java @@ -0,0 +1,22 @@ +package com.onthegomap.planetiler.util; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; + +import org.junit.jupiter.api.Test; + +class HashingTest { + + @Test + void testFnv1a32() { + assertEquals(Hashing.fnv1a32(), Hashing.fnv1a32()); + assertEquals(Hashing.fnv1a32((byte) 1), Hashing.fnv1a32((byte) 1)); + assertEquals(Hashing.fnv1a32((byte) 1, (byte) 2), Hashing.fnv1a32((byte) 1, (byte) 2)); + assertNotEquals(Hashing.fnv1a32((byte) 1), Hashing.fnv1a32((byte) 2)); + assertNotEquals(Hashing.fnv1a32((byte) 1), Hashing.fnv1a32((byte) 1, (byte) 1)); + + assertEquals(Hashing.FNV1_32_INIT, Hashing.fnv1a32()); + assertEquals(123, Hashing.fnv1a32(123)); + } + +}