Support unzipping GeoPackage sources at runtime (#430)

pull/459/head
Erik Price 2023-01-25 17:56:30 -08:00 zatwierdzone przez GitHub
rodzic ae1317c341
commit a0f8c67c78
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
8 zmienionych plików z 199 dodań i 52 usunięć

Wyświetl plik

@ -350,6 +350,48 @@ public class Planetiler {
* <p>
* To override the location of the {@code geopackage} file, set {@code name_path=newpath.gpkg} in the arguments and to
* override the download URL set {@code name_url=http://url/of/file.gpkg}.
* <p>
* If given a path to a ZIP file containing one or more GeoPackages, each {@code .gpkg} file within will be extracted
* to a temporary directory at runtime.
*
* @param projection the Coordinate Reference System authority code to use, parsed with
* {@link org.geotools.referencing.CRS#decode(String)}
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments
* @param defaultUrl remote URL to download the file from if {@code download=true} argument is set and {@code
* name_url} argument is not set
* @return this runner instance for chaining
* @see GeoPackageReader
* @see Downloader
*/
public Planetiler addGeoPackageSource(String projection, String name, Path defaultPath, String defaultUrl) {
  Path path = getPath(name, "geopackage", defaultPath, defaultUrl);
  String stageDescription = "Process features in " + path;
  return addStage(name, stageDescription, ifSourceUsed(name, () -> {
    // A plain .gpkg input is read as-is; a .zip input is expanded to every .gpkg entry it contains.
    List<Path> singleFile = List.of(path);
    boolean zipped = FileUtils.hasExtension(path, "zip");
    List<Path> geoPackages = zipped ? FileUtils.walkPathWithPattern(path, "*.gpkg") : singleFile;
    if (geoPackages.isEmpty()) {
      throw new IllegalArgumentException("No .gpkg files found in " + path);
    }
    GeoPackageReader.process(projection, name, geoPackages, tmpDir, featureGroup, config, profile, stats);
  }));
}
/**
* Adds a new OGC GeoPackage source that will be processed when {@link #run()} is called.
* <p>
* If the file does not exist and {@code download=true} argument is set, then the file will first be downloaded from
* {@code defaultUrl}.
* <p>
* To override the location of the {@code geopackage} file, set {@code name_path=newpath.gpkg} in the arguments and to
* override the download URL set {@code name_url=http://url/of/file.gpkg}.
* <p>
* If given a path to a ZIP file containing one or more GeoPackages, each {@code .gpkg} file within will be extracted
* to a temporary directory at runtime.
*
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments
@ -360,25 +402,23 @@ public class Planetiler {
* @see Downloader
*/
public Planetiler addGeoPackageSource(String name, Path defaultPath, String defaultUrl) {
  // Delegate to the projection-aware overload; a null projection means no override is applied and the reader
  // falls back to the reference system declared by each feature table.
  return addGeoPackageSource(null, name, defaultPath, defaultUrl);
}
/**
* Adds a new Natural Earth sqlite file source that will be processed when {@link #run()} is called.
* <p>
* To override the location of the {@code sqlite} file, set {@code name_path=newpath.zip} in the arguments and to
* override the download URL set {@code name_url=http://url/of/natural_earth.zip}.
*
* @deprecated can be replaced by {@link #addGeoPackageSource(String, Path, String)}.
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name} key is not set through arguments. Can be the
* {@code .sqlite} file or a {@code .zip} file containing the sqlite file.
* @return this runner instance for chaining
* @see NaturalEarthReader
*/
@Deprecated(forRemoval = true)
public Planetiler addNaturalEarthSource(String name, Path defaultPath) {
return addNaturalEarthSource(name, defaultPath, null);
}
@ -392,6 +432,8 @@ public class Planetiler {
* To override the location of the {@code sqlite} file, set {@code name_path=newpath.zip} in the arguments and to
* override the download URL set {@code name_url=http://url/of/natural_earth.zip}.
*
* @deprecated can be replaced by {@link #addGeoPackageSource(String, Path, String)}.
*
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name} key is not set through arguments. Can be the
* {@code .sqlite} file or a {@code .zip} file containing the sqlite file.
@ -401,6 +443,7 @@ public class Planetiler {
* @see NaturalEarthReader
* @see Downloader
*/
@Deprecated(forRemoval = true)
public Planetiler addNaturalEarthSource(String name, Path defaultPath, String defaultUrl) {
Path path = getPath(name, "sqlite db", defaultPath, defaultUrl);
return addStage(name, "Process features in " + path, ifSourceUsed(name, () -> NaturalEarthReader

Wyświetl plik

@ -4,6 +4,10 @@ import com.onthegomap.planetiler.Profile;
import com.onthegomap.planetiler.collection.FeatureGroup;
import com.onthegomap.planetiler.config.PlanetilerConfig;
import com.onthegomap.planetiler.stats.Stats;
import com.onthegomap.planetiler.util.FileUtils;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.List;
@ -17,7 +21,7 @@ import org.geotools.geometry.jts.JTS;
import org.geotools.geometry.jts.WKBReader;
import org.geotools.referencing.CRS;
import org.locationtech.jts.geom.Geometry;
import org.opengis.referencing.crs.CoordinateReferenceSystem;
import org.opengis.referencing.FactoryException;
import org.opengis.referencing.operation.MathTransform;
/**
@ -25,32 +29,72 @@ import org.opengis.referencing.operation.MathTransform;
*/
public class GeoPackageReader extends SimpleReader<SimpleFeature> {
private Path extractedPath = null;
private final GeoPackage geoPackage;
private final MathTransform coordinateTransform;
GeoPackageReader(String sourceName, Path input) {
GeoPackageReader(String sourceProjection, String sourceName, Path input, Path tmpDir) {
super(sourceName);
geoPackage = GeoPackageManager.open(false, input.toFile());
if (sourceProjection != null) {
try {
var sourceCRS = CRS.decode(sourceProjection);
var latLonCRS = CRS.decode("EPSG:4326");
coordinateTransform = CRS.findMathTransform(sourceCRS, latLonCRS);
} catch (FactoryException e) {
throw new FileFormatException("Bad reference system", e);
}
} else {
coordinateTransform = null;
}
try {
geoPackage = openGeopackage(input, tmpDir);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
/**
 * Opens the {@link GeoPackage} at the given path. If {@code input} refers to a file within a ZIP archive (its URI
 * uses the {@code jar} scheme), the file is first copied to a temporary {@code .gpkg} file inside {@code tmpDir} and
 * opened from there; that temporary file is remembered in {@code extractedPath} so that it can be removed later.
 *
 * @param input  path to the GeoPackage, either on disk or inside a ZIP archive
 * @param tmpDir directory in which to create the temporary copy when extraction is needed
 * @return the opened GeoPackage
 * @throws IOException if the temporary file cannot be created or the archive entry cannot be read
 */
private GeoPackage openGeopackage(Path input, Path tmpDir) throws IOException {
  var inputUri = input.toUri();
  if (!"jar".equals(inputUri.getScheme())) {
    return GeoPackageManager.open(false, input.toFile());
  }
  Path extracted = Files.createTempFile(tmpDir, "", ".gpkg");
  try {
    try (var inputStream = inputUri.toURL().openStream()) {
      FileUtils.safeCopy(inputStream, extracted);
    }
    GeoPackage opened = GeoPackageManager.open(false, extracted.toFile());
    // Only record the temp file once everything succeeded.
    extractedPath = extracted;
    return opened;
  } catch (IOException | RuntimeException e) {
    // The caller never gets a usable reader on failure, so nobody else would remove the temp file: do it here.
    try {
      Files.deleteIfExists(extracted);
    } catch (IOException cleanupFailure) {
      e.addSuppressed(cleanupFailure);
    }
    throw e;
  }
}
/**
* Renders map features for all elements from an OGC GeoPackage based on the mapping logic defined in {@code
* profile}.
*
* @param sourceName string ID for this reader to use in logs and stats
* @param sourcePaths paths to the {@code .gpkg} files on disk
* @param writer consumer for rendered features
* @param config user-defined parameters controlling number of threads and log interval
* @param profile logic that defines what map features to emit for each source feature
* @param stats to keep track of counters and timings
* @param sourceProjection code for the coordinate reference system of the input data, to be parsed by
* {@link CRS#decode(String)}
* @param sourceName string ID for this reader to use in logs and stats
* @param sourcePaths paths to the {@code .gpkg} files on disk
* @param tmpDir path to temporary directory for extracting data from zip files
* @param writer consumer for rendered features
* @param config user-defined parameters controlling number of threads and log interval
* @param profile logic that defines what map features to emit for each source feature
* @param stats to keep track of counters and timings
* @throws IllegalArgumentException if a problem occurs reading the input file
*/
public static void process(String sourceName, List<Path> sourcePaths, FeatureGroup writer, PlanetilerConfig config,
public static void process(String sourceProjection, String sourceName, List<Path> sourcePaths, Path tmpDir,
FeatureGroup writer, PlanetilerConfig config,
Profile profile, Stats stats) {
SourceFeatureProcessor.processFiles(
sourceName,
sourcePaths,
path -> new GeoPackageReader(sourceName, path),
path -> new GeoPackageReader(sourceProjection, sourceName, path, tmpDir),
writer, config, profile, stats
);
}
@ -68,15 +112,19 @@ public class GeoPackageReader extends SimpleReader<SimpleFeature> {
@Override
public void readFeatures(Consumer<SimpleFeature> next) throws Exception {
CoordinateReferenceSystem latLonCRS = CRS.decode("EPSG:4326");
var latLonCRS = CRS.decode("EPSG:4326");
long id = 0;
for (var featureName : geoPackage.getFeatureTables()) {
FeatureDao features = geoPackage.getFeatureDao(featureName);
MathTransform transform = CRS.findMathTransform(
CRS.decode("EPSG:" + features.getSrsId()),
latLonCRS);
// GeoPackage spec allows this to be 0 (undefined geographic CRS) or
// -1 (undefined cartesian CRS). Both cases will throw when trying to
// call CRS.decode
long srsId = features.getSrsId();
MathTransform transform = (coordinateTransform != null) ? coordinateTransform :
CRS.findMathTransform(CRS.decode("EPSG:" + srsId), latLonCRS);
for (var feature : features.queryForAll()) {
GeoPackageGeometryData geometryData = feature.getGeometry();
@ -103,7 +151,11 @@ public class GeoPackageReader extends SimpleReader<SimpleFeature> {
}
@Override
public void close() {
public void close() throws IOException {
geoPackage.close();
if (extractedPath != null) {
Files.deleteIfExists(extractedPath);
}
}
}

Wyświetl plik

@ -253,6 +253,31 @@ public class FileUtils {
}
}
/**
 * Copies bytes from {@code inputStream} to {@code destPath}, replacing any existing content of the destination and
 * aborting once more than {@code ZIP_THRESHOLD_SIZE} bytes have been written.
 *
 * @param inputStream source of the bytes to copy; it is not closed by this method
 * @param destPath    file to write to, created if it does not exist yet
 * @throws UncheckedIOException if an IO exception occurs or the size limit is exceeded
 */
public static void safeCopy(InputStream inputStream, Path destPath) {
  // TRUNCATE_EXISTING: without it, copying over a longer pre-existing file would leave stale bytes after the new data.
  try (var outputStream = Files.newOutputStream(destPath, StandardOpenOption.CREATE, StandardOpenOption.WRITE,
    StandardOpenOption.TRUNCATE_EXISTING)) {
    // long so the running total cannot wrap around regardless of how large the threshold is
    long totalSize = 0;
    int nBytes;
    byte[] buffer = new byte[2048];
    while ((nBytes = inputStream.read(buffer)) > 0) {
      outputStream.write(buffer, 0, nBytes);
      totalSize += nBytes;
      if (totalSize > ZIP_THRESHOLD_SIZE) {
        throw new IOException("The uncompressed data size " + FORMAT.storage(totalSize) +
          "B is too much for the application resource capacity");
      }
    }
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  }
}
/**
* Unzips a zip file from an input stream to {@code destDir}.
*
@ -304,7 +329,7 @@ public class FileUtils {
}
if (totalEntryArchive > ZIP_THRESHOLD_ENTRIES) {
throw new IOException("Too much entries in this archive " + FORMAT.integer(totalEntryArchive) +
throw new IOException("Too many entries in this archive " + FORMAT.integer(totalEntryArchive) +
", can lead to inodes exhaustion of the system");
}
}

Wyświetl plik

@ -1668,7 +1668,7 @@ class PlanetilerTests {
.addOsmSource("osm", tempOsm)
.addNaturalEarthSource("ne", TestUtils.pathToResource("natural_earth_vector.sqlite"))
.addShapefileSource("shapefile", TestUtils.pathToResource("shapefile.zip"))
.addGeoPackageSource("geopackage", TestUtils.pathToResource("geopackage.gpkg"), null)
.addGeoPackageSource("geopackage", TestUtils.pathToResource("geopackage.gpkg.zip"), null)
.setOutput("mbtiles", mbtiles)
.run();
@ -1749,9 +1749,11 @@ class PlanetilerTests {
@ValueSource(strings = {
"",
"--write-threads=2 --process-threads=2 --feature-read-threads=2 --threads=4",
"--input-file=geopackage.gpkg"
})
void testPlanetilerRunnerGeoPackage(String args) throws Exception {
Path mbtiles = tempDir.resolve("output.mbtiles");
String inputFile = Arguments.fromArgs(args).getString("input-file", "", "geopackage.gpkg.zip");
Planetiler.create(Arguments.fromArgs((args + " --tmpdir=" + tempDir.resolve("data")).split("\\s+")))
.setProfile(new Profile.NullProfile() {
@ -1762,7 +1764,7 @@ class PlanetilerTests {
.setAttr("name", source.getString("name"));
}
})
.addGeoPackageSource("geopackage", TestUtils.pathToResource("geopackage.gpkg"), null)
.addGeoPackageSource("geopackage", TestUtils.pathToResource(inputFile), null)
.setOutput("mbtiles", mbtiles)
.run();
@ -1790,7 +1792,7 @@ class PlanetilerTests {
.addOsmSource("osm", TestUtils.pathToResource("monaco-latest.osm.pbf"))
.addNaturalEarthSource("ne", TestUtils.pathToResource("natural_earth_vector.sqlite"))
.addShapefileSource("shapefile", TestUtils.pathToResource("shapefile.zip"))
.addGeoPackageSource("geopackage", TestUtils.pathToResource("geopackage.gpkg"), null)
.addGeoPackageSource("geopackage", TestUtils.pathToResource("geopackage.gpkg.zip"), null)
.setOutput("mbtiles", tempDir.resolve("output.mbtiles"))
.run();
}

Wyświetl plik

@ -7,46 +7,61 @@ import com.onthegomap.planetiler.TestUtils;
import com.onthegomap.planetiler.collection.IterableOnce;
import com.onthegomap.planetiler.geo.GeoUtils;
import com.onthegomap.planetiler.stats.Stats;
import com.onthegomap.planetiler.util.FileUtils;
import com.onthegomap.planetiler.worker.WorkerPipeline;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;
import org.junit.jupiter.api.io.TempDir;
import org.locationtech.jts.geom.Geometry;
class GeoPackageReaderTest {
@TempDir
static Path tmpDir;
@Test
@Timeout(30)
void testReadGeoPackage() {
Path path = TestUtils.pathToResource("geopackage.gpkg");
void testReadGeoPackage() throws IOException {
Path pathOutsideZip = TestUtils.pathToResource("geopackage.gpkg");
Path zipPath = TestUtils.pathToResource("geopackage.gpkg.zip");
Path pathInZip = FileUtils.walkPathWithPattern(zipPath, "*.gpkg").get(0);
try (
var reader = new GeoPackageReader("test", path)
) {
for (int i = 1; i <= 2; i++) {
assertEquals(86, reader.getFeatureCount());
List<Geometry> points = new ArrayList<>();
List<String> names = new ArrayList<>();
WorkerPipeline.start("test", Stats.inMemory())
.readFromTiny("files", List.of(Path.of("dummy-path")))
.addWorker("geopackage", 1, (IterableOnce<Path> p, Consumer<SimpleFeature> next) -> reader.readFeatures(next))
.addBuffer("reader_queue", 100, 1)
.sinkToConsumer("counter", 1, elem -> {
assertTrue(elem.getTag("name") instanceof String);
assertEquals("test", elem.getSource());
assertEquals("stations", elem.getSourceLayer());
points.add(elem.latLonGeometry());
names.add(elem.getTag("name").toString());
}).await();
assertEquals(86, points.size());
assertTrue(names.contains("Van Dörn Street"));
var gc = GeoUtils.JTS_FACTORY.createGeometryCollection(points.toArray(new Geometry[0]));
var centroid = gc.getCentroid();
assertEquals(-77.0297995, centroid.getX(), 5, "iter " + i);
assertEquals(38.9119684, centroid.getY(), 5, "iter " + i);
var projections = new String[]{null, "EPSG:4326"};
for (var path : List.of(pathOutsideZip, pathInZip)) {
for (var proj : projections) {
try (
var reader = new GeoPackageReader(proj, "test", path, tmpDir)
) {
for (int iter = 0; iter < 2; iter++) {
String id = "path=" + path + " proj=" + proj + " iter=" + iter;
assertEquals(86, reader.getFeatureCount(), id);
List<Geometry> points = new ArrayList<>();
List<String> names = new ArrayList<>();
WorkerPipeline.start("test", Stats.inMemory())
.readFromTiny("files", List.of(Path.of("dummy-path")))
.addWorker("geopackage", 1,
(IterableOnce<Path> p, Consumer<SimpleFeature> next) -> reader.readFeatures(next))
.addBuffer("reader_queue", 100, 1)
.sinkToConsumer("counter", 1, elem -> {
assertTrue(elem.getTag("name") instanceof String);
assertEquals("test", elem.getSource());
assertEquals("stations", elem.getSourceLayer());
points.add(elem.latLonGeometry());
names.add(elem.getTag("name").toString());
}).await();
assertEquals(86, points.size(), id);
assertTrue(names.contains("Van Dörn Street"), id);
var gc = GeoUtils.JTS_FACTORY.createGeometryCollection(points.toArray(new Geometry[0]));
var centroid = gc.getCentroid();
assertEquals(-77.0297995, centroid.getX(), 5, id);
assertEquals(38.9119684, centroid.getY(), 5, id);
}
}
}
}
}

Wyświetl plik

@ -5,7 +5,9 @@ import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import com.onthegomap.planetiler.TestUtils;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
@ -90,6 +92,14 @@ class FileUtilsTest {
);
}
@Test
void testSafeCopy() throws IOException {
  // Round-trip a 2400-character ASCII payload through safeCopy and verify the file content matches.
  String expected = "a1".repeat(1200);
  Path target = tmpDir.resolve("unzipped");
  var source = new ByteArrayInputStream(expected.getBytes(StandardCharsets.UTF_8));
  FileUtils.safeCopy(source, target);
  String actual = Files.readString(target);
  assertEquals(expected, actual);
}
@Test
void testWalkPathWithPatternDirectory() throws IOException {
Path parent = tmpDir.resolve(Path.of("a", "b", "c"));

Plik binarny nie jest wyświetlany.

Wyświetl plik

@ -11,7 +11,7 @@ sources:
url: geofabrik:rhode-island
gpkg:
type: geopackage
url: https://example.com/geopackage.gpkg
url: https://example.com/geopackage.gpkg.zip
tag_mappings:
bridge: boolean # input=bridge, output=bridge, type=boolean
layer: long