// planetiler/planetiler-core/src/main/java/com/onthegomap/planetiler/util/AwsOsm.java
//
// 118 lines
// 4.9 KiB
// Java

package com.onthegomap.planetiler.util;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.dataformat.xml.XmlMapper;
import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper;
import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlProperty;
import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
import com.onthegomap.planetiler.config.PlanetilerConfig;
import java.io.IOException;
import java.io.InputStream;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import javax.annotation.concurrent.Immutable;
/**
 * A utility to download {@code planet.osm.pbf} files from public S3 sources such as
 * <a href="https://registry.opendata.aws/osm/">AWS Open Data Registry</a> and
 * <a href="https://overturemaps.org">Overture Maps Foundation</a>.
 * <p>
 * Each instance is bound to one bucket URL. The bucket listing is requested with {@code ?list-type=2}, paged through
 * with continuation tokens, and the resulting keys are cached on the instance.
 */
public class AwsOsm {

  /** Maximum number of index pages requested from a bucket before giving up. */
  private static final int MAX_PAGES = 100;

  public static final AwsOsm OSM_PDS = new AwsOsm("https://osm-pds.s3.amazonaws.com/");
  public static final AwsOsm OVERTURE = new AwsOsm("https://overturemaps-us-west-2.s3.amazonaws.com/");

  /** Shared XML mapper used to parse bucket index pages. */
  private static final ObjectMapper mapper = new XmlMapper().registerModule(new Jdk8Module());

  /** Base URL of the bucket; object keys are appended to it to form download URLs. */
  private final String bucketIndexUrl;

  /** Cached bucket entries, populated by the first successful call to {@link #getAndCacheIndex(PlanetilerConfig)}. */
  private final CopyOnWriteArrayList<ContentXml> entries = new CopyOnWriteArrayList<>();

  /**
   * Creates a utility bound to a single bucket.
   *
   * @param bucketIndexUrl base URL of the bucket; it is used both as the listing endpoint and as the prefix of every
   *                       returned download URL
   */
  protected AwsOsm(String bucketIndexUrl) {
    this.bucketIndexUrl = bucketIndexUrl;
  }

  /**
   * Fetches the S3 bucket index and searches for a {@code .osm.pbf} resource to download where snapshot date matches
   * {@code searchQuery}, or the latest snapshot if {@code searchQuery == "latest"}.
   * <p>
   * The index is only fetched once and cached after that.
   *
   * @param searchQuery the snapshot to search for
   * @param config      planetiler config with user-agent and timeout to use when downloading
   * @return the URL of a {@code .osm.pbf} file with name or snapshot ID matching {@code searchQuery}
   * @throws IllegalArgumentException if no matches, or more than one match is found, or the bucket index has more
   *                                  than {@code MAX_PAGES} pages
   * @throws IllegalStateException    if the bucket index cannot be read
   */
  public String getDownloadUrl(String searchQuery, PlanetilerConfig config) {
    var indexXml = getAndCacheIndex(config);
    return searchIndexForDownloadUrl(searchQuery, indexXml);
  }

  /**
   * Returns the cached bucket entries, downloading every page of the bucket index first if the cache is empty.
   * <p>
   * The method is {@code synchronized}, so only one thread at a time can fetch the index for a given instance. Because
   * "cache is empty" is the only marker for "not fetched yet", a bucket whose listing has no entries is requested
   * again on every call.
   *
   * @param config planetiler config passed through to {@code Downloader.openStream}
   * @return the cached list of bucket entries
   * @throws IllegalArgumentException if the listing needs more than {@code MAX_PAGES} pages
   * @throws IllegalStateException    if a page cannot be downloaded or parsed
   */
  private synchronized List<ContentXml> getAndCacheIndex(PlanetilerConfig config) {
    if (entries.isEmpty()) {
      List<ContentXml> result = new ArrayList<>();
      String nextPageParam = "";
      int pageNum = 0;
      do {
        // Enforce the limit BEFORE issuing the request so that no connection is opened just to be thrown away,
        // and so that exactly MAX_PAGES pages are allowed.
        if (++pageNum > MAX_PAGES) {
          throw new IllegalArgumentException("Too many entries in " + bucketIndexUrl + " to page through");
        }
        try (InputStream inputStream = Downloader.openStream(bucketIndexUrl + "?list-type=2" + nextPageParam, config)) {
          var page = parseIndexXml(inputStream);
          // A page without any <Contents> element leaves the record component null.
          List<ContentXml> pageContents = page.contents();
          if (pageContents != null) {
            result.addAll(pageContents);
          }
          // Keep paging only while the listing is flagged as truncated and a token for the next page was supplied.
          nextPageParam = (!page.truncated() || page.nextToken() == null) ? null :
            "&continuation-token=" + URLEncoder.encode(page.nextToken(), StandardCharsets.UTF_8);
        } catch (IOException e) {
          throw new IllegalStateException("Unable to read bucket index from " + bucketIndexUrl, e);
        }
      } while (nextPageParam != null);
      // Publish only after every page was read successfully, so a failure part-way never leaves a partial cache.
      entries.addAll(result);
    }
    return entries;
  }

  /**
   * Parses one page of a bucket listing.
   *
   * @param indexXmlContent stream containing the XML of a listing page
   * @return the parsed page
   * @throws IOException if the content cannot be read or mapped to {@link IndexXml}
   */
  protected IndexXml parseIndexXml(InputStream indexXmlContent) throws IOException {
    return mapper.readValue(indexXmlContent, IndexXml.class);
  }

  /**
   * Searches an already-fetched index for the download URL matching {@code searchQuery}.
   * <p>
   * For {@code "latest"} (case-insensitive) the result is the lexicographically greatest URL among all keys ending in
   * {@code .osm.pbf}. Otherwise exactly one key ending in {@code /planet-<searchQuery>.osm.pbf} must exist. Entries
   * without a key are ignored.
   *
   * @param searchQuery {@code "latest"} or the snapshot identifier to look for
   * @param index       bucket entries to search
   * @return the bucket URL concatenated with the matching key
   * @throws IllegalArgumentException if no entry matches, or more than one entry matches a specific snapshot
   */
  protected String searchIndexForDownloadUrl(String searchQuery, List<ContentXml> index) {
    if ("latest".equalsIgnoreCase(searchQuery)) {
      return index.stream()
        .filter(c -> keyEndsWith(c, ".osm.pbf"))
        .map(c -> bucketIndexUrl + c.key())
        .max(Comparator.naturalOrder())
        .orElseThrow(() -> new IllegalArgumentException("Unable to find latest AWS osm download URL"));
    } else {
      String suffix = "/planet-" + searchQuery + ".osm.pbf";
      List<String> results = index.stream()
        .filter(c -> keyEndsWith(c, suffix))
        .map(c -> bucketIndexUrl + c.key())
        .toList();
      if (results.isEmpty()) {
        throw new IllegalArgumentException("Unable to find AWS osm download URL for " + searchQuery);
      } else if (results.size() > 1) {
        throw new IllegalArgumentException("Found multiple AWS osm download URLs for " + searchQuery + ": " + results);
      }
      return results.getFirst();
    }
  }

  /** Returns true if {@code entry} has a non-null key that ends with {@code suffix}. */
  private static boolean keyEndsWith(ContentXml entry, String suffix) {
    return entry != null && entry.key() != null && entry.key().endsWith(suffix);
  }

  /**
   * One page of a bucket listing.
   *
   * @param contents  the {@code Contents} elements of the page; may be {@code null} when the page has none
   * @param nextToken value of {@code NextContinuationToken}, or {@code null} if absent
   * @param truncated value of {@code IsTruncated}
   */
  @JsonIgnoreProperties(ignoreUnknown = true)
  @Immutable
  record IndexXml(
    @JacksonXmlProperty(localName = "Contents")
    @JacksonXmlElementWrapper(useWrapping = false) List<ContentXml> contents,
    @JacksonXmlProperty(localName = "NextContinuationToken")
    @JacksonXmlElementWrapper(useWrapping = false) String nextToken,
    @JacksonXmlProperty(localName = "IsTruncated")
    @JacksonXmlElementWrapper(useWrapping = false) boolean truncated
  ) {}

  /**
   * A single {@code Contents} element of a bucket listing.
   *
   * @param key value of the {@code Key} element, appended to the bucket URL to form a download URL
   */
  @JsonIgnoreProperties(ignoreUnknown = true)
  record ContentXml(
    @JacksonXmlProperty(localName = "Key") String key
  ) {}
}