kopia lustrzana https://github.com/onthegomap/planetiler
translation cleanup
rodzic
32d109f52a
commit
cd9122c5ad
|
@ -1,5 +1,6 @@
|
||||||
package com.onthegomap.flatmap;
|
package com.onthegomap.flatmap;
|
||||||
|
|
||||||
|
import com.graphhopper.reader.ReaderElement;
|
||||||
import com.graphhopper.reader.ReaderRelation;
|
import com.graphhopper.reader.ReaderRelation;
|
||||||
import com.onthegomap.flatmap.geo.GeometryException;
|
import com.onthegomap.flatmap.geo.GeometryException;
|
||||||
import com.onthegomap.flatmap.read.OpenStreetMapReader;
|
import com.onthegomap.flatmap.read.OpenStreetMapReader;
|
||||||
|
@ -39,6 +40,10 @@ public interface Profile {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
default boolean caresAboutWikidataTranslation(ReaderElement elem) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
class NullProfile implements Profile {
|
class NullProfile implements Profile {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -11,7 +11,6 @@ import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||||
import com.graphhopper.coll.GHLongObjectHashMap;
|
import com.graphhopper.coll.GHLongObjectHashMap;
|
||||||
import com.graphhopper.reader.ReaderElement;
|
import com.graphhopper.reader.ReaderElement;
|
||||||
import com.graphhopper.reader.ReaderElementUtils;
|
|
||||||
import com.graphhopper.util.StopWatch;
|
import com.graphhopper.util.StopWatch;
|
||||||
import com.onthegomap.flatmap.monitoring.ProgressLoggers;
|
import com.onthegomap.flatmap.monitoring.ProgressLoggers;
|
||||||
import com.onthegomap.flatmap.monitoring.Stats;
|
import com.onthegomap.flatmap.monitoring.Stats;
|
||||||
|
@ -63,11 +62,13 @@ public class Wikidata {
|
||||||
private final Writer writer;
|
private final Writer writer;
|
||||||
private final Client client;
|
private final Client client;
|
||||||
private final int batchSize;
|
private final int batchSize;
|
||||||
|
private final Profile profile;
|
||||||
|
|
||||||
public Wikidata(Writer writer, Client client, int batchSize) {
|
public Wikidata(Writer writer, Client client, int batchSize, Profile profile) {
|
||||||
this.writer = writer;
|
this.writer = writer;
|
||||||
this.client = client;
|
this.client = client;
|
||||||
this.batchSize = batchSize;
|
this.batchSize = batchSize;
|
||||||
|
this.profile = profile;
|
||||||
qidsToFetch = new ArrayList<>(batchSize);
|
qidsToFetch = new ArrayList<>(batchSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -129,7 +130,7 @@ public class Wikidata {
|
||||||
WikidataTranslations oldMappings = load(outfile);
|
WikidataTranslations oldMappings = load(outfile);
|
||||||
try (Writer writer = Files.newBufferedWriter(outfile)) {
|
try (Writer writer = Files.newBufferedWriter(outfile)) {
|
||||||
HttpClient client = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(30)).build();
|
HttpClient client = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(30)).build();
|
||||||
Wikidata fetcher = new Wikidata(writer, Client.wrap(client), 5_000);
|
Wikidata fetcher = new Wikidata(writer, Client.wrap(client), 5_000, profile);
|
||||||
fetcher.loadExisting(oldMappings);
|
fetcher.loadExisting(oldMappings);
|
||||||
|
|
||||||
var topology = Topology.start("wikidata", stats)
|
var topology = Topology.start("wikidata", stats)
|
||||||
|
@ -213,7 +214,6 @@ public class Wikidata {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void filter(Supplier<ReaderElement> prev, Consumer<Long> next) {
|
private void filter(Supplier<ReaderElement> prev, Consumer<Long> next) {
|
||||||
TrackUsageMapping qidTracker = new TrackUsageMapping();
|
|
||||||
ReaderElement elem;
|
ReaderElement elem;
|
||||||
while ((elem = prev.get()) != null) {
|
while ((elem = prev.get()) != null) {
|
||||||
switch (elem.getType()) {
|
switch (elem.getType()) {
|
||||||
|
@ -221,12 +221,13 @@ public class Wikidata {
|
||||||
case ReaderElement.WAY -> ways.incrementAndGet();
|
case ReaderElement.WAY -> ways.incrementAndGet();
|
||||||
case ReaderElement.RELATION -> rels.incrementAndGet();
|
case ReaderElement.RELATION -> rels.incrementAndGet();
|
||||||
}
|
}
|
||||||
if (elem.hasTag("wikidata")) {
|
Object wikidata = elem.getTag("wikidata");
|
||||||
qidTracker.qid = 0;
|
if (wikidata instanceof String wikidataString) {
|
||||||
// TODO send reader element through profile
|
if (profile.caresAboutWikidataTranslation(elem)) {
|
||||||
qidTracker.getNameTranslations(ReaderElementUtils.getProperties(elem));
|
long qid = parseQid(wikidataString);
|
||||||
if (qidTracker.qid > 0) {
|
if (qid > 0) {
|
||||||
next.accept(qidTracker.qid);
|
next.accept(qid);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -335,15 +336,4 @@ public class Wikidata {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class TrackUsageMapping extends WikidataTranslations {
|
|
||||||
|
|
||||||
public long qid = 0;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Map<String, String> get(long qid) {
|
|
||||||
this.qid = qid;
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
package com.onthegomap.flatmap;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
public class TranslationsTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNull() {
|
||||||
|
var translations = Translations.nullProvider(List.of("en"));
|
||||||
|
assertEquals(Map.of(), translations.getTranslations(Map.of("name:en", "name")));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDefaultProvider() {
|
||||||
|
var translations = Translations.defaultProvider(List.of("en"));
|
||||||
|
assertEquals(Map.of("name:en", "name"), translations.getTranslations(Map.of("name:en", "name", "name:de", "de")));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTwoProviders() {
|
||||||
|
var translations = Translations.defaultProvider(List.of("en", "es", "de"))
|
||||||
|
.addTranslationProvider(elem -> Map.of("name:de", "de2", "name:en", "en2"));
|
||||||
|
assertEquals(Map.of("name:en", "en2", "name:es", "es1", "name:de", "de2"),
|
||||||
|
translations.getTranslations(Map.of("name:en", "en1", "name:es", "es1")));
|
||||||
|
}
|
||||||
|
}
|
|
@ -28,6 +28,8 @@ import org.mockito.Mockito;
|
||||||
|
|
||||||
public class WikidataTest {
|
public class WikidataTest {
|
||||||
|
|
||||||
|
Profile profile = new Profile.NullProfile();
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testWikidataTranslations() {
|
public void testWikidataTranslations() {
|
||||||
var expected = Map.of("en", "en value", "es", "es value");
|
var expected = Map.of("en", "en value", "es", "es value");
|
||||||
|
@ -48,7 +50,7 @@ public class WikidataTest {
|
||||||
public List<DynamicTest> testFetchWikidata() throws IOException, InterruptedException {
|
public List<DynamicTest> testFetchWikidata() throws IOException, InterruptedException {
|
||||||
StringWriter writer = new StringWriter();
|
StringWriter writer = new StringWriter();
|
||||||
Wikidata.Client client = Mockito.mock(Wikidata.Client.class, Mockito.RETURNS_SMART_NULLS);
|
Wikidata.Client client = Mockito.mock(Wikidata.Client.class, Mockito.RETURNS_SMART_NULLS);
|
||||||
Wikidata fixture = new Wikidata(writer, client, 2);
|
Wikidata fixture = new Wikidata(writer, client, 2, profile);
|
||||||
fixture.fetch(1L);
|
fixture.fetch(1L);
|
||||||
Mockito.verifyNoInteractions(client);
|
Mockito.verifyNoInteractions(client);
|
||||||
Mockito.when(client.send(Mockito.any())).thenReturn(new ByteArrayInputStream("""
|
Mockito.when(client.send(Mockito.any())).thenReturn(new ByteArrayInputStream("""
|
||||||
|
@ -117,7 +119,7 @@ public class WikidataTest {
|
||||||
dynamicTest("do not re-request on subsequent loads", () -> {
|
dynamicTest("do not re-request on subsequent loads", () -> {
|
||||||
StringWriter writer2 = new StringWriter();
|
StringWriter writer2 = new StringWriter();
|
||||||
Wikidata.Client client2 = Mockito.mock(Wikidata.Client.class, Mockito.RETURNS_SMART_NULLS);
|
Wikidata.Client client2 = Mockito.mock(Wikidata.Client.class, Mockito.RETURNS_SMART_NULLS);
|
||||||
Wikidata fixture2 = new Wikidata(writer2, client2, 2);
|
Wikidata fixture2 = new Wikidata(writer2, client2, 2, profile);
|
||||||
fixture2.loadExisting(Wikidata.load(new BufferedReader(new StringReader(writer.toString()))));
|
fixture2.loadExisting(Wikidata.load(new BufferedReader(new StringReader(writer.toString()))));
|
||||||
fixture2.fetch(1L);
|
fixture2.fetch(1L);
|
||||||
fixture2.fetch(2L);
|
fixture2.fetch(2L);
|
||||||
|
|
|
@ -147,6 +147,10 @@ public record MultiExpression<T>(Map<T, Expression> expressions) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean matches(Map<String, Object> input) {
|
||||||
|
return !getMatchesWithTriggers(input).isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
public static record MatchWithTriggers<T>(T match, List<String> keys) {}
|
public static record MatchWithTriggers<T>(T match, List<String> keys) {}
|
||||||
|
|
||||||
public List<MatchWithTriggers<T>> getMatchesWithTriggers(Map<String, Object> input) {
|
public List<MatchWithTriggers<T>> getMatchesWithTriggers(Map<String, Object> input) {
|
||||||
|
|
|
@ -4,6 +4,8 @@ import static com.onthegomap.flatmap.openmaptiles.Expression.FALSE;
|
||||||
import static com.onthegomap.flatmap.openmaptiles.Expression.TRUE;
|
import static com.onthegomap.flatmap.openmaptiles.Expression.TRUE;
|
||||||
import static com.onthegomap.flatmap.openmaptiles.Expression.matchType;
|
import static com.onthegomap.flatmap.openmaptiles.Expression.matchType;
|
||||||
|
|
||||||
|
import com.graphhopper.reader.ReaderElement;
|
||||||
|
import com.graphhopper.reader.ReaderElementUtils;
|
||||||
import com.graphhopper.reader.ReaderRelation;
|
import com.graphhopper.reader.ReaderRelation;
|
||||||
import com.onthegomap.flatmap.Arguments;
|
import com.onthegomap.flatmap.Arguments;
|
||||||
import com.onthegomap.flatmap.FeatureCollector;
|
import com.onthegomap.flatmap.FeatureCollector;
|
||||||
|
@ -178,6 +180,17 @@ public class OpenMapTilesProfile implements Profile {
|
||||||
throws GeometryException;
|
throws GeometryException;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean caresAboutWikidataTranslation(ReaderElement elem) {
|
||||||
|
var tags = ReaderElementUtils.getProperties(elem);
|
||||||
|
return switch (elem.getType()) {
|
||||||
|
case ReaderElement.WAY -> osmPolygonMappings.matches(tags) || osmLineMappings.matches(tags);
|
||||||
|
case ReaderElement.NODE -> osmPointMappings.matches(tags);
|
||||||
|
case ReaderElement.RELATION -> osmPolygonMappings.matches(tags);
|
||||||
|
default -> false;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String name() {
|
public String name() {
|
||||||
return Layers.NAME;
|
return Layers.NAME;
|
||||||
|
|
|
@ -1,17 +1,25 @@
|
||||||
package com.onthegomap.flatmap.openmaptiles;
|
package com.onthegomap.flatmap.openmaptiles;
|
||||||
|
|
||||||
import com.onthegomap.flatmap.Arguments;
|
import com.onthegomap.flatmap.Arguments;
|
||||||
|
import com.onthegomap.flatmap.CommonParams;
|
||||||
import com.onthegomap.flatmap.FlatMapRunner;
|
import com.onthegomap.flatmap.FlatMapRunner;
|
||||||
import com.onthegomap.flatmap.Translations;
|
import com.onthegomap.flatmap.Translations;
|
||||||
import com.onthegomap.flatmap.Wikidata;
|
import com.onthegomap.flatmap.Wikidata;
|
||||||
|
import com.onthegomap.flatmap.monitoring.Stats;
|
||||||
import com.onthegomap.flatmap.openmaptiles.generated.Layers;
|
import com.onthegomap.flatmap.openmaptiles.generated.Layers;
|
||||||
|
import com.onthegomap.flatmap.read.OsmInputFile;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
public class OpenMaptilesMain {
|
public class OpenMaptilesMain {
|
||||||
|
|
||||||
|
private static final Logger LOGGER = LoggerFactory.getLogger(OpenMaptilesMain.class);
|
||||||
|
private static final String fallbackOsmFile = "north-america_us_massachusetts.pbf";
|
||||||
|
private static final Path sourcesDir = Path.of("data", "sources");
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
Path sourcesDir = Path.of("data", "sources");
|
|
||||||
|
|
||||||
FlatMapRunner runner = FlatMapRunner.create();
|
FlatMapRunner runner = FlatMapRunner.create();
|
||||||
|
|
||||||
|
@ -23,24 +31,35 @@ public class OpenMaptilesMain {
|
||||||
// sourcesDir.resolve("water-polygons-split-3857.zip"))
|
// sourcesDir.resolve("water-polygons-split-3857.zip"))
|
||||||
// .addNaturalEarthSource(OpenMapTilesProfile.NATURAL_EARTH_SOURCE,
|
// .addNaturalEarthSource(OpenMapTilesProfile.NATURAL_EARTH_SOURCE,
|
||||||
// sourcesDir.resolve("natural_earth_vector.sqlite.zip"))
|
// sourcesDir.resolve("natural_earth_vector.sqlite.zip"))
|
||||||
.addOsmSource(OpenMapTilesProfile.OSM_SOURCE, sourcesDir.resolve("north-america_us_massachusetts.pbf"))
|
.addOsmSource(OpenMapTilesProfile.OSM_SOURCE, sourcesDir.resolve(fallbackOsmFile))
|
||||||
.setOutput("mbtiles", Path.of("data", "massachusetts.mbtiles"))
|
.setOutput("mbtiles", Path.of("data", "massachusetts.mbtiles"))
|
||||||
.run();
|
.run();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static OpenMapTilesProfile createProfileWithWikidataTranslations(FlatMapRunner runner) {
|
private static OpenMapTilesProfile createProfileWithWikidataTranslations(FlatMapRunner runner) throws Exception {
|
||||||
Arguments arguments = runner.arguments();
|
Arguments arguments = runner.arguments();
|
||||||
boolean fetchWikidata = arguments.get("fetch_wikidata", "fetch wikidata translations", false);
|
boolean fetchWikidata = arguments.get("fetch_wikidata", "fetch wikidata translations then continue", false);
|
||||||
|
boolean onlyFetchWikidata = arguments.get("only_fetch_wikidata", "fetch wikidata translations then quit", false);
|
||||||
boolean useWikidata = arguments.get("use_wikidata", "use wikidata translations", true);
|
boolean useWikidata = arguments.get("use_wikidata", "use wikidata translations", true);
|
||||||
boolean transliterate = arguments.get("transliterate", "attempt to transliterate latin names", true);
|
boolean transliterate = arguments.get("transliterate", "attempt to transliterate latin names", true);
|
||||||
Path wikidataNamesFile = arguments.file("wikidata_cache", "wikidata cache file",
|
Path wikidataNamesFile = arguments.file("wikidata_cache", "wikidata cache file",
|
||||||
Path.of("data", "sources", "wikidata_names.json"));
|
Path.of("data", "sources", "wikidata_names.json"));
|
||||||
// most common languages: "en,ru,ar,zh,ja,ko,fr,de,fi,pl,es,be,br,he"
|
// most common languages: "en,ru,ar,zh,ja,ko,fr,de,fi,pl,es,be,br,he"
|
||||||
List<String> languages = arguments.get("name_languages", "languages to use",
|
List<String> languages = arguments
|
||||||
Layers.LANGUAGES.toArray(String[]::new));
|
.get("name_languages", "languages to use", Layers.LANGUAGES.toArray(String[]::new));
|
||||||
var translations = Translations.defaultProvider(languages).setShouldTransliterate(transliterate);
|
var translations = Translations.defaultProvider(languages).setShouldTransliterate(transliterate);
|
||||||
var profile = new OpenMapTilesProfile(translations, arguments, runner.stats());
|
var profile = new OpenMapTilesProfile(translations, arguments, runner.stats());
|
||||||
|
|
||||||
|
if (onlyFetchWikidata) {
|
||||||
|
LOGGER.info("Will fetch wikidata translations then quit...");
|
||||||
|
var osmInput = new OsmInputFile(
|
||||||
|
arguments.inputFile(OpenMapTilesProfile.OSM_SOURCE, "input file", sourcesDir.resolve(fallbackOsmFile)));
|
||||||
|
Wikidata
|
||||||
|
.fetch(osmInput, wikidataNamesFile, CommonParams.from(arguments, osmInput), profile, new Stats.InMemory());
|
||||||
|
translations.addTranslationProvider(Wikidata.load(wikidataNamesFile));
|
||||||
|
System.exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
if (useWikidata) {
|
if (useWikidata) {
|
||||||
if (fetchWikidata) {
|
if (fetchWikidata) {
|
||||||
runner.addStage("wikidata", "fetch translations from wikidata query service", () -> {
|
runner.addStage("wikidata", "fetch translations from wikidata query service", () -> {
|
||||||
|
|
|
@ -6,8 +6,11 @@ import static com.onthegomap.flatmap.TestUtils.newPoint;
|
||||||
import static com.onthegomap.flatmap.TestUtils.newPolygon;
|
import static com.onthegomap.flatmap.TestUtils.newPolygon;
|
||||||
import static com.onthegomap.flatmap.openmaptiles.OpenMapTilesProfile.OSM_SOURCE;
|
import static com.onthegomap.flatmap.openmaptiles.OpenMapTilesProfile.OSM_SOURCE;
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
import static org.junit.jupiter.api.DynamicTest.dynamicTest;
|
import static org.junit.jupiter.api.DynamicTest.dynamicTest;
|
||||||
|
|
||||||
|
import com.graphhopper.reader.ReaderNode;
|
||||||
import com.onthegomap.flatmap.Arguments;
|
import com.onthegomap.flatmap.Arguments;
|
||||||
import com.onthegomap.flatmap.CommonParams;
|
import com.onthegomap.flatmap.CommonParams;
|
||||||
import com.onthegomap.flatmap.FeatureCollector;
|
import com.onthegomap.flatmap.FeatureCollector;
|
||||||
|
@ -380,6 +383,16 @@ public class OpenMaptilesProfileTest {
|
||||||
))));
|
))));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCaresAboutWikidata() {
|
||||||
|
var node = new ReaderNode(1, 1, 1);
|
||||||
|
node.setTag("aeroway", "gate");
|
||||||
|
assertTrue(profile.caresAboutWikidataTranslation(node));
|
||||||
|
|
||||||
|
node.setTag("aeroway", "other");
|
||||||
|
assertFalse(profile.caresAboutWikidataTranslation(node));
|
||||||
|
}
|
||||||
|
|
||||||
private VectorTileEncoder.Feature pointFeature(String layer, Map<String, Object> map, int group) {
|
private VectorTileEncoder.Feature pointFeature(String layer, Map<String, Object> map, int group) {
|
||||||
return new VectorTileEncoder.Feature(
|
return new VectorTileEncoder.Feature(
|
||||||
layer,
|
layer,
|
||||||
|
|
Ładowanie…
Reference in New Issue