translation cleanup

pull/1/head
Mike Barry 2021-06-18 08:31:08 -04:00
rodzic 32d109f52a
commit cd9122c5ad
8 zmienionych plików z 105 dodań i 29 usunięć

Wyświetl plik

@ -1,5 +1,6 @@
package com.onthegomap.flatmap; package com.onthegomap.flatmap;
import com.graphhopper.reader.ReaderElement;
import com.graphhopper.reader.ReaderRelation; import com.graphhopper.reader.ReaderRelation;
import com.onthegomap.flatmap.geo.GeometryException; import com.onthegomap.flatmap.geo.GeometryException;
import com.onthegomap.flatmap.read.OpenStreetMapReader; import com.onthegomap.flatmap.read.OpenStreetMapReader;
@ -39,6 +40,10 @@ public interface Profile {
return false; return false;
} }
/**
 * Reports whether this profile wants a wikidata name translation looked up for {@code elem}.
 * <p>
 * {@code Wikidata.filter} consults this for every element that carries a string {@code wikidata} tag and only
 * forwards the parsed QID when the answer is {@code true}. This default accepts every element; a profile can
 * override it to narrow the set of elements it is interested in.
 *
 * @param elem the element read from the input
 * @return {@code true} always, in this default implementation
 */
default boolean caresAboutWikidataTranslation(ReaderElement elem) {
return true;
}
class NullProfile implements Profile { class NullProfile implements Profile {
@Override @Override

Wyświetl plik

@ -11,7 +11,6 @@ import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode; import com.fasterxml.jackson.databind.node.ObjectNode;
import com.graphhopper.coll.GHLongObjectHashMap; import com.graphhopper.coll.GHLongObjectHashMap;
import com.graphhopper.reader.ReaderElement; import com.graphhopper.reader.ReaderElement;
import com.graphhopper.reader.ReaderElementUtils;
import com.graphhopper.util.StopWatch; import com.graphhopper.util.StopWatch;
import com.onthegomap.flatmap.monitoring.ProgressLoggers; import com.onthegomap.flatmap.monitoring.ProgressLoggers;
import com.onthegomap.flatmap.monitoring.Stats; import com.onthegomap.flatmap.monitoring.Stats;
@ -63,11 +62,13 @@ public class Wikidata {
private final Writer writer; private final Writer writer;
private final Client client; private final Client client;
private final int batchSize; private final int batchSize;
private final Profile profile;
public Wikidata(Writer writer, Client client, int batchSize) { public Wikidata(Writer writer, Client client, int batchSize, Profile profile) {
this.writer = writer; this.writer = writer;
this.client = client; this.client = client;
this.batchSize = batchSize; this.batchSize = batchSize;
this.profile = profile;
qidsToFetch = new ArrayList<>(batchSize); qidsToFetch = new ArrayList<>(batchSize);
} }
@ -129,7 +130,7 @@ public class Wikidata {
WikidataTranslations oldMappings = load(outfile); WikidataTranslations oldMappings = load(outfile);
try (Writer writer = Files.newBufferedWriter(outfile)) { try (Writer writer = Files.newBufferedWriter(outfile)) {
HttpClient client = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(30)).build(); HttpClient client = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(30)).build();
Wikidata fetcher = new Wikidata(writer, Client.wrap(client), 5_000); Wikidata fetcher = new Wikidata(writer, Client.wrap(client), 5_000, profile);
fetcher.loadExisting(oldMappings); fetcher.loadExisting(oldMappings);
var topology = Topology.start("wikidata", stats) var topology = Topology.start("wikidata", stats)
@ -213,7 +214,6 @@ public class Wikidata {
} }
private void filter(Supplier<ReaderElement> prev, Consumer<Long> next) { private void filter(Supplier<ReaderElement> prev, Consumer<Long> next) {
TrackUsageMapping qidTracker = new TrackUsageMapping();
ReaderElement elem; ReaderElement elem;
while ((elem = prev.get()) != null) { while ((elem = prev.get()) != null) {
switch (elem.getType()) { switch (elem.getType()) {
@ -221,12 +221,13 @@ public class Wikidata {
case ReaderElement.WAY -> ways.incrementAndGet(); case ReaderElement.WAY -> ways.incrementAndGet();
case ReaderElement.RELATION -> rels.incrementAndGet(); case ReaderElement.RELATION -> rels.incrementAndGet();
} }
if (elem.hasTag("wikidata")) { Object wikidata = elem.getTag("wikidata");
qidTracker.qid = 0; if (wikidata instanceof String wikidataString) {
// TODO send reader element through profile if (profile.caresAboutWikidataTranslation(elem)) {
qidTracker.getNameTranslations(ReaderElementUtils.getProperties(elem)); long qid = parseQid(wikidataString);
if (qidTracker.qid > 0) { if (qid > 0) {
next.accept(qidTracker.qid); next.accept(qid);
}
} }
} }
} }
@ -335,15 +336,4 @@ public class Wikidata {
return null; return null;
} }
} }
/**
 * Stand-in translations lookup that returns no translations and only remembers the last QID it was asked for.
 * <p>
 * {@code filter} resets {@link #qid} to 0, calls {@code getNameTranslations(...)} with an element's properties and
 * then reads {@link #qid} back. Presumably {@code getNameTranslations} calls {@link #get(long)} when the tags carry
 * a parseable wikidata ID - confirm against {@code WikidataTranslations}.
 */
private static class TrackUsageMapping extends WikidataTranslations {
// last QID passed to get(long); stays 0 until get is called
public long qid = 0;
@Override
public Map<String, String> get(long qid) {
// record the requested QID instead of looking anything up
this.qid = qid;
return null;
}
}
} }

Wyświetl plik

@ -0,0 +1,30 @@
package com.onthegomap.flatmap;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.util.List;
import java.util.Map;
import org.junit.jupiter.api.Test;
/**
 * Unit tests for {@link Translations}: the null provider, the default provider, and a default provider combined
 * with an additional translation provider.
 */
public class TranslationsTest {

  /** The null provider yields no translations even when the input has a tag for a requested language. */
  @Test
  public void testNull() {
    var provider = Translations.nullProvider(List.of("en"));
    var actual = provider.getTranslations(Map.of("name:en", "name"));
    assertEquals(Map.of(), actual);
  }

  /** The default provider keeps only the name tags for the requested languages. */
  @Test
  public void testDefaultProvider() {
    var provider = Translations.defaultProvider(List.of("en"));
    var expected = Map.of("name:en", "name");
    var actual = provider.getTranslations(Map.of("name:en", "name", "name:de", "de"));
    assertEquals(expected, actual);
  }

  /**
   * With an extra provider registered, its values appear for the languages it supplies ("en", "de") and the
   * input's own tag is kept for the language it does not supply ("es").
   */
  @Test
  public void testTwoProviders() {
    var provider = Translations.defaultProvider(List.of("en", "es", "de"));
    var combined = provider.addTranslationProvider(elem -> Map.of("name:de", "de2", "name:en", "en2"));
    var expected = Map.of("name:en", "en2", "name:es", "es1", "name:de", "de2");
    var actual = combined.getTranslations(Map.of("name:en", "en1", "name:es", "es1"));
    assertEquals(expected, actual);
  }
}

Wyświetl plik

@ -28,6 +28,8 @@ import org.mockito.Mockito;
public class WikidataTest { public class WikidataTest {
Profile profile = new Profile.NullProfile();
@Test @Test
public void testWikidataTranslations() { public void testWikidataTranslations() {
var expected = Map.of("en", "en value", "es", "es value"); var expected = Map.of("en", "en value", "es", "es value");
@ -48,7 +50,7 @@ public class WikidataTest {
public List<DynamicTest> testFetchWikidata() throws IOException, InterruptedException { public List<DynamicTest> testFetchWikidata() throws IOException, InterruptedException {
StringWriter writer = new StringWriter(); StringWriter writer = new StringWriter();
Wikidata.Client client = Mockito.mock(Wikidata.Client.class, Mockito.RETURNS_SMART_NULLS); Wikidata.Client client = Mockito.mock(Wikidata.Client.class, Mockito.RETURNS_SMART_NULLS);
Wikidata fixture = new Wikidata(writer, client, 2); Wikidata fixture = new Wikidata(writer, client, 2, profile);
fixture.fetch(1L); fixture.fetch(1L);
Mockito.verifyNoInteractions(client); Mockito.verifyNoInteractions(client);
Mockito.when(client.send(Mockito.any())).thenReturn(new ByteArrayInputStream(""" Mockito.when(client.send(Mockito.any())).thenReturn(new ByteArrayInputStream("""
@ -117,7 +119,7 @@ public class WikidataTest {
dynamicTest("do not re-request on subsequent loads", () -> { dynamicTest("do not re-request on subsequent loads", () -> {
StringWriter writer2 = new StringWriter(); StringWriter writer2 = new StringWriter();
Wikidata.Client client2 = Mockito.mock(Wikidata.Client.class, Mockito.RETURNS_SMART_NULLS); Wikidata.Client client2 = Mockito.mock(Wikidata.Client.class, Mockito.RETURNS_SMART_NULLS);
Wikidata fixture2 = new Wikidata(writer2, client2, 2); Wikidata fixture2 = new Wikidata(writer2, client2, 2, profile);
fixture2.loadExisting(Wikidata.load(new BufferedReader(new StringReader(writer.toString())))); fixture2.loadExisting(Wikidata.load(new BufferedReader(new StringReader(writer.toString()))));
fixture2.fetch(1L); fixture2.fetch(1L);
fixture2.fetch(2L); fixture2.fetch(2L);

Wyświetl plik

@ -147,6 +147,10 @@ public record MultiExpression<T>(Map<T, Expression> expressions) {
} }
} }
/**
 * Tests whether at least one expression in this multi-expression matches the given tags.
 *
 * @param input the tag map to evaluate
 * @return {@code true} when {@link #getMatchesWithTriggers(Map)} returns a non-empty list for {@code input}
 */
public boolean matches(Map<String, Object> input) {
  List<MatchWithTriggers<T>> found = getMatchesWithTriggers(input);
  boolean nothingMatched = found.isEmpty();
  return !nothingMatched;
}
public static record MatchWithTriggers<T>(T match, List<String> keys) {} public static record MatchWithTriggers<T>(T match, List<String> keys) {}
public List<MatchWithTriggers<T>> getMatchesWithTriggers(Map<String, Object> input) { public List<MatchWithTriggers<T>> getMatchesWithTriggers(Map<String, Object> input) {

Wyświetl plik

@ -4,6 +4,8 @@ import static com.onthegomap.flatmap.openmaptiles.Expression.FALSE;
import static com.onthegomap.flatmap.openmaptiles.Expression.TRUE; import static com.onthegomap.flatmap.openmaptiles.Expression.TRUE;
import static com.onthegomap.flatmap.openmaptiles.Expression.matchType; import static com.onthegomap.flatmap.openmaptiles.Expression.matchType;
import com.graphhopper.reader.ReaderElement;
import com.graphhopper.reader.ReaderElementUtils;
import com.graphhopper.reader.ReaderRelation; import com.graphhopper.reader.ReaderRelation;
import com.onthegomap.flatmap.Arguments; import com.onthegomap.flatmap.Arguments;
import com.onthegomap.flatmap.FeatureCollector; import com.onthegomap.flatmap.FeatureCollector;
@ -178,6 +180,17 @@ public class OpenMapTilesProfile implements Profile {
throws GeometryException; throws GeometryException;
} }
/**
 * Restricts wikidata translation lookups to elements that one of this profile's mappings matches.
 * <p>
 * Nodes are checked against the point mappings, relations against the polygon mappings, and ways against the
 * polygon mappings and then the line mappings. Any other element type is rejected.
 *
 * @param elem the element read from the input
 * @return {@code true} if a mapping for the element's type matches its tags
 */
@Override
public boolean caresAboutWikidataTranslation(ReaderElement elem) {
  var properties = ReaderElementUtils.getProperties(elem);
  int type = elem.getType();
  if (type == ReaderElement.WAY) {
    // line mappings are only consulted when no polygon mapping matched
    if (osmPolygonMappings.matches(properties)) {
      return true;
    }
    return osmLineMappings.matches(properties);
  }
  if (type == ReaderElement.NODE) {
    return osmPointMappings.matches(properties);
  }
  if (type == ReaderElement.RELATION) {
    return osmPolygonMappings.matches(properties);
  }
  return false;
}
@Override @Override
public String name() { public String name() {
return Layers.NAME; return Layers.NAME;

Wyświetl plik

@ -1,17 +1,25 @@
package com.onthegomap.flatmap.openmaptiles; package com.onthegomap.flatmap.openmaptiles;
import com.onthegomap.flatmap.Arguments; import com.onthegomap.flatmap.Arguments;
import com.onthegomap.flatmap.CommonParams;
import com.onthegomap.flatmap.FlatMapRunner; import com.onthegomap.flatmap.FlatMapRunner;
import com.onthegomap.flatmap.Translations; import com.onthegomap.flatmap.Translations;
import com.onthegomap.flatmap.Wikidata; import com.onthegomap.flatmap.Wikidata;
import com.onthegomap.flatmap.monitoring.Stats;
import com.onthegomap.flatmap.openmaptiles.generated.Layers; import com.onthegomap.flatmap.openmaptiles.generated.Layers;
import com.onthegomap.flatmap.read.OsmInputFile;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.List; import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class OpenMaptilesMain { public class OpenMaptilesMain {
private static final Logger LOGGER = LoggerFactory.getLogger(OpenMaptilesMain.class);
private static final String fallbackOsmFile = "north-america_us_massachusetts.pbf";
private static final Path sourcesDir = Path.of("data", "sources");
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
Path sourcesDir = Path.of("data", "sources");
FlatMapRunner runner = FlatMapRunner.create(); FlatMapRunner runner = FlatMapRunner.create();
@ -23,24 +31,35 @@ public class OpenMaptilesMain {
// sourcesDir.resolve("water-polygons-split-3857.zip")) // sourcesDir.resolve("water-polygons-split-3857.zip"))
// .addNaturalEarthSource(OpenMapTilesProfile.NATURAL_EARTH_SOURCE, // .addNaturalEarthSource(OpenMapTilesProfile.NATURAL_EARTH_SOURCE,
// sourcesDir.resolve("natural_earth_vector.sqlite.zip")) // sourcesDir.resolve("natural_earth_vector.sqlite.zip"))
.addOsmSource(OpenMapTilesProfile.OSM_SOURCE, sourcesDir.resolve("north-america_us_massachusetts.pbf")) .addOsmSource(OpenMapTilesProfile.OSM_SOURCE, sourcesDir.resolve(fallbackOsmFile))
.setOutput("mbtiles", Path.of("data", "massachusetts.mbtiles")) .setOutput("mbtiles", Path.of("data", "massachusetts.mbtiles"))
.run(); .run();
} }
private static OpenMapTilesProfile createProfileWithWikidataTranslations(FlatMapRunner runner) { private static OpenMapTilesProfile createProfileWithWikidataTranslations(FlatMapRunner runner) throws Exception {
Arguments arguments = runner.arguments(); Arguments arguments = runner.arguments();
boolean fetchWikidata = arguments.get("fetch_wikidata", "fetch wikidata translations", false); boolean fetchWikidata = arguments.get("fetch_wikidata", "fetch wikidata translations then continue", false);
boolean onlyFetchWikidata = arguments.get("only_fetch_wikidata", "fetch wikidata translations then quit", false);
boolean useWikidata = arguments.get("use_wikidata", "use wikidata translations", true); boolean useWikidata = arguments.get("use_wikidata", "use wikidata translations", true);
boolean transliterate = arguments.get("transliterate", "attempt to transliterate latin names", true); boolean transliterate = arguments.get("transliterate", "attempt to transliterate latin names", true);
Path wikidataNamesFile = arguments.file("wikidata_cache", "wikidata cache file", Path wikidataNamesFile = arguments.file("wikidata_cache", "wikidata cache file",
Path.of("data", "sources", "wikidata_names.json")); Path.of("data", "sources", "wikidata_names.json"));
// most common languages: "en,ru,ar,zh,ja,ko,fr,de,fi,pl,es,be,br,he" // most common languages: "en,ru,ar,zh,ja,ko,fr,de,fi,pl,es,be,br,he"
List<String> languages = arguments.get("name_languages", "languages to use", List<String> languages = arguments
Layers.LANGUAGES.toArray(String[]::new)); .get("name_languages", "languages to use", Layers.LANGUAGES.toArray(String[]::new));
var translations = Translations.defaultProvider(languages).setShouldTransliterate(transliterate); var translations = Translations.defaultProvider(languages).setShouldTransliterate(transliterate);
var profile = new OpenMapTilesProfile(translations, arguments, runner.stats()); var profile = new OpenMapTilesProfile(translations, arguments, runner.stats());
if (onlyFetchWikidata) {
LOGGER.info("Will fetch wikidata translations then quit...");
var osmInput = new OsmInputFile(
arguments.inputFile(OpenMapTilesProfile.OSM_SOURCE, "input file", sourcesDir.resolve(fallbackOsmFile)));
Wikidata
.fetch(osmInput, wikidataNamesFile, CommonParams.from(arguments, osmInput), profile, new Stats.InMemory());
translations.addTranslationProvider(Wikidata.load(wikidataNamesFile));
System.exit(0);
}
if (useWikidata) { if (useWikidata) {
if (fetchWikidata) { if (fetchWikidata) {
runner.addStage("wikidata", "fetch translations from wikidata query service", () -> { runner.addStage("wikidata", "fetch translations from wikidata query service", () -> {

Wyświetl plik

@ -6,8 +6,11 @@ import static com.onthegomap.flatmap.TestUtils.newPoint;
import static com.onthegomap.flatmap.TestUtils.newPolygon; import static com.onthegomap.flatmap.TestUtils.newPolygon;
import static com.onthegomap.flatmap.openmaptiles.OpenMapTilesProfile.OSM_SOURCE; import static com.onthegomap.flatmap.openmaptiles.OpenMapTilesProfile.OSM_SOURCE;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.DynamicTest.dynamicTest; import static org.junit.jupiter.api.DynamicTest.dynamicTest;
import com.graphhopper.reader.ReaderNode;
import com.onthegomap.flatmap.Arguments; import com.onthegomap.flatmap.Arguments;
import com.onthegomap.flatmap.CommonParams; import com.onthegomap.flatmap.CommonParams;
import com.onthegomap.flatmap.FeatureCollector; import com.onthegomap.flatmap.FeatureCollector;
@ -380,6 +383,16 @@ public class OpenMaptilesProfileTest {
)))); ))));
} }
/**
 * The profile asks for wikidata translations on a node tagged {@code aeroway=gate} and stops asking once the same
 * node's tag is changed to {@code aeroway=other}.
 */
@Test
public void testCaresAboutWikidata() {
  ReaderNode poi = new ReaderNode(1, 1, 1);

  // tagged as an aeroway gate: expected to be accepted
  poi.setTag("aeroway", "gate");
  boolean caresAboutGate = profile.caresAboutWikidataTranslation(poi);
  assertTrue(caresAboutGate);

  // same node with a different aeroway value: expected to be rejected
  poi.setTag("aeroway", "other");
  boolean caresAboutOther = profile.caresAboutWikidataTranslation(poi);
  assertFalse(caresAboutOther);
}
private VectorTileEncoder.Feature pointFeature(String layer, Map<String, Object> map, int group) { private VectorTileEncoder.Feature pointFeature(String layer, Map<String, Object> map, int group) {
return new VectorTileEncoder.Feature( return new VectorTileEncoder.Feature(
layer, layer,