Memory-mapped file improvements (#103)

* Use large memory-mapped file segments to avoid running out of space on smaller machines
* Add `--nodemap-madvise` argument to opt into madvise(random) for memory-mapped file access
pull/138/head
Michael Barry 2022-03-09 07:22:33 -05:00 zatwierdzone przez GitHub
rodzic 1c27d833b2
commit 0a064797fb
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
12 zmienionych plików z 293 dodań i 49 usunięć

Wyświetl plik

@ -22,6 +22,7 @@ The `planetiler-core` module includes the following software:
- com.google.guava:guava (Apache license)
- org.openstreetmap.osmosis:osmosis-osm-binary (LGPL 3.0)
- com.carrotsearch:hppc (Apache license)
- com.github.jnr:jnr-ffi (Apache license)
- Adapted code:
- `DouglasPeuckerSimplifier` from [JTS](https://github.com/locationtech/jts) (EDL)
- `OsmMultipolygon` from [imposm3](https://github.com/omniscale/imposm3) (Apache license)
@ -31,6 +32,7 @@ The `planetiler-core` module includes the following software:
- `Imposm3Parsers` from [imposm3](https://github.com/omniscale/imposm3) (Apache license)
- `PbfDecoder` from [osmosis](https://github.com/openstreetmap/osmosis) (Public Domain)
- `PbfFieldDecoder` from [osmosis](https://github.com/openstreetmap/osmosis) (Public Domain)
- `NativeUtil` from [uppend](https://github.com/upserve/uppend/) (MIT License)
Additionally, the `planetiler-basemap` module is based on [OpenMapTiles](https://github.com/openmaptiles/openmaptiles):

Wyświetl plik

@ -217,6 +217,8 @@ Planetiler is made possible by these awesome open source projects:
and [tag parsing utilities](planetiler-core/src/main/java/com/onthegomap/planetiler/util/Imposm3Parsers.java)
- [HPPC](http://labs.carrotsearch.com/) for high-performance primitive Java collections
- [Osmosis](https://wiki.openstreetmap.org/wiki/Osmosis) for Java utilities to parse OpenStreetMap data
- [JNR-FFI](https://github.com/jnr/jnr-ffi) for access to low-level system calls that improve memory-mapped
file performance.
See [NOTICE.md](NOTICE.md) for a full list and license details.

Wyświetl plik

@ -25,7 +25,7 @@ public class LongLongMapBench {
Format format = Format.defaultInstance();
Path path = Path.of("./llmaptest");
FileUtils.delete(path);
LongLongMap map = LongLongMap.from(args[0], args[1], path);
LongLongMap map = LongLongMap.from(args[0], args[1], path, args.length < 5 || Boolean.parseBoolean(args[4]));
long entries = Long.parseLong(args[2]);
int readers = Integer.parseInt(args[3]);
@ -36,6 +36,7 @@ public class LongLongMapBench {
LocalCounter counter = new LocalCounter();
ProgressLoggers loggers = ProgressLoggers.create()
.addRatePercentCounter("entries", entries, () -> counter.count, true)
.addFileSize(map)
.newLine()
.addProcessStats();
AtomicReference<String> writeRate = new AtomicReference<>();

Wyświetl plik

@ -32,6 +32,11 @@
<artifactId>osmosis-osm-binary</artifactId>
<version>0.48.3</version>
</dependency>
<dependency>
<groupId>com.github.jnr</groupId>
<artifactId>jnr-ffi</artifactId>
<version>2.2.11</version>
</dependency>
<dependency>
<groupId>org.locationtech.jts</groupId>
<artifactId>jts-core</artifactId>

Wyświetl plik

@ -170,7 +170,8 @@ public class Planetiler {
),
ifSourceUsed(name, () -> {
try (
var nodeLocations = LongLongMap.from(config.nodeMapType(), config.nodeMapStorage(), nodeDbPath);
var nodeLocations =
LongLongMap.from(config.nodeMapType(), config.nodeMapStorage(), nodeDbPath, config.nodeMapMadvise());
var osmReader = new OsmReader(name, thisInputFile, nodeLocations, profile(), stats)
) {
osmReader.pass1(config);

Wyświetl plik

@ -1,12 +1,10 @@
package com.onthegomap.planetiler.collection;
import com.onthegomap.planetiler.util.FileUtils;
import com.onthegomap.planetiler.util.MmapUtil;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
@ -29,15 +27,21 @@ abstract class AppendStoreMmap implements AppendStore {
final long segmentMask;
final long segmentBytes;
private final Path path;
private final boolean madvise;
long outIdx = 0;
private volatile MappedByteBuffer[] segments;
private volatile FileChannel channel;
AppendStoreMmap(Path path) {
this(path, 1 << 20); // 1MB
static {
MmapUtil.init();
}
AppendStoreMmap(Path path, long segmentSizeBytes) {
AppendStoreMmap(Path path, boolean madvise) {
this(path, 1 << 30, madvise); // 1GB
}
AppendStoreMmap(Path path, long segmentSizeBytes, boolean madvise) {
this.madvise = madvise;
segmentBits = (int) (Math.log(segmentSizeBytes) / Math.log(2));
segmentMask = (1L << segmentBits) - 1;
segmentBytes = segmentSizeBytes;
@ -58,6 +62,7 @@ abstract class AppendStoreMmap implements AppendStore {
synchronized (this) {
if ((result = segments) == null) {
try {
boolean madviseFailed = false;
// prepare the memory mapped file: stop writing, start reading
outputStream.close();
channel = FileChannel.open(path, StandardOpenOption.READ);
@ -66,7 +71,19 @@ abstract class AppendStoreMmap implements AppendStore {
int i = 0;
for (long segmentStart = 0; segmentStart < outIdx; segmentStart += segmentBytes) {
long segmentEnd = Math.min(segmentBytes, outIdx - segmentStart);
result[i++] = channel.map(FileChannel.MapMode.READ_ONLY, segmentStart, segmentEnd);
MappedByteBuffer thisBuffer = channel.map(FileChannel.MapMode.READ_ONLY, segmentStart, segmentEnd);
if (madvise) {
try {
MmapUtil.madvise(thisBuffer, MmapUtil.Madvice.RANDOM);
} catch (IOException e) {
if (!madviseFailed) { // log once
LOGGER.info(
"madvise not available on this system - node location lookup may be slower when less free RAM is available outside the JVM");
madviseFailed = true;
}
}
}
result[i++] = thisBuffer;
}
segments = result;
} catch (IOException e) {
@ -87,27 +104,8 @@ abstract class AppendStoreMmap implements AppendStore {
}
if (segments != null) {
try {
// attempt to force-unmap the file, so we can delete it later
// https://stackoverflow.com/questions/2972986/how-to-unmap-a-file-from-memory-mapped-using-filechannel-in-java
Class<?> unsafeClass;
try {
unsafeClass = Class.forName("sun.misc.Unsafe");
} catch (Exception ex) {
unsafeClass = Class.forName("jdk.internal.misc.Unsafe");
}
Method clean = unsafeClass.getMethod("invokeCleaner", ByteBuffer.class);
clean.setAccessible(true);
Field theUnsafeField = unsafeClass.getDeclaredField("theUnsafe");
theUnsafeField.setAccessible(true);
Object theUnsafe = theUnsafeField.get(null);
for (int i = 0; i < segments.length; i++) {
var buffer = segments[i];
if (buffer != null) {
clean.invoke(theUnsafe, buffer);
segments[i] = null;
}
}
} catch (Exception e) {
MmapUtil.unmap(segments);
} catch (IOException e) {
LOGGER.info("Unable to unmap " + path + " " + e);
}
Arrays.fill(segments, null);
@ -122,12 +120,12 @@ abstract class AppendStoreMmap implements AppendStore {
static class Ints extends AppendStoreMmap implements AppendStore.Ints {
Ints(Path path) {
super(path);
Ints(Path path, boolean madvise) {
super(path, madvise);
}
Ints(Path path, long segmentSizeBytes) {
super(path, segmentSizeBytes);
Ints(Path path, long segmentSizeBytes, boolean madvise) {
super(path, segmentSizeBytes, madvise);
}
@Override
@ -158,12 +156,12 @@ abstract class AppendStoreMmap implements AppendStore {
static class Longs extends AppendStoreMmap implements AppendStore.Longs {
Longs(Path path) {
super(path);
Longs(Path path, boolean madvise) {
super(path, madvise);
}
Longs(Path path, long segmentSizeBytes) {
super(path, segmentSizeBytes);
Longs(Path path, long segmentSizeBytes, boolean madvise) {
super(path, segmentSizeBytes, madvise);
}
@Override

Wyświetl plik

@ -40,16 +40,17 @@ public interface LongLongMap extends Closeable, MemoryEstimator.HasEstimate, Dis
* @param name which implementation to use: {@code "noop"}, {@code "sortedtable"} or {@code "sparsearray"}
* @param storage how to store data: {@code "ram"} or {@code "mmap"}
* @param path where to store data (if mmap)
* @param madvise whether to use linux madvise random to improve read performance
* @return A longlong map instance
* @throws IllegalArgumentException if {@code name} or {@code storage} is not valid
*/
static LongLongMap from(String name, String storage, Path path) {
static LongLongMap from(String name, String storage, Path path, boolean madvise) {
boolean ram = isRam(storage);
return switch (name) {
case "noop" -> noop();
case "sortedtable" -> ram ? newInMemorySortedTable() : newDiskBackedSortedTable(path);
case "sparsearray" -> ram ? newInMemorySparseArray() : newDiskBackedSparseArray(path);
case "sortedtable" -> ram ? newInMemorySortedTable() : newDiskBackedSortedTable(path, madvise);
case "sparsearray" -> ram ? newInMemorySparseArray() : newDiskBackedSparseArray(path, madvise);
default -> throw new IllegalArgumentException("Unexpected value: " + name);
};
}
@ -125,11 +126,11 @@ public interface LongLongMap extends Closeable, MemoryEstimator.HasEstimate, Dis
}
/** Returns a memory-mapped longlong map that uses 12-bytes per node and binary search to find values. */
static LongLongMap newDiskBackedSortedTable(Path dir) {
static LongLongMap newDiskBackedSortedTable(Path dir, boolean madvise) {
FileUtils.createDirectory(dir);
return new SortedTable(
new AppendStore.SmallLongs(i -> new AppendStoreMmap.Ints(dir.resolve("keys-" + i))),
new AppendStoreMmap.Longs(dir.resolve("values"))
new AppendStore.SmallLongs(i -> new AppendStoreMmap.Ints(dir.resolve("keys-" + i), madvise)),
new AppendStoreMmap.Longs(dir.resolve("values"), madvise)
);
}
@ -145,8 +146,8 @@ public interface LongLongMap extends Closeable, MemoryEstimator.HasEstimate, Dis
* Returns a memory-mapped longlong map that uses 8-bytes per node and O(1) lookup but wastes space storing lots of
* 0's when the key space is fragmented.
*/
static LongLongMap newDiskBackedSparseArray(Path path) {
return new SparseArray(new AppendStoreMmap.Longs(path));
static LongLongMap newDiskBackedSparseArray(Path path, boolean madvise) {
return new SparseArray(new AppendStoreMmap.Longs(path, madvise));
}
/**

Wyświetl plik

@ -21,6 +21,7 @@ public record PlanetilerConfig(
int sortMaxWriters,
String nodeMapType,
String nodeMapStorage,
boolean nodeMapMadvise,
String httpUserAgent,
Duration httpTimeout,
int httpRetries,
@ -75,6 +76,8 @@ public record PlanetilerConfig(
arguments
.getString("nodemap_type", "type of node location map: noop, sortedtable, or sparsearray", "sortedtable"),
arguments.getString("nodemap_storage", "storage for location map: mmap or ram", "mmap"),
arguments.getBoolean("nodemap_madvise", "use linux madvise(random) to improve memory-mapped read performance",
false),
arguments.getString("http_user_agent", "User-Agent header to set when downloading files over HTTP",
"Planetiler downloader (https://github.com/onthegomap/planetiler)"),
arguments.getDuration("http_timeout", "Timeout to use when downloading files over HTTP", "30s"),

Wyświetl plik

@ -0,0 +1,98 @@
/*
MIT License
Copyright (c) 2017 Upserve, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
package com.onthegomap.planetiler.util;
import com.kenai.jffi.MemoryIO;
import java.io.IOException;
import java.nio.ByteBuffer;
import jnr.ffi.LibraryLoader;
import jnr.ffi.types.size_t;
/**
* Wrapper for native madvise function to be used via the public API
* {@link MmapUtil#madvise(ByteBuffer, MmapUtil.Madvice)}.
* <p>
* Ported from <a href=
* "https://github.com/upserve/uppend/blob/70967c6f24d7f1a3bbc18799f485d981da93f53b/src/main/java/com/upserve/uppend/blobs/NativeIO.java">upserve/uppend/NativeIO</a>.
*
* @see <a href="https://man7.org/linux/man-pages/man2/madvise.2.html">madvise(2) Linux manual page</a>
*/
class Madvise {

  /** Binding to the C standard library, generated by JNR-FFI from the {@link NativeC} interface. */
  private static final NativeC nativeC = LibraryLoader.create(NativeC.class).load("c");

  /** System page size in bytes, or {@code -1} when the native {@code getpagesize} call could not be linked. */
  static int pageSize;

  static {
    try {
      pageSize = nativeC.getpagesize(); // 4096 on most Linux
    } catch (UnsatisfiedLinkError e) {
      // native call not available: remember that so madvise() can fail fast with an IOException
      pageSize = -1;
    }
  }

  /**
   * Rounds {@code address} down to a multiple of {@link #pageSize}.
   * <p>
   * The bit mask used here assumes the page size is a power of two.
   */
  private static long alignedAddress(long address) {
    return address & (-pageSize);
  }

  /**
   * Returns the length in bytes of the page-aligned range that covers {@code capacity} bytes starting at
   * {@code address}: from the start of the page containing {@code address} to the end of the page containing the last
   * byte.
   */
  private static long alignedSize(long address, int capacity) {
    long end = address + capacity;
    // round the end up to the next page boundary
    end = (end + pageSize - 1) & (-pageSize);
    return end - alignedAddress(address);
  }

  /**
   * Give a hint to the system how a mapped memory segment will be used so the OS can optimize performance.
   *
   * @param buffer The mapped memory segment, must be a direct buffer.
   * @param value  The advice to use.
   * @throws IOException If an error occurs, the buffer is not direct, or madvise not available on this system
   * @see <a href="https://man7.org/linux/man-pages/man2/madvise.2.html">madvise(2) Linux manual page</a>
   */
  static void madvise(ByteBuffer buffer, int value) throws IOException {
    if (pageSize <= 0) {
      throw new IOException("madvise failed, pagesize not available");
    }
    if (!buffer.isDirect()) {
      // a heap buffer has no stable native address to advise on
      throw new IOException("madvise failed, buffer is not a direct buffer");
    }
    final long address = MemoryIO.getInstance().getDirectBufferAddress(buffer);
    final int capacity = buffer.capacity();

    long alignedAddress = alignedAddress(address);
    // compute the size from the raw address (not the aligned one) so the advised range still reaches the last byte
    // of the buffer when the buffer does not start on a page boundary
    long alignedSize = alignedSize(address, capacity);
    try {
      int val = nativeC.madvise(alignedAddress, alignedSize, value);
      if (val != 0) {
        throw new IOException(String.format("System call madvise failed with code: %d", val));
      }
    } catch (UnsatisfiedLinkError error) {
      throw new IOException("madvise failed", error);
    }
  }

  /** JNR-FFI will automatically compile these to wrappers around native functions with the same signatures. */
  public interface NativeC {

    int madvise(@size_t long address, @size_t long size, int advice);

    int getpagesize();
  }
}

Wyświetl plik

@ -0,0 +1,87 @@
package com.onthegomap.planetiler.util;
import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Utilities for working with memory-mapped files.
*/
public class MmapUtil {

  private static final Logger LOGGER = LoggerFactory.getLogger(MmapUtil.class);

  /**
   * Attempts to invoke native utility and logs an error message if not available.
   * <p>
   * The probe only runs when {@code Madvise.pageSize} is negative, i.e. when the page size lookup already failed; the
   * resulting {@link IOException} is turned into a single info-level log line.
   */
  public static void init() {
    if (Madvise.pageSize < 0) {
      try {
        madvise(ByteBuffer.allocateDirect(1), Madvice.RANDOM);
      } catch (IOException e) {
        LOGGER.info("madvise not available on this system");
      }
    }
  }

  /**
   * Give a hint to the system how a mapped memory segment will be used so the OS can optimize performance.
   *
   * @param buffer The mapped memory segment.
   * @param value  The advice to use.
   * @throws IOException If an error occurs or madvise not available on this system
   * @see <a href="https://man7.org/linux/man-pages/man2/madvise.2.html">madvise(2) Linux manual page</a>
   */
  public static void madvise(ByteBuffer buffer, Madvice value) throws IOException {
    Madvise.madvise(buffer, value.value);
  }

  /**
   * Attempt to force-unmap a list of memory-mapped file segments so it can safely be deleted.
   * <p>
   * Every non-null segment is attempted even if an earlier one fails, so a single bad segment does not leave the
   * remaining ones mapped. The first failure is reported as the cause of the thrown exception and any later failures
   * are attached to it as suppressed exceptions.
   *
   * @param segments The segments to unmap, {@code null} elements are skipped
   * @throws IOException If any error occurs unmapping the segment
   */
  public static void unmap(MappedByteBuffer... segments) throws IOException {
    IOException failure = null;
    try {
      // attempt to force-unmap the file, so we can delete it later
      // https://stackoverflow.com/questions/2972986/how-to-unmap-a-file-from-memory-mapped-using-filechannel-in-java
      Class<?> unsafeClass;
      try {
        unsafeClass = Class.forName("sun.misc.Unsafe");
      } catch (Exception ex) {
        unsafeClass = Class.forName("jdk.internal.misc.Unsafe");
      }
      Method clean = unsafeClass.getMethod("invokeCleaner", ByteBuffer.class);
      clean.setAccessible(true);
      Field theUnsafeField = unsafeClass.getDeclaredField("theUnsafe");
      theUnsafeField.setAccessible(true);
      Object theUnsafe = theUnsafeField.get(null);
      for (MappedByteBuffer buffer : segments) {
        if (buffer != null) {
          try {
            clean.invoke(theUnsafe, buffer);
          } catch (Exception e) {
            // keep going so the remaining segments still get released
            if (failure == null) {
              failure = new IOException("Unable to unmap", e);
            } else {
              failure.addSuppressed(e);
            }
          }
        }
      }
    } catch (Exception e) {
      // reflection setup failed (or segments itself was null): nothing could be unmapped
      throw new IOException("Unable to unmap", e);
    }
    if (failure != null) {
      throw failure;
    }
  }

  /** Values from https://man7.org/linux/man-pages/man2/madvise.2.html */
  public enum Madvice {
    NORMAL(0),
    RANDOM(1),
    SEQUENTIAL(2),
    WILLNEED(3),
    DONTNEED(4);

    /** Integer constant passed as the {@code advice} argument of the native call. */
    final int value;

    Madvice(int value) {
      this.value = value;
    }
  }
}

Wyświetl plik

@ -80,7 +80,7 @@ public class AppendStoreTest {
@BeforeEach
public void setup(@TempDir Path path) {
this.store = new AppendStoreMmap.Ints(path.resolve("ints"), 4 << 2);
this.store = new AppendStoreMmap.Ints(path.resolve("ints"), 4 << 2, true);
}
}
@ -96,7 +96,7 @@ public class AppendStoreTest {
@BeforeEach
public void setup(@TempDir Path path) {
this.store = new AppendStoreMmap.Longs(path.resolve("longs"), 4 << 2);
this.store = new AppendStoreMmap.Longs(path.resolve("longs"), 4 << 2, true);
}
}
@ -104,7 +104,8 @@ public class AppendStoreTest {
@BeforeEach
public void setup(@TempDir Path path) {
this.store = new AppendStore.SmallLongs((i) -> new AppendStoreMmap.Ints(path.resolve("smalllongs" + i), 4 << 2));
this.store = new AppendStore.SmallLongs(
(i) -> new AppendStoreMmap.Ints(path.resolve("smalllongs" + i), 4 << 2, true));
}
}

Wyświetl plik

@ -0,0 +1,45 @@
package com.onthegomap.planetiler.util;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Locale;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
public class MmapUtilTest {
@Test
public void testMadviseAndUnmap(@TempDir Path dir) throws IOException {
String osName = System.getProperty("os.name", "").toLowerCase(Locale.ROOT);
String data = "test";
int bytes = data.getBytes(StandardCharsets.UTF_8).length;
var path = dir.resolve("file");
Files.writeString(path, data, StandardOpenOption.CREATE, StandardOpenOption.WRITE);
try (FileChannel channel = FileChannel.open(path, StandardOpenOption.READ)) {
MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, bytes);
try {
MmapUtil.madvise(buffer, MmapUtil.Madvice.RANDOM);
byte[] received = new byte[bytes];
buffer.get(received);
assertEquals(data, new String(received, StandardCharsets.UTF_8));
} catch (IOException e) {
if (osName.startsWith("mac") || osName.startsWith("linux")) {
throw e;
} else {
System.out.println("madvise failed, but the system may not support it");
}
} finally {
MmapUtil.unmap(buffer);
}
} finally {
Files.delete(path);
}
}
}