Skip to content

Commit

Permalink
Use snappy compression for --compress-temp flag (onthegomap#905)
Browse files Browse the repository at this point in the history
  • Loading branch information
msbarry authored May 30, 2024
1 parent 2cc1e2b commit 39956e6
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import com.onthegomap.planetiler.util.BinPack;
import com.onthegomap.planetiler.util.ByteBufferUtil;
import com.onthegomap.planetiler.util.CloseableConsumer;
import com.onthegomap.planetiler.util.FastGzipOutputStream;
import com.onthegomap.planetiler.util.FileUtils;
import com.onthegomap.planetiler.worker.WorkerPipeline;
import java.io.BufferedInputStream;
Expand Down Expand Up @@ -42,10 +41,11 @@
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;
import java.util.zip.GZIPInputStream;
import javax.annotation.concurrent.NotThreadSafe;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xerial.snappy.SnappyInputStream;
import org.xerial.snappy.SnappyOutputStream;

/**
* A utility that writes {@link SortableFeature SortableFeatures} to disk and uses merge sort to efficiently sort much
Expand All @@ -69,7 +69,7 @@ class ExternalMergeSort implements FeatureSort {
private final AtomicLong features = new AtomicLong(0);
private final List<Chunk> chunks = new CopyOnWriteArrayList<>();
private final AtomicInteger chunkNum = new AtomicInteger(0);
private final boolean gzip;
private final boolean compress;
private final PlanetilerConfig config;
private final int readerLimit;
private final int writerLimit;
Expand All @@ -87,7 +87,7 @@ class ExternalMergeSort implements FeatureSort {
MAX_CHUNK_SIZE,
ProcessInfo.getMaxMemoryBytes() / 3
),
config.gzipTempStorage(),
config.compressTempStorage(),
config.mmapTempStorage(),
true,
true,
Expand All @@ -96,19 +96,19 @@ class ExternalMergeSort implements FeatureSort {
);
}

ExternalMergeSort(Path dir, int workers, int chunkSizeLimit, boolean gzip, boolean mmap, boolean parallelSort,
ExternalMergeSort(Path dir, int workers, int chunkSizeLimit, boolean compress, boolean mmap, boolean parallelSort,
boolean madvise, PlanetilerConfig config, Stats stats) {
this.config = config;
this.madvise = madvise;
this.dir = dir;
this.stats = stats;
this.parallelSort = parallelSort;
this.chunkSizeLimit = chunkSizeLimit;
if (gzip && mmap) {
LOGGER.warn("--gzip-temp option not supported with --mmap-temp, falling back to --gzip-temp=false");
gzip = false;
if (compress && mmap) {
LOGGER.warn("--compress-temp option not supported with --mmap-temp, falling back to --mmap-temp=false");
mmap = false;
}
this.gzip = gzip;
this.compress = compress;
this.mmapIO = mmap;
long memLimit = ProcessInfo.getMaxMemoryBytes() / 3;
if (chunkSizeLimit > memLimit) {
Expand Down Expand Up @@ -293,12 +293,12 @@ private static class ReaderBuffered extends BaseReader {
private final DataInputStream input;
private int read = 0;

ReaderBuffered(Path path, int count, boolean gzip) {
ReaderBuffered(Path path, int count, boolean compress) {
this.count = count;
try {
InputStream inputStream = new BufferedInputStream(Files.newInputStream(path));
if (gzip) {
inputStream = new GZIPInputStream(inputStream);
if (compress) {
inputStream = new SnappyInputStream(inputStream);
}
input = new DataInputStream(inputStream);
next = readNextFeature();
Expand Down Expand Up @@ -339,11 +339,11 @@ private static class WriterBuffered implements Writer {

private final DataOutputStream out;

WriterBuffered(Path path, boolean gzip) {
WriterBuffered(Path path, boolean compress) {
try {
OutputStream rawOutputStream = new BufferedOutputStream(Files.newOutputStream(path));
if (gzip) {
rawOutputStream = new FastGzipOutputStream(rawOutputStream);
if (compress) {
rawOutputStream = new SnappyOutputStream(rawOutputStream);
}
this.out = new DataOutputStream(rawOutputStream);
} catch (IOException e) {
Expand Down Expand Up @@ -533,11 +533,11 @@ private SortableChunk readAllAndMergeIn(Collection<Chunk> others) {
}

private Writer newWriter(Path path) {
return mmapIO ? new WriterMmap(path) : new WriterBuffered(path, gzip);
return mmapIO ? new WriterMmap(path) : new WriterBuffered(path, compress);
}

private Reader newReader() {
return mmapIO ? new ReaderMmap(path, itemCount) : new ReaderBuffered(path, itemCount, gzip);
return mmapIO ? new ReaderMmap(path, itemCount) : new ReaderBuffered(path, itemCount, compress);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public record PlanetilerConfig(
int maxzoomForRendering,
boolean force,
boolean append,
boolean gzipTempStorage,
boolean compressTempStorage,
boolean mmapTempStorage,
int sortMaxReaders,
int sortMaxWriters,
Expand Down Expand Up @@ -146,7 +146,8 @@ public static PlanetilerConfig from(Arguments arguments) {
"append to the output file - only supported by " + Stream.of(TileArchiveConfig.Format.values())
.filter(TileArchiveConfig.Format::supportsAppend).map(TileArchiveConfig.Format::id).toList(),
false),
arguments.getBoolean("gzip_temp", "gzip temporary feature storage (uses more CPU, but less disk space)", false),
arguments.getBoolean("compress_temp|gzip_temp",
"compress temporary feature storage (uses more CPU, but less disk space)", false),
arguments.getBoolean("mmap_temp", "use memory-mapped IO for temp feature files", true),
arguments.getInteger("sort_max_readers", "maximum number of concurrent read threads to use when sorting chunks",
6),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2082,6 +2082,7 @@ private static TileCompression extractTileCompression(String args) {
"",
"--write-threads=2 --process-threads=2 --feature-read-threads=2 --threads=4",
"--free-osm-after-read",
"--compress-temp",
"--osm-parse-node-bounds",
"--output-format=pmtiles",
"--output-format=csv",
Expand Down
2 changes: 1 addition & 1 deletion planetiler-custommap/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ cat planetiler-custommap/planetiler.schema.json | jq -r '.properties.args.proper
- `maxzoom` - Maximum tile zoom level to emit
- `render_maxzoom` - Maximum rendering zoom level up to
- `force` - Overwriting output file and ignore warnings
- `gzip_temp` - Gzip temporary feature storage (uses more CPU, but less disk space)
- `compress_temp` - Gzip temporary feature storage (uses more CPU, but less disk space)
- `mmap_temp` - Use memory-mapped IO for temp feature files
- `sort_max_readers` - Maximum number of concurrent read threads to use when sorting chunks
- `sort_max_writers` - Maximum number of concurrent write threads to use when sorting chunks
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ private Root(Arguments arguments, Map<String, Object> schemaArgs) {
argumentValues.put("maxzoom", config.maxzoom());
argumentValues.put("render_maxzoom", config.maxzoomForRendering());
argumentValues.put("force", config.force());
argumentValues.put("gzip_temp", config.gzipTempStorage());
argumentValues.put("compress_temp", config.compressTempStorage());
argumentValues.put("mmap_temp", config.mmapTempStorage());
argumentValues.put("sort_max_readers", config.sortMaxReaders());
argumentValues.put("sort_max_writers", config.sortMaxWriters());
Expand Down

0 comments on commit 39956e6

Please sign in to comment.