Skip to content

Commit

Permalink
add compact DB mode (--compact-db)
Browse files Browse the repository at this point in the history
which splits the tiles table into tiles_shallow and tiles_data

tiles_shallow contains the coordinates plus a reference to the data ID
tiles_data contains the data ID plus the actual tile data

this allows content to be deduplicated since multiple tiles can
reference the same data

in this mode, tiles is realized as a view that joins the two tables
tiles_shallow and tiles_data
  • Loading branch information
bbilger committed May 9, 2022
1 parent a8bec7a commit 1c78b61
Show file tree
Hide file tree
Showing 13 changed files with 1,122 additions and 158 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ void testStilInvalidWithOneTile() throws IOException {
VectorTile.encodeGeometry(point(0, 0)),
Map.of()
)));
writer.write(TileCoord.ofXYZ(0, 0, 0), gzip(tile.encode()));
writer.write(TileCoord.ofXYZ(0, 0, 0), gzip(tile.encode()), 1);
}
assertInvalid(mbtiles);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
package com.onthegomap.planetiler.benchmarks;

import com.google.common.base.Stopwatch;
import com.onthegomap.planetiler.config.Arguments;
import com.onthegomap.planetiler.geo.TileCoord;
import com.onthegomap.planetiler.mbtiles.Mbtiles;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.LongSummaryStatistics;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Benchmark that measures how long it takes to read individual tiles from one or more mbtiles files.
 * <p>
 * Arguments:
 * <ul>
 * <li>{@code bench_repetitions}: how often the full set of tiles is read per file (default 100)</li>
 * <li>{@code bench_nr_tile_reads}: how many random tiles are read per repetition (default 10000)</li>
 * <li>{@code bench_mbtiles0}, {@code bench_mbtiles1}, ...: the files to read from; the numbering must be gap-free
 * since collection stops at the first missing index</li>
 * </ul>
 * The random tile coordinates are selected once from the first file and then fetched from every file, and a missing
 * tile is treated as an error - so every file has to contain those tiles.
 */
public class BenchmarkMbtilesRead {

  private static final Logger LOGGER = LoggerFactory.getLogger(BenchmarkMbtilesRead.class);

  private static final String SELECT_RANDOM_COORDS =
    "select tile_column, tile_row, zoom_level from tiles order by random() limit ?";

  /**
   * Runs the read benchmark and logs total/individual read statistics per file plus pairwise differences.
   *
   * @param args command-line arguments, parsed through {@link Arguments#fromArgs(String...)}
   * @throws IllegalArgumentException if no mbtiles path was passed or a passed path is not an existing regular file
   * @throws IllegalStateException    if one of the selected tiles cannot be read from one of the files
   * @throws Exception                on database or I/O failure
   */
  public static void main(String[] args) throws Exception {

    Arguments arguments = Arguments.fromArgs(args);
    int repetitions = arguments.getInteger("bench_repetitions", "number of repetitions", 100);
    int nrTileReads = arguments.getInteger("bench_nr_tile_reads", "number of tiles to read", 10000);

    List<String> mbtilesPaths = collectMbtilesPaths(arguments);

    List<TileCoord> randomCoordsToFetchPerRepetition = new ArrayList<>(Math.max(nrTileReads, 0));

    try (
      var db = Mbtiles.newReadOnlyDatabase(Path.of(mbtilesPaths.get(0)));
      var statement = db.connection().prepareStatement(SELECT_RANDOM_COORDS)
    ) {
      statement.setInt(1, nrTileReads);
      // the result set is a resource as well - close it together with the statement
      try (var rs = statement.executeQuery()) {
        while (rs.next()) {
          int x = rs.getInt("tile_column");
          int y = rs.getInt("tile_row");
          int z = rs.getInt("zoom_level");
          // the row is flipped before building the coordinate
          // NOTE(review): presumably because the tiles table stores rows in TMS order - confirm against Mbtiles
          randomCoordsToFetchPerRepetition.add(TileCoord.ofXYZ(x, (1 << z) - 1 - y, z));
        }
      }
    }

    Map<String, Double> avgIndividualReadPerDb = new HashMap<>();
    for (String dbPathStr : mbtilesPaths) {
      Path dbPath = Path.of(dbPathStr);
      List<ReadResult> results = new ArrayList<>(Math.max(repetitions, 0));

      LOGGER.info("working on {}", dbPath);

      for (int rep = 0; rep < repetitions; rep++) {
        results.add(readEachTile(randomCoordsToFetchPerRepetition, dbPath));
      }
      var totalStats = results.stream().mapToLong(ReadResult::totalDuration).summaryStatistics();
      LOGGER.info("totalReadStats: {}", totalStats);

      // merge the per-repetition statistics into one summary for this file
      LongSummaryStatistics individualStats = new LongSummaryStatistics();
      for (ReadResult result : results) {
        individualStats.combine(result.individualReadStats());
      }
      LOGGER.info("individualReadStats: {}", individualStats);

      avgIndividualReadPerDb.put(dbPathStr, individualStats.getAverage());
    }

    logDiffs(avgIndividualReadPerDb);
  }

  /**
   * Collects the values of {@code bench_mbtiles0}, {@code bench_mbtiles1}, ... until the first index without a value
   * and makes sure each of them points to an existing regular file.
   *
   * @throws IllegalArgumentException if no path was passed or a path is not an existing regular file
   */
  private static List<String> collectMbtilesPaths(Arguments arguments) {
    List<String> mbtilesPaths = new ArrayList<>();
    for (int i = 0;; i++) {
      String mbtilesPathStr = arguments.getString("bench_mbtiles" + i, "the mbtiles file to read from", null);
      if (mbtilesPathStr == null) {
        break;
      }
      mbtilesPaths.add(mbtilesPathStr);
    }

    if (mbtilesPaths.isEmpty()) {
      throw new IllegalArgumentException("pass one or many paths to the same mbtiles file");
    }

    for (String mbtilesPath : mbtilesPaths) {
      File f = new File(mbtilesPath);
      // isFile() is false for both: paths that do not exist and paths that are not regular files
      if (!f.isFile()) {
        throw new IllegalArgumentException("%s does not exist or is not a regular file".formatted(f));
      }
    }
    return mbtilesPaths;
  }

  /**
   * Logs, for every pair of files, by how many percent the average individual read duration of the slower file is
   * above the one of the faster file.
   *
   * @param avgIndividualReadPerDb average duration of an individual tile read in nanoseconds, keyed by file path
   */
  private static void logDiffs(Map<String, Double> avgIndividualReadPerDb) {
    // fastest file first, so that each logged pair compares a slower file against a faster one
    List<String> keysSorted = avgIndividualReadPerDb.entrySet().stream()
      .sorted(Map.Entry.comparingByValue())
      .map(Map.Entry::getKey)
      .toList();

    LOGGER.info("diffs");
    for (int i = 0; i < keysSorted.size() - 1; i++) {
      for (int j = i + 1; j < keysSorted.size(); j++) {
        String db0 = keysSorted.get(i);
        double avg0 = avgIndividualReadPerDb.get(db0);
        String db1 = keysSorted.get(j);
        double avg1 = avgIndividualReadPerDb.get(db1);

        double diff = avg1 * 100 / avg0 - 100;

        LOGGER.info("\"{}\" vs \"{}\": avgs reads up by {}%", db0, db1, diff);
      }
    }
  }

  /**
   * Opens the database at {@code dbPath} read-only, fetches every given tile once and times the reads.
   *
   * @param coordsToFetch the tiles to read, in the order in which they are read
   * @param dbPath        the mbtiles file to read from
   * @return the duration of the whole read loop and the statistics over the individual reads, both in nanoseconds
   * @throws IllegalStateException if one of the tiles does not exist in the database
   * @throws IOException           declared for the database access and for closing the database
   */
  private static ReadResult readEachTile(List<TileCoord> coordsToFetch, Path dbPath) throws IOException {
    LongSummaryStatistics individualFetchDurations = new LongSummaryStatistics();
    try (var db = Mbtiles.newReadOnlyDatabase(dbPath)) {
      db.getTile(0, 0, 0); // trigger prepared statement creation
      var totalSw = Stopwatch.createStarted();
      for (var coordToFetch : coordsToFetch) {
        var sw = Stopwatch.createStarted();
        if (db.getTile(coordToFetch) == null) {
          throw new IllegalStateException("%s should exist in %s".formatted(coordToFetch, dbPath));
        }
        sw.stop();
        individualFetchDurations.accept(sw.elapsed(TimeUnit.NANOSECONDS));
      }
      totalSw.stop();
      return new ReadResult(totalSw.elapsed(TimeUnit.NANOSECONDS), individualFetchDurations);
    }
  }

  /**
   * Result of one repetition.
   *
   * @param totalDuration       duration of the whole read loop in nanoseconds
   * @param individualReadStats statistics over the durations of the individual tile reads in nanoseconds
   */
  private record ReadResult(long totalDuration, LongSummaryStatistics individualReadStats) {}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
package com.onthegomap.planetiler.benchmarks;

import com.onthegomap.planetiler.Profile;
import com.onthegomap.planetiler.VectorTile;
import com.onthegomap.planetiler.collection.FeatureGroup;
import com.onthegomap.planetiler.config.Arguments;
import com.onthegomap.planetiler.config.MbtilesMetadata;
import com.onthegomap.planetiler.config.PlanetilerConfig;
import com.onthegomap.planetiler.geo.GeometryType;
import com.onthegomap.planetiler.geo.TileCoord;
import com.onthegomap.planetiler.mbtiles.MbtilesWriter;
import com.onthegomap.planetiler.render.RenderedFeature;
import com.onthegomap.planetiler.stats.Counter;
import com.onthegomap.planetiler.stats.Stats;
import com.onthegomap.planetiler.stats.Timers;
import com.onthegomap.planetiler.util.MemoryEstimator.HasEstimate;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Duration;
import java.time.Instant;
import java.util.LongSummaryStatistics;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.LongSupplier;
import java.util.function.Supplier;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Benchmark that measures how long it takes to write an mbtiles file.
 * <p>
 * The tiles are rendered once into an in-memory feature group, which is then written to a fresh temporary mbtiles
 * file once per repetition. After the last repetition the collected statistics are logged.
 * <p>
 * Arguments (in addition to the ones read by {@link PlanetilerConfig#from(Arguments)}):
 * <ul>
 * <li>{@code bench_tiles_to_write}: number of tiles to write (default 1,000,000)</li>
 * <li>{@code bench_repetitions}: number of repetitions (default 10)</li>
 * <li>{@code bench_distinct_tiles}: distinct tiles in percent (default 10)</li>
 * </ul>
 */
public class BenchmarkMbtilesWriter {

  private static final Logger LOGGER = LoggerFactory.getLogger(BenchmarkMbtilesWriter.class);

  /**
   * Runs the write benchmark.
   *
   * @param args command-line arguments, parsed through {@link Arguments#fromArgs(String...)}
   * @throws IOException declared for rendering the tiles and for writing the output
   */
  public static void main(String[] args) throws IOException {

    Arguments arguments = Arguments.fromArgs(args);

    int tilesToWrite = arguments.getInteger("bench_tiles_to_write", "number of tiles to write", 1_000_000);
    int repetitions = arguments.getInteger("bench_repetitions", "number of repetitions", 10);
    // to put some context here: Australia has 8% distinct tiles
    int distinctTilesInPercent = arguments.getInteger("bench_distinct_tiles", "distinct tiles in percent", 10);

    MbtilesMetadata mbtilesMetadata = new MbtilesMetadata(new Profile.NullProfile());
    PlanetilerConfig config = PlanetilerConfig.from(arguments);

    FeatureGroup featureGroup = FeatureGroup.newInMemoryFeatureGroup(new Profile.NullProfile(), Stats.inMemory());
    renderTiles(featureGroup, tilesToWrite, distinctTilesInPercent, config.minzoom(), config.maxzoom());

    RepeatedMbtilesWriteStats repeatedMbtilesStats = new RepeatedMbtilesWriteStats();
    for (int repetition = 0; repetition < repetitions; repetition++) {
      MyStats myStats = new MyStats();
      Path outputPath = getTempOutputPath();
      try {
        MbtilesWriter.writeOutput(featureGroup, outputPath, mbtilesMetadata, config, myStats);
        repeatedMbtilesStats.updateWithStats(myStats, outputPath);
      } finally {
        // remove the output even if the repetition failed, so temp files do not pile up
        deleteOutput(outputPath);
      }
    }

    LOGGER.info("{}", repeatedMbtilesStats);
  }

  /**
   * Deletes the output of a repetition. A failure is only logged: the file is additionally registered for deletion on
   * JVM exit and a leftover temp file should not abort the benchmark.
   */
  private static void deleteOutput(Path outputPath) {
    try {
      Files.deleteIfExists(outputPath);
    } catch (IOException e) {
      LOGGER.warn("could not delete {}", outputPath, e);
    }
  }

  /**
   * Fills the feature group with one point feature per tile, walking through all tiles zoom by zoom (x, then y) from
   * {@code minzoom} to {@code maxzoom} until {@code tilesToWrite} tiles were written or no tiles are left.
   * <p>
   * Out of every 100 consecutive tiles the first {@code distinctTilesInPercent} ones get a unique attribute value;
   * the remaining ones alternate between the two most recently used unique values.
   *
   * @param featureGroup           the feature group to write to
   * @param tilesToWrite           upper bound for the number of tiles; nothing is written if it is not positive
   * @param distinctTilesInPercent share of tiles with unique content in percent
   * @param minzoom                first zoom level to generate tiles for
   * @param maxzoom                last zoom level to generate tiles for
   * @throws IOException declared for creating, using and closing the encoder and the writer
   */
  private static void renderTiles(FeatureGroup featureGroup, int tilesToWrite, int distinctTilesInPercent, int minzoom,
    int maxzoom) throws IOException {

    String lastDistinctAttributeValue = "0";
    String prevLastDistinctAttributeValue = "0";

    try (
      var renderer = featureGroup.newRenderedFeatureEncoder();
      var writer = featureGroup.writerForThread();
    ) {
      int tilesWritten = 0;
      for (int z = minzoom; z <= maxzoom; z++) {
        int maxCoord = 1 << z;
        for (int x = 0; x < maxCoord; x++) {
          for (int y = 0; y < maxCoord; y++) {
            // checked before writing so that a non-positive limit results in no tiles at all
            if (tilesWritten >= tilesToWrite) {
              return;
            }

            String attributeValue;
            if (tilesWritten % 100 < distinctTilesInPercent) {
              attributeValue = Integer.toString(tilesWritten);
              prevLastDistinctAttributeValue = lastDistinctAttributeValue;
              lastDistinctAttributeValue = attributeValue;
            } else if (tilesWritten % 2 == 0) { // make sure the existing de-duping mechanism won't work
              attributeValue = prevLastDistinctAttributeValue;
            } else {
              attributeValue = lastDistinctAttributeValue;
            }

            var renderedFeature = createRenderedFeature(x, y, z, attributeValue);
            var sortableFeature = renderer.apply(renderedFeature);
            writer.accept(sortableFeature);
            tilesWritten++;
          }
        }
      }
    }
  }

  /**
   * Creates a point feature with an empty geometry in layer {@code "layer"} for the given tile whose only attribute
   * {@code "k"} is set to {@code attributeValue}.
   */
  private static RenderedFeature createRenderedFeature(int x, int y, int z, String attributeValue) {
    var geometry = new VectorTile.VectorGeometry(new int[0], GeometryType.POINT, 14);
    var vectorTileFeature = new VectorTile.Feature("layer", 0, geometry, Map.of("k", attributeValue));
    return new RenderedFeature(TileCoord.ofXYZ(x, y, z), vectorTileFeature, 0, Optional.empty());
  }

  /**
   * Creates an empty temporary file to write the mbtiles output to and registers it for deletion on JVM exit.
   *
   * @throws IllegalStateException if the file cannot be created
   */
  private static Path getTempOutputPath() {
    Path path;
    try {
      path = Files.createTempFile("planetiler", ".mbtiles");
    } catch (IOException e) {
      throw new IllegalStateException(e);
    }
    path.toFile().deleteOnExit();
    return path;
  }

  /**
   * Statistics accumulated over all repetitions.
   *
   * @param total         duration of the "mbtiles" stage in milliseconds
   * @param read          duration of the "mbtiles_read" worker in milliseconds
   * @param encode        duration of the "mbtiles_encode" worker in milliseconds
   * @param write         duration of the "mbtiles_write" worker in milliseconds
   * @param memoizedTiles value of the "mbtiles_memoized_tiles" counter, -1 if that counter was never requested
   * @param file          size of the written mbtiles file in bytes
   */
  private record RepeatedMbtilesWriteStats(
    LongSummaryStatistics total,
    LongSummaryStatistics read,
    LongSummaryStatistics encode,
    LongSummaryStatistics write,
    LongSummaryStatistics memoizedTiles,
    LongSummaryStatistics file
  ) {
    RepeatedMbtilesWriteStats() {
      this(
        new LongSummaryStatistics(),
        new LongSummaryStatistics(),
        new LongSummaryStatistics(),
        new LongSummaryStatistics(),
        new LongSummaryStatistics(),
        new LongSummaryStatistics()
      );
    }

    /**
     * Adds the results of one repetition.
     *
     * @param myStats     the stats that were passed to the writer for this repetition
     * @param mbtilesPath the file that was written in this repetition
     * @throws IllegalStateException if a stage/worker duration was not recorded or the file size cannot be determined
     */
    void updateWithStats(MyStats myStats, Path mbtilesPath) {
      total.accept(myStats.getStageDuration("mbtiles").toMillis());
      memoizedTiles.accept(myStats.getLongCounter("mbtiles_memoized_tiles"));
      MyTimers myTimers = myStats.timers();
      read.accept(myTimers.getWorkerDuration("mbtiles_read").toMillis());
      encode.accept(myTimers.getWorkerDuration("mbtiles_encode").toMillis());
      write.accept(myTimers.getWorkerDuration("mbtiles_write").toMillis());
      try {
        file.accept(Files.size(mbtilesPath));
      } catch (IOException e) {
        throw new IllegalStateException("could not determine the size of " + mbtilesPath, e);
      }
    }
  }

  /** Timers that additionally remember the last reported duration per worker prefix. */
  private static class MyTimers extends Timers {
    private final Map<String, Duration> workerDurations = new ConcurrentHashMap<>();

    @Override
    public void finishedWorker(String prefix, Duration elapsed) {
      workerDurations.put(prefix, elapsed);
      super.finishedWorker(prefix, elapsed);
    }

    /**
     * Returns the duration that was reported last for the given worker prefix.
     *
     * @throws IllegalStateException if no duration was reported for the prefix
     */
    Duration getWorkerDuration(String prefix) {
      Duration duration = workerDurations.get(prefix);
      if (duration == null) {
        throw new IllegalStateException("no duration recorded for worker \"%s\"".formatted(prefix));
      }
      return duration;
    }
  }

  /*
   * custom stats in order to have custom timers in order to get worker durations
   * and while at it, make stage durations available as well
   * note: the actual problem here is that Timers.Stage/ThreadInfo are not public
   */
  private static class MyStats implements Stats {

    private final Map<String, Duration> stageDurations = new ConcurrentHashMap<>();
    private final Map<String, Counter.MultiThreadCounter> longCounters = new ConcurrentHashMap<>();

    private final MyTimers timers = new MyTimers();

    /**
     * Returns the duration of the stage with the given name, measured from start to stop of the stage.
     *
     * @throws IllegalStateException if no stage with that name was finished
     */
    Duration getStageDuration(String name) {
      Duration duration = stageDurations.get(name);
      if (duration == null) {
        throw new IllegalStateException("no duration recorded for stage \"%s\"".formatted(name));
      }
      return duration;
    }

    /** Returns the current value of the long counter with the given name or -1 if it was never requested. */
    long getLongCounter(String name) {
      var counter = longCounters.get(name);
      if (counter == null) {
        return -1;
      }
      return counter.get();
    }

    @Override
    public Timers.Finishable startStage(String name) {
      Instant start = Instant.now();
      Timers.Finishable wrapped = Stats.super.startStage(name);
      // record the elapsed time first, then let the default implementation finish the stage
      return () -> {
        stageDurations.put(name, Duration.between(start, Instant.now()));
        wrapped.stop();
      };
    }

    @Override
    public void close() throws Exception {}

    @Override
    public void emittedFeatures(int z, String layer, int numFeatures) {}

    @Override
    public void processedElement(String elemType, String layer) {}

    @Override
    public void wroteTile(int zoom, int bytes) {}

    @Override
    public MyTimers timers() {
      return timers;
    }

    @Override
    public Map<String, Path> monitoredFiles() {
      return Map.of();
    }

    @Override
    public void monitorInMemoryObject(String name, HasEstimate object) {}

    @Override
    public void gauge(String name, Supplier<Number> value) {}

    @Override
    public void counter(String name, Supplier<Number> supplier) {}

    @Override
    public void counter(String name, String label, Supplier<Map<String, LongSupplier>> values) {}

    @Override
    public void dataError(String errorCode) {}

    @Override
    public Counter.MultiThreadCounter longCounter(String name) {
      // hand out the same counter per name so that a repeated request does not drop what was counted so far
      return longCounters.computeIfAbsent(name, ignored -> Counter.newMultiThreadCounter());
    }

    @Override
    public Counter.MultiThreadCounter nanoCounter(String name) {
      // nano counters are not evaluated by this benchmark, hence not tracked
      return Counter.newMultiThreadCounter();
    }

  }

}
Loading

0 comments on commit 1c78b61

Please sign in to comment.