Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support unzipping GeoPackage sources at runtime #430

Merged
merged 4 commits into from
Jan 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,12 @@ public Planetiler addShapefileSource(String projection, String name, Path defaul
* <p>
* To override the location of the {@code geopackage} file, set {@code name_path=newpath.gpkg} in the arguments and to
* override the download URL set {@code name_url=http://url/of/file.gpkg}.
* <p>
* If given a path to a ZIP file containing one or more GeoPackages, each {@code .gpkg} file within will be extracted
* to a temporary directory at runtime.
*
* @param projection the Coordinate Reference System authority code to use, parsed with
* {@link org.geotools.referencing.CRS#decode(String)}
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments
* @param defaultUrl remote URL of the file to download if {@code download=true} argument is set and {@code
Expand All @@ -359,26 +364,61 @@ public Planetiler addShapefileSource(String projection, String name, Path defaul
* @see GeoPackageReader
* @see Downloader
*/
public Planetiler addGeoPackageSource(String name, Path defaultPath, String defaultUrl) {
public Planetiler addGeoPackageSource(String projection, String name, Path defaultPath, String defaultUrl) {
// resolve the input location from the arguments, falling back to defaultPath/defaultUrl
Path path = getPath(name, "geopackage", defaultPath, defaultUrl);
return addStage(name, "Process features in " + path,
ifSourceUsed(name,
() -> GeoPackageReader.process(name, List.of(path), featureGroup, config, profile, stats)));
ifSourceUsed(name, () -> {
// by default the resolved path itself is the only GeoPackage to read
List<Path> sourcePaths = List.of(path);
// for a zip archive, read every *.gpkg file found within it instead
if (FileUtils.hasExtension(path, "zip")) {
sourcePaths = FileUtils.walkPathWithPattern(path, "*.gpkg");
}

// fail with a clear message rather than silently processing nothing
if (sourcePaths.isEmpty()) {
throw new IllegalArgumentException("No .gpkg files found in " + path);
}

// tmpDir is handed to the reader so files inside an archive can be extracted before opening
GeoPackageReader.process(projection, name, sourcePaths, tmpDir, featureGroup, config, profile, stats);
}));
}

/**
* Adds a new OGC GeoPackage source that will be processed when {@link #run()} is called.
* <p>
* If the file does not exist and {@code download=true} argument is set, then the file will first be downloaded from
* {@code defaultUrl}.
* <p>
* To override the location of the {@code geopackage} file, set {@code name_path=newpath.gpkg} in the arguments and to
* override the download URL set {@code name_url=http://url/of/file.gpkg}.
* <p>
* If given a path to a ZIP file containing one or more GeoPackages, each {@code .gpkg} file within will be extracted
* to a temporary directory at runtime.
* <p>
* This overload delegates to {@link #addGeoPackageSource(String, String, Path, String)} with a {@code null}
* projection, in which case the reader derives the coordinate transform from the SRS id of each feature table.
*
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments
* @param defaultUrl remote URL of the file to download if {@code download=true} argument is set and {@code
* name_url} argument is not set
* @return this runner instance for chaining
* @see GeoPackageReader
* @see Downloader
*/
public Planetiler addGeoPackageSource(String name, Path defaultPath, String defaultUrl) {
// null projection: no explicit source CRS override
return addGeoPackageSource(null, name, defaultPath, defaultUrl);
}

/**
* Adds a new Natural Earth sqlite file source that will be processed when {@link #run()} is called.
* <p>
* To override the location of the {@code sqlite} file, set {@code name_path=newpath.zip} in the arguments and to
* override the download URL set {@code name_url=http://url/of/natural_earth.zip}.
* <p>
* This overload delegates to {@link #addNaturalEarthSource(String, Path, String)} with a {@code null} default
* download URL.
*
* @deprecated can be replaced by {@link #addGeoPackageSource(String, Path, String)}.
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments. Can be
* the {@code .sqlite} file or a {@code .zip} file containing the sqlite file.
* @return this runner instance for chaining
* @see NaturalEarthReader
*/
@Deprecated(forRemoval = true)
public Planetiler addNaturalEarthSource(String name, Path defaultPath) {
return addNaturalEarthSource(name, defaultPath, null);
}
Expand All @@ -392,6 +432,8 @@ public Planetiler addNaturalEarthSource(String name, Path defaultPath) {
* To override the location of the {@code sqlite} file, set {@code name_path=newpath.zip} in the arguments and to
* override the download URL set {@code name_url=http://url/of/natural_earth.zip}.
*
* @deprecated can be replaced by {@link #addGeoPackageSource(String, Path, String)}.
*
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name} key is not set through arguments. Can be the
* {@code .sqlite} file or a {@code .zip} file containing the sqlite file.
Expand All @@ -401,6 +443,7 @@ public Planetiler addNaturalEarthSource(String name, Path defaultPath) {
* @see NaturalEarthReader
* @see Downloader
*/
@Deprecated(forRemoval = true)
public Planetiler addNaturalEarthSource(String name, Path defaultPath, String defaultUrl) {
Path path = getPath(name, "sqlite db", defaultPath, defaultUrl);
return addStage(name, "Process features in " + path, ifSourceUsed(name, () -> NaturalEarthReader
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
import com.onthegomap.planetiler.collection.FeatureGroup;
import com.onthegomap.planetiler.config.PlanetilerConfig;
import com.onthegomap.planetiler.stats.Stats;
import com.onthegomap.planetiler.util.FileUtils;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.List;
Expand All @@ -17,40 +21,80 @@
import org.geotools.geometry.jts.WKBReader;
import org.geotools.referencing.CRS;
import org.locationtech.jts.geom.Geometry;
import org.opengis.referencing.crs.CoordinateReferenceSystem;
import org.opengis.referencing.FactoryException;
import org.opengis.referencing.operation.MathTransform;

/**
* Utility that reads {@link SourceFeature SourceFeatures} from the vector geometries contained in a GeoPackage file.
*/
public class GeoPackageReader extends SimpleReader<SimpleFeature> {

private Path extractedPath = null;
private final GeoPackage geoPackage;
private final MathTransform coordinateTransform;

GeoPackageReader(String sourceName, Path input) {
GeoPackageReader(String sourceProjection, String sourceName, Path input, Path tmpDir) {
super(sourceName);

geoPackage = GeoPackageManager.open(false, input.toFile());
// An explicit source projection yields a single transform to EPSG:4326 that readFeatures applies to every
// feature table. Otherwise the transform stays null and readFeatures builds one from each table's own SRS id.
if (sourceProjection != null) {
try {
var sourceCRS = CRS.decode(sourceProjection);
var latLonCRS = CRS.decode("EPSG:4326");
coordinateTransform = CRS.findMathTransform(sourceCRS, latLonCRS);
} catch (FactoryException e) {
throw new FileFormatException("Bad reference system", e);
}
} else {
coordinateTransform = null;
}

// Opening may first extract the file from a zip archive into tmpDir; IO failures are rethrown unchecked
// because the constructor declares no checked exceptions.
try {
geoPackage = openGeopackage(input, tmpDir);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}

/**
 * Opens the {@link GeoPackage} at the given path. If {@code input} refers to a file within a ZIP archive (its URI
 * uses the {@code jar} scheme), the file is first copied to a temporary {@code .gpkg} file in {@code tmpDir} and
 * opened from there; that temporary file is remembered in {@code extractedPath} so {@link #close()} can delete it.
 * <p>
 * If copying or opening fails, the temporary file is deleted before the exception propagates, because this method
 * is called from the constructor and a reader that failed to construct is never closed.
 *
 * @param input  path to a {@code .gpkg} file on disk or inside a ZIP archive
 * @param tmpDir directory in which to create the temporary extracted copy
 * @return the opened GeoPackage
 * @throws IOException if the temporary file cannot be created or the archive entry cannot be opened
 */
private GeoPackage openGeopackage(Path input, Path tmpDir) throws IOException {
  var inputUri = input.toUri();
  if (!"jar".equals(inputUri.getScheme())) {
    // regular file: open it in place
    return GeoPackageManager.open(false, input.toFile());
  }

  Path extracted = Files.createTempFile(tmpDir, "", ".gpkg");
  try {
    try (var inputStream = inputUri.toURL().openStream()) {
      FileUtils.safeCopy(inputStream, extracted);
    }
    GeoPackage opened = GeoPackageManager.open(false, extracted.toFile());
    // only record the temp file once it is successfully opened, so close() owns its cleanup from here on
    extractedPath = extracted;
    return opened;
  } catch (IOException | RuntimeException e) {
    // don't leave an orphaned temp file behind when extraction or opening fails
    try {
      Files.deleteIfExists(extracted);
    } catch (IOException cleanupFailure) {
      e.addSuppressed(cleanupFailure);
    }
    throw e;
  }
}


/**
* Renders map features for all elements from an OGC GeoPackage based on the mapping logic defined in {@code
* profile}.
*
* @param sourceName string ID for this reader to use in logs and stats
* @param sourcePaths paths to the {@code .gpkg} files on disk
* @param writer consumer for rendered features
* @param config user-defined parameters controlling number of threads and log interval
* @param profile logic that defines what map features to emit for each source feature
* @param stats to keep track of counters and timings
* @param sourceProjection code for the coordinate reference system of the input data, to be parsed by
* {@link CRS#decode(String)}
* @param sourceName string ID for this reader to use in logs and stats
* @param sourcePaths paths to the {@code .gpkg} files on disk
* @param tmpDir path to temporary directory for extracting data from zip files
* @param writer consumer for rendered features
* @param config user-defined parameters controlling number of threads and log interval
* @param profile logic that defines what map features to emit for each source feature
* @param stats to keep track of counters and timings
* @throws IllegalArgumentException if a problem occurs reading the input file
*/
public static void process(String sourceName, List<Path> sourcePaths, FeatureGroup writer, PlanetilerConfig config,
public static void process(String sourceProjection, String sourceName, List<Path> sourcePaths, Path tmpDir,
FeatureGroup writer, PlanetilerConfig config,
Profile profile, Stats stats) {
SourceFeatureProcessor.processFiles(
sourceName,
sourcePaths,
path -> new GeoPackageReader(sourceName, path),
// reader factory: each source path gets its own reader, all sharing the same projection override and temp dir
path -> new GeoPackageReader(sourceProjection, sourceName, path, tmpDir),
writer, config, profile, stats
);
}
Expand All @@ -68,15 +112,19 @@ public long getFeatureCount() {

@Override
public void readFeatures(Consumer<SimpleFeature> next) throws Exception {
CoordinateReferenceSystem latLonCRS = CRS.decode("EPSG:4326");
var latLonCRS = CRS.decode("EPSG:4326");
long id = 0;

for (var featureName : geoPackage.getFeatureTables()) {
FeatureDao features = geoPackage.getFeatureDao(featureName);

MathTransform transform = CRS.findMathTransform(
CRS.decode("EPSG:" + features.getSrsId()),
latLonCRS);
// GeoPackage spec allows this to be 0 (undefined geographic CRS) or
// -1 (undefined cartesian CRS). Both cases will throw when trying to
// call CRS.decode
long srsId = features.getSrsId();

MathTransform transform = (coordinateTransform != null) ? coordinateTransform :
CRS.findMathTransform(CRS.decode("EPSG:" + srsId), latLonCRS);

for (var feature : features.queryForAll()) {
GeoPackageGeometryData geometryData = feature.getGeometry();
Expand All @@ -103,7 +151,11 @@ public void readFeatures(Consumer<SimpleFeature> next) throws Exception {
}

@Override
public void close() {
public void close() throws IOException {
geoPackage.close();

// remove the temporary copy created by openGeopackage when the input came from inside a zip archive
// NOTE(review): if geoPackage.close() throws, this cleanup is skipped — consider try/finally
if (extractedPath != null) {
Files.deleteIfExists(extractedPath);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,31 @@ public static void unzipResource(String resource, Path dest) {
}
}

/**
 * Copies bytes from {@code inputStream} to {@code destPath}, ensuring that the size is limited to a reasonable value.
 * <p>
 * Any existing content of {@code destPath} is replaced: the file is truncated before writing so that no bytes of a
 * previous, longer file remain after the copied data.
 *
 * @param inputStream stream to read from; it is not closed by this method
 * @param destPath    file to write to, created if it does not exist
 * @throws UncheckedIOException if an IO exception occurs, or if more than {@code ZIP_THRESHOLD_SIZE} bytes are read
 */
public static void safeCopy(InputStream inputStream, Path destPath) {
  try (
    var outputStream = Files.newOutputStream(destPath, StandardOpenOption.CREATE,
      StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)
  ) {
    int totalSize = 0;

    int nBytes;
    byte[] buffer = new byte[2048];
    while ((nBytes = inputStream.read(buffer)) > 0) {
      outputStream.write(buffer, 0, nBytes);
      totalSize += nBytes;

      // checked after every chunk so an oversized stream is rejected without reading it to the end
      if (totalSize > ZIP_THRESHOLD_SIZE) {
        throw new IOException("The uncompressed data size " + FORMAT.storage(totalSize) +
          "B is too much for the application resource capacity");
      }
    }
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  }
}

/**
* Unzips a zip file from an input stream to {@code destDir}.
*
Expand Down Expand Up @@ -304,7 +329,7 @@ public static void unzip(InputStream input, Path destDir) {
}

if (totalEntryArchive > ZIP_THRESHOLD_ENTRIES) {
throw new IOException("Too much entries in this archive " + FORMAT.integer(totalEntryArchive) +
throw new IOException("Too many entries in this archive " + FORMAT.integer(totalEntryArchive) +
", can lead to inodes exhaustion of the system");
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1668,7 +1668,7 @@ public void processFeature(SourceFeature source, FeatureCollector features) {
.addOsmSource("osm", tempOsm)
.addNaturalEarthSource("ne", TestUtils.pathToResource("natural_earth_vector.sqlite"))
.addShapefileSource("shapefile", TestUtils.pathToResource("shapefile.zip"))
.addGeoPackageSource("geopackage", TestUtils.pathToResource("geopackage.gpkg"), null)
.addGeoPackageSource("geopackage", TestUtils.pathToResource("geopackage.gpkg.zip"), null)
.setOutput("mbtiles", mbtiles)
.run();

Expand Down Expand Up @@ -1749,9 +1749,11 @@ public void processFeature(SourceFeature source, FeatureCollector features) {
@ValueSource(strings = {
"",
"--write-threads=2 --process-threads=2 --feature-read-threads=2 --threads=4",
"--input-file=geopackage.gpkg"
})
void testPlanetilerRunnerGeoPackage(String args) throws Exception {
Path mbtiles = tempDir.resolve("output.mbtiles");
String inputFile = Arguments.fromArgs(args).getString("input-file", "", "geopackage.gpkg.zip");

Planetiler.create(Arguments.fromArgs((args + " --tmpdir=" + tempDir.resolve("data")).split("\\s+")))
.setProfile(new Profile.NullProfile() {
Expand All @@ -1762,7 +1764,7 @@ public void processFeature(SourceFeature source, FeatureCollector features) {
.setAttr("name", source.getString("name"));
}
})
.addGeoPackageSource("geopackage", TestUtils.pathToResource("geopackage.gpkg"), null)
.addGeoPackageSource("geopackage", TestUtils.pathToResource(inputFile), null)
.setOutput("mbtiles", mbtiles)
.run();

Expand Down Expand Up @@ -1790,7 +1792,7 @@ private void runWithProfile(Path tempDir, Profile profile, boolean force) throws
.addOsmSource("osm", TestUtils.pathToResource("monaco-latest.osm.pbf"))
.addNaturalEarthSource("ne", TestUtils.pathToResource("natural_earth_vector.sqlite"))
.addShapefileSource("shapefile", TestUtils.pathToResource("shapefile.zip"))
.addGeoPackageSource("geopackage", TestUtils.pathToResource("geopackage.gpkg"), null)
.addGeoPackageSource("geopackage", TestUtils.pathToResource("geopackage.gpkg.zip"), null)
.setOutput("mbtiles", tempDir.resolve("output.mbtiles"))
.run();
}
Expand Down
Loading