From 90fc8f62cfe7c08faf1975ecdbf5bc0caef26760 Mon Sep 17 00:00:00 2001 From: Mike Barry Date: Sun, 26 May 2024 06:50:25 -0400 Subject: [PATCH] avoid deserializing entire geometry just to determine type --- .../planetiler/geo/GeometryType.java | 37 +++++++++++++ .../reader/parquet/GeometryReader.java | 49 +++++++++++++---- .../reader/parquet/ParquetFeature.java | 40 ++++++++------ .../planetiler/geo/GeometryTypeTest.java | 52 ++++++++++++++----- 4 files changed, 139 insertions(+), 39 deletions(-) diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/geo/GeometryType.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/geo/GeometryType.java index faa4c87980..76df3f40ae 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/geo/GeometryType.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/geo/GeometryType.java @@ -2,6 +2,10 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.onthegomap.planetiler.expression.Expression; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Locale; +import java.util.regex.Pattern; import org.locationtech.jts.geom.Geometry; import org.locationtech.jts.geom.Lineal; import org.locationtech.jts.geom.Polygonal; @@ -41,6 +45,39 @@ public static GeometryType valueOf(VectorTileProto.Tile.GeomType geomType) { }; } + /** Returns the type of a WKB-encoded geometry without needing to deserialize the whole thing. */ + public static GeometryType fromWKB(byte[] wkb) { + /* Reads only the 1-byte byte-order flag and the 4-byte type code that follows it. NOTE(review): an array shorter than 5 bytes would underflow these buffer reads; confirm callers only pass well-formed WKB. */ var bb = ByteBuffer.wrap(wkb); + byte byteOrder = bb.get(); + int geomType = bb.order(byteOrder == 1 ? 
ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN).getInt(); + return switch (geomType) { + case 1, 4 -> GeometryType.POINT; + case 2, 5 -> GeometryType.LINE; + case 3, 6 -> GeometryType.POLYGON; + default -> GeometryType.UNKNOWN; + }; + } + + /** Matches an optional "multi" prefix followed by "point", "line" or "polygon" at the start of the text, after any leading whitespace and ignoring case. */ private static final Pattern TYPE_PATTERN = + Pattern.compile("^\\s*(multi)?(point|line|polygon)", Pattern.CASE_INSENSITIVE); + + /** Returns the type of a WKT-encoded geometry without needing to deserialize the whole thing. */ + public static GeometryType fromWKT(String wkt) { + var matcher = TYPE_PATTERN.matcher(wkt); + if (matcher.find()) { + String group = matcher.group(2); + if (group != null) { + return switch (group.toLowerCase(Locale.ROOT)) { + case "point" -> GeometryType.POINT; + case "line" -> GeometryType.LINE; + case "polygon" -> GeometryType.POLYGON; + default -> GeometryType.UNKNOWN; + }; + } + } + return GeometryType.UNKNOWN; + } + public static GeometryType valueOf(byte val) { return valueOf(VectorTileProto.Tile.GeomType.forNumber(val)); } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/GeometryReader.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/GeometryReader.java index 31436f2aa4..6477a13c46 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/GeometryReader.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/GeometryReader.java @@ -2,11 +2,13 @@ import com.onthegomap.planetiler.geo.GeoUtils; import com.onthegomap.planetiler.geo.GeometryException; +import com.onthegomap.planetiler.geo.GeometryType; import com.onthegomap.planetiler.reader.WithTags; import com.onthegomap.planetiler.util.FunctionThatThrows; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.function.Function; import 
org.locationtech.jts.geom.Geometry; /** @@ -14,28 +16,45 @@ */ class GeometryReader { - private final Map> converters = new HashMap<>(); + private final Map converters 
= new HashMap<>(); final String geometryColumn; + /** Pairs the function that parses a raw column value into a geometry with a function that reports only that value's geometry type. */ private record FormatHandler( + FunctionThatThrows parse, + Function sniffType + ) {} + + /** Builds a handler whose parse step applies {@code parser} to list values (yielding null for any other value) and whose type step always reports {@code type}. */ private static > FormatHandler arrowHandler(GeometryType type, + FunctionThatThrows parser) { + return new FormatHandler(obj -> obj instanceof List list ? parser.apply((L) list) : null, any -> type); + } + GeometryReader(GeoParquetMetadata geoparquet) { this.geometryColumn = geoparquet.primaryColumn(); for (var entry : geoparquet.columns().entrySet()) { String column = entry.getKey(); GeoParquetMetadata.ColumnMetadata columnInfo = entry.getValue(); - FunctionThatThrows converter = switch (columnInfo.encoding()) { - case "WKB" -> obj -> obj instanceof byte[] bytes ? GeoUtils.wkbReader().read(bytes) : null; - case "WKT" -> obj -> obj instanceof String string ? GeoUtils.wktReader().read(string) : null; + FormatHandler converter = switch (columnInfo.encoding()) { + case "WKB" -> new FormatHandler( + obj -> obj instanceof byte[] bytes ? GeoUtils.wkbReader().read(bytes) : null, + obj -> obj instanceof byte[] bytes ? GeometryType.fromWKB(bytes) : GeometryType.UNKNOWN + ); + case "WKT" -> new FormatHandler( + obj -> obj instanceof String string ? GeoUtils.wktReader().read(string) : null, + obj -> obj instanceof String string ? GeometryType.fromWKT(string) : GeometryType.UNKNOWN + ); case "multipolygon", "geoarrow.multipolygon" -> - obj -> obj instanceof List list ? GeoArrow.multipolygon((List>>) list) : null; + arrowHandler(GeometryType.POLYGON, GeoArrow::multipolygon); case "polygon", "geoarrow.polygon" -> - obj -> obj instanceof List list ? 
GeoArrow.polygon((List>) list) : null; + arrowHandler(GeometryType.POLYGON, GeoArrow::polygon); case "multilinestring", "geoarrow.multilinestring" -> - obj -> obj instanceof List list ? GeoArrow.multilinestring((List>) list) : null; + arrowHandler(GeometryType.LINE, GeoArrow::multilinestring); case "linestring", "geoarrow.linestring" -> - obj -> obj instanceof List list ? 
GeoArrow.linestring((List) list) : null; + arrowHandler(GeometryType.LINE, GeoArrow::linestring); case "multipoint", "geoarrow.multipoint" -> - obj -> obj instanceof List list ? GeoArrow.multipoint((List) list) : null; - case "point", "geoarrow.point" -> GeoArrow::point; + arrowHandler(GeometryType.POINT, GeoArrow::multipoint); + case "point", "geoarrow.point" -> + arrowHandler(GeometryType.POINT, GeoArrow::point); default -> throw new IllegalArgumentException("Unhandled type: " + columnInfo.encoding()); }; converters.put(column, converter); @@ -58,9 +77,17 @@ Geometry parseGeometry(Object value, String column) throws GeometryException { throw new GeometryException("no_converter", "No geometry converter for " + column); } try { - return converter.apply(value); + return converter.parse.apply(value); } catch (Exception e) { throw new GeometryException("error_reading", "Error reading " + column, e); } } + + /** Returns the geometry type reported by the handler registered for {@code column}, or UNKNOWN when {@code value} is null or no handler is registered for that column. */ GeometryType sniffGeometryType(Object value, String column) { + var converter = converters.get(column); + if (value != null && converter != null) { + return converter.sniffType.apply(value); + } + return GeometryType.UNKNOWN; + } } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/ParquetFeature.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/ParquetFeature.java index 895fb9a662..6f8681a9a8 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/ParquetFeature.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/parquet/ParquetFeature.java @@ -2,6 +2,7 @@ import com.onthegomap.planetiler.geo.GeoUtils; import com.onthegomap.planetiler.geo.GeometryException; +import com.onthegomap.planetiler.geo.GeometryType; import com.onthegomap.planetiler.reader.SourceFeature; import 
com.onthegomap.planetiler.reader.Struct; import java.util.List; @@ -21,6 +22,7 @@ public class ParquetFeature extends SourceFeature { private Geometry latLon; private Geometry world; 
private Struct struct = null; + private GeometryType geometryType = null; ParquetFeature(String source, String sourceLayer, long id, GeometryReader geometryParser, Map tags) { @@ -40,31 +42,39 @@ public Geometry worldGeometry() throws GeometryException { (world = GeoUtils.sortPolygonsByAreaDescending(GeoUtils.latLonToWorldCoords(latLonGeometry()))); } + /** Computes the geometry type once and caches it in the geometryType field: asks the reader's sniffGeometryType first, and only calls latLonGeometry() to classify the geometry when that returns UNKNOWN. A GeometryException from that fallback is rethrown as IllegalStateException. */ private GeometryType geometryType() { + if (geometryType != null) { + return geometryType; + } + geometryType = geometryParser.sniffGeometryType(rawGeometry, geometryParser.geometryColumn); + if (geometryType == GeometryType.UNKNOWN) { + try { + geometryType = switch (latLonGeometry()) { + case Puntal ignored -> GeometryType.POINT; + case Lineal ignored -> GeometryType.LINE; + case Polygonal ignored -> GeometryType.POLYGON; + default -> GeometryType.UNKNOWN; + }; + } catch (GeometryException e) { + throw new IllegalStateException(e); + } + } + return geometryType; + } + @Override public boolean isPoint() { - try { - return latLonGeometry() instanceof Puntal; - } catch (GeometryException e) { - throw new IllegalStateException(e); - } + return geometryType() == GeometryType.POINT; } @Override public boolean canBePolygon() { - try { - return latLonGeometry() instanceof Polygonal; - } catch (GeometryException e) { - throw new IllegalStateException(e); - } + return geometryType() == GeometryType.POLYGON; } @Override public boolean canBeLine() { - try { - return latLonGeometry() instanceof Lineal; - } catch (GeometryException e) { - throw new IllegalStateException(e); - } + return geometryType() == GeometryType.LINE; } private Struct cachedStruct() { diff --git 
a/planetiler-core/src/test/java/com/onthegomap/planetiler/geo/GeometryTypeTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/geo/GeometryTypeTest.java index d7482eddb3..374f221ca8 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/geo/GeometryTypeTest.java +++ 
b/planetiler-core/src/test/java/com/onthegomap/planetiler/geo/GeometryTypeTest.java @@ -1,12 +1,20 @@ package com.onthegomap.planetiler.geo; import static java.util.Collections.emptyList; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import com.onthegomap.planetiler.TestUtils; import com.onthegomap.planetiler.reader.SimpleFeature; import java.util.Map; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.locationtech.jts.io.ParseException; +import org.locationtech.jts.io.WKBWriter; +import org.locationtech.jts.io.WKTReader; class GeometryTypeTest { @@ -22,20 +30,38 @@ void testGeometryFactory() { SimpleFeature.createFakeOsmFeature(TestUtils.newPolygon(0, 0, 1, 0, 1, 1, 0, 0), tags, "osm", null, 1, emptyList()); - Assertions.assertTrue(GeometryType.LINE.featureTest().evaluate(line)); - Assertions.assertFalse(GeometryType.LINE.featureTest().evaluate(point)); - Assertions.assertFalse(GeometryType.LINE.featureTest().evaluate(poly)); + assertTrue(GeometryType.LINE.featureTest().evaluate(line)); + assertFalse(GeometryType.LINE.featureTest().evaluate(point)); + assertFalse(GeometryType.LINE.featureTest().evaluate(poly)); - Assertions.assertFalse(GeometryType.POINT.featureTest().evaluate(line)); - Assertions.assertTrue(GeometryType.POINT.featureTest().evaluate(point)); - Assertions.assertFalse(GeometryType.POINT.featureTest().evaluate(poly)); + assertFalse(GeometryType.POINT.featureTest().evaluate(line)); + assertTrue(GeometryType.POINT.featureTest().evaluate(point)); + assertFalse(GeometryType.POINT.featureTest().evaluate(poly)); - Assertions.assertFalse(GeometryType.POLYGON.featureTest().evaluate(line)); - 
Assertions.assertFalse(GeometryType.POLYGON.featureTest().evaluate(point)); - Assertions.assertTrue(GeometryType.POLYGON.featureTest().evaluate(poly)); + assertFalse(GeometryType.POLYGON.featureTest().evaluate(line)); + assertFalse(GeometryType.POLYGON.featureTest().evaluate(point)); + assertTrue(GeometryType.POLYGON.featureTest().evaluate(poly)); - Assertions.assertThrows(Exception.class, () -> GeometryType.UNKNOWN.featureTest().evaluate(point)); - Assertions.assertThrows(Exception.class, () -> GeometryType.UNKNOWN.featureTest().evaluate(line)); - Assertions.assertThrows(Exception.class, () -> GeometryType.UNKNOWN.featureTest().evaluate(poly)); + assertThrows(Exception.class, () -> GeometryType.UNKNOWN.featureTest().evaluate(point)); + assertThrows(Exception.class, () -> GeometryType.UNKNOWN.featureTest().evaluate(line)); + assertThrows(Exception.class, () -> GeometryType.UNKNOWN.featureTest().evaluate(poly)); + } + + /** For each sample, checks that fromWKT returns the expected type for the WKT text and that fromWKB returns the same type for the bytes WKBWriter produces from the parsed WKT. */ @ParameterizedTest + @CsvSource(value = { + "POINT; POINT EMPTY", + "POINT; POINT(1 1)", + "POINT; MULTIPOINT(1 1, 2 2)", + "LINE; lineString(1 1, 2 2)", + "LINE; LINESTRING ZM(1 1 2 3, 2 2 4 5)", + "LINE; multiLineString((1 1, 2 2))", + "POLYGON; POLYGON((0 0, 0 1, 1 0, 0 0))", + "POLYGON; MULTIPOLYGON(((0 0, 0 1, 1 0, 0 0)))", + "UNKNOWN; GEOMETRYCOLLECTION EMPTY", + }, delimiter = ';') + void testSniffTypes(GeometryType expected, String wkt) throws ParseException { + assertEquals(expected, GeometryType.fromWKT(wkt)); + var wkb = new WKBWriter().write(new WKTReader().read(wkt)); + assertEquals(expected, GeometryType.fromWKB(wkb)); } }