From ec533c1c003f05ab15a447d1b5ee1c430081c2fe Mon Sep 17 00:00:00 2001 From: ncclementi Date: Mon, 11 Dec 2023 10:08:30 -0700 Subject: [PATCH] feat(geospatial): add support for GeoTransform on duckdb feat(geospatial): decouple st_transform for duckdb Create GeoConvert (st_transform) for duckdb to avoid conflict with postgis implementation, and add a test. chore(nix): gate geometry table creation behind SANDBOXED --- ibis/backends/duckdb/registry.py | 10 ++++++++ ibis/backends/duckdb/tests/conftest.py | 25 ++++++++++++++++--- ibis/backends/duckdb/tests/test_geospatial.py | 15 +++++++++++ ibis/expr/operations/geospatial.py | 10 ++++++++ ibis/expr/types/geospatial.py | 19 ++++++++++++++ 5 files changed, 75 insertions(+), 4 deletions(-) diff --git a/ibis/backends/duckdb/registry.py b/ibis/backends/duckdb/registry.py index f5a1489bbb91..90453118f469 100644 --- a/ibis/backends/duckdb/registry.py +++ b/ibis/backends/duckdb/registry.py @@ -111,6 +111,15 @@ def _geo_union(t, op): return sa.func.st_union(left, right, type_=Geometry_WKB) +def _geo_convert(t, op): + arg = t.translate(op.arg) + source = op.source + target = op.target + + # sa.true() setting always_xy=True + return sa.func.st_transform(arg, source, target, sa.true(), type_=Geometry_WKB) + + def _generic_log(arg, base, *, type_): return sa.func.ln(arg, type_=type_) / sa.func.ln(base, type_=type_) @@ -550,6 +559,7 @@ def _array_remove(t, op): ops.GeoWithin: fixed_arity(sa.func.ST_Within, 2), ops.GeoX: unary(sa.func.ST_X), ops.GeoY: unary(sa.func.ST_Y), + ops.GeoConvert: _geo_convert, # other ops ops.TimestampRange: fixed_arity(sa.func.range, 3), } diff --git a/ibis/backends/duckdb/tests/conftest.py b/ibis/backends/duckdb/tests/conftest.py index b1242e04ff3b..c16b69ffb5ce 100644 --- a/ibis/backends/duckdb/tests/conftest.py +++ b/ibis/backends/duckdb/tests/conftest.py @@ -43,7 +43,7 @@ class TestConf(BackendTest): def preload(self): if not SANDBOXED: self.connection._load_extensions( - ["httpfs", "postgres_scanner", "sqlite_scanner"] + ["httpfs", "postgres_scanner", "sqlite_scanner", "spatial"] ) @property @@ -59,16 +59,23 @@ def ddl_script(self) -> Iterator[str]: SELECT * FROM read_parquet('{parquet_dir / f'{table}.parquet'}') """ ) - if geospatial_supported: + if geospatial_supported and not SANDBOXED: for table in TEST_TABLES_GEO: yield ( f""" - INSTALL spatial; - LOAD spatial; CREATE OR REPLACE TABLE {table} AS SELECT * FROM st_read('{geojson_dir / f'{table}.geojson'}') """ ) + yield ( + """ + CREATE or REPLACE TABLE geo (name VARCHAR, geom GEOMETRY); + INSERT INTO geo VALUES + ('Point', ST_GeomFromText('POINT(-100 40)')), + ('Linestring', ST_GeomFromText('LINESTRING(0 0, 1 1, 2 1, 2 2)')), + ('Polygon', ST_GeomFromText('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))')); + """ + ) yield from super().ddl_script @staticmethod @@ -113,3 +120,13 @@ def lines_gdf(data_dir): gpd = pytest.importorskip("geopandas") lines_gdf = gpd.read_file(data_dir / "geojson" / "lines.geojson") return lines_gdf + + +@pytest.fixture(scope="session") +def geotable(con): + return con.table("geo") + + +@pytest.fixture(scope="session") +def gdf(geotable): + return geotable.execute() diff --git a/ibis/backends/duckdb/tests/test_geospatial.py b/ibis/backends/duckdb/tests/test_geospatial.py index e3a194824ac7..00bd5ba9719d 100644 --- a/ibis/backends/duckdb/tests/test_geospatial.py +++ b/ibis/backends/duckdb/tests/test_geospatial.py @@ -195,3 +195,18 @@ def test_geospatial_buffer(zones, zones_gdf): gp_buffer = zones_gdf.geometry.buffer(100.0, resolution=8) gtm.assert_geoseries_equal(buffer.to_pandas(), gp_buffer, check_crs=False) + + +# using a smaller dataset for time purposes +def test_geospatial_convert(geotable, gdf): + # geotable is fabricated but let's say the + # data is in CRS: EPSG:2263 + # let's transform to EPSG:4326 (latitude-longitude projection) + geo_ll = geotable.geom.convert("EPSG:2263", "EPSG:4326") + + gdf.crs = "EPSG:2263" + gdf_ll = gdf.geometry.to_crs(crs=4326) + + gtm.assert_geoseries_equal( + geo_ll.to_pandas(), gdf_ll, check_less_precise=True, check_crs=False + ) diff --git a/ibis/expr/operations/geospatial.py b/ibis/expr/operations/geospatial.py index 3ead66f8328d..856716011f92 100644 --- a/ibis/expr/operations/geospatial.py +++ b/ibis/expr/operations/geospatial.py @@ -451,6 +451,16 @@ class GeoTransform(GeoSpatialUnOp): dtype = dt.geometry +@public +class GeoConvert(GeoSpatialUnOp): + """Returns a transformed version of the given geometry from source crs/srid to a target crs/srid.""" + + source: str + target: str + + dtype = dt.geometry + + @public class GeoAsBinary(GeoSpatialUnOp): """Return the Well-Known Binary (WKB) representation of the input, without SRID meta data.""" diff --git a/ibis/expr/types/geospatial.py b/ibis/expr/types/geospatial.py index 5c22938c09c8..c9f97f000da5 100644 --- a/ibis/expr/types/geospatial.py +++ b/ibis/expr/types/geospatial.py @@ -671,6 +671,25 @@ def transform(self, srid: ir.IntegerValue) -> GeoSpatialValue: """ return ops.GeoTransform(self, srid).to_expr() + def convert( + self, source: ir.StringValue, target: ir.StringValue | ir.IntegerValue + ) -> GeoSpatialValue: + """Transform a geometry into a new SRID (CRS). + + Parameters + ---------- + source + CRS/SRID of input geometry + target + Target CRS/SRID + + Returns + ------- + GeoSpatialValue + Transformed geometry + """ + return ops.GeoConvert(self, source, target).to_expr() + def line_locate_point(self, right: PointValue) -> ir.FloatingValue: """Locate the distance a point falls along the length of a line.