Skip to content

Commit

Permalink
Optimizing the make valid geometry query
Browse files Browse the repository at this point in the history
  • Loading branch information
ssadhu-sl committed Oct 10, 2023
1 parent 7b3e7b5 commit 161183c
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 36 deletions.
33 changes: 21 additions & 12 deletions task/pgload/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,9 @@ def do_replace(source, tables_to_export=None):
logger.info(f"Finished loading from database: {source} table: {table}")

if source != "entity" and table == "entity":
make_valid_multipolygon(connection)
make_valid_multipolygon(connection,source)

make_valid_with_handle_geometry_collection(connection)
make_valid_with_handle_geometry_collection(connection,source)


def remove_invalid_datasets(valid_datasets):
Expand Down Expand Up @@ -161,15 +161,18 @@ def call_sql_queries(source, table, csv_filename, fieldnames, sql, cursor):
logger.info(f"No data found in database: {source} table: {table}")


def make_valid_with_handle_geometry_collection(connection):
def make_valid_with_handle_geometry_collection(connection, source):
make_valid_with_handle_geometry_collection = """
UPDATE entity SET geometry = ST_CollectionExtract(ST_MakeValid(geometry))
WHERE geometry IS NOT NULL AND NOT ST_IsValid(geometry)
AND ST_GeometryType(ST_MakeValid(geometry)) = 'ST_GeometryCollection';
WHERE geometry IS NOT NULL
AND ST_GeometryType(ST_MakeValid(geometry)) = 'ST_GeometryCollection' AND dataset = %s
AND (
(ST_IsSimple(geometry) AND NOT ST_IsValid(geometry))
OR NOT ST_IsSimple(geometry));
""".strip()

with connection.cursor() as cursor:
cursor.execute(make_valid_with_handle_geometry_collection)
cursor.execute(make_valid_with_handle_geometry_collection, (source,))
rowcount = cursor.rowcount
connection.commit()

Expand All @@ -178,15 +181,21 @@ def make_valid_with_handle_geometry_collection(connection):
)


def make_valid_multipolygon(connection):
def make_valid_multipolygon(connection,source):
make_valid_multipolygon = """
UPDATE entity set geometry = ST_MakeValid(geometry)
WHERE geometry IS NOT NULL AND NOT ST_IsValid(geometry)
AND ST_GeometryType(ST_MakeValid(geometry)) = 'ST_MultiPolygon';
""".strip()
UPDATE entity
SET geometry = ST_MakeValid(geometry)
WHERE geometry IS NOT NULL
AND ST_GeometryType(ST_MakeValid(geometry)) = 'ST_MultiPolygon'
AND dataset = %s
AND (
(ST_IsSimple(geometry) AND NOT ST_IsValid(geometry))
OR NOT ST_IsSimple(geometry));
""".strip()


with connection.cursor() as cursor:
cursor.execute(make_valid_multipolygon)
cursor.execute(make_valid_multipolygon, (source,))
rowcount = cursor.rowcount
connection.commit()

Expand Down
52 changes: 28 additions & 24 deletions tests/integration/pg_load/test_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,27 +22,32 @@ def sources():


# function to check if invalid data is updated correctly
def multipolygon_check(cursor):
cursor.execute(
"""
def multipolygon_check(cursor,source):
cursor.execute("""
SELECT COUNT(*) FROM entity
WHERE geometry IS NOT NULL AND NOT ST_IsValid(geometry)
AND ST_GeometryType(ST_MakeValid(geometry)) = 'ST_MultiPolygon';
"""
)
WHERE geometry IS NOT NULL
AND ST_GeometryType(ST_MakeValid(geometry)) = 'ST_MultiPolygon'
AND dataset = %s
AND (
(ST_IsSimple(geometry) AND NOT ST_IsValid(geometry))
OR NOT ST_IsSimple(geometry));
""", (source,))
rowcount = cursor.fetchone()[0]
assert rowcount == 0


# function to check if invalid data is updated correctly
def handle_geometry_collection_check(cursor):
def handle_geometry_collection_check(cursor,source):
cursor.execute(
"""
SELECT COUNT(*) FROM entity
WHERE geometry IS NOT NULL AND NOT ST_IsValid(geometry)
AND ST_GeometryType(ST_MakeValid(geometry)) = 'ST_GeometryCollection';
"""
)
WHERE geometry IS NOT NULL
AND ST_GeometryType(ST_MakeValid(geometry)) = 'ST_GeometryCollection'
AND dataset = %s
AND (
(ST_IsSimple(geometry) AND NOT ST_IsValid(geometry))
OR NOT ST_IsSimple(geometry));
""", (source,))
rowcount = cursor.fetchone()[0]
assert rowcount == 0

Expand Down Expand Up @@ -80,21 +85,20 @@ def test_do_replace(sources, postgresql_conn, create_db):
print("Testing do_replace method for source successful:: ", source)


def test_make_valid_multipolygon(postgresql_conn):
cursor = postgresql_conn.cursor()
make_valid_multipolygon(postgresql_conn)

multipolygon_check(cursor)
def test_make_valid_multipolygon(postgresql_conn,sources):

cursor = postgresql_conn.cursor()
for source in sources:
make_valid_multipolygon(postgresql_conn,source)
multipolygon_check(cursor,source)
postgresql_conn.commit()
cursor.close()


def test_make_valid_with_handle_geometry_collection(postgresql_conn):
def test_make_valid_with_handle_geometry_collection(postgresql_conn,sources):
cursor = postgresql_conn.cursor()
make_valid_with_handle_geometry_collection(postgresql_conn)

handle_geometry_collection_check(cursor)

for source in sources:
make_valid_with_handle_geometry_collection(postgresql_conn,source)
handle_geometry_collection_check(cursor,source)
postgresql_conn.commit()
cursor.close()

0 comments on commit 161183c

Please sign in to comment.