Commit
Added export of consideration field on datasets for exported_datasets.csv

Rearranged location of requirements files and removed what looked
like redundant packages.

I'm not sure if load_facts is still in use, but there seemed to be
a bug that makes it odd that it ever worked.

Some black reformatting and lint fixes included.

Ran load_local.sh successfully.

I'm assuming that the Dockerfile is still OK as it runs:

pip install --user --no-cache-dir -r requirements.txt

and the requirements file now points to requirements.txt in the
requirements subdirectory, i.e. -r requirements/requirements.txt
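
For reference, the layout this implies (a sketch: the requirements/*.in
files and their compiled *.txt outputs are inferred from the Makefile
targets and this diff, not all shown here):

  task/
    requirements.txt          # now a one-liner: -r requirements/requirements.txt
    requirements/
      requirements.in
      requirements.txt        # compiled by pip-tools
      dev-requirements.in
      dev-requirements.txt    # compiled by pip-tools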
ashimali committed Jun 27, 2024
1 parent 443f08a commit bb90e16
Showing 18 changed files with 519 additions and 250 deletions.
10 changes: 10 additions & 0 deletions .editorconfig
@@ -0,0 +1,10 @@
+[*]
+indent_style = space
+indent_size = 2
+insert_final_newline = true
+
+[Makefile]
+indent_style = tab
+
+[*.py]
+indent_size = 4
1 change: 0 additions & 1 deletion .flake8
@@ -6,7 +6,6 @@ max-line-length = 120
 ignore = W291, E203, W503
 exclude =
     __pycache__
-    node_modules
     .venv
     .direnv

3 changes: 3 additions & 0 deletions .isort.cfg
@@ -0,0 +1,3 @@
+[settings]
+profile=black
+src_paths=task,tests
17 changes: 15 additions & 2 deletions Makefile
@@ -1,8 +1,21 @@
 init::
 	python -m pip install --upgrade pip
 	python -m pip install pip-tools
-	python -m piptools sync task/dev-requirements.txt
+	python -m piptools compile task/requirements/dev-requirements.in
+	python -m piptools compile task/requirements/requirements.in
+	python -m piptools sync task/requirements/dev-requirements.txt task/requirements/requirements.txt
 	python -m pre_commit install
-	python -m pip install -r task/requirements.txt
+
+reqs::
+	python -m piptools compile task/requirements/dev-requirements.in
+	python -m piptools compile task/requirements/requirements.in
+	python -m piptools sync task/requirements/requirements.txt task/requirements/dev-requirements.txt
+
+
+upgrade::
+	python -m piptools compile --upgrade task/requirements/dev-requirements.in
+	python -m piptools compile --upgrade task/requirements/requirements.in
+	python -m piptools sync task/requirements/requirements.txt task/requirements/dev-requirements.txt
+
 
 test:: test-integration
12 changes: 0 additions & 12 deletions task/dev-requirements.in

This file was deleted.

158 changes: 0 additions & 158 deletions task/dev-requirements.txt

This file was deleted.

4 changes: 2 additions & 2 deletions task/load_local.sh
@@ -23,7 +23,7 @@ curl -qfsL $SOURCE_URL/specification/main/specification/schema-field.csv > speci
 # need to use the files cdn instead of the bucket name when loading locally without logging into aws
 DATABASE=${S3_KEY##*/}
 export DATABASE_NAME=${DATABASE%.*}
-echo "DATABASE NAMW: $DATABASE_NAME"
+echo "DATABASE NAME: $DATABASE_NAME"
 echo "$EVENT_ID: running with settings: S3_KEY=$S3_KEY, DATABASE=$DATABASE, DATABASE_NAME=$DATABASE_NAME"


@@ -81,4 +81,4 @@ echo "$EVENT_ID: loading data into postgres"
 python3 -m pgload.load --source="$DATABASE_NAME" || \
 (echo "$EVENT_ID: failed to load $DATABASE" && exit 1)
 
-echo "$EVENT_ID: loading of $DATABASE_NAME completed successfully"
\ No newline at end of file
+echo "$EVENT_ID: loading of $DATABASE_NAME completed successfully"
11 changes: 5 additions & 6 deletions task/pgload/load.py
@@ -117,9 +117,9 @@ def do_replace(source, tables_to_export=None):
logger.info(f"Finished loading from database: {source} table: {table}")

if source != "entity" and table == "entity":
make_valid_multipolygon(connection,source)
make_valid_multipolygon(connection, source)

make_valid_with_handle_geometry_collection(connection,source)
make_valid_with_handle_geometry_collection(connection, source)


def remove_invalid_datasets(valid_datasets):
@@ -164,7 +164,7 @@ def call_sql_queries(source, table, csv_filename, fieldnames, sql, cursor):
 def make_valid_with_handle_geometry_collection(connection, source):
     make_valid_with_handle_geometry_collection = """
     UPDATE entity SET geometry = ST_CollectionExtract(ST_MakeValid(geometry))
-    WHERE geometry IS NOT NULL 
+    WHERE geometry IS NOT NULL
     AND ST_GeometryType(ST_MakeValid(geometry)) = 'ST_GeometryCollection' AND dataset = %s
     AND (
         (ST_IsSimple(geometry) AND NOT ST_IsValid(geometry))
@@ -181,18 +181,17 @@ def make_valid_with_handle_geometry_collection(connection, source):
     )
 
 
-def make_valid_multipolygon(connection,source):
+def make_valid_multipolygon(connection, source):
     make_valid_multipolygon = """
     UPDATE entity
     SET geometry = ST_MakeValid(geometry)
-    WHERE geometry IS NOT NULL 
+    WHERE geometry IS NOT NULL
     AND ST_GeometryType(ST_MakeValid(geometry)) = 'ST_MultiPolygon'
     AND dataset = %s
     AND (
         (ST_IsSimple(geometry) AND NOT ST_IsValid(geometry))
         OR NOT ST_IsSimple(geometry));
     """.strip()
-
 
     with connection.cursor() as cursor:
         cursor.execute(make_valid_multipolygon, (source,))
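
Taken together, the two helpers run per dataset when a non-entity source is
loaded into the entity table. A minimal usage sketch, assuming a reachable
PostGIS database; the connection values and dataset name are illustrative,
not taken from the diff:

    import psycopg2

    from pgload.load import (
        make_valid_multipolygon,
        make_valid_with_handle_geometry_collection,
    )

    # Illustrative connection settings; they mirror the env-var defaults
    # visible in load_facts, not a documented configuration.
    connection = psycopg2.connect(
        host="localhost", database="digital_land", user="postgres", password="postgres"
    )

    source = "conservation-area"  # hypothetical dataset name
    # Repair multipolygons first, then salvage geometry collections produced
    # by ST_MakeValid, mirroring the call order in do_replace.
    make_valid_multipolygon(connection, source)
    make_valid_with_handle_geometry_collection(connection, source)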
15 changes: 11 additions & 4 deletions task/pgload/load_facts.py
@@ -1,8 +1,5 @@
 import logging
 import pathlib
-import sys
-import subprocess
-import tempfile
 import os
 import click
 import psycopg2
@@ -35,7 +32,9 @@ def load_facts():
         connection = psycopg2.connect(
             host=host, database=database, user=user, password=password, port=port
         )
-    except:
+
+    except Exception as e:
+        logger.error(f"Error connecting to database: {e}")
         host = os.getenv("DB_WRITE_ENDPOINT", "localhost")
         database = os.getenv("DB_NAME", "digital_land")
         user = os.getenv("DB_USER_NAME", "postgres")
@@ -74,6 +73,14 @@ def load_facts():


 def load_facts_into_postgres(rows):
+
+    url = urlparse.urlparse(os.getenv("WRITE_DATABASE_URL"))
+    database = url.path[1:]
+    user = url.username
+    password = url.password
+    host = url.hostname
+    port = url.port
+
     for row in rows:
         for key, val in row.items():
             if not val:
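
The commit message's doubt about load_facts appears to refer to these
settings: before this change, load_facts_into_postgres seems to have gone
straight into its row loop without defining the connection values it needed,
while the added lines derive them from WRITE_DATABASE_URL. A minimal sketch
of that pattern, assuming WRITE_DATABASE_URL holds a standard
postgres://user:password@host:port/dbname URL and that urllib.parse is
available as urlparse (the import sits outside the shown hunks); the helper
name is hypothetical:

    import os
    import urllib.parse as urlparse

    import psycopg2

    def connect_from_write_url():
        # Hypothetical helper; WRITE_DATABASE_URL must be set, otherwise
        # urlparse receives None and raises.
        url = urlparse.urlparse(os.getenv("WRITE_DATABASE_URL"))
        return psycopg2.connect(
            host=url.hostname,
            database=url.path[1:],  # strip the leading "/" from the URL path
            user=url.username,
            password=url.password,
            port=url.port,
        )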
5 changes: 1 addition & 4 deletions task/requirements.txt
@@ -1,4 +1 @@
--e git+https://github.com/digital-land/pipeline.git#egg=digital-land
-click==8.0.3
-psycopg2-binary==2.9.3
-requests
+-r requirements/requirements.txt
8 changes: 8 additions & 0 deletions task/requirements/dev-requirements.in
@@ -0,0 +1,8 @@
+pytest
+flake8
+pre-commit
+black
+pytest-pudb
+pytest-mock
+pytest-md-report
+psycopg2
