Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HDXDSYS-1086 Add HNO freeform column to hapi-pipelines #183

Merged
merged 8 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [0.10.X] - 2024-10-X

### Changed

- Use freeform category for humanitarian needs
- Populate provider_admin1_name and provider_admin2_name

## [0.10.8] - 2024-10-10

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ classifiers = [
requires-python = ">=3.8"

dependencies = [
"hapi-schema>=0.9.0",
"hapi-schema@git+https://github.com/OCHA-DAP/hapi-sqlalchemy-schema@freeform_category",
"hdx-python-api>= 6.3.4",
"hdx-python-country>= 3.8.1",
"hdx-python-database[postgresql]>= 1.3.1",
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ greenlet==3.1.1
# via sqlalchemy
gspread==6.1.3
# via hdx-python-scraper
hapi-schema==0.9.0
hapi-schema @ git+https://github.com/OCHA-DAP/hapi-sqlalchemy-schema@b87001386b4c60ac6d4ca41dfeee689bdd7144ab
# via hapi-pipelines (pyproject.toml)
hdx-python-api==6.3.4
# via
Expand Down Expand Up @@ -95,7 +95,7 @@ jinja2==3.1.4
# via frictionless
jsonlines==4.0.0
# via hdx-python-utilities
jsonpath-ng==1.6.1
jsonpath-ng==1.7.0
# via libhxl
jsonschema==4.23.0
# via
Expand Down
86 changes: 48 additions & 38 deletions src/hapi/pipelines/database/humanitarian_needs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Functions specific to the humanitarian needs theme."""

import re
from datetime import datetime
from logging import getLogger

Expand All @@ -23,6 +24,8 @@


class HumanitarianNeeds(BaseUploader):
admin_name_regex = re.compile(r"Admin (\d) Name")

def __init__(
self,
session: Session,
Expand All @@ -38,21 +41,40 @@ def __init__(
self._configuration = configuration

def get_admin2_ref(self, row, dataset_name, errors):
admin_code = row["Admin 2 PCode"]
if admin_code == "#adm2+code": # ignore HXL row
countryiso3 = row["Country ISO3"]
if countryiso3 == "#country+code": # ignore HXL row
return None
if admin_code:
admin_level = "admintwo"
else:
admin_code = row["Admin 1 PCode"]
if admin_code:
admin_level = "adminone"
else:
admin_code = row["Country ISO3"]
admin_level = "0"
for header in row:
match = self.admin_name_regex.match(header)
if match and row[header]:
admin_level = match.group(1)
match admin_level:
case "0":
admin_level = "national"
return self._admins.get_admin2_ref(
admin_code = countryiso3
case "1":
admin_level = "adminone"
admin_code = row["Admin 1 PCode"]
case "2":
admin_level = "admintwo"
admin_code = row["Admin 2 PCode"]
case _:
return None
admin2_ref = self._admins.get_admin2_ref(
admin_level, admin_code, dataset_name, errors
)
if admin2_ref is None:
if admin_level == "adminone":
admin_code = f"{countryiso3}-XXX"
mcarans marked this conversation as resolved.
Show resolved Hide resolved
elif admin_level == "admintwo":
admin_code = f"{countryiso3}-XXX-XXX"
else:
return None
admin2_ref = self._admins.get_admin2_ref(
admin_level, admin_code, dataset_name, errors
)
return admin2_ref

def populate(self) -> None:
logger.info("Populating humanitarian needs table")
Expand All @@ -63,44 +85,37 @@ def populate(self) -> None:
self._metadata.add_dataset(dataset)
dataset_id = dataset["id"]
dataset_name = dataset["name"]
resource = dataset.get_resource(
1
) # assumes second resource is latest!
resource = dataset.get_resource(0) # assumes first resource is latest!
self._metadata.add_resource(dataset_id, resource)
negative_values_by_iso3 = {}
rounded_values_by_iso3 = {}
resource_id = resource["id"]
resource_name = resource["name"]
year = int(resource_name[-15:-11])
year = int(resource_name[-4:])
time_period_start = datetime(year, 1, 1)
time_period_end = datetime(year, 12, 31, 23, 59, 59)
url = resource["url"]
headers, rows = reader.get_tabular_rows(url, dict_form=True)
# Admin 1 PCode,Admin 2 PCode,Sector,Gender,Age Group,Disabled,Population Group,Population,In Need,Targeted,Affected,Reached
for row in rows:
admin2_ref = self.get_admin2_ref(row, dataset_name, errors)
if not admin2_ref:
continue
countryiso3 = row["Country ISO3"]
population_group = row["Population Group"]
if population_group == "ALL":
population_group = "all"
admin2_ref = self.get_admin2_ref(row, dataset_name, errors)
provider_admin1_name = row["Admin 1 Name"]
if provider_admin1_name is None:
provider_admin1_name = ""
provider_admin2_name = row["Admin 2 Name"]
if provider_admin2_name is None:
provider_admin2_name = ""
mcarans marked this conversation as resolved.
Show resolved Hide resolved
sector = row["Sector"]
sector_code = self._sector.get_sector_code(sector)
if not sector_code:
add_missing_value_message(
errors, dataset_name, "sector", sector
)
continue
gender = row["Gender"]
if gender == "a":
gender = "all"
age_range = row["Age Range"]
min_age = row["Min Age"]
max_age = row["Max Age"]
disabled_marker = row["Disabled"]
if disabled_marker == "a":
disabled_marker = "all"
category = row["Category"]
if category is None:
category = ""

def create_row(in_col, population_status):
value = row[in_col]
Expand All @@ -120,16 +135,11 @@ def create_row(in_col, population_status):
humanitarian_needs_row = DBHumanitarianNeeds(
resource_hdx_id=resource_id,
admin2_ref=admin2_ref,
provider_admin1_name="",
provider_admin2_name="",
gender=gender,
age_range=age_range,
min_age=min_age,
max_age=max_age,
provider_admin1_name=provider_admin1_name,
provider_admin2_name=provider_admin2_name,
category=category,
sector_code=sector_code,
population_group=population_group,
population_status=population_status,
disabled_marker=disabled_marker,
population=value,
reference_period_start=time_period_start,
reference_period_end=time_period_end,
Expand Down
Loading