Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix/global filtering #187

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [0.10.13] - 2024-10-16

### Added

- Added global food security data with additional p-code mappings

## [0.10.12] - 2024-10-16

### Fixed
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ hdx-python-api==6.3.4
# via
# hapi-pipelines (pyproject.toml)
# hdx-python-scraper
hdx-python-country==3.8.1
hdx-python-country==3.8.2
# via
# hapi-pipelines (pyproject.toml)
# hdx-python-api
Expand Down Expand Up @@ -228,7 +228,7 @@ ruamel-yaml==0.18.6
# via hdx-python-utilities
ruamel-yaml-clib==0.2.8
# via ruamel-yaml
setuptools==75.1.0
setuptools==75.2.0
# via ckanapi
shellingham==1.5.4
# via typer
Expand Down
2 changes: 2 additions & 0 deletions src/hapi/pipelines/app/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ def output_food_security(self):
admins=self.admins,
adminone=self.adminone,
admintwo=self.admintwo,
countryiso3s=self.countries,
configuration=self.configuration,
)
food_security.populate()
Expand Down Expand Up @@ -311,6 +312,7 @@ def output_funding(self):
funding = Funding(
session=self.session,
metadata=self.metadata,
countryiso3s=self.countries,
locations=self.locations,
configuration=self.configuration,
)
Expand Down
9 changes: 9 additions & 0 deletions src/hapi/pipelines/configs/core.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ admin1:
"CMR|Far-North": "CM004"
"CMR|West": "CM008"
"CPV|Santo Antao": "CV06"
"DJI|Djibouti Ville": "DJ04"
"ETH|B. Gumuz": "ET06"
"HTI|Nord-Ouest": "HT09"
"HTI|Sud": "HT07"
Expand Down Expand Up @@ -90,6 +91,8 @@ admin2:
"AF08|Onaba": "AF0805"
"AF14|Khost": "AF1401"
"AF23|Chaghcharan": "AF2301"
"AO09|Município dos Gambos (ex-chiange)": "AO09073"
"AO16|Mocamedes": "AO16139"
"CD43|Ville de Gbadolite": "CD4301"
"CD83|Territoire de LODJA": "CD8303"
"CO08|Distrito Especial, Industrial Y Portuario De Barranquilla": "CO08001"
Expand Down Expand Up @@ -132,6 +135,7 @@ admin2:
"NG027|Munya": "NG027018"
"PH08|Samar": "PH08060"
"PK2|Mekran": "PK211"
"PK5|Dera Ismail Khan": "PK509"
"PS01|Ramallah and Albireh": "PS0130"
"RUS|Sankt-peterburg": "RU004002"
"SD01|Sharq El Nile": "SD01004"
Expand All @@ -144,6 +148,7 @@ admin2:
"TD11|La kabbia": "TD1102"
"TD17|Iriba": "TD1703"
"TD22|Tibest-Ouest": "TD2202"
"TG05|Kpendial- Ouest": "TG0513"
"TZ12|Mbozi": "TZ2606" # TODO: this unit is not getting picked up because of the mismatched admin1
"UA12|Apostolivskyi": "UA1206"
"UA12|Marhanetska": "UA1208"
Expand Down Expand Up @@ -271,6 +276,10 @@ admin2:
- "maguindanao"
- "meru south"
- "metropolitan manila"
- "GHA|ashanti"
- "GHA|bono"
- "GHA|bono east"
- "GHA|central"

orphan_admin2s:
SS0001: "SSD"
Expand Down
11 changes: 11 additions & 0 deletions src/hapi/pipelines/configs/food_security.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ food_security:
# Countries where every "Area" is a non-admin unit, so the only admin data
# available is admin 1, in "Level 1"
adm1_only:
- "DJI"
- "HTI"
- "MMR"
- "SOM"
Expand All @@ -34,6 +35,16 @@ food_security:
adm2_only:
- "PSE"

# Countries where "Level 1" holds admin 2 data (there is no admin 1 data),
# whether or not "Area" is blank
adm2_in_level1:
- "LBN"

# Countries where "Level 1" contains non-admin units, so the only admin data
# available is admin 1, in "Area"
adm1_in_area:
- "KEN"

# The errors below were picked up from the logging, which outputs all the
# fuzzy matches
adm1_errors:
Expand Down
61 changes: 58 additions & 3 deletions src/hapi/pipelines/database/food_security.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from dataclasses import dataclass
from logging import getLogger
from typing import Dict, Optional, Set
from typing import Dict, List, Optional, Set

from hapi_schema.db_food_security import DBFoodSecurity
from hdx.api.configuration import Configuration
Expand Down Expand Up @@ -38,13 +38,15 @@ def __init__(
admins: admins.Admins,
adminone: AdminLevel,
admintwo: AdminLevel,
countryiso3s: List[str],
configuration: Configuration,
):
super().__init__(session)
self._metadata = metadata
self._admins = admins
self._adminone = adminone
self._admintwo = admintwo
self._countryiso3s = countryiso3s
self._configuration = configuration
self._country_status = {}

Expand Down Expand Up @@ -212,7 +214,23 @@ def process_subnational(
admin_level == "admintwo"
and countryiso3 in food_sec_config["adm1_only"]
):
return None
self._country_status[countryiso3] = (
"Level 1: Admin 1, Area: ignored"
)
adminoneinfo = self.get_adminoneinfo(
food_sec_config["adm_ignore_patterns"],
warnings,
dataset_name,
countryiso3,
row["Level 1"],
)
return self.get_adminone_admin2_ref(
food_sec_config,
warnings,
errors,
dataset_name,
adminoneinfo,
)
# The YAML configuration "adm2_only" specifies locations where
# "Level 1" is not populated and "Area" is admin 2. (These are
# exceptions since "Level 1" would normally be populated if "Area" is
Expand All @@ -234,6 +252,43 @@ def process_subnational(
adminoneinfo,
)

if countryiso3 in food_sec_config["adm2_in_level1"]:
row["Area"] = row["Level 1"]
row["Level 1"] = None
adminoneinfo = AdminInfo(countryiso3, "NOT GIVEN", "", None, False)
self._country_status[countryiso3] = (
"Level 1: Admin 2, Area: ignored"
)
return self.get_admintwo_admin2_ref(
food_sec_config,
warnings,
errors,
dataset_name,
row,
adminoneinfo,
)

if countryiso3 in food_sec_config["adm1_in_area"]:
if admin_level == "adminone":
return None
self._country_status[countryiso3] = (
"Level 1: ignored, Area: Admin 1"
)
adminoneinfo = self.get_adminoneinfo(
food_sec_config["adm_ignore_patterns"],
warnings,
dataset_name,
countryiso3,
row["Area"],
)
return self.get_adminone_admin2_ref(
food_sec_config,
warnings,
errors,
dataset_name,
adminoneinfo,
)

adminone_name = row["Level 1"]

if not adminone_name:
Expand Down Expand Up @@ -334,7 +389,7 @@ def populate(self) -> None:
if "#" in row["Date of analysis"]: # ignore HXL row
continue
countryiso3 = row["Country"]
if countryiso3 not in self._configuration["HAPI_countries"]:
if countryiso3 not in self._countryiso3s:
continue
provider_admin1_name = get_provider_name(row, "Level 1")
provider_admin2_name = get_provider_name(row, "Area")
Expand Down
5 changes: 5 additions & 0 deletions src/hapi/pipelines/database/funding.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Functions specific to the funding theme."""

from logging import getLogger
from typing import List

from hapi_schema.db_funding import DBFunding
from hdx.api.configuration import Configuration
Expand All @@ -21,11 +22,13 @@ def __init__(
self,
session: Session,
metadata: Metadata,
countryiso3s: List[str],
locations: locations,
configuration: Configuration,
):
super().__init__(session)
self._metadata = metadata
self._countryiso3s = countryiso3s
self._locations = locations
self._configuration = configuration

Expand All @@ -45,6 +48,8 @@ def populate(self) -> None:
if dataset["archived"]:
continue
admin_code = dataset.get_location_iso3s()[0]
if admin_code not in self._countryiso3s:
continue
resource = [
r
for r in dataset.get_resources()
Expand Down