Skip to content

Commit

Permalink
Merge pull request #31 from OCHA-DAP/main
Browse files Browse the repository at this point in the history
HDXDSYS-843 Add IDP data
  • Loading branch information
alexandru-m-g authored Sep 30, 2024
2 parents f0a0f5c + a19efdd commit ad891cb
Show file tree
Hide file tree
Showing 21 changed files with 677,370 additions and 42,361 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/db_export.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,11 @@ jobs:
- name: Run Pipeline
env:
BASIC_AUTHS: ${{ secrets.BASIC_AUTHS }}
HDX_SITE: ${{ vars.HDX_SITE }}
HDX_KEY: ${{ secrets.HDX_BOT_SCRAPERS_API_TOKEN }}
PREPREFIX: ${{ secrets.HDX_PIPELINE_PREPREFIX }}
USER_AGENT: ${{ secrets.USER_AGENT }}
BASIC_AUTHS: ${{ secrets.BASIC_AUTHS }}
run: python3.11 -m hapi.pipelines.app -db "postgresql+psycopg://postgres:postgres@localhost:5432/hapi"

- name: Dump PostgreSQL Views
Expand Down
24 changes: 24 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,30 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [0.10.3] - 2024-09-24

### Changed

- Melanie's food security changes (SOM admin 1 only)

## [0.10.2] - 2024-09-24

### Changed

- Fix how pipeline appears in MixPanel

## [0.10.1] - 2024-09-20

### Changed

- Split refugees into refugees and returnees

## [0.10.0] - 2024-09-19

### Added

- IDP scraper

## [0.9.58] - 2024-09-18

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ classifiers = [
requires-python = ">=3.8"

dependencies = [
"hapi-schema>=0.8.15",
"hapi-schema>=0.8.17",
"hdx-python-api>= 6.3.4",
"hdx-python-country>= 3.7.8",
"hdx-python-database[postgresql]>= 1.3.1",
Expand Down
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,17 @@ filelock==3.16.1
# via virtualenv
frictionless==5.17.1
# via hdx-python-utilities
google-auth==2.34.0
google-auth==2.35.0
# via
# google-auth-oauthlib
# gspread
google-auth-oauthlib==1.2.1
# via gspread
greenlet==3.1.0
greenlet==3.1.1
# via sqlalchemy
gspread==6.1.2
# via hdx-python-scraper
hapi-schema==0.8.16
hapi-schema==0.8.17
# via hapi-pipelines (pyproject.toml)
hdx-python-api==6.3.4
# via
Expand Down
30 changes: 5 additions & 25 deletions src/hapi/pipelines/app/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import argparse
import logging
from os import getenv
from os.path import expanduser, join
from typing import Dict, Optional

from hapi_schema.views import prepare_hapi_views
Expand Down Expand Up @@ -39,12 +40,6 @@

def parse_args():
parser = argparse.ArgumentParser(description="HAPI pipelines")
parser.add_argument("-hk", "--hdx-key", default=None, help="HDX api key")
parser.add_argument("-ua", "--user-agent", default=None, help="user agent")
parser.add_argument("-pp", "--preprefix", default=None, help="preprefix")
parser.add_argument(
"-hs", "--hdx-site", default=None, help="HDX site to use"
)
parser.add_argument(
"-db", "--db-uri", default=None, help="Database connection string"
)
Expand Down Expand Up @@ -172,20 +167,6 @@ def main(

if __name__ == "__main__":
args = parse_args()
hdx_key = args.hdx_key
if hdx_key is None:
hdx_key = getenv("HDX_KEY")
user_agent = args.user_agent
if user_agent is None:
user_agent = getenv("USER_AGENT")
if user_agent is None:
user_agent = "hapi-pipelines"
preprefix = args.preprefix
if preprefix is None:
preprefix = getenv("PREPREFIX")
hdx_site = args.hdx_site
if hdx_site is None:
hdx_site = getenv("HDX_SITE", "prod")
db_uri = args.db_uri
if db_uri is None:
db_uri = getenv("DB_URI")
Expand Down Expand Up @@ -221,21 +202,20 @@ def main(
"core.yaml",
"food_security.yaml",
"funding.yaml",
"idps.yaml",
"national_risk.yaml",
"operational_presence.yaml",
"population.yaml",
"poverty_rate.yaml",
"refugees.yaml",
"refugees_and_returnees.yaml",
"wfp.yaml",
]
project_config_dict = load_yamls(project_configs)
project_config_dict = add_defaults(project_config_dict)
facade(
main,
hdx_key=hdx_key,
user_agent=user_agent,
preprefix=preprefix,
hdx_site=hdx_site,
user_agent_config_yaml=join(expanduser("~"), ".useragents.yaml"),
user_agent_lookup=lookup,
project_config_dict=project_config_dict,
db_uri=db_uri,
db_params=args.db_params,
Expand Down
41 changes: 33 additions & 8 deletions src/hapi/pipelines/app/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from hapi.pipelines.database.food_security import FoodSecurity
from hapi.pipelines.database.funding import Funding
from hapi.pipelines.database.humanitarian_needs import HumanitarianNeeds
from hapi.pipelines.database.idps import IDPs
from hapi.pipelines.database.locations import Locations
from hapi.pipelines.database.metadata import Metadata
from hapi.pipelines.database.national_risk import NationalRisk
Expand All @@ -25,7 +26,7 @@
from hapi.pipelines.database.org_type import OrgType
from hapi.pipelines.database.population import Population
from hapi.pipelines.database.poverty_rate import PovertyRate
from hapi.pipelines.database.refugees import Refugees
from hapi.pipelines.database.refugees_and_returnees import RefugeesAndReturnees
from hapi.pipelines.database.sector import Sector
from hapi.pipelines.database.wfp_commodity import WFPCommodity
from hapi.pipelines.database.wfp_market import WFPMarket
Expand Down Expand Up @@ -173,7 +174,14 @@ def _create_configurable_scrapers(
_create_configurable_scrapers("operational_presence", "national")
_create_configurable_scrapers("national_risk", "national")
_create_configurable_scrapers("funding", "national")
_create_configurable_scrapers("refugees", "national")
_create_configurable_scrapers("refugees_and_returnees", "national")
_create_configurable_scrapers("idps", "national")
_create_configurable_scrapers(
"idps", "adminone", adminlevel=self.adminone
)
_create_configurable_scrapers(
"idps", "admintwo", adminlevel=self.admintwo
)
_create_configurable_scrapers("poverty_rate", "national")
_create_configurable_scrapers("conflict_event", "national")
_create_configurable_scrapers(
Expand Down Expand Up @@ -257,18 +265,34 @@ def output_national_risk(self):
)
national_risk.populate()

def output_refugees(self):
if not self.themes_to_run or "refugees" in self.themes_to_run:
def output_refugees_and_returnees(self):
if (
not self.themes_to_run
or "refugees_and_returnees" in self.themes_to_run
):
results = self.runner.get_hapi_results(
self.configurable_scrapers["refugees"]
self.configurable_scrapers["refugees_and_returnees"]
)
refugees = Refugees(
refugees_and_returnees = RefugeesAndReturnees(
session=self.session,
metadata=self.metadata,
locations=self.locations,
results=results,
)
refugees.populate()
refugees_and_returnees.populate()

def output_idps(self):
# Populate the database with IDP data produced by the "idps" scrapers.
# Runs when no theme filter is set (self.themes_to_run is empty/falsy)
# or when "idps" is one of the requested themes; otherwise does nothing.
if not self.themes_to_run or "idps" in self.themes_to_run:
# Collect the HAPI results for every configurable scraper registered
# under the "idps" key.
results = self.runner.get_hapi_results(
self.configurable_scrapers["idps"]
)
# Hand the results to the IDPs writer along with the session, metadata
# and admins objects held on this instance.
idps = IDPs(
session=self.session,
metadata=self.metadata,
admins=self.admins,
results=results,
)
idps.populate()

def output_funding(self):
if not self.themes_to_run or "funding" in self.themes_to_run:
Expand Down Expand Up @@ -351,7 +375,8 @@ def output(self):
self.output_food_security()
self.output_humanitarian_needs()
self.output_national_risk()
self.output_refugees()
self.output_refugees_and_returnees()
self.output_idps()
self.output_funding()
self.output_poverty_rate()
self.output_conflict_event()
Expand Down
1 change: 1 addition & 0 deletions src/hapi/pipelines/configs/food_security.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ food_security:
adm1_only:
- "HTI"
- "MMR"
- "SOM"

# This is where "Level 1" is blank and there is only admin 2 data available
# in "Area" (usually blank "Level 1" means "Area" is admin 1 rather than 2)
Expand Down
66 changes: 66 additions & 0 deletions src/hapi/pipelines/configs/idps.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#IDPs config file

idps_default:
scrapers_with_defaults:
- "dtm"
format: "csv"
use_hxl: True
admin_exact: True
input:
- "#affected+idps"
- "#date+reported"
- "#round+code"
- "#assessment+type"
- "#operation+name"
list:
- "#affected+idps"
- "#date+reported"
- "#round+code"
- "#assessment+type"
- "#operation+name"
output:
- "number_idps"
- "reporting_date"
- "round_number"
# NOTE(review): "asessment_type" looks like a misspelling of "assessment_type"
# (the HXL tag is "#assessment+type") - confirm nothing reads this header
# name before renaming it.
- "asessment_type"
- "operation"
output_hxl:
- "#affected+idps"
- "#date+reported"
- "#round+code"
- "#assessment+type"
- "#operation+name"

idps_national:
dtm:
dataset: "global-iom-dtm-from-api"
resource: "Global IOM DTM data for admin levels 0-2"
filter_cols:
- "#adm1+code"
prefilter: "#adm1+code is None"
admin:
- "#country+code"

idps_adminone:
dtm:
dataset: "global-iom-dtm-from-api"
resource: "Global IOM DTM data for admin levels 0-2"
filter_cols:
- "#adm1+code"
- "#adm2+code"
prefilter: "#adm1+code is not None and #adm2+code is None"
admin:
- "#country+code"
- "#adm1+code"

idps_admintwo:
dtm:
dataset: "global-iom-dtm-from-api"
resource: "Global IOM DTM data for admin levels 0-2"
filter_cols:
- "#adm1+code"
- "#adm2+code"
prefilter: "#adm1+code is not None and #adm2+code is not None"
admin:
- "#country+code"
- "#adm2+code"
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
refugees_national:
refugees:
refugees_and_returnees_national:
refugees_and_returnees:
dataset: "unhcr-population-data-for-world"
resource: "Demographics and locations of forcibly displaced and stateless persons (Global)"
format: "csv"
Expand Down
Loading

0 comments on commit ad891cb

Please sign in to comment.