diff --git a/Snakefile b/Snakefile index 0b5241f55..05ecd6a86 100644 --- a/Snakefile +++ b/Snakefile @@ -2,11 +2,16 @@ # # SPDX-License-Identifier: GPL-3.0-or-later +import sys +sys.path.append('./scripts') + from os.path import normpath, exists, isdir from shutil import copyfile from snakemake.remote.HTTP import RemoteProvider as HTTPRemoteProvider +from scripts.download_osm_data import create_country_list + HTTP = HTTPRemoteProvider() if not exists("config.yaml"): @@ -15,6 +20,8 @@ if not exists("config.yaml"): configfile: "config.yaml" +# convert country list according to the desired region +config["countries"] = create_country_list(config["countries"]) COSTS = "data/costs.csv" ATLITE_NPROCESSES = config["atlite"].get("nprocesses", 20) diff --git a/notebooks/0_osm_data_url_acsess.ipynb b/notebooks/0_osm_data_url_acsess.ipynb new file mode 100644 index 000000000..0db2e3b20 --- /dev/null +++ b/notebooks/0_osm_data_url_acsess.ipynb @@ -0,0 +1,250 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "45072a1b", + "metadata": {}, + "source": [ + "# Access to the OpenStreetMap Data Server\n", + "\n", + "This notebook shows how we reach the raw OpenStreetMap data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "98cecfff", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "This is the repository path: /Users/ekatef/Documents/_github_/pypsa-africa\n", + "Had to go 0 folder(s) up.\n" + ] + } + ], + "source": [ + "import sys\n", + "sys.path.append('../') # to import helpers\n", + "\n", + "from scripts._helpers import _sets_path_to_root\n", + "_sets_path_to_root(\"pypsa-africa\")" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "9efb61cd", + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import sys\n", + "import pandas as pd\n", + "import requests\n", + "import urllib3\n", + "import time\n", + "\n", + "pd.set_option('display.max_columns', None)\n", + "pd.set_option('display.max_colwidth', 70)\n", + "\n", + "logger = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "fd7c4412", + "metadata": {}, + "outputs": [], + "source": [ + "from scripts.config_osm_data import continent_regions\n", + "from scripts.config_osm_data import continents\n", + "from scripts.config_osm_data import iso_to_geofk_dict\n", + "from scripts.config_osm_data import world\n", + "from scripts.config_osm_data import world_geofk" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "0a09e736", + "metadata": {}, + "outputs": [], + "source": [ + "def list_word_geofk_countries():\n", + " countries_list = []\n", + "\n", + " for continent in world_geofk:\n", + " country = world_geofk[continent]\n", + " countries_list.append(list(country.keys()))\n", + " \n", + " return countries_list " + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "3f8fea67", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['DZ', 'AO', 'BJ', 'BW', 'BF', 'BI', 'CM', 'CF', 'TD', 'CG', 'CD', 'DJ', 'EG', 'GQ', 'ER', 'ET', 'GA', 'GH', 'GW', 'GN', 'CI', 
'KE', 'LS', 'LR', 'LY', 'MG', 'MW', 'ML', 'MR', 'MA', 'MZ', 'NA', 'NE', 'NG', 'RW', 'SNGM', 'SL', 'SO', 'ZA', 'SS', 'SD', 'SZ', 'TZ', 'TG', 'TN', 'UG', 'ZM', 'ZW', 'AF', 'AM', 'AZ', 'BD', 'BT', 'KH', 'CN', 'GCC', 'IN', 'ID', 'IR', 'IQ', 'IL-PL', 'JP', 'JO', 'KZ', 'KP', 'KR', 'KG', 'LA', 'LB', 'MY-SG-BN', 'MV', 'MN', 'MM', 'NP', 'PK', 'PH', 'LK', 'SY', 'TW', 'TJ', 'TH', 'TM', 'UZ', 'VN', 'YE', 'AU', 'FJ', 'NC', 'NZ', 'PG', 'AL', 'AD', 'AT', 'BY', 'BE', 'BA', 'BG', 'HR', 'CZ', 'CY', 'DK', 'EE', 'FI', 'FR', 'GE', 'DE', 'GR', 'HU', 'IS', 'IE', 'IT', 'LV', 'LI', 'LT', 'LU', 'MK', 'MT', 'MD', 'MC', 'ME', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SK', 'SI', 'ES', 'SE', 'CH', 'UA', 'GB', 'TR', 'CEFD', 'FEFD', 'NCDF', 'NWDF', 'SBFD', 'SOFD', 'URDF', 'VOFD', 'CA', 'GL', 'MX', 'US', 'AR', 'BO', 'BR', 'CL', 'CO', 'EC', 'PE', 'SR', 'UY', 'VE', 'BZ', 'GT', 'SV', 'HN', 'NI', 'CR']\n" + ] + } + ], + "source": [ + "world_geofk_countries = []\n", + "for sublist in list_word_geofk_countries():\n", + " for item in sublist:\n", + " world_geofk_countries.append(item)\n", + "\n", + "print(world_geofk_countries) " + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "88a5e6e1", + "metadata": {}, + "outputs": [], + "source": [ + "def getGeofkContinentCountry(code):\n", + " for continent in world_geofk:\n", + " country = world_geofk[continent].get(code, 0)\n", + " if country:\n", + " return continent, country\n", + " return continent, country\n", + "\n", + "def build_url(country_code, update, verify):\n", + " continent, country_name = getGeofkContinentCountry(country_code)\n", + " geofabrik_filename = f\"{country_name}-latest.osm.pbf\"\n", + " geofabrik_url = f\"https://download.geofabrik.de/{continent}/{geofabrik_filename}\"\n", + " return geofabrik_url\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "984f4606", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('africa', 'algeria')\n", + "URL 
'https://download.geofabrik.de/africa/algeria-latest.osm.pbf' is working\n", + "('africa', 'angola')\n", + "URL 'https://download.geofabrik.de/africa/angola-latest.osm.pbf' is working\n", + "('africa', 'benin')\n", + "URL 'https://download.geofabrik.de/africa/benin-latest.osm.pbf' is working\n", + "('africa', 'botswana')\n", + "URL 'https://download.geofabrik.de/africa/botswana-latest.osm.pbf' is working\n", + "('africa', 'burkina-faso')\n", + "URL 'https://download.geofabrik.de/africa/burkina-faso-latest.osm.pbf' is working\n", + "('africa', 'burundi')\n", + "URL 'https://download.geofabrik.de/africa/burundi-latest.osm.pbf' is working\n", + "('africa', 'cameroon')\n", + "URL 'https://download.geofabrik.de/africa/cameroon-latest.osm.pbf' is working\n", + "('africa', 'central-african-republic')\n", + "URL 'https://download.geofabrik.de/africa/central-african-republic-latest.osm.pbf' is working\n", + "('africa', 'chad')\n", + "URL 'https://download.geofabrik.de/africa/chad-latest.osm.pbf' is working\n", + "('africa', 'congo-brazzaville')\n", + "URL 'https://download.geofabrik.de/africa/congo-brazzaville-latest.osm.pbf' is working\n", + "('africa', 'congo-democratic-republic')\n", + "URL 'https://download.geofabrik.de/africa/congo-democratic-republic-latest.osm.pbf' is working\n", + "('africa', 'djibouti')\n", + "URL 'https://download.geofabrik.de/africa/djibouti-latest.osm.pbf' is working\n", + "('africa', 'egypt')\n", + "URL 'https://download.geofabrik.de/africa/egypt-latest.osm.pbf' is working\n", + "('africa', 'equatorial-guinea')\n", + "URL 'https://download.geofabrik.de/africa/equatorial-guinea-latest.osm.pbf' is working\n", + "('africa', 'eritrea')\n" + ] + } + ], + "source": [ + "problem_urls = []\n", + "problem_codes = []\n", + "problem_domain = []\n", + "# for cnt in country_geofk_list:\n", + "for cnt in world_geofk_countries: \n", + " print(getGeofkContinentCountry(cnt))\n", + " url = build_url(country_code=cnt, update=False, verify=False)\n", + " 
time.sleep(0.01)\n", + " try:\n", + " request = requests.head(url)\n", + " if request.status_code == 200:\n", + " print(\"URL '\" + url + \"' is working\")\n", + " else:\n", + " problem_urls.append(url)\n", + " problem_codes.append(cnt)\n", + " problem_domain.append(getGeofkContinentCountry(cnt))\n", + " print(\"URL '\" + url + \"' is silent\")\n", + " except requests.exceptions.ConnectionError:\n", + " print(f\"URL {url} not reachable\")\n", + "\n", + "print(\"There are troubles in reaching following urls:\") \n", + "print(problem_urls) \n", + "print(\"Country codes to be checked\")\n", + "print(problem_codes) \n", + "print(problem_domain)" + ] + }, + { + "cell_type": "markdown", + "id": "33ea8008", + "metadata": {}, + "source": [ + "\n", + "# Acknowledgments\n", + "\n", + "The project relies on the [OpenStreetMap](https://www.geofabrik.de/) data provided via the Geofabrik service. Many thanks to all the service contributors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be4bfbed", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45bb68b4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py index 901b93e33..e2acc09a9 100755 --- a/scripts/add_electricity.py +++ b/scripts/add_electricity.py @@ -92,7 +92,6 @@ import xarray as xr from _helpers import configure_logging from _helpers import update_p_nom_max -from download_osm_data import create_country_list from 
powerplantmatching.export import map_country_bus from shapely.validation import make_valid from vresutils import transfer as vtransfer @@ -704,7 +703,7 @@ def add_nice_carrier_names(n, config=None): # Snakemake imports: regions = snakemake.input.regions load = snakemake.input.load - countries = create_country_list(snakemake.config["countries"]) + countries = snakemake.config["countries"] scale = snakemake.config["load_options"]["scale"] admin_shapes = snakemake.input.gadm_shapes diff --git a/scripts/base_network.py b/scripts/base_network.py index 0827cc674..afda13eed 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -68,7 +68,6 @@ import yaml from _helpers import _read_csv_nafix from _helpers import configure_logging -from download_osm_data import create_country_list from scipy.sparse import csgraph from shapely.geometry import LineString from shapely.geometry import Point @@ -137,15 +136,19 @@ def _load_buses_from_osm(): return buses + def _set_links_underwater_fraction(n): - if n.links.empty: return + if n.links.empty: + return - if not hasattr(n.links, 'geometry'): - n.links['underwater_fraction'] = 0. 
+ if not hasattr(n.links, "geometry"): + n.links["underwater_fraction"] = 0.0 else: - offshore_shape = gpd.read_file(snakemake.input.offshore_shapes).unary_union + offshore_shape = gpd.read_file( + snakemake.input.offshore_shapes).unary_union links = gpd.GeoSeries(n.links.geometry.dropna().map(shapely.wkt.loads)) - n.links['underwater_fraction'] = links.intersection(offshore_shape).length / links.length + n.links["underwater_fraction"] = ( + links.intersection(offshore_shape).length / links.length) def _load_lines_from_osm(buses): @@ -198,7 +201,7 @@ def _set_countries_and_substations(n): buses = n.buses - countries = create_country_list(snakemake.config["countries"]) + countries = snakemake.config["countries"] country_shapes = (gpd.read_file(snakemake.input.country_shapes).set_index( "name")["geometry"].set_crs(4326)) offshore_shapes = unary_union( diff --git a/scripts/build_bus_regions.py b/scripts/build_bus_regions.py index d07d73b12..92032c63d 100644 --- a/scripts/build_bus_regions.py +++ b/scripts/build_bus_regions.py @@ -47,7 +47,6 @@ import pandas as pd import pypsa from _helpers import configure_logging -from download_osm_data import create_country_list from shapely.geometry import Point from shapely.geometry import Polygon from vresutils.graph import voronoi_partition_pts @@ -183,7 +182,7 @@ def get_id(coords): snakemake = mock_snakemake("build_bus_regions") configure_logging(snakemake) - countries = create_country_list(snakemake.config["countries"]) + countries = snakemake.config["countries"] n = pypsa.Network(snakemake.input.base_network) diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 8a2aab168..602465ada 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -11,7 +11,6 @@ from _helpers import _sets_path_to_root from _helpers import _to_csv_nafix from _helpers import configure_logging -from download_osm_data import create_country_list from shapely.geometry import LineString from shapely.geometry 
import Point from shapely.ops import linemerge @@ -531,7 +530,7 @@ def built_network(inputs, outputs): country_shapes = (gpd.read_file(country_shapes_fn).set_index("name") ["geometry"].set_crs(4326)) input = snakemake.config["countries"] - country_list = create_country_list(input) + country_list = input bus_country_list = buses["country"].unique().tolist() if len(bus_country_list) != len(country_list): diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index 52a3a5d0a..7083ba2f8 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -21,7 +21,6 @@ from _helpers import _two_2_three_digits_country from _helpers import _two_digits_2_name_country from _helpers import configure_logging -from download_osm_data import create_country_list from rasterio.mask import mask from shapely.geometry import LineString from shapely.geometry import MultiPolygon @@ -702,7 +701,7 @@ def gadm(countries, out = snakemake.output - countries_list = create_country_list(snakemake.config["countries"]) + countries_list = snakemake.config["countries"] layer_id = snakemake.config["build_shape_options"]["gadm_layer_id"] update = snakemake.config["build_shape_options"]["update_file"] out_logging = snakemake.config["build_shape_options"]["out_logging"] diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 435852846..bbb41a781 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -138,7 +138,6 @@ from build_shapes import add_gdp_data from build_shapes import add_population_data from build_shapes import get_GADM_layer -from download_osm_data import create_country_list from pypsa.networkclustering import _make_consense from pypsa.networkclustering import busmap_by_kmeans from pypsa.networkclustering import busmap_by_spectral_clustering @@ -177,7 +176,7 @@ def distribute_clusters(n, n_clusters, focus_weights=None, solver_name=None): distribution_cluster = snakemake.config["cluster_options"][ "distribute_cluster"] - countries_list = 
create_country_list(snakemake.config["countries"]) + country_list = snakemake.config["countries"] year = snakemake.config["build_shape_options"]["year"] update = snakemake.config["build_shape_options"]["update_file"] out_logging = snakemake.config["build_shape_options"]["out_logging"] @@ -199,7 +198,7 @@ def distribute_clusters(n, n_clusters, focus_weights=None, solver_name=None): df_pop_c = gpd.read_file( snakemake.input.country_shapes).rename(columns={"name": "country"}) add_population_data(df_pop_c, - countries_list, + country_list, year, update, out_logging, @@ -496,7 +495,7 @@ def cluster_regions(busmaps, input=None, output=None): "alternative_clustering"] gadm_layer_id = snakemake.config["build_shape_options"]["gadm_layer_id"] focus_weights = snakemake.config.get("focus_weights", None) - country_list = create_country_list(snakemake.config["countries"]) + country_list = snakemake.config["countries"] if alternative_clustering: renewable_carriers = pd.Index([ diff --git a/scripts/config_osm_data.py b/scripts/config_osm_data.py index 92a90dc27..2435012b6 100644 --- a/scripts/config_osm_data.py +++ b/scripts/config_osm_data.py @@ -159,7 +159,7 @@ # 'AN': 'antarctica' } -world = { +world_iso = { "africa": { "DZ": "algeria", "AO": "angola", @@ -202,7 +202,6 @@ "RW": "rwanda", # saint-helena-ascension-and-tristan-da-cunha # "ST": "sao-tome-and-principe", #Island - "SNGM": "senegal-and-gambia", # See Map # Self-created country code "SN": "senegal", "GM": "gambia", # "SC": "seychelles", #Island @@ -225,7 +224,7 @@ "AF": "afghanistan", "AM": "armenia", "AZ": "azerbaijan", - # 'BH': 'bahrain', + "BH": "bahrain", "BD": "bangladesh", "BT": "bhutan", # 'IO': 'british indian ocean territory', @@ -241,37 +240,38 @@ "ID": "indonesia", "IR": "iran", "IQ": "iraq", - "IL-PL": "israel-and-palestine", + "IL": "israel", "JP": "japan", "JO": "jordan", "KZ": "kazakhstan", "KP": "north-korea", "KR": "south-korea", - # 'KW': 'kuwait', + "KW": "kuwait", "KG": "kyrgyzstan", - # 'LA': "lao 
people's democratic republic", + "LA": "lao-people's-democratic-republic", "LB": "lebanon", "MO": "macao", - "MY-SG-BN": - "malaysia-singapore-brunei", # Note 3 countries based on geofabrik + "MY": "malaysia", + "SG": "singapore", + "BN": "brunei", "MV": "maldives", "MN": "mongolia", "MM": "myanmar", "NP": "nepal", - # 'OM': 'oman', + "OM": "oman", "PK": "pakistan", - # 'PS': 'palestine', + "PS": "palestine", "PH": "philippines", - # 'QA': 'qatar', - # 'SA': 'saudi arabia', - # 'SG': 'singapore', # merged with MY + "QA": "qatar", + "SA": "saudi-arabia", + "SG": "singapore", # merged with MY "LK": "sri-lanka", "SY": "syria", "TW": "taiwan", "TJ": "tajikistan", "TH": "thailand", "TM": "turkmenistan", - # 'AE': 'united arab emirates', + "AE": "united-arab-emirates", "UZ": "uzbekistan", "VN": "vietnam", "YE": "yemen", @@ -290,7 +290,7 @@ "NC": "new-caledonia", "NZ": "new-zealand", "NU": "niue", - "NF": "norfolk island", + # "NF": "norfolk island", # 'MP': 'northern mariana islands', "PW": "palau", "PG": "papua-new-guinea", @@ -314,7 +314,7 @@ "CZ": "czech-republic", "DK": "denmark", "EE": "estonia", - "FO": "faroe islands", + # "FO": "faroe islands", "FI": "finland", "FR": "france", "DE": "germany", @@ -342,7 +342,7 @@ "PT": "portugal", "RO": "romania", "RU": "russia", - # 'SM': 'san-marino', + # "SM": "san-marino", "RS": "serbia", "SK": "slovakia", "SI": "slovenia", @@ -367,26 +367,25 @@ "CL": "chile", "CO": "colombia", "EC": "ecuador", + "GF": "french-guyane", + "GY": "guyane", "PE": "peru", + "PY": "paraguay", "SR": "suriname", "UY": "uruguay", "VE": "venezuela", }, "central_america": { "BZ": "belize", - "GT": "guatemala", - "SV": "el-salvador", + "CR": "costa-rica", "HN": "honduras", + "GT": "guatemala", "NI": "nicaragua", - "CR": "costa-rica", + "PA": "panama", + "SV": "el-salvador", }, } -world_countries = { - country_2D: country_name - for d in world.values() for (country_2D, country_name) in d.items() -} - continent_regions = { # Based on: 
https://waml.org/waml-information-bulletin/46-3/index-to-lc-g-schedule/1-world/ # Eurpean regions @@ -478,10 +477,264 @@ "TEST": ["NG", "NE", "SL", "MA"], } -# Problematic countries, where geofabrik and iso norm deviates +# Geofabrik and iso norm deviate for some countries and domains + # dictionary of correspondance between iso country codes and geofabrik codes containing those information +# This dictionary instructs the script download_osm_data about how to successfully download data +# for countries that Geofabrik only provides as part of an aggregated region. +# For example, Senegal (SN) and Gambia (GM) cannot be downloaded from Geofabrik separately, but only jointly as SNGM +# That is why this dictionary contains the following entries: +# "SN": "SNGM" +# "GM": "SNGM" +# This instructs the workflow that, when the country "SN" is requested, it shall download the "SNGM" file iso_to_geofk_dict = { - "EH": "MA", # western sahara -> Morocco - "SN": "SNGM", # senegal -> senegal-gambia - "GM": "SNGM", # gambia -> senegal-gambia + "EH": "MA", # Western Sahara -> Morocco + "SN": "SNGM", # Senegal -> Senegal-Gambia + "GM": "SNGM", # Gambia -> Senegal-Gambia + "HK": "CN", # Hong Kong -> China + "MO": "CN", # Macao -> China + "SG": "MY-SG-BN", # Singapore -> Malaysia-Singapore-Brunei (key must exist in world_geofk) + "BN": "MY-SG-BN", # Brunei -> Malaysia-Singapore-Brunei (key must exist in world_geofk) + "SA": "GCC", # Saudi Arabia -> Gulf Cooperation Council + "KW": "GCC", # Kuwait -> Gulf Cooperation Council + "BH": "GCC", # Bahrain -> Gulf Cooperation Council + "QA": "GCC", # Qatar -> Gulf Cooperation Council + "AE": "GCC", # United Arab Emirates -> Gulf Cooperation Council + "OM": "GCC", # Oman -> Gulf Cooperation Council +} + +# Cyprus and Georgia -> European domain +# Russia -> a separate domain + +# data for some islands seem to be merged with some other areas data +# "FO": "faroe islands" +# "NF": "norfolk island", +# "PF": "french-polynesia" +# "GU": "guam" + +# "latin_america" -> "south-america" + +world_geofk = { + "africa": { + "DZ": "algeria", + "AO": "angola", + "BJ": "benin", 
+ "BW": "botswana", + "BF": "burkina-faso", + "BI": "burundi", + "CM": "cameroon", + # canary-islands, # Island + # "CV": "cape-verde", # Island + "CF": "central-african-republic", + "TD": "chad", + # "KM": "comores", # Island + "CG": "congo-brazzaville", + "CD": "congo-democratic-republic", + "DJ": "djibouti", + "EG": "egypt", + "GQ": "equatorial-guinea", + "ER": "eritrea", + "ET": "ethiopia", + "GA": "gabon", + "GH": "ghana", + "GW": "guinea-bissau", # No Data + "GN": "guinea", + "CI": "ivory-coast", + "KE": "kenya", + "LS": "lesotho", + "LR": "liberia", + "LY": "libya", + "MG": "madagascar", + "MW": "malawi", + "ML": "mali", + "MR": "mauritania", + # "MU": "mauritius", # Island + "MA": "morocco", + "MZ": "mozambique", + "NA": "namibia", + "NE": "niger", + "NG": "nigeria", + "RW": "rwanda", + # saint-helena-ascension-and-tristan-da-cunha # Islands + # "ST": "sao-tome-and-principe", # Island + "SNGM": "senegal-and-gambia", # Geofk shortcurt + # "SC": "seychelles", # Island + "SL": "sierra-leone", + "SO": "somalia", # No Data + # south-africa-and-lesotho + "ZA": "south-africa", + "SS": "south-sudan", + "SD": "sudan", + "SZ": "swaziland", + "TZ": "tanzania", + "TG": "togo", + "TN": "tunisia", + "UG": "uganda", + "ZM": "zambia", + "ZW": "zimbabwe", + }, + "asia": { + "AF": "afghanistan", + "AM": "armenia", + "AZ": "azerbaijan", + "BD": "bangladesh", + "BT": "bhutan", + # 'IO': 'british indian ocean territory', # Island + "KH": "cambodia", + "CN": "china", + # 'CX': 'christmas island', # Island + # 'CC': 'cocos (keeling) islands', # Island + "GCC": "gcc-states", # Geofk shortcurt for SA, KW, BH, QA, AE, OM + "IN": "india", + "ID": "indonesia", + "IR": "iran", + "IQ": "iraq", + "IL-PL": "israel-and-palestine", + "JP": "japan", + "JO": "jordan", + "KZ": "kazakhstan", + "KP": "north-korea", + "KR": "south-korea", + "KG": "kyrgyzstan", + "LA": "laos", + "LB": "lebanon", + "MY-SG-BN": "malaysia-singapore-brunei", # Geofk shortcurt + "MV": "maldives", + "MN": "mongolia", + 
"MM": "myanmar", + "NP": "nepal", + "PK": "pakistan", + "PH": "philippines", + "LK": "sri-lanka", + "SY": "syria", + "TW": "taiwan", + "TJ": "tajikistan", + "TH": "thailand", + "TM": "turkmenistan", + "UZ": "uzbekistan", + "VN": "vietnam", + "YE": "yemen", + }, + "australia-oceania": { + # "AS": "american-oceania", # Islands + "AU": "australia", + # "CK": 'cook islands', + "FJ": "fiji", + # "PF": "french-polynesia", # Islands + # "GU": "guam", # Island + # "KI": "kiribati", # Islands + # "MH": 'marshall islands', # Islands + # "FM": "micronesia", # Islands + # "NR": "nauru", # Islands + "NC": "new-caledonia", + "NZ": "new-zealand", + # "NU": "niue", # Island + # "NF": "norfolk island", # Island + # "MP": 'northern mariana islands', # Islands + # "PW": "palau", # Islands + "PG": "papua-new-guinea", + # "WS": "samoa", # Islands + # 'SB': 'solomon islands', # Islands + # "TK": "tokelau", # Islands + # "TO": "tonga", # Islands + # "TV": "tuvalu", # Islands + # "VU": "vanuatu", # Islands + # "WF": "wallis-et-futuna", # Islands + }, + "europe": { + "AL": "albania", + "AD": "andorra", + "AT": "austria", + "BY": "belarus", + "BE": "belgium", + "BA": "bosnia-herzegovina", + "BG": "bulgaria", + "HR": "croatia", + "CZ": "czech-republic", + "CY": "cyprus", + "DK": "denmark", + "EE": "estonia", + # "FO": "faroe islands", # Islands + "FI": "finland", + "FR": "france", + "GE": "georgia", + "DE": "germany", + # 'GI': 'gibraltar', # Peninsula; Isolated PS? 
+ "GR": "greece", + # 'GG': 'guernsey', # Island + "HU": "hungary", + "IS": "iceland", + "IE": "ireland-and-northern-ireland", + # 'IM': 'isle of man', # Island + "IT": "italy", + # 'JE': 'jersey', # Island + "LV": "latvia", + "LI": "liechtenstein", + "LT": "lithuania", + "LU": "luxembourg", + "MK": "macedonia", + "MT": "malta", + "MD": "moldova", + "MC": "monaco", + "ME": "montenegro", + "NL": "netherlands", + "NO": "norway", + "PL": "poland", + "PT": "portugal", + "RO": "romania", + # 'SM': 'san-marino', + "RS": "serbia", + "SK": "slovakia", + "SI": "slovenia", + "ES": "spain", + # 'SJ': 'svalbard-and-jan-mayen', # Islands + "SE": "sweden", + "CH": "switzerland", + "UA": "ukraine", + "GB": "great-britain", + "TR": "turkey", + }, + "russia": { + "CEFD": "central-fed-district", + "FEFD": "far-eastern-fed-district", + "NCDF": "north-caucasus-fed-district", + "NWDF": "northwestern-fed-district", + "SBFD": "siberian-fed-district", + "SOFD": "south-fed-district", + "URDF": "ural-fed-district", + "VOFD": "volga-fed-district", + "RU": "russia", + }, + "north-america": { + "CA": "canada", + "GL": "greenland", + "MX": "mexico", + "US": "us", + }, + "south-america": { + "AR": "argentina", + "BO": "bolivia", + "BR": "brazil", + "CL": "chile", + "CO": "colombia", + "EC": "ecuador", + "PE": "peru", + "SR": "suriname", + "PY": "paraguay", + "UY": "uruguay", + "VE": "venezuela", + }, + "central-america": { + "BZ": "belize", + "GT": "guatemala", + "SV": "el-salvador", + "HN": "honduras", + "NI": "nicaragua", + "CR": "costa-rica", + }, +} + +world_countries = { + country_2D: country_name + for d in world_geofk.values() for (country_2D, country_name) in d.items() } diff --git a/scripts/download_osm_data.py b/scripts/download_osm_data.py index fe91707d8..21319da1b 100644 --- a/scripts/download_osm_data.py +++ b/scripts/download_osm_data.py @@ -31,7 +31,8 @@ from config_osm_data import feature_category from config_osm_data import feature_columns from config_osm_data import 
iso_to_geofk_dict -from config_osm_data import world +from config_osm_data import world_geofk +from config_osm_data import world_iso from esy.osmfilter import Node from esy.osmfilter import osm_info as osm_info from esy.osmfilter import osm_pickle as osm_pickle @@ -54,14 +55,14 @@ def getContinentCountry(code): - for continent in world: - country = world[continent].get(code, 0) + for continent in world_geofk: + country = world_geofk[continent].get(code, 0) if country: return continent, country return continent, country -def download_pbf(country_code, update, verify): +def download_pbf(country_code, update, verify, logging=True): """ Download pbf file from geofabrik for a given country code @@ -81,18 +82,37 @@ def download_pbf(country_code, update, verify): continent, country_name = getContinentCountry(country_code) # Filename for geofabrik geofabrik_filename = f"{country_name}-latest.osm.pbf" - # https://download.geofabrik.de/africa/nigeria-latest.osm.pbf - geofabrik_url = f"https://download.geofabrik.de/{continent}/{geofabrik_filename}" + + # Specify the url depending on the requested element, whether it is a continent or a region + if continent == country_name: + # Example continent-specific data: https://download.geofabrik.de/africa-latest.osm.pbf + geofabrik_url = f"https://download.geofabrik.de/{geofabrik_filename}" + else: + # Example country- or sub-region-specific data: https://download.geofabrik.de/africa/nigeria-latest.osm.pbf + geofabrik_url = ( + f"https://download.geofabrik.de/{continent}/{geofabrik_filename}") + + # Filepath of the pbf PBF_inputfile = os.path.join(os.getcwd(), "data", "osm", continent, "pbf", - geofabrik_filename) # Input filepath + geofabrik_filename) if not os.path.exists(PBF_inputfile): - _logger.info(f"{geofabrik_filename} downloading to {PBF_inputfile}") + if logging: + _logger.info( + f"{geofabrik_filename} downloading to {PBF_inputfile}") # create data/osm directory os.makedirs(os.path.dirname(PBF_inputfile), exist_ok=True) 
with requests.get(geofabrik_url, stream=True, verify=False) as r: - with open(PBF_inputfile, "wb") as f: - shutil.copyfileobj(r.raw, f) + + if r.status_code == 200: + # url properly found, thus execute as expected + with open(PBF_inputfile, "wb") as f: + shutil.copyfileobj(r.raw, f) + else: + # error status code: file not found + _logger.error( + f"Error code: {r.status_code}. File {geofabrik_filename} not downloaded from {geofabrik_url}" + ) if verify is True: if verify_pbf(PBF_inputfile, geofabrik_url, update) is False: @@ -363,6 +383,7 @@ def convert_pd_to_gdf_lines(df_way, simplified=False): if simplified is True: df_way["geometry"] = df_way["geometry"].apply( lambda x: x.simplify(0.005, preserve_topology=False)) + gdf = gpd.GeoDataFrame(df_way, geometry=[LineString(x) for x in df_way.lonlat], crs="EPSG:4326") @@ -374,12 +395,28 @@ def convert_pd_to_gdf_lines(df_way, simplified=False): def convert_iso_to_geofk(iso_code, iso_coding=True, convert_dict=iso_to_geofk_dict): - """Function to convert the iso code name of a country into the corresponding geofabrik""" - if iso_code in convert_dict: - if not iso_coding: - _logger.error( - f"Unexpected iso code {iso_code}: expected only geofabrik codes" - ) + """ + Function to convert the iso code name of a country into the corresponding geofabrik + In Geofabrik, some countries are aggregated, thus if a single country is requested, + then all the agglomeration shall be downloaded + For example, Senegal (SN) and Gambia (GM) cannot be found alone in geofabrik, + but they can be downloaded as a whole SNGM + + The conversion directory, initialized to iso_to_geofk_dict is used to perform such conversion + When a two-letter code country is found in convert_dict, and iso_coding is enabled, + then that two-letter code is converted into the corresponding value of the dictionary + + Parameters + ---------- + iso_code : str + Two-code country code to be converted + iso_coding : bool + When true, the iso to geofk is performed + 
convert_dict : dict + Dictionary used to apply the conversion iso to geofk + The keys correspond to the countries iso codes that need a different region to be downloaded + """ + if iso_coding and iso_code in convert_dict: return convert_dict[iso_code] else: return iso_code @@ -401,6 +438,13 @@ def output_csv_geojson(output_files, country_code, df_all_feature, os.makedirs(os.path.dirname(path_file_geojson), exist_ok=True) # create raw directory + # remove non-line elements + if feature_category[feature] == "way": + # check geometry with multiple points: at least two needed to draw a line + is_linestring = df_all_feature["lonlat"].apply( + lambda x: (len(x) >= 2) and (type(x[0]) == tuple)) + df_all_feature = df_all_feature[is_linestring] + df_all_feature = df_all_feature[df_all_feature.columns.intersection( set(columns_feature))] df_all_feature.reset_index(drop=True, inplace=True) @@ -436,7 +480,7 @@ def _init_process_pop(update_, verify_): # Auxiliary function to download the data def _process_func_pop(c_code): - download_pbf(c_code, update, verify) + download_pbf(c_code, update, verify, logging=False) def parallel_download_pbf(country_list, @@ -493,7 +537,9 @@ def process_data( # parallel download of data if parallel download is enabled if nprocesses > 1: - _logger.info(f"Parallel pbf download with {nprocesses} threads") + _logger.info( + f"Parallel raw osm data (pbf files) download with {nprocesses} threads" + ) parallel_download_pbf(country_list, nprocesses, update, verify) # loop the request for each feature @@ -576,10 +622,21 @@ def filter_codes(c_list, iso_coding=True): When iso code are implemented (iso_coding=True), then remove the geofabrik-specific ones. When geofabrik codes are selected(iso_coding=False), ignore iso-specific names. 
""" - if iso_coding: - return [c for c in c_list if len(c) == 2] + if ( + iso_coding + ): # if country lists are in iso coding, then check if they are 2-string + # 2-code countries + ret_list = [c for c in c_list if len(c) == 2] + + # check if elements have been removed and return a working if so + if len(ret_list) < len(c_list): + _logger.warning( + "Specified country list contains the following non-iso codes: " + + ", ".join(list(set(c_list) - set(ret_list)))) + + return ret_list else: - return [c for c in c_list if c not in iso_to_geofk_dict] + return c_list # [c for c in c_list if c not in iso_to_geofk_dict] full_codes_list = [] @@ -589,12 +646,12 @@ def filter_codes(c_list, iso_coding=True): # extract countries in world if value1 == "world": - for continent in world.keys(): - codes_list.extend(list(world[continent])) + for continent in world_iso.keys(): + codes_list.extend(list(world_iso[continent])) # extract countries in continent - elif value1 in world.keys(): - codes_list = list(world[value1]) + elif value1 in world_iso.keys(): + codes_list = list(world_iso[value1]) # extract countries in regions elif value1 in continent_regions.keys(): @@ -613,6 +670,33 @@ def filter_codes(c_list, iso_coding=True): return full_codes_list +def country_list_to_geofk(country_list): + """ + Convert the requested country list into geofk norm + + Parameters + ---------- + country_list : list of str + Two-letter iso country codes or aggregations of countries given in config_osm_data.py + Duplicated codes, before or after the conversion, are returned only once. + Examples are: + ["NG","ZA"], downloading osm data for Nigeria and South Africa + ["SNGM"], downloading data for Senegal&Gambia shape + ["NG","ZA","NG"], won't distort result. 
+ + Returns + ------- + full_codes_list : list + Example ["NG","ZA"] + """ + + full_codes_list = list( + dict.fromkeys(convert_iso_to_geofk(c_code) for c_code in country_list) + ) + + return full_codes_list + + if __name__ == "__main__": if "snakemake" not in globals(): from _helpers import mock_snakemake @@ -627,13 +711,12 @@ def filter_codes(c_list, iso_coding=True): # ["substation", "generator", "line", "cable", "tower"] feature_list = ["substation", "generator", "line", "cable"] - input = snakemake.config["countries"] # country list or region + # get list of countries into geofabrik convention; expected iso norm in input + country_list = country_list_to_geofk(snakemake.config["countries"]) output_files = snakemake.output # output snakemake nprocesses = snakemake.config.get("download_osm_data_nprocesses", 1) # number of threads - country_list = create_country_list(input) - # Set update # Verify = True checks local md5s and pre-filters data again process_data( feature_list, diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index 7d34b27bc..c5f8afb7c 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -54,7 +54,6 @@ # TODO Make logging compatible with progressbar (see PR #102) configure_logging(snakemake) -_sets_path_to_root("pypsa-africa") tutorial = snakemake.config["tutorial"] logger.info("Retrieving data from GoogleDrive.")