Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve performance of LitPop #720

Merged
merged 10 commits into from
May 24, 2023
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ Removed:
- Use `myst-nb` for parsing Jupyter Notebooks for the documentation instead of `nbsphinx` [#712](https://github.com/CLIMADA-project/climada_python/pull/712)
- Installation guide now recommends installing CLIMADA directly via `conda install` [#714](https://github.com/CLIMADA-project/climada_python/pull/714)
- `Exposures.affected_total_value` now takes a hazard intensity threshold as argument. Affected values are only those for which at least one event exceeds the threshold. (previously, all exposures points with an assigned centroid were considered affected) [#702](https://github.com/CLIMADA-project/climada_python/pull/702)
- Add option to pass region ID to `LitPop.from_shape` [#720](https://github.com/CLIMADA-project/climada_python/pull/720)
- Slightly improved performance on `LitPop`-internal computations [#720](https://github.com/CLIMADA-project/climada_python/pull/720)

### Fixed

Expand Down
134 changes: 81 additions & 53 deletions climada/entity/exposures/litpop/litpop.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,8 +460,18 @@ def set_custom_shape(self, *args, **kwargs):
self.__dict__ = LitPop.from_shape(*args, **kwargs).__dict__

@classmethod
def from_shape(cls, shape, total_value, res_arcsec=30, exponents=(1,1), value_unit='USD',
reference_year=DEF_REF_YEAR, gpw_version=GPW_VERSION, data_dir=SYSTEM_DIR):
def from_shape(
cls,
shape,
total_value,
res_arcsec=30,
exponents=(1,1),
value_unit='USD',
region_id=None,
reference_year=DEF_REF_YEAR,
gpw_version=GPW_VERSION,
data_dir=SYSTEM_DIR,
):
"""init LitPop exposure object for a custom shape.
Requires user input regarding the total value to be disaggregated.

Expand All @@ -488,6 +498,12 @@ def from_shape(cls, shape, total_value, res_arcsec=30, exponents=(1,1), value_un
Defining power with which lit (nightlights) and pop (gpw) go into LitPop.
value_unit : str
Unit of exposure values. The default is USD.
region_id : int, optional
The numeric ISO 3166 region associated with the shape. If set to a value,
this single value will be set for every coordinate in the ``GeoDataFrame``
of the resulting ``LitPop`` instance. If ``None`` (default), the region ID
for every coordinate will be determined automatically (at a slight
computational cost).
reference_year : int, optional
Reference year for data sources. Default: CONFIG.exposures.def_ref_year
gpw_version : int, optional
Expand All @@ -512,10 +528,15 @@ def from_shape(cls, shape, total_value, res_arcsec=30, exponents=(1,1), value_un
'GeoSeries. Loop over elements of series outside method.')

tag = Tag()
litpop_gdf, _ = _get_litpop_single_polygon(shape, reference_year,
res_arcsec, data_dir,
gpw_version, exponents,
)
litpop_gdf, _ = _get_litpop_single_polygon(
shape,
reference_year,
res_arcsec,
data_dir,
gpw_version,
exponents,
region_id
)

# disaggregate total value proportional to LitPop values:
if isinstance(total_value, (float, int)):
Expand Down Expand Up @@ -673,8 +694,9 @@ def _get_litpop_single_polygon(polygon, reference_year, res_arcsec, data_dir,
Defining power with which lit (nightlights) and pop (gpw) go into LitPop. To get
nightlights^3 without population count: (3, 0). To use population count alone: (0, 1).
region_id : int, optional
if provided, region_id of gdf is set to value.
The default is None, this implies that region_id is not set.
if provided, region_id of gdf is set to value. The default is ``None``, in which
case the region is determined automatically for every location of the resulting
GeoDataFrame.
peanutfun marked this conversation as resolved.
Show resolved Hide resolved
verbose : bool, optional
Enable verbose logging about the used GPW version and reference year as well as about the
boundary case where no grid points from the GPW grid are contained in the specified
Expand Down Expand Up @@ -755,18 +777,19 @@ def _get_litpop_single_polygon(polygon, reference_year, res_arcsec, data_dir,
meta_out['width'],
meta_out['height'])
# init GeoDataFrame from data and coordinates:
gdf = geopandas.GeoDataFrame({'value': litpop_array.flatten()}, crs=meta_out['crs'],
geometry=geopandas.points_from_xy(
np.round_(lon.flatten(), decimals = 8, out = None),
np.round_(lat.flatten(), decimals = 8, out = None)
)
)
gdf['latitude'] = np.round_(lat.flatten(), decimals = 8, out = None)
gdf['longitude'] = np.round_(lon.flatten(), decimals = 8, out = None)
latitude = np.round(lat.flatten(), decimals=8)
longitude = np.round(lon.flatten(), decimals=8)
gdf = geopandas.GeoDataFrame(
{"value": litpop_array.flatten(), "latitude": latitude, "longitude": longitude},
crs=meta_out['crs'],
geometry=geopandas.points_from_xy(longitude, latitude),
)
if region_id is not None: # set region_id
gdf['region_id'] = region_id
else:
gdf['region_id'] = u_coord.get_country_code(gdf.latitude, gdf.longitude)
gdf['region_id'] = u_coord.get_country_code(
gdf.latitude, gdf.longitude, gridded=True
)
# remove entries outside polygon with `dropna` and return GeoDataFrame:
return gdf.dropna(), meta_out

Expand Down Expand Up @@ -953,8 +976,10 @@ def reproject_input_data(data_array_list, meta_list,
meta_out['width'] = data_out_list[-1].shape[1]
return data_out_list, meta_out

def gridpoints_core_calc(data_arrays, offsets=None, exponents=None,
total_val_rescale=None):

def gridpoints_core_calc(
data_arrays, offsets=None, exponents=None, total_val_rescale=None
):
"""
Combines N dense numerical arrays by point-wise multipilcation and
optionally rescales to new total value:
Expand Down Expand Up @@ -992,47 +1017,50 @@ def gridpoints_core_calc(data_arrays, offsets=None, exponents=None,

Returns
-------
result_array : np.array of same shape as arrays in data_arrays
np.array of same shape as arrays in data_arrays
Results from calculation described above.
"""
# convert input data to arrays
# check integrity of data_array input (length and type):
try:
data_arrays = [np.array(data) for data in data_arrays]
if len(list({data.shape for data in data_arrays})) > 1:
raise ValueError("Elements in data_arrays don't agree in shape.")
except AttributeError as err:
raise TypeError("data_arrays or contained elements have wrong type.") from err

# if None, defaults for offsets and exponents are set:
if offsets is None:
offsets = np.zeros(len(data_arrays))
if exponents is None:
exponents = np.ones(len(data_arrays))
if np.min(offsets) < 0:
raise ValueError("offset values < 0 are not allowed, because negative offset "
"values produce undesired negative disaggregation values.")
if np.min(exponents) < 0:
raise ValueError("exponents < 0 are not allowed, because negative exponents "
"produce 'inf' values where the initial value is 0.")
# Convert input data to arrays
# NOTE: Setting dtype will cause an error if shapes are not compatible
data = np.asarray(data_arrays, dtype=float)

def prepare_input(input_data, default_value, name):
"""Prepare the offset and exposure inputs

If they are ``None``, use the default values. If not, cast them to an array and
ensure that the array has only one dimension. Finally, check if the values are
positive.
"""
if input_data is None:
return default_value
peanutfun marked this conversation as resolved.
Show resolved Hide resolved

array = np.asarray(input_data, dtype=float)
if array.ndim > 1:
raise ValueError(f"Provide {name} as 1D array or list of scalars")
if np.any(array < 0):
raise ValueError(
f"{name} values < 0 are not allowed, because negative {name} "
"values produce undesired negative disaggregation values."
)
return array

offsets = prepare_input(offsets, 0, "offsets")
exponents = prepare_input(exponents, 1, "exponents")

# Steps 1-3: arrays are multiplied after application of offets and exponents:
# (arrays are converted to float to prevent ValueError:
# "Integers to negative integer powers are not allowed.")
result_array = np.power(np.array(data_arrays[0]+offsets[0], dtype=float),
exponents[0])
for i in np.arange(1, len(data_arrays)):
result_array = np.multiply(result_array,
np.power(np.array(data_arrays[i]+offsets[i], dtype=float),
exponents[i])
)

# Steps 4+5: if total value for rescaling is provided, result_array is normalized and
# NOTE: Transpose so that broadcasting over last dimension works. Computing the
# product over the last axis is also much faster. Then transpose back.
data = (data.T + offsets) ** exponents
data = data.prod(axis=-1).T

# Steps 4+5: if total value for rescaling is provided, result is normalized and
# scaled with this total value (total_val_rescale):
if isinstance(total_val_rescale, (float, int)):
return np.divide(result_array, result_array.sum()) * total_val_rescale
return data / data.sum() * total_val_rescale
if total_val_rescale is not None:
raise TypeError("total_val_rescale must be int or float.")
return result_array
return data


# The following functions are only required if calc_admin1 is True,
# not for core LitPop. They are maintained here mainly for backward compatibility
Expand Down
6 changes: 2 additions & 4 deletions climada/entity/exposures/test/test_litpop.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,13 +205,11 @@ def test_gridpoints_core_calc_input_errors(self):
lp.gridpoints_core_calc(data_arrays_demo(4))

# wrong format:
with self.assertRaises(TypeError):
with self.assertRaises(ValueError):
lp.gridpoints_core_calc(data, exponents=['a', 'b'])
data.append('hello i am a string')
with self.assertRaises(ValueError):
lp.gridpoints_core_calc(data)
with self.assertRaises(TypeError):
lp.gridpoints_core_calc(777)

def test_gridpoints_core_calc_default_1(self):
"""test function gridpoints_core_calc, i.e. core data combination
Expand Down Expand Up @@ -374,4 +372,4 @@ def test_get_value_unit_pass(self):
if __name__ == "__main__":
TESTS = unittest.TestLoader().loadTestsFromTestCase(TestLitPop)
# TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestUncertainty))
unittest.TextTestRunner(verbosity=2).run(TESTS)
unittest.TextTestRunner(verbosity=2).run(TESTS)