monte_carlo.py
# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors
#
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Prepares network files with Monte Carlo parameter sweeps for the solving process.
Relevant Settings
-----------------
.. code:: yaml
monte_carlo:
options:
add_to_snakefile: false # When set to true, enables Monte Carlo sampling
samples: 9 # number of optimizations; note that when using scipy the number of samples has to be the square of a prime number
sampling_strategy: "chaospy" # supported packages: "pydoe2", "chaospy", "scipy"
seed: 42 # set seed for reproducibility
uncertainties:
loads_t.p_set:
type: uniform
args: [0, 1]
generators_t.p_max_pu.loc[:, n.generators.carrier == "onwind"]:
type: lognormal
args: [1.5]
generators_t.p_max_pu.loc[:, n.generators.carrier == "solar"]:
type: beta
args: [0.5, 2]
.. seealso::
Documentation of the configuration file ``config.yaml`` at :ref:`monte_cf`
Inputs
------
- ``networks/elec_s_10_ec_lcopt_Co2L-24H.nc``
Outputs
-------
- ``networks/elec_s_10_ec_lcopt_Co2L-24H_{unc}.nc``
e.g. networks/elec_s_10_ec_lcopt_Co2L-24H_m0.nc
networks/elec_s_10_ec_lcopt_Co2L-24H_m1.nc
...
Description
-----------
PyPSA-Earth is deterministic, which means that a set of inputs gives a set of outputs.
Parameter sweeps can help to explore the uncertainty of the outputs caused by parameter changes.
Many are familiar with the classical "sensitivity analysis", which varies the input of only one
feature at a time while exploring the changes in its outputs. Implemented here is a
"global sensitivity analysis" that helps to explore the multi-dimensional uncertainty space
when more than one feature is changed at the same time.
To do so, the script is separated into two building blocks: one creates the experimental design,
the other modifies the network file and writes it out. Building the experimental design is currently
supported by the packages pyDOE2, chaospy and scipy, which should give users the freedom to
explore alternative approaches. Orthogonal Latin hypercube sampling was found to be the most
performant and is therefore implemented here. Sampling the multi-dimensional uncertainty space is
relatively easy. It only requires two things: the number of *samples* (which defines the number of
networks to be optimized) and the *features* (PyPSA network attributes, e.g. loads_t.p_set or
generators_t.p_max_pu). This results in an experimental design of dimension (samples x features).
The experimental design `lh` (dimension: samples x features) is used to modify the PyPSA networks;
the script thereby creates one network per sample. The iterator comes from the wildcard {unc},
which is described in the config.yaml and created in the Snakefile as a range from
0 to (total number of) SAMPLES.
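For illustration only (the numbers below are arbitrary and not produced by any of the samplers),
an experimental design for 2 features and 4 samples is a (4 x 2) array; row ``i`` holds the
scaling factors applied to the uncertain features in scenario ``m{i}``:
.. code:: python

    import numpy as np

    lh = np.array(
        [
            [0.12, 0.81],  # applied in scenario m0
            [0.37, 0.29],  # applied in scenario m1
            [0.64, 0.95],  # applied in scenario m2
            [0.91, 0.48],  # applied in scenario m3
        ]
    )
    assert lh.shape == (4, 2)  # (SAMPLES, N_FEATURES)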
"""
import os
import chaospy
import numpy as np
import pandas as pd
import pypsa
import seaborn as sns
from _helpers import configure_logging, create_logger
from pyDOE2 import lhs
from scipy.stats import beta, gamma, lognorm, norm, qmc, triang
from sklearn.preprocessing import MinMaxScaler
from solve_network import *
logger = create_logger(__name__)
sns.set(style="whitegrid")
def monte_carlo_sampling_pydoe2(
N_FEATURES: int,
SAMPLES: int,
uncertainties_values: dict,
random_state: int,
criterion: str = None,
iteration: int = None,
correlation_matrix: np.ndarray = None,
) -> np.ndarray:
"""
Creates Latin Hypercube Sample (LHS) implementation from PyDOE2 with
various options. Additionally, all "corners" are simulated.
Adapted from Disspaset: https://github.com/energy-modelling-toolkit/Dispa-SET/blob/master/scripts/build_and_run_hypercube.py
Documentation on PyDOE2: https://github.com/clicumu/pyDOE2 (fixes latin_cube errors)
"""
from pyDOE2 import lhs
from scipy.stats import qmc
# Generate a Nfeatures-dimensional latin hypercube varying between 0 and 1:
lh = lhs(
N_FEATURES,
samples=SAMPLES,
criterion=criterion,
iterations=iteration,
random_state=random_state,
correlation_matrix=correlation_matrix,
)
lh = rescale_distribution(lh, uncertainties_values)
discrepancy = qmc.discrepancy(lh)
logger.info(
f"Discrepancy is: {discrepancy}, more details in function documentation."
)
return lh
def monte_carlo_sampling_chaospy(
N_FEATURES: int,
SAMPLES: int,
uncertainties_values: dict,
seed: int,
rule: str = "latin_hypercube",
) -> np.ndarray:
"""
Creates Latin Hypercube Sample (LHS) implementation from chaospy.
Documentation on Chaospy: https://github.com/clicumu/pyDOE2 (fixes latin_cube errors)
Documentation on Chaospy latin-hyper cube (quasi-Monte Carlo method): https://chaospy.readthedocs.io/en/master/user_guide/fundamentals/quasi_random_samples.html#Quasi-random-samples
"""
import chaospy
from scipy.stats import qmc
# generate a N_FEATURES-dimensional latin hypercube varying between 0 and 1:
# joint distribution of N_FEATURES independent uniform(0, 1) variables
uniform_cube = chaospy.J(*[chaospy.Uniform(0, 1) for _ in range(N_FEATURES)])
lh = uniform_cube.sample(SAMPLES, rule=rule, seed=seed).T
lh = rescale_distribution(lh, uncertainties_values)
discrepancy = qmc.discrepancy(lh)
logger.info(
f"Discrepancy is: {discrepancy}, more details in function documentation."
)
return lh
def monte_carlo_sampling_scipy(
N_FEATURES: int,
SAMPLES: int,
uncertainties_values: dict,
seed: int,
strength: int = 2,
optimization: str = None,
) -> np.ndarray:
"""
Creates a Latin hypercube sample (LHS) using SciPy with various
options:
- Center the points within the multi-dimensional grid, centered=True
- Optimization scheme, optimization="random-cd"
- strength=1, classical LHS
- strength=2, performant orthogonal LHS; requires the number of samples to be the square of a prime, e.g. 11**2 = 121
Options can be combined to produce an optimized, centered, orthogonal-array-based LHS.
After optimization, the result is no longer guaranteed to be of strength 2.
Documentation for quasi-Monte Carlo approaches: https://docs.scipy.org/doc/scipy/reference/stats.qmc.html
Documentation for Latin hypercube: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.qmc.LatinHypercube.html#scipy.stats.qmc.LatinHypercube
Orthogonal LHS is better than basic LHS: https://github.com/scipy/scipy/pull/14546/files, https://en.wikipedia.org/wiki/Latin_hypercube_sampling
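**Example** (an illustrative sketch; the argument values are arbitrary, and 121 = 11**2 is chosen only to satisfy the strength-2 requirement):
.. code:: python

    lh = monte_carlo_sampling_scipy(
        N_FEATURES=3,
        SAMPLES=121,
        uncertainties_values=[{"type": "uniform", "args": [0.8, 1.3]}] * 3,
        seed=42,
        strength=2,
    )
    # lh has shape (121, 3); columns are rescaled by rescale_distribution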
"""
from scipy.stats import qmc
sampler = qmc.LatinHypercube(
d=N_FEATURES,
strength=strength,
optimization=optimization,
seed=seed,
)
lh = sampler.random(n=SAMPLES)
lh = rescale_distribution(lh, uncertainties_values)
discrepancy = qmc.discrepancy(lh)
logger.info(
f"Discrepancy is: {discrepancy}, more details in function documentation."
)
return lh
def rescale_distribution(
latin_hypercube: np.ndarray, uncertainties_values: dict
) -> np.ndarray:
"""
Rescales a Latin hypercube sampling (LHS) using specified distribution
parameters. More information on the distributions can be found
here https://docs.scipy.org/doc/scipy/reference/stats.html
**Parameters**:
- latin_hypercube (np.array): The Latin hypercube sampling to be rescaled.
- uncertainties_values (list): List of dictionaries containing distribution information.
Each dictionary should have 'type' key specifying the distribution type and 'args' key
containing parameters specific to the chosen distribution.
**Returns**:
- np.array: Rescaled Latin hypercube sampling with values in the range [0, 1].
**Supported Distributions**:
- "uniform": Rescaled to the specified lower and upper bounds.
- "normal": Rescaled using the inverse of the normal distribution function with specified mean and std.
- "lognormal": Rescaled using the inverse of the log-normal distribution function with specified mean and std.
- "triangle": Rescaled using the inverse of the triangular distribution function with mean calculated from given parameters.
- "beta": Rescaled using the inverse of the beta distribution function with specified shape parameters.
- "gamma": Rescaled using the inverse of the gamma distribution function with specified shape and scale parameters.
**Note**:
- The function supports rescaling for uniform, normal, lognormal, triangle, beta, and gamma distributions.
- The rescaled samples will have values in the range [0, 1].
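**Example** (a minimal sketch; the sample values and the normal parameters are arbitrary and only illustrate the "normal" branch below):
.. code:: python

    import numpy as np
    from scipy.stats import norm

    # one feature, three samples drawn uniformly on [0, 1]
    column = np.array([0.1, 0.5, 0.9])
    # map through the inverse CDF (ppf) of a normal distribution with mean=1, std=0.1
    rescaled = norm.ppf(column, 1, 0.1)  # approx. [0.87, 1.00, 1.13]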
"""
from scipy.stats import beta, gamma, lognorm, norm, qmc, triang
from sklearn.preprocessing import MinMaxScaler, minmax_scale
for idx, value in enumerate(uncertainties_values):
dist = value.get("type")
params = value.get("args")
match dist:
case "uniform":
l_bounds, u_bounds = params
latin_hypercube[:, idx] = minmax_scale(
latin_hypercube[:, idx], feature_range=(l_bounds, u_bounds)
)
case "normal":
mean, std = params
latin_hypercube[:, idx] = norm.ppf(latin_hypercube[:, idx], mean, std)
case "lognormal":
shape = params[0]
latin_hypercube[:, idx] = lognorm.ppf(latin_hypercube[:, idx], s=shape)
case "triangle":
mid_point = params[0]
latin_hypercube[:, idx] = triang.ppf(latin_hypercube[:, idx], mid_point)
case "beta":
a, b = params
latin_hypercube[:, idx] = beta.ppf(latin_hypercube[:, idx], a, b)
case "gamma":
shape, scale = params
latin_hypercube[:, idx] = gamma.ppf(
latin_hypercube[:, idx], shape, scale=scale
)
# samples space needs to be from 0 to 1
mm = MinMaxScaler(feature_range=(0, 1), clip=True)
latin_hypercube = mm.fit_transform(latin_hypercube)
return latin_hypercube
def validate_parameters(
sampling_strategy: str, samples: int, uncertainties_values: dict
) -> None:
"""
Validates the parameters for the given probability distributions. Inputs from
the user through the config file need to be validated before proceeding to
perform Monte Carlo simulations.
**Parameters**:
- sampling_strategy: str
The chosen sampling strategy: one of chaospy, scipy or pydoe2
- samples: int
The number of samples to generate for the simulation
- uncertainties_values: dict_values
The distribution information per uncertain feature; each entry has a 'type' key
naming the probability distribution and an 'args' key with its parameters.
**Raises**:
- ValueError: If the parameters are invalid for the specified distribution.
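**Example** (illustrative; this call raises a ValueError because "poisson" is not a supported distribution):
.. code:: python

    validate_parameters(
        sampling_strategy="chaospy",
        samples=9,
        uncertainties_values=[{"type": "poisson", "args": [3]}],
    )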
"""
valid_strategy = ["chaospy", "scipy", "pydoe2"]
valid_distribution = ["uniform", "normal", "lognormal", "triangle", "beta", "gamma"]
# verifying samples and distribution_params
if samples is None:
raise ValueError("assign a value to samples")
elif not isinstance(samples, int):
raise ValueError("samples must be an integer")
# verify sampling strategy
if sampling_strategy not in valid_strategy:
raise ValueError(
f" Invalid sampling strategy: {sampling_strategy}. Choose from {valid_strategy}"
)
for idx, value in enumerate(uncertainties_values):
dist_type = value.get("type")
param = value.get("args")
if dist_type is None or not param:
raise ValueError("assign a list of parameters to distribution_params")
if dist_type not in valid_distribution:
raise ValueError(
f"Unsupported Distribution : {dist_type}. Choose from {valid_distribution}"
)
if dist_type == "triangle":
if len(param) != 1:
raise ValueError(
f"{dist_type} distribution has to be 1 parameter [midpoint_between_0_and_1]"
)
elif param[0] < 0 or param[0] > 1:
raise ValueError(
f"{dist_type} distribution has to have a midpoint between 0 and 1"
)
if dist_type in ["normal", "uniform", "beta", "gamma"] and len(param) != 2:
raise ValueError(f"{dist_type} distribution must have 2 parameters")
elif dist_type == "lognormal" and len(param) != 1:
raise ValueError(f"{dist_type} must have a single parameter")
# handling having 0 as values in beta and gamma
if dist_type in ["beta", "gamma"]:
if np.min(param) <= 0:
raise ValueError(
f"{dist_type} distribution cannot have values lower than zero in parameters"
)
return None
if __name__ == "__main__":
if "snakemake" not in globals():
from _helpers import mock_snakemake
snakemake = mock_snakemake(
"monte_carlo",
simpl="",
clusters="10",
ll="copt",
opts="Co2L-24H",
unc="m0",
)
configure_logging(snakemake)
monte_carlo_config = snakemake.params.monte_carlo
# SCENARIO INPUTS
###
MONTE_CARLO_PYPSA_FEATURES = [
k for k in monte_carlo_config["uncertainties"].keys() if k
]  # skips entries with an empty key
MONTE_CARLO_OPTIONS = monte_carlo_config["options"]
N_FEATURES = len(
MONTE_CARLO_PYPSA_FEATURES
) # only counts features when specified in config
SAMPLES = MONTE_CARLO_OPTIONS.get(
"samples"
) # TODO: What is the optimal sampling? Fabian Neumann answered that in "Broad ranges" paper
SAMPLING_STRATEGY = MONTE_CARLO_OPTIONS.get("sampling_strategy")
UNCERTAINTIES_VALUES = monte_carlo_config["uncertainties"].values()
SEED = MONTE_CARLO_OPTIONS.get("seed")
# PARAMETERS VALIDATION
# validates the parameters supplied from config file
validate_parameters(SAMPLING_STRATEGY, SAMPLES, UNCERTAINTIES_VALUES)
# SCENARIO CREATION / SAMPLING STRATEGY
###
if SAMPLING_STRATEGY == "pydoe2":
lh = monte_carlo_sampling_pydoe2(
N_FEATURES,
SAMPLES,
UNCERTAINTIES_VALUES,
random_state=SEED,
criterion=None,
iteration=None,
correlation_matrix=None,
)
if SAMPLING_STRATEGY == "scipy":
lh = monte_carlo_sampling_scipy(
N_FEATURES,
SAMPLES,
UNCERTAINTIES_VALUES,
seed=SEED,
strength=2,
optimization=None,
)
if SAMPLING_STRATEGY == "chaospy":
lh = monte_carlo_sampling_chaospy(
N_FEATURES, SAMPLES, UNCERTAINTIES_VALUES, seed=SEED, rule="latin_hypercube"
)
# create plot for the rescaled distributions (for development usage, commented by default)
# for idx in range(N_FEATURES):
# sns.displot(lh[:, idx], kde=True).set(
# title=f"{MONTE_CARLO_PYPSA_FEATURES[idx]}"
# ).figure.savefig(f"{MONTE_CARLO_PYPSA_FEATURES[idx]}.png", bbox_inches="tight")
# MONTE-CARLO MODIFICATIONS
###
n = pypsa.Network(snakemake.input[0])
unc_wildcards = snakemake.wildcards[-1]
i = int(unc_wildcards[1:])
j = 0
for k in MONTE_CARLO_PYPSA_FEATURES:
# this loop applies, for scenario i, the sampled assumption for each feature k
# k is the config input key, e.g. "loads_t.p_set"
# lh[i, j] is the scaling factor drawn for feature j (column) in scenario i (row)
# Example: n.loads_t.p_set = n.loads_t.p_set * lh[i, 0]
exec(f"n.{k} = n.{k} * {lh[i,j]}")
logger.info(f"Scaled n.{k} by factor {lh[i,j]} in the {i} scenario")
j = j + 1
# EXPORT AND METADATA
#
latin_hypercube_dict = (
pd.DataFrame(lh).rename_axis("Nruns").add_suffix("_feature")
).to_dict()
n.meta.update(latin_hypercube_dict)
n.export_to_netcdf(snakemake.output[0])