-
Notifications
You must be signed in to change notification settings - Fork 61
/
duality_handlers.jl
409 lines (346 loc) · 13.5 KB
/
duality_handlers.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
# Copyright (c) 2017-24, Oscar Dowson and SDDP.jl contributors, Lea Kapelevich.
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# Shared error for the deprecated v0.3 entry points `SDDiP` and
# `ContinuousRelaxation`. Always throws an `ErrorException` describing the
# v0.4.0 migration path; it never returns normally.
function _deprecate_integrality_handler()
    return error(
        """
SDDP.jl v0.4.0 introduced a number of breaking changes in how we deal with
binary and integer variables.
## Breaking changes
* We have renamed `SDDiP` to `LagrangianDuality`.
* We have renamed `ContinuousRelaxation` to `ContinuousConicDuality`.
* Instead of passing the argument to `PolicyGraph`, you now pass it to
`train`, e.g., `SDDP.train(model; duality_handler = SDDP.LagrangianDuality())`
* We no longer turn continuous and integer states into a binary expansion. If
you want to binarize your states, do it manually.
## Why did we do this?
SDDiP (the algorithm presented in the paper) is really two parts:
1. If you have an integer program, you can compute the dual of the fishing
constraint using Lagrangian duality; and
2. If you have pure binary state variables, then cuts constructed from the
Lagrangian duals result in an optimal policy.
However, these two points are quite independent. If you have integer or
continuous state variables, you can still use Lagrangian duality!
The new system is more flexible because the duality handler is a property of the
solution process, not the model. This allows us to use Lagrangian duality to
solve any dual problem, and it leaves the decision of binarizing the state
variables up to the user. (Hint: we don't think you should do it!)
## Other additions
We also added support for strengthened Benders cuts, which we call
`SDDP.StrengthenedConicDuality()`.
## Future plans
We have a number of future plans in the works, including better Lagrangian
solution methods and better ways of integrating the different types of duality
handlers (e.g., start with ContinuousConicDuality, then shift to
StrengthenedConicDuality, then LagrangianDuality).
If these sorts of things interest you, the code is now much more hackable, so
please reach out or read https://github.com/odow/SDDP.jl/issues/246.
Alternatively, if you have interesting examples using SDDiP that you find are
too slow, please send me the examples so we can use them as benchmarks in future
improvements.
""",
    )
end
# Deprecated v0.3 names. Both accept any arguments so that old call sites
# reach the informative migration error instead of a `MethodError`.
function SDDiP(args...; kwargs...)
    return _deprecate_integrality_handler()
end

function ContinuousRelaxation(args...; kwargs...)
    return _deprecate_integrality_handler()
end
# Fallback when no duality handler is given: report the primal objective and
# an empty dual dictionary (no cut gradient information).
function get_dual_solution(node::Node, ::Nothing)
    no_duals = Dict{Symbol,Float64}()
    return JuMP.objective_value(node.subproblem), no_duals
end
# ========================= Continuous relaxation ============================ #
"""
    ContinuousConicDuality()

Compute dual variables in the backward pass using conic duality, relaxing any
binary or integer restrictions as necessary.

## Theory

Given the problem
```
min Cᵢ(x̄, u, w) + θᵢ
 st (x̄, x′, u) in Xᵢ(w) ∩ S
    x̄ - x == 0          [λ]
```
where `S ⊆ ℝ×ℤ`, we relax integrality and use conic duality to solve for `λ`
in the problem:
```
min Cᵢ(x̄, u, w) + θᵢ
 st (x̄, x′, u) in Xᵢ(w)
    x̄ - x == 0          [λ]
```
"""
struct ContinuousConicDuality <: AbstractDualityHandler end
# Return `true` if the solver attached to `node.subproblem` reports a usable
# (feasible or nearly feasible) dual solution.
function _has_dual_solution(node::Node)
    return JuMP.dual_status(node.subproblem) in
           (JuMP.FEASIBLE_POINT, JuMP.NEARLY_FEASIBLE_POINT)
end
# Return `(objective, duals)` for `node` using conic (LP) duality, where
# `duals` maps each state's name to the dual of its fixing constraint.
function get_dual_solution(node::Node, ::ContinuousConicDuality)
    if !_has_dual_solution(node)
        # No dual available. Resetting the optimizer and re-solving sometimes
        # recovers one; this is impossible in DIRECT mode, where the inner
        # optimizer cannot be reset.
        if JuMP.mode(node.subproblem) != JuMP.DIRECT
            MOI.Utilities.reset_optimizer(node.subproblem)
            optimize!(node.subproblem)
        end
    end
    if !_has_dual_solution(node)
        # Still no dual: dump the subproblem to disk for debugging and throw.
        write_subproblem_to_file(
            node,
            "subproblem.mof.json";
            throw_error = true,
        )
    end
    # Note: due to JuMP's dual convention, we need to flip the sign for
    # maximization problems.
    σ = JuMP.objective_sense(node.subproblem) == MOI.MIN_SENSE ? 1 : -1
    duals = Dict{Symbol,Float64}()
    for (name, state) in node.states
        duals[name] = σ * JuMP.dual(JuMP.FixRef(state.in))
    end
    return objective_value(node.subproblem), duals
end
# Relax any binary/integer restrictions on `node.subproblem`, returning a
# zero-argument callable that undoes the relaxation. For purely continuous
# subproblems both the relaxation and the undo are no-ops.
function _relax_integrality(node::Node)
    if node.has_integrality
        return JuMP.relax_integrality(node.subproblem)
    end
    return () -> nothing
end
# Conic duality needs only one preparation step before the backward pass:
# relax integrality. The returned callable restores the integer restrictions.
function prepare_backward_pass(node::Node, ::ContinuousConicDuality, ::Options)
    undo_relax = _relax_integrality(node)
    return undo_relax
end
duality_log_key(::ContinuousConicDuality) = " "
# =========================== LagrangianDuality ============================== #
"""
    LagrangianDuality(;
        method::LocalImprovementSearch.AbstractSearchMethod =
            LocalImprovementSearch.BFGS(100),
    )

Obtain dual variables in the backward pass using Lagrangian duality.

## Arguments

 * `method`: the `LocalImprovementSearch` method for maximizing the Lagrangian
   dual problem.

## Theory

Given the problem
```
min Cᵢ(x̄, u, w) + θᵢ
 st (x̄, x′, u) in Xᵢ(w) ∩ S
    x̄ - x == 0          [λ]
```
where `S ⊆ ℝ×ℤ`, we solve the problem `max L(λ)`, where:
```
L(λ) = min Cᵢ(x̄, u, w) + θᵢ - λ' h(x̄)
        st (x̄, x′, u) in Xᵢ(w) ∩ S
```
and where `h(x̄) = x̄ - x`.
"""
mutable struct LagrangianDuality <: AbstractDualityHandler
    method::LocalImprovementSearch.AbstractSearchMethod

    function LagrangianDuality(;
        method = LocalImprovementSearch.BFGS(100),
        kwargs...,
    )
        # Old keyword arguments (pre-v0.4 SDDiP options) are silently ignored
        # apart from this warning, to remain call-compatible.
        if !isempty(kwargs)
            @warn(
                "Keyword arguments to LagrangianDuality have changed. " *
                "See the documentation for details.",
            )
        end
        return new(method)
    end
end
# Compute a dual solution for `node` via Lagrangian duality.
#
# Strategy: solve the continuous relaxation to obtain a conic dual as a
# starting point, then improve it by maximizing the Lagrangian dual with
# `lagrange.method`. The incoming state variables are unfixed while the
# Lagrangian is evaluated, and must be re-fixed on EVERY return path so that
# subsequent solves of the subproblem see the correct incoming state.
function get_dual_solution(node::Node, lagrange::LagrangianDuality)
    undo_relax = _relax_integrality(node)
    optimize!(node.subproblem)
    conic_obj, conic_dual = get_dual_solution(node, ContinuousConicDuality())
    undo_relax()
    # `LocalImprovementSearch` minimizes, so flip signs for minimization
    # problems (s = -1) and flip back when returning the bound.
    s = JuMP.objective_sense(node.subproblem) == MOI.MIN_SENSE ? -1 : 1
    N = length(node.states)
    x_in_value = zeros(N)
    λ_star, h_expr, h_k = zeros(N), Vector{AffExpr}(undef, N), zeros(N)
    for (i, (key, state)) in enumerate(node.states)
        # Record the fixed incoming value, build h(x̄) = x̄ - x, and unfix the
        # state so the Lagrangian subproblem is free to vary it.
        x_in_value[i] = JuMP.fix_value(state.in)
        h_expr[i] = @expression(node.subproblem, state.in - x_in_value[i])
        JuMP.unfix(state.in)
        λ_star[i] = conic_dual[key]
    end
    # Check that the conic dual is feasible for the subproblem. Sometimes it
    # isn't if the LP dual solution is slightly infeasible due to numerical
    # issues.
    L_k = _solve_primal_problem(node.subproblem, λ_star, h_expr, h_k)
    if L_k === nothing
        # Bug fix: restore the fixed incoming states before falling back to
        # the conic solution. The original early return left the states
        # unfixed, unlike the analogous StrengthenedConicDuality method,
        # which re-fixes on every path.
        for (i, (_, state)) in enumerate(node.states)
            JuMP.fix(state.in, x_in_value[i]; force = true)
        end
        return conic_obj, conic_dual
    end
    L_star, λ_star =
        LocalImprovementSearch.minimize(lagrange.method, λ_star, conic_obj) do x
            L_k = _solve_primal_problem(node.subproblem, x, h_expr, h_k)
            return L_k === nothing ? nothing : (s * L_k, s * h_k)
        end
    # Restore the fixed incoming states for subsequent solves.
    for (i, (_, state)) in enumerate(node.states)
        JuMP.fix(state.in, x_in_value[i]; force = true)
    end
    λ_solution = Dict{Symbol,Float64}(
        name => λ_star[i] for (i, name) in enumerate(keys(node.states))
    )
    return s * L_star, λ_solution
end
# Solve the inner (primal) Lagrangian problem for a fixed multiplier vector
# `λ`:
#
#     L(λ) = min primal_obj - λ' h_expr
#
# On success, return `L(λ)` and overwrite `h_k` in-place with `-h(x̄)`, the
# (sub)gradient of `L` at `λ`. On a non-OPTIMAL termination, return `nothing`.
# The model's original objective function is restored before every return.
function _solve_primal_problem(
    model::JuMP.Model,
    λ::Vector{Float64},
    h_expr::Vector{GenericAffExpr{Float64,VariableRef}},
    h_k::Vector{Float64},
)
    # Save the objective so it can be restored after the Lagrangian solve.
    primal_obj = JuMP.objective_function(model)
    JuMP.set_objective_function(
        model,
        @expression(model, primal_obj - λ' * h_expr),
    )
    JuMP.optimize!(model)
    if JuMP.termination_status(model) != MOI.OPTIMAL
        # Solve failed (e.g., unbounded because the dual is infeasible).
        JuMP.set_objective_function(model, primal_obj)
        return nothing
    end
    # Gradient of L w.r.t. λ is -h(x̄); write in-place to avoid allocating.
    h_k .= -JuMP.value.(h_expr)
    L_λ = JuMP.objective_value(model)
    JuMP.set_objective_function(model, primal_obj)
    return L_λ
end
duality_log_key(::LagrangianDuality) = "L"
# ==================== StrengthenedConicDuality ==================== #
"""
    StrengthenedConicDuality()

Obtain dual variables in the backward pass using strengthened conic duality.

## Theory

Given the problem
```
min Cᵢ(x̄, u, w) + θᵢ
 st (x̄, x′, u) in Xᵢ(w) ∩ S
    x̄ - x == 0          [λ]
```
we first obtain an estimate for `λ` using [`ContinuousConicDuality`](@ref).

Then, we evaluate the Lagrangian function:
```
L(λ) = min Cᵢ(x̄, u, w) + θᵢ - λ'(x̄ - x)
        st (x̄, x′, u) in Xᵢ(w) ∩ S
```
to obtain a better estimate of the intercept.
"""
mutable struct StrengthenedConicDuality <: AbstractDualityHandler end
# Obtain duals via conic duality on the relaxation; for integer problems,
# re-evaluate the Lagrangian at those duals to strengthen the cut intercept.
function get_dual_solution(node::Node, ::StrengthenedConicDuality)
    undo_relax = _relax_integrality(node)
    optimize!(node.subproblem)
    conic_obj, conic_dual = get_dual_solution(node, ContinuousConicDuality())
    undo_relax()
    if !node.has_integrality
        return conic_obj, conic_dual # If we're linear, return this!
    end
    num_states = length(node.states)
    λ_k, h_k, x = zeros(num_states), zeros(num_states), zeros(num_states)
    h_expr = Vector{AffExpr}(undef, num_states)
    for (i, (key, state)) in enumerate(node.states)
        # Record the fixed incoming value, build h(x̄) = x̄ - x, and unfix the
        # state so the Lagrangian subproblem can vary it.
        x[i] = JuMP.fix_value(state.in)
        h_expr[i] = @expression(node.subproblem, state.in - x[i])
        JuMP.unfix(state.in)
        λ_k[i] = conic_dual[key]
    end
    # Evaluate L(λ) at the conic dual estimate (integer restrictions active).
    lagrangian_obj = _solve_primal_problem(node.subproblem, λ_k, h_expr, h_k)
    # Always restore the fixed incoming states before returning.
    for (i, (_, state)) in enumerate(node.states)
        JuMP.fix(state.in, x[i]; force = true)
    end
    # If lagrangian_obj is `nothing`, then the primal problem didn't solve
    # correctly, probably because it was unbounded (i.e., the dual was
    # infeasible.) But we got the dual from solving the LP relaxation so it must
    # be feasible! Sometimes however, the dual from the LP solver might be
    # numerically infeasible when solved in the primal. That's a shame :(
    # If so, return the conic_obj instead.
    return something(lagrangian_obj, conic_obj), conic_dual
end
duality_log_key(::StrengthenedConicDuality) = "S"
# ============================== BanditDuality =============================== #
# One arm of the BanditDuality bandit: a duality handler together with the
# history of rewards (bound improvement per unit time) it has earned.
mutable struct _BanditArm{T}
    handler::T
    rewards::Vector{Float64}
end
"""
    BanditDuality()

Formulates the problem of choosing a duality handler as a multi-armed bandit
problem. The arms to choose between are:

 * [`ContinuousConicDuality`](@ref)
 * [`StrengthenedConicDuality`](@ref)
 * [`LagrangianDuality`](@ref)

Our problem isn't a typical multi-armed bandit for two reasons:

 1. The reward distribution is non-stationary (each arm converges to 0 as it
    keeps getting pulled).
 2. The distribution of rewards is dependent on the history of the arms that
    were chosen.

We choose a very simple heuristic: pick the arm with the best mean + 1 standard
deviation. That should ensure we consistently pick the arm with the best
likelihood of improving the value function.

In future, we should consider discounting the rewards of earlier iterations,
and focus more on the more-recent rewards.
"""
mutable struct BanditDuality <: AbstractDualityHandler
    arms::Vector{_BanditArm}
    last_arm_index::Int
    logs_seen::Int

    function BanditDuality(args::AbstractDualityHandler...)
        # Each handler becomes one arm with an empty reward history.
        arms = _BanditArm[_BanditArm(arg, Float64[]) for arg in args]
        return new(arms, 1, 1)
    end
end
# Print the bandit and its arms, one handler per line.
function Base.show(io::IO, handler::BanditDuality)
    print(io, "BanditDuality with arms:")
    foreach(arm -> print(io, "\n * ", arm.handler), handler.arms)
    return
end
# Default bandit: choose between continuous and strengthened conic duality.
function BanditDuality()
    default_arms = (ContinuousConicDuality(), StrengthenedConicDuality())
    return BanditDuality(default_arms...)
end
# Pick the arm whose reward history has the best mean + one standard
# deviation, record it as the last-used arm, and return it.
function _choose_best_arm(handler::BanditDuality)
    scores = [
        Statistics.mean(arm.rewards) + Statistics.std(arm.rewards) for
        arm in handler.arms
    ]
    _, best_index = findmax(scores)
    handler.last_arm_index = best_index
    return handler.arms[best_index]
end
# Credit the most recent bound improvement (per unit of time) as a reward,
# either to every arm (early iterations, to build a prior) or to the arm that
# was last pulled.
function _update_rewards(handler::BanditDuality, log::Vector{Log})
    # The bound is monotonic, so instead of worrying about whether we are
    # maximizing or minimizing, just compute:
    #           |bound_t - bound_{t-1}|
    # reward = -------------------------
    #              time_t - time_{t-1}
    latest, previous = log[end], log[end-1]
    reward = abs(latest.bound - previous.bound) / (latest.time - previous.time)
    # Keep seeding every arm until the bound starts to improve: until then the
    # reward is zero, so an arm pulled only in that phase would never be
    # revisited. Reaching the first improvement can take quite a few
    # iterations in some models.
    bound_is_constant = isapprox(log[1].bound, log[end].bound; atol = 1e-6)
    # To start with, we should add the reward to all arms to construct a prior
    # distribution for the arms. The 10 is somewhat arbitrary.
    if length(log) < 10 || bound_is_constant
        for arm in handler.arms
            push!(arm.rewards, reward)
        end
    else
        push!(handler.arms[handler.last_arm_index].rewards, reward)
    end
    return
end
# Update the bandit with any new log entries, pick the best arm, and delegate
# the backward-pass preparation to that arm's handler.
function prepare_backward_pass(
    node::Node,
    handler::BanditDuality,
    options::Options,
)
    num_logs = length(options.log)
    if num_logs > handler.logs_seen
        _update_rewards(handler, options.log)
        handler.logs_seen = num_logs
    end
    chosen_arm = _choose_best_arm(handler)
    return prepare_backward_pass(node, chosen_arm.handler, options)
end
# Delegate to the handler of the arm chosen in `prepare_backward_pass`.
function get_dual_solution(node::Node, handler::BanditDuality)
    arm = handler.arms[handler.last_arm_index]
    return get_dual_solution(node, arm.handler)
end
# Delegate the log label to the handler of the most recently chosen arm.
function duality_log_key(handler::BanditDuality)
    arm = handler.arms[handler.last_arm_index]
    return duality_log_key(arm.handler)
end