-
Notifications
You must be signed in to change notification settings - Fork 10
/
rths.jl
625 lines (576 loc) · 24.8 KB
/
rths.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
export RealTimeHeuristicSearch, RTHS
export AlternatingRealTimeHeuristicSearch, AlternatingRTHS, ARTHS
"""
RealTimeHeuristicSearch(;
heuristic::Heuristic = GoalCountHeuristic(),
n_iters::Int = 1,
max_nodes::Int = 50,
update_method::Symbol = :dijkstra,
search_neighbors::Symbol = :unexpanded,
save_search::Bool = true,
reuse_search::Bool = false,
reuse_paths::Bool = true,
kwargs...
)
RealTimeHeuristicSearch(
planner::ForwardPlanner,
n_iters::Int = 1,
update_method::Symbol = :dijkstra,
search_neighbors::Symbol = :unexpanded,
reuse_search::Bool = true,
reuse_paths::Bool = true,
)
A real time heuristic search (`RTHS`) algorithm [1] similar to `RTDP`. Instead
of greedy rollouts, forward heuristic search is performed from the current state
(up to `max_nodes`), and value estimates are updated for all states in the
interior of the search tree. Search may also be performed from neighboring
states by configuring `search_neighbors`. A simulated action is then taken to
the best neighboring state. This process repeats for `n_iters`, with future
searches using the updated value function as a more informed heuristic.
Returns a [`ReusableTreePolicy`](@ref), containing a [`TabularVPolicy`](@ref)
of state value estimates, the most recent [`PathSearchSolution`](@ref) produced
by forward search (including a search tree if `save_search` is `true`), and a
reusable tree of goal paths if `reuse_paths` is `true`.
# Arguments
- `planner`: The [`ForwardPlanner`](@ref) used for lookahead heuristic search.
Any keyword arguments accepted by [`ForwardPlanner`](@ref) are also keyword
arguments for `RTHS` (e.g. `heuristic`, `max_nodes`, `save_search`, etc.).
- `n_iters`: Number of iterations to perform. In each iteration, search is
followed by a simulated action to the best neighboring state. If a goal or
dead end is reached, we restart from the initial state.
- `update_method`: Method used to update value estimates, either via
cost differencing from the terminal node (`:costdiff`), or via Dijkstra's
algorithm (`:dijkstra`).
- `search_neighbors`: Controls whether search is additionally performed from all
neighbors of the current state (`:all`), from neighbors unexpanded by the
initial search (`:unexpanded`), or none of them (`:none`).
- `reuse_search`: If `true`, then previous search solutions are reused by
subsequent searches in future iterations or calls to [`refine!`](@ref).
The latter requires `save_search` to be `true`. Search solutions are reused
via the `:reroot` refinement method for [`ForwardPlanner`](@ref).
- `reuse_paths`: If `true`, then every time a path to the goal is found, it is
stored in a reusable tree of goal paths, as in Tree Adaptive A* [2]. Future
searches will terminate once a previous path is encountered, reducing the cost
of search. A consistent `heuristic` is required to ensure path optimality.
# Update Methods
Setting the `update_method` keyword argument controls how value estimates
``V(s)`` (or equivalently, cost-to-goal estimates ``h(s) = -V(s)``) are updated:
- `:costdiff`: Value estimates are updated by cost differencing from
the terminal node ``t``. For each node ``s`` in the search tree's interior, we
estimate the cost-to-goal ``h(s)`` by adding a lower bound on the cost from
``s`` to ``t`` to the cost-to-goal of ``t``:
```math
h(s) = h(t) + (c(r, t) - c(r, s))
```
where ``c(r, s)`` is the cost from the root node ``r`` to ``s``. This is
the update used by Real-Time Adaptive A* [3]. Takes ``O(N)`` time, where ``N``
is the size of the tree's interior.
- `:dijkstra` : Value estimates are backpropagated from the search frontier via
Dijkstra's algorithm. For each node ``s`` in tree's interior, we update the
cost-to-goal ``h(s)`` by minimizing over all paths to the frontier:
```math
h(s) = \\min_{t \\in F} h(t) + c(s, t)
```
This is the update rule by LSS-LRTA* [4], which produces more informed
value estimates than `:costdiff`, but takes ``O(NB \\log NB)`` time. ``N`` is
the size of the tree's interior and ``B`` is the maximal branching factor.
The `save_parents` keyword for [`ForwardPlanner`](@ref) defaults to `true`
when this method is used.
Both of these update methods require a `heuristic` that is initially consistent
for the updated value estimates to remain consistent.
[1] R. E. Korf, "Real-Time Heuristic Search," Artificial Intelligence, vol. 42,
no. 2, pp. 189–211, Mar. 1990, <https://doi.org/10.1016/0004-3702(90)90054-4>.
[2] C. Hernández, X. Sun, S. Koenig, and P. Meseguer, "Tree Adaptive A*,"
AAMAS (2011), pp. 123–130. <https://dl.acm.org/doi/abs/10.5555/2030470.2030488>.
[3] S. Koenig and M. Likhachev, "Real-Time Adaptive A*," AAMAS (2006),
pp. 281–288. <https://doi.org/10.1145/1160633.1160682>.
[4] S. Koenig and X. Sun, "Comparing real-time and incremental heuristic
search for real-time situated agents", AAMAS (2009), pp. 313–341.
<https://dl.acm.org/doi/10.5555/1018410.1018838>.
"""
@auto_hash_equals mutable struct RealTimeHeuristicSearch{
P <: ForwardPlanner
} <: Planner
planner::P
n_iters::Int
update_method::Symbol
search_neighbors::Symbol
reuse_search::Bool
reuse_paths::Bool
callback::Union{Nothing, Function}
end
const RTHS = RealTimeHeuristicSearch
@doc (@doc RealTimeHeuristicSearch) RTHS
function RealTimeHeuristicSearch(
planner::ForwardPlanner;
n_iters::Int = 1,
update_method::Symbol = :dijkstra,
search_neighbors::Symbol = :unexpanded,
reuse_search::Bool = false,
reuse_paths::Bool = true,
verbose::Bool = false,
callback = verbose ? LoggerCallback() : nothing,
)
planner = copy(planner)
# Ensure that the search solution can be re-rooted
if reuse_search == true
planner.refine_method = :reroot
planner.save_parents = true
planner.save_children = true
planner.save_search = true
else
planner.refine_method = :restart
end
# Ensure all search node parents are saved for Dijkstra updating
if update_method == :dijkstra
planner.save_parents = true
end
return RealTimeHeuristicSearch(
planner, n_iters, update_method, search_neighbors,
reuse_search, reuse_paths, callback
)
end
function RealTimeHeuristicSearch(;
n_iters::Int = 1,
max_nodes::Int = 50,
update_method::Symbol = :dijkstra,
search_neighbors::Symbol = :unexpanded,
save_search::Bool = true,
reuse_search::Bool = false,
reuse_paths::Bool = true,
verbose::Bool = false,
callback = verbose ? LoggerCallback() : nothing,
kwargs...
)
planner = ForwardPlanner(;
max_nodes = max_nodes, save_search = save_search, kwargs...
)
# Ensure that the search solution can be re-rooted
if reuse_search == true
planner.refine_method = :reroot
planner.save_parents = true
planner.save_children = true
planner.save_search = true
else
planner.refine_method = :restart
end
# Ensure all search node parents are saved for Dijkstra updating
if update_method == :dijkstra
planner.save_parents = true
end
return RealTimeHeuristicSearch(
planner, n_iters, update_method, search_neighbors,
reuse_search, reuse_paths, callback
)
end
function RealTimeHeuristicSearch(heuristic::Heuristic; kwargs...)
return RealTimeHeuristicSearch(; heuristic = heuristic, kwargs...)
end
function Base.getproperty(planner::P, name::Symbol) where {P <: RTHS}
hasfield(P, name) ?
getfield(planner, name) : getproperty(planner.planner, name)
end
function Base.setproperty!(planner::P, name::Symbol, val) where {P <: RTHS}
hasfield(P, name) ?
setfield!(planner, name, val) : setproperty!(planner.planner, name, val)
end
function Base.hasproperty(planner::P, name::Symbol) where {P <: RTHS}
hasfield(P, name) || hasproperty(planner.planner, name)
end
function Base.copy(p::RealTimeHeuristicSearch)
return RealTimeHeuristicSearch(
copy(p.planner), p.n_iters, p.update_method, p.search_neighbors,
p.reuse_search, p.reuse_paths, p.callback
)
end
function solve(planner::RealTimeHeuristicSearch,
domain::Domain, state::State, spec::Specification)
# Simplify goal specification
spec = simplify_goal(spec, domain, state)
# Precompute heuristic information
heuristic = precomputed(planner.heuristic, domain, state, spec)
# Initialize then refine solution
default = HeuristicVPolicy(heuristic, domain, spec)
value_policy = TabularVPolicy(domain, spec, default)
search_sol = init_sol(planner.planner, heuristic, domain, state, spec)
sol = ReusableTreePolicy{typeof(state)}(value_policy, search_sol)
sol = solve!(sol, planner, domain, state, spec)
return sol
end
function solve!(sol::ReusableTreePolicy, planner::RealTimeHeuristicSearch,
domain::Domain, state::State, spec::Specification)
@unpack n_iters, heuristic, search_neighbors = planner
@unpack save_search, reuse_search, reuse_paths, callback = planner
planner = copy(planner)
# Use previously computed policy values to guide search
planner.heuristic = PruningHeuristic(PolicyValueHeuristic(sol), heuristic)
# Ensure that search planner returns a search tree in its solution
planner.save_search = true
search_planner = planner.planner
# Run callback if provided
if !isnothing(callback)
callback(planner, sol, state, state, 0, nothing, -Inf, nothing)
end
# Iteratively perform heuristic search followed by simulated execution
init_state = state
act = PDDL.no_op
for i in 1:n_iters
# Restart if goal is reached
if is_goal(spec, domain, state, act)
act = PDDL.no_op
state = init_state
end
# Reprioritize nodes on the search frontier based on updated values
if reuse_search && is_expanded(state, sol.search_sol)
reorder_queue!(sol.search_sol, planner, sol)
end
# Run search from current state and update values
if reuse_paths
tree_spec = ReusableTreeGoal(spec, sol.goal_tree)
refine!(sol.search_sol, search_planner, domain, state, tree_spec)
else
refine!(sol.search_sol, search_planner, domain, state, spec)
end
update_values!(sol, planner, domain, spec, sol.search_sol)
# Run search from neighboring states
if search_neighbors != :none && reuse_paths
tree_spec = ReusableTreeGoal(spec, sol.goal_tree)
search_neighbors!(sol, planner, domain, state, spec, tree_spec, i)
elseif search_neighbors != :none
search_neighbors!(sol, planner, domain, state, spec, spec, i)
end
# Run callback if provided
if !isnothing(callback)
callback(planner, sol, init_state, state, i, nothing,
get_value(sol, state), best_action(sol, state))
end
# Follow policy one step forward if possible
act = best_action(sol, state)
act = ismissing(act) ? PDDL.no_op : act
state = ismissing(act) ? init_state : transition(domain, state, act)
end
# Empty search tree and queue if `save_search` was originally false
if !save_search
empty!(sol.search_sol.search_tree)
empty!(sol.search_sol.search_frontier)
empty!(sol.search_sol.search_order)
sol.search_sol.expanded = -1
end
return sol
end
function search_neighbors!(
sol::ReusableTreePolicy, planner::RealTimeHeuristicSearch,
domain::Domain, state::State, spec::Specification,
tree_spec::Specification = spec, i_iter::Int = 1
)
@unpack reuse_search, search_neighbors, callback = planner
# Avoid resetting node count in neighbor refinement steps
search_planner = copy(planner.planner)
search_planner.reset_node_count = false
# Run search from neighboring states
for act in available(domain, state)
next_state = transition(domain, state, act)
get_value(sol, next_state) == -Inf && continue
next_expanded = is_expanded(next_state, sol.search_sol)
search_sol = nothing
if is_goal(spec, domain, next_state, act)
# Goal reached, set value of neighboring state to 0
!has_action_goal(spec) && set_value!(sol, next_state, 0.0)
elseif search_neighbors == :all && reuse_search && next_expanded
# Refine search starting from neighboring state
search_sol = copy(sol.search_sol)
reorder_queue!(search_sol, planner, sol)
refine!(search_sol, search_planner, domain, next_state, tree_spec)
update_values!(sol, planner, domain, spec, search_sol)
elseif search_neighbors == :all
# Run new search from neighboring state
search_sol = solve(search_planner, domain, next_state, tree_spec)
update_values!(sol, planner, domain, spec, search_sol)
elseif search_neighbors == :unexpanded && !next_expanded
# Run new search from unexpanded neighboring state
search_sol = solve(search_planner, domain, next_state, tree_spec)
update_values!(sol, planner, domain, spec, search_sol)
end
# Run callback if search was performed
if !isnothing(callback) && !isnothing(search_sol)
callback(planner, sol, nothing, state, i_iter,
act, get_value(sol, state, act), nothing)
end
end
# Set value of current state to best of updated neighboring values
best_q = -Inf
for act in available(domain, state)
best_q = max(best_q, get_value(sol, state, act))
end
set_value!(sol, state, best_q)
return sol
end
function update_values!(
policy::ReusableTreePolicy, planner::RealTimeHeuristicSearch,
domain::Domain, spec::Specification, search_sol::PathSearchSolution
)
if search_sol.status == :failure
# Set all values to -Inf since none can reach the goal
for (node_id, _) in search_sol.search_tree
set_value!(policy, node_id, -Inf)
end
elseif planner.update_method == :costdiff
# Update values via cost differencing from the terminal node
update_values_costdiff!(policy, planner, domain, spec, search_sol)
elseif planner.update_method == :dijkstra
# Update values via Dijkstra's algorithm from search frontier
update_values_dijkstra!(policy, planner, domain, spec, search_sol)
end
# Insert path to the terminal node into reusable tree
if planner.reuse_paths && search_sol.status == :success
node_id, (_, h_val, _) = peek(search_sol.search_frontier)
insert_path!(policy, search_sol.search_tree, node_id, h_val)
end
return policy
end
"Cost-differencing update used by RTAA* (Koenig & Likhachev, 2006)."
function update_values_costdiff!(
policy::ReusableTreePolicy, planner::RealTimeHeuristicSearch,
domain::Domain, spec::Specification, search_sol::PathSearchSolution
)
@unpack trajectory, search_tree, search_frontier = search_sol
# Get value of terminal node from search frontier
terminal_id, (_, terminal_h_val, _) = peek(search_frontier)
# Recompute f-value in case g_mult != 1.0
terminal_path_cost = search_tree[terminal_id].path_cost
terminal_f_val = terminal_path_cost + terminal_h_val
# Set value of terminal node
if !has_action_goal(spec) || search_sol.status != :success
set_value!(policy, terminal_id, -terminal_h_val)
end
# Update values of all closed nodes in search tree
for (node_id, node) in search_tree
haskey(search_frontier, node_id) && continue
h_val = terminal_f_val - node.path_cost
set_value!(policy, node_id, -h_val)
end
return policy
end
"Dijkstra update used by LSS-LRTA* (Koenig, 2004; Koenig & Sun, 2009)."
function update_values_dijkstra!(
policy::ReusableTreePolicy, planner::RealTimeHeuristicSearch,
domain::Domain, spec::Specification, search_sol::PathSearchSolution
)
@unpack trajectory, search_tree, search_frontier = search_sol
# Construct priority queue of nodes ordered by h-values
h_val_iter = Iterators.map(keys(search_tree)) do node_id
if haskey(search_frontier, node_id)
h_val = search_frontier[node_id][2]
else # Set h-value to Inf for all interior nodes
h_val = Inf32
set_value!(policy, node_id, -h_val)
end
return (node_id => h_val)::Pair{UInt,Float32}
end
queue = PriorityQueue{UInt,Float32}(h_val_iter)
# Update values of all nodes in the search tree's interior
while !isempty(queue)
node_id, h_val = dequeue_pair!(queue)
node = search_tree[node_id]
# Iterate over parents
parent_ref = node.parent
while !isnothing(parent_ref)
parent_id = parent_ref.id
parent_act = parent_ref.action
parent_ref = parent_ref.next
# Skip self-edges / edges without actions
isnothing(parent_act) && continue
parent_id == node_id && continue
# Skip parents that were deleted by rerooting etc.
parent = get(search_tree, parent_id, nothing)
isnothing(parent) && continue
# Skip if parent node already has a lower h-value
parent_h_val = haskey(queue, parent_id) ?
queue[parent_id] : Float32(-get_value(policy, parent_id, -Inf32))
parent_h_val > h_val || continue
# Skip if parent node's h-value can't be decreased
act_cost = get_cost(spec, domain, parent.state,
parent_act, node.state)
parent_h_val > h_val + act_cost || continue
parent_h_val = (h_val + act_cost) |> Float32
# Update parent's h-value and insert into priority queue
set_value!(policy, parent_id, -parent_h_val)
queue[parent_id] = parent_h_val
end
end
return policy
end
function reorder_queue!(
search_sol::PathSearchSolution, planner::RealTimeHeuristicSearch,
policy::ReusableTreePolicy,
)
@unpack h_mult = planner
queue = search_sol.search_frontier
for (node_id, priority) in queue
f_val, h_val, n = priority
!has_cached_value(policy, node_id) && continue
new_h_val::Float32 = -get_value(policy, node_id)
new_h_val == h_val && continue
new_f_val::Float32 = f_val + h_mult * (new_h_val - h_val)
new_priority = (new_f_val, new_h_val, n)
queue[node_id] = new_priority
end
return search_sol
end
function refine!(sol::PolicySolution, planner::RealTimeHeuristicSearch,
domain::Domain, state::State, spec::Specification)
return solve!(sol, planner, domain, state, spec)
end
function (cb::LoggerCallback)(
planner::RealTimeHeuristicSearch,
sol::PolicySolution, init_state::Union{Nothing, State}, cur_state::State,
n::Int, act, cur_v, best_act
)
if n == 0 && get(cb.options, :log_header, true)
@logmsg cb.loglevel "Running RTHS..."
n_iters, max_nodes = planner.n_iters, planner.max_nodes
@logmsg cb.loglevel "n_iters = $n_iters, max_nodes = $max_nodes"
return nothing
end
log_period = get(cb.options, :log_period, 1)
if n > 0 && n % log_period == 0 && !isnothing(act) && !ismissing(act)
act_str = write_pddl(act)
@logmsg cb.loglevel "Iteration $n $act_str: value = $cur_v"
end
if n > 0 && n % log_period == 0 && !isnothing(best_act)
init_v = get_value(sol, init_state)
best_act_str = ismissing(best_act) ? "missing" : write_pddl(best_act)
@logmsg(cb.loglevel,
"Iteration $n: best action = $best_act_str, " *
"value = $cur_v, initial state value = $init_v")
end
return nothing
end
"""
AlternatingRealTimeHeuristicSearch(
planners::RealTimeHeuristicSearch...;
share_values::Bool = true,
share_search::Bool = false,
share_paths::Bool = true,
)
A variant of [`RealTimeHeuristicSearch`](@ref) (`AlternatingRTHS` or `ARTHS`
for short) that alternates between different search strategies, corresponding
to one or more `planners`. For example, `ARTHS` can alternate between a
`RTHS` planner that uses A* search (`h_mult = 1.0`) and an `RTHS` planner that
uses breadth-first search (`h_mult = 0.0`), ensuring that the updated region
of the state space is both deep and broad.
Returns a [`MultiSolution`](@ref) composed of [`ReusableTreePolicy`](@ref)
sub-solutions. By default, these sub-solutions share and update the same value
estimates (`share_values = true`) and the same reusable tree of optimal paths
to the goal (`share_paths = true`), but do not share the same reusable forward
search tree (`share_search = false`).
"""
@auto_hash_equals mutable struct AlternatingRealTimeHeuristicSearch{
Ps <: Tuple
} <: Planner
planners::Ps
share_values::Bool
share_search::Bool
share_paths::Bool
end
const AlternatingRTHS = AlternatingRealTimeHeuristicSearch
const ARTHS = AlternatingRealTimeHeuristicSearch
@doc (@doc AlternatingRealTimeHeuristicSearch) AlternatingRTHS
@doc (@doc AlternatingRealTimeHeuristicSearch) ARTHS
function AlternatingRealTimeHeuristicSearch(
planners::P...;
share_values::Bool = true,
share_search::Bool = false,
share_paths::Bool = true,
) where {P <: RealTimeHeuristicSearch}
@assert !isempty(planners) "Please provide one or more `RTHS` planners."
return AlternatingRealTimeHeuristicSearch(
planners, share_values, share_search, share_paths
)
end
function Base.getproperty(planner::P, name::Symbol) where {P <: ARTHS}
if hasfield(P, name)
return getfield(planner, name)
else
return map(p -> getproperty(p, name), planner.planners)
end
end
function Base.setproperty!(planner::P, name::Symbol, val) where {P <: ARTHS}
if hasfield(P, name)
return setfield!(planner, name, val)
elseif val isa Tuple
return map(zip(planner.planners, val)) do (p, v)
setproperty!(p, name, v)
end
else
return map(p -> setproperty!(p, name, val), planner.planners)
end
end
function Base.hasproperty(planner::P, name::Symbol) where {P <: ARTHS}
hasfield(P, name) || any(hasproperty(p, name) for p in planner.planners)
end
function Base.copy(p::AlternatingRealTimeHeuristicSearch)
return AlternatingRealTimeHeuristicSearch(
map(copy, p.planners), p.share_values, p.share_search, p.share_paths
)
end
function solve(planner::AlternatingRealTimeHeuristicSearch,
domain::Domain, state::State, spec::Specification)
# Simplify goal specification
spec = simplify_goal(spec, domain, state)
# Construct (possibly) shared solution components
S = typeof(state)
subplanner = first(planner.planners)
shared_heuristic = precomputed(subplanner.heuristic, domain, state, spec)
shared_default = HeuristicVPolicy(shared_heuristic, domain, spec)
shared_value_policy = TabularVPolicy(domain, spec, shared_default)
shared_search_sol = init_sol(subplanner.planner, shared_heuristic,
domain, state, spec)
shared_goal_tree = Dict{UInt, PathNode{S}}()
# Construct sub-solutions for each component planner
subsols = map(planner.planners) do subplanner
# Precompute heuristic information
if shared_heuristic.heuristic === subplanner.heuristic
heuristic = shared_heuristic
else
heuristic = precomputed(subplanner.heuristic, domain, state, spec)
end
# Initialize value table
if planner.share_values
value_policy = shared_value_policy
else
default = HeuristicVPolicy(heuristic, domain, spec)
value_policy = TabularVPolicy(domain, spec, default)
end
# Initialize search solution
if planner.share_search
search_sol = shared_search_sol
else
search_sol = init_sol(subplanner.planner, heuristic,
domain, state, spec)
end
# Initialize goal tree
if planner.share_paths
goal_tree = shared_goal_tree
else
goal_tree = Dict{UInt, PathNode{S}}()
end
# Return sub-solution
return ReusableTreePolicy(value_policy, search_sol, goal_tree)
end
sol = MultiSolution(subsols)
return solve!(sol, planner, domain, state, spec)
end
function solve!(sol::MultiSolution, planner::AlternatingRealTimeHeuristicSearch,
domain::Domain, state::State, spec::Specification)
@assert length(sol.solutions) == length(planner.planners)
for (subsol, subplanner) in zip(sol.solutions, planner.planners)
@assert subsol isa ReusableTreePolicy
solve!(subsol, subplanner, domain, state, spec)
end
return sol
end
function refine!(sol::MultiSolution, planner::AlternatingRealTimeHeuristicSearch,
domain::Domain, state::State, spec::Specification)
return solve!(sol, planner, domain, state, spec)
end