Skip to content

Commit

Permalink
Allow disconnecting Fly runtime during initialization (#2776)
Browse files Browse the repository at this point in the history
  • Loading branch information
jonatanklosko authored Sep 9, 2024
1 parent 85ae48e commit 2eb5963
Show file tree
Hide file tree
Showing 8 changed files with 98 additions and 22 deletions.
16 changes: 16 additions & 0 deletions lib/livebook/fly_api.ex
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,22 @@ defmodule Livebook.FlyAPI do
%{id: machine["id"], private_ip: machine["private_ip"]}
end

@doc """
Deletes the given machine.
"""
@spec delete_machine(String.t(), String.t(), String.t()) :: :ok | {:error, error}
def delete_machine(token, app_name, machine_id) do
params = %{force: true}

with {:ok, _data} <-
flaps_request(token, "/v1/apps/#{app_name}/machines/#{machine_id}",
method: :delete,
params: params
) do
:ok
end
end

@doc """
Waits for the machine to start.
"""
Expand Down
11 changes: 9 additions & 2 deletions lib/livebook/runtime.ex
Original file line number Diff line number Diff line change
Expand Up @@ -806,13 +806,20 @@ defprotocol Livebook.Runtime do
The `runtime` should be the struct updated with all information
necessary for further communication.
In case the initialization is a particularly involved process, the
process may send updates to the caller:
In case the initialization is a particularly involved, the process
may send updates to the caller:
* `{:runtime_connect_info, pid, info}`
Where `info` is a few word text describing the current initialization
step.
If the caller decides to abort the initialization, they can forecefully
kill the process. The runtime resources should already be tolerant
to abrupt Livebook termination and autodestroy through monitoring
and timeouts. However, when the initialization process gets killed,
it may be desirable to eagerly remove the resources it has already
allocated, which can be achieved with an additional watcher process.
"""
@spec connect(t()) :: pid()
def connect(runtime)
Expand Down
34 changes: 34 additions & 0 deletions lib/livebook/runtime/fly.ex
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,14 @@ defmodule Livebook.Runtime.Fly do
|> :erlang.term_to_binary()
|> Base.encode64()

parent = self()

{:ok, watcher_pid} =
DynamicSupervisor.start_child(
Livebook.RuntimeSupervisor,
{Task, fn -> watcher(parent, config) end}
)

with :ok <-
(if config.volume_id && runtime.previous_machine_id do
with_log(caller, "await resources", fn ->
Expand All @@ -128,6 +136,7 @@ defmodule Livebook.Runtime.Fly do
with_log(caller, "create machine", fn ->
create_machine(config, runtime_data)
end),
_ <- send(watcher_pid, {:machine_created, machine_id}),
child_node <- :"#{node_base}@#{machine_id}.vm.#{config.app_name}.internal",
{:ok, proxy_port} <-
with_log(caller, "start proxy", fn ->
Expand All @@ -151,6 +160,8 @@ defmodule Livebook.Runtime.Fly do

send(primary_pid, :node_initialized)

send(watcher_pid, :done)

runtime = %{runtime | node: child_node, server_pid: server_pid, machine_id: machine_id}
send(caller, {:runtime_connect_done, self(), {:ok, runtime}})

Expand All @@ -172,6 +183,29 @@ defmodule Livebook.Runtime.Fly do
{:noreply, state}
end

defp watcher(parent, config) do
ref = Process.monitor(parent)
watcher_loop(%{ref: ref, config: config, machine_id: nil})
end

defp watcher_loop(state) do
receive do
{:DOWN, ref, :process, _pid, _reason} when ref == state.ref ->
# If the parent process is killed, we try to eagerly free the
# created resources
if machine_id = state.machine_id do
config = state.config
_ = Livebook.FlyAPI.delete_machine(config.token, config.app_name, machine_id)
end

{:machine_created, machine_id} ->
watcher_loop(%{state | machine_id: machine_id})

:done ->
:ok
end
end

defp create_machine(config, runtime_data) do
base_image = Enum.find(Livebook.Config.docker_images(), &(&1.tag == config.docker_tag))
image = "ghcr.io/livebook-dev/livebook:#{base_image.tag}"
Expand Down
12 changes: 9 additions & 3 deletions lib/livebook/session.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2424,9 +2424,15 @@ defmodule Livebook.Session do
end

defp handle_action(state, {:disconnect_runtime, runtime}) do
Runtime.disconnect(runtime)
state = %{state | runtime_monitor_ref: nil}
after_runtime_disconnected(state)
if state.runtime_connect do
Process.demonitor(state.runtime_connect.ref, [:flush])
Process.exit(state.runtime_connect.pid, :kill)
%{state | runtime_connect: nil}
else
Runtime.disconnect(runtime)
state = %{state | runtime_monitor_ref: nil}
after_runtime_disconnected(state)
end
end

defp handle_action(state, {:start_evaluation, cell, section, evaluation_opts}) do
Expand Down
2 changes: 1 addition & 1 deletion lib/livebook/session/data.ex
Original file line number Diff line number Diff line change
Expand Up @@ -921,7 +921,7 @@ defmodule Livebook.Session.Data do
end

def apply_operation(data, {:disconnect_runtime, _client_id}) do
with :connected <- data.runtime_status do
with true <- data.runtime_status in [:connecting, :connected] do
data
|> with_actions()
|> disconnect_runtime()
Expand Down
36 changes: 26 additions & 10 deletions lib/livebook_web/live/session_live/fly_runtime_component.ex
Original file line number Diff line number Diff line change
Expand Up @@ -122,16 +122,27 @@ defmodule LivebookWeb.SessionLive.FlyRuntimeComponent do
/>
<div class="mt-8">
<.button
phx-click="init"
phx-target={@myself}
disabled={
@runtime_status == :connecting or not @specs_changeset.valid? or
volume_errors(@volume_id, @volumes, @region) != []
}
>
<%= label(@app_name, @runtime, @runtime_status) %>
</.button>
<div class="flex gap-2">
<.button
phx-click="init"
phx-target={@myself}
disabled={
@runtime_status == :connecting or not @specs_changeset.valid? or
volume_errors(@volume_id, @volumes, @region) != []
}
>
<%= label(@app_name, @runtime, @runtime_status) %>
</.button>
<.button
:if={@runtime_status == :connecting}
color="red"
outlined
phx-click="disconnect"
phx-target={@myself}
>
Disconnect
</.button>
</div>
<div
:if={reconnecting?(@app_name, @runtime) && @runtime_connect_info}
class="mt-4 scroll-mb-8"
Expand Down Expand Up @@ -582,6 +593,11 @@ defmodule LivebookWeb.SessionLive.FlyRuntimeComponent do
{:noreply, socket}
end

def handle_event("disconnect", %{}, socket) do
Session.disconnect_runtime(socket.assigns.session.pid)
{:noreply, socket}
end

def handle_event("open_save_config", %{}, socket) do
changeset = config_secret_changeset(socket, %{name: @config_secret_prefix})
save_config = %{changeset: changeset, inflight: false, error: false}
Expand Down
2 changes: 1 addition & 1 deletion lib/livebook_web/live/session_live/render.ex
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,7 @@ defmodule LivebookWeb.SessionLive.Render do
</.button>
<.button
:if={@data_view.runtime_status == :connected}
:if={@data_view.runtime_status in [:connected, :connecting]}
color="red"
outlined
type="button"
Expand Down
7 changes: 2 additions & 5 deletions test/livebook/session/data_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -3940,11 +3940,8 @@ defmodule Livebook.Session.DataTest do
end

describe "apply_operation/2 given :disconnect_runtime" do
test "returns an error if the runtime is not connected" do
data =
data_after_operations!([
{:connect_runtime, @cid}
])
test "returns an error if the runtime is disconnected" do
data = Data.new()

operation = {:disconnect_runtime, @cid}

Expand Down

0 comments on commit 2eb5963

Please sign in to comment.