Skip to content

Commit

Permalink
Revert resize: wait for events according to hybrid plug
Browse files Browse the repository at this point in the history
Since 4817165, when reverting a
resized instance back to the source host, the libvirt driver waits for
vif-plugged events when spawning the instance. When called from
finish_revert_resize() in the source compute manager, libvirt's
finish_revert_migration() does not pass vifs_already_plugged to
_create_domain_and_network(), making the latter use the default False
value.

When the source compute manager calls
network_api.migrate_instance_finish() in finish_revert_resize(), this
updates the port binding back to the source host. If Neutron is
configured to use OVS hybrid plug, it will send the vif-plugged event
immediately after completing this request. This happens before the
virt driver's finish_revert_migration() method is called. This causes
the wait in the libvirt driver to time out because the event is
received before Nova starts waiting for it.

The neutron ovs l2 agent sends vif-plugged events when two conditions
are met. First the port must be bound to the host managed by the
l2 agent and second, the agent must have completed configuring the
port on ovs. This involves assigning the port a local VLAN for tenant
isolation, applying security group rules if required and applying
QoS policies or other agent extensions like service function chaining.

During the boot process, we bind the port first to the host
then plug the interface into ovs which triggers the l2 agent to
configure it resulting in the emission of the vif-plugged event.
In the revert case, as noted above, since the vif is already plugged
on the source node when hybrid-plug is used, binding the port to the
source node fulfils the second condition to send the vif-plugged event.

Events sent immediately after port binding update are hereafter known
as "bind-time" events. For ports that do not use OVS hybrid plug,
Neutron will continue to send vif-plugged events only when Nova
actually plugs the VIF. These types of events are hereafter known as
"plug-time" events. OVS hybrid plug is a per agent setting, so for
a particular host, bind-time events are an all-or-nothing thing for the
ovs backend: either all VIF_TYPE=ovs ports have them, or no ovs ports
have them. In general, a host will only have one network backend.
The only exception to this is SR-IOV. SR-IOV is commonly deployed on
the same host as other network backends such as OVS or linuxbridge.
SR-IOV ports with VNIC_TYPE=direct-physical will always have only
bind-time events. If an instance mixes OVS ports with hybrid-plug=False
with direct physical ports, it will have both kinds of events.

For same host resize reverts we do not update the binding host as the
host does not change, as such for same host resize we do not receive
bind time events. For same host revert we therefore do not wait for
bind time events in the compute manager.

This patch adds functions to the NetworkInfo model that return what
kinds of events each VIF has. These are then used in the migration
revert logic to decide when to wait for external events: in the
compute manager, when binding the port, for bind-time events,
and/or in libvirt, when plugging the VIFs, for plug-time events.

Closes-bug: #1832028
Closes-Bug: #1833902
Co-Authored-By: Sean Mooney <[email protected]>
Change-Id: I51673e58fc8d5f051df911630f6d7a928d123a5b
  • Loading branch information
notartom and SeanMooney committed Jul 10, 2019
1 parent 2722cab commit 7a7a223
Show file tree
Hide file tree
Showing 9 changed files with 351 additions and 44 deletions.
62 changes: 48 additions & 14 deletions nova/compute/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4208,6 +4208,49 @@ def revert_resize(self, context, instance, migration):
self.compute_rpcapi.finish_revert_resize(context, instance,
migration, migration.source_compute)

def _finish_revert_resize_network_migrate_finish(self, context, instance,
migration):
"""Causes port binding to be updated. In some Neutron or port
configurations - see NetworkModel.get_bind_time_events() - we
expect the vif-plugged event from Neutron immediately and wait for it.
The rest of the time, the event is expected further along in the
virt driver, so we don't wait here.
:param context: The request context.
:param instance: The instance undergoing the revert resize.
:param migration: The Migration object of the resize being reverted.
:raises: eventlet.timeout.Timeout or
exception.VirtualInterfacePlugException.
"""
network_info = instance.get_network_info()
events = []
deadline = CONF.vif_plugging_timeout
if deadline and utils.is_neutron() and network_info:
events = network_info.get_bind_time_events(migration)
if events:
LOG.debug('Will wait for bind-time events: %s', events)
error_cb = self._neutron_failed_migration_callback
try:
with self.virtapi.wait_for_instance_event(instance, events,
deadline=deadline,
error_callback=error_cb):
# NOTE(hanrong): we need to change migration.dest_compute to
# source host temporarily.
# "network_api.migrate_instance_finish" will setup the network
# for the instance on the destination host. For revert resize,
# the instance will back to the source host, the setup of the
# network for instance should be on the source host. So set
# the migration.dest_compute to source host at here.
with utils.temporary_mutation(
migration, dest_compute=migration.source_compute):
self.network_api.migrate_instance_finish(context,
instance,
migration)
except eventlet.timeout.Timeout:
with excutils.save_and_reraise_exception():
LOG.error('Timeout waiting for Neutron events: %s', events,
instance=instance)

@wrap_exception()
@reverts_task_state
@wrap_instance_event(prefix='compute')
Expand Down Expand Up @@ -4255,17 +4298,8 @@ def finish_revert_resize(self, context, instance, migration):

self.network_api.setup_networks_on_host(context, instance,
migration.source_compute)
# NOTE(hanrong): we need to change migration.dest_compute to
# source host temporarily. "network_api.migrate_instance_finish"
# will setup the network for the instance on the destination host.
# For revert resize, the instance will back to the source host, the
# setup of the network for instance should be on the source host.
# So set the migration.dest_compute to source host at here.
with utils.temporary_mutation(
migration, dest_compute=migration.source_compute):
self.network_api.migrate_instance_finish(context,
instance,
migration)
self._finish_revert_resize_network_migrate_finish(
context, instance, migration)
network_info = self.network_api.get_instance_nw_info(context,
instance)

Expand Down Expand Up @@ -6570,8 +6604,8 @@ def pre_live_migration(self, context, instance, block_migration, disk,
return migrate_data

@staticmethod
def _neutron_failed_live_migration_callback(event_name, instance):
msg = ('Neutron reported failure during live migration '
def _neutron_failed_migration_callback(event_name, instance):
msg = ('Neutron reported failure during migration '
'with %(event)s for instance %(uuid)s')
msg_args = {'event': event_name, 'uuid': instance.uuid}
if CONF.vif_plugging_is_fatal:
Expand Down Expand Up @@ -6649,7 +6683,7 @@ class _BreakWaitForInstanceEvent(Exception):
disk = None

deadline = CONF.vif_plugging_timeout
error_cb = self._neutron_failed_live_migration_callback
error_cb = self._neutron_failed_migration_callback
# In order to avoid a race with the vif plugging that the virt
# driver does on the destination host, we register our events
# to wait for before calling pre_live_migration. Then if the
Expand Down
25 changes: 25 additions & 0 deletions nova/network/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,17 @@ def labeled_ips(self):
'ips': ips}
return []

def has_bind_time_event(self, migration):
"""Returns whether this VIF's network-vif-plugged external event will
be sent by Neutron at "bind-time" - in other words, as soon as the port
binding is updated. This is in the context of updating the port binding
to a host that already has the instance in a shutoff state - in
practice, this means reverting either a cold migration or a
non-same-host resize.
"""
return (self.is_hybrid_plug_enabled() and not
migration.is_same_host())

def is_hybrid_plug_enabled(self):
return self['details'].get(VIF_DETAILS_OVS_HYBRID_PLUG, False)

Expand Down Expand Up @@ -515,6 +526,20 @@ def wait(self, do_raise=True):
def json(self):
return jsonutils.dumps(self)

def get_bind_time_events(self, migration):
"""Returns whether any of our VIFs have "bind-time" events. See
has_bind_time_event() docstring for more details.
"""
return [('network-vif-plugged', vif['id'])
for vif in self if vif.has_bind_time_event(migration)]

def get_plug_time_events(self, migration):
"""Complementary to get_bind_time_events(), any event that does not
fall in that category is a plug-time event.
"""
return [('network-vif-plugged', vif['id'])
for vif in self if not vif.has_bind_time_event(migration)]


class NetworkInfoAsyncWrapper(NetworkInfo):
"""Wrapper around NetworkInfo that allows retrieving NetworkInfo
Expand Down
3 changes: 3 additions & 0 deletions nova/objects/migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@ def instance(self):
def instance(self, instance):
self._cached_instance = instance

def is_same_host(self):
return self.source_compute == self.dest_compute


@base.NovaObjectRegistry.register
class MigrationList(base.ObjectListBase, base.NovaObject):
Expand Down
8 changes: 6 additions & 2 deletions nova/tests/unit/compute/test_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -5892,7 +5892,9 @@ def fake_finish_revert_migration_driver(*args, **kwargs):
old_vm_state = vm_states.ACTIVE
else:
old_vm_state = vm_states.STOPPED
params = {'vm_state': old_vm_state}
params = {'vm_state': old_vm_state,
'info_cache': objects.InstanceInfoCache(
network_info=network_model.NetworkInfo([]))}
instance = self._create_fake_instance_obj(params)

self.stub_out('nova.virt.fake.FakeDriver.finish_migration', fake)
Expand Down Expand Up @@ -6042,7 +6044,9 @@ def test_finish_revert_resize_validate_source_compute(self):
def fake(*args, **kwargs):
pass

instance = self._create_fake_instance_obj()
params = {'info_cache': objects.InstanceInfoCache(
network_info=network_model.NetworkInfo([]))}
instance = self._create_fake_instance_obj(params)

self.stub_out('nova.virt.fake.FakeDriver.finish_migration', fake)
self.stub_out('nova.virt.fake.FakeDriver.finish_revert_migration',
Expand Down
91 changes: 91 additions & 0 deletions nova/tests/unit/compute/test_compute_mgr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5070,6 +5070,97 @@ def test_notify_volume_usage_detach_no_block_stats(self):
self.context, fake_instance, fake_bdm)
block_stats.assert_called_once_with(fake_instance, 'vda')

def _test_finish_revert_resize_network_migrate_finish(
self, vifs, events, migration=None):
instance = fake_instance.fake_instance_obj(self.context)
instance.info_cache = objects.InstanceInfoCache(
network_info=network_model.NetworkInfo(vifs))
if migration is None:
migration = objects.Migration(
source_compute='fake-source',
dest_compute='fake-dest')

def fake_migrate_instance_finish(context, instance, migration):
# NOTE(artom) This looks weird, but it's checking that the
# temporaty_mutation() context manager did its job.
self.assertEqual(migration.dest_compute, migration.source_compute)

with test.nested(
mock.patch.object(self.compute.virtapi,
'wait_for_instance_event'),
mock.patch.object(self.compute.network_api,
'migrate_instance_finish',
side_effect=fake_migrate_instance_finish)
) as (mock_wait, mock_migrate_instance_finish):
self.compute._finish_revert_resize_network_migrate_finish(
self.context, instance, migration)
mock_wait.assert_called_once_with(
instance, events, deadline=CONF.vif_plugging_timeout,
error_callback=self.compute._neutron_failed_migration_callback)
mock_migrate_instance_finish.assert_called_once_with(
self.context, instance, migration)

def test_finish_revert_resize_network_migrate_finish_wait(self):
"""Test that we wait for bind-time events if we have a hybrid-plugged
VIF.
"""
self._test_finish_revert_resize_network_migrate_finish(
[network_model.VIF(id=uuids.hybrid_vif,
details={'ovs_hybrid_plug': True}),
network_model.VIF(id=uuids.normal_vif,
details={'ovs_hybrid_plug': False})],
[('network-vif-plugged', uuids.hybrid_vif)])

def test_finish_revert_resize_network_migrate_finish_same_host(self):
"""Test that we're not waiting for any events if its a same host
resize revert.
"""
migration = objects.Migration(
source_compute='fake-source', dest_compute='fake-source')

self._test_finish_revert_resize_network_migrate_finish(
[network_model.VIF(id=uuids.hybrid_vif,
details={'ovs_hybrid_plug': True}),
network_model.VIF(id=uuids.normal_vif,
details={'ovs_hybrid_plug': False})],
[], migration=migration
)

def test_finish_revert_resize_network_migrate_finish_dont_wait(self):
"""Test that we're not waiting for any events if we don't have any
hybrid-plugged VIFs.
"""
self._test_finish_revert_resize_network_migrate_finish(
[network_model.VIF(id=uuids.hybrid_vif,
details={'ovs_hybrid_plug': False}),
network_model.VIF(id=uuids.normal_vif,
details={'ovs_hybrid_plug': False})],
[])

def test_finish_revert_resize_network_migrate_finish_no_vif_timeout(self):
"""Test that we're not waiting for any events if vif_plugging_timeout
is 0.
"""
self.flags(vif_plugging_timeout=0)
self._test_finish_revert_resize_network_migrate_finish(
[network_model.VIF(id=uuids.hybrid_vif,
details={'ovs_hybrid_plug': True}),
network_model.VIF(id=uuids.normal_vif,
details={'ovs_hybrid_plug': True})],
[])

@mock.patch.object(utils, 'is_neutron', return_value=False)
def test_finish_revert_resize_network_migrate_finish_not_neutron(self, _):
"""Test that we're not waiting for any events if we're not using
Neutron.
"""
self._test_finish_revert_resize_network_migrate_finish(
[network_model.VIF(id=uuids.hybrid_vif,
details={'ovs_hybrid_plug': True}),
network_model.VIF(id=uuids.normal_vif,
details={'ovs_hybrid_plug': True})],
[])


class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
def setUp(self):
Expand Down
30 changes: 30 additions & 0 deletions nova/tests/unit/network/test_network_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
# under the License.

from oslo_config import cfg
from oslo_utils.fixture import uuidsentinel as uuids

from nova import exception
from nova.network import model
from nova import objects
from nova import test
from nova.tests.unit import fake_network_cache_model
from nova.virt import netutils
Expand Down Expand Up @@ -857,6 +859,34 @@ def test_injection_ipv6_with_lxc_no_gateway(self):
libvirt_virt_type='lxc')
self.assertEqual(expected, template)

def test_get_events(self):
network_info = model.NetworkInfo([
model.VIF(
id=uuids.hybrid_vif,
details={'ovs_hybrid_plug': True}),
model.VIF(
id=uuids.normal_vif,
details={'ovs_hybrid_plug': False})])
same_host = objects.Migration(source_compute='fake-host',
dest_compute='fake-host')
diff_host = objects.Migration(source_compute='fake-host1',
dest_compute='fake-host2')
# Same-host migrations will have all events be plug-time.
self.assertItemsEqual(
[('network-vif-plugged', uuids.normal_vif),
('network-vif-plugged', uuids.hybrid_vif)],
network_info.get_plug_time_events(same_host))
# Same host migration will have no plug-time events.
self.assertEqual([], network_info.get_bind_time_events(same_host))
# Diff-host migration + OVS hybrid plug = bind-time events
self.assertEqual(
[('network-vif-plugged', uuids.hybrid_vif)],
network_info.get_bind_time_events(diff_host))
# Diff-host migration + normal OVS = plug-time events
self.assertEqual(
[('network-vif-plugged', uuids.normal_vif)],
network_info.get_plug_time_events(diff_host))


class TestNetworkMetadata(test.NoDBTestCase):
def setUp(self):
Expand Down
8 changes: 8 additions & 0 deletions nova/tests/unit/objects/test_migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,14 @@ def test_get_by_uuid(self, mock_db_get):
mig = objects.Migration.get_by_uuid(self.context, uuidsentinel.mig)
self.assertEqual(uuidsentinel.mig, mig.uuid)

def test_is_same_host(self):
same_host = objects.Migration(source_compute='fake-host',
dest_compute='fake-host')
diff_host = objects.Migration(source_compute='fake-host1',
dest_compute='fake-host2')
self.assertTrue(same_host.is_same_host())
self.assertFalse(diff_host.is_same_host())


class TestMigrationObject(test_objects._LocalTest,
_TestMigrationObject):
Expand Down
Loading

0 comments on commit 7a7a223

Please sign in to comment.