Fix live-migration with extra disks #686

Merged (8 commits) on Mar 28, 2024
35 changes: 21 additions & 14 deletions internal/server/device/disk.go
@@ -121,22 +121,10 @@ func (d *disk) sourceIsCeph() bool {

// CanHotPlug returns whether the device can be managed whilst the instance is running.
func (d *disk) CanHotPlug() bool {
// Containers support hot-plugging all disk types.
if d.inst.Type() == instancetype.Container {
return true
}

// Only VirtioFS works with path hotplug.
// As migration.stateful turns off VirtioFS, this also turns off hotplugging of paths.
if util.IsTrue(d.inst.ExpandedConfig()["migration.stateful"]) {
return false
}

// Block disks can be hot-plugged into VMs.
// All disks can be hot-plugged.
return true
}

// validateConfig checks the supplied config for correctness.
// isRequired indicates whether the supplied device config requires this device to start OK.
func (d *disk) isRequired(devConfig deviceConfig.Device) bool {
// Defaults to required.
@@ -399,7 +387,7 @@ func (d *disk) validateConfig(instConf instance.ConfigReader) error {
return fmt.Errorf("Failed checking if custom volume is exclusively attached to another instance: %w", err)
}

if remoteInstance != nil {
if remoteInstance != nil && remoteInstance.ID != instConf.ID() {
return fmt.Errorf("Custom volume is already attached to an instance on a different node")
}

@@ -468,6 +456,25 @@ func (d *disk) validateConfig(instConf instance.ConfigReader) error {
}
}

// Restrict disks allowed when live-migratable.
if instConf.Type() == instancetype.VM && util.IsTrue(instConf.ExpandedConfig()["migration.stateful"]) {
if d.config["path"] != "" && d.config["path"] != "/" {
return fmt.Errorf("Shared filesystem are incompatible with migration.stateful=true")
}

if d.config["pool"] == "" {
return fmt.Errorf("Only Incus-managed disks are allowed with migration.stateful=true")
}

if d.config["io.bus"] == "nvme" {
return fmt.Errorf("NVME disks aren't supported with migration.stateful=true")
}

if d.config["path"] != "/" && d.pool != nil && !d.pool.Driver().Info().Remote {
return fmt.Errorf("Only additional disks coming from a shared storage pool are supported with migration.stateful=true")
}
}

return nil
}

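The new validateConfig checks above restrict which disks a VM can carry when migration.stateful is enabled: no shared filesystem paths other than the root, only pool-managed disks, no NVMe bus, and any additional disk must live on a remote (shared) storage pool. A standalone sketch of the same rules, written against a plain config map rather than the real disk device type (function and parameter names here are illustrative only, not the Incus implementation):

```go
// Illustrative re-statement of the migration.stateful disk restrictions.
// poolIsRemote stands in for the pool's Driver().Info().Remote flag.
package main

import (
	"errors"
	"fmt"
)

func checkStatefulDisk(cfg map[string]string, poolIsRemote bool) error {
	// Shared filesystem paths (anything other than the root disk) can't follow a live migration.
	if cfg["path"] != "" && cfg["path"] != "/" {
		return errors.New("shared filesystems are incompatible with migration.stateful=true")
	}

	// Unmanaged disks (no pool) can't be prepared by the storage layer.
	if cfg["pool"] == "" {
		return errors.New("only managed disks are allowed with migration.stateful=true")
	}

	// NVMe-attached disks can't be live-migrated.
	if cfg["io.bus"] == "nvme" {
		return errors.New("NVMe disks aren't supported with migration.stateful=true")
	}

	// Additional disks must come from a shared (remote) storage pool.
	if cfg["path"] != "/" && !poolIsRemote {
		return errors.New("only additional disks from a shared storage pool are supported with migration.stateful=true")
	}

	return nil
}

func main() {
	// An extra block volume on a remote pool (e.g. Ceph) passes; the same volume
	// on a local-only pool is rejected.
	disk := map[string]string{"type": "disk", "pool": "remote", "source": "vol1"}
	fmt.Println(checkStatefulDisk(disk, true))  // <nil>
	fmt.Println(checkStatefulDisk(disk, false)) // shared storage pool error
}
```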
75 changes: 75 additions & 0 deletions internal/server/instance/drivers/driver_qemu.go
@@ -6687,6 +6687,17 @@ func (d *qemu) migrateSendLive(pool storagePools.Pool, clusterMoveSourceName str
defer revert.Fail() // Run the revert fail before the earlier defers.

d.logger.Debug("Setup temporary migration storage snapshot")
} else {
// Still set some options for shared storage.
capabilities := map[string]bool{
// Automatically throttle down the guest to speed up convergence of RAM migration.
"auto-converge": true,
}

err = monitor.MigrateSetCapabilities(capabilities)
if err != nil {
return fmt.Errorf("Failed setting migration capabilities: %w", err)
}
}

// Perform storage transfer while instance is still running.
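When the root disk already sits on shared storage, no temporary migration snapshot is needed and the new else branch only enables QEMU's auto-converge migration capability through the QMP monitor. As a rough illustration, and assuming monitor.MigrateSetCapabilities wraps QEMU's migrate-set-capabilities command, the payload would look roughly like the JSON built below (this is a sketch, not the Incus monitor code):

```go
// Build the assumed QMP migrate-set-capabilities message enabling auto-converge,
// which throttles guest vCPUs when dirty-page production outpaces transfer so
// RAM migration can converge.
package main

import (
	"encoding/json"
	"fmt"
)

type capability struct {
	Capability string `json:"capability"`
	State      bool   `json:"state"`
}

func main() {
	cmd := map[string]any{
		"execute": "migrate-set-capabilities",
		"arguments": map[string]any{
			"capabilities": []capability{
				{Capability: "auto-converge", State: true},
			},
		},
	}

	out, err := json.Marshal(cmd)
	if err != nil {
		panic(err)
	}

	fmt.Println(string(out))
}
```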
@@ -6700,6 +6711,38 @@ func (d *qemu) migrateSendLive(pool storagePools.Pool, clusterMoveSourceName str
return err
}

// Derive the effective storage project name from the instance config's project.
storageProjectName, err := project.StorageVolumeProject(d.state.DB.Cluster, d.project.Name, db.StoragePoolVolumeTypeCustom)
if err != nil {
return err
}

// Notify the shared disks that they're going to be accessed from another system.
for _, dev := range d.expandedDevices.Sorted() {
if dev.Config["type"] != "disk" || dev.Config["path"] == "/" {
continue
}

// Load the pool for the disk.
diskPool, err := storagePools.LoadByName(d.state, dev.Config["pool"])
if err != nil {
return fmt.Errorf("Failed loading storage pool: %w", err)
}

// Setup the volume entry.
extraSourceArgs := &localMigration.VolumeSourceArgs{
ClusterMove: true,
}

vol := diskPool.GetVolume(storageDrivers.VolumeTypeCustom, storageDrivers.ContentTypeBlock, project.StorageVolume(storageProjectName, dev.Config["source"]), nil)

// Call MigrateVolume on the source.
err = diskPool.Driver().MigrateVolume(vol, nil, extraSourceArgs, nil)
if err != nil {
return fmt.Errorf("Failed to prepare device %q for migration: %w", dev.Name, err)
}
}

// Non-shared storage snapshot transfer.
if !sharedStorage {
listener, err := net.Listen("unix", "")
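The device loop above, mirrored later in MigrateReceive, walks the instance's expanded devices and prepares each extra disk (a custom volume attached as a disk) for access from the other cluster member, skipping non-disk devices and the root disk at path "/" since the main storage migration already covers it. A small illustration of that selection rule over a plain name-to-config map (the device names and shapes below are made up for the example):

```go
// Illustrative filter: pick the extra disk devices that need per-volume
// migration preparation, skipping non-disk devices and the root disk.
package main

import (
	"fmt"
	"sort"
)

func extraDisks(devices map[string]map[string]string) []string {
	var names []string

	for name, cfg := range devices {
		if cfg["type"] != "disk" || cfg["path"] == "/" {
			continue
		}

		names = append(names, name)
	}

	sort.Strings(names) // deterministic order, similar in spirit to Sorted()
	return names
}

func main() {
	devices := map[string]map[string]string{
		"root": {"type": "disk", "path": "/", "pool": "remote"},
		"data": {"type": "disk", "pool": "remote", "source": "vol1"},
		"eth0": {"type": "nic", "network": "incusbr0"},
	}

	fmt.Println(extraDisks(devices)) // [data]
}
```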
Expand Down Expand Up @@ -7180,6 +7223,38 @@ func (d *qemu) MigrateReceive(args instance.MigrateReceiveArgs) error {
return fmt.Errorf("Failed creating instance on target: %w", err)
}

// Derive the effective storage project name from the instance config's project.
storageProjectName, err := project.StorageVolumeProject(d.state.DB.Cluster, d.project.Name, db.StoragePoolVolumeTypeCustom)
if err != nil {
return err
}

// Notify the shared disks that they're going to be accessed from another system.
for _, dev := range d.expandedDevices.Sorted() {
if dev.Config["type"] != "disk" || dev.Config["path"] == "/" {
continue
}

// Load the pool for the disk.
diskPool, err := storagePools.LoadByName(d.state, dev.Config["pool"])
if err != nil {
return fmt.Errorf("Failed loading storage pool: %w", err)
}

// Setup the volume entry.
extraTargetArgs := localMigration.VolumeTargetArgs{
ClusterMoveSourceName: args.ClusterMoveSourceName,
}

vol := diskPool.GetVolume(storageDrivers.VolumeTypeCustom, storageDrivers.ContentTypeBlock, project.StorageVolume(storageProjectName, dev.Config["source"]), nil)

// Call CreateVolumeFromMigration on the target.
err = diskPool.Driver().CreateVolumeFromMigration(vol, nil, extraTargetArgs, nil, nil)
if err != nil {
return fmt.Errorf("Failed to prepare device %q for migration: %w", dev.Name, err)
}
}

// Only delete all instance volumes on error if the pool volume creation has succeeded to
// avoid deleting an existing conflicting volume.
isRemoteClusterMove := args.ClusterMoveSourceName != "" && poolInfo.Remote
3 changes: 2 additions & 1 deletion internal/server/instance/instance_interface.go
@@ -62,6 +62,8 @@ type ConfigReader interface {
Project() api.Project
Type() instancetype.Type
Architecture() int
ID() int

ExpandedConfig() map[string]string
ExpandedDevices() deviceConfig.Devices
LocalConfig() map[string]string
@@ -127,7 +129,6 @@ type Instance interface {
OnHook(hookName string, args map[string]string) error

// Properties.
ID() int
Location() string
Name() string
CloudInitID() string
4 changes: 2 additions & 2 deletions internal/server/storage/drivers/driver_lvm_volumes.go
@@ -168,7 +168,7 @@ func (d *lvm) CreateVolumeFromMigration(vol Volume, conn io.ReadWriteCloser, vol
}

// Mark the volume for shared locking during live migration.
if vol.volType == VolumeTypeVM {
if vol.volType == VolumeTypeVM || vol.IsCustomBlock() {
volDevPath := d.lvmDevPath(d.config["lvm.vg_name"], vol.volType, vol.contentType, vol.Name())
_, err := subprocess.RunCommand("lvchange", "--activate", "sy", "--ignoreactivationskip", volDevPath)
if err != nil {
@@ -870,7 +870,7 @@ func (d *lvm) RenameVolume(vol Volume, newVolName string, op *operations.Operati
func (d *lvm) MigrateVolume(vol Volume, conn io.ReadWriteCloser, volSrcArgs *migration.VolumeSourceArgs, op *operations.Operation) error {
if d.clustered && volSrcArgs.ClusterMove {
// Mark the volume for shared locking during live migration.
if vol.volType == VolumeTypeVM {
if vol.volType == VolumeTypeVM || vol.IsCustomBlock() {
// Block volume.
volDevPath := d.lvmDevPath(d.config["lvm.vg_name"], vol.volType, vol.contentType, vol.Name())
_, err := subprocess.RunCommand("lvchange", "--activate", "sy", "--ignoreactivationskip", volDevPath)
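With these two LVM changes, custom block volumes get the same treatment as VM volumes on clustered LVM: the logical volume is switched to shared activation so it can be open on the source and the target at the same time during the live migration. A minimal sketch of the underlying lvchange invocation, assuming an lvmlockd-backed shared volume group and a made-up device path:

```go
// Activate a logical volume in shared mode so both cluster members can open it
// during live migration. "--activate sy" requests shared activation and
// "--ignoreactivationskip" mirrors the flag the driver passes so LVs marked
// as skipped still activate.
package main

import (
	"fmt"
	"os/exec"
)

func activateShared(volDevPath string) error {
	out, err := exec.Command("lvchange", "--activate", "sy", "--ignoreactivationskip", volDevPath).CombinedOutput()
	if err != nil {
		return fmt.Errorf("lvchange failed: %w (%s)", err, out)
	}

	return nil
}

func main() {
	// Illustrative device path; on a real system this would be /dev/<vg>/<lv>.
	if err := activateShared("/dev/vg0/example-volume"); err != nil {
		fmt.Println(err)
	}
}
```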