diff --git a/criu/cr-restore.c b/criu/cr-restore.c index d5b6c8037a..646300bdb8 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -2354,12 +2354,12 @@ int cr_restore_tasks(void) if (init_service_fd()) return 1; - if (cr_plugin_init(CR_PLUGIN_STAGE__RESTORE)) - return -1; - if (check_img_inventory(/* restore = */ true) < 0) goto err; + if (cr_plugin_init(CR_PLUGIN_STAGE__RESTORE)) + return -1; + if (init_stats(RESTORE_STATS)) goto err; diff --git a/criu/image.c b/criu/image.c index 9fb390ab7e..9589167fb1 100644 --- a/criu/image.c +++ b/criu/image.c @@ -26,6 +26,14 @@ TaskKobjIdsEntry *root_ids; u32 root_cg_set; Lsmtype image_lsm; +struct inventory_plugin { + struct list_head node; + char *name; +}; + +struct list_head inventory_plugins_list = LIST_HEAD_INIT(inventory_plugins_list); +static int n_inventory_plugins; + int check_img_inventory(bool restore) { int ret = -1; @@ -99,6 +107,19 @@ int check_img_inventory(bool restore) } else { opts.network_lock_method = he->network_lock_method; } + + if (!he->plugins_entry) { + /* backwards compatibility: if the 'plugins_entry' field is missing, + * all plugins should be enabled during restore. + */ + n_inventory_plugins = -1; + } else { + PluginsEntry *pe = he->plugins_entry; + for (int i = 0; i < pe->n_plugins; i++) { + if (add_inventory_plugin(pe->plugins[i])) + goto out_err; + } + } } ret = 0; @@ -110,8 +131,92 @@ int check_img_inventory(bool restore) return ret; } +/** + * Check if the 'plugins' field in the inventory image contains + * the specified plugin name. If found, the plugin is removed + * from the linked list. + */ +bool check_and_remove_inventory_plugin(const char *name, size_t n) +{ + if (n_inventory_plugins == -1) + return true; /* backwards compatibility */ + + if (n_inventory_plugins > 0) { + struct inventory_plugin *p, *tmp; + + list_for_each_entry_safe(p, tmp, &inventory_plugins_list, node) { + if (!strncmp(name, p->name, n)) { + xfree(p->name); + list_del(&p->node); + xfree(p); + n_inventory_plugins--; + return true; + } + } + } + + return false; +} + +/** + * We expect during restore all loaded plugins to be removed from + * the inventory_plugins_list. If the list is not empty, show an + * error message for each missing plugin. + */ +int check_inventory_plugins(void) +{ + struct inventory_plugin *p; + + if (n_inventory_plugins <= 0) + return 0; + + list_for_each_entry(p, &inventory_plugins_list, node) { + pr_err("Missing required plugin: %s\n", p->name); + } + + return -1; +} + +/** + * Add plugin name to the inventory image. These values + * can be used to identify required plugins during restore. + */ +int add_inventory_plugin(const char *name) +{ + struct inventory_plugin *p; + + p = xmalloc(sizeof(struct inventory_plugin)); + if (p == NULL) + return -1; + + p->name = xstrdup(name); + if (!p->name) { + xfree(p); + return -1; + } + list_add(&p->node, &inventory_plugins_list); + n_inventory_plugins++; + + return 0; +} + +void free_inventory_plugins_list(void) +{ + struct inventory_plugin *p, *tmp; + + if (!list_empty(&inventory_plugins_list)) { + list_for_each_entry_safe(p, tmp, &inventory_plugins_list, node) { + xfree(p->name); + list_del(&p->node); + xfree(p); + } + } + n_inventory_plugins = 0; +} + int write_img_inventory(InventoryEntry *he) { + PluginsEntry pe = PLUGINS_ENTRY__INIT; struct cr_img *img; int ret; @@ -121,8 +226,27 @@ int write_img_inventory(InventoryEntry *he) if (!img) return -1; + if (!list_empty(&inventory_plugins_list)) { + struct inventory_plugin *p; + int i = 0; + + pe.n_plugins = n_inventory_plugins; + pe.plugins = xmalloc(n_inventory_plugins * sizeof(char *)); + if (!pe.plugins) + return -1; + + list_for_each_entry(p, &inventory_plugins_list, node) { + pe.plugins[i] = p->name; + i++; + } + } + he->plugins_entry = &pe; + ret = pb_write_one(img, he, PB_INVENTORY); + free_inventory_plugins_list(); + xfree(pe.plugins); + xfree(he->root_ids); close_image(img); if (ret < 0) diff --git a/criu/include/image.h b/criu/include/image.h index a17aae35c2..afa7d5e12f 100644 --- a/criu/include/image.h +++ b/criu/include/image.h @@ -177,4 +177,8 @@ extern int read_img_str(struct cr_img *, char **pstr, int size); extern void close_image(struct cr_img *); +extern int add_inventory_plugin(const char *name); +extern int check_inventory_plugins(void); +extern bool check_and_remove_inventory_plugin(const char *name, size_t n); + #endif /* __CR_IMAGE_H__ */ diff --git a/criu/plugin.c b/criu/plugin.c index 58b5ea5bfe..65e79a0692 100644 --- a/criu/plugin.c +++ b/criu/plugin.c @@ -256,6 +256,9 @@ int cr_plugin_init(int stage) goto err; } + if (stage == CR_PLUGIN_STAGE__RESTORE && check_inventory_plugins()) + goto err; + exit_code = 0; err: closedir(d); diff --git a/images/inventory.proto b/images/inventory.proto index a735bad1d0..7f655031bc 100644 --- a/images/inventory.proto +++ b/images/inventory.proto @@ -10,6 +10,13 @@ enum lsmtype { APPARMOR = 2; } +// It is not possible to distinguish between an empty repeated field +// and unset repeated field. To solve this problem and provide backwards +// compabibility, we use the 'plugins_entry' message. +message plugins_entry { + repeated string plugins = 12; +}; + message inventory_entry { required uint32 img_version = 1; optional bool fdinfo_per_id = 2; @@ -21,4 +28,5 @@ message inventory_entry { optional uint32 pre_dump_mode = 9; optional bool tcp_close = 10; optional uint32 network_lock_method = 11; + optional plugins_entry plugins_entry = 12; } diff --git a/plugins/amdgpu/amdgpu_plugin.c b/plugins/amdgpu/amdgpu_plugin.c index b56ba6d140..96c0861628 100644 --- a/plugins/amdgpu/amdgpu_plugin.c +++ b/plugins/amdgpu/amdgpu_plugin.c @@ -60,6 +60,10 @@ static LIST_HEAD(update_vma_info_list); size_t kfd_max_buffer_size; +bool plugin_added_to_inventory = false; + +bool plugin_disabled = false; + /**************************************************************************************************/ /* Call ioctl, restarting if it is interrupted */ @@ -332,6 +336,13 @@ void getenv_size_t(const char *var, size_t *value) int amdgpu_plugin_init(int stage) { + if (stage == CR_PLUGIN_STAGE__RESTORE) { + if (!check_and_remove_inventory_plugin(CR_PLUGIN_DESC.name, strlen(CR_PLUGIN_DESC.name))) { + plugin_disabled = true; + return 0; + } + } + pr_info("initialized: %s (AMDGPU/KFD)\n", CR_PLUGIN_DESC.name); topology_init(&src_topology); @@ -365,6 +376,9 @@ int amdgpu_plugin_init(int stage) void amdgpu_plugin_fini(int stage, int ret) { + if (plugin_disabled) + return; + pr_info("finished %s (AMDGPU/KFD)\n", CR_PLUGIN_DESC.name); if (stage == CR_PLUGIN_STAGE__RESTORE) @@ -414,6 +428,14 @@ int amdgpu_plugin_handle_device_vma(int fd, const struct stat *st_buf) if (ret) pr_perror("%s(), Can't handle VMAs of input device", __func__); + if (!ret && !plugin_added_to_inventory) { + ret = add_inventory_plugin(CR_PLUGIN_DESC.name); + if (ret) + pr_err("Failed to add AMDGPU plugin to inventory image\n"); + else + plugin_added_to_inventory = true; + } + return ret; } CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__HANDLE_DEVICE_VMA, amdgpu_plugin_handle_device_vma) @@ -1540,6 +1562,9 @@ int amdgpu_plugin_restore_file(int id) size_t img_size; FILE *img_fp = NULL; + if (plugin_disabled) + return -ENOTSUP; + pr_info("Initialized kfd plugin restorer with ID = %d\n", id); snprintf(img_path, sizeof(img_path), IMG_KFD_FILE, id); @@ -1746,6 +1771,9 @@ int amdgpu_plugin_update_vmamap(const char *in_path, const uint64_t addr, const char *p_end; bool is_kfd = false, is_renderD = false; + if (plugin_disabled) + return -ENOTSUP; + plugin_log_msg("Enter %s\n", __func__); strncpy(path, in_path, sizeof(path)); @@ -1805,6 +1833,9 @@ int amdgpu_plugin_resume_devices_late(int target_pid) struct kfd_ioctl_criu_args args = { 0 }; int fd, exit_code = 0; + if (plugin_disabled) + return -ENOTSUP; + pr_info("Inside %s for target pid = %d\n", __func__, target_pid); fd = open(AMDGPU_KFD_DEVICE, O_RDWR | O_CLOEXEC); diff --git a/plugins/cuda/cuda_plugin.c b/plugins/cuda/cuda_plugin.c index 23c3f4b1ab..c4fc67fa9f 100644 --- a/plugins/cuda/cuda_plugin.c +++ b/plugins/cuda/cuda_plugin.c @@ -38,6 +38,8 @@ */ bool plugin_disabled = false; +bool plugin_added_to_inventory = false; + struct pid_info { int pid; char checkpointed; @@ -319,7 +321,7 @@ int cuda_plugin_checkpoint_devices(int pid) k_rtsigset_t save_sigset; if (plugin_disabled) { - return 0; + return -ENOTSUP; } restore_tid = get_cuda_restore_tid(pid); @@ -354,6 +356,15 @@ int cuda_plugin_checkpoint_devices(int pid) pr_err("Failed to restore process after error %s on pid %d\n", msg_buf, pid); } } + + if (!status && !plugin_added_to_inventory) { + status = add_inventory_plugin(CR_PLUGIN_DESC.name); + if (status) + pr_err("Failed to add CUDA plugin to inventory image\n"); + else + plugin_added_to_inventory = true; + } + interrupt: int_ret = interrupt_restore_thread(restore_tid, &save_sigset); @@ -367,7 +378,7 @@ int cuda_plugin_pause_devices(int pid) char msg_buf[CUDA_CKPT_BUF_SIZE]; if (plugin_disabled) { - return 0; + return -ENOTSUP; } restore_tid = get_cuda_restore_tid(pid); @@ -463,6 +474,13 @@ int cuda_plugin_init(int stage) { int ret; + if (stage == CR_PLUGIN_STAGE__RESTORE) { + if (!check_and_remove_inventory_plugin(CR_PLUGIN_DESC.name, strlen(CR_PLUGIN_DESC.name))) { + plugin_disabled = true; + return 0; + } + } + if (!fault_injected(FI_PLUGIN_CUDA_FORCE_ENABLE) && access("/dev/nvidiactl", F_OK)) { pr_info("/dev/nvidiactl doesn't exist. The CUDA plugin is disabled.\n"); plugin_disabled = true;