diff --git a/config/kernel-blk-queue.m4 b/config/kernel-blk-queue.m4 index 2f0b386e6637..a064140f337a 100644 --- a/config/kernel-blk-queue.m4 +++ b/config/kernel-blk-queue.m4 @@ -25,6 +25,8 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_PLUG], [ dnl # dnl # 2.6.32 - 4.11: statically allocated bdi in request_queue dnl # 4.12: dynamically allocated bdi in request_queue +dnl # 6.11: bdi no longer available through request_queue, so get it from +dnl # the gendisk attached to the queue dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_BDI], [ ZFS_LINUX_TEST_SRC([blk_queue_bdi], [ @@ -47,6 +49,30 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_BDI], [ ]) ]) +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISK_BDI], [ + ZFS_LINUX_TEST_SRC([blk_queue_disk_bdi], [ + #include + #include + ], [ + struct request_queue q; + struct gendisk disk; + struct backing_dev_info bdi __attribute__ ((unused)); + q.disk = &disk; + q.disk->bdi = &bdi; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_DISK_BDI], [ + AC_MSG_CHECKING([whether backing_dev_info is available through queue gendisk]) + ZFS_LINUX_TEST_RESULT([blk_queue_disk_bdi], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BLK_QUEUE_DISK_BDI, 1, + [backing_dev_info is available through queue gendisk]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + dnl # dnl # 5.9: added blk_queue_update_readahead(), dnl # 5.15: renamed to disk_update_readahead() @@ -407,6 +433,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_MQ], [ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE], [ ZFS_AC_KERNEL_SRC_BLK_QUEUE_PLUG ZFS_AC_KERNEL_SRC_BLK_QUEUE_BDI + ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISK_BDI ZFS_AC_KERNEL_SRC_BLK_QUEUE_UPDATE_READAHEAD ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISCARD ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE @@ -421,6 +448,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE], [ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE], [ ZFS_AC_KERNEL_BLK_QUEUE_PLUG ZFS_AC_KERNEL_BLK_QUEUE_BDI + ZFS_AC_KERNEL_BLK_QUEUE_DISK_BDI ZFS_AC_KERNEL_BLK_QUEUE_UPDATE_READAHEAD ZFS_AC_KERNEL_BLK_QUEUE_DISCARD ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE diff --git 
a/config/kernel-make-request-fn.m4 b/config/kernel-make-request-fn.m4 index 9813ad2fb3f3..4c54bdd6d4a2 100644 --- a/config/kernel-make-request-fn.m4 +++ b/config/kernel-make-request-fn.m4 @@ -58,6 +58,13 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [ disk = blk_alloc_disk(lim, NUMA_NO_NODE); ]) + ZFS_LINUX_TEST_SRC([blkdev_queue_limits_features], [ + #include + ],[ + struct queue_limits *lim = NULL; + lim->features = 0; + ]) + ZFS_LINUX_TEST_SRC([blk_cleanup_disk], [ #include ],[ @@ -114,6 +121,20 @@ AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [ AC_MSG_RESULT(yes) AC_DEFINE([HAVE_BLK_ALLOC_DISK_2ARG], 1, [blk_alloc_disk() exists and takes 2 args]) + dnl # + dnl # Linux 6.11 API change: + dnl # struct queue_limits gains a 'features' field, + dnl # used to set flushing options + dnl # + AC_MSG_CHECKING([whether struct queue_limits has a features field]) + ZFS_LINUX_TEST_RESULT([blkdev_queue_limits_features], [ + AC_MSG_RESULT(yes) + AC_DEFINE([HAVE_BLKDEV_QUEUE_LIMITS_FEATURES], 1, + [struct queue_limits has a features field]) + ], [ + AC_MSG_RESULT(no) + ]) + dnl # dnl # 5.20 API change, dnl # Removed blk_cleanup_disk(), put_disk() should be used. 
diff --git a/config/kernel-mm-page-size.m4 b/config/kernel-mm-page-size.m4 deleted file mode 100644 index d5ebd926986a..000000000000 --- a/config/kernel-mm-page-size.m4 +++ /dev/null @@ -1,17 +0,0 @@ -AC_DEFUN([ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE], [ - ZFS_LINUX_TEST_SRC([page_size], [ - #include - ],[ - unsigned long s; - s = page_size(NULL); - ]) -]) -AC_DEFUN([ZFS_AC_KERNEL_MM_PAGE_SIZE], [ - AC_MSG_CHECKING([whether page_size() is available]) - ZFS_LINUX_TEST_RESULT([page_size], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_MM_PAGE_SIZE, 1, [page_size() is available]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-mm-pagemap.m4 b/config/kernel-mm-pagemap.m4 new file mode 100644 index 000000000000..466b6fa07d9a --- /dev/null +++ b/config/kernel-mm-pagemap.m4 @@ -0,0 +1,36 @@ +AC_DEFUN([ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE], [ + ZFS_LINUX_TEST_SRC([page_size], [ + #include + ],[ + unsigned long s; + s = page_size(NULL); + ]) +]) +AC_DEFUN([ZFS_AC_KERNEL_MM_PAGE_SIZE], [ + AC_MSG_CHECKING([whether page_size() is available]) + ZFS_LINUX_TEST_RESULT([page_size], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_MM_PAGE_SIZE, 1, [page_size() is available]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + + +AC_DEFUN([ZFS_AC_KERNEL_SRC_MM_PAGE_MAPPING], [ + ZFS_LINUX_TEST_SRC([page_mapping], [ + #include + ],[ + struct page *p = NULL; + struct address_space *m = page_mapping(NULL); + ]) +]) +AC_DEFUN([ZFS_AC_KERNEL_MM_PAGE_MAPPING], [ + AC_MSG_CHECKING([whether page_mapping() is available]) + ZFS_LINUX_TEST_RESULT([page_mapping], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_MM_PAGE_MAPPING, 1, [page_mapping() is available]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel-register_sysctl_table.m4 b/config/kernel-register_sysctl_table.m4 index a5e934f56d29..12ffe9d95142 100644 --- a/config/kernel-register_sysctl_table.m4 +++ b/config/kernel-register_sysctl_table.m4 @@ -25,3 +25,62 @@ AC_DEFUN([ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE], [ AC_MSG_RESULT([no]) ]) ]) + +dnl # +dnl # Linux 6.11 
register_sysctl() enforces that sysctl tables no longer +dnl # supply a sentinel end-of-table element. 6.6 introduces +dnl # register_sysctl_sz() to enable callers to choose, so we use it if +dnl # available for backward compatibility. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_SZ], [ + ZFS_LINUX_TEST_SRC([has_register_sysctl_sz], [ + #include + ],[ + struct ctl_table test_table[] __attribute__((unused)) = {0}; + register_sysctl_sz("", test_table, 0); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_REGISTER_SYSCTL_SZ], [ + AC_MSG_CHECKING([whether register_sysctl_sz exists]) + ZFS_LINUX_TEST_RESULT([has_register_sysctl_sz], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_REGISTER_SYSCTL_SZ, 1, + [register_sysctl_sz exists]) + ],[ + AC_MSG_RESULT([no]) + ]) +]) + +dnl # +dnl # Linux 6.11 makes const the ctl_table arg of proc_handler +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_PROC_HANDLER_CTL_TABLE_CONST], [ + ZFS_LINUX_TEST_SRC([has_proc_handler_ctl_table_const], [ + #include + + static int test_handler( + const struct ctl_table *ctl __attribute((unused)), + int write __attribute((unused)), + void *buffer __attribute((unused)), + size_t *lenp __attribute((unused)), + loff_t *ppos __attribute((unused))) + { + return (0); + } + ], [ + proc_handler *ph __attribute((unused)) = + &test_handler; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_PROC_HANDLER_CTL_TABLE_CONST], [ + AC_MSG_CHECKING([whether proc_handler ctl_table arg is const]) + ZFS_LINUX_TEST_RESULT([has_proc_handler_ctl_table_const], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_PROC_HANDLER_CTL_TABLE_CONST, 1, + [proc_handler ctl_table arg is const]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index f282ccd8b9d7..4d471358d242 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -167,9 +167,12 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_WRITEPAGE_T ZFS_AC_KERNEL_SRC_RECLAIMED ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE + ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_SZ + 
ZFS_AC_KERNEL_SRC_PROC_HANDLER_CTL_TABLE_CONST ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ ZFS_AC_KERNEL_SRC_SYNC_BDEV ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE + ZFS_AC_KERNEL_SRC_MM_PAGE_MAPPING case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE @@ -319,9 +322,12 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_WRITEPAGE_T ZFS_AC_KERNEL_RECLAIMED ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE + ZFS_AC_KERNEL_REGISTER_SYSCTL_SZ + ZFS_AC_KERNEL_PROC_HANDLER_CTL_TABLE_CONST ZFS_AC_KERNEL_COPY_SPLICE_READ ZFS_AC_KERNEL_SYNC_BDEV ZFS_AC_KERNEL_MM_PAGE_SIZE + ZFS_AC_KERNEL_MM_PAGE_MAPPING case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_CPU_HAS_FEATURE diff --git a/include/os/linux/kernel/linux/blkdev_compat.h b/include/os/linux/kernel/linux/blkdev_compat.h index 658f546213de..c2e818b4d4ee 100644 --- a/include/os/linux/kernel/linux/blkdev_compat.h +++ b/include/os/linux/kernel/linux/blkdev_compat.h @@ -57,6 +57,11 @@ blk_queue_flag_clear(unsigned int flag, struct request_queue *q) #endif /* + * 6.11 API + * Setting the flush flags directly is no longer possible; flush flags are set + * on the queue_limits structure and passed to blk_alloc_disk(). In this case + * we remove this function entirely. + * * 4.7 API, * The blk_queue_write_cache() interface has replaced blk_queue_flush() * interface. However, the new interface is GPL-only thus we implement @@ -68,31 +73,33 @@ blk_queue_flag_clear(unsigned int flag, struct request_queue *q) * new one is GPL-only. Thus if the GPL-only version is detected we * implement our own trivial helper.
*/ +#if !defined(HAVE_BLK_ALLOC_DISK_2ARG) || \ + !defined(HAVE_BLKDEV_QUEUE_LIMITS_FEATURES) static inline void -blk_queue_set_write_cache(struct request_queue *q, bool wc, bool fua) +blk_queue_set_write_cache(struct request_queue *q, bool on) { #if defined(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY) - if (wc) + if (on) { blk_queue_flag_set(QUEUE_FLAG_WC, q); - else - blk_queue_flag_clear(QUEUE_FLAG_WC, q); - if (fua) blk_queue_flag_set(QUEUE_FLAG_FUA, q); - else + } else { + blk_queue_flag_clear(QUEUE_FLAG_WC, q); blk_queue_flag_clear(QUEUE_FLAG_FUA, q); + } #elif defined(HAVE_BLK_QUEUE_WRITE_CACHE) - blk_queue_write_cache(q, wc, fua); + blk_queue_write_cache(q, on, on); #elif defined(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY) - if (wc) - q->flush_flags |= REQ_FLUSH; - if (fua) - q->flush_flags |= REQ_FUA; + if (on) + q->flush_flags |= REQ_FLUSH | REQ_FUA; + else + q->flush_flags &= ~(REQ_FLUSH | REQ_FUA); #elif defined(HAVE_BLK_QUEUE_FLUSH) - blk_queue_flush(q, (wc ? REQ_FLUSH : 0) | (fua ? REQ_FUA : 0)); + blk_queue_flush(q, on ? (REQ_FLUSH | REQ_FUA) : 0); #else #error "Unsupported kernel" #endif } +#endif /* !HAVE_BLK_ALLOC_DISK_2ARG || !HAVE_BLKDEV_QUEUE_LIMITS_FEATURES */ /* * Detect if a device has a write cache. Used to set the intial value for the @@ -126,8 +133,10 @@ blk_queue_set_read_ahead(struct request_queue *q, unsigned long ra_pages) { #if !defined(HAVE_BLK_QUEUE_UPDATE_READAHEAD) && \ !defined(HAVE_DISK_UPDATE_READAHEAD) -#ifdef HAVE_BLK_QUEUE_BDI_DYNAMIC +#if defined(HAVE_BLK_QUEUE_BDI_DYNAMIC) q->backing_dev_info->ra_pages = ra_pages; +#elif defined(HAVE_BLK_QUEUE_DISK_BDI) + q->disk->bdi->ra_pages = ra_pages; #else q->backing_dev_info.ra_pages = ra_pages; #endif diff --git a/include/os/linux/kernel/linux/mm_compat.h b/include/os/linux/kernel/linux/mm_compat.h index 40056c68d6dd..817f6df422de 100644 --- a/include/os/linux/kernel/linux/mm_compat.h +++ b/include/os/linux/kernel/linux/mm_compat.h @@ -21,16 +21,23 @@ /* * Copyright (c) 2023, 2024, Klara Inc. 
+ * Copyright (c) 2024, Rob Norris */ #ifndef _ZFS_MM_COMPAT_H #define _ZFS_MM_COMPAT_H #include +#include /* 5.4 introduced page_size(). Older kernels can use a trivial macro instead */ #ifndef HAVE_MM_PAGE_SIZE #define page_size(p) ((unsigned long)(PAGE_SIZE << compound_order(p))) #endif +/* 6.11 removed page_mapping(). A simple wrapper around folio_mapping() works */ +#ifndef HAVE_MM_PAGE_MAPPING +#define page_mapping(p) folio_mapping(page_folio(p)) +#endif + #endif /* _ZFS_MM_COMPAT_H */ diff --git a/module/os/linux/spl/spl-proc.c b/module/os/linux/spl/spl-proc.c index f0f929d3ce90..2c0cdd9febf5 100644 --- a/module/os/linux/spl/spl-proc.c +++ b/module/os/linux/spl/spl-proc.c @@ -22,6 +22,9 @@ * * Solaris Porting Layer (SPL) Proc Implementation. */ +/* + * Copyright (c) 2024, Rob Norris + */ #include #include @@ -43,6 +46,12 @@ typedef struct ctl_table __no_const spl_ctl_table; typedef struct ctl_table spl_ctl_table; #endif +#ifdef HAVE_PROC_HANDLER_CTL_TABLE_CONST +#define CONST_CTL_TABLE const struct ctl_table +#else +#define CONST_CTL_TABLE struct ctl_table +#endif + static unsigned long table_min = 0; static unsigned long table_max = ~0; @@ -60,7 +69,7 @@ struct proc_dir_entry *proc_spl_kstat = NULL; #ifdef DEBUG_KMEM static int -proc_domemused(struct ctl_table *table, int write, +proc_domemused(CONST_CTL_TABLE *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int rc = 0; @@ -88,7 +97,7 @@ proc_domemused(struct ctl_table *table, int write, #endif /* DEBUG_KMEM */ static int -proc_doslab(struct ctl_table *table, int write, +proc_doslab(CONST_CTL_TABLE *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int rc = 0; @@ -135,7 +144,7 @@ proc_doslab(struct ctl_table *table, int write, } static int -proc_dohostid(struct ctl_table *table, int write, +proc_dohostid(CONST_CTL_TABLE *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char *end, str[32]; @@ -688,6 +697,37 @@ static void spl_proc_cleanup(void) } } 
+#ifndef HAVE_REGISTER_SYSCTL_TABLE + +/* + * Traditionally, struct ctl_table arrays have been terminated by an "empty" + * sentinel element (specifically, one with .procname == NULL). + * + * Linux 6.6 began migrating away from this, adding register_sysctl_sz() so + * that callers could provide the size directly, and redefining + * register_sysctl() to just call register_sysctl_sz() with the array size. It + * retained support for the terminating element so that existing callers would + * continue to work. + * + * Linux 6.11 removed support for the terminating element, instead interpreting + * it as a real malformed element, and rejecting it. + * + * In order to continue to support older kernels, we retain the terminating + * sentinel element for our sysctl tables, but instead detect availability of + * register_sysctl_sz(). If it exists, we pass it the array size -1, stopping + * the kernel from trying to process the terminator. For pre-6.6 kernels that + * don't have register_sysctl_sz(), we just use register_sysctl(), which can + * handle the terminating element as it always has.
+ */ +#ifdef HAVE_REGISTER_SYSCTL_SZ +#define spl_proc_register_sysctl(p, t) \ + register_sysctl_sz(p, t, ARRAY_SIZE(t)-1) +#else +#define spl_proc_register_sysctl(p, t) \ + register_sysctl(p, t) +#endif +#endif + int spl_proc_init(void) { @@ -698,16 +738,17 @@ spl_proc_init(void) if (spl_header == NULL) return (-EUNATCH); #else - spl_header = register_sysctl("kernel/spl", spl_table); + spl_header = spl_proc_register_sysctl("kernel/spl", spl_table); if (spl_header == NULL) return (-EUNATCH); - spl_kmem = register_sysctl("kernel/spl/kmem", spl_kmem_table); + spl_kmem = spl_proc_register_sysctl("kernel/spl/kmem", spl_kmem_table); if (spl_kmem == NULL) { rc = -EUNATCH; goto out; } - spl_kstat = register_sysctl("kernel/spl/kstat", spl_kstat_table); + spl_kstat = spl_proc_register_sysctl("kernel/spl/kstat", + spl_kstat_table); if (spl_kstat == NULL) { rc = -EUNATCH; goto out; diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index 1cecad9f7755..8061169c3293 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -69,6 +69,7 @@ #include #include #include +#include /* * Programming rules. diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index 83f80f62aee7..2beec6436bff 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2012, 2020 by Delphix. All rights reserved. + * Copyright (c) 2024, Rob Norris * Copyright (c) 2024, Klara, Inc. */ @@ -1089,11 +1090,42 @@ static const struct block_device_operations zvol_ops = { #endif }; +/* + * Since 6.9, Linux has been removing queue limit setters in favour of an + * initial queue_limits struct applied when the device is open. Since 6.11, + * queue_limits is being extended to allow more things to be applied when the + * device is open. Setters are also being removed for this. 
+ * + * For OpenZFS, this means that depending on kernel version, some options may + * be set up before the device is open, and some applied to an open device + * (queue) after the fact. + * + * We manage this complexity by having our own limits struct, + * zvol_queue_limits_t, in which we carry any queue config that we're + * interested in setting. This structure is the same on all kernels. + * + * These limits are then applied to the queue at device open time by the most + * appropriate method for the kernel. + * + * zvol_queue_limits_convert() is used on 6.9+ (where the two-arg form of + * blk_alloc_disk() exists). This converts our limits struct to a proper Linux + * struct queue_limits, and passes it in. Any fields added in later kernels are + * (obviously) not set up here. + * + * zvol_queue_limits_apply() is called on all kernel versions after the queue + * is created, and applies any remaining config. Before 6.9 that will be + * everything, via setter methods. After 6.9 that will be whatever couldn't be + * put into struct queue_limits. (This implies that zvol_queue_limits_apply() + * will always be a no-op on the latest kernel we support). 
+ */ typedef struct zvol_queue_limits { unsigned int zql_max_hw_sectors; unsigned short zql_max_segments; unsigned int zql_max_segment_size; unsigned int zql_io_opt; + unsigned int zql_physical_block_size; + unsigned int zql_max_discard_sectors; + unsigned int zql_discard_granularity; } zvol_queue_limits_t; static void @@ -1162,6 +1194,11 @@ zvol_queue_limits_init(zvol_queue_limits_t *limits, zvol_state_t *zv, } limits->zql_io_opt = zv->zv_volblocksize; + + limits->zql_physical_block_size = zv->zv_volblocksize; + limits->zql_max_discard_sectors = + (zvol_max_discard_blocks * zv->zv_volblocksize) >> 9; + limits->zql_discard_granularity = zv->zv_volblocksize; } #ifdef HAVE_BLK_ALLOC_DISK_2ARG @@ -1174,18 +1211,34 @@ zvol_queue_limits_convert(zvol_queue_limits_t *limits, qlimits->max_segments = limits->zql_max_segments; qlimits->max_segment_size = limits->zql_max_segment_size; qlimits->io_opt = limits->zql_io_opt; + qlimits->physical_block_size = limits->zql_physical_block_size; + qlimits->max_discard_sectors = limits->zql_max_discard_sectors; + qlimits->discard_granularity = limits->zql_discard_granularity; +#ifdef HAVE_BLKDEV_QUEUE_LIMITS_FEATURES + qlimits->features = + BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_IO_STAT; +#endif } -#else +#endif + static void zvol_queue_limits_apply(zvol_queue_limits_t *limits, struct request_queue *queue) { +#ifndef HAVE_BLK_ALLOC_DISK_2ARG blk_queue_max_hw_sectors(queue, limits->zql_max_hw_sectors); blk_queue_max_segments(queue, limits->zql_max_segments); blk_queue_max_segment_size(queue, limits->zql_max_segment_size); blk_queue_io_opt(queue, limits->zql_io_opt); -} + blk_queue_physical_block_size(queue, limits->zql_physical_block_size); + blk_queue_max_discard_sectors(queue, limits->zql_max_discard_sectors); + blk_queue_discard_granularity(queue, limits->zql_discard_granularity); +#endif +#ifndef HAVE_BLKDEV_QUEUE_LIMITS_FEATURES + blk_queue_set_write_cache(queue, B_TRUE); + blk_queue_flag_set(QUEUE_FLAG_IO_STAT, queue); 
#endif +} static int zvol_alloc_non_blk_mq(struct zvol_state_os *zso, zvol_queue_limits_t *limits) @@ -1223,7 +1276,6 @@ zvol_alloc_non_blk_mq(struct zvol_state_os *zso, zvol_queue_limits_t *limits) } zso->zvo_disk->queue = zso->zvo_queue; - zvol_queue_limits_apply(limits, zso->zvo_queue); #endif /* HAVE_BLK_ALLOC_DISK */ #else zso->zvo_queue = blk_generic_alloc_queue(zvol_request, NUMA_NO_NODE); @@ -1237,8 +1289,10 @@ zvol_alloc_non_blk_mq(struct zvol_state_os *zso, zvol_queue_limits_t *limits) } zso->zvo_disk->queue = zso->zvo_queue; - zvol_queue_limits_apply(limits, zso->zvo_queue); #endif /* HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */ + + zvol_queue_limits_apply(limits, zso->zvo_queue); + return (0); } @@ -1260,7 +1314,6 @@ zvol_alloc_blk_mq(zvol_state_t *zv, zvol_queue_limits_t *limits) return (1); } zso->zvo_queue = zso->zvo_disk->queue; - zvol_queue_limits_apply(limits, zso->zvo_queue); zso->zvo_disk->minors = ZVOL_MINORS; #elif defined(HAVE_BLK_ALLOC_DISK_2ARG) struct queue_limits qlimits; @@ -1291,10 +1344,11 @@ zvol_alloc_blk_mq(zvol_state_t *zv, zvol_queue_limits_t *limits) /* Our queue is now created, assign it to our disk */ zso->zvo_disk->queue = zso->zvo_queue; - zvol_queue_limits_apply(limits, zso->zvo_queue); - #endif + + zvol_queue_limits_apply(limits, zso->zvo_queue); #endif + return (0); } @@ -1360,8 +1414,6 @@ zvol_alloc(dev_t dev, const char *name) if (ret != 0) goto out_kmem; - blk_queue_set_write_cache(zso->zvo_queue, B_TRUE, B_TRUE); - /* Limit read-ahead to a single page to prevent over-prefetching. 
*/ blk_queue_set_read_ahead(zso->zvo_queue, 1); @@ -1370,9 +1422,6 @@ zvol_alloc(dev_t dev, const char *name) blk_queue_flag_set(QUEUE_FLAG_NOMERGES, zso->zvo_queue); } - /* Enable /proc/diskstats */ - blk_queue_flag_set(QUEUE_FLAG_IO_STAT, zso->zvo_queue); - zso->zvo_queue->queuedata = zv; zso->zvo_dev = dev; zv->zv_open_count = 0; @@ -1639,14 +1688,6 @@ zvol_os_create_minor(const char *name) set_capacity(zv->zv_zso->zvo_disk, zv->zv_volsize >> 9); - - - blk_queue_physical_block_size(zv->zv_zso->zvo_queue, - zv->zv_volblocksize); - blk_queue_max_discard_sectors(zv->zv_zso->zvo_queue, - (zvol_max_discard_blocks * zv->zv_volblocksize) >> 9); - blk_queue_discard_granularity(zv->zv_zso->zvo_queue, - zv->zv_volblocksize); #ifdef QUEUE_FLAG_DISCARD blk_queue_flag_set(QUEUE_FLAG_DISCARD, zv->zv_zso->zvo_queue); #endif