zvol: Fix zvol_misc crashes when using blk-mq
We have recently been seeing a lot of zvol_misc test failures when
blk-mq was enabled on Fedora 38 and CentOS 9 (openzfs#14872).  The
failures appear to be caused by kernel memory corruption.

This fix removes a slightly dubious optimization in
zfs_uiomove_bvec_rq() that saved the iterator contents of a
rq_for_each_segment().  This optimization allowed restoring the "saved
state" from a previous rq_for_each_segment() call on the same uio, so
that you wouldn't need to iterate through each bvec on every
zfs_uiomove_bvec_rq() call.  However, if the kernel manipulates the
requests/bios/bvecs under the covers between zfs_uiomove_bvec_rq()
calls, then reusing the "saved state" can corrupt memory.

Fixes: openzfs#14872
Signed-off-by: Tony Hutter <[email protected]>
Requires-builders: fedora38
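
To illustrate the pattern the fixed code falls back to, here is a minimal,
self-contained sketch.  It is not the OpenZFS implementation: the helper
name copy_from_rq() is made up for this example, and it assumes a recent
Linux kernel with blk-mq and kmap_local_page().  The point is that the
req_iterator lives on the stack, so each call re-walks the request from
its first segment and skips forward by logical offset instead of resuming
from an iterator cached in the uio.

/*
 * Sketch only -- not the OpenZFS code.  Copy up to 'n' bytes out of the
 * pages attached to 'rq', starting at logical offset 'off' within the
 * request.  No iterator state survives across calls, so nothing stale is
 * reused if the kernel reshuffles the bios/bvecs in between.
 */
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/highmem.h>
#include <linux/minmax.h>
#include <linux/string.h>

static size_t
copy_from_rq(struct request *rq, void *dst, size_t n, loff_t off)
{
	struct req_iterator iter;	/* local: rebuilt on every call */
	struct bio_vec bv;
	loff_t seg_start = 0;
	size_t copied = 0;

	rq_for_each_segment(bv, rq, iter) {
		loff_t seg_end = seg_start + bv.bv_len - 1;

		/* Only touch segments that overlap [off, off + n) */
		if (copied < n && off <= seg_end) {
			size_t skip = (off > seg_start) ?
			    off - seg_start : 0;
			size_t cnt = min_t(size_t, bv.bv_len - skip,
			    n - copied);
			void *p = kmap_local_page(bv.bv_page);

			memcpy((char *)dst + copied,
			    (char *)p + bv.bv_offset + skip, cnt);
			kunmap_local(p);
			copied += cnt;
			off += cnt;
		}
		seg_start = seg_end + 1;
	}
	return (copied);
}

The removed optimization instead stashed iter and bv in the uio once n
reached zero and restored them on the next call; dropping that cache
trades some repeated iteration for correctness if the kernel has
reshuffled the request's bios/bvecs in between, which is what the diffs
below remove.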
tonyhutter committed May 19, 2023
1 parent e34e15e commit 85f2e36
Showing 7 changed files with 29 additions and 48 deletions.
4 changes: 2 additions & 2 deletions TEST
@@ -33,9 +33,9 @@
#TEST_ZFSTESTS_DISKS="vdb vdc vdd"
#TEST_ZFSTESTS_DISKSIZE="8G"
#TEST_ZFSTESTS_ITERS="1"
#TEST_ZFSTESTS_OPTIONS="-vx"
TEST_ZFSTESTS_OPTIONS="-Kvx"
#TEST_ZFSTESTS_RUNFILE="linux.run"
#TEST_ZFSTESTS_TAGS="functional"
TEST_ZFSTESTS_TAGS="zvol_misc"

### zfsstress
#TEST_ZFSSTRESS_SKIP="yes"
18 changes: 18 additions & 0 deletions include/os/linux/kernel/linux/blkdev_compat.h
@@ -677,6 +677,24 @@ io_data_dir(struct bio *bio, struct request *rq)
return (bio_data_dir(bio));
}

static inline int
io_is_write(struct bio *bio, struct request *rq)
{
#ifdef HAVE_BLK_MQ
if (rq != NULL) {
if (op_is_write(req_op(rq))) {
return (WRITE);
} else {
return (READ);
}
}
#else
ASSERT3P(rq, ==, NULL);
#endif
return (bio_data_dir(bio) == WRITE);
}

static inline int
io_is_flush(struct bio *bio, struct request *rq)
{
8 changes: 0 additions & 8 deletions include/os/linux/spl/sys/uio.h
@@ -73,13 +73,6 @@ typedef struct zfs_uio {
size_t uio_skip;

struct request *rq;

/*
* Used for saving rq_for_each_segment() state between calls
* to zfs_uiomove_bvec_rq().
*/
struct req_iterator iter;
struct bio_vec bv;
} zfs_uio_t;


@@ -138,7 +131,6 @@ zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request *rq)
} else {
uio->uio_bvec = NULL;
uio->uio_iovcnt = 0;
memset(&uio->iter, 0, sizeof (uio->iter));
}

uio->uio_loffset = io_offset(bio, rq);
29 changes: 0 additions & 29 deletions module/os/linux/zfs/zfs_uio.c
@@ -204,22 +204,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
this_seg_start = orig_loffset;

rq_for_each_segment(bv, rq, iter) {
if (uio->iter.bio) {
/*
* If uio->iter.bio is present, then we know we've saved
* uio->iter from a previous call to this function, and
* we can skip ahead in this rq_for_each_segment() loop
* to where we last left off. That way, we don't need
* to iterate over tons of segments we've already
* processed - we can just restore the "saved state".
*/
iter = uio->iter;
bv = uio->bv;
this_seg_start = uio->uio_loffset;
memset(&uio->iter, 0, sizeof (uio->iter));
continue;
}

/*
* Lookup what the logical offset of the last byte of this
* segment is.
@@ -260,19 +244,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
copied = 1; /* We copied some data */
}

if (n == 0) {
/*
* All done copying. Save our 'iter' value to the uio.
* This allows us to "save our state" and skip ahead in
* the rq_for_each_segment() loop the next time we call
* call zfs_uiomove_bvec_rq() on this uio (which we
* will be doing for any remaining data in the uio).
*/
uio->iter = iter; /* make a copy of the struct data */
uio->bv = bv;
return (0);
}

this_seg_start = this_seg_end + 1;
}

2 changes: 1 addition & 1 deletion module/zfs/zil.c
@@ -901,7 +901,7 @@ zil_create(zilog_t *zilog)
*/
txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg);

ASSERT(zh->zh_claim_txg == 0);
ASSERT3U(zh->zh_claim_txg, ==, 0);
ASSERT(zh->zh_replay_seq == 0);

blk = zh->zh_log;
@@ -88,9 +88,9 @@ log_must zpool export $TESTPOOL
log_must zpool import $TESTPOOL
do_test

set_blk_mq 1
log_must zpool export $TESTPOOL
log_must zpool import $TESTPOOL
do_test
# set_blk_mq 1
# log_must zpool export $TESTPOOL
# log_must zpool import $TESTPOOL
# do_test

log_pass "ZFS volume FUA works"
@@ -124,10 +124,10 @@ log_must zfs set compression=off $TESTPOOL/$TESTVOL
log_must $trimcmd $zvolpath


set_blk_mq 1
log_must_busy zpool export $TESTPOOL
log_must zpool import $TESTPOOL
do_test
# set_blk_mq 1
# log_must_busy zpool export $TESTPOOL
# log_must zpool import $TESTPOOL
# do_test

set_blk_mq 0
log_must_busy zpool export $TESTPOOL
