Skip to content

Commit

Permalink
Log I/Os longer than zio_delay_max (30s default)
Browse files Browse the repository at this point in the history
There have been reports of ZFS deadlocking due to what appears to
be a lost IO.  This patch addes some debugging to determine the
exact state of the IO which neither 1) completed, 2) failed, or
3) timed out after zio_delay_max (30) seconds.

This information will be logged using the ZFS FMA infrastructure
as a 'delay' event and posted to the internal zevent log.  By
default the last 64 events will be kept in the log but the limit
is configurable via the zfs_zevent_len_max module option.

To dump the contents of the log use the 'zpool events -v' command
and look for the resource.fs.zfs.delay event.  It will include
various information about the pool, vdev, and zio which may shed
some light on the issue.

In the context of this change the 120 second kernel blocked thread
watchdog has been disabled for synchronous IOs.

Signed-off-by: Brian Behlendorf <[email protected]>
Issue openzfs#930
  • Loading branch information
behlendorf authored and unya committed Dec 13, 2013
1 parent 9931ed5 commit 783ae3f
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 10 deletions.
2 changes: 2 additions & 0 deletions include/sys/fm/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ extern "C" {
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET "zio_offset"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE "zio_size"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS "zio_flags"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_STAGE "zio_stage"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_PIPELINE "zio_pipeline"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_DELAY "zio_delay"
#define FM_EREPORT_PAYLOAD_ZFS_PREV_STATE "prev_state"
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED "cksum_expected"
Expand Down
4 changes: 4 additions & 0 deletions module/zfs/zfs_fm.c
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,10 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
DATA_TYPE_INT32, zio->io_error, NULL);
fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS,
DATA_TYPE_INT32, zio->io_flags, NULL);
fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_STAGE,
DATA_TYPE_UINT32, zio->io_stage, NULL);
fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_PIPELINE,
DATA_TYPE_UINT32, zio->io_pipeline, NULL);
fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_DELAY,
DATA_TYPE_UINT64, zio->io_delay, NULL);

Expand Down
32 changes: 22 additions & 10 deletions module/zfs/zio.c
Original file line number Diff line number Diff line change
Expand Up @@ -1349,18 +1349,34 @@ __zio_execute(zio_t *zio)
int
zio_wait(zio_t *zio)
{
uint64_t timeout;
int error;

ASSERT(zio->io_stage == ZIO_STAGE_OPEN);
ASSERT(zio->io_executor == NULL);

zio->io_waiter = curthread;
timeout = ddi_get_lbolt() + (zio_delay_max / MILLISEC * hz);

__zio_execute(zio);

mutex_enter(&zio->io_lock);
while (zio->io_executor != NULL)
cv_wait(&zio->io_cv, &zio->io_lock);
while (zio->io_executor != NULL) {
/*
* Wake up periodically to prevent the kernel from complaining
* about a blocked task. However, check zio_delay_max to see
* if the I/O has exceeded the timeout and post an ereport.
*/
cv_timedwait_interruptible(&zio->io_cv, &zio->io_lock,
ddi_get_lbolt() + hz);

if (timeout && (ddi_get_lbolt() > timeout)) {
zio->io_delay = zio_delay_max;
zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
zio->io_spa, zio->io_vd, zio, 0, 0);
timeout = 0;
}
}
mutex_exit(&zio->io_lock);

error = zio->io_error;
Expand Down Expand Up @@ -2979,15 +2995,11 @@ zio_done(zio_t *zio)
vdev_stat_update(zio, zio->io_size);

/*
* If this I/O is attached to a particular vdev is slow, exeeding
* 30 seconds to complete, post an error described the I/O delay.
* We ignore these errors if the device is currently unavailable.
* When an I/O completes but was slow post an ereport.
*/
if (zio->io_delay >= zio_delay_max) {
if (zio->io_vd != NULL && !vdev_is_dead(zio->io_vd))
zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa,
zio->io_vd, zio, 0, 0);
}
if (zio->io_delay >= zio_delay_max)
zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa,
zio->io_vd, zio, 0, 0);

if (zio->io_error) {
/*
Expand Down

0 comments on commit 783ae3f

Please sign in to comment.