Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix concurrent resilvers initiated at same time #14892

Merged
merged 1 commit into from
May 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion module/zfs/spa.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
* Copyright 2017 Joyent, Inc.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2021, Colm Buckley <[email protected]>
* Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
*/

/*
Expand Down Expand Up @@ -6874,9 +6875,11 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD))
return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

if (dsl_scan_resilvering(spa_get_dsl(spa)))
if (dsl_scan_resilvering(spa_get_dsl(spa)) ||
dsl_scan_resilver_scheduled(spa_get_dsl(spa))) {
return (spa_vdev_exit(spa, NULL, txg,
ZFS_ERR_RESILVER_IN_PROGRESS));
}
} else {
if (vdev_rebuild_active(rvd))
return (spa_vdev_exit(spa, NULL, txg,
Expand Down
13 changes: 12 additions & 1 deletion module/zfs/vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019, Datto Inc. All rights reserved.
* Copyright (c) 2021, Klara Inc.
* Copyright [2021] Hewlett Packard Enterprise Development LP
* Copyright (c) 2021, 2023 Hewlett Packard Enterprise Development LP.
*/

#include <sys/zfs_context.h>
Expand Down Expand Up @@ -2699,6 +2699,17 @@ vdev_reopen(vdev_t *vd)
(void) vdev_validate(vd);
}

/*
* Recheck if resilver is still needed and cancel any
* scheduled resilver if resilver is unneeded.
*/
if (!vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL) &&
spa->spa_async_tasks & SPA_ASYNC_RESILVER) {
mutex_enter(&spa->spa_async_lock);
spa->spa_async_tasks &= ~SPA_ASYNC_RESILVER;
mutex_exit(&spa->spa_async_lock);
}

/*
* Reassess parent vdev's health.
*/
Expand Down
3 changes: 2 additions & 1 deletion tests/runfiles/common.run
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,8 @@ tests = ['zpool_replace_001_neg', 'replace-o_ashift', 'replace_prop_ashift']
tags = ['functional', 'cli_root', 'zpool_replace']

[tests/functional/cli_root/zpool_resilver]
tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart']
tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart',
'zpool_resilver_concurrent']
tags = ['functional', 'cli_root', 'zpool_resilver']

[tests/functional/cli_root/zpool_scrub]
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -1142,6 +1142,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_resilver/setup.ksh \
functional/cli_root/zpool_resilver/zpool_resilver_bad_args.ksh \
functional/cli_root/zpool_resilver/zpool_resilver_restart.ksh \
functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh \
functional/cli_root/zpool_scrub/cleanup.ksh \
functional/cli_root/zpool_scrub/setup.ksh \
functional/cli_root/zpool_scrub/zpool_scrub_001_neg.ksh \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib

#
# DESCRIPTION:
# Verify 'zpool clear' doesn't cause concurrent resilvers
#
# STRATEGY:
# 1. Create N(10) virtual disk files.
# 2. Create draid pool based on the virtual disk files.
# 3. Fill the filesystem with directories and files.
# 4. Force-fault 2 vdevs and verify distributed spare is kicked in.
# 5. Free the distributed spare by replacing the faulty drive.
# 6. Run zpool clear and verify that it does not initiate 2 resilvers
# concurrently while distributed spare gets kicked in.
#

verify_runnable "global"

typeset -ir devs=10
typeset -ir nparity=1
typeset -ir ndata=8
typeset -ir dspare=1

function cleanup
{
poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"

for i in {0..$devs}; do
log_must rm -f "$BASEDIR/vdev$i"
done

for dir in $BASEDIR; do
if [[ -d $dir ]]; then
log_must rm -rf $dir
fi
done

zed_stop
zed_cleanup
}

log_assert "Verify zpool clear on draid pool doesn't cause concurrent resilvers"
log_onexit cleanup

setup_test_env $TESTPOOL draid${nparity}:${ndata}d:${dspare}s $devs

# ZED needed for sequential resilver
zed_setup
log_must zed_start

log_must zpool offline -f $TESTPOOL $BASEDIR/vdev5
log_must wait_vdev_state $TESTPOOL draid1-0-0 "ONLINE" 60
log_must zpool wait -t resilver $TESTPOOL
log_must zpool offline -f $TESTPOOL $BASEDIR/vdev6

log_must zpool labelclear -f $BASEDIR/vdev5
log_must zpool labelclear -f $BASEDIR/vdev6

log_must zpool replace -w $TESTPOOL $BASEDIR/vdev5
sync_pool $TESTPOOL

log_must zpool events -c
log_must zpool clear $TESTPOOL
log_must wait_vdev_state $TESTPOOL draid1-0-0 "ONLINE" 60
log_must zpool wait -t resilver $TESTPOOL
log_must zpool wait -t scrub $TESTPOOL

nof_resilver=$(zpool events | grep -c resilver_start)
if [ $nof_resilver = 1 ] ; then
log_must verify_pool $TESTPOOL
log_pass "zpool clear on draid pool doesn't cause concurrent resilvers"
else
log_fail "FAIL: sequential and healing resilver initiated concurrently"
fi
akashb-22 marked this conversation as resolved.
Show resolved Hide resolved