From b22bab2547b74b5df5e4698880e7d027b1d44509 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Fri, 28 Jul 2023 16:30:33 -0400 Subject: [PATCH] Remove fastwrite mechanism. Fastwrite was introduced many years ago to improve ZIL writes spread between multiple top-level vdevs by tracking number of allocated but not written blocks and choosing vdev with smaller count. It suposed to reduce ZIL knowledge about allocation, but actually made ZIL to even more actively report allocation code about the allocations, complicating both ZIL and metaslabs code. On top of that, it seems ZIO_FLAG_FASTWRITE setting in dmu_sync() was lost many years ago, that was one of the declared benefits. Plus introduction of embedded log metaslab class solved another problem with allocation rotor accounting both normal and log allocations, since in most cases those are now in different metaslab classes. After all that, I'd prefer to simplify already too complicated ZIL, ZIO and metaslab code if the benefit of complexity is not obvious. Reviewed-by: Brian Behlendorf Reviewed-by: George Wilson Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #15107 --- include/sys/metaslab.h | 3 -- include/sys/vdev_impl.h | 1 - include/sys/zil_impl.h | 1 - include/sys/zio.h | 1 - module/zfs/metaslab.c | 67 ++--------------------------------------- module/zfs/vdev.c | 2 -- module/zfs/zil.c | 42 +++----------------------- module/zfs/zio.c | 14 +-------- 8 files changed, 8 insertions(+), 123 deletions(-) diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h index fec080139a2b..0df6e5f81fc1 100644 --- a/include/sys/metaslab.h +++ b/include/sys/metaslab.h @@ -80,7 +80,6 @@ uint64_t metaslab_largest_allocatable(metaslab_t *); #define METASLAB_ASYNC_ALLOC 0x8 #define METASLAB_DONT_THROTTLE 0x10 #define METASLAB_MUST_RESERVE 0x20 -#define METASLAB_FASTWRITE 0x40 #define METASLAB_ZIL 0x80 int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t, @@ -96,8 +95,6 @@ void metaslab_unalloc_dva(spa_t *, const dva_t *, uint64_t); int metaslab_claim(spa_t *, const blkptr_t *, uint64_t); int metaslab_claim_impl(vdev_t *, uint64_t, uint64_t, uint64_t); void metaslab_check_free(spa_t *, const blkptr_t *); -void metaslab_fastwrite_mark(spa_t *, const blkptr_t *); -void metaslab_fastwrite_unmark(spa_t *, const blkptr_t *); void metaslab_stat_init(void); void metaslab_stat_fini(void); diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index 5f4e82ad8657..ad9dc3aefd8e 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -266,7 +266,6 @@ struct vdev { metaslab_group_t *vdev_mg; /* metaslab group */ metaslab_group_t *vdev_log_mg; /* embedded slog metaslab group */ metaslab_t **vdev_ms; /* metaslab array */ - uint64_t vdev_pending_fastwrite; /* allocated fastwrites */ txg_list_t vdev_ms_list; /* per-txg dirty metaslab lists */ txg_list_t vdev_dtl_list; /* per-txg dirty DTL lists */ txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */ diff --git a/include/sys/zil_impl.h b/include/sys/zil_impl.h index 03a409c5257c..b58dad9695a6 100644 --- a/include/sys/zil_impl.h +++ b/include/sys/zil_impl.h @@ -91,7 +91,6 @@ typedef enum { typedef struct lwb { zilog_t *lwb_zilog; /* back pointer to log struct */ blkptr_t lwb_blk; /* on disk address of this log blk */ - boolean_t lwb_fastwrite; /* is blk marked for fastwrite? */ boolean_t lwb_slog; /* lwb_blk is on SLOG device */ boolean_t lwb_indirect; /* do not postpone zil_lwb_commit() */ int lwb_nused; /* # used bytes in buffer */ diff --git a/include/sys/zio.h b/include/sys/zio.h index f4da80783e56..e1f4d5c04499 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -222,7 +222,6 @@ typedef uint64_t zio_flag_t; #define ZIO_FLAG_NOPWRITE (1ULL << 28) #define ZIO_FLAG_REEXECUTED (1ULL << 29) #define ZIO_FLAG_DELEGATED (1ULL << 30) -#define ZIO_FLAG_FASTWRITE (1ULL << 31) #define ZIO_FLAG_MUSTSUCCEED 0 #define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT) diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 9991e1a22cdf..8393e8dd91d5 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -5101,7 +5101,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, zio_alloc_list_t *zal, int allocator) { metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator]; - metaslab_group_t *mg, *fast_mg, *rotor; + metaslab_group_t *mg, *rotor; vdev_t *vd; boolean_t try_hard = B_FALSE; @@ -5164,15 +5164,6 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, } else if (d != 0) { vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1])); mg = vd->vdev_mg->mg_next; - } else if (flags & METASLAB_FASTWRITE) { - mg = fast_mg = mca->mca_rotor; - - do { - if (fast_mg->mg_vd->vdev_pending_fastwrite < - mg->mg_vd->vdev_pending_fastwrite) - mg = fast_mg; - } while ((fast_mg = fast_mg->mg_next) != mca->mca_rotor); - } else { ASSERT(mca->mca_rotor != NULL); mg = mca->mca_rotor; @@ -5297,7 +5288,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, mg->mg_bias = 0; } - if ((flags & METASLAB_FASTWRITE) || + if ((flags & METASLAB_ZIL) || atomic_add_64_nv(&mca->mca_aliquot, asize) >= mg->mg_aliquot + mg->mg_bias) { mca->mca_rotor = mg->mg_next; @@ -5310,11 +5301,6 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, ((flags & METASLAB_GANG_HEADER) ? 1 : 0)); DVA_SET_ASIZE(&dva[d], asize); - if (flags & METASLAB_FASTWRITE) { - atomic_add_64(&vd->vdev_pending_fastwrite, - psize); - } - return (0); } next: @@ -5950,55 +5936,6 @@ metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg) return (error); } -void -metaslab_fastwrite_mark(spa_t *spa, const blkptr_t *bp) -{ - const dva_t *dva = bp->blk_dva; - int ndvas = BP_GET_NDVAS(bp); - uint64_t psize = BP_GET_PSIZE(bp); - int d; - vdev_t *vd; - - ASSERT(!BP_IS_HOLE(bp)); - ASSERT(!BP_IS_EMBEDDED(bp)); - ASSERT(psize > 0); - - spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); - - for (d = 0; d < ndvas; d++) { - if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL) - continue; - atomic_add_64(&vd->vdev_pending_fastwrite, psize); - } - - spa_config_exit(spa, SCL_VDEV, FTAG); -} - -void -metaslab_fastwrite_unmark(spa_t *spa, const blkptr_t *bp) -{ - const dva_t *dva = bp->blk_dva; - int ndvas = BP_GET_NDVAS(bp); - uint64_t psize = BP_GET_PSIZE(bp); - int d; - vdev_t *vd; - - ASSERT(!BP_IS_HOLE(bp)); - ASSERT(!BP_IS_EMBEDDED(bp)); - ASSERT(psize > 0); - - spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); - - for (d = 0; d < ndvas; d++) { - if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL) - continue; - ASSERT3U(vd->vdev_pending_fastwrite, >=, psize); - atomic_sub_64(&vd->vdev_pending_fastwrite, psize); - } - - spa_config_exit(spa, SCL_VDEV, FTAG); -} - static void metaslab_check_free_impl_cb(uint64_t inner, vdev_t *vd, uint64_t offset, uint64_t size, void *arg) diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index f3812b843e95..87c145593237 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -1192,7 +1192,6 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd) ASSERT(tvd == tvd->vdev_top); - tvd->vdev_pending_fastwrite = svd->vdev_pending_fastwrite; tvd->vdev_ms_array = svd->vdev_ms_array; tvd->vdev_ms_shift = svd->vdev_ms_shift; tvd->vdev_ms_count = svd->vdev_ms_count; @@ -1655,7 +1654,6 @@ vdev_metaslab_fini(vdev_t *vd) } } ASSERT0(vd->vdev_ms_count); - ASSERT3U(vd->vdev_pending_fastwrite, ==, 0); } typedef struct vdev_probe_stats { diff --git a/module/zfs/zil.c b/module/zfs/zil.c index be5b9edf6ede..6f04a7d4a7df 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -761,15 +761,13 @@ zil_lwb_vdev_compare(const void *x1, const void *x2) } static lwb_t * -zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg, - boolean_t fastwrite) +zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg) { lwb_t *lwb; lwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP); lwb->lwb_zilog = zilog; lwb->lwb_blk = *bp; - lwb->lwb_fastwrite = fastwrite; lwb->lwb_slog = slog; lwb->lwb_indirect = B_FALSE; if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) { @@ -916,7 +914,6 @@ zil_create(zilog_t *zilog) dmu_tx_t *tx = NULL; blkptr_t blk; int error = 0; - boolean_t fastwrite = FALSE; boolean_t slog = FALSE; dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); @@ -949,8 +946,6 @@ zil_create(zilog_t *zilog) error = zio_alloc_zil(zilog->zl_spa, zilog->zl_os, txg, &blk, ZIL_MIN_BLKSZ, &slog); - fastwrite = TRUE; - if (error == 0) zil_init_log_chain(zilog, &blk); } @@ -959,7 +954,7 @@ zil_create(zilog_t *zilog) * Allocate a log write block (lwb) for the first log block. */ if (error == 0) - lwb = zil_alloc_lwb(zilog, &blk, slog, txg, fastwrite); + lwb = zil_alloc_lwb(zilog, &blk, slog, txg); /* * If we just allocated the first log block, commit our transaction @@ -1044,9 +1039,6 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first) ASSERT(zh->zh_claim_txg == 0); VERIFY(!keep_first); while ((lwb = list_remove_head(&zilog->zl_lwb_list)) != NULL) { - if (lwb->lwb_fastwrite) - metaslab_fastwrite_unmark(zilog->zl_spa, - &lwb->lwb_blk); if (lwb->lwb_buf != NULL) zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); zio_free(zilog->zl_spa, txg, &lwb->lwb_blk); @@ -1551,7 +1543,6 @@ zil_lwb_write_done(zio_t *zio) ASSERT3S(lwb->lwb_state, ==, LWB_STATE_ISSUED); lwb->lwb_state = LWB_STATE_WRITE_DONE; lwb->lwb_write_zio = NULL; - lwb->lwb_fastwrite = FALSE; nlwb = list_next(&zilog->zl_lwb_list, lwb); mutex_exit(&zilog->zl_lock); @@ -1718,20 +1709,12 @@ zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb) ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]); - /* Lock so zil_sync() doesn't fastwrite_unmark after zio is created */ - mutex_enter(&zilog->zl_lock); - if (!lwb->lwb_fastwrite) { - metaslab_fastwrite_mark(zilog->zl_spa, &lwb->lwb_blk); - lwb->lwb_fastwrite = 1; - } - lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, zilog->zl_spa, 0, &lwb->lwb_blk, lwb_abd, BP_GET_LSIZE(&lwb->lwb_blk), - zil_lwb_write_done, lwb, prio, - ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb); + zil_lwb_write_done, lwb, prio, ZIO_FLAG_CANFAIL, &zb); + mutex_enter(&zilog->zl_lock); lwb->lwb_state = LWB_STATE_OPENED; - zil_lwb_set_zio_dependency(zilog, lwb); zilog->zl_last_lwb_opened = lwb; mutex_exit(&zilog->zl_lock); @@ -1864,7 +1847,7 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, list_t *ilwbs) /* * Allocate a new log write block (lwb). */ - nlwb = zil_alloc_lwb(zilog, bp, slog, txg, TRUE); + nlwb = zil_alloc_lwb(zilog, bp, slog, txg); } lwb->lwb_state = LWB_STATE_ISSUED; @@ -3651,18 +3634,6 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx) BP_ZERO(&zh->zh_log); } - /* - * Remove fastwrite on any blocks that have been pre-allocated for - * the next commit. This prevents fastwrite counter pollution by - * unused, long-lived LWBs. - */ - for (; lwb != NULL; lwb = list_next(&zilog->zl_lwb_list, lwb)) { - if (lwb->lwb_fastwrite && !lwb->lwb_write_zio) { - metaslab_fastwrite_unmark(zilog->zl_spa, &lwb->lwb_blk); - lwb->lwb_fastwrite = 0; - } - } - mutex_exit(&zilog->zl_lock); } @@ -3895,9 +3866,6 @@ zil_close(zilog_t *zilog) ASSERT(list_is_empty(&zilog->zl_lwb_list)); ASSERT3S(lwb->lwb_state, !=, LWB_STATE_ISSUED); - if (lwb->lwb_fastwrite) - metaslab_fastwrite_unmark(zilog->zl_spa, &lwb->lwb_blk); - zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); zil_free_lwb(zilog, lwb); } diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 3f5e6a08d89c..b5627109900c 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -3024,11 +3024,6 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc) */ pio->io_pipeline = ZIO_INTERLOCK_PIPELINE; - /* - * We didn't allocate this bp, so make sure it doesn't get unmarked. - */ - pio->io_flags &= ~ZIO_FLAG_FASTWRITE; - zio_nowait(zio); return (pio); @@ -3616,7 +3611,6 @@ zio_dva_allocate(zio_t *zio) ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa)); ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp)); - flags |= (zio->io_flags & ZIO_FLAG_FASTWRITE) ? METASLAB_FASTWRITE : 0; if (zio->io_flags & ZIO_FLAG_NODATA) flags |= METASLAB_DONT_THROTTLE; if (zio->io_flags & ZIO_FLAG_GANG_CHILD) @@ -3776,7 +3770,7 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, * of, so we just hash the objset ID to pick the allocator to get * some parallelism. */ - int flags = METASLAB_FASTWRITE | METASLAB_ZIL; + int flags = METASLAB_ZIL; int allocator = (uint_t)cityhash4(0, 0, 0, os->os_dsl_dataset->ds_object) % spa->spa_alloc_count; error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1, @@ -4931,12 +4925,6 @@ zio_done(zio_t *zio) zfs_ereport_free_checksum(zcr); } - if (zio->io_flags & ZIO_FLAG_FASTWRITE && zio->io_bp && - !BP_IS_HOLE(zio->io_bp) && !BP_IS_EMBEDDED(zio->io_bp) && - !(zio->io_flags & ZIO_FLAG_NOPWRITE)) { - metaslab_fastwrite_unmark(zio->io_spa, zio->io_bp); - } - /* * It is the responsibility of the done callback to ensure that this * particular zio is no longer discoverable for adoption, and as