From 77b90e530ebf71441dd94dc1b62a9ccdcab893a8 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Mon, 3 Jul 2023 19:54:40 +1000 Subject: [PATCH] ddt: slim down ddt_entry_t This slims down the in-memory entry to as small as it can be. The IO-related parts are made into a separate entry, since they're relatively rarely needed. The variable allocation for dde_phys is to support the upcoming flat format. Signed-off-by: Rob Norris Sponsored-by: Klara, Inc. Sponsored-by: iXsystems, Inc. --- include/sys/ddt.h | 22 ++++++++++++++++------ module/zfs/ddt.c | 46 +++++++++++++++++++++++++++++++++------------- module/zfs/zio.c | 26 ++++++++++++++------------ 3 files changed, 63 insertions(+), 31 deletions(-) diff --git a/include/sys/ddt.h b/include/sys/ddt.h index b258bb2776eb..af1f8444090e 100644 --- a/include/sys/ddt.h +++ b/include/sys/ddt.h @@ -151,16 +151,22 @@ typedef struct { #define DDE_FLAG_LOADED (1 << 0) /* entry ready for use */ #define DDE_FLAG_OVERQUOTA (1 << 1) /* entry unusable, no space */ +/* + * Additional data to support entry update or repair. This is fixed size + * because it's relatively rarely used. 
+ */ typedef struct { - /* key must be first for ddt_key_compare */ - ddt_key_t dde_key; /* ddt_tree key */ - ddt_phys_t dde_phys[DDT_PHYS_MAX]; /* on-disk data */ + /* copy of data after a repair read, to be rewritten */ + abd_t *dde_repair_abd; /* in-flight update IOs */ zio_t *dde_lead_zio[DDT_PHYS_MAX]; +} ddt_entry_io_t; - /* copy of data after a repair read, to be rewritten */ - struct abd *dde_repair_abd; +typedef struct { + /* key must be first for ddt_key_compare */ + ddt_key_t dde_key; /* ddt_tree key */ + avl_node_t dde_node; /* ddt_tree_node */ /* storage type and class the entry was loaded from */ ddt_type_t dde_type; @@ -170,7 +176,9 @@ typedef struct { kcondvar_t dde_cv; /* signaled when load completes */ uint64_t dde_waiters; /* count of waiters on dde_cv */ - avl_node_t dde_node; /* ddt_tree node */ + ddt_entry_io_t *dde_io; /* IO support, when required */ + + ddt_phys_t dde_phys[]; /* physical data */ } ddt_entry_t; /* @@ -265,6 +273,8 @@ extern void ddt_prefetch_all(spa_t *spa); extern boolean_t ddt_class_contains(spa_t *spa, ddt_class_t max_class, const blkptr_t *bp); +extern void ddt_alloc_entry_io(ddt_entry_t *dde); + extern ddt_entry_t *ddt_repair_start(ddt_t *ddt, const blkptr_t *bp); extern void ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde); diff --git a/module/zfs/ddt.c b/module/zfs/ddt.c index 8a01d2745d51..4699ad0fb3e1 100644 --- a/module/zfs/ddt.c +++ b/module/zfs/ddt.c @@ -164,6 +164,9 @@ static kmem_cache_t *ddt_cache; static kmem_cache_t *ddt_entry_cache; +#define DDT_ENTRY_SIZE \ + (sizeof (ddt_entry_t) + sizeof (ddt_phys_t) * DDT_PHYS_MAX) + /* * Enable/disable prefetching of dedup-ed blocks which are going to be freed. 
*/ @@ -343,7 +346,7 @@ ddt_object_lookup(ddt_t *ddt, ddt_type_t type, ddt_class_t class, return (ddt_ops[type]->ddt_op_lookup(ddt->ddt_os, ddt->ddt_object[type][class], &dde->dde_key, - dde->dde_phys, sizeof (dde->dde_phys))); + dde->dde_phys, sizeof (ddt_phys_t) * DDT_NPHYS(ddt))); } static int @@ -386,7 +389,7 @@ ddt_object_update(ddt_t *ddt, ddt_type_t type, ddt_class_t class, return (ddt_ops[type]->ddt_op_update(ddt->ddt_os, ddt->ddt_object[type][class], &dde->dde_key, dde->dde_phys, - sizeof (dde->dde_phys), tx)); + sizeof (ddt_phys_t) * DDT_NPHYS(ddt), tx)); } static int @@ -597,7 +600,7 @@ ddt_init(void) ddt_cache = kmem_cache_create("ddt_cache", sizeof (ddt_t), 0, NULL, NULL, NULL, NULL, NULL, 0); ddt_entry_cache = kmem_cache_create("ddt_entry_cache", - sizeof (ddt_entry_t), 0, NULL, NULL, NULL, NULL, NULL, 0); + DDT_ENTRY_SIZE, 0, NULL, NULL, NULL, NULL, NULL, 0); } void @@ -613,7 +616,7 @@ ddt_alloc(const ddt_key_t *ddk) ddt_entry_t *dde; dde = kmem_cache_alloc(ddt_entry_cache, KM_SLEEP); - memset(dde, 0, sizeof (ddt_entry_t)); + memset(dde, 0, DDT_ENTRY_SIZE); cv_init(&dde->dde_cv, NULL, CV_DEFAULT, NULL); dde->dde_key = *ddk; @@ -621,14 +624,27 @@ ddt_alloc(const ddt_key_t *ddk) return (dde); } +void +ddt_alloc_entry_io(ddt_entry_t *dde) +{ + if (dde->dde_io != NULL) + return; + + dde->dde_io = kmem_zalloc(sizeof (ddt_entry_io_t), KM_SLEEP); +} + static void ddt_free(const ddt_t *ddt, ddt_entry_t *dde) { - for (int p = 0; p < DDT_NPHYS(ddt); p++) - ASSERT3P(dde->dde_lead_zio[p], ==, NULL); + if (dde->dde_io != NULL) { + for (int p = 0; p < DDT_NPHYS(ddt); p++) + ASSERT3P(dde->dde_io->dde_lead_zio[p], ==, NULL); - if (dde->dde_repair_abd != NULL) - abd_free(dde->dde_repair_abd); + if (dde->dde_io->dde_repair_abd != NULL) + abd_free(dde->dde_io->dde_repair_abd); + + kmem_free(dde->dde_io, sizeof (ddt_entry_io_t)); + } cv_destroy(&dde->dde_cv); kmem_cache_free(ddt_entry_cache, dde); @@ -1195,6 +1211,7 @@ ddt_repair_start(ddt_t *ddt, const blkptr_t *bp) 
ddt_key_fill(&ddk, bp); dde = ddt_alloc(&ddk); + ddt_alloc_entry_io(dde); for (ddt_type_t type = 0; type < DDT_TYPES; type++) { for (ddt_class_t class = 0; class < DDT_CLASSES; class++) { @@ -1209,7 +1226,7 @@ ddt_repair_start(ddt_t *ddt, const blkptr_t *bp) } } - memset(dde->dde_phys, 0, sizeof (dde->dde_phys)); + memset(dde->dde_phys, 0, sizeof (ddt_phys_t) * DDT_NPHYS(ddt)); return (dde); } @@ -1221,7 +1238,8 @@ ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde) ddt_enter(ddt); - if (dde->dde_repair_abd != NULL && spa_writeable(ddt->ddt_spa) && + if (dde->dde_io->dde_repair_abd != NULL && + spa_writeable(ddt->ddt_spa) && avl_find(&ddt->ddt_repair_tree, dde, &where) == NULL) avl_insert(&ddt->ddt_repair_tree, dde, where); else @@ -1259,8 +1277,9 @@ ddt_repair_entry(ddt_t *ddt, ddt_entry_t *dde, ddt_entry_t *rdde, zio_t *rio) continue; ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); zio_nowait(zio_rewrite(zio, zio->io_spa, 0, &blk, - rdde->dde_repair_abd, DDK_GET_PSIZE(rddk), NULL, NULL, - ZIO_PRIORITY_SYNC_WRITE, ZIO_DDT_CHILD_FLAGS(zio), NULL)); + rdde->dde_io->dde_repair_abd, DDK_GET_PSIZE(rddk), + NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, + ZIO_DDT_CHILD_FLAGS(zio), NULL)); } zio_nowait(zio); @@ -1305,7 +1324,8 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg) ASSERT(dde->dde_flags & DDE_FLAG_LOADED); for (int p = 0; p < DDT_NPHYS(ddt); p++) { - ASSERT3P(dde->dde_lead_zio[p], ==, NULL); + ASSERT(dde->dde_io == NULL || + dde->dde_io->dde_lead_zio[p] == NULL); ddt_phys_t *ddp = &dde->dde_phys[p]; if (ddp->ddp_phys_birth == 0) { ASSERT0(ddp->ddp_refcnt); diff --git a/module/zfs/zio.c b/module/zfs/zio.c index be6afb32e6e8..34ab27ca1cb9 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -3249,8 +3249,8 @@ zio_ddt_child_read_done(zio_t *zio) if (zio->io_error == 0) ddt_phys_clear(ddp); /* this ddp doesn't need repair */ - if (zio->io_error == 0 && dde->dde_repair_abd == NULL) - dde->dde_repair_abd = zio->io_abd; + if (zio->io_error == 0 && 
dde->dde_io->dde_repair_abd == NULL) + dde->dde_io->dde_repair_abd = zio->io_abd; else abd_free(zio->io_abd); mutex_exit(&pio->io_lock); @@ -3324,8 +3324,8 @@ zio_ddt_read_done(zio_t *zio) zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, B_FALSE); return (NULL); } - if (dde->dde_repair_abd != NULL) { - abd_copy(zio->io_abd, dde->dde_repair_abd, + if (dde->dde_io->dde_repair_abd != NULL) { + abd_copy(zio->io_abd, dde->dde_io->dde_repair_abd, zio->io_size); zio->io_child_error[ZIO_CHILD_DDT] = 0; } @@ -3362,7 +3362,7 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) if (DDT_PHYS_IS_DITTO(ddt, p)) continue; - zio_t *lio = dde->dde_lead_zio[p]; + zio_t *lio = dde->dde_io->dde_lead_zio[p]; if (lio != NULL && do_raw) { return (lio->io_size != zio->io_size || @@ -3456,7 +3456,7 @@ zio_ddt_child_write_ready(zio_t *zio) ddt_enter(ddt); - ASSERT(dde->dde_lead_zio[p] == zio); + ASSERT(dde->dde_io->dde_lead_zio[p] == zio); ddt_phys_fill(ddp, zio->io_bp); @@ -3479,8 +3479,8 @@ zio_ddt_child_write_done(zio_t *zio) ddt_enter(ddt); ASSERT(ddp->ddp_refcnt == 0); - ASSERT(dde->dde_lead_zio[p] == zio); - dde->dde_lead_zio[p] = NULL; + ASSERT(dde->dde_io->dde_lead_zio[p] == zio); + dde->dde_io->dde_lead_zio[p] = NULL; if (zio->io_error == 0) { zio_link_t *zl = NULL; @@ -3547,11 +3547,13 @@ zio_ddt_write(zio_t *zio) return (zio); } - if (ddp->ddp_phys_birth != 0 || dde->dde_lead_zio[p] != NULL) { + ddt_alloc_entry_io(dde); + + if (ddp->ddp_phys_birth != 0 || dde->dde_io->dde_lead_zio[p] != NULL) { if (ddp->ddp_phys_birth != 0) ddt_bp_fill(ddp, bp, txg); - if (dde->dde_lead_zio[p] != NULL) - zio_add_child(zio, dde->dde_lead_zio[p]); + if (dde->dde_io->dde_lead_zio[p] != NULL) + zio_add_child(zio, dde->dde_io->dde_lead_zio[p]); else ddt_phys_addref(ddp); } else if (zio->io_bp_override) { @@ -3567,7 +3569,7 @@ zio_ddt_write(zio_t *zio) ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark); zio_push_transform(cio, zio->io_abd, zio->io_size, 0, NULL); - dde->dde_lead_zio[p] = cio; + 
dde->dde_io->dde_lead_zio[p] = cio; } ddt_exit(ddt);