From c51810f037af9e05b927fe1726e817d3caafb7ee Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Mon, 3 Jul 2023 22:16:04 +1000 Subject: [PATCH] ddt: introduce lightweight entry The idea here is that sometimes you need the contents of an entry with no intent to modify it, and/or from a place where it's difficult to get hold of its originating ddt_t to know how to interpret it. A lightweight entry contains everything you might need to "read" an entry - its key, type and phys contents - but none of the extras for modifying it or using it in a larger context. It also has the full complement of phys slots, so it can represent any kind of dedup entry without having to know the specific configuration of the table it came from. Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Signed-off-by: Rob Norris Sponsored-by: Klara, Inc. Sponsored-by: iXsystems, Inc. Closes #15893 --- cmd/zdb/zdb.c | 15 ++++++++------- include/sys/ddt.h | 16 ++++++++++++++-- include/sys/ddt_impl.h | 13 ++++++++++++- include/sys/dsl_scan.h | 2 +- module/zfs/ddt.c | 31 ++++++++++++++++--------------- module/zfs/dsl_scan.c | 15 +++++++-------- 6 files changed, 58 insertions(+), 34 deletions(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 7a6459b756b2..3bde5736c0fa 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -1914,15 +1914,16 @@ dump_log_spacemaps(spa_t *spa) } static void -dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index) +dump_ddt_entry(const ddt_t *ddt, const ddt_lightweight_entry_t *ddlwe, + uint64_t index) { - const ddt_key_t *ddk = &dde->dde_key; + const ddt_key_t *ddk = &ddlwe->ddlwe_key; char blkbuf[BP_SPRINTF_LEN]; blkptr_t blk; int p; - for (p = 0; p < DDT_NPHYS(ddt); p++) { - const ddt_phys_t *ddp = &dde->dde_phys[p]; + for (p = 0; p < ddlwe->ddlwe_nphys; p++) { + const ddt_phys_t *ddp = &ddlwe->ddlwe_phys[p]; if (ddp->ddp_phys_birth == 0) continue; ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); @@ -1959,7 +1960,7 @@ static void dump_ddt(ddt_t *ddt, 
ddt_type_t type, ddt_class_t class) { char name[DDT_NAMELEN]; - ddt_entry_t dde; + ddt_lightweight_entry_t ddlwe; uint64_t walk = 0; dmu_object_info_t doi; uint64_t count, dspace, mspace; @@ -2000,8 +2001,8 @@ dump_ddt(ddt_t *ddt, ddt_type_t type, ddt_class_t class) (void) printf("%s contents:\n\n", name); - while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0) - dump_dde(ddt, &dde, walk); + while ((error = ddt_object_walk(ddt, type, class, &walk, &ddlwe)) == 0) + dump_ddt_entry(ddt, &ddlwe, walk); ASSERT3U(error, ==, ENOENT); diff --git a/include/sys/ddt.h b/include/sys/ddt.h index a2e069f13922..7a0916690909 100644 --- a/include/sys/ddt.h +++ b/include/sys/ddt.h @@ -173,6 +173,18 @@ typedef struct { avl_node_t dde_node; /* ddt_tree node */ } ddt_entry_t; +/* + * A lightweight entry is for short-lived or transient uses, like iterating or + * inspecting, when you don't care where it came from. + */ +typedef struct { + ddt_key_t ddlwe_key; + ddt_type_t ddlwe_type; + ddt_class_t ddlwe_class; + uint8_t ddlwe_nphys; + ddt_phys_t ddlwe_phys[DDT_PHYS_MAX]; +} ddt_lightweight_entry_t; + /* * In-core DDT object. This covers all entries and stats for a the whole pool * for a given checksum type. 
@@ -241,7 +253,6 @@ extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa); extern int ddt_get_pool_dedup_cached(spa_t *spa, uint64_t *psize); extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp); -extern ddt_t *ddt_select_checksum(spa_t *spa, enum zio_checksum checksum); extern void ddt_enter(ddt_t *ddt); extern void ddt_exit(ddt_t *ddt); extern void ddt_init(void); @@ -263,7 +274,8 @@ extern void ddt_create(spa_t *spa); extern int ddt_load(spa_t *spa); extern void ddt_unload(spa_t *spa); extern void ddt_sync(spa_t *spa, uint64_t txg); -extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde); +extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, + ddt_lightweight_entry_t *ddlwe); extern boolean_t ddt_addref(spa_t *spa, const blkptr_t *bp); diff --git a/include/sys/ddt_impl.h b/include/sys/ddt_impl.h index e97b71621c37..e88a046ab8ae 100644 --- a/include/sys/ddt_impl.h +++ b/include/sys/ddt_impl.h @@ -41,6 +41,17 @@ extern "C" { #define DDT_DIR_VERSION "version" #define DDT_DIR_FLAGS "flags" +/* Fill a lightweight entry from a live entry. */ +#define DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, ddlwe) do { \ + memset((ddlwe), 0, sizeof (*ddlwe)); \ + (ddlwe)->ddlwe_key = (dde)->dde_key; \ + (ddlwe)->ddlwe_type = (dde)->dde_type; \ + (ddlwe)->ddlwe_class = (dde)->dde_class; \ + (ddlwe)->ddlwe_nphys = DDT_NPHYS(ddt); \ + for (int p = 0; p < (ddlwe)->ddlwe_nphys; p++) \ + (ddlwe)->ddlwe_phys[p] = (dde)->dde_phys[p]; \ +} while (0) + /* * Ops vector to access a specific DDT object type. 
*/ @@ -91,7 +102,7 @@ extern void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg); extern void ddt_object_name(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz, char *name); extern int ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz, - uint64_t *walk, ddt_entry_t *dde); + uint64_t *walk, ddt_lightweight_entry_t *ddlwe); extern int ddt_object_count(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz, uint64_t *count); extern int ddt_object_info(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz, diff --git a/include/sys/dsl_scan.h b/include/sys/dsl_scan.h index f32f59a2bedf..b91d7f4be88f 100644 --- a/include/sys/dsl_scan.h +++ b/include/sys/dsl_scan.h @@ -202,7 +202,7 @@ boolean_t dsl_scan_resilvering(struct dsl_pool *dp); boolean_t dsl_scan_resilver_scheduled(struct dsl_pool *dp); boolean_t dsl_dataset_unstable(struct dsl_dataset *ds); void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, - ddt_entry_t *dde, dmu_tx_t *tx); + ddt_lightweight_entry_t *ddlwe, dmu_tx_t *tx); void dsl_scan_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx); void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx); void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2, diff --git a/module/zfs/ddt.c b/module/zfs/ddt.c index 9bb0b8f15fca..aac2250bf30c 100644 --- a/module/zfs/ddt.c +++ b/module/zfs/ddt.c @@ -401,13 +401,20 @@ ddt_object_remove(ddt_t *ddt, ddt_type_t type, ddt_class_t class, int ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t class, - uint64_t *walk, ddt_entry_t *dde) + uint64_t *walk, ddt_lightweight_entry_t *ddlwe) { ASSERT(ddt_object_exists(ddt, type, class)); - return (ddt_ops[type]->ddt_op_walk(ddt->ddt_os, - ddt->ddt_object[type][class], walk, &dde->dde_key, - dde->dde_phys, sizeof (dde->dde_phys))); + int error = ddt_ops[type]->ddt_op_walk(ddt->ddt_os, + ddt->ddt_object[type][class], walk, &ddlwe->ddlwe_key, + ddlwe->ddlwe_phys, sizeof (ddlwe->ddlwe_phys)); + if (error == 0) { 
+ ddlwe->ddlwe_type = type; + ddlwe->ddlwe_class = class; + ddlwe->ddlwe_nphys = DDT_NPHYS(ddt); + return (0); + } + return (error); } int @@ -572,12 +579,6 @@ ddt_select(spa_t *spa, const blkptr_t *bp) return (spa->spa_ddt[BP_GET_CHECKSUM(bp)]); } -ddt_t * -ddt_select_checksum(spa_t *spa, enum zio_checksum checksum) -{ - return (spa->spa_ddt[checksum]); -} - void ddt_enter(ddt_t *ddt) { @@ -1347,8 +1348,10 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg) * traversing.) */ if (nclass < oclass) { + ddt_lightweight_entry_t ddlwe; + DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, &ddlwe); dsl_scan_ddt_entry(dp->dp_scan, - ddt->ddt_checksum, dde, tx); + ddt->ddt_checksum, &ddlwe, tx); } } } @@ -1455,7 +1458,7 @@ ddt_sync(spa_t *spa, uint64_t txg) } int -ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde) +ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_lightweight_entry_t *ddlwe) { do { do { @@ -1468,10 +1471,8 @@ ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde) ddb->ddb_class)) { error = ddt_object_walk(ddt, ddb->ddb_type, ddb->ddb_class, - &ddb->ddb_cursor, dde); + &ddb->ddb_cursor, ddlwe); } - dde->dde_type = ddb->ddb_type; - dde->dde_class = ddb->ddb_class; if (error == 0) return (0); if (error != ENOENT) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 737ee4f6600c..dec0eb28dc5f 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -2929,10 +2929,10 @@ enqueue_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, - ddt_entry_t *dde, dmu_tx_t *tx) + ddt_lightweight_entry_t *ddlwe, dmu_tx_t *tx) { (void) tx; - const ddt_key_t *ddk = &dde->dde_key; + const ddt_key_t *ddk = &ddlwe->ddlwe_key; blkptr_t bp; zbookmark_phys_t zb = { 0 }; @@ -2953,9 +2953,8 @@ dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, if (scn->scn_done_txg != 0) return; - ddt_t *ddt = ddt_select_checksum(tx->tx_pool->dp_spa, checksum); - for (int p = 0; p < 
DDT_NPHYS(ddt); p++) { - ddt_phys_t *ddp = &dde->dde_phys[p]; + for (int p = 0; p < ddlwe->ddlwe_nphys; p++) { + ddt_phys_t *ddp = &ddlwe->ddlwe_phys[p]; if (ddp->ddp_phys_birth == 0 || ddp->ddp_phys_birth > scn->scn_phys.scn_max_txg) @@ -3004,11 +3003,11 @@ static void dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx) { ddt_bookmark_t *ddb = &scn->scn_phys.scn_ddt_bookmark; - ddt_entry_t dde = {{{{0}}}}; + ddt_lightweight_entry_t ddlwe = {0}; int error; uint64_t n = 0; - while ((error = ddt_walk(scn->scn_dp->dp_spa, ddb, &dde)) == 0) { + while ((error = ddt_walk(scn->scn_dp->dp_spa, ddb, &ddlwe)) == 0) { ddt_t *ddt; if (ddb->ddb_class > scn->scn_phys.scn_ddt_class_max) @@ -3023,7 +3022,7 @@ dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx) ddt = scn->scn_dp->dp_spa->spa_ddt[ddb->ddb_checksum]; ASSERT(avl_first(&ddt->ddt_tree) == NULL); - dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &dde, tx); + dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &ddlwe, tx); n++; if (dsl_scan_check_suspend(scn, NULL))