-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ddt: add support for prefetching tables into the ARC
This change adds a new `zpool prefetch -t ddt $pool` command which causes a pool's DDT to be loaded into the ARC. The primary goal is to remove the need to "warm" a pool's cache before deduplication stops slowing write performance. It may also provide a way to reload portions of a DDT if they have been flushed due to inactivity. Sponsored-by: iXsystems, Inc. Sponsored-by: Catalogics, Inc. Sponsored-by: Klara, Inc. Reviewed-by: Alexander Motin <[email protected]> Reviewed-by: Tony Hutter <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Allan Jude <[email protected]> Signed-off-by: Will Andrews <[email protected]> Signed-off-by: Fred Weigel <[email protected]> Signed-off-by: Rob Norris <[email protected]> Signed-off-by: Don Brady <[email protected]> Co-authored-by: Will Andrews <[email protected]> Co-authored-by: Don Brady <[email protected]> Closes #15890
- Loading branch information
Showing
37 changed files
with
1,057 additions
and
42 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,7 +32,7 @@ | |
* Copyright (c) 2017, Intel Corporation. | ||
* Copyright (c) 2019, loli10K <[email protected]> | ||
* Copyright (c) 2021, Colm Buckley <[email protected]> | ||
* Copyright (c) 2021, Klara Inc. | ||
* Copyright (c) 2021, 2023, Klara Inc. | ||
* Copyright [2021] Hewlett Packard Enterprise Development LP | ||
*/ | ||
|
||
|
@@ -90,6 +90,7 @@ static int zpool_do_remove(int, char **); | |
static int zpool_do_labelclear(int, char **); | ||
|
||
static int zpool_do_checkpoint(int, char **); | ||
static int zpool_do_prefetch(int, char **); | ||
|
||
static int zpool_do_list(int, char **); | ||
static int zpool_do_iostat(int, char **); | ||
|
@@ -176,6 +177,7 @@ typedef enum { | |
HELP_LIST, | ||
HELP_OFFLINE, | ||
HELP_ONLINE, | ||
HELP_PREFETCH, | ||
HELP_REPLACE, | ||
HELP_REMOVE, | ||
HELP_INITIALIZE, | ||
|
@@ -307,6 +309,7 @@ static zpool_command_t command_table[] = { | |
{ "labelclear", zpool_do_labelclear, HELP_LABELCLEAR }, | ||
{ NULL }, | ||
{ "checkpoint", zpool_do_checkpoint, HELP_CHECKPOINT }, | ||
{ "prefetch", zpool_do_prefetch, HELP_PREFETCH }, | ||
{ NULL }, | ||
{ "list", zpool_do_list, HELP_LIST }, | ||
{ "iostat", zpool_do_iostat, HELP_IOSTAT }, | ||
|
@@ -398,6 +401,9 @@ get_usage(zpool_help_t idx) | |
return (gettext("\tlist [-gHLpPv] [-o property[,...]] " | ||
"[-T d|u] [pool] ... \n" | ||
"\t [interval [count]]\n")); | ||
case HELP_PREFETCH: | ||
return (gettext("\tprefetch -t <type> [<type opts>] <pool>\n" | ||
"\t -t ddt <pool>\n")); | ||
case HELP_OFFLINE: | ||
return (gettext("\toffline [--power]|[[-f][-t]] <pool> " | ||
"<device> ...\n")); | ||
|
@@ -3827,6 +3833,72 @@ zpool_do_checkpoint(int argc, char **argv) | |
|
||
#define CHECKPOINT_OPT 1024 | ||
|
||
/* | ||
* zpool prefetch <type> [<type opts>] <pool> | ||
* | ||
* Prefetchs a particular type of data in the specified pool. | ||
*/ | ||
int | ||
zpool_do_prefetch(int argc, char **argv) | ||
{ | ||
int c; | ||
char *poolname; | ||
char *typestr = NULL; | ||
zpool_prefetch_type_t type; | ||
zpool_handle_t *zhp; | ||
int err = 0; | ||
|
||
while ((c = getopt(argc, argv, "t:")) != -1) { | ||
switch (c) { | ||
case 't': | ||
typestr = optarg; | ||
break; | ||
case ':': | ||
(void) fprintf(stderr, gettext("missing argument for " | ||
"'%c' option\n"), optopt); | ||
usage(B_FALSE); | ||
break; | ||
case '?': | ||
(void) fprintf(stderr, gettext("invalid option '%c'\n"), | ||
optopt); | ||
usage(B_FALSE); | ||
} | ||
} | ||
argc -= optind; | ||
argv += optind; | ||
|
||
if (argc < 1) { | ||
(void) fprintf(stderr, gettext("missing pool name argument\n")); | ||
usage(B_FALSE); | ||
} | ||
|
||
if (argc > 1) { | ||
(void) fprintf(stderr, gettext("too many arguments\n")); | ||
usage(B_FALSE); | ||
} | ||
|
||
poolname = argv[0]; | ||
|
||
argc--; | ||
argv++; | ||
|
||
if (strcmp(typestr, "ddt") == 0) { | ||
type = ZPOOL_PREFETCH_DDT; | ||
} else { | ||
(void) fprintf(stderr, gettext("unsupported prefetch type\n")); | ||
usage(B_FALSE); | ||
} | ||
|
||
if ((zhp = zpool_open(g_zfs, poolname)) == NULL) | ||
return (1); | ||
|
||
err = zpool_prefetch(zhp, type); | ||
|
||
zpool_close(zhp); | ||
|
||
return (err); | ||
} | ||
|
||
/* | ||
* zpool import [-d dir] [-D] | ||
* import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l] | ||
|
@@ -6446,6 +6518,7 @@ print_one_column(zpool_prop_t prop, uint64_t value, const char *str, | |
case ZPOOL_PROP_EXPANDSZ: | ||
case ZPOOL_PROP_CHECKPOINT: | ||
case ZPOOL_PROP_DEDUPRATIO: | ||
case ZPOOL_PROP_DEDUPCACHED: | ||
if (value == 0) | ||
(void) strlcpy(propval, "-", sizeof (propval)); | ||
else | ||
|
@@ -8792,13 +8865,17 @@ print_l2cache(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t **l2cache, | |
} | ||
|
||
static void | ||
print_dedup_stats(nvlist_t *config) | ||
print_dedup_stats(zpool_handle_t *zhp, nvlist_t *config, boolean_t literal) | ||
{ | ||
ddt_histogram_t *ddh; | ||
ddt_stat_t *dds; | ||
ddt_object_t *ddo; | ||
uint_t c; | ||
char dspace[6], mspace[6]; | ||
/* Extra space provided for literal display */ | ||
char dspace[32], mspace[32], cspace[32]; | ||
uint64_t cspace_prop; | ||
enum zfs_nicenum_format format; | ||
zprop_source_t src; | ||
|
||
/* | ||
* If the pool was faulted then we may not have been able to | ||
|
@@ -8816,12 +8893,26 @@ print_dedup_stats(nvlist_t *config) | |
return; | ||
} | ||
|
||
zfs_nicebytes(ddo->ddo_dspace, dspace, sizeof (dspace)); | ||
zfs_nicebytes(ddo->ddo_mspace, mspace, sizeof (mspace)); | ||
(void) printf("DDT entries %llu, size %s on disk, %s in core\n", | ||
/* | ||
* Squash cached size into in-core size to handle race. | ||
* Only include cached size if it is available. | ||
*/ | ||
cspace_prop = zpool_get_prop_int(zhp, ZPOOL_PROP_DEDUPCACHED, &src); | ||
cspace_prop = MIN(cspace_prop, ddo->ddo_mspace); | ||
format = literal ? ZFS_NICENUM_RAW : ZFS_NICENUM_1024; | ||
zfs_nicenum_format(cspace_prop, cspace, sizeof (cspace), format); | ||
zfs_nicenum_format(ddo->ddo_dspace, dspace, sizeof (dspace), format); | ||
zfs_nicenum_format(ddo->ddo_mspace, mspace, sizeof (mspace), format); | ||
(void) printf("DDT entries %llu, size %s on disk, %s in core", | ||
(u_longlong_t)ddo->ddo_count, | ||
dspace, | ||
mspace); | ||
if (src != ZPROP_SRC_DEFAULT) { | ||
(void) printf(", %s cached (%.02f%%)", | ||
cspace, | ||
(double)cspace_prop / (double)ddo->ddo_mspace * 100.0); | ||
} | ||
(void) printf("\n"); | ||
|
||
verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_STATS, | ||
(uint64_t **)&dds, &c) == 0); | ||
|
@@ -8857,6 +8948,10 @@ status_callback(zpool_handle_t *zhp, void *data) | |
uint_t c; | ||
vdev_stat_t *vs; | ||
|
||
/* If dedup stats were requested, also fetch dedupcached. */ | ||
if (cbp->cb_dedup_stats > 1) | ||
zpool_add_propname(zhp, ZPOOL_DEDUPCACHED_PROP_NAME); | ||
|
||
config = zpool_get_config(zhp, NULL); | ||
reason = zpool_get_status(zhp, &msgid, &errata); | ||
|
||
|
@@ -9338,7 +9433,7 @@ status_callback(zpool_handle_t *zhp, void *data) | |
} | ||
|
||
if (cbp->cb_dedup_stats) | ||
print_dedup_stats(config); | ||
print_dedup_stats(zhp, config, cbp->cb_literal); | ||
} else { | ||
(void) printf(gettext("config: The configuration cannot be " | ||
"determined.\n")); | ||
|
@@ -9412,7 +9507,8 @@ zpool_do_status(int argc, char **argv) | |
cmd = optarg; | ||
break; | ||
case 'D': | ||
cb.cb_dedup_stats = B_TRUE; | ||
if (++cb.cb_dedup_stats > 2) | ||
cb.cb_dedup_stats = 2; | ||
break; | ||
case 'e': | ||
cb.cb_print_unhealthy = B_TRUE; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.