From 3699b88592f2fcbbe7cdd9993eef9d1134938883 Mon Sep 17 00:00:00 2001 From: George Melikov Date: Tue, 10 Sep 2019 23:34:53 +0300 Subject: [PATCH] zio_compress: introduce max size threshold Now default compression is lz4, which can stop compression process by itself on incompressible data. If there are additional size checks - we will only make our compressratio worse. New usable compression thresholds are: - less than BPE_PAYLOAD_SIZE (embedded_data feature); - at least one saved sector. Old 12.5% threshold is left to minimize the effect on existing user expectations of CPU utilization. If data wasn't compressed - it will be saved as ZIO_COMPRESS_OFF, so if we really need to recompress data without ashift info and check anything - we can just compress it with zero threshold. So, we don't need a new feature flag here! Reviewed-by: Brian Behlendorf Reviewed-by: Tony Hutter Reviewed-by: Alexander Motin Signed-off-by: George Melikov Closes #9416 --- cmd/zstream/zstream_recompress.c | 3 ++- include/sys/zio.h | 3 +++ include/sys/zio_compress.h | 3 ++- man/man7/zfsprops.7 | 26 ++++++++++++++++++-------- module/zfs/arc.c | 13 ++++++++----- module/zfs/dmu_recv.c | 2 +- module/zfs/zio.c | 21 ++++++++++++++++++++- module/zfs/zio_compress.c | 14 +++----------- 8 files changed, 57 insertions(+), 28 deletions(-) diff --git a/cmd/zstream/zstream_recompress.c b/cmd/zstream/zstream_recompress.c index ae2c56320b2a..586ac5623aa5 100644 --- a/cmd/zstream/zstream_recompress.c +++ b/cmd/zstream/zstream_recompress.c @@ -288,7 +288,8 @@ zstream_do_recompress(int argc, char *argv[]) abd_t *pabd = abd_get_from_buf_struct(&abd, buf, bufsz); size_t csize = zio_compress_data(ctype, &dabd, - &pabd, drrw->drr_logical_size, level); + &pabd, drrw->drr_logical_size, + drrw->drr_logical_size, level); size_t rounded = P2ROUNDUP(csize, SPA_MINBLOCKSIZE); if (rounded >= drrw->drr_logical_size) { diff --git a/include/sys/zio.h b/include/sys/zio.h index 3a756949a422..1bc91908acf1 100644 ---
a/include/sys/zio.h +++ b/include/sys/zio.h @@ -29,6 +29,7 @@ * Copyright (c) 2019, Allan Jude * Copyright (c) 2019, 2023, 2024, Klara Inc. * Copyright (c) 2019-2020, Michael Niewöhner + * Copyright (c) 2024 by George Melikov. All rights reserved. */ #ifndef _ZIO_H @@ -601,6 +602,8 @@ extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, extern void zio_flush(zio_t *zio, vdev_t *vd); extern void zio_shrink(zio_t *zio, uint64_t size); +extern size_t zio_get_compression_max_size(uint64_t gcd_alloc, + uint64_t min_alloc, size_t s_len); extern int zio_wait(zio_t *zio); extern void zio_nowait(zio_t *zio); extern void zio_execute(void *zio); diff --git a/include/sys/zio_compress.h b/include/sys/zio_compress.h index 31602039a150..ceef757abd20 100644 --- a/include/sys/zio_compress.h +++ b/include/sys/zio_compress.h @@ -25,6 +25,7 @@ * Copyright (c) 2019, 2024, Klara, Inc. * Use is subject to license terms. * Copyright (c) 2015, 2016 by Delphix. All rights reserved. + * Copyright (c) 2021, 2024 by George Melikov. All rights reserved. */ #ifndef _SYS_ZIO_COMPRESS_H @@ -174,7 +175,7 @@ extern int zfs_lz4_decompress(abd_t *src, abd_t *dst, size_t s_len, * Compress and decompress data if necessary. */ extern size_t zio_compress_data(enum zio_compress c, abd_t *src, abd_t **dst, - size_t s_len, uint8_t level); + size_t s_len, size_t d_len, uint8_t level); extern int zio_decompress_data(enum zio_compress c, abd_t *src, abd_t *abd, size_t s_len, size_t d_len, uint8_t *level); extern int zio_compress_to_feature(enum zio_compress comp); diff --git a/man/man7/zfsprops.7 b/man/man7/zfsprops.7 index 506b7deff5b7..62c4da2d7d19 100644 --- a/man/man7/zfsprops.7 +++ b/man/man7/zfsprops.7 @@ -912,14 +912,24 @@ zeroes (the NUL byte). When a zero-filled block is detected, it is stored as a hole and not compressed using the indicated compression algorithm. 
.Pp -Any block being compressed must be no larger than 7/8 of its original size -after compression, otherwise the compression will not be considered worthwhile -and the block saved uncompressed. -Note that when the logical block is less than -8 times the disk sector size this effectively reduces the necessary compression -ratio; for example, 8 KiB blocks on disks with 4 KiB disk sectors must compress -to 1/2 -or less of their original size. +All blocks are allocated as a whole number of sectors +.Pq chunks of 2^ Ns Sy ashift No bytes , e.g . Sy 512B No or Sy 4KB . +Compression may result in a non-sector-aligned size, which will be rounded up +to a whole number of sectors. +If compression saves less than one whole sector, +the block will be stored uncompressed. +Therefore, blocks whose logical size is a small number of sectors will +experience less compression +(e.g. for +.Sy recordsize Ns = Ns Sy 16K +with +.Sy 4K +sectors, which have 4 sectors per block, +compression needs to save at least 25% to actually save space on disk). +.Pp +There is a +.Sy 12.5% +default compression threshold in addition to sector rounding. .It Xo .Sy context Ns = Ns Sy none Ns | Ns .Ar SELinux-User : Ns Ar SELinux-Role : Ns Ar SELinux-Type : Ns Ar Sensitivity-Level diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 714a30e863a7..42175e9d4cf6 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -29,6 +29,7 @@ * Copyright (c) 2019, 2023, Klara Inc. * Copyright (c) 2019, Allan Jude * Copyright (c) 2020, The FreeBSD Foundation [1] + * Copyright (c) 2021, 2024 by George Melikov. All rights reserved. * * [1] Portions of this software were developed by Allan Jude * under sponsorship from the FreeBSD Foundation.
@@ -1786,7 +1787,7 @@ arc_hdr_authenticate(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj) !HDR_COMPRESSION_ENABLED(hdr)) { abd = NULL; csize = zio_compress_data(HDR_GET_COMPRESS(hdr), - hdr->b_l1hdr.b_pabd, &abd, lsize, hdr->b_complevel); + hdr->b_l1hdr.b_pabd, &abd, lsize, lsize, hdr->b_complevel); ASSERT3P(abd, !=, NULL); ASSERT3U(csize, <=, psize); abd_zero_off(abd, csize, psize - csize); @@ -9029,8 +9030,8 @@ l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize, if (compress != ZIO_COMPRESS_OFF && !HDR_COMPRESSION_ENABLED(hdr)) { cabd = abd_alloc_for_io(MAX(size, asize), ismd); uint64_t csize = zio_compress_data(compress, to_write, &cabd, - size, hdr->b_complevel); - if (csize > psize) { + size, MIN(size, psize), hdr->b_complevel); + if (csize >= size || csize > psize) { /* * We can't re-compress the block into the original * psize. Even if it fits into asize, it does not @@ -10521,9 +10522,11 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb) */ list_insert_tail(&cb->l2wcb_abd_list, abd_buf); - /* try to compress the buffer */ + /* try to compress the buffer, at least one sector to save */ psize = zio_compress_data(ZIO_COMPRESS_LZ4, - abd_buf->abd, &abd, sizeof (*lb), 0); + abd_buf->abd, &abd, sizeof (*lb), + zio_get_compression_max_size(dev->l2ad_vdev->vdev_ashift, + dev->l2ad_vdev->vdev_ashift, sizeof (*lb)), 0); /* a log block is never entirely zero */ ASSERT(psize != 0); diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c index a1752650f3ba..9b3da032f354 100644 --- a/module/zfs/dmu_recv.c +++ b/module/zfs/dmu_recv.c @@ -1408,7 +1408,7 @@ do_corrective_recv(struct receive_writer_arg *rwa, struct drr_write *drrw, abd_t *cabd = abd_alloc_linear(BP_GET_PSIZE(bp), B_FALSE); uint64_t csize = zio_compress_data(BP_GET_COMPRESS(bp), - abd, &cabd, abd_get_size(abd), + abd, &cabd, abd_get_size(abd), BP_GET_PSIZE(bp), rwa->os->os_complevel); abd_zero_off(cabd, csize, BP_GET_PSIZE(bp) - csize); /* Swap in newly 
compressed data into the abd */ diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 53992931e049..5e0215b18a79 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -26,6 +26,7 @@ * Copyright (c) 2019, 2023, 2024, Klara Inc. * Copyright (c) 2019, Allan Jude * Copyright (c) 2021, Datto, Inc. + * Copyright (c) 2021, 2024 by George Melikov. All rights reserved. */ #include @@ -1691,6 +1692,21 @@ zio_roundup_alloc_size(spa_t *spa, uint64_t size) return (spa->spa_min_alloc); } +size_t +zio_get_compression_max_size(uint64_t gcd_alloc, uint64_t min_alloc, + size_t s_len) +{ + size_t d_len; + + /* minimum 12.5% must be saved (legacy value, may be changed later) */ + d_len = s_len - (s_len >> 3); + + d_len = d_len - d_len % gcd_alloc; + if (d_len < min_alloc) + return (BPE_PAYLOAD_SIZE); + return (d_len); +} + /* * ========================================================================== * Prepare to read and write logical blocks @@ -1872,7 +1888,10 @@ zio_write_compress(zio_t *zio) psize = lsize; else psize = zio_compress_data(compress, zio->io_abd, &cabd, - lsize, zp->zp_complevel); + lsize, + zio_get_compression_max_size(spa->spa_gcd_alloc, + spa->spa_min_alloc, lsize), + zp->zp_complevel); if (psize == 0) { compress = ZIO_COMPRESS_OFF; } else if (psize >= lsize) { diff --git a/module/zfs/zio_compress.c b/module/zfs/zio_compress.c index 9182917f75eb..10c482573862 100644 --- a/module/zfs/zio_compress.c +++ b/module/zfs/zio_compress.c @@ -22,15 +22,11 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - */ -/* * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. - */ - -/* * Copyright (c) 2013, 2018 by Delphix. All rights reserved. * Copyright (c) 2019, 2024, Klara, Inc. * Copyright (c) 2019, Allan Jude + * Copyright (c) 2021, 2024 by George Melikov. All rights reserved. 
*/ #include @@ -129,9 +125,9 @@ zio_compress_select(spa_t *spa, enum zio_compress child, size_t zio_compress_data(enum zio_compress c, abd_t *src, abd_t **dst, size_t s_len, - uint8_t level) + size_t d_len, uint8_t level) { - size_t c_len, d_len; + size_t c_len; uint8_t complevel; zio_compress_info_t *ci = &zio_compress_table[c]; @@ -156,15 +152,11 @@ zio_compress_data(enum zio_compress c, abd_t *src, abd_t **dst, size_t s_len, if (*dst == NULL) *dst = abd_alloc_sametype(src, s_len); - /* Compress at least 12.5%, but limit to the size of the dest abd. */ - d_len = MIN(s_len - (s_len >> 3), abd_get_size(*dst)); - c_len = ci->ci_compress(src, *dst, s_len, d_len, complevel); if (c_len > d_len) return (s_len); - ASSERT3U(c_len, <=, d_len); return (c_len); }