Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TRIM/UNMAP/DISCARD support for vdevs (2) #1016

Closed
wants to merge 28 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
5d4b1da
Make sure free ZIO sizes are correct for gang blocks.
dechamps Sep 12, 2012
56fd167
Export avl_is_empty from zavl.
dechamps Sep 14, 2012
e26b714
Add TRIM support.
Aug 31, 2012
eeaf02a
Fix frame size for trim_map_free_locked().
dechamps Aug 31, 2012
8550163
Add ZIO TRIM statistics.
dechamps Sep 17, 2012
bd7b454
Don't test for gang blocks when populating the trim map.
dechamps Sep 12, 2012
1e8545e
Don't TRIM after the SPA has been frozen.
dechamps Sep 14, 2012
4eb0414
Add TRIM support for disk vdevs using Linux DISCARD interface.
dechamps Aug 31, 2012
1709065
Add TRIM support for file vdevs.
dechamps Sep 3, 2012
71f704f
Add zfs_trim_zero parameter.
dechamps Sep 18, 2012
64016ec
Use zfs_trim_zero=1 in ztest.
dechamps Sep 18, 2012
c34da93
Fix assertion failure due to stale inflight writes.
dechamps Sep 6, 2012
5ee41d3
Fix assertion failure due to inflight frees.
dechamps Sep 7, 2012
7e03b85
Fix TRIM with ashift=12.
dechamps Sep 19, 2012
479b028
Fix an issue with a NULL zio buf free on raidz.
dechamps Sep 20, 2012
6a3ceba
Don't register repair writes in the trim map.
dechamps Oct 2, 2012
dc1d05f
Lock SCL_STATE while waiting for TRIM I/Os to complete.
dechamps Sep 21, 2012
9c15fca
Don't use ZIO_FLAG_CONFIG_WRITER for TRIM ZIOs.
dechamps Sep 21, 2012
082e2b0
TRIM the whole vdev on create/add/attach.
dechamps Sep 20, 2012
31aae37
Add TRIM support for L2ARC.
dechamps Sep 25, 2012
c85f097
TRIM cache devices on export and remove.
dechamps Sep 25, 2012
5b46ad4
Improve TXG handling in the TRIM module.
dechamps Oct 3, 2012
f60b8f5
Silence the "task trim blocked" kernel message.
dechamps Oct 3, 2012
c502f67
Add TRIM TXG batching.
dechamps Oct 3, 2012
17122c3
TRIM cache devices based on time instead of TXGs.
dechamps Oct 3, 2012
1d668b0
Align trim map frees using vdev ashift.
dechamps Oct 5, 2012
2f55683
Fix an integer underflow issue when freezing the SPA early.
dechamps Oct 6, 2012
455d66d
Make sure DISCARD requests have at least one iovec.
dechamps Oct 7, 2012
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cmd/ztest/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -6038,6 +6038,9 @@ main(int argc, char **argv)
/* Override location of zpool.cache */
VERIFY(asprintf((char **)&spa_config_path, "%s/zpool.cache",
ztest_opts.zo_dir) != -1);

/* Make sure TRIM zeroes data so that we can test it */
zfs_trim_zero = 1;

ztest_ds = umem_alloc(ztest_opts.zo_datasets * sizeof (ztest_ds_t),
UMEM_NOFAIL);
Expand Down
20 changes: 20 additions & 0 deletions config/kernel-max-discard-sectors.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
dnl #
dnl # 2.6.32 API change
dnl # max_discard_sectors is available.
dnl #
dnl # The check test-compiles a snippet that includes <linux/blkdev.h> and
dnl # assigns to the max_discard_sectors member of a struct queue_limits.
dnl # If the snippet compiles, HAVE_MAX_DISCARD_SECTORS is defined to 1;
dnl # otherwise nothing is defined and only "no" is reported.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_MAX_DISCARD_SECTORS], [
AC_MSG_CHECKING([whether ql->max_discard_sectors is available])
ZFS_LINUX_TRY_COMPILE([
#include <linux/blkdev.h>
],[
struct queue_limits ql __attribute__ ((unused));

ql.max_discard_sectors = 0;
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_MAX_DISCARD_SECTORS, 1,
[ql->max_discard_sectors is available])
],[
AC_MSG_RESULT(no)
])
])
1 change: 1 addition & 0 deletions config/kernel.m4
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_GET_GENDISK
ZFS_AC_KERNEL_RQ_IS_SYNC
ZFS_AC_KERNEL_RQ_FOR_EACH_SEGMENT
ZFS_AC_KERNEL_MAX_DISCARD_SECTORS
ZFS_AC_KERNEL_DISCARD_GRANULARITY
ZFS_AC_KERNEL_CONST_XATTR_HANDLER
ZFS_AC_KERNEL_XATTR_HANDLER_GET
Expand Down
37 changes: 37 additions & 0 deletions include/linux/blkdev_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,37 @@ bio_set_flags_failfast(struct block_device *bdev, int *flags)
# define VDEV_REQ_DISCARD REQ_DISCARD
#endif

/*
 * 2.6.32 API change
 * blk_queue_discard is now available.
 *
 * When REQ_DISCARD is defined but HAVE_BLK_QUEUE_DISCARD is not, provide a
 * local blk_queue_discard() that returns 1 if the queue has a
 * prepare_discard_fn callback set and 0 otherwise.
 */
#if defined(REQ_DISCARD) && !defined(HAVE_BLK_QUEUE_DISCARD)
static inline unsigned long
blk_queue_discard(struct request_queue *q)
{
	return (q->prepare_discard_fn ? 1UL : 0UL);
}
#endif /* REQ_DISCARD && !HAVE_BLK_QUEUE_DISCARD */

/*
 * 2.6.32 API change
 * The maximum discard request size is read from:
 *   request_queue.limits.max_discard_sectors  (HAVE_MAX_DISCARD_SECTORS)
 *   request_queue.max_hw_sectors              (otherwise, before 2.6.32)
 *
 * Returns that limit, in sectors, for the given request queue.
 */
static inline unsigned int
blk_queue_max_discard_sectors_get(struct request_queue *q)
{
	unsigned int max_sectors;

#ifdef HAVE_MAX_DISCARD_SECTORS
	max_sectors = q->limits.max_discard_sectors;
#else
	max_sectors = q->max_hw_sectors;
#endif

	return (max_sectors);
}

/*
* 2.6.33 API change
* Discard granularity and alignment restrictions may now be set. For
Expand All @@ -443,8 +474,14 @@ blk_queue_discard_granularity(struct request_queue *q, unsigned int dg)
{
q->limits.discard_granularity = dg;
}
/*
 * Read back the discard granularity stored in the queue limits of the
 * given request queue.
 */
static inline unsigned int
blk_queue_discard_granularity_get(struct request_queue *q)
{
	unsigned int granularity = q->limits.discard_granularity;

	return (granularity);
}
#else
#define blk_queue_discard_granularity(x, dg) ((void)0)
#define blk_queue_discard_granularity_get(x) (0)
#endif /* HAVE_DISCARD_GRANULARITY */

/*
Expand Down
1 change: 1 addition & 0 deletions include/sys/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/space_map.h \
$(top_srcdir)/include/sys/spa.h \
$(top_srcdir)/include/sys/spa_impl.h \
$(top_srcdir)/include/sys/trim_map.h \
$(top_srcdir)/include/sys/txg.h \
$(top_srcdir)/include/sys/txg_impl.h \
$(top_srcdir)/include/sys/u8_textprep_data.h \
Expand Down
2 changes: 1 addition & 1 deletion include/sys/arc.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ void arc_fini(void);
*/

void l2arc_add_vdev(spa_t *spa, vdev_t *vd);
void l2arc_remove_vdev(vdev_t *vd);
void l2arc_remove_vdev(vdev_t *vd, int permanent);
boolean_t l2arc_vdev_present(vdev_t *vd);
void l2arc_init(void);
void l2arc_fini(void);
Expand Down
3 changes: 3 additions & 0 deletions include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,9 @@ struct spa {
spa_proc_state_t spa_proc_state; /* see definition */
proc_t *spa_proc; /* "zpool-poolname" process */
uint64_t spa_did; /* if procp != p0, did of t1 */
kthread_t *spa_trim_thread; /* thread sending TRIM I/Os */
kmutex_t spa_trim_lock; /* protects spa_trim_cv */
kcondvar_t spa_trim_cv; /* used to notify TRIM thread */
boolean_t spa_autoreplace; /* autoreplace set in open */
int spa_vdev_locks; /* locks grabbed */
uint64_t spa_creation_version; /* version at pool creation */
Expand Down
51 changes: 51 additions & 0 deletions include/sys/trim_map.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved.
*/

#ifndef _SYS_TRIM_MAP_H
#define _SYS_TRIM_MAP_H

#include <sys/avl.h>
#include <sys/list.h>
#include <sys/spa.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * TRIM map entry points.  These operate on a vdev or on a zio; their
 * implementation is not visible from this header.
 */
extern void trim_map_create(vdev_t *vd);
extern void trim_map_destroy(vdev_t *vd);
extern void trim_map_free(vdev_t *vd, uint64_t offset, uint64_t size,
    uint64_t txg);
extern boolean_t trim_map_write_start(zio_t *zio);
extern void trim_map_write_done(zio_t *zio);

/*
 * TRIM thread entry points.  Each takes the pool (spa_t) it applies to.
 */
extern void trim_thread_create(spa_t *spa);
extern void trim_thread_destroy(spa_t *spa);
extern void trim_thread_wakeup(spa_t *spa);

#ifdef __cplusplus
}
#endif

#endif /* _SYS_TRIM_MAP_H */
1 change: 1 addition & 0 deletions include/sys/vdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ typedef enum vdev_dtl_type {
} vdev_dtl_type_t;

extern int zfs_nocacheflush;
extern int zfs_trim_zero;

extern int vdev_open(vdev_t *);
extern void vdev_open_children(vdev_t *);
Expand Down
4 changes: 4 additions & 0 deletions include/sys/vdev_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ struct vdev {
uint64_t vdev_unspare; /* unspare when resilvering done */
hrtime_t vdev_last_try; /* last reopen time */
boolean_t vdev_nowritecache; /* true if flushwritecache failed */
boolean_t vdev_notrim; /* true if trim failed */
boolean_t vdev_checkremove; /* temporary online test */
boolean_t vdev_forcefault; /* force online fault */
boolean_t vdev_splitting; /* split or repair in progress */
Expand All @@ -204,6 +205,7 @@ struct vdev {
spa_aux_vdev_t *vdev_aux; /* for l2cache vdevs */
zio_t *vdev_probe_zio; /* root of current probe */
vdev_aux_t vdev_label_aux; /* on-disk aux state */
struct trim_map *vdev_trimmap;

/*
* For DTrace to work in userland (libzpool) context, these fields must
Expand Down Expand Up @@ -326,6 +328,8 @@ extern void vdev_set_min_asize(vdev_t *vd);
*/
extern int zfs_vdev_cache_size;

extern int zfs_notrim;

#ifdef __cplusplus
}
#endif
Expand Down
6 changes: 6 additions & 0 deletions include/sys/zfs_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -493,13 +493,19 @@ typedef struct vsecattr {

#define CRCREAT 0

#define F_FREESP 11 /* Free file space */

extern int fop_getattr(vnode_t *vp, vattr_t *vap);
extern int fop_space(vnode_t *vp, int cmd, struct flock *bfp,
int flag, off_t offset);

/*
 * Vnode operation shims.  Each VOP_* macro accepts the full argument list
 * used at its call sites and forwards only the arguments the underlying
 * helper takes; the remaining arguments are dropped without being
 * evaluated.
 *
 * No expansion ends in a semicolon, so every macro can be used as an
 * expression (in a condition, an assignment, a return statement, ...).
 * Callers supply the terminating ';' themselves.
 */
#define VOP_CLOSE(vp, f, c, o, cr, ct) vn_close(vp)
#define VOP_PUTPAGE(vp, of, sz, fl, cr, ct) 0
#define VOP_GETATTR(vp, vap, fl, cr, ct) fop_getattr((vp), (vap))

#define VOP_FSYNC(vp, f, cr, ct) fsync((vp)->v_fd)
#define VOP_SPACE(vp, cmd, bfp, flag, offset, cr, ct) \
	fop_space((vp), (cmd), (bfp), (flag), (offset))

#define VN_RELE(vp) vn_close(vp)

Expand Down
46 changes: 43 additions & 3 deletions include/sys/zio.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,8 @@ enum zio_compress {
#define ZIO_PRIORITY_RESILVER (zio_priority_table[9])
#define ZIO_PRIORITY_SCRUB (zio_priority_table[10])
#define ZIO_PRIORITY_DDT_PREFETCH (zio_priority_table[11])
#define ZIO_PRIORITY_TABLE_SIZE 12
#define ZIO_PRIORITY_TRIM (zio_priority_table[12])
#define ZIO_PRIORITY_TABLE_SIZE 13

#define ZIO_PIPELINE_CONTINUE 0x100
#define ZIO_PIPELINE_STOP 0x101
Expand Down Expand Up @@ -357,6 +358,39 @@ typedef struct zio_link {
list_node_t zl_child_node;
} zio_link_t;

/*
 * Counters used for the TRIM kstat.
 */
typedef struct zio_trim_stats {
	/* Number of bytes successfully TRIMmed. */
	kstat_named_t zio_trim_bytes;

	/* Number of successful TRIM requests. */
	kstat_named_t zio_trim_success;

	/* Number of TRIM requests that failed because TRIM is unsupported. */
	kstat_named_t zio_trim_unsupported;

	/* Number of TRIM requests that failed for other reasons. */
	kstat_named_t zio_trim_failed;
} zio_trim_stats_t;

extern zio_trim_stats_t zio_trim_stats;

/*
 * Helpers for updating the TRIM counters in zio_trim_stats.
 *
 * ZIO_TRIM_STAT_INCR(stat, val) atomically adds val to the named counter;
 * ZIO_TRIM_STAT_BUMP(stat) adds 1.  "stat" is the member name in
 * zio_trim_stats_t (e.g. zio_trim_success).
 *
 * The expansions deliberately do not end in a semicolon: the caller writes
 * it, which keeps the macros safe in unbraced if/else bodies.
 */
#define ZIO_TRIM_STAT_INCR(stat, val) \
	atomic_add_64(&zio_trim_stats.stat.value.ui64, (val))
#define ZIO_TRIM_STAT_BUMP(stat) \
	ZIO_TRIM_STAT_INCR(stat, 1)

struct zio {
/* Core information about this I/O */
zbookmark_t io_bookmark;
Expand Down Expand Up @@ -429,6 +463,9 @@ struct zio {

/* Taskq dispatching state */
taskq_ent_t io_tqent;

avl_node_t io_trim_node;
list_node_t io_trim_link;
};

extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
Expand Down Expand Up @@ -459,7 +496,8 @@ extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg,
zio_done_func_t *done, void *private, enum zio_flag flags);

extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
zio_done_func_t *done, void *private, int priority, enum zio_flag flags);
uint64_t offset, uint64_t size, zio_done_func_t *done,
void *private, int priority, enum zio_flag flags);

extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, void *data, int checksum,
Expand All @@ -472,12 +510,14 @@ extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
boolean_t labels);

extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp, enum zio_flag flags);
const blkptr_t *bp, uint64_t size, enum zio_flag flags);

extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp,
blkptr_t *old_bp, uint64_t size, boolean_t use_slog);
extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp);
extern void zio_flush(zio_t *zio, vdev_t *vd);
extern zio_t *zio_trim(zio_t *zio, spa_t *spa, vdev_t *vd,
uint64_t offset, uint64_t size, enum zio_flag flags);
extern void zio_shrink(zio_t *zio, uint64_t size);

extern int zio_wait(zio_t *zio);
Expand Down
10 changes: 6 additions & 4 deletions include/sys/zio_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ enum zio_stage {

ZIO_STAGE_READY = 1 << 15, /* RWFCI */

ZIO_STAGE_VDEV_IO_START = 1 << 16, /* RW--I */
ZIO_STAGE_VDEV_IO_DONE = 1 << 17, /* RW--I */
ZIO_STAGE_VDEV_IO_ASSESS = 1 << 18, /* RW--I */
ZIO_STAGE_VDEV_IO_START = 1 << 16, /* RWF-I */
ZIO_STAGE_VDEV_IO_DONE = 1 << 17, /* RWF-- */
ZIO_STAGE_VDEV_IO_ASSESS = 1 << 18, /* RWF-I */

ZIO_STAGE_CHECKSUM_VERIFY = 1 << 19, /* R---- */

Expand Down Expand Up @@ -143,7 +143,9 @@ enum zio_stage {
#define ZIO_FREE_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
ZIO_STAGE_FREE_BP_INIT | \
ZIO_STAGE_DVA_FREE)
ZIO_STAGE_DVA_FREE | \
ZIO_STAGE_VDEV_IO_START | \
ZIO_STAGE_VDEV_IO_ASSESS)

#define ZIO_DDT_FREE_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
Expand Down
2 changes: 2 additions & 0 deletions lib/libspl/include/sys/dkio.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ struct dk_geom {
*/
#define DKIOCFLUSHWRITECACHE (DKIOC|34) /* flush cache to phys medium */

#define DKIOCTRIM (DKIOC|35) /* TRIM a block */

struct dk_callback {
void (*dkc_callback)(void *dkc_cookie, int error);
void *dkc_cookie;
Expand Down
8 changes: 8 additions & 0 deletions lib/libspl/include/sys/time.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
#include_next <sys/time.h>
#include <sys/types.h>

#ifndef TIME_MAX
#define TIME_MAX LLONG_MAX
#endif

#ifndef SEC
#define SEC 1
#endif
Expand All @@ -50,6 +54,10 @@
#define NSEC_PER_USEC 1000L
#endif

#ifndef NSEC_PER_SEC
#define NSEC_PER_SEC 1000000000L
#endif

extern hrtime_t gethrtime(void);
extern void gethrestime(timestruc_t *);

Expand Down
1 change: 1 addition & 0 deletions lib/libzpool/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ libzpool_la_SOURCES = \
$(top_srcdir)/module/zfs/spa_history.c \
$(top_srcdir)/module/zfs/spa_misc.c \
$(top_srcdir)/module/zfs/space_map.c \
$(top_srcdir)/module/zfs/trim_map.c \
$(top_srcdir)/module/zfs/txg.c \
$(top_srcdir)/module/zfs/uberblock.c \
$(top_srcdir)/module/zfs/unique.c \
Expand Down
Loading