Skip to content

Commit

Permalink
btrfs: remove reada infrastructure
Browse files Browse the repository at this point in the history
Currently there is only one user for btrfs metadata readahead, and
that's scrub.

But even for that single user, it does not provide the functionality
scrub needs: scrub needs readahead for the commit root, which the
current readahead can't provide (although it would be pretty easy to
add such a feature).

Despite this, there are some extra problems related to metadata
readahead:

- Duplicated feature with btrfs_path::reada

- Partly duplicated feature of btrfs_fs_info::buffer_radix
  Btrfs already caches its metadata in buffer_radix, while readahead
  tries to read the tree block regardless of whether it is already
  cached.

- Poor layer separation
  Metadata readahead works more or less at the device level.
  This is definitely not the correct layer: metadata lives in the
  btrfs logical address space, so it should not have to care about
  devices at all.

  This creates extra chances for bugs to sneak in and adds
  unnecessary complexity.

- Dead code
  At the very beginning of scrub.c we have #undef DEBUG, rendering all
  the debug-related code useless and impossible to test.

Thus here I propose to remove the metadata readahead mechanism
completely.

[BENCHMARK]
There is a full benchmark of the scrub performance difference, comparing
the old btrfs_reada_add() with btrfs_path::reada.

For the worst case (no dirty metadata, slow HDD), there could be a 5%
performance drop for scrub.
For other cases (even SATA SSD), there is no distinguishable performance
difference.

The numbers are the reported scrub speed, in MiB/s.
Their resolution is limited by the reported duration, which only has a
resolution of 1 second.

	Old		New		Diff
SSD	455.3		466.332		+2.42%
HDD	103.927 	98.012		-5.69%

Comprehensive test methodology is in the cover letter of the patch.

Signed-off-by: Qu Wenruo <[email protected]>
Signed-off-by: David Sterba <[email protected]>
  • Loading branch information
adam900710 authored and kdave committed Jan 7, 2022
1 parent dcf62b2 commit f26c923
Show file tree
Hide file tree
Showing 10 changed files with 3 additions and 1,189 deletions.
2 changes: 1 addition & 1 deletion fs/btrfs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
subpage.o tree-mod-log.o
Expand Down
25 changes: 0 additions & 25 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -821,7 +821,6 @@ struct btrfs_fs_info {
struct btrfs_workqueue *endio_write_workers;
struct btrfs_workqueue *endio_freespace_worker;
struct btrfs_workqueue *caching_workers;
struct btrfs_workqueue *readahead_workers;

/*
* fixup workers take dirty pages that didn't properly go through
Expand Down Expand Up @@ -958,13 +957,6 @@ struct btrfs_fs_info {

struct btrfs_delayed_root *delayed_root;

/* readahead tree */
spinlock_t reada_lock;
struct radix_tree_root reada_tree;

/* readahead works cnt */
atomic_t reada_works_cnt;

/* Extent buffer radix tree */
spinlock_t buffer_lock;
/* Entries are eb->start / sectorsize */
Expand Down Expand Up @@ -3807,23 +3799,6 @@ static inline void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info)
btrfs_bio_counter_sub(fs_info, 1);
}

/* reada.c */
struct reada_control {
struct btrfs_fs_info *fs_info; /* tree to prefetch */
struct btrfs_key key_start;
struct btrfs_key key_end; /* exclusive */
atomic_t elems;
struct kref refcnt;
wait_queue_head_t wait;
};
struct reada_control *btrfs_reada_add(struct btrfs_root *root,
struct btrfs_key *start, struct btrfs_key *end);
int btrfs_reada_wait(void *handle);
void btrfs_reada_detach(void *handle);
int btree_readahead_hook(struct extent_buffer *eb, int err);
void btrfs_reada_remove_dev(struct btrfs_device *dev);
void btrfs_reada_undo_remove_dev(struct btrfs_device *dev);

static inline int is_fstree(u64 rootid)
{
if (rootid == BTRFS_FS_TREE_OBJECTID ||
Expand Down
5 changes: 0 additions & 5 deletions fs/btrfs/dev-replace.c
Original file line number Diff line number Diff line change
Expand Up @@ -906,9 +906,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
}
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);

if (!scrub_ret)
btrfs_reada_remove_dev(src_device);

/*
* We have to use this loop approach because at this point src_device
* has to be available for transaction commit to complete, yet new
Expand All @@ -917,7 +914,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
while (1) {
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
btrfs_reada_undo_remove_dev(src_device);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return PTR_ERR(trans);
}
Expand Down Expand Up @@ -968,7 +964,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
up_write(&dev_replace->rwsem);
mutex_unlock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
btrfs_reada_undo_remove_dev(src_device);
btrfs_rm_dev_replace_blocked(fs_info);
if (tgt_device)
btrfs_destroy_dev_replace_tgtdev(tgt_device);
Expand Down
20 changes: 2 additions & 18 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -665,9 +665,6 @@ static int validate_subpage_buffer(struct page *page, u64 start, u64 end,
if (ret < 0)
goto err;

if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
btree_readahead_hook(eb, ret);

set_extent_buffer_uptodate(eb);

free_extent_buffer(eb);
Expand Down Expand Up @@ -715,10 +712,6 @@ int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
}
ret = validate_extent_buffer(eb);
err:
if (reads_done &&
test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
btree_readahead_hook(eb, ret);

if (ret) {
/*
* our io error hook is going to dec the io pages
Expand Down Expand Up @@ -2232,7 +2225,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_destroy_workqueue(fs_info->endio_freespace_worker);
btrfs_destroy_workqueue(fs_info->delayed_workers);
btrfs_destroy_workqueue(fs_info->caching_workers);
btrfs_destroy_workqueue(fs_info->readahead_workers);
btrfs_destroy_workqueue(fs_info->flush_workers);
btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers);
if (fs_info->discard_ctl.discard_workers)
Expand Down Expand Up @@ -2445,9 +2437,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
fs_info->delayed_workers =
btrfs_alloc_workqueue(fs_info, "delayed-meta", flags,
max_active, 0);
fs_info->readahead_workers =
btrfs_alloc_workqueue(fs_info, "readahead", flags,
max_active, 2);
fs_info->qgroup_rescan_workers =
btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0);
fs_info->discard_ctl.discard_workers =
Expand All @@ -2459,9 +2448,8 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
fs_info->endio_meta_write_workers &&
fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
fs_info->endio_freespace_worker && fs_info->rmw_workers &&
fs_info->caching_workers && fs_info->readahead_workers &&
fs_info->fixup_workers && fs_info->delayed_workers &&
fs_info->qgroup_rescan_workers &&
fs_info->caching_workers && fs_info->fixup_workers &&
fs_info->delayed_workers && fs_info->qgroup_rescan_workers &&
fs_info->discard_ctl.discard_workers)) {
return -ENOMEM;
}
Expand Down Expand Up @@ -3091,7 +3079,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)

atomic_set(&fs_info->async_delalloc_pages, 0);
atomic_set(&fs_info->defrag_running, 0);
atomic_set(&fs_info->reada_works_cnt, 0);
atomic_set(&fs_info->nr_delayed_iputs, 0);
atomic64_set(&fs_info->tree_mod_seq, 0);
fs_info->global_root_tree = RB_ROOT;
Expand All @@ -3102,9 +3089,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
fs_info->tree_mod_log = RB_ROOT;
fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */
/* readahead state */
INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
spin_lock_init(&fs_info->reada_lock);
btrfs_init_ref_verify(fs_info);

fs_info->thread_pool_size = min_t(unsigned long,
Expand Down
3 changes: 0 additions & 3 deletions fs/btrfs/extent_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -3087,9 +3087,6 @@ static void end_bio_extent_readpage(struct bio *bio)
set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
eb->read_mirror = mirror;
atomic_dec(&eb->io_pages);
if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD,
&eb->bflags))
btree_readahead_hook(eb, -EIO);
}
readpage_ok:
if (likely(uptodate)) {
Expand Down
Loading

0 comments on commit f26c923

Please sign in to comment.