Skip to content

Commit

Permalink
Btrfs: implement the free space B-tree
Browse files Browse the repository at this point in the history
The free space cache has turned out to be a scalability bottleneck on
large, busy filesystems. When the cache for a lot of block groups needs
to be written out, we can get extremely long commit times; if this
happens in the critical section, things are especially bad because we
block new transactions from happening.

The main problem with the free space cache is that it has to be written
out in its entirety and is managed in an ad hoc fashion. Using a B-tree
to store free space fixes this: updates can be done as needed and we get
all of the benefits of using a B-tree: checksumming, RAID handling,
well-understood behavior.

With the free space tree, we get commit times that are about the same as
the no cache case with load times slower than the free space cache case
but still much faster than the no cache case. Free space is represented
with extents until it becomes more space-efficient to use bitmaps,
giving us similar space overhead to the free space cache.

The operations on the free space tree are: adding and removing free
space, handling the creation and deletion of block groups, and loading
the free space for a block group. We can also create the free space tree
by walking the extent tree and clear the free space tree.

Signed-off-by: Omar Sandoval <[email protected]>
Signed-off-by: Chris Mason <[email protected]>
  • Loading branch information
osandov authored and masoncl committed Dec 17, 2015
1 parent 208acb8 commit a5ed918
Show file tree
Hide file tree
Showing 5 changed files with 1,686 additions and 4 deletions.
2 changes: 1 addition & 1 deletion fs/btrfs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
uuid-tree.o props.o hash.o
uuid-tree.o props.o hash.o free-space-tree.o

btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
Expand Down
27 changes: 26 additions & 1 deletion fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1302,8 +1302,20 @@ struct btrfs_block_group_cache {
u64 delalloc_bytes;
u64 bytes_super;
u64 flags;
u64 sectorsize;
u64 cache_generation;
u32 sectorsize;

/*
* If the free space extent count exceeds this number, convert the block
* group to bitmaps.
*/
u32 bitmap_high_thresh;

/*
* If the free space extent count drops below this number, convert the
* block group back to extents.
*/
u32 bitmap_low_thresh;

/*
* It is just used for the delayed data space allocation because
Expand Down Expand Up @@ -1359,6 +1371,15 @@ struct btrfs_block_group_cache {
struct list_head io_list;

struct btrfs_io_ctl io_ctl;

/* Lock for free space tree operations. */
struct mutex free_space_lock;

/*
* Does the block group need to be added to the free space tree?
* Protected by free_space_lock.
*/
int needs_free_space;
};

/* delayed seq elem */
Expand Down Expand Up @@ -1410,6 +1431,7 @@ struct btrfs_fs_info {
struct btrfs_root *csum_root;
struct btrfs_root *quota_root;
struct btrfs_root *uuid_root;
struct btrfs_root *free_space_root;

/* the log root tree is a directory of all the other log roots */
struct btrfs_root *log_root_tree;
Expand Down Expand Up @@ -3555,6 +3577,9 @@ void btrfs_end_write_no_snapshoting(struct btrfs_root *root);
void check_system_chunk(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const u64 type);
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
struct btrfs_fs_info *info, u64 start, u64 end);

/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
Expand Down
5 changes: 3 additions & 2 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -337,8 +337,8 @@ static void put_caching_control(struct btrfs_caching_control *ctl)
* we need to check the pinned_extents for any extents that can't be used yet
* since their free space will be released as soon as the transaction commits.
*/
static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
struct btrfs_fs_info *info, u64 start, u64 end)
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
struct btrfs_fs_info *info, u64 start, u64 end)
{
u64 extent_start, extent_end, size, total_added = 0;
int ret;
Expand Down Expand Up @@ -9381,6 +9381,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
INIT_LIST_HEAD(&cache->io_list);
btrfs_init_free_space_ctl(cache);
atomic_set(&cache->trimming, 0);
mutex_init(&cache->free_space_lock);

return cache;
}
Expand Down
Loading

0 comments on commit a5ed918

Please sign in to comment.