whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 312b3a93dda6db9354b0c6b0f1868c1434e8c787
parent 12491ed354d23c0ecbe02459bf4be58b8c772bc8
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Sun,  3 Feb 2019 08:48:33 -0800

Merge tag 'for-5.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - regression fix: transaction commit can run away due to delayed ref
   waiting heuristic, this is not necessary now because of the proper
   reservation mechanism introduced in 5.0

 - regression fix: potential crash due to use-before-check of an ERR_PTR
   return value

 - fix for transaction abort during transaction commit that needs to
   properly clean up pending block groups

 - fix deadlock during b-tree node/leaf splitting, when this happens on
   some of the fundamental trees, we must prevent new tree block
   allocation to re-enter indirectly via the block group flushing path

 - potential memory leak after errors during mount

* tag 'for-5.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: On error always free subvol_name in btrfs_mount
  btrfs: clean up pending block groups when transaction commit aborts
  btrfs: fix potential oops in device_list_add
  btrfs: don't end the transaction for delayed refs in throttle
  Btrfs: fix deadlock when allocating tree block during leaf/node split

Diffstat:
Mfs/btrfs/ctree.c | 78++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
Mfs/btrfs/super.c | 3+++
Mfs/btrfs/transaction.c | 24++++++++++++++++--------
Mfs/btrfs/volumes.c | 4++--
4 files changed, 71 insertions(+), 38 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c @@ -968,6 +968,48 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, return 0; } +static struct extent_buffer *alloc_tree_block_no_bg_flush( + struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 parent_start, + const struct btrfs_disk_key *disk_key, + int level, + u64 hint, + u64 empty_size) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct extent_buffer *ret; + + /* + * If we are COWing a node/leaf from the extent, chunk, device or free + * space trees, make sure that we do not finish block group creation of + * pending block groups. We do this to avoid a deadlock. + * COWing can result in allocation of a new chunk, and flushing pending + * block groups (btrfs_create_pending_block_groups()) can be triggered + * when finishing allocation of a new chunk. Creation of a pending block + * group modifies the extent, chunk, device and free space trees, + * therefore we could deadlock with ourselves since we are holding a + * lock on an extent buffer that btrfs_create_pending_block_groups() may + * try to COW later. + * For similar reasons, we also need to delay flushing pending block + * groups when splitting a leaf or node, from one of those trees, since + * we are holding a write lock on it and its parent or when inserting a + * new root node for one of those trees. + */ + if (root == fs_info->extent_root || + root == fs_info->chunk_root || + root == fs_info->dev_root || + root == fs_info->free_space_root) + trans->can_flush_pending_bgs = false; + + ret = btrfs_alloc_tree_block(trans, root, parent_start, + root->root_key.objectid, disk_key, level, + hint, empty_size); + trans->can_flush_pending_bgs = true; + + return ret; +} + /* * does the dirty work in cow of a single block. The parent block (if * supplied) is updated to point to the new cow copy. The new buffer is marked @@ -1015,28 +1057,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent) parent_start = parent->start; - /* - * If we are COWing a node/leaf from the extent, chunk, device or free - * space trees, make sure that we do not finish block group creation of - * pending block groups. We do this to avoid a deadlock. - * COWing can result in allocation of a new chunk, and flushing pending - * block groups (btrfs_create_pending_block_groups()) can be triggered - * when finishing allocation of a new chunk. Creation of a pending block - * group modifies the extent, chunk, device and free space trees, - * therefore we could deadlock with ourselves since we are holding a - * lock on an extent buffer that btrfs_create_pending_block_groups() may - * try to COW later. - */ - if (root == fs_info->extent_root || - root == fs_info->chunk_root || - root == fs_info->dev_root || - root == fs_info->free_space_root) - trans->can_flush_pending_bgs = false; - - cow = btrfs_alloc_tree_block(trans, root, parent_start, - root->root_key.objectid, &disk_key, level, - search_start, empty_size); - trans->can_flush_pending_bgs = true; + cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key, + level, search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); @@ -3345,8 +3367,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, else btrfs_node_key(lower, &lower_key, 0); - c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, - &lower_key, level, root->node->start, 0); + c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level, + root->node->start, 0); if (IS_ERR(c)) return PTR_ERR(c); @@ -3475,8 +3497,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, mid = (c_nritems + 1) / 2; btrfs_node_key(c, &disk_key, mid); - split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, - &disk_key, level, c->start, 0); + split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level, + c->start, 0); if (IS_ERR(split)) return PTR_ERR(split); @@ -4260,8 +4282,8 @@ again: else btrfs_item_key(l, &disk_key, mid); - right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, - &disk_key, 0, l->start, 0); + right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0, + l->start, 0); if (IS_ERR(right)) return PTR_ERR(right); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c @@ -1621,6 +1621,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, flags | SB_RDONLY, device_name, data); if (IS_ERR(mnt_root)) { root = ERR_CAST(mnt_root); + kfree(subvol_name); goto out; } @@ -1630,12 +1631,14 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, if (error < 0) { root = ERR_PTR(error); mntput(mnt_root); + kfree(subvol_name); goto out; } } } if (IS_ERR(mnt_root)) { root = ERR_CAST(mnt_root); + kfree(subvol_name); goto out; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c @@ -850,14 +850,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, btrfs_trans_release_chunk_metadata(trans); - if (lock && should_end_transaction(trans) && - READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) { - spin_lock(&info->trans_lock); - if (cur_trans->state == TRANS_STATE_RUNNING) - cur_trans->state = TRANS_STATE_BLOCKED; - spin_unlock(&info->trans_lock); - } - if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { if (throttle) return btrfs_commit_transaction(trans); @@ -1879,6 +1871,21 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err) kmem_cache_free(btrfs_trans_handle_cachep, trans); } +/* + * Release reserved delayed ref space of all pending block groups of the + * transaction and remove them from the list + */ +static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans) +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + struct btrfs_block_group_cache *block_group, *tmp; + + list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { + btrfs_delayed_refs_rsv_release(fs_info, 1); + list_del_init(&block_group->bg_list); + } +} + static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) { /* @@ -2270,6 +2277,7 @@ scrub_continue: btrfs_scrub_continue(fs_info); cleanup_transaction: btrfs_trans_release_metadata(trans); + btrfs_cleanup_pending_block_groups(trans); btrfs_trans_release_chunk_metadata(trans); trans->block_rsv = NULL; btrfs_warn(fs_info, "Skipping commit of aborted transaction."); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c @@ -957,11 +957,11 @@ static noinline struct btrfs_device *device_list_add(const char *path, else fs_devices = alloc_fs_devices(disk_super->fsid, NULL); - fs_devices->fsid_change = fsid_change_in_progress; - if (IS_ERR(fs_devices)) return ERR_CAST(fs_devices); + fs_devices->fsid_change = fsid_change_in_progress; + mutex_lock(&fs_devices->device_list_mutex); list_add(&fs_devices->fs_list, &fs_uuids);