whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 92825b0298ca6822085ef483f914b6e0dea9bf66
parent 1fbf3e48123d701584bc75ccac67ef2fe412ac4c
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Tue, 12 Mar 2019 14:53:57 -0700

Merge tag 'for-5.1-part2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "Correctness and a deadlock fixes"

* tag 'for-5.1-part2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: zstd: ensure reclaim timer is properly cleaned up
  btrfs: move ulist allocation out of transaction in quota enable
  btrfs: save drop_progress if we drop refs at all
  btrfs: check for refs on snapshot delete resume
  Btrfs: fix deadlock between clone/dedupe and rename
  Btrfs: fix corruption reading shared and compressed extents after hole punching

Diffstat:
Mfs/btrfs/ctree.h | 2++
Mfs/btrfs/extent-tree.c | 74+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
Mfs/btrfs/extent_io.c | 4++--
Mfs/btrfs/ioctl.c | 21+++------------------
Mfs/btrfs/qgroup.c | 13++++++-------
Mfs/btrfs/root-tree.c | 8++++++--
Mfs/btrfs/zstd.c | 6++++--
7 files changed, 90 insertions(+), 38 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h @@ -1210,6 +1210,8 @@ enum { * Set for the subvolume tree owning the reloc tree. */ BTRFS_ROOT_DEAD_RELOC_TREE, + /* Mark dead root stored on device whose cleanup needs to be resumed */ + BTRFS_ROOT_DEAD_TREE, }; /* diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c @@ -8764,6 +8764,8 @@ struct walk_control { u64 refs[BTRFS_MAX_LEVEL]; u64 flags[BTRFS_MAX_LEVEL]; struct btrfs_key update_progress; + struct btrfs_key drop_progress; + int drop_level; int stage; int level; int shared_level; @@ -8771,6 +8773,7 @@ struct walk_control { int keep_locks; int reada_slot; int reada_count; + int restarted; }; #define DROP_REFERENCE 1 @@ -8934,6 +8937,33 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, } /* + * This is used to verify a ref exists for this root to deal with a bug where we + * would have a drop_progress key that hadn't been updated properly. + */ +static int check_ref_exists(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, u64 parent, + int level) +{ + struct btrfs_path *path; + struct btrfs_extent_inline_ref *iref; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = lookup_extent_backref(trans, path, &iref, bytenr, + root->fs_info->nodesize, parent, + root->root_key.objectid, level, 0); + btrfs_free_path(path); + if (ret == -ENOENT) + return 0; + if (ret < 0) + return ret; + return 1; +} + +/* * helper to process tree block pointer. * * when wc->stage == DROP_REFERENCE, this function checks @@ -9088,6 +9118,23 @@ skip: } /* + * If we had a drop_progress we need to verify the refs are set + * as expected. If we find our ref then we know that from here + * on out everything should be correct, and we can clear the + * ->restarted flag. + */ + if (wc->restarted) { + ret = check_ref_exists(trans, root, bytenr, parent, + level - 1); + if (ret < 0) + goto out_unlock; + if (ret == 0) + goto no_delete; + ret = 0; + wc->restarted = 0; + } + + /* * Reloc tree doesn't contribute to qgroup numbers, and we have * already accounted them at merge time (replace_path), * thus we could skip expensive subtree trace here. @@ -9102,13 +9149,23 @@ skip: ret); } } + + /* + * We need to update the next key in our walk control so we can + * update the drop_progress key accordingly. We don't care if + * find_next_key doesn't find a key because that means we're at + * the end and are going to clean up now. + */ + wc->drop_level = level; + find_next_key(path, level, &wc->drop_progress); + ret = btrfs_free_extent(trans, root, bytenr, fs_info->nodesize, parent, root->root_key.objectid, level - 1, 0); if (ret) goto out_unlock; } - +no_delete: *lookup_info = 1; ret = 1; @@ -9425,6 +9482,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, } } + wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state); wc->level = level; wc->shared_level = -1; wc->stage = DROP_REFERENCE; @@ -9452,12 +9510,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root, } if (wc->stage == DROP_REFERENCE) { - level = wc->level; - btrfs_node_key(path->nodes[level], - &root_item->drop_progress, - path->slots[level]); - root_item->drop_level = level; - } + wc->drop_level = wc->level; + btrfs_node_key_to_cpu(path->nodes[wc->drop_level], + &wc->drop_progress, + path->slots[wc->drop_level]); + } + btrfs_cpu_key_to_disk(&root_item->drop_progress, + &wc->drop_progress); + root_item->drop_level = wc->drop_level; BUG_ON(wc->level == 0); if (btrfs_should_end_transaction(trans) || diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c @@ -2995,11 +2995,11 @@ static int __do_readpage(struct extent_io_tree *tree, */ if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) && prev_em_start && *prev_em_start != (u64)-1 && - *prev_em_start != em->orig_start) + *prev_em_start != em->start) force_bio_submit = true; if (prev_em_start) - *prev_em_start = em->orig_start; + *prev_em_start = em->start; free_extent_map(em); em = NULL; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c @@ -3207,21 +3207,6 @@ out: return ret; } -static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) -{ - inode_unlock(inode1); - inode_unlock(inode2); -} - -static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2) -{ - if (inode1 < inode2) - swap(inode1, inode2); - - inode_lock_nested(inode1, I_MUTEX_PARENT); - inode_lock_nested(inode2, I_MUTEX_CHILD); -} - static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, struct inode *inode2, u64 loff2, u64 len) { @@ -3956,7 +3941,7 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in, if (same_inode) inode_lock(inode_in); else - btrfs_double_inode_lock(inode_in, inode_out); + lock_two_nondirectories(inode_in, inode_out); /* don't make the dst file partly checksummed */ if ((BTRFS_I(inode_in)->flags & BTRFS_INODE_NODATASUM) != @@ -4013,7 +3998,7 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in, if (same_inode) inode_unlock(inode_in); else - btrfs_double_inode_unlock(inode_in, inode_out); + unlock_two_nondirectories(inode_in, inode_out); return ret; } @@ -4043,7 +4028,7 @@ loff_t btrfs_remap_file_range(struct file *src_file, loff_t off, if (same_inode) inode_unlock(src_inode); else - btrfs_double_inode_unlock(src_inode, dst_inode); + unlock_two_nondirectories(src_inode, dst_inode); return ret < 0 ? ret : len; } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c @@ -894,6 +894,12 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) if (fs_info->quota_root) goto out; + fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); + if (!fs_info->qgroup_ulist) { + ret = -ENOMEM; + goto out; + } + /* * 1 for quota root item * 1 for BTRFS_QGROUP_STATUS item @@ -909,13 +915,6 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) goto out; } - fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); - if (!fs_info->qgroup_ulist) { - ret = -ENOMEM; - btrfs_abort_transaction(trans, ret); - goto out; - } - /* * initially create the quota tree */ diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c @@ -263,8 +263,10 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info) if (root) { WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)); - if (btrfs_root_refs(&root->root_item) == 0) + if (btrfs_root_refs(&root->root_item) == 0) { + set_bit(BTRFS_ROOT_DEAD_TREE, &root->state); btrfs_add_dead_root(root); + } continue; } @@ -310,8 +312,10 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info) break; } - if (btrfs_root_refs(&root->root_item) == 0) + if (btrfs_root_refs(&root->root_item) == 0) { + set_bit(BTRFS_ROOT_DEAD_TREE, &root->state); btrfs_add_dead_root(root); + } } btrfs_free_path(path); diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c @@ -195,8 +195,7 @@ static void zstd_cleanup_workspace_manager(void) struct workspace *workspace; int i; - del_timer(&wsm.timer); - + spin_lock(&wsm.lock); for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) { while (!list_empty(&wsm.idle_ws[i])) { workspace = container_of(wsm.idle_ws[i].next, @@ -206,6 +205,9 @@ static void zstd_cleanup_workspace_manager(void) wsm.ops->free_workspace(&workspace->list); } } + spin_unlock(&wsm.lock); + + del_timer_sync(&wsm.timer); } /*