whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 6b529fb0a3eabf9c4cc3e94c11477250379ce6d8
parent 72d657dd2115804b93bde4b77e426cc2de70eebf
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Mon, 14 Jan 2019 05:55:51 +1200

Merge tag 'for-5.0-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - two regression fixes in clone/dedupe ioctls, the generic check
   callback needs to lock extents properly and wait for io to avoid
   problems with writeback and relocation

 - fix deadlock when using free space tree due to block group creation

 - a recently added check refuses a valid fileystem with seeding device,
   make that work again with a quickfix, proper solution needs more
   intrusive changes

* tag 'for-5.0-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: Use real device structure to verify dev extent
  Btrfs: fix deadlock when using free space tree due to block group creation
  Btrfs: fix race between reflink/dedupe and relocation
  Btrfs: fix race between cloning range ending at eof and writeback

Diffstat:
Mfs/btrfs/ctree.c | 16+++++++++-------
Mfs/btrfs/ioctl.c | 49+++++++++++++++++++++++++++++++++++++++++++------
Mfs/btrfs/volumes.c | 12++++++++++++
3 files changed, 64 insertions(+), 13 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c @@ -1016,19 +1016,21 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, parent_start = parent->start; /* - * If we are COWing a node/leaf from the extent, chunk or device trees, - * make sure that we do not finish block group creation of pending block - * groups. We do this to avoid a deadlock. + * If we are COWing a node/leaf from the extent, chunk, device or free + * space trees, make sure that we do not finish block group creation of + * pending block groups. We do this to avoid a deadlock. * COWing can result in allocation of a new chunk, and flushing pending * block groups (btrfs_create_pending_block_groups()) can be triggered * when finishing allocation of a new chunk. Creation of a pending block - * group modifies the extent, chunk and device trees, therefore we could - * deadlock with ourselves since we are holding a lock on an extent - * buffer that btrfs_create_pending_block_groups() may try to COW later. + * group modifies the extent, chunk, device and free space trees, + * therefore we could deadlock with ourselves since we are holding a + * lock on an extent buffer that btrfs_create_pending_block_groups() may + * try to COW later. */ if (root == fs_info->extent_root || root == fs_info->chunk_root || - root == fs_info->dev_root) + root == fs_info->dev_root || + root == fs_info->free_space_root) trans->can_flush_pending_bgs = false; cow = btrfs_alloc_tree_block(trans, root, parent_start, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c @@ -3221,6 +3221,26 @@ static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2) inode_lock_nested(inode2, I_MUTEX_CHILD); } +static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) +{ + unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); + unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); +} + +static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) +{ + if (inode1 < inode2) { + swap(inode1, inode2); + swap(loff1, loff2); + } else if (inode1 == inode2 && loff2 < loff1) { + swap(loff1, loff2); + } + lock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); + lock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); +} + static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen, struct inode *dst, u64 dst_loff) { @@ -3242,11 +3262,12 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen, return -EINVAL; /* - * Lock destination range to serialize with concurrent readpages(). + * Lock destination range to serialize with concurrent readpages() and + * source range to serialize with relocation. */ - lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1); + btrfs_double_extent_lock(src, loff, dst, dst_loff, len); ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); - unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1); + btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); return ret; } @@ -3905,17 +3926,33 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src, len = ALIGN(src->i_size, bs) - off; if (destoff > inode->i_size) { + const u64 wb_start = ALIGN_DOWN(inode->i_size, bs); + ret = btrfs_cont_expand(inode, inode->i_size, destoff); if (ret) return ret; + /* + * We may have truncated the last block if the inode's size is + * not sector size aligned, so we need to wait for writeback to + * complete before proceeding further, otherwise we can race + * with cloning and attempt to increment a reference to an + * extent that no longer exists (writeback completed right after + * we found the previous extent covering eof and before we + * attempted to increment its reference count). + */ + ret = btrfs_wait_ordered_range(inode, wb_start, + destoff - wb_start); + if (ret) + return ret; } /* - * Lock destination range to serialize with concurrent readpages(). + * Lock destination range to serialize with concurrent readpages() and + * source range to serialize with relocation. */ - lock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1); + btrfs_double_extent_lock(src, off, inode, destoff, len); ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); - unlock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1); + btrfs_double_extent_unlock(src, off, inode, destoff, len); /* * Truncate page cache pages so that future reads will see the cloned * data immediately and not the previous data. diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c @@ -7825,6 +7825,18 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, ret = -EUCLEAN; goto out; } + + /* It's possible this device is a dummy for seed device */ + if (dev->disk_total_bytes == 0) { + dev = find_device(fs_info->fs_devices->seed, devid, NULL); + if (!dev) { + btrfs_err(fs_info, "failed to find seed devid %llu", + devid); + ret = -EUCLEAN; + goto out; + } + } + if (physical_offset + physical_len > dev->disk_total_bytes) { btrfs_err(fs_info, "dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",