whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 9ab97aea85cca43a6aedc90e0d1feba91eebe1ad
parent 195303136f192d37b89e20a8d1d2670d0d825266
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Mon, 31 Dec 2018 09:41:37 -0800

Merge tag 'f2fs-for-4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "In this round, we've focused on bug fixes since Pixel devices have
  been shipping with f2fs. Some of them were related to hardware
  encryption support which are actually not an issue in mainline, but
  would be better to merge them in order to avoid potential bugs.

  Enhancements:
   - do GC sub-sections when the section is large
   - add a flag in ioctl(SHUTDOWN) to trigger fsck for QA
   - use kvmalloc() in order to give another chance to avoid ENOMEM

  Bug fixes:
   - fix accessing memory boundaries in a malformed iamge
   - GC gives stale unencrypted block
   - GC counts in large sections
   - detect idle time more precisely
   - block allocation of DIO writes
   - race conditions between write_begin and write_checkpoint
   - allow GCs for node segments via ioctl()

  There are various clean-ups and minor bug fixes as well"

* tag 'f2fs-for-4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (43 commits)
  f2fs: sanity check of xattr entry size
  f2fs: fix use-after-free issue when accessing sbi->stat_info
  f2fs: check PageWriteback flag for ordered case
  f2fs: fix validation of the block count in sanity_check_raw_super
  f2fs: fix missing unlock(sbi->gc_mutex)
  f2fs: fix to dirty inode synchronously
  f2fs: clean up structure extent_node
  f2fs: fix block address for __check_sit_bitmap
  f2fs: fix sbi->extent_list corruption issue
  f2fs: clean up checkpoint flow
  f2fs: flush stale issued discard candidates
  f2fs: correct wrong spelling, issing_*
  f2fs: use kvmalloc, if kmalloc is failed
  f2fs: remove redundant comment of unused wio_mutex
  f2fs: fix to reorder set_page_dirty and wait_on_page_writeback
  f2fs: clear PG_writeback if IPU failed
  f2fs: add an ioctl() to explicitly trigger fsck later
  f2fs: avoid frequent costly fsck triggers
  f2fs: fix m_may_create to make OPU DIO write correctly
  f2fs: fix to update new block address correctly for OPU
  ...

Diffstat:
MDocumentation/ABI/testing/sysfs-fs-f2fs | 9+++++++++
Mfs/f2fs/acl.c | 20+++++++++++---------
Mfs/f2fs/checkpoint.c | 33++++++++++++---------------------
Mfs/f2fs/data.c | 160+++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
Mfs/f2fs/debug.c | 25+++++++++----------------
Mfs/f2fs/dir.c | 20+++++++++++++++-----
Mfs/f2fs/f2fs.h | 98++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------
Mfs/f2fs/file.c | 42+++++++++++++++++++++++++++---------------
Mfs/f2fs/gc.c | 81++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
Mfs/f2fs/inline.c | 20++++++++++----------
Mfs/f2fs/inode.c | 22+++++++++++-----------
Mfs/f2fs/namei.c | 8++++----
Mfs/f2fs/node.c | 40++++++++++++++++++++--------------------
Mfs/f2fs/node.h | 2+-
Mfs/f2fs/recovery.c | 4++--
Mfs/f2fs/segment.c | 101+++++++++++++++++++++++++++++++++++++++++++------------------------------------
Mfs/f2fs/segment.h | 2+-
Mfs/f2fs/shrinker.c | 2+-
Mfs/f2fs/super.c | 170++++++++++++++++++++++++++++++++++++++++----------------------------------------
Mfs/f2fs/sysfs.c | 27+++++++++++++++++----------
Mfs/f2fs/xattr.c | 22+++++++++++++++-------
21 files changed, 542 insertions(+), 366 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -92,6 +92,15 @@ Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com> Description: Controls the number of trials to find a victim segment. +What: /sys/fs/f2fs/<disk>/migration_granularity +Date: October 2018 +Contact: "Chao Yu" <yuchao0@huawei.com> +Description: + Controls migration granularity of garbage collection on large + section, it can let GC move partial segment{s} of one section + in one GC cycle, so that dispersing heavy overhead GC to + multiple lightweight one. + What: /sys/fs/f2fs/<disk>/dir_level Date: March 2014 Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com> diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c @@ -160,7 +160,7 @@ static void *f2fs_acl_to_disk(struct f2fs_sb_info *sbi, return (void *)f2fs_acl; fail: - kfree(f2fs_acl); + kvfree(f2fs_acl); return ERR_PTR(-EINVAL); } @@ -190,7 +190,7 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, acl = NULL; else acl = ERR_PTR(retval); - kfree(value); + kvfree(value); return acl; } @@ -240,7 +240,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0); - kfree(value); + kvfree(value); if (!error) set_cached_acl(inode, type, acl); @@ -352,12 +352,14 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode, return PTR_ERR(p); clone = f2fs_acl_clone(p, GFP_NOFS); - if (!clone) - goto no_mem; + if (!clone) { + ret = -ENOMEM; + goto release_acl; + } ret = f2fs_acl_create_masq(clone, mode); if (ret < 0) - goto no_mem_clone; + goto release_clone; if (ret == 0) posix_acl_release(clone); @@ -371,11 +373,11 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode, return 0; -no_mem_clone: +release_clone: posix_acl_release(clone); -no_mem: +release_acl: posix_acl_release(p); - return -ENOMEM; + return ret; } int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c @@ -44,7 +44,7 @@ repeat: cond_resched(); goto repeat; } - f2fs_wait_on_page_writeback(page, META, true); + f2fs_wait_on_page_writeback(page, META, true, true); if (!PageUptodate(page)) SetPageUptodate(page); return page; @@ -370,9 +370,8 @@ continue_unlock: goto continue_unlock; } - f2fs_wait_on_page_writeback(page, META, true); + f2fs_wait_on_page_writeback(page, META, true, true); - BUG_ON(PageWriteback(page)); if (!clear_page_dirty_for_io(page)) goto continue_unlock; @@ -911,7 +910,7 @@ free_fail_no_cp: f2fs_put_page(cp1, 1); f2fs_put_page(cp2, 1); fail_no_cp: - kfree(sbi->ckpt); + kvfree(sbi->ckpt); return -EINVAL; } @@ -1290,11 +1289,11 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi, struct page *page = f2fs_grab_meta_page(sbi, blk_addr); int err; + f2fs_wait_on_page_writeback(page, META, true, true); + memcpy(page_address(page), src, PAGE_SIZE); - set_page_dirty(page); - f2fs_wait_on_page_writeback(page, META, true); - f2fs_bug_on(sbi, PageWriteback(page)); + set_page_dirty(page); if (unlikely(!clear_page_dirty_for_io(page))) f2fs_bug_on(sbi, 1); @@ -1328,11 +1327,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) int err; /* Flush all the NAT/SIT pages */ - while (get_pages(sbi, F2FS_DIRTY_META)) { - f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - if (unlikely(f2fs_cp_error(sbi))) - break; - } + f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && + !f2fs_cp_error(sbi)); /* * modify checkpoint @@ -1405,14 +1402,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) for (i = 0; i < nm_i->nat_bits_blocks; i++) f2fs_update_meta_page(sbi, nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), blk + i); - - /* Flush all the NAT BITS pages */ - while (get_pages(sbi, F2FS_DIRTY_META)) { - f2fs_sync_meta_pages(sbi, META, LONG_MAX, - FS_CP_META_IO); - if (unlikely(f2fs_cp_error(sbi))) - break; - } } /* write out checkpoint buffer at block 0 */ @@ -1448,6 +1437,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Here, we have one bio having CP pack except cp pack 2 page */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && + !f2fs_cp_error(sbi)); /* wait for previous submitted meta pages writeback */ f2fs_wait_on_all_pages_writeback(sbi); @@ -1465,7 +1456,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) * invalidate intermediate page cache borrowed from meta inode * which are used for migration of encrypted inode's blocks. */ - if (f2fs_sb_has_encrypt(sbi->sb)) + if (f2fs_sb_has_encrypt(sbi)) invalidate_mapping_pages(META_MAPPING(sbi), MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c @@ -372,29 +372,6 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, return false; } -static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, - struct page *page, nid_t ino, - enum page_type type) -{ - enum page_type btype = PAGE_TYPE_OF_BIO(type); - enum temp_type temp; - struct f2fs_bio_info *io; - bool ret = false; - - for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { - io = sbi->write_io[btype] + temp; - - down_read(&io->io_rwsem); - ret = __has_merged_page(io, inode, page, ino); - up_read(&io->io_rwsem); - - /* TODO: use HOT temp only for meta pages now. */ - if (ret || btype == META) - break; - } - return ret; -} - static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type, enum temp_type temp) { @@ -420,13 +397,19 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, nid_t ino, enum page_type type, bool force) { enum temp_type temp; - - if (!force && !has_merged_page(sbi, inode, page, ino, type)) - return; + bool ret = true; for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { + if (!force) { + enum page_type btype = PAGE_TYPE_OF_BIO(type); + struct f2fs_bio_info *io = sbi->write_io[btype] + temp; - __f2fs_submit_merged_write(sbi, type, temp); + down_read(&io->io_rwsem); + ret = __has_merged_page(io, inode, page, ino); + up_read(&io->io_rwsem); + } + if (ret) + __f2fs_submit_merged_write(sbi, type, temp); /* TODO: use HOT temp only for meta pages now. */ if (type >= META) @@ -643,7 +626,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn) */ void f2fs_set_data_blkaddr(struct dnode_of_data *dn) { - f2fs_wait_on_page_writeback(dn->node_page, NODE, true); + f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); __set_data_blkaddr(dn); if (set_page_dirty(dn->node_page)) dn->node_changed = true; @@ -673,7 +656,7 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, dn->ofs_in_node, count); - f2fs_wait_on_page_writeback(dn->node_page, NODE, true); + f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); for (; count > 0; dn->ofs_in_node++) { block_t blkaddr = datablock_addr(dn->inode, @@ -957,6 +940,9 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) return err; } + if (direct_io && allow_outplace_dio(inode, iocb, from)) + return 0; + if (is_inode_flag_set(inode, FI_NO_PREALLOC)) return 0; @@ -970,6 +956,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) map.m_next_pgofs = NULL; map.m_next_extent = NULL; map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = true; if (direct_io) { map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint); @@ -1028,7 +1015,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, unsigned int maxblocks = map->m_len; struct dnode_of_data dn; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - int mode = create ? ALLOC_NODE : LOOKUP_NODE; + int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE; pgoff_t pgofs, end_offset, end; int err = 0, ofs = 1; unsigned int ofs_in_node, last_ofs_in_node; @@ -1048,6 +1035,10 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, end = pgofs + maxblocks; if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { + if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && + map->m_may_create) + goto next_dnode; + map->m_pblk = ei.blk + pgofs - ei.fofs; map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs); map->m_flags = F2FS_MAP_MAPPED; @@ -1062,7 +1053,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, } next_dnode: - if (create) + if (map->m_may_create) __do_map_lock(sbi, flag, true); /* When reading holes, we need its node page */ @@ -1099,11 +1090,13 @@ next_block: if (is_valid_data_blkaddr(sbi, blkaddr)) { /* use out-place-update for driect IO under LFS mode */ - if (test_opt(sbi, LFS) && create && - flag == F2FS_GET_BLOCK_DIO) { + if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && + map->m_may_create) { err = __allocate_data_block(&dn, map->m_seg_type); - if (!err) + if (!err) { + blkaddr = dn.data_blkaddr; set_inode_flag(inode, FI_APPEND_WRITE); + } } } else { if (create) { @@ -1209,7 +1202,7 @@ skip: f2fs_put_dnode(&dn); - if (create) { + if (map->m_may_create) { __do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } @@ -1235,7 +1228,7 @@ sync_out: } f2fs_put_dnode(&dn); unlock_out: - if (create) { + if (map->m_may_create) { __do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } @@ -1257,6 +1250,7 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) map.m_next_pgofs = NULL; map.m_next_extent = NULL; map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; last_lblk = F2FS_BLK_ALIGN(pos + len); while (map.m_lblk < last_lblk) { @@ -1271,7 +1265,7 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) static int __get_data_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create, int flag, - pgoff_t *next_pgofs, int seg_type) + pgoff_t *next_pgofs, int seg_type, bool may_write) { struct f2fs_map_blocks map; int err; @@ -1281,6 +1275,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, map.m_next_pgofs = next_pgofs; map.m_next_extent = NULL; map.m_seg_type = seg_type; + map.m_may_create = may_write; err = f2fs_map_blocks(inode, &map, create, flag); if (!err) { @@ -1297,16 +1292,25 @@ static int get_data_block(struct inode *inode, sector_t iblock, { return __get_data_block(inode, iblock, bh_result, create, flag, next_pgofs, - NO_CHECK_TYPE); + NO_CHECK_TYPE, create); +} + +static int get_data_block_dio_write(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + return __get_data_block(inode, iblock, bh_result, create, + F2FS_GET_BLOCK_DIO, NULL, + f2fs_rw_hint_to_seg_type(inode->i_write_hint), + true); } static int get_data_block_dio(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { return __get_data_block(inode, iblock, bh_result, create, - F2FS_GET_BLOCK_DIO, NULL, - f2fs_rw_hint_to_seg_type( - inode->i_write_hint)); + F2FS_GET_BLOCK_DIO, NULL, + f2fs_rw_hint_to_seg_type(inode->i_write_hint), + false); } static int get_data_block_bmap(struct inode *inode, sector_t iblock, @@ -1318,7 +1322,7 @@ static int get_data_block_bmap(struct inode *inode, sector_t iblock, return __get_data_block(inode, iblock, bh_result, create, F2FS_GET_BLOCK_BMAP, NULL, - NO_CHECK_TYPE); + NO_CHECK_TYPE, create); } static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) @@ -1525,6 +1529,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, map.m_next_pgofs = NULL; map.m_next_extent = NULL; map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; for (; nr_pages; nr_pages--) { if (pages) { @@ -1855,6 +1860,8 @@ got_it: if (fio->need_lock == LOCK_REQ) f2fs_unlock_op(fio->sbi); err = f2fs_inplace_write_data(fio); + if (err && PageWriteback(page)) + end_page_writeback(page); trace_f2fs_do_write_data_page(fio->page, IPU); set_inode_flag(inode, FI_UPDATE_WRITE); return err; @@ -2142,12 +2149,11 @@ continue_unlock: if (PageWriteback(page)) { if (wbc->sync_mode != WB_SYNC_NONE) f2fs_wait_on_page_writeback(page, - DATA, true); + DATA, true, true); else goto continue_unlock; } - BUG_ON(PageWriteback(page)); if (!clear_page_dirty_for_io(page)) goto continue_unlock; @@ -2324,6 +2330,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, bool locked = false; struct extent_info ei = {0,0,0}; int err = 0; + int flag; /* * we already allocated all the blocks, so we don't need to get @@ -2333,9 +2340,15 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, !is_inode_flag_set(inode, FI_NO_PREALLOC)) return 0; + /* f2fs_lock_op avoids race between write CP and convert_inline_page */ + if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode)) + flag = F2FS_GET_BLOCK_DEFAULT; + else + flag = F2FS_GET_BLOCK_PRE_AIO; + if (f2fs_has_inline_data(inode) || (pos & PAGE_MASK) >= i_size_read(inode)) { - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + __do_map_lock(sbi, flag, true); locked = true; } restart: @@ -2373,6 +2386,7 @@ restart: f2fs_put_dnode(&dn); __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO); locked = true; goto restart; } @@ -2386,7 +2400,7 @@ out: f2fs_put_dnode(&dn); unlock_out: if (locked) - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + __do_map_lock(sbi, flag, false); return err; } @@ -2457,7 +2471,7 @@ repeat: } } - f2fs_wait_on_page_writeback(page, DATA, false); + f2fs_wait_on_page_writeback(page, DATA, false, true); if (len == PAGE_SIZE || PageUptodate(page)) return 0; @@ -2548,6 +2562,53 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter, return 0; } +static void f2fs_dio_end_io(struct bio *bio) +{ + struct f2fs_private_dio *dio = bio->bi_private; + + dec_page_count(F2FS_I_SB(dio->inode), + dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ); + + bio->bi_private = dio->orig_private; + bio->bi_end_io = dio->orig_end_io; + + kvfree(dio); + + bio_endio(bio); +} + +static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode, + loff_t file_offset) +{ + struct f2fs_private_dio *dio; + bool write = (bio_op(bio) == REQ_OP_WRITE); + int err; + + dio = f2fs_kzalloc(F2FS_I_SB(inode), + sizeof(struct f2fs_private_dio), GFP_NOFS); + if (!dio) { + err = -ENOMEM; + goto out; + } + + dio->inode = inode; + dio->orig_end_io = bio->bi_end_io; + dio->orig_private = bio->bi_private; + dio->write = write; + + bio->bi_end_io = f2fs_dio_end_io; + bio->bi_private = dio; + + inc_page_count(F2FS_I_SB(inode), + write ? F2FS_DIO_WRITE : F2FS_DIO_READ); + + submit_bio(bio); + return; +out: + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); +} + static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct address_space *mapping = iocb->ki_filp->f_mapping; @@ -2594,7 +2655,10 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) down_read(&fi->i_gc_rwsem[READ]); } - err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); + err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, + iter, rw == WRITE ? get_data_block_dio_write : + get_data_block_dio, NULL, f2fs_dio_submit_bio, + DIO_LOCKING | DIO_SKIP_HOLES); if (do_opu) up_read(&fi->i_gc_rwsem[READ]); diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c @@ -53,6 +53,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->vw_cnt = atomic_read(&sbi->vw_cnt); si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt); si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt); + si->nr_dio_read = get_pages(sbi, F2FS_DIO_READ); + si->nr_dio_write = get_pages(sbi, F2FS_DIO_WRITE); si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA); si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA); si->nr_rd_data = get_pages(sbi, F2FS_RD_DATA); @@ -62,7 +64,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->nr_flushed = atomic_read(&SM_I(sbi)->fcc_info->issued_flush); si->nr_flushing = - atomic_read(&SM_I(sbi)->fcc_info->issing_flush); + atomic_read(&SM_I(sbi)->fcc_info->queued_flush); si->flush_list_empty = llist_empty(&SM_I(sbi)->fcc_info->issue_list); } @@ -70,7 +72,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->nr_discarded = atomic_read(&SM_I(sbi)->dcc_info->issued_discard); si->nr_discarding = - atomic_read(&SM_I(sbi)->dcc_info->issing_discard); + atomic_read(&SM_I(sbi)->dcc_info->queued_discard); si->nr_discard_cmd = atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt); si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks; @@ -197,7 +199,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); si->base_mem += SIT_VBLOCK_MAP_SIZE; - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); si->base_mem += __bitmap_size(sbi, SIT_BITMAP); @@ -374,6 +376,8 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); + seq_printf(s, " - DIO (R: %4d, W: %4d)\n", + si->nr_dio_read, si->nr_dio_write); seq_printf(s, " - IO_R (Data: %4d, Node: %4d, Meta: %4d\n", si->nr_rd_data, si->nr_rd_node, si->nr_rd_meta); seq_printf(s, " - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), " @@ -444,18 +448,7 @@ static int stat_show(struct seq_file *s, void *v) return 0; } -static int stat_open(struct inode *inode, struct file *file) -{ - return single_open(file, stat_show, inode->i_private); -} - -static const struct file_operations stat_fops = { - .owner = THIS_MODULE, - .open = stat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(stat); int f2fs_build_stats(struct f2fs_sb_info *sbi) { @@ -510,7 +503,7 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi) list_del(&si->stat_list); mutex_unlock(&f2fs_stat_mutex); - kfree(si); + kvfree(si); } int __init f2fs_create_root_stats(void) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c @@ -293,7 +293,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, { enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA; lock_page(page); - f2fs_wait_on_page_writeback(page, type, true); + f2fs_wait_on_page_writeback(page, type, true, true); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode->i_mode); set_page_dirty(page); @@ -307,7 +307,7 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) { struct f2fs_inode *ri; - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); /* copy name info. to this inode page */ ri = F2FS_INODE(ipage); @@ -550,7 +550,7 @@ start: ++level; goto start; add_dentry: - f2fs_wait_on_page_writeback(dentry_page, DATA, true); + f2fs_wait_on_page_writeback(dentry_page, DATA, true, true); if (inode) { down_write(&F2FS_I(inode)->i_sem); @@ -705,7 +705,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, return f2fs_delete_inline_entry(dentry, page, dir, inode); lock_page(page); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); dentry_blk = page_address(page); bit_pos = dentry - dentry_blk->dentry; @@ -808,6 +808,17 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, de_name.name = d->filename[bit_pos]; de_name.len = le16_to_cpu(de->name_len); + /* check memory boundary before moving forward */ + bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + if (unlikely(bit_pos > d->max)) { + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: corrupted namelen=%d, run fsck to fix.", + __func__, le16_to_cpu(de->name_len)); + set_sbi_flag(sbi, SBI_NEED_FSCK); + err = -EINVAL; + goto out; + } + if (f2fs_encrypted_inode(d->inode)) { int save_len = fstr->len; @@ -830,7 +841,6 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, if (readdir_ra) f2fs_ra_node_page(sbi, le32_to_cpu(de->ino)); - bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); ctx->pos = start_pos + bit_pos; } out: diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h @@ -67,7 +67,7 @@ struct f2fs_fault_info { unsigned int inject_type; }; -extern char *f2fs_fault_name[FAULT_MAX]; +extern const char *f2fs_fault_name[FAULT_MAX]; #define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type))) #endif @@ -152,12 +152,13 @@ struct f2fs_mount_info { #define F2FS_FEATURE_VERITY 0x0400 /* reserved */ #define F2FS_FEATURE_SB_CHKSUM 0x0800 -#define F2FS_HAS_FEATURE(sb, mask) \ - ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) -#define F2FS_SET_FEATURE(sb, mask) \ - (F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask)) -#define F2FS_CLEAR_FEATURE(sb, mask) \ - (F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask)) +#define __F2FS_HAS_FEATURE(raw_super, mask) \ + ((raw_super->feature & cpu_to_le32(mask)) != 0) +#define F2FS_HAS_FEATURE(sbi, mask) __F2FS_HAS_FEATURE(sbi->raw_super, mask) +#define F2FS_SET_FEATURE(sbi, mask) \ + (sbi->raw_super->feature |= cpu_to_le32(mask)) +#define F2FS_CLEAR_FEATURE(sbi, mask) \ + (sbi->raw_super->feature &= ~cpu_to_le32(mask)) /* * Default values for user and/or group using reserved blocks @@ -284,7 +285,7 @@ struct discard_cmd { struct block_device *bdev; /* bdev */ unsigned short ref; /* reference count */ unsigned char state; /* state */ - unsigned char issuing; /* issuing discard */ + unsigned char queued; /* queued discard */ int error; /* bio error */ spinlock_t lock; /* for state/bio_ref updating */ unsigned short bio_ref; /* bio reference count */ @@ -326,7 +327,7 @@ struct discard_cmd_control { unsigned int undiscard_blks; /* # of undiscard blocks */ unsigned int next_pos; /* next discard position */ atomic_t issued_discard; /* # of issued discard */ - atomic_t issing_discard; /* # of issing discard */ + atomic_t queued_discard; /* # of queued discard */ atomic_t discard_cmd_cnt; /* # of cached cmd count */ struct rb_root_cached root; /* root of discard rb-tree */ bool rbtree_check; /* config for consistence check */ @@ -416,6 +417,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */ #define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */ #define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */ +#define F2FS_GOING_DOWN_NEED_FSCK 0x4 /* going down to trigger fsck */ #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* @@ -557,16 +559,8 @@ struct extent_info { }; struct extent_node { - struct rb_node rb_node; - union { - struct { - unsigned int fofs; - unsigned int len; - u32 blk; - }; - struct extent_info ei; /* extent info */ - - }; + struct rb_node rb_node; /* rb node located in rb-tree */ + struct extent_info ei; /* extent info */ struct list_head list; /* node in global extent list of sbi */ struct extent_tree *et; /* extent tree pointer */ }; @@ -601,6 +595,7 @@ struct f2fs_map_blocks { pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */ pgoff_t *m_next_extent; /* point to next possible extent */ int m_seg_type; + bool m_may_create; /* indicate it is from write path */ }; /* for flag in get_data_block */ @@ -889,7 +884,7 @@ struct flush_cmd_control { struct task_struct *f2fs_issue_flush; /* flush thread */ wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ atomic_t issued_flush; /* # of issued flushes */ - atomic_t issing_flush; /* # of issing flushes */ + atomic_t queued_flush; /* # of queued flushes */ struct llist_head issue_list; /* list for command issue */ struct llist_node *dispatch_list; /* list for command dispatch */ }; @@ -956,6 +951,8 @@ enum count_type { F2FS_RD_DATA, F2FS_RD_NODE, F2FS_RD_META, + F2FS_DIO_WRITE, + F2FS_DIO_READ, NR_COUNT_TYPE, }; @@ -1170,8 +1167,6 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ - struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE]; - /* bio ordering for NODE/DATA */ /* keep migration IO order for LFS mode */ struct rw_semaphore io_order_lock; mempool_t *write_io_dummy; /* Dummy pages */ @@ -1263,6 +1258,7 @@ struct f2fs_sb_info { struct f2fs_gc_kthread *gc_thread; /* GC thread */ unsigned int cur_victim_sec; /* current victim section num */ unsigned int gc_mode; /* current GC state */ + unsigned int next_victim_seg[2]; /* next segment in victim section */ /* for skip statistic */ unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */ unsigned long long skipped_gc_rwsem; /* FG_GC only */ @@ -1272,6 +1268,8 @@ struct f2fs_sb_info { /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; + /* migration granularity of garbage collection, unit: segment */ + unsigned int migration_granularity; /* * for stat information. @@ -1330,6 +1328,13 @@ struct f2fs_sb_info { __u32 s_chksum_seed; }; +struct f2fs_private_dio { + struct inode *inode; + void *orig_private; + bio_end_io_t *orig_end_io; + bool write; +}; + #ifdef CONFIG_F2FS_FAULT_INJECTION #define f2fs_show_injection_info(type) \ printk_ratelimited("%sF2FS-fs : inject %s in %s of %pF\n", \ @@ -1608,12 +1613,16 @@ static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock) { unsigned long flags; - set_sbi_flag(sbi, SBI_NEED_FSCK); + /* + * In order to re-enable nat_bits we need to call fsck.f2fs by + * set_sbi_flag(sbi, SBI_NEED_FSCK). But it may give huge cost, + * so let's rely on regular fsck or unclean shutdown. + */ if (lock) spin_lock_irqsave(&sbi->cp_lock, flags); __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG); - kfree(NM_I(sbi)->nat_bits); + kvfree(NM_I(sbi)->nat_bits); NM_I(sbi)->nat_bits = NULL; if (lock) spin_unlock_irqrestore(&sbi->cp_lock, flags); @@ -2146,7 +2155,11 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) || get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) || - get_pages(sbi, F2FS_WB_CP_DATA)) + get_pages(sbi, F2FS_WB_CP_DATA) || + get_pages(sbi, F2FS_DIO_READ) || + get_pages(sbi, F2FS_DIO_WRITE) || + atomic_read(&SM_I(sbi)->dcc_info->queued_discard) || + atomic_read(&SM_I(sbi)->fcc_info->queued_flush)) return false; return f2fs_time_over(sbi, type); } @@ -2370,6 +2383,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_NEW_INODE: if (set) return; + /* fall through */ case FI_DATA_EXIST: case FI_INLINE_DOTS: case FI_PIN_FILE: @@ -2672,22 +2686,37 @@ static inline bool is_dot_dotdot(const struct qstr *str) static inline bool f2fs_may_extent_tree(struct inode *inode) { - if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) || + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (!test_opt(sbi, EXTENT_CACHE) || is_inode_flag_set(inode, FI_NO_EXTENT)) return false; + /* + * for recovered files during mount do not create extents + * if shrinker is not registered. + */ + if (list_empty(&sbi->s_list)) + return false; + return S_ISREG(inode->i_mode); } static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { + void *ret; + if (time_to_inject(sbi, FAULT_KMALLOC)) { f2fs_show_injection_info(FAULT_KMALLOC); return NULL; } - return kmalloc(size, flags); + ret = kmalloc(size, flags); + if (ret) + return ret; + + return kvmalloc(size, flags); } static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi, @@ -2762,6 +2791,8 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, spin_unlock(&sbi->iostat_lock); } +#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1) + #define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \ (!is_read_io(fio->op) || fio->is_meta)) @@ -3007,7 +3038,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio, bool add_list); void f2fs_wait_on_page_writeback(struct page *page, - enum page_type type, bool ordered); + enum page_type type, bool ordered, bool locked); void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, block_t len); @@ -3147,6 +3178,7 @@ struct f2fs_stat_info { int total_count, utilization; int bg_gc, nr_wb_cp_data, nr_wb_data; int nr_rd_data, nr_rd_node, nr_rd_meta; + int nr_dio_read, nr_dio_write; unsigned int io_skip_bggc, other_skip_bggc; int nr_flushing, nr_flushed, flush_list_empty; int nr_discarding, nr_discarded; @@ -3459,9 +3491,9 @@ static inline bool f2fs_post_read_required(struct inode *inode) } #define F2FS_FEATURE_FUNCS(name, flagname) \ -static inline int f2fs_sb_has_##name(struct super_block *sb) \ +static inline int f2fs_sb_has_##name(struct f2fs_sb_info *sbi) \ { \ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_##flagname); \ + return F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_##flagname); \ } F2FS_FEATURE_FUNCS(encrypt, ENCRYPT); @@ -3491,7 +3523,7 @@ static inline int get_blkz_type(struct f2fs_sb_info *sbi, static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi) { - return f2fs_sb_has_blkzoned(sbi->sb); + return f2fs_sb_has_blkzoned(sbi); } static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi) @@ -3566,7 +3598,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, * for blkzoned device, fallback direct IO to buffered IO, so * all IOs can be serialized by log-structured write. */ - if (f2fs_sb_has_blkzoned(sbi->sb)) + if (f2fs_sb_has_blkzoned(sbi)) return true; if (test_opt(sbi, LFS) && (rw == WRITE) && block_unaligned_IO(inode, iocb, iter)) @@ -3589,7 +3621,7 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) { #ifdef CONFIG_QUOTA - if (f2fs_sb_has_quota_ino(sbi->sb)) + if (f2fs_sb_has_quota_ino(sbi)) return true; if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c @@ -82,7 +82,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) } /* fill the page */ - f2fs_wait_on_page_writeback(page, DATA, false); + f2fs_wait_on_page_writeback(page, DATA, false, true); /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); @@ -216,6 +216,9 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, trace_f2fs_sync_file_enter(inode); + if (S_ISDIR(inode->i_mode)) + goto go_write; + /* if fdatasync is triggered, let's do in-place-update */ if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) set_inode_flag(inode, FI_NEED_IPU); @@ -575,7 +578,7 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, if (IS_ERR(page)) return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page); truncate_out: - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); zero_user(page, offset, PAGE_SIZE - offset); /* An encrypted inode should have a key and truncate the last page. */ @@ -696,7 +699,7 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, unsigned int flags; if (f2fs_has_extra_attr(inode) && - f2fs_sb_has_inode_crtime(inode->i_sb) && + f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { stat->result_mask |= STATX_BTIME; stat->btime.tv_sec = fi->i_crtime.tv_sec; @@ -892,7 +895,7 @@ static int fill_zero(struct inode *inode, pgoff_t index, if (IS_ERR(page)) return PTR_ERR(page); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); zero_user(page, start, len); set_page_dirty(page); f2fs_put_page(page, 1); @@ -1496,7 +1499,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset, { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_map_blocks map = { .m_next_pgofs = NULL, - .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE }; + .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, + .m_may_create = true }; pgoff_t pg_end; loff_t new_size = i_size_read(inode); loff_t off_end; @@ -1681,7 +1685,7 @@ static int __f2fs_ioc_setflags(struct inode *inode, unsigned int flags) inode->i_ctime = current_time(inode); f2fs_set_inode_flags(inode); - f2fs_mark_inode_dirty_sync(inode, false); + f2fs_mark_inode_dirty_sync(inode, true); return 0; } @@ -1962,6 +1966,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) f2fs_stop_checkpoint(sbi, false); set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; + case F2FS_GOING_DOWN_NEED_FSCK: + set_sbi_flag(sbi, SBI_NEED_FSCK); + /* do checkpoint only */ + ret = f2fs_sync_fs(sb, 1); + if (ret) + goto out; + break; default: ret = -EINVAL; goto out; @@ -2030,7 +2041,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - if (!f2fs_sb_has_encrypt(inode->i_sb)) + if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode))) return -EOPNOTSUPP; f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); @@ -2040,7 +2051,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) { - if (!f2fs_sb_has_encrypt(file_inode(filp)->i_sb)) + if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) return -EOPNOTSUPP; return fscrypt_ioctl_get_policy(filp, (void __user *)arg); } @@ -2051,7 +2062,7 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; - if (!f2fs_sb_has_encrypt(inode->i_sb)) + if (!f2fs_sb_has_encrypt(sbi)) return -EOPNOTSUPP; err = mnt_want_write_file(filp); @@ -2155,7 +2166,7 @@ do_more: } ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start)); - range.start += sbi->blocks_per_seg; + range.start += BLKS_PER_SEC(sbi); if (range.start <= end) goto do_more; out: @@ -2197,7 +2208,8 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, { struct inode *inode = file_inode(filp); struct f2fs_map_blocks map = { .m_next_extent = NULL, - .m_seg_type = NO_CHECK_TYPE }; + .m_seg_type = NO_CHECK_TYPE , + .m_may_create = false }; struct extent_info ei = {0, 0, 0}; pgoff_t pg_start, pg_end, next_pgofs; unsigned int blk_per_seg = sbi->blocks_per_seg; @@ -2560,7 +2572,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) return -EFAULT; if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num || - sbi->segs_per_sec != 1) { + __is_large_section(sbi)) { f2fs_msg(sbi->sb, KERN_WARNING, "Can't flush %u in %d for segs_per_sec %u != 1\n", range.dev_num, sbi->s_ndevs, @@ -2635,12 +2647,11 @@ static int f2fs_ioc_setproject(struct file *filp, __u32 projid) struct inode *inode = file_inode(filp); struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct super_block *sb = sbi->sb; struct page *ipage; kprojid_t kprojid; int err; - if (!f2fs_sb_has_project_quota(sb)) { + if (!f2fs_sb_has_project_quota(sbi)) { if (projid != F2FS_DEF_PROJID) return -EOPNOTSUPP; else @@ -2757,7 +2768,7 @@ static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg) fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags & F2FS_FL_USER_VISIBLE); - if (f2fs_sb_has_project_quota(inode->i_sb)) + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode))) fa.fsx_projid = (__u32)from_kprojid(&init_user_ns, fi->i_projid); @@ -2932,6 +2943,7 @@ int f2fs_precache_extents(struct inode *inode) map.m_next_pgofs = NULL; map.m_next_extent = &m_next_extent; map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; end = F2FS_I_SB(inode)->max_file_blocks; while (map.m_lblk < end) { diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c @@ -142,7 +142,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(gc_th->f2fs_gc_task)) { err = PTR_ERR(gc_th->f2fs_gc_task); - kfree(gc_th); + kvfree(gc_th); sbi->gc_thread = NULL; } out: @@ -155,7 +155,7 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi) if (!gc_th) return; kthread_stop(gc_th->f2fs_gc_task); - kfree(gc_th); + kvfree(gc_th); sbi->gc_thread = NULL; } @@ -323,8 +323,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, p.min_cost = get_max_cost(sbi, &p); if (*result != NULL_SEGNO) { - if (IS_DATASEG(get_seg_entry(sbi, *result)->type) && - get_valid_blocks(sbi, *result, false) && + if (get_valid_blocks(sbi, *result, false) && !sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) p.min_segno = *result; goto out; @@ -333,6 +332,22 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, if (p.max_search == 0) goto out; + if (__is_large_section(sbi) && p.alloc_mode == LFS) { + if (sbi->next_victim_seg[BG_GC] != NULL_SEGNO) { + p.min_segno = sbi->next_victim_seg[BG_GC]; + *result = p.min_segno; + sbi->next_victim_seg[BG_GC] = NULL_SEGNO; + goto got_result; + } + if (gc_type == FG_GC && + sbi->next_victim_seg[FG_GC] != NULL_SEGNO) { + p.min_segno = sbi->next_victim_seg[FG_GC]; + *result = p.min_segno; + sbi->next_victim_seg[FG_GC] = NULL_SEGNO; + goto got_result; + } + } + last_victim = sm->last_victim[p.gc_mode]; if (p.alloc_mode == LFS && gc_type == FG_GC) { p.min_segno = check_bg_victims(sbi); @@ -395,6 +410,8 @@ next: } if (p.min_segno != NULL_SEGNO) { got_it: + *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; +got_result: if (p.alloc_mode == LFS) { secno = GET_SEC_FROM_SEG(sbi, p.min_segno); if (gc_type == FG_GC) @@ -402,13 +419,13 @@ got_it: else set_bit(secno, dirty_i->victim_secmap); } - *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; + } +out: + if (p.min_segno != NULL_SEGNO) trace_f2fs_get_victim(sbi->sb, type, gc_type, &p, sbi->cur_victim_sec, prefree_segments(sbi), free_segments(sbi)); - } -out: mutex_unlock(&dirty_i->seglist_lock); return (p.min_segno == NULL_SEGNO) ? 0 : 1; @@ -658,6 +675,14 @@ got_it: fio.page = page; fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; + /* + * don't cache encrypted data into meta inode until previous dirty + * data were writebacked to avoid racing between GC and flush. + */ + f2fs_wait_on_page_writeback(page, DATA, true, true); + + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); + fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi), dn.data_blkaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); @@ -743,7 +768,9 @@ static int move_data_block(struct inode *inode, block_t bidx, * don't cache encrypted data into meta inode until previous dirty * data were writebacked to avoid racing between GC and flush. */ - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); + + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); err = f2fs_get_node_info(fio.sbi, dn.nid, &ni); if (err) @@ -802,8 +829,8 @@ static int move_data_block(struct inode *inode, block_t bidx, } write_page: + f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true); set_page_dirty(fio.encrypted_page); - f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true); if (clear_page_dirty_for_io(fio.encrypted_page)) dec_page_count(fio.sbi, F2FS_DIRTY_META); @@ -811,7 +838,7 @@ write_page: ClearPageError(page); /* allocate block address */ - f2fs_wait_on_page_writeback(dn.node_page, NODE, true); + f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true); fio.op = REQ_OP_WRITE; fio.op_flags = REQ_SYNC; @@ -897,8 +924,9 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, bool is_dirty = PageDirty(page); retry: + f2fs_wait_on_page_writeback(page, DATA, true, true); + set_page_dirty(page); - f2fs_wait_on_page_writeback(page, DATA, true); if (clear_page_dirty_for_io(page)) { inode_dec_dirty_pages(inode); f2fs_remove_dirty_inode(inode); @@ -1093,15 +1121,18 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, struct blk_plug plug; unsigned int segno = start_segno; unsigned int end_segno = start_segno + sbi->segs_per_sec; - int seg_freed = 0; + int seg_freed = 0, migrated = 0; unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? SUM_TYPE_DATA : SUM_TYPE_NODE; int submitted = 0; + if (__is_large_section(sbi)) + end_segno = rounddown(end_segno, sbi->segs_per_sec); + /* readahead multi ssa blocks those have contiguous address */ - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), - sbi->segs_per_sec, META_SSA, true); + end_segno - segno, META_SSA, true); /* reference all summary page */ while (segno < end_segno) { @@ -1130,10 +1161,13 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, GET_SUM_BLOCK(sbi, segno)); f2fs_put_page(sum_page, 0); - if (get_valid_blocks(sbi, segno, false) == 0 || - !PageUptodate(sum_page) || - unlikely(f2fs_cp_error(sbi))) - goto next; + if (get_valid_blocks(sbi, segno, false) == 0) + goto freed; + if (__is_large_section(sbi) && + migrated >= sbi->migration_granularity) + goto skip; + if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi))) + goto skip; sum = page_address(sum_page); if (type != GET_SUM_TYPE((&sum->footer))) { @@ -1141,7 +1175,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, "type [%d, %d] in SSA and SIT", segno, type, GET_SUM_TYPE((&sum->footer))); set_sbi_flag(sbi, SBI_NEED_FSCK); - goto next; + goto skip; } /* @@ -1160,10 +1194,15 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, stat_inc_seg_count(sbi, type, gc_type); +freed: if (gc_type == FG_GC && get_valid_blocks(sbi, segno, false) == 0) seg_freed++; -next: + migrated++; + + if (__is_large_section(sbi) && segno + 1 < end_segno) + sbi->next_victim_seg[gc_type] = segno + 1; +skip: f2fs_put_page(sum_page, 0); } @@ -1307,7 +1346,7 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES; /* give warm/cold data area from slower device */ - if (sbi->s_ndevs && sbi->segs_per_sec == 1) + if (sbi->s_ndevs && !__is_large_section(sbi)) SIT_I(sbi)->last_victim[ALLOC_NEXT] = GET_SEGNO(sbi, FDEV(0).end_blk) + 1; } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c @@ -72,7 +72,7 @@ void f2fs_truncate_inline_inode(struct inode *inode, addr = inline_data_addr(inode, ipage); - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); memset(addr + from, 0, MAX_INLINE_DATA(inode) - from); set_page_dirty(ipage); @@ -161,7 +161,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) fio.old_blkaddr = dn->data_blkaddr; set_inode_flag(dn->inode, FI_HOT_DATA); f2fs_outplace_write_data(dn, &fio); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); if (dirty) { inode_dec_dirty_pages(dn->inode); f2fs_remove_dirty_inode(dn->inode); @@ -236,7 +236,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) f2fs_bug_on(F2FS_I_SB(inode), page->index); - f2fs_wait_on_page_writeback(dn.inode_page, NODE, true); + f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true); src_addr = kmap_atomic(page); dst_addr = inline_data_addr(inode, dn.inode_page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode)); @@ -277,7 +277,7 @@ process_inline: ipage = f2fs_get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); src_addr = inline_data_addr(inode, npage); dst_addr = inline_data_addr(inode, ipage); @@ -391,7 +391,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, goto out; } - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); dentry_blk = page_address(page); @@ -501,18 +501,18 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); - kfree(backup_dentry); + kvfree(backup_dentry); return 0; recover: lock_page(ipage); - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir)); f2fs_i_depth_write(dir, 0); f2fs_i_size_write(dir, MAX_INLINE_DATA(dir)); set_page_dirty(ipage); f2fs_put_page(ipage, 1); - kfree(backup_dentry); + kvfree(backup_dentry); return err; } @@ -565,7 +565,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, } } - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); name_hash = f2fs_dentry_hash(new_name, NULL); f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); @@ -597,7 +597,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, int i; lock_page(page); - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); inline_dentry = inline_data_addr(dir, page); make_dentry_ptr_inline(dir, &d, inline_dentry); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c @@ -103,7 +103,7 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage) while (start < end) { if (*start++) { - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); set_inode_flag(inode, FI_DATA_EXIST); set_raw_inline(inode, F2FS_INODE(ipage)); @@ -118,7 +118,7 @@ static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page { struct f2fs_inode *ri = &F2FS_NODE(page)->i; - if (!f2fs_sb_has_inode_chksum(sbi->sb)) + if (!f2fs_sb_has_inode_chksum(sbi)) return false; if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR)) @@ -218,7 +218,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } - if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) + if (f2fs_sb_has_flexible_inline_xattr(sbi) && !f2fs_has_extra_attr(inode)) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_msg(sbi->sb, KERN_WARNING, @@ -228,7 +228,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) } if (f2fs_has_extra_attr(inode) && - !f2fs_sb_has_extra_attr(sbi->sb)) { + !f2fs_sb_has_extra_attr(sbi)) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_msg(sbi->sb, KERN_WARNING, "%s: inode (ino=%lx) is with extra_attr, " @@ -340,7 +340,7 @@ static int do_read_inode(struct inode *inode) fi->i_extra_isize = f2fs_has_extra_attr(inode) ? le16_to_cpu(ri->i_extra_isize) : 0; - if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) { + if (f2fs_sb_has_flexible_inline_xattr(sbi)) { fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size); } else if (f2fs_has_inline_xattr(inode) || f2fs_has_inline_dentry(inode)) { @@ -390,14 +390,14 @@ static int do_read_inode(struct inode *inode) if (fi->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); - if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi->sb) && + if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi) && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) i_projid = (projid_t)le32_to_cpu(ri->i_projid); else i_projid = F2FS_DEF_PROJID; fi->i_projid = make_kprojid(&init_user_ns, i_projid); - if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi->sb) && + if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi) && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { fi->i_crtime.tv_sec = le64_to_cpu(ri->i_crtime); fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec); @@ -497,7 +497,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) struct f2fs_inode *ri; struct extent_tree *et = F2FS_I(inode)->extent_tree; - f2fs_wait_on_page_writeback(node_page, NODE, true); + f2fs_wait_on_page_writeback(node_page, NODE, true, true); set_page_dirty(node_page); f2fs_inode_synced(inode); @@ -542,11 +542,11 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) if (f2fs_has_extra_attr(inode)) { ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize); - if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)->sb)) + if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode))) ri->i_inline_xattr_size = cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size); - if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) && + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) && F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, i_projid)) { projid_t i_projid; @@ -556,7 +556,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) ri->i_projid = cpu_to_le32(i_projid); } - if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)->sb) && + if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, i_crtime)) { ri->i_crtime = diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c @@ -61,7 +61,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) goto fail; } - if (f2fs_sb_has_project_quota(sbi->sb) && + if (f2fs_sb_has_project_quota(sbi) && (F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL)) F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid; else @@ -79,7 +79,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) f2fs_may_encrypt(inode)) f2fs_set_encrypted_inode(inode); - if (f2fs_sb_has_extra_attr(sbi->sb)) { + if (f2fs_sb_has_extra_attr(sbi)) { set_inode_flag(inode, FI_EXTRA_ATTR); F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE; } @@ -92,7 +92,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (f2fs_may_inline_dentry(inode)) set_inode_flag(inode, FI_INLINE_DENTRY); - if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) { + if (f2fs_sb_has_flexible_inline_xattr(sbi)) { f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode)); if (f2fs_has_inline_xattr(inode)) xattr_size = F2FS_OPTION(sbi).inline_xattr_size; @@ -635,7 +635,7 @@ out_f2fs_handle_failed_inode: f2fs_handle_failed_inode(inode); out_free_encrypted_link: if (disk_link.name != (unsigned char *)symname) - kfree(disk_link.name); + kvfree(disk_link.name); return err; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c @@ -826,6 +826,7 @@ static int truncate_node(struct dnode_of_data *dn) struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct node_info ni; int err; + pgoff_t index; err = f2fs_get_node_info(sbi, dn->nid, &ni); if (err) @@ -845,10 +846,11 @@ static int truncate_node(struct dnode_of_data *dn) clear_node_page_dirty(dn->node_page); set_sbi_flag(sbi, SBI_IS_DIRTY); + index = dn->node_page->index; f2fs_put_page(dn->node_page, 1); invalidate_mapping_pages(NODE_MAPPING(sbi), - dn->node_page->index, dn->node_page->index); + index, index); dn->node_page = NULL; trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); @@ -1104,7 +1106,7 @@ skip_partial: ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) { lock_page(page); BUG_ON(page->mapping != NODE_MAPPING(sbi)); - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; set_page_dirty(page); unlock_page(page); @@ -1232,7 +1234,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) new_ni.version = 0; set_node_addr(sbi, &new_ni, NEW_ADDR, false); - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); set_cold_node(page, S_ISDIR(dn->inode->i_mode)); if (!PageUptodate(page)) @@ -1596,10 +1598,10 @@ int f2fs_move_node_page(struct page *node_page, int gc_type) .for_reclaim = 0, }; + f2fs_wait_on_page_writeback(node_page, NODE, true, true); + set_page_dirty(node_page); - f2fs_wait_on_page_writeback(node_page, NODE, true); - f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page)); if (!clear_page_dirty_for_io(node_page)) { err = -EAGAIN; goto out_page; @@ -1687,8 +1689,7 @@ continue_unlock: goto continue_unlock; } - f2fs_wait_on_page_writeback(page, NODE, true); - BUG_ON(PageWriteback(page)); + f2fs_wait_on_page_writeback(page, NODE, true, true); set_fsync_mark(page, 0); set_dentry_mark(page, 0); @@ -1739,7 +1740,7 @@ continue_unlock: "Retry to write fsync mark: ino=%u, idx=%lx", ino, last_page->index); lock_page(last_page); - f2fs_wait_on_page_writeback(last_page, NODE, true); + f2fs_wait_on_page_writeback(last_page, NODE, true, true); set_page_dirty(last_page); unlock_page(last_page); goto retry; @@ -1820,9 +1821,8 @@ continue_unlock: goto lock_node; } - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); - BUG_ON(PageWriteback(page)); if (!clear_page_dirty_for_io(page)) goto continue_unlock; @@ -1889,7 +1889,7 @@ int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, get_page(page); spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, false); if (TestClearPageError(page)) ret = -EIO; @@ -2467,7 +2467,7 @@ void f2fs_recover_inline_xattr(struct inode *inode, struct page *page) src_addr = inline_xattr_addr(inode, page); inline_size = inline_xattr_size(inode); - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); memcpy(dst_addr, src_addr, inline_size); update_inode: f2fs_update_inode(inode, ipage); @@ -2561,17 +2561,17 @@ retry: if (dst->i_inline & F2FS_EXTRA_ATTR) { dst->i_extra_isize = src->i_extra_isize; - if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) && + if (f2fs_sb_has_flexible_inline_xattr(sbi) && F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_inline_xattr_size)) dst->i_inline_xattr_size = src->i_inline_xattr_size; - if (f2fs_sb_has_project_quota(sbi->sb) && + if (f2fs_sb_has_project_quota(sbi) && F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_projid)) dst->i_projid = src->i_projid; - if (f2fs_sb_has_inode_crtime(sbi->sb) && + if (f2fs_sb_has_inode_crtime(sbi) && F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_crtime_nsec)) { dst->i_crtime = src->i_crtime; @@ -3113,17 +3113,17 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) for (i = 0; i < nm_i->nat_blocks; i++) kvfree(nm_i->free_nid_bitmap[i]); - kfree(nm_i->free_nid_bitmap); + kvfree(nm_i->free_nid_bitmap); } kvfree(nm_i->free_nid_count); - kfree(nm_i->nat_bitmap); - kfree(nm_i->nat_bits); + kvfree(nm_i->nat_bitmap); + kvfree(nm_i->nat_bits); #ifdef CONFIG_F2FS_CHECK_FS - kfree(nm_i->nat_bitmap_mir); + kvfree(nm_i->nat_bitmap_mir); #endif sbi->nm_info = NULL; - kfree(nm_i); + kvfree(nm_i); } int __init f2fs_create_node_manager_caches(void) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h @@ -361,7 +361,7 @@ static inline int set_nid(struct page *p, int off, nid_t nid, bool i) { struct f2fs_node *rn = F2FS_NODE(p); - f2fs_wait_on_page_writeback(p, NODE, true); + f2fs_wait_on_page_writeback(p, NODE, true, true); if (i) rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c @@ -250,7 +250,7 @@ static int recover_inode(struct inode *inode, struct page *page) i_gid_write(inode, le32_to_cpu(raw->i_gid)); if (raw->i_inline & F2FS_EXTRA_ATTR) { - if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) && + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) && F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize), i_projid)) { projid_t i_projid; @@ -539,7 +539,7 @@ retry_dn: goto out; } - f2fs_wait_on_page_writeback(dn.node_page, NODE, true); + f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true); err = f2fs_get_node_info(sbi, dn.nid, &ni); if (err) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c @@ -229,7 +229,7 @@ static int __revoke_inmem_pages(struct inode *inode, lock_page(page); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); if (recover) { struct dnode_of_data dn; @@ -387,8 +387,9 @@ static int __f2fs_commit_inmem_pages(struct inode *inode) if (page->mapping == inode->i_mapping) { trace_f2fs_commit_inmem_page(page, INMEM); + f2fs_wait_on_page_writeback(page, DATA, true, true); + set_page_dirty(page); - f2fs_wait_on_page_writeback(page, DATA, true); if (clear_page_dirty_for_io(page)) { inode_dec_dirty_pages(inode); f2fs_remove_dirty_inode(inode); @@ -620,14 +621,16 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) return 0; if (!test_opt(sbi, FLUSH_MERGE)) { + atomic_inc(&fcc->queued_flush); ret = submit_flush_wait(sbi, ino); + atomic_dec(&fcc->queued_flush); atomic_inc(&fcc->issued_flush); return ret; } - if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) { + if (atomic_inc_return(&fcc->queued_flush) == 1 || sbi->s_ndevs > 1) { ret = submit_flush_wait(sbi, ino); - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); atomic_inc(&fcc->issued_flush); return ret; @@ -646,14 +649,14 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) if (fcc->f2fs_issue_flush) { wait_for_completion(&cmd.wait); - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); } else { struct llist_node *list; list = llist_del_all(&fcc->issue_list); if (!list) { wait_for_completion(&cmd.wait); - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); } else { struct flush_cmd *tmp, *next; @@ -662,7 +665,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) llist_for_each_entry_safe(tmp, next, list, llnode) { if (tmp == &cmd) { cmd.ret = ret; - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); continue; } tmp->ret = ret; @@ -691,7 +694,7 @@ int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi) if (!fcc) return -ENOMEM; atomic_set(&fcc->issued_flush, 0); - atomic_set(&fcc->issing_flush, 0); + atomic_set(&fcc->queued_flush, 0); init_waitqueue_head(&fcc->flush_wait_queue); init_llist_head(&fcc->issue_list); SM_I(sbi)->fcc_info = fcc; @@ -703,7 +706,7 @@ init_thread: "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(fcc->f2fs_issue_flush)) { err = PTR_ERR(fcc->f2fs_issue_flush); - kfree(fcc); + kvfree(fcc); SM_I(sbi)->fcc_info = NULL; return err; } @@ -722,7 +725,7 @@ void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) kthread_stop(flush_thread); } if (free) { - kfree(fcc); + kvfree(fcc); SM_I(sbi)->fcc_info = NULL; } } @@ -907,7 +910,7 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, dc->len = len; dc->ref = 0; dc->state = D_PREP; - dc->issuing = 0; + dc->queued = 0; dc->error = 0; init_completion(&dc->wait); list_add_tail(&dc->list, pend_list); @@ -940,7 +943,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc, struct discard_cmd *dc) { if (dc->state == D_DONE) - atomic_sub(dc->issuing, &dcc->issing_discard); + atomic_sub(dc->queued, &dcc->queued_discard); list_del(&dc->list); rb_erase_cached(&dc->rb_node, &dcc->root); @@ -1143,12 +1146,12 @@ submit: dc->bio_ref++; spin_unlock_irqrestore(&dc->lock, flags); - atomic_inc(&dcc->issing_discard); - dc->issuing++; + atomic_inc(&dcc->queued_discard); + dc->queued++; list_move_tail(&dc->list, wait_list); /* sanity check on discard range */ - __check_sit_bitmap(sbi, start, start + len); + __check_sit_bitmap(sbi, lstart, lstart + len); bio->bi_private = dc; bio->bi_end_io = f2fs_submit_discard_endio; @@ -1649,6 +1652,10 @@ static int issue_discard_thread(void *data) if (dcc->discard_wake) dcc->discard_wake = 0; + /* clean up pending candidates before going to sleep */ + if (atomic_read(&dcc->queued_discard)) + __wait_all_discard_cmd(sbi, NULL); + if (try_to_freeze()) continue; if (f2fs_readonly(sbi->sb)) @@ -1734,7 +1741,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { #ifdef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_has_blkzoned(sbi->sb) && + if (f2fs_sb_has_blkzoned(sbi) && bdev_zoned_model(bdev) != BLK_ZONED_NONE) return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); #endif @@ -1882,7 +1889,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, unsigned int start = 0, end = -1; unsigned int secno, start_segno; bool force = (cpc->reason & CP_DISCARD); - bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1; + bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi); mutex_lock(&dirty_i->seglist_lock); @@ -1914,7 +1921,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, (end - 1) <= cpc->trim_end) continue; - if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) { + if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) { f2fs_issue_discard(sbi, START_BLOCK(sbi, start), (end - start) << sbi->log_blocks_per_seg); continue; @@ -1946,7 +1953,7 @@ find_next: sbi->blocks_per_seg, cur_pos); len = next_pos - cur_pos; - if (f2fs_sb_has_blkzoned(sbi->sb) || + if (f2fs_sb_has_blkzoned(sbi) || (force && len < cpc->trim_minlen)) goto skip; @@ -1994,7 +2001,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&dcc->fstrim_list); mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); - atomic_set(&dcc->issing_discard, 0); + atomic_set(&dcc->queued_discard, 0); atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; @@ -2010,7 +2017,7 @@ init_thread: "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(dcc->f2fs_issue_discard)) { err = PTR_ERR(dcc->f2fs_issue_discard); - kfree(dcc); + kvfree(dcc); SM_I(sbi)->dcc_info = NULL; return err; } @@ -2027,7 +2034,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) f2fs_stop_discard_thread(sbi); - kfree(dcc); + kvfree(dcc); SM_I(sbi)->dcc_info = NULL; } @@ -2146,7 +2153,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* update total number of valid blocks to be written in ckpt area */ SIT_I(sbi)->written_valid_blocks += del; - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) get_sec_entry(sbi, segno)->valid_blocks += del; } @@ -2412,7 +2419,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) { /* if segs_per_sec is large than 1, we need to keep original policy. */ - if (sbi->segs_per_sec != 1) + if (__is_large_section(sbi)) return CURSEG_I(sbi, type)->segno; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) @@ -2722,7 +2729,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) struct discard_policy dpolicy; unsigned long long trimmed = 0; int err = 0; - bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1; + bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi); if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; @@ -3272,16 +3279,18 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, } void f2fs_wait_on_page_writeback(struct page *page, - enum page_type type, bool ordered) + enum page_type type, bool ordered, bool locked) { if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type); - if (ordered) + if (ordered) { wait_on_page_writeback(page); - else + f2fs_bug_on(sbi, locked && PageWriteback(page)); + } else { wait_for_stable_page(page); + } } } @@ -3298,7 +3307,7 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) cpage = find_lock_page(META_MAPPING(sbi), blkaddr); if (cpage) { - f2fs_wait_on_page_writeback(cpage, DATA, true); + f2fs_wait_on_page_writeback(cpage, DATA, true, true); f2fs_put_page(cpage, 1); } } @@ -3880,7 +3889,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) if (!sit_i->tmp_map) return -ENOMEM; - if (sbi->segs_per_sec > 1) { + if (__is_large_section(sbi)) { sit_i->sec_entries = f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry), MAIN_SECS(sbi)), @@ -4035,7 +4044,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) se->valid_blocks; } - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) get_sec_entry(sbi, start)->valid_blocks += se->valid_blocks; } @@ -4079,7 +4088,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) sbi->discard_blks -= se->valid_blocks; } - if (sbi->segs_per_sec > 1) { + if (__is_large_section(sbi)) { get_sec_entry(sbi, start)->valid_blocks += se->valid_blocks; get_sec_entry(sbi, start)->valid_blocks -= @@ -4314,7 +4323,7 @@ static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) destroy_victim_secmap(sbi); SM_I(sbi)->dirty_info = NULL; - kfree(dirty_i); + kvfree(dirty_i); } static void destroy_curseg(struct f2fs_sb_info *sbi) @@ -4326,10 +4335,10 @@ static void destroy_curseg(struct f2fs_sb_info *sbi) return; SM_I(sbi)->curseg_array = NULL; for (i = 0; i < NR_CURSEG_TYPE; i++) { - kfree(array[i].sum_blk); - kfree(array[i].journal); + kvfree(array[i].sum_blk); + kvfree(array[i].journal); } - kfree(array); + kvfree(array); } static void destroy_free_segmap(struct f2fs_sb_info *sbi) @@ -4340,7 +4349,7 @@ static void destroy_free_segmap(struct f2fs_sb_info *sbi) SM_I(sbi)->free_info = NULL; kvfree(free_i->free_segmap); kvfree(free_i->free_secmap); - kfree(free_i); + kvfree(free_i); } static void destroy_sit_info(struct f2fs_sb_info *sbi) @@ -4353,26 +4362,26 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) if (sit_i->sentries) { for (start = 0; start < MAIN_SEGS(sbi); start++) { - kfree(sit_i->sentries[start].cur_valid_map); + kvfree(sit_i->sentries[start].cur_valid_map); #ifdef CONFIG_F2FS_CHECK_FS - kfree(sit_i->sentries[start].cur_valid_map_mir); + kvfree(sit_i->sentries[start].cur_valid_map_mir); #endif - kfree(sit_i->sentries[start].ckpt_valid_map); - kfree(sit_i->sentries[start].discard_map); + kvfree(sit_i->sentries[start].ckpt_valid_map); + kvfree(sit_i->sentries[start].discard_map); } } - kfree(sit_i->tmp_map); + kvfree(sit_i->tmp_map); kvfree(sit_i->sentries); kvfree(sit_i->sec_entries); kvfree(sit_i->dirty_sentries_bitmap); SM_I(sbi)->sit_info = NULL; - kfree(sit_i->sit_bitmap); + kvfree(sit_i->sit_bitmap); #ifdef CONFIG_F2FS_CHECK_FS - kfree(sit_i->sit_bitmap_mir); + kvfree(sit_i->sit_bitmap_mir); #endif - kfree(sit_i); + kvfree(sit_i); } void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi) @@ -4388,7 +4397,7 @@ void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi) destroy_free_segmap(sbi); destroy_sit_info(sbi); sbi->sm_info = NULL; - kfree(sm_info); + kvfree(sm_info); } int __init f2fs_create_segment_manager_caches(void) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h @@ -333,7 +333,7 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, * In order to get # of valid blocks in a section instantly from many * segments, f2fs manages two counting structures separately. */ - if (use_section && sbi->segs_per_sec > 1) + if (use_section && __is_large_section(sbi)) return get_sec_entry(sbi, segno)->valid_blocks; else return get_seg_entry(sbi, segno)->valid_blocks; diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c @@ -135,6 +135,6 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi) f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi)); spin_lock(&f2fs_list_lock); - list_del(&sbi->s_list); + list_del_init(&sbi->s_list); spin_unlock(&f2fs_list_lock); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c @@ -38,7 +38,7 @@ static struct kmem_cache *f2fs_inode_cachep; #ifdef CONFIG_F2FS_FAULT_INJECTION -char *f2fs_fault_name[FAULT_MAX] = { +const char *f2fs_fault_name[FAULT_MAX] = { [FAULT_KMALLOC] = "kmalloc", [FAULT_KVMALLOC] = "kvmalloc", [FAULT_PAGE_ALLOC] = "page alloc", @@ -259,7 +259,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, "quota options when quota turned on"); return -EINVAL; } - if (f2fs_sb_has_quota_ino(sb)) { + if (f2fs_sb_has_quota_ino(sbi)) { f2fs_msg(sb, KERN_INFO, "QUOTA feature is enabled, so ignore qf_name"); return 0; @@ -289,7 +289,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, set_opt(sbi, QUOTA); return 0; errout: - kfree(qname); + kvfree(qname); return ret; } @@ -302,7 +302,7 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype) " when quota turned on"); return -EINVAL; } - kfree(F2FS_OPTION(sbi).s_qf_names[qtype]); + kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]); F2FS_OPTION(sbi).s_qf_names[qtype] = NULL; return 0; } @@ -314,7 +314,7 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) * 'grpquota' mount options are allowed even without quota feature * to support legacy quotas in quota files. */ - if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi->sb)) { + if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) { f2fs_msg(sbi->sb, KERN_ERR, "Project quota feature not enabled. " "Cannot enable project quota enforcement."); return -1; @@ -348,7 +348,7 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) } } - if (f2fs_sb_has_quota_ino(sbi->sb) && F2FS_OPTION(sbi).s_jquota_fmt) { + if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) { f2fs_msg(sbi->sb, KERN_INFO, "QUOTA feature is enabled, so ignore jquota_fmt"); F2FS_OPTION(sbi).s_jquota_fmt = 0; @@ -399,10 +399,10 @@ static int parse_options(struct super_block *sb, char *options) set_opt(sbi, BG_GC); set_opt(sbi, FORCE_FG_GC); } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_disable_roll_forward: set_opt(sbi, DISABLE_ROLL_FORWARD); @@ -417,7 +417,7 @@ static int parse_options(struct super_block *sb, char *options) set_opt(sbi, DISCARD); break; case Opt_nodiscard: - if (f2fs_sb_has_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sbi)) { f2fs_msg(sb, KERN_WARNING, "discard is required for zoned block devices"); return -EINVAL; @@ -566,11 +566,11 @@ static int parse_options(struct super_block *sb, char *options) return -ENOMEM; if (strlen(name) == 8 && !strncmp(name, "adaptive", 8)) { - if (f2fs_sb_has_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sbi)) { f2fs_msg(sb, KERN_WARNING, "adaptive mode is not allowed with " "zoned block device feature"); - kfree(name); + kvfree(name); return -EINVAL; } set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); @@ -578,10 +578,10 @@ static int parse_options(struct super_block *sb, char *options) !strncmp(name, "lfs", 3)) { set_opt_mode(sbi, F2FS_MOUNT_LFS); } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_io_size_bits: if (args->from && match_int(args, &arg)) @@ -714,10 +714,10 @@ static int parse_options(struct super_block *sb, char *options) !strncmp(name, "fs-based", 8)) { F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS; } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_alloc: name = match_strdup(&args[0]); @@ -731,10 +731,10 @@ static int parse_options(struct super_block *sb, char *options) !strncmp(name, "reuse", 5)) { F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_fsync: name = match_strdup(&args[0]); @@ -751,14 +751,14 @@ static int parse_options(struct super_block *sb, char *options) F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_NOBARRIER; } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_test_dummy_encryption: #ifdef CONFIG_F2FS_FS_ENCRYPTION - if (!f2fs_sb_has_encrypt(sb)) { + if (!f2fs_sb_has_encrypt(sbi)) { f2fs_msg(sb, KERN_ERR, "Encrypt feature is off"); return -EINVAL; } @@ -783,10 +783,10 @@ static int parse_options(struct super_block *sb, char *options) !strncmp(name, "disable", 7)) { set_opt(sbi, DISABLE_CHECKPOINT); } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; default: f2fs_msg(sb, KERN_ERR, @@ -799,13 +799,13 @@ static int parse_options(struct super_block *sb, char *options) if (f2fs_check_quota_options(sbi)) return -EINVAL; #else - if (f2fs_sb_has_quota_ino(sbi->sb) && !f2fs_readonly(sbi->sb)) { + if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) { f2fs_msg(sbi->sb, KERN_INFO, "Filesystem with quota feature cannot be mounted RDWR " "without CONFIG_QUOTA"); return -EINVAL; } - if (f2fs_sb_has_project_quota(sbi->sb) && !f2fs_readonly(sbi->sb)) { + if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) { f2fs_msg(sb, KERN_ERR, "Filesystem with project quota feature cannot be " "mounted RDWR without CONFIG_QUOTA"); @@ -821,8 +821,8 @@ static int parse_options(struct super_block *sb, char *options) } if (test_opt(sbi, INLINE_XATTR_SIZE)) { - if (!f2fs_sb_has_extra_attr(sb) || - !f2fs_sb_has_flexible_inline_xattr(sb)) { + if (!f2fs_sb_has_extra_attr(sbi) || + !f2fs_sb_has_flexible_inline_xattr(sbi)) { f2fs_msg(sb, KERN_ERR, "extra_attr or flexible_inline_xattr " "feature is off"); @@ -1017,10 +1017,10 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) for (i = 0; i < sbi->s_ndevs; i++) { blkdev_put(FDEV(i).bdev, FMODE_EXCL); #ifdef CONFIG_BLK_DEV_ZONED - kfree(FDEV(i).blkz_type); + kvfree(FDEV(i).blkz_type); #endif } - kfree(sbi->devs); + kvfree(sbi->devs); } static void f2fs_put_super(struct super_block *sb) @@ -1058,9 +1058,6 @@ static void f2fs_put_super(struct super_block *sb) f2fs_write_checkpoint(sbi, &cpc); } - /* f2fs_write_checkpoint can update stat informaion */ - f2fs_destroy_stats(sbi); - /* * normally superblock is clean, so we need to release this. * In addition, EIO will skip do checkpoint, we need this as well. @@ -1080,29 +1077,35 @@ static void f2fs_put_super(struct super_block *sb) iput(sbi->node_inode); iput(sbi->meta_inode); + /* + * iput() can update stat information, if f2fs_write_checkpoint() + * above failed with error. + */ + f2fs_destroy_stats(sbi); + /* destroy f2fs internal modules */ f2fs_destroy_node_manager(sbi); f2fs_destroy_segment_manager(sbi); - kfree(sbi->ckpt); + kvfree(sbi->ckpt); f2fs_unregister_sysfs(sbi); sb->s_fs_info = NULL; if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); - kfree(sbi->raw_super); + kvfree(sbi->raw_super); destroy_device_list(sbi); mempool_destroy(sbi->write_io_dummy); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kfree(F2FS_OPTION(sbi).s_qf_names[i]); + kvfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif destroy_percpu_info(sbi); for (i = 0; i < NR_PAGE_TYPE; i++) - kfree(sbi->write_io[i]); - kfree(sbi); + kvfree(sbi->write_io[i]); + kvfree(sbi); } int f2fs_sync_fs(struct super_block *sb, int sync) @@ -1431,7 +1434,7 @@ static void default_options(struct f2fs_sb_info *sbi) sbi->sb->s_flags |= SB_LAZYTIME; set_opt(sbi, FLUSH_MERGE); set_opt(sbi, DISCARD); - if (f2fs_sb_has_blkzoned(sbi->sb)) + if (f2fs_sb_has_blkzoned(sbi)) set_opt_mode(sbi, F2FS_MOUNT_LFS); else set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); @@ -1457,19 +1460,16 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) sbi->sb->s_flags |= SB_ACTIVE; - mutex_lock(&sbi->gc_mutex); f2fs_update_time(sbi, DISABLE_TIME); while (!f2fs_time_over(sbi, DISABLE_TIME)) { + mutex_lock(&sbi->gc_mutex); err = f2fs_gc(sbi, true, false, NULL_SEGNO); if (err == -ENODATA) break; - if (err && err != -EAGAIN) { - mutex_unlock(&sbi->gc_mutex); + if (err && err != -EAGAIN) return err; - } } - mutex_unlock(&sbi->gc_mutex); err = sync_filesystem(sbi->sb); if (err) @@ -1531,7 +1531,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) GFP_KERNEL); if (!org_mount_opt.s_qf_names[i]) { for (j = 0; j < i; j++) - kfree(org_mount_opt.s_qf_names[j]); + kvfree(org_mount_opt.s_qf_names[j]); return -ENOMEM; } } else { @@ -1575,7 +1575,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) sb->s_flags &= ~SB_RDONLY; if (sb_any_quota_suspended(sb)) { dquot_resume(sb, -1); - } else if (f2fs_sb_has_quota_ino(sb)) { + } else if (f2fs_sb_has_quota_ino(sbi)) { err = f2fs_enable_quotas(sb); if (err) goto restore_opts; @@ -1651,7 +1651,7 @@ skip: #ifdef CONFIG_QUOTA /* Release old quota file names */ for (i = 0; i < MAXQUOTAS; i++) - kfree(org_mount_opt.s_qf_names[i]); + kvfree(org_mount_opt.s_qf_names[i]); #endif /* Update the POSIXACL Flag */ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | @@ -1672,7 +1672,7 @@ restore_opts: #ifdef CONFIG_QUOTA F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { - kfree(F2FS_OPTION(sbi).s_qf_names[i]); + kvfree(F2FS_OPTION(sbi).s_qf_names[i]); F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i]; } #endif @@ -1817,7 +1817,7 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) int enabled = 0; int i, err; - if (f2fs_sb_has_quota_ino(sbi->sb) && rdonly) { + if (f2fs_sb_has_quota_ino(sbi) && rdonly) { err = f2fs_enable_quotas(sbi->sb); if (err) { f2fs_msg(sbi->sb, KERN_ERR, @@ -1848,7 +1848,7 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, unsigned long qf_inum; int err; - BUG_ON(!f2fs_sb_has_quota_ino(sb)); + BUG_ON(!f2fs_sb_has_quota_ino(F2FS_SB(sb))); qf_inum = f2fs_qf_ino(sb, type); if (!qf_inum) @@ -1993,7 +1993,7 @@ static int f2fs_quota_off(struct super_block *sb, int type) goto out_put; err = dquot_quota_off(sb, type); - if (err || f2fs_sb_has_quota_ino(sb)) + if (err || f2fs_sb_has_quota_ino(F2FS_SB(sb))) goto out_put; inode_lock(inode); @@ -2173,7 +2173,7 @@ static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len, * if LOST_FOUND feature is enabled. * */ - if (f2fs_sb_has_lost_found(sbi->sb) && + if (f2fs_sb_has_lost_found(sbi) && inode->i_ino == F2FS_ROOT_INO(sbi)) return -EPERM; @@ -2396,7 +2396,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, __u32 crc = 0; /* Check checksum_offset and crc in superblock */ - if (le32_to_cpu(raw_super->feature) & F2FS_FEATURE_SB_CHKSUM) { + if (__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_SB_CHKSUM)) { crc_offset = le32_to_cpu(raw_super->checksum_offset); if (crc_offset != offsetof(struct f2fs_super_block, crc)) { @@ -2496,10 +2496,10 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return 1; } - if (segment_count > (le32_to_cpu(raw_super->block_count) >> 9)) { + if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) { f2fs_msg(sb, KERN_INFO, - "Wrong segment_count / block_count (%u > %u)", - segment_count, le32_to_cpu(raw_super->block_count)); + "Wrong segment_count / block_count (%u > %llu)", + segment_count, le64_to_cpu(raw_super->block_count)); return 1; } @@ -2674,7 +2674,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) static void init_sb_info(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = sbi->raw_super; - int i, j; + int i; sbi->log_sectors_per_block = le32_to_cpu(raw_super->log_sectors_per_block); @@ -2692,7 +2692,10 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); sbi->cur_victim_sec = NULL_SECNO; + sbi->next_victim_seg[BG_GC] = NULL_SEGNO; + sbi->next_victim_seg[FG_GC] = NULL_SEGNO; sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; + sbi->migration_granularity = sbi->segs_per_sec; sbi->dir_level = DEF_DIR_LEVEL; sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; @@ -2710,9 +2713,6 @@ static void init_sb_info(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sbi->s_list); mutex_init(&sbi->umount_mutex); - for (i = 0; i < NR_PAGE_TYPE - 1; i++) - for (j = HOT; j < NR_TEMP_TYPE; j++) - mutex_init(&sbi->wio_mutex[i][j]); init_rwsem(&sbi->io_order_lock); spin_lock_init(&sbi->cp_lock); @@ -2749,7 +2749,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) unsigned int n = 0; int err = -EIO; - if (!f2fs_sb_has_blkzoned(sbi->sb)) + if (!f2fs_sb_has_blkzoned(sbi)) return 0; if (sbi->blocks_per_blkz && sbi->blocks_per_blkz != @@ -2800,7 +2800,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) } } - kfree(zones); + kvfree(zones); return err; } @@ -2860,7 +2860,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, /* No valid superblock */ if (!*raw_super) - kfree(super); + kvfree(super); else err = 0; @@ -2880,7 +2880,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) } /* we should update superblock crc here */ - if (!recover && f2fs_sb_has_sb_chksum(sbi->sb)) { + if (!recover && f2fs_sb_has_sb_chksum(sbi)) { crc = f2fs_crc32(sbi, F2FS_RAW_SUPER(sbi), offsetof(struct f2fs_super_block, crc)); F2FS_RAW_SUPER(sbi)->crc = cpu_to_le32(crc); @@ -2968,7 +2968,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) #ifdef CONFIG_BLK_DEV_ZONED if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM && - !f2fs_sb_has_blkzoned(sbi->sb)) { + !f2fs_sb_has_blkzoned(sbi)) { f2fs_msg(sbi->sb, KERN_ERR, "Zoned block device feature not enabled\n"); return -EINVAL; @@ -3064,7 +3064,7 @@ try_onemore: sbi->raw_super = raw_super; /* precompute checksum seed for metadata */ - if (f2fs_sb_has_inode_chksum(sb)) + if (f2fs_sb_has_inode_chksum(sbi)) sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid, sizeof(raw_super->uuid)); @@ -3074,7 +3074,7 @@ try_onemore: * devices, but mandatory for host-managed zoned block devices. */ #ifndef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_has_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sbi)) { f2fs_msg(sb, KERN_ERR, "Zoned block device support is not enabled\n"); err = -EOPNOTSUPP; @@ -3101,13 +3101,13 @@ try_onemore: #ifdef CONFIG_QUOTA sb->dq_op = &f2fs_quota_operations; - if (f2fs_sb_has_quota_ino(sb)) + if (f2fs_sb_has_quota_ino(sbi)) sb->s_qcop = &dquot_quotactl_sysfile_ops; else sb->s_qcop = &f2fs_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; - if (f2fs_sb_has_quota_ino(sbi->sb)) { + if (f2fs_sb_has_quota_ino(sbi)) { for (i = 0; i < MAXQUOTAS; i++) { if (f2fs_qf_ino(sbi->sb, i)) sbi->nquota_files++; @@ -3259,30 +3259,30 @@ try_onemore: f2fs_build_gc_manager(sbi); + err = f2fs_build_stats(sbi); + if (err) + goto free_nm; + /* get an inode for node space */ sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi)); if (IS_ERR(sbi->node_inode)) { f2fs_msg(sb, KERN_ERR, "Failed to read node inode"); err = PTR_ERR(sbi->node_inode); - goto free_nm; + goto free_stats; } - err = f2fs_build_stats(sbi); - if (err) - goto free_node_inode; - /* read root inode and dentry */ root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); if (IS_ERR(root)) { f2fs_msg(sb, KERN_ERR, "Failed to read root inode"); err = PTR_ERR(root); - goto free_stats; + goto free_node_inode; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size || !root->i_nlink) { iput(root); err = -EINVAL; - goto free_stats; + goto free_node_inode; } sb->s_root = d_make_root(root); /* allocate root dentry */ @@ -3297,7 +3297,7 @@ try_onemore: #ifdef CONFIG_QUOTA /* Enable quota usage during mount */ - if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) { + if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) { err = f2fs_enable_quotas(sb); if (err) f2fs_msg(sb, KERN_ERR, @@ -3369,7 +3369,7 @@ skip_recovery: if (err) goto free_meta; } - kfree(options); + kvfree(options); /* recover broken superblock */ if (recovery) { @@ -3392,7 +3392,7 @@ skip_recovery: free_meta: #ifdef CONFIG_QUOTA f2fs_truncate_quota_inode_pages(sb); - if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) + if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) f2fs_quota_off_umount(sbi->sb); #endif /* @@ -3406,19 +3406,19 @@ free_meta: free_root_inode: dput(sb->s_root); sb->s_root = NULL; -free_stats: - f2fs_destroy_stats(sbi); free_node_inode: f2fs_release_ino_entry(sbi, true); truncate_inode_pages_final(NODE_MAPPING(sbi)); iput(sbi->node_inode); +free_stats: + f2fs_destroy_stats(sbi); free_nm: f2fs_destroy_node_manager(sbi); free_sm: f2fs_destroy_segment_manager(sbi); free_devices: destroy_device_list(sbi); - kfree(sbi->ckpt); + kvfree(sbi->ckpt); free_meta_inode: make_bad_inode(sbi->meta_inode); iput(sbi->meta_inode); @@ -3428,19 +3428,19 @@ free_percpu: destroy_percpu_info(sbi); free_bio_info: for (i = 0; i < NR_PAGE_TYPE; i++) - kfree(sbi->write_io[i]); + kvfree(sbi->write_io[i]); free_options: #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kfree(F2FS_OPTION(sbi).s_qf_names[i]); + kvfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif - kfree(options); + kvfree(options); free_sb_buf: - kfree(raw_super); + kvfree(raw_super); free_sbi: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); - kfree(sbi); + kvfree(sbi); /* give only one another chance */ if (retry) { diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c @@ -90,34 +90,34 @@ static ssize_t features_show(struct f2fs_attr *a, if (!sb->s_bdev->bd_part) return snprintf(buf, PAGE_SIZE, "0\n"); - if (f2fs_sb_has_encrypt(sb)) + if (f2fs_sb_has_encrypt(sbi)) len += snprintf(buf, PAGE_SIZE - len, "%s", "encryption"); - if (f2fs_sb_has_blkzoned(sb)) + if (f2fs_sb_has_blkzoned(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "blkzoned"); - if (f2fs_sb_has_extra_attr(sb)) + if (f2fs_sb_has_extra_attr(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "extra_attr"); - if (f2fs_sb_has_project_quota(sb)) + if (f2fs_sb_has_project_quota(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "projquota"); - if (f2fs_sb_has_inode_chksum(sb)) + if (f2fs_sb_has_inode_chksum(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_checksum"); - if (f2fs_sb_has_flexible_inline_xattr(sb)) + if (f2fs_sb_has_flexible_inline_xattr(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "flexible_inline_xattr"); - if (f2fs_sb_has_quota_ino(sb)) + if (f2fs_sb_has_quota_ino(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "quota_ino"); - if (f2fs_sb_has_inode_crtime(sb)) + if (f2fs_sb_has_inode_crtime(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_crtime"); - if (f2fs_sb_has_lost_found(sb)) + if (f2fs_sb_has_lost_found(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "lost_found"); - if (f2fs_sb_has_sb_chksum(sb)) + if (f2fs_sb_has_sb_chksum(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "sb_checksum"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); @@ -246,6 +246,11 @@ out: return count; } + if (!strcmp(a->attr.name, "migration_granularity")) { + if (t == 0 || t > sbi->segs_per_sec) + return -EINVAL; + } + if (!strcmp(a->attr.name, "trim_sections")) return -EINVAL; @@ -406,6 +411,7 @@ F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, migration_granularity, migration_granularity); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); @@ -460,6 +466,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(min_hot_blocks), ATTR_LIST(min_ssr_sections), ATTR_LIST(max_victim_search), + ATTR_LIST(migration_granularity), ATTR_LIST(dir_level), ATTR_LIST(ram_thresh), ATTR_LIST(ra_nid_pages), diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c @@ -288,7 +288,7 @@ static int read_xattr_block(struct inode *inode, void *txattr_addr) static int lookup_all_xattrs(struct inode *inode, struct page *ipage, unsigned int index, unsigned int len, const char *name, struct f2fs_xattr_entry **xe, - void **base_addr) + void **base_addr, int *base_size) { void *cur_addr, *txattr_addr, *last_addr = NULL; nid_t xnid = F2FS_I(inode)->i_xattr_nid; @@ -299,8 +299,8 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, if (!size && !inline_size) return -ENODATA; - txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), - inline_size + size + XATTR_PADDING_SIZE, GFP_NOFS); + *base_size = inline_size + size + XATTR_PADDING_SIZE; + txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS); if (!txattr_addr) return -ENOMEM; @@ -312,8 +312,10 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, *xe = __find_inline_xattr(inode, txattr_addr, &last_addr, index, len, name); - if (*xe) + if (*xe) { + *base_size = inline_size; goto check; + } } /* read from xattr node block */ @@ -415,7 +417,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, } f2fs_wait_on_page_writeback(ipage ? ipage : in_page, - NODE, true); + NODE, true, true); /* no need to use xattr node block */ if (hsize <= inline_size) { err = f2fs_truncate_xattr_node(inode); @@ -439,7 +441,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, goto in_page_out; } f2fs_bug_on(sbi, new_nid); - f2fs_wait_on_page_writeback(xpage, NODE, true); + f2fs_wait_on_page_writeback(xpage, NODE, true, true); } else { struct dnode_of_data dn; set_new_dnode(&dn, inode, NULL, NULL, new_nid); @@ -474,6 +476,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, int error = 0; unsigned int size, len; void *base_addr = NULL; + int base_size; if (name == NULL) return -EINVAL; @@ -484,7 +487,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, down_read(&F2FS_I(inode)->i_xattr_sem); error = lookup_all_xattrs(inode, ipage, index, len, name, - &entry, &base_addr); + &entry, &base_addr, &base_size); up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -498,6 +501,11 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (buffer) { char *pval = entry->e_name + entry->e_name_len; + + if (base_size - (pval - (char *)base_addr) < size) { + error = -ERANGE; + goto out; + } memcpy(buffer, pval, size); } error = size;