whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 922c010cf236357dea020f483c18373d6a494ffb
parent f9007cc601e7b7ed2e0f0c1664d8252fdce964d3
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Fri, 29 Mar 2019 16:02:28 -0700

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "22 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (22 commits)
  fs/proc/proc_sysctl.c: fix NULL pointer dereference in put_links
  fs: fs_parser: fix printk format warning
  checkpatch: add %pt as a valid vsprintf extension
  mm/migrate.c: add missing flush_dcache_page for non-mapped page migrate
  drivers/block/zram/zram_drv.c: fix idle/writeback string compare
  mm/page_isolation.c: fix a wrong flag in set_migratetype_isolate()
  mm/memory_hotplug.c: fix notification in offline error path
  ptrace: take into account saved_sigmask in PTRACE{GET,SET}SIGMASK
  fs/proc/kcore.c: make kcore_modules static
  include/linux/list.h: fix list_is_first() kernel-doc
  mm/debug.c: fix __dump_page when mapping->host is not set
  mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified
  include/linux/hugetlb.h: convert to use vm_fault_t
  iommu/io-pgtable-arm-v7s: request DMA32 memory, and improve debugging
  mm: add support for kmem caches in DMA32 zone
  ocfs2: fix inode bh swapping mixup in ocfs2_reflink_inodes_lock
  mm/hotplug: fix offline undo_isolate_page_range()
  fs/open.c: allow opening only regular files during execve()
  mailmap: add Changbin Du
  mm/debug.c: add a cast to u64 for atomic64_read()
  ...

Diffstat:
M.mailmap | 2++
Mdrivers/block/zram/zram_drv.c | 32++++++--------------------------
Mdrivers/iommu/io-pgtable-arm-v7s.c | 19+++++++++++++++----
Mfs/fs_parser.c | 2+-
Mfs/ocfs2/refcounttree.c | 42++++++++++++++++++++++++------------------
Mfs/open.c | 6++++++
Mfs/proc/kcore.c | 2+-
Mfs/proc/proc_sysctl.c | 3++-
Minclude/linux/hugetlb.h | 8+++++++-
Minclude/linux/list.h | 2+-
Minclude/linux/page-isolation.h | 10----------
Minclude/linux/sched/signal.h | 18++++++++++++++++++
Minclude/linux/slab.h | 2++
Mkernel/ptrace.c | 15+++++++++++++--
Mmm/debug.c | 4++--
Mmm/kasan/kasan.h | 5++++-
Mmm/memory.c | 11++++++-----
Mmm/memory_hotplug.c | 19++++++++++++++-----
Mmm/mempolicy.c | 40+++++++++++++++++++++++++++++++++-------
Mmm/migrate.c | 11++++++++---
Mmm/page_alloc.c | 2+-
Mmm/page_isolation.c | 51++++++++++++++++++++++++++++++++-------------------
Mmm/slab.c | 2++
Mmm/slab.h | 3++-
Mmm/slab_common.c | 2+-
Mmm/slub.c | 5+++++
Mmm/sparse.c | 2+-
Mscripts/checkpatch.pl | 2+-
28 files changed, 210 insertions(+), 112 deletions(-)

diff --git a/.mailmap b/.mailmap @@ -224,3 +224,5 @@ Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com> Yusuke Goda <goda.yusuke@renesas.com> Gustavo Padovan <gustavo@las.ic.unicamp.br> Gustavo Padovan <padovan@profusion.mobi> +Changbin Du <changbin.du@intel.com> <changbin.du@intel.com> +Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c @@ -290,18 +290,8 @@ static ssize_t idle_store(struct device *dev, struct zram *zram = dev_to_zram(dev); unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; int index; - char mode_buf[8]; - ssize_t sz; - sz = strscpy(mode_buf, buf, sizeof(mode_buf)); - if (sz <= 0) - return -EINVAL; - - /* ignore trailing new line */ - if (mode_buf[sz - 1] == '\n') - mode_buf[sz - 1] = 0x00; - - if (strcmp(mode_buf, "all")) + if (!sysfs_streq(buf, "all")) return -EINVAL; down_read(&zram->init_lock); @@ -635,25 +625,15 @@ static ssize_t writeback_store(struct device *dev, struct bio bio; struct bio_vec bio_vec; struct page *page; - ssize_t ret, sz; - char mode_buf[8]; - int mode = -1; + ssize_t ret; + int mode; unsigned long blk_idx = 0; - sz = strscpy(mode_buf, buf, sizeof(mode_buf)); - if (sz <= 0) - return -EINVAL; - - /* ignore trailing newline */ - if (mode_buf[sz - 1] == '\n') - mode_buf[sz - 1] = 0x00; - - if (!strcmp(mode_buf, "idle")) + if (sysfs_streq(buf, "idle")) mode = IDLE_WRITEBACK; - else if (!strcmp(mode_buf, "huge")) + else if (sysfs_streq(buf, "huge")) mode = HUGE_WRITEBACK; - - if (mode == -1) + else return -EINVAL; down_read(&zram->init_lock); diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c @@ -160,6 +160,14 @@ #define ARM_V7S_TCR_PD1 BIT(5) +#ifdef CONFIG_ZONE_DMA32 +#define ARM_V7S_TABLE_GFP_DMA GFP_DMA32 +#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA32 +#else +#define ARM_V7S_TABLE_GFP_DMA GFP_DMA +#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA +#endif + typedef u32 arm_v7s_iopte; static bool selftest_running; @@ -197,13 +205,16 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, void *table = NULL; if (lvl == 1) - table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size)); + table = (void *)__get_free_pages( + __GFP_ZERO | ARM_V7S_TABLE_GFP_DMA, get_order(size)); else if (lvl == 2) - table = kmem_cache_zalloc(data->l2_tables, gfp | GFP_DMA); + table = kmem_cache_zalloc(data->l2_tables, gfp); phys = virt_to_phys(table); - if (phys != (arm_v7s_iopte)phys) + if (phys != (arm_v7s_iopte)phys) { /* Doesn't fit in PTE */ + dev_err(dev, "Page table does not fit in PTE: %pa", &phys); goto out_free; + } if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { dma = dma_map_single(dev, table, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) @@ -733,7 +744,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2", ARM_V7S_TABLE_SIZE(2), ARM_V7S_TABLE_SIZE(2), - SLAB_CACHE_DMA, NULL); + ARM_V7S_TABLE_SLAB_FLAGS, NULL); if (!data->l2_tables) goto out_free_data; diff --git a/fs/fs_parser.c b/fs/fs_parser.c @@ -410,7 +410,7 @@ bool fs_validate_description(const struct fs_parameter_description *desc) for (param = desc->specs; param->name; param++) { if (param->opt == e->opt && param->type != fs_param_is_enum) { - pr_err("VALIDATE %s: e[%lu] enum val for %s\n", + pr_err("VALIDATE %s: e[%tu] enum val for %s\n", name, e - desc->enums, param->name); good = false; } diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c @@ -4719,22 +4719,23 @@ out: /* Lock an inode and grab a bh pointing to the inode. */ int ocfs2_reflink_inodes_lock(struct inode *s_inode, - struct buffer_head **bh1, + struct buffer_head **bh_s, struct inode *t_inode, - struct buffer_head **bh2) + struct buffer_head **bh_t) { - struct inode *inode1; - struct inode *inode2; + struct inode *inode1 = s_inode; + struct inode *inode2 = t_inode; struct ocfs2_inode_info *oi1; struct ocfs2_inode_info *oi2; + struct buffer_head *bh1 = NULL; + struct buffer_head *bh2 = NULL; bool same_inode = (s_inode == t_inode); + bool need_swap = (inode1->i_ino > inode2->i_ino); int status; /* First grab the VFS and rw locks. */ lock_two_nondirectories(s_inode, t_inode); - inode1 = s_inode; - inode2 = t_inode; - if (inode1->i_ino > inode2->i_ino) + if (need_swap) swap(inode1, inode2); status = ocfs2_rw_lock(inode1, 1); @@ -4757,17 +4758,13 @@ int ocfs2_reflink_inodes_lock(struct inode *s_inode, trace_ocfs2_double_lock((unsigned long long)oi1->ip_blkno, (unsigned long long)oi2->ip_blkno); - if (*bh1) - *bh1 = NULL; - if (*bh2) - *bh2 = NULL; - /* We always want to lock the one with the lower lockid first. */ if (oi1->ip_blkno > oi2->ip_blkno) mlog_errno(-ENOLCK); /* lock id1 */ - status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_REFLINK_TARGET); + status = ocfs2_inode_lock_nested(inode1, &bh1, 1, + OI_LS_REFLINK_TARGET); if (status < 0) { if (status != -ENOENT) mlog_errno(status); @@ -4776,15 +4773,25 @@ int ocfs2_reflink_inodes_lock(struct inode *s_inode, /* lock id2 */ if (!same_inode) { - status = ocfs2_inode_lock_nested(inode2, bh2, 1, + status = ocfs2_inode_lock_nested(inode2, &bh2, 1, OI_LS_REFLINK_TARGET); if (status < 0) { if (status != -ENOENT) mlog_errno(status); goto out_cl1; } - } else - *bh2 = *bh1; + } else { + bh2 = bh1; + } + + /* + * If we swapped inode order above, we have to swap the buffer heads + * before passing them back to the caller. + */ + if (need_swap) + swap(bh1, bh2); + *bh_s = bh1; + *bh_t = bh2; trace_ocfs2_double_lock_end( (unsigned long long)oi1->ip_blkno, @@ -4794,8 +4801,7 @@ int ocfs2_reflink_inodes_lock(struct inode *s_inode, out_cl1: ocfs2_inode_unlock(inode1, 1); - brelse(*bh1); - *bh1 = NULL; + brelse(bh1); out_rw2: ocfs2_rw_unlock(inode2, 1); out_i2: diff --git a/fs/open.c b/fs/open.c @@ -733,6 +733,12 @@ static int do_dentry_open(struct file *f, return 0; } + /* Any file opened for execve()/uselib() has to be a regular file. */ + if (unlikely(f->f_flags & FMODE_EXEC && !S_ISREG(inode->i_mode))) { + error = -EACCES; + goto cleanup_file; + } + if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { error = get_write_access(inode); if (unlikely(error)) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c @@ -615,7 +615,7 @@ static void __init proc_kcore_text_init(void) /* * MODULES_VADDR has no intersection with VMALLOC_ADDR. */ -struct kcore_list kcore_modules; +static struct kcore_list kcore_modules; static void __init add_modules_range(void) { if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) { diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c @@ -1626,7 +1626,8 @@ static void drop_sysctl_table(struct ctl_table_header *header) if (--header->nreg) return; - put_links(header); + if (parent) + put_links(header); start_unregistering(header); if (!--header->count) kfree_rcu(header, rcu); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h @@ -203,7 +203,6 @@ static inline void hugetlb_show_meminfo(void) #define pud_huge(x) 0 #define is_hugepage_only_range(mm, addr, len) 0 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) -#define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; }) #define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \ src_addr, pagep) ({ BUG(); 0; }) #define huge_pte_offset(mm, address, sz) 0 @@ -234,6 +233,13 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb, { BUG(); } +static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long address, + unsigned int flags) +{ + BUG(); + return 0; +} #endif /* !CONFIG_HUGETLB_PAGE */ /* diff --git a/include/linux/list.h b/include/linux/list.h @@ -207,7 +207,7 @@ static inline void list_bulk_move_tail(struct list_head *head, } /** - * list_is_first -- tests whether @ list is the first entry in list @head + * list_is_first -- tests whether @list is the first entry in list @head * @list: the entry to test * @head: the head of the list */ diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h @@ -41,16 +41,6 @@ int move_freepages_block(struct zone *zone, struct page *page, /* * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE. - * If specified range includes migrate types other than MOVABLE or CMA, - * this will fail with -EBUSY. - * - * For isolating all pages in the range finally, the caller have to - * free all pages in the range. test_page_isolated() can be used for - * test it. - * - * The following flags are allowed (they can be combined in a bit mask) - * SKIP_HWPOISON - ignore hwpoison pages - * REPORT_FAILURE - report details about the failure to isolate the range */ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h @@ -418,10 +418,20 @@ static inline void set_restore_sigmask(void) set_thread_flag(TIF_RESTORE_SIGMASK); WARN_ON(!test_thread_flag(TIF_SIGPENDING)); } + +static inline void clear_tsk_restore_sigmask(struct task_struct *tsk) +{ + clear_tsk_thread_flag(tsk, TIF_RESTORE_SIGMASK); +} + static inline void clear_restore_sigmask(void) { clear_thread_flag(TIF_RESTORE_SIGMASK); } +static inline bool test_tsk_restore_sigmask(struct task_struct *tsk) +{ + return test_tsk_thread_flag(tsk, TIF_RESTORE_SIGMASK); +} static inline bool test_restore_sigmask(void) { return test_thread_flag(TIF_RESTORE_SIGMASK); @@ -439,6 +449,10 @@ static inline void set_restore_sigmask(void) current->restore_sigmask = true; WARN_ON(!test_thread_flag(TIF_SIGPENDING)); } +static inline void clear_tsk_restore_sigmask(struct task_struct *tsk) +{ + tsk->restore_sigmask = false; +} static inline void clear_restore_sigmask(void) { current->restore_sigmask = false; @@ -447,6 +461,10 @@ static inline bool test_restore_sigmask(void) { return current->restore_sigmask; } +static inline bool test_tsk_restore_sigmask(struct task_struct *tsk) +{ + return tsk->restore_sigmask; +} static inline bool test_and_clear_restore_sigmask(void) { if (!current->restore_sigmask) diff --git a/include/linux/slab.h b/include/linux/slab.h @@ -32,6 +32,8 @@ #define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U) /* Use GFP_DMA memory */ #define SLAB_CACHE_DMA ((slab_flags_t __force)0x00004000U) +/* Use GFP_DMA32 memory */ +#define SLAB_CACHE_DMA32 ((slab_flags_t __force)0x00008000U) /* DEBUG: Store the last owner for bug hunting */ #define SLAB_STORE_USER ((slab_flags_t __force)0x00010000U) /* Panic if kmem_cache_create() fails */ diff --git a/kernel/ptrace.c b/kernel/ptrace.c @@ -29,6 +29,7 @@ #include <linux/hw_breakpoint.h> #include <linux/cn_proc.h> #include <linux/compat.h> +#include <linux/sched/signal.h> /* * Access another process' address space via ptrace. @@ -924,18 +925,26 @@ int ptrace_request(struct task_struct *child, long request, ret = ptrace_setsiginfo(child, &siginfo); break; - case PTRACE_GETSIGMASK: + case PTRACE_GETSIGMASK: { + sigset_t *mask; + if (addr != sizeof(sigset_t)) { ret = -EINVAL; break; } - if (copy_to_user(datavp, &child->blocked, sizeof(sigset_t))) + if (test_tsk_restore_sigmask(child)) + mask = &child->saved_sigmask; + else + mask = &child->blocked; + + if (copy_to_user(datavp, mask, sizeof(sigset_t))) ret = -EFAULT; else ret = 0; break; + } case PTRACE_SETSIGMASK: { sigset_t new_set; @@ -961,6 +970,8 @@ int ptrace_request(struct task_struct *child, long request, child->blocked = new_set; spin_unlock_irq(&child->sighand->siglock); + clear_tsk_restore_sigmask(child); + ret = 0; break; } diff --git a/mm/debug.c b/mm/debug.c @@ -79,7 +79,7 @@ void __dump_page(struct page *page, const char *reason) pr_warn("ksm "); else if (mapping) { pr_warn("%ps ", mapping->a_ops); - if (mapping->host->i_dentry.first) { + if (mapping->host && mapping->host->i_dentry.first) { struct dentry *dentry; dentry = container_of(mapping->host->i_dentry.first, struct dentry, d_u.d_alias); pr_warn("name:\"%pd\" ", dentry); @@ -168,7 +168,7 @@ void dump_mm(const struct mm_struct *mm) mm_pgtables_bytes(mm), mm->map_count, mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm, - atomic64_read(&mm->pinned_vm), + (u64)atomic64_read(&mm->pinned_vm), mm->data_vm, mm->exec_vm, mm->stack_vm, mm->start_code, mm->end_code, mm->start_data, mm->end_data, mm->start_brk, mm->brk, mm->start_stack, diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h @@ -163,7 +163,10 @@ static inline u8 random_tag(void) #endif #ifndef arch_kasan_set_tag -#define arch_kasan_set_tag(addr, tag) ((void *)(addr)) +static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) +{ + return addr; +} #endif #ifndef arch_kasan_reset_tag #define arch_kasan_reset_tag(addr) ((void *)(addr)) diff --git a/mm/memory.c b/mm/memory.c @@ -1549,10 +1549,12 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr, WARN_ON_ONCE(!is_zero_pfn(pte_pfn(*pte))); goto out_unlock; } - entry = *pte; - goto out_mkwrite; - } else - goto out_unlock; + entry = pte_mkyoung(*pte); + entry = maybe_mkwrite(pte_mkdirty(entry), vma); + if (ptep_set_access_flags(vma, addr, pte, entry, 1)) + update_mmu_cache(vma, addr, pte); + } + goto out_unlock; } /* Ok, finally just insert the thing.. */ @@ -1561,7 +1563,6 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr, else entry = pte_mkspecial(pfn_t_pte(pfn, prot)); -out_mkwrite: if (mkwrite) { entry = pte_mkyoung(entry); entry = maybe_mkwrite(pte_mkdirty(entry), vma); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c @@ -1576,7 +1576,7 @@ static int __ref __offline_pages(unsigned long start_pfn, { unsigned long pfn, nr_pages; long offlined_pages; - int ret, node; + int ret, node, nr_isolate_pageblock; unsigned long flags; unsigned long valid_start, valid_end; struct zone *zone; @@ -1602,10 +1602,11 @@ static int __ref __offline_pages(unsigned long start_pfn, ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE, SKIP_HWPOISON | REPORT_FAILURE); - if (ret) { + if (ret < 0) { reason = "failure to isolate range"; goto failed_removal; } + nr_isolate_pageblock = ret; arg.start_pfn = start_pfn; arg.nr_pages = nr_pages; @@ -1657,8 +1658,16 @@ static int __ref __offline_pages(unsigned long start_pfn, /* Ok, all of our target is isolated. We cannot do rollback at this point. */ offline_isolated_pages(start_pfn, end_pfn); - /* reset pagetype flags and makes migrate type to be MOVABLE */ - undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); + + /* + * Onlining will reset pagetype flags and makes migrate type + * MOVABLE, so just need to decrease the number of isolated + * pageblocks zone counter here. + */ + spin_lock_irqsave(&zone->lock, flags); + zone->nr_isolate_pageblock -= nr_isolate_pageblock; + spin_unlock_irqrestore(&zone->lock, flags); + /* removal success */ adjust_managed_page_count(pfn_to_page(start_pfn), -offlined_pages); zone->present_pages -= offlined_pages; @@ -1690,12 +1699,12 @@ static int __ref __offline_pages(unsigned long start_pfn, failed_removal_isolated: undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); + memory_notify(MEM_CANCEL_OFFLINE, &arg); failed_removal: pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n", (unsigned long long) start_pfn << PAGE_SHIFT, ((unsigned long long) end_pfn << PAGE_SHIFT) - 1, reason); - memory_notify(MEM_CANCEL_OFFLINE, &arg); /* pushback to free area */ mem_hotplug_done(); return ret; diff --git a/mm/mempolicy.c b/mm/mempolicy.c @@ -428,6 +428,13 @@ static inline bool queue_pages_required(struct page *page, return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT); } +/* + * queue_pages_pmd() has three possible return values: + * 1 - pages are placed on the right node or queued successfully. + * 0 - THP was split. + * -EIO - is migration entry or MPOL_MF_STRICT was specified and an existing + * page was already on a node that does not follow the policy. + */ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, unsigned long end, struct mm_walk *walk) { @@ -437,7 +444,7 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, unsigned long flags; if (unlikely(is_pmd_migration_entry(*pmd))) { - ret = 1; + ret = -EIO; goto unlock; } page = pmd_page(*pmd); @@ -454,8 +461,15 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, ret = 1; flags = qp->flags; /* go to thp migration */ - if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { + if (!vma_migratable(walk->vma)) { + ret = -EIO; + goto unlock; + } + migrate_page_add(page, qp->pagelist, flags); + } else + ret = -EIO; unlock: spin_unlock(ptl); out: @@ -480,8 +494,10 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { ret = queue_pages_pmd(pmd, ptl, addr, end, walk); - if (ret) + if (ret > 0) return 0; + else if (ret < 0) + return ret; } if (pmd_trans_unstable(pmd)) @@ -502,11 +518,16 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, continue; if (!queue_pages_required(page, qp)) continue; - migrate_page_add(page, qp->pagelist, flags); + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { + if (!vma_migratable(vma)) + break; + migrate_page_add(page, qp->pagelist, flags); + } else + break; } pte_unmap_unlock(pte - 1, ptl); cond_resched(); - return 0; + return addr != end ? -EIO : 0; } static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask, @@ -576,7 +597,12 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, unsigned long endvma = vma->vm_end; unsigned long flags = qp->flags; - if (!vma_migratable(vma)) + /* + * Need check MPOL_MF_STRICT to return -EIO if possible + * regardless of vma_migratable + */ + if (!vma_migratable(vma) && + !(flags & MPOL_MF_STRICT)) return 1; if (endvma > end) @@ -603,7 +629,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, } /* queue pages from current vma */ - if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) + if (flags & MPOL_MF_VALID) return 0; return 1; } diff --git a/mm/migrate.c b/mm/migrate.c @@ -248,10 +248,8 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, pte = swp_entry_to_pte(entry); } else if (is_device_public_page(new)) { pte = pte_mkdevmap(pte); - flush_dcache_page(new); } - } else - flush_dcache_page(new); + } #ifdef CONFIG_HUGETLB_PAGE if (PageHuge(new)) { @@ -995,6 +993,13 @@ static int move_to_new_page(struct page *newpage, struct page *page, */ if (!PageMappingFlags(page)) page->mapping = NULL; + + if (unlikely(is_zone_device_page(newpage))) { + if (is_device_public_page(newpage)) + flush_dcache_page(newpage); + } else + flush_dcache_page(newpage); + } out: return rc; diff --git a/mm/page_alloc.c b/mm/page_alloc.c @@ -8233,7 +8233,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, ret = start_isolate_page_range(pfn_max_align_down(start), pfn_max_align_up(end), migratetype, 0); - if (ret) + if (ret < 0) return ret; /* diff --git a/mm/page_isolation.c b/mm/page_isolation.c @@ -59,7 +59,8 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_ * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself. * We just check MOVABLE pages. */ - if (!has_unmovable_pages(zone, page, arg.pages_found, migratetype, flags)) + if (!has_unmovable_pages(zone, page, arg.pages_found, migratetype, + isol_flags)) ret = 0; /* @@ -160,27 +161,36 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) return NULL; } -/* - * start_isolate_page_range() -- make page-allocation-type of range of pages - * to be MIGRATE_ISOLATE. - * @start_pfn: The lower PFN of the range to be isolated. - * @end_pfn: The upper PFN of the range to be isolated. - * @migratetype: migrate type to set in error recovery. +/** + * start_isolate_page_range() - make page-allocation-type of range of pages to + * be MIGRATE_ISOLATE. + * @start_pfn: The lower PFN of the range to be isolated. + * @end_pfn: The upper PFN of the range to be isolated. + * start_pfn/end_pfn must be aligned to pageblock_order. + * @migratetype: Migrate type to set in error recovery. + * @flags: The following flags are allowed (they can be combined in + * a bit mask) + * SKIP_HWPOISON - ignore hwpoison pages + * REPORT_FAILURE - report details about the failure to + * isolate the range * * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in * the range will never be allocated. Any free pages and pages freed in the - * future will not be allocated again. - * - * start_pfn/end_pfn must be aligned to pageblock_order. - * Return 0 on success and -EBUSY if any part of range cannot be isolated. + * future will not be allocated again. If specified range includes migrate types + * other than MOVABLE or CMA, this will fail with -EBUSY. For isolating all + * pages in the range finally, the caller have to free all pages in the range. + * test_page_isolated() can be used for test it. * * There is no high level synchronization mechanism that prevents two threads - * from trying to isolate overlapping ranges. If this happens, one thread + * from trying to isolate overlapping ranges. If this happens, one thread * will notice pageblocks in the overlapping range already set to isolate. * This happens in set_migratetype_isolate, and set_migratetype_isolate - * returns an error. We then clean up by restoring the migration type on - * pageblocks we may have modified and return -EBUSY to caller. This + * returns an error. We then clean up by restoring the migration type on + * pageblocks we may have modified and return -EBUSY to caller. This * prevents two threads from simultaneously working on overlapping ranges. + * + * Return: the number of isolated pageblocks on success and -EBUSY if any part + * of range cannot be isolated. */ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, unsigned migratetype, int flags) @@ -188,6 +198,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, unsigned long pfn; unsigned long undo_pfn; struct page *page; + int nr_isolate_pageblock = 0; BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages)); BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages)); @@ -196,13 +207,15 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, pfn < end_pfn; pfn += pageblock_nr_pages) { page = __first_valid_page(pfn, pageblock_nr_pages); - if (page && - set_migratetype_isolate(page, migratetype, flags)) { - undo_pfn = pfn; - goto undo; + if (page) { + if (set_migratetype_isolate(page, migratetype, flags)) { + undo_pfn = pfn; + goto undo; + } + nr_isolate_pageblock++; } } - return 0; + return nr_isolate_pageblock; undo: for (pfn = start_pfn; pfn < undo_pfn; diff --git a/mm/slab.c b/mm/slab.c @@ -2115,6 +2115,8 @@ done: cachep->allocflags = __GFP_COMP; if (flags & SLAB_CACHE_DMA) cachep->allocflags |= GFP_DMA; + if (flags & SLAB_CACHE_DMA32) + cachep->allocflags |= GFP_DMA32; if (flags & SLAB_RECLAIM_ACCOUNT) cachep->allocflags |= __GFP_RECLAIMABLE; cachep->size = size; diff --git a/mm/slab.h b/mm/slab.h @@ -127,7 +127,8 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size, /* Legal flag mask for kmem_cache_create(), for various configurations */ -#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \ +#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \ + SLAB_CACHE_DMA32 | SLAB_PANIC | \ SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS ) #if defined(CONFIG_DEBUG_SLAB) diff --git a/mm/slab_common.c b/mm/slab_common.c @@ -53,7 +53,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work, SLAB_FAILSLAB | SLAB_KASAN) #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \ - SLAB_ACCOUNT) + SLAB_CACHE_DMA32 | SLAB_ACCOUNT) /* * Merge control. If this is set then no merging of slab caches will occur. diff --git a/mm/slub.c b/mm/slub.c @@ -3589,6 +3589,9 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) if (s->flags & SLAB_CACHE_DMA) s->allocflags |= GFP_DMA; + if (s->flags & SLAB_CACHE_DMA32) + s->allocflags |= GFP_DMA32; + if (s->flags & SLAB_RECLAIM_ACCOUNT) s->allocflags |= __GFP_RECLAIMABLE; @@ -5679,6 +5682,8 @@ static char *create_unique_id(struct kmem_cache *s) */ if (s->flags & SLAB_CACHE_DMA) *p++ = 'd'; + if (s->flags & SLAB_CACHE_DMA32) + *p++ = 'D'; if (s->flags & SLAB_RECLAIM_ACCOUNT) *p++ = 'a'; if (s->flags & SLAB_CONSISTENCY_CHECKS) diff --git a/mm/sparse.c b/mm/sparse.c @@ -567,7 +567,7 @@ void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn) } #ifdef CONFIG_MEMORY_HOTREMOVE -/* Mark all memory sections within the pfn range as online */ +/* Mark all memory sections within the pfn range as offline */ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn) { unsigned long pfn; diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl @@ -5977,7 +5977,7 @@ sub process { while ($fmt =~ /(\%[\*\d\.]*p(\w))/g) { $specifier = $1; $extension = $2; - if ($extension !~ /[SsBKRraEhMmIiUDdgVCbGNOx]/) { + if ($extension !~ /[SsBKRraEhMmIiUDdgVCbGNOxt]/) { $bad_specifier = $specifier; last; }