--- zzzz-none-000/linux-3.10.107/mm/ksm.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/mm/ksm.c 2021-02-04 17:41:59.000000000 +0000 @@ -476,7 +476,8 @@ flush_dcache_page(page); } else { put_page(page); -out: page = NULL; +out: + page = NULL; } up_read(&mm->mmap_sem); return page; @@ -543,7 +544,7 @@ expected_mapping = (void *)stable_node + (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM); again: - kpfn = ACCESS_ONCE(stable_node->kpfn); + kpfn = READ_ONCE(stable_node->kpfn); page = pfn_to_page(kpfn); /* @@ -552,7 +553,7 @@ * but on Alpha we need to be more careful. */ smp_read_barrier_depends(); - if (ACCESS_ONCE(page->mapping) != expected_mapping) + if (READ_ONCE(page->mapping) != expected_mapping) goto stale; /* @@ -578,14 +579,14 @@ cpu_relax(); } - if (ACCESS_ONCE(page->mapping) != expected_mapping) { + if (READ_ONCE(page->mapping) != expected_mapping) { put_page(page); goto stale; } if (lock_it) { lock_page(page); - if (ACCESS_ONCE(page->mapping) != expected_mapping) { + if (READ_ONCE(page->mapping) != expected_mapping) { unlock_page(page); put_page(page); goto stale; @@ -601,7 +602,7 @@ * before checking whether node->kpfn has been changed. */ smp_rmb(); - if (ACCESS_ONCE(stable_node->kpfn) != kpfn) + if (READ_ONCE(stable_node->kpfn) != kpfn) goto again; remove_node_from_stable_tree(stable_node); return NULL; @@ -626,7 +627,7 @@ unlock_page(page); put_page(page); - if (stable_node->hlist.first) + if (!hlist_empty(&stable_node->hlist)) ksm_pages_sharing--; else ksm_pages_shared--; @@ -893,7 +894,7 @@ * this assure us that no O_DIRECT can happen after the check * or in the middle of the check. */ - entry = ptep_clear_flush(vma, addr, ptep); + entry = ptep_clear_flush_notify(vma, addr, ptep); /* * Check that no O_DIRECT or similar I/O is in progress on the * page @@ -946,7 +947,6 @@ pmd = mm_find_pmd(mm, addr); if (!pmd) goto out; - BUG_ON(pmd_trans_huge(*pmd)); mmun_start = addr; mmun_end = addr + PAGE_SIZE; @@ -962,7 +962,7 @@ page_add_anon_rmap(kpage, vma, addr); flush_cache_page(vma, addr, pte_pfn(*ptep)); - ptep_clear_flush(vma, addr, ptep); + ptep_clear_flush_notify(vma, addr, ptep); set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot)); page_remove_rmap(page); @@ -1023,8 +1023,6 @@ if (page == kpage) /* ksm page forked */ return 0; - if (!(vma->vm_flags & VM_MERGEABLE)) - goto out; if (PageTransCompound(page) && page_trans_compound_anon_split(page)) goto out; BUG_ON(PageTransCompound(page)); @@ -1089,10 +1087,8 @@ int err = -EFAULT; down_read(&mm->mmap_sem); - if (ksm_test_exit(mm)) - goto out; - vma = find_vma(mm, rmap_item->address); - if (!vma || vma->vm_start > rmap_item->address) + vma = find_mergeable_vma(mm, rmap_item->address); + if (!vma) goto out; err = try_to_merge_one_page(vma, page, kpage); @@ -1179,8 +1175,18 @@ cond_resched(); stable_node = rb_entry(*new, struct stable_node, node); tree_page = get_ksm_page(stable_node, false); - if (!tree_page) - return NULL; + if (!tree_page) { + /* + * If we walked over a stale stable_node, + * get_ksm_page() will call rb_erase() and it + * may rebalance the tree from under us. So + * restart the search from scratch. Returning + * NULL would be safe too, but we'd generate + * false negative insertions just because some + * stable_node was stale. + */ + goto again; + } ret = memcmp_pages(page, tree_page); put_page(tree_page); @@ -1256,12 +1262,14 @@ unsigned long kpfn; struct rb_root *root; struct rb_node **new; - struct rb_node *parent = NULL; + struct rb_node *parent; struct stable_node *stable_node; kpfn = page_to_pfn(kpage); nid = get_kpfn_nid(kpfn); root = root_stable_tree + nid; +again: + parent = NULL; new = &root->rb_node; while (*new) { @@ -1271,8 +1279,18 @@ cond_resched(); stable_node = rb_entry(*new, struct stable_node, node); tree_page = get_ksm_page(stable_node, false); - if (!tree_page) - return NULL; + if (!tree_page) { + /* + * If we walked over a stale stable_node, + * get_ksm_page() will call rb_erase() and it + * may rebalance the tree from under us. So + * restart the search from scratch. Returning + * NULL would be safe too, but we'd generate + * false negative insertions just because some + * stable_node was stale. + */ + goto again; + } ret = memcmp_pages(kpage, tree_page); put_page(tree_page); @@ -1342,7 +1360,7 @@ cond_resched(); tree_rmap_item = rb_entry(*new, struct rmap_item, node); tree_page = get_mergeable_page(tree_rmap_item); - if (IS_ERR_OR_NULL(tree_page)) + if (!tree_page) return NULL; /* @@ -1750,7 +1768,7 @@ */ if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE | VM_PFNMAP | VM_IO | VM_DONTEXPAND | - VM_HUGETLB | VM_NONLINEAR | VM_MIXEDMAP)) + VM_HUGETLB | VM_MIXEDMAP)) return 0; /* just ignore the advice */ #ifdef VM_SAO @@ -1892,30 +1910,35 @@ return new_page; } -int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg, - unsigned long *vm_flags) +int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc) { struct stable_node *stable_node; struct rmap_item *rmap_item; - unsigned int mapcount = page_mapcount(page); - int referenced = 0; + int ret = SWAP_AGAIN; int search_new_forks = 0; - VM_BUG_ON(!PageKsm(page)); - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageKsm(page), page); + + /* + * Rely on the page lock to protect against concurrent modifications + * to that page's node of the stable tree. + */ + VM_BUG_ON_PAGE(!PageLocked(page), page); stable_node = page_stable_node(page); if (!stable_node) - return 0; + return ret; again: hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) { struct anon_vma *anon_vma = rmap_item->anon_vma; struct anon_vma_chain *vmac; struct vm_area_struct *vma; + cond_resched(); anon_vma_lock_read(anon_vma); anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, 0, ULONG_MAX) { + cond_resched(); vma = vmac->vma; if (rmap_item->address < vma->vm_start || rmap_item->address >= vma->vm_end) @@ -1929,113 +1952,16 @@ if ((rmap_item->mm == vma->vm_mm) == search_new_forks) continue; - if (memcg && !mm_match_cgroup(vma->vm_mm, memcg)) - continue; - - referenced += page_referenced_one(page, vma, - rmap_item->address, &mapcount, vm_flags); - if (!search_new_forks || !mapcount) - break; - } - anon_vma_unlock_read(anon_vma); - if (!mapcount) - goto out; - } - if (!search_new_forks++) - goto again; -out: - return referenced; -} - -int try_to_unmap_ksm(struct page *page, enum ttu_flags flags) -{ - struct stable_node *stable_node; - struct rmap_item *rmap_item; - int ret = SWAP_AGAIN; - int search_new_forks = 0; - - VM_BUG_ON(!PageKsm(page)); - VM_BUG_ON(!PageLocked(page)); - - stable_node = page_stable_node(page); - if (!stable_node) - return SWAP_FAIL; -again: - hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) { - struct anon_vma *anon_vma = rmap_item->anon_vma; - struct anon_vma_chain *vmac; - struct vm_area_struct *vma; - - anon_vma_lock_read(anon_vma); - anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, - 0, ULONG_MAX) { - vma = vmac->vma; - if (rmap_item->address < vma->vm_start || - rmap_item->address >= vma->vm_end) - continue; - /* - * Initially we examine only the vma which covers this - * rmap_item; but later, if there is still work to do, - * we examine covering vmas in other mms: in case they - * were forked from the original since ksmd passed. - */ - if ((rmap_item->mm == vma->vm_mm) == search_new_forks) + if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) continue; - ret = try_to_unmap_one(page, vma, - rmap_item->address, flags); - if (ret != SWAP_AGAIN || !page_mapped(page)) { + ret = rwc->rmap_one(page, vma, + rmap_item->address, rwc->arg); + if (ret != SWAP_AGAIN) { anon_vma_unlock_read(anon_vma); goto out; } - } - anon_vma_unlock_read(anon_vma); - } - if (!search_new_forks++) - goto again; -out: - return ret; -} - -#ifdef CONFIG_MIGRATION -int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *, - struct vm_area_struct *, unsigned long, void *), void *arg) -{ - struct stable_node *stable_node; - struct rmap_item *rmap_item; - int ret = SWAP_AGAIN; - int search_new_forks = 0; - - VM_BUG_ON(!PageKsm(page)); - VM_BUG_ON(!PageLocked(page)); - - stable_node = page_stable_node(page); - if (!stable_node) - return ret; -again: - hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) { - struct anon_vma *anon_vma = rmap_item->anon_vma; - struct anon_vma_chain *vmac; - struct vm_area_struct *vma; - - anon_vma_lock_read(anon_vma); - anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, - 0, ULONG_MAX) { - vma = vmac->vma; - if (rmap_item->address < vma->vm_start || - rmap_item->address >= vma->vm_end) - continue; - /* - * Initially we examine only the vma which covers this - * rmap_item; but later, if there is still work to do, - * we examine covering vmas in other mms: in case they - * were forked from the original since ksmd passed. - */ - if ((rmap_item->mm == vma->vm_mm) == search_new_forks) - continue; - - ret = rmap_one(page, vma, rmap_item->address, arg); - if (ret != SWAP_AGAIN) { + if (rwc->done && rwc->done(page)) { anon_vma_unlock_read(anon_vma); goto out; } @@ -2048,17 +1974,18 @@ return ret; } +#ifdef CONFIG_MIGRATION void ksm_migrate_page(struct page *newpage, struct page *oldpage) { struct stable_node *stable_node; - VM_BUG_ON(!PageLocked(oldpage)); - VM_BUG_ON(!PageLocked(newpage)); - VM_BUG_ON(newpage->mapping != oldpage->mapping); + VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); + VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); + VM_BUG_ON_PAGE(newpage->mapping != oldpage->mapping, newpage); stable_node = page_stable_node(newpage); if (stable_node) { - VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage)); + VM_BUG_ON_PAGE(stable_node->kpfn != page_to_pfn(oldpage), oldpage); stable_node->kpfn = page_to_pfn(newpage); /* * newpage->mapping was set in advance; now we need smp_wmb() @@ -2073,18 +2000,12 @@ #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_MEMORY_HOTREMOVE -static int just_wait(void *word) -{ - schedule(); - return 0; -} - static void wait_while_offlining(void) { while (ksm_run & KSM_RUN_OFFLINE) { mutex_unlock(&ksm_thread_mutex); wait_on_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE), - just_wait, TASK_UNINTERRUPTIBLE); + TASK_UNINTERRUPTIBLE); mutex_lock(&ksm_thread_mutex); } } @@ -2195,7 +2116,7 @@ unsigned long msecs; int err; - err = strict_strtoul(buf, 10, &msecs); + err = kstrtoul(buf, 10, &msecs); if (err || msecs > UINT_MAX) return -EINVAL; @@ -2218,7 +2139,7 @@ int err; unsigned long nr_pages; - err = strict_strtoul(buf, 10, &nr_pages); + err = kstrtoul(buf, 10, &nr_pages); if (err || nr_pages > UINT_MAX) return -EINVAL; @@ -2240,7 +2161,7 @@ int err; unsigned long flags; - err = strict_strtoul(buf, 10, &flags); + err = kstrtoul(buf, 10, &flags); if (err || flags > UINT_MAX) return -EINVAL; if (flags > KSM_RUN_UNMERGE) @@ -2310,8 +2231,8 @@ * Allocate stable and unstable together: * MAXSMP NODES_SHIFT 10 will use 16kB. */ - buf = kcalloc(nr_node_ids + nr_node_ids, - sizeof(*buf), GFP_KERNEL | __GFP_ZERO); + buf = kcalloc(nr_node_ids + nr_node_ids, sizeof(*buf), + GFP_KERNEL); /* Let us assume that RB_ROOT is NULL is zero */ if (!buf) err = -ENOMEM; @@ -2411,7 +2332,7 @@ ksm_thread = kthread_run(ksm_scan_thread, NULL, "ksmd"); if (IS_ERR(ksm_thread)) { - printk(KERN_ERR "ksm: creating kthread failed\n"); + pr_err("ksm: creating kthread failed\n"); err = PTR_ERR(ksm_thread); goto out_free; } @@ -2419,7 +2340,7 @@ #ifdef CONFIG_SYSFS err = sysfs_create_group(mm_kobj, &ksm_attr_group); if (err) { - printk(KERN_ERR "ksm: register sysfs failed\n"); + pr_err("ksm: register sysfs failed\n"); kthread_stop(ksm_thread); goto out_free; } @@ -2439,4 +2360,4 @@ out: return err; } -module_init(ksm_init) +subsys_initcall(ksm_init);