--- zzzz-none-000/linux-3.10.107/mm/mprotect.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/mm/mprotect.c 2021-02-04 17:41:59.000000000 +0000 @@ -23,153 +23,170 @@ #include #include #include +#include #include #include #include #include -#ifndef pgprot_modify -static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) +#include "internal.h" + +/* + * For a prot_numa update we only hold mmap_sem for read so there is a + * potential race with faulting where a pmd was temporarily none. This + * function checks for a transhuge pmd under the appropriate lock. It + * returns a pte if it was successfully locked or NULL if it raced with + * a transhuge insertion. + */ +static pte_t *lock_pte_protection(struct vm_area_struct *vma, pmd_t *pmd, + unsigned long addr, int prot_numa, spinlock_t **ptl) { - return newprot; + pte_t *pte; + spinlock_t *pmdl; + + /* !prot_numa is protected by mmap_sem held for write */ + if (!prot_numa) + return pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl); + + pmdl = pmd_lock(vma->vm_mm, pmd); + if (unlikely(pmd_trans_huge(*pmd) || pmd_none(*pmd))) { + spin_unlock(pmdl); + return NULL; + } + + pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl); + spin_unlock(pmdl); + return pte; } -#endif static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, pgprot_t newprot, - int dirty_accountable, int prot_numa, bool *ret_all_same_node) + int dirty_accountable, int prot_numa) { struct mm_struct *mm = vma->vm_mm; pte_t *pte, oldpte; spinlock_t *ptl; unsigned long pages = 0; - bool all_same_node = true; - int last_nid = -1; - pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + pte = lock_pte_protection(vma, pmd, addr, prot_numa, &ptl); + if (!pte) + return 0; + arch_enter_lazy_mmu_mode(); do { oldpte = *pte; if (pte_present(oldpte)) { pte_t ptent; - bool updated = false; + bool preserve_write = prot_numa && pte_write(oldpte); - ptent = ptep_modify_prot_start(mm, addr, pte); - if (!prot_numa) { - ptent = pte_modify(ptent, newprot); - updated = true; - } else { + /* + * Avoid trapping faults against the zero or KSM + * pages. See similar comment in change_huge_pmd. + */ + if (prot_numa) { struct page *page; page = vm_normal_page(vma, addr, oldpte); - if (page) { - int this_nid = page_to_nid(page); - if (last_nid == -1) - last_nid = this_nid; - if (last_nid != this_nid) - all_same_node = false; - - /* only check non-shared pages */ - if (!pte_numa(oldpte) && - page_mapcount(page) == 1) { - ptent = pte_mknuma(ptent); - updated = true; - } - } + if (!page || PageKsm(page)) + continue; + + /* Avoid TLB flush if possible */ + if (pte_protnone(oldpte)) + continue; } - /* - * Avoid taking write faults for pages we know to be - * dirty. - */ - if (dirty_accountable && pte_dirty(ptent)) { + ptent = ptep_modify_prot_start(mm, addr, pte); + ptent = pte_modify(ptent, newprot); + if (preserve_write) ptent = pte_mkwrite(ptent); - updated = true; - } - if (updated) - pages++; + /* Avoid taking write faults for known dirty pages */ + if (dirty_accountable && pte_dirty(ptent) && + (pte_soft_dirty(ptent) || + !(vma->vm_flags & VM_SOFTDIRTY))) { + ptent = pte_mkwrite(ptent); + } ptep_modify_prot_commit(mm, addr, pte, ptent); - } else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) { + pages++; + } else if (IS_ENABLED(CONFIG_MIGRATION)) { swp_entry_t entry = pte_to_swp_entry(oldpte); if (is_write_migration_entry(entry)) { + pte_t newpte; /* * A protection check is difficult so * just be safe and disable write */ make_migration_entry_read(&entry); - set_pte_at(mm, addr, pte, - swp_entry_to_pte(entry)); + newpte = swp_entry_to_pte(entry); + if (pte_swp_soft_dirty(oldpte)) + newpte = pte_swp_mksoft_dirty(newpte); + set_pte_at(mm, addr, pte, newpte); + + pages++; } - pages++; } } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); pte_unmap_unlock(pte - 1, ptl); - *ret_all_same_node = all_same_node; return pages; } -#ifdef CONFIG_NUMA_BALANCING -static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr, - pmd_t *pmd) -{ - spin_lock(&mm->page_table_lock); - set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd)); - spin_unlock(&mm->page_table_lock); -} -#else -static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr, - pmd_t *pmd) -{ - BUG(); -} -#endif /* CONFIG_NUMA_BALANCING */ - static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable, int prot_numa) { pmd_t *pmd; + struct mm_struct *mm = vma->vm_mm; unsigned long next; unsigned long pages = 0; unsigned long nr_huge_updates = 0; - bool all_same_node; + unsigned long mni_start = 0; pmd = pmd_offset(pud, addr); do { + unsigned long this_pages; + next = pmd_addr_end(addr, end); + if (!pmd_trans_huge(*pmd) && pmd_none_or_clear_bad(pmd)) + continue; + + /* invoke the mmu notifier if the pmd is populated */ + if (!mni_start) { + mni_start = addr; + mmu_notifier_invalidate_range_start(mm, mni_start, end); + } + if (pmd_trans_huge(*pmd)) { if (next - addr != HPAGE_PMD_SIZE) split_huge_page_pmd(vma, addr, pmd); - else if (change_huge_pmd(vma, pmd, addr, newprot, - prot_numa)) { - pages += HPAGE_PMD_NR; - nr_huge_updates++; - continue; + else { + int nr_ptes = change_huge_pmd(vma, pmd, addr, + newprot, prot_numa); + + if (nr_ptes) { + if (nr_ptes == HPAGE_PMD_NR) { + pages += HPAGE_PMD_NR; + nr_huge_updates++; + } + + /* huge pmd was handled */ + continue; + } } - /* fall through */ + /* fall through, the trans huge pmd just split */ } - if (pmd_none_or_clear_bad(pmd)) - continue; - pages += change_pte_range(vma, pmd, addr, next, newprot, - dirty_accountable, prot_numa, &all_same_node); - - /* - * If we are changing protections for NUMA hinting faults then - * set pmd_numa if the examined pages were all on the same - * node. This allows a regular PMD to be handled as one fault - * and effectively batches the taking of the PTL - */ - if (prot_numa && all_same_node) - change_pmd_protnuma(vma->vm_mm, addr, pmd); + this_pages = change_pte_range(vma, pmd, addr, next, newprot, + dirty_accountable, prot_numa); + pages += this_pages; } while (pmd++, addr = next, addr != end); + if (mni_start) + mmu_notifier_invalidate_range_end(mm, mni_start, end); + if (nr_huge_updates) count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates); - return pages; } @@ -227,15 +244,12 @@ unsigned long end, pgprot_t newprot, int dirty_accountable, int prot_numa) { - struct mm_struct *mm = vma->vm_mm; unsigned long pages; - mmu_notifier_invalidate_range_start(mm, start, end); if (is_vm_hugetlb_page(vma)) pages = hugetlb_change_protection(vma, start, end, newprot); else pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa); - mmu_notifier_invalidate_range_end(mm, start, end); return pages; } @@ -278,7 +292,8 @@ */ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *pprev = vma_merge(mm, *pprev, start, end, newflags, - vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma)); + vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), + vma->vm_userfaultfd_ctx); if (*pprev) { vma = *pprev; goto success; @@ -304,17 +319,21 @@ * held in write mode. */ vma->vm_flags = newflags; - vma->vm_page_prot = pgprot_modify(vma->vm_page_prot, - vm_get_page_prot(newflags)); - - if (vma_wants_writenotify(vma)) { - vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED); - dirty_accountable = 1; - } + dirty_accountable = vma_wants_writenotify(vma); + vma_set_page_prot(vma); change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable, 0); + /* + * Private VM_LOCKED VMA becoming writable: trigger COW to avoid major + * fault on access. + */ + if ((oldflags & (VM_WRITE | VM_SHARED | VM_LOCKED)) == VM_LOCKED && + (newflags & VM_WRITE)) { + populate_vma_page_range(vma, start, end, NULL); + } + vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); vm_stat_account(mm, newflags, vma->vm_file, nrpages); perf_event_mmap(vma);