diff options
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 191 |
1 files changed, 120 insertions, 71 deletions
diff --git a/mm/memory.c b/mm/memory.c index 550405fc3b5e..cbdc2cd9cedb 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2260,26 +2260,17 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd, return 0; } -/** - * remap_pfn_range - remap kernel memory to userspace - * @vma: user vma to map to - * @addr: target page aligned user address to start at - * @pfn: page frame number of kernel physical memory address - * @size: size of mapping area - * @prot: page protection flags for this mapping - * - * Note: this is only safe if the mm semaphore is held when called. - * - * Return: %0 on success, negative error code otherwise. +/* + * Variant of remap_pfn_range that does not call track_pfn_remap. The caller + * must have pre-validated the caching bits of the pgprot_t. */ -int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, - unsigned long pfn, unsigned long size, pgprot_t prot) +int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, unsigned long size, pgprot_t prot) { pgd_t *pgd; unsigned long next; unsigned long end = addr + PAGE_ALIGN(size); struct mm_struct *mm = vma->vm_mm; - unsigned long remap_pfn = pfn; int err; if (WARN_ON_ONCE(!PAGE_ALIGNED(addr))) @@ -2309,10 +2300,6 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, vma->vm_pgoff = pfn; } - err = track_pfn_remap(vma, &prot, remap_pfn, addr, PAGE_ALIGN(size)); - if (err) - return -EINVAL; - vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; BUG_ON(addr >= end); @@ -2324,12 +2311,36 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, err = remap_p4d_range(mm, pgd, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) - break; + return err; } while (pgd++, addr = next, addr != end); + return 0; +} + +/** + * remap_pfn_range - remap kernel memory to userspace + * @vma: user vma to map to + * @addr: target page aligned user address to start at + * @pfn: page frame number of kernel physical memory address + * @size: size of mapping area + * @prot: page protection flags for this mapping + * + * Note: this is only safe if the mm semaphore is held when called. + * + * Return: %0 on success, negative error code otherwise. + */ +int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, unsigned long size, pgprot_t prot) +{ + int err; + + err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size)); if (err) - untrack_pfn(vma, remap_pfn, PAGE_ALIGN(size)); + return -EINVAL; + err = remap_pfn_range_notrack(vma, addr, pfn, size, prot); + if (err) + untrack_pfn(vma, pfn, PAGE_ALIGN(size)); return err; } EXPORT_SYMBOL(remap_pfn_range); @@ -2446,13 +2457,21 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, } do { next = pmd_addr_end(addr, end); - if (create || !pmd_none_or_clear_bad(pmd)) { - err = apply_to_pte_range(mm, pmd, addr, next, fn, data, - create, mask); - if (err) - break; + if (pmd_none(*pmd) && !create) + continue; + if (WARN_ON_ONCE(pmd_leaf(*pmd))) + return -EINVAL; + if (!pmd_none(*pmd) && WARN_ON_ONCE(pmd_bad(*pmd))) { + if (!create) + continue; + pmd_clear_bad(pmd); } + err = apply_to_pte_range(mm, pmd, addr, next, + fn, data, create, mask); + if (err) + break; } while (pmd++, addr = next, addr != end); + return err; } @@ -2474,13 +2493,21 @@ static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, } do { next = pud_addr_end(addr, end); - if (create || !pud_none_or_clear_bad(pud)) { - err = apply_to_pmd_range(mm, pud, addr, next, fn, data, - create, mask); - if (err) - break; + if (pud_none(*pud) && !create) + continue; + if (WARN_ON_ONCE(pud_leaf(*pud))) + return -EINVAL; + if (!pud_none(*pud) && WARN_ON_ONCE(pud_bad(*pud))) { + if (!create) + continue; + pud_clear_bad(pud); } + err = apply_to_pmd_range(mm, pud, addr, next, + fn, data, create, mask); + if (err) + break; } while (pud++, addr = next, addr != end); + return err; } @@ -2502,13 +2529,21 @@ static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, } do { next = p4d_addr_end(addr, end); - if (create || !p4d_none_or_clear_bad(p4d)) { - err = apply_to_pud_range(mm, p4d, addr, next, fn, data, - create, mask); - if (err) - break; + if (p4d_none(*p4d) && !create) + continue; + if (WARN_ON_ONCE(p4d_leaf(*p4d))) + return -EINVAL; + if (!p4d_none(*p4d) && WARN_ON_ONCE(p4d_bad(*p4d))) { + if (!create) + continue; + p4d_clear_bad(p4d); } + err = apply_to_pud_range(mm, p4d, addr, next, + fn, data, create, mask); + if (err) + break; } while (p4d++, addr = next, addr != end); + return err; } @@ -2528,9 +2563,17 @@ static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr, pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); - if (!create && pgd_none_or_clear_bad(pgd)) + if (pgd_none(*pgd) && !create) continue; - err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create, &mask); + if (WARN_ON_ONCE(pgd_leaf(*pgd))) + return -EINVAL; + if (!pgd_none(*pgd) && WARN_ON_ONCE(pgd_bad(*pgd))) { + if (!create) + continue; + pgd_clear_bad(pgd); + } + err = apply_to_p4d_range(mm, pgd, addr, next, + fn, data, create, &mask); if (err) break; } while (pgd++, addr = next, addr != end); @@ -3309,28 +3352,26 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); if (page) { - int err; - __SetPageLocked(page); __SetPageSwapBacked(page); - set_page_private(page, entry.val); - /* Tell memcg to use swap ownership records */ - SetPageSwapCache(page); - err = mem_cgroup_charge(page, vma->vm_mm, - GFP_KERNEL); - ClearPageSwapCache(page); - if (err) { + if (mem_cgroup_swapin_charge_page(page, + vma->vm_mm, GFP_KERNEL, entry)) { ret = VM_FAULT_OOM; goto out_page; } + mem_cgroup_swapin_uncharge_swap(entry); shadow = get_shadow_from_swap_cache(entry); if (shadow) workingset_refault(page, shadow); lru_cache_add(page); + + /* To provide entry to swap_readpage() */ + set_page_private(page, entry.val); swap_readpage(page, true); + set_page_private(page, 0); } } else { page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, @@ -4100,7 +4141,6 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) int page_nid = NUMA_NO_NODE; int last_cpupid; int target_nid; - bool migrated = false; pte_t pte, old_pte; bool was_writable = pte_savedwrite(vmf->orig_pte); int flags = 0; @@ -4117,29 +4157,17 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) goto out; } - /* - * Make it present again, Depending on how arch implementes non - * accessible ptes, some can allow access by kernel mode. - */ - old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte); + /* Get the normal PTE */ + old_pte = ptep_get(vmf->pte); pte = pte_modify(old_pte, vma->vm_page_prot); - pte = pte_mkyoung(pte); - if (was_writable) - pte = pte_mkwrite(pte); - ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte); - update_mmu_cache(vma, vmf->address, vmf->pte); page = vm_normal_page(vma, vmf->address, pte); - if (!page) { - pte_unmap_unlock(vmf->pte, vmf->ptl); - return 0; - } + if (!page) + goto out_map; /* TODO: handle PTE-mapped THP */ - if (PageCompound(page)) { - pte_unmap_unlock(vmf->pte, vmf->ptl); - return 0; - } + if (PageCompound(page)) + goto out_map; /* * Avoid grouping on RO pages in general. RO pages shouldn't hurt as @@ -4149,7 +4177,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) * pte_dirty has unpredictable behaviour between PTE scan updates, * background writeback, dirty balancing and application behaviour. */ - if (!pte_write(pte)) + if (!was_writable) flags |= TNF_NO_GROUP; /* @@ -4163,24 +4191,45 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) page_nid = page_to_nid(page); target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid, &flags); - pte_unmap_unlock(vmf->pte, vmf->ptl); if (target_nid == NUMA_NO_NODE) { put_page(page); - goto out; + goto out_map; } + pte_unmap_unlock(vmf->pte, vmf->ptl); /* Migrate to the requested node */ - migrated = migrate_misplaced_page(page, vma, target_nid); - if (migrated) { + if (migrate_misplaced_page(page, vma, target_nid)) { page_nid = target_nid; flags |= TNF_MIGRATED; - } else + } else { flags |= TNF_MIGRATE_FAIL; + vmf->pte = pte_offset_map(vmf->pmd, vmf->address); + spin_lock(vmf->ptl); + if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) { + pte_unmap_unlock(vmf->pte, vmf->ptl); + goto out; + } + goto out_map; + } out: if (page_nid != NUMA_NO_NODE) task_numa_fault(last_cpupid, page_nid, 1, flags); return 0; +out_map: + /* + * Make it present again, depending on how arch implements + * non-accessible ptes, some can allow access by kernel mode. + */ + old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte); + pte = pte_modify(old_pte, vma->vm_page_prot); + pte = pte_mkyoung(pte); + if (was_writable) + pte = pte_mkwrite(pte); + ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte); + update_mmu_cache(vma, vmf->address, vmf->pte); + pte_unmap_unlock(vmf->pte, vmf->ptl); + goto out; } static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) |