
Lines Matching +full:wp +full:- +full:content

1 // SPDX-License-Identifier: GPL-2.0-only
9 * demand-loading started 01.12.91 - seems it is high on the list of
10 * things wanted, and it should be easy to implement. - Linus
14 * Ok, demand-loading was easy, shared pages a little bit trickier. Shared
15 * pages started 02.12.91, seems to work. - Linus.
21 * Also corrected some "invalidate()"s - I wasn't doing enough of them.
27 * 19.12.91 - works, somewhat. Sometimes I get faults, don't know why.
29 * 20.12.91 - Ok, making the swap-device changeable like the root.
33 * 05.04.94 - Multi-page memory management added for v1.1.
36 * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG
87 #include "pgalloc-track.h"
91 #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
95 /* use the per-pgdat data instead for discontigmem - mbligh */
172 if (current->rss_stat.count[i]) { in sync_mm_rss()
173 add_mm_counter(mm, i, current->rss_stat.count[i]); in sync_mm_rss()
174 current->rss_stat.count[i] = 0; in sync_mm_rss()
177 current->rss_stat.events = 0; in sync_mm_rss()
184 if (likely(task->mm == mm)) in add_mm_counter_fast()
185 task->rss_stat.count[member] += val; in add_mm_counter_fast()
190 #define dec_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, -1)
198 if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH)) in check_sync_rss_stat()
199 sync_mm_rss(task->mm); in check_sync_rss_stat()
222 mm_dec_nr_ptes(tlb->mm); in free_pte_range()
250 if (end - 1 > ceiling - 1) in free_pmd_range()
256 mm_dec_nr_pmds(tlb->mm); in free_pmd_range()
284 if (end - 1 > ceiling - 1) in free_pud_range()
290 mm_dec_nr_puds(tlb->mm); in free_pud_range()
318 if (end - 1 > ceiling - 1) in free_p4d_range()
327 * This function frees user-level page tables of a process.
343 * Why all these "- 1"s? Because 0 represents both the bottom in free_pgd_range()
344 * of the address space and the top of it (using -1 for the in free_pgd_range()
348 * Comparisons need to use "end - 1" and "ceiling - 1" (though in free_pgd_range()
359 * bother to round floor or end up - the tests don't need that. in free_pgd_range()
373 if (end - 1 > ceiling - 1) in free_pgd_range()
374 end -= PMD_SIZE; in free_pgd_range()
375 if (addr > end - 1) in free_pgd_range()
382 pgd = pgd_offset(tlb->mm, addr); in free_pgd_range()
395 struct vm_area_struct *next = vma->vm_next; in free_pgtables()
396 unsigned long addr = vma->vm_start; in free_pgtables()
406 hugetlb_free_pgd_range(tlb, addr, vma->vm_end, in free_pgtables()
407 floor, next ? next->vm_start : ceiling); in free_pgtables()
412 while (next && next->vm_start <= vma->vm_end + PMD_SIZE in free_pgtables()
415 next = vma->vm_next; in free_pgtables()
419 free_pgd_range(tlb, addr, vma->vm_end, in free_pgtables()
420 floor, next ? next->vm_start : ceiling); in free_pgtables()
431 return -ENOMEM; in __pte_alloc()
441 * of a chain of data-dependent loads, meaning most CPUs (alpha in __pte_alloc()
443 * seen in-order. See the alpha page table accessors for the in __pte_alloc()
464 return -ENOMEM; in __pte_alloc_kernel()
488 if (current->mm == mm) in add_mm_rss_vec()
497 * is found. For example, we might have a PFN-mapped pte in
505 pgd_t *pgd = pgd_offset(vma->vm_mm, addr); in print_bad_pte()
534 mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL; in print_bad_pte()
538 current->comm, in print_bad_pte()
543 (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); in print_bad_pte()
545 vma->vm_file, in print_bad_pte()
546 vma->vm_ops ? vma->vm_ops->fault : NULL, in print_bad_pte()
547 vma->vm_file ? vma->vm_file->f_op->mmap : NULL, in print_bad_pte()
548 mapping ? mapping->a_ops->readpage : NULL); in print_bad_pte()
554 * vm_normal_page -- This function gets the "struct page" associated with a pte.
574 * pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
603 if (vma->vm_ops && vma->vm_ops->find_special_page) in vm_normal_page()
604 return vma->vm_ops->find_special_page(vma, addr); in vm_normal_page()
605 if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) in vm_normal_page()
618 if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { in vm_normal_page()
619 if (vma->vm_flags & VM_MIXEDMAP) { in vm_normal_page()
625 off = (addr - vma->vm_start) >> PAGE_SHIFT; in vm_normal_page()
626 if (pfn == vma->vm_pgoff + off) in vm_normal_page()
628 if (!is_cow_mapping(vma->vm_flags)) in vm_normal_page()
658 * in a direct-access (dax) mapping, so let's just replicate the in vm_normal_page_pmd()
661 if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { in vm_normal_page_pmd()
662 if (vma->vm_flags & VM_MIXEDMAP) { in vm_normal_page_pmd()
668 off = (addr - vma->vm_start) >> PAGE_SHIFT; in vm_normal_page_pmd()
669 if (pfn == vma->vm_pgoff + off) in vm_normal_page_pmd()
671 if (!is_cow_mapping(vma->vm_flags)) in vm_normal_page_pmd()
703 unsigned long vm_flags = dst_vma->vm_flags; in copy_nonpresent_pte()
713 if (unlikely(list_empty(&dst_mm->mmlist))) { in copy_nonpresent_pte()
715 if (list_empty(&dst_mm->mmlist)) in copy_nonpresent_pte()
716 list_add(&dst_mm->mmlist, in copy_nonpresent_pte()
717 &src_mm->mmlist); in copy_nonpresent_pte()
757 * We do not preserve soft-dirty information, because so in copy_nonpresent_pte()
784 * the page refcount and re-use the pte the traditional
793 * And if we need a pre-allocated page but don't yet have
803 struct mm_struct *src_mm = src_vma->vm_mm; in copy_present_page()
806 if (!is_cow_mapping(src_vma->vm_flags)) in copy_present_page()
822 if (likely(!atomic_read(&src_mm->has_pinned))) in copy_present_page()
829 return -EAGAIN; in copy_present_page()
843 pte = mk_pte(new_page, dst_vma->vm_page_prot); in copy_present_page()
846 /* Uffd-wp needs to be delivered to dest pte as well */ in copy_present_page()
848 set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte); in copy_present_page()
853 * Copy one pte. Returns 0 if succeeded, or -EAGAIN if one preallocated page
861 struct mm_struct *src_mm = src_vma->vm_mm; in copy_present_pte()
862 unsigned long vm_flags = src_vma->vm_flags; in copy_present_pte()
900 set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte); in copy_present_pte()
928 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pte_range()
929 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pte_range()
944 ret = -ENOMEM; in copy_pte_range()
956 * We are holding two locks at this point - either of them in copy_pte_range()
983 * If we need a pre-allocated page for this pte, drop the in copy_pte_range()
986 if (unlikely(ret == -EAGAIN)) in copy_pte_range()
990 * pre-alloc page cannot be reused by next time so as in copy_pte_range()
1010 ret = -ENOMEM; in copy_pte_range()
1015 WARN_ON_ONCE(ret != -EAGAIN); in copy_pte_range()
1018 return -ENOMEM; in copy_pte_range()
1035 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pmd_range()
1036 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pmd_range()
1042 return -ENOMEM; in copy_pmd_range()
1049 VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, src_vma); in copy_pmd_range()
1052 if (err == -ENOMEM) in copy_pmd_range()
1053 return -ENOMEM; in copy_pmd_range()
1062 return -ENOMEM; in copy_pmd_range()
1072 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pud_range()
1073 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pud_range()
1079 return -ENOMEM; in copy_pud_range()
1086 VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, src_vma); in copy_pud_range()
1089 if (err == -ENOMEM) in copy_pud_range()
1090 return -ENOMEM; in copy_pud_range()
1099 return -ENOMEM; in copy_pud_range()
1109 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_p4d_range()
1115 return -ENOMEM; in copy_p4d_range()
1123 return -ENOMEM; in copy_p4d_range()
1133 unsigned long addr = src_vma->vm_start; in copy_page_range()
1134 unsigned long end = src_vma->vm_end; in copy_page_range()
1135 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_page_range()
1136 struct mm_struct *src_mm = src_vma->vm_mm; in copy_page_range()
1147 if (!(src_vma->vm_flags & (VM_HUGETLB | VM_PFNMAP | VM_MIXEDMAP)) && in copy_page_range()
1148 !src_vma->anon_vma) in copy_page_range()
1154 if (unlikely(src_vma->vm_flags & VM_PFNMAP)) { in copy_page_range()
1170 is_cow = is_cow_mapping(src_vma->vm_flags); in copy_page_range()
1184 raw_write_seqcount_begin(&src_mm->write_protect_seq); in copy_page_range()
1196 ret = -ENOMEM; in copy_page_range()
1202 raw_write_seqcount_end(&src_mm->write_protect_seq); in copy_page_range()
1213 struct mm_struct *mm = tlb->mm; in zap_pte_range()
1240 if (vma->vm_flags & VM_USEREXPTE) in zap_pte_range()
1248 if (details->check_mapping && in zap_pte_range()
1249 details->check_mapping != page_rmapping(page)) in zap_pte_range()
1253 tlb->fullmm); in zap_pte_range()
1257 if (vma->vm_flags & VM_PURGEABLE) in zap_pte_range()
1265 likely(!(vma->vm_flags & VM_SEQ_READ))) in zap_pte_range()
1268 rss[mm_counter(page)]--; in zap_pte_range()
1284 if (unlikely(details && details->check_mapping)) { in zap_pte_range()
1290 if (details->check_mapping != in zap_pte_range()
1295 pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); in zap_pte_range()
1296 rss[mm_counter(page)]--; in zap_pte_range()
1302 /* If details->check_mapping, we leave swap entries. */ in zap_pte_range()
1307 rss[MM_SWAPENTS]--; in zap_pte_range()
1312 rss[mm_counter(page)]--; in zap_pte_range()
1316 pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); in zap_pte_range()
1358 if (next - addr != HPAGE_PMD_SIZE) in zap_pmd_range()
1363 } else if (details && details->single_page && in zap_pmd_range()
1364 PageTransCompound(details->single_page) && in zap_pmd_range()
1365 next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { in zap_pmd_range()
1366 spinlock_t *ptl = pmd_lock(tlb->mm, pmd); in zap_pmd_range()
1404 if (next - addr != HPAGE_PUD_SIZE) { in zap_pud_range()
1405 mmap_assert_locked(tlb->mm); in zap_pud_range()
1450 pgd = pgd_offset(vma->vm_mm, addr); in unmap_page_range()
1466 unsigned long start = max(vma->vm_start, start_addr); in unmap_single_vma()
1469 if (start >= vma->vm_end) in unmap_single_vma()
1471 end = min(vma->vm_end, end_addr); in unmap_single_vma()
1472 if (end <= vma->vm_start) in unmap_single_vma()
1475 if (vma->vm_file) in unmap_single_vma()
1478 if (unlikely(vma->vm_flags & VM_PFNMAP)) in unmap_single_vma()
1484 * It is undesirable to test vma->vm_file as it in unmap_single_vma()
1485 * should be non-null for valid hugetlb area. in unmap_single_vma()
1488 * hugetlbfs ->mmap method fails, in unmap_single_vma()
1489 * mmap_region() nullifies vma->vm_file in unmap_single_vma()
1494 if (vma->vm_file) { in unmap_single_vma()
1495 i_mmap_lock_write(vma->vm_file->f_mapping); in unmap_single_vma()
1497 i_mmap_unlock_write(vma->vm_file->f_mapping); in unmap_single_vma()
1505 * unmap_vmas - unmap a range of memory covered by a list of vma's
1519 * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
1528 mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, in unmap_vmas()
1531 for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) in unmap_vmas()
1537 * zap_page_range - remove user pages in a given range
1551 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, in zap_page_range()
1553 tlb_gather_mmu(&tlb, vma->vm_mm, start, range.end); in zap_page_range()
1554 update_hiwater_rss(vma->vm_mm); in zap_page_range()
1556 for ( ; vma && vma->vm_start < range.end; vma = vma->vm_next) in zap_page_range()
1563 * zap_page_range_single - remove user pages in a given range
1578 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, in zap_page_range_single()
1580 tlb_gather_mmu(&tlb, vma->vm_mm, address, range.end); in zap_page_range_single()
1581 update_hiwater_rss(vma->vm_mm); in zap_page_range_single()
1589 * zap_vma_ptes - remove ptes mapping the vma
1602 if (address < vma->vm_start || address + size > vma->vm_end || in zap_vma_ptes()
1603 !(vma->vm_flags & VM_PFNMAP)) in zap_vma_ptes()
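
    zap_vma_ptes() above is the teardown helper drivers use on such VM_PFNMAP
    areas. A minimal sketch, assuming a hypothetical driver that earlier
    populated the VMA from its ->fault handler:

        #include <linux/mm.h>

        /* Drop every PTE in the range so the next user access faults back
         * into ->fault; only valid on VM_PFNMAP mappings, as the check
         * above enforces. */
        static void mydev_invalidate_user_mapping(struct vm_area_struct *vma)
        {
                zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
        }
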
1645 return -EINVAL; in validate_page_before_insert()
1654 return -EBUSY; in insert_page_into_pte_locked()
1673 struct mm_struct *mm = vma->vm_mm; in insert_page()
1681 retval = -ENOMEM; in insert_page()
1698 return -EINVAL; in insert_page_in_batch_locked()
1714 struct mm_struct *const mm = vma->vm_mm; in insert_pages()
1720 ret = -EFAULT; in insert_pages()
1726 remaining_pages_total, PTRS_PER_PTE - pte_index(addr)); in insert_pages()
1729 ret = -ENOMEM; in insert_pages()
1744 remaining_pages_total -= pte_idx; in insert_pages()
1751 pages_to_write_in_pmd -= batch_size; in insert_pages()
1752 remaining_pages_total -= batch_size; in insert_pages()
1764 * vm_insert_pages - insert multiple pages into user vma, batching the pmd lock.
1782 const unsigned long end_addr = addr + (*num * PAGE_SIZE) - 1; in vm_insert_pages()
1784 if (addr < vma->vm_start || end_addr >= vma->vm_end) in vm_insert_pages()
1785 return -EFAULT; in vm_insert_pages()
1786 if (!(vma->vm_flags & VM_MIXEDMAP)) { in vm_insert_pages()
1787 BUG_ON(mmap_read_trylock(vma->vm_mm)); in vm_insert_pages()
1788 BUG_ON(vma->vm_flags & VM_PFNMAP); in vm_insert_pages()
1789 vma->vm_flags |= VM_MIXEDMAP; in vm_insert_pages()
1792 return insert_pages(vma, addr, pages, num, vma->vm_page_prot); in vm_insert_pages()
1795 int err = -EINVAL; in vm_insert_pages()
1802 *num = pgcount - idx; in vm_insert_pages()
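
    A minimal sketch of the batched insert from a driver's f_op->mmap()
    handler; struct mydev, its preallocated pages[] array and nr_pages count
    are assumptions for illustration:

        #include <linux/fs.h>
        #include <linux/mm.h>

        struct mydev {                          /* hypothetical driver state */
                struct page **pages;
                unsigned long nr_pages;
        };

        /* Hand the whole page array to vm_insert_pages() so the PMD lock is
         * taken once per batch rather than once per page. */
        static int mydev_mmap(struct file *file, struct vm_area_struct *vma)
        {
                struct mydev *dev = file->private_data;
                unsigned long num = vma_pages(vma);

                if (num > dev->nr_pages)
                        return -EINVAL;

                /* on return, num holds the count of pages not yet inserted */
                return vm_insert_pages(vma, vma->vm_start, dev->pages, &num);
        }
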
1809 * vm_insert_page - insert single page into user vma
1830 * Usually this function is called from f_op->mmap() handler
1831 * under mm->mmap_lock write-lock, so it can change vma->vm_flags.
1833 * function from other places, for example from page-fault handler.
1840 if (addr < vma->vm_start || addr >= vma->vm_end) in vm_insert_page()
1841 return -EFAULT; in vm_insert_page()
1843 return -EINVAL; in vm_insert_page()
1844 if (!(vma->vm_flags & VM_MIXEDMAP)) { in vm_insert_page()
1845 BUG_ON(mmap_read_trylock(vma->vm_mm)); in vm_insert_page()
1846 BUG_ON(vma->vm_flags & VM_PFNMAP); in vm_insert_page()
1847 vma->vm_flags |= VM_MIXEDMAP; in vm_insert_page()
1849 return insert_page(vma, addr, page, vma->vm_page_prot); in vm_insert_page()
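
    The single-page counterpart: as the comment above says, calling it from
    an f_op->mmap() handler (mmap_lock held for write) lets it set
    VM_MIXEDMAP on the VMA. A sketch, assuming a hypothetical device that
    stores one kernel page in file->private_data at open():

        #include <linux/fs.h>
        #include <linux/mm.h>

        static int onepage_mmap(struct file *file, struct vm_area_struct *vma)
        {
                struct page *page = file->private_data;  /* hypothetical */

                if (vma->vm_end - vma->vm_start != PAGE_SIZE)
                        return -EINVAL;

                return vm_insert_page(vma, vma->vm_start, page);
        }
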
1854 * __vm_map_pages - maps range of kernel pages into user vma
1868 unsigned long uaddr = vma->vm_start; in __vm_map_pages()
1873 return -ENXIO; in __vm_map_pages()
1876 if (count > num - offset) in __vm_map_pages()
1877 return -ENXIO; in __vm_map_pages()
1890 * vm_map_pages - maps a range of kernel pages starting at a non-zero offset
1910 return __vm_map_pages(vma, pages, num, vma->vm_pgoff); in vm_map_pages()
1915 * vm_map_pages_zero - maps a range of kernel pages starting at offset zero
1937 struct mm_struct *mm = vma->vm_mm; in insert_pfn()
1988 * vmf_insert_pfn_prot - insert single pfn into user vma with specified pgprot
1995 * to override pgprot on a per-page basis.
2003 * a value of @pgprot different from that of @vma->vm_page_prot.
2017 BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); in vmf_insert_pfn_prot()
2018 BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == in vmf_insert_pfn_prot()
2020 BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); in vmf_insert_pfn_prot()
2021 BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); in vmf_insert_pfn_prot()
2023 if (addr < vma->vm_start || addr >= vma->vm_end) in vmf_insert_pfn_prot()
2037 * vmf_insert_pfn - insert single pfn into user vma
2045 * This function should only be called from a vm_ops->fault handler, and
2059 return vmf_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); in vmf_insert_pfn()
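
    Per the comment above, vmf_insert_pfn() belongs in a vm_ops->fault
    handler. A sketch for a VM_PFNMAP MMIO mapping; struct mydev and its
    mmio_phys base are assumptions:

        #include <linux/mm.h>

        struct mydev {                          /* hypothetical driver state */
                phys_addr_t mmio_phys;
        };

        /* Translate the faulting file offset to a PFN inside the device's
         * MMIO window and install it. */
        static vm_fault_t mydev_fault(struct vm_fault *vmf)
        {
                struct mydev *dev = vmf->vma->vm_private_data;
                unsigned long pfn = (dev->mmio_phys >> PAGE_SHIFT) + vmf->pgoff;

                return vmf_insert_pfn(vmf->vma, vmf->address, pfn);
        }
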
2066 if (vma->vm_flags & VM_MIXEDMAP) in vm_mixed_ok()
2085 if (addr < vma->vm_start || addr >= vma->vm_end) in __vm_insert_mixed()
2115 if (err == -ENOMEM) in __vm_insert_mixed()
2117 if (err < 0 && err != -EBUSY) in __vm_insert_mixed()
2124 * vmf_insert_mixed_prot - insert single pfn into user vma with specified pgprot
2131 * to override pgprot on a per-page basis.
2133 * Typically this function should be used by drivers to set caching- and
2134 * encryption bits different than those of @vma->vm_page_prot, because
2135 * the caching- or encryption mode may not be known at mmap() time.
2136 * This is ok as long as @vma->vm_page_prot is not used by the core vm
2139 * functions that don't touch caching- or encryption bits, using pte_modify()
2141 * Also when new page-table entries are created, this is only done using the
2142 * fault() callback, and never using the value of vma->vm_page_prot,
2143 * except for page-table entries that point to anonymous pages as the result
2159 return __vm_insert_mixed(vma, addr, pfn, vma->vm_page_prot, false); in vmf_insert_mixed()
2171 return __vm_insert_mixed(vma, addr, pfn, vma->vm_page_prot, true); in vmf_insert_mixed_mkwrite()
2178 * in null mappings (currently treated as "copy-on-access")
2190 return -ENOMEM; in remap_pte_range()
2195 err = -EACCES; in remap_pte_range()
2214 pfn -= addr >> PAGE_SHIFT; in remap_pmd_range()
2217 return -ENOMEM; in remap_pmd_range()
2237 pfn -= addr >> PAGE_SHIFT; in remap_pud_range()
2240 return -ENOMEM; in remap_pud_range()
2259 pfn -= addr >> PAGE_SHIFT; in remap_p4d_range()
2262 return -ENOMEM; in remap_p4d_range()
2274 * remap_pfn_range - remap kernel memory to userspace
2291 struct mm_struct *mm = vma->vm_mm; in remap_pfn_range()
2296 return -EINVAL; in remap_pfn_range()
2311 * There's a horrible special case to handle copy-on-write in remap_pfn_range()
2313 * un-COW'ed pages by matching them up with "vma->vm_pgoff". in remap_pfn_range()
2316 if (is_cow_mapping(vma->vm_flags)) { in remap_pfn_range()
2317 if (addr != vma->vm_start || end != vma->vm_end) in remap_pfn_range()
2318 return -EINVAL; in remap_pfn_range()
2319 vma->vm_pgoff = pfn; in remap_pfn_range()
2324 return -EINVAL; in remap_pfn_range()
2326 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; in remap_pfn_range()
2329 pfn -= addr >> PAGE_SHIFT; in remap_pfn_range()
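
    remap_pfn_range() is the classic way to map an entire physical window at
    mmap() time. A sketch of a driver exposing its register BAR; struct
    mydev, reg_phys and reg_len are illustrative assumptions:

        #include <linux/fs.h>
        #include <linux/mm.h>

        struct mydev {                          /* hypothetical driver state */
                phys_addr_t reg_phys;
                resource_size_t reg_len;
        };

        static int mydev_mmap_regs(struct file *file, struct vm_area_struct *vma)
        {
                struct mydev *dev = file->private_data;
                unsigned long size = vma->vm_end - vma->vm_start;

                if (size > dev->reg_len)
                        return -EINVAL;

                /* registers must not be cached */
                vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
                return remap_pfn_range(vma, vma->vm_start,
                                       dev->reg_phys >> PAGE_SHIFT,
                                       size, vma->vm_page_prot);
        }
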
2348 * vm_iomap_memory - remap memory to userspace
2357 * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get
2358 * whatever write-combining details or similar.
2368 return -EINVAL; in vm_iomap_memory()
2370 * You *really* shouldn't map things that aren't page-aligned, in vm_iomap_memory()
2378 return -EINVAL; in vm_iomap_memory()
2381 if (vma->vm_pgoff > pages) in vm_iomap_memory()
2382 return -EINVAL; in vm_iomap_memory()
2383 pfn += vma->vm_pgoff; in vm_iomap_memory()
2384 pages -= vma->vm_pgoff; in vm_iomap_memory()
2387 vm_len = vma->vm_end - vma->vm_start; in vm_iomap_memory()
2389 return -EINVAL; in vm_iomap_memory()
2392 return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); in vm_iomap_memory()
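
    The same idea via the simpler vm_iomap_memory() wrapper, which derives
    the PFN from vma->vm_pgoff and performs the bounds checks shown above
    itself; per the NOTE, vm_page_prot is tweaked first. fb_phys and fb_len
    are assumed fields of the hypothetical struct mydev from the previous
    sketch:

        static int mydev_mmap_fb(struct file *file, struct vm_area_struct *vma)
        {
                struct mydev *dev = file->private_data;   /* hypothetical */

                /* framebuffer-style memory: ask for write-combining */
                vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
                return vm_iomap_memory(vma, dev->fb_phys, dev->fb_len);
        }
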
2410 return -ENOMEM; in apply_to_pte_range()
2435 pte_unmap_unlock(pte-1, ptl); in apply_to_pte_range()
2453 return -ENOMEM; in apply_to_pmd_range()
2481 return -ENOMEM; in apply_to_pud_range()
2509 return -ENOMEM; in apply_to_p4d_range()
2536 return -EINVAL; in __apply_to_page_range()
2581 * read non-atomically. Before making any commitment, on those architectures
2610 struct vm_area_struct *vma = vmf->vma; in cow_user_page()
2611 struct mm_struct *mm = vma->vm_mm; in cow_user_page()
2612 unsigned long addr = vmf->address; in cow_user_page()
2621 * a "struct page" for it. We do a best-effort copy by in cow_user_page()
2623 * fails, we just zero-fill it. Live with it. in cow_user_page()
2632 if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) { in cow_user_page()
2635 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); in cow_user_page()
2637 if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { in cow_user_page()
2642 update_mmu_tlb(vma, addr, vmf->pte); in cow_user_page()
2647 entry = pte_mkyoung(vmf->orig_pte); in cow_user_page()
2648 if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0)) in cow_user_page()
2649 update_mmu_cache(vma, addr, vmf->pte); in cow_user_page()
2662 /* Re-validate under PTL if the page is still mapped */ in cow_user_page()
2663 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); in cow_user_page()
2665 if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { in cow_user_page()
2667 update_mmu_tlb(vma, addr, vmf->pte); in cow_user_page()
2679 * use-case in cow_user_page()
2691 pte_unmap_unlock(vmf->pte, vmf->ptl); in cow_user_page()
2700 struct file *vm_file = vma->vm_file; in __get_fault_gfp_mask()
2703 return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO; in __get_fault_gfp_mask()
2721 struct page *page = vmf->page; in do_page_mkwrite()
2722 unsigned int old_flags = vmf->flags; in do_page_mkwrite()
2724 vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; in do_page_mkwrite()
2726 if (vmf->vma->vm_file && in do_page_mkwrite()
2727 IS_SWAPFILE(vmf->vma->vm_file->f_mapping->host)) in do_page_mkwrite()
2730 ret = vmf->vma->vm_ops->page_mkwrite(vmf); in do_page_mkwrite()
2732 vmf->flags = old_flags; in do_page_mkwrite()
2737 if (!page->mapping) { in do_page_mkwrite()
2754 struct vm_area_struct *vma = vmf->vma; in fault_dirty_shared_page()
2756 struct page *page = vmf->page; in fault_dirty_shared_page()
2758 bool page_mkwrite = vma->vm_ops && vma->vm_ops->page_mkwrite; in fault_dirty_shared_page()
2763 * Take a local copy of the address_space - page.mapping may be zeroed in fault_dirty_shared_page()
2765 * pinned by vma->vm_file's reference. We rely on unlock_page()'s in fault_dirty_shared_page()
2772 file_update_time(vma->vm_file); in fault_dirty_shared_page()
2803 * any related book-keeping.
2806 __releases(vmf->ptl) in wp_page_reuse()
2808 struct vm_area_struct *vma = vmf->vma; in wp_page_reuse()
2809 struct page *page = vmf->page; in wp_page_reuse()
2817 page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1); in wp_page_reuse()
2819 flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); in wp_page_reuse()
2820 entry = pte_mkyoung(vmf->orig_pte); in wp_page_reuse()
2822 if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1)) in wp_page_reuse()
2823 update_mmu_cache(vma, vmf->address, vmf->pte); in wp_page_reuse()
2824 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_reuse()
2836 * - Allocate a page, copy the content of the old page to the new one.
2837 * - Handle book keeping and accounting - cgroups, mmu-notifiers, etc.
2838 * - Take the PTL. If the pte changed, bail out and release the allocated page
2839 * - If the pte is still the way we remember it, update the page table and all
2840 * relevant references. This includes dropping the reference the page-table
2842 * - In any case, unlock the PTL and drop the reference we took to the old page.
2846 struct vm_area_struct *vma = vmf->vma; in wp_page_copy()
2847 struct mm_struct *mm = vma->vm_mm; in wp_page_copy()
2848 struct page *old_page = vmf->page; in wp_page_copy()
2857 if (is_zero_pfn(pte_pfn(vmf->orig_pte))) { in wp_page_copy()
2859 vmf->address); in wp_page_copy()
2864 vmf->address); in wp_page_copy()
2871 * it's fine. If not, userspace would re-fault on in wp_page_copy()
2889 vmf->address & PAGE_MASK, in wp_page_copy()
2890 (vmf->address & PAGE_MASK) + PAGE_SIZE); in wp_page_copy()
2894 * Re-check the pte - we dropped the lock in wp_page_copy()
2896 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl); in wp_page_copy()
2897 if (likely(pte_same(*vmf->pte, vmf->orig_pte))) { in wp_page_copy()
2907 flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); in wp_page_copy()
2908 entry = mk_pte(new_page, vma->vm_page_prot); in wp_page_copy()
2917 ptep_clear_flush_notify(vma, vmf->address, vmf->pte); in wp_page_copy()
2918 page_add_new_anon_rmap(new_page, vma, vmf->address, false); in wp_page_copy()
2919 if (vma->vm_flags & VM_PURGEABLE) { in wp_page_copy()
2920 pr_info("set wp new page %lx purgeable\n", page_to_pfn(new_page)); in wp_page_copy()
2922 uxpte_set_present(vma, vmf->address); in wp_page_copy()
2930 set_pte_at_notify(mm, vmf->address, vmf->pte, entry); in wp_page_copy()
2931 update_mmu_cache(vma, vmf->address, vmf->pte); in wp_page_copy()
2962 update_mmu_tlb(vma, vmf->address, vmf->pte); in wp_page_copy()
2968 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_copy()
2970 * No need to double call mmu_notifier->invalidate_range() callback as in wp_page_copy()
2979 if (page_copied && (vma->vm_flags & VM_LOCKED)) { in wp_page_copy()
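
    The copy path described in the wp_page_copy() comment above is what a
    userspace write to a private, still-shared page triggers. A minimal
    user-level illustration of the visible effect (plain Linux C, nothing
    beyond standard mmap/fork assumed):

        #include <stdio.h>
        #include <string.h>
        #include <sys/mman.h>
        #include <sys/wait.h>
        #include <unistd.h>

        int main(void)
        {
                /* one private anonymous page, shared copy-on-write after fork() */
                char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
                if (p == MAP_FAILED)
                        return 1;
                strcpy(p, "parent");

                if (fork() == 0) {
                        /* child's write faults into do_wp_page()/wp_page_copy() */
                        strcpy(p, "child");
                        _exit(0);
                }
                wait(NULL);
                printf("parent still sees: %s\n", p);   /* prints "parent" */
                return 0;
        }
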
2997 * finish_mkwrite_fault - finish page fault for a shared mapping, making PTE
3003 * shared mapping due to PTE being read-only once the mapped page is prepared.
3014 WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED)); in finish_mkwrite_fault()
3015 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, in finish_mkwrite_fault()
3016 &vmf->ptl); in finish_mkwrite_fault()
3021 if (!pte_same(*vmf->pte, vmf->orig_pte)) { in finish_mkwrite_fault()
3022 update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); in finish_mkwrite_fault()
3023 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_mkwrite_fault()
3036 struct vm_area_struct *vma = vmf->vma; in wp_pfn_shared()
3038 if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { in wp_pfn_shared()
3041 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_pfn_shared()
3042 vmf->flags |= FAULT_FLAG_MKWRITE; in wp_pfn_shared()
3043 ret = vma->vm_ops->pfn_mkwrite(vmf); in wp_pfn_shared()
3053 __releases(vmf->ptl) in wp_page_shared()
3055 struct vm_area_struct *vma = vmf->vma; in wp_page_shared()
3058 get_page(vmf->page); in wp_page_shared()
3060 if (vma->vm_ops && vma->vm_ops->page_mkwrite) { in wp_page_shared()
3063 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_shared()
3067 put_page(vmf->page); in wp_page_shared()
3072 unlock_page(vmf->page); in wp_page_shared()
3073 put_page(vmf->page); in wp_page_shared()
3078 lock_page(vmf->page); in wp_page_shared()
3081 put_page(vmf->page); in wp_page_shared()
3089 * and decrementing the shared-page counter for the old page.
3092 * done by the caller (the low-level page fault routine in most cases).
3100 * We enter with non-exclusive mmap_lock (to exclude vma changes,
3105 __releases(vmf->ptl) in do_wp_page()
3107 struct vm_area_struct *vma = vmf->vma; in do_wp_page()
3109 if (userfaultfd_pte_wp(vma, *vmf->pte)) { in do_wp_page()
3110 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3115 * Userfaultfd write-protect can defer flushes. Ensure the TLB in do_wp_page()
3118 if (unlikely(userfaultfd_wp(vmf->vma) && in do_wp_page()
3119 mm_tlb_flush_pending(vmf->vma->vm_mm))) in do_wp_page()
3120 flush_tlb_page(vmf->vma, vmf->address); in do_wp_page()
3122 vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte); in do_wp_page()
3123 if (!vmf->page) { in do_wp_page()
3129 * Just mark the pages writable and/or call ops->pfn_mkwrite. in do_wp_page()
3131 if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == in do_wp_page()
3135 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3143 if (PageAnon(vmf->page)) { in do_wp_page()
3144 struct page *page = vmf->page; in do_wp_page()
3163 } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == in do_wp_page()
3171 get_page(vmf->page); in do_wp_page()
3173 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3181 zap_page_range_single(vma, start_addr, end_addr - start_addr, details); in unmap_mapping_range_vma()
3191 details->first_index, details->last_index) { in unmap_mapping_range_tree()
3193 vba = vma->vm_pgoff; in unmap_mapping_range_tree()
3194 vea = vba + vma_pages(vma) - 1; in unmap_mapping_range_tree()
3195 zba = details->first_index; in unmap_mapping_range_tree()
3198 zea = details->last_index; in unmap_mapping_range_tree()
3203 ((zba - vba) << PAGE_SHIFT) + vma->vm_start, in unmap_mapping_range_tree()
3204 ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start, in unmap_mapping_range_tree()
3210 * unmap_mapping_page() - Unmap single page from processes.
3222 struct address_space *mapping = page->mapping; in unmap_mapping_page()
3229 details.first_index = page->index; in unmap_mapping_page()
3230 details.last_index = page->index + thp_nr_pages(page) - 1; in unmap_mapping_page()
3234 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) in unmap_mapping_page()
3235 unmap_mapping_range_tree(&mapping->i_mmap, &details); in unmap_mapping_page()
3240 * unmap_mapping_pages() - Unmap pages from processes.
3258 details.last_index = start + nr - 1; in unmap_mapping_pages()
3263 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) in unmap_mapping_pages()
3264 unmap_mapping_range_tree(&mapping->i_mmap, &details); in unmap_mapping_pages()
3269 * unmap_mapping_range - unmap the portion of all mmaps in the specified
3289 pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; in unmap_mapping_range()
3294 (holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; in unmap_mapping_range()
3296 hlen = ULONG_MAX - hba + 1; in unmap_mapping_range()
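
    A typical caller pattern, modeled on the kernel's own truncate helpers
    (the wrapper name here is hypothetical): drop every user mapping of the
    inode's pages from new_size onward before the page cache itself is
    truncated.

        #include <linux/fs.h>
        #include <linux/mm.h>

        static void example_unmap_beyond(struct inode *inode, loff_t new_size)
        {
                loff_t holebegin = round_up(new_size, PAGE_SIZE);

                /* holelen == 0 means "to the end of the file"; even_cows == 1
                 * also zaps private COW copies of the affected pages. */
                unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
        }
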
3304 * We enter with non-exclusive mmap_lock (to exclude vma changes,
3313 struct vm_area_struct *vma = vmf->vma; in do_swap_page()
3322 if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) in do_swap_page()
3325 entry = pte_to_swp_entry(vmf->orig_pte); in do_swap_page()
3328 migration_entry_wait(vma->vm_mm, vmf->pmd, in do_swap_page()
3329 vmf->address); in do_swap_page()
3331 vmf->page = device_private_entry_to_page(entry); in do_swap_page()
3332 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_swap_page()
3333 vmf->address, &vmf->ptl); in do_swap_page()
3334 if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) { in do_swap_page()
3335 spin_unlock(vmf->ptl); in do_swap_page()
3343 get_page(vmf->page); in do_swap_page()
3344 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
3345 vmf->page->pgmap->ops->migrate_to_ram(vmf); in do_swap_page()
3346 put_page(vmf->page); in do_swap_page()
3350 print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL); in do_swap_page()
3358 page = lookup_swap_cache(entry, vma, vmf->address); in do_swap_page()
3364 if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && in do_swap_page()
3368 vmf->address); in do_swap_page()
3378 err = mem_cgroup_charge(page, vma->vm_mm, in do_swap_page()
3404 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_swap_page()
3405 vmf->address, &vmf->ptl); in do_swap_page()
3406 if (likely(pte_same(*vmf->pte, vmf->orig_pte))) in do_swap_page()
3415 count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); in do_swap_page()
3426 locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags); in do_swap_page()
3444 page = ksm_might_need_to_copy(page, vma, vmf->address); in do_swap_page()
3456 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in do_swap_page()
3457 &vmf->ptl); in do_swap_page()
3458 if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) in do_swap_page()
3476 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); in do_swap_page()
3477 dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS); in do_swap_page()
3478 pte = mk_pte(page, vma->vm_page_prot); in do_swap_page()
3479 if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) { in do_swap_page()
3481 vmf->flags &= ~FAULT_FLAG_WRITE; in do_swap_page()
3486 if (pte_swp_soft_dirty(vmf->orig_pte)) in do_swap_page()
3488 if (pte_swp_uffd_wp(vmf->orig_pte)) { in do_swap_page()
3492 set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); in do_swap_page()
3493 arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte); in do_swap_page()
3494 vmf->orig_pte = pte; in do_swap_page()
3498 page_add_new_anon_rmap(page, vma, vmf->address, false); in do_swap_page()
3501 do_page_add_anon_rmap(page, vma, vmf->address, exclusive); in do_swap_page()
3506 (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) in do_swap_page()
3522 if (vmf->flags & FAULT_FLAG_WRITE) { in do_swap_page()
3529 /* No need to invalidate - it was non-present before */ in do_swap_page()
3530 update_mmu_cache(vma, vmf->address, vmf->pte); in do_swap_page()
3532 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
3536 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
3549 * We enter with non-exclusive mmap_lock (to exclude vma changes,
3555 struct vm_area_struct *vma = vmf->vma; in do_anonymous_page()
3560 /* File mapping without ->vm_ops ? */ in do_anonymous_page()
3561 if (vma->vm_flags & VM_SHARED) in do_anonymous_page()
3574 if (pte_alloc(vma->vm_mm, vmf->pmd)) in do_anonymous_page()
3578 if (unlikely(pmd_trans_unstable(vmf->pmd))) in do_anonymous_page()
3582 if (vma->vm_flags & VM_USEREXPTE) { in do_anonymous_page()
3589 /* Use the zero-page for reads */ in do_anonymous_page()
3590 if (!(vmf->flags & FAULT_FLAG_WRITE) && in do_anonymous_page()
3591 !mm_forbids_zeropage(vma->vm_mm)) { in do_anonymous_page()
3592 entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), in do_anonymous_page()
3593 vma->vm_page_prot)); in do_anonymous_page()
3595 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_anonymous_page()
3596 vmf->address, &vmf->ptl); in do_anonymous_page()
3597 if (!pte_none(*vmf->pte)) { in do_anonymous_page()
3598 update_mmu_tlb(vma, vmf->address, vmf->pte); in do_anonymous_page()
3601 ret = check_stable_address_space(vma->vm_mm); in do_anonymous_page()
3606 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
3615 page = alloc_zeroed_user_highpage_movable(vma, vmf->address); in do_anonymous_page()
3619 if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) in do_anonymous_page()
3630 entry = mk_pte(page, vma->vm_page_prot); in do_anonymous_page()
3632 if (vma->vm_flags & VM_WRITE) in do_anonymous_page()
3635 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in do_anonymous_page()
3636 &vmf->ptl); in do_anonymous_page()
3637 if (!pte_none(*vmf->pte)) { in do_anonymous_page()
3638 update_mmu_cache(vma, vmf->address, vmf->pte); in do_anonymous_page()
3642 ret = check_stable_address_space(vma->vm_mm); in do_anonymous_page()
3648 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
3653 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); in do_anonymous_page()
3654 page_add_new_anon_rmap(page, vma, vmf->address, false); in do_anonymous_page()
3655 if (vma->vm_flags & VM_PURGEABLE) in do_anonymous_page()
3660 if (vma->vm_flags & VM_PURGEABLE) in do_anonymous_page()
3661 uxpte_set_present(vma, vmf->address); in do_anonymous_page()
3663 set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); in do_anonymous_page()
3665 /* No need to invalidate - it was non-present before */ in do_anonymous_page()
3666 update_mmu_cache(vma, vmf->address, vmf->pte); in do_anonymous_page()
3668 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
3681 * released depending on flags and vma->vm_ops->fault() return value.
3686 struct vm_area_struct *vma = vmf->vma; in __do_fault()
3704 if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) { in __do_fault()
3705 vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); in __do_fault()
3706 if (!vmf->prealloc_pte) in __do_fault()
3711 ret = vma->vm_ops->fault(vmf); in __do_fault()
3716 if (unlikely(PageHWPoison(vmf->page))) { in __do_fault()
3718 unlock_page(vmf->page); in __do_fault()
3719 put_page(vmf->page); in __do_fault()
3720 vmf->page = NULL; in __do_fault()
3725 lock_page(vmf->page); in __do_fault()
3727 VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page); in __do_fault()
3745 struct vm_area_struct *vma = vmf->vma; in pte_alloc_one_map()
3747 if (!pmd_none(*vmf->pmd)) in pte_alloc_one_map()
3749 if (vmf->prealloc_pte) { in pte_alloc_one_map()
3750 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in pte_alloc_one_map()
3751 if (unlikely(!pmd_none(*vmf->pmd))) { in pte_alloc_one_map()
3752 spin_unlock(vmf->ptl); in pte_alloc_one_map()
3756 mm_inc_nr_ptes(vma->vm_mm); in pte_alloc_one_map()
3757 pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); in pte_alloc_one_map()
3758 spin_unlock(vmf->ptl); in pte_alloc_one_map()
3759 vmf->prealloc_pte = NULL; in pte_alloc_one_map()
3760 } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) { in pte_alloc_one_map()
3775 if (pmd_devmap_trans_unstable(vmf->pmd)) in pte_alloc_one_map()
3779 * At this point we know that our vmf->pmd points to a page of ptes in pte_alloc_one_map()
3782 * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still in pte_alloc_one_map()
3783 * be valid and we will re-check to make sure the vmf->pte isn't in pte_alloc_one_map()
3784 * pte_none() under vmf->ptl protection when we return to in pte_alloc_one_map()
3787 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in pte_alloc_one_map()
3788 &vmf->ptl); in pte_alloc_one_map()
3795 struct vm_area_struct *vma = vmf->vma; in deposit_prealloc_pte()
3797 pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); in deposit_prealloc_pte()
3802 mm_inc_nr_ptes(vma->vm_mm); in deposit_prealloc_pte()
3803 vmf->prealloc_pte = NULL; in deposit_prealloc_pte()
3808 struct vm_area_struct *vma = vmf->vma; in do_set_pmd()
3809 bool write = vmf->flags & FAULT_FLAG_WRITE; in do_set_pmd()
3810 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in do_set_pmd()
3826 if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) { in do_set_pmd()
3827 vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); in do_set_pmd()
3828 if (!vmf->prealloc_pte) in do_set_pmd()
3833 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in do_set_pmd()
3834 if (unlikely(!pmd_none(*vmf->pmd))) in do_set_pmd()
3840 entry = mk_huge_pmd(page, vma->vm_page_prot); in do_set_pmd()
3844 add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR); in do_set_pmd()
3852 set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); in do_set_pmd()
3854 update_mmu_cache_pmd(vma, haddr, vmf->pmd); in do_set_pmd()
3860 spin_unlock(vmf->ptl); in do_set_pmd()
3872 * alloc_set_pte - setup new PTE entry for given page and add reverse page
3873 * mapping. If needed, the function allocates page table or use pre-allocated.
3878 * Caller must take care of unlocking vmf->ptl, if vmf->pte is non-NULL on
3882 * vm_ops->map_pages.
3888 struct vm_area_struct *vma = vmf->vma; in alloc_set_pte()
3889 bool write = vmf->flags & FAULT_FLAG_WRITE; in alloc_set_pte()
3893 if (pmd_none(*vmf->pmd) && PageTransCompound(page)) { in alloc_set_pte()
3899 if (!vmf->pte) { in alloc_set_pte()
3905 /* Re-check under ptl */ in alloc_set_pte()
3906 if (unlikely(!pte_none(*vmf->pte))) { in alloc_set_pte()
3907 update_mmu_tlb(vma, vmf->address, vmf->pte); in alloc_set_pte()
3912 entry = mk_pte(page, vma->vm_page_prot); in alloc_set_pte()
3916 /* copy-on-write page */ in alloc_set_pte()
3917 if (write && !(vma->vm_flags & VM_SHARED)) { in alloc_set_pte()
3918 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); in alloc_set_pte()
3919 page_add_new_anon_rmap(page, vma, vmf->address, false); in alloc_set_pte()
3922 inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); in alloc_set_pte()
3925 set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); in alloc_set_pte()
3927 /* no need to invalidate: a not-present page won't be cached */ in alloc_set_pte()
3928 update_mmu_cache(vma, vmf->address, vmf->pte); in alloc_set_pte()
3935 * finish_fault - finish page fault once we have prepared the page to fault
3955 if ((vmf->flags & FAULT_FLAG_WRITE) && in finish_fault()
3956 !(vmf->vma->vm_flags & VM_SHARED)) in finish_fault()
3957 page = vmf->cow_page; in finish_fault()
3959 page = vmf->page; in finish_fault()
3965 if (!(vmf->vma->vm_flags & VM_SHARED)) in finish_fault()
3966 ret = check_stable_address_space(vmf->vma->vm_mm); in finish_fault()
3969 if (vmf->pte) in finish_fault()
3970 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_fault()
3991 return -EINVAL; in fault_around_bytes_set()
4015 * It uses vm_ops->map_pages() to map the pages, which skips the page if it's
4016 * not ready to be mapped: not up-to-date, locked, etc.
4036 unsigned long address = vmf->address, nr_pages, mask; in do_fault_around()
4037 pgoff_t start_pgoff = vmf->pgoff; in do_fault_around()
4043 mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK; in do_fault_around()
4045 vmf->address = max(address & mask, vmf->vma->vm_start); in do_fault_around()
4046 off = ((address - vmf->address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); in do_fault_around()
4047 start_pgoff -= off; in do_fault_around()
4053 end_pgoff = start_pgoff - in do_fault_around()
4054 ((vmf->address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + in do_fault_around()
4055 PTRS_PER_PTE - 1; in do_fault_around()
4056 end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1, in do_fault_around()
4057 start_pgoff + nr_pages - 1); in do_fault_around()
4059 if (pmd_none(*vmf->pmd)) { in do_fault_around()
4060 vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm); in do_fault_around()
4061 if (!vmf->prealloc_pte) in do_fault_around()
4066 vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff); in do_fault_around()
4069 if (pmd_trans_huge(*vmf->pmd)) { in do_fault_around()
4074 /* ->map_pages() hasn't done anything useful. Cold page cache? */ in do_fault_around()
4075 if (!vmf->pte) in do_fault_around()
4079 vmf->pte -= (vmf->address >> PAGE_SHIFT) - (address >> PAGE_SHIFT); in do_fault_around()
4080 if (!pte_none(*vmf->pte)) in do_fault_around()
4082 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_fault_around()
4084 vmf->address = address; in do_fault_around()
4085 vmf->pte = NULL; in do_fault_around()
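
    For the fault-around machinery above to run at all, the VMA's vm_ops
    must provide ->map_pages; most filesystems just plug in the generic
    page-cache helpers. A sketch (the myfs_ name is hypothetical;
    filemap_fault and filemap_map_pages are the real generic helpers):

        #include <linux/fs.h>
        #include <linux/mm.h>

        static const struct vm_operations_struct myfs_file_vm_ops = {
                .fault          = filemap_fault,        /* generic page cache fault */
                .map_pages      = filemap_map_pages,    /* opts in to fault-around */
        };
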
4091 struct vm_area_struct *vma = vmf->vma; in do_read_fault()
4095 * Let's call ->map_pages() first and use ->fault() as fallback in do_read_fault()
4099 if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) { in do_read_fault()
4110 unlock_page(vmf->page); in do_read_fault()
4112 put_page(vmf->page); in do_read_fault()
4118 struct vm_area_struct *vma = vmf->vma; in do_cow_fault()
4124 vmf->cow_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); in do_cow_fault()
4125 if (!vmf->cow_page) in do_cow_fault()
4128 if (mem_cgroup_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL)) { in do_cow_fault()
4129 put_page(vmf->cow_page); in do_cow_fault()
4132 cgroup_throttle_swaprate(vmf->cow_page, GFP_KERNEL); in do_cow_fault()
4140 copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma); in do_cow_fault()
4141 __SetPageUptodate(vmf->cow_page); in do_cow_fault()
4144 unlock_page(vmf->page); in do_cow_fault()
4145 put_page(vmf->page); in do_cow_fault()
4150 put_page(vmf->cow_page); in do_cow_fault()
4156 struct vm_area_struct *vma = vmf->vma; in do_shared_fault()
4167 if (vma->vm_ops->page_mkwrite) { in do_shared_fault()
4168 unlock_page(vmf->page); in do_shared_fault()
4172 put_page(vmf->page); in do_shared_fault()
4180 unlock_page(vmf->page); in do_shared_fault()
4181 put_page(vmf->page); in do_shared_fault()
4190 * We enter with non-exclusive mmap_lock (to exclude vma changes,
4199 struct vm_area_struct *vma = vmf->vma; in do_fault()
4200 struct mm_struct *vm_mm = vma->vm_mm; in do_fault()
4206 if (!vma->vm_ops->fault) { in do_fault()
4211 if (unlikely(!pmd_present(*vmf->pmd))) in do_fault()
4214 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, in do_fault()
4215 vmf->pmd, in do_fault()
4216 vmf->address, in do_fault()
4217 &vmf->ptl); in do_fault()
4225 if (unlikely(pte_none(*vmf->pte))) in do_fault()
4230 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_fault()
4232 } else if (!(vmf->flags & FAULT_FLAG_WRITE)) in do_fault()
4234 else if (!(vma->vm_flags & VM_SHARED)) in do_fault()
4240 if (vmf->prealloc_pte) { in do_fault()
4241 pte_free(vm_mm, vmf->prealloc_pte); in do_fault()
4242 vmf->prealloc_pte = NULL; in do_fault()
4264 struct vm_area_struct *vma = vmf->vma; in do_numa_page()
4271 bool was_writable = pte_savedwrite(vmf->orig_pte); in do_numa_page()
4279 vmf->ptl = pte_lockptr(vma->vm_mm, vmf->pmd); in do_numa_page()
4280 spin_lock(vmf->ptl); in do_numa_page()
4281 if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) { in do_numa_page()
4282 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
4290 old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte); in do_numa_page()
4291 pte = pte_modify(old_pte, vma->vm_page_prot); in do_numa_page()
4295 ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte); in do_numa_page()
4296 update_mmu_cache(vma, vmf->address, vmf->pte); in do_numa_page()
4298 page = vm_normal_page(vma, vmf->address, pte); in do_numa_page()
4300 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
4304 /* TODO: handle PTE-mapped THP */ in do_numa_page()
4306 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
4325 if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED)) in do_numa_page()
4330 target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid, in do_numa_page()
4332 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
4354 if (vma_is_anonymous(vmf->vma)) in create_huge_pmd()
4356 if (vmf->vma->vm_ops->huge_fault) in create_huge_pmd()
4357 return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); in create_huge_pmd()
4364 if (vma_is_anonymous(vmf->vma)) { in wp_huge_pmd()
4365 if (userfaultfd_huge_pmd_wp(vmf->vma, orig_pmd)) in wp_huge_pmd()
4369 if (vmf->vma->vm_ops->huge_fault) { in wp_huge_pmd()
4370 vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); in wp_huge_pmd()
4376 /* COW or write-notify handled on pte level: split pmd. */ in wp_huge_pmd()
4377 __split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL); in wp_huge_pmd()
4387 if (vma_is_anonymous(vmf->vma)) in create_huge_pud()
4389 if (vmf->vma->vm_ops->huge_fault) { in create_huge_pud()
4390 vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); in create_huge_pud()
4396 /* COW or write-notify not handled on PUD level: split pud.*/ in create_huge_pud()
4397 __split_huge_pud(vmf->vma, vmf->pud, vmf->address); in create_huge_pud()
4406 if (vma_is_anonymous(vmf->vma)) in wp_huge_pud()
4408 if (vmf->vma->vm_ops->huge_fault) in wp_huge_pud()
4409 return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); in wp_huge_pud()
4423 * We enter with non-exclusive mmap_lock (to exclude vma changes, but allow
4433 if (unlikely(pmd_none(*vmf->pmd))) { in handle_pte_fault()
4435 * Leave __pte_alloc() until later: because vm_ops->fault may in handle_pte_fault()
4440 vmf->pte = NULL; in handle_pte_fault()
4443 if (pmd_devmap_trans_unstable(vmf->pmd)) in handle_pte_fault()
4451 vmf->pte = pte_offset_map(vmf->pmd, vmf->address); in handle_pte_fault()
4452 vmf->orig_pte = *vmf->pte; in handle_pte_fault()
4456 * e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and in handle_pte_fault()
4463 if (pte_none(vmf->orig_pte)) { in handle_pte_fault()
4464 pte_unmap(vmf->pte); in handle_pte_fault()
4465 vmf->pte = NULL; in handle_pte_fault()
4469 if (!vmf->pte) { in handle_pte_fault()
4470 if (vma_is_anonymous(vmf->vma)) in handle_pte_fault()
4476 if (!pte_present(vmf->orig_pte)) in handle_pte_fault()
4479 if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) in handle_pte_fault()
4482 vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd); in handle_pte_fault()
4483 spin_lock(vmf->ptl); in handle_pte_fault()
4484 entry = vmf->orig_pte; in handle_pte_fault()
4485 if (unlikely(!pte_same(*vmf->pte, entry))) { in handle_pte_fault()
4486 update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); in handle_pte_fault()
4489 if (vmf->flags & FAULT_FLAG_WRITE) { in handle_pte_fault()
4495 if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry, in handle_pte_fault()
4496 vmf->flags & FAULT_FLAG_WRITE)) { in handle_pte_fault()
4497 update_mmu_cache(vmf->vma, vmf->address, vmf->pte); in handle_pte_fault()
4500 if (vmf->flags & FAULT_FLAG_TRIED) in handle_pte_fault()
4508 if (vmf->flags & FAULT_FLAG_WRITE) in handle_pte_fault()
4509 flush_tlb_fix_spurious_fault(vmf->vma, vmf->address); in handle_pte_fault()
4512 pte_unmap_unlock(vmf->pte, vmf->ptl); in handle_pte_fault()
4533 struct mm_struct *mm = vma->vm_mm; in __handle_mm_fault()
4612 * mm_account_fault - Do page fault accounting
4615 * of perf event counters, but we'll still do the per-task accounting to
4624 * still be in per-arch page fault handlers at the entry of page fault.
4635 * - Unsuccessful faults (e.g. when the address wasn't valid). That in mm_account_fault()
4640 * - Incomplete faults (VM_FAULT_RETRY). They will only be counted in mm_account_fault()
4654 current->maj_flt++; in mm_account_fault()
4656 current->min_flt++; in mm_account_fault()
4686 count_memcg_event_mm(vma->vm_mm, PGFAULT); in handle_mm_fault()
4704 ret = hugetlb_fault(vma->vm_mm, vma, address, flags); in handle_mm_fault()
4729 * We've already handled the fast-path in-line.
4735 return -ENOMEM; in __p4d_alloc()
4739 spin_lock(&mm->page_table_lock); in __p4d_alloc()
4744 spin_unlock(&mm->page_table_lock); in __p4d_alloc()
4752 * We've already handled the fast-path in-line.
4758 return -ENOMEM; in __pud_alloc()
4762 spin_lock(&mm->page_table_lock); in __pud_alloc()
4768 spin_unlock(&mm->page_table_lock); in __pud_alloc()
4776 * We've already handled the fast-path in-line.
4783 return -ENOMEM; in __pmd_alloc()
4862 return -EINVAL; in follow_invalidate_pte()
4866 * follow_pte - look up PTE at a user virtual address
4882 * it is not a good general-purpose API.
4884 * Return: zero on success, -ve otherwise.
4894 * follow_pfn - look up PFN at a user virtual address
4904 * Return: zero and the pfn at @pfn on success, -ve otherwise.
4909 int ret = -EINVAL; in follow_pfn()
4913 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) in follow_pfn()
4916 ret = follow_pte(vma->vm_mm, address, &ptep, &ptl); in follow_pfn()
4930 int ret = -EINVAL; in follow_phys()
4934 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) in follow_phys()
4937 if (follow_pte(vma->vm_mm, address, &ptep, &ptl)) in follow_phys()
4960 int offset = addr & (PAGE_SIZE-1); in generic_access_phys()
4963 return -EINVAL; in generic_access_phys()
4967 return -ENOMEM; in generic_access_phys()
4981 * Access another process' address space as given in mm. If non-NULL, use the
5011 if (!vma || vma->vm_start > addr) in __access_remote_vm()
5013 if (vma->vm_ops && vma->vm_ops->access) in __access_remote_vm()
5014 ret = vma->vm_ops->access(vma, addr, buf, in __access_remote_vm()
5022 offset = addr & (PAGE_SIZE-1); in __access_remote_vm()
5023 if (bytes > PAGE_SIZE-offset) in __access_remote_vm()
5024 bytes = PAGE_SIZE-offset; in __access_remote_vm()
5038 len -= bytes; in __access_remote_vm()
5044 return buf - old_buf; in __access_remote_vm()
5048 * access_remote_vm - access another process' address space
5093 struct mm_struct *mm = current->mm; in print_vma_addr()
5103 if (vma && vma->vm_file) { in print_vma_addr()
5104 struct file *f = vma->vm_file; in print_vma_addr()
5113 vma->vm_start, in print_vma_addr()
5114 vma->vm_end - vma->vm_start); in print_vma_addr()
5136 if (current->mm) in __might_fault()
5137 might_lock_read(&current->mm->mmap_lock); in __might_fault()
5156 ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); in process_huge_page()
5160 n = (addr_hint - addr) / PAGE_SIZE; in process_huge_page()
5166 for (i = pages_per_huge_page - 1; i >= 2 * n; i--) { in process_huge_page()
5172 base = pages_per_huge_page - 2 * (pages_per_huge_page - n); in process_huge_page()
5173 l = pages_per_huge_page - n; in process_huge_page()
5181 * Process remaining subpages in left-right-left-right pattern in process_huge_page()
5186 int right_idx = base + 2 * l - 1 - i; in process_huge_page()
5221 ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); in clear_huge_page()
5260 copy_user_highpage(copy_arg->dst + idx, copy_arg->src + idx, in copy_subpage()
5261 addr, copy_arg->vma); in copy_subpage()
5269 ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); in copy_user_huge_page()
5310 ret_val -= (PAGE_SIZE - rc); in copy_huge_page_from_user()
5326 page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0, in ptlock_cache_init()
5337 page->ptl = ptl; in ptlock_alloc()
5343 kmem_cache_free(page_ptl_cachep, page->ptl); in ptlock_free()