Lines Matching +full:wp +full:- +full:content
1 // SPDX-License-Identifier: GPL-2.0-only
9 * demand-loading started 01.12.91 - seems it is high on the list of
10 * things wanted, and it should be easy to implement. - Linus
14 * Ok, demand-loading was easy, shared pages a little bit trickier. Shared
15 * pages started 02.12.91, seems to work. - Linus.
21 * Also corrected some "invalidate()"s - I wasn't doing enough of them.
27 * 19.12.91 - works, somewhat. Sometimes I get faults, don't know why.
29 * 20.12.91 - Ok, making the swap-device changeable like the root.
33 * 05.04.94 - Multi-page memory management added for v1.1.
36 * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG
87 #include "pgalloc-track.h"
92 #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
96 /* use the per-pgdat data instead for discontigmem - mbligh */
173 if (current->rss_stat.count[i]) { in sync_mm_rss()
174 add_mm_counter(mm, i, current->rss_stat.count[i]); in sync_mm_rss()
175 current->rss_stat.count[i] = 0; in sync_mm_rss()
178 current->rss_stat.events = 0; in sync_mm_rss()
185 if (likely(task->mm == mm)) in add_mm_counter_fast()
186 task->rss_stat.count[member] += val; in add_mm_counter_fast()
191 #define dec_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, -1)
199 if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH)) in check_sync_rss_stat()
200 sync_mm_rss(task->mm); in check_sync_rss_stat()
223 mm_dec_nr_ptes(tlb->mm); in free_pte_range()
251 if (end - 1 > ceiling - 1) in free_pmd_range()
257 mm_dec_nr_pmds(tlb->mm); in free_pmd_range()
285 if (end - 1 > ceiling - 1) in free_pud_range()
291 mm_dec_nr_puds(tlb->mm); in free_pud_range()
319 if (end - 1 > ceiling - 1) in free_p4d_range()
328 * This function frees user-level page tables of a process.
344 * Why all these "- 1"s? Because 0 represents both the bottom in free_pgd_range()
345 * of the address space and the top of it (using -1 for the in free_pgd_range()
349 * Comparisons need to use "end - 1" and "ceiling - 1" (though in free_pgd_range()
360 * bother to round floor or end up - the tests don't need that. in free_pgd_range()
374 if (end - 1 > ceiling - 1) in free_pgd_range()
375 end -= PMD_SIZE; in free_pgd_range()
376 if (addr > end - 1) in free_pgd_range()
383 pgd = pgd_offset(tlb->mm, addr); in free_pgd_range()
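A brief worked illustration of the "- 1" comparisons described above (mine, not from the source): a ceiling of 0 stands for the very top of the address space, so ceiling - 1 wraps to ULONG_MAX and "end - 1 > ceiling - 1" can never be true, meaning nothing is held back; with a non-zero ceiling, an end that reaches past it makes the comparison true and end is pulled back by PMD_SIZE so a page-table page possibly shared with the region above the ceiling is not freed.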
396 struct vm_area_struct *next = vma->vm_next; in free_pgtables()
397 unsigned long addr = vma->vm_start; in free_pgtables()
407 hugetlb_free_pgd_range(tlb, addr, vma->vm_end, in free_pgtables()
408 floor, next ? next->vm_start : ceiling); in free_pgtables()
413 while (next && next->vm_start <= vma->vm_end + PMD_SIZE in free_pgtables()
416 next = vma->vm_next; in free_pgtables()
420 free_pgd_range(tlb, addr, vma->vm_end, in free_pgtables()
421 floor, next ? next->vm_start : ceiling); in free_pgtables()
432 return -ENOMEM; in __pte_alloc()
442 * of a chain of data-dependent loads, meaning most CPUs (alpha in __pte_alloc()
444 * seen in-order. See the alpha page table accessors for the in __pte_alloc()
465 return -ENOMEM; in __pte_alloc_kernel()
489 if (current->mm == mm) in add_mm_rss_vec()
498 * is found. For example, we might have a PFN-mapped pte in
506 pgd_t *pgd = pgd_offset(vma->vm_mm, addr); in print_bad_pte()
535 mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL; in print_bad_pte()
539 current->comm, in print_bad_pte()
544 (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); in print_bad_pte()
546 vma->vm_file, in print_bad_pte()
547 vma->vm_ops ? vma->vm_ops->fault : NULL, in print_bad_pte()
548 vma->vm_file ? vma->vm_file->f_op->mmap : NULL, in print_bad_pte()
549 mapping ? mapping->a_ops->readpage : NULL); in print_bad_pte()
555 * vm_normal_page -- This function gets the "struct page" associated with a pte.
575 * pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
604 if (vma->vm_ops && vma->vm_ops->find_special_page) in vm_normal_page()
605 return vma->vm_ops->find_special_page(vma, addr); in vm_normal_page()
606 if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) in vm_normal_page()
619 if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { in vm_normal_page()
620 if (vma->vm_flags & VM_MIXEDMAP) { in vm_normal_page()
626 off = (addr - vma->vm_start) >> PAGE_SHIFT; in vm_normal_page()
627 if (pfn == vma->vm_pgoff + off) in vm_normal_page()
629 if (!is_cow_mapping(vma->vm_flags)) in vm_normal_page()
659 * in a direct-access (dax) mapping, so let's just replicate the in vm_normal_page_pmd()
662 if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { in vm_normal_page_pmd()
663 if (vma->vm_flags & VM_MIXEDMAP) { in vm_normal_page_pmd()
669 off = (addr - vma->vm_start) >> PAGE_SHIFT; in vm_normal_page_pmd()
670 if (pfn == vma->vm_pgoff + off) in vm_normal_page_pmd()
672 if (!is_cow_mapping(vma->vm_flags)) in vm_normal_page_pmd()
704 unsigned long vm_flags = dst_vma->vm_flags; in copy_nonpresent_pte()
714 if (unlikely(list_empty(&dst_mm->mmlist))) { in copy_nonpresent_pte()
716 if (list_empty(&dst_mm->mmlist)) in copy_nonpresent_pte()
717 list_add(&dst_mm->mmlist, in copy_nonpresent_pte()
718 &src_mm->mmlist); in copy_nonpresent_pte()
758 * We do not preserve soft-dirty information, because so in copy_nonpresent_pte()
785 * the page refcount and re-use the pte the traditional
794 * And if we need a pre-allocated page but don't yet have
804 struct mm_struct *src_mm = src_vma->vm_mm; in copy_present_page()
807 if (!is_cow_mapping(src_vma->vm_flags)) in copy_present_page()
823 if (likely(!atomic_read(&src_mm->has_pinned))) in copy_present_page()
829 * The vma->anon_vma of the child process may be NULL in copy_present_page()
832 * a copy of a non-anonymous page of that vma to the in copy_present_page()
841 return -EAGAIN; in copy_present_page()
855 pte = mk_pte(new_page, dst_vma->vm_page_prot); in copy_present_page()
858 /* Uffd-wp needs to be delivered to dest pte as well */ in copy_present_page()
860 set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte); in copy_present_page()
865 * Copy one pte. Returns 0 if succeeded, or -EAGAIN if one preallocated page
873 struct mm_struct *src_mm = src_vma->vm_mm; in copy_present_pte()
874 unsigned long vm_flags = src_vma->vm_flags; in copy_present_pte()
912 set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte); in copy_present_pte()
940 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pte_range()
941 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pte_range()
956 ret = -ENOMEM; in copy_pte_range()
968 * We are holding two locks at this point - either of them in copy_pte_range()
995 * If we need a pre-allocated page for this pte, drop the in copy_pte_range()
998 if (unlikely(ret == -EAGAIN)) in copy_pte_range()
1002 * pre-alloc page cannot be reused by next time so as in copy_pte_range()
1022 ret = -ENOMEM; in copy_pte_range()
1027 WARN_ON_ONCE(ret != -EAGAIN); in copy_pte_range()
1030 return -ENOMEM; in copy_pte_range()
1047 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pmd_range()
1048 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pmd_range()
1054 return -ENOMEM; in copy_pmd_range()
1061 VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, src_vma); in copy_pmd_range()
1064 if (err == -ENOMEM) in copy_pmd_range()
1065 return -ENOMEM; in copy_pmd_range()
1074 return -ENOMEM; in copy_pmd_range()
1084 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pud_range()
1085 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pud_range()
1091 return -ENOMEM; in copy_pud_range()
1098 VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, src_vma); in copy_pud_range()
1101 if (err == -ENOMEM) in copy_pud_range()
1102 return -ENOMEM; in copy_pud_range()
1111 return -ENOMEM; in copy_pud_range()
1121 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_p4d_range()
1127 return -ENOMEM; in copy_p4d_range()
1135 return -ENOMEM; in copy_p4d_range()
1145 unsigned long addr = src_vma->vm_start; in copy_page_range()
1146 unsigned long end = src_vma->vm_end; in copy_page_range()
1147 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_page_range()
1148 struct mm_struct *src_mm = src_vma->vm_mm; in copy_page_range()
1159 if (!(src_vma->vm_flags & (VM_HUGETLB | VM_PFNMAP | VM_MIXEDMAP)) && in copy_page_range()
1160 !src_vma->anon_vma) in copy_page_range()
1166 if (unlikely(src_vma->vm_flags & VM_PFNMAP)) { in copy_page_range()
1182 is_cow = is_cow_mapping(src_vma->vm_flags); in copy_page_range()
1196 raw_write_seqcount_begin(&src_mm->write_protect_seq); in copy_page_range()
1208 ret = -ENOMEM; in copy_page_range()
1214 raw_write_seqcount_end(&src_mm->write_protect_seq); in copy_page_range()
1228 return !details->check_mapping; in should_zap_cows()
1236 struct mm_struct *mm = tlb->mm; in zap_pte_range()
1263 if (vma->vm_flags & VM_USEREXPTE) in zap_pte_range()
1271 if (details->check_mapping && in zap_pte_range()
1272 details->check_mapping != page_rmapping(page)) in zap_pte_range()
1276 tlb->fullmm); in zap_pte_range()
1280 if (vma->vm_flags & VM_PURGEABLE) in zap_pte_range()
1288 likely(!(vma->vm_flags & VM_SEQ_READ))) in zap_pte_range()
1291 rss[mm_counter(page)]--; in zap_pte_range()
1307 if (unlikely(details && details->check_mapping)) { in zap_pte_range()
1313 if (details->check_mapping != in zap_pte_range()
1318 pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); in zap_pte_range()
1319 rss[mm_counter(page)]--; in zap_pte_range()
1329 rss[MM_SWAPENTS]--; in zap_pte_range()
1334 if (details && details->check_mapping && in zap_pte_range()
1335 details->check_mapping != page_rmapping(page)) in zap_pte_range()
1337 rss[mm_counter(page)]--; in zap_pte_range()
1341 pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); in zap_pte_range()
1383 if (next - addr != HPAGE_PMD_SIZE) in zap_pmd_range()
1388 } else if (details && details->single_page && in zap_pmd_range()
1389 PageTransCompound(details->single_page) && in zap_pmd_range()
1390 next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { in zap_pmd_range()
1391 spinlock_t *ptl = pmd_lock(tlb->mm, pmd); in zap_pmd_range()
1429 if (next - addr != HPAGE_PUD_SIZE) { in zap_pud_range()
1430 mmap_assert_locked(tlb->mm); in zap_pud_range()
1475 pgd = pgd_offset(vma->vm_mm, addr); in unmap_page_range()
1491 unsigned long start = max(vma->vm_start, start_addr); in unmap_single_vma()
1494 if (start >= vma->vm_end) in unmap_single_vma()
1496 end = min(vma->vm_end, end_addr); in unmap_single_vma()
1497 if (end <= vma->vm_start) in unmap_single_vma()
1500 if (vma->vm_file) in unmap_single_vma()
1503 if (unlikely(vma->vm_flags & VM_PFNMAP)) in unmap_single_vma()
1509 * It is undesirable to test vma->vm_file as it in unmap_single_vma()
1510 * should be non-null for valid hugetlb area. in unmap_single_vma()
1513 * hugetlbfs ->mmap method fails, in unmap_single_vma()
1514 * mmap_region() nullifies vma->vm_file in unmap_single_vma()
1519 if (vma->vm_file) { in unmap_single_vma()
1520 i_mmap_lock_write(vma->vm_file->f_mapping); in unmap_single_vma()
1522 i_mmap_unlock_write(vma->vm_file->f_mapping); in unmap_single_vma()
1530 * unmap_vmas - unmap a range of memory covered by a list of vma's
1544 * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
1553 mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, in unmap_vmas()
1556 for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) in unmap_vmas()
1562 * zap_page_range - remove user pages in a given range
1576 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, in zap_page_range()
1578 tlb_gather_mmu(&tlb, vma->vm_mm, start, range.end); in zap_page_range()
1579 update_hiwater_rss(vma->vm_mm); in zap_page_range()
1581 for ( ; vma && vma->vm_start < range.end; vma = vma->vm_next) in zap_page_range()
1588 * zap_page_range_single - remove user pages in a given range
1603 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, in zap_page_range_single()
1605 tlb_gather_mmu(&tlb, vma->vm_mm, address, range.end); in zap_page_range_single()
1606 update_hiwater_rss(vma->vm_mm); in zap_page_range_single()
1614 * zap_vma_ptes - remove ptes mapping the vma
1627 if (address < vma->vm_start || address + size > vma->vm_end || in zap_vma_ptes()
1628 !(vma->vm_flags & VM_PFNMAP)) in zap_vma_ptes()
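zap_vma_ptes(), per the check above, only accepts VM_PFNMAP areas the caller fully controls; a minimal sketch of the usual driver-side use (the helper and its caller are hypothetical, only zap_vma_ptes() itself comes from this file):

#include <linux/mm.h>

/*
 * Hypothetical helper: revoke a VM_PFNMAP mapping the driver set up
 * earlier, e.g. before the backing device memory is disabled.  Any
 * serialization against concurrent faults is assumed to be handled by
 * the caller and is elided here.
 */
static void demo_revoke_mapping(struct vm_area_struct *vma)
{
        zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
}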
1670 return -EINVAL; in validate_page_before_insert()
1679 return -EBUSY; in insert_page_into_pte_locked()
1698 struct mm_struct *mm = vma->vm_mm; in insert_page()
1706 retval = -ENOMEM; in insert_page()
1723 return -EINVAL; in insert_page_in_batch_locked()
1739 struct mm_struct *const mm = vma->vm_mm; in insert_pages()
1745 ret = -EFAULT; in insert_pages()
1751 remaining_pages_total, PTRS_PER_PTE - pte_index(addr)); in insert_pages()
1754 ret = -ENOMEM; in insert_pages()
1769 remaining_pages_total -= pte_idx; in insert_pages()
1776 pages_to_write_in_pmd -= batch_size; in insert_pages()
1777 remaining_pages_total -= batch_size; in insert_pages()
1789 * vm_insert_pages - insert multiple pages into user vma, batching the pmd lock.
1807 const unsigned long end_addr = addr + (*num * PAGE_SIZE) - 1; in vm_insert_pages()
1809 if (addr < vma->vm_start || end_addr >= vma->vm_end) in vm_insert_pages()
1810 return -EFAULT; in vm_insert_pages()
1811 if (!(vma->vm_flags & VM_MIXEDMAP)) { in vm_insert_pages()
1812 BUG_ON(mmap_read_trylock(vma->vm_mm)); in vm_insert_pages()
1813 BUG_ON(vma->vm_flags & VM_PFNMAP); in vm_insert_pages()
1814 vma->vm_flags |= VM_MIXEDMAP; in vm_insert_pages()
1817 return insert_pages(vma, addr, pages, num, vma->vm_page_prot); in vm_insert_pages()
1820 int err = -EINVAL; in vm_insert_pages()
1827 *num = pgcount - idx; in vm_insert_pages()
1834 * vm_insert_page - insert single page into user vma
1855 * Usually this function is called from f_op->mmap() handler
1856 * under mm->mmap_lock write-lock, so it can change vma->vm_flags.
1858 * function from other places, for example from page-fault handler.
1865 if (addr < vma->vm_start || addr >= vma->vm_end) in vm_insert_page()
1866 return -EFAULT; in vm_insert_page()
1868 return -EINVAL; in vm_insert_page()
1869 if (!(vma->vm_flags & VM_MIXEDMAP)) { in vm_insert_page()
1870 BUG_ON(mmap_read_trylock(vma->vm_mm)); in vm_insert_page()
1871 BUG_ON(vma->vm_flags & VM_PFNMAP); in vm_insert_page()
1872 vma->vm_flags |= VM_MIXEDMAP; in vm_insert_page()
1874 return insert_page(vma, addr, page, vma->vm_page_prot); in vm_insert_page()
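As the vm_insert_page() comment above says, the usual caller is an f_op->mmap() handler running with mmap_lock held for write; a minimal sketch of that pattern, with a hypothetical demo_dev driver and a page assumed to come from alloc_page() (none of this is in the file itself):

#include <linux/fs.h>
#include <linux/mm.h>

struct demo_dev {
        struct page *page;              /* assumed: one order-0 page */
};

static int demo_mmap(struct file *filp, struct vm_area_struct *vma)
{
        struct demo_dev *drv = filp->private_data;

        if (vma->vm_end - vma->vm_start < PAGE_SIZE)
                return -EINVAL;
        /* vm_insert_page() sets VM_MIXEDMAP on the vma the first time. */
        return vm_insert_page(vma, vma->vm_start, drv->page);
}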
1879 * __vm_map_pages - maps range of kernel pages into user vma
1893 unsigned long uaddr = vma->vm_start; in __vm_map_pages()
1898 return -ENXIO; in __vm_map_pages()
1901 if (count > num - offset) in __vm_map_pages()
1902 return -ENXIO; in __vm_map_pages()
1915 * vm_map_pages - maps a range of kernel pages starting at a non-zero offset
1935 return __vm_map_pages(vma, pages, num, vma->vm_pgoff); in vm_map_pages()
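For a whole array of pages, vm_map_pages() above applies vma->vm_pgoff and the range checks itself; a hedged sketch of a caller (demo_buf and its fields are made up for illustration):

#include <linux/fs.h>
#include <linux/mm.h>

struct demo_buf {
        struct page **pages;            /* assumed: preallocated array */
        unsigned long nr_pages;
};

static int demo_buf_mmap(struct file *filp, struct vm_area_struct *vma)
{
        struct demo_buf *buf = filp->private_data;

        /* Offset and length validation happen inside vm_map_pages(). */
        return vm_map_pages(vma, buf->pages, buf->nr_pages);
}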
1940 * vm_map_pages_zero - map a range of kernel pages starting at offset zero
1962 struct mm_struct *mm = vma->vm_mm; in insert_pfn()
2013 * vmf_insert_pfn_prot - insert single pfn into user vma with specified pgprot
2020 * to override pgprot on a per-page basis.
2028 * a value of @pgprot different from that of @vma->vm_page_prot.
2042 BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); in vmf_insert_pfn_prot()
2043 BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == in vmf_insert_pfn_prot()
2045 BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); in vmf_insert_pfn_prot()
2046 BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); in vmf_insert_pfn_prot()
2048 if (addr < vma->vm_start || addr >= vma->vm_end) in vmf_insert_pfn_prot()
2062 * vmf_insert_pfn - insert single pfn into user vma
2070 * This function should only be called from a vm_ops->fault handler, and
2084 return vmf_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); in vmf_insert_pfn()
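The comment above restricts vmf_insert_pfn() to vm_ops->fault handlers of VM_PFNMAP mappings; a sketch of such a handler, assuming the vma was flagged VM_IO | VM_PFNMAP in ->mmap and that demo_base_pfn is a made-up per-device base page frame:

#include <linux/mm.h>

static unsigned long demo_base_pfn;     /* assumed device base pfn */

static vm_fault_t demo_pfn_fault(struct vm_fault *vmf)
{
        /* Back the faulting page with the matching device pfn. */
        return vmf_insert_pfn(vmf->vma, vmf->address,
                              demo_base_pfn + vmf->pgoff);
}

static const struct vm_operations_struct demo_pfn_vm_ops = {
        .fault = demo_pfn_fault,
};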
2091 if (vma->vm_flags & VM_MIXEDMAP) in vm_mixed_ok()
2110 if (addr < vma->vm_start || addr >= vma->vm_end) in __vm_insert_mixed()
2140 if (err == -ENOMEM) in __vm_insert_mixed()
2142 if (err < 0 && err != -EBUSY) in __vm_insert_mixed()
2149 * vmf_insert_mixed_prot - insert single pfn into user vma with specified pgprot
2156 * to override pgprot on a per-page basis.
2158 * Typically this function should be used by drivers to set caching- and
2159 * encryption bits different than those of @vma->vm_page_prot, because
2160 * the caching- or encryption mode may not be known at mmap() time.
2161 * This is ok as long as @vma->vm_page_prot is not used by the core vm
2164 * functions that don't touch caching- or encryption bits, using pte_modify()
2166 * Also when new page-table entries are created, this is only done using the
2167 * fault() callback, and never using the value of vma->vm_page_prot,
2168 * except for page-table entries that point to anonymous pages as the result
2184 return __vm_insert_mixed(vma, addr, pfn, vma->vm_page_prot, false); in vmf_insert_mixed()
2196 return __vm_insert_mixed(vma, addr, pfn, vma->vm_page_prot, true); in vmf_insert_mixed_mkwrite()
2203 * in null mappings (currently treated as "copy-on-access")
2215 return -ENOMEM; in remap_pte_range()
2220 err = -EACCES; in remap_pte_range()
2239 pfn -= addr >> PAGE_SHIFT; in remap_pmd_range()
2242 return -ENOMEM; in remap_pmd_range()
2262 pfn -= addr >> PAGE_SHIFT; in remap_pud_range()
2265 return -ENOMEM; in remap_pud_range()
2284 pfn -= addr >> PAGE_SHIFT; in remap_p4d_range()
2287 return -ENOMEM; in remap_p4d_range()
2299 * remap_pfn_range - remap kernel memory to userspace
2316 struct mm_struct *mm = vma->vm_mm; in remap_pfn_range()
2321 return -EINVAL; in remap_pfn_range()
2336 * There's a horrible special case to handle copy-on-write in remap_pfn_range()
2338 * un-COW'ed pages by matching them up with "vma->vm_pgoff". in remap_pfn_range()
2341 if (is_cow_mapping(vma->vm_flags)) { in remap_pfn_range()
2342 if (addr != vma->vm_start || end != vma->vm_end) in remap_pfn_range()
2343 return -EINVAL; in remap_pfn_range()
2344 vma->vm_pgoff = pfn; in remap_pfn_range()
2349 return -EINVAL; in remap_pfn_range()
2351 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; in remap_pfn_range()
2354 pfn -= addr >> PAGE_SHIFT; in remap_pfn_range()
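A sketch of the usual remap_pfn_range() call site, a driver ->mmap handler mapping a device region in one go (DEMO_PHYS_BASE and the handler are hypothetical; the COW and vm_flags handling shown above happens inside the call):

#include <linux/fs.h>
#include <linux/mm.h>

#define DEMO_PHYS_BASE  0x40000000UL    /* assumed device address */

static int demo_io_mmap(struct file *filp, struct vm_area_struct *vma)
{
        unsigned long pfn = (DEMO_PHYS_BASE >> PAGE_SHIFT) + vma->vm_pgoff;
        unsigned long size = vma->vm_end - vma->vm_start;

        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
        return remap_pfn_range(vma, vma->vm_start, pfn, size,
                               vma->vm_page_prot);
}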
2373 * vm_iomap_memory - remap memory to userspace
2382 * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get
2383 * whatever write-combining details or similar.
2393 return -EINVAL; in vm_iomap_memory()
2395 * You *really* shouldn't map things that aren't page-aligned, in vm_iomap_memory()
2403 return -EINVAL; in vm_iomap_memory()
2406 if (vma->vm_pgoff > pages) in vm_iomap_memory()
2407 return -EINVAL; in vm_iomap_memory()
2408 pfn += vma->vm_pgoff; in vm_iomap_memory()
2409 pages -= vma->vm_pgoff; in vm_iomap_memory()
2412 vm_len = vma->vm_end - vma->vm_start; in vm_iomap_memory()
2414 return -EINVAL; in vm_iomap_memory()
2417 return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); in vm_iomap_memory()
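vm_iomap_memory() is the simpler wrapper for the same job: the driver hands over the raw physical range and the offset and length checks above are done for it. A sketch, with demo_res standing in for the device's struct resource:

#include <linux/fs.h>
#include <linux/ioport.h>
#include <linux/mm.h>

static struct resource demo_res;        /* assumed: the device BAR */

static int demo_iomap_mmap(struct file *filp, struct vm_area_struct *vma)
{
        return vm_iomap_memory(vma, demo_res.start,
                               resource_size(&demo_res));
}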
2435 return -ENOMEM; in apply_to_pte_range()
2460 pte_unmap_unlock(pte-1, ptl); in apply_to_pte_range()
2478 return -ENOMEM; in apply_to_pmd_range()
2506 return -ENOMEM; in apply_to_pud_range()
2534 return -ENOMEM; in apply_to_p4d_range()
2561 return -EINVAL; in __apply_to_page_range()
2606 * read non-atomically. Before making any commitment, on those architectures
2635 struct vm_area_struct *vma = vmf->vma; in cow_user_page()
2636 struct mm_struct *mm = vma->vm_mm; in cow_user_page()
2637 unsigned long addr = vmf->address; in cow_user_page()
2646 * a "struct page" for it. We do a best-effort copy by in cow_user_page()
2648 * fails, we just zero-fill it. Live with it. in cow_user_page()
2657 if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) { in cow_user_page()
2660 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); in cow_user_page()
2662 if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { in cow_user_page()
2667 update_mmu_tlb(vma, addr, vmf->pte); in cow_user_page()
2672 entry = pte_mkyoung(vmf->orig_pte); in cow_user_page()
2673 if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0)) in cow_user_page()
2674 update_mmu_cache(vma, addr, vmf->pte); in cow_user_page()
2687 /* Re-validate under PTL if the page is still mapped */ in cow_user_page()
2688 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); in cow_user_page()
2690 if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { in cow_user_page()
2692 update_mmu_tlb(vma, addr, vmf->pte); in cow_user_page()
2704 * use-case in cow_user_page()
2716 pte_unmap_unlock(vmf->pte, vmf->ptl); in cow_user_page()
2725 struct file *vm_file = vma->vm_file; in __get_fault_gfp_mask()
2728 return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO; in __get_fault_gfp_mask()
2746 struct page *page = vmf->page; in do_page_mkwrite()
2747 unsigned int old_flags = vmf->flags; in do_page_mkwrite()
2749 vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; in do_page_mkwrite()
2751 if (vmf->vma->vm_file && in do_page_mkwrite()
2752 IS_SWAPFILE(vmf->vma->vm_file->f_mapping->host)) in do_page_mkwrite()
2755 ret = vmf->vma->vm_ops->page_mkwrite(vmf); in do_page_mkwrite()
2757 vmf->flags = old_flags; in do_page_mkwrite()
2762 if (!page->mapping) { in do_page_mkwrite()
2779 struct vm_area_struct *vma = vmf->vma; in fault_dirty_shared_page()
2781 struct page *page = vmf->page; in fault_dirty_shared_page()
2783 bool page_mkwrite = vma->vm_ops && vma->vm_ops->page_mkwrite; in fault_dirty_shared_page()
2788 * Take a local copy of the address_space - page.mapping may be zeroed in fault_dirty_shared_page()
2790 * pinned by vma->vm_file's reference. We rely on unlock_page()'s in fault_dirty_shared_page()
2797 file_update_time(vma->vm_file); in fault_dirty_shared_page()
2828 * any related book-keeping.
2831 __releases(vmf->ptl) in wp_page_reuse()
2833 struct vm_area_struct *vma = vmf->vma; in wp_page_reuse()
2834 struct page *page = vmf->page; in wp_page_reuse()
2842 page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1); in wp_page_reuse()
2844 flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); in wp_page_reuse()
2845 entry = pte_mkyoung(vmf->orig_pte); in wp_page_reuse()
2847 if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1)) in wp_page_reuse()
2848 update_mmu_cache(vma, vmf->address, vmf->pte); in wp_page_reuse()
2849 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_reuse()
2861 * - Allocate a page, copy the content of the old page to the new one.
2862 * - Handle book keeping and accounting - cgroups, mmu-notifiers, etc.
2863 * - Take the PTL. If the pte changed, bail out and release the allocated page
2864 * - If the pte is still the way we remember it, update the page table and all
2865 * relevant references. This includes dropping the reference the page-table
2867 * - In any case, unlock the PTL and drop the reference we took to the old page.
2871 struct vm_area_struct *vma = vmf->vma; in wp_page_copy()
2872 struct mm_struct *mm = vma->vm_mm; in wp_page_copy()
2873 struct page *old_page = vmf->page; in wp_page_copy()
2882 if (is_zero_pfn(pte_pfn(vmf->orig_pte))) { in wp_page_copy()
2884 vmf->address); in wp_page_copy()
2889 vmf->address); in wp_page_copy()
2896 * it's fine. If not, userspace would re-fault on in wp_page_copy()
2914 vmf->address & PAGE_MASK, in wp_page_copy()
2915 (vmf->address & PAGE_MASK) + PAGE_SIZE); in wp_page_copy()
2919 * Re-check the pte - we dropped the lock in wp_page_copy()
2921 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl); in wp_page_copy()
2922 if (likely(pte_same(*vmf->pte, vmf->orig_pte))) { in wp_page_copy()
2932 flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); in wp_page_copy()
2933 entry = mk_pte(new_page, vma->vm_page_prot); in wp_page_copy()
2942 ptep_clear_flush_notify(vma, vmf->address, vmf->pte); in wp_page_copy()
2943 page_add_new_anon_rmap(new_page, vma, vmf->address, false); in wp_page_copy()
2944 if (vma->vm_flags & VM_PURGEABLE) { in wp_page_copy()
2945 pr_info("set wp new page %lx purgeable\n", page_to_pfn(new_page)); in wp_page_copy()
2947 uxpte_set_present(vma, vmf->address); in wp_page_copy()
2955 set_pte_at_notify(mm, vmf->address, vmf->pte, entry); in wp_page_copy()
2956 update_mmu_cache(vma, vmf->address, vmf->pte); in wp_page_copy()
2957 xpm_integrity_update_hook(vma, vmf->flags, new_page); in wp_page_copy()
2988 update_mmu_tlb(vma, vmf->address, vmf->pte); in wp_page_copy()
2994 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_copy()
2996 * No need to double call mmu_notifier->invalidate_range() callback as in wp_page_copy()
3005 if (page_copied && (vma->vm_flags & VM_LOCKED)) { in wp_page_copy()
3023 * finish_mkwrite_fault - finish page fault for a shared mapping, making PTE
3029 * shared mapping due to PTE being read-only once the mapped page is prepared.
3040 WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED)); in finish_mkwrite_fault()
3041 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, in finish_mkwrite_fault()
3042 &vmf->ptl); in finish_mkwrite_fault()
3047 if (!pte_same(*vmf->pte, vmf->orig_pte)) { in finish_mkwrite_fault()
3048 update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); in finish_mkwrite_fault()
3049 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_mkwrite_fault()
3053 if (unlikely(xpm_integrity_validate_hook(vmf->vma, vmf->flags, in finish_mkwrite_fault()
3054 vmf->address, vmf->page))) { in finish_mkwrite_fault()
3055 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_mkwrite_fault()
3069 struct vm_area_struct *vma = vmf->vma; in wp_pfn_shared()
3071 if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { in wp_pfn_shared()
3074 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_pfn_shared()
3075 vmf->flags |= FAULT_FLAG_MKWRITE; in wp_pfn_shared()
3076 ret = vma->vm_ops->pfn_mkwrite(vmf); in wp_pfn_shared()
3086 __releases(vmf->ptl) in wp_page_shared()
3088 struct vm_area_struct *vma = vmf->vma; in wp_page_shared()
3091 get_page(vmf->page); in wp_page_shared()
3093 if (vma->vm_ops && vma->vm_ops->page_mkwrite) { in wp_page_shared()
3096 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_shared()
3100 put_page(vmf->page); in wp_page_shared()
3105 unlock_page(vmf->page); in wp_page_shared()
3106 put_page(vmf->page); in wp_page_shared()
3110 if (unlikely(xpm_integrity_validate_hook(vmf->vma, vmf->flags, vmf->address, in wp_page_shared()
3111 vmf->page))){ in wp_page_shared()
3112 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_shared()
3113 put_page(vmf->page); in wp_page_shared()
3118 lock_page(vmf->page); in wp_page_shared()
3121 put_page(vmf->page); in wp_page_shared()
3129 * and decrementing the shared-page counter for the old page.
3132 * done by the caller (the low-level page fault routine in most cases).
3140 * We enter with non-exclusive mmap_lock (to exclude vma changes,
3145 __releases(vmf->ptl) in do_wp_page()
3147 struct vm_area_struct *vma = vmf->vma; in do_wp_page()
3149 if (userfaultfd_pte_wp(vma, *vmf->pte)) { in do_wp_page()
3150 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3155 * Userfaultfd write-protect can defer flushes. Ensure the TLB in do_wp_page()
3158 if (unlikely(userfaultfd_wp(vmf->vma) && in do_wp_page()
3159 mm_tlb_flush_pending(vmf->vma->vm_mm))) in do_wp_page()
3160 flush_tlb_page(vmf->vma, vmf->address); in do_wp_page()
3162 vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte); in do_wp_page()
3163 if (!vmf->page) { in do_wp_page()
3169 * Just mark the pages writable and/or call ops->pfn_mkwrite. in do_wp_page()
3171 if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == in do_wp_page()
3175 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3183 if (PageAnon(vmf->page)) { in do_wp_page()
3184 struct page *page = vmf->page; in do_wp_page()
3202 if (unlikely(xpm_integrity_validate_hook(vmf->vma, vmf->flags, vmf->address, in do_wp_page()
3203 vmf->page))){ in do_wp_page()
3204 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3210 } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == in do_wp_page()
3218 get_page(vmf->page); in do_wp_page()
3220 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3228 zap_page_range_single(vma, start_addr, end_addr - start_addr, details); in unmap_mapping_range_vma()
3238 details->first_index, details->last_index) { in unmap_mapping_range_tree()
3240 vba = vma->vm_pgoff; in unmap_mapping_range_tree()
3241 vea = vba + vma_pages(vma) - 1; in unmap_mapping_range_tree()
3242 zba = details->first_index; in unmap_mapping_range_tree()
3245 zea = details->last_index; in unmap_mapping_range_tree()
3250 ((zba - vba) << PAGE_SHIFT) + vma->vm_start, in unmap_mapping_range_tree()
3251 ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start, in unmap_mapping_range_tree()
3257 * unmap_mapping_page() - Unmap single page from processes.
3269 struct address_space *mapping = page->mapping; in unmap_mapping_page()
3276 details.first_index = page->index; in unmap_mapping_page()
3277 details.last_index = page->index + thp_nr_pages(page) - 1; in unmap_mapping_page()
3281 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) in unmap_mapping_page()
3282 unmap_mapping_range_tree(&mapping->i_mmap, &details); in unmap_mapping_page()
3287 * unmap_mapping_pages() - Unmap pages from processes.
3305 details.last_index = start + nr - 1; in unmap_mapping_pages()
3310 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) in unmap_mapping_pages()
3311 unmap_mapping_range_tree(&mapping->i_mmap, &details); in unmap_mapping_pages()
3316 * unmap_mapping_range - unmap the portion of all mmaps in the specified
3336 pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; in unmap_mapping_range()
3341 (holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; in unmap_mapping_range()
3343 hlen = ULONG_MAX - hba + 1; in unmap_mapping_range()
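A sketch of a typical unmap_mapping_range() caller, a hypothetical filesystem hole punch: ptes covering the byte range are torn down in every process mapping the file before the blocks are released; even_cows == 0 leaves private COW copies alone, whereas whole-file truncation passes 1.

#include <linux/fs.h>
#include <linux/mm.h>

static void demo_punch_hole(struct inode *inode, loff_t start, loff_t len)
{
        unmap_mapping_range(inode->i_mapping, start, len, 0);
        /* ... then release the on-disk blocks for [start, start + len) ... */
}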
3351 * We enter with non-exclusive mmap_lock (to exclude vma changes,
3360 struct vm_area_struct *vma = vmf->vma; in do_swap_page()
3369 if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) in do_swap_page()
3372 entry = pte_to_swp_entry(vmf->orig_pte); in do_swap_page()
3375 migration_entry_wait(vma->vm_mm, vmf->pmd, in do_swap_page()
3376 vmf->address); in do_swap_page()
3378 vmf->page = device_private_entry_to_page(entry); in do_swap_page()
3379 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_swap_page()
3380 vmf->address, &vmf->ptl); in do_swap_page()
3381 if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) { in do_swap_page()
3382 spin_unlock(vmf->ptl); in do_swap_page()
3390 get_page(vmf->page); in do_swap_page()
3391 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
3392 vmf->page->pgmap->ops->migrate_to_ram(vmf); in do_swap_page()
3393 put_page(vmf->page); in do_swap_page()
3397 print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL); in do_swap_page()
3405 page = lookup_swap_cache(entry, vma, vmf->address); in do_swap_page()
3411 if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && in do_swap_page()
3415 vmf->address); in do_swap_page()
3425 err = mem_cgroup_charge(page, vma->vm_mm, in do_swap_page()
3451 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_swap_page()
3452 vmf->address, &vmf->ptl); in do_swap_page()
3453 if (likely(pte_same(*vmf->pte, vmf->orig_pte))) in do_swap_page()
3462 count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); in do_swap_page()
3473 locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags); in do_swap_page()
3491 page = ksm_might_need_to_copy(page, vma, vmf->address); in do_swap_page()
3503 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in do_swap_page()
3504 &vmf->ptl); in do_swap_page()
3505 if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) in do_swap_page()
3522 if (unlikely(xpm_integrity_validate_hook(vmf->vma, vmf->flags, in do_swap_page()
3523 vmf->address, page))){ in do_swap_page()
3528 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); in do_swap_page()
3529 dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS); in do_swap_page()
3530 pte = mk_pte(page, vma->vm_page_prot); in do_swap_page()
3531 if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) { in do_swap_page()
3533 vmf->flags &= ~FAULT_FLAG_WRITE; in do_swap_page()
3538 if (pte_swp_soft_dirty(vmf->orig_pte)) in do_swap_page()
3540 if (pte_swp_uffd_wp(vmf->orig_pte)) { in do_swap_page()
3544 set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); in do_swap_page()
3545 arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte); in do_swap_page()
3546 vmf->orig_pte = pte; in do_swap_page()
3550 page_add_new_anon_rmap(page, vma, vmf->address, false); in do_swap_page()
3553 do_page_add_anon_rmap(page, vma, vmf->address, exclusive); in do_swap_page()
3558 (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) in do_swap_page()
3574 if (vmf->flags & FAULT_FLAG_WRITE) { in do_swap_page()
3581 /* No need to invalidate - it was non-present before */ in do_swap_page()
3582 update_mmu_cache(vma, vmf->address, vmf->pte); in do_swap_page()
3584 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
3588 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
3601 * We enter with non-exclusive mmap_lock (to exclude vma changes,
3607 struct vm_area_struct *vma = vmf->vma; in do_anonymous_page()
3612 /* File mapping without ->vm_ops ? */ in do_anonymous_page()
3613 if (vma->vm_flags & VM_SHARED) in do_anonymous_page()
3626 if (pte_alloc(vma->vm_mm, vmf->pmd)) in do_anonymous_page()
3630 if (unlikely(pmd_trans_unstable(vmf->pmd))) in do_anonymous_page()
3634 if (vma->vm_flags & VM_USEREXPTE) { in do_anonymous_page()
3638 if(xpm_integrity_check_hook(vma, vmf->flags, vmf->address, in do_anonymous_page()
3645 /* Use the zero-page for reads */ in do_anonymous_page()
3646 if (!(vmf->flags & FAULT_FLAG_WRITE) && in do_anonymous_page()
3647 !mm_forbids_zeropage(vma->vm_mm)) { in do_anonymous_page()
3648 entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), in do_anonymous_page()
3649 vma->vm_page_prot)); in do_anonymous_page()
3651 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_anonymous_page()
3652 vmf->address, &vmf->ptl); in do_anonymous_page()
3653 if (!pte_none(*vmf->pte)) { in do_anonymous_page()
3654 update_mmu_tlb(vma, vmf->address, vmf->pte); in do_anonymous_page()
3657 ret = check_stable_address_space(vma->vm_mm); in do_anonymous_page()
3662 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
3671 page = alloc_zeroed_user_highpage_movable(vma, vmf->address); in do_anonymous_page()
3675 if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) in do_anonymous_page()
3686 entry = mk_pte(page, vma->vm_page_prot); in do_anonymous_page()
3688 if (vma->vm_flags & VM_WRITE) in do_anonymous_page()
3691 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in do_anonymous_page()
3692 &vmf->ptl); in do_anonymous_page()
3693 if (!pte_none(*vmf->pte)) { in do_anonymous_page()
3694 update_mmu_cache(vma, vmf->address, vmf->pte); in do_anonymous_page()
3698 ret = check_stable_address_space(vma->vm_mm); in do_anonymous_page()
3704 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
3709 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); in do_anonymous_page()
3710 page_add_new_anon_rmap(page, vma, vmf->address, false); in do_anonymous_page()
3711 if (vma->vm_flags & VM_PURGEABLE) in do_anonymous_page()
3716 if (vma->vm_flags & VM_PURGEABLE) in do_anonymous_page()
3717 uxpte_set_present(vma, vmf->address); in do_anonymous_page()
3720 xpm_integrity_update_hook(vma, vmf->flags, page); in do_anonymous_page()
3723 set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); in do_anonymous_page()
3725 /* No need to invalidate - it was non-present before */ in do_anonymous_page()
3726 update_mmu_cache(vma, vmf->address, vmf->pte); in do_anonymous_page()
3728 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
3741 * released depending on flags and vma->vm_ops->fault() return value.
3746 struct vm_area_struct *vma = vmf->vma; in __do_fault()
3764 if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) { in __do_fault()
3765 vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); in __do_fault()
3766 if (!vmf->prealloc_pte) in __do_fault()
3771 ret = vma->vm_ops->fault(vmf); in __do_fault()
3776 if (unlikely(PageHWPoison(vmf->page))) { in __do_fault()
3777 struct page *page = vmf->page; in __do_fault()
3782 page->index, 1, false); in __do_fault()
3789 vmf->page = NULL; in __do_fault()
3794 lock_page(vmf->page); in __do_fault()
3796 VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page); in __do_fault()
3814 struct vm_area_struct *vma = vmf->vma; in pte_alloc_one_map()
3816 if (!pmd_none(*vmf->pmd)) in pte_alloc_one_map()
3818 if (vmf->prealloc_pte) { in pte_alloc_one_map()
3819 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in pte_alloc_one_map()
3820 if (unlikely(!pmd_none(*vmf->pmd))) { in pte_alloc_one_map()
3821 spin_unlock(vmf->ptl); in pte_alloc_one_map()
3825 mm_inc_nr_ptes(vma->vm_mm); in pte_alloc_one_map()
3826 pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); in pte_alloc_one_map()
3827 spin_unlock(vmf->ptl); in pte_alloc_one_map()
3828 vmf->prealloc_pte = NULL; in pte_alloc_one_map()
3829 } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) { in pte_alloc_one_map()
3844 if (pmd_devmap_trans_unstable(vmf->pmd)) in pte_alloc_one_map()
3848 * At this point we know that our vmf->pmd points to a page of ptes in pte_alloc_one_map()
3851 * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still in pte_alloc_one_map()
3852 * be valid and we will re-check to make sure the vmf->pte isn't in pte_alloc_one_map()
3853 * pte_none() under vmf->ptl protection when we return to in pte_alloc_one_map()
3856 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in pte_alloc_one_map()
3857 &vmf->ptl); in pte_alloc_one_map()
3864 struct vm_area_struct *vma = vmf->vma; in deposit_prealloc_pte()
3866 pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); in deposit_prealloc_pte()
3871 mm_inc_nr_ptes(vma->vm_mm); in deposit_prealloc_pte()
3872 vmf->prealloc_pte = NULL; in deposit_prealloc_pte()
3877 struct vm_area_struct *vma = vmf->vma; in do_set_pmd()
3878 bool write = vmf->flags & FAULT_FLAG_WRITE; in do_set_pmd()
3879 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in do_set_pmd()
3895 if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) { in do_set_pmd()
3896 vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); in do_set_pmd()
3897 if (!vmf->prealloc_pte) in do_set_pmd()
3902 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in do_set_pmd()
3903 if (unlikely(!pmd_none(*vmf->pmd))) in do_set_pmd()
3909 entry = mk_huge_pmd(page, vma->vm_page_prot); in do_set_pmd()
3913 add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR); in do_set_pmd()
3921 set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); in do_set_pmd()
3923 update_mmu_cache_pmd(vma, haddr, vmf->pmd); in do_set_pmd()
3929 spin_unlock(vmf->ptl); in do_set_pmd()
3941 * alloc_set_pte - setup new PTE entry for given page and add reverse page
3942 * mapping. If needed, the function allocates page table or use pre-allocated.
3947 * Caller must take care of unlocking vmf->ptl, if vmf->pte is non-NULL on
3951 * vm_ops->map_pages.
3957 struct vm_area_struct *vma = vmf->vma; in alloc_set_pte()
3958 bool write = vmf->flags & FAULT_FLAG_WRITE; in alloc_set_pte()
3962 if (pmd_none(*vmf->pmd) && PageTransCompound(page)) { in alloc_set_pte()
3968 if (!vmf->pte) { in alloc_set_pte()
3974 /* Re-check under ptl */ in alloc_set_pte()
3975 if (unlikely(!pte_none(*vmf->pte))) { in alloc_set_pte()
3976 update_mmu_tlb(vma, vmf->address, vmf->pte); in alloc_set_pte()
3981 if (unlikely(xpm_integrity_validate_hook(vmf->vma, vmf->flags, in alloc_set_pte()
3982 vmf->address, page))) in alloc_set_pte()
3986 entry = mk_pte(page, vma->vm_page_prot); in alloc_set_pte()
3990 /* copy-on-write page */ in alloc_set_pte()
3991 if (write && !(vma->vm_flags & VM_SHARED)) { in alloc_set_pte()
3992 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); in alloc_set_pte()
3993 page_add_new_anon_rmap(page, vma, vmf->address, false); in alloc_set_pte()
3996 inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); in alloc_set_pte()
3999 set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); in alloc_set_pte()
4001 /* no need to invalidate: a not-present page won't be cached */ in alloc_set_pte()
4002 update_mmu_cache(vma, vmf->address, vmf->pte); in alloc_set_pte()
4009 * finish_fault - finish page fault once we have prepared the page to fault
4029 if ((vmf->flags & FAULT_FLAG_WRITE) && in finish_fault()
4030 !(vmf->vma->vm_flags & VM_SHARED)) in finish_fault()
4031 page = vmf->cow_page; in finish_fault()
4033 page = vmf->page; in finish_fault()
4039 if (!(vmf->vma->vm_flags & VM_SHARED)) in finish_fault()
4040 ret = check_stable_address_space(vmf->vma->vm_mm); in finish_fault()
4043 if (vmf->pte) in finish_fault()
4044 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_fault()
4065 return -EINVAL; in fault_around_bytes_set()
4089 * It uses vm_ops->map_pages() to map the pages, which skips the page if it's
4090 * not ready to be mapped: not up-to-date, locked, etc.
4110 unsigned long address = vmf->address, nr_pages, mask; in do_fault_around()
4111 pgoff_t start_pgoff = vmf->pgoff; in do_fault_around()
4117 mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK; in do_fault_around()
4119 vmf->address = max(address & mask, vmf->vma->vm_start); in do_fault_around()
4120 off = ((address - vmf->address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); in do_fault_around()
4121 start_pgoff -= off; in do_fault_around()
4127 end_pgoff = start_pgoff - in do_fault_around()
4128 ((vmf->address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + in do_fault_around()
4129 PTRS_PER_PTE - 1; in do_fault_around()
4130 end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1, in do_fault_around()
4131 start_pgoff + nr_pages - 1); in do_fault_around()
4133 if (pmd_none(*vmf->pmd)) { in do_fault_around()
4134 vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm); in do_fault_around()
4135 if (!vmf->prealloc_pte) in do_fault_around()
4140 vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff); in do_fault_around()
4143 if (pmd_trans_huge(*vmf->pmd)) { in do_fault_around()
4148 /* ->map_pages() haven't done anything useful. Cold page cache? */ in do_fault_around()
4149 if (!vmf->pte) in do_fault_around()
4153 vmf->pte -= (vmf->address >> PAGE_SHIFT) - (address >> PAGE_SHIFT); in do_fault_around()
4154 if (!pte_none(*vmf->pte)) in do_fault_around()
4156 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_fault_around()
4158 vmf->address = address; in do_fault_around()
4159 vmf->pte = NULL; in do_fault_around()
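A worked example of the fault-around window arithmetic above, with illustrative numbers (4 KiB pages and a fault_around_bytes of 65536): nr_pages is 16 and mask is ~0xffff, so a fault at 0x7f1234567890 gives vmf->address = 0x7f1234560000 (clamped up to vma->vm_start if needed), start_pgoff is moved back by the same number of pages, and end_pgoff is then capped so the window never crosses the containing page-table page, the end of the VMA, or start_pgoff + 15.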
4165 struct vm_area_struct *vma = vmf->vma; in do_read_fault()
4169 * Let's call ->map_pages() first and use ->fault() as fallback in do_read_fault()
4173 if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) { in do_read_fault()
4184 unlock_page(vmf->page); in do_read_fault()
4186 put_page(vmf->page); in do_read_fault()
4192 struct vm_area_struct *vma = vmf->vma; in do_cow_fault()
4198 vmf->cow_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); in do_cow_fault()
4199 if (!vmf->cow_page) in do_cow_fault()
4202 if (mem_cgroup_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL)) { in do_cow_fault()
4203 put_page(vmf->cow_page); in do_cow_fault()
4206 cgroup_throttle_swaprate(vmf->cow_page, GFP_KERNEL); in do_cow_fault()
4214 copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma); in do_cow_fault()
4215 __SetPageUptodate(vmf->cow_page); in do_cow_fault()
4218 unlock_page(vmf->page); in do_cow_fault()
4219 put_page(vmf->page); in do_cow_fault()
4224 put_page(vmf->cow_page); in do_cow_fault()
4230 struct vm_area_struct *vma = vmf->vma; in do_shared_fault()
4241 if (vma->vm_ops->page_mkwrite) { in do_shared_fault()
4242 unlock_page(vmf->page); in do_shared_fault()
4246 put_page(vmf->page); in do_shared_fault()
4254 unlock_page(vmf->page); in do_shared_fault()
4255 put_page(vmf->page); in do_shared_fault()
4264 * We enter with non-exclusive mmap_lock (to exclude vma changes,
4273 struct vm_area_struct *vma = vmf->vma; in do_fault()
4274 struct mm_struct *vm_mm = vma->vm_mm; in do_fault()
4280 if (!vma->vm_ops->fault) { in do_fault()
4285 if (unlikely(!pmd_present(*vmf->pmd))) in do_fault()
4288 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, in do_fault()
4289 vmf->pmd, in do_fault()
4290 vmf->address, in do_fault()
4291 &vmf->ptl); in do_fault()
4299 if (unlikely(pte_none(*vmf->pte))) in do_fault()
4304 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_fault()
4306 } else if (!(vmf->flags & FAULT_FLAG_WRITE)) in do_fault()
4308 else if (!(vma->vm_flags & VM_SHARED)) in do_fault()
4314 if (vmf->prealloc_pte) { in do_fault()
4315 pte_free(vm_mm, vmf->prealloc_pte); in do_fault()
4316 vmf->prealloc_pte = NULL; in do_fault()
4338 struct vm_area_struct *vma = vmf->vma; in do_numa_page()
4345 bool was_writable = pte_savedwrite(vmf->orig_pte); in do_numa_page()
4353 vmf->ptl = pte_lockptr(vma->vm_mm, vmf->pmd); in do_numa_page()
4354 spin_lock(vmf->ptl); in do_numa_page()
4355 if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) { in do_numa_page()
4356 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
4364 old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte); in do_numa_page()
4365 pte = pte_modify(old_pte, vma->vm_page_prot); in do_numa_page()
4369 ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte); in do_numa_page()
4370 update_mmu_cache(vma, vmf->address, vmf->pte); in do_numa_page()
4372 page = vm_normal_page(vma, vmf->address, pte); in do_numa_page()
4374 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
4378 /* TODO: handle PTE-mapped THP */ in do_numa_page()
4380 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
4399 if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED)) in do_numa_page()
4404 target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid, in do_numa_page()
4406 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
4428 if (vma_is_anonymous(vmf->vma)) in create_huge_pmd()
4430 if (vmf->vma->vm_ops->huge_fault) in create_huge_pmd()
4431 return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); in create_huge_pmd()
4438 if (vma_is_anonymous(vmf->vma)) { in wp_huge_pmd()
4439 if (userfaultfd_huge_pmd_wp(vmf->vma, orig_pmd)) in wp_huge_pmd()
4443 if (vmf->vma->vm_ops->huge_fault) { in wp_huge_pmd()
4444 vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); in wp_huge_pmd()
4450 /* COW or write-notify handled on pte level: split pmd. */ in wp_huge_pmd()
4451 __split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL); in wp_huge_pmd()
4461 if (vma_is_anonymous(vmf->vma)) in create_huge_pud()
4463 if (vmf->vma->vm_ops->huge_fault) in create_huge_pud()
4464 return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); in create_huge_pud()
4474 if (vma_is_anonymous(vmf->vma)) in wp_huge_pud()
4476 if (vmf->vma->vm_ops->huge_fault) { in wp_huge_pud()
4477 vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); in wp_huge_pud()
4483 /* COW or write-notify not handled on PUD level: split pud.*/ in wp_huge_pud()
4484 __split_huge_pud(vmf->vma, vmf->pud, vmf->address); in wp_huge_pud()
4498 * We enter with non-exclusive mmap_lock (to exclude vma changes, but allow
4508 if (unlikely(pmd_none(*vmf->pmd))) { in handle_pte_fault()
4510 * Leave __pte_alloc() until later: because vm_ops->fault may in handle_pte_fault()
4515 vmf->pte = NULL; in handle_pte_fault()
4518 if (pmd_devmap_trans_unstable(vmf->pmd)) in handle_pte_fault()
4526 vmf->pte = pte_offset_map(vmf->pmd, vmf->address); in handle_pte_fault()
4527 vmf->orig_pte = *vmf->pte; in handle_pte_fault()
4531 * e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and in handle_pte_fault()
4538 if (pte_none(vmf->orig_pte)) { in handle_pte_fault()
4539 pte_unmap(vmf->pte); in handle_pte_fault()
4540 vmf->pte = NULL; in handle_pte_fault()
4544 if (!vmf->pte) { in handle_pte_fault()
4545 if (vma_is_anonymous(vmf->vma)) in handle_pte_fault()
4551 if (!pte_present(vmf->orig_pte)) in handle_pte_fault()
4554 if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) in handle_pte_fault()
4557 vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd); in handle_pte_fault()
4558 spin_lock(vmf->ptl); in handle_pte_fault()
4559 entry = vmf->orig_pte; in handle_pte_fault()
4560 if (unlikely(!pte_same(*vmf->pte, entry))) { in handle_pte_fault()
4561 update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); in handle_pte_fault()
4564 if (vmf->flags & FAULT_FLAG_WRITE) { in handle_pte_fault()
4570 if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry, in handle_pte_fault()
4571 vmf->flags & FAULT_FLAG_WRITE)) { in handle_pte_fault()
4572 update_mmu_cache(vmf->vma, vmf->address, vmf->pte); in handle_pte_fault()
4575 if (vmf->flags & FAULT_FLAG_TRIED) in handle_pte_fault()
4583 if (vmf->flags & FAULT_FLAG_WRITE) in handle_pte_fault()
4584 flush_tlb_fix_spurious_fault(vmf->vma, vmf->address); in handle_pte_fault()
4587 pte_unmap_unlock(vmf->pte, vmf->ptl); in handle_pte_fault()
4608 struct mm_struct *mm = vma->vm_mm; in __handle_mm_fault()
4687 * mm_account_fault - Do page fault accountings
4690 * of perf event counters, but we'll still do the per-task accounting to
4699 * still be in per-arch page fault handlers at the entry of page fault.
4710 * - Unsuccessful faults (e.g. when the address wasn't valid). That in mm_account_fault()
4715 * - Incomplete faults (VM_FAULT_RETRY). They will only be counted in mm_account_fault()
4729 current->maj_flt++; in mm_account_fault()
4731 current->min_flt++; in mm_account_fault()
4761 count_memcg_event_mm(vma->vm_mm, PGFAULT); in handle_mm_fault()
4779 ret = hugetlb_fault(vma->vm_mm, vma, address, flags); in handle_mm_fault()
4804 * We've already handled the fast-path in-line.
4810 return -ENOMEM; in __p4d_alloc()
4814 spin_lock(&mm->page_table_lock); in __p4d_alloc()
4819 spin_unlock(&mm->page_table_lock); in __p4d_alloc()
4827 * We've already handled the fast-path in-line.
4833 return -ENOMEM; in __pud_alloc()
4837 spin_lock(&mm->page_table_lock); in __pud_alloc()
4843 spin_unlock(&mm->page_table_lock); in __pud_alloc()
4851 * We've already handled the fast-path in-line.
4858 return -ENOMEM; in __pmd_alloc()
4937 return -EINVAL; in follow_invalidate_pte()
4941 * follow_pte - look up PTE at a user virtual address
4957 * it is not a good general-purpose API.
4959 * Return: zero on success, -ve otherwise.
4969 * follow_pfn - look up PFN at a user virtual address
4979 * Return: zero and the pfn at @pfn on success, -ve otherwise.
4984 int ret = -EINVAL; in follow_pfn()
4988 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) in follow_pfn()
4991 ret = follow_pte(vma->vm_mm, address, &ptep, &ptl); in follow_pfn()
5005 int ret = -EINVAL; in follow_phys()
5009 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) in follow_phys()
5012 if (follow_pte(vma->vm_mm, address, &ptep, &ptl)) in follow_phys()
5035 int offset = addr & (PAGE_SIZE-1); in generic_access_phys()
5038 return -EINVAL; in generic_access_phys()
5042 return -ENOMEM; in generic_access_phys()
5056 * Access another process' address space as given in mm. If non-NULL, use the
5086 if (!vma || vma->vm_start > addr) in __access_remote_vm()
5088 if (vma->vm_ops && vma->vm_ops->access) in __access_remote_vm()
5089 ret = vma->vm_ops->access(vma, addr, buf, in __access_remote_vm()
5097 offset = addr & (PAGE_SIZE-1); in __access_remote_vm()
5098 if (bytes > PAGE_SIZE-offset) in __access_remote_vm()
5099 bytes = PAGE_SIZE-offset; in __access_remote_vm()
5113 len -= bytes; in __access_remote_vm()
5119 return buf - old_buf; in __access_remote_vm()
5123 * access_remote_vm - access another process' address space
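A sketch of how access_remote_vm() is typically used by ptrace-style code (the wrapper is hypothetical; the mm reference is assumed to have been taken with get_task_mm() by the caller):

#include <linux/mm.h>

static int demo_peek_remote(struct mm_struct *mm, unsigned long addr,
                            void *out, int len)
{
        /* Returns the number of bytes actually copied. */
        int copied = access_remote_vm(mm, addr, out, len, FOLL_FORCE);

        return copied == len ? 0 : -EFAULT;
}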
5168 struct mm_struct *mm = current->mm; in print_vma_addr()
5178 if (vma && vma->vm_file) { in print_vma_addr()
5179 struct file *f = vma->vm_file; in print_vma_addr()
5188 vma->vm_start, in print_vma_addr()
5189 vma->vm_end - vma->vm_start); in print_vma_addr()
5211 if (current->mm) in __might_fault()
5212 might_lock_read(&current->mm->mmap_lock); in __might_fault()
5231 ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); in process_huge_page()
5235 n = (addr_hint - addr) / PAGE_SIZE; in process_huge_page()
5241 for (i = pages_per_huge_page - 1; i >= 2 * n; i--) { in process_huge_page()
5247 base = pages_per_huge_page - 2 * (pages_per_huge_page - n); in process_huge_page()
5248 l = pages_per_huge_page - n; in process_huge_page()
5256 * Process remaining subpages in left-right-left-right pattern in process_huge_page()
5261 int right_idx = base + 2 * l - 1 - i; in process_huge_page()
5296 ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); in clear_huge_page()
5335 copy_user_highpage(copy_arg->dst + idx, copy_arg->src + idx, in copy_subpage()
5336 addr, copy_arg->vma); in copy_subpage()
5344 ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); in copy_user_huge_page()
5385 ret_val -= (PAGE_SIZE - rc); in copy_huge_page_from_user()
5403 page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0, in ptlock_cache_init()
5414 page->ptl = ptl; in ptlock_alloc()
5420 kmem_cache_free(page_ptl_cachep, page->ptl); in ptlock_free()