Lines Matching +full:tlb +full:- +full:split
1 // SPDX-License-Identifier: GPL-2.0-only
21 #include <linux/backing-dev.h>
39 #include <linux/memory-tiers.h>
42 #include <asm/tlb.h>
78 if (!vma->vm_mm) /* vdso */ in hugepage_vma_check()
103 !transhuge_vma_suitable(vma, (vma->vm_end - HPAGE_PMD_SIZE))) in hugepage_vma_check()
111 if (!in_pf && shmem_file(vma->vm_file)) in hugepage_vma_check()
112 return shmem_is_huge(file_inode(vma->vm_file), vma->vm_pgoff, in hugepage_vma_check()
113 !enforce_sysfs, vma->vm_mm, vm_flags); in hugepage_vma_check()
138 if (!vma->anon_vma) in hugepage_vma_check()
183 if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags)) in mm_get_huge_zero_page()
189 if (test_and_set_bit(MMF_HUGE_ZERO_PAGE, &mm->flags)) in mm_get_huge_zero_page()
197 if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags)) in mm_put_huge_zero_page()
261 ret = -EINVAL; in enabled_store()
293 return -EINVAL; in single_hugepage_flag_store()
356 return -EINVAL; in defrag_store()
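The store handlers matched above, enabled_store(), single_hugepage_flag_store() and defrag_store(), return -EINVAL for unrecognised input; they implement the knobs under /sys/kernel/mm/transparent_hugepage/. A minimal user-space sketch of driving them from C, assuming the standard sysfs paths and policy strings (needs root; the helper name is illustrative):

/*
 * Sketch: set the THP policy knobs whose store handlers appear above.
 * Paths and accepted values follow the usual
 * /sys/kernel/mm/transparent_hugepage layout; requires root.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_knob(const char *path, const char *val)
{
    int fd = open(path, O_WRONLY);

    if (fd < 0 || write(fd, val, strlen(val)) < 0) {
        perror(path);
        if (fd >= 0)
            close(fd);
        return -1;
    }
    close(fd);
    return 0;
}

int main(void)
{
    /* "always", "madvise" or "never"; anything else gets -EINVAL back. */
    write_knob("/sys/kernel/mm/transparent_hugepage/enabled", "madvise");
    /* "always", "defer", "defer+madvise", "madvise" or "never". */
    write_knob("/sys/kernel/mm/transparent_hugepage/defrag", "defer+madvise");
    /* Boolean flags such as use_zero_page go through single_hugepage_flag_store(). */
    write_knob("/sys/kernel/mm/transparent_hugepage/use_zero_page", "1");
    return 0;
}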
406 return -ENOMEM; in hugepage_init_sysfs()
454 return -EINVAL; in hugepage_init()
462 * we use page->mapping and page->index in second tail page in hugepage_init()
475 err = register_shrinker(&huge_zero_page_shrinker, "thp-zero"); in hugepage_init()
478 err = register_shrinker(&deferred_split_shrinker, "thp-deferred_split"); in hugepage_init()
484 * where the extra memory used could hurt more than TLB overhead in hugepage_init()
487 if (totalram_pages() < (512 << (20 - PAGE_SHIFT))) { in hugepage_init()
543 if (likely(vma->vm_flags & VM_WRITE)) in maybe_pmd_mkwrite()
556 return &memcg->deferred_split_queue; in get_deferred_split_queue()
558 return &pgdat->deferred_split_queue; in get_deferred_split_queue()
566 return &pgdat->deferred_split_queue; in get_deferred_split_queue()
582 return is_huge_zero_page(&folio->page) || in is_transparent_hugepage()
597 if (off_end <= off_align || (off_end - off_align) < size) in __thp_get_unmapped_area()
604 ret = current->mm->get_unmapped_area(filp, addr, len_pad, in __thp_get_unmapped_area()
621 ret += (off - ret) & (size - 1); in __thp_get_unmapped_area()
635 return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); in thp_get_unmapped_area()
642 struct vm_area_struct *vma = vmf->vma; in __do_huge_pmd_anonymous_page()
645 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in __do_huge_pmd_anonymous_page()
650 if (mem_cgroup_charge(folio, vma->vm_mm, gfp)) { in __do_huge_pmd_anonymous_page()
658 pgtable = pte_alloc_one(vma->vm_mm); in __do_huge_pmd_anonymous_page()
664 clear_huge_page(page, vmf->address, HPAGE_PMD_NR); in __do_huge_pmd_anonymous_page()
672 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in __do_huge_pmd_anonymous_page()
673 if (unlikely(!pmd_none(*vmf->pmd))) { in __do_huge_pmd_anonymous_page()
678 ret = check_stable_address_space(vma->vm_mm); in __do_huge_pmd_anonymous_page()
684 spin_unlock(vmf->ptl); in __do_huge_pmd_anonymous_page()
686 pte_free(vma->vm_mm, pgtable); in __do_huge_pmd_anonymous_page()
692 entry = mk_huge_pmd(page, vma->vm_page_prot); in __do_huge_pmd_anonymous_page()
696 pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable); in __do_huge_pmd_anonymous_page()
697 set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); in __do_huge_pmd_anonymous_page()
698 update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); in __do_huge_pmd_anonymous_page()
699 add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR); in __do_huge_pmd_anonymous_page()
700 mm_inc_nr_ptes(vma->vm_mm); in __do_huge_pmd_anonymous_page()
701 spin_unlock(vmf->ptl); in __do_huge_pmd_anonymous_page()
703 count_memcg_event_mm(vma->vm_mm, THP_FAULT_ALLOC); in __do_huge_pmd_anonymous_page()
708 spin_unlock(vmf->ptl); in __do_huge_pmd_anonymous_page()
711 pte_free(vma->vm_mm, pgtable); in __do_huge_pmd_anonymous_page()
728 const bool vma_madvised = vma && (vma->vm_flags & VM_HUGEPAGE); in vma_thp_gfp_mask()
760 entry = mk_pmd(zero_page, vma->vm_page_prot); in set_huge_zero_page()
769 struct vm_area_struct *vma = vmf->vma; in do_huge_pmd_anonymous_page()
772 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in do_huge_pmd_anonymous_page()
778 khugepaged_enter_vma(vma, vma->vm_flags); in do_huge_pmd_anonymous_page()
780 if (!(vmf->flags & FAULT_FLAG_WRITE) && in do_huge_pmd_anonymous_page()
781 !mm_forbids_zeropage(vma->vm_mm) && in do_huge_pmd_anonymous_page()
786 pgtable = pte_alloc_one(vma->vm_mm); in do_huge_pmd_anonymous_page()
789 zero_page = mm_get_huge_zero_page(vma->vm_mm); in do_huge_pmd_anonymous_page()
791 pte_free(vma->vm_mm, pgtable); in do_huge_pmd_anonymous_page()
795 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in do_huge_pmd_anonymous_page()
797 if (pmd_none(*vmf->pmd)) { in do_huge_pmd_anonymous_page()
798 ret = check_stable_address_space(vma->vm_mm); in do_huge_pmd_anonymous_page()
800 spin_unlock(vmf->ptl); in do_huge_pmd_anonymous_page()
801 pte_free(vma->vm_mm, pgtable); in do_huge_pmd_anonymous_page()
803 spin_unlock(vmf->ptl); in do_huge_pmd_anonymous_page()
804 pte_free(vma->vm_mm, pgtable); in do_huge_pmd_anonymous_page()
808 set_huge_zero_page(pgtable, vma->vm_mm, vma, in do_huge_pmd_anonymous_page()
809 haddr, vmf->pmd, zero_page); in do_huge_pmd_anonymous_page()
810 update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); in do_huge_pmd_anonymous_page()
811 spin_unlock(vmf->ptl); in do_huge_pmd_anonymous_page()
814 spin_unlock(vmf->ptl); in do_huge_pmd_anonymous_page()
815 pte_free(vma->vm_mm, pgtable); in do_huge_pmd_anonymous_page()
825 return __do_huge_pmd_anonymous_page(vmf, &folio->page, gfp); in do_huge_pmd_anonymous_page()
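do_huge_pmd_anonymous_page() above, falling through to __do_huge_pmd_anonymous_page() (or to set_huge_zero_page() for read faults), is reached on the first fault of a suitably aligned anonymous range. A hedged user-space sketch of creating such a range, assuming a 2 MiB PMD size; the over-allocation trick and the MADV_HUGEPAGE call are illustrative, not taken from this file:

/*
 * Sketch: a PMD-aligned anonymous region whose first write fault can be
 * served by do_huge_pmd_anonymous_page() above (a read fault may get the
 * huge zero page instead). 2 MiB PMD size assumed; the authoritative
 * value is /sys/kernel/mm/transparent_hugepage/hpage_pmd_size.
 */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#define PMD_SIZE (2UL << 20)

int main(void)
{
    size_t len = 4 * PMD_SIZE;
    /* Over-allocate so a PMD-aligned start can be chosen inside. */
    char *raw = mmap(NULL, len + PMD_SIZE, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (raw == MAP_FAILED) {
        perror("mmap");
        return 1;
    }
    char *buf = (char *)(((uintptr_t)raw + PMD_SIZE - 1) & ~(PMD_SIZE - 1));

    madvise(buf, len, MADV_HUGEPAGE);   /* only needed in "madvise" mode */
    memset(buf, 0x5a, len);             /* write faults: huge PMDs if possible */

    /* AnonHugePages in /proc/self/smaps shows whether THPs were used. */
    return 0;
}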
832 struct mm_struct *mm = vma->vm_mm; in insert_pfn_pmd()
876 * vmf_insert_pfn_pmd - insert a pmd size pfn
887 unsigned long addr = vmf->address & PMD_MASK; in vmf_insert_pfn_pmd()
888 struct vm_area_struct *vma = vmf->vma; in vmf_insert_pfn_pmd()
889 pgprot_t pgprot = vma->vm_page_prot; in vmf_insert_pfn_pmd()
897 BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && in vmf_insert_pfn_pmd()
899 BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == in vmf_insert_pfn_pmd()
901 BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); in vmf_insert_pfn_pmd()
903 if (addr < vma->vm_start || addr >= vma->vm_end) in vmf_insert_pfn_pmd()
907 pgtable = pte_alloc_one(vma->vm_mm); in vmf_insert_pfn_pmd()
914 insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable); in vmf_insert_pfn_pmd()
922 if (likely(vma->vm_flags & VM_WRITE)) in maybe_pud_mkwrite()
930 struct mm_struct *mm = vma->vm_mm; in insert_pfn_pud()
931 pgprot_t prot = vma->vm_page_prot; in insert_pfn_pud()
965 * vmf_insert_pfn_pud - insert a pud size pfn
976 unsigned long addr = vmf->address & PUD_MASK; in vmf_insert_pfn_pud()
977 struct vm_area_struct *vma = vmf->vma; in vmf_insert_pfn_pud()
978 pgprot_t pgprot = vma->vm_page_prot; in vmf_insert_pfn_pud()
985 BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && in vmf_insert_pfn_pud()
987 BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == in vmf_insert_pfn_pud()
989 BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); in vmf_insert_pfn_pud()
991 if (addr < vma->vm_start || addr >= vma->vm_end) in vmf_insert_pfn_pud()
996 insert_pfn_pud(vma, addr, vmf->pud, pfn, write); in vmf_insert_pfn_pud()
1019 struct mm_struct *mm = vma->vm_mm; in follow_devmap_pmd()
1041 return ERR_PTR(-EEXIST); in follow_devmap_pmd()
1046 return ERR_PTR(-EFAULT); in follow_devmap_pmd()
1063 int ret = -ENOMEM; in copy_huge_pmd()
1065 /* Skip if it can be re-filled on fault */ in copy_huge_pmd()
1077 ret = -EAGAIN; in copy_huge_pmd()
1112 * under splitting since we don't split the page itself, only pmd to in copy_huge_pmd()
1130 /* Page maybe pinned: split and retry the fault on PTEs. */ in copy_huge_pmd()
1136 return -EAGAIN; in copy_huge_pmd()
1174 struct mm_struct *mm = vma->vm_mm; in follow_devmap_pud()
1198 return ERR_PTR(-EEXIST); in follow_devmap_pud()
1203 return ERR_PTR(-EFAULT); in follow_devmap_pud()
1225 ret = -EAGAIN; in copy_huge_pud()
1232 * under splitting since we don't split the page itself, only pud to in copy_huge_pud()
1241 * and split if duplicating fails. in copy_huge_pud()
1256 bool write = vmf->flags & FAULT_FLAG_WRITE; in huge_pud_set_accessed()
1258 vmf->ptl = pud_lock(vmf->vma->vm_mm, vmf->pud); in huge_pud_set_accessed()
1259 if (unlikely(!pud_same(*vmf->pud, orig_pud))) in huge_pud_set_accessed()
1262 touch_pud(vmf->vma, vmf->address, vmf->pud, write); in huge_pud_set_accessed()
1264 spin_unlock(vmf->ptl); in huge_pud_set_accessed()
1270 bool write = vmf->flags & FAULT_FLAG_WRITE; in huge_pmd_set_accessed()
1272 vmf->ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd); in huge_pmd_set_accessed()
1273 if (unlikely(!pmd_same(*vmf->pmd, vmf->orig_pmd))) in huge_pmd_set_accessed()
1276 touch_pmd(vmf->vma, vmf->address, vmf->pmd, write); in huge_pmd_set_accessed()
1279 spin_unlock(vmf->ptl); in huge_pmd_set_accessed()
1284 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in do_huge_pmd_wp_page()
1285 struct vm_area_struct *vma = vmf->vma; in do_huge_pmd_wp_page()
1288 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in do_huge_pmd_wp_page()
1289 pmd_t orig_pmd = vmf->orig_pmd; in do_huge_pmd_wp_page()
1291 vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd); in do_huge_pmd_wp_page()
1292 VM_BUG_ON_VMA(!vma->anon_vma, vma); in do_huge_pmd_wp_page()
1297 spin_lock(vmf->ptl); in do_huge_pmd_wp_page()
1299 if (unlikely(!pmd_same(*vmf->pmd, orig_pmd))) { in do_huge_pmd_wp_page()
1300 spin_unlock(vmf->ptl); in do_huge_pmd_wp_page()
1314 spin_unlock(vmf->ptl); in do_huge_pmd_wp_page()
1316 spin_lock(vmf->ptl); in do_huge_pmd_wp_page()
1317 if (unlikely(!pmd_same(*vmf->pmd, orig_pmd))) { in do_huge_pmd_wp_page()
1318 spin_unlock(vmf->ptl); in do_huge_pmd_wp_page()
1349 spin_unlock(vmf->ptl); in do_huge_pmd_wp_page()
1354 if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry, 1)) in do_huge_pmd_wp_page()
1355 update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); in do_huge_pmd_wp_page()
1356 spin_unlock(vmf->ptl); in do_huge_pmd_wp_page()
1362 spin_unlock(vmf->ptl); in do_huge_pmd_wp_page()
1364 __split_huge_pmd(vma, vmf->pmd, vmf->address, false, NULL); in do_huge_pmd_wp_page()
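copy_huge_pmd() and do_huge_pmd_wp_page() above cover the fork/COW side: at fork() the huge PMD is either shared write-protected or, if the page may be pinned, split so the fault is retried on PTEs; a later write to the shared page ends in the __split_huge_pmd() fallback on the last line above. A hedged user-space sketch of exercising that path; buf is assumed to be a THP-backed anonymous region as set up in the previous sketch:

/*
 * Sketch: fork-time huge-PMD copy followed by a COW write fault.
 * `buf` is assumed to be a PMD-aligned, THP-backed anonymous region
 * (see the earlier sketch).
 */
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

static void fork_and_cow(char *buf)
{
    pid_t pid = fork();     /* copy_huge_pmd() shares the PMD write-protected */

    if (pid == 0) {
        /*
         * Child: the first write takes do_huge_pmd_wp_page(). The page
         * is still shared, so it cannot be reused; the PMD is split
         * (the __split_huge_pmd() fallback above) and the write is
         * retried at PTE level, copying a single base page.
         */
        buf[0] = 1;
        _exit(0);
    }
    if (pid > 0)
        waitpid(pid, NULL, 0);
}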
1373 if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE))) in can_change_pmd_writable()
1384 /* Do we need write faults for uffd-wp tracking? */ in can_change_pmd_writable()
1388 if (!(vma->vm_flags & VM_SHARED)) { in can_change_pmd_writable()
1412 if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) in can_follow_write_pmd()
1415 /* ... or read-only private ones */ in can_follow_write_pmd()
1416 if (!(vma->vm_flags & VM_MAYWRITE)) in can_follow_write_pmd()
1420 if (vma->vm_flags & VM_WRITE) in can_follow_write_pmd()
1430 /* ... and a write-fault isn't required for other reasons. */ in can_follow_write_pmd()
1441 struct mm_struct *mm = vma->vm_mm; in follow_trans_huge_pmd()
1456 return ERR_PTR(-EFAULT); in follow_trans_huge_pmd()
1462 return ERR_PTR(-EMLINK); in follow_trans_huge_pmd()
1483 struct vm_area_struct *vma = vmf->vma; in do_huge_pmd_numa_page()
1484 pmd_t oldpmd = vmf->orig_pmd; in do_huge_pmd_numa_page()
1487 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in do_huge_pmd_numa_page()
1489 int target_nid, last_cpupid = (-1 & LAST_CPUPID_MASK); in do_huge_pmd_numa_page()
1493 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in do_huge_pmd_numa_page()
1494 if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { in do_huge_pmd_numa_page()
1495 spin_unlock(vmf->ptl); in do_huge_pmd_numa_page()
1499 pmd = pmd_modify(oldpmd, vma->vm_page_prot); in do_huge_pmd_numa_page()
1507 can_change_pmd_writable(vma, vmf->address, pmd)) in do_huge_pmd_numa_page()
1533 spin_unlock(vmf->ptl); in do_huge_pmd_numa_page()
1545 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in do_huge_pmd_numa_page()
1546 if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { in do_huge_pmd_numa_page()
1547 spin_unlock(vmf->ptl); in do_huge_pmd_numa_page()
1552 pmd = pmd_modify(oldpmd, vma->vm_page_prot); in do_huge_pmd_numa_page()
1556 set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd); in do_huge_pmd_numa_page()
1557 update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); in do_huge_pmd_numa_page()
1558 spin_unlock(vmf->ptl); in do_huge_pmd_numa_page()
1569 bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, in madvise_free_huge_pmd() argument
1575 struct mm_struct *mm = tlb->mm; in madvise_free_huge_pmd()
1578 tlb_change_page_size(tlb, HPAGE_PMD_SIZE); in madvise_free_huge_pmd()
1606 * If the user wants to discard part of a THP's pages, split it so MADV_FREE in madvise_free_huge_pmd()
1609 if (next - addr != HPAGE_PMD_SIZE) { in madvise_free_huge_pmd()
1628 tlb_remove_pmd_tlb_entry(tlb, pmd, addr); in madvise_free_huge_pmd()
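The next - addr != HPAGE_PMD_SIZE check in madvise_free_huge_pmd() above is what splits a THP when MADV_FREE covers only part of it; a whole, aligned huge page can be handled without splitting. A hedged sketch of both cases from user space; buf is assumed to be a PMD-aligned, THP-backed region of at least two huge pages, e.g. as set up earlier:

/*
 * Sketch: MADV_FREE over part of a THP versus over a whole huge page.
 * `buf` is assumed PMD-aligned, THP-backed and at least 2 * PMD_SIZE
 * long; 2 MiB PMD size assumed.
 */
#define _GNU_SOURCE
#include <sys/mman.h>

#define PMD_SIZE (2UL << 20)

static void madv_free_partial(char *buf)
{
    /*
     * Covers only half of the first huge page, so madvise_free_huge_pmd()
     * sees next - addr != HPAGE_PMD_SIZE and splits the PMD before the
     * individual PTEs are marked lazily freeable.
     */
    madvise(buf, PMD_SIZE / 2, MADV_FREE);

    /* A whole, aligned PMD is handled huge, without splitting. */
    madvise(buf + PMD_SIZE, PMD_SIZE, MADV_FREE);
}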
1648 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, in zap_huge_pmd() argument
1654 tlb_change_page_size(tlb, HPAGE_PMD_SIZE); in zap_huge_pmd()
1666 tlb->fullmm); in zap_huge_pmd()
1668 tlb_remove_pmd_tlb_entry(tlb, pmd, addr); in zap_huge_pmd()
1671 zap_deposited_table(tlb->mm, pmd); in zap_huge_pmd()
1674 zap_deposited_table(tlb->mm, pmd); in zap_huge_pmd()
1696 zap_deposited_table(tlb->mm, pmd); in zap_huge_pmd()
1697 add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); in zap_huge_pmd()
1700 zap_deposited_table(tlb->mm, pmd); in zap_huge_pmd()
1701 add_mm_counter(tlb->mm, mm_counter_file(page), -HPAGE_PMD_NR); in zap_huge_pmd()
1706 tlb_remove_page_size(tlb, page, HPAGE_PMD_SIZE); in zap_huge_pmd()
1717 * With split pmd lock we also need to move preallocated in pmd_move_must_withdraw()
1742 struct mm_struct *mm = vma->vm_mm; in move_huge_pmd()
1788 * - 0 if PMD could not be locked
1789 * - 1 if PMD was locked but protections unchanged and TLB flush unnecessary
1791 * - HPAGE_PMD_NR if protections changed and TLB flush necessary
1793 int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, in change_huge_pmd() argument
1797 struct mm_struct *mm = vma->vm_mm; in change_huge_pmd()
1805 tlb_change_page_size(tlb, HPAGE_PMD_SIZE); in change_huge_pmd()
1851 * Avoid trapping faults against the zero page. The read-only in change_huge_pmd()
1852 * data is likely to be read-cached on the local CPU and in change_huge_pmd()
1888 * // pmd is re-established in change_huge_pmd()
1918 tlb_flush_pmd_range(tlb, addr, HPAGE_PMD_SIZE); in change_huge_pmd()
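change_huge_pmd() above advertises its return contract (0, 1 or HPAGE_PMD_NR) to the mprotect() path, which only takes the huge route when the range spans a whole PMD and splits the PMD otherwise. A hedged user-space sketch of the two cases, under the same buf and 2 MiB PMD assumptions as before:

/*
 * Sketch: mprotect() over THP-backed memory. A whole, aligned PMD is
 * rewritten by change_huge_pmd() (HPAGE_PMD_NR return when protections
 * changed and a TLB flush is needed); a partial range is split first.
 */
#include <sys/mman.h>

#define PMD_SIZE (2UL << 20)

static void protect_thp(char *buf)
{
    /* Whole aligned PMD: stays huge, handled by change_huge_pmd(). */
    mprotect(buf, PMD_SIZE, PROT_READ);

    /* Only part of the second huge page: split, then per-PTE changes. */
    mprotect(buf + PMD_SIZE, PMD_SIZE / 2, PROT_READ);
}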
1933 ptl = pmd_lock(vma->vm_mm, pmd); in __pmd_trans_huge_lock()
1951 ptl = pud_lock(vma->vm_mm, pud); in __pud_trans_huge_lock()
1959 int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, in zap_huge_pud() argument
1968 pudp_huge_get_and_clear_full(vma, addr, pud, tlb->fullmm); in zap_huge_pud()
1969 tlb_remove_pud_tlb_entry(tlb, pud, addr); in zap_huge_pud()
1984 VM_BUG_ON_VMA(vma->vm_start > haddr, vma); in __split_huge_pud_locked()
1985 VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PUD_SIZE, vma); in __split_huge_pud_locked()
1999 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, in __split_huge_pud()
2003 ptl = pud_lock(vma->vm_mm, pud); in __split_huge_pud()
2017 struct mm_struct *mm = vma->vm_mm; in __split_huge_zero_page_pmd()
2042 entry = pfn_pte(my_zero_pfn(addr), vma->vm_page_prot); in __split_huge_zero_page_pmd()
2050 pte_unmap(pte - 1); in __split_huge_zero_page_pmd()
2058 struct mm_struct *mm = vma->vm_mm; in __split_huge_pmd_locked()
2069 VM_BUG_ON_VMA(vma->vm_start > haddr, vma); in __split_huge_pmd_locked()
2070 VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma); in __split_huge_pmd_locked()
2100 add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR); in __split_huge_pmd_locked()
2134 * the whole access to the hugepage during the split (which in __split_huge_pmd_locked()
2135 * happens in place). If we overwrite the pmd with the not-huge in __split_huge_pmd_locked()
2138 * size TLB miss on the small sized TLB while the hugepage TLB in __split_huge_pmd_locked()
2139 * entry is still established in the huge TLB. Some CPU doesn't in __split_huge_pmd_locked()
2144 * two entries loaded in the two TLB is identical (which should in __split_huge_pmd_locked()
2146 * small and huge TLB entries for the same virtual address to be in __split_huge_pmd_locked()
2150 * remain set at all times on the pmd until the split is in __split_huge_pmd_locked()
2151 * complete for this pmd), then we flush the SMP TLB and finally in __split_huge_pmd_locked()
2152 * we write the non-huge version of the pmd entry with in __split_huge_pmd_locked()
2169 * Without "freeze", we'll simply split the PMD, propagating the in __split_huge_pmd_locked()
2171 * each subpage -- no need to (temporarily) clear. in __split_huge_pmd_locked()
2175 * managed to clear PageAnonExclusive() -- see in __split_huge_pmd_locked()
2178 * In case we cannot clear PageAnonExclusive(), split the PMD in __split_huge_pmd_locked()
2187 page_ref_add(page, HPAGE_PMD_NR - 1); in __split_huge_pmd_locked()
2227 entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot)); in __split_huge_pmd_locked()
2234 /* NOTE: this may set soft-dirty too on some archs */ in __split_huge_pmd_locked()
2247 pte_unmap(pte - 1); in __split_huge_pmd_locked()
2265 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, in __split_huge_pmd()
2269 ptl = pmd_lock(vma->vm_mm, pmd); in __split_huge_pmd()
2297 pmd_t *pmd = mm_find_pmd(vma->vm_mm, address); in split_huge_pmd_address()
2309 * contain a hugepage: check if we need to split a huge pmd. in split_huge_pmd_if_needed()
2322 /* Check if we need to split start first. */ in vma_adjust_trans_huge()
2325 /* Check if we need to split end next. */ in vma_adjust_trans_huge()
2330 * check if we need to split it. in vma_adjust_trans_huge()
2333 struct vm_area_struct *next = find_vma(vma->vm_mm, vma->vm_end); in vma_adjust_trans_huge()
2334 unsigned long nstart = next->vm_start; in vma_adjust_trans_huge()
2380 lockdep_assert_held(&lruvec->lru_lock); in lru_add_page_tail()
2386 list_add_tail(&tail->lru, list); in lru_add_page_tail()
2391 tail->mlock_count = 0; in lru_add_page_tail()
2393 list_add_tail(&tail->lru, &head->lru); in lru_add_page_tail()
2401 struct page *head = &folio->page; in __split_huge_page_tail()
2409 VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail); in __split_huge_page_tail()
2417 * Note that for mapped sub-pages of an anonymous THP, in __split_huge_page_tail()
2421 * unreferenced sub-pages of an anonymous THP: we can simply drop in __split_huge_page_tail()
2422 * PG_anon_exclusive (-> PG_mappedtodisk) for these here. in __split_huge_page_tail()
2424 page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; in __split_huge_page_tail()
2425 page_tail->flags |= (head->flags & in __split_huge_page_tail()
2442 /* ->mapping in first and second tail page is replaced by other uses */ in __split_huge_page_tail()
2443 VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, in __split_huge_page_tail()
2445 page_tail->mapping = head->mapping; in __split_huge_page_tail()
2446 page_tail->index = head->index + tail; in __split_huge_page_tail()
2449 * page->private should not be set in tail pages. Fix up and warn once in __split_huge_page_tail()
2452 if (unlikely(page_tail->private)) { in __split_huge_page_tail()
2454 page_tail->private = 0; in __split_huge_page_tail()
2457 new_folio->swap.val = folio->swap.val + tail; in __split_huge_page_tail()
2459 /* Page flags must be visible before we make the page non-compound. */ in __split_huge_page_tail()
2483 * pages to show after the currently processed elements - e.g. in __split_huge_page_tail()
2493 struct page *head = &folio->page; in __split_huge_page()
2504 offset = swp_offset(folio->swap); in __split_huge_page()
2505 swap_cache = swap_address_space(folio->swap); in __split_huge_page()
2506 xa_lock(&swap_cache->i_pages); in __split_huge_page()
2514 for (i = nr - 1; i >= 1; i--) { in __split_huge_page()
2520 if (shmem_mapping(head->mapping)) in __split_huge_page()
2524 inode_to_wb(folio->mapping->host)); in __split_huge_page()
2528 __xa_store(&head->mapping->i_pages, head[i].index, in __split_huge_page()
2531 __xa_store(&swap_cache->i_pages, offset + i, in __split_huge_page()
2547 xa_unlock(&swap_cache->i_pages); in __split_huge_page()
2554 xa_unlock(&head->mapping->i_pages); in __split_huge_page()
2559 shmem_uncharge(head->mapping->host, nr_dropped); in __split_huge_page()
2563 split_swap_cluster(folio->swap); in __split_huge_page()
2576 * of the tail pages after the split is complete. in __split_huge_page()
2582 /* Racy check whether the huge page can be split */
2595 return folio_mapcount(folio) == folio_ref_count(folio) - extra_pins - 1; in can_split_folio()
2600 * subpage of the huge page to split. Splitting doesn't change the position of @page.
2602 * Only the caller may hold a pin on @page, otherwise the split fails with -EBUSY.
2613 * Returns 0 if the hugepage is split successfully.
2614 * Returns -EBUSY if the page is pinned or if anon_vma disappeared from under
2621 XA_STATE(xas, &folio->mapping->i_pages, folio->index); in split_huge_page_to_list()
2631 is_hzp = is_huge_zero_page(&folio->page); in split_huge_page_to_list()
2634 return -EBUSY; in split_huge_page_to_list()
2638 return -EBUSY; in split_huge_page_to_list()
2646 * is taken to serialise against parallel split or collapse in split_huge_page_to_list()
2651 ret = -EBUSY; in split_huge_page_to_list()
2654 end = -1; in split_huge_page_to_list()
2660 mapping = folio->mapping; in split_huge_page_to_list()
2664 ret = -EBUSY; in split_huge_page_to_list()
2672 ret = -EBUSY; in split_huge_page_to_list()
2687 * but on 32-bit, i_size_read() takes an irq-unsafe seqlock, in split_huge_page_to_list()
2692 end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); in split_huge_page_to_list()
2694 end = shmem_fallocend(mapping->host, end); in split_huge_page_to_list()
2698 * Racy check if we can split the page, before unmap_folio() will in split_huge_page_to_list()
2699 * split PMDs in split_huge_page_to_list()
2702 ret = -EAGAIN; in split_huge_page_to_list()
2721 /* Prevent deferred_split_scan() from touching ->_refcount */ in split_huge_page_to_list()
2722 spin_lock(&ds_queue->split_queue_lock); in split_huge_page_to_list()
2725 !list_empty(&folio->_deferred_list)) { in split_huge_page_to_list()
2726 ds_queue->split_queue_len--; in split_huge_page_to_list()
2727 list_del_init(&folio->_deferred_list); in split_huge_page_to_list()
2729 spin_unlock(&ds_queue->split_queue_lock); in split_huge_page_to_list()
2737 NR_SHMEM_THPS, -nr); in split_huge_page_to_list()
2740 NR_FILE_THPS, -nr); in split_huge_page_to_list()
2749 spin_unlock(&ds_queue->split_queue_lock); in split_huge_page_to_list()
2755 ret = -EAGAIN; in split_huge_page_to_list()
2775 * queueing THP splits, and that list is (racily observed to be) non-empty.
2778 * zero: because even when split_queue_lock is held, a non-empty _deferred_list
2779 * might be in use on deferred_split_scan()'s unlocked on-stack list.
2782 * therefore important to unqueue deferred split before changing folio memcg.
2794 spin_lock_irqsave(&ds_queue->split_queue_lock, flags); in __folio_unqueue_deferred_split()
2795 if (!list_empty(&folio->_deferred_list)) { in __folio_unqueue_deferred_split()
2796 ds_queue->split_queue_len--; in __folio_unqueue_deferred_split()
2797 list_del_init(&folio->_deferred_list); in __folio_unqueue_deferred_split()
2800 spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); in __folio_unqueue_deferred_split()
2821 * Exclude swapcache: originally to avoid a corrupt deferred split in deferred_split_folio()
2830 if (!list_empty(&folio->_deferred_list)) in deferred_split_folio()
2833 spin_lock_irqsave(&ds_queue->split_queue_lock, flags); in deferred_split_folio()
2834 if (list_empty(&folio->_deferred_list)) { in deferred_split_folio()
2836 list_add_tail(&folio->_deferred_list, &ds_queue->split_queue); in deferred_split_folio()
2837 ds_queue->split_queue_len++; in deferred_split_folio()
2844 spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); in deferred_split_folio()
2850 struct pglist_data *pgdata = NODE_DATA(sc->nid); in deferred_split_count()
2851 struct deferred_split *ds_queue = &pgdata->deferred_split_queue; in deferred_split_count()
2854 if (sc->memcg) in deferred_split_count()
2855 ds_queue = &sc->memcg->deferred_split_queue; in deferred_split_count()
2857 return READ_ONCE(ds_queue->split_queue_len); in deferred_split_count()
2863 struct pglist_data *pgdata = NODE_DATA(sc->nid); in deferred_split_scan()
2864 struct deferred_split *ds_queue = &pgdata->deferred_split_queue; in deferred_split_scan()
2868 int split = 0; in deferred_split_scan() local
2871 if (sc->memcg) in deferred_split_scan()
2872 ds_queue = &sc->memcg->deferred_split_queue; in deferred_split_scan()
2875 spin_lock_irqsave(&ds_queue->split_queue_lock, flags); in deferred_split_scan()
2877 list_for_each_entry_safe(folio, next, &ds_queue->split_queue, in deferred_split_scan()
2880 list_move(&folio->_deferred_list, &list); in deferred_split_scan()
2883 list_del_init(&folio->_deferred_list); in deferred_split_scan()
2884 ds_queue->split_queue_len--; in deferred_split_scan()
2886 if (!--sc->nr_to_scan) in deferred_split_scan()
2889 spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); in deferred_split_scan()
2896 split++; in deferred_split_scan()
2902 spin_lock_irqsave(&ds_queue->split_queue_lock, flags); in deferred_split_scan()
2903 list_splice_tail(&list, &ds_queue->split_queue); in deferred_split_scan()
2904 spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); in deferred_split_scan()
2907 * Stop the shrinker if we didn't split any page but the queue is now empty. in deferred_split_scan()
2910 if (!split && list_empty(&ds_queue->split_queue)) in deferred_split_scan()
2912 return split; in deferred_split_scan()
2930 unsigned long total = 0, split = 0; in split_huge_pages_all() local
2932 pr_debug("Split all THPs\n"); in split_huge_pages_all()
2937 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) { in split_huge_pages_all()
2962 split++; in split_huge_pages_all()
2963 pfn += nr_pages - 1; in split_huge_pages_all()
2971 pr_debug("%lu of %lu THP split\n", split, total); in split_huge_pages_all()
2976 return vma_is_special_huge(vma) || (vma->vm_flags & VM_IO) || in vma_not_suitable_for_thp_split()
2986 unsigned long total = 0, split = 0; in split_huge_pages_pid() local
2997 ret = -ESRCH; in split_huge_pages_pid()
3008 ret = -EINVAL; in split_huge_pages_pid()
3012 pr_debug("Split huge pages in pid: %d, vaddr: [0x%lx - 0x%lx]\n", in split_huge_pages_pid()
3018 * table filled with PTE-mapped THPs, each of which is distinct. in split_huge_pages_pid()
3030 addr = vma->vm_end; in split_huge_pages_pid()
3052 split++; in split_huge_pages_pid()
3062 pr_debug("%lu of %lu THP split\n", split, total); in split_huge_pages_pid()
3074 int ret = -EINVAL; in split_huge_pages_in_file()
3077 unsigned long total = 0, split = 0; in split_huge_pages_in_file() local
3087 pr_debug("split file-backed THPs in file: %s, page offset: [0x%lx - 0x%lx]\n", in split_huge_pages_in_file()
3090 mapping = candidate->f_mapping; in split_huge_pages_in_file()
3109 split++; in split_huge_pages_in_file()
3120 pr_debug("%lu of %lu file-backed THP split\n", split, total); in split_huge_pages_in_file()
3142 ret = -EFAULT; in split_huge_pages_write()
3148 input_buf[MAX_INPUT_BUF_SZ - 1] = '\0'; in split_huge_pages_write()
3161 ret = -EINVAL; in split_huge_pages_write()
3167 ret = -EINVAL; in split_huge_pages_write()
3183 ret = -EINVAL; in split_huge_pages_write()
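split_huge_pages_all(), split_huge_pages_pid() and split_huge_pages_in_file() above sit behind the debugfs file parsed by split_huge_pages_write(). A hedged usage sketch: the path and the three input forms follow the admin-guide THP documentation and the pr_debug()/sscanf patterns visible above; the pid, addresses and file name are placeholders:

/*
 * Sketch: drive the debugfs THP split interface parsed by
 * split_huge_pages_write() above. Needs root and CONFIG_DEBUG_FS;
 * addresses and offsets are written in hex as the parser expects.
 */
#include <stdio.h>

static void split_request(const char *cmd)
{
    FILE *f = fopen("/sys/kernel/debug/split_huge_pages", "w");

    if (!f) {
        perror("split_huge_pages");
        return;
    }
    fputs(cmd, f);
    fclose(f);
}

int main(void)
{
    /* Split every THP in the system (split_huge_pages_all()). */
    split_request("1");

    /* Split THPs mapped by pid 1234 in the given virtual address range. */
    split_request("1234,0x700000000000,0x700000200000");

    /* Split file-backed THPs of a file over a page-cache offset range. */
    split_request("/tmp/testfile,0x0,0x200");

    return 0;
}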
3215 struct vm_area_struct *vma = pvmw->vma; in set_pmd_migration_entry()
3216 struct mm_struct *mm = vma->vm_mm; in set_pmd_migration_entry()
3217 unsigned long address = pvmw->address; in set_pmd_migration_entry()
3223 if (!(pvmw->pmd && !pvmw->pte)) in set_pmd_migration_entry()
3227 pmdval = pmdp_invalidate(vma, address, pvmw->pmd); in set_pmd_migration_entry()
3232 set_pmd_at(mm, address, pvmw->pmd, pmdval); in set_pmd_migration_entry()
3233 return -EBUSY; in set_pmd_migration_entry()
3253 set_pmd_at(mm, address, pvmw->pmd, pmdswp); in set_pmd_migration_entry()
3263 struct vm_area_struct *vma = pvmw->vma; in remove_migration_pmd()
3264 struct mm_struct *mm = vma->vm_mm; in remove_migration_pmd()
3265 unsigned long address = pvmw->address; in remove_migration_pmd()
3270 if (!(pvmw->pmd && !pvmw->pte)) in remove_migration_pmd()
3273 entry = pmd_to_swp_entry(*pvmw->pmd); in remove_migration_pmd()
3275 pmde = mk_huge_pmd(new, READ_ONCE(vma->vm_page_prot)); in remove_migration_pmd()
3276 if (pmd_swp_soft_dirty(*pvmw->pmd)) in remove_migration_pmd()
3280 if (pmd_swp_uffd_wp(*pvmw->pmd)) in remove_migration_pmd()
3284 /* NOTE: this may contain setting soft-dirty on some archs */ in remove_migration_pmd()
3299 set_pmd_at(mm, haddr, pvmw->pmd, pmde); in remove_migration_pmd()
3301 /* No need to invalidate - it was non-present before */ in remove_migration_pmd()
3302 update_mmu_cache_pmd(vma, address, pvmw->pmd); in remove_migration_pmd()