/*
 * PPC Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
 *
 * Based on the IA-32 version:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/mm.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/export.h>
#include <linux/of_fdt.h>
#include <linux/memblock.h>
#include <linux/moduleparam.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/kmemleak.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/setup.h>
#include <asm/hugetlb.h>
#include <asm/pte-walk.h>

bool hugetlb_disabled = false;

#define hugepd_none(hpd)	(hpd_val(hpd) == 0)

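/* Size of a pte_t relative to a pointer, as an order (log2 of the ratio). */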
#define PTE_T_ORDER	(__builtin_ffs(sizeof(pte_t)) - __builtin_ffs(sizeof(void *)))

pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
	/*
	 * Only called for hugetlbfs pages, hence can ignore THP and the
	 * irq disabled walk.
	 */
	return __find_linux_pte(mm->pgd, addr, NULL, NULL);
}

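/*
 * Allocate a hugepte table for @address and make the hugepd entry (or
 * entries, when several directory slots cover a single huge page) point
 * at it, publishing the new table under @ptl.
 */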
static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
			   unsigned long address, unsigned int pdshift,
			   unsigned int pshift, spinlock_t *ptl)
{
	struct kmem_cache *cachep;
	pte_t *new;
	int i;
	int num_hugepd;

	if (pshift >= pdshift) {
		cachep = PGT_CACHE(PTE_T_ORDER);
		num_hugepd = 1 << (pshift - pdshift);
		new = NULL;
	} else if (IS_ENABLED(CONFIG_PPC_8xx)) {
		cachep = NULL;
		num_hugepd = 1;
		new = pte_alloc_one(mm);
	} else {
		cachep = PGT_CACHE(pdshift - pshift);
		num_hugepd = 1;
		new = NULL;
	}

	if (!cachep && !new) {
		WARN_ONCE(1, "No page table cache created for hugetlb tables");
		return -ENOMEM;
	}

	if (cachep)
		new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));

	BUG_ON(pshift > HUGEPD_SHIFT_MASK);
	BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);

	if (!new)
		return -ENOMEM;

	/*
	 * Make sure other cpus find the hugepd set only after a
	 * properly initialized page table is visible to them.
	 * For more details look for comment in __pte_alloc().
	 */
	smp_wmb();

	spin_lock(ptl);
	/*
	 * We have multiple higher-level entries that point to the same
	 * actual pte location.  Fill in each as we go and backtrack on error.
	 * We need all of these so the DTLB pgtable walk code can find the
	 * right higher-level entry without knowing if it's a hugepage or not.
	 */
	for (i = 0; i < num_hugepd; i++, hpdp++) {
		if (unlikely(!hugepd_none(*hpdp)))
			break;
		hugepd_populate(hpdp, new, pshift);
	}
	/* If we bailed from the for loop early, an error occurred, clean up */
	if (i < num_hugepd) {
		for (i = i - 1; i >= 0; i--, hpdp--)
			*hpdp = __hugepd(0);
		if (cachep)
			kmem_cache_free(cachep, new);
		else
			pte_free(mm, new);
	} else {
		kmemleak_ignore(new);
	}
	spin_unlock(ptl);
	return 0;
}


/*
 * At this point we do the placement change only for BOOK3S 64. This would
 * possibly work on other subarchs.
 */
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	hugepd_t *hpdp = NULL;
	unsigned pshift = __ffs(sz);
	unsigned pdshift = PGDIR_SHIFT;
	spinlock_t *ptl;

	addr &= ~(sz-1);
	pg = pgd_offset(mm, addr);

#ifdef CONFIG_PPC_BOOK3S_64
	if (pshift == PGDIR_SHIFT)
		/* 16GB huge page */
		return (pte_t *) pg;
	else if (pshift > PUD_SHIFT) {
		/*
		 * We need to use hugepd table
		 */
		ptl = &mm->page_table_lock;
		hpdp = (hugepd_t *)pg;
	} else {
		pdshift = PUD_SHIFT;
		pu = pud_alloc(mm, pg, addr);
		if (!pu)
			return NULL;
		if (pshift == PUD_SHIFT)
			return (pte_t *)pu;
		else if (pshift > PMD_SHIFT) {
			ptl = pud_lockptr(mm, pu);
			hpdp = (hugepd_t *)pu;
		} else {
			pdshift = PMD_SHIFT;
			pm = pmd_alloc(mm, pu, addr);
			if (!pm)
				return NULL;
			if (pshift == PMD_SHIFT)
				/* 16MB hugepage */
				return (pte_t *)pm;
			else {
				ptl = pmd_lockptr(mm, pm);
				hpdp = (hugepd_t *)pm;
			}
		}
	}
#else
	if (pshift >= PGDIR_SHIFT) {
		ptl = &mm->page_table_lock;
		hpdp = (hugepd_t *)pg;
	} else {
		pdshift = PUD_SHIFT;
		pu = pud_alloc(mm, pg, addr);
		if (!pu)
			return NULL;
		if (pshift >= PUD_SHIFT) {
			ptl = pud_lockptr(mm, pu);
			hpdp = (hugepd_t *)pu;
		} else {
			pdshift = PMD_SHIFT;
			pm = pmd_alloc(mm, pu, addr);
			if (!pm)
				return NULL;
			ptl = pmd_lockptr(mm, pm);
			hpdp = (hugepd_t *)pm;
		}
	}
#endif
	if (!hpdp)
		return NULL;

	BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));

	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr,
						  pdshift, pshift, ptl))
		return NULL;

	return hugepte_offset(*hpdp, addr, pdshift);
}

#ifdef CONFIG_PPC_BOOK3S_64
/*
 * Tracks gpages after the device tree is scanned and before the
 * huge_boot_pages list is ready on pseries.
 */
#define MAX_NUMBER_GPAGES	1024
__initdata static u64 gpage_freearray[MAX_NUMBER_GPAGES];
__initdata static unsigned nr_gpages;

/*
 * Build list of addresses of gigantic pages.  This function is used in early
 * boot before the buddy allocator is setup.
 */
void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
{
	if (!addr)
		return;
	while (number_of_pages > 0) {
		gpage_freearray[nr_gpages] = addr;
		nr_gpages++;
		number_of_pages--;
		addr += page_size;
	}
}

int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
{
	struct huge_bootmem_page *m;
	if (nr_gpages == 0)
		return 0;
	m = phys_to_virt(gpage_freearray[--nr_gpages]);
	gpage_freearray[nr_gpages] = 0;
	list_add(&m->list, &huge_boot_pages);
	m->hstate = hstate;
	return 1;
}
#endif


int __init alloc_bootmem_huge_page(struct hstate *h)
{

#ifdef CONFIG_PPC_BOOK3S_64
	if (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled())
		return pseries_alloc_bootmem_huge_page(h);
#endif
	return __alloc_bootmem_huge_page(h);
}

#ifndef CONFIG_PPC_BOOK3S_64
#define HUGEPD_FREELIST_SIZE \
	((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))

struct hugepd_freelist {
	struct rcu_head	rcu;
	unsigned int index;
	void *ptes[0];
};

static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);

static void hugepd_free_rcu_callback(struct rcu_head *head)
{
	struct hugepd_freelist *batch =
		container_of(head, struct hugepd_freelist, rcu);
	unsigned int i;

	for (i = 0; i < batch->index; i++)
		kmem_cache_free(PGT_CACHE(PTE_T_ORDER), batch->ptes[i]);

	free_page((unsigned long)batch);
}

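/*
 * Defer freeing of a hugepte table: thread-local mms free it directly,
 * otherwise the table is queued on a per-CPU batch that is released via RCU
 * once it fills up, so concurrent lockless page-table walkers stay safe.
 */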
static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
{
	struct hugepd_freelist **batchp;

	batchp = &get_cpu_var(hugepd_freelist_cur);

	if (atomic_read(&tlb->mm->mm_users) < 2 ||
	    mm_is_thread_local(tlb->mm)) {
		kmem_cache_free(PGT_CACHE(PTE_T_ORDER), hugepte);
		put_cpu_var(hugepd_freelist_cur);
		return;
	}

	if (*batchp == NULL) {
		*batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC);
		(*batchp)->index = 0;
	}

	(*batchp)->ptes[(*batchp)->index++] = hugepte;
	if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
		call_rcu(&(*batchp)->rcu, hugepd_free_rcu_callback);
		*batchp = NULL;
	}
	put_cpu_var(hugepd_freelist_cur);
}
#else
static inline void hugepd_free(struct mmu_gather *tlb, void *hugepte) {}
#endif

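/*
 * Clear the hugepd entry (or entries) for this range and free the backing
 * hugepte table, unless doing so would touch page tables outside the
 * floor..ceiling window the caller is allowed to tear down.
 */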
static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
			      unsigned long start, unsigned long end,
			      unsigned long floor, unsigned long ceiling)
{
	pte_t *hugepte = hugepd_page(*hpdp);
	int i;

	unsigned long pdmask = ~((1UL << pdshift) - 1);
	unsigned int num_hugepd = 1;
	unsigned int shift = hugepd_shift(*hpdp);

	/* Note: On fsl the hpdp may be the first of several */
	if (shift > pdshift)
		num_hugepd = 1 << (shift - pdshift);

	start &= pdmask;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= pdmask;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	for (i = 0; i < num_hugepd; i++, hpdp++)
		*hpdp = __hugepd(0);

	if (shift >= pdshift)
		hugepd_free(tlb, hugepte);
	else if (IS_ENABLED(CONFIG_PPC_8xx))
		pgtable_free_tlb(tlb, hugepte, 0);
	else
		pgtable_free_tlb(tlb, hugepte,
				 get_hugepd_cache_index(pdshift - shift));
}

static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long start;

	start = addr;
	do {
		unsigned long more;

		pmd = pmd_offset(pud, addr);
		next = pmd_addr_end(addr, end);
		if (!is_hugepd(__hugepd(pmd_val(*pmd)))) {
			/*
			 * if it is not hugepd pointer, we should already find
			 * it cleared.
			 */
			WARN_ON(!pmd_none_or_clear_bad(pmd));
			continue;
		}
		/*
		 * Increment next by the size of the huge mapping since
		 * there may be more than one entry at this level for a
		 * single hugepage, but all of them point to
		 * the same kmem cache that holds the hugepte.
		 */
		more = addr + (1 << hugepd_shift(*(hugepd_t *)pmd));
		if (more > next)
			next = more;

		free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
				  addr, next, floor, ceiling);
	} while (addr = next, addr != end);

	start &= PUD_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PUD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pmd = pmd_offset(pud, start);
	pud_clear(pud);
	pmd_free_tlb(tlb, pmd, start);
	mm_dec_nr_pmds(tlb->mm);
}

static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pud_t *pud;
	unsigned long next;
	unsigned long start;

	start = addr;
	do {
		pud = pud_offset(pgd, addr);
		next = pud_addr_end(addr, end);
		if (!is_hugepd(__hugepd(pud_val(*pud)))) {
			if (pud_none_or_clear_bad(pud))
				continue;
			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
					       ceiling);
		} else {
			unsigned long more;
			/*
			 * Increment next by the size of the huge mapping since
			 * there may be more than one entry at this level for a
			 * single hugepage, but all of them point to
			 * the same kmem cache that holds the hugepte.
			 */
			more = addr + (1 << hugepd_shift(*(hugepd_t *)pud));
			if (more > next)
				next = more;

			free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
					  addr, next, floor, ceiling);
		}
	} while (addr = next, addr != end);

	start &= PGDIR_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PGDIR_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pud = pud_offset(pgd, start);
	pgd_clear(pgd);
	pud_free_tlb(tlb, pud, start);
	mm_dec_nr_puds(tlb->mm);
}

/*
 * This function frees user-level page tables of a process.
 */
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
			    unsigned long addr, unsigned long end,
			    unsigned long floor, unsigned long ceiling)
{
	pgd_t *pgd;
	unsigned long next;

	/*
	 * Because there are a number of different possible pagetable
	 * layouts for hugepage ranges, we limit knowledge of how
	 * things should be laid out to the allocation path
	 * (huge_pte_alloc(), above).  Everything else works out the
	 * structure as it goes from information in the hugepd
	 * pointers.  That means that we can't here use the
	 * optimization used in the normal page free_pgd_range(), of
	 * checking whether we're actually covering a large enough
	 * range to have to do anything at the top level of the walk
	 * instead of at the bottom.
	 *
	 * To make sense of this, you should probably go read the big
	 * block comment at the top of the normal free_pgd_range(),
	 * too.
	 */

	do {
		next = pgd_addr_end(addr, end);
		pgd = pgd_offset(tlb->mm, addr);
		if (!is_hugepd(__hugepd(pgd_val(*pgd)))) {
			if (pgd_none_or_clear_bad(pgd))
				continue;
			hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
		} else {
			unsigned long more;
			/*
			 * Increment next by the size of the huge mapping since
			 * there may be more than one entry at the pgd level
			 * for a single hugepage, but all of them point to the
			 * same kmem cache that holds the hugepte.
			 */
			more = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
			if (more > next)
				next = more;

			free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
					  addr, next, floor, ceiling);
		}
	} while (addr = next, addr != end);
}

struct page *follow_huge_pd(struct vm_area_struct *vma,
			    unsigned long address, hugepd_t hpd,
			    int flags, int pdshift)
{
	pte_t *ptep;
	spinlock_t *ptl;
	struct page *page = NULL;
	unsigned long mask;
	int shift = hugepd_shift(hpd);
	struct mm_struct *mm = vma->vm_mm;

retry:
	/*
	 * hugepage directory entries are protected by mm->page_table_lock
	 * Use this instead of huge_pte_lockptr
	 */
	ptl = &mm->page_table_lock;
	spin_lock(ptl);

	ptep = hugepte_offset(hpd, address, pdshift);
	if (pte_present(*ptep)) {
		mask = (1UL << shift) - 1;
		page = pte_page(*ptep);
		page += ((address & mask) >> PAGE_SHIFT);
		if (flags & FOLL_GET)
			get_page(page);
	} else {
		if (is_hugetlb_entry_migration(*ptep)) {
			spin_unlock(ptl);
			__migration_entry_wait(mm, ptep, ptl);
			goto retry;
		}
	}
	spin_unlock(ptl);
	return page;
}

#ifdef CONFIG_PPC_MM_SLICES
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
					unsigned long len, unsigned long pgoff,
					unsigned long flags)
{
	struct hstate *hstate = hstate_file(file);
	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));

#ifdef CONFIG_PPC_RADIX_MMU
	if (radix_enabled())
		return radix__hugetlb_get_unmapped_area(file, addr, len,
						       pgoff, flags);
#endif
	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
}
#endif

unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
	/* With radix we don't use slice, so derive it from vma */
	if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled()) {
		unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);

		return 1UL << mmu_psize_to_shift(psize);
	}
	return vma_kernel_pagesize(vma);
}

static int __init add_huge_page_size(unsigned long long size)
{
	int shift = __ffs(size);
	int mmu_psize;

	/* Check that it is a page size supported by the hardware and
	 * that it fits within pagetable and slice limits. */
	if (size <= PAGE_SIZE || !is_power_of_2(size))
		return -EINVAL;

	mmu_psize = check_and_get_huge_psize(shift);
	if (mmu_psize < 0)
		return -EINVAL;

	BUG_ON(mmu_psize_defs[mmu_psize].shift != shift);

	/* Return if huge page size has already been setup */
	if (size_to_hstate(size))
		return 0;

	hugetlb_add_hstate(shift - PAGE_SHIFT);

	return 0;
}

static int __init hugepage_setup_sz(char *str)
{
	unsigned long long size;

	size = memparse(str, &str);

	if (add_huge_page_size(size) != 0) {
		hugetlb_bad_size();
		pr_err("Invalid huge page size specified(%llu)\n", size);
	}

	return 1;
}
__setup("hugepagesz=", hugepage_setup_sz);

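/*
 * Register every hugepage size the MMU supports and make sure a page-table
 * cache exists for each hugepd geometry that needs one.
 */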
static int __init hugetlbpage_init(void)
{
	bool configured = false;
	int psize;

	if (hugetlb_disabled) {
		pr_info("HugeTLB support is disabled!\n");
		return 0;
	}

	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled() &&
	    !mmu_has_feature(MMU_FTR_16M_PAGE))
		return -ENODEV;

	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
		unsigned shift;
		unsigned pdshift;

		if (!mmu_psize_defs[psize].shift)
			continue;

		shift = mmu_psize_to_shift(psize);

#ifdef CONFIG_PPC_BOOK3S_64
		if (shift > PGDIR_SHIFT)
			continue;
		else if (shift > PUD_SHIFT)
			pdshift = PGDIR_SHIFT;
		else if (shift > PMD_SHIFT)
			pdshift = PUD_SHIFT;
		else
			pdshift = PMD_SHIFT;
#else
		if (shift < PUD_SHIFT)
			pdshift = PMD_SHIFT;
		else if (shift < PGDIR_SHIFT)
			pdshift = PUD_SHIFT;
		else
			pdshift = PGDIR_SHIFT;
#endif

		if (add_huge_page_size(1ULL << shift) < 0)
			continue;
		/*
		 * if we have pdshift and shift value same, we don't
		 * use pgt cache for hugepd.
		 */
		if (pdshift > shift) {
			if (!IS_ENABLED(CONFIG_PPC_8xx))
				pgtable_cache_add(pdshift - shift);
		} else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) ||
			   IS_ENABLED(CONFIG_PPC_8xx)) {
			pgtable_cache_add(PTE_T_ORDER);
		}

		configured = true;
	}

	if (configured) {
		if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE))
			hugetlbpage_init_default();
	} else
		pr_info("Failed to initialize. Disabling HugeTLB");

	return 0;
}

arch_initcall(hugetlbpage_init);

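/*
 * Flush the data and instruction caches for every subpage of a huge page,
 * temporarily mapping highmem subpages where required.
 */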
void flush_dcache_icache_hugepage(struct page *page)
{
	int i;
	void *start;

	BUG_ON(!PageCompound(page));

	for (i = 0; i < compound_nr(page); i++) {
		if (!PageHighMem(page)) {
			__flush_dcache_icache(page_address(page+i));
		} else {
			start = kmap_atomic(page+i);
			__flush_dcache_icache(start);
			kunmap_atomic(start);
		}
	}
}