// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * PowerPC64 SLB support.
 *
 * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
 * Based on earlier code written by:
 * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
 * Copyright (c) 2001 Dave Engebretsen
 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
 */

#include <asm/asm-prototypes.h>
#include <asm/interrupt.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/paca.h>
#include <asm/lppaca.h>
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/smp.h>
#include <linux/compiler.h>
#include <linux/context_tracking.h>
#include <linux/mm_types.h>
#include <linux/pgtable.h>

#include <asm/udbg.h>
#include <asm/code-patching.h>

#include "internal.h"


static long slb_allocate_user(struct mm_struct *mm, unsigned long ea);

bool stress_slb_enabled __initdata;

static int __init parse_stress_slb(char *p)
{
	stress_slb_enabled = true;
	return 0;
}
early_param("stress_slb", parse_stress_slb);

__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_slb_key);
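
/*
 * "stress_slb" is a boot-time debugging option (set via the early_param
 * above). When enabled, switch_slb() throws away everything except the
 * bolted entries on every context switch, and slb_insert_entry()
 * aggressively invalidates non-bolted kernel entries, to exercise the
 * SLB miss and preload paths.
 */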

static void assert_slb_presence(bool present, unsigned long ea)
{
#ifdef CONFIG_DEBUG_VM
	unsigned long tmp;

	WARN_ON_ONCE(mfmsr() & MSR_EE);

	if (!cpu_has_feature(CPU_FTR_ARCH_206))
		return;

	/*
	 * slbfee. requires bit 24 (PPC bit 39) be clear in RB. Hardware
	 * ignores all other bits from 0-27, so just clear them all.
	 */
	ea &= ~((1UL << SID_SHIFT) - 1);
	asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0");

	WARN_ON(present == (tmp == 0));
#endif
}

static inline void slb_shadow_update(unsigned long ea, int ssize,
				     unsigned long flags,
				     enum slb_index index)
{
	struct slb_shadow *p = get_slb_shadow();

	/*
	 * Clear the ESID first so the entry is not valid while we are
	 * updating it. No write barriers are needed here, provided
	 * we only update the current CPU's SLB shadow buffer.
	 */
	WRITE_ONCE(p->save_area[index].esid, 0);
	WRITE_ONCE(p->save_area[index].vsid, cpu_to_be64(mk_vsid_data(ea, ssize, flags)));
	WRITE_ONCE(p->save_area[index].esid, cpu_to_be64(mk_esid_data(ea, ssize, index)));
}

static inline void slb_shadow_clear(enum slb_index index)
{
	WRITE_ONCE(get_slb_shadow()->save_area[index].esid, cpu_to_be64(index));
}

static inline void create_shadowed_slbe(unsigned long ea, int ssize,
					unsigned long flags,
					enum slb_index index)
{
	/*
	 * Updating the shadow buffer before writing the SLB ensures
	 * we don't get a stale entry here if we get preempted by PHYP
	 * between these two statements.
	 */
	slb_shadow_update(ea, ssize, flags, index);

	assert_slb_presence(false, ea);
	asm volatile("slbmte %0,%1" :
		     : "r" (mk_vsid_data(ea, ssize, flags)),
		       "r" (mk_esid_data(ea, ssize, index))
		     : "memory" );
}

/*
 * Insert bolted entries into SLB (which may not be empty, so don't clear
 * slb_cache_ptr).
 */
void __slb_restore_bolted_realmode(void)
{
	struct slb_shadow *p = get_slb_shadow();
	enum slb_index index;

	/* No isync needed because realmode. */
	for (index = 0; index < SLB_NUM_BOLTED; index++) {
		asm volatile("slbmte %0,%1" :
			     : "r" (be64_to_cpu(p->save_area[index].vsid)),
			       "r" (be64_to_cpu(p->save_area[index].esid)));
	}

	assert_slb_presence(true, local_paca->kstack);
}

/*
 * Insert the bolted entries into an empty SLB.
 */
void slb_restore_bolted_realmode(void)
{
	__slb_restore_bolted_realmode();
	get_paca()->slb_cache_ptr = 0;

	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
}

/*
 * This flushes all SLB entries including 0, so it must be called in
 * real mode.
 */
void slb_flush_all_realmode(void)
{
	asm volatile("slbmte %0,%0; slbia" : : "r" (0));
}

static __always_inline void __slb_flush_and_restore_bolted(bool preserve_kernel_lookaside)
{
	struct slb_shadow *p = get_slb_shadow();
	unsigned long ksp_esid_data, ksp_vsid_data;
	u32 ih;

	/*
	 * SLBIA IH=1 on ISA v2.05 and newer processors may preserve lookaside
	 * information created with Class=0 entries, which we use for kernel
	 * SLB entries (the SLB entries themselves are still invalidated).
	 *
	 * Older processors will ignore this optimisation. Over-invalidation
	 * is fine because we never rely on lookaside information existing.
	 */
	if (preserve_kernel_lookaside)
		ih = 1;
	else
		ih = 0;

	ksp_esid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
	ksp_vsid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);

	asm volatile(PPC_SLBIA(%0)" \n"
		     "slbmte %1, %2 \n"
		     :: "i" (ih),
			"r" (ksp_vsid_data),
			"r" (ksp_esid_data)
		     : "memory");
}
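
/*
 * Callers of __slb_flush_and_restore_bolted():
 * - slb_flush_and_restore_bolted() below passes preserve_kernel_lookaside =
 *   false, discarding all lookaside information (used e.g. by
 *   slb_vmalloc_update()).
 * - switch_slb() passes true when it cannot use the slb cache, so that
 *   kernel (Class=0) lookaside information may survive the flush of user
 *   entries.
 */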

/*
 * This flushes non-bolted entries; it can be run in virtual mode. Must
 * be called with interrupts disabled.
 */
void slb_flush_and_restore_bolted(void)
{
	BUILD_BUG_ON(SLB_NUM_BOLTED != 2);

	WARN_ON(!irqs_disabled());

	/*
	 * We can't take a PMU exception in the following code, so hard
	 * disable interrupts.
	 */
	hard_irq_disable();

	isync();
	__slb_flush_and_restore_bolted(false);
	isync();

	assert_slb_presence(true, get_paca()->kstack);

	get_paca()->slb_cache_ptr = 0;

	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
}

void slb_save_contents(struct slb_entry *slb_ptr)
{
	int i;
	unsigned long e, v;

	/* Save slb_cache_ptr value. */
	get_paca()->slb_save_cache_ptr = get_paca()->slb_cache_ptr;

	if (!slb_ptr)
		return;

	for (i = 0; i < mmu_slb_size; i++) {
		asm volatile("slbmfee %0,%1" : "=r" (e) : "r" (i));
		asm volatile("slbmfev %0,%1" : "=r" (v) : "r" (i));
		slb_ptr->esid = e;
		slb_ptr->vsid = v;
		slb_ptr++;
	}
}

void slb_dump_contents(struct slb_entry *slb_ptr)
{
	int i, n;
	unsigned long e, v;
	unsigned long llp;

	if (!slb_ptr)
		return;

	pr_err("SLB contents of cpu 0x%x\n", smp_processor_id());

	for (i = 0; i < mmu_slb_size; i++) {
		e = slb_ptr->esid;
		v = slb_ptr->vsid;
		slb_ptr++;

		if (!e && !v)
			continue;

		pr_err("%02d %016lx %016lx %s\n", i, e, v,
		       (e & SLB_ESID_V) ? "VALID" : "NOT VALID");

		if (!(e & SLB_ESID_V))
			continue;

		llp = v & SLB_VSID_LLP;
		if (v & SLB_VSID_B_1T) {
			pr_err(" 1T ESID=%9lx VSID=%13lx LLP:%3lx\n",
			       GET_ESID_1T(e),
			       (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T, llp);
		} else {
			pr_err(" 256M ESID=%9lx VSID=%13lx LLP:%3lx\n",
			       GET_ESID(e),
			       (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT, llp);
		}
	}

	if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
		/* RR is not so useful as it's often not used for allocation */
		pr_err("SLB RR allocator index %d\n", get_paca()->stab_rr);

		/* Dump slb cache entries as well. */
		pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr);
		pr_err("Valid SLB cache entries:\n");
		n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES);
		for (i = 0; i < n; i++)
			pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
		pr_err("Rest of SLB cache entries:\n");
		for (i = n; i < SLB_CACHE_ENTRIES; i++)
			pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
	}
}

void slb_vmalloc_update(void)
{
	/*
	 * vmalloc is not bolted, so we just have to flush the non-bolted
	 * entries.
	 */
	slb_flush_and_restore_bolted();
}

static bool preload_hit(struct thread_info *ti, unsigned long esid)
{
	unsigned char i;

	for (i = 0; i < ti->slb_preload_nr; i++) {
		unsigned char idx;

		idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
		if (esid == ti->slb_preload_esid[idx])
			return true;
	}
	return false;
}

static bool preload_add(struct thread_info *ti, unsigned long ea)
{
	unsigned char idx;
	unsigned long esid;

	if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
		/* EAs are stored >> 28 so 256MB segments don't need clearing */
		if (ea & ESID_MASK_1T)
			ea &= ESID_MASK_1T;
	}

	esid = ea >> SID_SHIFT;

	if (preload_hit(ti, esid))
		return false;

	idx = (ti->slb_preload_tail + ti->slb_preload_nr) % SLB_PRELOAD_NR;
	ti->slb_preload_esid[idx] = esid;
	if (ti->slb_preload_nr == SLB_PRELOAD_NR)
		ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
	else
		ti->slb_preload_nr++;

	return true;
}
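
/*
 * The preload cache is a small FIFO ring: new ESIDs are written at
 * (slb_preload_tail + slb_preload_nr) % SLB_PRELOAD_NR. When the ring is
 * already full, that slot is the oldest entry, which is overwritten and
 * the tail advanced; preload_hit() scans the ring to avoid duplicates.
 */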

static void preload_age(struct thread_info *ti)
{
	if (!ti->slb_preload_nr)
		return;
	ti->slb_preload_nr--;
	ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
}

void slb_setup_new_exec(void)
{
	struct thread_info *ti = current_thread_info();
	struct mm_struct *mm = current->mm;
	unsigned long exec = 0x10000000;

	WARN_ON(irqs_disabled());

	/*
	 * The preload cache can only be used to determine whether an SLB
	 * entry exists if it does not overflow.
	 */
	if (ti->slb_preload_nr + 2 > SLB_PRELOAD_NR)
		return;

	hard_irq_disable();

	/*
	 * We have no good place to clear the slb preload cache on exec,
	 * flush_thread is about the earliest arch hook but that happens
	 * after we switch to the mm and have already preloaded the SLBEs.
	 *
	 * For the most part it's probably okay to use entries from the
	 * previous exec; they will age out if unused. It may turn out to
	 * be an advantage to clear the cache before switching to it,
	 * however.
	 */

	/*
	 * Preload some userspace segments into the SLB.
	 * Almost all 32-bit and 64-bit PowerPC executables are linked at
	 * 0x10000000, so it makes sense to preload this segment.
	 */
	if (!is_kernel_addr(exec)) {
		if (preload_add(ti, exec))
			slb_allocate_user(mm, exec);
	}

	/* Libraries and mmaps. */
	if (!is_kernel_addr(mm->mmap_base)) {
		if (preload_add(ti, mm->mmap_base))
			slb_allocate_user(mm, mm->mmap_base);
	}

	/* see switch_slb */
	asm volatile("isync" : : : "memory");

	local_irq_enable();
}

void preload_new_slb_context(unsigned long start, unsigned long sp)
{
	struct thread_info *ti = current_thread_info();
	struct mm_struct *mm = current->mm;
	unsigned long heap = mm->start_brk;

	WARN_ON(irqs_disabled());

	/* see above */
	if (ti->slb_preload_nr + 3 > SLB_PRELOAD_NR)
		return;

	hard_irq_disable();

	/* Userspace entry address. */
	if (!is_kernel_addr(start)) {
		if (preload_add(ti, start))
			slb_allocate_user(mm, start);
	}

	/* Top of stack, grows down. */
	if (!is_kernel_addr(sp)) {
		if (preload_add(ti, sp))
			slb_allocate_user(mm, sp);
	}

	/* Bottom of heap, grows up. */
	if (heap && !is_kernel_addr(heap)) {
		if (preload_add(ti, heap))
			slb_allocate_user(mm, heap);
	}

	/* see switch_slb */
	asm volatile("isync" : : : "memory");

	local_irq_enable();
}

static void slb_cache_slbie_kernel(unsigned int index)
{
	unsigned long slbie_data = get_paca()->slb_cache[index];
	unsigned long ksp = get_paca()->kstack;

	slbie_data <<= SID_SHIFT;
	slbie_data |= 0xc000000000000000ULL;
	if ((ksp & slb_esid_mask(mmu_kernel_ssize)) == slbie_data)
		return;
	slbie_data |= mmu_kernel_ssize << SLBIE_SSIZE_SHIFT;

	asm volatile("slbie %0" : : "r" (slbie_data));
}

static void slb_cache_slbie_user(unsigned int index)
{
	unsigned long slbie_data = get_paca()->slb_cache[index];

	slbie_data <<= SID_SHIFT;
	slbie_data |= user_segment_size(slbie_data) << SLBIE_SSIZE_SHIFT;
	slbie_data |= SLBIE_C; /* user slbs have C=1 */

	asm volatile("slbie %0" : : "r" (slbie_data));
}
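
/*
 * The two slbie helpers above differ in how they build the RB value for
 * the slbie instruction: the kernel variant reconstructs a 0xc... kernel
 * effective address and skips the entry covering the current kernel stack
 * (which must stay in the SLB), while the user variant sets the Class bit
 * (SLBIE_C), matching the Class=1 with which user entries are installed.
 */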

/* Flush all user entries from the segment table of the current processor. */
void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
{
	struct thread_info *ti = task_thread_info(tsk);
	unsigned char i;

	/*
	 * We need interrupts hard-disabled here, not just soft-disabled,
	 * so that a PMU interrupt can't occur, which might try to access
	 * user memory (to get a stack trace) and possibly cause an SLB miss,
	 * which would update the slb_cache/slb_cache_ptr fields in the PACA.
	 */
	hard_irq_disable();
	isync();
	if (stress_slb()) {
		__slb_flush_and_restore_bolted(false);
		isync();
		get_paca()->slb_cache_ptr = 0;
		get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;

	} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		/*
		 * SLBIA IH=3 invalidates all Class=1 SLBEs and their
		 * associated lookaside structures, which matches what
		 * switch_slb wants. So ARCH_300 does not use the slb
		 * cache.
		 */
		asm volatile(PPC_SLBIA(3));

	} else {
		unsigned long offset = get_paca()->slb_cache_ptr;

		if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
		    offset <= SLB_CACHE_ENTRIES) {
			/*
			 * Could assert_slb_presence(true) here, but
			 * hypervisor or machine check could have come
			 * in and removed the entry at this point.
			 */

			for (i = 0; i < offset; i++)
				slb_cache_slbie_user(i);

			/* Workaround POWER5 < DD2.1 issue */
			if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
				slb_cache_slbie_user(0);

		} else {
			/* Flush but retain kernel lookaside information */
			__slb_flush_and_restore_bolted(true);
			isync();

			get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
		}

		get_paca()->slb_cache_ptr = 0;
	}
	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;

	copy_mm_to_paca(mm);

	/*
	 * We gradually age out SLBs after a number of context switches to
	 * reduce reload overhead of unused entries (like we do with FP/VEC
	 * reload). Each time load_slb wraps (every 256 switches), take an
	 * entry out of the SLB preload cache.
	 */
	tsk->thread.load_slb++;
	if (!tsk->thread.load_slb) {
		unsigned long pc = KSTK_EIP(tsk);

		preload_age(ti);
		preload_add(ti, pc);
	}

	for (i = 0; i < ti->slb_preload_nr; i++) {
		unsigned char idx;
		unsigned long ea;

		idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
		ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT;

		slb_allocate_user(mm, ea);
	}

	/*
	 * Synchronize slbmte preloads with possible subsequent user memory
	 * address accesses by the kernel (user mode won't happen until
	 * rfid, which is safe).
	 */
	isync();
}
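
/*
 * Summary of the invalidation strategies used in switch_slb():
 * - stress_slb(): flush everything and rely on SLB misses to refill.
 * - ISA v3.0 (ARCH_300): slbia IH=3 invalidates all Class=1 (user) entries
 *   and their lookaside information, so no slb_cache bookkeeping is needed.
 * - Older CPUs: slbie each ESID recorded in the slb cache when the cache is
 *   usable and has not overflowed, otherwise fall back to a full flush that
 *   re-installs the bolted kernel stack entry.
 */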

void slb_set_size(u16 size)
{
	mmu_slb_size = size;
}

void slb_initialize(void)
{
	unsigned long linear_llp, vmalloc_llp, io_llp;
	unsigned long lflags;
	static int slb_encoding_inited;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
	unsigned long vmemmap_llp;
#endif

	/* Prepare our SLB miss handler based on our page size */
	linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
	io_llp = mmu_psize_defs[mmu_io_psize].sllp;
	vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
	get_paca()->vmalloc_sllp = SLB_VSID_KERNEL | vmalloc_llp;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
	vmemmap_llp = mmu_psize_defs[mmu_vmemmap_psize].sllp;
#endif
	if (!slb_encoding_inited) {
		slb_encoding_inited = 1;
		pr_devel("SLB: linear LLP = %04lx\n", linear_llp);
		pr_devel("SLB: io LLP = %04lx\n", io_llp);
#ifdef CONFIG_SPARSEMEM_VMEMMAP
		pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
#endif
	}

	get_paca()->stab_rr = SLB_NUM_BOLTED - 1;
	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;

	lflags = SLB_VSID_KERNEL | linear_llp;

	/* Invalidate the entire SLB (even entry 0) & all the ERATs */
	asm volatile("isync":::"memory");
	asm volatile("slbmte %0,%0"::"r" (0) : "memory");
	asm volatile("isync; slbia; isync":::"memory");
	create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX);

	/*
	 * For the boot cpu, we're running on the stack in init_thread_union,
	 * which is in the first segment of the linear mapping, and also
	 * get_paca()->kstack hasn't been initialized yet.
	 * For secondary cpus, we need to bolt the kernel stack entry now.
	 */
	slb_shadow_clear(KSTACK_INDEX);
	if (raw_smp_processor_id() != boot_cpuid &&
	    (get_paca()->kstack & slb_esid_mask(mmu_kernel_ssize)) > PAGE_OFFSET)
		create_shadowed_slbe(get_paca()->kstack,
				     mmu_kernel_ssize, lflags, KSTACK_INDEX);

	asm volatile("isync":::"memory");
}

static void slb_cache_update(unsigned long esid_data)
{
	int slb_cache_index;

	if (cpu_has_feature(CPU_FTR_ARCH_300))
		return; /* ISA v3.0B and later do not use slb_cache */

	if (stress_slb())
		return;

	/*
	 * Now update slb cache entries
	 */
	slb_cache_index = local_paca->slb_cache_ptr;
	if (slb_cache_index < SLB_CACHE_ENTRIES) {
		/*
		 * We have space in slb cache for optimized switch_slb().
		 * Top 36 bits from esid_data as per ISA
		 */
		local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT;
		local_paca->slb_cache_ptr++;
	} else {
		/*
		 * Our cache is full and the current cache contents no
		 * longer strictly describe the active SLB. Bump the ptr
		 * so that switch_slb() will ignore the cache.
		 */
		local_paca->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
	}
}
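
/*
 * slb_cache_ptr values and what they mean to switch_slb():
 *   0 .. SLB_CACHE_ENTRIES     number of valid entries in slb_cache[]
 *   SLB_CACHE_ENTRIES + 1      the cache overflowed, so switch_slb()
 *                              ignores it and does a full flush instead.
 */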

static enum slb_index alloc_slb_index(bool kernel)
{
	enum slb_index index;

	/*
	 * The allocation bitmaps can become out of sync with the SLB
	 * when the _switch code does slbie when bolting a new stack
	 * segment, which must not be present anywhere else in the SLB.
	 * This leaves a kernel allocated entry that is unused in the SLB.
	 * With very large systems or small segment sizes, the bitmaps
	 * could slowly fill with these entries. They will eventually be
	 * cleared out by the round robin allocator in that case, so it's
	 * probably not worth accounting for.
	 */

	/*
	 * SLB entries beyond 32 are allocated with stab_rr only.
	 * POWER7/8/9 have 32 SLB entries; this could be expanded if a
	 * future CPU has more.
	 */
	if (local_paca->slb_used_bitmap != U32_MAX) {
		index = ffz(local_paca->slb_used_bitmap);
		local_paca->slb_used_bitmap |= 1U << index;
		if (kernel)
			local_paca->slb_kern_bitmap |= 1U << index;
	} else {
		/* round-robin replacement of slb starting at SLB_NUM_BOLTED. */
		index = local_paca->stab_rr;
		if (index < (mmu_slb_size - 1))
			index++;
		else
			index = SLB_NUM_BOLTED;
		local_paca->stab_rr = index;
		if (index < 32) {
			if (kernel)
				local_paca->slb_kern_bitmap |= 1U << index;
			else
				local_paca->slb_kern_bitmap &= ~(1U << index);
		}
	}
	BUG_ON(index < SLB_NUM_BOLTED);

	return index;
}
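
/*
 * In short: while slb_used_bitmap has a free bit, indexes are handed out
 * first-fit from the bitmap (ffz); once all 32 tracked slots are in use,
 * allocation falls back to round-robin via stab_rr, wrapping back to
 * SLB_NUM_BOLTED so the bolted entries are never victimised.
 */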

static long slb_insert_entry(unsigned long ea, unsigned long context,
			     unsigned long flags, int ssize, bool kernel)
{
	unsigned long vsid;
	unsigned long vsid_data, esid_data;
	enum slb_index index;

	vsid = get_vsid(context, ea, ssize);
	if (!vsid)
		return -EFAULT;

	/*
	 * There must not be a kernel SLB fault in alloc_slb_index or before
	 * slbmte here or the allocation bitmaps could get out of whack with
	 * the SLB.
	 *
	 * User SLB faults or preloads take this path which might get inlined
	 * into the caller, so add compiler barriers here to ensure unsafe
	 * memory accesses do not come between.
	 */
	barrier();

	index = alloc_slb_index(kernel);

	vsid_data = __mk_vsid_data(vsid, ssize, flags);
	esid_data = mk_esid_data(ea, ssize, index);

	/*
	 * No need for an isync before or after this slbmte. The exception
	 * we enter with and the rfid we exit with are context synchronizing.
	 * User preloads should add isync afterwards in case the kernel
	 * accesses user memory before it returns to userspace with rfid.
	 */
	assert_slb_presence(false, ea);
	if (stress_slb()) {
		int slb_cache_index = local_paca->slb_cache_ptr;

		/*
		 * stress_slb() does not use the slb cache, so repurpose it as
		 * a cache of inserted (non-bolted) kernel SLB entries. All
		 * non-bolted kernel entries are flushed on any user fault,
		 * or if there are already 3 non-bolted kernel entries.
		 */
		BUILD_BUG_ON(SLB_CACHE_ENTRIES < 3);
		if (!kernel || slb_cache_index == 3) {
			int i;

			for (i = 0; i < slb_cache_index; i++)
				slb_cache_slbie_kernel(i);
			slb_cache_index = 0;
		}

		if (kernel)
			local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT;
		local_paca->slb_cache_ptr = slb_cache_index;
	}
	asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));

	barrier();

	if (!kernel)
		slb_cache_update(esid_data);

	return 0;
}

static long slb_allocate_kernel(unsigned long ea, unsigned long id)
{
	unsigned long context;
	unsigned long flags;
	int ssize;

	if (id == LINEAR_MAP_REGION_ID) {

		/* We only support up to H_MAX_PHYSMEM_BITS */
		if ((ea & EA_MASK) > (1UL << H_MAX_PHYSMEM_BITS))
			return -EFAULT;

		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;

#ifdef CONFIG_SPARSEMEM_VMEMMAP
	} else if (id == VMEMMAP_REGION_ID) {

		if (ea >= H_VMEMMAP_END)
			return -EFAULT;

		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
#endif
	} else if (id == VMALLOC_REGION_ID) {

		if (ea >= H_VMALLOC_END)
			return -EFAULT;

		flags = local_paca->vmalloc_sllp;

	} else if (id == IO_REGION_ID) {

		if (ea >= H_KERN_IO_END)
			return -EFAULT;

		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;

	} else {
		return -EFAULT;
	}

	ssize = MMU_SEGSIZE_1T;
	if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
		ssize = MMU_SEGSIZE_256M;

	context = get_kernel_context(ea);

	return slb_insert_entry(ea, context, flags, ssize, true);
}
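
/*
 * For example, a miss on a linear-map address (LINEAR_MAP_REGION_ID) is
 * accepted up to H_MAX_PHYSMEM_BITS and gets SLB_VSID_KERNEL plus the
 * linear mapping's page-size encoding; every kernel region then uses a
 * 1T segment when the CPU supports it and the kernel context for that
 * address, before being inserted via slb_insert_entry(..., true).
 */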

static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
{
	unsigned long context;
	unsigned long flags;
	int bpsize;
	int ssize;

	/*
	 * Consider this a bad access if we take an SLB miss
	 * on an address above the addr limit.
	 */
	if (ea >= mm_ctx_slb_addr_limit(&mm->context))
		return -EFAULT;

	context = get_user_context(&mm->context, ea);
	if (!context)
		return -EFAULT;

	if (unlikely(ea >= H_PGTABLE_RANGE)) {
		WARN_ON(1);
		return -EFAULT;
	}

	ssize = user_segment_size(ea);

	bpsize = get_slice_psize(mm, ea);
	flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;

	return slb_insert_entry(ea, context, flags, ssize, false);
}

DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault)
{
	unsigned long ea = regs->dar;
	unsigned long id = get_region_id(ea);

	/* IRQs are not reconciled here, so can't check irqs_disabled */
	VM_WARN_ON(mfmsr() & MSR_EE);

	if (regs_is_unrecoverable(regs))
		return -EINVAL;

	/*
	 * SLB kernel faults must be very careful not to touch anything that is
	 * not bolted. E.g., PACA and global variables are okay, mm->context
	 * stuff is not. SLB user faults may access all of memory (and induce
	 * one recursive SLB kernel fault), so the kernel fault must not
	 * trample on the user fault state at those points.
	 */

	/*
	 * This is a raw interrupt handler, for performance, so that
	 * fast_interrupt_return can be used. The handler must not touch local
	 * irq state, or schedule. We could test for usermode and upgrade to a
	 * normal process context (synchronous) interrupt for those, which
	 * would make them first-class kernel code and able to be traced and
	 * instrumented; although performance would suffer a bit, it would
	 * probably be a good tradeoff.
	 */
	if (id >= LINEAR_MAP_REGION_ID) {
		long err;
#ifdef CONFIG_DEBUG_VM
		/* Catch recursive kernel SLB faults. */
		BUG_ON(local_paca->in_kernel_slb_handler);
		local_paca->in_kernel_slb_handler = 1;
#endif
		err = slb_allocate_kernel(ea, id);
#ifdef CONFIG_DEBUG_VM
		local_paca->in_kernel_slb_handler = 0;
#endif
		return err;
	} else {
		struct mm_struct *mm = current->mm;
		long err;

		if (unlikely(!mm))
			return -EFAULT;

		err = slb_allocate_user(mm, ea);
		if (!err)
			preload_add(current_thread_info(), ea);

		return err;
	}
}
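
/*
 * A zero return from do_slb_fault() means the miss was handled. A non-zero
 * error (-EFAULT or -EINVAL) reaches do_bad_slb_fault() below through
 * regs->result (stored by the low-level exception entry code, outside this
 * file), which then raises SIGSEGV, calls bad_page_fault(), or treats the
 * state as unrecoverable.
 */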

DEFINE_INTERRUPT_HANDLER(do_bad_slb_fault)
{
	int err = regs->result;

	if (err == -EFAULT) {
		if (user_mode(regs))
			_exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
		else
			bad_page_fault(regs, SIGSEGV);
	} else if (err == -EINVAL) {
		unrecoverable_exception(regs);
	} else {
		BUG();
	}
}