// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * This file contains the routines for handling the MMU on those
 * PowerPC implementations where the MMU is not using the hash
 * table, such as 8xx, 4xx, BookE, etc.
 *
 * Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org>
 *                IBM Corp.
 *
 * Derived from previous arch/powerpc/mm/mmu_context.c
 * and arch/powerpc/include/asm/mmu_context.h
 *
 * TODO:
 *
 *   - The global context lock will not scale very well
 *   - The maps should be dynamically allocated to allow for processors
 *     that support more PID bits at runtime
 *   - Implement flush_tlb_mm() by making the context stale and picking
 *     a new one
 *   - More aggressively clear stale map bits and maybe find some way to
 *     also clear mm->cpu_vm_mask bits when processes are migrated
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/memblock.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/slab.h>

#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/smp.h>

#include <mm/mmu_decl.h>

/*
 * Room for two PTE table pointers: the kernel and the current user
 * pointers to their respective root page tables (pgdir).
 */
void *abatron_pteptrs[2];

/*
 * The MPC8xx has only 16 contexts. We rotate through them on each task switch.
 * A better way would be to keep track of tasks that own contexts, and implement
 * an LRU scheme. That way very active tasks don't always have to pay the TLB
 * reload overhead. The kernel pages are mapped shared, so the kernel can run on
 * behalf of any task that makes a kernel entry. Shared does not mean they are
 * not protected, just that the ASID comparison is not performed. -- Dan
 *
 * The IBM4xx has 256 contexts, so we can just rotate through these as a way of
 * "switching" contexts. If the TID of the TLB is zero, the PID/TID comparison
 * is disabled, so we can use a TID of zero to represent all kernel pages as
 * shared among all contexts. -- Dan
 *
 * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We should
 * normally never have to steal, though the facility is present if needed.
 * -- BenH
 */
#define FIRST_CONTEXT 1
#if defined(CONFIG_PPC_8xx)
#define LAST_CONTEXT 16
#elif defined(CONFIG_PPC_47x)
#define LAST_CONTEXT 65535
#else
#define LAST_CONTEXT 255
#endif

static unsigned int next_context, nr_free_contexts;
static unsigned long *context_map;
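/* Per-CPU map of context ids whose TLB entries may be stale on that CPU */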
static unsigned long *stale_map[NR_CPUS];
static struct mm_struct **context_mm;
static DEFINE_RAW_SPINLOCK(context_lock);

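/* Bytes for a bitmap with one bit per context id (0..LAST_CONTEXT), rounded up to whole longs */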
#define CTX_MAP_SIZE \
	(sizeof(unsigned long) * (LAST_CONTEXT / BITS_PER_LONG + 1))

/* Steal a context from a task that has one at the moment.
 *
 * This is used when we are running out of available PID numbers
 * on the processors.
 *
 * This isn't an LRU system, it just frees up each context in
 * turn (sort-of pseudo-random replacement :). This would be the
 * place to implement an LRU scheme if anyone was motivated to do it.
 *  -- paulus
 *
 * For context stealing, we use a slightly different approach for
 * SMP and UP. Basically, the UP one is simpler and doesn't use
 * the stale map as we can just flush the local CPU.
 *  -- benh
 */
static unsigned int steal_context_smp(unsigned int id)
{
	struct mm_struct *mm;
	unsigned int cpu, max, i;

	max = LAST_CONTEXT - FIRST_CONTEXT;

	/* Attempt to free next_context first and then loop until we manage */
	while (max--) {
		/* Pick up the victim mm */
		mm = context_mm[id];

		/* We have a candidate victim; check if it's active, since on SMP
		 * we cannot steal active contexts
		 */
		if (mm->context.active) {
			id++;
			if (id > LAST_CONTEXT)
				id = FIRST_CONTEXT;
			continue;
		}

		/* Mark this mm as having no context anymore */
		mm->context.id = MMU_NO_CONTEXT;

		/* Mark it stale on all CPUs that used this mm. For threaded
		 * implementations, we set it on all threads on each core
		 * represented in the mask. A future implementation will use
		 * a core map instead but this will do for now.
		 */
		for_each_cpu(cpu, mm_cpumask(mm)) {
			for (i = cpu_first_thread_sibling(cpu);
			     i <= cpu_last_thread_sibling(cpu); i++) {
				if (stale_map[i])
					__set_bit(id, stale_map[i]);
			}
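			/* Resume the outer loop after the siblings we just covered */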
			cpu = i - 1;
		}
		return id;
	}

	/* This will happen if you have more CPUs than available contexts;
	 * all we can do here is wait a bit and try again
	 */
	raw_spin_unlock(&context_lock);
	cpu_relax();
	raw_spin_lock(&context_lock);

	/* This will cause the caller to try again */
	return MMU_NO_CONTEXT;
}

static unsigned int steal_all_contexts(void)
{
	struct mm_struct *mm;
	int cpu = smp_processor_id();
	unsigned int id;

	for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
		/* Pick up the victim mm */
		mm = context_mm[id];

		/* Mark this mm as having no context anymore */
		mm->context.id = MMU_NO_CONTEXT;
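		/* Keep FIRST_CONTEXT marked in use: it is handed back to the caller */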
		if (id != FIRST_CONTEXT) {
			context_mm[id] = NULL;
			__clear_bit(id, context_map);
		}
		if (IS_ENABLED(CONFIG_SMP))
			__clear_bit(id, stale_map[cpu]);
	}

	/* Flush the TLB for all contexts (not to be used on SMP) */
	_tlbil_all();

	nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT;

	return FIRST_CONTEXT;
}

/* Note that this will also be called on SMP if all other CPUs are
 * offlined, which means that it may be called for cpu != 0. For
 * this to work, we somewhat assume that CPUs that are onlined
 * come up with a fully clean TLB (or are cleaned when offlined)
 */
static unsigned int steal_context_up(unsigned int id)
{
	struct mm_struct *mm;
	int cpu = smp_processor_id();

	/* Pick up the victim mm */
	mm = context_mm[id];

	/* Flush the TLB for that context */
	local_flush_tlb_mm(mm);

	/* Mark this mm as having no context anymore */
	mm->context.id = MMU_NO_CONTEXT;

	/* XXX This clear should ultimately be part of local_flush_tlb_mm */
	if (IS_ENABLED(CONFIG_SMP))
		__clear_bit(id, stale_map[cpu]);

	return id;
}

static void set_context(unsigned long id, pgd_t *pgd)
{
	if (IS_ENABLED(CONFIG_PPC_8xx)) {
		s16 offset = (s16)(__pa(swapper_pg_dir));

		/*
		 * Register M_TWB will contain the base address of the level 1
		 * table minus the lower part of the kernel PGDIR base address,
		 * so that all accesses to the level 1 table are done relative
		 * to the lower part of the kernel PGDIR base address.
		 */
		mtspr(SPRN_M_TWB, __pa(pgd) - offset);

		/* Update context */
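		/* The hardware CASID holds 0..15 while our ids run from FIRST_CONTEXT (1) to 16 */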
		mtspr(SPRN_M_CASID, id - 1);

		/* sync */
		mb();
	} else {
		if (IS_ENABLED(CONFIG_40x))
			mb();	/* sync */

		mtspr(SPRN_PID, id);
		isync();
	}
}

void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
			struct task_struct *tsk)
{
	unsigned int id;
	unsigned int i, cpu = smp_processor_id();
	unsigned long *map;

	/* No lockless fast path .. yet */
	raw_spin_lock(&context_lock);

	if (IS_ENABLED(CONFIG_SMP)) {
		/* Mark us active and the previous one not anymore */
		next->context.active++;
		if (prev) {
			WARN_ON(prev->context.active < 1);
			prev->context.active--;
		}
	}

again:

	/* If we already have a valid assigned context, skip all that */
	id = next->context.id;
	if (likely(id != MMU_NO_CONTEXT))
		goto ctxt_ok;

	/* We really don't have a context, let's try to acquire one */
	id = next_context;
	if (id > LAST_CONTEXT)
		id = FIRST_CONTEXT;
	map = context_map;

	/* No more free contexts, let's try to steal one */
	if (nr_free_contexts == 0) {
		if (num_online_cpus() > 1) {
			id = steal_context_smp(id);
			if (id == MMU_NO_CONTEXT)
				goto again;
			goto stolen;
		}
		if (IS_ENABLED(CONFIG_PPC_8xx))
			id = steal_all_contexts();
		else
			id = steal_context_up(id);
		goto stolen;
	}
	nr_free_contexts--;

	/* We know there's at least one free context, try to find it */
	while (__test_and_set_bit(id, map)) {
		id = find_next_zero_bit(map, LAST_CONTEXT+1, id);
		if (id > LAST_CONTEXT)
			id = FIRST_CONTEXT;
	}
stolen:
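	/* Resume the next search right after the id we are handing out */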
	next_context = id + 1;
	context_mm[id] = next;
	next->context.id = id;

ctxt_ok:

	/* If that context got marked stale on this CPU, then flush the
	 * local TLB for it and unmark it before we use it
	 */
	if (IS_ENABLED(CONFIG_SMP) && test_bit(id, stale_map[cpu])) {
		local_flush_tlb_mm(next);

		/* XXX This clear should ultimately be part of local_flush_tlb_mm */
		for (i = cpu_first_thread_sibling(cpu);
		     i <= cpu_last_thread_sibling(cpu); i++) {
			if (stale_map[i])
				__clear_bit(id, stale_map[i]);
		}
	}

	/* Flick the MMU and release lock */
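	/* With CONFIG_BDI_SWITCH, the Abatron BDI2000 debugger reads the user pgdir from here */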
	if (IS_ENABLED(CONFIG_BDI_SWITCH))
		abatron_pteptrs[1] = next->pgd;
	set_context(id, next->pgd);
	raw_spin_unlock(&context_lock);
}

/*
 * Set up the context for a new address space.
 */
int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
	/*
	 * We have MMU_NO_CONTEXT set to be ~0. Hence check
	 * explicitly against context.id == 0. This ensures that we properly
	 * initialize context slice details for newly allocated mm's (which will
	 * have id == 0) and don't alter context slice inherited via fork (which
	 * will have id != 0).
	 */
	if (mm->context.id == 0)
		slice_init_new_context_exec(mm);
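	/* The actual hardware context is allocated lazily, on the first switch_mmu_context() */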
	mm->context.id = MMU_NO_CONTEXT;
	mm->context.active = 0;
	pte_frag_set(&mm->context, NULL);
	return 0;
}

/*
 * We're finished using the context for an address space.
 */
void destroy_context(struct mm_struct *mm)
{
	unsigned long flags;
	unsigned int id;

	if (mm->context.id == MMU_NO_CONTEXT)
		return;

	WARN_ON(mm->context.active != 0);

	raw_spin_lock_irqsave(&context_lock, flags);
	id = mm->context.id;
	if (id != MMU_NO_CONTEXT) {
		__clear_bit(id, context_map);
		mm->context.id = MMU_NO_CONTEXT;
		context_mm[id] = NULL;
		nr_free_contexts++;
	}
	raw_spin_unlock_irqrestore(&context_lock, flags);
}

static int mmu_ctx_cpu_prepare(unsigned int cpu)
{
	/* We don't touch the boot CPU map, it's allocated at boot and kept
	 * around forever
	 */
	if (cpu == boot_cpuid)
		return 0;

	stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
	return 0;
}

static int mmu_ctx_cpu_dead(unsigned int cpu)
{
#ifdef CONFIG_HOTPLUG_CPU
	if (cpu == boot_cpuid)
		return 0;

	kfree(stale_map[cpu]);
	stale_map[cpu] = NULL;

	/* We also clear the cpu_vm_mask bits of CPUs going away */
	clear_tasks_mm_cpumask(cpu);
#endif
	return 0;
}

/*
 * Initialize the context management stuff.
 */
void __init mmu_context_init(void)
{
	/* Mark init_mm as being active on all possible CPUs since
	 * we'll get called with prev == init_mm the first time
	 * we schedule on a given CPU
	 */
	init_mm.context.active = NR_CPUS;

	/*
	 * Allocate the maps used by context management
	 */
	context_map = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
	if (!context_map)
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      CTX_MAP_SIZE);
	context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1),
				    SMP_CACHE_BYTES);
	if (!context_mm)
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      sizeof(void *) * (LAST_CONTEXT + 1));
	if (IS_ENABLED(CONFIG_SMP)) {
		stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
		if (!stale_map[boot_cpuid])
			panic("%s: Failed to allocate %zu bytes\n", __func__,
			      CTX_MAP_SIZE);

		cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
					  "powerpc/mmu/ctx:prepare",
					  mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
	}

	printk(KERN_INFO
	       "MMU: Allocated %zu bytes of context maps for %d contexts\n",
	       2 * CTX_MAP_SIZE + (sizeof(void *) * (LAST_CONTEXT + 1)),
	       LAST_CONTEXT - FIRST_CONTEXT + 1);

	/*
	 * Some processors have too few contexts to reserve one for
	 * init_mm, and require using context 0 for a normal task.
	 * Other processors reserve the use of context zero for the kernel.
	 * This code assumes FIRST_CONTEXT < 32.
	 */
	context_map[0] = (1 << FIRST_CONTEXT) - 1;
	next_context = FIRST_CONTEXT;
	nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1;
}