/*
 * Copyright (c) 2023 Institute of Parallel And Distributed Systems (IPADS), Shanghai Jiao Tong University (SJTU)
 * Licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *     http://license.coscl.org.cn/MulanPSL2
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
 * PURPOSE.
 * See the Mulan PSL v2 for more details.
 */
#include <arch/mm/page_table.h>
#include <arch/mm/cache.h>
#include <common/backtrace.h>
#include <common/errno.h>
#include <common/util.h>
#include <mm/mm.h>
#include <mm/vmspace.h>
#include <mm/common_pte.h>
#include <sched/context.h>
#include <object/recycle.h>
#include <object/user_fault.h>
#include <object/thread.h>
#include <mm/page_fault.h>

static void dump_pgfault_error(void)
{
        kinfo("kernel dump:\n");
        kinfo("process: %p\n", current_cap_group);
        print_thread(current_thread);
        kinfo("faulting IP: 0x%lx, SP: 0x%lx\n",
              arch_get_thread_next_ip(current_thread),
              arch_get_thread_stack(current_thread));
        kprint_vmr(current_thread->vmspace);

        backtrace();
}

/*
 * Perform general COW
 * Step-1: get the PA of the page containing fault_addr, as well as the kernel VA of that page
 * Step-2: allocate a new page and record it in the VMR
 * Step-3: copy to the new page using the kernel VA
 * Step-4(?): update VMR perm (How and when? Necessary?)
 * Step-5: update PTE permission and PPN
 * Step-6: flush TLB of the user virtual page (user_vpa)
 */
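/*
 * Note: Step-4 is not performed below. Only the PTE of the faulting page is
 * upgraded to writable; the VMR permission itself is left unchanged.
 */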
static void __do_general_cow(struct vmspace *vmspace, struct vmregion *vmr,
                             vaddr_t fault_addr, pte_t *fault_pte,
                             struct common_pte_t *pte_info)
{
        vaddr_t kva, user_vpa;
        vaddr_t new_page;
        paddr_t new_pa;
        struct common_pte_t new_pte_attr;

        /* Step-1: get the PA of the page containing fault_addr, as well as
         * the kernel VA of that page */
        kva = phys_to_virt(pte_info->ppn << PAGE_SHIFT);

        /* Step-2: allocate a new page and record it in the VMR */
        new_page = (vaddr_t)get_pages(0);
        BUG_ON((void *)new_page == NULL); // Out-of-memory
        new_pa = virt_to_phys((void *)new_page);

        vmregion_record_cow_private_page(vmr, (void *)new_page);

        /* Step-3: copy to the new page using the kernel VA */
        memcpy((void *)new_page, (void *)kva, PAGE_SIZE);

        /* Step-5: update PTE permission and PPN */
        new_pte_attr.ppn = new_pa >> PAGE_SHIFT;
        new_pte_attr.perm = pte_info->perm | VMR_WRITE;
        new_pte_attr.valid = 1;
        new_pte_attr.access = 0;
        new_pte_attr.dirty = 0;

        update_pte(fault_pte, L3, &new_pte_attr);

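        /* Account the newly allocated private page in the process's
         * resident set size. */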
        vmspace->rss += PAGE_SIZE;

        /* Step-6: flush TLB of the user virtual page (user_vpa) */
        user_vpa = ROUND_DOWN(fault_addr, PAGE_SIZE);
        flush_tlb_by_range(vmspace, user_vpa, PAGE_SIZE);
}

static int do_cow(struct vmspace *vmspace, struct vmregion *fault_vmr,
                  vaddr_t fault_addr)
{
        int ret = 0;
        paddr_t pa;
        pte_t *fault_pte;
        struct common_pte_t pte_info;

        lock(&vmspace->pgtbl_lock);
        ret = query_in_pgtbl(vmspace->pgtbl, fault_addr, &pa, &fault_pte);
        /**
         * Although we are handling a permission fault here, it is still
         * possible to discover that it should actually be a translation
         * fault once we start resolving it. For example, before the page
         * fault was forwarded to this point we did not hold pgtbl_lock, so
         * the page may have been unmapped or swapped out in the meantime.
         * There is another special case: on RISC-V, we cannot tell from the
         * scause reported by the hardware whether a page fault is a
         * translation fault or a permission fault, so we have to check the
         * PTE here.
         *
         * We query the page table for the actual PTE **atomically** here.
         * So if the PTE is missing, we can be sure that it is a translation
         * fault, and we forward it back to the translation fault handler by
         * returning -EFAULT.
         */
        if (ret) {
                ret = -EFAULT;
                goto out;
        }
        parse_pte_to_common(fault_pte, L3, &pte_info);

        // Fast path: the page fault has already been handled by another thread
        if (pte_info.perm & VMR_WRITE) {
                goto out;
        }
        __do_general_cow(vmspace, fault_vmr, fault_addr, fault_pte, &pte_info);

out:
        unlock(&vmspace->pgtbl_lock);
        return ret;
}

static int check_trans_fault(struct vmspace *vmspace, vaddr_t fault_addr)
{
        int ret = 0;
        paddr_t pa;
        pte_t *fault_pte;

        lock(&vmspace->pgtbl_lock);
        ret = query_in_pgtbl(vmspace->pgtbl, fault_addr, &pa, &fault_pte);
        unlock(&vmspace->pgtbl_lock);
        if (ret) {
                return -EFAULT;
        }

        return 0;
}

int handle_trans_fault(struct vmspace *vmspace, vaddr_t fault_addr)
{
        struct vmregion *vmr;
        struct pmobject *pmo;
        paddr_t pa;
        unsigned long offset;
        unsigned long index;
        int ret = 0;

        /*
         * Grab the lock here.
         * Two threads (of the same process) on different cores may fault
         * on the same page, so we need to prevent them from adding the
         * same mapping twice.
         */
        lock(&vmspace->vmspace_lock);
        vmr = find_vmr_for_va(vmspace, fault_addr);

        if (vmr == NULL) {
                kinfo("handle_trans_fault: no vmr found for va 0x%lx!\n",
                      fault_addr);
                dump_pgfault_error();
                unlock(&vmspace->vmspace_lock);

#if defined(CHCORE_ARCH_AARCH64)
                /* kernel fault fixup is only supported on AArch64 and Sparc */
                return -EFAULT;
#endif
                sys_exit_group(-1);

                BUG("should not reach here");
        }

        pmo = vmr->pmo;
        /* Get the offset in the pmo for the faulting addr */
        offset = ROUND_DOWN(fault_addr, PAGE_SIZE) - vmr->start;
        vmr_prop_t perm = vmr->perm;
        switch (pmo->type) {
        case PMO_ANONYM:
        case PMO_SHM: {
                /* Boundary check */
                BUG_ON(offset >= pmo->size);

                /* Get the index in the pmo radix for the faulting addr */
                index = offset / PAGE_SIZE;

                fault_addr = ROUND_DOWN(fault_addr, PAGE_SIZE);

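                /* Check whether a physical page has already been committed
                 * for this index in the pmo. */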
                pa = get_page_from_pmo(pmo, index);
                if (pa == 0) {
                        /*
                         * Not committed before. Then, allocate the physical
                         * page.
                         */
                        void *new_va = get_pages(0);
                        long rss = 0;
                        BUG_ON(new_va == NULL);
                        pa = virt_to_phys(new_va);
                        BUG_ON(pa == 0);
                        /* Clear the newly allocated page to 0 */
                        memset((void *)phys_to_virt(pa), 0, PAGE_SIZE);
                        /*
                         * Record the physical page in the radix tree:
                         * the offset is used as the index in the radix tree
                         */
                        kdebug("commit: index: %ld, 0x%lx\n", index, pa);
                        commit_page_to_pmo(pmo, index, pa);

                        /* Add the mapping in the page table */
                        lock(&vmspace->pgtbl_lock);
                        map_range_in_pgtbl(vmspace->pgtbl, fault_addr, pa,
                                           PAGE_SIZE, perm, &rss);
                        vmspace->rss += rss;
                        unlock(&vmspace->pgtbl_lock);
                } else {
                        /*
                         * pa != 0: the faulting address has already been
                         * committed to a physical page.
                         *
                         * For concurrent page faults:
                         *
                         * When the type is PMO_ANONYM, later faulting threads
                         * of the process do not need to modify the page
                         * table because a previous faulting thread will have
                         * done that. (This is always true within the same
                         * process.) However, if one process maps an anonymous
                         * pmo for another process (e.g., the main stack pmo),
                         * the faulting thread (e.g., in the new process) still
                         * needs to update its own page table.
                         * So, for simplicity, we just update the page table.
                         * Note that adding the same mapping is harmless.
                         *
                         * When the type is PMO_SHM, later faulting threads
                         * need to add the mapping in the page table.
                         * Repeated mapping operations are harmless.
                         */
                        if (pmo->type == PMO_SHM || pmo->type == PMO_ANONYM) {
                                /* Add the mapping in the page table */
                                long rss = 0;
                                lock(&vmspace->pgtbl_lock);
                                map_range_in_pgtbl(
                                        vmspace->pgtbl, fault_addr, pa,
                                        PAGE_SIZE, perm, &rss);
                                vmspace->rss += rss;
                                unlock(&vmspace->pgtbl_lock);
                        }
                }

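                /* For executable mappings, synchronize the instruction cache
                 * with the data cache so that stale instructions are not
                 * fetched from the newly mapped page. */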
                if (perm & VMR_EXEC) {
                        arch_flush_cache(fault_addr, PAGE_SIZE, SYNC_IDCACHE);
                }

                break;
        }
        case PMO_FILE: {
#ifdef CHCORE_ENABLE_FMAP
                unlock(&vmspace->vmspace_lock);
                fault_addr = ROUND_DOWN(fault_addr, PAGE_SIZE);
                handle_user_fault(pmo, ROUND_DOWN(fault_addr, PAGE_SIZE));
                BUG("Should never be here!\n");
#else
                kinfo("file mmap is not enabled.\n");
                dump_pgfault_error();

                unlock(&vmspace->vmspace_lock);
                sys_exit_group(-1);

                BUG("should not reach here");
                break;
#endif
        }
        case PMO_FORBID: {
                kinfo("Forbidden memory access (pmo->type is PMO_FORBID).\n");
                dump_pgfault_error();

                unlock(&vmspace->vmspace_lock);
                sys_exit_group(-1);

                BUG("should not reach here");
                break;
        }
        default: {
                kinfo("handle_trans_fault: unsupported pmo type "
                      "(pmo type %d at 0x%lx)\n",
                      vmr->pmo->type,
                      fault_addr);
                dump_pgfault_error();

                unlock(&vmspace->vmspace_lock);
                sys_exit_group(-1);

                BUG("should not reach here");
                break;
        }
        }

        unlock(&vmspace->vmspace_lock);
        return ret;
}

// clang-format off
/**
 * @brief Handle a permission fault triggered by the hardware. This function
 * is arch-independent.
 *
 * A permission fault occurs when the permission required by an instruction
 * cannot be satisfied by the permission of a page in the page table while
 * the hardware **executes** that instruction. To handle a permission fault,
 * we have to differentiate the permission of the VMR to which the faulting
 * page belongs (declared_perm) from the privilege needed by the faulting
 * instruction (desired_perm). In some cases, a permission fault is demanded
 * by us. For instance, if a page is shared through CoW, it is first mapped
 * as a read-only shared mapping, so there will be a permission fault when it
 * is written, and we can do the copy part of CoW in the permission fault
 * handler. In other cases, a permission fault might indicate a bug or a
 * malicious attack, and we kill the faulting process.
 *
 * It is worth noting that there is a time window between the moment the
 * processor core tries to execute the faulting instruction and the moment
 * we start to handle the permission fault here. The faulting process and
 * our kernel are multithreaded, so during this window the faulting page may
 * have been unmapped by other threads of the process, or swapped out by the
 * kernel. As a result, we may, surprisingly, find nothing in the page table
 * when we query the faulting address. Under such circumstances, we treat the
 * permission fault as a translation fault and handle it for correctness.
 *
 * In conclusion, the following execution flows are possible when handling a
 * permission fault. Note that a permission fault may downgrade to a
 * translation fault, but the reverse is impossible.
 *
 * demanded fault: a permission fault with a clearly defined desired_perm and
 * declared_perm combination, e.g., declared = READ and desired = WRITE.
 * It is forwarded to a specific handler, e.g., do_cow.
 *
 * A. demanded fault->specific handler->success
 * A.1. re-execute faulting instruction->success
 * A.2. page swapped out by another thread->re-execute->trans fault
 * B. demanded fault->specific handler->return an EFAULT->fallback to trans fault
 * C. demanded fault->specific handler->return other error->kill process
 * D. undemanded fault->check is trans fault->fallback to trans fault
 * E. undemanded fault->check is not trans fault->kill process
 */
// clang-format on
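/*
 * Concrete example of flow B: a thread write-faults on a CoW page while
 * another thread unmaps that page concurrently. do_cow() then finds no PTE
 * under pgtbl_lock, returns -EFAULT, and handle_perm_fault() falls back to
 * handle_trans_fault().
 */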
int handle_perm_fault(struct vmspace *vmspace, vaddr_t fault_addr,
                      vmr_prop_t desired_perm)
{
        int ret = 0;
        struct vmregion *vmr;
        vmr_prop_t declared_perm;

        lock(&vmspace->vmspace_lock);
        vmr = find_vmr_for_va(vmspace, fault_addr);

        if (vmr == NULL) {
                kinfo("handle_perm_fault: no vmr found for va 0x%lx!\n",
                      fault_addr);
                dump_pgfault_error();
                unlock(&vmspace->vmspace_lock);
#if defined(CHCORE_ARCH_AARCH64)
                return -EFAULT;
#else
                sys_exit_group(-1);
                BUG("should not reach here");
#endif
        }

        declared_perm = vmr->perm;

        // Handle a write to a read-only page
        if ((declared_perm & VMR_READ) && desired_perm == VMR_WRITE) {
                // Handle COW here
                if (declared_perm & VMR_COW) {
                        ret = do_cow(vmspace, vmr, fault_addr);
                        if (ret != 0 && ret != -EFAULT) {
                                goto out_illegal;
                        } else if (ret == -EFAULT) {
                                goto out_trans_fault;
                        } else {
                                goto out_succ;
                        }
                }
        }

        /**
         * For other invalid declared/desired permission combinations,
         * we check here whether it is actually a translation fault; if so,
         * we handle it. If not, we can be sure that it is an access attempt
         * with illegal permission, so we kill the faulting process.
         */
        ret = check_trans_fault(vmspace, fault_addr);
        if (ret == -EFAULT) {
                goto out_trans_fault;
        } else {
                goto out_illegal;
        }
out_succ:
        unlock(&vmspace->vmspace_lock);
        return ret;
out_illegal:
        // Illegal access permission, kill the process
        kinfo("handle_perm_fault failed: fault_addr=0x%lx desired_perm=%lu\n",
              fault_addr,
              desired_perm);
        dump_pgfault_error();
        unlock(&vmspace->vmspace_lock);
#if defined(CHCORE_ARCH_AARCH64)
        return -EPERM;
#else
        sys_exit_group(-1);
        return -1;
#endif
out_trans_fault:
        /**
         * It is possible for a permission fault to downgrade to a translation
         * fault. For example, the page may have been swapped out or unmapped
         * concurrently before the permission fault was forwarded here. We let
         * the actual permission fault handlers (e.g., do_cow) return -EFAULT
         * when they detect such cases, and then treat the fault as a
         * translation fault and handle it accordingly.
         */
        unlock(&vmspace->vmspace_lock);
        ret = handle_trans_fault(vmspace, fault_addr);
        return ret;
}