/*
 * Copyright (c) 2023 Institute of Parallel And Distributed Systems (IPADS), Shanghai Jiao Tong University (SJTU)
 * Licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *     http://license.coscl.org.cn/MulanPSL2
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
 * PURPOSE.
 * See the Mulan PSL v2 for more details.
 */
#include <arch/mm/page_table.h>
#include <arch/mm/cache.h>
#include <common/backtrace.h>
#include <common/errno.h>
#include <common/util.h>
#include <mm/mm.h>
#include <mm/vmspace.h>
#include <mm/common_pte.h>
#include <sched/context.h>
#include <object/recycle.h>
#include <object/user_fault.h>
#include <object/thread.h>
#include <mm/page_fault.h>

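/* Dump debugging information about an unrecoverable page fault: the current
 * process and thread, the faulting IP and SP, the VMRs of the vmspace, and a
 * kernel backtrace. */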
static void dump_pgfault_error(void)
{
    kinfo("kernel dump:\n");
    kinfo("process: %p\n", current_cap_group);
    print_thread(current_thread);
    kinfo("faulting IP: 0x%lx, SP: 0x%lx\n",
          arch_get_thread_next_ip(current_thread),
          arch_get_thread_stack(current_thread));
    kprint_vmr(current_thread->vmspace);

    backtrace();
}

/*
 * Perform general COW
 * Step-1: get the PA of the page containing fault_addr, as well as the kernel VA of that page
 * Step-2: allocate a new page and record it in the VMR
 * Step-3: copy the old page to the new page using kernel VAs
 * Step-4(?): update VMR perm (How and when? Necessary?)
 * Step-5: update the PTE permission and PPN
 * Step-6: flush the TLB of the user virtual page (user_vpa)
 */
static void __do_general_cow(struct vmspace *vmspace, struct vmregion *vmr,
                             vaddr_t fault_addr, pte_t *fault_pte,
                             struct common_pte_t *pte_info)
{
    vaddr_t kva, user_vpa;
    vaddr_t new_page;
    paddr_t new_pa;
    struct common_pte_t new_pte_attr;

    /* Step-1: get the PA of the page containing fault_addr, as well as the
     * kernel VA of that page */
    kva = phys_to_virt(pte_info->ppn << PAGE_SHIFT);

    /* Step-2: allocate a new page and record in VMR */
    new_page = (vaddr_t)get_pages(0);
    BUG_ON((void *)new_page == NULL); // Out-of-memory
    new_pa = virt_to_phys((void *)new_page);

    vmregion_record_cow_private_page(vmr, (void *)new_page);

    /* Step-3: copy using kernel VA to new page */
    memcpy((void *)new_page, (void *)kva, PAGE_SIZE);

    /* Step-5: update PTE permission and PPN */
    new_pte_attr.ppn = new_pa >> PAGE_SHIFT;
    new_pte_attr.perm = pte_info->perm | VMR_WRITE;
    new_pte_attr.valid = 1;
    new_pte_attr.access = 0;
    new_pte_attr.dirty = 0;

    update_pte(fault_pte, L3, &new_pte_attr);

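    /* Account for the newly allocated private page in this vmspace's RSS. */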
    vmspace->rss += PAGE_SIZE;

    /* Step-6: Flush TLB of user virtual page (user_vpa) */
    user_vpa = ROUND_DOWN(fault_addr, PAGE_SIZE);
    flush_tlb_by_range(vmspace, user_vpa, PAGE_SIZE);
}

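/*
 * Resolve a copy-on-write fault at fault_addr within fault_vmr.
 *
 * Returns 0 on success, and -EFAULT if the PTE is gone (e.g., the page was
 * unmapped or swapped out concurrently), in which case the caller should
 * fall back to the translation fault path.
 */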
static int do_cow(struct vmspace *vmspace, struct vmregion *fault_vmr,
                  vaddr_t fault_addr)
{
    int ret = 0;
    paddr_t pa;
    pte_t *fault_pte;
    struct common_pte_t pte_info;

    lock(&vmspace->pgtbl_lock);
    ret = query_in_pgtbl(vmspace->pgtbl, fault_addr, &pa, &fault_pte);
    /**
     * Although we are handling a permission fault here, it is still possible
     * to discover that it should actually be a translation fault once we
     * start to resolve it. For example, before the page fault was forwarded
     * to here, we did not hold the pgtbl_lock, so the page may have been
     * unmapped or swapped out in the meantime. There is another special
     * case: on RISC-V, we cannot tell whether a page fault is a translation
     * fault or a permission fault from the scause reported by the hardware,
     * so we have to check the PTE here.
     *
     * We query the page table for the actual PTE **atomically** here. So if
     * the PTE is missing, we can be sure that it is a translation fault, and
     * we forward it back to the translation fault handler by returning -EFAULT.
     */
    if (ret) {
        ret = -EFAULT;
        goto out;
    }
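    /* Parse the architecture-specific L3 PTE into the generic format. */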
    parse_pte_to_common(fault_pte, L3, &pte_info);

    // Fast path: the page fault has already been handled by another thread
    if (pte_info.perm & VMR_WRITE) {
        goto out;
    }
    __do_general_cow(vmspace, fault_vmr, fault_addr, fault_pte, &pte_info);

out:
    unlock(&vmspace->pgtbl_lock);
    return ret;
}

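/*
 * Check whether the fault at fault_addr is actually a translation fault,
 * i.e., whether the page table currently has no valid mapping for it.
 * Returns -EFAULT if there is no mapping, and 0 otherwise.
 */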
static int check_trans_fault(struct vmspace *vmspace, vaddr_t fault_addr)
{
    int ret = 0;
    paddr_t pa;
    pte_t *fault_pte;

    lock(&vmspace->pgtbl_lock);
    ret = query_in_pgtbl(vmspace->pgtbl, fault_addr, &pa, &fault_pte);
    unlock(&vmspace->pgtbl_lock);
    if (ret) {
        return -EFAULT;
    }

    return 0;
}

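/*
 * Handle a translation fault: find the vmregion covering fault_addr, look up
 * or allocate the backing physical page in its pmo, and establish the
 * mapping in the page table of the vmspace.
 */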
int handle_trans_fault(struct vmspace *vmspace, vaddr_t fault_addr)
{
    struct vmregion *vmr;
    struct pmobject *pmo;
    paddr_t pa;
    unsigned long offset;
    unsigned long index;
    int ret = 0;

    /*
     * Grab the lock here.
     * Two threads (in the same process) running on different cores
     * may fault on the same page, so we need to prevent them from
     * adding the same mapping twice.
     */
    lock(&vmspace->vmspace_lock);
    vmr = find_vmr_for_va(vmspace, fault_addr);

    if (vmr == NULL) {
        kinfo("handle_trans_fault: no vmr found for va 0x%lx!\n", fault_addr);
        dump_pgfault_error();
        unlock(&vmspace->vmspace_lock);

#if defined(CHCORE_ARCH_AARCH64)
        /* kernel fault fixup is only supported on AArch64 and Sparc */
        return -EFAULT;
#endif
        sys_exit_group(-1);

        BUG("should not reach here");
    }

    pmo = vmr->pmo;
    /* Get the offset in the pmo for faulting addr */
    offset = ROUND_DOWN(fault_addr, PAGE_SIZE) - vmr->start;
    vmr_prop_t perm = vmr->perm;
    switch (pmo->type) {
    case PMO_ANONYM:
    case PMO_SHM: {
        /* Boundary check */
        BUG_ON(offset >= pmo->size);

        /* Get the index in the pmo radix for faulting addr */
        index = offset / PAGE_SIZE;

        fault_addr = ROUND_DOWN(fault_addr, PAGE_SIZE);

        pa = get_page_from_pmo(pmo, index);
        if (pa == 0) {
            /*
             * Not committed before, so allocate a physical page
             * for the faulting address.
             */
            void *new_va = get_pages(0);
            long rss = 0;
            BUG_ON(new_va == NULL);
            pa = virt_to_phys(new_va);
            BUG_ON(pa == 0);
            /* Zero the newly allocated page */
            memset((void *)phys_to_virt(pa), 0, PAGE_SIZE);
            /*
             * Record the physical page in the radix tree of the pmo:
             * the page index (offset / PAGE_SIZE) is used as the key.
             */
            kdebug("commit: index: %ld, 0x%lx\n", index, pa);
            commit_page_to_pmo(pmo, index, pa);

            /* Add the mapping in the page table */
            lock(&vmspace->pgtbl_lock);
            map_range_in_pgtbl(vmspace->pgtbl, fault_addr, pa, PAGE_SIZE, perm, &rss);
            vmspace->rss += rss;
            unlock(&vmspace->pgtbl_lock);
        } else {
            /*
             * pa != 0: a physical page has already been committed
             * to the faulting address.
             *
             * For concurrent page faults:
             *
             * When the type is PMO_ANONYM, later faulting threads
             * of the process do not need to modify the page
             * table because a previous faulting thread already did
             * so. (This is always true within the same process.)
             * However, if one process maps an anonymous pmo for
             * another process (e.g., the main stack pmo), the
             * faulting thread (e.g., in the new process) needs to
             * update its own page table.
             * So, for simplicity, we just update the page table.
             * Note that adding the same mapping again is harmless.
             *
             * When the type is PMO_SHM, later faulting threads
             * need to add the mapping in the page table.
             * Repeated mapping operations are harmless.
             */
            if (pmo->type == PMO_SHM || pmo->type == PMO_ANONYM) {
                /* Add the mapping in the page table */
                long rss = 0;
                lock(&vmspace->pgtbl_lock);
                map_range_in_pgtbl(
                    vmspace->pgtbl, fault_addr, pa, PAGE_SIZE, perm, &rss);
                vmspace->rss += rss;
                unlock(&vmspace->pgtbl_lock);
            }
        }

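        /* The page may contain code: synchronize the I-cache with the
         * D-cache so that stale instructions are not fetched. */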
        if (perm & VMR_EXEC) {
            arch_flush_cache(fault_addr, PAGE_SIZE, SYNC_IDCACHE);
        }

        break;
    }
    case PMO_FILE: {
#ifdef CHCORE_ENABLE_FMAP
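        /* File-backed memory: forward the fault to the user-level fault
         * handler; it does not return here. */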
        unlock(&vmspace->vmspace_lock);
        fault_addr = ROUND_DOWN(fault_addr, PAGE_SIZE);
        handle_user_fault(pmo, ROUND_DOWN(fault_addr, PAGE_SIZE));
        BUG("Should never be here!\n");
#else
        kinfo("file mmap is not enabled.\n");
        dump_pgfault_error();

        unlock(&vmspace->vmspace_lock);
        sys_exit_group(-1);

        BUG("should not reach here");
        break;
#endif
    }
    case PMO_FORBID: {
        kinfo("Forbidden memory access (pmo->type is PMO_FORBID).\n");
        dump_pgfault_error();

        unlock(&vmspace->vmspace_lock);
        sys_exit_group(-1);

        BUG("should not reach here");
        break;
    }
    default: {
        kinfo("handle_trans_fault: faulting vmr->pmo->type "
              "(pmo type %d at 0x%lx)\n",
              vmr->pmo->type,
              fault_addr);
        dump_pgfault_error();

        unlock(&vmspace->vmspace_lock);
        sys_exit_group(-1);

        BUG("should not reach here");
        break;
    }
    }

    unlock(&vmspace->vmspace_lock);
    return ret;
}

// clang-format off
/**
 * @brief Handle a permission fault triggered by hardware. This function
 * is arch-independent.
 *
 * A permission fault occurs when the permission required to execute an
 * instruction cannot be satisfied by the permission of a page in the page
 * table while the hardware is **executing** the instruction. To handle a
 * permission fault, we have to differentiate between the permission of the
 * VMR where the faulting page belongs (declared_perm) and the privilege
 * needed by the faulting instruction (desired_perm). In some cases, a
 * permission fault is demanded by us. For instance, if a page is shared
 * through CoW, it is first mapped as a read-only shared page, so a
 * permission fault is triggered when it is written, and we can do the copy
 * part of CoW in the permission fault handler. In other cases, a permission
 * fault might indicate a bug or a malicious attack, and we kill the
 * faulting process.
 *
 * It is worth noting that there is a time window between the moment the
 * processor core tries to execute the faulting instruction and the moment
 * we start to handle the permission fault here. The faulting process and
 * our kernel are multithreaded, so during this window the faulting page may
 * have been unmapped by other threads of the process, or swapped out by the
 * kernel. Thus, we may surprisingly find nothing in the page table when we
 * query the faulting address. Under such circumstances, we treat the
 * permission fault as a translation fault and handle it for correctness.
 *
 * In conclusion, the following execution flows are possible when handling a
 * permission fault. Note that a permission fault might downgrade to a
 * translation fault, but the reverse scenario is impossible.
 *
 * demanded fault: a permission fault with a clearly defined desired_perm and
 * declared_perm combination, e.g., declared = READ and desired = WRITE;
 * it is forwarded to a specific handler, e.g., do_cow.
 *
 * A. demanded fault->specific handler->success
 *      A.1. re-execute faulting instruction->success
 *      A.2. page swapped out by another thread->re-execute->trans fault
 * B. demanded fault->specific handler->return an EFAULT->fallback to trans fault
 * C. demanded fault->specific handler->return other error->kill process
 * D. undemanded fault->check is trans fault->fallback to trans fault
 * E. undemanded fault->check is not trans fault->kill process
 */
// clang-format on
int handle_perm_fault(struct vmspace *vmspace, vaddr_t fault_addr,
                      vmr_prop_t desired_perm)
{
    int ret = 0;
    struct vmregion *vmr;
    vmr_prop_t declared_perm;

    lock(&vmspace->vmspace_lock);
    vmr = find_vmr_for_va(vmspace, fault_addr);

    if (vmr == NULL) {
        kinfo("handle_perm_fault: no vmr found for va 0x%lx!\n", fault_addr);
        dump_pgfault_error();
        unlock(&vmspace->vmspace_lock);
#if defined(CHCORE_ARCH_AARCH64)
        return -EFAULT;
#else
        sys_exit_group(-1);
        BUG("should not reach here");
#endif
    }

    declared_perm = vmr->perm;

    // Handle write to a read-only page
    if ((declared_perm & VMR_READ) && desired_perm == VMR_WRITE) {
        // Handle COW here
        if (declared_perm & VMR_COW) {
            ret = do_cow(vmspace, vmr, fault_addr);
            if (ret != 0 && ret != -EFAULT) {
                goto out_illegal;
            } else if (ret == -EFAULT) {
                goto out_trans_fault;
            } else {
                goto out_succ;
            }
        }
    }

    /**
     * For any other invalid declared/desired permission combination,
     * we check whether it is actually a translation fault; if so, we
     * handle it as such. If not, we can be sure that it is an access
     * attempt with illegal permission, so we kill the faulting process.
     */
    ret = check_trans_fault(vmspace, fault_addr);
    if (ret == -EFAULT) {
        goto out_trans_fault;
    } else {
        goto out_illegal;
    }
out_succ:
    unlock(&vmspace->vmspace_lock);
    return ret;
out_illegal:
    // Illegal access permission, kill process
    kinfo("handle_perm_fault failed: fault_addr=%p desired_perm=%lu\n",
          fault_addr,
          desired_perm);
    dump_pgfault_error();
    unlock(&vmspace->vmspace_lock);
#if defined(CHCORE_ARCH_AARCH64)
    return -EPERM;
#else
    sys_exit_group(-1);
    return -1;
#endif
out_trans_fault:
    /**
     * It is possible for a permission fault to downgrade to a translation
     * fault, e.g., when the page has been swapped out or unmapped
     * concurrently before the permission fault was forwarded to here. We
     * let the actual permission fault handlers (e.g., do_cow) return
     * -EFAULT when they detect such cases, and we then treat the fault as
     * a translation fault and handle it.
     */
    unlock(&vmspace->vmspace_lock);
    ret = handle_trans_fault(vmspace, fault_addr);
    return ret;
}