/*
 * Copyright (c) 2023 Institute of Parallel And Distributed Systems (IPADS), Shanghai Jiao Tong University (SJTU)
 * Licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *     http://license.coscl.org.cn/MulanPSL2
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
 * PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include "chcore/error.h"
#include "chcore/ipc.h"
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <chcore/defs.h>
#include <chcore/memory.h>
#include <chcore/syscall.h>
#include <pthread.h>
#include <chcore/container/list.h>
#include <sys/mman.h>
#include <chcore-internal/fs_defs.h>
#include <chcore/type.h>

#include "chcore_mman.h"
#include "fd.h"

/*
 * When a thread is created, it mmaps a chunk of memory for the thread stack
 * and TLS, which is reclaimed when the thread exits (by calling unmapself).
 * Unmapself is a piece of assembly code that jumps to munmap to reclaim the
 * associated memory.
 *
 * Linux implements mmap/munmap in kernel mode, so unmapself.s can reclaim the
 * associated memory directly through a system call. Because a thread's user
 * and kernel stacks are separate, the control flow can still return to
 * unmapself after the user stack has been reclaimed in the kernel and issue
 * further system calls without touching the freed thread stack.
 *
 * In ChCore, mmap is implemented in user mode and the relevant mapping
 * information is also recorded in user mode, so these data structures must be
 * reclaimed by C code when a thread exits. Once the thread stack has been
 * released and the bookkeeping structures freed, the thread can no longer
 * return to unmapself or issue the subsequent thread-exit system call on that
 * stack. Therefore, the user-mode version of unmapself maintains a common
 * stack that is borrowed as a temporary thread stack while a thread exits.
 */
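
/*
 * Bookkeeping sketch: every successful mapping is recorded as a pmo_node,
 * indexed twice -- by virtual address in the va2pmo hash table (for exact
 * lookups) and in pmo_node_head, a list kept sorted by address (for range
 * walks in munmap and for overlap checks). A minimal, assumed shape of the
 * node, reconstructed from how it is used below (the actual definition
 * presumably lives in chcore_mman.h):
 *
 *     struct pmo_node {
 *         cap_t cap;                     // capability of the backing pmo
 *         vaddr_t va;                    // start of the mapped range
 *         size_t pmo_size;               // length of the mapping
 *         int type;                      // PMO_ANONYM or PMO_FILE
 *         ipc_struct_t *_fs_ipc_struct;  // FS channel for PMO_FILE nodes
 *         struct hlist_node hash_node;   // va2pmo bucket linkage
 *         struct list_head list_node;    // sorted-list linkage
 *     };
 */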

static bool __initial_common_stack_success = false;

struct htable va2pmo;
/* For sequential access. */
static struct list_head pmo_node_head;
static pthread_spinlock_t va2pmo_lock;

/* For unmapself. */
static cap_t common_stack_pmo_cap;
static vaddr_t common_stack_addr;
pthread_spinlock_t common_stack_lock;

/* For one-time initialization. */
pthread_once_t init_mmap_once = PTHREAD_ONCE_INIT;
pthread_once_t init_common_stack_once = PTHREAD_ONCE_INIT;

static void initial_mmap(void)
{
    pthread_spin_init(&va2pmo_lock, 0);
    init_htable(&va2pmo, HASH_TABLE_SIZE);
    init_list_head(&pmo_node_head);
}

static void initial_common_stack(void)
{
    u64 prot;
    vaddr_t stack_bottom_addr;
    int ret = 0;

    pthread_spin_init(&common_stack_lock, 0);

    common_stack_pmo_cap = usys_create_pmo(UNMAPSELF_STACK_SIZE, PMO_ANONYM);
    if (common_stack_pmo_cap < 0) {
        printf("Error occurred when creating the unmapself pmo\n");
        ret = common_stack_pmo_cap;
        goto fail_out;
    }

    stack_bottom_addr = (vaddr_t)chcore_alloc_vaddr(UNMAPSELF_STACK_SIZE);
    if (!stack_bottom_addr) {
        printf("Error occurred when allocating vaddr\n");
        ret = -ENOMEM;
        goto revoke_pmo_cap;
    }

    /* Prepare the common stack for thread exiting. */
    prot = PROT_READ | PROT_WRITE;
    ret = usys_map_pmo(SELF_CAP, common_stack_pmo_cap, stack_bottom_addr, prot);
    if (ret < 0) {
        printf("Error occurred when mapping the unmapself stack\n");
        goto free_addr;
    }
    /* The stack grows downwards, so record the top of the mapping. */
    common_stack_addr = stack_bottom_addr + UNMAPSELF_STACK_SIZE;
    __initial_common_stack_success = true;
    return;

free_addr:
    /* common_stack_addr is not set yet; free the range that was allocated. */
    chcore_free_vaddr(stack_bottom_addr, UNMAPSELF_STACK_SIZE);
revoke_pmo_cap:
    usys_revoke_cap(common_stack_pmo_cap, false);
fail_out:
    __initial_common_stack_success = false;
}

static struct pmo_node *new_pmo_node(cap_t cap, vaddr_t va, size_t length,
                                     int type, ipc_struct_t *_fs_ipc_struct)
{
    struct pmo_node *node;

    node = (struct pmo_node *)malloc(sizeof(struct pmo_node));
    if (node == NULL) {
        return NULL;
    }
    node->cap = cap;
    node->va = va;
    node->pmo_size = length;
    node->type = type;
    node->_fs_ipc_struct = _fs_ipc_struct;
    init_hlist_node(&node->hash_node);
    return node;
}

static inline void free_pmo_node(struct pmo_node *node)
{
    if (node) {
        free(node);
    }
}

/*
 * Find the first pmo node that lies within [va, va + length). If
 * @start_pmo_node is given, continue the search from the node after it.
 */
static struct pmo_node *get_next_pmo_node(void *va, int length,
                                          struct pmo_node *start_pmo_node)
{
    struct hlist_head *buckets;
    struct pmo_node *node = NULL;
    struct list_head *start;

    if (!start_pmo_node) {
        /* Fast path: a node starting exactly at @va. */
        buckets = htable_get_bucket(&va2pmo, VA_TO_KEY(va));

        for_each_in_hlist (node, hash_node, buckets) {
            if (node->va == (vaddr_t)va) {
                goto out;
            }
        }
        start = &pmo_node_head;
    } else {
        start = &start_pmo_node->list_node;
    }

    if (start->next == &pmo_node_head) {
        goto fail;
    }

    for_each_in_list (node, struct pmo_node, list_node, start) {
        if (node->va >= (vaddr_t)va
            && node->va + node->pmo_size <= (vaddr_t)va + length) {
            goto out;
        }
    }

fail:
    node = NULL;
out:
    return node;
}

/* Insert the node into the list in ascending order of virtual address. */
static void add_node_in_order(struct pmo_node *node)
{
    struct pmo_node *temp;

    if (list_empty(&pmo_node_head)) {
        list_add(&node->list_node, &pmo_node_head);
        return;
    }

    for_each_in_list (temp, struct pmo_node, list_node, &pmo_node_head) {
        if ((u64)temp->va > (u64)node->va) {
            list_add(&node->list_node, temp->list_node.prev);
            return;
        }
    }
    list_add(&node->list_node, pmo_node_head.prev);
}

/* Return true if [start, start + length) overlaps any recorded mapping. */
static bool __is_overlapped_area(unsigned long start, size_t length)
{
    struct pmo_node *temp;
    if (list_empty(&pmo_node_head)) {
        return false;
    }

    for_each_in_list (temp, struct pmo_node, list_node, &pmo_node_head) {
        if ((unsigned long)temp->va < start + length
            && temp->va + temp->pmo_size > start) {
            return true;
        }
    }

    return false;
}

void *chcore_mmap(void *start, size_t length, int prot, int flags, int fd,
                  off_t off)
{
    struct pmo_node *node;
    void *map_addr = NULL;
    cap_t pmo_cap;
    int ret;
    vmr_prop_t map_perm = prot;

    if (fd != -1) {
        printf(
            "%s: only anonymous mapping (fd == -1) is supported, but fd is %d\n",
            __func__,
            fd);
        goto err_out;
    }

    /* Check @prot */
    if (prot & PROT_CHECK_MASK) {
        printf("%s: unsupported PROT: %d\n", __func__, prot);
        goto err_out;
    }

    /* Check @flags */
    if (flags & (~(MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED_NOREPLACE))) {
        printf("%s: only anonymous and private mappings are supported\n",
               __func__);
        goto err_out;
    }

    /* Round up @length to the page size. */
    if (length % PAGE_SIZE) {
        length = ROUND_UP(length, PAGE_SIZE);
    }

    /* Use VMR_COW (copy-on-write) to implement MAP_PRIVATE. */
    map_perm |= flags & MAP_PRIVATE ? VMR_COW : 0;

    pthread_once(&init_mmap_once, initial_mmap);

    pthread_spin_lock(&va2pmo_lock);
    if (flags & MAP_FIXED_NOREPLACE) {
        if (__is_overlapped_area((unsigned long)start, length)) {
            map_addr = NULL;
            errno = EEXIST;
        } else {
            map_addr = start;
        }
    } else {
        map_addr = (void *)chcore_alloc_vaddr(length);
    }

    if (map_addr == NULL) {
        printf("Fail: cannot allocate the requested virtual address range\n");
        goto err_exit;
    }

    /* Create an anonymous pmo backing the mapping. */
    pmo_cap = usys_create_pmo(length, PMO_ANONYM);
    if (pmo_cap <= 0) {
        printf("Fail: cannot create the new pmo for mmap\n");
        goto err_free_addr;
    }

    node = new_pmo_node(pmo_cap, (vaddr_t)map_addr, length, PMO_ANONYM, NULL);
    if (node == NULL) {
        goto err_free_pmo;
    }

    htable_add(&va2pmo, VA_TO_KEY(map_addr), &node->hash_node);
    add_node_in_order(node);
    pthread_spin_unlock(&va2pmo_lock);

    /* Map the pmo into the current cap group. */
    if ((ret = usys_map_pmo(SELF_CAP, pmo_cap, (vaddr_t)map_addr, map_perm))
        != 0) {
        goto err_free_node;
    }

    return map_addr;

err_free_node:
    /* The lock was released above, so take it again before touching the
     * bookkeeping structures. */
    pthread_spin_lock(&va2pmo_lock);
    htable_del(&node->hash_node);
    list_del(&node->list_node);
    free_pmo_node(node);
err_free_pmo:
    usys_revoke_cap(pmo_cap, false);
err_free_addr:
    chcore_free_vaddr((unsigned long)map_addr, length);
err_exit:
    /* Reached only on paths where va2pmo_lock is held. */
    pthread_spin_unlock(&va2pmo_lock);
err_out:
    map_addr = (void *)(-1);
    return map_addr;
}
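
/*
 * Illustrative usage (assumption: the libc mmap()/munmap() wrappers dispatch
 * to chcore_mmap()/chcore_munmap(); the size is arbitrary):
 *
 *     void *buf = mmap(NULL, 4 * PAGE_SIZE, PROT_READ | PROT_WRITE,
 *                      MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
 *     ...
 *     munmap(buf, 4 * PAGE_SIZE);
 *
 * chcore_mmap() allocates a virtual address range, creates an anonymous pmo,
 * records the va -> pmo mapping in va2pmo, and finally maps the pmo into the
 * current cap group.
 */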

void *chcore_fmap(void *start, size_t length, int prot, int flags, int fd,
                  off_t off)
{
    struct pmo_node *node;
    struct fd_record_extension *fd_ext;
    struct fs_request *fr;
    ipc_struct_t *_fs_ipc_struct;
    cap_t fmap_pmo_cap;
    ipc_msg_t *ipc_msg;
    long ret;

    BUG_ON(fd_dic[fd] == 0);
    /* The reply carries one cap slot holding the pmo_cap of the file mapping. */
    fd_ext = (struct fd_record_extension *)fd_dic[fd]->private_data;
    _fs_ipc_struct = get_ipc_struct_by_mount_id(fd_ext->mount_id);
    ipc_msg = ipc_create_msg(_fs_ipc_struct, sizeof(struct fs_request));

    fr = (struct fs_request *)ipc_get_msg_data(ipc_msg);

    /* Step: allocate a mmap address at the client (user level). */
    if (!start) {
        start = (void *)chcore_alloc_vaddr(length);
        if (!start) {
            ipc_destroy_msg(ipc_msg);
            return CHCORE_ERR_PTR(-ENOMEM);
        }
    }

    /* Step: ask the FS server to prepare the file-backed pmo. */
    fr->req = FS_REQ_FMAP;
    fr->mmap.addr = start;
    fr->mmap.length = length;
    fr->mmap.prot = prot;
    fr->mmap.flags = flags;
    fr->mmap.fd = fd;
    fr->mmap.offset = off;

    ret = ipc_call(_fs_ipc_struct, ipc_msg);
    if (ret < 0) {
        ipc_destroy_msg(ipc_msg);
        return CHCORE_ERR_PTR(ret);
    }

    BUG_ON(ipc_msg->cap_slot_number <= 0);

    fmap_pmo_cap = ipc_get_msg_cap(ipc_msg, 0);
    ipc_destroy_msg(ipc_msg);

    /* Step: map the pmo at the chosen address. */
    vmr_prop_t perm;
    perm = (prot & PROT_READ ? VMR_READ : 0)
           | (prot & PROT_WRITE ? VMR_WRITE : 0)
           | (prot & PROT_EXEC ? VMR_EXEC : 0);
    if (flags & MAP_PRIVATE && perm & VMR_WRITE) {
        /* Private file mappings are mapped copy-on-write. */
        perm &= (~VMR_WRITE);
        perm |= VMR_COW;
    }
    ret = usys_map_pmo_with_length(fmap_pmo_cap, (vaddr_t)start, perm, length);
    if (ret < 0) {
        return CHCORE_ERR_PTR(ret);
    }

    /* Step: record the mapping so that chcore_munmap can find it later. */
    pthread_once(&init_mmap_once, initial_mmap);
    node = new_pmo_node(fmap_pmo_cap, (vaddr_t)start, length, PMO_FILE,
                        _fs_ipc_struct);
    if (node == NULL) {
        return CHCORE_ERR_PTR(-ENOMEM);
    }
    pthread_spin_lock(&va2pmo_lock);
    htable_add(&va2pmo, VA_TO_KEY(start), &node->hash_node);
    add_node_in_order(node);
    pthread_spin_unlock(&va2pmo_lock);

    return start; /* Generated addr */
}
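
/*
 * Illustrative usage (assumption: the libc mmap() wrapper routes file-backed
 * requests, i.e. fd != -1, to chcore_fmap(); "config.bin" is a placeholder):
 *
 *     int fd = open("config.bin", O_RDONLY);
 *     void *img = mmap(NULL, 2 * PAGE_SIZE, PROT_READ, MAP_PRIVATE, fd, 0);
 *     ...
 *     munmap(img, 2 * PAGE_SIZE);
 *
 * The pmo_cap returned by the FS server is recorded as a PMO_FILE node, so a
 * later chcore_munmap() also sends FS_REQ_FUNMAP back to the same FS server.
 */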

int chcore_munmap(void *start, size_t length)
{
    cap_t pmo_cap;
    int ret = 0;
    size_t pmo_size;
    vaddr_t addr;
    vaddr_t end_addr;
    struct pmo_node *node;
    struct pmo_node *prev_node = NULL;
    int type;
    struct fs_request *fr;
    ipc_struct_t *_fs_ipc_struct;
    ipc_msg_t *ipc_msg;

    if (((vaddr_t)start % PAGE_SIZE) || (length % PAGE_SIZE)) {
        ret = -EINVAL;
        return ret;
    }

    if (length == 0) {
        return 0;
    }

    addr = (vaddr_t)start;
    end_addr = (vaddr_t)start + length;
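
    /*
     * Walk the recorded pmo nodes that fall inside [addr, end_addr). Each
     * iteration removes one node from the index, unmaps and revokes its pmo,
     * and frees its virtual address range. The previous node is freed one
     * iteration late because get_next_pmo_node() resumes the list walk from
     * its (still intact) list_node links.
     */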
    while (length != 0) {
        pthread_spin_lock(&va2pmo_lock);
        node = get_next_pmo_node((void *)addr, length, prev_node);
        if (node == NULL) {
            pthread_spin_unlock(&va2pmo_lock);
            if (prev_node)
                free_pmo_node(prev_node);
            return ret;
        }

        pmo_cap = node->cap;
        pmo_size = node->pmo_size;
        addr = node->va;
        type = node->type;
        _fs_ipc_struct = node->_fs_ipc_struct;

        hlist_del(&node->hash_node);
        list_del(&node->list_node);
        if (prev_node)
            free_pmo_node(prev_node);

        usys_unmap_pmo(SELF_CAP, pmo_cap, (vaddr_t)addr);
        usys_revoke_cap(pmo_cap, false);
        chcore_free_vaddr(addr, pmo_size);
        pthread_spin_unlock(&va2pmo_lock);

        if (type == PMO_FILE) {
            /* Tell the FS server to tear down the file mapping as well. */
            ipc_msg = ipc_create_msg(_fs_ipc_struct, sizeof(struct fs_request));
            fr = (struct fs_request *)ipc_get_msg_data(ipc_msg);

            fr->req = FS_REQ_FUNMAP;
            fr->munmap.addr = (void *)addr;
            fr->munmap.length = pmo_size;
            ret = ipc_call(_fs_ipc_struct, ipc_msg);
            ipc_destroy_msg(ipc_msg);
        }

        addr += pmo_size;
        length = end_addr - addr;
        prev_node = node;
    }

    free_pmo_node(node);
    return ret;
}

/*
 * Lock the common stack and return its top address.
 *
 * Once a thread has unmapped its own stack, it cannot make any further
 * function calls on it. So we switch to the common stack, run the reclamation
 * code (including reclaiming the thread stack itself), and release the common
 * stack lock just before trapping into the kernel.
 *
 * If the common stack failed to initialize, the current thread stack cannot
 * be released here; the thread-exit system call is issued directly instead,
 * and the thread stack is reclaimed when the whole cap group is recycled.
 */
vaddr_t chcore_lock_common_stack(void)
{
    pthread_once(&init_common_stack_once, initial_common_stack);
    if (unlikely(!__initial_common_stack_success)) {
        printf("init common stack failed!\n");
        usys_exit(0);
    }

    pthread_spin_lock(&common_stack_lock);
    return common_stack_addr;
}

/*
 * Unmap the thread stack right before the thread exits.
 * We must return to unmapself.s and release the lock in assembly code.
 * Otherwise, another thread could start using the common stack as soon as the
 * lock is released and overwrite the current thread's return address that
 * still lives on that stack.
 */
vaddr_t chcore_unmapself(void *start, size_t length)
{
    chcore_munmap(start, length);
    return (vaddr_t)&common_stack_lock;
}
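
/*
 * Sketch of how the assembly side is expected to use the two functions above.
 * The real code lives in unmapself.s; the exact exit syscall and the way the
 * lock is released are assumptions here.
 *
 *     top  = chcore_lock_common_stack();   // returns the common stack top
 *     // switch sp to `top` before touching the dying thread stack again
 *     lock = chcore_unmapself(stack, len); // free own stack, get &common_stack_lock
 *     // release the lock without a function call (assumed store of 0),
 *     // then trap via the thread-exit syscall; control never returns
 */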