/*
 * Copyright (c) 2023 Institute of Parallel And Distributed Systems (IPADS), Shanghai Jiao Tong University (SJTU)
 * Licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *     http://license.coscl.org.cn/MulanPSL2
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
 * PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include "chcore/error.h"
#include "chcore/ipc.h"
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <chcore/defs.h>
#include <chcore/memory.h>
#include <chcore/syscall.h>
#include <pthread.h>
#include <chcore/container/list.h>
#include <sys/mman.h>
#include <chcore-internal/fs_defs.h>
#include <chcore/type.h>

#include "chcore_mman.h"
#include "fd.h"

/*
 * When a thread is created, it mmaps a chunk of memory for the thread stack
 * and TLS, which is reclaimed when the thread exits (by calling unmapself).
 * Unmapself is a piece of assembly code that jumps to munmap to reclaim the
 * associated memory.
 *
 * Linux implements mmap/munmap in kernel mode, so unmapself.s reclaims the
 * associated memory directly through system calls. Because the user and
 * kernel stacks of a thread are different, after the user stack is reclaimed
 * in the kernel, the control flow can still return to unmapself and make
 * further system calls without touching the thread stack.
 *
 * ChCore implements mmap in user mode and records the relevant mapping
 * information in user mode, so these data structures must be reclaimed when a
 * thread exits. However, once the thread stack is released and the relevant
 * data structures are reclaimed through C code, control cannot return to
 * unmapself again, and the subsequent thread_exit system call cannot be
 * executed. Therefore, the user-level version of unmapself maintains a common
 * stack that serves as a temporary thread stack while a thread exits.
 */
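
/*
 * Rough sketch of the expected exit path (the exact calling convention lives
 * in the architecture-specific unmapself assembly, not in this file):
 *
 *   sp_top = chcore_lock_common_stack();    // lock and get the common stack
 *   switch sp to sp_top;                    // the old stack may now go away
 *   lock = chcore_unmapself(stack, size);   // reclaim the thread stack in C
 *   release *lock in assembly;              // other exiting threads may now
 *                                           // reuse the common stack
 *   thread_exit system call;                // never returns
 */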

static bool __initial_common_stack_success = false;

struct htable va2pmo;
/* For sequential access. */
static struct list_head pmo_node_head;
static pthread_spinlock_t va2pmo_lock;

/* For unmapself. */
static cap_t common_stack_pmo_cap;
static vaddr_t common_stack_addr;
pthread_spinlock_t common_stack_lock;

/* For one-time initialization. */
pthread_once_t init_mmap_once = PTHREAD_ONCE_INIT;
pthread_once_t init_common_stack_once = PTHREAD_ONCE_INIT;

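/* One-time setup of the va->pmo bookkeeping: hash table, ordered list and lock. */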
static void initial_mmap(void)
{
        pthread_spin_init(&va2pmo_lock, 0);
        init_htable(&va2pmo, HASH_TABLE_SIZE);
        init_list_head(&pmo_node_head);
}

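/*
 * One-time setup of the common stack used by unmapself: create an anonymous
 * pmo, reserve a virtual address range for it and map it read/write. On any
 * failure the partially created resources are released and
 * __initial_common_stack_success stays false.
 */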
static void initial_common_stack(void)
{
        u64 prot;
        vaddr_t stack_bottom_addr;
        int ret = 0;

        pthread_spin_init(&common_stack_lock, 0);

        common_stack_pmo_cap = usys_create_pmo(UNMAPSELF_STACK_SIZE, PMO_ANONYM);
        if (common_stack_pmo_cap < 0) {
                printf("Failed to create pmo for the unmapself stack\n");
                ret = common_stack_pmo_cap;
                goto fail_out;
        }

        stack_bottom_addr = (vaddr_t)chcore_alloc_vaddr(UNMAPSELF_STACK_SIZE);
        if (!stack_bottom_addr) {
                printf("Failed to allocate vaddr for the unmapself stack\n");
                ret = -ENOMEM;
                goto revoke_pmo_cap;
        }

        /* Prepare the common stack for thread exiting. */
        prot = PROT_READ | PROT_WRITE;
        ret = usys_map_pmo(SELF_CAP, common_stack_pmo_cap, stack_bottom_addr, prot);
        if (ret < 0) {
                printf("Failed to map the unmapself stack\n");
                goto free_addr;
        }
        common_stack_addr = stack_bottom_addr + UNMAPSELF_STACK_SIZE;
        __initial_common_stack_success = true;
        return;

free_addr:
        chcore_free_vaddr(stack_bottom_addr, UNMAPSELF_STACK_SIZE);
revoke_pmo_cap:
        usys_revoke_cap(common_stack_pmo_cap, false);
fail_out:
        __initial_common_stack_success = false;
}

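/*
 * Allocate a pmo_node describing one mapping: its pmo cap, start va, size,
 * type (PMO_ANONYM or PMO_FILE) and, for file mappings, the ipc struct of the
 * owning FS server.
 */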
static struct pmo_node *new_pmo_node(cap_t cap, vaddr_t va, size_t length,
                                     int type, ipc_struct_t *_fs_ipc_struct)
{
        struct pmo_node *node;

        node = (struct pmo_node *)malloc(sizeof(struct pmo_node));
        if (node == NULL) {
                return NULL;
        }
        node->cap = cap;
        node->va = va;
        node->pmo_size = length;
        node->type = type;
        node->_fs_ipc_struct = _fs_ipc_struct;
        init_hlist_node(&node->hash_node);
        return node;
}

static inline void free_pmo_node(struct pmo_node *node)
{
        if (node) {
                free(node);
        }
}

/*
 * Find the first pmo node that lies entirely within [va, va + length).
 * If @start_pmo_node is NULL, try an exact lookup of @va in the hash table
 * first, then scan the ordered list from the beginning; otherwise continue
 * scanning from @start_pmo_node.
 */
static struct pmo_node *get_next_pmo_node(void *va, int length,
                                          struct pmo_node *start_pmo_node)
{
        struct hlist_head *buckets;
        struct pmo_node *node = NULL;
        struct list_head *start;

        if (!start_pmo_node) {
                buckets = htable_get_bucket(&va2pmo, VA_TO_KEY(va));

                for_each_in_hlist (node, hash_node, buckets) {
                        if (node->va == (vaddr_t)va) {
                                goto out;
                        }
                }
                start = &pmo_node_head;
        } else {
                start = &start_pmo_node->list_node;
        }

        if (start->next == &pmo_node_head) {
                goto fail;
        }

        for_each_in_list (node, struct pmo_node, list_node, start) {
                if (node->va >= (vaddr_t)va
                    && node->va + node->pmo_size <= (vaddr_t)va + length) {
                        goto out;
                }
        }

fail:
        node = NULL;
out:
        return node;
}

/* Insert the node into the list, keeping it sorted by virtual address. */
static void add_node_in_order(struct pmo_node *node)
{
        struct pmo_node *temp;

        if (list_empty(&pmo_node_head)) {
                list_add(&node->list_node, &pmo_node_head);
                return;
        }

        for_each_in_list (temp, struct pmo_node, list_node, &pmo_node_head) {
                if ((u64)temp->va > (u64)node->va) {
                        list_add(&node->list_node, temp->list_node.prev);
                        return;
                }
        }
        list_add(&node->list_node, pmo_node_head.prev);
}

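/*
 * Check whether [start, start + length) overlaps any recorded mapping.
 * The caller must hold va2pmo_lock.
 */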
static bool __is_overlapped_area(unsigned long start, size_t length)
{
        struct pmo_node *temp;

        if (list_empty(&pmo_node_head)) {
                return false;
        }

        for_each_in_list (temp, struct pmo_node, list_node, &pmo_node_head) {
                if ((unsigned long)temp->va < start + length
                    && temp->va + temp->pmo_size > start) {
                        return true;
                }
        }

        return false;
}

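/*
 * Anonymous mmap: reserve a virtual address range (or honor
 * MAP_FIXED_NOREPLACE), create an anonymous pmo of the page-aligned length,
 * record the mapping in va2pmo and map the pmo into the current cap group.
 * Returns (void *)-1, i.e. MAP_FAILED, on failure.
 */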
void *chcore_mmap(void *start, size_t length, int prot, int flags, int fd,
                  off_t off)
{
        struct pmo_node *node;
        void *map_addr = NULL;
        cap_t pmo_cap;
        int ret;
        vmr_prop_t map_perm = prot;

        if (fd != -1) {
                printf("%s: here only supports anonymous mapping with fd -1, but arg fd is %d\n",
                       __func__,
                       fd);
                goto err_exit;
        }

        /* Check @prot */
        if (prot & PROT_CHECK_MASK) {
                printf("%s: here cannot support PROT: %d\n", __func__, prot);
                goto err_exit;
        }

        /* Check @flags */
        if (flags & (~(MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED_NOREPLACE))) {
                printf("%s: here only supports anonymous and private mapping\n",
                       __func__);
                goto err_exit;
        }

        /* Round up @length */
        if (length % PAGE_SIZE) {
                length = ROUND_UP(length, PAGE_SIZE);
        }

        /* Use a VMR_COW (copy-on-write) mapping to implement MAP_PRIVATE. */
        map_perm |= (flags & MAP_PRIVATE) ? VMR_COW : 0;

        pthread_once(&init_mmap_once, initial_mmap);

        pthread_spin_lock(&va2pmo_lock);
        if (flags & MAP_FIXED_NOREPLACE) {
                if (__is_overlapped_area((unsigned long)start, length)) {
                        map_addr = NULL;
                        errno = EEXIST;
                } else {
                        map_addr = start;
                }
        } else {
                map_addr = (void *)chcore_alloc_vaddr(length);
        }

        if (map_addr == NULL) {
                printf("Fail: allocate vaddr failed\n");
                goto err_unlock;
        }

        /* pmo create */
        pmo_cap = usys_create_pmo(length, PMO_ANONYM);
        if (pmo_cap <= 0) {
                printf("Fail: cannot create the new pmo for mmap\n");
                goto err_free_addr;
        }

        node = new_pmo_node(pmo_cap, (vaddr_t)map_addr, length, PMO_ANONYM, NULL);
        if (node == NULL) {
                goto err_free_pmo;
        }

        htable_add(&va2pmo, VA_TO_KEY(map_addr), &node->hash_node);
        add_node_in_order(node);
        pthread_spin_unlock(&va2pmo_lock);

        /* map pmo */
        if ((ret = usys_map_pmo(SELF_CAP, pmo_cap, (vaddr_t)map_addr, map_perm))
            != 0) {
                goto err_free_node;
        }

        return map_addr;

err_free_node:
        /* The lock was released before mapping; take it again to undo the bookkeeping. */
        pthread_spin_lock(&va2pmo_lock);
        htable_del(&node->hash_node);
        list_del(&node->list_node);
        pthread_spin_unlock(&va2pmo_lock);
        free_pmo_node(node);
        usys_revoke_cap(pmo_cap, false);
        chcore_free_vaddr((unsigned long)map_addr, length);
        goto err_exit;

err_free_pmo:
        usys_revoke_cap(pmo_cap, false);
err_free_addr:
        chcore_free_vaddr((unsigned long)map_addr, length);
err_unlock:
        pthread_spin_unlock(&va2pmo_lock);
err_exit:
        map_addr = (void *)(-1);
        return map_addr;
}

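/*
 * File-backed mmap: ask the FS server (FS_REQ_FMAP) for a pmo backing the
 * file range starting at @off, map it with the requested protection (private
 * writable mappings become copy-on-write), and record it as a PMO_FILE node
 * so that chcore_munmap can notify the server later.
 */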
void *chcore_fmap(void *start, size_t length, int prot, int flags, int fd,
                  off_t off)
{
        struct pmo_node *node;
        struct fd_record_extension *fd_ext;
        struct fs_request *fr;
        ipc_struct_t *_fs_ipc_struct;
        cap_t fmap_pmo_cap;
        ipc_msg_t *ipc_msg;
        long ret;

        BUG_ON(fd_dic[fd] == 0);

        fd_ext = (struct fd_record_extension *)fd_dic[fd]->private_data;
        _fs_ipc_struct = get_ipc_struct_by_mount_id(fd_ext->mount_id);
        /* One cap slot is used to receive the pmo cap from the FS server. */
        ipc_msg = ipc_create_msg(_fs_ipc_struct, sizeof(struct fs_request));

        fr = (struct fs_request *)ipc_get_msg_data(ipc_msg);

        /* Step: Allocate a mmap address in client user-level */
        if (!start) {
                start = (void *)chcore_alloc_vaddr(length);
                if (!start) {
                        ipc_destroy_msg(ipc_msg);
                        return CHCORE_ERR_PTR(-ENOMEM);
                }
        }

        fr->req = FS_REQ_FMAP;
        fr->mmap.addr = start;
        fr->mmap.length = length;
        fr->mmap.prot = prot;
        fr->mmap.flags = flags;
        fr->mmap.fd = fd;
        fr->mmap.offset = off;

        ret = ipc_call(_fs_ipc_struct, ipc_msg);
        if (ret < 0) {
                ipc_destroy_msg(ipc_msg);
                return CHCORE_ERR_PTR(ret);
        }

        BUG_ON(ipc_msg->cap_slot_number <= 0);

        fmap_pmo_cap = ipc_get_msg_cap(ipc_msg, 0);
        ipc_destroy_msg(ipc_msg);

        /* Step: map the pmo at the chosen address */
        vmr_prop_t perm;
        perm = (prot & PROT_READ ? VMR_READ : 0)
               | (prot & PROT_WRITE ? VMR_WRITE : 0)
               | (prot & PROT_EXEC ? VMR_EXEC : 0);
        if ((flags & MAP_PRIVATE) && (perm & VMR_WRITE)) {
                /* Private writable file mappings are mapped copy-on-write. */
                perm &= ~VMR_WRITE;
                perm |= VMR_COW;
        }
        ret = usys_map_pmo_with_length(fmap_pmo_cap, (vaddr_t)start, perm, length);
        if (ret < 0) {
                return CHCORE_ERR_PTR(ret);
        }

        pthread_once(&init_mmap_once, initial_mmap);
        pthread_spin_lock(&va2pmo_lock);
        node = new_pmo_node(fmap_pmo_cap, (vaddr_t)start, length, PMO_FILE, _fs_ipc_struct);
        htable_add(&va2pmo, VA_TO_KEY(start), &node->hash_node);
        add_node_in_order(node);
        pthread_spin_unlock(&va2pmo_lock);

        return start; /* Generated addr */
}

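/*
 * Unmap every recorded mapping fully contained in [start, start + length):
 * drop the bookkeeping, unmap and revoke the pmo, return the virtual address
 * range, and, for PMO_FILE mappings, send FS_REQ_FUNMAP to the owning FS
 * server.
 */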
int chcore_munmap(void *start, size_t length)
{
        cap_t pmo_cap;
        int ret = 0;
        size_t pmo_size;
        vaddr_t addr;
        vaddr_t end_addr;
        struct pmo_node *node;
        struct pmo_node *prev_node = NULL;
        int type;
        struct fs_request *fr;
        ipc_struct_t *_fs_ipc_struct;
        ipc_msg_t *ipc_msg;

        if (((vaddr_t)start % PAGE_SIZE) || (length % PAGE_SIZE)) {
                ret = -EINVAL;
                return ret;
        }

        if (length == 0) {
                return 0;
        }

        addr = (vaddr_t)start;
        end_addr = (vaddr_t)start + length;
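        /*
         * Walk the recorded mappings that fall inside [addr, end_addr) one by
         * one. Freeing the node found in this iteration is deferred to the
         * next iteration so that get_next_pmo_node() can resume the list scan
         * from it.
         */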
        while (length != 0) {
                pthread_spin_lock(&va2pmo_lock);
                node = get_next_pmo_node((void *)addr, length, prev_node);
                if (node == NULL) {
                        pthread_spin_unlock(&va2pmo_lock);
                        if (prev_node)
                                free_pmo_node(prev_node);
                        return ret;
                }

                pmo_cap = node->cap;
                pmo_size = node->pmo_size;
                addr = node->va;
                type = node->type;
                _fs_ipc_struct = node->_fs_ipc_struct;

                hlist_del(&node->hash_node);
                list_del(&node->list_node);
                if (prev_node)
                        free_pmo_node(prev_node);

                usys_unmap_pmo(SELF_CAP, pmo_cap, addr);
                usys_revoke_cap(pmo_cap, false);
                chcore_free_vaddr(addr, pmo_size);
                pthread_spin_unlock(&va2pmo_lock);

                if (type == PMO_FILE) {
                        ipc_msg = ipc_create_msg(_fs_ipc_struct, sizeof(struct fs_request));
                        fr = (struct fs_request *)ipc_get_msg_data(ipc_msg);

                        fr->req = FS_REQ_FUNMAP;
                        fr->munmap.addr = (void *)addr;
                        fr->munmap.length = pmo_size;
                        ret = ipc_call(_fs_ipc_struct, ipc_msg);
                        ipc_destroy_msg(ipc_msg);
                }

                addr += pmo_size;
                length = end_addr - addr;
                prev_node = node;
        }

        free_pmo_node(node);
        return ret;
}

/*
 * Lock the common stack and return the top address of the stack.
 *
 * Once a thread has unmapped its own stack, it cannot make any function
 * calls. So we switch to the common stack first, call the reclamation
 * functions (including reclaiming the thread stack), and only release the
 * common stack lock right before trapping into the kernel.
 *
 * If the common stack initialization failed, the current thread stack cannot
 * be released here, so the thread_exit system call is invoked directly and
 * the thread stack is reclaimed when the entire cap group is recycled.
 */
vaddr_t chcore_lock_common_stack(void)
{
        pthread_once(&init_common_stack_once, initial_common_stack);
        if (unlikely(!__initial_common_stack_success)) {
                printf("init common stack failed!\n");
                usys_exit(0);
        }

        pthread_spin_lock(&common_stack_lock);
        return common_stack_addr;
}

/*
 * Unmap the thread stack before the thread exits.
 * We must return to unmapself.s and release the lock in assembly code.
 * Otherwise, another thread could start using the common stack right after
 * the current thread releases the lock, clobbering the current thread's
 * return address stored on the common stack.
 */
vaddr_t chcore_unmapself(void *start, size_t length)
{
        chcore_munmap(start, length);
        return (vaddr_t)&common_stack_lock;
}