• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023 Institute of Parallel And Distributed Systems (IPADS), Shanghai Jiao Tong University (SJTU)
3  * Licensed under the Mulan PSL v2.
4  * You can use this software according to the terms and conditions of the Mulan PSL v2.
5  * You may obtain a copy of Mulan PSL v2 at:
6  *     http://license.coscl.org.cn/MulanPSL2
7  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
8  * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
9  * PURPOSE.
10  * See the Mulan PSL v2 for more details.
11  */
12 #ifdef CHCORE_ENABLE_FMAP
13 #include <stddef.h>
14 /**
15  * User-level lib for handling user page fault in fmap
16  */
17 #include <time.h>
18 #include <pthread.h>
19 #include <malloc.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <chcore-internal/fs_debug.h>
23 #include <chcore/defs.h>
24 #include <chcore/ring_buffer.h>
25 #include <chcore/container/list.h>
26 #include <chcore/memory.h>
27 
28 #include "fs_page_fault.h"
29 #include "fs_page_cache.h"
30 #include "fs_wrapper_defs.h"
31 #include "fs_vnode.h"
32 
/* Ring buffer shared with the kernel; carries incoming page-fault messages */
struct ring_buffer *fault_msg_buffer;
/* Capacity of fault_msg_buffer, in messages */
#define MAX_MSG_NUM 100
/* Notification object the kernel signals when a fault message is enqueued */
cap_t notific_cap;
/* List of recorded fmap areas (struct fmap_area_mapping nodes) */
struct list_head fmap_area_mappings;
/* Guards fmap_area_mappings (readers: find; writers: insert/remove/recycle) */
pthread_rwlock_t fmap_area_lock;
38 
39 /**
40  * If page cache module is available,
41  *      use addr of page cache page first.
42  * Else,
43  *      use specific operation defined by under file system (eg. tmpfs)
44  * Return (vaddr_t)0 as error.
45  */
fs_wrapper_fmap_get_page_addr(struct fs_vnode * vnode,off_t offset)46 vaddr_t fs_wrapper_fmap_get_page_addr(struct fs_vnode *vnode, off_t offset)
47 {
48     vaddr_t page_buf;
49     off_t page_idx;
50 
51     pthread_rwlock_rdlock(&vnode->rwlock);
52 
53     assert(offset % PAGE_SIZE == 0);
54     if (offset >= ROUND_UP(vnode->size, PAGE_SIZE)) {
55         /* out-of-range */
56         pthread_rwlock_unlock(&vnode->rwlock);
57         return (vaddr_t)0;
58     }
59 
60     if (using_page_cache) {
61         page_idx = offset / PAGE_SIZE;
62         page_buf = (vaddr_t)page_cache_get_block_or_page(
63             vnode->page_cache, page_idx, -1, READ);
64     } else {
65         page_buf = server_ops.fmap_get_page_addr(vnode->private, offset);
66     }
67 
68     pthread_rwlock_unlock(&vnode->rwlock);
69     return (vaddr_t)page_buf;
70 }
71 
/**
 * Handle a single user-level page fault for an fmap'ed region.
 *
 * Looks up the recorded mapping area for (@fault_badge, @fault_va),
 * resolves the backing server page, and maps it into the client's
 * address space via usys_user_fault_map().
 *
 * For MAP_SHARED faults past EOF the file is grown with ftruncate so a
 * backing page exists. For MAP_PRIVATE the page is mapped read-only
 * (plus exec if requested) so a later write can trigger copy-on-write.
 *
 * Returns 0 on success, a negative errno value on failure.
 */
static int handle_one_fault(badge_t fault_badge, vaddr_t fault_va)
{
    vaddr_t server_page_addr;
    size_t area_off;
    struct fs_vnode *vnode;
    off_t file_offset;
    u64 flags;
    vmr_prop_t prot, map_perm = 0;
    bool copy = 0;
    int ret;

    fs_debug_trace_fswrapper("badge=0x%x, va=0x%lx\n", fault_badge, fault_va);

    /* Find mapping area info */
    ret = fmap_area_find(
        fault_badge, fault_va, &area_off, &vnode, &file_offset, &flags, &prot);
    if (ret < 0) {
        fs_debug_error("ret = %d\n", ret);
        BUG_ON("why a fault happened when not recorded\n");
    }

    fs_debug_trace_fswrapper(
        "fmap_area: area_off=0x%lx, file_off=0x%lx, flags=%ld\n",
        area_off,
        file_offset,
        flags);

    /* Get a server address space page va for mapping client */
    server_page_addr =
        fs_wrapper_fmap_get_page_addr(vnode, file_offset + area_off);
    if (!server_page_addr) {
        /* The file offset is out-of-range */
        fs_debug_warn("vnode->size=0x%lx, offset=0x%lx\n",
                      vnode->size,
                      file_offset + area_off);
    }

    /* Handle flags */
    if (flags & MAP_SHARED) {
        copy = 0;
        map_perm = prot;
        if (!server_page_addr) {
            /* Shared fault past EOF: grow the file so a page exists */
            pthread_rwlock_wrlock(&vnode->rwlock);
            ret = server_ops.ftruncate(vnode->private,
                                       file_offset + area_off + PAGE_SIZE);
            if (ret) {
                pthread_rwlock_unlock(&vnode->rwlock);
                return ret;
            }
            vnode->size = file_offset + area_off + PAGE_SIZE;
            /*
             * BUGFIX: drop the write lock *before* re-resolving the page.
             * fs_wrapper_fmap_get_page_addr() acquires vnode->rwlock as a
             * reader; POSIX rwlocks are not recursive, so calling it while
             * holding the write lock self-deadlocks.
             */
            pthread_rwlock_unlock(&vnode->rwlock);
            server_page_addr =
                fs_wrapper_fmap_get_page_addr(vnode, file_offset + area_off);
            if (!server_page_addr) {
                /*
                 * BUGFIX: the original returned `ret` here, which is 0
                 * after a successful ftruncate — reporting success on
                 * failure. Return a real error instead.
                 */
                return -EINVAL;
            }
        }
    } else if (flags & MAP_PRIVATE) {
        copy = 0;
        /* Private mapping: no write perm yet, so writes can fault for CoW */
        map_perm = VMR_READ | (prot & VMR_EXEC);
        if (!server_page_addr) {
            /* The file offset is out-of-range */
            fs_debug_warn("vnode->size=0x%lx, offset=0x%lx\n",
                          vnode->size,
                          file_offset + area_off);
        }
    }

    /* Map client page table, and notify fault thread */
    ret = usys_user_fault_map(
        fault_badge, fault_va, server_page_addr, copy, map_perm);
    if (ret < 0) {
        BUG_ON("this call should always be success here\n");
    }

    return 0;
}
156 
/**
 * Fault-handler thread entry: waits on notific_cap and drains fault
 * messages from fault_msg_buffer, dispatching each to handle_one_fault().
 * Never returns under normal operation (infinite loop); @args is unused.
 */
void *user_fault_handler(void *args)
{
    struct user_fault_msg msg;
    int ret;

    /* Raise this thread's priority so faults are served promptly */
    usys_set_prio(0, 55);
    sched_yield();

    while (1) {
        /* Block until the kernel signals that a fault message was queued */
        usys_wait(notific_cap, 1 /* Block */, NULL);
        /* Drain every pending message before sleeping again */
        while (get_one_msg(fault_msg_buffer, &msg)) {
            /* NOTE(review): `(u64)msg` casts a struct to an integer — this
             * presumably only compiles when the trace macro expands to
             * nothing; confirm fs_debug_trace_fswrapper's definition. */
            fs_debug_trace_fswrapper(
                "fault_msg_slot: 0x%lx | 0x%lx | 0x%lx\n",
                (vaddr_t)fault_msg_buffer,
                (u64)msg,
                (vaddr_t)((void *)fault_msg_buffer + END_OFFSET));
            /* Handle msg; failures are logged but do not stop the loop */
            ret = handle_one_fault(msg.fault_badge, msg.fault_va);
            if (ret) {
                fs_debug_error("ret = %d\n", ret);
            }
        }
    }
    return NULL;
}
182 
fs_page_fault_init(void)183 int fs_page_fault_init(void)
184 {
185     int ret;
186     pthread_t fh;
187 
188     /* Create a ring buffer to recieve kernel fault msg */
189     fault_msg_buffer =
190         new_ringbuffer(MAX_MSG_NUM, sizeof(struct user_fault_msg));
191     if (fault_msg_buffer == 0)
192         return -ENOMEM;
193 
194     /* Create a notification for fault handler */
195     notific_cap = usys_create_notifc();
196     if (notific_cap < 0)
197         return notific_cap;
198 
199     /* Register fmap_fault_pool in kernel using syscall */
200     ret = usys_user_fault_register(notific_cap, (vaddr_t)fault_msg_buffer);
201     if (ret < 0) {
202         free_ringbuffer(fault_msg_buffer);
203         return ret;
204     }
205 
206     /* Init fmap_area_mapping list */
207     init_list_head(&fmap_area_mappings);
208     pthread_rwlock_init(&fmap_area_lock, NULL);
209 
210     /* Create fault handler to do user-level page fault */
211     ret = pthread_create(&fh, NULL, user_fault_handler, NULL);
212     if (ret < 0) {
213         free_ringbuffer(fault_msg_buffer);
214         return ret;
215     }
216 
217     return 0;
218 }
219 
220 /**
221  * Helpers for fmap_area_mappings
222  */
223 
224 static struct fmap_area_mapping *
create_fmap_mapping(badge_t client_badge,vaddr_t client_va_start,size_t length,struct fs_vnode * vnode,off_t file_offset,u64 flags,vmr_prop_t prot)225 create_fmap_mapping(badge_t client_badge, vaddr_t client_va_start,
226                     size_t length, struct fs_vnode *vnode, off_t file_offset,
227                     u64 flags, vmr_prop_t prot)
228 {
229     struct fmap_area_mapping *mapping;
230 
231     mapping = (struct fmap_area_mapping *)malloc(sizeof(*mapping));
232     if (!mapping) {
233         return NULL;
234     }
235     mapping->client_badge = client_badge;
236     mapping->client_va_start = client_va_start;
237     mapping->length = length;
238     mapping->vnode = vnode;
239     inc_ref_fs_vnode(vnode);
240     mapping->file_offset = file_offset;
241     mapping->flags = flags;
242     mapping->prot = prot;
243 
244     return mapping;
245 }
246 
deinit_fmap_mapping(struct fmap_area_mapping * mapping)247 static void deinit_fmap_mapping(struct fmap_area_mapping *mapping)
248 {
249     dec_ref_fs_vnode(mapping->vnode);
250     free(mapping);
251 }
252 
__is_overlapped_area(struct fmap_area_mapping * new_mapping)253 static bool __is_overlapped_area(struct fmap_area_mapping *new_mapping)
254 {
255     struct fmap_area_mapping *iter;
256     if (list_empty(&fmap_area_mappings)) {
257         return false;
258     }
259 
260     for_each_in_list (
261         iter, struct fmap_area_mapping, node, &fmap_area_mappings) {
262         if (new_mapping->client_badge == iter->client_badge
263             /* StartA <= EndB */
264             && new_mapping->client_va_start
265                    < iter->client_va_start + iter->length
266             /* EndA >= StartB */
267             && new_mapping->client_va_start + new_mapping->length
268                    > iter->client_va_start) {
269             return true;
270         }
271     }
272 
273     return false;
274 }
275 
/**
 * Record a new client fmap area so later faults can be resolved.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EEXIST when the
 * range overlaps an existing area of the same client.
 */
int fmap_area_insert(badge_t client_badge, vaddr_t client_va_start,
                     size_t length, struct fs_vnode *vnode, off_t file_offset,
                     u64 flags, vmr_prop_t prot)
{
    struct fmap_area_mapping *mapping = create_fmap_mapping(
        client_badge, client_va_start, length, vnode, file_offset, flags, prot);
    if (!mapping)
        return -ENOMEM;

    fs_debug_trace_fswrapper(
        "client_badge=0x%x, client_va_start=0x%lx, length=%ld\n"
        "vnode->id=%ld, file_offset=%ld, flags=%ld\n",
        client_badge,
        client_va_start,
        length,
        vnode->vnode_id,
        file_offset,
        flags);
    pthread_rwlock_wrlock(&fmap_area_lock);
    if (__is_overlapped_area(mapping)) {
        pthread_rwlock_unlock(&fmap_area_lock);
        /*
         * BUGFIX: create_fmap_mapping() took a vnode reference; a bare
         * free(mapping) leaked it. Use deinit_fmap_mapping() so the
         * reference is dropped too.
         */
        deinit_fmap_mapping(mapping);
        return -EEXIST;
    }
    list_append(&mapping->node, &fmap_area_mappings);
    pthread_rwlock_unlock(&fmap_area_lock);
    return 0;
}
304 
305 /**
306  * [IN] client_badge, client_va
307  * [OUT] area_off, vnode, file_offset, prot
308  */
/**
 * Look up the fmap area containing (@client_badge, @client_va).
 *
 * [IN]  client_badge, client_va
 * [OUT] area_off (offset of @client_va within the area), vnode,
 *       file_offset, flags, prot
 *
 * Returns 0 on a hit, -1 when no recorded area contains the address.
 */
int fmap_area_find(badge_t client_badge, vaddr_t client_va, size_t *area_off,
                   struct fs_vnode **vnode, off_t *file_offset, u64 *flags,
                   vmr_prop_t *prot)
{
    struct fmap_area_mapping *cur;
    int found = -1; /* Not Found */

    pthread_rwlock_rdlock(&fmap_area_lock);
    for_each_in_list (
        cur, struct fmap_area_mapping, node, &fmap_area_mappings) {
        if (cur->client_badge != client_badge)
            continue;
        if (client_va < cur->client_va_start
            || client_va >= cur->client_va_start + cur->length)
            continue;
        /* Hit */
        *area_off = client_va - cur->client_va_start;
        *vnode = cur->vnode;
        *file_offset = cur->file_offset;
        *flags = cur->flags;
        *prot = cur->prot;
        found = 0;
        break;
    }
    pthread_rwlock_unlock(&fmap_area_lock);

    return found;
}
334 
/**
 * Remove the fmap area exactly matching (@client_badge, @client_va_start,
 * @length) and release its record (dropping its vnode reference).
 *
 * Returns 0 on success, -EINVAL when no exact match exists.
 */
int fmap_area_remove(badge_t client_badge, vaddr_t client_va_start,
                     size_t length)
{
    int ret = -EINVAL;
    struct fmap_area_mapping *area_iter, *tmp;

    /*
     * BUGFIX: this function mutates the list (list_del), so it must take
     * the WRITE lock; the original took the read lock, allowing the list
     * to be modified concurrently with readers.
     */
    pthread_rwlock_wrlock(&fmap_area_lock);
    for_each_in_list_safe (area_iter, tmp, node, &fmap_area_mappings) {
        if (area_iter->client_badge == client_badge
            && (area_iter->client_va_start == client_va_start)
            && (area_iter->length == length)) {
            list_del(&area_iter->node);
            deinit_fmap_mapping(area_iter);
            ret = 0;
            break;
        }
    }
    pthread_rwlock_unlock(&fmap_area_lock);

    return ret;
}
355 
/**
 * Remove and release every fmap area belonging to @client_badge
 * (e.g. when the client exits). Each removal drops a vnode reference.
 */
void fmap_area_recycle(badge_t client_badge)
{
    struct fmap_area_mapping *area_iter, *tmp;

    /*
     * BUGFIX: list_del mutates the list, so the WRITE lock is required;
     * the original held only the read lock while deleting nodes.
     */
    pthread_rwlock_wrlock(&fmap_area_lock);
    for_each_in_list_safe (area_iter, tmp, node, &fmap_area_mappings) {
        if (area_iter->client_badge == client_badge) {
            list_del(&area_iter->node);
            deinit_fmap_mapping(area_iter);
        }
    }
    pthread_rwlock_unlock(&fmap_area_lock);
}
368 
369 #endif
370