1 /*
2 * Copyright (c) 2023 Institute of Parallel And Distributed Systems (IPADS), Shanghai Jiao Tong University (SJTU)
3 * Licensed under the Mulan PSL v2.
4 * You can use this software according to the terms and conditions of the Mulan PSL v2.
5 * You may obtain a copy of Mulan PSL v2 at:
6 * http://license.coscl.org.cn/MulanPSL2
7 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
8 * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
9 * PURPOSE.
10 * See the Mulan PSL v2 for more details.
11 */
12 #include "liblaunch.h"
13 #include <chcore/type.h>
14 #include <chcore/defs.h>
15 #include <chcore/syscall.h>
16 #include <chcore/thread.h>
17 #include <chcore/pmo.h>
18 #include <assert.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <malloc.h>
22
23 /* An example to launch an (libc) application process */
24
/*
 * Entry types for the ELF auxiliary vector. construct_init_env() below emits
 * a subset of them as (type, value) pairs after the envp array.
 */
#define AT_NULL 0 /* end of vector */
#define AT_IGNORE 1 /* entry should be ignored */
#define AT_EXECFD 2 /* file descriptor of program */
#define AT_PHDR 3 /* program headers for program */
#define AT_PHENT 4 /* size of program header entry */
#define AT_PHNUM 5 /* number of program headers */
#define AT_PAGESZ 6 /* system page size */
#define AT_BASE 7 /* base address of interpreter */
#define AT_FLAGS 8 /* flags */
#define AT_ENTRY 9 /* entry point of program */
#define AT_NOTELF 10 /* program is not ELF */
#define AT_UID 11 /* real uid */
#define AT_EUID 12 /* effective uid */
#define AT_GID 13 /* real gid */
#define AT_EGID 14 /* effective gid */
#define AT_PLATFORM 15 /* string identifying CPU for optimizations */
#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */
#define AT_CLKTCK 17 /* frequency at which times() increments */
/* AT_* values 18 through 22 are reserved */
#define AT_SECURE 23 /* secure mode boolean */
#define AT_BASE_PLATFORM \
        24 /* string identifying real platform, may \
            * differ from AT_PLATFORM. */
#define AT_RANDOM 25 /* address of 16 random bytes */
#define AT_HWCAP2 26 /* extension of AT_HWCAP */

#define AT_EXECFN 31 /* filename of program */

/*
 * Size in bytes of the buffer in which the initial environment
 * (argc/argv/envp/auxv) is built before it is written to the new stack.
 */
#define INIT_ENV_SIZE PAGE_SIZE
/* AT_RANDOM is reported as (stack top - FAKE_RANDOM_OFF). */
#define FAKE_RANDOM_OFF 64

/* Platform string reported through AT_PLATFORM. */
const char PLAT[] = "aarch64";

/* Environment variables */
/* NOTE(review): SET_LD_LIB_PATH is not referenced in the visible part of this file. */
#define SET_LD_LIB_PATH
/* A user can mount external storage on /user/lib for dynamic libraries. */
/* Always emitted as the first envp entry of a launched process. */
const char LD_LIB_PATH[] = "LD_LIBRARY_PATH=/:/lib:/user/lib";
62
/*
 * Cursor state used while filling the initial environment buffer.
 *
 * The buffer is split into an "entries" area holding unsigned long words
 * (argc, pointers, auxv pairs) and a "strings" area holding the
 * NUL-terminated strings those pointers refer to.
 */
struct env_buf {
        /* Next free word in the entries area. */
        unsigned long *entries_tail;
        /* One past the last usable word of the entries area. */
        unsigned long *entries_end;
        /* Next free byte in the strings area. */
        char *strings_tail;
        /* One past the last usable byte of the strings area. */
        char *strings_end;
        /*
         * Added to the local address of a string before that address is
         * stored as an entry (see env_buf_append_str()).
         */
        unsigned long strings_offset;
};
70
env_buf_append_int(struct env_buf * env_buf,unsigned long val)71 static void env_buf_append_int(struct env_buf *env_buf, unsigned long val)
72 {
73 assert(env_buf->entries_tail < env_buf->entries_end);
74 *(env_buf->entries_tail++) = val;
75 }
76
env_buf_append_str(struct env_buf * env_buf,const char * val)77 static void env_buf_append_str(struct env_buf *env_buf, const char *val)
78 {
79 size_t val_size = strlen(val) + 1;
80
81 assert(env_buf->strings_tail + val_size <= env_buf->strings_end);
82 strlcpy(env_buf->strings_tail, val, val_size);
83 env_buf_append_int(env_buf,
84 (unsigned long)env_buf->strings_tail
85 + env_buf->strings_offset);
86 env_buf->strings_tail += val_size;
87 }
88
/* Append one auxiliary-vector pair: the type word followed by its value. */
static void env_buf_append_int_auxv(struct env_buf *env_buf, unsigned long type,
                                    unsigned long val)
{
        const unsigned long pair[2] = {type, val};
        int k;

        for (k = 0; k < 2; k++) {
                env_buf_append_int(env_buf, pair[k]);
        }
}
95
/*
 * Append one auxiliary-vector pair whose value is a string: the type word
 * goes first, then env_buf_append_str() stores the string and the entry that
 * points to it.
 */
static void env_buf_append_str_auxv(struct env_buf *env_buf, unsigned long type,
                                    const char *val)
{
        /* key */
        env_buf_append_int(env_buf, type);
        /* value: pointer entry plus the string bytes */
        env_buf_append_str(env_buf, val);
}
102
/* Placeholder values reported through the auxiliary vector. */
#define FAKE_UGID 1000 /* used for AT_UID, AT_EUID, AT_GID and AT_EGID */
#define FAKE_CLKTLK 100 /* used for AT_CLKTCK */
/*
 * NOTE(review): FAKE_RANDOM_OFF is already defined with the same value next
 * to INIT_ENV_SIZE; this identical redefinition is redundant.
 */
#define FAKE_RANDOM_OFF 64
106
107 /**
108 * NOTE: The stack format:
109 * http://articles.manugarg.com/aboutelfauxiliaryvectors.html
110 * The url shows a 32-bit format stack, but we implemented a 64-bit stack below.
111 * People as smart as you could understand the difference.
112 */
construct_init_env(char * env,vaddr_t top_vaddr,struct process_metadata * meta,char * name,struct pmo_map_request * pmo_map_reqs,int nr_pmo_map_reqs,cap_t caps[],int nr_caps,int argc,char ** argv,int envp_argc,char ** envp_argv,int pid,unsigned long load_offset)113 static void construct_init_env(char *env, vaddr_t top_vaddr,
114 struct process_metadata *meta, char *name,
115 struct pmo_map_request *pmo_map_reqs,
116 int nr_pmo_map_reqs, cap_t caps[], int nr_caps,
117 int argc, char **argv, int envp_argc,
118 char **envp_argv, int pid,
119 unsigned long load_offset)
120 {
121 int i;
122 struct env_buf env_buf;
123 char pidstr[16];
124
125 memset(env, 0, INIT_ENV_SIZE);
126
127 /* The strings part starts from the middle of the page. */
128 env_buf.entries_tail = (unsigned long *)env;
129 env_buf.entries_end = (unsigned long *)(env + INIT_ENV_SIZE / 2);
130 env_buf.strings_tail = (char *)env_buf.entries_end;
131 env_buf.strings_end = env + INIT_ENV_SIZE;
132 env_buf.strings_offset = top_vaddr - INIT_ENV_SIZE - (unsigned long)env;
133
134 /* argc, argv */
135 env_buf_append_int(&env_buf, argc);
136
137 for (i = 0; i < argc; i++) {
138 env_buf_append_str(&env_buf, argv[i]);
139 }
140
141 env_buf_append_int(&env_buf, 0);
142
143 /* envp */
144 env_buf_append_str(&env_buf, LD_LIB_PATH);
145 snprintf(pidstr, 15, "PID=%d\n", pid);
146 env_buf_append_str(&env_buf, pidstr);
147
148 for (i = 0; i < envp_argc; i++) {
149 env_buf_append_str(&env_buf, envp_argv[i]);
150 }
151
152 env_buf_append_int(&env_buf, 0);
153
154 /* auxv */
155 env_buf_append_int_auxv(&env_buf, AT_SECURE, 0);
156 env_buf_append_int_auxv(&env_buf, AT_PAGESZ, PAGE_SIZE);
157 env_buf_append_int_auxv(&env_buf, AT_PHDR, meta->phdr_addr + load_offset);
158 env_buf_append_int_auxv(&env_buf, AT_PHENT, meta->phentsize);
159 env_buf_append_int_auxv(&env_buf, AT_PHNUM, meta->phnum);
160 env_buf_append_int_auxv(&env_buf, AT_FLAGS, meta->flags);
161 env_buf_append_int_auxv(&env_buf, AT_ENTRY, meta->entry + load_offset);
162 env_buf_append_int_auxv(&env_buf, AT_UID, FAKE_UGID);
163 env_buf_append_int_auxv(&env_buf, AT_EUID, FAKE_UGID);
164 env_buf_append_int_auxv(&env_buf, AT_GID, FAKE_UGID);
165 env_buf_append_int_auxv(&env_buf, AT_EGID, FAKE_UGID);
166 env_buf_append_int_auxv(&env_buf, AT_CLKTCK, FAKE_CLKTLK);
167 env_buf_append_int_auxv(&env_buf, AT_HWCAP, 0);
168 env_buf_append_str_auxv(&env_buf, AT_PLATFORM, PLAT);
169 env_buf_append_int_auxv(&env_buf, AT_RANDOM, top_vaddr - FAKE_RANDOM_OFF);
170 env_buf_append_int_auxv(&env_buf, AT_BASE, 0);
171 env_buf_append_int_auxv(
172 &env_buf, AT_CHCORE_PMO_CNT, nr_pmo_map_reqs ?: ENVP_NONE_PMOS);
173 for (i = 0; i < nr_pmo_map_reqs; i++) {
174 env_buf_append_int_auxv(
175 &env_buf, AT_CHCORE_PMO_MAP_ADDR, pmo_map_reqs[i].addr);
176 }
177
178 env_buf_append_int_auxv(
179 &env_buf, AT_CHCORE_CAP_CNT, nr_caps ?: ENVP_NONE_CAPS);
180 for (i = 0; i < nr_caps; i++) {
181 env_buf_append_int_auxv(&env_buf, AT_CHCORE_CAP_VAL, caps[i]);
182 }
183 env_buf_append_int_auxv(&env_buf, AT_NULL, 0);
184 }
185
186 /* A wrapper of usys_create_pmo. */
create_pmos(struct pmo_request * req,int cnt)187 static void create_pmos(struct pmo_request *req, int cnt)
188 {
189 for (int i = 0; i < cnt; ++i) {
190 req[i].ret_cap = usys_create_pmo(req[i].size, req[i].type);
191 }
192 }
193
194 /* A wrapper of usys_map_pmo. */
map_pmos(cap_t target_process_cap,struct pmo_map_request * pmos_map_requests,int cnt)195 static int map_pmos(cap_t target_process_cap,
196 struct pmo_map_request *pmos_map_requests, int cnt)
197 {
198 int ret = 0;
199 for (int i = 0; i < cnt; ++i) {
200 struct pmo_map_request *req = &pmos_map_requests[i];
201 req->ret = usys_map_pmo(
202 target_process_cap, req->pmo_cap, req->addr, req->perm);
203 if (req->ret < 0)
204 ret = req->ret;
205 }
206 return ret;
207 }
208
/**
 * Every launched process gets two default pmos: one backing its main stack
 * and one forbidden area mapped just below it to detect stack overflow.
 */
#define DEFAULT_PMO_CNT (2)
211
212 /**
213 * @brief Launch a new process in ChCore. The new process would execute
214 * programs in the given ELF file.
215 *
216 * As a microkernel, ChCore does not have the concept of "process" actually.
217 * So we have to build a traditional "process" step by step in the userspace.
218 *
219 * To build a process, we have to perform following steps:
220 * Step-1: Create cap group of the new process. In ChCore, all system resources
221 * used by a process are represented using capabilities, so a process is a
222 * collection of capabilities, i.e. cap group, from where the kernel stands.
223 * Step-2: Transfer all capabilities specified in args to the new cap group.
224 * Step-3: Create pmo of the stack, and a forbidden pmo to detect stack
225 * overflow. Step-4: Construct auxiliary vector on the stack. Step-5: Map the
226 * two pmos and pmos of segments in the ELF file to construct the virtual memory
227 * space of the new process. Step-6: Map custom pmos specified in args into the
228 * new process. Step-7: Create the main/root thread of the new process. At this
229 * point, all hardware state and system resources required by the new process is
230 * ready, and the new thread would be scheduled by the kernel later.
231 *
232 * @param lp_args [In] see documentation of struct launch_process_args.
233 * @return 0 on success, otherwise an error code. All memory resources
234 * allocated by this function would be freed on error.
235 */
launch_process_with_pmos_caps(struct launch_process_args * lp_args)236 int launch_process_with_pmos_caps(struct launch_process_args *lp_args)
237 {
238 cap_t new_process_cap;
239 cap_t main_thread_cap;
240 int ret;
241 vaddr_t pc;
242 /* for creating pmos */
243 struct pmo_request pmo_requests[DEFAULT_PMO_CNT];
244 cap_t main_stack_cap;
245 cap_t forbid_area_cap;
246 u64 offset;
247 u64 stack_top;
248 int i;
249 /* for mapping pmos */
250 struct pmo_map_request *seg_pmos_map_requests;
251 cap_t *transfer_caps = NULL;
252 vaddr_t load_offset = lp_args->load_offset;
253 char init_env[INIT_ENV_SIZE];
254
255 /*
256 * user_elf: elf struct
257 * child_process_cap: if not NULL, set to child_process_cap that can be
258 * used in current process.
259 *
260 * child_main_thread_cap: if not NULL, set to child_main_thread_cap
261 * that can be used in current process.
262 *
263 * pmo_map_reqs, nr_pmo_map_reqs: input pmos to map in the new process
264 *
265 * caps, nr_caps: copy from farther process to child process
266 *
267 * cpuid: affinity
268 *
269 * argc/argv: the number of arguments and the arguments.
270 *
271 * badge: the badge is generated by the invokder (usually procmgr).
272 *
273 * pcid: the pcid is fixed for root process and servers,
274 * and the rest is generated by procmgr.
275 */
276 struct user_elf *user_elf = lp_args->user_elf;
277 cap_t *child_process_cap = lp_args->child_process_cap;
278 cap_t *child_main_thread_cap = lp_args->child_main_thread_cap;
279 struct pmo_map_request *pmo_map_reqs = lp_args->pmo_map_reqs;
280 int nr_pmo_map_reqs = lp_args->nr_pmo_map_reqs;
281 cap_t *caps = lp_args->caps;
282 int nr_caps = lp_args->nr_caps;
283 s32 cpuid = lp_args->cpuid;
284 int argc = lp_args->argc;
285 char **argv = lp_args->argv;
286 int envp_argc = lp_args->envp_argc;
287 char **envp_argv = lp_args->envp_argv;
288 badge_t badge = lp_args->badge;
289 int pid = lp_args->pid;
290 u64 pcid = lp_args->pcid;
291 /* The name of process/cap_group: mainly used for debugging */
292 char *process_name = lp_args->process_name;
293 unsigned long stack_size = lp_args->stack_size;
294
295 /* for usys_creat_thread */
296 struct thread_args args;
297 struct cap_group_args cg_args;
298
299 seg_pmos_map_requests = malloc(sizeof(struct pmo_map_request)
300 * (user_elf->segs_nr + DEFAULT_PMO_CNT));
301 if (!seg_pmos_map_requests) {
302 goto nomem;
303 }
304
305 /* Step-1: Create cap group of the new process. */
306 cg_args.badge = badge;
307 cg_args.name = (vaddr_t)process_name;
308 cg_args.name_len = strlen(process_name);
309 cg_args.pcid = pcid;
310 #ifdef CHCORE_OH_TEE
311 cg_args.pid = pid;
312 cg_args.heap_size = lp_args->heap_size;
313 if (lp_args->puuid != NULL) {
314 cg_args.puuid = (vaddr_t)&lp_args->puuid->uuid;
315 } else {
316 cg_args.puuid = 0;
317 }
318 #endif
319 new_process_cap = usys_create_cap_group((unsigned long)&cg_args);
320 if (new_process_cap < 0) {
321 printf("%s: fail to create new_process_cap (ret: %d)\n",
322 __func__,
323 new_process_cap);
324 goto fail;
325 }
326
327 /** Step-2: Transfer capabilities */
328 if (nr_caps > 0) {
329 transfer_caps = malloc(sizeof(int) * nr_caps);
330 /* usys_transfer_caps is used during process creation */
331 ret = usys_transfer_caps(new_process_cap, caps, nr_caps, transfer_caps);
332 if (ret != 0) {
333 printf("usys_transfer_caps ret %d\n", ret);
334 usys_exit(-1);
335 }
336 }
337
338 pc = user_elf->elf_meta.entry;
339
340 /* Step-3: Create pmo of the stack, and a forbidden pmo to detect stack overflow. */
341 pmo_requests[0].size = stack_size;
342 pmo_requests[0].type = PMO_ANONYM;
343
344 pmo_requests[1].size = PAGE_SIZE;
345 pmo_requests[1].type = PMO_FORBID;
346
347 create_pmos((void *)pmo_requests, DEFAULT_PMO_CNT);
348
349 /* get result caps */
350 main_stack_cap = pmo_requests[0].ret_cap;
351 if (main_stack_cap < 0) {
352 printf("%s: fail to create_pmos (ret cap: %d)\n",
353 __func__,
354 main_stack_cap);
355 goto fail;
356 }
357 #ifdef CHCORE_OH_TEE
358 ret = usys_transfer_pmo_owner(main_stack_cap, new_process_cap);
359 if (ret < 0) {
360 printf("%s: fail to transfer main_stack_cap's owner ret %d\n",
361 __func__,
362 ret);
363 goto fail;
364 }
365 #endif /* CHCORE_OH_TEE */
366
367 /* Map a forbidden pmo in case of stack overflow */
368 forbid_area_cap = pmo_requests[1].ret_cap;
369 if (forbid_area_cap < 0) {
370 printf("%s: fail to create_pmos (ret cap: %d)\n",
371 __func__,
372 forbid_area_cap);
373 goto fail;
374 }
375 #ifdef CHCORE_OH_TEE
376 ret = usys_transfer_pmo_owner(forbid_area_cap, new_process_cap);
377 if (ret < 0) {
378 printf("%s: fail to transfer forbid_area_cap's owner ret %d\n",
379 __func__,
380 ret);
381 goto fail;
382 }
383 #endif /* CHCORE_OH_TEE */
384
385 /* Step-4: Construct auxiliary vector on the stack. */
386 stack_top = MAIN_THREAD_STACK_BASE + stack_size;
387 construct_init_env(init_env,
388 stack_top,
389 &user_elf->elf_meta,
390 user_elf->path,
391 pmo_map_reqs,
392 nr_pmo_map_reqs,
393 transfer_caps,
394 nr_caps,
395 argc,
396 argv,
397 envp_argc,
398 envp_argv,
399 pid,
400 load_offset);
401 offset = stack_size - INIT_ENV_SIZE;
402
403 free(transfer_caps);
404
405 ret = usys_write_pmo(main_stack_cap, offset, init_env, INIT_ENV_SIZE);
406 if (ret != 0) {
407 printf("%s: fail to write_pmo (ret: %d)\n", __func__, ret);
408 goto fail;
409 }
410 /** Step-5: Map the two pmos and pmos of segments in the ELF file */
411 /* Map the main stack pmo */
412 seg_pmos_map_requests[0].pmo_cap = main_stack_cap;
413 seg_pmos_map_requests[0].addr = MAIN_THREAD_STACK_BASE;
414 seg_pmos_map_requests[0].perm = VM_READ | VM_WRITE;
415
416 /* Map the forbidden area in case of stack overflow */
417 seg_pmos_map_requests[1].pmo_cap = forbid_area_cap;
418 seg_pmos_map_requests[1].addr = MAIN_THREAD_STACK_BASE - PAGE_SIZE;
419 seg_pmos_map_requests[1].perm = VM_FORBID;
420
421 /* Map each segment in the elf binary */
422 for (i = 0; i < user_elf->segs_nr; ++i) {
423 seg_pmos_map_requests[DEFAULT_PMO_CNT + i].pmo_cap =
424 user_elf->user_elf_segs[i].elf_pmo;
425 seg_pmos_map_requests[DEFAULT_PMO_CNT + i].addr = ROUND_DOWN(
426 user_elf->user_elf_segs[i].p_vaddr + load_offset, PAGE_SIZE);
427 seg_pmos_map_requests[DEFAULT_PMO_CNT + i].perm =
428 user_elf->user_elf_segs[i].perm;
429 }
430
431 ret = map_pmos(
432 new_process_cap, (void *)seg_pmos_map_requests, DEFAULT_PMO_CNT + i);
433 if (ret != 0) {
434 printf("%s: fail to map_pmos (ret: %d)\n", __func__, ret);
435 goto fail;
436 }
437
438 /* Remove two caps just created for the new process in procmgr */
439 usys_revoke_cap(main_stack_cap, false);
440 usys_revoke_cap(forbid_area_cap, false);
441
442 /** Step-6: Map custom pmos specified in args into the new process. */
443 if (nr_pmo_map_reqs) {
444 ret = map_pmos(new_process_cap, (void *)pmo_map_reqs, nr_pmo_map_reqs);
445 if (ret != 0) {
446 printf("%s: fail to map_pmos (ret: %d)\n", __func__, ret);
447 goto fail;
448 }
449 }
450
451 /** Step-7: Create the main/root thread of the new process. */
452 args.cap_group_cap = new_process_cap;
453 args.stack = MAIN_THREAD_STACK_BASE + offset;
454 args.pc = pc + load_offset;
455 args.arg = (unsigned long)NULL;
456 args.prio = MAIN_THREAD_PRIO;
457 args.tls = cpuid;
458 args.type = TYPE_USER;
459 main_thread_cap = usys_create_thread((unsigned long)&args);
460
461 if (child_process_cap)
462 *child_process_cap = new_process_cap;
463 if (child_main_thread_cap)
464 *child_main_thread_cap = main_thread_cap;
465
466 free(seg_pmos_map_requests);
467 return 0;
468 fail:
469 free(seg_pmos_map_requests);
470 return -EINVAL;
471 nomem:
472 return -ENOMEM;
473 }
474