1 /*
2 * Copyright (C) 2023 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <linux/unistd.h>
18 #include <sched.h>
19 #include <semaphore.h>
20
21 #include <cstring> // strerror
22
23 #include "berberis/base/checks.h"
24 #include "berberis/base/tracing.h"
25 #include "berberis/guest_os_primitives/guest_signal.h"
26 #include "berberis/guest_os_primitives/guest_thread.h"
27 #include "berberis/guest_os_primitives/guest_thread_manager.h" // ResetCurrentGuestThreadAfterFork
28 #include "berberis/guest_os_primitives/scoped_pending_signals.h"
29 #include "berberis/guest_state/guest_addr.h"
30 #include "berberis/guest_state/guest_state_opaque.h"
31 #include "berberis/runtime/execute_guest.h"
32 #include "berberis/runtime_primitives/runtime_library.h"
33
34 #include "guest_signal_action.h"
35 #include "guest_thread_manager_impl.h"
36 #include "scoped_signal_blocker.h"
37
38 namespace berberis {
39
40 namespace {
41
// Issues the raw clone syscall directly via syscall().
//
// The kernel takes the last two clone arguments (tls and child tid) in swapped order on x86-64,
// so they are arranged per architecture before the single syscall() invocation below.
// Returns whatever syscall() returns.
long CloneSyscall(long flags, long child_stack, long parent_tid, long new_tls, long child_tid) {
#if defined(__x86_64__)
  // x86-64: child tid comes before tls.
  const long fourth_arg = child_tid;
  const long fifth_arg = new_tls;
#else
  const long fourth_arg = new_tls;
  const long fifth_arg = child_tid;
#endif
  return syscall(__NR_clone, flags, child_stack, parent_tid, fourth_arg, fifth_arg);
}
49
// Startup parameters passed from CloneGuestThread to RunClonedGuestThread via clone()'s argument.
struct GuestThreadCloneInfo {
  // Guest thread object the cloned child is going to execute.
  GuestThread* thread;
  // Host signal mask the child installs with SIG_SETMASK once it is set up. The parent fills it
  // with the mask it had before blocking signals for the clone.
  HostSigset mask;
  // Posted by the child when it is ready; the parent waits on it before disposing this struct.
  sem_t sem;
};
55
SemPostOrDie(sem_t * sem)56 void SemPostOrDie(sem_t* sem) {
57 int error = sem_post(sem);
58 // sem_post works in two stages: it increments semaphore's value, and then calls FUTEX_WAKE.
59 // If FUTEX_WAIT sporadically returns inside sem_wait between sem_post stages then sem_wait
60 // may observe the updated value and successfully finish. If semaphore is destroyed upon
61 // sem_wait return (like in CloneGuestThread), sem_post's call to FUTEX_WAKE will fail with
62 // EINVAL.
63 // Note that sem_destroy itself may do nothing (bionic and glibc are like that), the actual
64 // destruction happens because we free up memory (e.g. stack frame) where sem_t is stored.
65 // More details at https://sourceware.org/bugzilla/show_bug.cgi?id=12674
66 #if defined(__GLIBC__)
67
68 #if (__GLIBC__ < 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ < 21))
69 // GLibc before 2.21 may return EINVAL in the above situation. We ignore it since we cannot do
70 // anything about it, and it doesn't really break anything: we just acknowledge the fact that the
71 // semaphore can be destoyed already.
72 LOG_ALWAYS_FATAL_IF(error != 0 && errno != EINVAL, "sem_post returned error=%s", strerror(errno));
73 #else
74 // Recent GLibc ignores the error code returned from FUTEX_WAKE. So, it never returns EINVAL.
75 LOG_ALWAYS_FATAL_IF(error != 0,
76 "sem_post returned error=%s GLIBC=%d GLIBC_MINOR=%d",
77 strerror(errno),
78 __GLIBC__,
79 __GLIBC_MINOR__);
80 #endif
81
82 #else
83 // Bionic ignores the error code returned from FUTEX_WAKE. So, it never returns EINVAL.
84 LOG_ALWAYS_FATAL_IF(error != 0, "sem_post returned error=%s", strerror(errno));
85 #endif // defined(__GLIBC__)
86 }
87
RunClonedGuestThread(void * arg)88 int RunClonedGuestThread(void* arg) {
89 GuestThreadCloneInfo* info = static_cast<GuestThreadCloneInfo*>(arg);
90 GuestThread* thread = info->thread;
91
92 // Cannot use host pthread_key!
93 // TODO(b/280551726): Clear guest thread in exit syscall.
94 InsertCurrentThread(thread, false);
95
96 // ExecuteGuest requires pending signals enabled.
97 ScopedPendingSignalsEnabler scoped_pending_signals_enabler(thread);
98
99 // Host signals are blocked in parent before the clone,
100 // and remain blocked in child until this point.
101 RTSigprocmaskSyscallOrDie(SIG_SETMASK, &info->mask, nullptr);
102
103 // Notify parent that child is ready. Now parent can:
104 // - search for child in thread table
105 // - send child a signal
106 // - dispose info
107 SemPostOrDie(&info->sem);
108 // TODO(b/77574158): Ensure caller has a chance to handle the notification.
109 sched_yield();
110
111 ExecuteGuest(thread->state());
112
113 LOG_ALWAYS_FATAL("cloned thread didn't exit");
114 return 0;
115 }
116
117 } // namespace
118
119 // go/berberis-guest-threads
CloneGuestThread(GuestThread * thread,int flags,GuestAddr guest_stack_top,GuestAddr parent_tid,GuestAddr new_tls,GuestAddr child_tid)120 pid_t CloneGuestThread(GuestThread* thread,
121 int flags,
122 GuestAddr guest_stack_top,
123 GuestAddr parent_tid,
124 GuestAddr new_tls,
125 GuestAddr child_tid) {
126 ThreadState& thread_state = *thread->state();
127 if (!(flags & CLONE_VM)) {
128 // Memory is *not* shared with the child.
129 // Run the child on the same host stack as the parent. Thus, can use host local variables.
130 // The child gets a copy of guest thread object.
131 // ATTENTION: Do not set new tls for the host - tls might be incompatible.
132 // TODO(b/280551726): Consider forcing new host tls to 0.
133 long pid = CloneSyscall(flags & ~CLONE_SETTLS, 0, parent_tid, 0, child_tid);
134 if (pid == 0) {
135 // Child, reset thread table.
136 ResetCurrentGuestThreadAfterFork(thread);
137 if (guest_stack_top) {
138 SetStackRegister(GetCPUState(thread_state), guest_stack_top);
139 // TODO(b/280551726): Reset stack attributes?
140 }
141 if ((flags & CLONE_SETTLS)) {
142 SetTlsAddr(thread_state, new_tls);
143 }
144 }
145 return pid;
146 }
147
148 // Memory is shared with the child.
149 // The child needs a distinct stack, both host and guest! Because of the distinct host stack,
150 // cannot use host local variables. For now, use clone function to pass parameters to the child.
151 // The child needs new instance of guest thread object.
152
153 GuestThreadCloneInfo info;
154
155 info.thread = GuestThread::CreateClone(thread, (flags & CLONE_SIGHAND) != 0);
156 if (info.thread == nullptr) {
157 return EAGAIN;
158 }
159
160 ThreadState& clone_thread_state = *info.thread->state();
161
162 if ((flags & CLONE_SETTLS)) {
163 SetTlsAddr(clone_thread_state, new_tls);
164 }
165
166 // Current insn addr is on SVC instruction, move to the next.
167 // TODO(b/280551726): Not needed if we can use raw syscall and continue current execution.
168 CPUState& clone_cpu = GetCPUState(clone_thread_state);
169 AdvanceInsnAddrBeyondSyscall(clone_cpu);
170 SetReturnValueRegister(clone_cpu, 0); // Syscall return value
171
172 if (guest_stack_top != kNullGuestAddr) {
173 SetStackRegister(GetCPUState(clone_thread_state), guest_stack_top);
174 SetLinkRegister(clone_cpu, kNullGuestAddr);
175 } else {
176 if (!(flags & CLONE_VFORK)) {
177 TRACE("CLONE_VM with NULL guest stack and not in CLONE_VFORK mode, returning EINVAL");
178 return EINVAL;
179 }
180 // See b/323981318 and b/156400255.
181 TRACE("CLONE_VFORK with CLONE_VM and NULL guest stack, will share guest stack with parent");
182 // GuestThread::CreateClone has already copied stack and link pointers to new thread.
183 }
184
185 // Thread must start with pending signals while it's executing runtime code.
186 SetPendingSignalsStatusAtomic(clone_thread_state, kPendingSignalsEnabled);
187 SetResidence(clone_thread_state, kOutsideGeneratedCode);
188
189 int error = sem_init(&info.sem, 0, 0);
190 LOG_ALWAYS_FATAL_IF(error != 0, "sem_init returned error=%s", strerror(errno));
191
192 // ATTENTION: Don't set new tls for the host - tls might be incompatible.
193 // TODO(b/280551726): Consider forcing new host tls to 0.
194 long pid;
195 {
196 ScopedSignalBlocker signal_blocker;
197 info.mask = *signal_blocker.old_mask();
198 pid = clone(RunClonedGuestThread,
199 info.thread->GetHostStackTop(),
200 flags & ~CLONE_SETTLS,
201 &info,
202 parent_tid,
203 nullptr,
204 child_tid);
205 if (pid != -1) {
206 CHECK_EQ(0, sem_wait(&info.sem)); // Wait with blocked signals to avoid EINTR.
207 }
208 }
209
210 if (pid == -1) {
211 GuestThread::Destroy(info.thread);
212 }
213
214 sem_destroy(&info.sem);
215 return pid;
216 }
217
218 } // namespace berberis
219