/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <linux/unistd.h>
#include <sched.h>
#include <semaphore.h>

#include <cerrno>   // errno
#include <cstring>  // strerror

#include "berberis/base/checks.h"
#include "berberis/base/tracing.h"
#include "berberis/guest_os_primitives/guest_signal.h"
#include "berberis/guest_os_primitives/guest_thread.h"
#include "berberis/guest_os_primitives/guest_thread_manager.h"  // ResetCurrentGuestThreadAfterFork
#include "berberis/guest_os_primitives/scoped_pending_signals.h"
#include "berberis/guest_state/guest_addr.h"
#include "berberis/guest_state/guest_state_opaque.h"
#include "berberis/runtime/execute_guest.h"
#include "berberis/runtime_primitives/runtime_library.h"

#include "guest_signal_action.h"
#include "guest_thread_manager_impl.h"
#include "scoped_signal_blocker.h"

namespace berberis {

namespace {

// Issues the raw clone syscall, hiding the architecture-specific order of its trailing arguments.
// Returns whatever syscall() returns for __NR_clone.
long CloneSyscall(long flags, long child_stack, long parent_tid, long new_tls, long child_tid) {
#if defined(__x86_64__)
  // sys_clone's last two arguments are flipped on x86-64: child_tid comes before new_tls.
  const long fourth_arg = child_tid;
  const long fifth_arg = new_tls;
#else
  const long fourth_arg = new_tls;
  const long fifth_arg = child_tid;
#endif
  return syscall(__NR_clone, flags, child_stack, parent_tid, fourth_arg, fifth_arg);
}

50 struct GuestThreadCloneInfo {
51   GuestThread* thread;
52   HostSigset mask;
53   sem_t sem;
54 };
55 
SemPostOrDie(sem_t * sem)56 void SemPostOrDie(sem_t* sem) {
57   int error = sem_post(sem);
58   // sem_post works in two stages: it increments semaphore's value, and then calls FUTEX_WAKE.
59   // If FUTEX_WAIT sporadically returns inside sem_wait between sem_post stages then sem_wait
60   // may observe the updated value and successfully finish. If semaphore is destroyed upon
61   // sem_wait return (like in CloneGuestThread), sem_post's call to FUTEX_WAKE will fail with
62   // EINVAL.
63   // Note that sem_destroy itself may do nothing (bionic and glibc are like that), the actual
64   // destruction happens because we free up memory (e.g. stack frame) where sem_t is stored.
65   // More details at https://sourceware.org/bugzilla/show_bug.cgi?id=12674
66 #if defined(__GLIBC__)
67 
68 #if (__GLIBC__ < 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ < 21))
69   // GLibc before 2.21 may return EINVAL in the above situation. We ignore it since we cannot do
70   // anything about it, and it doesn't really break anything: we just acknowledge the fact that the
71   // semaphore can be destoyed already.
72   LOG_ALWAYS_FATAL_IF(error != 0 && errno != EINVAL, "sem_post returned error=%s", strerror(errno));
73 #else
74   // Recent GLibc ignores the error code returned from FUTEX_WAKE. So, it never returns EINVAL.
75   LOG_ALWAYS_FATAL_IF(error != 0,
76                       "sem_post returned error=%s GLIBC=%d GLIBC_MINOR=%d",
77                       strerror(errno),
78                       __GLIBC__,
79                       __GLIBC_MINOR__);
80 #endif
81 
82 #else
83   // Bionic ignores the error code returned from FUTEX_WAKE. So, it never returns EINVAL.
84   LOG_ALWAYS_FATAL_IF(error != 0, "sem_post returned error=%s", strerror(errno));
85 #endif  // defined(__GLIBC__)
86 }
87 
RunClonedGuestThread(void * arg)88 int RunClonedGuestThread(void* arg) {
89   GuestThreadCloneInfo* info = static_cast<GuestThreadCloneInfo*>(arg);
90   GuestThread* thread = info->thread;
91 
92   // Cannot use host pthread_key!
93   // TODO(b/280551726): Clear guest thread in exit syscall.
94   InsertCurrentThread(thread, false);
95 
96   // ExecuteGuest requires pending signals enabled.
97   ScopedPendingSignalsEnabler scoped_pending_signals_enabler(thread);
98 
99   // Host signals are blocked in parent before the clone,
100   // and remain blocked in child until this point.
101   RTSigprocmaskSyscallOrDie(SIG_SETMASK, &info->mask, nullptr);
102 
103   // Notify parent that child is ready. Now parent can:
104   // - search for child in thread table
105   // - send child a signal
106   // - dispose info
107   SemPostOrDie(&info->sem);
108   // TODO(b/77574158): Ensure caller has a chance to handle the notification.
109   sched_yield();
110 
111   ExecuteGuest(thread->state());
112 
113   LOG_ALWAYS_FATAL("cloned thread didn't exit");
114   return 0;
115 }
116 
}  // namespace

// go/berberis-guest-threads
CloneGuestThread(GuestThread * thread,int flags,GuestAddr guest_stack_top,GuestAddr parent_tid,GuestAddr new_tls,GuestAddr child_tid)120 pid_t CloneGuestThread(GuestThread* thread,
121                        int flags,
122                        GuestAddr guest_stack_top,
123                        GuestAddr parent_tid,
124                        GuestAddr new_tls,
125                        GuestAddr child_tid) {
126   ThreadState& thread_state = *thread->state();
127   if (!(flags & CLONE_VM)) {
128     // Memory is *not* shared with the child.
129     // Run the child on the same host stack as the parent. Thus, can use host local variables.
130     // The child gets a copy of guest thread object.
131     // ATTENTION: Do not set new tls for the host - tls might be incompatible.
132     // TODO(b/280551726): Consider forcing new host tls to 0.
133     long pid = CloneSyscall(flags & ~CLONE_SETTLS, 0, parent_tid, 0, child_tid);
134     if (pid == 0) {
135       // Child, reset thread table.
136       ResetCurrentGuestThreadAfterFork(thread);
137       if (guest_stack_top) {
138         SetStackRegister(GetCPUState(thread_state), guest_stack_top);
139         // TODO(b/280551726): Reset stack attributes?
140       }
141       if ((flags & CLONE_SETTLS)) {
142         SetTlsAddr(thread_state, new_tls);
143       }
144     }
145     return pid;
146   }
147 
148   // Memory is shared with the child.
149   // The child needs a distinct stack, both host and guest! Because of the distinct host stack,
150   // cannot use host local variables. For now, use clone function to pass parameters to the child.
151   // The child needs new instance of guest thread object.
152 
153   GuestThreadCloneInfo info;
154 
155   info.thread = GuestThread::CreateClone(thread, (flags & CLONE_SIGHAND) != 0);
156   if (info.thread == nullptr) {
157     return EAGAIN;
158   }
159 
160   ThreadState& clone_thread_state = *info.thread->state();
161 
162   if ((flags & CLONE_SETTLS)) {
163     SetTlsAddr(clone_thread_state, new_tls);
164   }
165 
166   // Current insn addr is on SVC instruction, move to the next.
167   // TODO(b/280551726): Not needed if we can use raw syscall and continue current execution.
168   CPUState& clone_cpu = GetCPUState(clone_thread_state);
169   AdvanceInsnAddrBeyondSyscall(clone_cpu);
170   SetReturnValueRegister(clone_cpu, 0);  // Syscall return value
171 
172   if (guest_stack_top != kNullGuestAddr) {
173     SetStackRegister(GetCPUState(clone_thread_state), guest_stack_top);
174     SetLinkRegister(clone_cpu, kNullGuestAddr);
175   } else {
176     if (!(flags & CLONE_VFORK)) {
177       TRACE("CLONE_VM with NULL guest stack and not in CLONE_VFORK mode, returning EINVAL");
178       return EINVAL;
179     }
180     // See b/323981318 and b/156400255.
181     TRACE("CLONE_VFORK with CLONE_VM and NULL guest stack, will share guest stack with parent");
182     // GuestThread::CreateClone has already copied stack and link pointers to new thread.
183   }
184 
185   // Thread must start with pending signals while it's executing runtime code.
186   SetPendingSignalsStatusAtomic(clone_thread_state, kPendingSignalsEnabled);
187   SetResidence(clone_thread_state, kOutsideGeneratedCode);
188 
189   int error = sem_init(&info.sem, 0, 0);
190   LOG_ALWAYS_FATAL_IF(error != 0, "sem_init returned error=%s", strerror(errno));
191 
192   // ATTENTION: Don't set new tls for the host - tls might be incompatible.
193   // TODO(b/280551726): Consider forcing new host tls to 0.
194   long pid;
195   {
196     ScopedSignalBlocker signal_blocker;
197     info.mask = *signal_blocker.old_mask();
198     pid = clone(RunClonedGuestThread,
199                 info.thread->GetHostStackTop(),
200                 flags & ~CLONE_SETTLS,
201                 &info,
202                 parent_tid,
203                 nullptr,
204                 child_tid);
205     if (pid != -1) {
206       CHECK_EQ(0, sem_wait(&info.sem));  // Wait with blocked signals to avoid EINTR.
207     }
208   }
209 
210   if (pid == -1) {
211     GuestThread::Destroy(info.thread);
212   }
213 
214   sem_destroy(&info.sem);
215   return pid;
216 }
217 
}  // namespace berberis