1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "host/commands/run_cvd/process_monitor.h"
18
19 #include <sys/prctl.h>
20 #include <sys/types.h>
21 #include <sys/wait.h>
22
23 #include <assert.h>
24 #include <errno.h>
25 #include <signal.h>
26 #include <stdio.h>
27
28 #include <algorithm>
29 #include <atomic>
30 #include <future>
31 #include <memory>
32 #include <thread>
33
34 #include <android-base/logging.h>
35
36 #include "common/libs/fs/shared_buf.h"
37 #include "common/libs/fs/shared_select.h"
38 #include "common/libs/utils/result.h"
39 #include "common/libs/utils/subprocess.h"
40 #include "host/libs/config/cuttlefish_config.h"
41 #include "host/libs/config/known_paths.h"
42
43 namespace cuttlefish {
44
45 namespace {
46
// Fixed-size message written by the parent process to the monitor process
// over the monitor socket. It is transferred as raw bytes (see the
// WriteAllBinary / ReadExactBinary call sites in this file), so it is kept as
// a plain aggregate without constructors or default member initializers.
struct ParentToChildMessage {
  // True when the parent asks the monitor to stop monitoring.
  bool stop;
};
50
LogSubprocessExit(const std::string & name,pid_t pid,int wstatus)51 void LogSubprocessExit(const std::string& name, pid_t pid, int wstatus) {
52 LOG(INFO) << "Detected unexpected exit of monitored subprocess " << name;
53 if (WIFEXITED(wstatus)) {
54 LOG(INFO) << "Subprocess " << name << " (" << pid
55 << ") has exited with exit code " << WEXITSTATUS(wstatus);
56 } else if (WIFSIGNALED(wstatus)) {
57 LOG(ERROR) << "Subprocess " << name << " (" << pid
58 << ") was interrupted by a signal: " << WTERMSIG(wstatus);
59 } else {
60 LOG(INFO) << "subprocess " << name << " (" << pid
61 << ") has exited for unknown reasons";
62 }
63 }
64
LogSubprocessExit(const std::string & name,const siginfo_t & infop)65 void LogSubprocessExit(const std::string& name, const siginfo_t& infop) {
66 LOG(INFO) << "Detected unexpected exit of monitored subprocess " << name;
67 if (infop.si_code == CLD_EXITED) {
68 LOG(INFO) << "Subprocess " << name << " (" << infop.si_pid
69 << ") has exited with exit code " << infop.si_status;
70 } else if (infop.si_code == CLD_KILLED) {
71 LOG(ERROR) << "Subprocess " << name << " (" << infop.si_pid
72 << ") was interrupted by a signal: " << infop.si_status;
73 } else {
74 LOG(INFO) << "subprocess " << name << " (" << infop.si_pid
75 << ") has exited for unknown reasons (code = " << infop.si_code
76 << ", status = " << infop.si_status << ")";
77 }
78 }
79
StartSubprocesses(std::vector<MonitorEntry> & entries)80 Result<void> StartSubprocesses(std::vector<MonitorEntry>& entries) {
81 LOG(DEBUG) << "Starting monitored subprocesses";
82 for (auto& monitored : entries) {
83 LOG(INFO) << monitored.cmd->GetShortName();
84 auto options = SubprocessOptions().InGroup(true);
85 monitored.proc.reset(new Subprocess(monitored.cmd->Start(options)));
86 CF_EXPECT(monitored.proc->Started(), "Failed to start subprocess");
87 }
88 return {};
89 }
90
ReadMonitorSocketLoopForStop(std::atomic_bool & running,SharedFD & monitor_socket)91 Result<void> ReadMonitorSocketLoopForStop(std::atomic_bool& running,
92 SharedFD& monitor_socket) {
93 LOG(DEBUG) << "Waiting for a `stop` message from the parent";
94 while (running.load()) {
95 ParentToChildMessage message;
96 CF_EXPECT(ReadExactBinary(monitor_socket, &message) == sizeof(message),
97 "Could not read message from parent");
98 if (message.stop) {
99 running.store(false);
100 // Wake up the wait() loop by giving it an exited child process
101 if (fork() == 0) {
102 std::exit(0);
103 }
104 }
105 }
106 return {};
107 }
108
MonitorLoop(const std::atomic_bool & running,const bool restart_subprocesses,std::vector<MonitorEntry> & monitored)109 Result<void> MonitorLoop(const std::atomic_bool& running,
110 const bool restart_subprocesses,
111 std::vector<MonitorEntry>& monitored) {
112 while (running.load()) {
113 int wstatus;
114 pid_t pid = wait(&wstatus);
115 int error_num = errno;
116 CF_EXPECT(pid != -1, "Wait failed: " << strerror(error_num));
117 if (!WIFSIGNALED(wstatus) && !WIFEXITED(wstatus)) {
118 LOG(DEBUG) << "Unexpected status from wait: " << wstatus
119 << " for pid " << pid;
120 continue;
121 }
122 if (!running.load()) { // Avoid extra restarts near the end
123 break;
124 }
125 auto matches = [pid](const auto& it) { return it.proc->pid() == pid; };
126 auto it = std::find_if(monitored.begin(), monitored.end(), matches);
127 if (it == monitored.end()) {
128 LogSubprocessExit("(unknown)", pid, wstatus);
129 } else {
130 LogSubprocessExit(it->cmd->GetShortName(), it->proc->pid(), wstatus);
131 if (restart_subprocesses) {
132 auto options = SubprocessOptions().InGroup(true);
133 it->proc.reset(new Subprocess(it->cmd->Start(options)));
134 } else {
135 bool is_critical = it->is_critical;
136 monitored.erase(it);
137 if (running.load() && is_critical) {
138 LOG(ERROR) << "Stopping all monitored processes due to unexpected "
139 "exit of critical process";
140 Command stop_cmd(StopCvdBinary());
141 stop_cmd.Start();
142 }
143 }
144 }
145 }
146 return {};
147 }
148
StopSubprocesses(std::vector<MonitorEntry> & monitored)149 Result<void> StopSubprocesses(std::vector<MonitorEntry>& monitored) {
150 LOG(DEBUG) << "Stopping monitored subprocesses";
151 auto stop = [](const auto& it) {
152 auto stop_result = it.proc->Stop();
153 if (stop_result == StopperResult::kStopFailure) {
154 LOG(WARNING) << "Error in stopping \"" << it.cmd->GetShortName() << "\"";
155 return false;
156 }
157 siginfo_t infop;
158 auto success = it.proc->Wait(&infop, WEXITED);
159 if (success < 0) {
160 LOG(WARNING) << "Failed to wait for process " << it.cmd->GetShortName();
161 return false;
162 }
163 if (stop_result == StopperResult::kStopCrash) {
164 LogSubprocessExit(it.cmd->GetShortName(), infop);
165 }
166 return true;
167 };
168 // Processes were started in the order they appear in the vector, stop them in
169 // reverse order for symmetry.
170 size_t stopped = std::count_if(monitored.rbegin(), monitored.rend(), stop);
171 CF_EXPECT(stopped == monitored.size(), "Didn't stop all subprocesses");
172 return {};
173 }
174
175 } // namespace
176
RestartSubprocesses(bool r)177 ProcessMonitor::Properties& ProcessMonitor::Properties::RestartSubprocesses(
178 bool r) & {
179 restart_subprocesses_ = r;
180 return *this;
181 }
182
RestartSubprocesses(bool r)183 ProcessMonitor::Properties ProcessMonitor::Properties::RestartSubprocesses(
184 bool r) && {
185 return std::move(RestartSubprocesses(r));
186 }
187
AddCommand(MonitorCommand cmd)188 ProcessMonitor::Properties& ProcessMonitor::Properties::AddCommand(
189 MonitorCommand cmd) & {
190 entries_.emplace_back(std::move(cmd.command), cmd.is_critical);
191 return *this;
192 }
193
AddCommand(MonitorCommand cmd)194 ProcessMonitor::Properties ProcessMonitor::Properties::AddCommand(
195 MonitorCommand cmd) && {
196 return std::move(AddCommand(std::move(cmd)));
197 }
198
ProcessMonitor(ProcessMonitor::Properties && properties)199 ProcessMonitor::ProcessMonitor(ProcessMonitor::Properties&& properties)
200 : properties_(std::move(properties)), monitor_(-1) {}
201
StopMonitoredProcesses()202 Result<void> ProcessMonitor::StopMonitoredProcesses() {
203 CF_EXPECT(monitor_ != -1, "The monitor process has already exited.");
204 CF_EXPECT(monitor_socket_->IsOpen(), "The monitor socket is already closed");
205 ParentToChildMessage message;
206 message.stop = true;
207 CF_EXPECT(WriteAllBinary(monitor_socket_, &message) == sizeof(message),
208 "Failed to communicate with monitor socket: "
209 << monitor_socket_->StrError());
210
211 pid_t last_monitor = monitor_;
212 monitor_ = -1;
213 monitor_socket_->Close();
214 int wstatus;
215 CF_EXPECT(waitpid(last_monitor, &wstatus, 0) == last_monitor,
216 "Failed to wait for monitor process");
217 CF_EXPECT(!WIFSIGNALED(wstatus), "Monitor process exited due to a signal");
218 CF_EXPECT(WIFEXITED(wstatus), "Monitor process exited for unknown reasons");
219 CF_EXPECT(WEXITSTATUS(wstatus) == 0,
220 "Monitor process exited with code " << WEXITSTATUS(wstatus));
221 return {};
222 }
223
StartAndMonitorProcesses()224 Result<void> ProcessMonitor::StartAndMonitorProcesses() {
225 CF_EXPECT(monitor_ == -1, "The monitor process was already started");
226 CF_EXPECT(!monitor_socket_->IsOpen(), "Monitor socket was already opened");
227
228 SharedFD client_pipe, host_pipe;
229 CF_EXPECT(SharedFD::Pipe(&client_pipe, &host_pipe),
230 "Could not create the monitor socket.");
231 monitor_ = fork();
232 if (monitor_ == 0) {
233 monitor_socket_ = client_pipe;
234 host_pipe->Close();
235 auto monitor_result = MonitorRoutine();
236 if (!monitor_result.ok()) {
237 LOG(ERROR) << "Monitoring processes failed:\n"
238 << monitor_result.error().Message();
239 LOG(DEBUG) << "Monitoring processes failed:\n"
240 << monitor_result.error().Trace();
241 }
242 std::exit(monitor_result.ok() ? 0 : 1);
243 } else {
244 client_pipe->Close();
245 monitor_socket_ = host_pipe;
246 return {};
247 }
248 }
249
MonitorRoutine()250 Result<void> ProcessMonitor::MonitorRoutine() {
251 // Make this process a subreaper to reliably catch subprocess exits.
252 // See https://man7.org/linux/man-pages/man2/prctl.2.html
253 prctl(PR_SET_CHILD_SUBREAPER, 1);
254 prctl(PR_SET_PDEATHSIG, SIGHUP); // Die when parent dies
255
256 LOG(DEBUG) << "Monitoring subprocesses";
257 StartSubprocesses(properties_.entries_);
258
259 std::atomic_bool running(true);
260 auto parent_comms =
261 std::async(std::launch::async, ReadMonitorSocketLoopForStop,
262 std::ref(running), std::ref(monitor_socket_));
263
264 MonitorLoop(running, properties_.restart_subprocesses_, properties_.entries_);
265 CF_EXPECT(parent_comms.get(), "Should have exited if monitoring stopped");
266
267 StopSubprocesses(properties_.entries_);
268 LOG(DEBUG) << "Done monitoring subprocesses";
269 return {};
270 }
271
272 } // namespace cuttlefish
273