1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include "host/commands/run_cvd/process_monitor.h"

#include <sys/prctl.h>
#include <sys/types.h>
#include <sys/wait.h>

#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>

#include <algorithm>
#include <atomic>
#include <future>
#include <thread>

#include <android-base/logging.h>

#include "common/libs/fs/shared_buf.h"
#include "common/libs/fs/shared_select.h"
36
37 namespace cuttlefish {
38
// Fixed-size message sent over the monitor pipe to the forked monitor
// process. It is written and read as raw bytes (WriteAllBinary /
// ReadExactBinary), so it must stay a plain, trivially copyable struct.
struct ParentToChildMessage {
  // When true, the monitor process should stop monitoring and shut down the
  // subprocesses it launched. Defaults to false so that a default-constructed
  // message never carries an indeterminate value.
  bool stop = false;
};
42
RestartSubprocesses(bool r)43 ProcessMonitor::Properties& ProcessMonitor::Properties::RestartSubprocesses(
44 bool r) & {
45 restart_subprocesses_ = r;
46 return *this;
47 }
48
RestartSubprocesses(bool r)49 ProcessMonitor::Properties ProcessMonitor::Properties::RestartSubprocesses(
50 bool r) && {
51 restart_subprocesses_ = r;
52 return std::move(*this);
53 }
54
AddCommand(Command cmd)55 ProcessMonitor::Properties& ProcessMonitor::Properties::AddCommand(
56 Command cmd) & {
57 auto& entry = entries_.emplace_back();
58 entry.cmd.reset(new Command(std::move(cmd)));
59 return *this;
60 }
61
AddCommand(Command cmd)62 ProcessMonitor::Properties ProcessMonitor::Properties::AddCommand(
63 Command cmd) && {
64 auto& entry = entries_.emplace_back();
65 entry.cmd.reset(new Command(std::move(cmd)));
66 return std::move(*this);
67 }
68
ProcessMonitor(ProcessMonitor::Properties && properties)69 ProcessMonitor::ProcessMonitor(ProcessMonitor::Properties&& properties)
70 : properties_(std::move(properties)), monitor_(-1) {}
71
StopMonitoredProcesses()72 Result<void> ProcessMonitor::StopMonitoredProcesses() {
73 CF_EXPECT(monitor_ != -1, "The monitor process has already exited.");
74 CF_EXPECT(monitor_socket_->IsOpen(), "The monitor socket is already closed");
75 ParentToChildMessage message;
76 message.stop = true;
77 CF_EXPECT(WriteAllBinary(monitor_socket_, &message) == sizeof(message),
78 "Failed to communicate with monitor socket: "
79 << monitor_socket_->StrError());
80
81 pid_t last_monitor = monitor_;
82 monitor_ = -1;
83 monitor_socket_->Close();
84 int wstatus;
85 CF_EXPECT(waitpid(last_monitor, &wstatus, 0) == last_monitor,
86 "Failed to wait for monitor process");
87 CF_EXPECT(!WIFSIGNALED(wstatus), "Monitor process exited due to a signal");
88 CF_EXPECT(WIFEXITED(wstatus), "Monitor process exited for unknown reasons");
89 CF_EXPECT(WEXITSTATUS(wstatus) == 0,
90 "Monitor process exited with code " << WEXITSTATUS(wstatus));
91 return {};
92 }
93
StartAndMonitorProcesses()94 Result<void> ProcessMonitor::StartAndMonitorProcesses() {
95 CF_EXPECT(monitor_ == -1, "The monitor process was already started");
96 CF_EXPECT(!monitor_socket_->IsOpen(), "Monitor socket was already opened");
97
98 SharedFD client_pipe, host_pipe;
99 CF_EXPECT(SharedFD::Pipe(&client_pipe, &host_pipe),
100 "Could not create the monitor socket.");
101 monitor_ = fork();
102 if (monitor_ == 0) {
103 monitor_socket_ = client_pipe;
104 host_pipe->Close();
105 auto monitor = MonitorRoutine();
106 if (!monitor.ok()) {
107 LOG(ERROR) << "Monitoring processes failed:\n" << monitor.error();
108 }
109 std::exit(monitor.ok() ? 0 : 1);
110 } else {
111 client_pipe->Close();
112 monitor_socket_ = host_pipe;
113 return {};
114 }
115 }
116
LogSubprocessExit(const std::string & name,pid_t pid,int wstatus)117 static void LogSubprocessExit(const std::string& name, pid_t pid, int wstatus) {
118 LOG(INFO) << "Detected unexpected exit of monitored subprocess " << name;
119 if (WIFEXITED(wstatus)) {
120 LOG(INFO) << "Subprocess " << name << " (" << pid
121 << ") has exited with exit code " << WEXITSTATUS(wstatus);
122 } else if (WIFSIGNALED(wstatus)) {
123 LOG(ERROR) << "Subprocess " << name << " (" << pid
124 << ") was interrupted by a signal: " << WTERMSIG(wstatus);
125 } else {
126 LOG(INFO) << "subprocess " << name << " (" << pid
127 << ") has exited for unknown reasons";
128 }
129 }
130
LogSubprocessExit(const std::string & name,const siginfo_t & infop)131 static void LogSubprocessExit(const std::string& name, const siginfo_t& infop) {
132 LOG(INFO) << "Detected unexpected exit of monitored subprocess " << name;
133 if (infop.si_code == CLD_EXITED) {
134 LOG(INFO) << "Subprocess " << name << " (" << infop.si_pid
135 << ") has exited with exit code " << infop.si_status;
136 } else if (infop.si_code == CLD_KILLED) {
137 LOG(ERROR) << "Subprocess " << name << " (" << infop.si_pid
138 << ") was interrupted by a signal: " << infop.si_status;
139 } else {
140 LOG(INFO) << "subprocess " << name << " (" << infop.si_pid
141 << ") has exited for unknown reasons (code = " << infop.si_code
142 << ", status = " << infop.si_status << ")";
143 }
144 }
145
MonitorRoutine()146 Result<void> ProcessMonitor::MonitorRoutine() {
147 // Make this process a subreaper to reliably catch subprocess exits.
148 // See https://man7.org/linux/man-pages/man2/prctl.2.html
149 prctl(PR_SET_CHILD_SUBREAPER, 1);
150 prctl(PR_SET_PDEATHSIG, SIGHUP); // Die when parent dies
151
152 LOG(DEBUG) << "Starting monitoring subprocesses";
153 for (auto& monitored : properties_.entries_) {
154 LOG(INFO) << monitored.cmd->GetShortName();
155 auto options = SubprocessOptions().InGroup(true);
156 monitored.proc.reset(new Subprocess(monitored.cmd->Start(options)));
157 CF_EXPECT(monitored.proc->Started(), "Failed to start process");
158 }
159
160 bool running = true;
161 auto policy = std::launch::async;
162 auto parent_comms = std::async(policy, [&running, this]() -> Result<void> {
163 LOG(DEBUG) << "Waiting for a `stop` message from the parent.";
164 while (running) {
165 ParentToChildMessage message;
166 CF_EXPECT(ReadExactBinary(monitor_socket_, &message) == sizeof(message),
167 "Could not read message from parent.");
168 if (message.stop) {
169 running = false;
170 // Wake up the wait() loop by giving it an exited child process
171 if (fork() == 0) {
172 std::exit(0);
173 }
174 }
175 }
176 return {};
177 });
178
179 auto& monitored = properties_.entries_;
180
181 LOG(DEBUG) << "Monitoring subprocesses";
182 while(running) {
183 int wstatus;
184 pid_t pid = wait(&wstatus);
185 int error_num = errno;
186 CF_EXPECT(pid != -1, "Wait failed: " << strerror(error_num));
187 if (!WIFSIGNALED(wstatus) && !WIFEXITED(wstatus)) {
188 LOG(DEBUG) << "Unexpected status from wait: " << wstatus
189 << " for pid " << pid;
190 continue;
191 }
192 if (!running) { // Avoid extra restarts near the end
193 break;
194 }
195 auto matches = [pid](const auto& it) { return it.proc->pid() == pid; };
196 auto it = std::find_if(monitored.begin(), monitored.end(), matches);
197 if (it == monitored.end()) {
198 LogSubprocessExit("(unknown)", pid, wstatus);
199 } else {
200 LogSubprocessExit(it->cmd->GetShortName(), it->proc->pid(), wstatus);
201 if (properties_.restart_subprocesses_) {
202 auto options = SubprocessOptions().InGroup(true);
203 it->proc.reset(new Subprocess(it->cmd->Start(options)));
204 } else {
205 properties_.entries_.erase(it);
206 }
207 }
208 }
209
210 CF_EXPECT(parent_comms.get()); // Should have exited if `running` is false
211 auto stop = [](const auto& it) {
212 auto stop_result = it.proc->Stop();
213 if (stop_result == StopperResult::kStopFailure) {
214 LOG(WARNING) << "Error in stopping \"" << it.cmd->GetShortName() << "\"";
215 return false;
216 }
217 siginfo_t infop;
218 auto success = it.proc->Wait(&infop, WEXITED);
219 if (success < 0) {
220 LOG(WARNING) << "Failed to wait for process " << it.cmd->GetShortName();
221 return false;
222 }
223 if (stop_result == StopperResult::kStopCrash) {
224 LogSubprocessExit(it.cmd->GetShortName(), infop);
225 }
226 return true;
227 };
228 // Processes were started in the order they appear in the vector, stop them in
229 // reverse order for symmetry.
230 size_t stopped = std::count_if(monitored.rbegin(), monitored.rend(), stop);
231 LOG(DEBUG) << "Done monitoring subprocesses";
232 CF_EXPECT(stopped == monitored.size(), "Didn't stop all subprocesses");
233 return {};
234 }
235
236 } // namespace cuttlefish
237