1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include "host/commands/run_cvd/process_monitor.h"

#include <sys/prctl.h>
#include <sys/types.h>
#include <sys/wait.h>

#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>

#include <algorithm>
#include <atomic>
#include <thread>

#include <android-base/logging.h>

#include "common/libs/fs/shared_buf.h"
#include "common/libs/fs/shared_select.h"
35
36 namespace cuttlefish {
37
// Fixed-size message written by the parent process to the monitor process over
// the monitor pipe. It is transferred as raw bytes (WriteAllBinary /
// ReadExactBinary), so it must stay trivially copyable.
struct ParentToChildMessage {
  // True when the monitor should stop all monitored subprocesses and exit.
  // Defaults to false so a default-constructed message never carries an
  // indeterminate value.
  bool stop = false;
};
41
ProcessMonitor(bool restart_subprocesses)42 ProcessMonitor::ProcessMonitor(bool restart_subprocesses)
43 : restart_subprocesses_(restart_subprocesses), monitor_(-1) {
44 }
45
AddCommand(Command cmd)46 void ProcessMonitor::AddCommand(Command cmd) {
47 CHECK(monitor_ == -1) << "The monitor process is already running.";
48 CHECK(!monitor_socket_->IsOpen()) << "The monitor socket is already open.";
49
50 monitored_processes_.push_back(MonitorEntry());
51 auto& entry = monitored_processes_.back();
52 entry.cmd.reset(new Command(std::move(cmd)));
53 }
54
StopMonitoredProcesses()55 bool ProcessMonitor::StopMonitoredProcesses() {
56 if (monitor_ == -1) {
57 LOG(ERROR) << "The monitor process is already dead.";
58 return false;
59 }
60 if (!monitor_socket_->IsOpen()) {
61 LOG(ERROR) << "The monitor socket is already closed.";
62 return false;
63 }
64 ParentToChildMessage message;
65 message.stop = true;
66 if (WriteAllBinary(monitor_socket_, &message) != sizeof(message)) {
67 LOG(ERROR) << "Failed to communicate with monitor socket: "
68 << monitor_socket_->StrError();
69 return false;
70 }
71 pid_t last_monitor = monitor_;
72 monitor_ = -1;
73 monitor_socket_->Close();
74 int wstatus;
75 if (waitpid(last_monitor, &wstatus, 0) != last_monitor) {
76 LOG(ERROR) << "Failed to wait for monitor process";
77 return false;
78 }
79 if (WIFSIGNALED(wstatus)) {
80 LOG(ERROR) << "Monitor process exited due to a signal";
81 return false;
82 }
83 if (!WIFEXITED(wstatus)) {
84 LOG(ERROR) << "Monitor process exited for unknown reasons";
85 return false;
86 }
87 if (WEXITSTATUS(wstatus) != 0) {
88 LOG(ERROR) << "Monitor process exited with code " << WEXITSTATUS(wstatus);
89 return false;
90 }
91 return true;
92 }
93
StartAndMonitorProcesses()94 bool ProcessMonitor::StartAndMonitorProcesses() {
95 if (monitor_ != -1) {
96 LOG(ERROR) << "The monitor process was already started";
97 return false;
98 }
99 if (monitor_socket_->IsOpen()) {
100 LOG(ERROR) << "The monitor socket was already opened.";
101 return false;
102 }
103 SharedFD client_pipe, host_pipe;
104 if (!SharedFD::Pipe(&client_pipe, &host_pipe)) {
105 LOG(ERROR) << "Could not create the monitor socket.";
106 return false;
107 }
108 monitor_ = fork();
109 if (monitor_ == 0) {
110 monitor_socket_ = client_pipe;
111 host_pipe->Close();
112 std::exit(MonitorRoutine() ? 0 : 1);
113 } else {
114 client_pipe->Close();
115 monitor_socket_ = host_pipe;
116 return true;
117 }
118 }
119
LogSubprocessExit(const std::string & name,pid_t pid,int wstatus)120 static void LogSubprocessExit(const std::string& name, pid_t pid, int wstatus) {
121 LOG(INFO) << "Detected exit of monitored subprocess " << name;
122 if (WIFEXITED(wstatus)) {
123 LOG(INFO) << "Subprocess " << name << " (" << pid
124 << ") has exited with exit code " << WEXITSTATUS(wstatus);
125 } else if (WIFSIGNALED(wstatus)) {
126 LOG(ERROR) << "Subprocess " << name << " (" << pid
127 << ") was interrupted by a signal: " << WTERMSIG(wstatus);
128 } else {
129 LOG(INFO) << "subprocess " << name << " (" << pid
130 << ") has exited for unknown reasons";
131 }
132 }
133
MonitorRoutine()134 bool ProcessMonitor::MonitorRoutine() {
135 // Make this process a subreaper to reliably catch subprocess exits.
136 // See https://man7.org/linux/man-pages/man2/prctl.2.html
137 prctl(PR_SET_CHILD_SUBREAPER, 1);
138 prctl(PR_SET_PDEATHSIG, SIGHUP); // Die when parent dies
139
140 LOG(DEBUG) << "Starting monitoring subprocesses";
141 for (auto& monitored : monitored_processes_) {
142 cuttlefish::SubprocessOptions options;
143 options.InGroup(true);
144 monitored.proc.reset(new Subprocess(monitored.cmd->Start(options)));
145 CHECK(monitored.proc->Started()) << "Failed to start process";
146 }
147
148 bool running = true;
149 std::thread parent_comms_thread([&running, this]() {
150 LOG(DEBUG) << "Waiting for a `stop` message from the parent.";
151 while (running) {
152 ParentToChildMessage message;
153 CHECK(ReadExactBinary(monitor_socket_, &message) == sizeof(message))
154 << "Could not read message from parent.";
155 if (message.stop) {
156 running = false;
157 // Wake up the wait() loop by giving it an exited child process
158 if (fork() == 0) {
159 std::exit(0);
160 }
161 }
162 }
163 });
164
165 auto& monitored = monitored_processes_;
166
167 LOG(DEBUG) << "Monitoring subprocesses";
168 while(running) {
169 int wstatus;
170 pid_t pid = wait(&wstatus);
171 int error_num = errno;
172 CHECK(pid != -1) << "Wait failed: " << strerror(error_num);
173 if (!WIFSIGNALED(wstatus) && !WIFEXITED(wstatus)) {
174 LOG(DEBUG) << "Unexpected status from wait: " << wstatus
175 << " for pid " << pid;
176 continue;
177 }
178 if (!running) { // Avoid extra restarts near the end
179 break;
180 }
181 auto matches = [pid](const auto& it) { return it.proc->pid() == pid; };
182 auto it = std::find_if(monitored.begin(), monitored.end(), matches);
183 if (it == monitored.end()) {
184 LogSubprocessExit("(unknown)", pid, wstatus);
185 } else {
186 LogSubprocessExit(it->cmd->GetShortName(), it->proc->pid(), wstatus);
187 if (restart_subprocesses_) {
188 cuttlefish::SubprocessOptions options;
189 options.InGroup(true);
190 it->proc.reset(new Subprocess(it->cmd->Start(options)));
191 } else {
192 monitored_processes_.erase(it);
193 }
194 }
195 }
196
197 parent_comms_thread.join(); // Should have exited if `running` is false
198 // Processes were started in the order they appear in the vector, stop them in
199 // reverse order for symmetry.
200 auto stop = [](const auto& it) {
201 if (!it.proc->Stop()) {
202 LOG(WARNING) << "Error in stopping \"" << it.cmd->GetShortName() << "\"";
203 return false;
204 }
205 int wstatus = 0;
206 auto ret = it.proc->Wait(&wstatus, 0);
207 if (ret < 0) {
208 LOG(WARNING) << "Failed to wait for process " << it.cmd->GetShortName();
209 return false;
210 }
211 return true;
212 };
213 size_t stopped = std::count_if(monitored.rbegin(), monitored.rend(), stop);
214 LOG(DEBUG) << "Done monitoring subprocesses";
215 return stopped == monitored.size();
216 }
217
218 } // namespace cuttlefish
219