• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "host/commands/run_cvd/process_monitor.h"
18 
#include <sys/prctl.h>
#include <sys/types.h>
#include <sys/wait.h>

#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
27 
28 #include <algorithm>
29 #include <atomic>
30 #include <future>
31 #include <memory>
32 #include <thread>
33 
34 #include <android-base/logging.h>
35 
36 #include "common/libs/fs/shared_buf.h"
37 #include "common/libs/fs/shared_select.h"
38 #include "common/libs/utils/result.h"
39 #include "common/libs/utils/subprocess.h"
40 #include "host/libs/config/cuttlefish_config.h"
41 #include "host/libs/config/known_paths.h"
42 
43 namespace cuttlefish {
44 
45 namespace {
46 
// Fixed-size control message exchanged over the monitor pipe. It is written
// and read as raw bytes, so it must stay a plain aggregate.
struct ParentToChildMessage {
  // When true, the receiving monitor process stops its monitoring loop.
  bool stop;
};
50 
LogSubprocessExit(const std::string & name,pid_t pid,int wstatus)51 void LogSubprocessExit(const std::string& name, pid_t pid, int wstatus) {
52   LOG(INFO) << "Detected unexpected exit of monitored subprocess " << name;
53   if (WIFEXITED(wstatus)) {
54     LOG(INFO) << "Subprocess " << name << " (" << pid
55               << ") has exited with exit code " << WEXITSTATUS(wstatus);
56   } else if (WIFSIGNALED(wstatus)) {
57     LOG(ERROR) << "Subprocess " << name << " (" << pid
58                << ") was interrupted by a signal: " << WTERMSIG(wstatus);
59   } else {
60     LOG(INFO) << "subprocess " << name << " (" << pid
61               << ") has exited for unknown reasons";
62   }
63 }
64 
LogSubprocessExit(const std::string & name,const siginfo_t & infop)65 void LogSubprocessExit(const std::string& name, const siginfo_t& infop) {
66   LOG(INFO) << "Detected unexpected exit of monitored subprocess " << name;
67   if (infop.si_code == CLD_EXITED) {
68     LOG(INFO) << "Subprocess " << name << " (" << infop.si_pid
69               << ") has exited with exit code " << infop.si_status;
70   } else if (infop.si_code == CLD_KILLED) {
71     LOG(ERROR) << "Subprocess " << name << " (" << infop.si_pid
72                << ") was interrupted by a signal: " << infop.si_status;
73   } else {
74     LOG(INFO) << "subprocess " << name << " (" << infop.si_pid
75               << ") has exited for unknown reasons (code = " << infop.si_code
76               << ", status = " << infop.si_status << ")";
77   }
78 }
79 
StartSubprocesses(std::vector<MonitorEntry> & entries)80 Result<void> StartSubprocesses(std::vector<MonitorEntry>& entries) {
81   LOG(DEBUG) << "Starting monitored subprocesses";
82   for (auto& monitored : entries) {
83     LOG(INFO) << monitored.cmd->GetShortName();
84     auto options = SubprocessOptions().InGroup(true);
85     monitored.proc.reset(new Subprocess(monitored.cmd->Start(options)));
86     CF_EXPECT(monitored.proc->Started(), "Failed to start subprocess");
87   }
88   return {};
89 }
90 
ReadMonitorSocketLoopForStop(std::atomic_bool & running,SharedFD & monitor_socket)91 Result<void> ReadMonitorSocketLoopForStop(std::atomic_bool& running,
92                                           SharedFD& monitor_socket) {
93   LOG(DEBUG) << "Waiting for a `stop` message from the parent";
94   while (running.load()) {
95     ParentToChildMessage message;
96     CF_EXPECT(ReadExactBinary(monitor_socket, &message) == sizeof(message),
97               "Could not read message from parent");
98     if (message.stop) {
99       running.store(false);
100       // Wake up the wait() loop by giving it an exited child process
101       if (fork() == 0) {
102         std::exit(0);
103       }
104     }
105   }
106   return {};
107 }
108 
MonitorLoop(const std::atomic_bool & running,const bool restart_subprocesses,std::vector<MonitorEntry> & monitored)109 Result<void> MonitorLoop(const std::atomic_bool& running,
110                          const bool restart_subprocesses,
111                          std::vector<MonitorEntry>& monitored) {
112   while (running.load()) {
113     int wstatus;
114     pid_t pid = wait(&wstatus);
115     int error_num = errno;
116     CF_EXPECT(pid != -1, "Wait failed: " << strerror(error_num));
117     if (!WIFSIGNALED(wstatus) && !WIFEXITED(wstatus)) {
118       LOG(DEBUG) << "Unexpected status from wait: " << wstatus
119                   << " for pid " << pid;
120       continue;
121     }
122     if (!running.load()) {  // Avoid extra restarts near the end
123       break;
124     }
125     auto matches = [pid](const auto& it) { return it.proc->pid() == pid; };
126     auto it = std::find_if(monitored.begin(), monitored.end(), matches);
127     if (it == monitored.end()) {
128       LogSubprocessExit("(unknown)", pid, wstatus);
129     } else {
130       LogSubprocessExit(it->cmd->GetShortName(), it->proc->pid(), wstatus);
131       if (restart_subprocesses) {
132         auto options = SubprocessOptions().InGroup(true);
133         it->proc.reset(new Subprocess(it->cmd->Start(options)));
134       } else {
135         bool is_critical = it->is_critical;
136         monitored.erase(it);
137         if (running.load() && is_critical) {
138           LOG(ERROR) << "Stopping all monitored processes due to unexpected "
139                         "exit of critical process";
140           Command stop_cmd(StopCvdBinary());
141           stop_cmd.Start();
142         }
143       }
144     }
145   }
146   return {};
147 }
148 
StopSubprocesses(std::vector<MonitorEntry> & monitored)149 Result<void> StopSubprocesses(std::vector<MonitorEntry>& monitored) {
150   LOG(DEBUG) << "Stopping monitored subprocesses";
151   auto stop = [](const auto& it) {
152     auto stop_result = it.proc->Stop();
153     if (stop_result == StopperResult::kStopFailure) {
154       LOG(WARNING) << "Error in stopping \"" << it.cmd->GetShortName() << "\"";
155       return false;
156     }
157     siginfo_t infop;
158     auto success = it.proc->Wait(&infop, WEXITED);
159     if (success < 0) {
160       LOG(WARNING) << "Failed to wait for process " << it.cmd->GetShortName();
161       return false;
162     }
163     if (stop_result == StopperResult::kStopCrash) {
164       LogSubprocessExit(it.cmd->GetShortName(), infop);
165     }
166     return true;
167   };
168   // Processes were started in the order they appear in the vector, stop them in
169   // reverse order for symmetry.
170   size_t stopped = std::count_if(monitored.rbegin(), monitored.rend(), stop);
171   CF_EXPECT(stopped == monitored.size(), "Didn't stop all subprocesses");
172   return {};
173 }
174 
175 }  // namespace
176 
RestartSubprocesses(bool r)177 ProcessMonitor::Properties& ProcessMonitor::Properties::RestartSubprocesses(
178     bool r) & {
179   restart_subprocesses_ = r;
180   return *this;
181 }
182 
RestartSubprocesses(bool r)183 ProcessMonitor::Properties ProcessMonitor::Properties::RestartSubprocesses(
184     bool r) && {
185   return std::move(RestartSubprocesses(r));
186 }
187 
AddCommand(MonitorCommand cmd)188 ProcessMonitor::Properties& ProcessMonitor::Properties::AddCommand(
189     MonitorCommand cmd) & {
190   entries_.emplace_back(std::move(cmd.command), cmd.is_critical);
191   return *this;
192 }
193 
AddCommand(MonitorCommand cmd)194 ProcessMonitor::Properties ProcessMonitor::Properties::AddCommand(
195     MonitorCommand cmd) && {
196   return std::move(AddCommand(std::move(cmd)));
197 }
198 
ProcessMonitor(ProcessMonitor::Properties && properties)199 ProcessMonitor::ProcessMonitor(ProcessMonitor::Properties&& properties)
200     : properties_(std::move(properties)), monitor_(-1) {}
201 
StopMonitoredProcesses()202 Result<void> ProcessMonitor::StopMonitoredProcesses() {
203   CF_EXPECT(monitor_ != -1, "The monitor process has already exited.");
204   CF_EXPECT(monitor_socket_->IsOpen(), "The monitor socket is already closed");
205   ParentToChildMessage message;
206   message.stop = true;
207   CF_EXPECT(WriteAllBinary(monitor_socket_, &message) == sizeof(message),
208             "Failed to communicate with monitor socket: "
209                 << monitor_socket_->StrError());
210 
211   pid_t last_monitor = monitor_;
212   monitor_ = -1;
213   monitor_socket_->Close();
214   int wstatus;
215   CF_EXPECT(waitpid(last_monitor, &wstatus, 0) == last_monitor,
216             "Failed to wait for monitor process");
217   CF_EXPECT(!WIFSIGNALED(wstatus), "Monitor process exited due to a signal");
218   CF_EXPECT(WIFEXITED(wstatus), "Monitor process exited for unknown reasons");
219   CF_EXPECT(WEXITSTATUS(wstatus) == 0,
220             "Monitor process exited with code " << WEXITSTATUS(wstatus));
221   return {};
222 }
223 
StartAndMonitorProcesses()224 Result<void> ProcessMonitor::StartAndMonitorProcesses() {
225   CF_EXPECT(monitor_ == -1, "The monitor process was already started");
226   CF_EXPECT(!monitor_socket_->IsOpen(), "Monitor socket was already opened");
227 
228   SharedFD client_pipe, host_pipe;
229   CF_EXPECT(SharedFD::Pipe(&client_pipe, &host_pipe),
230             "Could not create the monitor socket.");
231   monitor_ = fork();
232   if (monitor_ == 0) {
233     monitor_socket_ = client_pipe;
234     host_pipe->Close();
235     auto monitor_result = MonitorRoutine();
236     if (!monitor_result.ok()) {
237       LOG(ERROR) << "Monitoring processes failed:\n"
238                  << monitor_result.error().Message();
239       LOG(DEBUG) << "Monitoring processes failed:\n"
240                  << monitor_result.error().Trace();
241     }
242     std::exit(monitor_result.ok() ? 0 : 1);
243   } else {
244     client_pipe->Close();
245     monitor_socket_ = host_pipe;
246     return {};
247   }
248 }
249 
MonitorRoutine()250 Result<void> ProcessMonitor::MonitorRoutine() {
251   // Make this process a subreaper to reliably catch subprocess exits.
252   // See https://man7.org/linux/man-pages/man2/prctl.2.html
253   prctl(PR_SET_CHILD_SUBREAPER, 1);
254   prctl(PR_SET_PDEATHSIG, SIGHUP);  // Die when parent dies
255 
256   LOG(DEBUG) << "Monitoring subprocesses";
257   StartSubprocesses(properties_.entries_);
258 
259   std::atomic_bool running(true);
260   auto parent_comms =
261       std::async(std::launch::async, ReadMonitorSocketLoopForStop,
262                  std::ref(running), std::ref(monitor_socket_));
263 
264   MonitorLoop(running, properties_.restart_subprocesses_, properties_.entries_);
265   CF_EXPECT(parent_comms.get(), "Should have exited if monitoring stopped");
266 
267   StopSubprocesses(properties_.entries_);
268   LOG(DEBUG) << "Done monitoring subprocesses";
269   return {};
270 }
271 
272 }  // namespace cuttlefish
273