/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
#include "host/commands/run_cvd/process_monitor.h"

#include <sys/prctl.h>
#include <sys/types.h>
#include <sys/wait.h>

#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>

#include <algorithm>
#include <atomic>
#include <future>
#include <thread>

#include <android-base/logging.h>

#include "common/libs/fs/shared_buf.h"
#include "common/libs/fs/shared_select.h"
36 
37 namespace cuttlefish {
38 
// Fixed-size control message that the parent writes to the monitor socket
// (see StopMonitoredProcesses) and that the monitor process reads back in
// MonitorRoutine. It is transferred as raw bytes, so it must stay a plain,
// trivially copyable aggregate.
struct ParentToChildMessage {
  // When true, the monitor leaves its wait loop and stops the monitored
  // subprocesses. Defaults to false so that a default-constructed message
  // never carries an indeterminate value.
  bool stop = false;
};
42 
RestartSubprocesses(bool r)43 ProcessMonitor::Properties& ProcessMonitor::Properties::RestartSubprocesses(
44     bool r) & {
45   restart_subprocesses_ = r;
46   return *this;
47 }
48 
RestartSubprocesses(bool r)49 ProcessMonitor::Properties ProcessMonitor::Properties::RestartSubprocesses(
50     bool r) && {
51   restart_subprocesses_ = r;
52   return std::move(*this);
53 }
54 
AddCommand(Command cmd)55 ProcessMonitor::Properties& ProcessMonitor::Properties::AddCommand(
56     Command cmd) & {
57   auto& entry = entries_.emplace_back();
58   entry.cmd.reset(new Command(std::move(cmd)));
59   return *this;
60 }
61 
AddCommand(Command cmd)62 ProcessMonitor::Properties ProcessMonitor::Properties::AddCommand(
63     Command cmd) && {
64   auto& entry = entries_.emplace_back();
65   entry.cmd.reset(new Command(std::move(cmd)));
66   return std::move(*this);
67 }
68 
ProcessMonitor(ProcessMonitor::Properties && properties)69 ProcessMonitor::ProcessMonitor(ProcessMonitor::Properties&& properties)
70     : properties_(std::move(properties)), monitor_(-1) {}
71 
StopMonitoredProcesses()72 Result<void> ProcessMonitor::StopMonitoredProcesses() {
73   CF_EXPECT(monitor_ != -1, "The monitor process has already exited.");
74   CF_EXPECT(monitor_socket_->IsOpen(), "The monitor socket is already closed");
75   ParentToChildMessage message;
76   message.stop = true;
77   CF_EXPECT(WriteAllBinary(monitor_socket_, &message) == sizeof(message),
78             "Failed to communicate with monitor socket: "
79                 << monitor_socket_->StrError());
80 
81   pid_t last_monitor = monitor_;
82   monitor_ = -1;
83   monitor_socket_->Close();
84   int wstatus;
85   CF_EXPECT(waitpid(last_monitor, &wstatus, 0) == last_monitor,
86             "Failed to wait for monitor process");
87   CF_EXPECT(!WIFSIGNALED(wstatus), "Monitor process exited due to a signal");
88   CF_EXPECT(WIFEXITED(wstatus), "Monitor process exited for unknown reasons");
89   CF_EXPECT(WEXITSTATUS(wstatus) == 0,
90             "Monitor process exited with code " << WEXITSTATUS(wstatus));
91   return {};
92 }
93 
StartAndMonitorProcesses()94 Result<void> ProcessMonitor::StartAndMonitorProcesses() {
95   CF_EXPECT(monitor_ == -1, "The monitor process was already started");
96   CF_EXPECT(!monitor_socket_->IsOpen(), "Monitor socket was already opened");
97 
98   SharedFD client_pipe, host_pipe;
99   CF_EXPECT(SharedFD::Pipe(&client_pipe, &host_pipe),
100             "Could not create the monitor socket.");
101   monitor_ = fork();
102   if (monitor_ == 0) {
103     monitor_socket_ = client_pipe;
104     host_pipe->Close();
105     auto monitor = MonitorRoutine();
106     if (!monitor.ok()) {
107       LOG(ERROR) << "Monitoring processes failed:\n" << monitor.error();
108     }
109     std::exit(monitor.ok() ? 0 : 1);
110   } else {
111     client_pipe->Close();
112     monitor_socket_ = host_pipe;
113     return {};
114   }
115 }
116 
LogSubprocessExit(const std::string & name,pid_t pid,int wstatus)117 static void LogSubprocessExit(const std::string& name, pid_t pid, int wstatus) {
118   LOG(INFO) << "Detected unexpected exit of monitored subprocess " << name;
119   if (WIFEXITED(wstatus)) {
120     LOG(INFO) << "Subprocess " << name << " (" << pid
121               << ") has exited with exit code " << WEXITSTATUS(wstatus);
122   } else if (WIFSIGNALED(wstatus)) {
123     LOG(ERROR) << "Subprocess " << name << " (" << pid
124                << ") was interrupted by a signal: " << WTERMSIG(wstatus);
125   } else {
126     LOG(INFO) << "subprocess " << name << " (" << pid
127               << ") has exited for unknown reasons";
128   }
129 }
130 
LogSubprocessExit(const std::string & name,const siginfo_t & infop)131 static void LogSubprocessExit(const std::string& name, const siginfo_t& infop) {
132   LOG(INFO) << "Detected unexpected exit of monitored subprocess " << name;
133   if (infop.si_code == CLD_EXITED) {
134     LOG(INFO) << "Subprocess " << name << " (" << infop.si_pid
135               << ") has exited with exit code " << infop.si_status;
136   } else if (infop.si_code == CLD_KILLED) {
137     LOG(ERROR) << "Subprocess " << name << " (" << infop.si_pid
138                << ") was interrupted by a signal: " << infop.si_status;
139   } else {
140     LOG(INFO) << "subprocess " << name << " (" << infop.si_pid
141               << ") has exited for unknown reasons (code = " << infop.si_code
142               << ", status = " << infop.si_status << ")";
143   }
144 }
145 
MonitorRoutine()146 Result<void> ProcessMonitor::MonitorRoutine() {
147   // Make this process a subreaper to reliably catch subprocess exits.
148   // See https://man7.org/linux/man-pages/man2/prctl.2.html
149   prctl(PR_SET_CHILD_SUBREAPER, 1);
150   prctl(PR_SET_PDEATHSIG, SIGHUP); // Die when parent dies
151 
152   LOG(DEBUG) << "Starting monitoring subprocesses";
153   for (auto& monitored : properties_.entries_) {
154     LOG(INFO) << monitored.cmd->GetShortName();
155     auto options = SubprocessOptions().InGroup(true);
156     monitored.proc.reset(new Subprocess(monitored.cmd->Start(options)));
157     CF_EXPECT(monitored.proc->Started(), "Failed to start process");
158   }
159 
160   bool running = true;
161   auto policy = std::launch::async;
162   auto parent_comms = std::async(policy, [&running, this]() -> Result<void> {
163     LOG(DEBUG) << "Waiting for a `stop` message from the parent.";
164     while (running) {
165       ParentToChildMessage message;
166       CF_EXPECT(ReadExactBinary(monitor_socket_, &message) == sizeof(message),
167                 "Could not read message from parent.");
168       if (message.stop) {
169         running = false;
170         // Wake up the wait() loop by giving it an exited child process
171         if (fork() == 0) {
172           std::exit(0);
173         }
174       }
175     }
176     return {};
177   });
178 
179   auto& monitored = properties_.entries_;
180 
181   LOG(DEBUG) << "Monitoring subprocesses";
182   while(running) {
183     int wstatus;
184     pid_t pid = wait(&wstatus);
185     int error_num = errno;
186     CF_EXPECT(pid != -1, "Wait failed: " << strerror(error_num));
187     if (!WIFSIGNALED(wstatus) && !WIFEXITED(wstatus)) {
188       LOG(DEBUG) << "Unexpected status from wait: " << wstatus
189                   << " for pid " << pid;
190       continue;
191     }
192     if (!running) { // Avoid extra restarts near the end
193       break;
194     }
195     auto matches = [pid](const auto& it) { return it.proc->pid() == pid; };
196     auto it = std::find_if(monitored.begin(), monitored.end(), matches);
197     if (it == monitored.end()) {
198       LogSubprocessExit("(unknown)", pid, wstatus);
199     } else {
200       LogSubprocessExit(it->cmd->GetShortName(), it->proc->pid(), wstatus);
201       if (properties_.restart_subprocesses_) {
202         auto options = SubprocessOptions().InGroup(true);
203         it->proc.reset(new Subprocess(it->cmd->Start(options)));
204       } else {
205         properties_.entries_.erase(it);
206       }
207     }
208   }
209 
210   CF_EXPECT(parent_comms.get());  // Should have exited if `running` is false
211   auto stop = [](const auto& it) {
212     auto stop_result = it.proc->Stop();
213     if (stop_result == StopperResult::kStopFailure) {
214       LOG(WARNING) << "Error in stopping \"" << it.cmd->GetShortName() << "\"";
215       return false;
216     }
217     siginfo_t infop;
218     auto success = it.proc->Wait(&infop, WEXITED);
219     if (success < 0) {
220       LOG(WARNING) << "Failed to wait for process " << it.cmd->GetShortName();
221       return false;
222     }
223     if (stop_result == StopperResult::kStopCrash) {
224       LogSubprocessExit(it.cmd->GetShortName(), infop);
225     }
226     return true;
227   };
228   // Processes were started in the order they appear in the vector, stop them in
229   // reverse order for symmetry.
230   size_t stopped = std::count_if(monitored.rbegin(), monitored.rend(), stop);
231   LOG(DEBUG) << "Done monitoring subprocesses";
232   CF_EXPECT(stopped == monitored.size(), "Didn't stop all subprocesses");
233   return {};
234 }
235 
236 }  // namespace cuttlefish
237