1 /*
2 * Copyright (C) 2024 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "host/commands/process_sandboxer/pidfd.h"
17
18 #include <dirent.h>
19 #include <fcntl.h>
20 #include <linux/sched.h>
21 #include <signal.h>
22 #include <sys/prctl.h>
23 #include <sys/syscall.h>
24 #include <sys/types.h>
25 #include <unistd.h>
26
27 #include <cerrno>
28 #include <cstdint>
29 #include <fstream>
30 #include <ios>
31 #include <memory>
32 #include <sstream>
33 #include <string>
34 #include <string_view>
35 #include <unordered_map>
36 #include <utility>
37 #include <vector>
38
39 #include <absl/log/check.h>
40 #include <absl/log/log.h>
41 #include <absl/status/status.h>
42 #include <absl/status/statusor.h>
43 #include <absl/strings/numbers.h>
44 #include <absl/strings/str_cat.h>
45 #include <absl/strings/str_format.h>
46 #include <absl/strings/str_join.h>
47 #include <absl/strings/str_split.h>
48 #include <absl/types/span.h>
49 #include <sandboxed_api/util/fileops.h>
50
51 namespace cuttlefish::process_sandboxer {
52
53 using sapi::file_util::fileops::FDCloser;
54
FromRunningProcess(pid_t pid)55 absl::StatusOr<PidFd> PidFd::FromRunningProcess(pid_t pid) {
56 FDCloser fd(syscall(__NR_pidfd_open, pid, 0)); // Always CLOEXEC
57 if (fd.get() < 0) {
58 return absl::ErrnoToStatus(errno, "`pidfd_open` failed");
59 }
60 return PidFd(std::move(fd), pid);
61 }
62
LaunchSubprocess(absl::Span<const std::string> argv,std::vector<std::pair<FDCloser,int>> fds,absl::Span<const std::string> env)63 absl::StatusOr<PidFd> PidFd::LaunchSubprocess(
64 absl::Span<const std::string> argv,
65 std::vector<std::pair<FDCloser, int>> fds,
66 absl::Span<const std::string> env) {
67 int pidfd;
68 clone_args args_for_clone = clone_args{
69 .flags = CLONE_PIDFD,
70 .pidfd = reinterpret_cast<std::uintptr_t>(&pidfd),
71 };
72
73 pid_t res = syscall(__NR_clone3, &args_for_clone, sizeof(args_for_clone));
74 if (res < 0) {
75 std::string argv_str = absl::StrJoin(argv, "','");
76 std::string error = absl::StrCat("clone3 failed: argv=['", argv_str, "']");
77 return absl::ErrnoToStatus(errno, error);
78 } else if (res > 0) {
79 std::string argv_str = absl::StrJoin(argv, "','");
80 VLOG(1) << res << ": Running w/o sandbox ['" << argv_str << "]";
81
82 FDCloser fd(pidfd);
83 return PidFd(std::move(fd), res);
84 }
85
86 /* Duplicate every input in `fds` into a range higher than the highest output
87 * in `fds`, in case there is any overlap between inputs and outputs. */
88 int minimum_backup_fd = -1;
89 for (const auto& [my_fd, target_fd] : fds) {
90 if (target_fd + 1 > minimum_backup_fd) {
91 minimum_backup_fd = target_fd + 1;
92 }
93 }
94
95 std::unordered_map<int, int> backup_mapping;
96 for (const auto& [my_fd, target_fd] : fds) {
97 int backup = fcntl(my_fd.get(), F_DUPFD, minimum_backup_fd);
98 PCHECK(backup >= 0) << "fcntl(..., F_DUPFD) failed";
99 int flags = fcntl(backup, F_GETFD);
100 PCHECK(flags >= 0) << "fcntl(..., F_GETFD failed";
101 flags &= FD_CLOEXEC;
102 PCHECK(fcntl(backup, F_SETFD, flags) >= 0) << "fcntl(..., F_SETFD failed";
103 backup_mapping[backup] = target_fd;
104 }
105
106 for (const auto& [backup_fd, target_fd] : backup_mapping) {
107 // dup2 always unsets FD_CLOEXEC
108 PCHECK(dup2(backup_fd, target_fd) >= 0) << "dup2 failed";
109 }
110
111 std::vector<std::string> argv_clone(argv.begin(), argv.end());
112 std::vector<char*> argv_cstr;
113 argv_cstr.reserve(argv_clone.size());
114 for (auto& arg : argv_clone) {
115 argv_cstr.emplace_back(arg.data());
116 }
117 argv_cstr.emplace_back(nullptr);
118
119 std::vector<std::string> env_clone(env.begin(), env.end());
120 std::vector<char*> env_cstr;
121 env_cstr.reserve(env_clone.size());
122 for (std::string& env_member : env_clone) {
123 env_cstr.emplace_back(env_member.data());
124 }
125 env_cstr.emplace_back(nullptr);
126
127 if (prctl(PR_SET_PDEATHSIG, SIGHUP) < 0) { // Die when parent dies
128 PLOG(FATAL) << "prctl failed";
129 }
130
131 execve(argv_cstr[0], argv_cstr.data(), env_cstr.data());
132
133 PLOG(FATAL) << "execv failed";
134 }
135
PidFd(FDCloser fd,pid_t pid)136 PidFd::PidFd(FDCloser fd, pid_t pid) : fd_(std::move(fd)), pid_(pid) {}
137
Get() const138 int PidFd::Get() const { return fd_.get(); }
139
AllFds()140 absl::StatusOr<std::vector<std::pair<FDCloser, int>>> PidFd::AllFds() {
141 std::vector<std::pair<FDCloser, int>> fds;
142
143 std::string dir_name = absl::StrFormat("/proc/%d/fd", pid_);
144 std::unique_ptr<DIR, int (*)(DIR*)> dir(opendir(dir_name.c_str()), closedir);
145 if (!dir) {
146 return absl::ErrnoToStatus(errno, "`opendir` failed");
147 }
148 for (dirent* ent = readdir(dir.get()); ent; ent = readdir(dir.get())) {
149 int other_fd;
150 // `d_name` is guaranteed to be null terminated
151 std::string_view name{ent->d_name};
152 if (name == "." || name == "..") {
153 continue;
154 }
155 if (!absl::SimpleAtoi(name, &other_fd)) {
156 std::string error = absl::StrFormat("'%v/%v' not an int", dir_name, name);
157 return absl::InternalError(error);
158 }
159 // Always CLOEXEC
160 FDCloser our_fd(syscall(__NR_pidfd_getfd, fd_.get(), other_fd, 0));
161 if (our_fd.get() < 0) {
162 return absl::ErrnoToStatus(errno, "`pidfd_getfd` failed");
163 }
164 fds.emplace_back(std::move(our_fd), other_fd);
165 }
166
167 return fds;
168 }
169
ReadNullSepFile(const std::string & path)170 static absl::StatusOr<std::vector<std::string>> ReadNullSepFile(
171 const std::string& path) {
172 std::ifstream cmdline_file(path, std::ios::binary);
173 if (!cmdline_file) {
174 auto err = absl::StrFormat("Failed to open '%v'", path);
175 return absl::InternalError(err);
176 }
177 std::stringstream buffer;
178 buffer << cmdline_file.rdbuf();
179 if (!cmdline_file) {
180 auto err = absl::StrFormat("Failed to read '%v'", path);
181 return absl::InternalError(err);
182 }
183
184 std::vector<std::string> members = absl::StrSplit(buffer.str(), '\0');
185 if (members.empty()) {
186 return absl::InternalError(absl::StrFormat("'%v' is empty", path));
187 } else if (members.back().empty()) {
188 members.pop_back(); // may end in a null terminator
189 }
190 return members;
191 }
192
Argv()193 absl::StatusOr<std::vector<std::string>> PidFd::Argv() {
194 return ReadNullSepFile(absl::StrFormat("/proc/%d/cmdline", pid_));
195 }
196
Env()197 absl::StatusOr<std::vector<std::string>> PidFd::Env() {
198 return ReadNullSepFile(absl::StrFormat("/proc/%d/environ", pid_));
199 }
200
HaltHierarchy()201 absl::Status PidFd::HaltHierarchy() {
202 if (absl::Status stop = SendSignal(SIGSTOP); !stop.ok()) {
203 return stop;
204 }
205 if (absl::Status halt_children = HaltChildHierarchy(); !halt_children.ok()) {
206 return halt_children;
207 }
208 return SendSignal(SIGKILL);
209 }
210
211 /* Assumes the process referred to by `pid` does not spawn any more children or
212 * reap any children while this function is running. */
FindChildPids(pid_t pid)213 static absl::StatusOr<std::vector<pid_t>> FindChildPids(pid_t pid) {
214 std::vector<pid_t> child_pids;
215
216 std::string task_dir = absl::StrFormat("/proc/%d/task", pid);
217 std::unique_ptr<DIR, int (*)(DIR*)> dir(opendir(task_dir.c_str()), closedir);
218 if (!dir) {
219 return absl::ErrnoToStatus(errno, "`opendir` failed");
220 }
221
222 while (dirent* ent = readdir(dir.get())) {
223 // `d_name` is guaranteed to be null terminated
224 std::string_view name = ent->d_name;
225 if (name == "." || name == "..") {
226 continue;
227 }
228 std::string children_file =
229 absl::StrFormat("/proc/%d/task/%s/children", pid, name);
230 std::ifstream children_stream(children_file);
231 if (!children_stream) {
232 std::string err = absl::StrCat("can't read child file: ", children_file);
233 return absl::InternalError(err);
234 }
235
236 std::string children_str;
237 std::getline(children_stream, children_str);
238 for (std::string_view child_str : absl::StrSplit(children_str, ' ')) {
239 if (child_str.empty()) {
240 continue;
241 }
242 pid_t child_pid;
243 if (!absl::SimpleAtoi(child_str, &child_pid)) {
244 std::string error = absl::StrFormat("'%s' is not a pid_t", child_str);
245 return absl::InternalError(error);
246 }
247 child_pids.emplace_back(child_pid);
248 }
249 }
250
251 return child_pids;
252 }
253
HaltChildHierarchy()254 absl::Status PidFd::HaltChildHierarchy() {
255 absl::StatusOr<std::vector<pid_t>> children = FindChildPids(pid_);
256 if (!children.ok()) {
257 return children.status();
258 }
259 for (pid_t child : *children) {
260 absl::StatusOr<PidFd> child_pidfd = FromRunningProcess(child);
261 if (!child_pidfd.ok()) {
262 return child_pidfd.status();
263 }
264 // HaltHierarchy will SIGSTOP the child so it cannot spawn more children
265 // or reap its own children while everything is being stopped.
266 if (absl::Status halt = child_pidfd->HaltHierarchy(); !halt.ok()) {
267 return halt;
268 }
269 }
270
271 return absl::OkStatus();
272 }
273
SendSignal(int signal)274 absl::Status PidFd::SendSignal(int signal) {
275 if (syscall(__NR_pidfd_send_signal, fd_.get(), signal, nullptr, 0) < 0) {
276 return absl::ErrnoToStatus(errno, "pidfd_send_signal failed");
277 }
278 return absl::OkStatus();
279 }
280
281 } // namespace cuttlefish::process_sandboxer
282