1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // Implementation file for the sandbox2::Namespace class.
16
17 #include "sandboxed_api/sandbox2/namespace.h"
18
19 #include <fcntl.h>
20 #include <net/if.h>
21 #include <sched.h>
22 #include <sys/ioctl.h>
23 #include <sys/mount.h>
24 #include <sys/socket.h>
25 #include <sys/stat.h>
26 #include <syscall.h>
27 #include <unistd.h>
28
29 #include <cstdint>
30 #include <cstring>
31 #include <memory>
32 #include <string>
33 #include <utility>
34 #include <vector>
35
36 #include "absl/strings/str_cat.h"
37 #include "sandboxed_api/sandbox2/forkserver.pb.h"
38 #include "sandboxed_api/sandbox2/mounts.h"
39 #include "sandboxed_api/util/fileops.h"
40 #include "sandboxed_api/util/path.h"
41 #include "sandboxed_api/util/raw_logging.h"
42
43 namespace sandbox2 {
44
// Short aliases for the helper namespaces used throughout this file.
namespace file = ::sapi::file;
namespace file_util = ::sapi::file_util;

// Directory on which the tmpfs that becomes the new root filesystem is
// mounted (see PrepareChroot() and Namespace::InitializeInitialNamespaces()).
static constexpr char kSandbox2ChrootPath[] = "/tmp/.sandbox2chroot";
49
50 namespace {
MountFallbackToReadOnly(const char * source,const char * target,const char * filesystem,uintptr_t flags,const void * data)51 int MountFallbackToReadOnly(const char* source, const char* target,
52 const char* filesystem, uintptr_t flags,
53 const void* data) {
54 int rv = mount(source, target, filesystem, flags, data);
55 if (rv != 0 && (flags & MS_RDONLY) == 0) {
56 SAPI_RAW_PLOG(WARNING, "Mounting %s on %s (fs type %s) read-write failed",
57 source, target, filesystem);
58 rv = mount(source, target, filesystem, flags | MS_RDONLY, data);
59 if (rv == 0) {
60 SAPI_RAW_LOG(INFO, "Mounted %s on %s (fs type %s) as read-only", source,
61 target, filesystem);
62 }
63 }
64 return rv;
65 }
66
PrepareChroot(const Mounts & mounts)67 void PrepareChroot(const Mounts& mounts) {
68 // Create a tmpfs mount for the new rootfs.
69 SAPI_RAW_CHECK(
70 file_util::fileops::CreateDirectoryRecursively(kSandbox2ChrootPath, 0700),
71 "could not create directory for rootfs");
72 SAPI_RAW_PCHECK(mount("none", kSandbox2ChrootPath, "tmpfs", 0, nullptr) == 0,
73 "mounting rootfs failed");
74
75 // Walk the tree and perform all the mount operations.
76 mounts.CreateMounts(kSandbox2ChrootPath);
77
78 if (mounts.IsRootReadOnly()) {
79 // Remount the chroot read-only
80 SAPI_RAW_PCHECK(mount(kSandbox2ChrootPath, kSandbox2ChrootPath, "",
81 MS_BIND | MS_REMOUNT | MS_RDONLY, nullptr) == 0,
82 "remounting chroot read-only failed");
83 }
84 }
85
TryDenySetgroups()86 void TryDenySetgroups() {
87 file_util::fileops::FDCloser fd(
88 TEMP_FAILURE_RETRY(open("/proc/self/setgroups", O_WRONLY | O_CLOEXEC)));
89 // We ignore errors since they are most likely due to an old kernel.
90 if (fd.get() == -1) {
91 return;
92 }
93
94 dprintf(fd.get(), "deny");
95 }
96
WriteIDMap(const char * map_path,int32_t uid)97 void WriteIDMap(const char* map_path, int32_t uid) {
98 file_util::fileops::FDCloser fd(
99 TEMP_FAILURE_RETRY(open(map_path, O_WRONLY | O_CLOEXEC)));
100 SAPI_RAW_PCHECK(fd.get() != -1, "Couldn't open %s", map_path);
101
102 SAPI_RAW_PCHECK(dprintf(fd.get(), "1000 %d 1", uid) >= 0,
103 "Could not write %d to %s", uid, map_path);
104 }
105
SetupIDMaps(uid_t uid,gid_t gid)106 void SetupIDMaps(uid_t uid, gid_t gid) {
107 TryDenySetgroups();
108 WriteIDMap("/proc/self/uid_map", uid);
109 WriteIDMap("/proc/self/gid_map", gid);
110 }
111
ActivateLoopbackInterface()112 void ActivateLoopbackInterface() {
113 ifreq ifreq;
114
115 ifreq.ifr_flags = 0;
116 strncpy(ifreq.ifr_name, "lo", IFNAMSIZ);
117
118 // Create an AF_INET6 socket to perform the IF FLAGS ioctls on.
119 int fd = socket(AF_INET6, SOCK_DGRAM, 0);
120 SAPI_RAW_PCHECK(fd != -1, "creating socket for activating loopback failed");
121
122 file_util::fileops::FDCloser fd_closer{fd};
123
124 // First get the existing flags.
125 SAPI_RAW_PCHECK(ioctl(fd, SIOCGIFFLAGS, &ifreq) != -1,
126 "Getting existing flags");
127
128 // From 812 kernels, we don't have CAP_NET_ADMIN anymore. But the interface is
129 // already up, so we can skip the next ioctl.
130 if (ifreq.ifr_flags & IFF_UP) {
131 return;
132 }
133
134 // Set the UP flag and write the flags back.
135 ifreq.ifr_flags |= IFF_UP;
136 SAPI_RAW_PCHECK(ioctl(fd, SIOCSIFFLAGS, &ifreq) != -1, "Setting IFF_UP flag");
137 }
138
139 // Logs the filesystem contents if verbose logging is enabled.
LogFilesystem(const std::string & dir)140 void LogFilesystem(const std::string& dir) {
141 std::vector<std::string> entries;
142 std::string error;
143 if (!file_util::fileops::ListDirectoryEntries(dir, &entries, &error)) {
144 SAPI_RAW_PLOG(ERROR, "could not list directory entries for %s", dir);
145 return;
146 }
147
148 for (const auto& entry : entries) {
149 struct stat64 st;
150 std::string full_path = file::JoinPath(dir, entry);
151 if (lstat64(full_path.c_str(), &st) != 0) {
152 SAPI_RAW_PLOG(ERROR, "could not stat %s", full_path);
153 continue;
154 }
155
156 char ftype;
157 switch (st.st_mode & S_IFMT) {
158 case S_IFREG:
159 ftype = '-';
160 break;
161 case S_IFDIR:
162 ftype = 'd';
163 break;
164 case S_IFLNK:
165 ftype = 'l';
166 break;
167 default:
168 ftype = '?';
169 break;
170 }
171
172 std::string type_and_mode;
173 type_and_mode += ftype;
174 type_and_mode += st.st_mode & S_IRUSR ? 'r' : '-';
175 type_and_mode += st.st_mode & S_IWUSR ? 'w' : '-';
176 type_and_mode += st.st_mode & S_IXUSR ? 'x' : '-';
177 type_and_mode += st.st_mode & S_IRGRP ? 'r' : '-';
178 type_and_mode += st.st_mode & S_IWGRP ? 'w' : '-';
179 type_and_mode += st.st_mode & S_IXGRP ? 'x' : '-';
180 type_and_mode += st.st_mode & S_IROTH ? 'r' : '-';
181 type_and_mode += st.st_mode & S_IWOTH ? 'w' : '-';
182 type_and_mode += st.st_mode & S_IXOTH ? 'x' : '-';
183
184 std::string link;
185 if (S_ISLNK(st.st_mode)) {
186 link = absl::StrCat(" -> ", file_util::fileops::ReadLink(full_path));
187 }
188 SAPI_RAW_VLOG(2, "%s %s%s", type_and_mode.c_str(), full_path.c_str(),
189 link.c_str());
190
191 if (S_ISDIR(st.st_mode)) {
192 LogFilesystem(full_path);
193 }
194 }
195 }
196
197 } // namespace
198
Namespace(Mounts mounts,std::string hostname,NetNsMode netns_config,bool allow_mount_propagation)199 Namespace::Namespace(Mounts mounts, std::string hostname,
200 NetNsMode netns_config, bool allow_mount_propagation)
201 : mounts_(std::move(mounts)),
202 hostname_(std::move(hostname)),
203 allow_mount_propagation_(allow_mount_propagation),
204 netns_config_(netns_config) {
205 // Remove the CLONE_NEWNET flag to allow networking, or for the shared netns.
206 // In the latter case, the flag will be added later on.
207 if (netns_config_ == NETNS_MODE_NONE ||
208 netns_config_ == NETNS_MODE_SHARED_PER_FORKSERVER) {
209 clone_flags_ &= ~CLONE_NEWNET;
210 }
211 }
212
InitializeNamespaces(uid_t uid,gid_t gid,int32_t clone_flags,const Mounts & mounts,const std::string & hostname,bool avoid_pivot_root,bool allow_mount_propagation)213 void Namespace::InitializeNamespaces(uid_t uid, gid_t gid, int32_t clone_flags,
214 const Mounts& mounts,
215 const std::string& hostname,
216 bool avoid_pivot_root,
217 bool allow_mount_propagation) {
218 if (clone_flags & CLONE_NEWUSER && !avoid_pivot_root) {
219 SetupIDMaps(uid, gid);
220 }
221
222 if (!(clone_flags & CLONE_NEWNS)) {
223 // CLONE_NEWNS is always set if we're running in namespaces.
224 return;
225 }
226
227 std::unique_ptr<file_util::fileops::FDCloser> root_fd;
228 if (avoid_pivot_root) {
229 // We want to bind-mount chrooted to real root, so that symlinks work.
230 // Reference to main root is kept to escape later from the chroot
231 root_fd = std::make_unique<file_util::fileops::FDCloser>(
232 TEMP_FAILURE_RETRY(open("/", O_PATH)));
233 SAPI_RAW_CHECK(root_fd->get() != -1, "creating fd for main root");
234
235 SAPI_RAW_PCHECK(chroot("/realroot") != -1, "chrooting to real root");
236 SAPI_RAW_PCHECK(chdir("/") != -1, "chdir / after chrooting real root");
237 }
238
239 SAPI_RAW_PCHECK(
240 mount("", "/proc", "proc", MS_NODEV | MS_NOEXEC | MS_NOSUID, nullptr) !=
241 -1,
242 "Could not mount a new /proc"
243 );
244
245 if (clone_flags & CLONE_NEWNET) {
246 // Some things can only be done if inside a new network namespace, like
247 // mounting /sys, setting a hostname or bringing up lo if necessary.
248
249 SAPI_RAW_PCHECK(
250 MountFallbackToReadOnly("", "/sys", "sysfs",
251 MS_NODEV | MS_NOEXEC | MS_NOSUID,
252 nullptr) != -1,
253 "Could not mount a new /sys"
254 );
255
256 SAPI_RAW_PCHECK(sethostname(hostname.c_str(), hostname.size()) != -1,
257 "Could not set network namespace hostname '%s'", hostname);
258 ActivateLoopbackInterface();
259 }
260
261 PrepareChroot(mounts);
262
263 if (avoid_pivot_root) {
264 // Keep a reference to /proc/self as it might not be mounted later
265 file_util::fileops::FDCloser proc_self_fd(
266 TEMP_FAILURE_RETRY(open("/proc/self/", O_PATH)));
267 SAPI_RAW_PCHECK(proc_self_fd.get() != -1, "opening /proc/self");
268
269 // Return to the main root
270 SAPI_RAW_PCHECK(fchdir(root_fd->get()) != -1, "chdir to main root");
271 SAPI_RAW_PCHECK(chroot(".") != -1, "chrooting to main root");
272 SAPI_RAW_PCHECK(chdir("/") != -1, "chdir / after chrooting main root");
273
274 // Get a refrence to /realroot to umount it later
275 file_util::fileops::FDCloser realroot_fd(
276 TEMP_FAILURE_RETRY(open("/realroot", O_PATH)));
277
278 // Move the chroot out of realroot to /
279 std::string chroot_path = file::JoinPath("/realroot", kSandbox2ChrootPath);
280 SAPI_RAW_PCHECK(chdir(chroot_path.c_str()) != -1, "chdir to chroot");
281 SAPI_RAW_PCHECK(mount(".", "/", "", MS_MOVE, nullptr) == 0,
282 "moving rootfs failed");
283 SAPI_RAW_PCHECK(chroot(".") != -1, "chrooting moved chroot");
284 SAPI_RAW_PCHECK(chdir("/") != -1, "chdir / after chroot");
285
286 // Umount the realroot so that no reference is left
287 SAPI_RAW_PCHECK(fchdir(realroot_fd.get()) != -1, "fchdir to /realroot");
288 SAPI_RAW_PCHECK(umount2(".", MNT_DETACH) != -1, "detaching old root");
289
290 if (clone_flags & CLONE_NEWUSER) {
291 // Also CLONE_NEWNS so that / mount becomes locked
292 SAPI_RAW_PCHECK(unshare(CLONE_NEWUSER | CLONE_NEWNS) != -1,
293 "unshare(CLONE_NEWUSER | CLONE_NEWNS)");
294 // Setup ID maps using reference to /proc/self obatined earlier
295 file_util::fileops::FDCloser setgroups_fd(TEMP_FAILURE_RETRY(
296 openat(proc_self_fd.get(), "setgroups", O_WRONLY | O_CLOEXEC)));
297 // We ignore errors since they are most likely due to an old kernel.
298 if (setgroups_fd.get() != -1) {
299 dprintf(setgroups_fd.get(), "deny");
300 }
301 file_util::fileops::FDCloser uid_map_fd(
302 TEMP_FAILURE_RETRY(openat(proc_self_fd.get(), "uid_map", O_WRONLY)));
303 SAPI_RAW_PCHECK(uid_map_fd.get() != -1, "Couldn't open uid_map");
304 SAPI_RAW_PCHECK(dprintf(uid_map_fd.get(), "1000 1000 1") >= 0,
305 "Could not write uid_map");
306 file_util::fileops::FDCloser gid_map_fd(
307 TEMP_FAILURE_RETRY(openat(proc_self_fd.get(), "gid_map", O_WRONLY)));
308 SAPI_RAW_PCHECK(gid_map_fd.get() != -1, "Couldn't open gid_map");
309 SAPI_RAW_PCHECK(dprintf(gid_map_fd.get(), "1000 1000 1") >= 0,
310 "Could not write gid_map");
311 }
312 } else {
313 // This requires some explanation: It's actually possible to pivot_root('/',
314 // '/'). After this operation has been completed, the old root is mounted
315 // over the new root, and it's OK to simply umount('/') now, and to have
316 // new_root as '/'. This allows us not care about providing any special
317 // directory for old_root, which is sometimes not easy, given that e.g. /tmp
318 // might not always be present inside new_root.
319 SAPI_RAW_PCHECK(syscall(__NR_pivot_root, kSandbox2ChrootPath,
320 kSandbox2ChrootPath) != -1,
321 "pivot root");
322 SAPI_RAW_PCHECK(umount2("/", MNT_DETACH) != -1, "detaching old root");
323 }
324
325 SAPI_RAW_PCHECK(chdir("/") == 0,
326 "changing cwd after mntns initialization failed");
327
328 if (allow_mount_propagation) {
329 SAPI_RAW_PCHECK(mount("/", "/", "", MS_SLAVE | MS_REC, nullptr) == 0,
330 "changing mount propagation to slave failed");
331 } else {
332 SAPI_RAW_PCHECK(mount("/", "/", "", MS_PRIVATE | MS_REC, nullptr) == 0,
333 "changing mount propagation to private failed");
334 }
335
336 if (SAPI_RAW_VLOG_IS_ON(2)) {
337 SAPI_RAW_VLOG(2, "Dumping the sandboxee's filesystem:");
338 LogFilesystem("/");
339 }
340 }
341
InitializeInitialNamespaces(uid_t uid,gid_t gid)342 void Namespace::InitializeInitialNamespaces(uid_t uid, gid_t gid) {
343 SetupIDMaps(uid, gid);
344 SAPI_RAW_CHECK(
345 file_util::fileops::CreateDirectoryRecursively(kSandbox2ChrootPath, 0700),
346 "could not create directory for rootfs");
347 SAPI_RAW_PCHECK(mount("none", kSandbox2ChrootPath, "tmpfs", 0, nullptr) == 0,
348 "mounting rootfs failed");
349 auto realroot_path = file::JoinPath(kSandbox2ChrootPath, "/realroot");
350 SAPI_RAW_CHECK(
351 file_util::fileops::CreateDirectoryRecursively(realroot_path, 0700),
352 "could not create directory for real root");
353 SAPI_RAW_PCHECK(syscall(__NR_pivot_root, kSandbox2ChrootPath,
354 realroot_path.c_str()) != -1,
355 "pivot root");
356 SAPI_RAW_PCHECK(symlink("/realroot/proc", "/proc") != -1, "symlinking /proc");
357 SAPI_RAW_PCHECK(
358 mount("/", "/", "", MS_BIND | MS_REMOUNT | MS_RDONLY, nullptr) == 0,
359 "remounting rootfs read-only failed");
360 }
361
362 } // namespace sandbox2
363