• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "service_utils.h"
18 
19 #include <fcntl.h>
20 #include <grp.h>
21 #include <sys/mount.h>
22 #include <sys/prctl.h>
23 #include <sys/wait.h>
24 #include <unistd.h>
25 #include <map>
26 
27 #include <android-base/file.h>
28 #include <android-base/logging.h>
29 #include <android-base/properties.h>
30 #include <android-base/stringprintf.h>
31 #include <android-base/strings.h>
32 #include <cutils/android_get_control_file.h>
33 #include <cutils/sockets.h>
34 #include <fstab/fstab.h>
35 #include <processgroup/processgroup.h>
36 
37 #include "mount_namespace.h"
38 #include "util.h"
39 
40 using android::base::GetProperty;
41 using android::base::StartsWith;
42 using android::base::StringPrintf;
43 using android::base::unique_fd;
44 using android::base::WriteStringToFile;
45 
46 namespace android {
47 namespace init {
48 
49 namespace {
50 
EnterNamespace(int nstype,const char * path)51 Result<void> EnterNamespace(int nstype, const char* path) {
52     auto fd = unique_fd{open(path, O_RDONLY | O_CLOEXEC)};
53     if (fd == -1) {
54         return ErrnoError() << "Could not open namespace at " << path;
55     }
56     if (setns(fd.get(), nstype) == -1) {
57         return ErrnoError() << "Could not setns() namespace at " << path;
58     }
59     return {};
60 }
61 
SetUpMountNamespace(bool remount_proc,bool remount_sys)62 Result<void> SetUpMountNamespace(bool remount_proc, bool remount_sys) {
63     constexpr unsigned int kSafeFlags = MS_NODEV | MS_NOEXEC | MS_NOSUID;
64 
65     // Recursively remount / as MS_SLAVE like zygote does so that
66     // unmounting and mounting /proc doesn't interfere with the parent
67     // namespace's /proc mount. This will also prevent any other
68     // mounts/unmounts initiated by the service from interfering with the
69     // parent namespace but will still allow mount events from the parent
70     // namespace to propagate to the child.
71     if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) {
72         return ErrnoError() << "Could not remount(/) recursively as MS_SLAVE";
73     }
74 
75     // umount() then mount() /proc and/or /sys
76     // Note that it is not sufficient to mount with MS_REMOUNT.
77     if (remount_proc) {
78         if (umount("/proc") == -1) {
79             return ErrnoError() << "Could not umount(/proc)";
80         }
81         if (mount("", "/proc", "proc", kSafeFlags, "") == -1) {
82             return ErrnoError() << "Could not mount(/proc)";
83         }
84     }
85     if (remount_sys) {
86         android::fs_mgr::Fstab mounts;
87         if (!ReadFstabFromFile("/proc/mounts", &mounts)) {
88             LOG(ERROR) << "Could not read /proc/mounts";
89         }
90         if (umount2("/sys", MNT_DETACH) == -1) {
91             return ErrnoError() << "Could not umount(/sys)";
92         }
93         if (mount("sysfs", "/sys", "sysfs", kSafeFlags, "") == -1) {
94             return ErrnoError() << "Could not mount(/sys)";
95         }
96         // Unmounting /sys also unmounts all nested mounts like tracefs.
97         //
98         // Look up the filesystems that were mounted under /sys before we wiped
99         // it and attempt to restore them.
100         for (const auto& entry : mounts) {
101             // Never mount /sys/kernel/debug/tracing. This is the *one* mount
102             // that is special within Linux kernel: for backward compatibility
103             // tracefs gets auto-mounted there whenever one mounts debugfs [1].
104             //
105             // Attempting to mount the filesystem here will cause SELinux
106             // denials, because unlike *all other* filesystems in Android, it's
107             // not init who mounted it so there's no policy that would allow it.
108             //
109             // [1] https://lore.kernel.org/lkml/20150204143755.694479564@goodmis.org/
110             if (entry.mount_point.starts_with("/sys/") &&
111                 entry.mount_point != "/sys/kernel/debug/tracing") {
112                 if (mount(entry.blk_device.c_str(), entry.mount_point.c_str(),
113                           entry.fs_type.c_str(), entry.flags, "")) {
114                     LOG(WARNING) << "Could not mount(" << entry.mount_point
115                                  << ") after switching netns: " << ErrnoError().str();
116                 }
117             }
118         }
119     }
120     return {};
121 }
122 
SetUpPidNamespace(const char * name)123 Result<void> SetUpPidNamespace(const char* name) {
124     if (prctl(PR_SET_NAME, name) == -1) {
125         return ErrnoError() << "Could not set name";
126     }
127 
128     pid_t child_pid = fork();
129     if (child_pid == -1) {
130         return ErrnoError() << "Could not fork init inside the PID namespace";
131     }
132 
133     if (child_pid > 0) {
134         // So that we exit with the right status.
135         static int init_exitstatus = 0;
136         signal(SIGTERM, [](int) { _exit(init_exitstatus); });
137 
138         pid_t waited_pid;
139         int status;
140         while ((waited_pid = wait(&status)) > 0) {
141             // This loop will end when there are no processes left inside the
142             // PID namespace or when the init process inside the PID namespace
143             // gets a signal.
144             if (waited_pid == child_pid) {
145                 init_exitstatus = status;
146             }
147         }
148         if (!WIFEXITED(init_exitstatus)) {
149             _exit(EXIT_FAILURE);
150         }
151         _exit(WEXITSTATUS(init_exitstatus));
152     }
153     return {};
154 }
155 
SetupStdio(bool stdio_to_kmsg)156 void SetupStdio(bool stdio_to_kmsg) {
157     auto fd = unique_fd{open("/dev/null", O_RDWR | O_CLOEXEC)};
158     dup2(fd.get(), STDIN_FILENO);
159     if (stdio_to_kmsg) {
160         fd.reset(open("/dev/kmsg_debug", O_WRONLY | O_CLOEXEC));
161         if (fd == -1) fd.reset(open("/dev/null", O_WRONLY | O_CLOEXEC));
162     }
163     dup2(fd.get(), STDOUT_FILENO);
164     dup2(fd.get(), STDERR_FILENO);
165 }
166 
OpenConsole(const std::string & console)167 void OpenConsole(const std::string& console) {
168     auto fd = unique_fd{open(console.c_str(), O_RDWR | O_CLOEXEC)};
169     if (fd == -1) fd.reset(open("/dev/null", O_RDWR | O_CLOEXEC));
170     ioctl(fd.get(), TIOCSCTTY, 0);
171     dup2(fd.get(), 0);
172     dup2(fd.get(), 1);
173     dup2(fd.get(), 2);
174 }
175 
176 }  // namespace
177 
Publish() const178 void Descriptor::Publish() const {
179     auto published_name = name_;
180 
181     for (auto& c : published_name) {
182         c = isalnum(c) ? c : '_';
183     }
184 
185     int fd = fd_.get();
186     // For safety, the FD is created as CLOEXEC, so that must be removed before publishing.
187     auto fd_flags = fcntl(fd, F_GETFD);
188     fd_flags &= ~FD_CLOEXEC;
189     if (fcntl(fd, F_SETFD, fd_flags) != 0) {
190         PLOG(ERROR) << "Failed to remove CLOEXEC from '" << published_name << "'";
191     }
192 
193     std::string val = std::to_string(fd);
194     setenv(published_name.c_str(), val.c_str(), 1);
195 }
196 
Create(const std::string & global_context) const197 Result<Descriptor> SocketDescriptor::Create(const std::string& global_context) const {
198     const auto& socket_context = context.empty() ? global_context : context;
199     auto result = CreateSocket(name, type | SOCK_CLOEXEC, passcred, listen, perm, uid, gid,
200                                socket_context);
201     if (!result.ok()) {
202         return result.error();
203     }
204 
205     return Descriptor(ANDROID_SOCKET_ENV_PREFIX + name, unique_fd(*result));
206 }
207 
Create() const208 Result<Descriptor> FileDescriptor::Create() const {
209     int flags = (type == "r") ? O_RDONLY : (type == "w") ? O_WRONLY : O_RDWR;
210 
211     // Make sure we do not block on open (eg: devices can chose to block on carrier detect).  Our
212     // intention is never to delay launch of a service for such a condition.  The service can
213     // perform its own blocking on carrier detect.
214     unique_fd fd(TEMP_FAILURE_RETRY(open(name.c_str(), flags | O_NONBLOCK | O_CLOEXEC)));
215 
216     if (fd < 0) {
217         return ErrnoError() << "Failed to open file '" << name << "'";
218     }
219 
220     // Fixup as we set O_NONBLOCK for open, the intent for fd is to block reads.
221     fcntl(fd.get(), F_SETFL, flags);
222 
223     return Descriptor(ANDROID_FILE_ENV_PREFIX + name, std::move(fd));
224 }
225 
EnterNamespaces(const NamespaceInfo & info,const std::string & name,std::optional<MountNamespace> override_mount_namespace)226 Result<void> EnterNamespaces(const NamespaceInfo& info, const std::string& name,
227                              std::optional<MountNamespace> override_mount_namespace) {
228     for (const auto& [nstype, path] : info.namespaces_to_enter) {
229         if (auto result = EnterNamespace(nstype, path.c_str()); !result.ok()) {
230             return result;
231         }
232     }
233 
234 #if defined(__ANDROID__)
235     if (override_mount_namespace.has_value()) {
236         if (auto result = SwitchToMountNamespaceIfNeeded(override_mount_namespace.value());
237             !result.ok()) {
238             return result;
239         }
240     }
241 #endif
242 
243     if (info.flags & CLONE_NEWNS) {
244         bool remount_proc = info.flags & CLONE_NEWPID;
245         bool remount_sys =
246                 std::any_of(info.namespaces_to_enter.begin(), info.namespaces_to_enter.end(),
247                             [](const auto& entry) { return entry.first == CLONE_NEWNET; });
248         if (auto result = SetUpMountNamespace(remount_proc, remount_sys); !result.ok()) {
249             return result;
250         }
251     }
252 
253     if (info.flags & CLONE_NEWPID) {
254         // This will fork again to run an init process inside the PID namespace.
255         if (auto result = SetUpPidNamespace(name.c_str()); !result.ok()) {
256             return result;
257         }
258     }
259 
260     return {};
261 }
262 
SetProcessAttributes(const ProcessAttributes & attr,InterprocessFifo setsid_finished)263 Result<void> SetProcessAttributes(const ProcessAttributes& attr, InterprocessFifo setsid_finished) {
264     if (attr.ioprio_class != IoSchedClass_NONE) {
265         if (android_set_ioprio(getpid(), attr.ioprio_class, attr.ioprio_pri)) {
266             PLOG(ERROR) << "failed to set pid " << getpid() << " ioprio=" << attr.ioprio_class
267                         << "," << attr.ioprio_pri;
268         }
269     }
270 
271     if (RequiresConsole(attr)) {
272         setsid();
273         setsid_finished.Write(kSetSidFinished);
274         setsid_finished.Close();
275         OpenConsole(attr.console);
276     } else {
277         // Without PID namespaces, this call duplicates the setpgid() call from
278         // the parent process. With PID namespaces, this setpgid() call sets the
279         // process group ID for a child of the init process in the PID
280         // namespace.
281         if (setpgid(0, 0) == -1) {
282             return ErrnoError() << "setpgid failed";
283         }
284         SetupStdio(attr.stdio_to_kmsg);
285     }
286 
287     for (const auto& rlimit : attr.rlimits) {
288         if (setrlimit(rlimit.first, &rlimit.second) == -1) {
289             return ErrnoErrorf("setrlimit({}, {{rlim_cur={}, rlim_max={}}}) failed", rlimit.first,
290                                rlimit.second.rlim_cur, rlimit.second.rlim_max);
291         }
292     }
293 
294     if (attr.gid) {
295         if (setgid(attr.gid) != 0) {
296             return ErrnoError() << "setgid failed";
297         }
298     }
299     if (setgroups(attr.supp_gids.size(), const_cast<gid_t*>(&attr.supp_gids[0])) != 0) {
300         return ErrnoError() << "setgroups failed";
301     }
302     if (attr.uid()) {
303         if (setuid(attr.uid()) != 0) {
304             return ErrnoError() << "setuid failed";
305         }
306     }
307 
308     if (attr.priority != 0) {
309         if (setpriority(PRIO_PROCESS, 0, attr.priority) != 0) {
310             return ErrnoError() << "setpriority failed";
311         }
312     }
313     return {};
314 }
315 
WritePidToFiles(std::vector<std::string> * files)316 Result<void> WritePidToFiles(std::vector<std::string>* files) {
317     if (files->empty()) {
318         // No files to write pid to, exit early.
319         return {};
320     }
321 
322     if (!CgroupsAvailable()) {
323         return Error() << "cgroups are not available";
324     }
325 
326     // See if there were "writepid" instructions to write to files under cpuset path.
327     std::string cpuset_path;
328     if (CgroupGetControllerPath("cpuset", &cpuset_path)) {
329         auto cpuset_predicate = [&cpuset_path](const std::string& path) {
330             return StartsWith(path, cpuset_path + "/");
331         };
332         auto iter = std::find_if(files->begin(), files->end(), cpuset_predicate);
333         if (iter == files->end()) {
334             // There were no "writepid" instructions for cpusets, check if the system default
335             // cpuset is specified to be used for the process.
336             std::string default_cpuset = GetProperty("ro.cpuset.default", "");
337             if (!default_cpuset.empty()) {
338                 // Make sure the cpuset name starts and ends with '/'.
339                 // A single '/' means the 'root' cpuset.
340                 if (default_cpuset.front() != '/') {
341                     default_cpuset.insert(0, 1, '/');
342                 }
343                 if (default_cpuset.back() != '/') {
344                     default_cpuset.push_back('/');
345                 }
346                 files->push_back(
347                         StringPrintf("%s%stasks", cpuset_path.c_str(), default_cpuset.c_str()));
348             }
349         }
350     } else {
351         LOG(ERROR) << "cpuset cgroup controller is not mounted!";
352     }
353 
354     // Issue a warning whenever writepid is being used with a cgroup. This can't be done during
355     // command parsing because cgroups might not be configured at the time or parsing.
356     for (const auto& file : *files) {
357         if (CgroupGetControllerFromPath(file, nullptr)) {
358             LOG(WARNING) << "writepid usage with cgroups path '" << file
359                          << "' is obsolete, please use task_profiles!";
360         }
361     }
362 
363     std::string pid_str = std::to_string(getpid());
364     for (const auto& file : *files) {
365         if (!WriteStringToFile(pid_str, file)) {
366             return ErrnoError() << "couldn't write " << pid_str << " to " << file;
367         }
368     }
369     return {};
370 }
371 
372 }  // namespace init
373 }  // namespace android
374