• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "service.h"
18 
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <inttypes.h>
22 #include <linux/securebits.h>
23 #include <sched.h>
24 #include <sys/prctl.h>
25 #include <sys/stat.h>
26 #include <sys/time.h>
27 #include <termios.h>
28 #include <unistd.h>
29 #include <thread>
30 
31 #include <android-base/file.h>
32 #include <android-base/logging.h>
33 #include <android-base/properties.h>
34 #include <android-base/scopeguard.h>
35 #include <android-base/stringprintf.h>
36 #include <android-base/strings.h>
37 #include <cutils/android_get_control_file.h>
38 #include <cutils/sockets.h>
39 #include <processgroup/processgroup.h>
40 #include <selinux/selinux.h>
41 #include <sys/signalfd.h>
42 
43 #include <string>
44 
45 #include "interprocess_fifo.h"
46 #include "lmkd_service.h"
47 #include "service_list.h"
48 #include "util.h"
49 
50 #if defined(__BIONIC__)
51 #include <bionic/reserved_signals.h>
52 #endif
53 
54 #ifdef INIT_FULL_SOURCES
55 #include <android/api-level.h>
56 
57 #include "mount_namespace.h"
58 #include "reboot_utils.h"
59 #include "selinux.h"
60 #else
61 #include "host_init_stubs.h"
62 #endif
63 
64 using android::base::boot_clock;
65 using android::base::GetBoolProperty;
66 using android::base::GetIntProperty;
67 using android::base::GetProperty;
68 using android::base::Join;
69 using android::base::make_scope_guard;
70 using android::base::SetProperty;
71 using android::base::StartsWith;
72 using android::base::StringPrintf;
73 using android::base::unique_fd;
74 using android::base::WriteStringToFile;
75 
76 namespace android {
77 namespace init {
78 
ComputeContextFromExecutable(const std::string & service_path)79 static Result<std::string> ComputeContextFromExecutable(const std::string& service_path) {
80     std::string computed_context;
81 
82     char* raw_con = nullptr;
83     char* raw_filecon = nullptr;
84 
85     if (getcon(&raw_con) == -1) {
86         return Error() << "Could not get security context";
87     }
88     std::unique_ptr<char, decltype(&freecon)> mycon(raw_con, freecon);
89 
90     if (getfilecon(service_path.c_str(), &raw_filecon) == -1) {
91         return Error() << "Could not get file context";
92     }
93     std::unique_ptr<char, decltype(&freecon)> filecon(raw_filecon, freecon);
94 
95     char* new_con = nullptr;
96     int rc = security_compute_create(mycon.get(), filecon.get(),
97                                      string_to_security_class("process"), &new_con);
98     if (rc == 0) {
99         computed_context = new_con;
100         free(new_con);
101     }
102     if (rc == 0 && computed_context == mycon.get()) {
103         return Error() << "File " << service_path << "(labeled \"" << filecon.get()
104                        << "\") has incorrect label or no domain transition from " << mycon.get()
105                        << " to another SELinux domain defined. Have you configured your "
106                           "service correctly? https://source.android.com/security/selinux/"
107                           "device-policy#label_new_services_and_address_denials. Note: this "
108                           "error shows up even in permissive mode in order to make auditing "
109                           "denials possible.";
110     }
111     if (rc < 0) {
112         return Error() << "Could not get process context";
113     }
114     return computed_context;
115 }
116 
ExpandArgsAndExecv(const std::vector<std::string> & args,bool sigstop)117 static bool ExpandArgsAndExecv(const std::vector<std::string>& args, bool sigstop) {
118     std::vector<std::string> expanded_args;
119     std::vector<char*> c_strings;
120 
121     expanded_args.resize(args.size());
122     c_strings.push_back(const_cast<char*>(args[0].data()));
123     for (std::size_t i = 1; i < args.size(); ++i) {
124         auto expanded_arg = ExpandProps(args[i]);
125         if (!expanded_arg.ok()) {
126             LOG(FATAL) << args[0] << ": cannot expand arguments': " << expanded_arg.error();
127         }
128         expanded_args[i] = *expanded_arg;
129         c_strings.push_back(expanded_args[i].data());
130     }
131     c_strings.push_back(nullptr);
132 
133     if (sigstop) {
134         kill(getpid(), SIGSTOP);
135     }
136 
137     return execv(c_strings[0], c_strings.data()) == 0;
138 }
139 
// Value handed to start_order_ of the next service started by Start(); incremented on each use.
unsigned long Service::next_start_order_ = 1;
// Set to true by ExecStart() once an exec service has been started.
bool Service::is_exec_service_running_ = false;
142 
// Convenience constructor: delegates to the full constructor with flags 0, no uid, gid 0, no
// supplementary gids, namespace flags 0 and an empty seclabel.
Service::Service(const std::string& name, Subcontext* subcontext_for_restart_commands,
                 const std::string& filename, const std::vector<std::string>& args)
    : Service(name, 0, std::nullopt, 0, {}, 0, "", subcontext_for_restart_commands, filename,
              args) {}
147 
// Full constructor. Stores the supplied identity (uid/gid/supplementary gids), namespace flags,
// seclabel, command line and defining file name; everything else starts from a default:
// class "default", pid 0, crash count 0, no I/O priority class, priority 0, the default oom
// score adjustment and start order 0.
Service::Service(const std::string& name, unsigned flags, std::optional<uid_t> uid, gid_t gid,
                 const std::vector<gid_t>& supp_gids, int namespace_flags,
                 const std::string& seclabel, Subcontext* subcontext_for_restart_commands,
                 const std::string& filename, const std::vector<std::string>& args)
    : name_(name),
      classnames_({"default"}),
      flags_(flags),
      pid_(0),
      crash_count_(0),
      proc_attr_{.ioprio_class = IoSchedClass_NONE,
                 .ioprio_pri = 0,
                 .parsed_uid = uid,
                 .gid = gid,
                 .supp_gids = supp_gids,
                 .priority = 0},
      namespaces_{.flags = namespace_flags},
      seclabel_(seclabel),
      subcontext_(subcontext_for_restart_commands),
      // The onrestart action is labelled with the service name and uses the same subcontext.
      onrestart_(false, subcontext_for_restart_commands, "<Service '" + name + "' onrestart>", 0,
                 "onrestart", {}),
      oom_score_adjust_(DEFAULT_OOM_SCORE_ADJUST),
      start_order_(0),
      args_(args),
      filename_(filename) {}
172 
NotifyStateChange(const std::string & new_state) const173 void Service::NotifyStateChange(const std::string& new_state) const {
174     if ((flags_ & SVC_TEMPORARY) != 0) {
175         // Services created by 'exec' are temporary and don't have properties tracking their state.
176         return;
177     }
178 
179     std::string prop_name = "init.svc." + name_;
180     SetProperty(prop_name, new_state);
181 
182     if (new_state == "running") {
183         uint64_t start_ns = time_started_.time_since_epoch().count();
184         std::string boottime_property = "ro.boottime." + name_;
185         if (GetProperty(boottime_property, "").empty()) {
186             SetProperty(boottime_property, std::to_string(start_ns));
187         }
188     }
189 
190     // init.svc_debug_pid.* properties are only for tests, and should not be used
191     // on device for security checks.
192     std::string pid_property = "init.svc_debug_pid." + name_;
193     if (new_state == "running") {
194         SetProperty(pid_property, std::to_string(pid_));
195     } else if (new_state == "stopped") {
196         SetProperty(pid_property, "");
197     }
198 }
199 
KillProcessGroup(int signal)200 void Service::KillProcessGroup(int signal) {
201     // Always attempt the process kill if process is still running.
202     // Cgroup clean up routines are idempotent. It's safe to call
203     // killProcessGroup repeatedly. During shutdown, `init` will
204     // call this function to send SIGTERM/SIGKILL to all processes.
205     // These signals must be sent for a successful shutdown.
206     if (!process_cgroup_empty_ || IsRunning()) {
207         LOG(INFO) << "Sending signal " << signal << " to service '" << name_ << "' (pid " << pid_
208                   << ") process group...";
209         int r;
210         if (signal == SIGTERM) {
211             r = killProcessGroupOnce(uid(), pid_, signal);
212         } else {
213             r = killProcessGroup(uid(), pid_, signal);
214         }
215 
216         if (r == 0) process_cgroup_empty_ = true;
217     }
218 
219     if (oom_score_adjust_ != DEFAULT_OOM_SCORE_ADJUST) {
220         LmkdUnregister(name_, pid_);
221     }
222 }
223 
SetProcessAttributesAndCaps(InterprocessFifo setsid_finished)224 void Service::SetProcessAttributesAndCaps(InterprocessFifo setsid_finished) {
225     // Keep capabilites on uid change.
226     if (capabilities_ && uid()) {
227         // If Android is running in a container, some securebits might already
228         // be locked, so don't change those.
229         unsigned long securebits = prctl(PR_GET_SECUREBITS);
230         if (securebits == -1UL) {
231             PLOG(FATAL) << "prctl(PR_GET_SECUREBITS) failed for " << name_;
232         }
233         securebits |= SECBIT_KEEP_CAPS | SECBIT_KEEP_CAPS_LOCKED;
234         if (prctl(PR_SET_SECUREBITS, securebits) != 0) {
235             PLOG(FATAL) << "prctl(PR_SET_SECUREBITS) failed for " << name_;
236         }
237     }
238 
239     if (auto result = SetProcessAttributes(proc_attr_, std::move(setsid_finished)); !result.ok()) {
240         LOG(FATAL) << "cannot set attribute for " << name_ << ": " << result.error();
241     }
242 
243     if (!seclabel_.empty()) {
244         if (setexeccon(seclabel_.c_str()) < 0) {
245             PLOG(FATAL) << "cannot setexeccon('" << seclabel_ << "') for " << name_;
246         }
247     }
248 
249     if (capabilities_) {
250         if (!SetCapsForExec(*capabilities_)) {
251             LOG(FATAL) << "cannot set capabilities for " << name_;
252         }
253     } else if (uid()) {
254         // Inheritable caps can be non-zero when running in a container.
255         if (!DropInheritableCaps()) {
256             LOG(FATAL) << "cannot drop inheritable caps for " << name_;
257         }
258     }
259 }
260 
Reap(const siginfo_t & siginfo)261 void Service::Reap(const siginfo_t& siginfo) {
262     if (!(flags_ & SVC_ONESHOT) || (flags_ & SVC_RESTART)) {
263         KillProcessGroup(SIGKILL);
264     } else {
265         // Legacy behavior from ~2007 until Android R: this else branch did not exist and we did not
266         // kill the process group in this case.
267         if (SelinuxGetVendorAndroidVersion() >= __ANDROID_API_R__) {
268             // The new behavior in Android R is to kill these process groups in all cases.  The
269             // 'true' parameter instructions KillProcessGroup() to report a warning message where it
270             // detects a difference in behavior has occurred.
271             KillProcessGroup(SIGKILL);
272         }
273     }
274 
275     // Remove any socket resources we may have created.
276     for (const auto& socket : sockets_) {
277         if (socket.persist) {
278             continue;
279         }
280         auto path = ANDROID_SOCKET_DIR "/" + socket.name;
281         unlink(path.c_str());
282     }
283 
284     for (const auto& f : reap_callbacks_) {
285         f(siginfo);
286     }
287 
288     if ((siginfo.si_code != CLD_EXITED || siginfo.si_status != 0) && on_failure_reboot_target_) {
289         LOG(ERROR) << "Service " << name_
290                    << " has 'reboot_on_failure' option and failed, shutting down system.";
291         trigger_shutdown(*on_failure_reboot_target_);
292     }
293 
294     if (flags_ & SVC_EXEC) UnSetExec();
295 
296     if (name_ == "zygote" || name_ == "zygote64") {
297         removeAllEmptyProcessGroups();
298     }
299 
300     if (flags_ & SVC_TEMPORARY) return;
301 
302     pid_ = 0;
303     flags_ &= (~SVC_RUNNING);
304     start_order_ = 0;
305     was_last_exit_ok_ = siginfo.si_code == CLD_EXITED && siginfo.si_status == 0;
306 
307     // Oneshot processes go into the disabled state on exit,
308     // except when manually restarted.
309     if ((flags_ & SVC_ONESHOT) && !(flags_ & SVC_RESTART) && !(flags_ & SVC_RESET)) {
310         flags_ |= SVC_DISABLED;
311     }
312 
313     // Disabled and reset processes do not get restarted automatically.
314     if (flags_ & (SVC_DISABLED | SVC_RESET))  {
315         NotifyStateChange("stopped");
316         return;
317     }
318 
319 #if INIT_FULL_SOURCES
320     static bool is_apex_updatable = true;
321 #else
322     static bool is_apex_updatable = false;
323 #endif
324     const bool use_default_mount_ns =
325             mount_namespace_.has_value() && *mount_namespace_ == NS_DEFAULT;
326     const bool is_process_updatable = use_default_mount_ns && is_apex_updatable;
327 
328 #if defined(__BIONIC__) && defined(SEGV_MTEAERR)
329     // As a precaution, we only upgrade a service once per reboot, to limit
330     // the potential impact.
331     //
332     // BIONIC_SIGNAL_ART_PROFILER is a magic value used by deuggerd to signal
333     // that the process crashed with SIGSEGV and SEGV_MTEAERR. This signal will
334     // never be seen otherwise in a crash, because it always gets handled by the
335     // profiling signal handlers in bionic. See also
336     // debuggerd/handler/debuggerd_handler.cpp.
337     bool should_upgrade_mte = siginfo.si_code != CLD_EXITED &&
338                               siginfo.si_status == BIONIC_SIGNAL_ART_PROFILER && !upgraded_mte_;
339 
340     if (should_upgrade_mte) {
341         constexpr int kDefaultUpgradeSecs = 60;
342         int secs = GetIntProperty("persist.device_config.memory_safety_native.upgrade_secs.default",
343                                   kDefaultUpgradeSecs);
344         secs = GetIntProperty(
345                 "persist.device_config.memory_safety_native.upgrade_secs.service." + name_, secs);
346         if (secs > 0) {
347             LOG(INFO) << "Upgrading service " << name_ << " to sync MTE for " << secs << " seconds";
348             once_environment_vars_.emplace_back("BIONIC_MEMTAG_UPGRADE_SECS", std::to_string(secs));
349             upgraded_mte_ = true;
350         } else {
351             LOG(INFO) << "Not upgrading service " << name_ << " to sync MTE due to device config";
352         }
353     }
354 #endif
355 
356     // If we crash > 4 times in 'fatal_crash_window_' minutes or before boot_completed,
357     // reboot into bootloader or set crashing property
358     boot_clock::time_point now = boot_clock::now();
359     constexpr const char native_watchdog_reboot_time[] = "persist.init.svc.last_fatal_reboot_epoch";
360     uint64_t throttle_window =
361             std::chrono::duration_cast<std::chrono::seconds>(std::chrono::hours(24)).count();
362     if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART) &&
363         !was_last_exit_ok_) {
364         bool boot_completed = GetBoolProperty("sys.boot_completed", false);
365         if (now < time_crashed_ + fatal_crash_window_ || !boot_completed) {
366             if (++crash_count_ > 4) {
367                 auto exit_reason =
368                         boot_completed
369                                 ? "in " + std::to_string(fatal_crash_window_.count()) + " minutes"
370                                 : "before boot completed";
371                 if (flags_ & SVC_CRITICAL) {
372                     if (!GetBoolProperty("init.svc_debug.no_fatal." + name_, false)) {
373                         uint64_t epoch_time =
374                                 std::chrono::duration_cast<std::chrono::seconds>(
375                                         std::chrono::system_clock::now().time_since_epoch())
376                                         .count();
377                         // Do not reboot again If it was already initiated in the last 24hrs
378                         if (epoch_time - GetIntProperty(native_watchdog_reboot_time, 0) >
379                             throttle_window) {
380                             SetProperty(native_watchdog_reboot_time, std::to_string(epoch_time));
381                             // Aborts into `fatal_reboot_target_'.
382                             SetFatalRebootTarget(fatal_reboot_target_);
383                             LOG(FATAL) << "critical process '" << name_ << "' exited 4 times "
384                                        << exit_reason;
385                         } else {
386                             LOG(INFO) << "Reboot already performed in last 24hrs because of crash.";
387                         }
388                     }
389                 } else {
390                     LOG(ERROR) << "process with updatable components '" << name_
391                                << "' exited 4 times " << exit_reason;
392                     // Notifies update_verifier and apexd
393                     SetProperty("sys.init.updatable_crashing_process_name", name_);
394                     SetProperty("sys.init.updatable_crashing", "1");
395                 }
396             }
397         } else {
398             time_crashed_ = now;
399             crash_count_ = 1;
400         }
401     }
402 
403     flags_ &= (~SVC_RESTART);
404     flags_ |= SVC_RESTARTING;
405 
406     // Execute all onrestart commands for this service.
407     onrestart_.ExecuteAllCommands();
408 
409     NotifyStateChange("restarting");
410     return;
411 }
412 
DumpState() const413 void Service::DumpState() const {
414     LOG(INFO) << "service " << name_;
415     LOG(INFO) << "  class '" << Join(classnames_, " ") << "'";
416     LOG(INFO) << "  exec " << Join(args_, " ");
417     for (const auto& socket : sockets_) {
418         LOG(INFO) << "  socket " << socket.name;
419     }
420     for (const auto& file : files_) {
421         LOG(INFO) << "  file " << file.name;
422     }
423 }
424 
425 
ExecStart()426 Result<void> Service::ExecStart() {
427     auto reboot_on_failure = make_scope_guard([this] {
428         if (on_failure_reboot_target_) {
429             trigger_shutdown(*on_failure_reboot_target_);
430         }
431     });
432 
433     if (is_updatable() && !IsDefaultMountNamespaceReady()) {
434         // Don't delay the service for ExecStart() as the semantic is that
435         // the caller might depend on the side effect of the execution.
436         return Error() << "Cannot start an updatable service '" << name_
437                        << "' before configs from APEXes are all loaded";
438     }
439 
440     flags_ |= SVC_ONESHOT;
441 
442     if (auto result = Start(); !result.ok()) {
443         return result;
444     }
445 
446     flags_ |= SVC_EXEC;
447     is_exec_service_running_ = true;
448 
449     LOG(INFO) << "SVC_EXEC service '" << name_ << "' pid " << pid_ << " (uid " << uid() << " gid "
450               << proc_attr_.gid << "+" << proc_attr_.supp_gids.size() << " context "
451               << (!seclabel_.empty() ? seclabel_ : "default") << ") started; waiting...";
452 
453     reboot_on_failure.Disable();
454     return {};
455 }
456 
CheckConsole()457 Result<void> Service::CheckConsole() {
458     if (!(flags_ & SVC_CONSOLE)) {
459         return {};
460     }
461 
462     // On newer kernels, /dev/console will always exist because
463     // "console=ttynull" is hard-coded in CONFIG_CMDLINE. This new boot
464     // property should be set via "androidboot.serialconsole=0" to explicitly
465     // disable services requiring the console. For older kernels and boot
466     // images, not setting this at all will fall back to the old behavior
467     if (GetProperty("ro.boot.serialconsole", "") == "0") {
468         flags_ |= SVC_DISABLED;
469         return {};
470     }
471 
472     if (proc_attr_.console.empty()) {
473         proc_attr_.console = "/dev/" + GetProperty("ro.boot.console", "console");
474     }
475 
476     // Make sure that open call succeeds to ensure a console driver is
477     // properly registered for the device node
478     int console_fd = open(proc_attr_.console.c_str(), O_RDWR | O_CLOEXEC);
479     if (console_fd < 0) {
480         flags_ |= SVC_DISABLED;
481         return ErrnoError() << "Couldn't open console '" << proc_attr_.console << "'";
482     }
483     close(console_fd);
484     return {};
485 }
486 
487 // Configures the memory cgroup properties for the service.
ConfigureMemcg()488 void Service::ConfigureMemcg() {
489     if (swappiness_ != -1) {
490         if (!setProcessGroupSwappiness(uid(), pid_, swappiness_)) {
491             PLOG(ERROR) << "setProcessGroupSwappiness failed";
492         }
493     }
494 
495     if (soft_limit_in_bytes_ != -1) {
496         if (!setProcessGroupSoftLimit(uid(), pid_, soft_limit_in_bytes_)) {
497             PLOG(ERROR) << "setProcessGroupSoftLimit failed";
498         }
499     }
500 
501     size_t computed_limit_in_bytes = limit_in_bytes_;
502     if (limit_percent_ != -1) {
503         long page_size = sysconf(_SC_PAGESIZE);
504         long num_pages = sysconf(_SC_PHYS_PAGES);
505         if (page_size > 0 && num_pages > 0) {
506             size_t max_mem = SIZE_MAX;
507             if (size_t(num_pages) < SIZE_MAX / size_t(page_size)) {
508                 max_mem = size_t(num_pages) * size_t(page_size);
509             }
510             computed_limit_in_bytes =
511                     std::min(computed_limit_in_bytes, max_mem / 100 * limit_percent_);
512         }
513     }
514 
515     if (!limit_property_.empty()) {
516         // This ends up overwriting computed_limit_in_bytes but only if the
517         // property is defined.
518         computed_limit_in_bytes =
519                 android::base::GetUintProperty(limit_property_, computed_limit_in_bytes, SIZE_MAX);
520     }
521 
522     if (computed_limit_in_bytes != size_t(-1)) {
523         if (!setProcessGroupLimit(uid(), pid_, computed_limit_in_bytes)) {
524             PLOG(ERROR) << "setProcessGroupLimit failed";
525         }
526     }
527 }
528 
529 // Enters namespaces, sets environment variables, writes PID files and runs the service executable.
RunService(const std::vector<Descriptor> & descriptors,InterprocessFifo cgroups_activated,InterprocessFifo setsid_finished)530 void Service::RunService(const std::vector<Descriptor>& descriptors,
531                          InterprocessFifo cgroups_activated, InterprocessFifo setsid_finished) {
532     if (auto result = EnterNamespaces(namespaces_, name_, mount_namespace_); !result.ok()) {
533         LOG(FATAL) << "Service '" << name_ << "' failed to set up namespaces: " << result.error();
534     }
535 
536     for (const auto& [key, value] : once_environment_vars_) {
537         setenv(key.c_str(), value.c_str(), 1);
538     }
539     for (const auto& [key, value] : environment_vars_) {
540         setenv(key.c_str(), value.c_str(), 1);
541     }
542 
543     for (const auto& descriptor : descriptors) {
544         descriptor.Publish();
545     }
546 
547     if (auto result = WritePidToFiles(&writepid_files_); !result.ok()) {
548         LOG(ERROR) << "failed to write pid to files: " << result.error();
549     }
550 
551     // Wait until the cgroups have been created and until the cgroup controllers have been
552     // activated.
553     Result<uint8_t> byte = cgroups_activated.Read();
554     if (!byte.ok()) {
555         LOG(ERROR) << name_ << ": failed to read from notification channel: " << byte.error();
556     }
557     cgroups_activated.Close();
558     if (*byte != kCgroupsActivated) {
559         LOG(FATAL) << "Service '" << name_  << "' failed to start due to a fatal error";
560         _exit(EXIT_FAILURE);
561     }
562 
563     if (task_profiles_.size() > 0) {
564         bool succeeded = SelinuxGetVendorAndroidVersion() < __ANDROID_API_U__
565                                  ?
566                                  // Compatibility mode: apply the task profiles to the current
567                                  // thread.
568                                  SetTaskProfiles(getpid(), task_profiles_)
569                                  :
570                                  // Apply the task profiles to the current process.
571                                  SetProcessProfiles(getuid(), getpid(), task_profiles_);
572         if (!succeeded) {
573             LOG(ERROR) << "failed to set task profiles";
574         }
575     }
576 
577     // As requested, set our gid, supplemental gids, uid, context, and
578     // priority. Aborts on failure.
579     SetProcessAttributesAndCaps(std::move(setsid_finished));
580 
581     if (!ExpandArgsAndExecv(args_, sigstop_)) {
582         PLOG(ERROR) << "cannot execv('" << args_[0]
583                     << "'). See the 'Debugging init' section of init's README.md for tips";
584     }
585 }
586 
Start()587 Result<void> Service::Start() {
588     auto reboot_on_failure = make_scope_guard([this] {
589         if (on_failure_reboot_target_) {
590             trigger_shutdown(*on_failure_reboot_target_);
591         }
592     });
593 
594     if (is_updatable() && !IsDefaultMountNamespaceReady()) {
595         ServiceList::GetInstance().DelayService(*this);
596         return Error() << "Cannot start an updatable service '" << name_
597                        << "' before configs from APEXes are all loaded. "
598                        << "Queued for execution.";
599     }
600 
601     bool disabled = (flags_ & (SVC_DISABLED | SVC_RESET));
602     ResetFlagsForStart();
603 
604     // Running processes require no additional work --- if they're in the
605     // process of exiting, we've ensured that they will immediately restart
606     // on exit, unless they are ONESHOT. For ONESHOT service, if it's in
607     // stopping status, we just set SVC_RESTART flag so it will get restarted
608     // in Reap().
609     if (flags_ & SVC_RUNNING) {
610         if ((flags_ & SVC_ONESHOT) && disabled) {
611             flags_ |= SVC_RESTART;
612         }
613 
614         LOG(INFO) << "service '" << name_
615                   << "' requested start, but it is already running (flags: " << flags_ << ")";
616 
617         // It is not an error to try to start a service that is already running.
618         reboot_on_failure.Disable();
619         return {};
620     }
621 
622     // cgroups_activated is used for communication from the parent to the child
623     // while setsid_finished is used for communication from the child process to
624     // the parent process. These two communication channels are separate because
625     // combining these into a single communication channel would introduce a
626     // race between the Write() calls by the parent and by the child.
627     InterprocessFifo cgroups_activated, setsid_finished;
628     OR_RETURN(cgroups_activated.Initialize());
629     OR_RETURN(setsid_finished.Initialize());
630 
631     if (Result<void> result = CheckConsole(); !result.ok()) {
632         return result;
633     }
634 
635     struct stat sb;
636     if (stat(args_[0].c_str(), &sb) == -1) {
637         flags_ |= SVC_DISABLED;
638         return ErrnoError() << "Cannot find '" << args_[0] << "'";
639     }
640 
641     std::string scon;
642     if (!seclabel_.empty()) {
643         scon = seclabel_;
644     } else {
645         auto result = ComputeContextFromExecutable(args_[0]);
646         if (!result.ok()) {
647             return result.error();
648         }
649         scon = *result;
650     }
651 
652     if (!mount_namespace_.has_value()) {
653         // remember from which mount namespace the service should start
654         SetMountNamespace();
655     }
656 
657     LOG(INFO) << "starting service '" << name_ << "'...";
658 
659     std::vector<Descriptor> descriptors;
660     for (const auto& socket : sockets_) {
661         if (auto result = socket.Create(scon); result.ok()) {
662             descriptors.emplace_back(std::move(*result));
663         } else {
664             LOG(INFO) << "Could not create socket '" << socket.name << "': " << result.error();
665         }
666     }
667 
668     for (const auto& file : files_) {
669         if (auto result = file.Create(); result.ok()) {
670             descriptors.emplace_back(std::move(*result));
671         } else {
672             LOG(INFO) << "Could not open file '" << file.name << "': " << result.error();
673         }
674     }
675 
676     if (shared_kallsyms_file_) {
677         if (auto result = CreateSharedKallsymsFd(); result.ok()) {
678             descriptors.emplace_back(std::move(*result));
679         } else {
680             LOG(INFO) << "Could not obtain a copy of /proc/kallsyms: " << result.error();
681         }
682     }
683 
684     pid_t pid = -1;
685     if (namespaces_.flags) {
686         pid = clone(nullptr, nullptr, namespaces_.flags | SIGCHLD, nullptr);
687     } else {
688         pid = fork();
689     }
690 
691     if (pid == 0) {
692         umask(077);
693         cgroups_activated.CloseWriteFd();
694         setsid_finished.CloseReadFd();
695         RunService(descriptors, std::move(cgroups_activated), std::move(setsid_finished));
696         _exit(127);
697     } else {
698         cgroups_activated.CloseReadFd();
699         setsid_finished.CloseWriteFd();
700     }
701 
702     if (pid < 0) {
703         pid_ = 0;
704         return ErrnoError() << "Failed to fork";
705     }
706 
707     once_environment_vars_.clear();
708 
709     if (oom_score_adjust_ != DEFAULT_OOM_SCORE_ADJUST) {
710         std::string oom_str = std::to_string(oom_score_adjust_);
711         std::string oom_file = StringPrintf("/proc/%d/oom_score_adj", pid);
712         if (!WriteStringToFile(oom_str, oom_file)) {
713             PLOG(ERROR) << "couldn't write oom_score_adj";
714         }
715     }
716 
717     time_started_ = boot_clock::now();
718     pid_ = pid;
719     flags_ |= SVC_RUNNING;
720     start_order_ = next_start_order_++;
721     process_cgroup_empty_ = false;
722 
723     if (CgroupsAvailable()) {
724         bool use_memcg = swappiness_ != -1 || soft_limit_in_bytes_ != -1 || limit_in_bytes_ != -1 ||
725                          limit_percent_ != -1 || !limit_property_.empty();
726         errno = -createProcessGroup(uid(), pid_, use_memcg);
727         if (errno != 0) {
728             Result<void> result = cgroups_activated.Write(kActivatingCgroupsFailed);
729             if (!result.ok()) {
730                 return Error() << "Sending notification failed: " << result.error();
731             }
732             return Error() << "createProcessGroup(" << uid() << ", " << pid_ << ", " << use_memcg
733                            << ") failed for service '" << name_ << "': " << strerror(errno);
734         }
735 
736         // When the blkio controller is mounted in the v1 hierarchy, NormalIoPriority is
737         // the default (/dev/blkio). When the blkio controller is mounted in the v2 hierarchy, the
738         // NormalIoPriority profile has to be applied explicitly.
739         SetProcessProfiles(uid(), pid_, {"NormalIoPriority"});
740 
741         if (use_memcg) {
742             ConfigureMemcg();
743         }
744     }
745 
746     if (oom_score_adjust_ != DEFAULT_OOM_SCORE_ADJUST) {
747         LmkdRegister(name_, uid(), pid_, oom_score_adjust_);
748     }
749 
750     if (Result<void> result = cgroups_activated.Write(kCgroupsActivated); !result.ok()) {
751         return Error() << "Sending cgroups activated notification failed: " << result.error();
752     }
753 
754     cgroups_activated.Close();
755 
756     // Call setpgid() from the parent process to make sure that this call has
757     // finished before the parent process calls kill(-pgid, ...).
758     if (!RequiresConsole(proc_attr_)) {
759         if (setpgid(pid, pid) < 0) {
760             switch (errno) {
761                 case EACCES:  // Child has already performed setpgid() followed by execve().
762                 case ESRCH:   // Child process no longer exists.
763                     break;
764                 default:
765                     PLOG(ERROR) << "setpgid() from parent failed";
766             }
767         }
768     } else {
769         // The Read() call below will return an error if the child is killed.
770         if (Result<uint8_t> result = setsid_finished.Read();
771             !result.ok() || *result != kSetSidFinished) {
772             if (!result.ok()) {
773                 return Error() << "Waiting for setsid() failed: " << result.error();
774             } else {
775                 return Error() << "Waiting for setsid() failed: " << static_cast<uint32_t>(*result)
776                                << " <> " << static_cast<uint32_t>(kSetSidFinished);
777             }
778         }
779     }
780 
781     setsid_finished.Close();
782 
783     NotifyStateChange("running");
784     reboot_on_failure.Disable();
785 
786     LOG(INFO) << "... started service '" << name_ << "' has pid " << pid_;
787 
788     return {};
789 }
790 
791 // Set mount namespace for the service.
792 // The reason why remember the mount namespace:
793 //   If this service is started before APEXes and corresponding linker configuration
794 //   get available, mark it as pre-apexd one. Note that this marking is
795 //   permanent. So for example, if the service is re-launched (e.g., due
796 //   to crash), it is still recognized as pre-apexd... for consistency.
SetMountNamespace()797 void Service::SetMountNamespace() {
798     // APEXd is always started in the "current" namespace because it is the process to set up
799     // the current namespace. So, leave mount_namespace_ as empty.
800     if (args_[0] == "/system/bin/apexd") {
801         return;
802     }
803     // Services in the following list start in the "default" mount namespace.
804     // Note that they should use bootstrap bionic if they start before APEXes are ready.
805     static const std::set<std::string> kUseDefaultMountNamespace = {
806             "ueventd",           // load firmwares from APEXes
807             "hwservicemanager",  // load VINTF fragments from APEXes
808             "servicemanager",    // load VINTF fragments from APEXes
809     };
810     if (kUseDefaultMountNamespace.find(name_) != kUseDefaultMountNamespace.end()) {
811         mount_namespace_ = NS_DEFAULT;
812         return;
813     }
814     // Use the "default" mount namespace only if it's ready
815     mount_namespace_ = IsDefaultMountNamespaceReady() ? NS_DEFAULT : NS_BOOTSTRAP;
816 }
817 
// Returns the number of entries in /proc/self/task whose name does not start with '.',
// or -1 if that directory cannot be opened.
static int ThreadCount() {
    DIR* raw_dir = opendir("/proc/self/task");
    std::unique_ptr<DIR, decltype(&closedir)> task_dir(raw_dir, closedir);
    if (task_dir == nullptr) {
        return -1;
    }

    int visible_entries = 0;
    for (dirent* item = readdir(task_dir.get()); item != nullptr;
         item = readdir(task_dir.get())) {
        // Skip "." and ".." (and anything else that is dot-prefixed).
        if (item->d_name[0] == '.') {
            continue;
        }
        ++visible_entries;
    }
    return visible_entries;
}
833 
834 // Must be called BEFORE any threads are created. See also the sigprocmask() man page.
CreateSigchldFd()835 unique_fd Service::CreateSigchldFd() {
836     CHECK_EQ(ThreadCount(), 1);
837     sigset_t mask;
838     sigemptyset(&mask);
839     sigaddset(&mask, SIGCHLD);
840     if (sigprocmask(SIG_BLOCK, &mask, nullptr) < 0) {
841         PLOG(FATAL) << "Failed to block SIGCHLD";
842     }
843 
844     return unique_fd(signalfd(-1, &mask, SFD_CLOEXEC));
845 }
846 
OpenAndSaveStaticKallsymsFd()847 void Service::OpenAndSaveStaticKallsymsFd() {
848     Result<Descriptor> result = CreateSharedKallsymsFd();
849     if (!result.ok()) {
850       LOG(ERROR) << result.error();
851     }
852 }
853 
854 // This function is designed to be called in two situations:
855 // 1) early during second_stage init, to open and save the shared fd as a
856 //    static (see OpenAndSaveStaticKallsymsFd).
857 // 2) whenever a service requesting a copy of the fd is being started, at which
858 //    point it will get a duplicated copy of the static fd.
CreateSharedKallsymsFd()859 Result<Descriptor> Service::CreateSharedKallsymsFd() {
860     static constexpr char kallsyms_path[] = "/proc/kallsyms";
861     static int static_fd = open(kallsyms_path, O_RDONLY | O_NONBLOCK | O_CLOEXEC);
862     if (static_fd < 0) {
863         return ErrnoError() << "failed to open " << kallsyms_path;
864     }
865 
866     unique_fd fd{fcntl(static_fd, F_DUPFD_CLOEXEC, /*min_fd=*/3)};
867     if (fd < 0) {
868         return ErrnoError() << "failed fcntl(F_DUPFD_CLOEXEC)";
869     }
870 
871     // Use the same environment variable as if the service specified
872     // "file /proc/kallsyms r".
873     return Descriptor(std::string(ANDROID_FILE_ENV_PREFIX) + kallsyms_path, std::move(fd));
874 }
875 
SetStartedInFirstStage(pid_t pid)876 void Service::SetStartedInFirstStage(pid_t pid) {
877     LOG(INFO) << "adding first-stage service '" << name_ << "'...";
878 
879     time_started_ = boot_clock::now();  // not accurate, but doesn't matter here
880     pid_ = pid;
881     flags_ |= SVC_RUNNING;
882     start_order_ = next_start_order_++;
883 
884     NotifyStateChange("running");
885 }
886 
ResetFlagsForStart()887 void Service::ResetFlagsForStart() {
888     // Starting a service removes it from the disabled or reset state and
889     // immediately takes it out of the restarting state if it was in there.
890     flags_ &= ~(SVC_DISABLED | SVC_RESTARTING | SVC_RESET | SVC_RESTART | SVC_DISABLED_START);
891 }
892 
StartIfNotDisabled()893 Result<void> Service::StartIfNotDisabled() {
894     if (!(flags_ & SVC_DISABLED)) {
895         return Start();
896     } else {
897         flags_ |= SVC_DISABLED_START;
898     }
899     return {};
900 }
901 
Enable()902 Result<void> Service::Enable() {
903     flags_ &= ~(SVC_DISABLED | SVC_RC_DISABLED);
904     if (flags_ & SVC_DISABLED_START) {
905         return Start();
906     }
907     return {};
908 }
909 
Reset()910 void Service::Reset() {
911     StopOrReset(SVC_RESET);
912 }
913 
Stop()914 void Service::Stop() {
915     StopOrReset(SVC_DISABLED);
916 }
917 
Terminate()918 void Service::Terminate() {
919     flags_ &= ~(SVC_RESTARTING | SVC_DISABLED_START);
920     flags_ |= SVC_DISABLED;
921     if (pid_) {
922         KillProcessGroup(SIGTERM);
923         NotifyStateChange("stopping");
924     }
925 }
926 
Timeout()927 void Service::Timeout() {
928     // All process state flags will be taken care of in Reap(), we really just want to kill the
929     // process here when it times out.  Oneshot processes will transition to be disabled, and
930     // all other processes will transition to be restarting.
931     LOG(INFO) << "Service '" << name_ << "' expired its timeout of " << timeout_period_->count()
932               << " seconds and will now be killed";
933     if (pid_) {
934         KillProcessGroup(SIGKILL);
935         NotifyStateChange("stopping");
936     }
937 }
938 
Restart()939 void Service::Restart() {
940     if (flags_ & SVC_RUNNING) {
941         /* Stop, wait, then start the service. */
942         StopOrReset(SVC_RESTART);
943     } else if (!(flags_ & SVC_RESTARTING)) {
944         /* Just start the service since it's not running. */
945         if (auto result = Start(); !result.ok()) {
946             LOG(ERROR) << "Could not restart '" << name_ << "': " << result.error();
947         }
948     } /* else: Service is restarting anyways. */
949 }
950 
951 // The how field should be either SVC_DISABLED, SVC_RESET, or SVC_RESTART.
StopOrReset(int how)952 void Service::StopOrReset(int how) {
953     // The service is still SVC_RUNNING until its process exits, but if it has
954     // already exited it shoudn't attempt a restart yet.
955     flags_ &= ~(SVC_RESTARTING | SVC_DISABLED_START);
956 
957     if ((how != SVC_DISABLED) && (how != SVC_RESET) && (how != SVC_RESTART)) {
958         // An illegal flag: default to SVC_DISABLED.
959         LOG(ERROR) << "service '" << name_ << "' requested unknown flag " << how
960                    << ", defaulting to disabling it.";
961         how = SVC_DISABLED;
962     }
963 
964     // If the service has not yet started, prevent it from auto-starting with its class.
965     if (how == SVC_RESET) {
966         flags_ |= (flags_ & SVC_RC_DISABLED) ? SVC_DISABLED : SVC_RESET;
967     } else {
968         flags_ |= how;
969     }
970     // Make sure it's in right status when a restart immediately follow a
971     // stop/reset or vice versa.
972     if (how == SVC_RESTART) {
973         flags_ &= (~(SVC_DISABLED | SVC_RESET));
974     } else {
975         flags_ &= (~SVC_RESTART);
976     }
977 
978     if (pid_) {
979         if (flags_ & SVC_GENTLE_KILL) {
980             KillProcessGroup(SIGTERM);
981             if (!process_cgroup_empty()) std::this_thread::sleep_for(200ms);
982         }
983         KillProcessGroup(SIGKILL);
984         NotifyStateChange("stopping");
985     } else {
986         NotifyStateChange("stopped");
987     }
988 }
989 
MakeTemporaryOneshotService(const std::vector<std::string> & args)990 Result<std::unique_ptr<Service>> Service::MakeTemporaryOneshotService(
991         const std::vector<std::string>& args) {
992     // Parse the arguments: exec [SECLABEL [UID [GID]*] --] COMMAND ARGS...
993     // SECLABEL can be a - to denote default
994     std::size_t command_arg = 1;
995     for (std::size_t i = 1; i < args.size(); ++i) {
996         if (args[i] == "--") {
997             command_arg = i + 1;
998             break;
999         }
1000     }
1001     if (command_arg > 4 + NR_SVC_SUPP_GIDS) {
1002         return Error() << "exec called with too many supplementary group ids";
1003     }
1004 
1005     if (command_arg >= args.size()) {
1006         return Error() << "exec called without command";
1007     }
1008     std::vector<std::string> str_args(args.begin() + command_arg, args.end());
1009 
1010     static size_t exec_count = 0;
1011     exec_count++;
1012     std::string name = "exec " + std::to_string(exec_count) + " (" + Join(str_args, " ") + ")";
1013 
1014     unsigned flags = SVC_ONESHOT | SVC_TEMPORARY;
1015     unsigned namespace_flags = 0;
1016 
1017     std::string seclabel = "";
1018     if (command_arg > 2 && args[1] != "-") {
1019         seclabel = args[1];
1020     }
1021     Result<uid_t> uid = 0;
1022     if (command_arg > 3) {
1023         uid = DecodeUid(args[2]);
1024         if (!uid.ok()) {
1025             return Error() << "Unable to decode UID for '" << args[2] << "': " << uid.error();
1026         }
1027     }
1028     Result<gid_t> gid = 0;
1029     std::vector<gid_t> supp_gids;
1030     if (command_arg > 4) {
1031         gid = DecodeUid(args[3]);
1032         if (!gid.ok()) {
1033             return Error() << "Unable to decode GID for '" << args[3] << "': " << gid.error();
1034         }
1035         std::size_t nr_supp_gids = command_arg - 1 /* -- */ - 4 /* exec SECLABEL UID GID */;
1036         for (size_t i = 0; i < nr_supp_gids; ++i) {
1037             auto supp_gid = DecodeUid(args[4 + i]);
1038             if (!supp_gid.ok()) {
1039                 return Error() << "Unable to decode GID for '" << args[4 + i]
1040                                << "': " << supp_gid.error();
1041             }
1042             supp_gids.push_back(*supp_gid);
1043         }
1044     }
1045 
1046     return std::make_unique<Service>(name, flags, *uid, *gid, supp_gids, namespace_flags, seclabel,
1047                                      nullptr, /*filename=*/"", str_args);
1048 }
1049 
1050 // This is used for snapuserd_proxy, which hands off a socket to snapuserd. It's
1051 // a special case to support the daemon launched in first-stage init. The persist
1052 // feature is not part of the init language and is only used here.
MarkSocketPersistent(const std::string & socket_name)1053 bool Service::MarkSocketPersistent(const std::string& socket_name) {
1054     for (auto& socket : sockets_) {
1055         if (socket.name == socket_name) {
1056             socket.persist = true;
1057             return true;
1058         }
1059     }
1060     return false;
1061 }
1062 
1063 }  // namespace init
1064 }  // namespace android
1065