1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "service.h"
18
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <inttypes.h>
22 #include <linux/securebits.h>
23 #include <sched.h>
24 #include <sys/prctl.h>
25 #include <sys/stat.h>
26 #include <sys/time.h>
27 #include <termios.h>
28 #include <unistd.h>
29 #include <thread>
30
31 #include <android-base/file.h>
32 #include <android-base/logging.h>
33 #include <android-base/properties.h>
34 #include <android-base/scopeguard.h>
35 #include <android-base/stringprintf.h>
36 #include <android-base/strings.h>
37 #include <cutils/android_get_control_file.h>
38 #include <cutils/sockets.h>
39 #include <processgroup/processgroup.h>
40 #include <selinux/selinux.h>
41 #include <sys/signalfd.h>
42
43 #include <string>
44
45 #include "interprocess_fifo.h"
46 #include "lmkd_service.h"
47 #include "service_list.h"
48 #include "util.h"
49
50 #if defined(__BIONIC__)
51 #include <bionic/reserved_signals.h>
52 #endif
53
54 #ifdef INIT_FULL_SOURCES
55 #include <android/api-level.h>
56
57 #include "mount_namespace.h"
58 #include "reboot_utils.h"
59 #include "selinux.h"
60 #else
61 #include "host_init_stubs.h"
62 #endif
63
64 using android::base::boot_clock;
65 using android::base::GetBoolProperty;
66 using android::base::GetIntProperty;
67 using android::base::GetProperty;
68 using android::base::Join;
69 using android::base::make_scope_guard;
70 using android::base::SetProperty;
71 using android::base::StartsWith;
72 using android::base::StringPrintf;
73 using android::base::unique_fd;
74 using android::base::WriteStringToFile;
75
76 namespace android {
77 namespace init {
78
ComputeContextFromExecutable(const std::string & service_path)79 static Result<std::string> ComputeContextFromExecutable(const std::string& service_path) {
80 std::string computed_context;
81
82 char* raw_con = nullptr;
83 char* raw_filecon = nullptr;
84
85 if (getcon(&raw_con) == -1) {
86 return Error() << "Could not get security context";
87 }
88 std::unique_ptr<char, decltype(&freecon)> mycon(raw_con, freecon);
89
90 if (getfilecon(service_path.c_str(), &raw_filecon) == -1) {
91 return Error() << "Could not get file context";
92 }
93 std::unique_ptr<char, decltype(&freecon)> filecon(raw_filecon, freecon);
94
95 char* new_con = nullptr;
96 int rc = security_compute_create(mycon.get(), filecon.get(),
97 string_to_security_class("process"), &new_con);
98 if (rc == 0) {
99 computed_context = new_con;
100 free(new_con);
101 }
102 if (rc == 0 && computed_context == mycon.get()) {
103 return Error() << "File " << service_path << "(labeled \"" << filecon.get()
104 << "\") has incorrect label or no domain transition from " << mycon.get()
105 << " to another SELinux domain defined. Have you configured your "
106 "service correctly? https://source.android.com/security/selinux/"
107 "device-policy#label_new_services_and_address_denials. Note: this "
108 "error shows up even in permissive mode in order to make auditing "
109 "denials possible.";
110 }
111 if (rc < 0) {
112 return Error() << "Could not get process context";
113 }
114 return computed_context;
115 }
116
ExpandArgsAndExecv(const std::vector<std::string> & args,bool sigstop)117 static bool ExpandArgsAndExecv(const std::vector<std::string>& args, bool sigstop) {
118 std::vector<std::string> expanded_args;
119 std::vector<char*> c_strings;
120
121 expanded_args.resize(args.size());
122 c_strings.push_back(const_cast<char*>(args[0].data()));
123 for (std::size_t i = 1; i < args.size(); ++i) {
124 auto expanded_arg = ExpandProps(args[i]);
125 if (!expanded_arg.ok()) {
126 LOG(FATAL) << args[0] << ": cannot expand arguments': " << expanded_arg.error();
127 }
128 expanded_args[i] = *expanded_arg;
129 c_strings.push_back(expanded_args[i].data());
130 }
131 c_strings.push_back(nullptr);
132
133 if (sigstop) {
134 kill(getpid(), SIGSTOP);
135 }
136
137 return execv(c_strings[0], c_strings.data()) == 0;
138 }
139
// Counter handed out to services as they start: Start() and SetStartedInFirstStage() store the
// current value in start_order_ and then increment it.
unsigned long Service::next_start_order_ = 1;
// Set to true by ExecStart() after an exec service has been started successfully.
bool Service::is_exec_service_running_ = false;
142
// Convenience constructor: delegates to the full constructor with flags 0, no uid, gid 0, no
// supplementary gids, namespace flags 0 and an empty seclabel.
Service::Service(const std::string& name, Subcontext* subcontext_for_restart_commands,
                 const std::string& filename, const std::vector<std::string>& args)
    : Service(name, 0, std::nullopt, 0, {}, 0, "", subcontext_for_restart_commands, filename,
              args) {}
147
// Full constructor.
//
// Stores the service name, flags, credentials (uid, gid, supplementary gids), namespace flags,
// seclabel, source filename and argument vector. The service starts in class "default" with
// pid 0, crash count 0, start order 0, no I/O priority class, priority 0 and the default OOM
// score adjustment. `subcontext_for_restart_commands` is kept in subcontext_ and also passed to
// the action that holds this service's 'onrestart' commands.
Service::Service(const std::string& name, unsigned flags, std::optional<uid_t> uid, gid_t gid,
                 const std::vector<gid_t>& supp_gids, int namespace_flags,
                 const std::string& seclabel, Subcontext* subcontext_for_restart_commands,
                 const std::string& filename, const std::vector<std::string>& args)
    : name_(name),
      classnames_({"default"}),
      flags_(flags),
      pid_(0),
      crash_count_(0),
      proc_attr_{.ioprio_class = IoSchedClass_NONE,
                 .ioprio_pri = 0,
                 .parsed_uid = uid,
                 .gid = gid,
                 .supp_gids = supp_gids,
                 .priority = 0},
      namespaces_{.flags = namespace_flags},
      seclabel_(seclabel),
      subcontext_(subcontext_for_restart_commands),
      onrestart_(false, subcontext_for_restart_commands, "<Service '" + name + "' onrestart>", 0,
                 "onrestart", {}),
      oom_score_adjust_(DEFAULT_OOM_SCORE_ADJUST),
      start_order_(0),
      args_(args),
      filename_(filename) {}
172
NotifyStateChange(const std::string & new_state) const173 void Service::NotifyStateChange(const std::string& new_state) const {
174 if ((flags_ & SVC_TEMPORARY) != 0) {
175 // Services created by 'exec' are temporary and don't have properties tracking their state.
176 return;
177 }
178
179 std::string prop_name = "init.svc." + name_;
180 SetProperty(prop_name, new_state);
181
182 if (new_state == "running") {
183 uint64_t start_ns = time_started_.time_since_epoch().count();
184 std::string boottime_property = "ro.boottime." + name_;
185 if (GetProperty(boottime_property, "").empty()) {
186 SetProperty(boottime_property, std::to_string(start_ns));
187 }
188 }
189
190 // init.svc_debug_pid.* properties are only for tests, and should not be used
191 // on device for security checks.
192 std::string pid_property = "init.svc_debug_pid." + name_;
193 if (new_state == "running") {
194 SetProperty(pid_property, std::to_string(pid_));
195 } else if (new_state == "stopped") {
196 SetProperty(pid_property, "");
197 }
198 }
199
KillProcessGroup(int signal)200 void Service::KillProcessGroup(int signal) {
201 // Always attempt the process kill if process is still running.
202 // Cgroup clean up routines are idempotent. It's safe to call
203 // killProcessGroup repeatedly. During shutdown, `init` will
204 // call this function to send SIGTERM/SIGKILL to all processes.
205 // These signals must be sent for a successful shutdown.
206 if (!process_cgroup_empty_ || IsRunning()) {
207 LOG(INFO) << "Sending signal " << signal << " to service '" << name_ << "' (pid " << pid_
208 << ") process group...";
209 int r;
210 if (signal == SIGTERM) {
211 r = killProcessGroupOnce(uid(), pid_, signal);
212 } else {
213 r = killProcessGroup(uid(), pid_, signal);
214 }
215
216 if (r == 0) process_cgroup_empty_ = true;
217 }
218
219 if (oom_score_adjust_ != DEFAULT_OOM_SCORE_ADJUST) {
220 LmkdUnregister(name_, pid_);
221 }
222 }
223
SetProcessAttributesAndCaps(InterprocessFifo setsid_finished)224 void Service::SetProcessAttributesAndCaps(InterprocessFifo setsid_finished) {
225 // Keep capabilites on uid change.
226 if (capabilities_ && uid()) {
227 // If Android is running in a container, some securebits might already
228 // be locked, so don't change those.
229 unsigned long securebits = prctl(PR_GET_SECUREBITS);
230 if (securebits == -1UL) {
231 PLOG(FATAL) << "prctl(PR_GET_SECUREBITS) failed for " << name_;
232 }
233 securebits |= SECBIT_KEEP_CAPS | SECBIT_KEEP_CAPS_LOCKED;
234 if (prctl(PR_SET_SECUREBITS, securebits) != 0) {
235 PLOG(FATAL) << "prctl(PR_SET_SECUREBITS) failed for " << name_;
236 }
237 }
238
239 if (auto result = SetProcessAttributes(proc_attr_, std::move(setsid_finished)); !result.ok()) {
240 LOG(FATAL) << "cannot set attribute for " << name_ << ": " << result.error();
241 }
242
243 if (!seclabel_.empty()) {
244 if (setexeccon(seclabel_.c_str()) < 0) {
245 PLOG(FATAL) << "cannot setexeccon('" << seclabel_ << "') for " << name_;
246 }
247 }
248
249 if (capabilities_) {
250 if (!SetCapsForExec(*capabilities_)) {
251 LOG(FATAL) << "cannot set capabilities for " << name_;
252 }
253 } else if (uid()) {
254 // Inheritable caps can be non-zero when running in a container.
255 if (!DropInheritableCaps()) {
256 LOG(FATAL) << "cannot drop inheritable caps for " << name_;
257 }
258 }
259 }
260
Reap(const siginfo_t & siginfo)261 void Service::Reap(const siginfo_t& siginfo) {
262 if (!(flags_ & SVC_ONESHOT) || (flags_ & SVC_RESTART)) {
263 KillProcessGroup(SIGKILL);
264 } else {
265 // Legacy behavior from ~2007 until Android R: this else branch did not exist and we did not
266 // kill the process group in this case.
267 if (SelinuxGetVendorAndroidVersion() >= __ANDROID_API_R__) {
268 // The new behavior in Android R is to kill these process groups in all cases. The
269 // 'true' parameter instructions KillProcessGroup() to report a warning message where it
270 // detects a difference in behavior has occurred.
271 KillProcessGroup(SIGKILL);
272 }
273 }
274
275 // Remove any socket resources we may have created.
276 for (const auto& socket : sockets_) {
277 if (socket.persist) {
278 continue;
279 }
280 auto path = ANDROID_SOCKET_DIR "/" + socket.name;
281 unlink(path.c_str());
282 }
283
284 for (const auto& f : reap_callbacks_) {
285 f(siginfo);
286 }
287
288 if ((siginfo.si_code != CLD_EXITED || siginfo.si_status != 0) && on_failure_reboot_target_) {
289 LOG(ERROR) << "Service " << name_
290 << " has 'reboot_on_failure' option and failed, shutting down system.";
291 trigger_shutdown(*on_failure_reboot_target_);
292 }
293
294 if (flags_ & SVC_EXEC) UnSetExec();
295
296 if (name_ == "zygote" || name_ == "zygote64") {
297 removeAllEmptyProcessGroups();
298 }
299
300 if (flags_ & SVC_TEMPORARY) return;
301
302 pid_ = 0;
303 flags_ &= (~SVC_RUNNING);
304 start_order_ = 0;
305 was_last_exit_ok_ = siginfo.si_code == CLD_EXITED && siginfo.si_status == 0;
306
307 // Oneshot processes go into the disabled state on exit,
308 // except when manually restarted.
309 if ((flags_ & SVC_ONESHOT) && !(flags_ & SVC_RESTART) && !(flags_ & SVC_RESET)) {
310 flags_ |= SVC_DISABLED;
311 }
312
313 // Disabled and reset processes do not get restarted automatically.
314 if (flags_ & (SVC_DISABLED | SVC_RESET)) {
315 NotifyStateChange("stopped");
316 return;
317 }
318
319 #if INIT_FULL_SOURCES
320 static bool is_apex_updatable = true;
321 #else
322 static bool is_apex_updatable = false;
323 #endif
324 const bool use_default_mount_ns =
325 mount_namespace_.has_value() && *mount_namespace_ == NS_DEFAULT;
326 const bool is_process_updatable = use_default_mount_ns && is_apex_updatable;
327
328 #if defined(__BIONIC__) && defined(SEGV_MTEAERR)
329 // As a precaution, we only upgrade a service once per reboot, to limit
330 // the potential impact.
331 //
332 // BIONIC_SIGNAL_ART_PROFILER is a magic value used by deuggerd to signal
333 // that the process crashed with SIGSEGV and SEGV_MTEAERR. This signal will
334 // never be seen otherwise in a crash, because it always gets handled by the
335 // profiling signal handlers in bionic. See also
336 // debuggerd/handler/debuggerd_handler.cpp.
337 bool should_upgrade_mte = siginfo.si_code != CLD_EXITED &&
338 siginfo.si_status == BIONIC_SIGNAL_ART_PROFILER && !upgraded_mte_;
339
340 if (should_upgrade_mte) {
341 constexpr int kDefaultUpgradeSecs = 60;
342 int secs = GetIntProperty("persist.device_config.memory_safety_native.upgrade_secs.default",
343 kDefaultUpgradeSecs);
344 secs = GetIntProperty(
345 "persist.device_config.memory_safety_native.upgrade_secs.service." + name_, secs);
346 if (secs > 0) {
347 LOG(INFO) << "Upgrading service " << name_ << " to sync MTE for " << secs << " seconds";
348 once_environment_vars_.emplace_back("BIONIC_MEMTAG_UPGRADE_SECS", std::to_string(secs));
349 upgraded_mte_ = true;
350 } else {
351 LOG(INFO) << "Not upgrading service " << name_ << " to sync MTE due to device config";
352 }
353 }
354 #endif
355
356 // If we crash > 4 times in 'fatal_crash_window_' minutes or before boot_completed,
357 // reboot into bootloader or set crashing property
358 boot_clock::time_point now = boot_clock::now();
359 constexpr const char native_watchdog_reboot_time[] = "persist.init.svc.last_fatal_reboot_epoch";
360 uint64_t throttle_window =
361 std::chrono::duration_cast<std::chrono::seconds>(std::chrono::hours(24)).count();
362 if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART) &&
363 !was_last_exit_ok_) {
364 bool boot_completed = GetBoolProperty("sys.boot_completed", false);
365 if (now < time_crashed_ + fatal_crash_window_ || !boot_completed) {
366 if (++crash_count_ > 4) {
367 auto exit_reason =
368 boot_completed
369 ? "in " + std::to_string(fatal_crash_window_.count()) + " minutes"
370 : "before boot completed";
371 if (flags_ & SVC_CRITICAL) {
372 if (!GetBoolProperty("init.svc_debug.no_fatal." + name_, false)) {
373 uint64_t epoch_time =
374 std::chrono::duration_cast<std::chrono::seconds>(
375 std::chrono::system_clock::now().time_since_epoch())
376 .count();
377 // Do not reboot again If it was already initiated in the last 24hrs
378 if (epoch_time - GetIntProperty(native_watchdog_reboot_time, 0) >
379 throttle_window) {
380 SetProperty(native_watchdog_reboot_time, std::to_string(epoch_time));
381 // Aborts into `fatal_reboot_target_'.
382 SetFatalRebootTarget(fatal_reboot_target_);
383 LOG(FATAL) << "critical process '" << name_ << "' exited 4 times "
384 << exit_reason;
385 } else {
386 LOG(INFO) << "Reboot already performed in last 24hrs because of crash.";
387 }
388 }
389 } else {
390 LOG(ERROR) << "process with updatable components '" << name_
391 << "' exited 4 times " << exit_reason;
392 // Notifies update_verifier and apexd
393 SetProperty("sys.init.updatable_crashing_process_name", name_);
394 SetProperty("sys.init.updatable_crashing", "1");
395 }
396 }
397 } else {
398 time_crashed_ = now;
399 crash_count_ = 1;
400 }
401 }
402
403 flags_ &= (~SVC_RESTART);
404 flags_ |= SVC_RESTARTING;
405
406 // Execute all onrestart commands for this service.
407 onrestart_.ExecuteAllCommands();
408
409 NotifyStateChange("restarting");
410 return;
411 }
412
DumpState() const413 void Service::DumpState() const {
414 LOG(INFO) << "service " << name_;
415 LOG(INFO) << " class '" << Join(classnames_, " ") << "'";
416 LOG(INFO) << " exec " << Join(args_, " ");
417 for (const auto& socket : sockets_) {
418 LOG(INFO) << " socket " << socket.name;
419 }
420 for (const auto& file : files_) {
421 LOG(INFO) << " file " << file.name;
422 }
423 }
424
425
ExecStart()426 Result<void> Service::ExecStart() {
427 auto reboot_on_failure = make_scope_guard([this] {
428 if (on_failure_reboot_target_) {
429 trigger_shutdown(*on_failure_reboot_target_);
430 }
431 });
432
433 if (is_updatable() && !IsDefaultMountNamespaceReady()) {
434 // Don't delay the service for ExecStart() as the semantic is that
435 // the caller might depend on the side effect of the execution.
436 return Error() << "Cannot start an updatable service '" << name_
437 << "' before configs from APEXes are all loaded";
438 }
439
440 flags_ |= SVC_ONESHOT;
441
442 if (auto result = Start(); !result.ok()) {
443 return result;
444 }
445
446 flags_ |= SVC_EXEC;
447 is_exec_service_running_ = true;
448
449 LOG(INFO) << "SVC_EXEC service '" << name_ << "' pid " << pid_ << " (uid " << uid() << " gid "
450 << proc_attr_.gid << "+" << proc_attr_.supp_gids.size() << " context "
451 << (!seclabel_.empty() ? seclabel_ : "default") << ") started; waiting...";
452
453 reboot_on_failure.Disable();
454 return {};
455 }
456
CheckConsole()457 Result<void> Service::CheckConsole() {
458 if (!(flags_ & SVC_CONSOLE)) {
459 return {};
460 }
461
462 // On newer kernels, /dev/console will always exist because
463 // "console=ttynull" is hard-coded in CONFIG_CMDLINE. This new boot
464 // property should be set via "androidboot.serialconsole=0" to explicitly
465 // disable services requiring the console. For older kernels and boot
466 // images, not setting this at all will fall back to the old behavior
467 if (GetProperty("ro.boot.serialconsole", "") == "0") {
468 flags_ |= SVC_DISABLED;
469 return {};
470 }
471
472 if (proc_attr_.console.empty()) {
473 proc_attr_.console = "/dev/" + GetProperty("ro.boot.console", "console");
474 }
475
476 // Make sure that open call succeeds to ensure a console driver is
477 // properly registered for the device node
478 int console_fd = open(proc_attr_.console.c_str(), O_RDWR | O_CLOEXEC);
479 if (console_fd < 0) {
480 flags_ |= SVC_DISABLED;
481 return ErrnoError() << "Couldn't open console '" << proc_attr_.console << "'";
482 }
483 close(console_fd);
484 return {};
485 }
486
487 // Configures the memory cgroup properties for the service.
ConfigureMemcg()488 void Service::ConfigureMemcg() {
489 if (swappiness_ != -1) {
490 if (!setProcessGroupSwappiness(uid(), pid_, swappiness_)) {
491 PLOG(ERROR) << "setProcessGroupSwappiness failed";
492 }
493 }
494
495 if (soft_limit_in_bytes_ != -1) {
496 if (!setProcessGroupSoftLimit(uid(), pid_, soft_limit_in_bytes_)) {
497 PLOG(ERROR) << "setProcessGroupSoftLimit failed";
498 }
499 }
500
501 size_t computed_limit_in_bytes = limit_in_bytes_;
502 if (limit_percent_ != -1) {
503 long page_size = sysconf(_SC_PAGESIZE);
504 long num_pages = sysconf(_SC_PHYS_PAGES);
505 if (page_size > 0 && num_pages > 0) {
506 size_t max_mem = SIZE_MAX;
507 if (size_t(num_pages) < SIZE_MAX / size_t(page_size)) {
508 max_mem = size_t(num_pages) * size_t(page_size);
509 }
510 computed_limit_in_bytes =
511 std::min(computed_limit_in_bytes, max_mem / 100 * limit_percent_);
512 }
513 }
514
515 if (!limit_property_.empty()) {
516 // This ends up overwriting computed_limit_in_bytes but only if the
517 // property is defined.
518 computed_limit_in_bytes =
519 android::base::GetUintProperty(limit_property_, computed_limit_in_bytes, SIZE_MAX);
520 }
521
522 if (computed_limit_in_bytes != size_t(-1)) {
523 if (!setProcessGroupLimit(uid(), pid_, computed_limit_in_bytes)) {
524 PLOG(ERROR) << "setProcessGroupLimit failed";
525 }
526 }
527 }
528
529 // Enters namespaces, sets environment variables, writes PID files and runs the service executable.
RunService(const std::vector<Descriptor> & descriptors,InterprocessFifo cgroups_activated,InterprocessFifo setsid_finished)530 void Service::RunService(const std::vector<Descriptor>& descriptors,
531 InterprocessFifo cgroups_activated, InterprocessFifo setsid_finished) {
532 if (auto result = EnterNamespaces(namespaces_, name_, mount_namespace_); !result.ok()) {
533 LOG(FATAL) << "Service '" << name_ << "' failed to set up namespaces: " << result.error();
534 }
535
536 for (const auto& [key, value] : once_environment_vars_) {
537 setenv(key.c_str(), value.c_str(), 1);
538 }
539 for (const auto& [key, value] : environment_vars_) {
540 setenv(key.c_str(), value.c_str(), 1);
541 }
542
543 for (const auto& descriptor : descriptors) {
544 descriptor.Publish();
545 }
546
547 if (auto result = WritePidToFiles(&writepid_files_); !result.ok()) {
548 LOG(ERROR) << "failed to write pid to files: " << result.error();
549 }
550
551 // Wait until the cgroups have been created and until the cgroup controllers have been
552 // activated.
553 Result<uint8_t> byte = cgroups_activated.Read();
554 if (!byte.ok()) {
555 LOG(ERROR) << name_ << ": failed to read from notification channel: " << byte.error();
556 }
557 cgroups_activated.Close();
558 if (*byte != kCgroupsActivated) {
559 LOG(FATAL) << "Service '" << name_ << "' failed to start due to a fatal error";
560 _exit(EXIT_FAILURE);
561 }
562
563 if (task_profiles_.size() > 0) {
564 bool succeeded = SelinuxGetVendorAndroidVersion() < __ANDROID_API_U__
565 ?
566 // Compatibility mode: apply the task profiles to the current
567 // thread.
568 SetTaskProfiles(getpid(), task_profiles_)
569 :
570 // Apply the task profiles to the current process.
571 SetProcessProfiles(getuid(), getpid(), task_profiles_);
572 if (!succeeded) {
573 LOG(ERROR) << "failed to set task profiles";
574 }
575 }
576
577 // As requested, set our gid, supplemental gids, uid, context, and
578 // priority. Aborts on failure.
579 SetProcessAttributesAndCaps(std::move(setsid_finished));
580
581 if (!ExpandArgsAndExecv(args_, sigstop_)) {
582 PLOG(ERROR) << "cannot execv('" << args_[0]
583 << "'). See the 'Debugging init' section of init's README.md for tips";
584 }
585 }
586
Start()587 Result<void> Service::Start() {
588 auto reboot_on_failure = make_scope_guard([this] {
589 if (on_failure_reboot_target_) {
590 trigger_shutdown(*on_failure_reboot_target_);
591 }
592 });
593
594 if (is_updatable() && !IsDefaultMountNamespaceReady()) {
595 ServiceList::GetInstance().DelayService(*this);
596 return Error() << "Cannot start an updatable service '" << name_
597 << "' before configs from APEXes are all loaded. "
598 << "Queued for execution.";
599 }
600
601 bool disabled = (flags_ & (SVC_DISABLED | SVC_RESET));
602 ResetFlagsForStart();
603
604 // Running processes require no additional work --- if they're in the
605 // process of exiting, we've ensured that they will immediately restart
606 // on exit, unless they are ONESHOT. For ONESHOT service, if it's in
607 // stopping status, we just set SVC_RESTART flag so it will get restarted
608 // in Reap().
609 if (flags_ & SVC_RUNNING) {
610 if ((flags_ & SVC_ONESHOT) && disabled) {
611 flags_ |= SVC_RESTART;
612 }
613
614 LOG(INFO) << "service '" << name_
615 << "' requested start, but it is already running (flags: " << flags_ << ")";
616
617 // It is not an error to try to start a service that is already running.
618 reboot_on_failure.Disable();
619 return {};
620 }
621
622 // cgroups_activated is used for communication from the parent to the child
623 // while setsid_finished is used for communication from the child process to
624 // the parent process. These two communication channels are separate because
625 // combining these into a single communication channel would introduce a
626 // race between the Write() calls by the parent and by the child.
627 InterprocessFifo cgroups_activated, setsid_finished;
628 OR_RETURN(cgroups_activated.Initialize());
629 OR_RETURN(setsid_finished.Initialize());
630
631 if (Result<void> result = CheckConsole(); !result.ok()) {
632 return result;
633 }
634
635 struct stat sb;
636 if (stat(args_[0].c_str(), &sb) == -1) {
637 flags_ |= SVC_DISABLED;
638 return ErrnoError() << "Cannot find '" << args_[0] << "'";
639 }
640
641 std::string scon;
642 if (!seclabel_.empty()) {
643 scon = seclabel_;
644 } else {
645 auto result = ComputeContextFromExecutable(args_[0]);
646 if (!result.ok()) {
647 return result.error();
648 }
649 scon = *result;
650 }
651
652 if (!mount_namespace_.has_value()) {
653 // remember from which mount namespace the service should start
654 SetMountNamespace();
655 }
656
657 LOG(INFO) << "starting service '" << name_ << "'...";
658
659 std::vector<Descriptor> descriptors;
660 for (const auto& socket : sockets_) {
661 if (auto result = socket.Create(scon); result.ok()) {
662 descriptors.emplace_back(std::move(*result));
663 } else {
664 LOG(INFO) << "Could not create socket '" << socket.name << "': " << result.error();
665 }
666 }
667
668 for (const auto& file : files_) {
669 if (auto result = file.Create(); result.ok()) {
670 descriptors.emplace_back(std::move(*result));
671 } else {
672 LOG(INFO) << "Could not open file '" << file.name << "': " << result.error();
673 }
674 }
675
676 if (shared_kallsyms_file_) {
677 if (auto result = CreateSharedKallsymsFd(); result.ok()) {
678 descriptors.emplace_back(std::move(*result));
679 } else {
680 LOG(INFO) << "Could not obtain a copy of /proc/kallsyms: " << result.error();
681 }
682 }
683
684 pid_t pid = -1;
685 if (namespaces_.flags) {
686 pid = clone(nullptr, nullptr, namespaces_.flags | SIGCHLD, nullptr);
687 } else {
688 pid = fork();
689 }
690
691 if (pid == 0) {
692 umask(077);
693 cgroups_activated.CloseWriteFd();
694 setsid_finished.CloseReadFd();
695 RunService(descriptors, std::move(cgroups_activated), std::move(setsid_finished));
696 _exit(127);
697 } else {
698 cgroups_activated.CloseReadFd();
699 setsid_finished.CloseWriteFd();
700 }
701
702 if (pid < 0) {
703 pid_ = 0;
704 return ErrnoError() << "Failed to fork";
705 }
706
707 once_environment_vars_.clear();
708
709 if (oom_score_adjust_ != DEFAULT_OOM_SCORE_ADJUST) {
710 std::string oom_str = std::to_string(oom_score_adjust_);
711 std::string oom_file = StringPrintf("/proc/%d/oom_score_adj", pid);
712 if (!WriteStringToFile(oom_str, oom_file)) {
713 PLOG(ERROR) << "couldn't write oom_score_adj";
714 }
715 }
716
717 time_started_ = boot_clock::now();
718 pid_ = pid;
719 flags_ |= SVC_RUNNING;
720 start_order_ = next_start_order_++;
721 process_cgroup_empty_ = false;
722
723 if (CgroupsAvailable()) {
724 bool use_memcg = swappiness_ != -1 || soft_limit_in_bytes_ != -1 || limit_in_bytes_ != -1 ||
725 limit_percent_ != -1 || !limit_property_.empty();
726 errno = -createProcessGroup(uid(), pid_, use_memcg);
727 if (errno != 0) {
728 Result<void> result = cgroups_activated.Write(kActivatingCgroupsFailed);
729 if (!result.ok()) {
730 return Error() << "Sending notification failed: " << result.error();
731 }
732 return Error() << "createProcessGroup(" << uid() << ", " << pid_ << ", " << use_memcg
733 << ") failed for service '" << name_ << "': " << strerror(errno);
734 }
735
736 // When the blkio controller is mounted in the v1 hierarchy, NormalIoPriority is
737 // the default (/dev/blkio). When the blkio controller is mounted in the v2 hierarchy, the
738 // NormalIoPriority profile has to be applied explicitly.
739 SetProcessProfiles(uid(), pid_, {"NormalIoPriority"});
740
741 if (use_memcg) {
742 ConfigureMemcg();
743 }
744 }
745
746 if (oom_score_adjust_ != DEFAULT_OOM_SCORE_ADJUST) {
747 LmkdRegister(name_, uid(), pid_, oom_score_adjust_);
748 }
749
750 if (Result<void> result = cgroups_activated.Write(kCgroupsActivated); !result.ok()) {
751 return Error() << "Sending cgroups activated notification failed: " << result.error();
752 }
753
754 cgroups_activated.Close();
755
756 // Call setpgid() from the parent process to make sure that this call has
757 // finished before the parent process calls kill(-pgid, ...).
758 if (!RequiresConsole(proc_attr_)) {
759 if (setpgid(pid, pid) < 0) {
760 switch (errno) {
761 case EACCES: // Child has already performed setpgid() followed by execve().
762 case ESRCH: // Child process no longer exists.
763 break;
764 default:
765 PLOG(ERROR) << "setpgid() from parent failed";
766 }
767 }
768 } else {
769 // The Read() call below will return an error if the child is killed.
770 if (Result<uint8_t> result = setsid_finished.Read();
771 !result.ok() || *result != kSetSidFinished) {
772 if (!result.ok()) {
773 return Error() << "Waiting for setsid() failed: " << result.error();
774 } else {
775 return Error() << "Waiting for setsid() failed: " << static_cast<uint32_t>(*result)
776 << " <> " << static_cast<uint32_t>(kSetSidFinished);
777 }
778 }
779 }
780
781 setsid_finished.Close();
782
783 NotifyStateChange("running");
784 reboot_on_failure.Disable();
785
786 LOG(INFO) << "... started service '" << name_ << "' has pid " << pid_;
787
788 return {};
789 }
790
791 // Set mount namespace for the service.
792 // The reason why remember the mount namespace:
793 // If this service is started before APEXes and corresponding linker configuration
794 // get available, mark it as pre-apexd one. Note that this marking is
795 // permanent. So for example, if the service is re-launched (e.g., due
796 // to crash), it is still recognized as pre-apexd... for consistency.
SetMountNamespace()797 void Service::SetMountNamespace() {
798 // APEXd is always started in the "current" namespace because it is the process to set up
799 // the current namespace. So, leave mount_namespace_ as empty.
800 if (args_[0] == "/system/bin/apexd") {
801 return;
802 }
803 // Services in the following list start in the "default" mount namespace.
804 // Note that they should use bootstrap bionic if they start before APEXes are ready.
805 static const std::set<std::string> kUseDefaultMountNamespace = {
806 "ueventd", // load firmwares from APEXes
807 "hwservicemanager", // load VINTF fragments from APEXes
808 "servicemanager", // load VINTF fragments from APEXes
809 };
810 if (kUseDefaultMountNamespace.find(name_) != kUseDefaultMountNamespace.end()) {
811 mount_namespace_ = NS_DEFAULT;
812 return;
813 }
814 // Use the "default" mount namespace only if it's ready
815 mount_namespace_ = IsDefaultMountNamespaceReady() ? NS_DEFAULT : NS_BOOTSTRAP;
816 }
817
// Counts the entries of /proc/self/task whose names do not start with '.'.
//
// Returns that count, or -1 if the directory cannot be opened.
static int ThreadCount() {
    std::unique_ptr<DIR, decltype(&closedir)> task_dir(opendir("/proc/self/task"), closedir);
    if (task_dir == nullptr) {
        return -1;
    }

    int threads = 0;
    for (dirent* item = readdir(task_dir.get()); item != nullptr;
         item = readdir(task_dir.get())) {
        // Skip "." and ".." (and any other dot entry).
        if (item->d_name[0] == '.') {
            continue;
        }
        ++threads;
    }
    return threads;
}
833
834 // Must be called BEFORE any threads are created. See also the sigprocmask() man page.
CreateSigchldFd()835 unique_fd Service::CreateSigchldFd() {
836 CHECK_EQ(ThreadCount(), 1);
837 sigset_t mask;
838 sigemptyset(&mask);
839 sigaddset(&mask, SIGCHLD);
840 if (sigprocmask(SIG_BLOCK, &mask, nullptr) < 0) {
841 PLOG(FATAL) << "Failed to block SIGCHLD";
842 }
843
844 return unique_fd(signalfd(-1, &mask, SFD_CLOEXEC));
845 }
846
OpenAndSaveStaticKallsymsFd()847 void Service::OpenAndSaveStaticKallsymsFd() {
848 Result<Descriptor> result = CreateSharedKallsymsFd();
849 if (!result.ok()) {
850 LOG(ERROR) << result.error();
851 }
852 }
853
854 // This function is designed to be called in two situations:
855 // 1) early during second_stage init, to open and save the shared fd as a
856 // static (see OpenAndSaveStaticKallsymsFd).
857 // 2) whenever a service requesting a copy of the fd is being started, at which
858 // point it will get a duplicated copy of the static fd.
CreateSharedKallsymsFd()859 Result<Descriptor> Service::CreateSharedKallsymsFd() {
860 static constexpr char kallsyms_path[] = "/proc/kallsyms";
861 static int static_fd = open(kallsyms_path, O_RDONLY | O_NONBLOCK | O_CLOEXEC);
862 if (static_fd < 0) {
863 return ErrnoError() << "failed to open " << kallsyms_path;
864 }
865
866 unique_fd fd{fcntl(static_fd, F_DUPFD_CLOEXEC, /*min_fd=*/3)};
867 if (fd < 0) {
868 return ErrnoError() << "failed fcntl(F_DUPFD_CLOEXEC)";
869 }
870
871 // Use the same environment variable as if the service specified
872 // "file /proc/kallsyms r".
873 return Descriptor(std::string(ANDROID_FILE_ENV_PREFIX) + kallsyms_path, std::move(fd));
874 }
875
SetStartedInFirstStage(pid_t pid)876 void Service::SetStartedInFirstStage(pid_t pid) {
877 LOG(INFO) << "adding first-stage service '" << name_ << "'...";
878
879 time_started_ = boot_clock::now(); // not accurate, but doesn't matter here
880 pid_ = pid;
881 flags_ |= SVC_RUNNING;
882 start_order_ = next_start_order_++;
883
884 NotifyStateChange("running");
885 }
886
ResetFlagsForStart()887 void Service::ResetFlagsForStart() {
888 // Starting a service removes it from the disabled or reset state and
889 // immediately takes it out of the restarting state if it was in there.
890 flags_ &= ~(SVC_DISABLED | SVC_RESTARTING | SVC_RESET | SVC_RESTART | SVC_DISABLED_START);
891 }
892
StartIfNotDisabled()893 Result<void> Service::StartIfNotDisabled() {
894 if (!(flags_ & SVC_DISABLED)) {
895 return Start();
896 } else {
897 flags_ |= SVC_DISABLED_START;
898 }
899 return {};
900 }
901
Enable()902 Result<void> Service::Enable() {
903 flags_ &= ~(SVC_DISABLED | SVC_RC_DISABLED);
904 if (flags_ & SVC_DISABLED_START) {
905 return Start();
906 }
907 return {};
908 }
909
Reset()910 void Service::Reset() {
911 StopOrReset(SVC_RESET);
912 }
913
Stop()914 void Service::Stop() {
915 StopOrReset(SVC_DISABLED);
916 }
917
Terminate()918 void Service::Terminate() {
919 flags_ &= ~(SVC_RESTARTING | SVC_DISABLED_START);
920 flags_ |= SVC_DISABLED;
921 if (pid_) {
922 KillProcessGroup(SIGTERM);
923 NotifyStateChange("stopping");
924 }
925 }
926
Timeout()927 void Service::Timeout() {
928 // All process state flags will be taken care of in Reap(), we really just want to kill the
929 // process here when it times out. Oneshot processes will transition to be disabled, and
930 // all other processes will transition to be restarting.
931 LOG(INFO) << "Service '" << name_ << "' expired its timeout of " << timeout_period_->count()
932 << " seconds and will now be killed";
933 if (pid_) {
934 KillProcessGroup(SIGKILL);
935 NotifyStateChange("stopping");
936 }
937 }
938
Restart()939 void Service::Restart() {
940 if (flags_ & SVC_RUNNING) {
941 /* Stop, wait, then start the service. */
942 StopOrReset(SVC_RESTART);
943 } else if (!(flags_ & SVC_RESTARTING)) {
944 /* Just start the service since it's not running. */
945 if (auto result = Start(); !result.ok()) {
946 LOG(ERROR) << "Could not restart '" << name_ << "': " << result.error();
947 }
948 } /* else: Service is restarting anyways. */
949 }
950
951 // The how field should be either SVC_DISABLED, SVC_RESET, or SVC_RESTART.
StopOrReset(int how)952 void Service::StopOrReset(int how) {
953 // The service is still SVC_RUNNING until its process exits, but if it has
954 // already exited it shoudn't attempt a restart yet.
955 flags_ &= ~(SVC_RESTARTING | SVC_DISABLED_START);
956
957 if ((how != SVC_DISABLED) && (how != SVC_RESET) && (how != SVC_RESTART)) {
958 // An illegal flag: default to SVC_DISABLED.
959 LOG(ERROR) << "service '" << name_ << "' requested unknown flag " << how
960 << ", defaulting to disabling it.";
961 how = SVC_DISABLED;
962 }
963
964 // If the service has not yet started, prevent it from auto-starting with its class.
965 if (how == SVC_RESET) {
966 flags_ |= (flags_ & SVC_RC_DISABLED) ? SVC_DISABLED : SVC_RESET;
967 } else {
968 flags_ |= how;
969 }
970 // Make sure it's in right status when a restart immediately follow a
971 // stop/reset or vice versa.
972 if (how == SVC_RESTART) {
973 flags_ &= (~(SVC_DISABLED | SVC_RESET));
974 } else {
975 flags_ &= (~SVC_RESTART);
976 }
977
978 if (pid_) {
979 if (flags_ & SVC_GENTLE_KILL) {
980 KillProcessGroup(SIGTERM);
981 if (!process_cgroup_empty()) std::this_thread::sleep_for(200ms);
982 }
983 KillProcessGroup(SIGKILL);
984 NotifyStateChange("stopping");
985 } else {
986 NotifyStateChange("stopped");
987 }
988 }
989
MakeTemporaryOneshotService(const std::vector<std::string> & args)990 Result<std::unique_ptr<Service>> Service::MakeTemporaryOneshotService(
991 const std::vector<std::string>& args) {
992 // Parse the arguments: exec [SECLABEL [UID [GID]*] --] COMMAND ARGS...
993 // SECLABEL can be a - to denote default
994 std::size_t command_arg = 1;
995 for (std::size_t i = 1; i < args.size(); ++i) {
996 if (args[i] == "--") {
997 command_arg = i + 1;
998 break;
999 }
1000 }
1001 if (command_arg > 4 + NR_SVC_SUPP_GIDS) {
1002 return Error() << "exec called with too many supplementary group ids";
1003 }
1004
1005 if (command_arg >= args.size()) {
1006 return Error() << "exec called without command";
1007 }
1008 std::vector<std::string> str_args(args.begin() + command_arg, args.end());
1009
1010 static size_t exec_count = 0;
1011 exec_count++;
1012 std::string name = "exec " + std::to_string(exec_count) + " (" + Join(str_args, " ") + ")";
1013
1014 unsigned flags = SVC_ONESHOT | SVC_TEMPORARY;
1015 unsigned namespace_flags = 0;
1016
1017 std::string seclabel = "";
1018 if (command_arg > 2 && args[1] != "-") {
1019 seclabel = args[1];
1020 }
1021 Result<uid_t> uid = 0;
1022 if (command_arg > 3) {
1023 uid = DecodeUid(args[2]);
1024 if (!uid.ok()) {
1025 return Error() << "Unable to decode UID for '" << args[2] << "': " << uid.error();
1026 }
1027 }
1028 Result<gid_t> gid = 0;
1029 std::vector<gid_t> supp_gids;
1030 if (command_arg > 4) {
1031 gid = DecodeUid(args[3]);
1032 if (!gid.ok()) {
1033 return Error() << "Unable to decode GID for '" << args[3] << "': " << gid.error();
1034 }
1035 std::size_t nr_supp_gids = command_arg - 1 /* -- */ - 4 /* exec SECLABEL UID GID */;
1036 for (size_t i = 0; i < nr_supp_gids; ++i) {
1037 auto supp_gid = DecodeUid(args[4 + i]);
1038 if (!supp_gid.ok()) {
1039 return Error() << "Unable to decode GID for '" << args[4 + i]
1040 << "': " << supp_gid.error();
1041 }
1042 supp_gids.push_back(*supp_gid);
1043 }
1044 }
1045
1046 return std::make_unique<Service>(name, flags, *uid, *gid, supp_gids, namespace_flags, seclabel,
1047 nullptr, /*filename=*/"", str_args);
1048 }
1049
1050 // This is used for snapuserd_proxy, which hands off a socket to snapuserd. It's
1051 // a special case to support the daemon launched in first-stage init. The persist
1052 // feature is not part of the init language and is only used here.
MarkSocketPersistent(const std::string & socket_name)1053 bool Service::MarkSocketPersistent(const std::string& socket_name) {
1054 for (auto& socket : sockets_) {
1055 if (socket.name == socket_name) {
1056 socket.persist = true;
1057 return true;
1058 }
1059 }
1060 return false;
1061 }
1062
1063 } // namespace init
1064 } // namespace android
1065