1 // Copyright 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef SANDBOXED_API_SANDBOX2_POLICYBUILDER_H_ 16 #define SANDBOXED_API_SANDBOX2_POLICYBUILDER_H_ 17 18 #include <linux/filter.h> 19 20 #include <cstddef> 21 #include <cstdint> 22 #include <functional> 23 #include <memory> 24 #include <string> 25 #include <type_traits> 26 #include <utility> 27 #include <vector> 28 29 #include "absl/base/attributes.h" 30 #include "absl/base/macros.h" 31 #include "absl/container/flat_hash_set.h" 32 #include "absl/log/check.h" 33 #include "absl/status/status.h" 34 #include "absl/status/statusor.h" 35 #include "absl/strings/string_view.h" 36 #include "absl/types/optional.h" 37 #include "absl/types/span.h" 38 #include "sandboxed_api/sandbox2/allowlists/map_exec.h" // Temporary 39 #include "sandboxed_api/sandbox2/forkserver.pb.h" 40 #include "sandboxed_api/sandbox2/mounts.h" 41 #include "sandboxed_api/sandbox2/network_proxy/filtering.h" 42 #include "sandboxed_api/sandbox2/policy.h" 43 44 struct bpf_labels; 45 46 namespace sandbox2 { 47 48 class AllowAllSyscalls; 49 class NamespacesToken; 50 class LoadUserBpfCodeFromFile; 51 class MapExec; 52 class SeccompSpeculation; 53 class TraceAllSyscalls; 54 class UnrestrictedNetworking; 55 56 namespace builder_internal { 57 58 template <typename, typename = void> 59 constexpr bool is_type_complete_v = false; 60 61 template <typename T> 62 constexpr bool is_type_complete_v<T, std::void_t<decltype(sizeof(T))>> = true; 63 64 } // namespace builder_internal 65 66 // PolicyBuilder is a helper class to simplify creation of policies. The builder 67 // uses fluent interface for convenience and increased readability of policies. 68 // 69 // To build a policy you simply create a new builder object, call methods on it 70 // specifying what you want and finally call `BuildOrDie()` to generate you 71 // policy. 72 // 73 // For instance this would generate a simple policy suitable for binaries doing 74 // only computations: 75 // 76 // ```c++ 77 // std::unique_ptr<Policy> policy = 78 // PolicyBuilder() 79 // .AllowRead() 80 // .AllowWrite() 81 // .AllowExit() 82 // .AllowSystemMalloc() 83 // .BuildOrDie(); 84 // ``` 85 // 86 // Operations are executed in the order they are dictated, though in most cases 87 // this has no influence since the operations themselves commute. 88 // 89 // For instance these two policies are equivalent: 90 // 91 // ```c++ 92 // auto policy = PolicyBuilder.AllowRead().AllowWrite().BuildOrDie(); 93 // auto policy = PolicyBuilder.AllowWrite().AllowRead().BuildOrDie(); 94 // ``` 95 // 96 // While these two are not: 97 // 98 // 99 // ```c++ 100 // auto policy = PolicyBuilder.AllowRead().BlockSyscallWithErrno(__NR_read, EIO) 101 // .BuildOrDie(); 102 // auto policy = PolicyBuilder.BlockSyscallWithErrno(__NR_read, EIO).AllowRead() 103 // .BuildOrDie(); 104 // ``` 105 // 106 // In fact the first one is equivalent to: 107 // 108 // ```c++ 109 // auto policy = PolicyBuilder.AllowRead().BuildOrDie(); 110 // ``` 111 // 112 // If you dislike the chained style, it is also possible to write the first 113 // example as this: 114 // 115 // ```c++ 116 // PolicyBuilder builder; 117 // builder.AllowRead(); 118 // builder.AllowWrite(); 119 // builder.AllowExit(); 120 // builder.AllowSystemMalloc(); 121 // auto policy = builder.BuildOrDie(); 122 // ``` 123 // 124 // For a more complicated example, see examples/static/static_sandbox.cc 125 class PolicyBuilder final { 126 public: 127 // Possible CPU fence modes for `AllowRestartableSequences()` 128 enum CpuFenceMode { 129 // Allow only fast fences for restartable sequences. 130 kRequireFastFences, 131 132 // Allow fast fences as well as slow fences if fast fences are unavailable. 133 kAllowSlowFences, 134 }; 135 136 static constexpr absl::string_view kDefaultHostname = "sandbox2"; 137 138 // Seccomp takes a 16-bit filter length, so the limit would be 64k. 139 // 140 // We set it lower so that there is for sure some room for the default policy. 141 static constexpr size_t kMaxUserPolicyLength = 30000; 142 143 using BpfFunc = const std::function<std::vector<sock_filter>(bpf_labels&)>&; 144 145 // Appends code to allow visibility restricted policy functionality. 146 // 147 // For example: 148 // `Allow(sandbox2::UnrestrictedNetworking);` 149 // This allows unrestricted network access by not creating a network 150 // namespace. 151 // 152 // Each `type T` is defined in an individual library and individually 153 // visibility restricted. 154 template <typename... T> Allow(T...tags)155 PolicyBuilder& Allow(T... tags) { 156 return (Allow(tags), ...); 157 } 158 159 // Disables the use of namespaces. 160 // 161 // The default security posture of Sandbox2 depends on the use of namespaces 162 // and syscall filters. By disabling namespaces, the default security posture 163 // is weakened. 164 // 165 // The consequence of disabling namespaces is that the sandboxee will be able 166 // to access the host's file system, network, and other resources if the 167 // appropriate syscalls are also allowed. 168 // 169 // Disabling namespaces is not recommended and should only be done if 170 // absolutely necessary. 171 PolicyBuilder& DisableNamespaces(NamespacesToken); 172 173 // Allows the use of memory mappings that are marked as executable. 174 // 175 // This applies to the mmap and mprotect syscalls and by default, mapped 176 // memory pages are not allowed to be marked as both writable and executable. 177 // 178 // The use of this API is usually only necessary for JIT engines. To 179 // actually allow executable mappings, the respective mmap()/mprotect() 180 // syscalls need to be added to the policy as well. 181 PolicyBuilder& Allow(MapExec); 182 183 // Allows the sandboxee to benefit from speculative execution. 184 // 185 // By default and on recent (6.x) kernels, additional mitigations are enabled 186 // to prevent speculative execution attacks. This call disables those 187 // mitigations to reclaim some of the performance overhead. 188 // 189 // NOTE: The performance benefits of using this API are highly dependent on 190 // the host CPU architecture and the workload running inside the sandbox. 191 // The Linux kernel will disable both the IBPB and STIBP mitigations for the 192 // the sandboxee on CPUs that support this. 193 // 194 // On newer AMD processors, such as Milan or Genoa, this leads to having fewer 195 // branch mispredictions and thus improved performance. However, forcing STIBP 196 // to be enabled on the machine level is even better, as those CPUs optimize 197 // for this. 198 // 199 // This is an advanced API, so users should make sure they understand the 200 // risks. Do not use in environments with untrusted code and/or data. 201 PolicyBuilder& Allow(SeccompSpeculation); 202 203 // Allows unrestricted access to the network by *not* creating a network 204 // namespace. 205 // 206 // This only disables the network namespace. To actually allow networking, 207 // you would also need to allow networking syscalls. 208 // 209 // NOTE: Requires namespace support. 210 PolicyBuilder& Allow(UnrestrictedNetworking); 211 212 // Appends code to allow a specific syscall. 213 PolicyBuilder& AllowSyscall(uint32_t num); 214 215 // Appends code to allow a number of syscalls. 216 PolicyBuilder& AllowSyscalls(absl::Span<const uint32_t> nums); 217 218 // Appends code to block a syscalls while setting errno to the error given. 219 PolicyBuilder& BlockSyscallsWithErrno(absl::Span<const uint32_t> nums, 220 int error); 221 222 // Appends code to block a specific syscall and setting errno. 223 PolicyBuilder& BlockSyscallWithErrno(uint32_t num, int error); 224 225 // Appends code to allow waiting for events on epoll file descriptors. 226 // 227 // Allows these syscalls: 228 // - epoll_wait 229 // - epoll_pwait 230 // - epoll_pwait2 231 PolicyBuilder& AllowEpollWait(); 232 233 // Appends code to allow using epoll. 234 // 235 // Allows these syscalls: 236 // - epoll_create 237 // - epoll_create1 238 // - epoll_ctl 239 // - epoll_wait 240 // - epoll_pwait 241 // - epoll_pwait2 242 PolicyBuilder& AllowEpoll(); 243 244 // Appends code to allow initializing an inotify instance. 245 // 246 // Allows these syscalls: 247 // - inotify_init 248 // - inotify_init1 249 PolicyBuilder& AllowInotifyInit(); 250 251 // Appends code to allow synchronous I/O multiplexing. 252 // 253 // Allows these syscalls: 254 // - pselect6 255 // - select 256 PolicyBuilder& AllowSelect(); 257 258 // Appends code to allow exiting. 259 // 260 // Allows these syscalls: 261 // - exit 262 // - exit_group 263 PolicyBuilder& AllowExit(); 264 265 // Appends code to allow restartable sequences and necessary /proc files. 266 // 267 // Allows these syscalls: 268 // - rseq 269 // - mmap(..., PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, ...) 270 // - getcpu 271 // - membarrier 272 // - futex(WAIT) 273 // - futex(WAKE) 274 // - rt_sigprocmask(SIG_SETMASK) 275 // Allows these files: 276 // - "/proc/cpuinfo" 277 // - "/proc/stat" 278 // And this directory (including subdirs/files): 279 // - "/sys/devices/system/cpu/" 280 // 281 // If `cpu_fence_mode` is `kAllowSlowFences`, also permits slow CPU fences. 282 // Allows these syscalls: 283 // - sched_getaffinity 284 // - sched_setaffinity 285 // Allows these files: 286 // - "/proc/self/cpuset" 287 // 288 // If `cpu_fence_mode` is `kRequireFastFences`, RSEQ functionality may not 289 // be enabled if fast CPU fences are not available. 290 PolicyBuilder& AllowRestartableSequences(CpuFenceMode cpu_fence_mode); ABSL_DEPRECATE_AND_INLINE()291 ABSL_DEPRECATE_AND_INLINE() 292 PolicyBuilder& AllowRestartableSequencesWithProcFiles( 293 CpuFenceMode cpu_fence_mode) { 294 return this->AllowRestartableSequences(cpu_fence_mode); 295 } 296 297 // Appends code to allow the scudo version of malloc, free and 298 // friends. 299 // 300 // This should be used in conjunction with namespaces. If scudo 301 // options are passed to the sandboxee through an environment variable, access 302 // to "/proc/self/environ" will have to be allowed by the policy. 303 // 304 // NOTE: This function is tuned towards the secure scudo allocator. If you are 305 // using another implementation, this function might not be the most 306 // suitable. 307 PolicyBuilder& AllowScudoMalloc(); 308 309 // Appends code to allow the system-allocator version of malloc, free and 310 // friends. 311 // 312 // NOTE: This function is tuned towards the malloc implementation in glibc. If 313 // you are using another implementation, this function might not be the 314 // most suitable. 315 PolicyBuilder& AllowSystemMalloc(); 316 317 // Appends code to allow the tcmalloc version of malloc, free and 318 // friends. 319 PolicyBuilder& AllowTcMalloc(); 320 321 // Appends code to allow syscalls typically used by the LLVM sanitizers: ASAN, 322 // MSAN, TSAN. 323 // 324 // NOTE: This method is intended as a best effort for adding syscalls that 325 // are common to many binaries. It may not be fully inclusive of all potential 326 // syscalls for all binaries. 327 PolicyBuilder& AllowLlvmSanitizers(); 328 329 // Appends code to allow syscalls typically used by the LLVM coverage. 330 // 331 // NOTE: This method is intended as a best effort. 332 PolicyBuilder& AllowLlvmCoverage(); 333 334 // Appends code to unconditionally allow mmap. Specifically this allows mmap 335 // and mmap2 syscall on architectures where these syscalls exist. 336 // 337 // This function requires that targets :map_exec library to be linked 338 // against. Otherwise, the PolicyBuilder will fail to build the policy. 339 // 340 // Prefer using `AllowMmapWithoutExec()` as allowing mapping executable pages 341 // makes exploitation easier. 342 std::enable_if_t<builder_internal::is_type_complete_v<MapExec>, 343 PolicyBuilder&> 344 AllowMmap(); 345 346 // Appends code to allow mmap calls that don't specify PROT_EXEC. 347 PolicyBuilder& AllowMmapWithoutExec(); 348 349 // Appends code to allow mprotect calls that don't specify PROT_EXEC. 350 PolicyBuilder& AllowMprotectWithoutExec(); 351 352 // Appends code to allow mlock and munlock calls. 353 PolicyBuilder& AllowMlock(); 354 355 // Appends code to allow calling futex with the given operation. 356 PolicyBuilder& AllowFutexOp(int op); 357 358 // Appends code to allow opening and possibly creating files or directories. 359 // 360 // Allows these syscalls: 361 // - creat 362 // - open 363 // - openat 364 PolicyBuilder& AllowOpen(); 365 366 // Appends code to allow calling stat, fstat and lstat. 367 // 368 // Allows these syscalls: 369 // - fstat 370 // - fstat64 371 // - fstatat 372 // - fstatat64 373 // - fstatfs 374 // - fstatfs64 375 // - lstat 376 // - lstat64 377 // - newfstatat 378 // - oldfstat 379 // - oldlstat 380 // - oldstat 381 // - stat 382 // - stat64 383 // - statfs 384 // - statfs64 385 // - ustat 386 PolicyBuilder& AllowStat(); 387 388 // Appends code to allow checking file permissions. 389 // 390 // Allows these syscalls: 391 // - access 392 // - faccessat 393 PolicyBuilder& AllowAccess(); 394 395 // Appends code to allow duplicating file descriptors. 396 // 397 // Allows these syscalls: 398 // - dup 399 // - dup2 400 // - dup3 401 PolicyBuilder& AllowDup(); 402 403 // Appends code to allow creating pipes. 404 // 405 // Allows these syscalls: 406 // - pipe 407 // - pipe2 408 PolicyBuilder& AllowPipe(); 409 410 // Appends code to allow changing file permissions. 411 // 412 // Allows these syscalls: 413 // - chmod 414 // - fchmod 415 // - fchmodat 416 PolicyBuilder& AllowChmod(); 417 418 // Appends code to allow changing file ownership. 419 // 420 // Allows these syscalls: 421 // - chown 422 // - lchown 423 // - fchown 424 // - fchownat 425 PolicyBuilder& AllowChown(); 426 427 // Appends code to the policy to allow reading from file descriptors. 428 // 429 // Allows these syscalls: 430 // - read 431 // - readv 432 // - preadv 433 // - pread64 434 PolicyBuilder& AllowRead(); 435 436 // Appends code to the policy to allow writing to file descriptors. 437 // 438 // Allows these syscalls: 439 // - write 440 // - writev 441 // - pwritev 442 // - pwrite64 443 PolicyBuilder& AllowWrite(); 444 445 // Appends code to allow reading directories. 446 // 447 // Allows these syscalls: 448 // - getdents 449 // - getdents64 450 PolicyBuilder& AllowReaddir(); 451 452 // Appends code to allow reading symbolic links. 453 // 454 // Allows these syscalls: 455 // - readlink 456 // - readlinkat 457 PolicyBuilder& AllowReadlink(); 458 459 // Appends code to allow creating links. 460 // 461 // Allows these syscalls: 462 // - link 463 // - linkat 464 PolicyBuilder& AllowLink(); 465 466 // Appends code to allow creating symbolic links. 467 // 468 // Allows these syscalls: 469 // - symlink 470 // - symlinkat 471 PolicyBuilder& AllowSymlink(); 472 473 // Appends code to allow creating directories. 474 // 475 // Allows these syscalls: 476 // - mkdir 477 // - mkdirat 478 PolicyBuilder& AllowMkdir(); 479 480 // Appends code to allow changing file timestamps. 481 // 482 // Allows these syscalls: 483 // - futimens 484 // - futimesat 485 // - utime 486 // - utimensat 487 // - utimes 488 PolicyBuilder& AllowUtime(); 489 490 // Appends code to allow safe calls to bpf. 491 // 492 // Allows this syscall: 493 // - bpf 494 // 495 // The above is only allowed when the cmd is one of: 496 // BPF_MAP_LOOKUP_ELEM, BPF_OBJ_GET, BPF_MAP_GET_NEXT_KEY, 497 // BPF_MAP_GET_FD_BY_ID, BPF_OBJ_GET_INFO_BY_FD 498 PolicyBuilder& AllowSafeBpf(); 499 500 // Appends code to allow safe calls to fcntl. 501 // 502 // Allows these syscalls: 503 // - fcntl 504 // - fcntl64 (on architectures where it exists) 505 // 506 // The above are only allowed when the cmd is one of: 507 // F_GETFD, F_SETFD, F_GETFL, F_SETFL, F_GETLK, F_SETLKW, F_SETLK, 508 // F_DUPFD, F_DUPFD_CLOEXEC 509 PolicyBuilder& AllowSafeFcntl(); 510 511 // Appends code to allow creating new processes. 512 // 513 // Allows these syscalls: 514 // - fork 515 // - vfork 516 // - clone 517 // 518 // NOTE: While this function allows the calls, the default policy is run first 519 // and it has checks for dangerous flags which can create a violation. See 520 // sandbox2/policy.cc for more details. 521 PolicyBuilder& AllowFork(); 522 523 // Appends code to allow waiting for processes. 524 // 525 // Allows these syscalls: 526 // - waitpid (on architectures where it exists) 527 // - wait4 528 PolicyBuilder& AllowWait(); 529 530 // Appends code to allow setting alarms / interval timers. 531 // 532 // Allows these syscalls: 533 // - alarm (on architectures where it exists) 534 // - setitimer 535 PolicyBuilder& AllowAlarm(); 536 537 // Appends code to allow setting posix timers. 538 // 539 // Allows these syscalls: 540 // - timer_create 541 // - timer_delete 542 // - timer_settime 543 // - timer_gettime 544 // - timer_getoverrun 545 PolicyBuilder& AllowPosixTimers(); 546 547 // Appends code to allow setting up signal handlers, returning from them, etc. 548 // 549 // Allows these syscalls: 550 // - rt_sigaction 551 // - rt_sigreturn 552 // - rt_procmask 553 // - signal (on architectures where it exists) 554 // - sigaction (on architectures where it exists) 555 // - sigreturn (on architectures where it exists) 556 // - sigprocmask (on architectures where it exists) 557 PolicyBuilder& AllowHandleSignals(); 558 559 // Appends code to allow doing the TCGETS ioctl. 560 // 561 // Allows these syscalls: 562 // - ioctl (when the first argument is TCGETS) 563 PolicyBuilder& AllowTCGETS(); 564 565 // Appends code to allow to getting the current time. 566 // 567 // Allows these syscalls: 568 // - time 569 // - gettimeofday 570 // - clock_gettime 571 PolicyBuilder& AllowTime(); 572 573 // Appends code to allow sleeping in the current thread. 574 // 575 // Allow these syscalls: 576 // - clock_nanosleep 577 // - nanosleep 578 PolicyBuilder& AllowSleep(); 579 580 // Appends code to allow getting the uid, euid, gid, etc. 581 // 582 // Allows these syscalls: 583 // - getuid + geteuid + getresuid 584 // - getgid + getegid + getresgid 585 // - getuid32 + geteuid32 + getresuid32 (on architectures where they exist) 586 // - getgid32 + getegid32 + getresgid32 (on architectures where they exist) 587 // - getgroups 588 PolicyBuilder& AllowGetIDs(); 589 590 // Appends code to allow getting the pid, ppid and tid. 591 // 592 // Allows these syscalls: 593 // - getpid 594 // - getppid 595 // - gettid 596 PolicyBuilder& AllowGetPIDs(); 597 598 // Appends code to allow getting process groups. 599 // 600 // Allows these syscalls: 601 // - getpgid 602 // - getpgrp 603 PolicyBuilder& AllowGetPGIDs(); 604 605 // Appends code to allow getting the rlimits. 606 // 607 // Allows these syscalls: 608 // - getrlimit 609 // - ugetrlimit (on architectures where it exist) 610 PolicyBuilder& AllowGetRlimit(); 611 612 // Appends code to allow setting the rlimits. 613 // 614 // Allows these syscalls: 615 // - setrlimit 616 // - usetrlimit (on architectures where it exist) 617 PolicyBuilder& AllowSetRlimit(); 618 619 // Appends code to allow reading random bytes. 620 // 621 // Allows these syscalls: 622 // - getrandom (with no flags or GRND_NONBLOCK) 623 // 624 PolicyBuilder& AllowGetRandom(); 625 626 // Appends code to allow configuring wipe-on-fork memory. 627 // 628 // Allows these syscalls: 629 // - madvise (with advice equal to -1 or MADV_WIPEONFORK). 630 PolicyBuilder& AllowWipeOnFork(); 631 632 // Enables syscalls required to use the logging support enabled via 633 // `Client::SendLogsToSupervisor()` 634 // 635 // Allows the following: 636 // - Writes 637 // - kill(0, SIGABRT) (for LOG(FATAL)) 638 // - clock_gettime 639 // - gettid 640 // - close 641 PolicyBuilder& AllowLogForwarding(); 642 643 // Appends code to allow deleting files and directories. 644 // 645 // Allows these syscalls: 646 // - rmdir (if available) 647 // - unlink (if available) 648 // - unlinkat 649 PolicyBuilder& AllowUnlink(); 650 651 // Appends code to allow renaming files. 652 // 653 // Allows these syscalls: 654 // - rename (if available) 655 // - renameat 656 // - renameat2 657 PolicyBuilder& AllowRename(); 658 659 // Appends code to allow creating event notification file descriptors. 660 // 661 // Allows these syscalls: 662 // - eventfd (if available) 663 // - eventfd2 664 PolicyBuilder& AllowEventFd(); 665 666 // Appends code to allow polling files. 667 // 668 // Allows these syscalls: 669 // - poll (if available) 670 // - ppoll 671 PolicyBuilder& AllowPoll(); 672 673 // Appends code to allow setting the name of a thread. 674 // 675 // Allows the following 676 // - prctl(PR_SET_NAME, ...) 677 PolicyBuilder& AllowPrctlSetName(); 678 679 // Appends code to allow setting a name for an anonymous memory region. 680 // 681 // Allows the following 682 // - prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ...) 683 PolicyBuilder& AllowPrctlSetVma(); 684 685 // Enables the syscalls necessary to start a statically linked binary. 686 // 687 // The current list of allowed syscalls are below. However you should *not* 688 // depend on the specifics, as these will change whenever the startup code 689 // changes. 690 // 691 // - uname, 692 // - brk, 693 // - set_tid_address, 694 // - set_robust_list, 695 // - futex(FUTEX_WAIT_BITSET, ...) 696 // - rt_sigaction(0x20, ...) 697 // - rt_sigaction(0x21, ...) 698 // - rt_sigprocmask(SIG_UNBLOCK, ...) 699 // - arch_prctl(ARCH_SET_FS) 700 // 701 // NOTE: This will call `BlockSyscallWithErrno(__NR_readlink, ENOENT)`. If you 702 // do not want readlink blocked, put a different call before this call. 703 PolicyBuilder& AllowStaticStartup(); 704 705 // Enables the syscalls necessary to start a dynamically linked binary. 706 // 707 // In addition to syscalls allowed by `AllowStaticStartup`, also allow 708 // reading, seeking, mmap()-ing and closing files. 709 std::enable_if_t<builder_internal::is_type_complete_v<MapExec>, 710 PolicyBuilder&> 711 AllowDynamicStartup(); 712 713 // Appends a policy, which will be run on the specified syscall. 714 // 715 // NOTE: This policy must be written without labels. If you need labels, use 716 // the overloaded function passing a BpfFunc object instead of the 717 // sock_filter. 718 PolicyBuilder& AddPolicyOnSyscall(uint32_t num, 719 absl::Span<const sock_filter> policy); 720 721 // Appends a policy, which will be run on the specified syscall. 722 // 723 // Example of how to use it: 724 // builder.AddPolicyOnSyscall( 725 // __NR_socket, [](bpf_labels& labels) -> std::vector<sock_filter> { 726 // return { 727 // ARG(0), // domain is first argument of socket 728 // JEQ(AF_UNIX, JUMP(&labels, af_unix)), 729 // JEQ(AF_NETLINK, JUMP(&labels, af_netlink)), 730 // KILL, 731 // 732 // LABEL(&labels, af_unix), 733 // ARG(1), 734 // JEQ(SOCK_STREAM | SOCK_NONBLOCK, ALLOW), 735 // KILL, 736 // 737 // LABEL(&labels, af_netlink), 738 // ARG(2), 739 // JEQ(NETLINK_ROUTE, ALLOW), 740 // }; 741 // }); 742 // 743 // NOTE: This policy may use labels. 744 PolicyBuilder& AddPolicyOnSyscall(uint32_t num, BpfFunc f); 745 746 // Appends a policy, which will be run on the specified syscalls. 747 // 748 // NOTE: This policy must be written without labels. 749 PolicyBuilder& AddPolicyOnSyscalls(absl::Span<const uint32_t> nums, 750 absl::Span<const sock_filter> policy); 751 752 // Appends a policy, which will be run on the specified syscalls. 753 // 754 // NOTE: This policy may use labels. 755 PolicyBuilder& AddPolicyOnSyscalls(absl::Span<const uint32_t> nums, 756 BpfFunc f); 757 758 // Equivalent to `AddPolicyOnSyscalls(mmap_syscalls, policy)`, where 759 // mmap_syscalls is a subset of {__NR_mmap, __NR_mmap2}, which exists on the 760 // target architecture. 761 // 762 // NOTE: This policy must be written without labels. 763 PolicyBuilder& AddPolicyOnMmap(absl::Span<const sock_filter> policy); 764 765 // Equivalent to `AddPolicyOnSyscalls(mmap_syscalls, f)`, where mmap_syscalls 766 // is a subset of {__NR_mmap, __NR_mmap2}, which exists on the target 767 // architecture. 768 // 769 // NOTE: This policy may use labels. 770 PolicyBuilder& AddPolicyOnMmap(BpfFunc f); 771 772 // Builds the policy returning a unique_ptr to it or status if an error 773 // happened. 774 // 775 // NOTE: This should only be called once. 776 absl::StatusOr<std::unique_ptr<Policy>> TryBuild(); 777 778 // Builds the policy returning a unique_ptr to it. 779 // 780 // NOTE: This function will abort if an error happened in any of the 781 // PolicyBuilder methods. This should only be called once. BuildOrDie()782 std::unique_ptr<Policy> BuildOrDie() { return TryBuild().value(); } 783 784 // Adds a bind-mount for a file from outside the namespace to inside. 785 // 786 // This will also create parent directories inside the namespace if needed. 787 // 788 // NOTE: Requires namespace support. 789 PolicyBuilder& AddFile(absl::string_view path, bool is_ro = true); 790 PolicyBuilder& AddFileAt(absl::string_view outside, absl::string_view inside, 791 bool is_ro = true); 792 793 // Adds the libraries and linker required by a binary. 794 // 795 // This does not add the binary itself, only the libraries it depends on. It 796 // should work correctly for most binaries, but you might need to tweak it in 797 // some cases. Run `ldd` yourself and use `AddFile` or `AddDirectory`. 798 // 799 // This function is safe even for untrusted/potentially malicious binaries. It 800 // adds libraries only from standard library dirs and ld_library_path. 801 // 802 // NOTE: Requires namespace support. This method is intended as a best effort 803 PolicyBuilder& AddLibrariesForBinary(absl::string_view path, 804 absl::string_view ld_library_path = {}); 805 806 // Similar to `AddLibrariesForBinary`, but the binary is specified with an 807 // open fd. 808 // 809 // NOTE: Requires namespace support. 810 PolicyBuilder& AddLibrariesForBinary(int fd, 811 absl::string_view ld_library_path = {}); 812 813 // Adds a bind-mount for a directory from outside the namespace to inside. 814 // 815 // This will also create parent directories inside the namespace if needed. 816 // 817 // If the directory contains symlinks, they might still be inaccessible 818 // inside the sandbox (resulting in ENOENT). For example, the symlinks might 819 // point to a location outside the sandbox. Symlinks can be resolved using 820 // `sapi::file_util::fileops::ReadLink()`. 821 // 822 // NOTE: Requires namespace support. 823 PolicyBuilder& AddDirectory(absl::string_view path, bool is_ro = true); 824 PolicyBuilder& AddDirectoryAt(absl::string_view outside, 825 absl::string_view inside, bool is_ro = true); 826 827 // Adds a tmpfs inside the namespace. 828 // 829 // This will also create parent directories inside the namespace if needed. 830 // 831 // NOTE: Requires namespace support. 832 PolicyBuilder& AddTmpfs(absl::string_view inside, size_t size); 833 834 // Allows unrestricted access to the network by *not* creating a network 835 // namespace. This only disables the network namespace. To actually allow 836 // networking, you would also need to allow networking syscalls. Calling this 837 // function will enable use of namespaces. 838 ABSL_DEPRECATED("Use Allow(sandbox2::UnrestrictedNetworking()) instead.") 839 PolicyBuilder& AllowUnrestrictedNetworking(); 840 841 // Enables a shared network namespace for all sandboxees that are started by 842 // the same forkserver. 843 // 844 // This results in sandboxed processes to run in the same shared network 845 // namespace instead of creating a separate network namespace for each 846 // sandboxed process started by the ForkServer process. 847 // 848 // NOTE: Requires namespace support. 849 // 850 // IMPORTANT: This is incompatible with AllowUnrestrictedNetworking. 851 PolicyBuilder& UseForkServerSharedNetNs(); 852 853 // Enables the use of namespaces. 854 // 855 // Namespaces are enabled by default. 856 // This is a no-op. 857 ABSL_DEPRECATED("Namespaces are enabled by default; no need to call this") EnableNamespaces()858 PolicyBuilder& EnableNamespaces() { 859 if (!use_namespaces_) { 860 SetError(absl::FailedPreconditionError( 861 "Namespaces cannot be both disabled and enabled")); 862 return *this; 863 } 864 requires_namespaces_ = true; 865 return *this; 866 } 867 868 // Set hostname in the network namespace. 869 // 870 // The default hostname is "sandbox2". 871 // 872 // NOTE: Requires namespace support. 873 // 874 // IMPORTANT: This is incompatible with AllowUnrestrictedNetworking. 875 PolicyBuilder& SetHostname(absl::string_view hostname); 876 877 // Enables/disables stack trace collection on violations. 878 // 879 // NOTE: This is enabled by default. 880 PolicyBuilder& CollectStacktracesOnViolation(bool enable); 881 882 // Enables/disables stack trace collection on signals (e.g. crashes / killed 883 // from a signal). 884 // 885 // NOTE: This is enabled by default. 886 PolicyBuilder& CollectStacktracesOnSignal(bool enable); 887 888 // Enables/disables stack trace collection on hitting a timeout. 889 // 890 // NOTE: This is enabled by default. 891 PolicyBuilder& CollectStacktracesOnTimeout(bool enable); 892 893 // Enables/disables stack trace collection on getting killed by the sandbox 894 // monitor or the user. 895 // 896 // NOTE: This is disabled by default. 897 PolicyBuilder& CollectStacktracesOnKill(bool enable); 898 899 // Enables/disables stack trace collection on normal process exit. 900 // 901 // NOTE: This is disabled by default. 902 PolicyBuilder& CollectStacktracesOnExit(bool enable); 903 904 // Changes the default action to ALLOW. 905 // 906 // All syscalls not handled explicitly by the policy will thus be 907 // allowed. 908 // 909 // IMPORTANT: Do not use in environments with untrusted code and/or data. 910 PolicyBuilder& DefaultAction(AllowAllSyscalls); 911 912 // Changes the default action to `SANDBOX2_TRACE`. 913 // 914 // All syscalls not handled explicitly by the policy will be passed off to 915 // the `sandbox2::Notify` implementation given to the `sandbox2::Sandbox2` 916 // instance. 917 PolicyBuilder& DefaultAction(TraceAllSyscalls); 918 919 ABSL_DEPRECATED("Use DefaultAction(sandbox2::AllowAllSyscalls()) instead") 920 PolicyBuilder& DangerDefaultAllowAll(); 921 922 // Allows syscalls that are necessary for the NetworkProxyClient. 923 PolicyBuilder& AddNetworkProxyPolicy(); 924 925 // Allows syscalls that are necessary for the NetworkProxyClient and 926 // the NetworkProxyHandler. 927 PolicyBuilder& AddNetworkProxyHandlerPolicy(); 928 929 // Makes root of the filesystem writeable 930 // Not recommended 931 // 932 // NOTE: Requires namespace support. 933 PolicyBuilder& SetRootWritable(); 934 935 // Changes mounts propagation from MS_PRIVATE to MS_SLAVE. 936 // DangerAllowMountPropagation()937 PolicyBuilder& DangerAllowMountPropagation() { 938 allow_mount_propagation_ = true; 939 return *this; 940 } 941 942 // Allows connections to this IP. 943 PolicyBuilder& AllowIPv4(const std::string& ip_and_mask, uint32_t port = 0); 944 PolicyBuilder& AllowIPv6(const std::string& ip_and_mask, uint32_t port = 0); 945 946 // Returns the current status of the PolicyBuilder. GetStatus()947 absl::Status GetStatus() { return last_status_; } 948 mounts()949 const Mounts& mounts() const { return mounts_; } 950 951 // Returns the absolute path for the given `relative_path`. 952 // 953 // If `relative_path` is absolute, it will be returned as is and `base` will 954 // be ignored. 955 // 956 // If `relative_path` is relative and `base` is not provided, it will be 957 // resolved relative to the current working directory. 958 // 959 // If `relative_path` is relative and an absolute `base` is provided, it will 960 // be resolved relative to `base`. 961 // 962 // If both, `relative_path` and `base` are relative, then first `base` will be 963 // resolved relative to the current working directory, and then 964 // `relative_path` will be resolved relative to `base`. 965 // 966 // In all cases where `relative_path` is relative, non-canonical paths will be 967 // canonicalized and the result must be anchored to the base directory. If the 968 // resulting path is outside the base directory, an error will be returned. 969 // 970 // On ERROR, such as `relative_path` is empty, an empty string is returned. 971 static std::string AnchorPathAbsolute(absl::string_view relative_path, 972 absl::string_view base = {}); 973 974 private: 975 friend class PolicyBuilderPeer; // For testing 976 friend class StackTracePeer; 977 978 // Similar to AddFile(At)/AddDirectory(At) but it won't force use of 979 // namespaces - files will only be added to the namespace if it is not 980 // disabled by the time of TryBuild(). 981 PolicyBuilder& AddFileIfNamespaced(absl::string_view path, bool is_ro = true); 982 PolicyBuilder& AddFileAtIfNamespaced(absl::string_view outside, 983 absl::string_view inside, 984 bool is_ro = true); 985 PolicyBuilder& AddDirectoryIfNamespaced(absl::string_view path, 986 bool is_ro = true); 987 PolicyBuilder& AddDirectoryAtIfNamespaced(absl::string_view outside, 988 absl::string_view inside, 989 bool is_ro = true); 990 991 // Allows a limited version of madvise. 992 PolicyBuilder& AllowLimitedMadvise(); 993 994 // Allows MADV_POPULATE_READ and MADV_POPULATE_WRITE. 995 PolicyBuilder& AllowMadvisePopulate(); 996 997 // Traps instead of denying ptrace. 998 PolicyBuilder& TrapPtrace(); 999 1000 // Appends code to block a specific syscall and setting errno at the end of 1001 // the policy - decision taken by user policy take precedence. 1002 PolicyBuilder& OverridableBlockSyscallWithErrno(uint32_t num, int error); 1003 SetMounts(Mounts mounts)1004 PolicyBuilder& SetMounts(Mounts mounts) { 1005 mounts_ = std::move(mounts); 1006 return *this; 1007 } 1008 1009 std::vector<sock_filter> ResolveBpfFunc(BpfFunc f); 1010 1011 // This function returns a PolicyBuilder so that we can use it in the status 1012 // macros. 1013 PolicyBuilder& SetError(const absl::Status& status); 1014 1015 Mounts mounts_; 1016 bool use_namespaces_ = true; 1017 bool requires_namespaces_ = false; 1018 NetNsMode netns_mode_ = NETNS_MODE_UNSPECIFIED; 1019 bool allow_map_exec_ = true; // Temporary default while we migrate users. 1020 bool allow_speculation_ = false; 1021 bool allow_mount_propagation_ = false; 1022 std::string hostname_ = std::string(kDefaultHostname); 1023 1024 // Stack trace collection 1025 bool collect_stacktrace_on_violation_ = true; 1026 bool collect_stacktrace_on_signal_ = true; 1027 bool collect_stacktrace_on_timeout_ = true; 1028 bool collect_stacktrace_on_kill_ = false; 1029 bool collect_stacktrace_on_exit_ = false; 1030 1031 // Seccomp fields 1032 std::vector<sock_filter> user_policy_; 1033 std::vector<sock_filter> overridable_policy_; 1034 std::optional<sock_filter> default_action_; 1035 bool user_policy_handles_bpf_ = false; 1036 bool user_policy_handles_ptrace_ = false; 1037 absl::flat_hash_set<uint32_t> handled_syscalls_; 1038 absl::flat_hash_set<uint32_t> allowed_syscalls_; 1039 absl::flat_hash_set<uint32_t> blocked_syscalls_; 1040 absl::flat_hash_set<uint32_t> custom_policy_syscalls_; 1041 1042 // Error handling 1043 absl::Status last_status_ = absl::OkStatus(); 1044 bool already_built_ = false; 1045 1046 struct { 1047 bool static_startup = false; 1048 bool dynamic_startup = false; 1049 bool system_malloc = false; 1050 bool scudo_malloc = false; 1051 bool tcmalloc = false; 1052 bool llvm_sanitizers = false; 1053 bool llvm_coverage = false; 1054 bool limited_madvise = false; 1055 bool madvise_populate = false; 1056 bool mmap_without_exec = false; 1057 bool mprotect_without_exec = false; 1058 bool safe_bpf = false; 1059 bool safe_fcntl = false; 1060 bool tcgets = false; 1061 bool slow_fences = false; 1062 bool fast_fences = false; 1063 bool getrlimit = false; 1064 bool getrandom = false; 1065 bool wipe_on_fork = false; 1066 bool log_forwarding = false; 1067 bool prctl_set_name = false; 1068 bool prctl_set_vma = false; 1069 } allowed_complex_; 1070 1071 // List of allowed hosts 1072 absl::optional<AllowedHosts> allowed_hosts_; 1073 }; 1074 1075 } // namespace sandbox2 1076 1077 #endif // SANDBOXED_API_SANDBOX2_POLICYBUILDER_H_ 1078