• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef SANDBOXED_API_SANDBOX2_POLICYBUILDER_H_
16 #define SANDBOXED_API_SANDBOX2_POLICYBUILDER_H_
17 
18 #include <linux/filter.h>
19 
20 #include <cstddef>
21 #include <cstdint>
22 #include <functional>
23 #include <memory>
24 #include <string>
25 #include <type_traits>
26 #include <utility>
27 #include <vector>
28 
29 #include "absl/base/attributes.h"
30 #include "absl/base/macros.h"
31 #include "absl/container/flat_hash_set.h"
32 #include "absl/log/check.h"
33 #include "absl/status/status.h"
34 #include "absl/status/statusor.h"
35 #include "absl/strings/string_view.h"
36 #include "absl/types/optional.h"
37 #include "absl/types/span.h"
38 #include "sandboxed_api/sandbox2/allowlists/map_exec.h"  // Temporary
39 #include "sandboxed_api/sandbox2/forkserver.pb.h"
40 #include "sandboxed_api/sandbox2/mounts.h"
41 #include "sandboxed_api/sandbox2/network_proxy/filtering.h"
42 #include "sandboxed_api/sandbox2/policy.h"
43 
44 struct bpf_labels;
45 
46 namespace sandbox2 {
47 
48 class AllowAllSyscalls;
49 class NamespacesToken;
50 class LoadUserBpfCodeFromFile;
51 class MapExec;
52 class SeccompSpeculation;
53 class TraceAllSyscalls;
54 class UnrestrictedNetworking;
55 
namespace builder_internal {

// Compile-time probe for whether a type is complete at the point where the
// variable template is first instantiated for that type.
//
// Primary template: the fallback, used when `sizeof(T)` is not a valid
// expression (i.e. `T` is only forward-declared).
template <typename, typename = void>
constexpr bool is_type_complete_v = false;

// Partial specialization: only viable when `sizeof(T)` is well-formed, which
// requires `T` to be a complete type.
template <typename T>
constexpr bool is_type_complete_v<T, std::void_t<decltype(sizeof(T))>> = true;

}  // namespace builder_internal
65 
66 // PolicyBuilder is a helper class to simplify creation of policies. The builder
67 // uses fluent interface for convenience and increased readability of policies.
68 //
// To build a policy you simply create a new builder object, call methods on it
// specifying what you want and finally call `BuildOrDie()` to generate your
// policy.
72 //
73 // For instance this would generate a simple policy suitable for binaries doing
74 // only computations:
75 //
76 // ```c++
77 // std::unique_ptr<Policy> policy =
78 //     PolicyBuilder()
79 //       .AllowRead()
80 //       .AllowWrite()
81 //       .AllowExit()
82 //       .AllowSystemMalloc()
83 //       .BuildOrDie();
84 // ```
85 //
86 // Operations are executed in the order they are dictated, though in most cases
87 // this has no influence since the operations themselves commute.
88 //
89 // For instance these two policies are equivalent:
90 //
91 // ```c++
// auto policy = PolicyBuilder().AllowRead().AllowWrite().BuildOrDie();
// auto policy = PolicyBuilder().AllowWrite().AllowRead().BuildOrDie();
94 // ```
95 //
96 // While these two are not:
97 //
98 //
99 // ```c++
// auto policy = PolicyBuilder()
//                   .AllowRead()
//                   .BlockSyscallWithErrno(__NR_read, EIO)
//                   .BuildOrDie();
// auto policy = PolicyBuilder()
//                   .BlockSyscallWithErrno(__NR_read, EIO)
//                   .AllowRead()
//                   .BuildOrDie();
104 // ```
105 //
106 // In fact the first one is equivalent to:
107 //
108 // ```c++
// auto policy = PolicyBuilder().AllowRead().BuildOrDie();
110 // ```
111 //
112 // If you dislike the chained style, it is also possible to write the first
113 // example as this:
114 //
115 // ```c++
116 // PolicyBuilder builder;
117 // builder.AllowRead();
118 // builder.AllowWrite();
119 // builder.AllowExit();
120 // builder.AllowSystemMalloc();
121 // auto policy = builder.BuildOrDie();
122 // ```
123 //
124 // For a more complicated example, see examples/static/static_sandbox.cc
125 class PolicyBuilder final {
126  public:
127   // Possible CPU fence modes for `AllowRestartableSequences()`
128   enum CpuFenceMode {
129     // Allow only fast fences for restartable sequences.
130     kRequireFastFences,
131 
132     // Allow fast fences as well as slow fences if fast fences are unavailable.
133     kAllowSlowFences,
134   };
135 
136   static constexpr absl::string_view kDefaultHostname = "sandbox2";
137 
138   // Seccomp takes a 16-bit filter length, so the limit would be 64k.
139   //
140   // We set it lower so that there is for sure some room for the default policy.
141   static constexpr size_t kMaxUserPolicyLength = 30000;
142 
143   using BpfFunc = const std::function<std::vector<sock_filter>(bpf_labels&)>&;
144 
145   // Appends code to allow visibility restricted policy functionality.
146   //
147   // For example:
148   // `Allow(sandbox2::UnrestrictedNetworking);`
149   // This allows unrestricted network access by not creating a network
150   // namespace.
151   //
152   // Each `type T` is defined in an individual library and individually
153   // visibility restricted.
154   template <typename... T>
Allow(T...tags)155   PolicyBuilder& Allow(T... tags) {
156     return (Allow(tags), ...);
157   }
158 
159   // Disables the use of namespaces.
160   //
161   // The default security posture of Sandbox2 depends on the use of namespaces
162   // and syscall filters. By disabling namespaces, the default security posture
163   // is weakened.
164   //
165   // The consequence of disabling namespaces is that the sandboxee will be able
166   // to access the host's file system, network, and other resources if the
167   // appropriate syscalls are also allowed.
168   //
169   // Disabling namespaces is not recommended and should only be done if
170   // absolutely necessary.
171   PolicyBuilder& DisableNamespaces(NamespacesToken);
172 
173   // Allows the use of memory mappings that are marked as executable.
174   //
175   // This applies to the mmap and mprotect syscalls and by default, mapped
176   // memory pages are not allowed to be marked as both writable and executable.
177   //
178   // The use of this API is usually only necessary for JIT engines. To
179   // actually allow executable mappings, the respective mmap()/mprotect()
180   // syscalls need to be added to the policy as well.
181   PolicyBuilder& Allow(MapExec);
182 
183   // Allows the sandboxee to benefit from speculative execution.
184   //
185   // By default and on recent (6.x) kernels, additional mitigations are enabled
186   // to prevent speculative execution attacks. This call disables those
187   // mitigations to reclaim some of the performance overhead.
188   //
189   // NOTE: The performance benefits of using this API are highly dependent on
190   // the host CPU architecture and the workload running inside the sandbox.
  // The Linux kernel will disable both the IBPB and STIBP mitigations for the
  // sandboxee on CPUs that support this.
193   //
194   // On newer AMD processors, such as Milan or Genoa, this leads to having fewer
195   // branch mispredictions and thus improved performance. However, forcing STIBP
196   // to be enabled on the machine level is even better, as those CPUs optimize
197   // for this.
198   //
199   // This is an advanced API, so users should make sure they understand the
200   // risks. Do not use in environments with untrusted code and/or data.
201   PolicyBuilder& Allow(SeccompSpeculation);
202 
203   // Allows unrestricted access to the network by *not* creating a network
204   // namespace.
205   //
206   // This only disables the network namespace. To actually allow networking,
207   // you would also need to allow networking syscalls.
208   //
209   // NOTE: Requires namespace support.
210   PolicyBuilder& Allow(UnrestrictedNetworking);
211 
212   // Appends code to allow a specific syscall.
213   PolicyBuilder& AllowSyscall(uint32_t num);
214 
215   // Appends code to allow a number of syscalls.
216   PolicyBuilder& AllowSyscalls(absl::Span<const uint32_t> nums);
217 
  // Appends code to block syscalls while setting errno to the error given.
219   PolicyBuilder& BlockSyscallsWithErrno(absl::Span<const uint32_t> nums,
220                                         int error);
221 
  // Appends code to block a specific syscall and set errno.
223   PolicyBuilder& BlockSyscallWithErrno(uint32_t num, int error);
224 
225   // Appends code to allow waiting for events on epoll file descriptors.
226   //
227   // Allows these syscalls:
228   // - epoll_wait
229   // - epoll_pwait
230   // - epoll_pwait2
231   PolicyBuilder& AllowEpollWait();
232 
233   // Appends code to allow using epoll.
234   //
235   // Allows these syscalls:
236   // - epoll_create
237   // - epoll_create1
238   // - epoll_ctl
239   // - epoll_wait
240   // - epoll_pwait
241   // - epoll_pwait2
242   PolicyBuilder& AllowEpoll();
243 
244   // Appends code to allow initializing an inotify instance.
245   //
246   // Allows these syscalls:
247   // - inotify_init
248   // - inotify_init1
249   PolicyBuilder& AllowInotifyInit();
250 
251   // Appends code to allow synchronous I/O multiplexing.
252   //
253   // Allows these syscalls:
254   // - pselect6
255   // - select
256   PolicyBuilder& AllowSelect();
257 
258   // Appends code to allow exiting.
259   //
260   // Allows these syscalls:
261   // - exit
262   // - exit_group
263   PolicyBuilder& AllowExit();
264 
265   // Appends code to allow restartable sequences and necessary /proc files.
266   //
267   // Allows these syscalls:
268   // - rseq
269   // - mmap(..., PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, ...)
270   // - getcpu
271   // - membarrier
272   // - futex(WAIT)
273   // - futex(WAKE)
274   // - rt_sigprocmask(SIG_SETMASK)
275   // Allows these files:
276   // - "/proc/cpuinfo"
277   // - "/proc/stat"
278   // And this directory (including subdirs/files):
279   // - "/sys/devices/system/cpu/"
280   //
281   // If `cpu_fence_mode` is `kAllowSlowFences`, also permits slow CPU fences.
282   // Allows these syscalls:
283   // - sched_getaffinity
284   // - sched_setaffinity
285   // Allows these files:
286   // - "/proc/self/cpuset"
287   //
288   // If `cpu_fence_mode` is `kRequireFastFences`, RSEQ functionality may not
289   // be enabled if fast CPU fences are not available.
290   PolicyBuilder& AllowRestartableSequences(CpuFenceMode cpu_fence_mode);
ABSL_DEPRECATE_AND_INLINE()291   ABSL_DEPRECATE_AND_INLINE()
292   PolicyBuilder& AllowRestartableSequencesWithProcFiles(
293       CpuFenceMode cpu_fence_mode) {
294     return this->AllowRestartableSequences(cpu_fence_mode);
295   }
296 
297   // Appends code to allow the scudo version of malloc, free and
298   // friends.
299   //
300   // This should be used in conjunction with namespaces. If scudo
301   // options are passed to the sandboxee through an environment variable, access
302   // to "/proc/self/environ" will have to be allowed by the policy.
303   //
304   // NOTE: This function is tuned towards the secure scudo allocator. If you are
305   //       using another implementation, this function might not be the most
306   //       suitable.
307   PolicyBuilder& AllowScudoMalloc();
308 
309   // Appends code to allow the system-allocator version of malloc, free and
310   // friends.
311   //
312   // NOTE: This function is tuned towards the malloc implementation in glibc. If
313   //       you are using another implementation, this function might not be the
314   //       most suitable.
315   PolicyBuilder& AllowSystemMalloc();
316 
317   // Appends code to allow the tcmalloc version of malloc, free and
318   // friends.
319   PolicyBuilder& AllowTcMalloc();
320 
321   // Appends code to allow syscalls typically used by the LLVM sanitizers: ASAN,
322   // MSAN, TSAN.
323   //
324   // NOTE: This method is intended as a best effort for adding syscalls that
325   // are common to many binaries. It may not be fully inclusive of all potential
326   // syscalls for all binaries.
327   PolicyBuilder& AllowLlvmSanitizers();
328 
329   // Appends code to allow syscalls typically used by the LLVM coverage.
330   //
331   // NOTE: This method is intended as a best effort.
332   PolicyBuilder& AllowLlvmCoverage();
333 
334   // Appends code to unconditionally allow mmap. Specifically this allows mmap
335   // and mmap2 syscall on architectures where these syscalls exist.
336   //
  // This function requires the :map_exec library target to be linked in.
  // Otherwise, the PolicyBuilder will fail to build the policy.
339   //
340   // Prefer using `AllowMmapWithoutExec()` as allowing mapping executable pages
341   // makes exploitation easier.
342   std::enable_if_t<builder_internal::is_type_complete_v<MapExec>,
343                    PolicyBuilder&>
344   AllowMmap();
345 
346   // Appends code to allow mmap calls that don't specify PROT_EXEC.
347   PolicyBuilder& AllowMmapWithoutExec();
348 
349   // Appends code to allow mprotect calls that don't specify PROT_EXEC.
350   PolicyBuilder& AllowMprotectWithoutExec();
351 
352   // Appends code to allow mlock and munlock calls.
353   PolicyBuilder& AllowMlock();
354 
355   // Appends code to allow calling futex with the given operation.
356   PolicyBuilder& AllowFutexOp(int op);
357 
358   // Appends code to allow opening and possibly creating files or directories.
359   //
360   // Allows these syscalls:
361   // - creat
362   // - open
363   // - openat
364   PolicyBuilder& AllowOpen();
365 
366   // Appends code to allow calling stat, fstat and lstat.
367   //
368   // Allows these syscalls:
369   // - fstat
370   // - fstat64
371   // - fstatat
372   // - fstatat64
373   // - fstatfs
374   // - fstatfs64
375   // - lstat
376   // - lstat64
377   // - newfstatat
378   // - oldfstat
379   // - oldlstat
380   // - oldstat
381   // - stat
382   // - stat64
383   // - statfs
384   // - statfs64
385   // - ustat
386   PolicyBuilder& AllowStat();
387 
388   // Appends code to allow checking file permissions.
389   //
390   // Allows these syscalls:
391   // - access
392   // - faccessat
393   PolicyBuilder& AllowAccess();
394 
395   // Appends code to allow duplicating file descriptors.
396   //
397   // Allows these syscalls:
398   // - dup
399   // - dup2
400   // - dup3
401   PolicyBuilder& AllowDup();
402 
403   // Appends code to allow creating pipes.
404   //
405   // Allows these syscalls:
406   // - pipe
407   // - pipe2
408   PolicyBuilder& AllowPipe();
409 
410   // Appends code to allow changing file permissions.
411   //
412   // Allows these syscalls:
413   // - chmod
414   // - fchmod
415   // - fchmodat
416   PolicyBuilder& AllowChmod();
417 
418   // Appends code to allow changing file ownership.
419   //
420   // Allows these syscalls:
421   // - chown
422   // - lchown
423   // - fchown
424   // - fchownat
425   PolicyBuilder& AllowChown();
426 
427   // Appends code to the policy to allow reading from file descriptors.
428   //
429   // Allows these syscalls:
430   // - read
431   // - readv
432   // - preadv
433   // - pread64
434   PolicyBuilder& AllowRead();
435 
436   // Appends code to the policy to allow writing to file descriptors.
437   //
438   // Allows these syscalls:
439   // - write
440   // - writev
441   // - pwritev
442   // - pwrite64
443   PolicyBuilder& AllowWrite();
444 
445   // Appends code to allow reading directories.
446   //
447   // Allows these syscalls:
448   // - getdents
449   // - getdents64
450   PolicyBuilder& AllowReaddir();
451 
452   // Appends code to allow reading symbolic links.
453   //
454   // Allows these syscalls:
455   // - readlink
456   // - readlinkat
457   PolicyBuilder& AllowReadlink();
458 
459   // Appends code to allow creating links.
460   //
461   // Allows these syscalls:
462   // - link
463   // - linkat
464   PolicyBuilder& AllowLink();
465 
466   // Appends code to allow creating symbolic links.
467   //
468   // Allows these syscalls:
469   // - symlink
470   // - symlinkat
471   PolicyBuilder& AllowSymlink();
472 
473   // Appends code to allow creating directories.
474   //
475   // Allows these syscalls:
476   // - mkdir
477   // - mkdirat
478   PolicyBuilder& AllowMkdir();
479 
480   // Appends code to allow changing file timestamps.
481   //
482   // Allows these syscalls:
483   // - futimens
484   // - futimesat
485   // - utime
486   // - utimensat
487   // - utimes
488   PolicyBuilder& AllowUtime();
489 
490   // Appends code to allow safe calls to bpf.
491   //
492   // Allows this syscall:
493   // - bpf
494   //
495   // The above is only allowed when the cmd is one of:
496   // BPF_MAP_LOOKUP_ELEM, BPF_OBJ_GET, BPF_MAP_GET_NEXT_KEY,
497   // BPF_MAP_GET_FD_BY_ID, BPF_OBJ_GET_INFO_BY_FD
498   PolicyBuilder& AllowSafeBpf();
499 
500   // Appends code to allow safe calls to fcntl.
501   //
502   // Allows these syscalls:
503   // - fcntl
504   // - fcntl64 (on architectures where it exists)
505   //
506   // The above are only allowed when the cmd is one of:
507   // F_GETFD, F_SETFD, F_GETFL, F_SETFL, F_GETLK, F_SETLKW, F_SETLK,
508   // F_DUPFD, F_DUPFD_CLOEXEC
509   PolicyBuilder& AllowSafeFcntl();
510 
511   // Appends code to allow creating new processes.
512   //
513   // Allows these syscalls:
514   // - fork
515   // - vfork
516   // - clone
517   //
518   // NOTE: While this function allows the calls, the default policy is run first
519   // and it has checks for dangerous flags which can create a violation. See
520   // sandbox2/policy.cc for more details.
521   PolicyBuilder& AllowFork();
522 
523   // Appends code to allow waiting for processes.
524   //
525   // Allows these syscalls:
526   // - waitpid (on architectures where it exists)
527   // - wait4
528   PolicyBuilder& AllowWait();
529 
530   // Appends code to allow setting alarms / interval timers.
531   //
532   // Allows these syscalls:
533   // - alarm (on architectures where it exists)
534   // - setitimer
535   PolicyBuilder& AllowAlarm();
536 
537   // Appends code to allow setting posix timers.
538   //
539   // Allows these syscalls:
540   // - timer_create
541   // - timer_delete
542   // - timer_settime
543   // - timer_gettime
544   // - timer_getoverrun
545   PolicyBuilder& AllowPosixTimers();
546 
547   // Appends code to allow setting up signal handlers, returning from them, etc.
548   //
549   // Allows these syscalls:
550   // - rt_sigaction
551   // - rt_sigreturn
  // - rt_sigprocmask
553   // - signal (on architectures where it exists)
554   // - sigaction (on architectures where it exists)
555   // - sigreturn (on architectures where it exists)
556   // - sigprocmask (on architectures where it exists)
557   PolicyBuilder& AllowHandleSignals();
558 
559   // Appends code to allow doing the TCGETS ioctl.
560   //
561   // Allows these syscalls:
562   // - ioctl (when the first argument is TCGETS)
563   PolicyBuilder& AllowTCGETS();
564 
  // Appends code to allow getting the current time.
566   //
567   // Allows these syscalls:
568   // - time
569   // - gettimeofday
570   // - clock_gettime
571   PolicyBuilder& AllowTime();
572 
573   // Appends code to allow sleeping in the current thread.
574   //
  // Allows these syscalls:
576   // - clock_nanosleep
577   // - nanosleep
578   PolicyBuilder& AllowSleep();
579 
580   // Appends code to allow getting the uid, euid, gid, etc.
581   //
582   // Allows these syscalls:
583   // - getuid + geteuid + getresuid
584   // - getgid + getegid + getresgid
585   // - getuid32 + geteuid32 + getresuid32 (on architectures where they exist)
586   // - getgid32 + getegid32 + getresgid32 (on architectures where they exist)
587   // - getgroups
588   PolicyBuilder& AllowGetIDs();
589 
590   // Appends code to allow getting the pid, ppid and tid.
591   //
592   // Allows these syscalls:
593   // - getpid
594   // - getppid
595   // - gettid
596   PolicyBuilder& AllowGetPIDs();
597 
598   // Appends code to allow getting process groups.
599   //
600   // Allows these syscalls:
601   // - getpgid
602   // - getpgrp
603   PolicyBuilder& AllowGetPGIDs();
604 
605   // Appends code to allow getting the rlimits.
606   //
607   // Allows these syscalls:
608   // - getrlimit
  // - ugetrlimit (on architectures where it exists)
610   PolicyBuilder& AllowGetRlimit();
611 
612   // Appends code to allow setting the rlimits.
613   //
614   // Allows these syscalls:
615   // - setrlimit
  // - usetrlimit (on architectures where it exists)
617   PolicyBuilder& AllowSetRlimit();
618 
619   // Appends code to allow reading random bytes.
620   //
621   // Allows these syscalls:
622   // - getrandom (with no flags or GRND_NONBLOCK)
623   //
624   PolicyBuilder& AllowGetRandom();
625 
626   // Appends code to allow configuring wipe-on-fork memory.
627   //
628   // Allows these syscalls:
629   // - madvise (with advice equal to -1 or MADV_WIPEONFORK).
630   PolicyBuilder& AllowWipeOnFork();
631 
632   // Enables syscalls required to use the logging support enabled via
633   // `Client::SendLogsToSupervisor()`
634   //
635   // Allows the following:
636   // - Writes
637   // - kill(0, SIGABRT) (for LOG(FATAL))
638   // - clock_gettime
639   // - gettid
640   // - close
641   PolicyBuilder& AllowLogForwarding();
642 
643   // Appends code to allow deleting files and directories.
644   //
645   // Allows these syscalls:
646   // - rmdir (if available)
647   // - unlink (if available)
648   // - unlinkat
649   PolicyBuilder& AllowUnlink();
650 
651   // Appends code to allow renaming files.
652   //
653   // Allows these syscalls:
654   // - rename (if available)
655   // - renameat
656   // - renameat2
657   PolicyBuilder& AllowRename();
658 
659   // Appends code to allow creating event notification file descriptors.
660   //
661   // Allows these syscalls:
662   // - eventfd (if available)
663   // - eventfd2
664   PolicyBuilder& AllowEventFd();
665 
666   // Appends code to allow polling files.
667   //
668   // Allows these syscalls:
669   // - poll (if available)
670   // - ppoll
671   PolicyBuilder& AllowPoll();
672 
673   // Appends code to allow setting the name of a thread.
674   //
675   // Allows the following
676   // - prctl(PR_SET_NAME, ...)
677   PolicyBuilder& AllowPrctlSetName();
678 
679   // Appends code to allow setting a name for an anonymous memory region.
680   //
681   // Allows the following
682   // - prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ...)
683   PolicyBuilder& AllowPrctlSetVma();
684 
685   // Enables the syscalls necessary to start a statically linked binary.
686   //
687   // The current list of allowed syscalls are below. However you should *not*
688   // depend on the specifics, as these will change whenever the startup code
689   // changes.
690   //
691   // - uname,
692   // - brk,
693   // - set_tid_address,
694   // - set_robust_list,
695   // - futex(FUTEX_WAIT_BITSET, ...)
696   // - rt_sigaction(0x20, ...)
697   // - rt_sigaction(0x21, ...)
698   // - rt_sigprocmask(SIG_UNBLOCK, ...)
699   // - arch_prctl(ARCH_SET_FS)
700   //
701   // NOTE: This will call `BlockSyscallWithErrno(__NR_readlink, ENOENT)`. If you
702   // do not want readlink blocked, put a different call before this call.
703   PolicyBuilder& AllowStaticStartup();
704 
705   // Enables the syscalls necessary to start a dynamically linked binary.
706   //
707   // In addition to syscalls allowed by `AllowStaticStartup`, also allow
708   // reading, seeking, mmap()-ing and closing files.
709   std::enable_if_t<builder_internal::is_type_complete_v<MapExec>,
710                    PolicyBuilder&>
711   AllowDynamicStartup();
712 
713   // Appends a policy, which will be run on the specified syscall.
714   //
715   // NOTE: This policy must be written without labels. If you need labels, use
716   // the overloaded function passing a BpfFunc object instead of the
717   // sock_filter.
718   PolicyBuilder& AddPolicyOnSyscall(uint32_t num,
719                                     absl::Span<const sock_filter> policy);
720 
721   // Appends a policy, which will be run on the specified syscall.
722   //
723   // Example of how to use it:
724   //  builder.AddPolicyOnSyscall(
725   //      __NR_socket, [](bpf_labels& labels) -> std::vector<sock_filter> {
726   //        return {
727   //            ARG(0),  // domain is first argument of socket
728   //            JEQ(AF_UNIX, JUMP(&labels, af_unix)),
729   //            JEQ(AF_NETLINK, JUMP(&labels, af_netlink)),
730   //            KILL,
731   //
732   //            LABEL(&labels, af_unix),
733   //            ARG(1),
734   //            JEQ(SOCK_STREAM | SOCK_NONBLOCK, ALLOW),
735   //            KILL,
736   //
737   //            LABEL(&labels, af_netlink),
738   //            ARG(2),
739   //            JEQ(NETLINK_ROUTE, ALLOW),
740   //        };
741   //      });
742   //
743   // NOTE: This policy may use labels.
744   PolicyBuilder& AddPolicyOnSyscall(uint32_t num, BpfFunc f);
745 
746   // Appends a policy, which will be run on the specified syscalls.
747   //
748   // NOTE: This policy must be written without labels.
749   PolicyBuilder& AddPolicyOnSyscalls(absl::Span<const uint32_t> nums,
750                                      absl::Span<const sock_filter> policy);
751 
752   // Appends a policy, which will be run on the specified syscalls.
753   //
754   // NOTE: This policy may use labels.
755   PolicyBuilder& AddPolicyOnSyscalls(absl::Span<const uint32_t> nums,
756                                      BpfFunc f);
757 
758   // Equivalent to `AddPolicyOnSyscalls(mmap_syscalls, policy)`, where
759   // mmap_syscalls is a subset of {__NR_mmap, __NR_mmap2}, which exists on the
760   // target architecture.
761   //
762   // NOTE: This policy must be written without labels.
763   PolicyBuilder& AddPolicyOnMmap(absl::Span<const sock_filter> policy);
764 
765   // Equivalent to `AddPolicyOnSyscalls(mmap_syscalls, f)`, where mmap_syscalls
766   // is a subset of {__NR_mmap, __NR_mmap2}, which exists on the target
767   // architecture.
768   //
769   // NOTE: This policy may use labels.
770   PolicyBuilder& AddPolicyOnMmap(BpfFunc f);
771 
772   // Builds the policy returning a unique_ptr to it or status if an error
773   // happened.
774   //
775   // NOTE: This should only be called once.
776   absl::StatusOr<std::unique_ptr<Policy>> TryBuild();
777 
778   // Builds the policy returning a unique_ptr to it.
779   //
780   // NOTE: This function will abort if an error happened in any of the
781   // PolicyBuilder methods. This should only be called once.
BuildOrDie()782   std::unique_ptr<Policy> BuildOrDie() { return TryBuild().value(); }
783 
784   // Adds a bind-mount for a file from outside the namespace to inside.
785   //
786   // This will also create parent directories inside the namespace if needed.
787   //
788   // NOTE: Requires namespace support.
789   PolicyBuilder& AddFile(absl::string_view path, bool is_ro = true);
790   PolicyBuilder& AddFileAt(absl::string_view outside, absl::string_view inside,
791                            bool is_ro = true);
792 
793   // Adds the libraries and linker required by a binary.
794   //
795   // This does not add the binary itself, only the libraries it depends on. It
796   // should work correctly for most binaries, but you might need to tweak it in
797   // some cases. Run `ldd` yourself and use `AddFile` or `AddDirectory`.
798   //
799   // This function is safe even for untrusted/potentially malicious binaries. It
800   // adds libraries only from standard library dirs and ld_library_path.
801   //
  // NOTE: Requires namespace support. This method is intended as a best effort.
803   PolicyBuilder& AddLibrariesForBinary(absl::string_view path,
804                                        absl::string_view ld_library_path = {});
805 
806   // Similar to `AddLibrariesForBinary`, but the binary is specified with an
807   // open fd.
808   //
809   // NOTE: Requires namespace support.
810   PolicyBuilder& AddLibrariesForBinary(int fd,
811                                        absl::string_view ld_library_path = {});
812 
813   // Adds a bind-mount for a directory from outside the namespace to inside.
814   //
815   // This will also create parent directories inside the namespace if needed.
816   //
817   // If the directory contains symlinks, they might still be inaccessible
818   // inside the sandbox (resulting in ENOENT). For example, the symlinks might
819   // point to a location outside the sandbox. Symlinks can be resolved using
820   // `sapi::file_util::fileops::ReadLink()`.
821   //
822   // NOTE: Requires namespace support.
823   PolicyBuilder& AddDirectory(absl::string_view path, bool is_ro = true);
824   PolicyBuilder& AddDirectoryAt(absl::string_view outside,
825                                 absl::string_view inside, bool is_ro = true);
826 
827   // Adds a tmpfs inside the namespace.
828   //
829   // This will also create parent directories inside the namespace if needed.
830   //
831   // NOTE: Requires namespace support.
832   PolicyBuilder& AddTmpfs(absl::string_view inside, size_t size);
833 
834   // Allows unrestricted access to the network by *not* creating a network
835   // namespace. This only disables the network namespace. To actually allow
836   // networking, you would also need to allow networking syscalls. Calling this
837   // function will enable use of namespaces.
838   ABSL_DEPRECATED("Use Allow(sandbox2::UnrestrictedNetworking()) instead.")
839   PolicyBuilder& AllowUnrestrictedNetworking();
840 
841   // Enables a shared network namespace for all sandboxees that are started by
842   // the same forkserver.
843   //
  // This causes sandboxed processes to run in the same shared network
  // namespace instead of creating a separate network namespace for each
  // sandboxed process started by the ForkServer process.
847   //
848   // NOTE: Requires namespace support.
849   //
850   // IMPORTANT: This is incompatible with AllowUnrestrictedNetworking.
851   PolicyBuilder& UseForkServerSharedNetNs();
852 
853   // Enables the use of namespaces.
854   //
855   // Namespaces are enabled by default.
856   // This is a no-op.
857   ABSL_DEPRECATED("Namespaces are enabled by default; no need to call this")
EnableNamespaces()858   PolicyBuilder& EnableNamespaces() {
859     if (!use_namespaces_) {
860       SetError(absl::FailedPreconditionError(
861           "Namespaces cannot be both disabled and enabled"));
862       return *this;
863     }
864     requires_namespaces_ = true;
865     return *this;
866   }
867 
868   // Set hostname in the network namespace.
869   //
870   // The default hostname is "sandbox2".
871   //
872   // NOTE: Requires namespace support.
873   //
874   // IMPORTANT: This is incompatible with AllowUnrestrictedNetworking.
875   PolicyBuilder& SetHostname(absl::string_view hostname);
876 
  // Enables/disables stack trace collection on violations.
  //
  // NOTE: This is enabled by default.
  PolicyBuilder& CollectStacktracesOnViolation(bool enable);

  // Enables/disables stack trace collection on signals (e.g. crashes or being
  // killed by a signal).
  //
  // NOTE: This is enabled by default.
  PolicyBuilder& CollectStacktracesOnSignal(bool enable);

  // Enables/disables stack trace collection on hitting a timeout.
  //
  // NOTE: This is enabled by default.
  PolicyBuilder& CollectStacktracesOnTimeout(bool enable);

  // Enables/disables stack trace collection on getting killed by the sandbox
  // monitor or the user.
  //
  // NOTE: This is disabled by default.
  PolicyBuilder& CollectStacktracesOnKill(bool enable);

  // Enables/disables stack trace collection on normal process exit.
  //
  // NOTE: This is disabled by default.
  PolicyBuilder& CollectStacktracesOnExit(bool enable);
903 
  // Changes the default action to ALLOW.
  //
  // All syscalls not handled explicitly by the policy will thus be
  // allowed. Call as `DefaultAction(sandbox2::AllowAllSyscalls())`.
  //
  // IMPORTANT: Do not use in environments with untrusted code and/or data.
  PolicyBuilder& DefaultAction(AllowAllSyscalls);

  // Changes the default action to `SANDBOX2_TRACE`.
  //
  // All syscalls not handled explicitly by the policy will be passed off to
  // the `sandbox2::Notify` implementation given to the `sandbox2::Sandbox2`
  // instance.
  PolicyBuilder& DefaultAction(TraceAllSyscalls);

  // Deprecated spelling of `DefaultAction(sandbox2::AllowAllSyscalls())`.
  ABSL_DEPRECATED("Use DefaultAction(sandbox2::AllowAllSyscalls()) instead")
  PolicyBuilder& DangerDefaultAllowAll();
921 
  // Allows the syscalls that are necessary for the NetworkProxyClient.
  PolicyBuilder& AddNetworkProxyPolicy();

  // Allows the syscalls that are necessary for the NetworkProxyClient and
  // the NetworkProxyHandler.
  PolicyBuilder& AddNetworkProxyHandlerPolicy();

  // Makes the root of the filesystem writable.
  //
  // This is not recommended.
  //
  // NOTE: Requires namespace support.
  PolicyBuilder& SetRootWritable();
934 
  // Changes mounts propagation from MS_PRIVATE to MS_SLAVE.
  //
  // Only sets the `allow_mount_propagation_` flag on the builder and returns
  // the builder itself so that calls can be chained.
  PolicyBuilder& DangerAllowMountPropagation() {
    allow_mount_propagation_ = true;
    return *this;
  }
941 
  // Allows connections to the given IPv4 / IPv6 address.
  //
  // NOTE(review): `ip_and_mask` presumably carries an address with an optional
  // network mask, and the default `port` of 0 presumably means "any port" -
  // confirm both against the network proxy filtering code.
  PolicyBuilder& AllowIPv4(const std::string& ip_and_mask, uint32_t port = 0);
  PolicyBuilder& AllowIPv6(const std::string& ip_and_mask, uint32_t port = 0);
945 
946   // Returns the current status of the PolicyBuilder.
GetStatus()947   absl::Status GetStatus() { return last_status_; }
948 
  // Returns a read-only reference to the mounts held by this builder.
  const Mounts& mounts() const { return mounts_; }
950 
  // Returns the absolute path for the given `relative_path`.
  //
  // If `relative_path` is absolute, it will be returned as is and `base` will
  // be ignored.
  //
  // If `relative_path` is relative and `base` is not provided, it will be
  // resolved relative to the current working directory.
  //
  // If `relative_path` is relative and an absolute `base` is provided, it will
  // be resolved relative to `base`.
  //
  // If both `relative_path` and `base` are relative, then `base` will first be
  // resolved relative to the current working directory, and then
  // `relative_path` will be resolved relative to `base`.
  //
  // In all cases where `relative_path` is relative, non-canonical paths will be
  // canonicalized and the result must be anchored to the base directory. If the
  // resulting path is outside the base directory, an error will be returned.
  //
  // On error (for example, when `relative_path` is empty), an empty string is
  // returned.
  static std::string AnchorPathAbsolute(absl::string_view relative_path,
                                        absl::string_view base = {});
973 
974  private:
  // Peer classes that are granted access to the private members below.
  friend class PolicyBuilderPeer;  // For testing
  // NOTE(review): purpose not visible here; presumably stack-trace related
  // internals or tests - confirm.
  friend class StackTracePeer;
977 
  // Similar to AddFile(At)/AddDirectory(At), but these variants do not force
  // the use of namespaces: the files/directories will only be added to the
  // namespace if namespaces have not been disabled by the time of TryBuild().
  //
  // `is_ro` defaults to true in all four variants.
  PolicyBuilder& AddFileIfNamespaced(absl::string_view path, bool is_ro = true);
  PolicyBuilder& AddFileAtIfNamespaced(absl::string_view outside,
                                       absl::string_view inside,
                                       bool is_ro = true);
  PolicyBuilder& AddDirectoryIfNamespaced(absl::string_view path,
                                          bool is_ro = true);
  PolicyBuilder& AddDirectoryAtIfNamespaced(absl::string_view outside,
                                            absl::string_view inside,
                                            bool is_ro = true);
990 
  // Allows a limited version of madvise.
  PolicyBuilder& AllowLimitedMadvise();

  // Allows MADV_POPULATE_READ and MADV_POPULATE_WRITE.
  PolicyBuilder& AllowMadvisePopulate();

  // Traps instead of denying ptrace.
  PolicyBuilder& TrapPtrace();

  // Appends code at the end of the policy that blocks the syscall `num` and
  // sets errno to `error`. Decisions taken by the user policy take precedence.
  PolicyBuilder& OverridableBlockSyscallWithErrno(uint32_t num, int error);
1003 
  // Replaces the mounts held by this builder with `mounts`.
  //
  // The argument is taken by value and moved into `mounts_`; returns the
  // builder itself so that calls can be chained.
  PolicyBuilder& SetMounts(Mounts mounts) {
    mounts_ = std::move(mounts);
    return *this;
  }
1008 
  // Turns the BPF-producing function `f` into a list of filter instructions.
  // NOTE(review): presumably invokes `f` and resolves its jump labels -
  // confirm against the implementation.
  std::vector<sock_filter> ResolveBpfFunc(BpfFunc f);

  // Reports `status` as an error on this builder.
  //
  // This function returns a PolicyBuilder so that we can use it in the status
  // macros.
  PolicyBuilder& SetError(const absl::Status& status);
1014 
  // Mounts of the policy; exposed through mounts() and replaced by SetMounts().
  Mounts mounts_;
  // Whether namespaces are in use. They are on by default.
  bool use_namespaces_ = true;
  // Set when something explicitly depends on namespaces (e.g.
  // EnableNamespaces()).
  bool requires_namespaces_ = false;
  // Network namespace mode; unspecified until configured.
  NetNsMode netns_mode_ = NETNS_MODE_UNSPECIFIED;
  bool allow_map_exec_ = true;  // Temporary default while we migrate users.
  // NOTE(review): presumably toggled through the SeccompSpeculation allowlist
  // - confirm.
  bool allow_speculation_ = false;
  // Set by DangerAllowMountPropagation().
  bool allow_mount_propagation_ = false;
  // Hostname for the sandboxee; starts out as kDefaultHostname.
  std::string hostname_ = std::string(kDefaultHostname);

  // Stack trace collection. Violations, signals and timeouts collect stack
  // traces by default; kills and normal exits do not.
  bool collect_stacktrace_on_violation_ = true;
  bool collect_stacktrace_on_signal_ = true;
  bool collect_stacktrace_on_timeout_ = true;
  bool collect_stacktrace_on_kill_ = false;
  bool collect_stacktrace_on_exit_ = false;
1030 
1031   // Seccomp fields
1032   std::vector<sock_filter> user_policy_;
1033   std::vector<sock_filter> overridable_policy_;
1034   std::optional<sock_filter> default_action_;
1035   bool user_policy_handles_bpf_ = false;
1036   bool user_policy_handles_ptrace_ = false;
1037   absl::flat_hash_set<uint32_t> handled_syscalls_;
1038   absl::flat_hash_set<uint32_t> allowed_syscalls_;
1039   absl::flat_hash_set<uint32_t> blocked_syscalls_;
1040   absl::flat_hash_set<uint32_t> custom_policy_syscalls_;
1041 
  // Error handling
  //
  // Status returned by GetStatus(); starts out as OK.
  absl::Status last_status_ = absl::OkStatus();
  // NOTE(review): presumably set once a policy has been built from this
  // builder, to reject reuse - confirm.
  bool already_built_ = false;
1045 
  // Per-feature flags, all initially false.
  //
  // NOTE(review): each flag presumably records that the matching Allow*()
  // helper has already been applied to this builder, so that its rules are
  // not added twice - confirm against the implementation.
  struct {
    bool static_startup = false;
    bool dynamic_startup = false;
    bool system_malloc = false;
    bool scudo_malloc = false;
    bool tcmalloc = false;
    bool llvm_sanitizers = false;
    bool llvm_coverage = false;
    bool limited_madvise = false;
    bool madvise_populate = false;
    bool mmap_without_exec = false;
    bool mprotect_without_exec = false;
    bool safe_bpf = false;
    bool safe_fcntl = false;
    bool tcgets = false;
    bool slow_fences = false;
    bool fast_fences = false;
    bool getrlimit = false;
    bool getrandom = false;
    bool wipe_on_fork = false;
    bool log_forwarding = false;
    bool prctl_set_name = false;
    bool prctl_set_vma = false;
  } allowed_complex_;
1070 
  // List of allowed hosts; empty (no value) until hosts are configured.
  // NOTE(review): presumably populated by AllowIPv4()/AllowIPv6() - confirm.
  absl::optional<AllowedHosts> allowed_hosts_;
1073 };
1074 
1075 }  // namespace sandbox2
1076 
1077 #endif  // SANDBOXED_API_SANDBOX2_POLICYBUILDER_H_
1078