• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "sandboxed_api/sandbox2/policybuilder.h"
16 
17 #include <fcntl.h>  // For the fcntl flags
18 #include <linux/bpf_common.h>
19 #include <linux/filter.h>
20 #include <linux/futex.h>
21 #include <linux/random.h>  // For GRND_NONBLOCK
22 #include <linux/seccomp.h>
23 #include <stddef.h>
24 #include <sys/ioctl.h>
25 #include <sys/mman.h>  // For mmap arguments
26 #include <sys/prctl.h>
27 #include <sys/socket.h>
28 #include <sys/stat.h>
29 #include <sys/statvfs.h>
30 #include <syscall.h>
31 #include <unistd.h>
32 
33 #include <array>
34 #include <cerrno>
35 #include <csignal>
36 #include <cstdint>
37 #include <cstdlib>
38 #include <deque>
39 #include <functional>
40 #include <iterator>
41 #include <limits>
42 #include <memory>
43 #include <optional>
44 #include <string>
45 #include <type_traits>
46 #include <utility>
47 #include <vector>
48 
49 #include "absl/container/flat_hash_set.h"
50 #include "absl/log/log.h"
51 #include "absl/memory/memory.h"
52 #include "absl/status/status.h"
53 #include "absl/status/statusor.h"
54 #include "absl/strings/match.h"
55 #include "absl/strings/str_cat.h"
56 #include "absl/strings/string_view.h"
57 #include "absl/types/span.h"
58 #include "sandboxed_api/config.h"
59 #include "sandboxed_api/sandbox2/allowlists/all_syscalls.h"
60 #include "sandboxed_api/sandbox2/allowlists/namespaces.h"
61 #include "sandboxed_api/sandbox2/allowlists/seccomp_speculation.h"
62 #include "sandboxed_api/sandbox2/allowlists/trace_all_syscalls.h"
63 #include "sandboxed_api/sandbox2/allowlists/unrestricted_networking.h"
64 #include "sandboxed_api/sandbox2/forkserver.pb.h"
65 #include "sandboxed_api/sandbox2/namespace.h"
66 #include "sandboxed_api/sandbox2/network_proxy/filtering.h"
67 #include "sandboxed_api/sandbox2/policy.h"
68 #include "sandboxed_api/sandbox2/syscall.h"
69 #include "sandboxed_api/sandbox2/util/bpf_helper.h"
70 #include "sandboxed_api/util/fileops.h"
71 #include "sandboxed_api/util/path.h"
72 
73 #if defined(SAPI_X86_64)
74 #include <asm/prctl.h>
75 #elif defined(SAPI_PPC64_LE)
76 #include <asm/termbits.h>  // On PPC, TCGETS macro needs termios
77 #endif
78 
79 #ifndef BPF_MAP_LOOKUP_ELEM
80 #define BPF_MAP_LOOKUP_ELEM 1
81 #endif
82 #ifndef BPF_OBJ_GET
83 #define BPF_OBJ_GET 7
84 #endif
85 #ifndef BPF_MAP_GET_NEXT_KEY
86 #define BPF_MAP_GET_NEXT_KEY 4
87 #endif
88 #ifndef BPF_MAP_GET_NEXT_ID
89 #define BPF_MAP_GET_NEXT_ID 12
90 #endif
91 #ifndef BPF_MAP_GET_FD_BY_ID
92 #define BPF_MAP_GET_FD_BY_ID 14
93 #endif
94 #ifndef BPF_OBJ_GET_INFO_BY_FD
95 #define BPF_OBJ_GET_INFO_BY_FD 15
96 #endif
97 
98 #ifndef MAP_FIXED_NOREPLACE
99 #define MAP_FIXED_NOREPLACE 0x100000
100 #endif
101 #ifndef MADV_POPULATE_READ
102 #define MADV_POPULATE_READ 22  // Linux 5.14+
103 #endif
104 #ifndef MADV_POPULATE_WRITE  // Linux 5.14+
105 #define MADV_POPULATE_WRITE 23
106 #endif
107 #ifndef PR_SET_VMA
108 #define PR_SET_VMA 0x53564d41
109 #endif
110 #ifndef PR_SET_VMA_ANON_NAME
111 #define PR_SET_VMA_ANON_NAME 0
112 #endif
113 
114 namespace sandbox2 {
115 namespace {
116 
117 namespace file = ::sapi::file;
118 namespace fileops = ::sapi::file_util::fileops;
119 
120 // Validates that the path is absolute and canonical.
ValidatePath(absl::string_view path,bool allow_relative_path=false)121 absl::StatusOr<std::string> ValidatePath(absl::string_view path,
122                                          bool allow_relative_path = false) {
123   if (path.empty()) {
124     return absl::InvalidArgumentError("Path must not be empty");
125   }
126 
127   if (!file::IsAbsolutePath(path) && !allow_relative_path) {
128     return absl::InvalidArgumentError(
129         absl::StrCat("Path must be absolute: ", path));
130   }
131 
132   std::string fixed_path = file::CleanPath(path);
133   if (fixed_path != path) {
134     return absl::InvalidArgumentError(
135         absl::StrCat("Path is not canonical: ", path));
136   }
137 
138   return fixed_path;
139 }
140 
// Syscall numbers of the mmap variants that exist on the target architecture.
constexpr uint32_t kMmapSyscalls[] = {
#ifdef __NR_mmap2
    __NR_mmap2,
#endif
#ifdef __NR_mmap
    __NR_mmap,
#endif
};

// Returns true when `num` is mprotect, pkey_mprotect (where defined) or one of
// the entries in kMmapSyscalls.
constexpr bool CheckMapExec(uint32_t num) {
  bool matches = (num == __NR_mprotect);
#ifdef __NR_pkey_mprotect
  matches = matches || (num == __NR_pkey_mprotect);
#endif
  for (uint32_t mmap_nr : kMmapSyscalls) {
    matches = matches || (num == mmap_nr);
  }
  return matches;
}
166 
// Returns true when `filter` cannot jump further than `max_jmp`. Instructions
// that are not in the BPF_JMP class always pass; BPF_JA is checked through its
// `k` field and every other jump through both `jt` and `jf`.
bool CheckBpfBounds(const sock_filter& filter, size_t max_jmp) {
  if (BPF_CLASS(filter.code) != BPF_JMP) {
    return true;
  }
  if (BPF_OP(filter.code) == BPF_JA) {
    return filter.k <= max_jmp;
  }
  const bool true_branch_ok = filter.jt <= max_jmp;
  const bool false_branch_ok = filter.jf <= max_jmp;
  return true_branch_ok && false_branch_ok;
}
176 
IsOnReadOnlyDev(const std::string & path)177 bool IsOnReadOnlyDev(const std::string& path) {
178   struct statvfs vfs;
179   if (TEMP_FAILURE_RETRY(statvfs(path.c_str(), &vfs)) == -1) {
180     PLOG(ERROR) << "Could not statvfs: " << path.c_str();
181     return false;
182   }
183   return vfs.f_flag & ST_RDONLY;
184 }
185 
186 }  // namespace
187 
DisableNamespaces(NamespacesToken)188 PolicyBuilder& PolicyBuilder::DisableNamespaces(NamespacesToken) {
189   if (requires_namespaces_) {
190     SetError(absl::FailedPreconditionError(
191         "Namespaces cannot be both disabled and enabled. You're probably "
192         "using features that implicitly enable namespaces (SetHostname, "
193         "AddFile, AddDirectory, AddDataDependency, AddLibrariesForBinary "
194         "or similar)"));
195     return *this;
196   }
197   use_namespaces_ = false;
198   return *this;
199 }
200 
Allow(MapExec)201 PolicyBuilder& PolicyBuilder::Allow(MapExec) {
202   allow_map_exec_ = true;
203   return *this;
204 }
205 
Allow(SeccompSpeculation)206 PolicyBuilder& PolicyBuilder::Allow(SeccompSpeculation) {
207   allow_speculation_ = true;
208   return *this;
209 }
210 
Allow(UnrestrictedNetworking)211 PolicyBuilder& PolicyBuilder::Allow(UnrestrictedNetworking) {
212   EnableNamespaces();  // NOLINT(clang-diagnostic-deprecated-declarations)
213 
214   if (netns_mode_ != NETNS_MODE_UNSPECIFIED) {
215     SetError(absl::FailedPreconditionError(absl::StrCat(
216         "Incompatible with other network namespaces modes. A sandbox can have "
217         "only one network namespace mode. Attempted to configure: ",
218         NetNsMode_Name(netns_mode_))));
219     return *this;
220   }
221 
222   netns_mode_ = NETNS_MODE_NONE;
223   return *this;
224 }
225 
AllowSyscall(uint32_t num)226 PolicyBuilder& PolicyBuilder::AllowSyscall(uint32_t num) {
227   if (handled_syscalls_.insert(num).second &&
228       allowed_syscalls_.insert(num).second) {
229     if (!allow_map_exec_ && CheckMapExec(num)) {
230       SetError(absl::FailedPreconditionError(
231           "Allowing unrestricted mmap/mprotect/pkey_mprotect requires "
232           "Allow(MapExec)."));
233       return *this;
234     }
235     user_policy_.insert(user_policy_.end(), {SYSCALL(num, ALLOW)});
236   }
237   return *this;
238 }
239 
AllowSyscalls(absl::Span<const uint32_t> nums)240 PolicyBuilder& PolicyBuilder::AllowSyscalls(absl::Span<const uint32_t> nums) {
241   for (auto num : nums) {
242     AllowSyscall(num);
243   }
244   return *this;
245 }
246 
BlockSyscallsWithErrno(absl::Span<const uint32_t> nums,int error)247 PolicyBuilder& PolicyBuilder::BlockSyscallsWithErrno(
248     absl::Span<const uint32_t> nums, int error) {
249   for (auto num : nums) {
250     BlockSyscallWithErrno(num, error);
251   }
252   return *this;
253 }
254 
BlockSyscallWithErrno(uint32_t num,int error)255 PolicyBuilder& PolicyBuilder::BlockSyscallWithErrno(uint32_t num, int error) {
256   if (handled_syscalls_.insert(num).second &&
257       blocked_syscalls_.insert(num).second) {
258     user_policy_.insert(user_policy_.end(), {SYSCALL(num, ERRNO(error))});
259     if (num == __NR_bpf) {
260       user_policy_handles_bpf_ = true;
261     }
262     if (num == __NR_ptrace) {
263       user_policy_handles_ptrace_ = true;
264     }
265   }
266   return *this;
267 }
268 
OverridableBlockSyscallWithErrno(uint32_t num,int error)269 PolicyBuilder& PolicyBuilder::OverridableBlockSyscallWithErrno(uint32_t num,
270                                                                int error) {
271   overridable_policy_.insert(overridable_policy_.end(),
272                              {SYSCALL(num, ERRNO(error))});
273   return *this;
274 }
275 
AllowEpollWait()276 PolicyBuilder& PolicyBuilder::AllowEpollWait() {
277   return AllowSyscalls({
278 #ifdef __NR_epoll_wait
279       __NR_epoll_wait,
280 #endif
281 #ifdef __NR_epoll_pwait
282       __NR_epoll_pwait,
283 #endif
284 #ifdef __NR_epoll_pwait2
285       __NR_epoll_pwait2,
286 #endif
287   });
288 }
289 
AllowEpoll()290 PolicyBuilder& PolicyBuilder::AllowEpoll() {
291   AllowSyscalls({
292 #ifdef __NR_epoll_create
293       __NR_epoll_create,
294 #endif
295 #ifdef __NR_epoll_create1
296       __NR_epoll_create1,
297 #endif
298 #ifdef __NR_epoll_ctl
299       __NR_epoll_ctl,
300 #endif
301   });
302 
303   return AllowEpollWait();
304 }
305 
AllowInotifyInit()306 PolicyBuilder& PolicyBuilder::AllowInotifyInit() {
307   return AllowSyscalls({
308 #ifdef __NR_inotify_init
309       __NR_inotify_init,
310 #endif
311 #ifdef __NR_inotify_init1
312       __NR_inotify_init1,
313 #endif
314   });
315 }
316 
AllowSelect()317 PolicyBuilder& PolicyBuilder::AllowSelect() {
318   return AllowSyscalls({
319 #ifdef __NR_select
320       __NR_select,
321 #endif
322 #ifdef __NR_pselect6
323       __NR_pselect6,
324 #endif
325   });
326 }
327 
AllowExit()328 PolicyBuilder& PolicyBuilder::AllowExit() {
329   return AllowSyscalls({__NR_exit, __NR_exit_group});
330 }
331 
AllowScudoMalloc()332 PolicyBuilder& PolicyBuilder::AllowScudoMalloc() {
333   if (allowed_complex_.scudo_malloc) {
334     return *this;
335   }
336   allowed_complex_.scudo_malloc = true;
337   AllowTime();
338   AllowSyscalls({__NR_munmap, __NR_nanosleep});
339   AllowFutexOp(FUTEX_WAKE);
340   AllowLimitedMadvise();
341   AllowGetRandom();
342   AllowGetPIDs();
343   AllowWipeOnFork();
344 #ifdef __NR_open
345   OverridableBlockSyscallWithErrno(__NR_open, ENOENT);
346 #endif
347 #ifdef __NR_openat
348   OverridableBlockSyscallWithErrno(__NR_openat, ENOENT);
349 #endif
350 
351   return AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
352     return {
353         ARG_32(2),  // prot
354         JEQ32(PROT_NONE, JUMP(&labels, prot_none)),
355         JNE32(PROT_READ | PROT_WRITE, JUMP(&labels, mmap_end)),
356 
357         // PROT_READ | PROT_WRITE
358         ARG_32(3),  // flags
359         BPF_STMT(BPF_ALU | BPF_AND | BPF_K,
360                  ~uint32_t{MAP_FIXED | MAP_NORESERVE}),
361         JEQ32(MAP_PRIVATE | MAP_ANONYMOUS, ALLOW),
362         JUMP(&labels, mmap_end),
363 
364         // PROT_NONE
365         LABEL(&labels, prot_none),
366         ARG_32(3),  // flags
367         JEQ32(MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, ALLOW),
368 
369         LABEL(&labels, mmap_end),
370     };
371   });
372 }
373 
AllowTcMalloc()374 PolicyBuilder& PolicyBuilder::AllowTcMalloc() {
375   if (allowed_complex_.tcmalloc) {
376     return *this;
377   }
378   allowed_complex_.tcmalloc = true;
379   AllowTime();
380   AllowRestartableSequences(kRequireFastFences);
381   AllowSyscalls({__NR_munmap, __NR_nanosleep, __NR_brk, __NR_mincore,
382                  __NR_membarrier, __NR_lseek});
383   AllowLimitedMadvise();
384   AllowPrctlSetVma();
385   AllowPoll();
386   AllowGetPIDs();
387 
388   AddPolicyOnSyscall(__NR_mprotect, {
389                                         ARG_32(2),
390                                         JEQ32(PROT_READ | PROT_WRITE, ALLOW),
391                                         JEQ32(PROT_NONE, ALLOW),
392                                     });
393 
394   return AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
395     return {
396         ARG_32(2),  // prot
397         JEQ32(PROT_NONE, JUMP(&labels, prot_none)),
398         JNE32(PROT_READ | PROT_WRITE, JUMP(&labels, mmap_end)),
399 
400         // PROT_READ | PROT_WRITE
401         ARG_32(3),  // flags
402         JEQ32(MAP_ANONYMOUS | MAP_PRIVATE, ALLOW),
403         JUMP(&labels, mmap_end),
404 
405         // PROT_NONE
406         LABEL(&labels, prot_none),
407         ARG_32(3),  // flags
408         JEQ32(MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, ALLOW),
409         JEQ32(MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED_NOREPLACE, ALLOW),
410         JEQ32(MAP_ANONYMOUS | MAP_PRIVATE, ALLOW),
411 
412         LABEL(&labels, mmap_end),
413     };
414   });
415 }
416 
AllowSystemMalloc()417 PolicyBuilder& PolicyBuilder::AllowSystemMalloc() {
418   if (allowed_complex_.system_malloc) {
419     return *this;
420   }
421   allowed_complex_.system_malloc = true;
422   AllowSyscalls({__NR_munmap, __NR_brk});
423   AllowFutexOp(FUTEX_WAKE);
424   AddPolicyOnSyscall(__NR_mremap, {
425                                       ARG_32(3),
426                                       JEQ32(MREMAP_MAYMOVE, ALLOW),
427                                   });
428   return AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
429     return {
430         ARG_32(2),  // prot
431         JEQ32(PROT_NONE, JUMP(&labels, prot_none)),
432         JNE32(PROT_READ | PROT_WRITE, JUMP(&labels, mmap_end)),
433 
434         // PROT_READ | PROT_WRITE
435         ARG_32(3),  // flags
436         JEQ32(MAP_ANONYMOUS | MAP_PRIVATE, ALLOW),
437 
438         // PROT_NONE
439         LABEL(&labels, prot_none),
440         ARG_32(3),  // flags
441         JEQ32(MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, ALLOW),
442 
443         LABEL(&labels, mmap_end),
444     };
445   });
446 
447   return *this;
448 }
449 
AllowLlvmSanitizers()450 PolicyBuilder& PolicyBuilder::AllowLlvmSanitizers() {
451   if constexpr (!sapi::sanitizers::IsAny()) {
452     return *this;
453   }
454   if (allowed_complex_.llvm_sanitizers) {
455     return *this;
456   }
457   allowed_complex_.llvm_sanitizers = true;
458   // *san use a custom allocator that runs mmap/unmap under the hood.  For
459   // example:
460   // https://github.com/llvm/llvm-project/blob/596d534ac3524052df210be8d3c01a33b2260a42/compiler-rt/lib/asan/asan_allocator.cpp#L980
461   // https://github.com/llvm/llvm-project/blob/62ec4ac90738a5f2d209ed28c822223e58aaaeb7/compiler-rt/lib/sanitizer_common/sanitizer_allocator_secondary.h#L98
462   AllowMmapWithoutExec();
463   AllowSyscall(__NR_munmap);
464   AllowSyscall(__NR_sched_yield);
465 
466   // https://github.com/llvm/llvm-project/blob/4bbc3290a25c0dc26007912a96e0f77b2092ee56/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.cpp#L293
467   AddPolicyOnSyscall(__NR_mprotect,
468                      {
469                          ARG_32(2),
470                          BPF_STMT(BPF_AND | BPF_ALU | BPF_K,
471                                   ~uint32_t{PROT_READ | PROT_WRITE}),
472                          JEQ32(PROT_NONE, ALLOW),
473                      });
474 
475   AddPolicyOnSyscall(__NR_madvise, {
476                                        ARG_32(2),
477                                        JEQ32(MADV_DONTDUMP, ALLOW),
478                                        JEQ32(MADV_DONTNEED, ALLOW),
479                                        JEQ32(MADV_NOHUGEPAGE, ALLOW),
480                                    });
481   // Sanitizers read from /proc. For example:
482   // https://github.com/llvm/llvm-project/blob/634da7a1c61ee8c173e90a841eb1f4ea03caa20b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp#L1155
483   AddDirectoryIfNamespaced("/proc");
484   AllowOpen();
485   // Sanitizers need pid for reports. For example:
486   // https://github.com/llvm/llvm-project/blob/634da7a1c61ee8c173e90a841eb1f4ea03caa20b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp#L740
487   AllowGetPIDs();
488   // Sanitizers may try color output. For example:
489   // https://github.com/llvm/llvm-project/blob/87dd3d350c4ce0115b2cdf91d85ddd05ae2661aa/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp#L157
490   OverridableBlockSyscallWithErrno(__NR_ioctl, EPERM);
491   // https://github.com/llvm/llvm-project/blob/9aa39481d9eb718e872993791547053a3c1f16d5/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp#L150
492   // https://sourceware.org/git/?p=glibc.git;a=blob;f=nptl/pthread_getattr_np.c;h=de7edfa0928224eb8375e2fe894d6677570fbb3b;hb=HEAD#l188
493   AllowSyscall(__NR_sched_getaffinity);
494   // https://github.com/llvm/llvm-project/blob/3cabbf60393cc8d55fe635e35e89e5973162de33/compiler-rt/lib/interception/interception.h#L352
495 #ifdef __ELF__
496   AllowDynamicStartup();
497 #endif
498   // https://github.com/llvm/llvm-project/blob/02c2b472b510ff55679844c087b66e7837e13dc2/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp#L434
499 #ifdef __NR_readlink
500   OverridableBlockSyscallWithErrno(__NR_readlink, ENOENT);
501 #endif
502   OverridableBlockSyscallWithErrno(__NR_readlinkat, ENOENT);
503   if constexpr (sapi::sanitizers::IsASan()) {
504     AllowSyscall(__NR_sigaltstack);
505   }
506   if constexpr (sapi::sanitizers::IsTSan()) {
507     AllowSyscall(__NR_set_robust_list);
508   }
509   return *this;
510 }
511 
AllowLlvmCoverage()512 PolicyBuilder& PolicyBuilder::AllowLlvmCoverage() {
513   if (!sapi::IsCoverageRun()) {
514     return *this;
515   }
516   if (allowed_complex_.llvm_coverage) {
517     return *this;
518   }
519   allowed_complex_.llvm_coverage = true;
520   AllowStat();
521   AllowGetPIDs();
522   AllowOpen();
523   AllowRead();
524   AllowWrite();
525   AllowMkdir();
526   AllowSafeFcntl();
527   AllowSyscalls({
528       __NR_munmap, __NR_close, __NR_lseek,
529 #ifdef __NR__llseek
530       __NR__llseek,  // Newer glibc on PPC
531 #endif
532   });
533   AllowTcMalloc();
534   AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
535     return {
536         ARG_32(2),  // prot
537         JNE32(PROT_READ | PROT_WRITE, JUMP(&labels, mmap_end)),
538         ARG_32(3),  // flags
539         JEQ32(MAP_SHARED, ALLOW),
540         LABEL(&labels, mmap_end),
541     };
542   });
543   const char* coverage_dir = std::getenv("COVERAGE_DIR");
544   if (!coverage_dir || absl::string_view(coverage_dir).empty()) {
545     LOG(WARNING)
546         << "Environment variable COVERAGE is set but COVERAGE_DIR is not set. "
547            "No directory to collect coverage data will be added to the "
548            "sandbox.";
549     return *this;
550   }
551   AddDirectoryIfNamespaced(coverage_dir, /*is_ro=*/false);
552   return *this;
553 }
554 
AllowLimitedMadvise()555 PolicyBuilder& PolicyBuilder::AllowLimitedMadvise() {
556   if (allowed_complex_.limited_madvise) {
557     return *this;
558   }
559   allowed_complex_.limited_madvise = true;
560   return AddPolicyOnSyscall(__NR_madvise, {
561                                               ARG_32(2),
562                                               JEQ32(MADV_SEQUENTIAL, ALLOW),
563                                               JEQ32(MADV_DONTNEED, ALLOW),
564                                               JEQ32(MADV_REMOVE, ALLOW),
565                                               JEQ32(MADV_HUGEPAGE, ALLOW),
566                                               JEQ32(MADV_NOHUGEPAGE, ALLOW),
567                                           });
568 }
569 
AllowMadvisePopulate()570 PolicyBuilder& PolicyBuilder::AllowMadvisePopulate() {
571   if (allowed_complex_.madvise_populate) {
572     return *this;
573   }
574   allowed_complex_.madvise_populate = true;
575   return AddPolicyOnSyscall(__NR_madvise, {
576                                               ARG_32(2),
577                                               JEQ32(MADV_POPULATE_READ, ALLOW),
578                                               JEQ32(MADV_POPULATE_WRITE, ALLOW),
579                                           });
580 }
581 
AllowMmapWithoutExec()582 PolicyBuilder& PolicyBuilder::AllowMmapWithoutExec() {
583   if (allowed_complex_.mmap_without_exec) {
584     return *this;
585   }
586   allowed_complex_.mmap_without_exec = true;
587   return AddPolicyOnMmap({
588       ARG_32(2),
589       BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, PROT_EXEC, 1, 0),
590       ALLOW,
591   });
592 }
593 
AllowMprotectWithoutExec()594 PolicyBuilder& PolicyBuilder::AllowMprotectWithoutExec() {
595   if (allowed_complex_.mprotect_without_exec) {
596     return *this;
597   }
598   allowed_complex_.mprotect_without_exec = true;
599   return AddPolicyOnSyscall(
600       __NR_mprotect, {
601                          ARG_32(2),
602                          BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, PROT_EXEC, 1, 0),
603                          ALLOW,
604                      });
605 }
606 
607 std::enable_if_t<builder_internal::is_type_complete_v<MapExec>, PolicyBuilder&>
AllowMmap()608 PolicyBuilder::AllowMmap() {
609   return AllowSyscalls(kMmapSyscalls);
610 }
611 
AllowMlock()612 PolicyBuilder& PolicyBuilder::AllowMlock() {
613 #ifdef __NR_mlock
614   AllowSyscall(__NR_mlock);
615 #endif
616 #ifdef __NR_munlock
617   AllowSyscall(__NR_munlock);
618 #endif
619 #ifdef __NR_mlock2
620   AllowSyscall(__NR_mlock2);
621 #endif
622   return *this;
623 }
624 
AllowOpen()625 PolicyBuilder& PolicyBuilder::AllowOpen() {
626 #ifdef __NR_creat
627   AllowSyscall(__NR_creat);
628 #endif
629 #ifdef __NR_open
630   AllowSyscall(__NR_open);
631 #endif
632 #ifdef __NR_openat
633   AllowSyscall(__NR_openat);
634 #endif
635   return *this;
636 }
637 
AllowStat()638 PolicyBuilder& PolicyBuilder::AllowStat() {
639 #ifdef __NR_fstat
640   AllowSyscall(__NR_fstat);
641 #endif
642 #ifdef __NR_fstat64
643   AllowSyscall(__NR_fstat64);
644 #endif
645 #ifdef __NR_fstatat
646   AllowSyscall(__NR_fstatat);
647 #endif
648 #ifdef __NR_fstatat64
649   AllowSyscall(__NR_fstatat64);
650 #endif
651 #ifdef __NR_fstatfs
652   AllowSyscall(__NR_fstatfs);
653 #endif
654 #ifdef __NR_fstatfs64
655   AllowSyscall(__NR_fstatfs64);
656 #endif
657 #ifdef __NR_lstat
658   AllowSyscall(__NR_lstat);
659 #endif
660 #ifdef __NR_lstat64
661   AllowSyscall(__NR_lstat64);
662 #endif
663 #ifdef __NR_newfstatat
664   AllowSyscall(__NR_newfstatat);
665 #endif
666 #ifdef __NR_oldfstat
667   AllowSyscall(__NR_oldfstat);
668 #endif
669 #ifdef __NR_oldlstat
670   AllowSyscall(__NR_oldlstat);
671 #endif
672 #ifdef __NR_oldstat
673   AllowSyscall(__NR_oldstat);
674 #endif
675 #ifdef __NR_stat
676   AllowSyscall(__NR_stat);
677 #endif
678 #ifdef __NR_stat64
679   AllowSyscall(__NR_stat64);
680 #endif
681 #ifdef __NR_statfs
682   AllowSyscall(__NR_statfs);
683 #endif
684 #ifdef __NR_statfs64
685   AllowSyscall(__NR_statfs64);
686 #endif
687   return *this;
688 }
689 
AllowAccess()690 PolicyBuilder& PolicyBuilder::AllowAccess() {
691 #ifdef __NR_access
692   AllowSyscall(__NR_access);
693 #endif
694 #ifdef __NR_faccessat
695   AllowSyscall(__NR_faccessat);
696 #endif
697 #ifdef __NR_faccessat2
698   AllowSyscall(__NR_faccessat2);
699 #endif
700   return *this;
701 }
702 
AllowDup()703 PolicyBuilder& PolicyBuilder::AllowDup() {
704   AllowSyscall(__NR_dup);
705 #ifdef __NR_dup2
706   AllowSyscall(__NR_dup2);
707 #endif
708   AllowSyscall(__NR_dup3);
709   return *this;
710 }
711 
AllowPipe()712 PolicyBuilder& PolicyBuilder::AllowPipe() {
713 #ifdef __NR_pipe
714   AllowSyscall(__NR_pipe);
715 #endif
716   AllowSyscall(__NR_pipe2);
717   return *this;
718 }
719 
AllowChmod()720 PolicyBuilder& PolicyBuilder::AllowChmod() {
721 #ifdef __NR_chmod
722   AllowSyscall(__NR_chmod);
723 #endif
724   AllowSyscall(__NR_fchmod);
725   AllowSyscall(__NR_fchmodat);
726   return *this;
727 }
728 
AllowChown()729 PolicyBuilder& PolicyBuilder::AllowChown() {
730 #ifdef __NR_chown
731   AllowSyscall(__NR_chown);
732 #endif
733 #ifdef __NR_lchown
734   AllowSyscall(__NR_lchown);
735 #endif
736   AllowSyscall(__NR_fchown);
737   AllowSyscall(__NR_fchownat);
738   return *this;
739 }
740 
AllowRead()741 PolicyBuilder& PolicyBuilder::AllowRead() {
742   return AllowSyscalls({
743       __NR_read,
744       __NR_readv,
745       __NR_preadv,
746       __NR_pread64,
747   });
748 }
749 
AllowWrite()750 PolicyBuilder& PolicyBuilder::AllowWrite() {
751   return AllowSyscalls({
752       __NR_write,
753       __NR_writev,
754       __NR_pwritev,
755       __NR_pwrite64,
756   });
757 }
758 
AllowReaddir()759 PolicyBuilder& PolicyBuilder::AllowReaddir() {
760   return AllowSyscalls({
761 #ifdef __NR_getdents
762       __NR_getdents,
763 #endif
764 #ifdef __NR_getdents64
765       __NR_getdents64,
766 #endif
767   });
768 }
769 
AllowReadlink()770 PolicyBuilder& PolicyBuilder::AllowReadlink() {
771   return AllowSyscalls({
772 #ifdef __NR_readlink
773       __NR_readlink,
774 #endif
775 #ifdef __NR_readlinkat
776       __NR_readlinkat,
777 #endif
778   });
779 }
780 
AllowLink()781 PolicyBuilder& PolicyBuilder::AllowLink() {
782   return AllowSyscalls({
783 #ifdef __NR_link
784       __NR_link,
785 #endif
786 #ifdef __NR_linkat
787       __NR_linkat,
788 #endif
789   });
790 }
791 
AllowSymlink()792 PolicyBuilder& PolicyBuilder::AllowSymlink() {
793   return AllowSyscalls({
794 #ifdef __NR_symlink
795       __NR_symlink,
796 #endif
797 #ifdef __NR_symlinkat
798       __NR_symlinkat,
799 #endif
800   });
801 }
802 
AllowMkdir()803 PolicyBuilder& PolicyBuilder::AllowMkdir() {
804   return AllowSyscalls({
805 #ifdef __NR_mkdir
806       __NR_mkdir,
807 #endif
808 #ifdef __NR_mkdirat
809       __NR_mkdirat,
810 #endif
811   });
812 }
813 
AllowUtime()814 PolicyBuilder& PolicyBuilder::AllowUtime() {
815   return AllowSyscalls({
816 #ifdef __NR_futimens
817       __NR_futimens,
818 #endif
819 #ifdef __NR_futimesat
820       __NR_futimesat,
821 #endif
822 #ifdef __NR_utime
823       __NR_utime,
824 #endif
825 #ifdef __NR_utimes
826       __NR_utimes,
827 #endif
828 #ifdef __NR_utimensat
829       __NR_utimensat,
830 #endif
831   });
832 }
833 
AllowSafeBpf()834 PolicyBuilder& PolicyBuilder::AllowSafeBpf() {
835   if (allowed_complex_.safe_bpf) {
836     return *this;
837   }
838   allowed_complex_.safe_bpf = true;
839   user_policy_handles_bpf_ = true;
840   return AddPolicyOnSyscall(__NR_bpf, {
841                                           ARG_32(1),
842                                           JEQ32(BPF_MAP_LOOKUP_ELEM, ALLOW),
843                                           JEQ32(BPF_OBJ_GET, ALLOW),
844                                           JEQ32(BPF_MAP_GET_NEXT_KEY, ALLOW),
845                                           JEQ32(BPF_MAP_GET_NEXT_ID, ALLOW),
846                                           JEQ32(BPF_MAP_GET_FD_BY_ID, ALLOW),
847                                           JEQ32(BPF_OBJ_GET_INFO_BY_FD, ALLOW),
848                                       });
849 }
850 
AllowSafeFcntl()851 PolicyBuilder& PolicyBuilder::AllowSafeFcntl() {
852   if (allowed_complex_.safe_fcntl) {
853     return *this;
854   }
855   allowed_complex_.safe_fcntl = true;
856   return AddPolicyOnSyscalls({__NR_fcntl,
857 #ifdef __NR_fcntl64
858                               __NR_fcntl64
859 #endif
860                              },
861                              {
862                                  ARG_32(1),
863                                  JEQ32(F_GETFD, ALLOW),
864                                  JEQ32(F_SETFD, ALLOW),
865                                  JEQ32(F_GETFL, ALLOW),
866                                  JEQ32(F_SETFL, ALLOW),
867                                  JEQ32(F_GETLK, ALLOW),
868                                  JEQ32(F_SETLK, ALLOW),
869                                  JEQ32(F_SETLKW, ALLOW),
870                                  JEQ32(F_DUPFD, ALLOW),
871                                  JEQ32(F_DUPFD_CLOEXEC, ALLOW),
872                              });
873 }
874 
AllowFork()875 PolicyBuilder& PolicyBuilder::AllowFork() {
876   return AllowSyscalls({
877 #ifdef __NR_fork
878       __NR_fork,
879 #endif
880 #ifdef __NR_vfork
881       __NR_vfork,
882 #endif
883       __NR_clone});
884 }
885 
AllowWait()886 PolicyBuilder& PolicyBuilder::AllowWait() {
887   return AllowSyscalls({
888 #ifdef __NR_waitpid
889       __NR_waitpid,
890 #endif
891       __NR_wait4});
892 }
893 
AllowAlarm()894 PolicyBuilder& PolicyBuilder::AllowAlarm() {
895   return AllowSyscalls({
896 #ifdef __NR_alarm
897       __NR_alarm,
898 #endif
899       __NR_setitimer});
900 }
901 
AllowPosixTimers()902 PolicyBuilder& PolicyBuilder::AllowPosixTimers() {
903   return AllowSyscalls({
904       __NR_timer_create,
905       __NR_timer_delete,
906       __NR_timer_settime,
907       __NR_timer_gettime,
908       __NR_timer_getoverrun,
909   });
910 }
911 
AllowHandleSignals()912 PolicyBuilder& PolicyBuilder::AllowHandleSignals() {
913   return AllowSyscalls({
914       __NR_restart_syscall,
915       __NR_rt_sigaction,
916       __NR_rt_sigreturn,
917       __NR_rt_sigprocmask,
918 #ifdef __NR_signal
919       __NR_signal,
920 #endif
921 #ifdef __NR_sigaction
922       __NR_sigaction,
923 #endif
924 #ifdef __NR_sigreturn
925       __NR_sigreturn,
926 #endif
927 #ifdef __NR_sigprocmask
928       __NR_sigprocmask,
929 #endif
930 #ifdef __NR_sigaltstack
931       __NR_sigaltstack,
932 #endif
933   });
934 }
935 
AllowTCGETS()936 PolicyBuilder& PolicyBuilder::AllowTCGETS() {
937   if (allowed_complex_.tcgets) {
938     return *this;
939   }
940   allowed_complex_.tcgets = true;
941   return AddPolicyOnSyscall(__NR_ioctl, {
942                                             ARG_32(1),
943                                             JEQ32(TCGETS, ALLOW),
944                                         });
945 }
946 
AllowTime()947 PolicyBuilder& PolicyBuilder::AllowTime() {
948   return AllowSyscalls({
949 #ifdef __NR_time
950       __NR_time,
951 #endif
952       __NR_gettimeofday, __NR_clock_gettime});
953 }
954 
AllowSleep()955 PolicyBuilder& PolicyBuilder::AllowSleep() {
956   return AllowSyscalls({
957       __NR_clock_nanosleep,
958       __NR_nanosleep,
959   });
960 }
961 
AllowGetIDs()962 PolicyBuilder& PolicyBuilder::AllowGetIDs() {
963   return AllowSyscalls({
964       __NR_getuid,
965       __NR_geteuid,
966       __NR_getresuid,
967       __NR_getgid,
968       __NR_getegid,
969       __NR_getresgid,
970 #ifdef __NR_getuid32
971       __NR_getuid32,
972       __NR_geteuid32,
973       __NR_getresuid32,
974       __NR_getgid32,
975       __NR_getegid32,
976       __NR_getresgid32,
977 #endif
978       __NR_getgroups,
979   });
980 }
981 
AllowRestartableSequences(CpuFenceMode cpu_fence_mode)982 PolicyBuilder& PolicyBuilder::AllowRestartableSequences(
983     CpuFenceMode cpu_fence_mode) {
984   if (!allowed_complex_.slow_fences && !allowed_complex_.fast_fences) {
985 #ifdef __NR_rseq
986     AllowSyscall(__NR_rseq);
987 #endif
988     AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
989       return {
990           ARG_32(2),  // prot
991           JNE32(PROT_READ | PROT_WRITE, JUMP(&labels, mmap_end)),
992 
993           ARG_32(3),  // flags
994           JEQ32(MAP_PRIVATE | MAP_ANONYMOUS, ALLOW),
995 
996           LABEL(&labels, mmap_end),
997       };
998     });
999     AllowSyscall(__NR_getcpu);
1000     AllowSyscall(__NR_membarrier);
1001     AllowFutexOp(FUTEX_WAIT);
1002     AllowFutexOp(FUTEX_WAKE);
1003     AllowRead();
1004     AllowOpen();
1005     AllowPoll();
1006     AllowSyscall(__NR_close);
1007     AddPolicyOnSyscall(__NR_rt_sigprocmask, {
1008                                                 ARG_32(0),
1009                                                 JEQ32(SIG_SETMASK, ALLOW),
1010                                             });
1011     AllowPrctlSetVma();
1012 
1013     AddFileIfNamespaced("/proc/cpuinfo");
1014     AddFileIfNamespaced("/proc/stat");
1015     AddDirectoryIfNamespaced("/sys/devices/system/cpu");
1016   }
1017   if (cpu_fence_mode == kAllowSlowFences && !allowed_complex_.slow_fences) {
1018     AllowSyscall(__NR_sched_getaffinity);
1019     AllowSyscall(__NR_sched_setaffinity);
1020     AddFileIfNamespaced("/proc/self/cpuset");
1021     allowed_complex_.slow_fences = true;
1022   } else if (cpu_fence_mode == kRequireFastFences) {
1023     allowed_complex_.fast_fences = true;
1024   }
1025   return *this;
1026 }
1027 
AllowGetPIDs()1028 PolicyBuilder& PolicyBuilder::AllowGetPIDs() {
1029   return AllowSyscalls({
1030       __NR_getpid,
1031       __NR_getppid,
1032       __NR_gettid,
1033   });
1034 }
1035 
AllowGetPGIDs()1036 PolicyBuilder& PolicyBuilder::AllowGetPGIDs() {
1037   return AllowSyscalls({
1038       __NR_getpgid,
1039 #ifdef __NR_getpgrp
1040       __NR_getpgrp,
1041 #endif
1042   });
1043 }
1044 
AllowGetRlimit()1045 PolicyBuilder& PolicyBuilder::AllowGetRlimit() {
1046   if (allowed_complex_.getrlimit) {
1047     return *this;
1048   }
1049   allowed_complex_.getrlimit = true;
1050 #ifdef __NR_prlimit64
1051   AddPolicyOnSyscall(__NR_prlimit64, {ARG(2), JEQ64(0, 0, ALLOW)});
1052 #endif
1053   return AllowSyscalls({
1054 #ifdef __NR_getrlimit
1055       __NR_getrlimit,
1056 #endif
1057 #ifdef __NR_ugetrlimit
1058       __NR_ugetrlimit,
1059 #endif
1060   });
1061 }
1062 
AllowSetRlimit()1063 PolicyBuilder& PolicyBuilder::AllowSetRlimit() {
1064   return AllowSyscalls({
1065 #ifdef __NR_prlimit64
1066       __NR_prlimit64,
1067 #endif
1068 #ifdef __NR_setrlimit
1069       __NR_setrlimit,
1070 #endif
1071 #ifdef __NR_usetrlimit
1072       __NR_usetrlimit,
1073 #endif
1074   });
1075 }
1076 
AllowGetRandom()1077 PolicyBuilder& PolicyBuilder::AllowGetRandom() {
1078   if (allowed_complex_.getrandom) {
1079     return *this;
1080   }
1081   allowed_complex_.getrandom = true;
1082   return AddPolicyOnSyscall(__NR_getrandom, {
1083                                                 ARG_32(2),
1084                                                 JEQ32(0, ALLOW),
1085                                                 JEQ32(GRND_NONBLOCK, ALLOW),
1086                                             });
1087 }
1088 
AllowWipeOnFork()1089 PolicyBuilder& PolicyBuilder::AllowWipeOnFork() {
1090   if (allowed_complex_.wipe_on_fork) {
1091     return *this;
1092   }
1093   allowed_complex_.wipe_on_fork = true;
1094   // System headers may not be recent enough to include MADV_WIPEONFORK.
1095   static constexpr uint32_t kMadv_WipeOnFork = 18;
1096   // The -1 value is used by code to probe that the kernel returns -EINVAL for
1097   // unknown values because some environments, like qemu, ignore madvise
1098   // completely, but code needs to know whether WIPEONFORK took effect.
1099   return AddPolicyOnSyscall(__NR_madvise,
1100                             {
1101                                 ARG_32(2),
1102                                 JEQ32(kMadv_WipeOnFork, ALLOW),
1103                                 JEQ32(static_cast<uint32_t>(-1), ALLOW),
1104                             });
1105 }
1106 
AllowLogForwarding()1107 PolicyBuilder& PolicyBuilder::AllowLogForwarding() {
1108   if (allowed_complex_.log_forwarding) {
1109     return *this;
1110   }
1111   allowed_complex_.log_forwarding = true;
1112   AllowWrite();
1113   AllowSystemMalloc();
1114   AllowTcMalloc();
1115 
1116   // From comms
1117   AllowGetPIDs();
1118   AllowSyscalls({// from logging code
1119                  __NR_clock_gettime,
1120                  // From comms
1121                  __NR_gettid, __NR_close});
1122 
1123   // For generating stacktraces in logging (e.g. `LOG(FATAL)`)
1124   AddPolicyOnSyscall(__NR_rt_sigprocmask, {
1125                                               ARG_32(0),
1126                                               JEQ32(SIG_BLOCK, ALLOW),
1127                                           });
1128   AllowGetRlimit();
1129 
1130   // For LOG(FATAL)
1131   return AddPolicyOnSyscall(__NR_kill,
1132                             [](bpf_labels& labels) -> std::vector<sock_filter> {
1133                               return {
1134                                   ARG_32(0),
1135                                   JNE32(0, JUMP(&labels, pid_not_null)),
1136                                   ARG_32(1),
1137                                   JEQ32(SIGABRT, ALLOW),
1138                                   LABEL(&labels, pid_not_null),
1139                               };
1140                             });
1141 }
1142 
AllowUnlink()1143 PolicyBuilder& PolicyBuilder::AllowUnlink() {
1144   AllowSyscalls({
1145 #ifdef __NR_rmdir
1146       __NR_rmdir,
1147 #endif
1148 #ifdef __NR_unlink
1149       __NR_unlink,
1150 #endif
1151       __NR_unlinkat,
1152   });
1153   return *this;
1154 }
1155 
AllowPoll()1156 PolicyBuilder& PolicyBuilder::AllowPoll() {
1157   AllowSyscalls({
1158 #ifdef __NR_poll
1159       __NR_poll,
1160 #endif
1161       __NR_ppoll,
1162   });
1163   return *this;
1164 }
1165 
AllowRename()1166 PolicyBuilder& PolicyBuilder::AllowRename() {
1167   AllowSyscalls({
1168 #ifdef __NR_rename
1169       __NR_rename,
1170 #endif
1171       __NR_renameat,
1172 #ifdef __NR_renameat2
1173       __NR_renameat2,
1174 #endif
1175   });
1176   return *this;
1177 }
1178 
AllowEventFd()1179 PolicyBuilder& PolicyBuilder::AllowEventFd() {
1180   AllowSyscalls({
1181 #ifdef __NR_eventfd
1182       __NR_eventfd,
1183 #endif
1184       __NR_eventfd2,
1185   });
1186   return *this;
1187 }
1188 
AllowPrctlSetName()1189 PolicyBuilder& PolicyBuilder::AllowPrctlSetName() {
1190   if (allowed_complex_.prctl_set_name) {
1191     return *this;
1192   }
1193   allowed_complex_.prctl_set_name = true;
1194   AddPolicyOnSyscall(__NR_prctl, {ARG_32(0), JEQ32(PR_SET_NAME, ALLOW)});
1195   return *this;
1196 }
1197 
AllowPrctlSetVma()1198 PolicyBuilder& PolicyBuilder::AllowPrctlSetVma() {
1199   if (allowed_complex_.prctl_set_vma) {
1200     return *this;
1201   }
1202   allowed_complex_.prctl_set_vma = true;
1203   AddPolicyOnSyscall(__NR_prctl,
1204                      [](bpf_labels& labels) -> std::vector<sock_filter> {
1205                        return {
1206                            ARG_32(0),
1207                            JNE32(PR_SET_VMA, JUMP(&labels, prctlsetvma_end)),
1208                            ARG_32(1),
1209                            JEQ32(PR_SET_VMA_ANON_NAME, ALLOW),
1210                            LABEL(&labels, prctlsetvma_end),
1211                        };
1212                      });
1213   return *this;
1214 }
1215 
AllowFutexOp(int op)1216 PolicyBuilder& PolicyBuilder::AllowFutexOp(int op) {
1217   return AddPolicyOnSyscall(
1218       __NR_futex, {
1219                       ARG_32(1),
1220                       // a <- a & FUTEX_CMD_MASK
1221                       BPF_STMT(BPF_ALU + BPF_AND + BPF_K,
1222                                static_cast<uint32_t>(FUTEX_CMD_MASK)),
1223                       JEQ32(static_cast<uint32_t>(op) & FUTEX_CMD_MASK, ALLOW),
1224                   });
1225 }
1226 
AllowStaticStartup()1227 PolicyBuilder& PolicyBuilder::AllowStaticStartup() {
1228   if (allowed_complex_.static_startup) {
1229     return *this;
1230   }
1231   allowed_complex_.static_startup = true;
1232   AllowGetRlimit();
1233   AllowSyscalls({
1234       // These syscalls take a pointer, so no restriction.
1235       __NR_uname,
1236       __NR_brk,
1237       __NR_set_tid_address,
1238 
1239 #if defined(__ARM_NR_set_tls)
1240       // libc sets the TLS during startup
1241       __ARM_NR_set_tls,
1242 #endif
1243 
1244       // This syscall takes a pointer and a length.
1245       // We could restrict length, but it might change, so not worth it.
1246       __NR_set_robust_list,
1247   });
1248 
1249   AllowFutexOp(FUTEX_WAIT_BITSET);
1250 
1251   AddPolicyOnSyscall(__NR_rt_sigaction,
1252                      {
1253                          ARG_32(0),
1254                          // This is real-time signals used internally by libc.
1255                          JEQ32(__SIGRTMIN + 0, ALLOW),
1256                          JEQ32(__SIGRTMIN + 1, ALLOW),
1257                      });
1258 
1259   AllowSyscall(__NR_rt_sigprocmask);
1260 
1261 #ifdef SAPI_X86_64
1262   // The second argument is a pointer.
1263   AddPolicyOnSyscall(__NR_arch_prctl, {
1264                                           ARG_32(0),
1265                                           JEQ32(ARCH_SET_FS, ALLOW),
1266                                       });
1267 #endif
1268 
1269   if constexpr (sapi::host_cpu::IsArm64()) {
1270     OverridableBlockSyscallWithErrno(__NR_readlinkat, ENOENT);
1271   }
1272 #ifdef __NR_readlink
1273   OverridableBlockSyscallWithErrno(__NR_readlink, ENOENT);
1274 #endif
1275 
1276   AllowGetRlimit();
1277   AddPolicyOnSyscall(__NR_mprotect, {
1278                                         ARG_32(2),
1279                                         JEQ32(PROT_READ, ALLOW),
1280                                     });
1281 
1282   OverridableBlockSyscallWithErrno(__NR_sigaltstack, ENOSYS);
1283 
1284   return *this;
1285 }
1286 
1287 std::enable_if_t<builder_internal::is_type_complete_v<MapExec>, PolicyBuilder&>
AllowDynamicStartup()1288 PolicyBuilder::AllowDynamicStartup() {
1289   if (!allow_map_exec_) {
1290     SetError(absl::FailedPreconditionError(
1291         "Allowing dynamic startup requires Allow(MapExec)."));
1292     return *this;
1293   }
1294   if (allowed_complex_.dynamic_startup) {
1295     return *this;
1296   }
1297   allowed_complex_.dynamic_startup = true;
1298 
1299   AllowAccess();
1300   AllowOpen();
1301   AllowRead();
1302   AllowStat();
1303   AllowSyscalls({__NR_lseek,
1304 #ifdef __NR__llseek
1305                  __NR__llseek,  // Newer glibc on PPC
1306 #endif
1307                  __NR_close, __NR_munmap});
1308   AddPolicyOnSyscall(__NR_mprotect, {
1309                                         ARG_32(2),
1310                                         JEQ32(PROT_READ, ALLOW),
1311                                         JEQ32(PROT_NONE, ALLOW),
1312                                         JEQ32(PROT_READ | PROT_WRITE, ALLOW),
1313                                         JEQ32(PROT_READ | PROT_EXEC, ALLOW),
1314                                     });
1315   AllowStaticStartup();
1316 
1317   return AddPolicyOnMmap([](bpf_labels& labels) -> std::vector<sock_filter> {
1318     return {
1319         ARG_32(2),  // prot
1320         JEQ32(PROT_READ | PROT_EXEC, JUMP(&labels, prot_exec)),
1321         JEQ32(PROT_READ | PROT_WRITE, JUMP(&labels, prot_read_write)),
1322         JNE32(PROT_READ, JUMP(&labels, mmap_end)),
1323 
1324         // PROT_READ
1325         ARG_32(3),  // flags
1326         JEQ32(MAP_PRIVATE, ALLOW),
1327         JUMP(&labels, mmap_end),
1328 
1329         // PROT_READ | PROT_WRITE
1330         LABEL(&labels, prot_read_write),
1331         ARG_32(3),  // flags
1332         JEQ32(MAP_FILE | MAP_PRIVATE | MAP_FIXED | MAP_DENYWRITE, ALLOW),
1333         JEQ32(MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, ALLOW),
1334         JEQ32(MAP_ANONYMOUS | MAP_PRIVATE, ALLOW),
1335         JUMP(&labels, mmap_end),
1336 
1337         // PROT_READ | PROT_EXEC
1338         LABEL(&labels, prot_exec),
1339         ARG_32(3),  // flags
1340         JEQ32(MAP_FILE | MAP_PRIVATE | MAP_DENYWRITE, ALLOW),
1341         JEQ32(MAP_FILE | MAP_PRIVATE | MAP_DENYWRITE | MAP_FIXED, ALLOW),
1342 
1343         LABEL(&labels, mmap_end),
1344     };
1345   });
1346 }
1347 
AddPolicyOnSyscall(uint32_t num,absl::Span<const sock_filter> policy)1348 PolicyBuilder& PolicyBuilder::AddPolicyOnSyscall(
1349     uint32_t num, absl::Span<const sock_filter> policy) {
1350   return AddPolicyOnSyscalls({num}, policy);
1351 }
1352 
AddPolicyOnSyscall(uint32_t num,BpfFunc f)1353 PolicyBuilder& PolicyBuilder::AddPolicyOnSyscall(uint32_t num, BpfFunc f) {
1354   return AddPolicyOnSyscalls({num}, f);
1355 }
1356 
AddPolicyOnSyscalls(absl::Span<const uint32_t> nums,absl::Span<const sock_filter> policy)1357 PolicyBuilder& PolicyBuilder::AddPolicyOnSyscalls(
1358     absl::Span<const uint32_t> nums, absl::Span<const sock_filter> policy) {
1359   if (nums.empty()) {
1360     SetError(absl::InvalidArgumentError(
1361         "Cannot add a policy for empty list of syscalls"));
1362     return *this;
1363   }
1364   std::deque<sock_filter> out;
1365   // Insert and verify the policy.
1366   out.insert(out.end(), policy.begin(), policy.end());
1367   for (size_t i = 0; i < out.size(); ++i) {
1368     sock_filter& filter = out[i];
1369     const size_t max_jump = out.size() - i - 1;
1370     if (!CheckBpfBounds(filter, max_jump)) {
1371       SetError(absl::InvalidArgumentError("bpf jump out of bounds"));
1372       return *this;
1373     }
1374     // Syscall arch is expected as TRACE value
1375     if (filter.code == (BPF_RET | BPF_K) &&
1376         (filter.k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRACE &&
1377         (filter.k & SECCOMP_RET_DATA) != Syscall::GetHostArch()) {
1378       LOG(WARNING) << "SANDBOX2_TRACE should be used in policy instead of "
1379                       "TRACE(value)";
1380       filter = SANDBOX2_TRACE;
1381     }
1382   }
1383   // Pre-/Postcondition: Syscall number loaded into A register
1384   out.push_back(LOAD_SYSCALL_NR);
1385   if (out.size() > std::numeric_limits<uint32_t>::max()) {
1386     SetError(absl::InvalidArgumentError("syscall policy is too long"));
1387     return *this;
1388   }
1389   // Create jumps for each syscall.
1390   size_t do_policy_loc = out.size();
1391   // Iterate in reverse order and prepend instruction, so that jumps can be
1392   // calculated easily.
1393   constexpr size_t kMaxShortJump = 255;
1394   bool last = true;
1395   for (auto it = std::rbegin(nums); it != std::rend(nums); ++it) {
1396     if (*it == __NR_bpf || *it == __NR_ptrace) {
1397       SetError(absl::InvalidArgumentError(
1398           "cannot add policy for bpf/ptrace syscall"));
1399       return *this;
1400     }
1401     // If syscall is not matched try with the next one.
1402     uint8_t jf = 0;
1403     // If last syscall on the list does not match skip the policy by jumping
1404     // over it.
1405     if (last) {
1406       if (out.size() > kMaxShortJump) {
1407         out.push_front(
1408             BPF_STMT(BPF_JMP + BPF_JA, static_cast<uint32_t>(out.size())));
1409       } else {
1410         jf = out.size();
1411       }
1412       last = false;
1413     }
1414     // Add a helper absolute jump if needed - the policy/last helper jump is
1415     // out of reach of a short jump.
1416     if ((out.size() - do_policy_loc) > kMaxShortJump) {
1417       out.push_front(BPF_STMT(
1418           BPF_JMP + BPF_JA, static_cast<uint32_t>(out.size() - policy.size())));
1419       do_policy_loc = out.size();
1420       ++jf;
1421     }
1422     uint8_t jt = out.size() - do_policy_loc;
1423     out.push_front(BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, *it, jt, jf));
1424   }
1425   custom_policy_syscalls_.insert(nums.begin(), nums.end());
1426   user_policy_.insert(user_policy_.end(), out.begin(), out.end());
1427   return *this;
1428 }
1429 
AddPolicyOnSyscalls(absl::Span<const uint32_t> nums,BpfFunc f)1430 PolicyBuilder& PolicyBuilder::AddPolicyOnSyscalls(
1431     absl::Span<const uint32_t> nums, BpfFunc f) {
1432   return AddPolicyOnSyscalls(nums, ResolveBpfFunc(f));
1433 }
1434 
AddPolicyOnMmap(absl::Span<const sock_filter> policy)1435 PolicyBuilder& PolicyBuilder::AddPolicyOnMmap(
1436     absl::Span<const sock_filter> policy) {
1437   return AddPolicyOnSyscalls(kMmapSyscalls, policy);
1438 }
1439 
AddPolicyOnMmap(BpfFunc f)1440 PolicyBuilder& PolicyBuilder::AddPolicyOnMmap(BpfFunc f) {
1441   return AddPolicyOnSyscalls(kMmapSyscalls, f);
1442 }
1443 
DangerDefaultAllowAll()1444 PolicyBuilder& PolicyBuilder::DangerDefaultAllowAll() {
1445   return DefaultAction(AllowAllSyscalls());
1446 }
1447 
DefaultAction(AllowAllSyscalls)1448 PolicyBuilder& PolicyBuilder::DefaultAction(AllowAllSyscalls) {
1449   default_action_ = ALLOW;
1450   return *this;
1451 }
1452 
DefaultAction(TraceAllSyscalls)1453 PolicyBuilder& PolicyBuilder::DefaultAction(TraceAllSyscalls) {
1454   default_action_ = SANDBOX2_TRACE;
1455   return *this;
1456 }
1457 
ResolveBpfFunc(BpfFunc f)1458 std::vector<sock_filter> PolicyBuilder::ResolveBpfFunc(BpfFunc f) {
1459   bpf_labels l = {0};
1460 
1461   std::vector<sock_filter> policy = f(l);
1462   if (bpf_resolve_jumps(&l, policy.data(), policy.size()) != 0) {
1463     SetError(absl::InternalError("Cannot resolve bpf jumps"));
1464   }
1465 
1466   return policy;
1467 }
1468 
TryBuild()1469 absl::StatusOr<std::unique_ptr<Policy>> PolicyBuilder::TryBuild() {
1470   if (!last_status_.ok()) {
1471     return last_status_;
1472   }
1473 
1474   if (user_policy_.size() > kMaxUserPolicyLength) {
1475     return absl::FailedPreconditionError(
1476         absl::StrCat("User syscall policy is to long (", user_policy_.size(),
1477                      " > ", kMaxUserPolicyLength, ")."));
1478   }
1479 
1480   // Using `new` to access a non-public constructor.
1481   auto policy = absl::WrapUnique(new Policy());
1482 
1483   if (already_built_) {
1484     return absl::FailedPreconditionError("Can only build policy once.");
1485   }
1486 
1487   if (use_namespaces_) {
1488     // If no specific netns mode is set, default to per-sandboxee.
1489     if (netns_mode_ == NETNS_MODE_UNSPECIFIED) {
1490       netns_mode_ = NETNS_MODE_PER_SANDBOXEE;
1491     }
1492     if (netns_mode_ == NETNS_MODE_NONE && hostname_ != kDefaultHostname) {
1493       return absl::FailedPreconditionError(
1494           "Cannot set hostname without network namespaces.");
1495     }
1496     policy->namespace_ = Namespace(std::move(mounts_), hostname_, netns_mode_,
1497                                    allow_mount_propagation_);
1498   }
1499 
1500   policy->allow_map_exec_ = allow_map_exec_;
1501   policy->allow_speculation_ = allow_speculation_;
1502   policy->collect_stacktrace_on_signal_ = collect_stacktrace_on_signal_;
1503   policy->collect_stacktrace_on_violation_ = collect_stacktrace_on_violation_;
1504   policy->collect_stacktrace_on_timeout_ = collect_stacktrace_on_timeout_;
1505   policy->collect_stacktrace_on_kill_ = collect_stacktrace_on_kill_;
1506   policy->collect_stacktrace_on_exit_ = collect_stacktrace_on_exit_;
1507   policy->user_policy_ = std::move(user_policy_);
1508   if (default_action_) {
1509     policy->user_policy_.push_back(*default_action_);
1510   }
1511   policy->user_policy_.insert(policy->user_policy_.end(),
1512                               overridable_policy_.begin(),
1513                               overridable_policy_.end());
1514   policy->user_policy_handles_bpf_ = user_policy_handles_bpf_;
1515   policy->user_policy_handles_ptrace_ = user_policy_handles_ptrace_;
1516 
1517   policy->allowed_hosts_ = std::move(allowed_hosts_);
1518   already_built_ = true;
1519   return std::move(policy);
1520 }
1521 
AddFile(absl::string_view path,bool is_ro)1522 PolicyBuilder& PolicyBuilder::AddFile(absl::string_view path, bool is_ro) {
1523   return AddFileAt(path, path, is_ro);
1524 }
1525 
AddFileAt(absl::string_view outside,absl::string_view inside,bool is_ro)1526 PolicyBuilder& PolicyBuilder::AddFileAt(absl::string_view outside,
1527                                         absl::string_view inside, bool is_ro) {
1528   EnableNamespaces();  // NOLINT(clang-diagnostic-deprecated-declarations)
1529   return AddFileAtIfNamespaced(outside, inside, is_ro);
1530 }
1531 
AddFileIfNamespaced(absl::string_view path,bool is_ro)1532 PolicyBuilder& PolicyBuilder::AddFileIfNamespaced(absl::string_view path,
1533                                                   bool is_ro) {
1534   return AddFileAtIfNamespaced(path, path, is_ro);
1535 }
1536 
AddFileAtIfNamespaced(absl::string_view outside,absl::string_view inside,bool is_ro)1537 PolicyBuilder& PolicyBuilder::AddFileAtIfNamespaced(absl::string_view outside,
1538                                                     absl::string_view inside,
1539                                                     bool is_ro) {
1540   auto valid_outside = ValidatePath(outside);
1541   if (!valid_outside.ok()) {
1542     SetError(valid_outside.status());
1543     return *this;
1544   }
1545 
1546   if (absl::StartsWith(*valid_outside, "/proc/self") &&
1547       *valid_outside != "/proc/self/cpuset") {
1548     SetError(absl::InvalidArgumentError(
1549         absl::StrCat("Cannot add /proc/self mounts, you need to mount the "
1550                      "whole /proc instead. You tried to mount ",
1551                      outside)));
1552     return *this;
1553   }
1554 
1555   if (!is_ro && IsOnReadOnlyDev(*valid_outside)) {
1556     SetError(absl::FailedPreconditionError(
1557         absl::StrCat("Cannot add ", outside,
1558                      " as read-write as it's on a read-only device")));
1559     return *this;
1560   }
1561 
1562   if (auto status = mounts_.AddFileAt(*valid_outside, inside, is_ro);
1563       !status.ok()) {
1564     SetError(
1565         absl::InternalError(absl::StrCat("Could not add file ", outside, " => ",
1566                                          inside, ": ", status.message())));
1567   }
1568   return *this;
1569 }
1570 
AddLibrariesForBinary(absl::string_view path,absl::string_view ld_library_path)1571 PolicyBuilder& PolicyBuilder::AddLibrariesForBinary(
1572     absl::string_view path, absl::string_view ld_library_path) {
1573   EnableNamespaces();  // NOLINT(clang-diagnostic-deprecated-declarations)
1574 
1575   auto valid_path = ValidatePath(path, /*allow_relative_path=*/true);
1576   if (!valid_path.ok()) {
1577     SetError(valid_path.status());
1578     return *this;
1579   }
1580 
1581   if (auto status = mounts_.AddMappingsForBinary(*valid_path, ld_library_path);
1582       !status.ok()) {
1583     SetError(absl::InternalError(absl::StrCat(
1584         "Could not add libraries for ", *valid_path, ": ", status.message())));
1585   }
1586   return *this;
1587 }
1588 
AddLibrariesForBinary(int fd,absl::string_view ld_library_path)1589 PolicyBuilder& PolicyBuilder::AddLibrariesForBinary(
1590     int fd, absl::string_view ld_library_path) {
1591   return AddLibrariesForBinary(absl::StrCat("/proc/self/fd/", fd),
1592                                ld_library_path);
1593 }
1594 
AddDirectory(absl::string_view path,bool is_ro)1595 PolicyBuilder& PolicyBuilder::AddDirectory(absl::string_view path, bool is_ro) {
1596   return AddDirectoryAt(path, path, is_ro);
1597 }
1598 
AddDirectoryAt(absl::string_view outside,absl::string_view inside,bool is_ro)1599 PolicyBuilder& PolicyBuilder::AddDirectoryAt(absl::string_view outside,
1600                                              absl::string_view inside,
1601                                              bool is_ro) {
1602   EnableNamespaces();  // NOLINT(clang-diagnostic-deprecated-declarations)
1603   return AddDirectoryAtIfNamespaced(outside, inside, is_ro);
1604 }
1605 
AddDirectoryIfNamespaced(absl::string_view path,bool is_ro)1606 PolicyBuilder& PolicyBuilder::AddDirectoryIfNamespaced(absl::string_view path,
1607                                                        bool is_ro) {
1608   return AddDirectoryAtIfNamespaced(path, path, is_ro);
1609 }
1610 
AddDirectoryAtIfNamespaced(absl::string_view outside,absl::string_view inside,bool is_ro)1611 PolicyBuilder& PolicyBuilder::AddDirectoryAtIfNamespaced(
1612     absl::string_view outside, absl::string_view inside, bool is_ro) {
1613   auto valid_outside = ValidatePath(outside);
1614   if (!valid_outside.ok()) {
1615     SetError(valid_outside.status());
1616     return *this;
1617   }
1618 
1619   if (absl::StartsWith(*valid_outside, "/proc/self")) {
1620     SetError(absl::InvalidArgumentError(
1621         absl::StrCat("Cannot add /proc/self mounts, you need to mount the "
1622                      "whole /proc instead. You tried to mount ",
1623                      outside)));
1624     return *this;
1625   }
1626 
1627   if (!is_ro && IsOnReadOnlyDev(*valid_outside)) {
1628     SetError(absl::FailedPreconditionError(
1629         absl::StrCat("Cannot add ", outside,
1630                      " as read-write as it's on a read-only device")));
1631     return *this;
1632   }
1633 
1634   if (absl::Status status =
1635           mounts_.AddDirectoryAt(*valid_outside, inside, is_ro);
1636       !status.ok()) {
1637     SetError(absl::InternalError(absl::StrCat("Could not add directory ",
1638                                               outside, " => ", inside, ": ",
1639                                               status.message())));
1640     return *this;
1641   }
1642   return *this;
1643 }
1644 
AddTmpfs(absl::string_view inside,size_t size)1645 PolicyBuilder& PolicyBuilder::AddTmpfs(absl::string_view inside, size_t size) {
1646   EnableNamespaces();  // NOLINT(clang-diagnostic-deprecated-declarations)
1647 
1648   if (auto status = mounts_.AddTmpfs(inside, size); !status.ok()) {
1649     SetError(absl::InternalError(absl::StrCat("Could not mount tmpfs ", inside,
1650                                               ": ", status.message())));
1651   }
1652   return *this;
1653 }
1654 
1655 // Use Allow(sandbox2::UnrestrictedNetworking()) instead.
AllowUnrestrictedNetworking()1656 PolicyBuilder& PolicyBuilder::AllowUnrestrictedNetworking() {
1657   return Allow(UnrestrictedNetworking());
1658 }
1659 
UseForkServerSharedNetNs()1660 PolicyBuilder& PolicyBuilder::UseForkServerSharedNetNs() {
1661   EnableNamespaces();  // NOLINT(clang-diagnostic-deprecated-declarations)
1662 
1663   if (netns_mode_ != NETNS_MODE_UNSPECIFIED) {
1664     SetError(absl::FailedPreconditionError(absl::StrCat(
1665         "Incompatible with other network namespaces modes. A sandbox can have "
1666         "only one network namespace mode. Attempted to configure: ",
1667         NetNsMode_Name(netns_mode_))));
1668     return *this;
1669   }
1670 
1671   netns_mode_ = NETNS_MODE_SHARED_PER_FORKSERVER;
1672   return *this;
1673 }
1674 
SetHostname(absl::string_view hostname)1675 PolicyBuilder& PolicyBuilder::SetHostname(absl::string_view hostname) {
1676   EnableNamespaces();  // NOLINT(clang-diagnostic-deprecated-declarations)
1677   hostname_ = std::string(hostname);
1678 
1679   return *this;
1680 }
1681 
CollectStacktracesOnViolation(bool enable)1682 PolicyBuilder& PolicyBuilder::CollectStacktracesOnViolation(bool enable) {
1683   collect_stacktrace_on_violation_ = enable;
1684   return *this;
1685 }
1686 
CollectStacktracesOnSignal(bool enable)1687 PolicyBuilder& PolicyBuilder::CollectStacktracesOnSignal(bool enable) {
1688   collect_stacktrace_on_signal_ = enable;
1689   return *this;
1690 }
1691 
CollectStacktracesOnTimeout(bool enable)1692 PolicyBuilder& PolicyBuilder::CollectStacktracesOnTimeout(bool enable) {
1693   collect_stacktrace_on_timeout_ = enable;
1694   return *this;
1695 }
1696 
CollectStacktracesOnKill(bool enable)1697 PolicyBuilder& PolicyBuilder::CollectStacktracesOnKill(bool enable) {
1698   collect_stacktrace_on_kill_ = enable;
1699   return *this;
1700 }
1701 
CollectStacktracesOnExit(bool enable)1702 PolicyBuilder& PolicyBuilder::CollectStacktracesOnExit(bool enable) {
1703   collect_stacktrace_on_exit_ = enable;
1704   return *this;
1705 }
1706 
AddNetworkProxyPolicy()1707 PolicyBuilder& PolicyBuilder::AddNetworkProxyPolicy() {
1708   if (allowed_hosts_) {
1709     SetError(absl::FailedPreconditionError(
1710         "AddNetworkProxyPolicy or AddNetworkProxyHandlerPolicy can be called "
1711         "at most once"));
1712     return *this;
1713   }
1714 
1715   allowed_hosts_ = AllowedHosts();
1716 
1717   AllowSafeFcntl();
1718   AllowFutexOp(FUTEX_WAKE);
1719   AllowFutexOp(FUTEX_WAIT);
1720   AllowFutexOp(FUTEX_WAIT_BITSET);
1721   AllowDup();
1722   AllowSyscalls({
1723       __NR_recvmsg,
1724       __NR_close,
1725       __NR_gettid,
1726   });
1727   AddPolicyOnSyscall(__NR_socket, {
1728                                       ARG_32(0),
1729                                       JEQ32(AF_INET, ALLOW),
1730                                       JEQ32(AF_INET6, ALLOW),
1731                                   });
1732   AddPolicyOnSyscall(__NR_getsockopt,
1733                      [](bpf_labels& labels) -> std::vector<sock_filter> {
1734                        return {
1735                            ARG_32(1),
1736                            JNE32(SOL_SOCKET, JUMP(&labels, getsockopt_end)),
1737                            ARG_32(2),
1738                            JEQ32(SO_TYPE, ALLOW),
1739                            LABEL(&labels, getsockopt_end),
1740                        };
1741                      });
1742 #ifdef SAPI_PPC64_LE
1743   AddPolicyOnSyscall(__NR_socketcall, {
1744                                           ARG_32(0),
1745                                           JEQ32(SYS_SOCKET, ALLOW),
1746                                           JEQ32(SYS_GETSOCKOPT, ALLOW),
1747                                           JEQ32(SYS_RECVMSG, ALLOW),
1748                                       });
1749 #endif
1750   return *this;
1751 }
1752 
AddNetworkProxyHandlerPolicy()1753 PolicyBuilder& PolicyBuilder::AddNetworkProxyHandlerPolicy() {
1754   AddNetworkProxyPolicy();
1755   AllowSyscall(__NR_rt_sigreturn);
1756 
1757   AddPolicyOnSyscall(__NR_rt_sigaction, {
1758                                             ARG_32(0),
1759                                             JEQ32(SIGSYS, ALLOW),
1760                                         });
1761 
1762   AddPolicyOnSyscall(__NR_rt_sigprocmask, {
1763                                               ARG_32(0),
1764                                               JEQ32(SIG_UNBLOCK, ALLOW),
1765                                           });
1766 
1767   AddPolicyOnSyscall(__NR_connect, {TRAP(0)});
1768 #ifdef SAPI_PPC64_LE
1769   AddPolicyOnSyscall(__NR_socketcall, {
1770                                           ARG_32(0),
1771                                           JEQ32(SYS_CONNECT, TRAP(0)),
1772                                       });
1773 #endif
1774   return *this;
1775 }
1776 
TrapPtrace()1777 PolicyBuilder& PolicyBuilder::TrapPtrace() {
1778   if (handled_syscalls_.insert(__NR_ptrace).second) {
1779     user_policy_.insert(user_policy_.end(), {SYSCALL(__NR_ptrace, TRAP(0))});
1780     user_policy_handles_ptrace_ = true;
1781   }
1782   return *this;
1783 }
1784 
SetRootWritable()1785 PolicyBuilder& PolicyBuilder::SetRootWritable() {
1786   EnableNamespaces();  // NOLINT(clang-diagnostic-deprecated-declarations)
1787   mounts_.SetRootWritable();
1788 
1789   return *this;
1790 }
1791 
AllowIPv4(const std::string & ip_and_mask,uint32_t port)1792 PolicyBuilder& PolicyBuilder::AllowIPv4(const std::string& ip_and_mask,
1793                                         uint32_t port) {
1794   if (!allowed_hosts_) {
1795     SetError(absl::FailedPreconditionError(
1796         "AddNetworkProxyPolicy or AddNetworkProxyHandlerPolicy must be called "
1797         "before adding IP rules"));
1798     return *this;
1799   }
1800 
1801   absl::Status status = allowed_hosts_->AllowIPv4(ip_and_mask, port);
1802   if (!status.ok()) {
1803     SetError(status);
1804   }
1805   return *this;
1806 }
1807 
AllowIPv6(const std::string & ip_and_mask,uint32_t port)1808 PolicyBuilder& PolicyBuilder::AllowIPv6(const std::string& ip_and_mask,
1809                                         uint32_t port) {
1810   if (!allowed_hosts_) {
1811     SetError(absl::FailedPreconditionError(
1812         "AddNetworkProxyPolicy or AddNetworkProxyHandlerPolicy must be called "
1813         "before adding IP rules"));
1814     return *this;
1815   }
1816 
1817   absl::Status status = allowed_hosts_->AllowIPv6(ip_and_mask, port);
1818   if (!status.ok()) {
1819     SetError(status);
1820   }
1821   return *this;
1822 }
1823 
SetError(const absl::Status & status)1824 PolicyBuilder& PolicyBuilder::SetError(const absl::Status& status) {
1825   LOG(ERROR) << status;
1826   last_status_ = status;
1827   return *this;
1828 }
1829 
AnchorPathAbsolute(absl::string_view relative_path,absl::string_view base)1830 std::string PolicyBuilder::AnchorPathAbsolute(absl::string_view relative_path,
1831                                               absl::string_view base) {
1832   if (relative_path.empty()) {
1833     LOG(ERROR) << "Passed relative_path is empty";
1834     return "";
1835   }
1836 
1837   if (file::IsAbsolutePath(relative_path)) {
1838     VLOG(3) << "Nothing to do, relative_path is absolute";
1839     return std::string(relative_path);
1840   }
1841 
1842   std::string clean_path = file::CleanPath(relative_path);
1843   if (absl::StartsWith(clean_path, "../") || clean_path == "..") {
1844     LOG(ERROR)
1845         << "Anchored path would be outside of base because relative_path: '"
1846         << relative_path << "' starts with '..'";
1847     return "";
1848   }
1849 
1850   if (file::IsAbsolutePath(base)) {
1851     return file::CleanPath(file::JoinPath(base, clean_path));
1852   }
1853 
1854   std::string cwd = fileops::GetCWD();
1855   if (cwd.empty()) {
1856     LOG(ERROR) << "Failed to get current working directory";
1857     return "";
1858   }
1859 
1860   if (base.empty()) {
1861     VLOG(1) << "Using current working directory as base is empty";
1862     // CWD is guaranteed to exist and clean_path is guaranteed to not start with
1863     // '..'.
1864     return file::CleanPath(file::JoinPath(cwd, clean_path));
1865   }
1866 
1867   return file::CleanPath(file::JoinPath(cwd, base, clean_path));
1868 }
1869 
1870 }  // namespace sandbox2
1871