1// Program explore is evolved from the code discussed in more depth 2// here: 3// 4// https://github.com/golang/go/issues/3405 5// 6// The code here demonstrates that while PR_SET_NO_NEW_PRIVS only 7// applies to the calling thread, since 8// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=103502a35cfce0710909da874f092cb44823ca03 9// the seccomp filter application forces the setting to be mirrored on 10// all the threads of a process. 11// 12// Based on the command line options, we can manipulate the program to 13// behave in various ways. Example command lines: 14// 15// sudo ./explore 16// sudo ./explore --kill=false 17// sudo ./explore --kill=false --errno=0 18// 19// Supported Go toolchains are after go1.10. Those prior to go1.15 20// require this environment variable to be set to build successfully: 21// 22// export CGO_LDFLAGS_ALLOW="-Wl,-?-wrap[=,][^-.@][^,]*" 23// 24// Go toolchains go1.16+ can be compiled CGO_ENABLED=0 too, 25// demonstrating native nocgo support for seccomp features. 26package main 27 28import ( 29 "flag" 30 "fmt" 31 "log" 32 "runtime" 33 "syscall" 34 "time" 35 "unsafe" 36 37 "kernel.org/pub/linux/libs/security/libcap/psx" 38) 39 40var ( 41 withPSX = flag.Bool("psx", false, "use the psx mechanism to invoke prctl syscall") 42 delays = flag.Bool("delays", false, "use this to pause the program at various places") 43 kill = flag.Bool("kill", true, "kill the process if setuid attempted") 44 errno = flag.Int("errno", int(syscall.ENOTSUP), "if kill is false, block syscall and return this errno") 45) 46 47const ( 48 prSetNoNewPrivs = 38 49 50 sysSeccomp = 317 // x86_64 syscall number 51 seccompSetModeFilter = 1 // uses user-supplied filter. 52 seccompFilterFlagTsync = (1 << 0) // mirror filtering on all threads. 53 seccompRetErrno = 0x00050000 // returns an errno 54 seccompRetData = 0x0000ffff // mask for RET data payload (ex. errno) 55 seccompRetKillProcess = 0x80000000 // kill the whole process immediately 56 seccompRetTrap = 0x00030000 // disallow and force a SIGSYS 57 seccompRetAllow = 0x7fff0000 58 59 bpfLd = 0x00 60 bpfJmp = 0x05 61 bpfRet = 0x06 62 63 bpfW = 0x00 64 65 bpfAbs = 0x20 66 bpfJeq = 0x10 67 68 bpfK = 0x00 69 70 auditArchX86_64 = 3221225534 // HACK: I don't understand this value 71 archNr = auditArchX86_64 72 73 syscallNr = 0 74) 75 76// SockFilter is a single filter block. 77type SockFilter struct { 78 // Code is the filter code instruction. 79 Code uint16 80 // Jt is the target for a true result from the code execution. 81 Jt uint8 82 // Jf is the target for a false result from the code execution. 83 Jf uint8 84 // K is a generic multiuse field 85 K uint32 86} 87 88// SockFProg is a 89type SockFProg struct { 90 // Len is the number of contiguous SockFilter blocks that can 91 // be found at *Filter. 92 Len uint16 93 // Filter is the address of the first SockFilter block of a 94 // program sequence. 95 Filter *SockFilter 96} 97 98// SockFilterSlice is a subprogram filter. 99type SockFilterSlice []SockFilter 100 101func bpfStmt(code uint16, k uint32) SockFilter { 102 return SockFilter{code, 0, 0, k} 103} 104 105func bpfJump(code uint16, k uint32, jt uint8, jf uint8) SockFilter { 106 return SockFilter{code, jt, jf, k} 107} 108 109func validateArchitecture() []SockFilter { 110 return []SockFilter{ 111 bpfStmt(bpfLd+bpfW+bpfAbs, 4), // HACK: I don't understand this 4. 112 bpfJump(bpfJmp+bpfJeq+bpfK, archNr, 1, 0), 113 bpfStmt(bpfRet+bpfK, seccompRetKillProcess), 114 } 115} 116 117func ExamineSyscall() []SockFilter { 118 return []SockFilter{ 119 bpfStmt(bpfLd+bpfW+bpfAbs, syscallNr), 120 } 121} 122 123func AllowSyscall(syscallNum uint32) []SockFilter { 124 return []SockFilter{ 125 bpfJump(bpfJmp+bpfJeq+bpfK, syscallNum, 0, 1), 126 bpfStmt(bpfRet+bpfK, seccompRetAllow), 127 } 128} 129 130func DisallowSyscall(syscallNum, errno uint32) []SockFilter { 131 return []SockFilter{ 132 bpfJump(bpfJmp+bpfJeq+bpfK, syscallNum, 0, 1), 133 bpfStmt(bpfRet+bpfK, seccompRetErrno|(errno&seccompRetData)), 134 } 135} 136 137func KillProcess() []SockFilter { 138 return []SockFilter{ 139 bpfStmt(bpfRet+bpfK, seccompRetKillProcess), 140 } 141} 142 143func NotifyProcessAndDie() []SockFilter { 144 return []SockFilter{ 145 bpfStmt(bpfRet+bpfK, seccompRetTrap), 146 } 147} 148 149func TrapOnSyscall(syscallNum uint32) []SockFilter { 150 return []SockFilter{ 151 bpfJump(bpfJmp+bpfJeq+bpfK, syscallNum, 0, 1), 152 bpfStmt(bpfRet+bpfK, seccompRetTrap), 153 } 154} 155 156func AllGood() []SockFilter { 157 return []SockFilter{ 158 bpfStmt(bpfRet+bpfK, seccompRetAllow), 159 } 160} 161 162// prctl executes the prctl - unless the --psx commandline argument is 163// used, this is on a single thread. 164//go:uintptrescapes 165func prctl(option, arg1, arg2, arg3, arg4, arg5 uintptr) error { 166 var e syscall.Errno 167 if *withPSX { 168 _, _, e = psx.Syscall6(syscall.SYS_PRCTL, option, arg1, arg2, arg3, arg4, arg5) 169 } else { 170 _, _, e = syscall.RawSyscall6(syscall.SYS_PRCTL, option, arg1, arg2, arg3, arg4, arg5) 171 } 172 if e != 0 { 173 return e 174 } 175 if *delays { 176 fmt.Println("prctl'd - check now") 177 time.Sleep(1 * time.Minute) 178 } 179 return nil 180} 181 182// SeccompSetModeFilter is our wrapper for performing our seccomp system call. 183//go:uintptrescapes 184func SeccompSetModeFilter(prog *SockFProg) error { 185 if _, _, e := syscall.RawSyscall(sysSeccomp, seccompSetModeFilter, seccompFilterFlagTsync, uintptr(unsafe.Pointer(prog))); e != 0 { 186 return e 187 } 188 return nil 189} 190 191var empty func() 192 193func lockProcessThread(pick bool) { 194 // Make sure we are 195 pid := uintptr(syscall.Getpid()) 196 runtime.LockOSThread() 197 for { 198 tid, _, _ := syscall.RawSyscall(syscall.SYS_GETTID, 0, 0, 0) 199 if (tid == pid) == pick { 200 fmt.Println("validated TID:", tid, "== PID:", pid, "is", pick) 201 break 202 } 203 runtime.UnlockOSThread() 204 go func() { 205 time.Sleep(1 * time.Microsecond) 206 }() 207 runtime.Gosched() 208 runtime.LockOSThread() 209 } 210} 211 212// applyPolicy uploads the program sequence. 213func applyPolicy(prog *SockFProg) { 214 // Without PSX we can't guarantee the thread we execute the 215 // seccomp call on will be the same one that we disabled new 216 // privs on. With PSX, the disabling of new privs is mirrored 217 // on all threads. 218 if !*withPSX { 219 lockProcessThread(false) 220 defer runtime.UnlockOSThread() 221 } 222 223 // This is required to load a filter without privilege. 224 if err := prctl(prSetNoNewPrivs, 1, 0, 0, 0, 0); err != nil { 225 log.Fatalf("Prctl(PR_SET_NO_NEW_PRIVS): %v", err) 226 } 227 228 fmt.Println("Applying syscall policy...") 229 if err := SeccompSetModeFilter(prog); err != nil { 230 log.Fatalf("seccomp_set_mode_filter: %v", err) 231 } 232 fmt.Println("...Policy applied") 233} 234 235func main() { 236 flag.Parse() 237 238 if *delays { 239 fmt.Println("check first", syscall.Getpid()) 240 time.Sleep(60 * time.Second) 241 } 242 243 var filter []SockFilter 244 filter = append(filter, validateArchitecture()...) 245 246 // Grab the system call number. 247 filter = append(filter, ExamineSyscall()...) 248 249 // List disallowed syscalls. 250 for _, x := range []uint32{ 251 syscall.SYS_SETUID, 252 } { 253 if *kill { 254 filter = append(filter, TrapOnSyscall(x)...) 255 } else { 256 filter = append(filter, DisallowSyscall(x, uint32(*errno))...) 257 } 258 } 259 260 filter = append(filter, AllGood()...) 261 262 prog := &SockFProg{ 263 Len: uint16(len(filter)), 264 Filter: &filter[0], 265 } 266 267 applyPolicy(prog) 268 269 // Ensure we are running on the TID=PID. 270 lockProcessThread(true) 271 272 log.Print("Now it is time to try to run something privileged...") 273 if _, _, e := syscall.RawSyscall(syscall.SYS_SETUID, 1, 0, 0); e != 0 { 274 log.Fatalf("setuid failed with an error: %v", e) 275 } 276 log.Print("Looked like that worked, but it really didn't: uid == ", syscall.Getuid(), " != 1") 277} 278