• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Program explore is evolved from the code discussed in more depth
2// here:
3//
4//   https://github.com/golang/go/issues/3405
5//
6// The code here demonstrates that while PR_SET_NO_NEW_PRIVS only
7// applies to the calling thread, since
8// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=103502a35cfce0710909da874f092cb44823ca03
9// the seccomp filter application forces the setting to be mirrored on
10// all the threads of a process.
11//
12// Based on the command line options, we can manipulate the program to
13// behave in various ways. Example command lines:
14//
15//   sudo ./explore
16//   sudo ./explore --kill=false
17//   sudo ./explore --kill=false --errno=0
18//
19// Supported Go toolchains are after go1.10. Those prior to go1.15
20// require this environment variable to be set to build successfully:
21//
22//   export CGO_LDFLAGS_ALLOW="-Wl,-?-wrap[=,][^-.@][^,]*"
23//
24// Go toolchains go1.16+ can be compiled CGO_ENABLED=0 too,
25// demonstrating native nocgo support for seccomp features.
26package main
27
28import (
29	"flag"
30	"fmt"
31	"log"
32	"runtime"
33	"syscall"
34	"time"
35	"unsafe"
36
37	"kernel.org/pub/linux/libs/security/libcap/psx"
38)
39
// Command line flags controlling the demonstration:
//
//   - withPSX: prctl() is issued through psx.Syscall6 rather than
//     syscall.RawSyscall6, and applyPolicy skips pinning an OS thread.
//   - delays: sleep for a minute at start-up and after each successful
//     prctl() so the process can be inspected from outside.
//   - kill: when true main() installs a trap rule for setuid; when
//     false it installs a rule that fails setuid with the errno flag.
//   - errno: the error number handed to DisallowSyscall when kill is
//     false.
var (
	withPSX = flag.Bool("psx", false, "use the psx mechanism to invoke prctl syscall")
	delays  = flag.Bool("delays", false, "use this to pause the program at various places")
	kill    = flag.Bool("kill", true, "kill the process if setuid attempted")
	errno   = flag.Int("errno", int(syscall.ENOTSUP), "if kill is false, block syscall and return this errno")
)
46
// Kernel ABI values used to build and install the seccomp filter. All
// of them are specific to Linux, and sysSeccomp and auditArchX86_64
// are additionally specific to x86_64.
const (
	// prSetNoNewPrivs is the prctl(2) option PR_SET_NO_NEW_PRIVS.
	prSetNoNewPrivs = 38

	// seccomp(2) syscall number, operation, flag and filter return
	// actions.
	sysSeccomp             = 317        // x86_64 syscall number
	seccompSetModeFilter   = 1          // uses user-supplied filter.
	seccompFilterFlagTsync = (1 << 0)   // mirror filtering on all threads.
	seccompRetErrno        = 0x00050000 // returns an errno
	seccompRetData         = 0x0000ffff // mask for RET data payload (ex. errno)
	seccompRetKillProcess  = 0x80000000 // kill the whole process immediately
	seccompRetTrap         = 0x00030000 // disallow and force a SIGSYS
	seccompRetAllow        = 0x7fff0000 // let the syscall proceed

	// Classic BPF instruction classes.
	bpfLd  = 0x00
	bpfJmp = 0x05
	bpfRet = 0x06

	// Classic BPF operand size: 32-bit word.
	bpfW = 0x00

	// Classic BPF addressing mode (absolute offset) and jump
	// condition (jump if equal).
	bpfAbs = 0x20
	bpfJeq = 0x10

	// Classic BPF operand source: the instruction's K field.
	bpfK = 0x00

	// auditArchX86_64 is the kernel's AUDIT_ARCH_X86_64 identifier:
	// the ELF machine number EM_X86_64 (62) combined with the
	// __AUDIT_ARCH_64BIT (0x80000000) and __AUDIT_ARCH_LE
	// (0x40000000) flag bits, i.e. 0xc000003e.
	auditArchX86_64 = 62 | 0x80000000 | 0x40000000
	// archNr is the architecture value the filter insists on. It is
	// a value to compare against, not an offset.
	archNr = auditArchX86_64

	// syscallNr is the byte offset of the syscall number (the nr
	// field) within the kernel's struct seccomp_data.
	syscallNr = 0
)
75
// SockFilter is a single filter instruction. A contiguous run of
// these, referenced through SockFProg, is what this program hands to
// the seccomp system call.
type SockFilter struct {
	// Code is the filter code instruction.
	Code uint16
	// Jt is the target for a true result from the code execution.
	// As used in this file it is the count of following instructions
	// to skip.
	Jt uint8
	// Jf is the target for a false result from the code execution.
	// As used in this file it is the count of following instructions
	// to skip.
	Jf uint8
	// K is a generic multiuse field: in this file it carries a load
	// offset, a comparison value or a return action.
	K uint32
}
87
// SockFProg is a filter program descriptor: a length plus a pointer
// to the first instruction. SeccompSetModeFilter passes its address
// to the seccomp system call.
type SockFProg struct {
	// Len is the number of contiguous SockFilter blocks that can
	// be found at *Filter.
	Len uint16
	// Filter is the address of the first SockFilter block of a
	// program sequence.
	Filter *SockFilter
}
97
// SockFilterSlice is a subprogram filter: a named slice of SockFilter
// instructions. Nothing else in this file refers to it; the helper
// functions return plain []SockFilter values.
type SockFilterSlice []SockFilter
100
101func bpfStmt(code uint16, k uint32) SockFilter {
102	return SockFilter{code, 0, 0, k}
103}
104
105func bpfJump(code uint16, k uint32, jt uint8, jf uint8) SockFilter {
106	return SockFilter{code, jt, jf, k}
107}
108
109func validateArchitecture() []SockFilter {
110	return []SockFilter{
111		bpfStmt(bpfLd+bpfW+bpfAbs, 4), // HACK: I don't understand this 4.
112		bpfJump(bpfJmp+bpfJeq+bpfK, archNr, 1, 0),
113		bpfStmt(bpfRet+bpfK, seccompRetKillProcess),
114	}
115}
116
117func ExamineSyscall() []SockFilter {
118	return []SockFilter{
119		bpfStmt(bpfLd+bpfW+bpfAbs, syscallNr),
120	}
121}
122
123func AllowSyscall(syscallNum uint32) []SockFilter {
124	return []SockFilter{
125		bpfJump(bpfJmp+bpfJeq+bpfK, syscallNum, 0, 1),
126		bpfStmt(bpfRet+bpfK, seccompRetAllow),
127	}
128}
129
130func DisallowSyscall(syscallNum, errno uint32) []SockFilter {
131	return []SockFilter{
132		bpfJump(bpfJmp+bpfJeq+bpfK, syscallNum, 0, 1),
133		bpfStmt(bpfRet+bpfK, seccompRetErrno|(errno&seccompRetData)),
134	}
135}
136
137func KillProcess() []SockFilter {
138	return []SockFilter{
139		bpfStmt(bpfRet+bpfK, seccompRetKillProcess),
140	}
141}
142
143func NotifyProcessAndDie() []SockFilter {
144	return []SockFilter{
145		bpfStmt(bpfRet+bpfK, seccompRetTrap),
146	}
147}
148
149func TrapOnSyscall(syscallNum uint32) []SockFilter {
150	return []SockFilter{
151		bpfJump(bpfJmp+bpfJeq+bpfK, syscallNum, 0, 1),
152		bpfStmt(bpfRet+bpfK, seccompRetTrap),
153	}
154}
155
156func AllGood() []SockFilter {
157	return []SockFilter{
158		bpfStmt(bpfRet+bpfK, seccompRetAllow),
159	}
160}
161
162// prctl executes the prctl - unless the --psx commandline argument is
163// used, this is on a single thread.
164//go:uintptrescapes
165func prctl(option, arg1, arg2, arg3, arg4, arg5 uintptr) error {
166	var e syscall.Errno
167	if *withPSX {
168		_, _, e = psx.Syscall6(syscall.SYS_PRCTL, option, arg1, arg2, arg3, arg4, arg5)
169	} else {
170		_, _, e = syscall.RawSyscall6(syscall.SYS_PRCTL, option, arg1, arg2, arg3, arg4, arg5)
171	}
172	if e != 0 {
173		return e
174	}
175	if *delays {
176		fmt.Println("prctl'd - check now")
177		time.Sleep(1 * time.Minute)
178	}
179	return nil
180}
181
182// SeccompSetModeFilter is our wrapper for performing our seccomp system call.
183//go:uintptrescapes
184func SeccompSetModeFilter(prog *SockFProg) error {
185	if _, _, e := syscall.RawSyscall(sysSeccomp, seccompSetModeFilter, seccompFilterFlagTsync, uintptr(unsafe.Pointer(prog))); e != 0 {
186		return e
187	}
188	return nil
189}
190
// empty is a nil function value. Nothing in this file references it.
var empty func()
192
// lockProcessThread returns with the calling goroutine locked to an
// OS thread whose thread ID equals the process ID when pick is true,
// or differs from it when pick is false. It loops until such a thread
// is found. The goroutine is left locked on return; unlocking is up
// to the caller.
func lockProcessThread(pick bool) {
	// Make sure we are running on a thread of the requested kind
	// (TID == PID or TID != PID) before returning.
	pid := uintptr(syscall.Getpid())
	runtime.LockOSThread()
	for {
		tid, _, _ := syscall.RawSyscall(syscall.SYS_GETTID, 0, 0, 0)
		if (tid == pid) == pick {
			fmt.Println("validated TID:", tid, "== PID:", pid, "is", pick)
			break
		}
		// Wrong kind of thread: release it, start a short-lived
		// goroutine and yield before locking again.
		// NOTE(review): presumably this is meant to give the
		// scheduler a chance to resume us on a different thread.
		runtime.UnlockOSThread()
		go func() {
			time.Sleep(1 * time.Microsecond)
		}()
		runtime.Gosched()
		runtime.LockOSThread()
	}
}
211
212// applyPolicy uploads the program sequence.
213func applyPolicy(prog *SockFProg) {
214	// Without PSX we can't guarantee the thread we execute the
215	// seccomp call on will be the same one that we disabled new
216	// privs on. With PSX, the disabling of new privs is mirrored
217	// on all threads.
218	if !*withPSX {
219		lockProcessThread(false)
220		defer runtime.UnlockOSThread()
221	}
222
223	// This is required to load a filter without privilege.
224	if err := prctl(prSetNoNewPrivs, 1, 0, 0, 0, 0); err != nil {
225		log.Fatalf("Prctl(PR_SET_NO_NEW_PRIVS): %v", err)
226	}
227
228	fmt.Println("Applying syscall policy...")
229	if err := SeccompSetModeFilter(prog); err != nil {
230		log.Fatalf("seccomp_set_mode_filter: %v", err)
231	}
232	fmt.Println("...Policy applied")
233}
234
235func main() {
236	flag.Parse()
237
238	if *delays {
239		fmt.Println("check first", syscall.Getpid())
240		time.Sleep(60 * time.Second)
241	}
242
243	var filter []SockFilter
244	filter = append(filter, validateArchitecture()...)
245
246	// Grab the system call number.
247	filter = append(filter, ExamineSyscall()...)
248
249	// List disallowed syscalls.
250	for _, x := range []uint32{
251		syscall.SYS_SETUID,
252	} {
253		if *kill {
254			filter = append(filter, TrapOnSyscall(x)...)
255		} else {
256			filter = append(filter, DisallowSyscall(x, uint32(*errno))...)
257		}
258	}
259
260	filter = append(filter, AllGood()...)
261
262	prog := &SockFProg{
263		Len:    uint16(len(filter)),
264		Filter: &filter[0],
265	}
266
267	applyPolicy(prog)
268
269	// Ensure we are running on the TID=PID.
270	lockProcessThread(true)
271
272	log.Print("Now it is time to try to run something privileged...")
273	if _, _, e := syscall.RawSyscall(syscall.SYS_SETUID, 1, 0, 0); e != 0 {
274		log.Fatalf("setuid failed with an error: %v", e)
275	}
276	log.Print("Looked like that worked, but it really didn't: uid == ", syscall.Getuid(), " != 1")
277}
278