• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2021 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package syscall
6
7import (
8	"runtime"
9	"unsafe"
10)
11
// SysProcAttr holds the optional, operating-system-specific attributes
// that forkAndExecInChild applies to the child process between fork and
// exec. The zero value requests none of the optional steps.
type SysProcAttr struct {
	Chroot     string      // Chroot directory for the child.
	Credential *Credential // Groups, GID and UID the child assumes before exec; nil leaves them unchanged.
	Ptrace     bool        // Enable tracing: the child issues PTRACE_TRACEME before exec.
	Setsid     bool        // Create a new session for the child (setsid).
	// Setpgid sets the process group ID of the child to Pgid,
	// or, if Pgid == 0, to the new child's process ID.
	Setpgid bool
	// Setctty sets the controlling terminal of the child to
	// file descriptor Ctty. Ctty must be a descriptor number
	// in the child process: an index into ProcAttr.Files.
	// This is only meaningful if Setsid is true.
	Setctty bool
	Noctty  bool // Detach fd 0 from the controlling terminal (TIOCNOTTY).
	Ctty    int  // Controlling TTY fd; consulted by Setctty and Foreground.
	// Foreground places the child process group in the foreground.
	// This implies Setpgid. The Ctty field must be set to
	// the descriptor of the controlling TTY.
	// Unlike Setctty, in this case Ctty must be a descriptor
	// number in the parent process.
	Foreground bool
	Pgid       int    // Child's process group ID if Setpgid (or Foreground, which implies it).
	Pdeathsig  Signal // Signal that the process will get when its parent dies (Linux and FreeBSD only); 0 disables it.
	Jail       int    // Jail to which the child process is attached; applied only when > 0 (FreeBSD only).
}
37
// Arguments for the SYS_PROCCTL call that installs the parent-death
// signal in forkAndExecInChild.
const (
	// _P_PID is passed as the first (id type) argument of SYS_PROCCTL.
	// NOTE(review): presumably mirrors P_PID from FreeBSD's <sys/wait.h>,
	// i.e. "the id names a process" — confirm against the system headers.
	_P_PID = 0

	// _PROC_PDEATHSIG_CTL is the SYS_PROCCTL command used to set the
	// signal delivered to the child when its parent dies.
	// NOTE(review): presumably mirrors PROC_PDEATHSIG_CTL from
	// <sys/procctl.h> — confirm the numeric value there.
	_PROC_PDEATHSIG_CTL = 11
)
43
// Fork hooks. Implemented in runtime package; only the declarations
// live here.

// runtime_BeforeFork is called immediately before the fork system call.
func runtime_BeforeFork()

// runtime_AfterFork is called in the parent once fork has returned,
// whether it succeeded or failed.
func runtime_AfterFork()

// runtime_AfterForkInChild is called in the child after fork; the call
// site uses it to restore the signal mask.
func runtime_AfterForkInChild()
48
// Fork, dup fd onto 0..len(fd), and exec(argv0, argv, envv) in child.
// If a dup or exec fails, write the errno error to pipe.
// (Pipe is close-on-exec so if exec succeeds, it will be closed.)
// In the child, this function must not acquire any locks, because
// they might have been locked at the time of the fork. This means
// no rescheduling, no malloc calls, and no new stack segments.
// For the same reason the compiler does not race instrument it.
// The calls to RawSyscall are okay because they are assembly
// functions that do not grow the stack.
//
// argv0, argv and envv are handed to execve as raw pointers. chroot and
// dir, when non-nil, are handed to chroot and chdir in the child.
// attr.Files lists the descriptors that become 0..len(attr.Files)-1 in
// the child; an entry of -1 means the corresponding slot is closed.
// sys selects the optional steps (jail, ptrace, session, process group,
// credentials, parent-death signal, controlling terminal). pipe is the
// descriptor on which the child reports a failure.
//
// In the parent the function returns the child's pid and a zero Errno,
// or 0 and the errno from fork. In the child it never returns: either
// execve replaces the process image, or the child writes the failing
// errno to pipe and exits with status 253.
//
//go:norace
func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (pid int, err Errno) {
	// Declare all variables at top in case any
	// declarations require heap allocation (e.g., err1).
	var (
		r1              uintptr
		err1            Errno
		nextfd          int
		i               int
		pgrp            _C_int
		cred            *Credential
		ngroups, groups uintptr
		upid            uintptr
	)

	// Fetch the saved RLIMIT_NOFILE value before forking; it is put back
	// just before exec if it is non-nil.
	rlim := origRlimitNofile.Load()

	// Record parent PID so child can test if it has died.
	// (We are still in the parent here, so getpid is the future ppid.)
	ppid, _, _ := RawSyscall(SYS_GETPID, 0, 0, 0)

	// Guard against side effects of shuffling fds below.
	// Make sure that nextfd is beyond any currently open files so
	// that we can't run the risk of overwriting any of them.
	// The slice is allocated here, before fork, because the child must
	// not allocate.
	fd := make([]int, len(attr.Files))
	nextfd = len(attr.Files)
	for i, ufd := range attr.Files {
		if nextfd < int(ufd) {
			nextfd = int(ufd)
		}
		fd[i] = int(ufd)
	}
	nextfd++

	// About to call fork.
	// No more allocation or calls of non-assembly functions.
	runtime_BeforeFork()
	r1, _, err1 = RawSyscall(SYS_FORK, 0, 0, 0)
	if err1 != 0 {
		// Fork failed: undo BeforeFork and report the errno.
		runtime_AfterFork()
		return 0, err1
	}

	if r1 != 0 {
		// parent; return PID
		runtime_AfterFork()
		return int(r1), 0
	}

	// Fork succeeded, now in child.
	// From here on every failure jumps to childerror, which reports
	// err1 on pipe and exits.

	// Attach to the given jail, if any. The system call also changes the
	// process' root and working directories to the jail's path directory.
	if sys.Jail > 0 {
		_, _, err1 = RawSyscall(SYS_JAIL_ATTACH, uintptr(sys.Jail), 0, 0)
		if err1 != 0 {
			goto childerror
		}
	}

	// Enable tracing if requested.
	if sys.Ptrace {
		_, _, err1 = RawSyscall(SYS_PTRACE, uintptr(PTRACE_TRACEME), 0, 0)
		if err1 != 0 {
			goto childerror
		}
	}

	// Session ID
	if sys.Setsid {
		_, _, err1 = RawSyscall(SYS_SETSID, 0, 0, 0)
		if err1 != 0 {
			goto childerror
		}
	}

	// Set process group
	// (Foreground implies Setpgid, hence the combined condition.)
	if sys.Setpgid || sys.Foreground {
		// Place child in process group.
		_, _, err1 = RawSyscall(SYS_SETPGID, 0, uintptr(sys.Pgid), 0)
		if err1 != 0 {
			goto childerror
		}
	}

	if sys.Foreground {
		// This should really be pid_t, however _C_int (aka int32) is
		// generally equivalent.
		pgrp = _C_int(sys.Pgid)
		if pgrp == 0 {
			// Pgid == 0 means "use the child's own pid" as the group.
			r1, _, err1 = RawSyscall(SYS_GETPID, 0, 0, 0)
			if err1 != 0 {
				goto childerror
			}

			pgrp = _C_int(r1)
		}

		// Place process group in foreground.
		// sys.Ctty is used as-is here, before any descriptor shuffling,
		// so it is a descriptor number inherited from the parent.
		_, _, err1 = RawSyscall(SYS_IOCTL, uintptr(sys.Ctty), uintptr(TIOCSPGRP), uintptr(unsafe.Pointer(&pgrp)))
		if err1 != 0 {
			goto childerror
		}
	}

	// Restore the signal mask. We do this after TIOCSPGRP to avoid
	// having the kernel send a SIGTTOU signal to the process group.
	runtime_AfterForkInChild()

	// Chroot
	if chroot != nil {
		_, _, err1 = RawSyscall(SYS_CHROOT, uintptr(unsafe.Pointer(chroot)), 0, 0)
		if err1 != 0 {
			goto childerror
		}
	}

	// User and groups
	// Order is setgroups, then setgid, then setuid.
	// NOTE(review): presumably because the earlier calls need the
	// privileges that setuid gives up — confirm.
	if cred = sys.Credential; cred != nil {
		ngroups = uintptr(len(cred.Groups))
		groups = uintptr(0)
		if ngroups > 0 {
			groups = uintptr(unsafe.Pointer(&cred.Groups[0]))
		}
		if !cred.NoSetGroups {
			_, _, err1 = RawSyscall(SYS_SETGROUPS, ngroups, groups, 0)
			if err1 != 0 {
				goto childerror
			}
		}
		_, _, err1 = RawSyscall(SYS_SETGID, uintptr(cred.Gid), 0, 0)
		if err1 != 0 {
			goto childerror
		}
		_, _, err1 = RawSyscall(SYS_SETUID, uintptr(cred.Uid), 0, 0)
		if err1 != 0 {
			goto childerror
		}
	}

	// Chdir
	if dir != nil {
		_, _, err1 = RawSyscall(SYS_CHDIR, uintptr(unsafe.Pointer(dir)), 0, 0)
		if err1 != 0 {
			goto childerror
		}
	}

	// Parent death signal
	if sys.Pdeathsig != 0 {
		// On 386 and arm an extra zero argument follows _P_PID, shifting
		// the command and data pointer one slot to the right.
		// NOTE(review): presumably because the 64-bit id argument takes
		// two argument slots on these 32-bit ABIs — confirm.
		switch runtime.GOARCH {
		case "386", "arm":
			_, _, err1 = RawSyscall6(SYS_PROCCTL, _P_PID, 0, 0, _PROC_PDEATHSIG_CTL, uintptr(unsafe.Pointer(&sys.Pdeathsig)), 0)
		default:
			_, _, err1 = RawSyscall6(SYS_PROCCTL, _P_PID, 0, _PROC_PDEATHSIG_CTL, uintptr(unsafe.Pointer(&sys.Pdeathsig)), 0, 0)
		}
		if err1 != 0 {
			goto childerror
		}

		// Signal self if parent is already dead. This might cause a
		// duplicate signal in rare cases, but it won't matter when
		// using SIGKILL.
		// A getppid result different from the pid recorded before fork
		// means the original parent is gone.
		r1, _, _ = RawSyscall(SYS_GETPPID, 0, 0, 0)
		if r1 != ppid {
			upid, _, _ = RawSyscall(SYS_GETPID, 0, 0, 0)
			_, _, err1 = RawSyscall(SYS_KILL, upid, uintptr(sys.Pdeathsig), 0)
			if err1 != 0 {
				goto childerror
			}
		}
	}

	// Pass 1: look for fd[i] < i and move those up above len(fd)
	// so that pass 2 won't stomp on an fd it needs later.
	// The error pipe is relocated first if it sits in the range the
	// shuffle may touch; F_DUP2FD_CLOEXEC keeps the copy close-on-exec.
	if pipe < nextfd {
		_, _, err1 = RawSyscall(SYS_FCNTL, uintptr(pipe), F_DUP2FD_CLOEXEC, uintptr(nextfd))
		if err1 != 0 {
			goto childerror
		}
		pipe = nextfd
		nextfd++
	}
	for i = 0; i < len(fd); i++ {
		if fd[i] >= 0 && fd[i] < i {
			if nextfd == pipe { // don't stomp on pipe
				nextfd++
			}
			_, _, err1 = RawSyscall(SYS_FCNTL, uintptr(fd[i]), F_DUP2FD_CLOEXEC, uintptr(nextfd))
			if err1 != 0 {
				goto childerror
			}
			// From now on slot i is sourced from the relocated copy.
			fd[i] = nextfd
			nextfd++
		}
	}

	// Pass 2: dup fd[i] down onto i.
	for i = 0; i < len(fd); i++ {
		if fd[i] == -1 {
			// -1 requests that slot i be closed in the child; the
			// result of close is deliberately ignored.
			RawSyscall(SYS_CLOSE, uintptr(i), 0, 0)
			continue
		}
		if fd[i] == i {
			// dup2(i, i) won't clear close-on-exec flag on Linux,
			// probably not elsewhere either.
			// So clear the descriptor flags explicitly instead.
			_, _, err1 = RawSyscall(SYS_FCNTL, uintptr(fd[i]), F_SETFD, 0)
			if err1 != 0 {
				goto childerror
			}
			continue
		}
		// The new fd is created NOT close-on-exec,
		// which is exactly what we want.
		_, _, err1 = RawSyscall(SYS_DUP2, uintptr(fd[i]), uintptr(i), 0)
		if err1 != 0 {
			goto childerror
		}
	}

	// By convention, we don't close-on-exec the fds we are
	// started with, so if len(fd) < 3, close 0, 1, 2 as needed.
	// Programs that know they inherit fds >= 3 will need
	// to set them close-on-exec.
	// Errors from close are ignored here.
	for i = len(fd); i < 3; i++ {
		RawSyscall(SYS_CLOSE, uintptr(i), 0, 0)
	}

	// Detach fd 0 from tty
	if sys.Noctty {
		_, _, err1 = RawSyscall(SYS_IOCTL, 0, uintptr(TIOCNOTTY), 0)
		if err1 != 0 {
			goto childerror
		}
	}

	// Set the controlling TTY to Ctty
	// This runs after the descriptor shuffle, so sys.Ctty is a
	// descriptor number in the child here.
	if sys.Setctty {
		_, _, err1 = RawSyscall(SYS_IOCTL, uintptr(sys.Ctty), uintptr(TIOCSCTTY), 0)
		if err1 != 0 {
			goto childerror
		}
	}

	// Restore original rlimit.
	// Best effort: the result of setrlimit is not checked.
	if rlim != nil {
		RawSyscall(SYS_SETRLIMIT, uintptr(RLIMIT_NOFILE), uintptr(unsafe.Pointer(rlim)), 0)
	}

	// Time to exec.
	// Taking &argv[0] and &envv[0] assumes both slices are non-empty
	// (presumably they always carry a terminating nil entry — confirm
	// against the caller). If execve returns at all it has failed, and
	// control falls through to childerror with err1 set.
	_, _, err1 = RawSyscall(SYS_EXECVE,
		uintptr(unsafe.Pointer(argv0)),
		uintptr(unsafe.Pointer(&argv[0])),
		uintptr(unsafe.Pointer(&envv[0])))

childerror:
	// send error code on pipe
	RawSyscall(SYS_WRITE, uintptr(pipe), uintptr(unsafe.Pointer(&err1)), unsafe.Sizeof(err1))
	// Exit with status 253; the loop retries should the exit system
	// call ever return, so this function never returns in the child.
	for {
		RawSyscall(SYS_EXIT, 253, 0, 0)
	}
}
320
// forkAndExecFailureCleanup cleans up after an exec failure.
// In this implementation there is no state to undo, so the function is
// a no-op and neither attr nor sys is read.
// NOTE(review): presumably kept so platform-independent callers can
// invoke it unconditionally — confirm against the caller.
func forkAndExecFailureCleanup(attr *ProcAttr, sys *SysProcAttr) {
	// Nothing to do.
}
325