• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2023 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
// Support for pidfd was added over the course of several Linux releases:
//  v5.1: pidfd_send_signal syscall;
//  v5.2: CLONE_PIDFD flag for clone syscall;
//  v5.3: pidfd_open syscall, clone3 syscall;
//  v5.4: P_PIDFD idtype support for waitid syscall;
//  v5.6: pidfd_getfd syscall.
//
// N.B. Alternative Linux implementations may not follow this ordering. For
// example, QEMU user mode 7.2 added pidfd_open, but CLONE_PIDFD was not added
// until 8.0.
15
16package os
17
18import (
19	"errors"
20	"internal/syscall/unix"
21	"runtime"
22	"sync"
23	"syscall"
24	"unsafe"
25)
26
27// ensurePidfd initializes the PidFD field in sysAttr if it is not already set.
28// It returns the original or modified SysProcAttr struct and a flag indicating
29// whether the PidFD should be duplicated before using.
30func ensurePidfd(sysAttr *syscall.SysProcAttr) (*syscall.SysProcAttr, bool) {
31	if !pidfdWorks() {
32		return sysAttr, false
33	}
34
35	var pidfd int
36
37	if sysAttr == nil {
38		return &syscall.SysProcAttr{
39			PidFD: &pidfd,
40		}, false
41	}
42	if sysAttr.PidFD == nil {
43		newSys := *sysAttr // copy
44		newSys.PidFD = &pidfd
45		return &newSys, false
46	}
47
48	return sysAttr, true
49}
50
51// getPidfd returns the value of sysAttr.PidFD (or its duplicate if needDup is
52// set) and a flag indicating whether the value can be used.
53func getPidfd(sysAttr *syscall.SysProcAttr, needDup bool) (uintptr, bool) {
54	if !pidfdWorks() {
55		return 0, false
56	}
57
58	h := *sysAttr.PidFD
59	if needDup {
60		dupH, e := unix.Fcntl(h, syscall.F_DUPFD_CLOEXEC, 0)
61		if e != nil {
62			return 0, false
63		}
64		h = dupH
65	}
66	return uintptr(h), true
67}
68
69func pidfdFind(pid int) (uintptr, error) {
70	if !pidfdWorks() {
71		return 0, syscall.ENOSYS
72	}
73
74	h, err := unix.PidFDOpen(pid, 0)
75	if err != nil {
76		return 0, convertESRCH(err)
77	}
78	return h, nil
79}
80
const (
	// _P_PIDFD is the idtype argument passed to the waitid syscall when
	// the id being waited on is a pidfd.
	_P_PIDFD = 3
)
83
84func (p *Process) pidfdWait() (*ProcessState, error) {
85	// When pidfd is used, there is no wait/kill race (described in CL 23967)
86	// because the PID recycle issue doesn't exist (IOW, pidfd, unlike PID,
87	// is guaranteed to refer to one particular process). Thus, there is no
88	// need for the workaround (blockUntilWaitable + sigMu) from pidWait.
89	//
90	// We _do_ need to be careful about reuse of the pidfd FD number when
91	// closing the pidfd. See handle for more details.
92	handle, status := p.handleTransientAcquire()
93	switch status {
94	case statusDone:
95		// Process already completed Wait, or was not found by
96		// pidfdFind. Return ECHILD for consistency with what the wait
97		// syscall would return.
98		return nil, NewSyscallError("wait", syscall.ECHILD)
99	case statusReleased:
100		return nil, syscall.EINVAL
101	}
102	defer p.handleTransientRelease()
103
104	var (
105		info   unix.SiginfoChild
106		rusage syscall.Rusage
107		e      syscall.Errno
108	)
109	for {
110		_, _, e = syscall.Syscall6(syscall.SYS_WAITID, _P_PIDFD, handle, uintptr(unsafe.Pointer(&info)), syscall.WEXITED, uintptr(unsafe.Pointer(&rusage)), 0)
111		if e != syscall.EINTR {
112			break
113		}
114	}
115	if e != 0 {
116		return nil, NewSyscallError("waitid", e)
117	}
118	// Release the Process' handle reference, in addition to the reference
119	// we took above.
120	p.handlePersistentRelease(statusDone)
121	return &ProcessState{
122		pid:    int(info.Pid),
123		status: info.WaitStatus(),
124		rusage: &rusage,
125	}, nil
126}
127
128func (p *Process) pidfdSendSignal(s syscall.Signal) error {
129	handle, status := p.handleTransientAcquire()
130	switch status {
131	case statusDone:
132		return ErrProcessDone
133	case statusReleased:
134		return errors.New("os: process already released")
135	}
136	defer p.handleTransientRelease()
137
138	return convertESRCH(unix.PidFDSendSignal(handle, s))
139}
140
141func pidfdWorks() bool {
142	return checkPidfdOnce() == nil
143}
144
145var checkPidfdOnce = sync.OnceValue(checkPidfd)
146
147// checkPidfd checks whether all required pidfd-related syscalls work. This
148// consists of pidfd_open and pidfd_send_signal syscalls, waitid syscall with
149// idtype of P_PIDFD, and clone(CLONE_PIDFD).
150//
151// Reasons for non-working pidfd syscalls include an older kernel and an
152// execution environment in which the above system calls are restricted by
153// seccomp or a similar technology.
154func checkPidfd() error {
155	// In Android version < 12, pidfd-related system calls are not allowed
156	// by seccomp and trigger the SIGSYS signal. See issue #69065.
157	if runtime.GOOS == "android" {
158		ignoreSIGSYS()
159		defer restoreSIGSYS()
160	}
161
162	// Get a pidfd of the current process (opening of "/proc/self" won't
163	// work for waitid).
164	fd, err := unix.PidFDOpen(syscall.Getpid(), 0)
165	if err != nil {
166		return NewSyscallError("pidfd_open", err)
167	}
168	defer syscall.Close(int(fd))
169
170	// Check waitid(P_PIDFD) works.
171	for {
172		_, _, err = syscall.Syscall6(syscall.SYS_WAITID, _P_PIDFD, fd, 0, syscall.WEXITED, 0, 0)
173		if err != syscall.EINTR {
174			break
175		}
176	}
177	// Expect ECHILD from waitid since we're not our own parent.
178	if err != syscall.ECHILD {
179		return NewSyscallError("pidfd_wait", err)
180	}
181
182	// Check pidfd_send_signal works (should be able to send 0 to itself).
183	if err := unix.PidFDSendSignal(fd, 0); err != nil {
184		return NewSyscallError("pidfd_send_signal", err)
185	}
186
187	// Verify that clone(CLONE_PIDFD) works.
188	//
189	// This shouldn't be necessary since pidfd_open was added in Linux 5.3,
190	// after CLONE_PIDFD in Linux 5.2, but some alternative Linux
191	// implementations may not adhere to this ordering.
192	if err := checkClonePidfd(); err != nil {
193		return err
194	}
195
196	return nil
197}
198
// checkClonePidfd is called by checkPidfd as its clone(CLONE_PIDFD) probe; a
// non-nil error is returned to checkPidfd's caller as the probe failure.
// It is declared here without a body and bound via go:linkname.
//
// Provided by syscall.
//
//go:linkname checkClonePidfd
func checkClonePidfd() error
203
// ignoreSIGSYS is called by checkPidfd on Android before probing the pidfd
// syscalls, which seccomp may answer with SIGSYS there. It is declared here
// without a body and bound via go:linkname.
//
// Provided by runtime.
//
//go:linkname ignoreSIGSYS
func ignoreSIGSYS()
208
// restoreSIGSYS is deferred by checkPidfd on Android right after it calls
// ignoreSIGSYS, so it runs once the pidfd probes are finished. It is
// declared here without a body and bound via go:linkname.
//
//go:linkname restoreSIGSYS
func restoreSIGSYS()
211