1// Copyright 2023 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// Support for pidfd was added during the course of a few Linux releases: 6// v5.1: pidfd_send_signal syscall; 7// v5.2: CLONE_PIDFD flag for clone syscall; 8// v5.3: pidfd_open syscall, clone3 syscall; 9// v5.4: P_PIDFD idtype support for waitid syscall; 10// v5.6: pidfd_getfd syscall. 11// 12// N.B. Alternative Linux implementations may not follow this ordering. e.g., 13// QEMU user mode 7.2 added pidfd_open, but CLONE_PIDFD was not added until 14// 8.0. 15 16package os 17 18import ( 19 "errors" 20 "internal/syscall/unix" 21 "runtime" 22 "sync" 23 "syscall" 24 "unsafe" 25) 26 27// ensurePidfd initializes the PidFD field in sysAttr if it is not already set. 28// It returns the original or modified SysProcAttr struct and a flag indicating 29// whether the PidFD should be duplicated before using. 30func ensurePidfd(sysAttr *syscall.SysProcAttr) (*syscall.SysProcAttr, bool) { 31 if !pidfdWorks() { 32 return sysAttr, false 33 } 34 35 var pidfd int 36 37 if sysAttr == nil { 38 return &syscall.SysProcAttr{ 39 PidFD: &pidfd, 40 }, false 41 } 42 if sysAttr.PidFD == nil { 43 newSys := *sysAttr // copy 44 newSys.PidFD = &pidfd 45 return &newSys, false 46 } 47 48 return sysAttr, true 49} 50 51// getPidfd returns the value of sysAttr.PidFD (or its duplicate if needDup is 52// set) and a flag indicating whether the value can be used. 53func getPidfd(sysAttr *syscall.SysProcAttr, needDup bool) (uintptr, bool) { 54 if !pidfdWorks() { 55 return 0, false 56 } 57 58 h := *sysAttr.PidFD 59 if needDup { 60 dupH, e := unix.Fcntl(h, syscall.F_DUPFD_CLOEXEC, 0) 61 if e != nil { 62 return 0, false 63 } 64 h = dupH 65 } 66 return uintptr(h), true 67} 68 69func pidfdFind(pid int) (uintptr, error) { 70 if !pidfdWorks() { 71 return 0, syscall.ENOSYS 72 } 73 74 h, err := unix.PidFDOpen(pid, 0) 75 if err != nil { 76 return 0, convertESRCH(err) 77 } 78 return h, nil 79} 80 81// _P_PIDFD is used as idtype argument to waitid syscall. 82const _P_PIDFD = 3 83 84func (p *Process) pidfdWait() (*ProcessState, error) { 85 // When pidfd is used, there is no wait/kill race (described in CL 23967) 86 // because the PID recycle issue doesn't exist (IOW, pidfd, unlike PID, 87 // is guaranteed to refer to one particular process). Thus, there is no 88 // need for the workaround (blockUntilWaitable + sigMu) from pidWait. 89 // 90 // We _do_ need to be careful about reuse of the pidfd FD number when 91 // closing the pidfd. See handle for more details. 92 handle, status := p.handleTransientAcquire() 93 switch status { 94 case statusDone: 95 // Process already completed Wait, or was not found by 96 // pidfdFind. Return ECHILD for consistency with what the wait 97 // syscall would return. 98 return nil, NewSyscallError("wait", syscall.ECHILD) 99 case statusReleased: 100 return nil, syscall.EINVAL 101 } 102 defer p.handleTransientRelease() 103 104 var ( 105 info unix.SiginfoChild 106 rusage syscall.Rusage 107 e syscall.Errno 108 ) 109 for { 110 _, _, e = syscall.Syscall6(syscall.SYS_WAITID, _P_PIDFD, handle, uintptr(unsafe.Pointer(&info)), syscall.WEXITED, uintptr(unsafe.Pointer(&rusage)), 0) 111 if e != syscall.EINTR { 112 break 113 } 114 } 115 if e != 0 { 116 return nil, NewSyscallError("waitid", e) 117 } 118 // Release the Process' handle reference, in addition to the reference 119 // we took above. 120 p.handlePersistentRelease(statusDone) 121 return &ProcessState{ 122 pid: int(info.Pid), 123 status: info.WaitStatus(), 124 rusage: &rusage, 125 }, nil 126} 127 128func (p *Process) pidfdSendSignal(s syscall.Signal) error { 129 handle, status := p.handleTransientAcquire() 130 switch status { 131 case statusDone: 132 return ErrProcessDone 133 case statusReleased: 134 return errors.New("os: process already released") 135 } 136 defer p.handleTransientRelease() 137 138 return convertESRCH(unix.PidFDSendSignal(handle, s)) 139} 140 141func pidfdWorks() bool { 142 return checkPidfdOnce() == nil 143} 144 145var checkPidfdOnce = sync.OnceValue(checkPidfd) 146 147// checkPidfd checks whether all required pidfd-related syscalls work. This 148// consists of pidfd_open and pidfd_send_signal syscalls, waitid syscall with 149// idtype of P_PIDFD, and clone(CLONE_PIDFD). 150// 151// Reasons for non-working pidfd syscalls include an older kernel and an 152// execution environment in which the above system calls are restricted by 153// seccomp or a similar technology. 154func checkPidfd() error { 155 // In Android version < 12, pidfd-related system calls are not allowed 156 // by seccomp and trigger the SIGSYS signal. See issue #69065. 157 if runtime.GOOS == "android" { 158 ignoreSIGSYS() 159 defer restoreSIGSYS() 160 } 161 162 // Get a pidfd of the current process (opening of "/proc/self" won't 163 // work for waitid). 164 fd, err := unix.PidFDOpen(syscall.Getpid(), 0) 165 if err != nil { 166 return NewSyscallError("pidfd_open", err) 167 } 168 defer syscall.Close(int(fd)) 169 170 // Check waitid(P_PIDFD) works. 171 for { 172 _, _, err = syscall.Syscall6(syscall.SYS_WAITID, _P_PIDFD, fd, 0, syscall.WEXITED, 0, 0) 173 if err != syscall.EINTR { 174 break 175 } 176 } 177 // Expect ECHILD from waitid since we're not our own parent. 178 if err != syscall.ECHILD { 179 return NewSyscallError("pidfd_wait", err) 180 } 181 182 // Check pidfd_send_signal works (should be able to send 0 to itself). 183 if err := unix.PidFDSendSignal(fd, 0); err != nil { 184 return NewSyscallError("pidfd_send_signal", err) 185 } 186 187 // Verify that clone(CLONE_PIDFD) works. 188 // 189 // This shouldn't be necessary since pidfd_open was added in Linux 5.3, 190 // after CLONE_PIDFD in Linux 5.2, but some alternative Linux 191 // implementations may not adhere to this ordering. 192 if err := checkClonePidfd(); err != nil { 193 return err 194 } 195 196 return nil 197} 198 199// Provided by syscall. 200// 201//go:linkname checkClonePidfd 202func checkClonePidfd() error 203 204// Provided by runtime. 205// 206//go:linkname ignoreSIGSYS 207func ignoreSIGSYS() 208 209//go:linkname restoreSIGSYS 210func restoreSIGSYS() 211