Lines Matching +full:charge +full:- +full:current +full:- +full:limit +full:- +full:mapping
1 // SPDX-License-Identifier: GPL-2.0-only
9 * 'fork.c' contains the help-routines for the 'fork' system call
84 #include <linux/posix-timers.h>
85 #include <linux/user-return-notifier.h>
132 static int max_threads; /* tunable limit on nr_threads */
211 vfree(vm_stack->addr); in free_vm_stack_cache()
234 kasan_unpoison_shadow(s->addr, THREAD_SIZE); in alloc_thread_stack_node()
237 memset(s->addr, 0, THREAD_SIZE); in alloc_thread_stack_node()
239 tsk->stack_vm_area = s; in alloc_thread_stack_node()
240 tsk->stack = s->addr; in alloc_thread_stack_node()
241 return s->addr; in alloc_thread_stack_node()
261 tsk->stack_vm_area = find_vm_area(stack); in alloc_thread_stack_node()
262 tsk->stack = stack; in alloc_thread_stack_node()
270 tsk->stack = kasan_reset_tag(page_address(page)); in alloc_thread_stack_node()
271 return tsk->stack; in alloc_thread_stack_node()
286 memcg_kmem_uncharge_page(vm->pages[i], 0); in free_thread_stack()
290 NULL, tsk->stack_vm_area) != NULL) in free_thread_stack()
296 vfree_atomic(tsk->stack); in free_thread_stack()
301 __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER); in free_thread_stack()
312 tsk->stack = stack; in alloc_thread_stack_node()
318 kmem_cache_free(thread_stack_cache, tsk->stack); in free_thread_stack()
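The vmap-stack path above keeps a small per-CPU cache of freed stacks (the this_cpu_cmpxchg() fragment in free_thread_stack()) so most forks skip vmalloc entirely; the kmem_cache lines are the fallback for kernels without CONFIG_VMAP_STACK. A minimal single-threaded userspace sketch of the cache-then-allocate pattern; the names and the plain array are illustrative, not kernel code:

#include <stdlib.h>
#include <string.h>

#define STACK_SIZE      (16 * 1024)
#define NR_CACHED       2

static void *stack_cache[NR_CACHED];

static void *alloc_stack(void)
{
        for (int i = 0; i < NR_CACHED; i++) {
                void *s = stack_cache[i];

                if (!s)
                        continue;
                stack_cache[i] = NULL;
                memset(s, 0, STACK_SIZE);       /* mirror the memset() on reuse */
                return s;
        }
        return calloc(1, STACK_SIZE);           /* slow path: fresh allocation */
}

static void free_stack(void *s)
{
        for (int i = 0; i < NR_CACHED; i++) {
                if (!stack_cache[i]) {
                        stack_cache[i] = s;     /* park it for the next fork */
                        return;
                }
        }
        free(s);
}

The kernel version additionally re-unpoisons a cached stack for KASAN, as the kasan_unpoison_shadow() line above shows.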
331 /* SLAB cache for signal_struct structures (tsk->signal) */
334 /* SLAB cache for sighand_struct structures (tsk->sighand) */
337 /* SLAB cache for files_struct structures (tsk->files) */
340 /* SLAB cache for fs_struct structures (tsk->fs) */
346 /* SLAB cache for mm_struct structures (tsk->mm) */
364 ASSERT_EXCLUSIVE_WRITER(orig->vm_flags); in vm_area_dup()
365 ASSERT_EXCLUSIVE_WRITER(orig->vm_file); in vm_area_dup()
367 * orig->shared.rb may be modified concurrently, but the clone in vm_area_dup()
371 INIT_LIST_HEAD(&new->anon_vma_chain); in vm_area_dup()
372 new->vm_next = new->vm_prev = NULL; in vm_area_dup()
392 mod_lruvec_page_state(vm->pages[0], NR_KERNEL_STACK_KB, in account_kernel_stack()
410 BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); in memcg_charge_kernel_stack()
414 * If memcg_kmem_charge_page() fails, page->mem_cgroup in memcg_charge_kernel_stack()
418 ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, in memcg_charge_kernel_stack()
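memcg_charge_kernel_stack() charges every page backing a vmalloc'd stack to the memory cgroup and fails the whole stack if any single page cannot be charged; the uncharge of already-charged pages happens later, in free_thread_stack(), per the memcg_kmem_uncharge_page() line above. A toy model of the all-or-nothing charge, with the rollback done inline for brevity; budget/charge_page() are invented for the sketch:

#include <stdbool.h>
#include <stdio.h>

#define NR_PAGES 4

static long budget = 3;                 /* pretend the cgroup allows 3 pages */

static bool charge_page(void)   { return budget > 0 ? (budget--, true) : false; }
static void uncharge_page(void) { budget++; }

/* Charge all pages of a stack, or none. */
static int charge_stack(void)
{
        int i;

        for (i = 0; i < NR_PAGES; i++)
                if (!charge_page())
                        goto undo;
        return 0;
undo:
        while (i-- > 0)                 /* roll back what was charged */
                uncharge_page();
        return -1;                      /* -ENOMEM in the kernel */
}

int main(void)
{
        printf("charge_stack() = %d, budget restored to %ld\n",
               charge_stack(), budget);
        return 0;
}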
430 if (WARN_ON(tsk->state != TASK_DEAD)) in release_task_stack()
433 account_kernel_stack(tsk, -1); in release_task_stack()
435 tsk->stack = NULL; in release_task_stack()
437 tsk->stack_vm_area = NULL; in release_task_stack()
444 if (refcount_dec_and_test(&tsk->stack_refcount)) in put_task_stack()
464 WARN_ON_ONCE(refcount_read(&tsk->stack_refcount) != 0); in free_task()
469 if (tsk->flags & PF_KTHREAD) in free_task()
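The stack_refcount above lets a task's stack outlive the task itself; release_task_stack() only runs once the count hits zero, and free_task() warns if it has not. A C11 sketch of the refcount_dec_and_test() release pattern, minus the saturation and overflow checks the kernel's refcount_t adds:

#include <stdatomic.h>
#include <stdlib.h>

struct stack_ref {
        atomic_int refcount;
        void *stack;
};

static void put_stack(struct stack_ref *s)
{
        /* fetch_sub returns the old value: 1 means we were the last user */
        if (atomic_fetch_sub_explicit(&s->refcount, 1,
                                      memory_order_acq_rel) == 1) {
                free(s->stack);
                s->stack = NULL;
        }
}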
482 unsigned long charge; in dup_mmap() local
487 retval = -EINTR; in dup_mmap()
493 * Not linked in yet - no deadlock potential: in dup_mmap()
498 RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); in dup_mmap()
500 mm->total_vm = oldmm->total_vm; in dup_mmap()
501 mm->data_vm = oldmm->data_vm; in dup_mmap()
502 mm->exec_vm = oldmm->exec_vm; in dup_mmap()
503 mm->stack_vm = oldmm->stack_vm; in dup_mmap()
505 rb_link = &mm->mm_rb.rb_node; in dup_mmap()
507 pprev = &mm->mmap; in dup_mmap()
516 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { in dup_mmap()
519 if (mpnt->vm_flags & VM_DONTCOPY) { in dup_mmap()
520 vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt)); in dup_mmap()
523 charge = 0; in dup_mmap()
525 * Don't duplicate many vmas if we've been oom-killed (for in dup_mmap()
528 if (fatal_signal_pending(current)) { in dup_mmap()
529 retval = -EINTR; in dup_mmap()
532 if (mpnt->vm_flags & VM_ACCOUNT) { in dup_mmap()
537 charge = len; in dup_mmap()
545 tmp->vm_mm = mm; in dup_mmap()
549 if (tmp->vm_flags & VM_WIPEONFORK) { in dup_mmap()
553 * copy page for current vma. in dup_mmap()
555 tmp->anon_vma = NULL; in dup_mmap()
558 tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT); in dup_mmap()
559 file = tmp->vm_file; in dup_mmap()
562 struct address_space *mapping = file->f_mapping; in dup_mmap() local
565 if (tmp->vm_flags & VM_DENYWRITE) in dup_mmap()
567 i_mmap_lock_write(mapping); in dup_mmap()
568 if (tmp->vm_flags & VM_SHARED) in dup_mmap()
569 mapping_allow_writable(mapping); in dup_mmap()
570 flush_dcache_mmap_lock(mapping); in dup_mmap()
573 &mapping->i_mmap); in dup_mmap()
574 flush_dcache_mmap_unlock(mapping); in dup_mmap()
575 i_mmap_unlock_write(mapping); in dup_mmap()
579 * Clear hugetlb-related page reserves for children. This only in dup_mmap()
581 * are not guaranteed to succeed, even if read-only in dup_mmap()
590 pprev = &tmp->vm_next; in dup_mmap()
591 tmp->vm_prev = prev; in dup_mmap()
595 rb_link = &tmp->vm_rb.rb_right; in dup_mmap()
596 rb_parent = &tmp->vm_rb; in dup_mmap()
598 mm->map_count++; in dup_mmap()
599 if (!(tmp->vm_flags & VM_WIPEONFORK)) in dup_mmap()
602 if (tmp->vm_ops && tmp->vm_ops->open) in dup_mmap()
603 tmp->vm_ops->open(tmp); in dup_mmap()
623 retval = -ENOMEM; in dup_mmap()
624 vm_unacct_memory(charge); in dup_mmap()
631 mm->pgd = pgd_alloc(mm); in mm_alloc_pgd()
632 if (unlikely(!mm->pgd)) in mm_alloc_pgd()
633 return -ENOMEM; in mm_alloc_pgd()
639 pgd_free(mm, mm->pgd); in mm_free_pgd()
646 RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); in dup_mmap()
662 long x = atomic_long_read(&mm->rss_stat.count[i]); in check_mm()
665 pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n", in check_mm()
670 pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n", in check_mm()
674 VM_BUG_ON_MM(mm->pmd_huge_pte, mm); in check_mm()
689 WARN_ON_ONCE(mm == current->mm); in __mmdrop()
690 WARN_ON_ONCE(mm == current->active_mm); in __mmdrop()
695 put_user_ns(mm->user_ns); in __mmdrop()
710 if (unlikely(atomic_dec_and_test(&mm->mm_count))) { in mmdrop_async()
711 INIT_WORK(&mm->async_put_work, mmdrop_async_fn); in mmdrop_async()
712 schedule_work(&mm->async_put_work); in mmdrop_async()
724 if (sig->oom_mm) in free_signal_struct()
725 mmdrop_async(sig->oom_mm); in free_signal_struct()
731 if (refcount_dec_and_test(&sig->sigcnt)) in put_signal_struct()
737 WARN_ON(!tsk->exit_state); in __put_task_struct()
738 WARN_ON(refcount_read(&tsk->usage)); in __put_task_struct()
739 WARN_ON(tsk == current); in __put_task_struct()
747 put_signal_struct(tsk->signal); in __put_task_struct()
792 * Handle zero-sized whitelist or empty thread_struct, otherwise in task_struct_whitelist()
825 init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; in fork_init()
826 init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; in fork_init()
827 init_task.signal->rlim[RLIMIT_SIGPENDING] = in fork_init()
828 init_task.signal->rlim[RLIMIT_NPROC]; in fork_init()
885 tsk->token = orig->token; in dup_task_struct()
886 tsk->ftoken = 0; in dup_task_struct()
889 * arch_dup_task_struct() clobbers the stack-related fields. Make in dup_task_struct()
890 * sure they're properly initialized before using any stack-related in dup_task_struct()
893 tsk->stack = stack; in dup_task_struct()
895 tsk->stack_vm_area = stack_vm_area; in dup_task_struct()
898 refcount_set(&tsk->stack_refcount, 1); in dup_task_struct()
915 tsk->seccomp.filter = NULL; in dup_task_struct()
924 tsk->stack_canary = get_random_canary(); in dup_task_struct()
926 if (orig->cpus_ptr == &orig->cpus_mask) in dup_task_struct()
927 tsk->cpus_ptr = &tsk->cpus_mask; in dup_task_struct()
933 refcount_set(&tsk->rcu_users, 2); in dup_task_struct()
935 refcount_set(&tsk->usage, 1); in dup_task_struct()
937 tsk->btrace_seq = 0; in dup_task_struct()
939 tsk->splice_pipe = NULL; in dup_task_struct()
940 tsk->task_frag.page = NULL; in dup_task_struct()
941 tsk->wake_q.next = NULL; in dup_task_struct()
948 tsk->fail_nth = 0; in dup_task_struct()
952 tsk->throttle_queue = NULL; in dup_task_struct()
953 tsk->use_memdelay = 0; in dup_task_struct()
957 tsk->active_memcg = NULL; in dup_task_struct()
987 spin_lock_init(&mm->ioctx_lock); in mm_init_aio()
988 mm->ioctx_table = NULL; in mm_init_aio()
996 if (mm->owner == p) in mm_clear_owner()
997 WRITE_ONCE(mm->owner, NULL); in mm_clear_owner()
1004 mm->owner = p; in mm_init_owner()
1011 mm->pasid = INIT_PASID; in mm_init_pasid()
1018 mm->uprobes_state.xol_area = NULL; in mm_init_uprobes_state()
1025 mm->mmap = NULL; in mm_init()
1026 mm->mm_rb = RB_ROOT; in mm_init()
1027 mm->vmacache_seqnum = 0; in mm_init()
1029 mm->rss_threshold = 0; in mm_init()
1031 atomic_set(&mm->mm_users, 1); in mm_init()
1032 atomic_set(&mm->mm_count, 1); in mm_init()
1033 seqcount_init(&mm->write_protect_seq); in mm_init()
1035 INIT_LIST_HEAD(&mm->mmlist); in mm_init()
1036 mm->core_state = NULL; in mm_init()
1038 mm->map_count = 0; in mm_init()
1039 mm->locked_vm = 0; in mm_init()
1040 atomic_set(&mm->has_pinned, 0); in mm_init()
1041 atomic64_set(&mm->pinned_vm, 0); in mm_init()
1042 memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); in mm_init()
1043 spin_lock_init(&mm->page_table_lock); in mm_init()
1044 spin_lock_init(&mm->arg_lock); in mm_init()
1049 RCU_INIT_POINTER(mm->exe_file, NULL); in mm_init()
1053 mm->pmd_huge_pte = NULL; in mm_init()
1058 if (current->mm) { in mm_init()
1059 mm->flags = current->mm->flags & MMF_INIT_MASK; in mm_init()
1060 mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; in mm_init()
1062 mm->flags = default_dump_filter; in mm_init()
1063 mm->def_flags = 0; in mm_init()
1072 mm->user_ns = get_user_ns(user_ns); in mm_init()
1094 return mm_init(mm, current, current_user_ns()); in mm_alloc()
1099 VM_BUG_ON(atomic_read(&mm->mm_users)); in __mmput()
1108 if (!list_empty(&mm->mmlist)) { in __mmput()
1110 list_del(&mm->mmlist); in __mmput()
1113 if (mm->binfmt) in __mmput()
1114 module_put(mm->binfmt->module); in __mmput()
1125 if (atomic_dec_and_test(&mm->mm_users)) in mmput()
1141 if (atomic_dec_and_test(&mm->mm_users)) { in mmput_async()
1142 INIT_WORK(&mm->async_put_work, mmput_async_fn); in mmput_async()
1143 schedule_work(&mm->async_put_work); in mmput_async()
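Two refcounts govern an mm: mm_users counts users of the address space, and its last put (__mmput) tears down the mappings; mm_count counts references to the struct itself, and its last put (__mmdrop) frees it. The whole mm_users cluster pins exactly one mm_count reference, which __mmput releases. A compact model, with printf standing in for the real teardown:

#include <stdatomic.h>
#include <stdio.h>

struct mm {
        atomic_int mm_users;    /* users of the address space */
        atomic_int mm_count;    /* references to the struct itself */
};

static void mmdrop(struct mm *mm)
{
        if (atomic_fetch_sub(&mm->mm_count, 1) == 1)
                printf("__mmdrop: free the struct\n");
}

static void mmput(struct mm *mm)
{
        if (atomic_fetch_sub(&mm->mm_users, 1) == 1) {
                printf("__mmput: tear down the mappings\n");
                mmdrop(mm);     /* the users cluster held one mm_count ref */
        }
}

int main(void)
{
        /* mm_init() starts at 1/1; one extra mmgrab(), e.g. a lazy-TLB user */
        struct mm mm = { .mm_users = 1, .mm_count = 2 };

        mmput(&mm);             /* last user: unmap, then drop one count */
        mmdrop(&mm);            /* lazy-TLB reference gone: struct freed */
        return 0;
}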
1149 * set_mm_exe_file - change a reference to the mm's executable file
1156 * mm->exe_file, but does so without using set_mm_exe_file() in order
1166 * this mm -- see comment above for justification. in set_mm_exe_file()
1168 old_exe_file = rcu_dereference_raw(mm->exe_file); in set_mm_exe_file()
1172 rcu_assign_pointer(mm->exe_file, new_exe_file); in set_mm_exe_file()
1178 * get_mm_exe_file - acquire a reference to the mm's executable file
1188 exe_file = rcu_dereference(mm->exe_file); in get_mm_exe_file()
1197 * get_task_exe_file - acquire a reference to the task's executable file
1209 mm = task->mm; in get_task_exe_file()
1211 if (!(task->flags & PF_KTHREAD)) in get_task_exe_file()
1220 * get_task_mm - acquire a reference to the task's mm
1233 mm = task->mm; in get_task_mm()
1235 if (task->flags & PF_KTHREAD) in get_task_mm()
1250 err = down_read_killable(&task->signal->exec_update_lock); in mm_access()
1255 if (mm && mm != current->mm && in mm_access()
1258 mm = ERR_PTR(-EACCES); in mm_access()
1260 up_read(&task->signal->exec_update_lock); in mm_access()
1270 vfork = tsk->vfork_done; in complete_vfork_done()
1272 tsk->vfork_done = NULL; in complete_vfork_done()
1291 child->vfork_done = NULL; in wait_for_vfork_done()
1304 * from the current process.
1324 if (tsk->clear_child_tid) { in mm_release()
1325 if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) && in mm_release()
1326 atomic_read(&mm->mm_users) > 1) { in mm_release()
1328 * We don't check the error code - if userspace has in mm_release()
1331 put_user(0, tsk->clear_child_tid); in mm_release()
1332 do_futex(tsk->clear_child_tid, FUTEX_WAKE, in mm_release()
1335 tsk->clear_child_tid = NULL; in mm_release()
1342 if (tsk->vfork_done) in mm_release()
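The clear_child_tid handling above is the mechanism behind pthread_join(): at exit, mm_release() stores 0 to the registered address and futex-wakes any waiter. A minimal demo of the userspace side, using the same address for CLONE_PARENT_SETTID and CLONE_CHILD_CLEARTID the way NPTL does (Linux-specific; error handling omitted):

#define _GNU_SOURCE
#include <linux/futex.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

static volatile pid_t child_tid;

static int child_fn(void *arg)
{
        return 0;       /* on exit the kernel zeroes child_tid and wakes it */
}

int main(void)
{
        char *stack = malloc(64 * 1024);

        clone(child_fn, stack + 64 * 1024,
              CLONE_VM | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | SIGCHLD,
              NULL, &child_tid, NULL, &child_tid);

        /* sleep until the value at &child_tid changes from the child's TID */
        while (child_tid != 0)
                syscall(SYS_futex, &child_tid, FUTEX_WAIT, child_tid, NULL);
        printf("child exited, tid slot cleared\n");
        return 0;
}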
1359 * dup_mm() - duplicates an existing mm structure
1380 if (!mm_init(mm, tsk, mm->user_ns)) in dup_mm()
1387 mm->hiwater_rss = get_mm_rss(mm); in dup_mm()
1388 mm->hiwater_vm = mm->total_vm; in dup_mm()
1390 if (mm->binfmt && !try_module_get(mm->binfmt->module)) in dup_mm()
1397 mm->binfmt = NULL; in dup_mm()
1410 tsk->min_flt = tsk->maj_flt = 0; in copy_mm()
1411 tsk->nvcsw = tsk->nivcsw = 0; in copy_mm()
1413 tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; in copy_mm()
1414 tsk->last_switch_time = 0; in copy_mm()
1417 tsk->mm = NULL; in copy_mm()
1418 tsk->active_mm = NULL; in copy_mm()
1425 oldmm = current->mm; in copy_mm()
1438 retval = -ENOMEM; in copy_mm()
1439 mm = dup_mm(tsk, current->mm); in copy_mm()
1444 tsk->mm = mm; in copy_mm()
1445 tsk->active_mm = mm; in copy_mm()
1454 struct fs_struct *fs = current->fs; in copy_fs()
1456 /* tsk->fs is already what we want */ in copy_fs()
1457 spin_lock(&fs->lock); in copy_fs()
1458 if (fs->in_exec) { in copy_fs()
1459 spin_unlock(&fs->lock); in copy_fs()
1460 return -EAGAIN; in copy_fs()
1462 fs->users++; in copy_fs()
1463 spin_unlock(&fs->lock); in copy_fs()
1466 tsk->fs = copy_fs_struct(fs); in copy_fs()
1467 if (!tsk->fs) in copy_fs()
1468 return -ENOMEM; in copy_fs()
1480 oldf = current->files; in copy_files()
1485 atomic_inc(&oldf->count); in copy_files()
1493 tsk->files = newf; in copy_files()
1502 struct io_context *ioc = current->io_context; in copy_io()
1512 tsk->io_context = ioc; in copy_io()
1513 } else if (ioprio_valid(ioc->ioprio)) { in copy_io()
1516 return -ENOMEM; in copy_io()
1518 new_ioc->ioprio = ioc->ioprio; in copy_io()
1530 refcount_inc(&current->sighand->count); in copy_sighand()
1534 RCU_INIT_POINTER(tsk->sighand, sig); in copy_sighand()
1536 return -ENOMEM; in copy_sighand()
1538 refcount_set(&sig->count, 1); in copy_sighand()
1539 spin_lock_irq(&current->sighand->siglock); in copy_sighand()
1540 memcpy(sig->action, current->sighand->action, sizeof(sig->action)); in copy_sighand()
1541 spin_unlock_irq(&current->sighand->siglock); in copy_sighand()
1552 if (refcount_dec_and_test(&sighand->count)) { in __cleanup_sighand()
1567 struct posix_cputimers *pct = &sig->posix_cputimers; in posix_cpu_timers_init_group()
1570 cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); in posix_cpu_timers_init_group()
1582 tsk->signal = sig; in copy_signal()
1584 return -ENOMEM; in copy_signal()
1586 sig->nr_threads = 1; in copy_signal()
1587 atomic_set(&sig->live, 1); in copy_signal()
1588 refcount_set(&sig->sigcnt, 1); in copy_signal()
1591 sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node); in copy_signal()
1592 tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head); in copy_signal()
1594 init_waitqueue_head(&sig->wait_chldexit); in copy_signal()
1595 sig->curr_target = tsk; in copy_signal()
1596 init_sigpending(&sig->shared_pending); in copy_signal()
1597 INIT_HLIST_HEAD(&sig->multiprocess); in copy_signal()
1598 seqlock_init(&sig->stats_lock); in copy_signal()
1599 prev_cputime_init(&sig->prev_cputime); in copy_signal()
1602 INIT_LIST_HEAD(&sig->posix_timers); in copy_signal()
1603 hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); in copy_signal()
1604 sig->real_timer.function = it_real_fn; in copy_signal()
1607 task_lock(current->group_leader); in copy_signal()
1608 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); in copy_signal()
1609 task_unlock(current->group_leader); in copy_signal()
1616 sig->oom_score_adj = current->signal->oom_score_adj; in copy_signal()
1617 sig->oom_score_adj_min = current->signal->oom_score_adj_min; in copy_signal()
1619 mutex_init(&sig->cred_guard_mutex); in copy_signal()
1620 init_rwsem(&sig->exec_update_lock); in copy_signal()
1629 * Must be called with sighand->lock held, which is common to in copy_seccomp()
1634 assert_spin_locked(&current->sighand->siglock); in copy_seccomp()
1636 /* Ref-count the new filter user, and assign it. */ in copy_seccomp()
1637 get_seccomp_filter(current); in copy_seccomp()
1638 p->seccomp = current->seccomp; in copy_seccomp()
1645 if (task_no_new_privs(current)) in copy_seccomp()
1653 if (p->seccomp.mode != SECCOMP_MODE_DISABLED) in copy_seccomp()
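copy_seccomp() runs under the parent's siglock so the copied filter, mode, and the no_new_privs bit are inherited as one consistent snapshot. The inheritance is easy to observe from userspace via the no_new_privs flag:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

        if (fork() == 0) {
                /* copied to the child along with the seccomp state */
                printf("child no_new_privs = %d\n",
                       (int)prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0));
                _exit(0);
        }
        wait(NULL);
        return 0;
}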
1660 current->clear_child_tid = tidptr; in SYSCALL_DEFINE1()
1662 return task_pid_vnr(current); in SYSCALL_DEFINE1()
1667 raw_spin_lock_init(&p->pi_lock); in rt_mutex_init_task()
1669 p->pi_waiters = RB_ROOT_CACHED; in rt_mutex_init_task()
1670 p->pi_top_task = NULL; in rt_mutex_init_task()
1671 p->pi_blocked_on = NULL; in rt_mutex_init_task()
1680 INIT_HLIST_NODE(&task->pid_links[type]); in init_task_pid_links()
1688 task->thread_pid = pid; in init_task_pid()
1690 task->signal->pids[type] = pid; in init_task_pid()
1696 p->rcu_read_lock_nesting = 0; in rcu_copy_process()
1697 p->rcu_read_unlock_special.s = 0; in rcu_copy_process()
1698 p->rcu_blocked_node = NULL; in rcu_copy_process()
1699 INIT_LIST_HEAD(&p->rcu_node_entry); in rcu_copy_process()
1702 p->rcu_tasks_holdout = false; in rcu_copy_process()
1703 INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); in rcu_copy_process()
1704 p->rcu_tasks_idle_cpu = -1; in rcu_copy_process()
1707 p->trc_reader_nesting = 0; in rcu_copy_process()
1708 p->trc_reader_special.s = 0; in rcu_copy_process()
1709 INIT_LIST_HEAD(&p->trc_holdout_list); in rcu_copy_process()
1715 if (file->f_op == &pidfd_fops) in pidfd_pid()
1716 return file->private_data; in pidfd_pid()
1718 return ERR_PTR(-EBADF); in pidfd_pid()
1723 struct pid *pid = file->private_data; in pidfd_release()
1725 file->private_data = NULL; in pidfd_release()
1732 * pidfd_show_fdinfo - print information about a pidfd
1747 * starting from the current pid namespace of the instance, i.e. the
1758 * - create two new pid namespaces ns1 and ns2 in the initial pid
1761 * - create a process with a pidfd in ns1
1762 * - send pidfd from ns1 to ns2
1763 * - read /proc/self/fdinfo/<pidfd> and observe that both Pid and NSpid
1768 struct pid *pid = f->private_data; in pidfd_show_fdinfo()
1770 pid_t nr = -1; in pidfd_show_fdinfo()
1773 ns = proc_pid_ns(file_inode(m->file)->i_sb); in pidfd_show_fdinfo()
1784 /* If nr is non-zero it means that 'pid' is valid and that in pidfd_show_fdinfo()
1789 for (i = ns->level + 1; i <= pid->level; i++) in pidfd_show_fdinfo()
1790 seq_put_decimal_ll(m, "\t", pid->numbers[i].nr); in pidfd_show_fdinfo()
1802 struct pid *pid = file->private_data; in pidfd_poll()
1805 poll_wait(file, &pid->wait_pidfd, pts); in pidfd_poll()
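pidfd_poll() reports the pidfd readable once the process has exited, so process death can be multiplexed through ordinary poll/epoll. A small demo using pidfd_open(2) (Linux 5.3+; error handling omitted):

#define _GNU_SOURCE
#include <poll.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        pid_t pid = fork();

        if (pid == 0) {
                sleep(1);
                _exit(0);
        }

        int pidfd = syscall(SYS_pidfd_open, pid, 0);
        struct pollfd pfd = { .fd = pidfd, .events = POLLIN };

        poll(&pfd, 1, -1);      /* readable once the child has exited */
        printf("child %d exited\n", (int)pid);
        close(pidfd);
        waitpid(pid, NULL, 0);
        return 0;
}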
1836 call_rcu(&tsk->rcu, __delayed_free_task); in delayed_free_task()
1844 if (!tsk->mm) in copy_oom_score_adj()
1853 set_bit(MMF_MULTIPROCESS, &tsk->mm->flags); in copy_oom_score_adj()
1855 tsk->signal->oom_score_adj = current->signal->oom_score_adj; in copy_oom_score_adj()
1856 tsk->signal->oom_score_adj_min = current->signal->oom_score_adj_min; in copy_oom_score_adj()
1866 * flags). The actual kick-off is left to the caller.
1874 int pidfd = -1, retval; in copy_process()
1878 u64 clone_flags = args->flags; in copy_process()
1879 struct nsproxy *nsp = current->nsproxy; in copy_process()
1886 return ERR_PTR(-EINVAL); in copy_process()
1889 return ERR_PTR(-EINVAL); in copy_process()
1896 return ERR_PTR(-EINVAL); in copy_process()
1904 return ERR_PTR(-EINVAL); in copy_process()
1909 * multi-rooted process trees, prevent global and container-inits in copy_process()
1913 current->signal->flags & SIGNAL_UNKILLABLE) in copy_process()
1914 return ERR_PTR(-EINVAL); in copy_process()
1922 (task_active_pid_ns(current) != nsp->pid_ns_for_children)) in copy_process()
1923 return ERR_PTR(-EINVAL); in copy_process()
1931 if (nsp->time_ns != nsp->time_ns_for_children) in copy_process()
1932 return ERR_PTR(-EINVAL); in copy_process()
1937 * - CLONE_DETACHED is blocked so that we can potentially in copy_process()
1939 * - CLONE_THREAD is blocked until someone really needs it. in copy_process()
1942 return ERR_PTR(-EINVAL); in copy_process()
1954 spin_lock_irq(&current->sighand->siglock); in copy_process()
1956 hlist_add_head(&delayed.node, &current->signal->multiprocess); in copy_process()
1958 spin_unlock_irq(&current->sighand->siglock); in copy_process()
1959 retval = -ERESTARTNOINTR; in copy_process()
1960 if (signal_pending(current)) in copy_process()
1963 retval = -ENOMEM; in copy_process()
1964 p = dup_task_struct(current, node); in copy_process()
1971 * p->set_child_tid which is (ab)used as a kthread's data pointer for in copy_process()
1974 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL; in copy_process()
1978 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL; in copy_process()
1986 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); in copy_process()
1988 retval = -EAGAIN; in copy_process()
1989 if (atomic_read(&p->real_cred->user->processes) >= in copy_process()
1991 if (p->real_cred->user != INIT_USER && in copy_process()
1995 current->flags &= ~PF_NPROC_EXCEEDED; in copy_process()
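This is the check that turns RLIMIT_NPROC into fork() failing with EAGAIN once the user's total process count reaches the limit; INIT_USER and holders of CAP_SYS_RESOURCE/CAP_SYS_ADMIN are exempt. Observable from an unprivileged shell:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        /* this UID already runs more than one process, so fork() must fail */
        struct rlimit rl = { .rlim_cur = 1, .rlim_max = 1 };

        setrlimit(RLIMIT_NPROC, &rl);

        pid_t pid = fork();
        if (pid < 0)
                printf("fork: %s\n", strerror(errno));  /* EAGAIN expected */
        else if (pid == 0)
                _exit(0);
        else
                waitpid(pid, NULL, 0);
        return 0;
}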
2006 retval = -EAGAIN; in copy_process()
2014 p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE); in copy_process()
2015 p->flags |= PF_FORKNOEXEC; in copy_process()
2016 INIT_LIST_HEAD(&p->children); in copy_process()
2017 INIT_LIST_HEAD(&p->sibling); in copy_process()
2019 p->vfork_done = NULL; in copy_process()
2020 spin_lock_init(&p->alloc_lock); in copy_process()
2022 init_sigpending(&p->pending); in copy_process()
2024 p->utime = p->stime = p->gtime = 0; in copy_process()
2026 p->utimescaled = p->stimescaled = 0; in copy_process()
2028 prev_cputime_init(&p->prev_cputime); in copy_process()
2031 seqcount_init(&p->vtime.seqcount); in copy_process()
2032 p->vtime.starttime = 0; in copy_process()
2033 p->vtime.state = VTIME_INACTIVE; in copy_process()
2037 p->io_uring = NULL; in copy_process()
2041 memset(&p->rss_stat, 0, sizeof(p->rss_stat)); in copy_process()
2044 p->default_timer_slack_ns = current->timer_slack_ns; in copy_process()
2047 p->psi_flags = 0; in copy_process()
2050 task_io_accounting_init(&p->ioac); in copy_process()
2053 posix_cputimers_init(&p->posix_cputimers); in copy_process()
2055 p->io_context = NULL; in copy_process()
2059 p->mempolicy = mpol_dup(p->mempolicy); in copy_process()
2060 if (IS_ERR(p->mempolicy)) { in copy_process()
2061 retval = PTR_ERR(p->mempolicy); in copy_process()
2062 p->mempolicy = NULL; in copy_process()
2067 p->cpuset_mem_spread_rotor = NUMA_NO_NODE; in copy_process()
2068 p->cpuset_slab_spread_rotor = NUMA_NO_NODE; in copy_process()
2069 seqcount_spinlock_init(&p->mems_allowed_seq, &p->alloc_lock); in copy_process()
2072 memset(&p->irqtrace, 0, sizeof(p->irqtrace)); in copy_process()
2073 p->irqtrace.hardirq_disable_ip = _THIS_IP_; in copy_process()
2074 p->irqtrace.softirq_enable_ip = _THIS_IP_; in copy_process()
2075 p->softirqs_enabled = 1; in copy_process()
2076 p->softirq_context = 0; in copy_process()
2079 p->pagefault_disabled = 0; in copy_process()
2086 p->blocked_on = NULL; /* not blocked yet */ in copy_process()
2089 p->sequential_io = 0; in copy_process()
2090 p->sequential_io_avg = 0; in copy_process()
2093 p->bpf_ctx = NULL; in copy_process()
2136 retval = copy_thread(clone_flags, args->stack, args->stack_size, p, args->tls); in copy_process()
2143 pid = alloc_pid(p->nsproxy->pid_ns_for_children, args->set_tid, in copy_process()
2144 args->set_tid_size); in copy_process()
2172 retval = put_user(pidfd, args->pidfd); in copy_process()
2178 p->plug = NULL; in copy_process()
2200 p->pid = pid_nr(pid); in copy_process()
2202 p->group_leader = current->group_leader; in copy_process()
2203 p->tgid = current->tgid; in copy_process()
2205 p->group_leader = p; in copy_process()
2206 p->tgid = p->pid; in copy_process()
2209 p->nr_dirtied = 0; in copy_process()
2210 p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); in copy_process()
2211 p->dirty_paused_when = 0; in copy_process()
2213 p->pdeath_signal = 0; in copy_process()
2214 INIT_LIST_HEAD(&p->thread_group); in copy_process()
2215 p->task_works = NULL; in copy_process()
2229 * From this point on we must avoid any synchronous user-space in copy_process()
2230 * communication until we take the tasklist-lock. In particular, we do in copy_process()
2231 * not want user-space to be able to predict the process start-time by in copy_process()
2236 p->start_time = ktime_get_ns(); in copy_process()
2237 p->start_boottime = ktime_get_boottime_ns(); in copy_process()
2245 /* CLONE_PARENT re-uses the old parent */ in copy_process()
2247 p->real_parent = current->real_parent; in copy_process()
2248 p->parent_exec_id = current->parent_exec_id; in copy_process()
2250 p->exit_signal = -1; in copy_process()
2252 p->exit_signal = current->group_leader->exit_signal; in copy_process()
2254 p->real_parent = current; in copy_process()
2255 p->parent_exec_id = current->self_exec_id; in copy_process()
2256 p->exit_signal = args->exit_signal; in copy_process()
2261 spin_lock(&current->sighand->siglock); in copy_process()
2272 if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) { in copy_process()
2273 retval = -ENOMEM; in copy_process()
2278 if (fatal_signal_pending(current)) { in copy_process()
2279 retval = -EINTR; in copy_process()
2288 if (likely(p->pid)) { in copy_process()
2294 init_task_pid(p, PIDTYPE_PGID, task_pgrp(current)); in copy_process()
2295 init_task_pid(p, PIDTYPE_SID, task_session(current)); in copy_process()
2298 ns_of_pid(pid)->child_reaper = p; in copy_process()
2299 p->signal->flags |= SIGNAL_UNKILLABLE; in copy_process()
2301 p->signal->shared_pending.signal = delayed.signal; in copy_process()
2302 p->signal->tty = tty_kref_get(current->signal->tty); in copy_process()
2308 p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper || in copy_process()
2309 p->real_parent->signal->is_child_subreaper; in copy_process()
2310 list_add_tail(&p->sibling, &p->real_parent->children); in copy_process()
2311 list_add_tail_rcu(&p->tasks, &init_task.tasks); in copy_process()
2317 current->signal->nr_threads++; in copy_process()
2318 atomic_inc(&current->signal->live); in copy_process()
2319 refcount_inc(&current->signal->sigcnt); in copy_process()
2321 list_add_tail_rcu(&p->thread_group, in copy_process()
2322 &p->group_leader->thread_group); in copy_process()
2323 list_add_tail_rcu(&p->thread_node, in copy_process()
2324 &p->signal->thread_head); in copy_process()
2331 spin_unlock(&current->sighand->siglock); in copy_process()
2348 spin_unlock(&current->sighand->siglock); in copy_process()
2362 if (p->io_context) in copy_process()
2367 if (p->mm) { in copy_process()
2368 mm_clear_owner(p->mm, p); in copy_process()
2369 mmput(p->mm); in copy_process()
2373 free_signal_struct(p->signal); in copy_process()
2375 __cleanup_sighand(p->sighand); in copy_process()
2392 mpol_put(p->mempolicy); in copy_process()
2397 atomic_dec(&p->cred->user->processes); in copy_process()
2400 p->state = TASK_DEAD; in copy_process()
2404 spin_lock_irq(&current->sighand->siglock); in copy_process()
2406 spin_unlock_irq(&current->sighand->siglock); in copy_process()
2415 INIT_HLIST_NODE(&idle->pid_links[type]); /* not really needed */ in init_idle_pids()
2442 * Ok, this is the main fork-routine.
2444 * It copies the process, and if successful kick-starts
2447 * args->exit_signal is expected to be checked for sanity by the caller.
2451 u64 clone_flags = args->flags; in kernel_clone()
2467 if ((args->flags & CLONE_PIDFD) && in kernel_clone()
2468 (args->flags & CLONE_PARENT_SETTID) && in kernel_clone()
2469 (args->pidfd == args->parent_tid)) in kernel_clone()
2470 return -EINVAL; in kernel_clone()
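Legacy clone() has no spare argument slot, so CLONE_PIDFD returns the new pidfd through the parent_tid word; the check above refuses the combination that would make both writes land on the same address. A standalone restatement of the predicate (flag values copied from include/uapi/linux/sched.h so the sketch needs no kernel headers):

#include <stdbool.h>

/* values copied from include/uapi/linux/sched.h */
#define CLONE_PIDFD_BIT         0x00001000UL
#define CLONE_PARENT_SETTID_BIT 0x00100000UL

/*
 * Legacy clone() returns the pidfd through the parent_tid word, so both
 * features may not target the same userspace address.
 */
static bool clone_pidfd_conflict(unsigned long flags,
                                 const int *pidfd, const int *parent_tid)
{
        return (flags & CLONE_PIDFD_BIT) &&
               (flags & CLONE_PARENT_SETTID_BIT) &&
               pidfd == parent_tid;
}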
2481 else if (args->exit_signal != SIGCHLD) in kernel_clone()
2486 if (likely(!ptrace_event_enabled(current, trace))) in kernel_clone()
2497 * Do this prior waking up the new thread - the thread pointer in kernel_clone()
2500 trace_sched_process_fork(current, p); in kernel_clone()
2506 put_user(nr, args->parent_tid); in kernel_clone()
2509 p->vfork_done = &vfork; in kernel_clone()
2556 return -EINVAL; in SYSCALL_DEFINE0()
2619 pid_t *kset_tid = kargs->set_tid; in copy_clone_args_from_user()
2630 return -E2BIG; in copy_clone_args_from_user()
2632 return -EINVAL; in copy_clone_args_from_user()
2639 return -EINVAL; in copy_clone_args_from_user()
2642 return -EINVAL; in copy_clone_args_from_user()
2645 return -EINVAL; in copy_clone_args_from_user()
2653 return -EINVAL; in copy_clone_args_from_user()
2657 return -EINVAL; in copy_clone_args_from_user()
2674 (kargs->set_tid_size * sizeof(pid_t)))) in copy_clone_args_from_user()
2675 return -EFAULT; in copy_clone_args_from_user()
2677 kargs->set_tid = kset_tid; in copy_clone_args_from_user()
2683 * clone3_stack_valid - check and prepare stack
2692 if (kargs->stack == 0) { in clone3_stack_valid()
2693 if (kargs->stack_size > 0) in clone3_stack_valid()
2696 if (kargs->stack_size == 0) in clone3_stack_valid()
2699 if (!access_ok((void __user *)kargs->stack, kargs->stack_size)) in clone3_stack_valid()
2703 kargs->stack += kargs->stack_size; in clone3_stack_valid()
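Unlike legacy clone(), clone3() takes the lowest stack address plus a size, and clone3_stack_valid() turns that into an initial stack pointer for architectures whose stacks grow down, hence the += above. The same arithmetic in userspace:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        size_t stack_size = 64 * 1024;
        char *stack = malloc(stack_size);       /* lowest address */

        /* downward-growing stack: the initial SP is one past the top */
        uintptr_t sp = (uintptr_t)stack + stack_size;

        printf("base = %p, initial sp = %#jx\n", (void *)stack, (uintmax_t)sp);
        free(stack);
        return 0;
}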
2713 if (kargs->flags & in clone3_args_valid()
2718 * - make the CLONE_DETACHED bit reuseable for clone3 in clone3_args_valid()
2719 * - make the CSIGNAL bits reuseable for clone3 in clone3_args_valid()
2721 if (kargs->flags & (CLONE_DETACHED | CSIGNAL)) in clone3_args_valid()
2724 if ((kargs->flags & (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) == in clone3_args_valid()
2728 if ((kargs->flags & (CLONE_THREAD | CLONE_PARENT)) && in clone3_args_valid()
2729 kargs->exit_signal) in clone3_args_valid()
2739 * clone3 - create a new process with specific properties
2763 return -EINVAL; in SYSCALL_DEFINE2()
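copy_clone_args_from_user() accepts any struct size from the v0 layout up to the current one, rejecting unknown trailing non-zero bytes with E2BIG; that size argument is what makes clone3() extensible. Minimal fork()-equivalent usage through the raw syscall (needs Linux 5.3+ headers; error handling omitted):

#define _GNU_SOURCE
#include <linux/sched.h>        /* struct clone_args */
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        struct clone_args args;

        memset(&args, 0, sizeof(args)); /* unused tail must be zero */
        args.exit_signal = SIGCHLD;     /* plain fork()-like child */

        long pid = syscall(SYS_clone3, &args, sizeof(args));
        if (pid == 0) {
                printf("child %d\n", (int)getpid());
                _exit(0);
        }
        waitpid(pid, NULL, 0);
        return 0;
}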
2775 leader = top = top->group_leader; in walk_process_tree()
2778 list_for_each_entry(child, &parent->children, sibling) { in walk_process_tree()
2793 parent = child->real_parent; in walk_process_tree()
2794 leader = parent->group_leader; in walk_process_tree()
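walk_process_tree() visits every descendant depth-first without recursion: it descends into children and climbs back up via real_parent/group_leader when a subtree is exhausted. The same traversal over a toy tree with explicit child/sibling/parent links (the kernel iterates each task's ->children list instead):

#include <stdio.h>

struct proc {
        const char *name;
        struct proc *parent, *child, *sibling;
};

static void walk(struct proc *top, void (*visit)(struct proc *))
{
        struct proc *p = top->child;

        while (p) {
                visit(p);
                if (p->child) {                 /* descend first */
                        p = p->child;
                        continue;
                }
                while (p != top && !p->sibling) /* climb via the parent */
                        p = p->parent;
                if (p == top)
                        break;
                p = p->sibling;                 /* then the next sibling */
        }
}

static void print_name(struct proc *p)
{
        printf("%s\n", p->name);
}

int main(void)
{
        struct proc top = { "init" }, a = { "a" }, a1 = { "a1" }, b = { "b" };

        top.child = &a;
        a.parent = &top;  a.child = &a1;  a.sibling = &b;
        a1.parent = &a;
        b.parent = &top;

        walk(&top, print_name);         /* visits a, a1, b */
        return 0;
}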
2809 spin_lock_init(&sighand->siglock); in sighand_ctor()
2810 init_waitqueue_head(&sighand->signalfd_wqh); in sighand_ctor()
2862 return -EINVAL; in check_unshare_flags()
2870 if (!thread_group_empty(current)) in check_unshare_flags()
2871 return -EINVAL; in check_unshare_flags()
2874 if (refcount_read(&current->sighand->count) > 1) in check_unshare_flags()
2875 return -EINVAL; in check_unshare_flags()
2879 return -EINVAL; in check_unshare_flags()
2890 struct fs_struct *fs = current->fs; in unshare_fs()
2896 if (fs->users == 1) in unshare_fs()
2901 return -ENOMEM; in unshare_fs()
2912 struct files_struct *fd = current->files; in unshare_fd()
2916 (fd && atomic_read(&fd->count) > 1)) { in unshare_fd()
2930 * constructed. Here we are modifying the current, active,
2993 exit_sem(current); in ksys_unshare()
2997 exit_shm(current); in ksys_unshare()
2998 shm_init_task(current); in ksys_unshare()
3002 switch_task_namespaces(current, new_nsproxy); in ksys_unshare()
3004 task_lock(current); in ksys_unshare()
3007 fs = current->fs; in ksys_unshare()
3008 spin_lock(&fs->lock); in ksys_unshare()
3009 current->fs = new_fs; in ksys_unshare()
3010 if (--fs->users) in ksys_unshare()
3014 spin_unlock(&fs->lock); in ksys_unshare()
3018 fd = current->files; in ksys_unshare()
3019 current->files = new_fd; in ksys_unshare()
3023 task_unlock(current); in ksys_unshare()
3032 perf_event_namespaces(current); in ksys_unshare()
3055 * Helper to unshare the files of the current task.
3062 struct task_struct *task = current; in unshare_files()
3071 *displaced = task->files; in unshare_files()
3073 task->files = copy; in unshare_files()