// SPDX-License-Identifier: GPL-2.0
#include <linux/cgroup.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/signal.h>

#include "cgroup-internal.h"

#include <trace/events/cgroup.h>

/*
 * Propagate the cgroup frozen state upwards through the cgroup tree.
 */
static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
{
	int desc = 1;
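	/*
	 * desc counts the cgroups whose frozen state has changed so far:
	 * the originating cgroup itself plus every ancestor that flips
	 * while walking up. Each ancestor's nr_frozen_descendants is
	 * adjusted by this amount.
	 */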

	/*
	 * If the new state is frozen, some freezing ancestor cgroups may
	 * change their state too, depending on whether all their descendants
	 * are frozen.
	 *
	 * Otherwise, all ancestor cgroups are forced into the non-frozen
	 * state.
	 */
	while ((cgrp = cgroup_parent(cgrp))) {
		if (frozen) {
			cgrp->freezer.nr_frozen_descendants += desc;
			if (!test_bit(CGRP_FROZEN, &cgrp->flags) &&
			    test_bit(CGRP_FREEZE, &cgrp->flags) &&
			    cgrp->freezer.nr_frozen_descendants ==
			    cgrp->nr_descendants) {
				set_bit(CGRP_FROZEN, &cgrp->flags);
				cgroup_file_notify(&cgrp->events_file);
				TRACE_CGROUP_PATH(notify_frozen, cgrp, 1);
				desc++;
			}
		} else {
			cgrp->freezer.nr_frozen_descendants -= desc;
			if (test_bit(CGRP_FROZEN, &cgrp->flags)) {
				clear_bit(CGRP_FROZEN, &cgrp->flags);
				cgroup_file_notify(&cgrp->events_file);
				TRACE_CGROUP_PATH(notify_frozen, cgrp, 0);
				desc++;
			}
		}
	}
}

/*
 * Revisit the cgroup frozen state.
 * Check whether the cgroup is really frozen and perform all state
 * transitions.
 */
void cgroup_update_frozen(struct cgroup *cgrp)
{
	bool frozen;

	lockdep_assert_held(&css_set_lock);

	/*
	 * If the cgroup has to be frozen (CGRP_FREEZE bit set),
	 * and all tasks are frozen and/or stopped, let's consider
	 * the cgroup frozen. Otherwise it's not frozen.
	 */
	frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
		 cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);

	if (frozen) {
		/* Already there? */
		if (test_bit(CGRP_FROZEN, &cgrp->flags))
			return;

		set_bit(CGRP_FROZEN, &cgrp->flags);
	} else {
		/* Already there? */
		if (!test_bit(CGRP_FROZEN, &cgrp->flags))
			return;

		clear_bit(CGRP_FROZEN, &cgrp->flags);
	}
	cgroup_file_notify(&cgrp->events_file);
	TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);

	/* Update the state of ancestor cgroups. */
	cgroup_propagate_frozen(cgrp, frozen);
}

/*
 * Increment cgroup's nr_frozen_tasks.
 */
static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
{
	cgrp->freezer.nr_frozen_tasks++;
}

/*
 * Decrement cgroup's nr_frozen_tasks.
 */
static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
{
	cgrp->freezer.nr_frozen_tasks--;
	WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
}

/*
 * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
 * and revisit the state of the cgroup, if necessary.
 */
void cgroup_enter_frozen(void)
{
	struct cgroup *cgrp;

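	/*
	 * current->frozen is only ever modified by the task itself, so
	 * reading it here without css_set_lock is safe.
	 */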
	if (current->frozen)
		return;

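	/*
	 * The freezer is a cgroup v2 feature, so only the task's cgroup
	 * on the default hierarchy needs its counters updated.
	 */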
	spin_lock_irq(&css_set_lock);
	current->frozen = true;
	cgrp = task_dfl_cgroup(current);
	cgroup_inc_frozen_cnt(cgrp);
	cgroup_update_frozen(cgrp);
	spin_unlock_irq(&css_set_lock);
}

/*
 * Conditionally leave frozen/stopped state. Update cgroup's counters,
 * and revisit the state of the cgroup, if necessary.
 *
 * If always_leave is not set and the cgroup is still freezing, we're
 * racing with a concurrent freeze operation. In this case, don't drop
 * the frozen counter, to avoid a transient switch to the unfrozen state.
 */
void cgroup_leave_frozen(bool always_leave)
{
	struct cgroup *cgrp;

	spin_lock_irq(&css_set_lock);
	cgrp = task_dfl_cgroup(current);
	if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
		cgroup_dec_frozen_cnt(cgrp);
		cgroup_update_frozen(cgrp);
		WARN_ON_ONCE(!current->frozen);
		current->frozen = false;
	} else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) {
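		/*
		 * We're racing with the cgroup freezing: keep the frozen
		 * counter and re-arm the freezer trap, so that the task
		 * passes through the signal delivery path and returns to
		 * the frozen state instead of briefly leaving it.
		 */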
		spin_lock(&current->sighand->siglock);
		current->jobctl |= JOBCTL_TRAP_FREEZE;
		set_thread_flag(TIF_SIGPENDING);
		spin_unlock(&current->sighand->siglock);
	}
	spin_unlock_irq(&css_set_lock);
}

/*
 * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
 * jobctl bit.
 */
static void cgroup_freeze_task(struct task_struct *task, bool freeze)
{
	unsigned long flags;

	/* If the task is about to die, don't bother with freezing it. */
	if (!lock_task_sighand(task, &flags))
		return;

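	/*
	 * To freeze, set the trap bit and kick the task into the signal
	 * delivery path; to unfreeze, clear the bit and wake the task
	 * from its interruptible sleep in the freezer trap.
	 */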
	if (freeze) {
		task->jobctl |= JOBCTL_TRAP_FREEZE;
		signal_wake_up(task, false);
	} else {
		task->jobctl &= ~JOBCTL_TRAP_FREEZE;
		wake_up_process(task);
	}

	unlock_task_sighand(task, &flags);
}

/*
 * Freeze or unfreeze all tasks in the given cgroup.
 */
static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze, u64 ts_nsec)
{
	struct css_task_iter it;
	struct task_struct *task;

	lockdep_assert_held(&cgroup_mutex);

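	/*
	 * The seqcount lets readers sample freeze_start_nsec and
	 * frozen_nsec consistently to compute the accumulated frozen
	 * time without taking css_set_lock.
	 */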
	spin_lock_irq(&css_set_lock);
	write_seqcount_begin(&cgrp->kmi_ext_info->freezer.freeze_seq);
	if (freeze) {
		set_bit(CGRP_FREEZE, &cgrp->flags);
		cgrp->kmi_ext_info->freezer.freeze_start_nsec = ts_nsec;
	} else {
		clear_bit(CGRP_FREEZE, &cgrp->flags);
		cgrp->kmi_ext_info->freezer.frozen_nsec += (ts_nsec -
			cgrp->kmi_ext_info->freezer.freeze_start_nsec);
	}
	write_seqcount_end(&cgrp->kmi_ext_info->freezer.freeze_seq);
	spin_unlock_irq(&css_set_lock);

	if (freeze)
		TRACE_CGROUP_PATH(freeze, cgrp);
	else
		TRACE_CGROUP_PATH(unfreeze, cgrp);

	css_task_iter_start(&cgrp->self, 0, &it);
	while ((task = css_task_iter_next(&it))) {
		/*
		 * Ignore kernel threads here. Freezing cgroups containing
		 * kthreads isn't supported.
		 */
		if (task->flags & PF_KTHREAD)
			continue;
		cgroup_freeze_task(task, freeze);
	}
	css_task_iter_end(&it);

	/*
	 * The cgroup state should be revisited here to cover empty leaf
	 * cgroups and cgroups whose descendants are already in the desired
	 * state.
	 */
	spin_lock_irq(&css_set_lock);
	if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
		cgroup_update_frozen(cgrp);
	spin_unlock_irq(&css_set_lock);
}

/*
 * Adjust the task state (freeze or unfreeze) and revisit the state of
 * source and destination cgroups.
 */
void cgroup_freezer_migrate_task(struct task_struct *task,
				 struct cgroup *src, struct cgroup *dst)
{
	lockdep_assert_held(&css_set_lock);

	/*
	 * Kernel threads are not supposed to be frozen at all.
	 */
	if (task->flags & PF_KTHREAD)
		return;

	/*
	 * There is nothing to do if neither the source nor the destination
	 * cgroup is freezing and the task is not frozen.
	 */
	if (!test_bit(CGRP_FREEZE, &src->flags) &&
	    !test_bit(CGRP_FREEZE, &dst->flags) &&
	    !task->frozen)
		return;

	/*
	 * Adjust counters of freezing and frozen tasks.
	 * Note that if the task is frozen, but the destination cgroup is
	 * not frozen, we bump both counters to keep them balanced.
	 */
	if (task->frozen) {
		cgroup_inc_frozen_cnt(dst);
		cgroup_dec_frozen_cnt(src);
	}
	cgroup_update_frozen(dst);
	cgroup_update_frozen(src);

	/*
	 * Force the task to the desired state.
	 */
	cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
}

void cgroup_freeze(struct cgroup *cgrp, bool freeze)
{
	struct cgroup_subsys_state *css;
	struct cgroup *dsct;
	bool applied = false;
	u64 ts_nsec;

	lockdep_assert_held(&cgroup_mutex);

	/*
	 * Nothing changed? Just exit.
	 */
	if (cgrp->freezer.freeze == freeze)
		return;

	cgrp->freezer.freeze = freeze;
	ts_nsec = ktime_get_ns();

	/*
	 * Propagate changes down the cgroup tree.
	 */
	css_for_each_descendant_pre(css, &cgrp->self) {
		dsct = css->cgroup;

		if (cgroup_is_dead(dsct))
			continue;

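		/*
		 * e_freeze is the effective freeze count: the number of
		 * freezing cgroups among this cgroup and its ancestors.
		 * Only transitions between 0 and 1 change the actual state.
		 */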
		if (freeze) {
			dsct->freezer.e_freeze++;
			/*
			 * Already frozen because of an ancestor's settings?
			 */
			if (dsct->freezer.e_freeze > 1)
				continue;
		} else {
			dsct->freezer.e_freeze--;
			/*
			 * Still frozen because of an ancestor's settings?
			 */
			if (dsct->freezer.e_freeze > 0)
				continue;

			WARN_ON_ONCE(dsct->freezer.e_freeze < 0);
		}

		/*
		 * Do change the actual state: freeze or unfreeze.
		 */
		cgroup_do_freeze(dsct, freeze, ts_nsec);
		applied = true;
	}

	/*
	 * Even if the actual state hasn't changed, let's notify the user.
	 * The state can be enforced by an ancestor cgroup: the cgroup can
	 * already be in the desired state, or it can be locked in the
	 * opposite state, so that the transition will never happen.
	 * In both cases it's better to notify the user that there is
	 * nothing to wait for.
	 */
	if (!applied) {
		TRACE_CGROUP_PATH(notify_frozen, cgrp,
				  test_bit(CGRP_FROZEN, &cgrp->flags));
		cgroup_file_notify(&cgrp->events_file);
	}
}