/* drivers/misc/uid_sys_stats.c
 *
 * Copyright (C) 2014 - 2015 Google, Inc.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */

#include <linux/atomic.h>
#include <linux/err.h>
#include <linux/hashtable.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/profile.h>
#include <linux/sched/cputime.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/spinlock_types.h>

#define UID_HASH_BITS	10
#define UID_HASH_NUMS	(1 << UID_HASH_BITS)
DECLARE_HASHTABLE(hash_table, UID_HASH_BITS);
/* uid_lock[bkt] ensures the consistency of hash_table[bkt] */
spinlock_t uid_lock[UID_HASH_NUMS];

#define for_each_bkt(bkt) \
	for (bkt = 0; bkt < HASH_SIZE(hash_table); bkt++)

/* iterate over all uid_entries hashing to the same bkt */
#define for_each_uid_entry(uid_entry, bkt) \
	hlist_for_each_entry(uid_entry, &hash_table[bkt], hash)

#define for_each_uid_entry_safe(uid_entry, tmp, bkt) \
	hlist_for_each_entry_safe(uid_entry, tmp, \
		&hash_table[bkt], hash)

static struct proc_dir_entry *cpu_parent;
static struct proc_dir_entry *io_parent;
static struct proc_dir_entry *proc_parent;

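/*
 * Per-state I/O counters mirroring the task_io_accounting fields this driver
 * exposes: bytes that hit storage (read_bytes/write_bytes), bytes moved by
 * read/write-style syscalls (rchar/wchar), and a count of fsync calls
 * (accumulated from ioac->syscfs below).
 */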
struct io_stats {
	u64 read_bytes;
	u64 write_bytes;
	u64 rchar;
	u64 wchar;
	u64 fsync;
};

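/*
 * Each uid keeps one struct io_stats per slot: FOREGROUND and BACKGROUND are
 * the reported per-state buckets, TOTAL_LAST is the snapshot of the live
 * counters taken at the previous update, and DEAD_TASKS accumulates I/O from
 * tasks that have already exited.
 */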
#define UID_STATE_FOREGROUND	0
#define UID_STATE_BACKGROUND	1
#define UID_STATE_TOTAL_LAST	2
#define UID_STATE_DEAD_TASKS	3
#define UID_STATE_SIZE		4

#define MAX_TASK_COMM_LEN	256

struct task_entry {
	char comm[MAX_TASK_COMM_LEN];
	pid_t pid;
	struct io_stats io[UID_STATE_SIZE];
	struct hlist_node hash;
};

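/* One entry per tracked uid, hashed by uid into hash_table. */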
struct uid_entry {
	uid_t uid;
	u64 utime;
	u64 stime;
	int state;
	struct io_stats io[UID_STATE_SIZE];
	struct hlist_node hash;
};

static void init_hash_table_and_lock(void)
{
	int i;

	hash_init(hash_table);
	for (i = 0; i < UID_HASH_NUMS; i++)
		spin_lock_init(&uid_lock[i]);
}

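/*
 * Locking helpers: every uid maps to one hash bucket, and each bucket is
 * protected by its own spinlock, so updates for uids in different buckets
 * can proceed concurrently.
 */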
static inline int uid_to_bkt(uid_t uid)
{
	return hash_min(uid, HASH_BITS(hash_table));
}

static inline int trylock_uid(uid_t uid)
{
	return spin_trylock(&uid_lock[uid_to_bkt(uid)]);
}

static inline void lock_uid(uid_t uid)
{
	spin_lock(&uid_lock[uid_to_bkt(uid)]);
}

static inline void unlock_uid(uid_t uid)
{
	spin_unlock(&uid_lock[uid_to_bkt(uid)]);
}

static inline void lock_uid_by_bkt(u32 bkt)
{
	spin_lock(&uid_lock[bkt]);
}

static inline void unlock_uid_by_bkt(u32 bkt)
{
	spin_unlock(&uid_lock[bkt]);
}

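/*
 * Bytes that actually reached storage: write_bytes minus writes that were
 * cancelled (e.g. dirty pages truncated before writeback), clamped at zero.
 */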
static u64 compute_write_bytes(struct task_io_accounting *ioac)
{
	if (ioac->write_bytes <= ioac->cancelled_write_bytes)
		return 0;

	return ioac->write_bytes - ioac->cancelled_write_bytes;
}

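/*
 * Fold the I/O done since the last update (current live counters plus the
 * counters inherited from dead tasks, minus the previous snapshot) into the
 * bucket for the uid's current state, then refresh the snapshot and clear
 * the dead-task accumulator.
 */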
static void compute_io_bucket_stats(struct io_stats *io_bucket,
				struct io_stats *io_curr,
				struct io_stats *io_last,
				struct io_stats *io_dead)
{
	/* A task could switch to another uid group, but its io_last in the
	 * previous uid group could still be positive.
	 * Therefore, check for underflow before each update.
	 */
	int64_t delta;

	delta = io_curr->read_bytes + io_dead->read_bytes -
		io_last->read_bytes;
	io_bucket->read_bytes += delta > 0 ? delta : 0;
	delta = io_curr->write_bytes + io_dead->write_bytes -
		io_last->write_bytes;
	io_bucket->write_bytes += delta > 0 ? delta : 0;
	delta = io_curr->rchar + io_dead->rchar - io_last->rchar;
	io_bucket->rchar += delta > 0 ? delta : 0;
	delta = io_curr->wchar + io_dead->wchar - io_last->wchar;
	io_bucket->wchar += delta > 0 ? delta : 0;
	delta = io_curr->fsync + io_dead->fsync - io_last->fsync;
	io_bucket->fsync += delta > 0 ? delta : 0;

	io_last->read_bytes = io_curr->read_bytes;
	io_last->write_bytes = io_curr->write_bytes;
	io_last->rchar = io_curr->rchar;
	io_last->wchar = io_curr->wchar;
	io_last->fsync = io_curr->fsync;

	memset(io_dead, 0, sizeof(struct io_stats));
}

static struct uid_entry *find_uid_entry(uid_t uid)
{
	struct uid_entry *uid_entry;
	u32 bkt = uid_to_bkt(uid);

	for_each_uid_entry(uid_entry, bkt) {
		if (uid_entry->uid == uid)
			return uid_entry;
	}
	return NULL;
}

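/*
 * Look up a uid_entry, allocating and hashing a new one if this uid has not
 * been seen yet.  GFP_ATOMIC is used because callers hold the bucket
 * spinlock.
 */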
static struct uid_entry *find_or_register_uid(uid_t uid)
{
	struct uid_entry *uid_entry;

	uid_entry = find_uid_entry(uid);
	if (uid_entry)
		return uid_entry;

	uid_entry = kzalloc(sizeof(struct uid_entry), GFP_ATOMIC);
	if (!uid_entry)
		return NULL;

	uid_entry->uid = uid;
	hash_add(hash_table, &uid_entry->hash, uid);

	return uid_entry;
}

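/*
 * Add the adjusted cputime of every live thread belonging to this uid on top
 * of the utime/stime already accumulated from exited tasks.
 */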
static void calc_uid_cputime(struct uid_entry *uid_entry,
			u64 *total_utime, u64 *total_stime)
{
	struct user_namespace *user_ns = current_user_ns();
	struct task_struct *p, *t;
	u64 utime, stime;
	uid_t uid;

	rcu_read_lock();
	for_each_process(p) {
		uid = from_kuid_munged(user_ns, task_uid(p));

		if (uid != uid_entry->uid)
			continue;

		for_each_thread(p, t) {
			/* avoid double accounting of dying threads */
			if (!(t->flags & PF_EXITING)) {
				task_cputime_adjusted(t, &utime, &stime);
				*total_utime += utime;
				*total_stime += stime;
			}
		}
	}
	rcu_read_unlock();
}

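/*
 * seq_file show handler for /proc/uid_cputime/show_uid_stat.  Each line is
 * "<uid>: <user time> <system time>", both converted to microseconds.
 */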
static int uid_cputime_show(struct seq_file *m, void *v)
{
	struct uid_entry *uid_entry = NULL;
	u32 bkt;

	for_each_bkt(bkt) {
		lock_uid_by_bkt(bkt);
		for_each_uid_entry(uid_entry, bkt) {
			u64 total_utime = uid_entry->utime;
			u64 total_stime = uid_entry->stime;

			calc_uid_cputime(uid_entry, &total_utime, &total_stime);
			seq_printf(m, "%d: %llu %llu\n", uid_entry->uid,
				ktime_to_us(total_utime), ktime_to_us(total_stime));
		}
		unlock_uid_by_bkt(bkt);
	}

	return 0;
}

static int uid_cputime_open(struct inode *inode, struct file *file)
{
	return single_open(file, uid_cputime_show, pde_data(inode));
}

static const struct proc_ops uid_cputime_fops = {
	.proc_open = uid_cputime_open,
	.proc_read = seq_read,
	.proc_lseek = seq_lseek,
	.proc_release = single_release,
};

static int uid_remove_open(struct inode *inode, struct file *file)
{
	return single_open(file, NULL, NULL);
}

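/*
 * Drop accounting entries for an inclusive uid range.  Userspace writes
 * "<start>-<end>" to /proc/uid_cputime/remove_uid_range, e.g.:
 *
 *	echo "10050-10059" > /proc/uid_cputime/remove_uid_range
 */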
static ssize_t uid_remove_write(struct file *file,
			const char __user *buffer, size_t count, loff_t *ppos)
{
	char uids[128];
	char *start_uid, *end_uid = NULL;
	long int uid_start = 0, uid_end = 0;

	if (count >= sizeof(uids))
		count = sizeof(uids) - 1;

	if (copy_from_user(uids, buffer, count))
		return -EFAULT;

	uids[count] = '\0';
	end_uid = uids;
	start_uid = strsep(&end_uid, "-");

	if (!start_uid || !end_uid)
		return -EINVAL;

	if (kstrtol(start_uid, 10, &uid_start) != 0 ||
		kstrtol(end_uid, 10, &uid_end) != 0) {
		return -EINVAL;
	}

	for (; uid_start <= uid_end; uid_start++) {
		struct uid_entry *uid_entry;
		struct hlist_node *tmp;
		u32 bkt = uid_to_bkt((uid_t)uid_start);

		lock_uid(uid_start);
		for_each_uid_entry_safe(uid_entry, tmp, bkt) {
			if (uid_start == uid_entry->uid) {
				hash_del(&uid_entry->hash);
				kfree(uid_entry);
			}
		}
		unlock_uid(uid_start);
	}

	return count;
}

static const struct proc_ops uid_remove_fops = {
	.proc_open = uid_remove_open,
	.proc_release = single_release,
	.proc_write = uid_remove_write,
};

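/* Accumulate one task's io_accounting counters into the given slot. */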
static void __add_uid_io_stats(struct uid_entry *uid_entry,
			struct task_io_accounting *ioac, int slot)
{
	struct io_stats *io_slot = &uid_entry->io[slot];

	io_slot->read_bytes += ioac->read_bytes;
	io_slot->write_bytes += compute_write_bytes(ioac);
	io_slot->rchar += ioac->rchar;
	io_slot->wchar += ioac->wchar;
	io_slot->fsync += ioac->syscfs;
}

static void add_uid_io_stats(struct uid_entry *uid_entry,
			struct task_struct *task, int slot)
{
	struct task_entry *task_entry __maybe_unused;

	/* avoid double accounting of dying threads */
	if (slot != UID_STATE_DEAD_TASKS && (task->flags & PF_EXITING))
		return;

	__add_uid_io_stats(uid_entry, &task->ioac, slot);
}

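/*
 * Recompute the I/O buckets for one uid: sum the live counters of all of its
 * threads under RCU, then let compute_io_bucket_stats() fold the delta into
 * the bucket for the uid's current state.
 */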
static void update_io_stats_uid(struct uid_entry *uid_entry)
{
	struct user_namespace *user_ns = current_user_ns();
	struct task_struct *p, *t;
	struct io_stats io;

	memset(&io, 0, sizeof(struct io_stats));

	rcu_read_lock();
	for_each_process(p) {
		uid_t uid = from_kuid_munged(user_ns, task_uid(p));

		if (uid != uid_entry->uid)
			continue;

		for_each_thread(p, t) {
			/* avoid double accounting of dying threads */
			if (!(t->flags & PF_EXITING)) {
				io.read_bytes += t->ioac.read_bytes;
				io.write_bytes += compute_write_bytes(&t->ioac);
				io.rchar += t->ioac.rchar;
				io.wchar += t->ioac.wchar;
				io.fsync += t->ioac.syscfs;
			}
		}
	}
	rcu_read_unlock();

	compute_io_bucket_stats(&uid_entry->io[uid_entry->state], &io,
				&uid_entry->io[UID_STATE_TOTAL_LAST],
				&uid_entry->io[UID_STATE_DEAD_TASKS]);
}

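/*
 * seq_file show handler for /proc/uid_io/stats.  Each line prints, in order:
 * uid, foreground rchar/wchar/read_bytes/write_bytes, background
 * rchar/wchar/read_bytes/write_bytes, foreground fsync, background fsync.
 */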
static int uid_io_show(struct seq_file *m, void *v)
{
	struct uid_entry *uid_entry = NULL;
	u32 bkt;

	for_each_bkt(bkt) {
		lock_uid_by_bkt(bkt);
		for_each_uid_entry(uid_entry, bkt) {
			update_io_stats_uid(uid_entry);

			seq_printf(m, "%d %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
				uid_entry->uid,
				uid_entry->io[UID_STATE_FOREGROUND].rchar,
				uid_entry->io[UID_STATE_FOREGROUND].wchar,
				uid_entry->io[UID_STATE_FOREGROUND].read_bytes,
				uid_entry->io[UID_STATE_FOREGROUND].write_bytes,
				uid_entry->io[UID_STATE_BACKGROUND].rchar,
				uid_entry->io[UID_STATE_BACKGROUND].wchar,
				uid_entry->io[UID_STATE_BACKGROUND].read_bytes,
				uid_entry->io[UID_STATE_BACKGROUND].write_bytes,
				uid_entry->io[UID_STATE_FOREGROUND].fsync,
				uid_entry->io[UID_STATE_BACKGROUND].fsync);
		}
		unlock_uid_by_bkt(bkt);
	}

	return 0;
}

static int uid_io_open(struct inode *inode, struct file *file)
{
	return single_open(file, uid_io_show, pde_data(inode));
}

static const struct proc_ops uid_io_fops = {
	.proc_open = uid_io_open,
	.proc_read = seq_read,
	.proc_lseek = seq_lseek,
	.proc_release = single_release,
};

static int uid_procstat_open(struct inode *inode, struct file *file)
{
	return single_open(file, NULL, NULL);
}

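/*
 * Switch a uid between foreground and background accounting.  Userspace
 * writes "<uid> <state>" to /proc/uid_procstat/set, where state is
 * 0 (UID_STATE_FOREGROUND) or 1 (UID_STATE_BACKGROUND), e.g.:
 *
 *	echo "10050 1" > /proc/uid_procstat/set
 */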
static ssize_t uid_procstat_write(struct file *file,
			const char __user *buffer, size_t count, loff_t *ppos)
{
	struct uid_entry *uid_entry;
	uid_t uid;
	int argc, state;
	char input[128];

	if (count >= sizeof(input))
		return -EINVAL;

	if (copy_from_user(input, buffer, count))
		return -EFAULT;

	input[count] = '\0';

	argc = sscanf(input, "%u %d", &uid, &state);
	if (argc != 2)
		return -EINVAL;

	if (state != UID_STATE_BACKGROUND && state != UID_STATE_FOREGROUND)
		return -EINVAL;

	lock_uid(uid);
	uid_entry = find_or_register_uid(uid);
	if (!uid_entry) {
		unlock_uid(uid);
		return -EINVAL;
	}

	if (uid_entry->state == state) {
		unlock_uid(uid);
		return count;
	}

	update_io_stats_uid(uid_entry);
	uid_entry->state = state;
	unlock_uid(uid);

	return count;
}

static const struct proc_ops uid_procstat_fops = {
	.proc_open = uid_procstat_open,
	.proc_release = single_release,
	.proc_write = uid_procstat_write,
};

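/*
 * When the exit notifier cannot take the bucket lock (trylock failure), the
 * dying task's accounting is copied into one of these nodes, queued on a
 * lockless list, and folded in later from a workqueue.
 */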
struct update_stats_work {
	uid_t uid;
	struct task_io_accounting ioac;
	u64 utime;
	u64 stime;
	struct llist_node node;
};

static LLIST_HEAD(work_usw);

static void update_stats_workfn(struct work_struct *work)
{
	struct update_stats_work *usw, *t;
	struct uid_entry *uid_entry;
	struct task_entry *task_entry __maybe_unused;
	struct llist_node *node;

	node = llist_del_all(&work_usw);
	llist_for_each_entry_safe(usw, t, node, node) {
		lock_uid(usw->uid);
		uid_entry = find_uid_entry(usw->uid);
		if (!uid_entry)
			goto next;

		uid_entry->utime += usw->utime;
		uid_entry->stime += usw->stime;

		__add_uid_io_stats(uid_entry, &usw->ioac, UID_STATE_DEAD_TASKS);
next:
		unlock_uid(usw->uid);
		kfree(usw);
	}
}

static DECLARE_WORK(update_stats_work, update_stats_workfn);

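/*
 * PROFILE_TASK_EXIT notifier: fold a dying task's cputime and I/O counters
 * into its uid's DEAD_TASKS slot.  If the bucket lock is contended, defer
 * the update to update_stats_workfn() instead of spinning in the exit path.
 */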
static int process_notifier(struct notifier_block *self,
			unsigned long cmd, void *v)
{
	struct task_struct *task = v;
	struct uid_entry *uid_entry;
	u64 utime, stime;
	uid_t uid;

	if (!task)
		return NOTIFY_OK;

	uid = from_kuid_munged(current_user_ns(), task_uid(task));
	if (!trylock_uid(uid)) {
		struct update_stats_work *usw;

		usw = kmalloc(sizeof(struct update_stats_work), GFP_KERNEL);
		if (usw) {
			usw->uid = uid;
			/*
			 * Copy task->ioac since task might be destroyed before
			 * the work is later performed.
			 */
			usw->ioac = task->ioac;
			task_cputime_adjusted(task, &usw->utime, &usw->stime);
			llist_add(&usw->node, &work_usw);
			schedule_work(&update_stats_work);
		}
		return NOTIFY_OK;
	}

	uid_entry = find_or_register_uid(uid);
	if (!uid_entry) {
		pr_err("%s: failed to find uid %d\n", __func__, uid);
		goto exit;
	}

	task_cputime_adjusted(task, &utime, &stime);
	uid_entry->utime += utime;
	uid_entry->stime += stime;

	add_uid_io_stats(uid_entry, task, UID_STATE_DEAD_TASKS);

exit:
	unlock_uid(uid);
	return NOTIFY_OK;
}

static struct notifier_block process_notifier_block = {
	.notifier_call = process_notifier,
};

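/*
 * Init: set up the hash table and per-bucket locks, create the proc
 * interface (/proc/uid_cputime/{show_uid_stat,remove_uid_range},
 * /proc/uid_io/stats, /proc/uid_procstat/set), and register the
 * task-exit notifier.
 */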
static int __init proc_uid_sys_stats_init(void)
{
	init_hash_table_and_lock();

	cpu_parent = proc_mkdir("uid_cputime", NULL);
	if (!cpu_parent) {
		pr_err("%s: failed to create uid_cputime proc entry\n",
			__func__);
		goto err;
	}

	proc_create_data("remove_uid_range", 0222, cpu_parent,
		&uid_remove_fops, NULL);
	proc_create_data("show_uid_stat", 0444, cpu_parent,
		&uid_cputime_fops, NULL);

	io_parent = proc_mkdir("uid_io", NULL);
	if (!io_parent) {
		pr_err("%s: failed to create uid_io proc entry\n",
			__func__);
		goto err;
	}

	proc_create_data("stats", 0444, io_parent,
		&uid_io_fops, NULL);

	proc_parent = proc_mkdir("uid_procstat", NULL);
	if (!proc_parent) {
		pr_err("%s: failed to create uid_procstat proc entry\n",
			__func__);
		goto err;
	}

	proc_create_data("set", 0222, proc_parent,
		&uid_procstat_fops, NULL);

	profile_event_register(PROFILE_TASK_EXIT, &process_notifier_block);

	return 0;

err:
	remove_proc_subtree("uid_cputime", NULL);
	remove_proc_subtree("uid_io", NULL);
	remove_proc_subtree("uid_procstat", NULL);
	return -ENOMEM;
}

early_initcall(proc_uid_sys_stats_init);