/* drivers/misc/uid_sys_stats.c
 *
 * Copyright (C) 2014 - 2015 Google, Inc.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */

#include <linux/atomic.h>
#include <linux/err.h>
#include <linux/hashtable.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/profile.h>
#include <linux/sched/cputime.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/spinlock_types.h>

#define UID_HASH_BITS	10
#define UID_HASH_NUMS	(1 << UID_HASH_BITS)
DECLARE_HASHTABLE(hash_table, UID_HASH_BITS);
/* uid_lock[bkt] ensures consistency of hash_table[bkt] */
spinlock_t uid_lock[UID_HASH_NUMS];

#define for_each_bkt(bkt) \
	for (bkt = 0; bkt < HASH_SIZE(hash_table); bkt++)

/* iterate over all uid_entries hashing to the same bkt */
#define for_each_uid_entry(uid_entry, bkt) \
	hlist_for_each_entry(uid_entry, &hash_table[bkt], hash)

#define for_each_uid_entry_safe(uid_entry, tmp, bkt) \
	hlist_for_each_entry_safe(uid_entry, tmp,\
			&hash_table[bkt], hash)

static struct proc_dir_entry *cpu_parent;
static struct proc_dir_entry *io_parent;
static struct proc_dir_entry *proc_parent;

struct io_stats {
	u64 read_bytes;
	u64 write_bytes;
	u64 rchar;
	u64 wchar;
	u64 fsync;
};

#define UID_STATE_FOREGROUND	0
#define UID_STATE_BACKGROUND	1
#define UID_STATE_TOTAL_LAST	2
#define UID_STATE_DEAD_TASKS	3
#define UID_STATE_SIZE		4

#define MAX_TASK_COMM_LEN 256

struct task_entry {
	char comm[MAX_TASK_COMM_LEN];
	pid_t pid;
	struct io_stats io[UID_STATE_SIZE];
	struct hlist_node hash;
};

struct uid_entry {
	uid_t uid;
	u64 utime;
	u64 stime;
	int state;
	struct io_stats io[UID_STATE_SIZE];
	struct hlist_node hash;
};

static void init_hash_table_and_lock(void)
{
	int i;

	hash_init(hash_table);
	for (i = 0; i < UID_HASH_NUMS; i++)
		spin_lock_init(&uid_lock[i]);
}

static inline int uid_to_bkt(uid_t uid)
{
	return hash_min(uid, HASH_BITS(hash_table));
}

static inline int trylock_uid(uid_t uid)
{
	return spin_trylock(&uid_lock[uid_to_bkt(uid)]);
}

static inline void lock_uid(uid_t uid)
{
	spin_lock(&uid_lock[uid_to_bkt(uid)]);
}

static inline void unlock_uid(uid_t uid)
{
	spin_unlock(&uid_lock[uid_to_bkt(uid)]);
}

static inline void lock_uid_by_bkt(u32 bkt)
{
	spin_lock(&uid_lock[bkt]);
}

static inline void unlock_uid_by_bkt(u32 bkt)
{
	spin_unlock(&uid_lock[bkt]);
}
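
/*
 * Locking overview: each hash bucket has its own spinlock, so state for UIDs
 * hashing to different buckets can be updated concurrently.  Writers take the
 * lock for a single UID's bucket (lock_uid()), while the seq_file readers
 * below walk the table one bucket at a time (lock_uid_by_bkt()).
 */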

static u64 compute_write_bytes(struct task_io_accounting *ioac)
{
	if (ioac->write_bytes <= ioac->cancelled_write_bytes)
		return 0;

	return ioac->write_bytes - ioac->cancelled_write_bytes;
}

static void compute_io_bucket_stats(struct io_stats *io_bucket,
					struct io_stats *io_curr,
					struct io_stats *io_last,
					struct io_stats *io_dead)
{
	/* A task could switch to another uid group, but its io_last in the
	 * previous uid group could still be positive.
	 * Therefore, do an overflow check before each update.
	 */
	int64_t delta;

	delta = io_curr->read_bytes + io_dead->read_bytes -
		io_last->read_bytes;
	io_bucket->read_bytes += delta > 0 ? delta : 0;
	delta = io_curr->write_bytes + io_dead->write_bytes -
		io_last->write_bytes;
	io_bucket->write_bytes += delta > 0 ? delta : 0;
	delta = io_curr->rchar + io_dead->rchar - io_last->rchar;
	io_bucket->rchar += delta > 0 ? delta : 0;
	delta = io_curr->wchar + io_dead->wchar - io_last->wchar;
	io_bucket->wchar += delta > 0 ? delta : 0;
	delta = io_curr->fsync + io_dead->fsync - io_last->fsync;
	io_bucket->fsync += delta > 0 ? delta : 0;

	io_last->read_bytes = io_curr->read_bytes;
	io_last->write_bytes = io_curr->write_bytes;
	io_last->rchar = io_curr->rchar;
	io_last->wchar = io_curr->wchar;
	io_last->fsync = io_curr->fsync;

	memset(io_dead, 0, sizeof(struct io_stats));
}
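
/*
 * Worked example (numbers are made up): if io_curr->rchar is 100,
 * io_dead->rchar is 20 and io_last->rchar is 90, then delta = 100 + 20 - 90
 * = 30 is credited to io_bucket->rchar, io_last->rchar becomes 100, and the
 * dead-task counters are cleared for the next interval.
 */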

static struct uid_entry *find_uid_entry(uid_t uid)
{
	struct uid_entry *uid_entry;
	u32 bkt = uid_to_bkt(uid);

	for_each_uid_entry(uid_entry, bkt) {
		if (uid_entry->uid == uid)
			return uid_entry;
	}
	return NULL;
}

static struct uid_entry *find_or_register_uid(uid_t uid)
{
	struct uid_entry *uid_entry;

	uid_entry = find_uid_entry(uid);
	if (uid_entry)
		return uid_entry;

	uid_entry = kzalloc(sizeof(struct uid_entry), GFP_ATOMIC);
	if (!uid_entry)
		return NULL;

	uid_entry->uid = uid;
	hash_add(hash_table, &uid_entry->hash, uid);

	return uid_entry;
}

static void calc_uid_cputime(struct uid_entry *uid_entry,
			u64 *total_utime, u64 *total_stime)
{
	struct user_namespace *user_ns = current_user_ns();
	struct task_struct *p, *t;
	u64 utime, stime;
	uid_t uid;

	rcu_read_lock();
	for_each_process(p) {
		uid = from_kuid_munged(user_ns, task_uid(p));

		if (uid != uid_entry->uid)
			continue;

		for_each_thread(p, t) {
			/* avoid double accounting of dying threads */
			if (!(t->flags & PF_EXITING)) {
				task_cputime_adjusted(t, &utime, &stime);
				*total_utime += utime;
				*total_stime += stime;
			}
		}
	}
	rcu_read_unlock();
}

static int uid_cputime_show(struct seq_file *m, void *v)
{
	struct uid_entry *uid_entry = NULL;
	u32 bkt;

	for_each_bkt(bkt) {
		lock_uid_by_bkt(bkt);
		for_each_uid_entry(uid_entry, bkt) {
			u64 total_utime = uid_entry->utime;
			u64 total_stime = uid_entry->stime;

			calc_uid_cputime(uid_entry, &total_utime, &total_stime);
			seq_printf(m, "%d: %llu %llu\n", uid_entry->uid,
				ktime_to_us(total_utime), ktime_to_us(total_stime));
		}
		unlock_uid_by_bkt(bkt);
	}

	return 0;
}

static int uid_cputime_open(struct inode *inode, struct file *file)
{
	return single_open(file, uid_cputime_show, pde_data(inode));
}

static const struct proc_ops uid_cputime_fops = {
	.proc_open	= uid_cputime_open,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
	.proc_release	= single_release,
};
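
/*
 * Reading /proc/uid_cputime/show_uid_stat (created in
 * proc_uid_sys_stats_init() below) prints one line per known UID:
 *
 *	<uid>: <user_time_us> <system_time_us>
 *
 * e.g. a line such as "1000: 123456 78901" (values illustrative only) means
 * UID 1000 has accumulated ~123 ms of user time and ~79 ms of system time
 * across its live threads plus its already-exited tasks.
 */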

static int uid_remove_open(struct inode *inode, struct file *file)
{
	return single_open(file, NULL, NULL);
}

static ssize_t uid_remove_write(struct file *file,
			const char __user *buffer, size_t count, loff_t *ppos)
{
	char uids[128];
	char *start_uid, *end_uid = NULL;
	long int uid_start = 0, uid_end = 0;

	if (count >= sizeof(uids))
		count = sizeof(uids) - 1;

	if (copy_from_user(uids, buffer, count))
		return -EFAULT;

	uids[count] = '\0';
	end_uid = uids;
	start_uid = strsep(&end_uid, "-");

	if (!start_uid || !end_uid)
		return -EINVAL;

	if (kstrtol(start_uid, 10, &uid_start) != 0 ||
		kstrtol(end_uid, 10, &uid_end) != 0) {
		return -EINVAL;
	}

	for (; uid_start <= uid_end; uid_start++) {
		struct uid_entry *uid_entry;
		struct hlist_node *tmp;
		u32 bkt = uid_to_bkt((uid_t)uid_start);

		lock_uid(uid_start);
		for_each_uid_entry_safe(uid_entry, tmp, bkt) {
			if (uid_start == uid_entry->uid) {
				hash_del(&uid_entry->hash);
				kfree(uid_entry);
			}
		}
		unlock_uid(uid_start);
	}

	return count;
}

static const struct proc_ops uid_remove_fops = {
	.proc_open	= uid_remove_open,
	.proc_release	= single_release,
	.proc_write	= uid_remove_write,
};
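
/*
 * Usage sketch (illustrative): the write handler above expects an inclusive
 * "<start>-<end>" UID range, so from userspace something like
 *
 *	echo "10010-10020" > /proc/uid_cputime/remove_uid_range
 *
 * drops the cached entries for UIDs 10010 through 10020 (the UID values here
 * are just an example).
 */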

static void __add_uid_io_stats(struct uid_entry *uid_entry,
			struct task_io_accounting *ioac, int slot)
{
	struct io_stats *io_slot = &uid_entry->io[slot];

	io_slot->read_bytes += ioac->read_bytes;
	io_slot->write_bytes += compute_write_bytes(ioac);
	io_slot->rchar += ioac->rchar;
	io_slot->wchar += ioac->wchar;
	io_slot->fsync += ioac->syscfs;
}

static void add_uid_io_stats(struct uid_entry *uid_entry,
			struct task_struct *task, int slot)
{
	struct task_entry *task_entry __maybe_unused;

	/* avoid double accounting of dying threads */
	if (slot != UID_STATE_DEAD_TASKS && (task->flags & PF_EXITING))
		return;

	__add_uid_io_stats(uid_entry, &task->ioac, slot);
}

static void update_io_stats_uid(struct uid_entry *uid_entry)
{
	struct user_namespace *user_ns = current_user_ns();
	struct task_struct *p, *t;
	struct io_stats io;

	memset(&io, 0, sizeof(struct io_stats));

	rcu_read_lock();
	for_each_process(p) {
		uid_t uid = from_kuid_munged(user_ns, task_uid(p));

		if (uid != uid_entry->uid)
			continue;

		for_each_thread(p, t) {
			/* avoid double accounting of dying threads */
			if (!(t->flags & PF_EXITING)) {
				io.read_bytes += t->ioac.read_bytes;
				io.write_bytes += compute_write_bytes(&t->ioac);
				io.rchar += t->ioac.rchar;
				io.wchar += t->ioac.wchar;
				io.fsync += t->ioac.syscfs;
			}
		}
	}
	rcu_read_unlock();

	compute_io_bucket_stats(&uid_entry->io[uid_entry->state], &io,
					&uid_entry->io[UID_STATE_TOTAL_LAST],
					&uid_entry->io[UID_STATE_DEAD_TASKS]);
}
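
/*
 * Accounting model: update_io_stats_uid() sums the live threads of a UID,
 * then compute_io_bucket_stats() adds the delta since the last snapshot
 * (UID_STATE_TOTAL_LAST), plus anything gathered from already-exited tasks
 * (UID_STATE_DEAD_TASKS), into the bucket for the UID's current state
 * (foreground or background).
 */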

static int uid_io_show(struct seq_file *m, void *v)
{
	struct uid_entry *uid_entry = NULL;
	u32 bkt;

	for_each_bkt(bkt) {
		lock_uid_by_bkt(bkt);
		for_each_uid_entry(uid_entry, bkt) {
			update_io_stats_uid(uid_entry);

			seq_printf(m, "%d %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
				uid_entry->uid,
				uid_entry->io[UID_STATE_FOREGROUND].rchar,
				uid_entry->io[UID_STATE_FOREGROUND].wchar,
				uid_entry->io[UID_STATE_FOREGROUND].read_bytes,
				uid_entry->io[UID_STATE_FOREGROUND].write_bytes,
				uid_entry->io[UID_STATE_BACKGROUND].rchar,
				uid_entry->io[UID_STATE_BACKGROUND].wchar,
				uid_entry->io[UID_STATE_BACKGROUND].read_bytes,
				uid_entry->io[UID_STATE_BACKGROUND].write_bytes,
				uid_entry->io[UID_STATE_FOREGROUND].fsync,
				uid_entry->io[UID_STATE_BACKGROUND].fsync);
		}
		unlock_uid_by_bkt(bkt);
	}

	return 0;
}

static int uid_io_open(struct inode *inode, struct file *file)
{
	return single_open(file, uid_io_show, pde_data(inode));
}

static const struct proc_ops uid_io_fops = {
	.proc_open	= uid_io_open,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
	.proc_release	= single_release,
};
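
/*
 * Reading /proc/uid_io/stats (created in proc_uid_sys_stats_init() below)
 * prints one line per UID with eleven space-separated fields:
 *
 *	uid fg_rchar fg_wchar fg_read_bytes fg_write_bytes
 *	    bg_rchar bg_wchar bg_read_bytes bg_write_bytes fg_fsync bg_fsync
 *
 * where "fg"/"bg" refer to the foreground/background buckets and the field
 * names here are descriptive labels, not identifiers from this file.
 */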

static int uid_procstat_open(struct inode *inode, struct file *file)
{
	return single_open(file, NULL, NULL);
}

static ssize_t uid_procstat_write(struct file *file,
			const char __user *buffer, size_t count, loff_t *ppos)
{
	struct uid_entry *uid_entry;
	uid_t uid;
	int argc, state;
	char input[128];

	if (count >= sizeof(input))
		return -EINVAL;

	if (copy_from_user(input, buffer, count))
		return -EFAULT;

	input[count] = '\0';

	argc = sscanf(input, "%u %d", &uid, &state);
	if (argc != 2)
		return -EINVAL;

	if (state != UID_STATE_BACKGROUND && state != UID_STATE_FOREGROUND)
		return -EINVAL;

	lock_uid(uid);
	uid_entry = find_or_register_uid(uid);
	if (!uid_entry) {
		unlock_uid(uid);
		return -EINVAL;
	}

	if (uid_entry->state == state) {
		unlock_uid(uid);
		return count;
	}

	update_io_stats_uid(uid_entry);
	uid_entry->state = state;
	unlock_uid(uid);

	return count;
}

static const struct proc_ops uid_procstat_fops = {
	.proc_open	= uid_procstat_open,
	.proc_release	= single_release,
	.proc_write	= uid_procstat_write,
};
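
/*
 * Usage sketch (illustrative): writes of the form "<uid> <state>" to
 * /proc/uid_procstat/set switch a UID between UID_STATE_FOREGROUND (0) and
 * UID_STATE_BACKGROUND (1), snapshotting its I/O stats into the old state's
 * bucket first, e.g.:
 *
 *	echo "10010 1" > /proc/uid_procstat/set
 *
 * (UID 10010 is only an example value.)
 */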

struct update_stats_work {
	uid_t uid;
	struct task_io_accounting ioac;
	u64 utime;
	u64 stime;
	struct llist_node node;
};

static LLIST_HEAD(work_usw);

static void update_stats_workfn(struct work_struct *work)
{
	struct update_stats_work *usw, *t;
	struct uid_entry *uid_entry;
	struct task_entry *task_entry __maybe_unused;
	struct llist_node *node;

	node = llist_del_all(&work_usw);
	llist_for_each_entry_safe(usw, t, node, node) {
		lock_uid(usw->uid);
		uid_entry = find_uid_entry(usw->uid);
		if (!uid_entry)
			goto next;

		uid_entry->utime += usw->utime;
		uid_entry->stime += usw->stime;

		__add_uid_io_stats(uid_entry, &usw->ioac, UID_STATE_DEAD_TASKS);
next:
		unlock_uid(usw->uid);
		kfree(usw);
	}
}
static DECLARE_WORK(update_stats_work, update_stats_workfn);

static int process_notifier(struct notifier_block *self,
			unsigned long cmd, void *v)
{
	struct task_struct *task = v;
	struct uid_entry *uid_entry;
	u64 utime, stime;
	uid_t uid;

	if (!task)
		return NOTIFY_OK;

	uid = from_kuid_munged(current_user_ns(), task_uid(task));
	if (!trylock_uid(uid)) {
		struct update_stats_work *usw;

		usw = kmalloc(sizeof(struct update_stats_work), GFP_KERNEL);
		if (usw) {
			usw->uid = uid;
			/*
			 * Copy task->ioac since task might be destroyed before
			 * the work is later performed.
			 */
			usw->ioac = task->ioac;
			task_cputime_adjusted(task, &usw->utime, &usw->stime);
			llist_add(&usw->node, &work_usw);
			schedule_work(&update_stats_work);
		}
		return NOTIFY_OK;
	}

	uid_entry = find_or_register_uid(uid);
	if (!uid_entry) {
		pr_err("%s: failed to find uid %d\n", __func__, uid);
		goto exit;
	}

	task_cputime_adjusted(task, &utime, &stime);
	uid_entry->utime += utime;
	uid_entry->stime += stime;

	add_uid_io_stats(uid_entry, task, UID_STATE_DEAD_TASKS);

exit:
	unlock_uid(uid);
	return NOTIFY_OK;
}
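
/*
 * Design note: this runs from the PROFILE_TASK_EXIT notifier.  If the UID's
 * bucket lock is already contended, the exiting task's counters are copied
 * into an update_stats_work node and folded in later from the workqueue,
 * rather than spinning here in the exit path.
 */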

static struct notifier_block process_notifier_block = {
	.notifier_call	= process_notifier,
};

static int __init proc_uid_sys_stats_init(void)
{
	init_hash_table_and_lock();

	cpu_parent = proc_mkdir("uid_cputime", NULL);
	if (!cpu_parent) {
		pr_err("%s: failed to create uid_cputime proc entry\n",
			__func__);
		goto err;
	}

	proc_create_data("remove_uid_range", 0222, cpu_parent,
		&uid_remove_fops, NULL);
	proc_create_data("show_uid_stat", 0444, cpu_parent,
		&uid_cputime_fops, NULL);

	io_parent = proc_mkdir("uid_io", NULL);
	if (!io_parent) {
		pr_err("%s: failed to create uid_io proc entry\n",
			__func__);
		goto err;
	}

	proc_create_data("stats", 0444, io_parent,
		&uid_io_fops, NULL);

	proc_parent = proc_mkdir("uid_procstat", NULL);
	if (!proc_parent) {
		pr_err("%s: failed to create uid_procstat proc entry\n",
			__func__);
		goto err;
	}

	proc_create_data("set", 0222, proc_parent,
		&uid_procstat_fops, NULL);

	profile_event_register(PROFILE_TASK_EXIT, &process_notifier_block);

	return 0;

err:
	remove_proc_subtree("uid_cputime", NULL);
	remove_proc_subtree("uid_io", NULL);
	remove_proc_subtree("uid_procstat", NULL);
	return -ENOMEM;
}

early_initcall(proc_uid_sys_stats_init);