• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* drivers/misc/uid_sys_stats.c
2  *
3  * Copyright (C) 2014 - 2015 Google, Inc.
4  *
5  * This software is licensed under the terms of the GNU General Public
6  * License version 2, as published by the Free Software Foundation, and
7  * may be copied, distributed, and modified under those terms.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  */
15 
16 #include <linux/atomic.h>
17 #include <linux/err.h>
18 #include <linux/hashtable.h>
19 #include <linux/init.h>
20 #include <linux/kernel.h>
21 #include <linux/list.h>
22 #include <linux/llist.h>
23 #include <linux/mm.h>
24 #include <linux/proc_fs.h>
25 #include <linux/profile.h>
26 #include <linux/sched/cputime.h>
27 #include <linux/seq_file.h>
28 #include <linux/slab.h>
29 #include <linux/uaccess.h>
30 #include <linux/spinlock_types.h>
31 
#define UID_HASH_BITS	10
#define UID_HASH_NUMS	(1 << UID_HASH_BITS)

/*
 * Global uid -> uid_entry hash table; the bucket for a uid is
 * hash_min(uid, UID_HASH_BITS).
 * NOTE(review): hash_table and uid_lock have external linkage; if no
 * other translation unit references them they could be made static —
 * confirm against the rest of the tree.
 */
DECLARE_HASHTABLE(hash_table, UID_HASH_BITS);
/*
 * uid_lock[bkt] ensure consistency of hash_table[bkt]
 */
spinlock_t uid_lock[UID_HASH_NUMS];

/* /proc directories created by proc_uid_sys_stats_init() */
static struct proc_dir_entry *cpu_parent;	/* /proc/uid_cputime */
static struct proc_dir_entry *io_parent;	/* /proc/uid_io */
static struct proc_dir_entry *proc_parent;	/* /proc/uid_procstat */
43 
/*
 * Cumulative I/O counters for one uid in one state bucket.
 * Field values are accumulated from task->ioac (task_io_accounting).
 */
struct io_stats {
	u64 read_bytes;		/* from ioac->read_bytes */
	u64 write_bytes;	/* write_bytes net of cancelled_write_bytes */
	u64 rchar;		/* from ioac->rchar */
	u64 wchar;		/* from ioac->wchar */
	u64 fsync;		/* fsync count, from ioac->syscfs */
};
51 
/*
 * Indices into uid_entry->io[]:
 * FOREGROUND/BACKGROUND accumulate per-state I/O deltas,
 * TOTAL_LAST holds the snapshot used to compute the next delta, and
 * DEAD_TASKS banks counters from tasks that have already exited.
 */
#define UID_STATE_FOREGROUND	0
#define UID_STATE_BACKGROUND	1
#define UID_STATE_TOTAL_LAST	2
#define UID_STATE_DEAD_TASKS	3
#define UID_STATE_SIZE		4

#define MAX_TASK_COMM_LEN 256
59 
/*
 * Per-task accounting entry.
 * NOTE(review): nothing in this file allocates or hashes a task_entry —
 * only __maybe_unused pointer declarations remain — so this looks like
 * a leftover from removed per-task accounting; confirm before deleting.
 */
struct task_entry {
	char comm[MAX_TASK_COMM_LEN];
	pid_t pid;
	struct io_stats io[UID_STATE_SIZE];
	struct hlist_node hash;
};
66 
/*
 * Per-uid accounting entry, linked into hash_table and protected by the
 * uid_lock[] spinlock covering its bucket.
 */
struct uid_entry {
	uid_t uid;		/* hash key */
	u64 utime;		/* user time banked from dead tasks */
	u64 stime;		/* system time banked from dead tasks */
	int state;		/* UID_STATE_FOREGROUND or UID_STATE_BACKGROUND */
	struct io_stats io[UID_STATE_SIZE];
	struct hlist_node hash;
};
75 
/* Try to take the bucket lock covering @uid; non-zero on success. */
static inline int trylock_uid(uid_t uid)
{
	return spin_trylock(
		&uid_lock[hash_min(uid, HASH_BITS(hash_table))]);
}
81 
/* Take the bucket lock covering @uid. */
static inline void lock_uid(uid_t uid)
{
	spin_lock(&uid_lock[hash_min(uid, HASH_BITS(hash_table))]);
}
86 
/* Release the bucket lock covering @uid. */
static inline void unlock_uid(uid_t uid)
{
	spin_unlock(&uid_lock[hash_min(uid, HASH_BITS(hash_table))]);
}
91 
/* Take the lock for hash bucket @bkt directly (used by bucket walkers). */
static inline void lock_uid_by_bkt(u32 bkt)
{
	spin_lock(&uid_lock[bkt]);
}
96 
/* Release the lock for hash bucket @bkt. */
static inline void unlock_uid_by_bkt(u32 bkt)
{
	spin_unlock(&uid_lock[bkt]);
}
101 
compute_write_bytes(struct task_io_accounting * ioac)102 static u64 compute_write_bytes(struct task_io_accounting *ioac)
103 {
104 	if (ioac->write_bytes <= ioac->cancelled_write_bytes)
105 		return 0;
106 
107 	return ioac->write_bytes - ioac->cancelled_write_bytes;
108 }
109 
/*
 * Fold the I/O delta since the previous snapshot into @io_bucket.
 *
 * @io_bucket: accumulator for the uid's current state bucket
 * @io_curr:   freshly summed counters of the uid's live tasks
 * @io_last:   snapshot of the live-task counters at the previous
 *             update; refreshed to @io_curr on return
 * @io_dead:   counters banked from tasks that exited since the previous
 *             update; consumed and zeroed on return
 *
 * tasks could switch to another uid group, but its io_last in the
 * previous uid group could still be positive.
 * therefore before each update, do an overflow check first
 */
static void compute_io_bucket_stats(struct io_stats *io_bucket,
					struct io_stats *io_curr,
					struct io_stats *io_last,
					struct io_stats *io_dead)
{
	/* signed so a negative delta (counters left with the tasks that
	 * migrated to another uid) is detected and discarded below
	 */
	int64_t delta;

	delta = io_curr->read_bytes + io_dead->read_bytes -
		io_last->read_bytes;
	io_bucket->read_bytes += delta > 0 ? delta : 0;
	delta = io_curr->write_bytes + io_dead->write_bytes -
		io_last->write_bytes;
	io_bucket->write_bytes += delta > 0 ? delta : 0;
	delta = io_curr->rchar + io_dead->rchar - io_last->rchar;
	io_bucket->rchar += delta > 0 ? delta : 0;
	delta = io_curr->wchar + io_dead->wchar - io_last->wchar;
	io_bucket->wchar += delta > 0 ? delta : 0;
	delta = io_curr->fsync + io_dead->fsync - io_last->fsync;
	io_bucket->fsync += delta > 0 ? delta : 0;

	/* snapshot the live-task counters for the next delta */
	io_last->read_bytes = io_curr->read_bytes;
	io_last->write_bytes = io_curr->write_bytes;
	io_last->rchar = io_curr->rchar;
	io_last->wchar = io_curr->wchar;
	io_last->fsync = io_curr->fsync;

	/* dead-task counters have been folded in; start collecting anew */
	memset(io_dead, 0, sizeof(struct io_stats));
}
142 
/*
 * Return the uid_entry hashed under @uid, or NULL if none exists.
 * Caller must hold the bucket lock for @uid.
 */
static struct uid_entry *find_uid_entry(uid_t uid)
{
	struct uid_entry *uid_entry;

	hash_for_each_possible(hash_table, uid_entry, hash, uid) {
		if (uid_entry->uid == uid)
			return uid_entry;
	}
	return NULL;
}
152 
/*
 * Find the uid_entry for @uid, allocating and hashing a new zeroed one
 * if it does not exist yet.  Returns NULL on allocation failure.
 * Caller must hold the bucket lock for @uid; GFP_ATOMIC is used because
 * that spinlock is held across the allocation.
 */
static struct uid_entry *find_or_register_uid(uid_t uid)
{
	struct uid_entry *uid_entry;

	uid_entry = find_uid_entry(uid);
	if (uid_entry)
		return uid_entry;

	uid_entry = kzalloc(sizeof(struct uid_entry), GFP_ATOMIC);
	if (!uid_entry)
		return NULL;

	uid_entry->uid = uid;
	hash_add(hash_table, &uid_entry->hash, uid);

	return uid_entry;
}
170 
/*
 * Add the adjusted cpu times of every live thread belonging to
 * @uid_entry->uid (as seen through the caller's user namespace) into
 * *total_utime / *total_stime.  Walks the process list under RCU.
 * Threads flagged PF_EXITING are skipped; their final times are banked
 * by the task-exit notifier instead.
 */
static void calc_uid_cputime(struct uid_entry *uid_entry,
			u64 *total_utime, u64 *total_stime)
{
	struct user_namespace *user_ns = current_user_ns();
	struct task_struct *p, *t;
	u64 utime, stime;
	uid_t uid;

	rcu_read_lock();
	for_each_process(p) {
		uid = from_kuid_munged(user_ns, task_uid(p));

		if (uid != uid_entry->uid)
			continue;

		for_each_thread(p, t) {
			/* avoid double accounting of dying threads */
			if (!(t->flags & PF_EXITING)) {
				task_cputime_adjusted(t, &utime, &stime);
				*total_utime += utime;
				*total_stime += stime;
			}
		}
	}
	rcu_read_unlock();
}
197 
/*
 * seq_file show handler for /proc/uid_cputime/show_uid_stat.
 * Emits one line per uid: "<uid>: <user_us> <system_us>", combining the
 * times banked from dead tasks (uid_entry->utime/stime) with the live
 * tasks' current adjusted times, converted to microseconds.
 *
 * NOTE(review): the "uid_entry == NULL &&" clause in the loop condition
 * appears always true — hlist_for_each_entry leaves its cursor NULL on
 * normal termination and the body never breaks — confirm before
 * simplifying.
 */
static int uid_cputime_show(struct seq_file *m, void *v)
{
	struct uid_entry *uid_entry = NULL;
	u32 bkt;

	for (bkt = 0, uid_entry = NULL; uid_entry == NULL &&
		bkt < HASH_SIZE(hash_table); bkt++) {

		lock_uid_by_bkt(bkt);
		hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) {
			u64 total_utime = uid_entry->utime;
			u64 total_stime = uid_entry->stime;

			calc_uid_cputime(uid_entry, &total_utime, &total_stime);
			seq_printf(m, "%d: %llu %llu\n", uid_entry->uid,
				ktime_to_us(total_utime), ktime_to_us(total_stime));
		}
		unlock_uid_by_bkt(bkt);
	}

	return 0;
}
220 
/* Open /proc/uid_cputime/show_uid_stat (PDE data is NULL, see init). */
static int uid_cputime_open(struct inode *inode, struct file *file)
{
	return single_open(file, uid_cputime_show, PDE_DATA(inode));
}

static const struct proc_ops uid_cputime_fops = {
	.proc_open	= uid_cputime_open,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
	.proc_release	= single_release,
};
232 
/* Open /proc/uid_cputime/remove_uid_range: write-only, no show routine. */
static int uid_remove_open(struct inode *inode, struct file *file)
{
	return single_open(file, NULL, NULL);
}
237 
uid_remove_write(struct file * file,const char __user * buffer,size_t count,loff_t * ppos)238 static ssize_t uid_remove_write(struct file *file,
239 			const char __user *buffer, size_t count, loff_t *ppos)
240 {
241 	struct uid_entry *uid_entry;
242 	struct hlist_node *tmp;
243 	char uids[128];
244 	char *start_uid, *end_uid = NULL;
245 	long int uid_start = 0, uid_end = 0;
246 
247 	if (count >= sizeof(uids))
248 		count = sizeof(uids) - 1;
249 
250 	if (copy_from_user(uids, buffer, count))
251 		return -EFAULT;
252 
253 	uids[count] = '\0';
254 	end_uid = uids;
255 	start_uid = strsep(&end_uid, "-");
256 
257 	if (!start_uid || !end_uid)
258 		return -EINVAL;
259 
260 	if (kstrtol(start_uid, 10, &uid_start) != 0 ||
261 		kstrtol(end_uid, 10, &uid_end) != 0) {
262 		return -EINVAL;
263 	}
264 
265 	for (; uid_start <= uid_end; uid_start++) {
266 		lock_uid(uid_start);
267 		hash_for_each_possible_safe(hash_table, uid_entry, tmp,
268 							hash, (uid_t)uid_start) {
269 			if (uid_start == uid_entry->uid) {
270 				hash_del(&uid_entry->hash);
271 				kfree(uid_entry);
272 			}
273 		}
274 		unlock_uid(uid_start);
275 	}
276 
277 	return count;
278 }
279 
/* proc_ops for /proc/uid_cputime/remove_uid_range (write-only, 0222). */
static const struct proc_ops uid_remove_fops = {
	.proc_open	= uid_remove_open,
	.proc_release	= single_release,
	.proc_write	= uid_remove_write,
};
285 
/*
 * Add the raw counters from @ioac into @uid_entry->io[slot].
 * Caller must hold the uid's bucket lock.
 */
static void __add_uid_io_stats(struct uid_entry *uid_entry,
			struct task_io_accounting *ioac, int slot)
{
	struct io_stats *io_slot = &uid_entry->io[slot];

	io_slot->read_bytes += ioac->read_bytes;
	io_slot->write_bytes += compute_write_bytes(ioac);
	io_slot->rchar += ioac->rchar;
	io_slot->wchar += ioac->wchar;
	io_slot->fsync += ioac->syscfs;
}
297 
add_uid_io_stats(struct uid_entry * uid_entry,struct task_struct * task,int slot)298 static void add_uid_io_stats(struct uid_entry *uid_entry,
299 			struct task_struct *task, int slot)
300 {
301 	struct task_entry *task_entry __maybe_unused;
302 
303 	/* avoid double accounting of dying threads */
304 	if (slot != UID_STATE_DEAD_TASKS && (task->flags & PF_EXITING))
305 		return;
306 
307 	__add_uid_io_stats(uid_entry, &task->ioac, slot);
308 }
309 
/*
 * Refresh @uid_entry's I/O buckets: sum the current counters of all the
 * uid's live threads under RCU (PF_EXITING threads are skipped; they
 * are handled by the exit notifier), then fold the delta since the last
 * update — plus any banked dead-task counters — into the bucket for the
 * uid's current state.  Caller must hold the uid's bucket lock.
 */
static void update_io_stats_uid(struct uid_entry *uid_entry)
{
	struct user_namespace *user_ns = current_user_ns();
	struct task_struct *p, *t;
	struct io_stats io;

	memset(&io, 0, sizeof(struct io_stats));

	rcu_read_lock();
	for_each_process(p) {
		uid_t uid = from_kuid_munged(user_ns, task_uid(p));

		if (uid != uid_entry->uid)
			continue;

		for_each_thread(p, t) {
			/* avoid double accounting of dying threads */
			if (!(t->flags & PF_EXITING)) {
				io.read_bytes += t->ioac.read_bytes;
				io.write_bytes += compute_write_bytes(&t->ioac);
				io.rchar += t->ioac.rchar;
				io.wchar += t->ioac.wchar;
				io.fsync += t->ioac.syscfs;
			}
		}
	}
	rcu_read_unlock();

	compute_io_bucket_stats(&uid_entry->io[uid_entry->state], &io,
					&uid_entry->io[UID_STATE_TOTAL_LAST],
					&uid_entry->io[UID_STATE_DEAD_TASKS]);
}
342 
/*
 * seq_file show handler for /proc/uid_io/stats.
 * Emits one line per uid:
 *   uid fg_rchar fg_wchar fg_read_bytes fg_write_bytes
 *       bg_rchar bg_wchar bg_read_bytes bg_write_bytes
 *       fg_fsync bg_fsync
 * Each uid's buckets are refreshed via update_io_stats_uid() under the
 * bucket lock before printing.
 *
 * NOTE(review): as in uid_cputime_show(), the "uid_entry == NULL &&"
 * clause appears always true (no break in the inner loop) — confirm
 * before simplifying.
 */
static int uid_io_show(struct seq_file *m, void *v)
{

	struct uid_entry *uid_entry = NULL;
	u32 bkt;

	for (bkt = 0, uid_entry = NULL; uid_entry == NULL && bkt < HASH_SIZE(hash_table);
		bkt++) {
		lock_uid_by_bkt(bkt);
		hlist_for_each_entry(uid_entry, &hash_table[bkt], hash) {

			update_io_stats_uid(uid_entry);

			seq_printf(m, "%d %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
				uid_entry->uid,
				uid_entry->io[UID_STATE_FOREGROUND].rchar,
				uid_entry->io[UID_STATE_FOREGROUND].wchar,
				uid_entry->io[UID_STATE_FOREGROUND].read_bytes,
				uid_entry->io[UID_STATE_FOREGROUND].write_bytes,
				uid_entry->io[UID_STATE_BACKGROUND].rchar,
				uid_entry->io[UID_STATE_BACKGROUND].wchar,
				uid_entry->io[UID_STATE_BACKGROUND].read_bytes,
				uid_entry->io[UID_STATE_BACKGROUND].write_bytes,
				uid_entry->io[UID_STATE_FOREGROUND].fsync,
				uid_entry->io[UID_STATE_BACKGROUND].fsync);
		}
		unlock_uid_by_bkt(bkt);
	}

	return 0;
}
374 
/* Open /proc/uid_io/stats (PDE data is NULL, see init). */
static int uid_io_open(struct inode *inode, struct file *file)
{
	return single_open(file, uid_io_show, PDE_DATA(inode));
}

static const struct proc_ops uid_io_fops = {
	.proc_open	= uid_io_open,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
	.proc_release	= single_release,
};
386 
/* Open /proc/uid_procstat/set: write-only, no show routine. */
static int uid_procstat_open(struct inode *inode, struct file *file)
{
	return single_open(file, NULL, NULL);
}
391 
uid_procstat_write(struct file * file,const char __user * buffer,size_t count,loff_t * ppos)392 static ssize_t uid_procstat_write(struct file *file,
393 			const char __user *buffer, size_t count, loff_t *ppos)
394 {
395 	struct uid_entry *uid_entry;
396 	uid_t uid;
397 	int argc, state;
398 	char input[128];
399 
400 	if (count >= sizeof(input))
401 		return -EINVAL;
402 
403 	if (copy_from_user(input, buffer, count))
404 		return -EFAULT;
405 
406 	input[count] = '\0';
407 
408 	argc = sscanf(input, "%u %d", &uid, &state);
409 	if (argc != 2)
410 		return -EINVAL;
411 
412 	if (state != UID_STATE_BACKGROUND && state != UID_STATE_FOREGROUND)
413 		return -EINVAL;
414 
415 	lock_uid(uid);
416 	uid_entry = find_or_register_uid(uid);
417 	if (!uid_entry) {
418 		unlock_uid(uid);
419 		return -EINVAL;
420 	}
421 
422 	if (uid_entry->state == state) {
423 		unlock_uid(uid);
424 		return count;
425 	}
426 
427 	update_io_stats_uid(uid_entry);
428 	uid_entry->state = state;
429 	unlock_uid(uid);
430 
431 	return count;
432 }
433 
/* proc_ops for /proc/uid_procstat/set (write-only, 0222). */
static const struct proc_ops uid_procstat_fops = {
	.proc_open	= uid_procstat_open,
	.proc_release	= single_release,
	.proc_write	= uid_procstat_write,
};
439 
/*
 * Deferred exit-accounting request: when process_notifier() cannot take
 * the bucket lock without spinning, it snapshots the dying task's stats
 * here and queues the node on work_usw for update_stats_workfn().
 */
struct update_stats_work {
	uid_t uid;
	struct task_io_accounting ioac;	/* by-value copy; task may be gone */
	u64 utime;
	u64 stime;
	struct llist_node node;
};

/* Lock-free list of pending update_stats_work requests. */
static LLIST_HEAD(work_usw);
449 
update_stats_workfn(struct work_struct * work)450 static void update_stats_workfn(struct work_struct *work)
451 {
452 	struct update_stats_work *usw, *t;
453 	struct uid_entry *uid_entry;
454 	struct task_entry *task_entry __maybe_unused;
455 	struct llist_node *node;
456 
457 	node = llist_del_all(&work_usw);
458 	llist_for_each_entry_safe(usw, t, node, node) {
459 		lock_uid(usw->uid);
460 		uid_entry = find_uid_entry(usw->uid);
461 		if (!uid_entry)
462 			goto next;
463 
464 		uid_entry->utime += usw->utime;
465 		uid_entry->stime += usw->stime;
466 
467 		__add_uid_io_stats(uid_entry, &usw->ioac, UID_STATE_DEAD_TASKS);
468 next:
469 		unlock_uid(usw->uid);
470 		kfree(usw);
471 	}
472 
473 }
474 static DECLARE_WORK(update_stats_work, update_stats_workfn);
475 
/*
 * PROFILE_TASK_EXIT notifier: bank a dying task's cpu time and I/O
 * counters into its uid's entry under the bucket lock.
 *
 * The lock is only trylocked here; on contention the stats are
 * snapshotted into an update_stats_work request and deferred to
 * update_stats_workfn().  If that allocation fails, the dying task's
 * stats are silently dropped (best effort).
 */
static int process_notifier(struct notifier_block *self,
			unsigned long cmd, void *v)
{
	struct task_struct *task = v;
	struct uid_entry *uid_entry;
	u64 utime, stime;
	uid_t uid;

	if (!task)
		return NOTIFY_OK;

	uid = from_kuid_munged(current_user_ns(), task_uid(task));
	if (!trylock_uid(uid)) {
		struct update_stats_work *usw;

		usw = kmalloc(sizeof(struct update_stats_work), GFP_KERNEL);
		if (usw) {
			usw->uid = uid;
			/*
			 * Copy task->ioac since task might be destroyed before
			 * the work is later performed.
			 */
			usw->ioac = task->ioac;
			task_cputime_adjusted(task, &usw->utime, &usw->stime);
			llist_add(&usw->node, &work_usw);
			schedule_work(&update_stats_work);
		}
		return NOTIFY_OK;
	}

	uid_entry = find_or_register_uid(uid);
	if (!uid_entry) {
		pr_err("%s: failed to find uid %d\n", __func__, uid);
		goto exit;
	}

	task_cputime_adjusted(task, &utime, &stime);
	uid_entry->utime += utime;
	uid_entry->stime += stime;

	add_uid_io_stats(uid_entry, task, UID_STATE_DEAD_TASKS);

exit:
	unlock_uid(uid);
	return NOTIFY_OK;
}

/* Registered for PROFILE_TASK_EXIT in proc_uid_sys_stats_init(). */
static struct notifier_block process_notifier_block = {
	.notifier_call	= process_notifier,
};
526 
init_hash_table_and_lock(void)527 static void init_hash_table_and_lock(void)
528 {
529 	int i;
530 
531 	hash_init(hash_table);
532 	for (i = 0; i < UID_HASH_NUMS; i++)
533 		spin_lock_init(&uid_lock[i]);
534 }
535 
proc_uid_sys_stats_init(void)536 static int __init proc_uid_sys_stats_init(void)
537 {
538 	init_hash_table_and_lock();
539 
540 	cpu_parent = proc_mkdir("uid_cputime", NULL);
541 	if (!cpu_parent) {
542 		pr_err("%s: failed to create uid_cputime proc entry\n",
543 			__func__);
544 		goto err;
545 	}
546 
547 	proc_create_data("remove_uid_range", 0222, cpu_parent,
548 		&uid_remove_fops, NULL);
549 	proc_create_data("show_uid_stat", 0444, cpu_parent,
550 		&uid_cputime_fops, NULL);
551 
552 	io_parent = proc_mkdir("uid_io", NULL);
553 	if (!io_parent) {
554 		pr_err("%s: failed to create uid_io proc entry\n",
555 			__func__);
556 		goto err;
557 	}
558 
559 	proc_create_data("stats", 0444, io_parent,
560 		&uid_io_fops, NULL);
561 
562 	proc_parent = proc_mkdir("uid_procstat", NULL);
563 	if (!proc_parent) {
564 		pr_err("%s: failed to create uid_procstat proc entry\n",
565 			__func__);
566 		goto err;
567 	}
568 
569 	proc_create_data("set", 0222, proc_parent,
570 		&uid_procstat_fops, NULL);
571 
572 	profile_event_register(PROFILE_TASK_EXIT, &process_notifier_block);
573 
574 	return 0;
575 
576 err:
577 	remove_proc_subtree("uid_cputime", NULL);
578 	remove_proc_subtree("uid_io", NULL);
579 	remove_proc_subtree("uid_procstat", NULL);
580 	return -ENOMEM;
581 }
582 
583 early_initcall(proc_uid_sys_stats_init);
584