• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Kernel iptables module to track stats for packets based on user tags.
3  *
4  * (C) 2011 Google, Inc
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10 
11 /*
12  * There are run-time debug flags enabled via the debug_mask module param, or
13  * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
14  */
15 #define DEBUG
16 
17 #include <linux/file.h>
18 #include <linux/inetdevice.h>
19 #include <linux/module.h>
20 #include <linux/miscdevice.h>
21 #include <linux/netfilter/x_tables.h>
22 #include <linux/netfilter/xt_qtaguid.h>
23 #include <linux/ratelimit.h>
24 #include <linux/seq_file.h>
25 #include <linux/skbuff.h>
26 #include <linux/workqueue.h>
27 #include <net/addrconf.h>
28 #include <net/sock.h>
29 #include <net/tcp.h>
30 #include <net/udp.h>
31 
32 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
33 #include <linux/netfilter_ipv6/ip6_tables.h>
34 #endif
35 
36 #include <linux/netfilter/xt_socket.h>
37 #include "xt_qtaguid_internal.h"
38 #include "xt_qtaguid_print.h"
39 #include "../../fs/proc/internal.h"
40 
41 /*
42  * We only use the xt_socket funcs within a similar context to avoid unexpected
43  * return values.
44  */
45 #define XT_SOCKET_SUPPORTED_HOOKS \
46 	((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
47 
48 
49 static const char *module_procdirname = "xt_qtaguid";
50 static struct proc_dir_entry *xt_qtaguid_procdir;
51 
52 static unsigned int proc_iface_perms = S_IRUGO;
53 module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
54 
55 static struct proc_dir_entry *xt_qtaguid_stats_file;
56 static unsigned int proc_stats_perms = S_IRUGO;
57 module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
58 
59 static struct proc_dir_entry *xt_qtaguid_ctrl_file;
60 
61 /* Everybody can write. But proc_ctrl_write_limited is true by default which
62  * limits what can be controlled. See the can_*() functions.
63  */
64 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
65 module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
66 
67 /* Limited by default, so the gid of the ctrl and stats proc entries
68  * will limit what can be done. See the can_*() functions.
69  */
70 static bool proc_stats_readall_limited = true;
71 static bool proc_ctrl_write_limited = true;
72 
73 module_param_named(stats_readall_limited, proc_stats_readall_limited, bool,
74 		   S_IRUGO | S_IWUSR);
75 module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool,
76 		   S_IRUGO | S_IWUSR);
77 
78 /*
79  * Limit the number of active tags (via socket tags) for a given UID.
80  * Multiple processes could share the UID.
81  */
82 static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
83 module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
84 
85 /*
86  * After the kernel has initialized this module, it is still possible
87  * to make it passive.
88  * Setting passive to Y:
89  *  - the iface stats handling will not act on notifications.
90  *  - iptables matches will never match.
91  *  - ctrl commands silently succeed.
92  *  - stats are always empty.
93  * This is mostly useful when a bug is suspected.
94  */
95 static bool module_passive;
96 module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
97 
98 /*
99  * Control how qtaguid data is tracked per proc/uid.
100  * Setting tag_tracking_passive to Y:
101  *  - don't create proc specific structs to track tags
102  *  - don't check that active tag stats exceed some limits.
103  *  - don't clean up socket tags on process exits.
104  * This is mostly useful when a bug is suspected.
105  */
106 static bool qtu_proc_handling_passive;
107 module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
108 		   S_IRUGO | S_IWUSR);
109 
110 #define QTU_DEV_NAME "xt_qtaguid"
111 
112 uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
113 module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
114 
115 /*---------------------------------------------------------------------------*/
116 static const char *iface_stat_procdirname = "iface_stat";
117 static struct proc_dir_entry *iface_stat_procdir;
118 /*
119  * The iface_stat_all* will go away once userspace gets used to the new fields
120  * that have a format line.
121  */
122 static const char *iface_stat_all_procfilename = "iface_stat_all";
123 static struct proc_dir_entry *iface_stat_all_procfile;
124 static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
125 static struct proc_dir_entry *iface_stat_fmt_procfile;
126 
127 
128 static LIST_HEAD(iface_stat_list);
129 static DEFINE_SPINLOCK(iface_stat_list_lock);
130 
131 static struct rb_root sock_tag_tree = RB_ROOT;
132 static DEFINE_SPINLOCK(sock_tag_list_lock);
133 
134 static struct rb_root tag_counter_set_tree = RB_ROOT;
135 static DEFINE_SPINLOCK(tag_counter_set_list_lock);
136 
137 static struct rb_root uid_tag_data_tree = RB_ROOT;
138 static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
139 
140 static struct rb_root proc_qtu_data_tree = RB_ROOT;
141 /* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
142 
143 static struct qtaguid_event_counts qtu_events;
144 /*----------------------------------------------*/
can_manipulate_uids(void)145 static bool can_manipulate_uids(void)
146 {
147 	/* root pwnd */
148 	return in_egroup_p(xt_qtaguid_ctrl_file->gid)
149 		|| unlikely(!from_kuid(&init_user_ns, current_fsuid())) || unlikely(!proc_ctrl_write_limited)
150 		|| unlikely(uid_eq(current_fsuid(), xt_qtaguid_ctrl_file->uid));
151 }
152 
can_impersonate_uid(kuid_t uid)153 static bool can_impersonate_uid(kuid_t uid)
154 {
155 	return uid_eq(uid, current_fsuid()) || can_manipulate_uids();
156 }
157 
can_read_other_uid_stats(kuid_t uid)158 static bool can_read_other_uid_stats(kuid_t uid)
159 {
160 	/* root pwnd */
161 	return in_egroup_p(xt_qtaguid_stats_file->gid)
162 		|| unlikely(!from_kuid(&init_user_ns, current_fsuid())) || uid_eq(uid, current_fsuid())
163 		|| unlikely(!proc_stats_readall_limited)
164 		|| unlikely(uid_eq(current_fsuid(), xt_qtaguid_ctrl_file->uid));
165 }
166 
dc_add_byte_packets(struct data_counters * counters,int set,enum ifs_tx_rx direction,enum ifs_proto ifs_proto,int bytes,int packets)167 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
168 				  enum ifs_tx_rx direction,
169 				  enum ifs_proto ifs_proto,
170 				  int bytes,
171 				  int packets)
172 {
173 	counters->bpc[set][direction][ifs_proto].bytes += bytes;
174 	counters->bpc[set][direction][ifs_proto].packets += packets;
175 }
176 
/*
 * Walks the rb-tree at @root, ordered by tag_compare(), looking for @tag.
 * Returns the matching tag_node, or NULL when the tree has no such tag.
 */
static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
{
	struct rb_node *cur;

	for (cur = root->rb_node; cur; ) {
		struct tag_node *entry = rb_entry(cur, struct tag_node, node);
		int cmp;

		RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
			 " node=%p data=%p\n", tag, cur, entry);
		cmp = tag_compare(tag, entry->tag);
		RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
			 " data.tag=0x%llx (uid=%u) res=%d\n",
			 tag, entry->tag, get_uid_from_tag(entry->tag), cmp);
		if (!cmp)
			return entry;
		/* Smaller tags live in the left subtree, larger in the right. */
		cur = (cmp < 0) ? cur->rb_left : cur->rb_right;
	}
	return NULL;
}
199 
tag_node_tree_insert(struct tag_node * data,struct rb_root * root)200 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
201 {
202 	struct rb_node **new = &(root->rb_node), *parent = NULL;
203 
204 	/* Figure out where to put new node */
205 	while (*new) {
206 		struct tag_node *this = rb_entry(*new, struct tag_node,
207 						 node);
208 		int result = tag_compare(data->tag, this->tag);
209 		RB_DEBUG("qtaguid: %s(): tag=0x%llx"
210 			 " (uid=%u)\n", __func__,
211 			 this->tag,
212 			 get_uid_from_tag(this->tag));
213 		parent = *new;
214 		if (result < 0)
215 			new = &((*new)->rb_left);
216 		else if (result > 0)
217 			new = &((*new)->rb_right);
218 		else
219 			BUG();
220 	}
221 
222 	/* Add new node and rebalance tree. */
223 	rb_link_node(&data->node, parent, new);
224 	rb_insert_color(&data->node, root);
225 }
226 
tag_stat_tree_insert(struct tag_stat * data,struct rb_root * root)227 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
228 {
229 	tag_node_tree_insert(&data->tn, root);
230 }
231 
/*
 * Looks up @tag in @root and returns the tag_stat that embeds the matching
 * tag_node, or NULL when @tag is not in the tree.
 */
static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
{
	struct tag_node *tn = tag_node_tree_search(root, tag);

	return tn ? container_of(tn, struct tag_stat, tn) : NULL;
}
239 
tag_counter_set_tree_insert(struct tag_counter_set * data,struct rb_root * root)240 static void tag_counter_set_tree_insert(struct tag_counter_set *data,
241 					struct rb_root *root)
242 {
243 	tag_node_tree_insert(&data->tn, root);
244 }
245 
/*
 * Looks up @tag in @root and returns the tag_counter_set that embeds the
 * matching tag_node, or NULL when @tag is not in the tree.
 */
static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
							   tag_t tag)
{
	struct tag_node *tn = tag_node_tree_search(root, tag);

	return tn ? container_of(tn, struct tag_counter_set, tn) : NULL;
}
255 
tag_ref_tree_insert(struct tag_ref * data,struct rb_root * root)256 static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
257 {
258 	tag_node_tree_insert(&data->tn, root);
259 }
260 
/*
 * Looks up @tag in @root and returns the tag_ref that embeds the matching
 * tag_node, or NULL when @tag is not in the tree.
 */
static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
{
	struct tag_node *tn = tag_node_tree_search(root, tag);

	return tn ? container_of(tn, struct tag_ref, tn) : NULL;
}
268 
sock_tag_tree_search(struct rb_root * root,const struct sock * sk)269 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
270 					     const struct sock *sk)
271 {
272 	struct rb_node *node = root->rb_node;
273 
274 	while (node) {
275 		struct sock_tag *data = rb_entry(node, struct sock_tag,
276 						 sock_node);
277 		if (sk < data->sk)
278 			node = node->rb_left;
279 		else if (sk > data->sk)
280 			node = node->rb_right;
281 		else
282 			return data;
283 	}
284 	return NULL;
285 }
286 
sock_tag_tree_insert(struct sock_tag * data,struct rb_root * root)287 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
288 {
289 	struct rb_node **new = &(root->rb_node), *parent = NULL;
290 
291 	/* Figure out where to put new node */
292 	while (*new) {
293 		struct sock_tag *this = rb_entry(*new, struct sock_tag,
294 						 sock_node);
295 		parent = *new;
296 		if (data->sk < this->sk)
297 			new = &((*new)->rb_left);
298 		else if (data->sk > this->sk)
299 			new = &((*new)->rb_right);
300 		else
301 			BUG();
302 	}
303 
304 	/* Add new node and rebalance tree. */
305 	rb_link_node(&data->sock_node, parent, new);
306 	rb_insert_color(&data->sock_node, root);
307 }
308 
sock_tag_tree_erase(struct rb_root * st_to_free_tree)309 static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
310 {
311 	struct rb_node *node;
312 	struct sock_tag *st_entry;
313 
314 	node = rb_first(st_to_free_tree);
315 	while (node) {
316 		st_entry = rb_entry(node, struct sock_tag, sock_node);
317 		node = rb_next(node);
318 		CT_DEBUG("qtaguid: %s(): "
319 			 "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
320 			 st_entry->sk,
321 			 st_entry->tag,
322 			 get_uid_from_tag(st_entry->tag));
323 		rb_erase(&st_entry->sock_node, st_to_free_tree);
324 		sock_put(st_entry->sk);
325 		kfree(st_entry);
326 	}
327 }
328 
proc_qtu_data_tree_search(struct rb_root * root,const pid_t pid)329 static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
330 						       const pid_t pid)
331 {
332 	struct rb_node *node = root->rb_node;
333 
334 	while (node) {
335 		struct proc_qtu_data *data = rb_entry(node,
336 						      struct proc_qtu_data,
337 						      node);
338 		if (pid < data->pid)
339 			node = node->rb_left;
340 		else if (pid > data->pid)
341 			node = node->rb_right;
342 		else
343 			return data;
344 	}
345 	return NULL;
346 }
347 
proc_qtu_data_tree_insert(struct proc_qtu_data * data,struct rb_root * root)348 static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
349 				      struct rb_root *root)
350 {
351 	struct rb_node **new = &(root->rb_node), *parent = NULL;
352 
353 	/* Figure out where to put new node */
354 	while (*new) {
355 		struct proc_qtu_data *this = rb_entry(*new,
356 						      struct proc_qtu_data,
357 						      node);
358 		parent = *new;
359 		if (data->pid < this->pid)
360 			new = &((*new)->rb_left);
361 		else if (data->pid > this->pid)
362 			new = &((*new)->rb_right);
363 		else
364 			BUG();
365 	}
366 
367 	/* Add new node and rebalance tree. */
368 	rb_link_node(&data->node, parent, new);
369 	rb_insert_color(&data->node, root);
370 }
371 
uid_tag_data_tree_insert(struct uid_tag_data * data,struct rb_root * root)372 static void uid_tag_data_tree_insert(struct uid_tag_data *data,
373 				     struct rb_root *root)
374 {
375 	struct rb_node **new = &(root->rb_node), *parent = NULL;
376 
377 	/* Figure out where to put new node */
378 	while (*new) {
379 		struct uid_tag_data *this = rb_entry(*new,
380 						     struct uid_tag_data,
381 						     node);
382 		parent = *new;
383 		if (data->uid < this->uid)
384 			new = &((*new)->rb_left);
385 		else if (data->uid > this->uid)
386 			new = &((*new)->rb_right);
387 		else
388 			BUG();
389 	}
390 
391 	/* Add new node and rebalance tree. */
392 	rb_link_node(&data->node, parent, new);
393 	rb_insert_color(&data->node, root);
394 }
395 
/*
 * Finds the uid_tag_data for @uid in the rb-tree at @root (ordered by uid).
 * Returns the entry, or NULL when @uid has no entry in the tree.
 */
static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
						     uid_t uid)
{
	struct rb_node *cur;

	for (cur = root->rb_node; cur; ) {
		struct uid_tag_data *entry = rb_entry(cur,
						      struct uid_tag_data,
						      node);

		if (uid == entry->uid)
			return entry;
		cur = (uid < entry->uid) ? cur->rb_left : cur->rb_right;
	}
	return NULL;
}
414 
415 /*
416  * Allocates a new uid_tag_data struct if needed.
417  * Returns a pointer to the found or allocated uid_tag_data.
418  * Returns a PTR_ERR on failures, and lock is not held.
419  * If found is not NULL:
420  *   sets *found to true if not allocated.
421  *   sets *found to false if allocated.
422  */
get_uid_data(uid_t uid,bool * found_res)423 struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
424 {
425 	struct uid_tag_data *utd_entry;
426 
427 	/* Look for top level uid_tag_data for the UID */
428 	utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
429 	DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
430 
431 	if (found_res)
432 		*found_res = utd_entry;
433 	if (utd_entry)
434 		return utd_entry;
435 
436 	utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
437 	if (!utd_entry) {
438 		pr_err("qtaguid: get_uid_data(%u): "
439 		       "tag data alloc failed\n", uid);
440 		return ERR_PTR(-ENOMEM);
441 	}
442 
443 	utd_entry->uid = uid;
444 	utd_entry->tag_ref_tree = RB_ROOT;
445 	uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
446 	DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
447 	return utd_entry;
448 }
449 
450 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
new_tag_ref(tag_t new_tag,struct uid_tag_data * utd_entry)451 static struct tag_ref *new_tag_ref(tag_t new_tag,
452 				   struct uid_tag_data *utd_entry)
453 {
454 	struct tag_ref *tr_entry;
455 	int res;
456 
457 	if (utd_entry->num_active_tags + 1 > max_sock_tags) {
458 		pr_info("qtaguid: new_tag_ref(0x%llx): "
459 			"tag ref alloc quota exceeded. max=%d\n",
460 			new_tag, max_sock_tags);
461 		res = -EMFILE;
462 		goto err_res;
463 
464 	}
465 
466 	tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
467 	if (!tr_entry) {
468 		pr_err("qtaguid: new_tag_ref(0x%llx): "
469 		       "tag ref alloc failed\n",
470 		       new_tag);
471 		res = -ENOMEM;
472 		goto err_res;
473 	}
474 	tr_entry->tn.tag = new_tag;
475 	/* tr_entry->num_sock_tags  handled by caller */
476 	utd_entry->num_active_tags++;
477 	tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
478 	DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
479 		 " inserted new tag ref %p\n",
480 		 new_tag, tr_entry);
481 	return tr_entry;
482 
483 err_res:
484 	return ERR_PTR(res);
485 }
486 
/*
 * Looks up the tag_ref for @full_tag under the uid_tag_data of the UID
 * encoded in the tag. The uid_tag_data is created by get_uid_data() when it
 * does not exist yet; the tag_ref itself is never created here.
 *
 * Returns the tag_ref, or NULL when there is none or when get_uid_data()
 * failed.
 * When @utd_res is not NULL, *utd_res receives whatever get_uid_data()
 * returned, which is an ERR_PTR on failure; callers must check it with
 * IS_ERR_OR_NULL() before dereferencing.
 */
static struct tag_ref *lookup_tag_ref(tag_t full_tag,
				      struct uid_tag_data **utd_res)
{
	struct uid_tag_data *utd_entry;
	struct tag_ref *tr_entry;
	uid_t uid = get_uid_from_tag(full_tag);

	DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
		 full_tag, uid);

	/* Whether the entry was found or freshly allocated is irrelevant. */
	utd_entry = get_uid_data(uid, NULL);
	if (utd_res)
		*utd_res = utd_entry;
	if (IS_ERR_OR_NULL(utd_entry))
		return NULL;

	tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
	DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
		 full_tag, utd_entry, tr_entry);
	return tr_entry;
}
512 
513 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
get_tag_ref(tag_t full_tag,struct uid_tag_data ** utd_res)514 static struct tag_ref *get_tag_ref(tag_t full_tag,
515 				   struct uid_tag_data **utd_res)
516 {
517 	struct uid_tag_data *utd_entry;
518 	struct tag_ref *tr_entry;
519 
520 	DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
521 		 full_tag);
522 	tr_entry = lookup_tag_ref(full_tag, &utd_entry);
523 	BUG_ON(IS_ERR_OR_NULL(utd_entry));
524 	if (!tr_entry)
525 		tr_entry = new_tag_ref(full_tag, utd_entry);
526 
527 	if (utd_res)
528 		*utd_res = utd_entry;
529 	DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
530 		 full_tag, utd_entry, tr_entry);
531 	return tr_entry;
532 }
533 
534 /* Checks and maybe frees the UID Tag Data entry */
put_utd_entry(struct uid_tag_data * utd_entry)535 static void put_utd_entry(struct uid_tag_data *utd_entry)
536 {
537 	/* Are we done with the UID tag data entry? */
538 	if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
539 		!utd_entry->num_pqd) {
540 		DR_DEBUG("qtaguid: %s(): "
541 			 "erase utd_entry=%p uid=%u "
542 			 "by pid=%u tgid=%u uid=%u\n", __func__,
543 			 utd_entry, utd_entry->uid,
544 			 current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
545 		BUG_ON(utd_entry->num_active_tags);
546 		rb_erase(&utd_entry->node, &uid_tag_data_tree);
547 		kfree(utd_entry);
548 	} else {
549 		DR_DEBUG("qtaguid: %s(): "
550 			 "utd_entry=%p still has %d tags %d proc_qtu_data\n",
551 			 __func__, utd_entry, utd_entry->num_active_tags,
552 			 utd_entry->num_pqd);
553 		BUG_ON(!(utd_entry->num_active_tags ||
554 			 utd_entry->num_pqd));
555 	}
556 }
557 
558 /*
559  * If no sock_tags are using this tag_ref,
560  * decrements refcount of utd_entry, removes tr_entry
561  * from utd_entry->tag_ref_tree and frees.
562  */
free_tag_ref_from_utd_entry(struct tag_ref * tr_entry,struct uid_tag_data * utd_entry)563 static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
564 					struct uid_tag_data *utd_entry)
565 {
566 	DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
567 		 tr_entry, tr_entry->tn.tag,
568 		 get_uid_from_tag(tr_entry->tn.tag));
569 	if (!tr_entry->num_sock_tags) {
570 		BUG_ON(!utd_entry->num_active_tags);
571 		utd_entry->num_active_tags--;
572 		rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
573 		DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
574 		kfree(tr_entry);
575 	}
576 }
577 
/*
 * Walks utd_entry->tag_ref_tree and hands candidate tag_refs to
 * free_tag_ref_from_utd_entry(), which frees those no longer used by any
 * sock_tag. When @full_tag carries no acct tag part, every tag_ref is a
 * candidate; otherwise only the one whose tag equals @full_tag.
 */
static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
{
	struct rb_node *cur, *next;
	tag_t acct_tag;

	DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
		 full_tag, get_uid_from_tag(full_tag));
	acct_tag = get_atag_from_tag(full_tag);

	for (cur = rb_first(&utd_entry->tag_ref_tree); cur; cur = next) {
		struct tag_ref *tr = rb_entry(cur, struct tag_ref, tn.node);

		/* The current node may get freed below; grab the next first. */
		next = rb_next(cur);
		if (acct_tag && tr->tn.tag != full_tag)
			continue;
		free_tag_ref_from_utd_entry(tr, utd_entry);
	}
}
595 
read_proc_u64(struct file * file,char __user * buf,size_t size,loff_t * ppos)596 static ssize_t read_proc_u64(struct file *file, char __user *buf,
597 			 size_t size, loff_t *ppos)
598 {
599 	uint64_t *valuep = PDE_DATA(file_inode(file));
600 	char tmp[24];
601 	size_t tmp_size;
602 
603 	tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", *valuep);
604 	return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
605 }
606 
read_proc_bool(struct file * file,char __user * buf,size_t size,loff_t * ppos)607 static ssize_t read_proc_bool(struct file *file, char __user *buf,
608 			  size_t size, loff_t *ppos)
609 {
610 	bool *valuep = PDE_DATA(file_inode(file));
611 	char tmp[24];
612 	size_t tmp_size;
613 
614 	tmp_size = scnprintf(tmp, sizeof(tmp), "%u\n", *valuep);
615 	return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
616 }
617 
/*
 * Returns the active counter set recorded for the UID part of @tag in
 * tag_counter_set_tree, or 0 when the tree has no entry for it.
 * Takes tag_counter_set_list_lock (bh-safe) around the lookup.
 */
static int get_active_counter_set(tag_t tag)
{
	struct tag_counter_set *tcs;
	int set;

	MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
		 " (uid=%u)\n",
		 tag, get_uid_from_tag(tag));
	/* Active sets are only tracked per UID tag for now. */
	tag = get_utag_from_tag(tag);

	spin_lock_bh(&tag_counter_set_list_lock);
	tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
	set = tcs ? tcs->active_set : 0;
	spin_unlock_bh(&tag_counter_set_list_lock);

	return set;
}
635 
636 /*
637  * Find the entry for tracking the specified interface.
638  * Caller must hold iface_stat_list_lock
639  */
get_iface_entry(const char * ifname)640 static struct iface_stat *get_iface_entry(const char *ifname)
641 {
642 	struct iface_stat *iface_entry;
643 
644 	/* Find the entry for tracking the specified tag within the interface */
645 	if (ifname == NULL) {
646 		pr_info("qtaguid: iface_stat: get() NULL device name\n");
647 		return NULL;
648 	}
649 
650 	/* Iterate over interfaces */
651 	list_for_each_entry(iface_entry, &iface_stat_list, list) {
652 		if (!strcmp(ifname, iface_entry->ifname))
653 			goto done;
654 	}
655 	iface_entry = NULL;
656 done:
657 	return iface_entry;
658 }
659 
/*
 * Emits the column header line of the fmt2 output into @m. The columns are
 * the ones pp_iface_stat_line() prints.
 */
static void pp_iface_stat_header(struct seq_file *m)
{
	static const char header[] =
		"ifname "
		"total_skb_rx_bytes total_skb_rx_packets "
		"total_skb_tx_bytes total_skb_tx_packets "
		"rx_tcp_bytes rx_tcp_packets "
		"rx_udp_bytes rx_udp_packets "
		"rx_other_bytes rx_other_packets "
		"tx_tcp_bytes tx_tcp_packets "
		"tx_udp_bytes tx_udp_packets "
		"tx_other_bytes tx_other_packets\n";

	seq_puts(m, header);
}
675 
pp_iface_stat_line(struct seq_file * m,struct iface_stat * iface_entry)676 static void pp_iface_stat_line(struct seq_file *m,
677 			       struct iface_stat *iface_entry)
678 {
679 	struct data_counters *cnts;
680 	int cnt_set = 0;   /* We only use one set for the device */
681 	cnts = &iface_entry->totals_via_skb;
682 	seq_printf(m, "%s %llu %llu %llu %llu %llu %llu %llu %llu "
683 		   "%llu %llu %llu %llu %llu %llu %llu %llu\n",
684 		   iface_entry->ifname,
685 		   dc_sum_bytes(cnts, cnt_set, IFS_RX),
686 		   dc_sum_packets(cnts, cnt_set, IFS_RX),
687 		   dc_sum_bytes(cnts, cnt_set, IFS_TX),
688 		   dc_sum_packets(cnts, cnt_set, IFS_TX),
689 		   cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
690 		   cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
691 		   cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
692 		   cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
693 		   cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
694 		   cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
695 		   cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
696 		   cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
697 		   cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
698 		   cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
699 		   cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
700 		   cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
701 }
702 
/*
 * Read from m->private by the iface_stat_fmt seq_file callbacks.
 * fmt selects the output layout: 1 prints the device based totals line,
 * any other value prints the skb based line; 2 additionally gets a header.
 */
struct proc_iface_stat_fmt_info {
	int fmt;
};
706 
/*
 * seq_file start callback for the iface_stat_fmt output.
 * Takes iface_stat_list_lock, which iface_stat_fmt_proc_stop() releases.
 * Returns NULL (nothing to show) when the module is passive; otherwise
 * prints the header at position 0 for fmt 2 and returns the list element at
 * *pos.
 */
static void *iface_stat_fmt_proc_start(struct seq_file *m, loff_t *pos)
{
	struct proc_iface_stat_fmt_info *info = m->private;
	loff_t offset = *pos;

	/*
	 * Holding this lock keeps iface_stat_update() from changing active,
	 * which in turn keeps an interface from unregistering itself.
	 */
	spin_lock_bh(&iface_stat_list_lock);

	if (unlikely(module_passive))
		return NULL;

	if (offset == 0 && info->fmt == 2)
		pp_iface_stat_header(m);

	return seq_list_start(&iface_stat_list, offset);
}
726 
/* seq_file next callback: steps to the element after @p in iface_stat_list. */
static void *iface_stat_fmt_proc_next(struct seq_file *m, void *p, loff_t *pos)
{
	void *following = seq_list_next(p, &iface_stat_list, pos);

	return following;
}
731 
iface_stat_fmt_proc_stop(struct seq_file * m,void * p)732 static void iface_stat_fmt_proc_stop(struct seq_file *m, void *p)
733 {
734 	spin_unlock_bh(&iface_stat_list_lock);
735 }
736 
iface_stat_fmt_proc_show(struct seq_file * m,void * v)737 static int iface_stat_fmt_proc_show(struct seq_file *m, void *v)
738 {
739 	struct proc_iface_stat_fmt_info *p = m->private;
740 	struct iface_stat *iface_entry;
741 	struct rtnl_link_stats64 dev_stats, *stats;
742 	struct rtnl_link_stats64 no_dev_stats = {0};
743 
744 
745 	CT_DEBUG("qtaguid:proc iface_stat_fmt pid=%u tgid=%u uid=%u\n",
746 		 current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
747 
748 	iface_entry = list_entry(v, struct iface_stat, list);
749 
750 	if (iface_entry->active) {
751 		stats = dev_get_stats(iface_entry->net_dev,
752 				      &dev_stats);
753 	} else {
754 		stats = &no_dev_stats;
755 	}
756 	/*
757 	 * If the meaning of the data changes, then update the fmtX
758 	 * string.
759 	 */
760 	if (p->fmt == 1) {
761 		seq_printf(m, "%s %d %llu %llu %llu %llu %llu %llu %llu %llu\n",
762 			   iface_entry->ifname,
763 			   iface_entry->active,
764 			   iface_entry->totals_via_dev[IFS_RX].bytes,
765 			   iface_entry->totals_via_dev[IFS_RX].packets,
766 			   iface_entry->totals_via_dev[IFS_TX].bytes,
767 			   iface_entry->totals_via_dev[IFS_TX].packets,
768 			   stats->rx_bytes, stats->rx_packets,
769 			   stats->tx_bytes, stats->tx_packets
770 			   );
771 	} else {
772 		pp_iface_stat_line(m, iface_entry);
773 	}
774 	return 0;
775 }
776 
777 static const struct file_operations read_u64_fops = {
778 	.read		= read_proc_u64,
779 	.llseek		= default_llseek,
780 };
781 
782 static const struct file_operations read_bool_fops = {
783 	.read		= read_proc_bool,
784 	.llseek		= default_llseek,
785 };
786 
iface_create_proc_worker(struct work_struct * work)787 static void iface_create_proc_worker(struct work_struct *work)
788 {
789 	struct proc_dir_entry *proc_entry;
790 	struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
791 						   iface_work);
792 	struct iface_stat *new_iface  = isw->iface_entry;
793 
794 	/* iface_entries are not deleted, so safe to manipulate. */
795 	proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
796 	if (IS_ERR_OR_NULL(proc_entry)) {
797 		pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
798 		kfree(isw);
799 		return;
800 	}
801 
802 	new_iface->proc_ptr = proc_entry;
803 
804 	proc_create_data("tx_bytes", proc_iface_perms, proc_entry,
805 			 &read_u64_fops,
806 			 &new_iface->totals_via_dev[IFS_TX].bytes);
807 	proc_create_data("rx_bytes", proc_iface_perms, proc_entry,
808 			 &read_u64_fops,
809 			 &new_iface->totals_via_dev[IFS_RX].bytes);
810 	proc_create_data("tx_packets", proc_iface_perms, proc_entry,
811 			 &read_u64_fops,
812 			 &new_iface->totals_via_dev[IFS_TX].packets);
813 	proc_create_data("rx_packets", proc_iface_perms, proc_entry,
814 			 &read_u64_fops,
815 			 &new_iface->totals_via_dev[IFS_RX].packets);
816 	proc_create_data("active", proc_iface_perms, proc_entry,
817 			 &read_bool_fops, &new_iface->active);
818 
819 	IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
820 		 "entry=%p dev=%s\n", new_iface, new_iface->ifname);
821 	kfree(isw);
822 }
823 
824 /*
825  * Will set the entry's active state, and
826  * update the net_dev accordingly also.
827  */
_iface_stat_set_active(struct iface_stat * entry,struct net_device * net_dev,bool activate)828 static void _iface_stat_set_active(struct iface_stat *entry,
829 				   struct net_device *net_dev,
830 				   bool activate)
831 {
832 	if (activate) {
833 		entry->net_dev = net_dev;
834 		entry->active = true;
835 		IF_DEBUG("qtaguid: %s(%s): "
836 			 "enable tracking. rfcnt=%d\n", __func__,
837 			 entry->ifname,
838 			 __this_cpu_read(*net_dev->pcpu_refcnt));
839 	} else {
840 		entry->active = false;
841 		entry->net_dev = NULL;
842 		IF_DEBUG("qtaguid: %s(%s): "
843 			 "disable tracking. rfcnt=%d\n", __func__,
844 			 entry->ifname,
845 			 __this_cpu_read(*net_dev->pcpu_refcnt));
846 
847 	}
848 }
849 
850 /* Caller must hold iface_stat_list_lock */
iface_alloc(struct net_device * net_dev)851 static struct iface_stat *iface_alloc(struct net_device *net_dev)
852 {
853 	struct iface_stat *new_iface;
854 	struct iface_stat_work *isw;
855 
856 	new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
857 	if (new_iface == NULL) {
858 		pr_err("qtaguid: iface_stat: create(%s): "
859 		       "iface_stat alloc failed\n", net_dev->name);
860 		return NULL;
861 	}
862 	new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
863 	if (new_iface->ifname == NULL) {
864 		pr_err("qtaguid: iface_stat: create(%s): "
865 		       "ifname alloc failed\n", net_dev->name);
866 		kfree(new_iface);
867 		return NULL;
868 	}
869 	spin_lock_init(&new_iface->tag_stat_list_lock);
870 	new_iface->tag_stat_tree = RB_ROOT;
871 	_iface_stat_set_active(new_iface, net_dev, true);
872 
873 	/*
874 	 * ipv6 notifier chains are atomic :(
875 	 * No create_proc_read_entry() for you!
876 	 */
877 	isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
878 	if (!isw) {
879 		pr_err("qtaguid: iface_stat: create(%s): "
880 		       "work alloc failed\n", new_iface->ifname);
881 		_iface_stat_set_active(new_iface, net_dev, false);
882 		kfree(new_iface->ifname);
883 		kfree(new_iface);
884 		return NULL;
885 	}
886 	isw->iface_entry = new_iface;
887 	INIT_WORK(&isw->iface_work, iface_create_proc_worker);
888 	schedule_work(&isw->iface_work);
889 	list_add(&new_iface->list, &iface_stat_list);
890 	return new_iface;
891 }
892 
iface_check_stats_reset_and_adjust(struct net_device * net_dev,struct iface_stat * iface)893 static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
894 					       struct iface_stat *iface)
895 {
896 	struct rtnl_link_stats64 dev_stats, *stats;
897 	bool stats_rewound;
898 
899 	stats = dev_get_stats(net_dev, &dev_stats);
900 	/* No empty packets */
901 	stats_rewound =
902 		(stats->rx_bytes < iface->last_known[IFS_RX].bytes)
903 		|| (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
904 
905 	IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
906 		 "bytes rx/tx=%llu/%llu "
907 		 "active=%d last_known=%d "
908 		 "stats_rewound=%d\n", __func__,
909 		 net_dev ? net_dev->name : "?",
910 		 iface, net_dev,
911 		 stats->rx_bytes, stats->tx_bytes,
912 		 iface->active, iface->last_known_valid, stats_rewound);
913 
914 	if (iface->active && iface->last_known_valid && stats_rewound) {
915 		pr_warn_once("qtaguid: iface_stat: %s(%s): "
916 			     "iface reset its stats unexpectedly\n", __func__,
917 			     net_dev->name);
918 
919 		iface->totals_via_dev[IFS_TX].bytes +=
920 			iface->last_known[IFS_TX].bytes;
921 		iface->totals_via_dev[IFS_TX].packets +=
922 			iface->last_known[IFS_TX].packets;
923 		iface->totals_via_dev[IFS_RX].bytes +=
924 			iface->last_known[IFS_RX].bytes;
925 		iface->totals_via_dev[IFS_RX].packets +=
926 			iface->last_known[IFS_RX].packets;
927 		iface->last_known_valid = false;
928 		IF_DEBUG("qtaguid: %s(%s): iface=%p "
929 			 "used last known bytes rx/tx=%llu/%llu\n", __func__,
930 			 iface->ifname, iface, iface->last_known[IFS_RX].bytes,
931 			 iface->last_known[IFS_TX].bytes);
932 	}
933 }
934 
935 /*
936  * Create a new entry for tracking the specified interface.
937  * Do nothing if the entry already exists.
938  * Called when an interface is configured with a valid IP address.
939  */
iface_stat_create(struct net_device * net_dev,struct in_ifaddr * ifa)940 static void iface_stat_create(struct net_device *net_dev,
941 			      struct in_ifaddr *ifa)
942 {
943 	struct in_device *in_dev = NULL;
944 	const char *ifname;
945 	struct iface_stat *entry;
946 	__be32 ipaddr = 0;
947 	struct iface_stat *new_iface;
948 
949 	IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
950 		 net_dev ? net_dev->name : "?",
951 		 ifa, net_dev);
952 	if (!net_dev) {
953 		pr_err("qtaguid: iface_stat: create(): no net dev\n");
954 		return;
955 	}
956 
957 	ifname = net_dev->name;
958 	if (!ifa) {
959 		in_dev = in_dev_get(net_dev);
960 		if (!in_dev) {
961 			pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
962 			       ifname);
963 			return;
964 		}
965 		IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
966 			 ifname, in_dev);
967 		for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
968 			IF_DEBUG("qtaguid: iface_stat: create(%s): "
969 				 "ifa=%p ifa_label=%s\n",
970 				 ifname, ifa, ifa->ifa_label);
971 			if (!strcmp(ifname, ifa->ifa_label))
972 				break;
973 		}
974 	}
975 
976 	if (!ifa) {
977 		IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
978 			 ifname);
979 		goto done_put;
980 	}
981 	ipaddr = ifa->ifa_local;
982 
983 	spin_lock_bh(&iface_stat_list_lock);
984 	entry = get_iface_entry(ifname);
985 	if (entry != NULL) {
986 		IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
987 			 ifname, entry);
988 		iface_check_stats_reset_and_adjust(net_dev, entry);
989 		_iface_stat_set_active(entry, net_dev, true);
990 		IF_DEBUG("qtaguid: %s(%s): "
991 			 "tracking now %d on ip=%pI4\n", __func__,
992 			 entry->ifname, true, &ipaddr);
993 		goto done_unlock_put;
994 	}
995 
996 	new_iface = iface_alloc(net_dev);
997 	IF_DEBUG("qtaguid: iface_stat: create(%s): done "
998 		 "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
999 done_unlock_put:
1000 	spin_unlock_bh(&iface_stat_list_lock);
1001 done_put:
1002 	if (in_dev)
1003 		in_dev_put(in_dev);
1004 }
1005 
iface_stat_create_ipv6(struct net_device * net_dev,struct inet6_ifaddr * ifa)1006 static void iface_stat_create_ipv6(struct net_device *net_dev,
1007 				   struct inet6_ifaddr *ifa)
1008 {
1009 	struct in_device *in_dev;
1010 	const char *ifname;
1011 	struct iface_stat *entry;
1012 	struct iface_stat *new_iface;
1013 	int addr_type;
1014 
1015 	IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
1016 		 ifa, net_dev, net_dev ? net_dev->name : "");
1017 	if (!net_dev) {
1018 		pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
1019 		return;
1020 	}
1021 	ifname = net_dev->name;
1022 
1023 	in_dev = in_dev_get(net_dev);
1024 	if (!in_dev) {
1025 		pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
1026 		       ifname);
1027 		return;
1028 	}
1029 
1030 	IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
1031 		 ifname, in_dev);
1032 
1033 	if (!ifa) {
1034 		IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
1035 			 ifname);
1036 		goto done_put;
1037 	}
1038 	addr_type = ipv6_addr_type(&ifa->addr);
1039 
1040 	spin_lock_bh(&iface_stat_list_lock);
1041 	entry = get_iface_entry(ifname);
1042 	if (entry != NULL) {
1043 		IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1044 			 ifname, entry);
1045 		iface_check_stats_reset_and_adjust(net_dev, entry);
1046 		_iface_stat_set_active(entry, net_dev, true);
1047 		IF_DEBUG("qtaguid: %s(%s): "
1048 			 "tracking now %d on ip=%pI6c\n", __func__,
1049 			 entry->ifname, true, &ifa->addr);
1050 		goto done_unlock_put;
1051 	}
1052 
1053 	new_iface = iface_alloc(net_dev);
1054 	IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
1055 		 "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
1056 
1057 done_unlock_put:
1058 	spin_unlock_bh(&iface_stat_list_lock);
1059 done_put:
1060 	in_dev_put(in_dev);
1061 }
1062 
get_sock_stat_nl(const struct sock * sk)1063 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
1064 {
1065 	MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
1066 	return sock_tag_tree_search(&sock_tag_tree, sk);
1067 }
1068 
get_sock_stat(const struct sock * sk)1069 static struct sock_tag *get_sock_stat(const struct sock *sk)
1070 {
1071 	struct sock_tag *sock_tag_entry;
1072 	MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1073 	if (!sk)
1074 		return NULL;
1075 	spin_lock_bh(&sock_tag_list_lock);
1076 	sock_tag_entry = get_sock_stat_nl(sk);
1077 	spin_unlock_bh(&sock_tag_list_lock);
1078 	return sock_tag_entry;
1079 }
1080 
ipx_proto(const struct sk_buff * skb,struct xt_action_param * par)1081 static int ipx_proto(const struct sk_buff *skb,
1082 		     struct xt_action_param *par)
1083 {
1084 	int thoff = 0, tproto;
1085 
1086 	switch (par->family) {
1087 	case NFPROTO_IPV6:
1088 		tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
1089 		if (tproto < 0)
1090 			MT_DEBUG("%s(): transport header not found in ipv6"
1091 				 " skb=%p\n", __func__, skb);
1092 		break;
1093 	case NFPROTO_IPV4:
1094 		tproto = ip_hdr(skb)->protocol;
1095 		break;
1096 	default:
1097 		tproto = IPPROTO_RAW;
1098 	}
1099 	return tproto;
1100 }
1101 
1102 static void
data_counters_update(struct data_counters * dc,int set,enum ifs_tx_rx direction,int proto,int bytes)1103 data_counters_update(struct data_counters *dc, int set,
1104 		     enum ifs_tx_rx direction, int proto, int bytes)
1105 {
1106 	switch (proto) {
1107 	case IPPROTO_TCP:
1108 		dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1109 		break;
1110 	case IPPROTO_UDP:
1111 		dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1112 		break;
1113 	case IPPROTO_IP:
1114 	default:
1115 		dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1116 				    1);
1117 		break;
1118 	}
1119 }
1120 
1121 /*
1122  * Update stats for the specified interface. Do nothing if the entry
1123  * does not exist (when a device was never configured with an IP address).
1124  * Called when an device is being unregistered.
1125  */
iface_stat_update(struct net_device * net_dev,bool stash_only)1126 static void iface_stat_update(struct net_device *net_dev, bool stash_only)
1127 {
1128 	struct rtnl_link_stats64 dev_stats, *stats;
1129 	struct iface_stat *entry;
1130 
1131 	stats = dev_get_stats(net_dev, &dev_stats);
1132 	spin_lock_bh(&iface_stat_list_lock);
1133 	entry = get_iface_entry(net_dev->name);
1134 	if (entry == NULL) {
1135 		IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
1136 			 net_dev->name);
1137 		spin_unlock_bh(&iface_stat_list_lock);
1138 		return;
1139 	}
1140 
1141 	IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1142 		 net_dev->name, entry);
1143 	if (!entry->active) {
1144 		IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
1145 			 net_dev->name);
1146 		spin_unlock_bh(&iface_stat_list_lock);
1147 		return;
1148 	}
1149 
1150 	if (stash_only) {
1151 		entry->last_known[IFS_TX].bytes = stats->tx_bytes;
1152 		entry->last_known[IFS_TX].packets = stats->tx_packets;
1153 		entry->last_known[IFS_RX].bytes = stats->rx_bytes;
1154 		entry->last_known[IFS_RX].packets = stats->rx_packets;
1155 		entry->last_known_valid = true;
1156 		IF_DEBUG("qtaguid: %s(%s): "
1157 			 "dev stats stashed rx/tx=%llu/%llu\n", __func__,
1158 			 net_dev->name, stats->rx_bytes, stats->tx_bytes);
1159 		spin_unlock_bh(&iface_stat_list_lock);
1160 		return;
1161 	}
1162 	entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
1163 	entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
1164 	entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
1165 	entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
1166 	/* We don't need the last_known[] anymore */
1167 	entry->last_known_valid = false;
1168 	_iface_stat_set_active(entry, net_dev, false);
1169 	IF_DEBUG("qtaguid: %s(%s): "
1170 		 "disable tracking. rx/tx=%llu/%llu\n", __func__,
1171 		 net_dev->name, stats->rx_bytes, stats->tx_bytes);
1172 	spin_unlock_bh(&iface_stat_list_lock);
1173 }
1174 
1175 /* Guarantied to return a net_device that has a name */
get_dev_and_dir(const struct sk_buff * skb,struct xt_action_param * par,enum ifs_tx_rx * direction,const struct net_device ** el_dev)1176 static void get_dev_and_dir(const struct sk_buff *skb,
1177 			    struct xt_action_param *par,
1178 			    enum ifs_tx_rx *direction,
1179 			    const struct net_device **el_dev)
1180 {
1181 	BUG_ON(!direction || !el_dev);
1182 
1183 	if (par->in) {
1184 		*el_dev = par->in;
1185 		*direction = IFS_RX;
1186 	} else if (par->out) {
1187 		*el_dev = par->out;
1188 		*direction = IFS_TX;
1189 	} else {
1190 		pr_err("qtaguid[%d]: %s(): no par->in/out?!!\n",
1191 		       par->hooknum, __func__);
1192 		BUG();
1193 	}
1194 	if (unlikely(!(*el_dev)->name)) {
1195 		pr_err("qtaguid[%d]: %s(): no dev->name?!!\n",
1196 		       par->hooknum, __func__);
1197 		BUG();
1198 	}
1199 	if (skb->dev && *el_dev != skb->dev) {
1200 		MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs par->%s=%p %s\n",
1201 			 par->hooknum, skb->dev, skb->dev->name,
1202 			 *direction == IFS_RX ? "in" : "out",  *el_dev,
1203 			 (*el_dev)->name);
1204 	}
1205 }
1206 
1207 /*
1208  * Update stats for the specified interface from the skb.
1209  * Do nothing if the entry
1210  * does not exist (when a device was never configured with an IP address).
1211  * Called on each sk.
1212  */
iface_stat_update_from_skb(const struct sk_buff * skb,struct xt_action_param * par)1213 static void iface_stat_update_from_skb(const struct sk_buff *skb,
1214 				       struct xt_action_param *par)
1215 {
1216 	struct iface_stat *entry;
1217 	const struct net_device *el_dev;
1218 	enum ifs_tx_rx direction;
1219 	int bytes = skb->len;
1220 	int proto;
1221 
1222 	get_dev_and_dir(skb, par, &direction, &el_dev);
1223 	proto = ipx_proto(skb, par);
1224 	MT_DEBUG("qtaguid[%d]: iface_stat: %s(%s): "
1225 		 "type=%d fam=%d proto=%d dir=%d\n",
1226 		 par->hooknum, __func__, el_dev->name, el_dev->type,
1227 		 par->family, proto, direction);
1228 
1229 	spin_lock_bh(&iface_stat_list_lock);
1230 	entry = get_iface_entry(el_dev->name);
1231 	if (entry == NULL) {
1232 		IF_DEBUG("qtaguid[%d]: iface_stat: %s(%s): not tracked\n",
1233 			 par->hooknum, __func__, el_dev->name);
1234 		spin_unlock_bh(&iface_stat_list_lock);
1235 		return;
1236 	}
1237 
1238 	IF_DEBUG("qtaguid[%d]: %s(%s): entry=%p\n", par->hooknum,  __func__,
1239 		 el_dev->name, entry);
1240 
1241 	data_counters_update(&entry->totals_via_skb, 0, direction, proto,
1242 			     bytes);
1243 	spin_unlock_bh(&iface_stat_list_lock);
1244 }
1245 
tag_stat_update(struct tag_stat * tag_entry,enum ifs_tx_rx direction,int proto,int bytes)1246 static void tag_stat_update(struct tag_stat *tag_entry,
1247 			enum ifs_tx_rx direction, int proto, int bytes)
1248 {
1249 	int active_set;
1250 	active_set = get_active_counter_set(tag_entry->tn.tag);
1251 	MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1252 		 "dir=%d proto=%d bytes=%d)\n",
1253 		 tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1254 		 active_set, direction, proto, bytes);
1255 	data_counters_update(&tag_entry->counters, active_set, direction,
1256 			     proto, bytes);
1257 	if (tag_entry->parent_counters)
1258 		data_counters_update(tag_entry->parent_counters, active_set,
1259 				     direction, proto, bytes);
1260 }
1261 
1262 /*
1263  * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1264  * the interface.
1265  * iface_entry->tag_stat_list_lock should be held.
1266  */
create_if_tag_stat(struct iface_stat * iface_entry,tag_t tag)1267 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1268 					   tag_t tag)
1269 {
1270 	struct tag_stat *new_tag_stat_entry = NULL;
1271 	IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1272 		 " (uid=%u)\n", __func__,
1273 		 iface_entry, tag, get_uid_from_tag(tag));
1274 	new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1275 	if (!new_tag_stat_entry) {
1276 		pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1277 		goto done;
1278 	}
1279 	new_tag_stat_entry->tn.tag = tag;
1280 	tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1281 done:
1282 	return new_tag_stat_entry;
1283 }
1284 
/*
 * Account one packet against the per-interface tag statistics.
 *
 * @ifname:    name of the interface the packet was seen on.
 * @uid:       uid to charge when @sk carries no tag.
 * @sk:        socket owning the packet, may be NULL.
 * @direction: IFS_RX or IFS_TX.
 * @proto:     transport protocol number.
 * @bytes:     packet length.
 *
 * The effective tag is the one recorded for @sk in the sock tag tree, or
 * {0, uid} when the socket is untagged. The matching tag_stat under the
 * interface is updated; if it does not exist yet, the base {0, uid_tag}
 * entry and (for a non-zero acct_tag) the child {acct_tag, uid_tag} entry
 * are created, with the child pointing at the base entry's counters as its
 * parent. Allocation failures silently drop the update.
 *
 * Locks taken, in order: iface_stat_list_lock, then sock_tag_list_lock
 * (released before the next one), then the interface's tag_stat_list_lock.
 */
static void if_tag_stat_update(const char *ifname, uid_t uid,
			       const struct sock *sk, enum ifs_tx_rx direction,
			       int proto, int bytes)
{
	struct tag_stat *tag_stat_entry;
	tag_t tag, acct_tag;
	tag_t uid_tag;
	struct data_counters *uid_tag_counters;
	struct sock_tag *sock_tag_entry;
	struct iface_stat *iface_entry;
	struct tag_stat *new_tag_stat = NULL;

	MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
		"uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
		 ifname, uid, sk, direction, proto, bytes);

	spin_lock_bh(&iface_stat_list_lock);
	iface_entry = get_iface_entry(ifname);
	if (!iface_entry) {
		pr_err_ratelimited("qtaguid: tag_stat: stat_update() "
				   "%s not found\n", ifname);
		spin_unlock_bh(&iface_stat_list_lock);
		return;
	}
	/* It is ok to process data when an iface_entry is inactive */

	MT_DEBUG("qtaguid: tag_stat: stat_update() dev=%s entry=%p\n",
		 ifname, iface_entry);

	/*
	 * Look for a tagged sock.
	 * It will have an acct_uid.
	 *
	 * The entry is only dereferenced while sock_tag_list_lock is held:
	 * get_sock_stat() drops the lock before returning, which would leave
	 * the read of ->tag unprotected against a concurrent change of the
	 * tree.
	 */
	spin_lock_bh(&sock_tag_list_lock);
	sock_tag_entry = sk ? get_sock_stat_nl(sk) : NULL;
	if (sock_tag_entry) {
		tag = sock_tag_entry->tag;
		acct_tag = get_atag_from_tag(tag);
		uid_tag = get_utag_from_tag(tag);
	} else {
		acct_tag = make_atag_from_value(0);
		tag = combine_atag_with_uid(acct_tag, uid);
		uid_tag = make_tag_from_uid(uid);
	}
	spin_unlock_bh(&sock_tag_list_lock);

	MT_DEBUG("qtaguid: tag_stat: stat_update(): "
		 " looking for tag=0x%llx (uid=%u) in ife=%p\n",
		 tag, get_uid_from_tag(tag), iface_entry);
	/* Search the tags under this interface for {acct_tag,uid_tag} */
	spin_lock_bh(&iface_entry->tag_stat_list_lock);

	tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
					      tag);
	if (tag_stat_entry) {
		/*
		 * Updating the {acct_tag, uid_tag} entry handles both stats:
		 * {0, uid_tag} will also get updated.
		 */
		tag_stat_update(tag_stat_entry, direction, proto, bytes);
		goto unlock;
	}

	/* Search the tags under this interface for {0,uid_tag} */
	tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
					      uid_tag);
	if (!tag_stat_entry) {
		/* Here: the base uid_tag did not exist */
		/*
		 * No parent counters. So
		 *  - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
		 */
		new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
		if (!new_tag_stat)
			goto unlock;
		uid_tag_counters = &new_tag_stat->counters;
	} else {
		uid_tag_counters = &tag_stat_entry->counters;
	}

	if (acct_tag) {
		/* Create the child {acct_tag, uid_tag} and hook up parent. */
		new_tag_stat = create_if_tag_stat(iface_entry, tag);
		if (!new_tag_stat)
			goto unlock;
		new_tag_stat->parent_counters = uid_tag_counters;
	} else {
		/*
		 * For new_tag_stat to be still NULL here would require:
		 *  {0, uid_tag} exists
		 *  and {acct_tag, uid_tag} doesn't exist
		 *  AND acct_tag == 0.
		 * Impossible. This reassures us that new_tag_stat
		 * below will always be assigned.
		 */
		BUG_ON(!new_tag_stat);
	}
	tag_stat_update(new_tag_stat, direction, proto, bytes);
unlock:
	spin_unlock_bh(&iface_entry->tag_stat_list_lock);
	spin_unlock_bh(&iface_stat_list_lock);
}
1383 
iface_netdev_event_handler(struct notifier_block * nb,unsigned long event,void * ptr)1384 static int iface_netdev_event_handler(struct notifier_block *nb,
1385 				      unsigned long event, void *ptr) {
1386 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1387 
1388 	if (unlikely(module_passive))
1389 		return NOTIFY_DONE;
1390 
1391 	IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1392 		 "ev=0x%lx/%s netdev=%p->name=%s\n",
1393 		 event, netdev_evt_str(event), dev, dev ? dev->name : "");
1394 
1395 	switch (event) {
1396 	case NETDEV_UP:
1397 		iface_stat_create(dev, NULL);
1398 		atomic64_inc(&qtu_events.iface_events);
1399 		break;
1400 	case NETDEV_DOWN:
1401 	case NETDEV_UNREGISTER:
1402 		iface_stat_update(dev, event == NETDEV_DOWN);
1403 		atomic64_inc(&qtu_events.iface_events);
1404 		break;
1405 	}
1406 	return NOTIFY_DONE;
1407 }
1408 
iface_inet6addr_event_handler(struct notifier_block * nb,unsigned long event,void * ptr)1409 static int iface_inet6addr_event_handler(struct notifier_block *nb,
1410 					 unsigned long event, void *ptr)
1411 {
1412 	struct inet6_ifaddr *ifa = ptr;
1413 	struct net_device *dev;
1414 
1415 	if (unlikely(module_passive))
1416 		return NOTIFY_DONE;
1417 
1418 	IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1419 		 "ev=0x%lx/%s ifa=%p\n",
1420 		 event, netdev_evt_str(event), ifa);
1421 
1422 	switch (event) {
1423 	case NETDEV_UP:
1424 		BUG_ON(!ifa || !ifa->idev);
1425 		dev = (struct net_device *)ifa->idev->dev;
1426 		iface_stat_create_ipv6(dev, ifa);
1427 		atomic64_inc(&qtu_events.iface_events);
1428 		break;
1429 	case NETDEV_DOWN:
1430 	case NETDEV_UNREGISTER:
1431 		BUG_ON(!ifa || !ifa->idev);
1432 		dev = (struct net_device *)ifa->idev->dev;
1433 		iface_stat_update(dev, event == NETDEV_DOWN);
1434 		atomic64_inc(&qtu_events.iface_events);
1435 		break;
1436 	}
1437 	return NOTIFY_DONE;
1438 }
1439 
iface_inetaddr_event_handler(struct notifier_block * nb,unsigned long event,void * ptr)1440 static int iface_inetaddr_event_handler(struct notifier_block *nb,
1441 					unsigned long event, void *ptr)
1442 {
1443 	struct in_ifaddr *ifa = ptr;
1444 	struct net_device *dev;
1445 
1446 	if (unlikely(module_passive))
1447 		return NOTIFY_DONE;
1448 
1449 	IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1450 		 "ev=0x%lx/%s ifa=%p\n",
1451 		 event, netdev_evt_str(event), ifa);
1452 
1453 	switch (event) {
1454 	case NETDEV_UP:
1455 		BUG_ON(!ifa || !ifa->ifa_dev);
1456 		dev = ifa->ifa_dev->dev;
1457 		iface_stat_create(dev, ifa);
1458 		atomic64_inc(&qtu_events.iface_events);
1459 		break;
1460 	case NETDEV_DOWN:
1461 	case NETDEV_UNREGISTER:
1462 		BUG_ON(!ifa || !ifa->ifa_dev);
1463 		dev = ifa->ifa_dev->dev;
1464 		iface_stat_update(dev, event == NETDEV_DOWN);
1465 		atomic64_inc(&qtu_events.iface_events);
1466 		break;
1467 	}
1468 	return NOTIFY_DONE;
1469 }
1470 
1471 static struct notifier_block iface_netdev_notifier_blk = {
1472 	.notifier_call = iface_netdev_event_handler,
1473 };
1474 
1475 static struct notifier_block iface_inetaddr_notifier_blk = {
1476 	.notifier_call = iface_inetaddr_event_handler,
1477 };
1478 
1479 static struct notifier_block iface_inet6addr_notifier_blk = {
1480 	.notifier_call = iface_inet6addr_event_handler,
1481 };
1482 
1483 static const struct seq_operations iface_stat_fmt_proc_seq_ops = {
1484 	.start	= iface_stat_fmt_proc_start,
1485 	.next	= iface_stat_fmt_proc_next,
1486 	.stop	= iface_stat_fmt_proc_stop,
1487 	.show	= iface_stat_fmt_proc_show,
1488 };
1489 
proc_iface_stat_fmt_open(struct inode * inode,struct file * file)1490 static int proc_iface_stat_fmt_open(struct inode *inode, struct file *file)
1491 {
1492 	struct proc_iface_stat_fmt_info *s;
1493 
1494 	s = __seq_open_private(file, &iface_stat_fmt_proc_seq_ops,
1495 			sizeof(struct proc_iface_stat_fmt_info));
1496 	if (!s)
1497 		return -ENOMEM;
1498 
1499 	s->fmt = (uintptr_t)PDE_DATA(inode);
1500 	return 0;
1501 }
1502 
1503 static const struct file_operations proc_iface_stat_fmt_fops = {
1504 	.open		= proc_iface_stat_fmt_open,
1505 	.read		= seq_read,
1506 	.llseek		= seq_lseek,
1507 	.release	= seq_release_private,
1508 };
1509 
iface_stat_init(struct proc_dir_entry * parent_procdir)1510 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1511 {
1512 	int err;
1513 
1514 	iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1515 	if (!iface_stat_procdir) {
1516 		pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1517 		err = -1;
1518 		goto err;
1519 	}
1520 
1521 	iface_stat_all_procfile = proc_create_data(iface_stat_all_procfilename,
1522 						   proc_iface_perms,
1523 						   parent_procdir,
1524 						   &proc_iface_stat_fmt_fops,
1525 						   (void *)1 /* fmt1 */);
1526 	if (!iface_stat_all_procfile) {
1527 		pr_err("qtaguid: iface_stat: init "
1528 		       " failed to create stat_old proc entry\n");
1529 		err = -1;
1530 		goto err_zap_entry;
1531 	}
1532 
1533 	iface_stat_fmt_procfile = proc_create_data(iface_stat_fmt_procfilename,
1534 						   proc_iface_perms,
1535 						   parent_procdir,
1536 						   &proc_iface_stat_fmt_fops,
1537 						   (void *)2 /* fmt2 */);
1538 	if (!iface_stat_fmt_procfile) {
1539 		pr_err("qtaguid: iface_stat: init "
1540 		       " failed to create stat_all proc entry\n");
1541 		err = -1;
1542 		goto err_zap_all_stats_entry;
1543 	}
1544 
1545 
1546 	err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1547 	if (err) {
1548 		pr_err("qtaguid: iface_stat: init "
1549 		       "failed to register dev event handler\n");
1550 		goto err_zap_all_stats_entries;
1551 	}
1552 	err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1553 	if (err) {
1554 		pr_err("qtaguid: iface_stat: init "
1555 		       "failed to register ipv4 dev event handler\n");
1556 		goto err_unreg_nd;
1557 	}
1558 
1559 	err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1560 	if (err) {
1561 		pr_err("qtaguid: iface_stat: init "
1562 		       "failed to register ipv6 dev event handler\n");
1563 		goto err_unreg_ip4_addr;
1564 	}
1565 	return 0;
1566 
1567 err_unreg_ip4_addr:
1568 	unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1569 err_unreg_nd:
1570 	unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1571 err_zap_all_stats_entries:
1572 	remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
1573 err_zap_all_stats_entry:
1574 	remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
1575 err_zap_entry:
1576 	remove_proc_entry(iface_stat_procdirname, parent_procdir);
1577 err:
1578 	return err;
1579 }
1580 
qtaguid_find_sk(const struct sk_buff * skb,struct xt_action_param * par)1581 static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1582 				    struct xt_action_param *par)
1583 {
1584 	struct sock *sk;
1585 	unsigned int hook_mask = (1 << par->hooknum);
1586 
1587 	MT_DEBUG("qtaguid[%d]: find_sk(skb=%p) family=%d\n",
1588 		 par->hooknum, skb, par->family);
1589 
1590 	/*
1591 	 * Let's not abuse the the xt_socket_get*_sk(), or else it will
1592 	 * return garbage SKs.
1593 	 */
1594 	if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1595 		return NULL;
1596 
1597 	switch (par->family) {
1598 	case NFPROTO_IPV6:
1599 		sk = xt_socket_lookup_slow_v6(dev_net(skb->dev), skb, par->in);
1600 		break;
1601 	case NFPROTO_IPV4:
1602 		sk = xt_socket_lookup_slow_v4(dev_net(skb->dev), skb, par->in);
1603 		break;
1604 	default:
1605 		return NULL;
1606 	}
1607 
1608 	if (sk) {
1609 		MT_DEBUG("qtaguid[%d]: %p->sk_proto=%u->sk_state=%d\n",
1610 			 par->hooknum, sk, sk->sk_protocol, sk->sk_state);
1611 	}
1612 	return sk;
1613 }
1614 
/*
 * Charge the packet in @skb to @uid in the per-interface tag statistics.
 *
 * The socket used for the tag lookup is skb->sk when set, otherwise
 * @alternate_sk (which may itself be NULL).
 */
static void account_for_uid(const struct sk_buff *skb,
			    const struct sock *alternate_sk, uid_t uid,
			    struct xt_action_param *par)
{
	const struct net_device *dev;
	const struct sock *owner;
	enum ifs_tx_rx dir;
	int proto;

	get_dev_and_dir(skb, par, &dir, &dev);
	proto = ipx_proto(skb, par);
	MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d dir=%d\n",
		 par->hooknum, dev->name, dev->type,
		 par->family, proto, dir);

	owner = skb->sk ? skb->sk : alternate_sk;
	if_tag_stat_update(dev->name, uid, owner, dir, proto, skb->len);
}
1634 
qtaguid_mt(const struct sk_buff * skb,struct xt_action_param * par)1635 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
1636 {
1637 	const struct xt_qtaguid_match_info *info = par->matchinfo;
1638 	const struct file *filp;
1639 	bool got_sock = false;
1640 	struct sock *sk;
1641 	kuid_t sock_uid;
1642 	bool res;
1643 	bool set_sk_callback_lock = false;
1644 	/*
1645 	 * TODO: unhack how to force just accounting.
1646 	 * For now we only do tag stats when the uid-owner is not requested
1647 	 */
1648 	bool do_tag_stat = !(info->match & XT_QTAGUID_UID);
1649 
1650 	if (unlikely(module_passive))
1651 		return (info->match ^ info->invert) == 0;
1652 
1653 	MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
1654 		 par->hooknum, skb, par->in, par->out, par->family);
1655 
1656 	atomic64_inc(&qtu_events.match_calls);
1657 	if (skb == NULL) {
1658 		res = (info->match ^ info->invert) == 0;
1659 		goto ret_res;
1660 	}
1661 
1662 	switch (par->hooknum) {
1663 	case NF_INET_PRE_ROUTING:
1664 	case NF_INET_POST_ROUTING:
1665 		atomic64_inc(&qtu_events.match_calls_prepost);
1666 		iface_stat_update_from_skb(skb, par);
1667 		/*
1668 		 * We are done in pre/post. The skb will get processed
1669 		 * further alter.
1670 		 */
1671 		res = (info->match ^ info->invert);
1672 		goto ret_res;
1673 		break;
1674 	/* default: Fall through and do UID releated work */
1675 	}
1676 
1677 	sk = skb_to_full_sk(skb);
1678 	/*
1679 	 * When in TCP_TIME_WAIT the sk is not a "struct sock" but
1680 	 * "struct inet_timewait_sock" which is missing fields.
1681 	 * So we ignore it.
1682 	 */
1683 	if (sk && sk->sk_state == TCP_TIME_WAIT)
1684 		sk = NULL;
1685 	if (sk == NULL) {
1686 		/*
1687 		 * A missing sk->sk_socket happens when packets are in-flight
1688 		 * and the matching socket is already closed and gone.
1689 		 */
1690 		sk = qtaguid_find_sk(skb, par);
1691 		/*
1692 		 * TCP_NEW_SYN_RECV are not "struct sock" but "struct request_sock"
1693 		 * where we can get a pointer to a full socket to retrieve uid/gid.
1694 		 * When in TCP_TIME_WAIT, sk is a struct inet_timewait_sock
1695 		 * which is missing fields and does not contain any reference
1696 		 * to a full socket, so just ignore the socket.
1697 		 */
1698 		if (sk && sk->sk_state == TCP_NEW_SYN_RECV) {
1699 			sock_gen_put(sk);
1700 			sk = sk_to_full_sk(sk);
1701 		} else if (sk && (!sk_fullsock(sk) || sk->sk_state == TCP_TIME_WAIT)) {
1702 			sock_gen_put(sk);
1703 			sk = NULL;
1704 		} else {
1705 			/*
1706 			 * If we got the socket from the find_sk(), we will need to put
1707 			 * it back, as nf_tproxy_get_sock_v4() got it.
1708 			 */
1709 			got_sock = sk;
1710 		}
1711 		if (sk)
1712 			atomic64_inc(&qtu_events.match_found_sk_in_ct);
1713 		else
1714 			atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
1715 	} else {
1716 		atomic64_inc(&qtu_events.match_found_sk);
1717 	}
1718 	MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
1719 		 par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));
1720 
1721 	if (!sk) {
1722 		/*
1723 		 * Here, the qtaguid_find_sk() using connection tracking
1724 		 * couldn't find the owner, so for now we just count them
1725 		 * against the system.
1726 		 */
1727 		if (do_tag_stat)
1728 			account_for_uid(skb, sk, 0, par);
1729 		MT_DEBUG("qtaguid[%d]: leaving (sk=NULL)\n", par->hooknum);
1730 		res = (info->match ^ info->invert) == 0;
1731 		atomic64_inc(&qtu_events.match_no_sk);
1732 		goto put_sock_ret_res;
1733 	} else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
1734 		res = false;
1735 		goto put_sock_ret_res;
1736 	}
1737 	sock_uid = sk->sk_uid;
1738 	if (do_tag_stat)
1739 		account_for_uid(skb, sk, from_kuid(&init_user_ns, sock_uid),
1740 				par);
1741 
1742 	/*
1743 	 * The following two tests fail the match when:
1744 	 *    id not in range AND no inverted condition requested
1745 	 * or id     in range AND    inverted condition requested
1746 	 * Thus (!a && b) || (a && !b) == a ^ b
1747 	 */
1748 	if (info->match & XT_QTAGUID_UID) {
1749 		kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min);
1750 		kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max);
1751 
1752 		if ((uid_gte(sock_uid, uid_min) &&
1753 		     uid_lte(sock_uid, uid_max)) ^
1754 		    !(info->invert & XT_QTAGUID_UID)) {
1755 			MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
1756 				 par->hooknum);
1757 			res = false;
1758 			goto put_sock_ret_res;
1759 		}
1760 	}
1761 	if (info->match & XT_QTAGUID_GID) {
1762 		kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min);
1763 		kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max);
1764 		set_sk_callback_lock = true;
1765 		read_lock_bh(&sk->sk_callback_lock);
1766 		MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
1767 			 par->hooknum, sk, sk->sk_socket,
1768 			 sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
1769 		filp = sk->sk_socket ? sk->sk_socket->file : NULL;
1770 		if (!filp) {
1771 			res = ((info->match ^ info->invert) &
1772 			       XT_QTAGUID_GID) == 0;
1773 			atomic64_inc(&qtu_events.match_no_sk_gid);
1774 			goto put_sock_ret_res;
1775 		}
1776 		MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
1777 			 par->hooknum, filp ?
1778 			 from_kuid(&init_user_ns, filp->f_cred->fsuid) : -1);
1779 		if ((gid_gte(filp->f_cred->fsgid, gid_min) &&
1780 				gid_lte(filp->f_cred->fsgid, gid_max)) ^
1781 			!(info->invert & XT_QTAGUID_GID)) {
1782 			MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
1783 				par->hooknum);
1784 			res = false;
1785 			goto put_sock_ret_res;
1786 		}
1787 	}
1788 	MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
1789 	res = true;
1790 
1791 put_sock_ret_res:
1792 	if (got_sock)
1793 		sock_gen_put(sk);
1794 	if (set_sk_callback_lock)
1795 		read_unlock_bh(&sk->sk_callback_lock);
1796 ret_res:
1797 	MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
1798 	return res;
1799 }
1800 
#ifdef DDEBUG
/*
 * Dump the module's full state (sock tag tree, uid tag data tree, proc qtu
 * data tree and iface stat list) through pr_debug(), preceded by a header
 * line built from @fmt and its arguments.
 *
 * This function is not in xt_qtaguid_print.c because of locks visibility.
 * The lock of sock_tag_list must be acquired before calling this function;
 * uid_tag_data_tree_lock and iface_stat_list_lock are taken here.
 *
 * Does nothing unless DDEBUG_MASK is set in qtaguid_debug_mask. The header
 * is printed with %pV, so no memory is allocated and an allocation failure
 * can no longer bring the kernel down from a debug print.
 */
static void prdebug_full_state_locked(int indent_level, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (likely(!(qtaguid_debug_mask & DDEBUG_MASK)))
		return;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	pr_debug("qtaguid: %s(): %pV {\n", __func__, &vaf);
	va_end(args);

	prdebug_sock_tag_tree(indent_level, &sock_tag_tree);

	spin_lock_bh(&uid_tag_data_tree_lock);
	prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
	prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
	spin_unlock_bh(&uid_tag_data_tree_lock);

	spin_lock_bh(&iface_stat_list_lock);
	prdebug_iface_stat_list(indent_level, &iface_stat_list);
	spin_unlock_bh(&iface_stat_list_lock);

	pr_debug("qtaguid: %s(): }\n", __func__);
}
#else
/* Without DDEBUG the state dump compiles to nothing. */
static void prdebug_full_state_locked(int indent_level, const char *fmt, ...) {}
#endif
1843 
/*
 * Iteration state of the ctrl proc reader, kept in seq_file->private so that
 * qtaguid_ctrl_proc_start() can pick up where a previous read stopped.
 */
struct proc_ctrl_print_info {
	struct sock *sk; /* socket found by reading to sk_pos */
	loff_t sk_pos; /* value of *pos at the time sk was recorded */
};
1848 
/*
 * Advance the ctrl proc iteration by one item.
 *
 * The sequence is every sock_tag in sock_tag_tree order, followed by one
 * trailing SEQ_START_TOKEN item (shown as the "events:" line). *pos is always
 * incremented. Returns the next item, or NULL when @v is NULL or already the
 * trailing token. The socket of the returned tag (NULL for the token) and the
 * new position are remembered in the seq_file private data.
 */
static void *qtaguid_ctrl_proc_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_ctrl_print_info *state = m->private;
	struct sock_tag *cur = v;
	struct rb_node *next_node;
	void *next_item;

	*pos += 1;

	/* Nothing follows the end of the sequence or the trailing token. */
	if (v == NULL || v == SEQ_START_TOKEN)
		return NULL;

	next_node = rb_next(&cur->sock_node);
	if (next_node) {
		struct sock_tag *next_tag =
			rb_entry(next_node, struct sock_tag, sock_node);

		state->sk = next_tag->sk;
		next_item = next_tag;
	} else {
		/* Ran out of tags: hand out the trailing token. */
		state->sk = NULL;
		next_item = SEQ_START_TOKEN;
	}
	state->sk_pos = *pos;
	return next_item;
}
1871 
/*
 * Begin (or resume) a ctrl proc read.
 *
 * Takes sock_tag_list_lock on every path, including the ones returning NULL;
 * it is dropped again in qtaguid_ctrl_proc_stop().
 *
 * At *pos == 0 the first sock_tag of the tree is returned, or SEQ_START_TOKEN
 * when the tree is empty. Otherwise the tag of the socket remembered in the
 * private data is looked up again (falling back to SEQ_START_TOKEN when there
 * is no remembered socket or it is no longer tagged); if *pos does not match
 * the remembered position, one qtaguid_ctrl_proc_next() step is replayed.
 */
static void *qtaguid_ctrl_proc_start(struct seq_file *m, loff_t *pos)
{
	struct proc_ctrl_print_info *state = m->private;
	struct sock_tag *resume_tag = NULL;
	struct rb_node *first;
	void *item;

	spin_lock_bh(&sock_tag_list_lock);

	if (unlikely(module_passive))
		return NULL;

	if (*pos == 0) {
		state->sk_pos = 0;
		first = rb_first(&sock_tag_tree);
		if (first == NULL) {
			state->sk = NULL;
			return SEQ_START_TOKEN;
		}
		resume_tag = rb_entry(first, struct sock_tag, sock_node);
		state->sk = resume_tag->sk;
		return resume_tag;
	}

	if (state->sk)
		resume_tag = get_sock_stat_nl(state->sk);
	item = resume_tag ? (void *)resume_tag : SEQ_START_TOKEN;

	if (*pos == state->sk_pos)
		return item;

	/* seq_read skipped a next call */
	*pos = state->sk_pos;
	return qtaguid_ctrl_proc_next(m, item, pos);
}
1903 
/*
 * End a ctrl proc read: drops sock_tag_list_lock, which
 * qtaguid_ctrl_proc_start() takes before doing anything else.
 * Neither argument is used.
 */
static void qtaguid_ctrl_proc_stop(struct seq_file *m, void *v)
{
	spin_unlock_bh(&sock_tag_list_lock);
}
1908 
1909 /*
1910  * Procfs reader to get all active socket tags using style "1)" as described in
1911  * fs/proc/generic.c
1912  */
qtaguid_ctrl_proc_show(struct seq_file * m,void * v)1913 static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v)
1914 {
1915 	struct sock_tag *sock_tag_entry = v;
1916 	uid_t uid;
1917 
1918 	CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u\n",
1919 		 current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
1920 
1921 	if (sock_tag_entry != SEQ_START_TOKEN) {
1922 		int sk_ref_count;
1923 		uid = get_uid_from_tag(sock_tag_entry->tag);
1924 		CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
1925 			 "pid=%u\n",
1926 			 sock_tag_entry->sk,
1927 			 sock_tag_entry->tag,
1928 			 uid,
1929 			 sock_tag_entry->pid
1930 			);
1931 		sk_ref_count = atomic_read(
1932 			&sock_tag_entry->sk->sk_refcnt);
1933 		seq_printf(m, "sock=%pK tag=0x%llx (uid=%u) pid=%u "
1934 			   "f_count=%d\n",
1935 			   sock_tag_entry->sk,
1936 			   sock_tag_entry->tag, uid,
1937 			   sock_tag_entry->pid, sk_ref_count);
1938 	} else {
1939 		seq_printf(m, "events: sockets_tagged=%llu "
1940 			   "sockets_untagged=%llu "
1941 			   "counter_set_changes=%llu "
1942 			   "delete_cmds=%llu "
1943 			   "iface_events=%llu "
1944 			   "match_calls=%llu "
1945 			   "match_calls_prepost=%llu "
1946 			   "match_found_sk=%llu "
1947 			   "match_found_sk_in_ct=%llu "
1948 			   "match_found_no_sk_in_ct=%llu "
1949 			   "match_no_sk=%llu "
1950 			   "match_no_sk_gid=%llu\n",
1951 			   (u64)atomic64_read(&qtu_events.sockets_tagged),
1952 			   (u64)atomic64_read(&qtu_events.sockets_untagged),
1953 			   (u64)atomic64_read(&qtu_events.counter_set_changes),
1954 			   (u64)atomic64_read(&qtu_events.delete_cmds),
1955 			   (u64)atomic64_read(&qtu_events.iface_events),
1956 			   (u64)atomic64_read(&qtu_events.match_calls),
1957 			   (u64)atomic64_read(&qtu_events.match_calls_prepost),
1958 			   (u64)atomic64_read(&qtu_events.match_found_sk),
1959 			   (u64)atomic64_read(&qtu_events.match_found_sk_in_ct),
1960 			   (u64)atomic64_read(&qtu_events.match_found_no_sk_in_ct),
1961 			   (u64)atomic64_read(&qtu_events.match_no_sk),
1962 			   (u64)atomic64_read(&qtu_events.match_no_sk_gid));
1963 
1964 		/* Count the following as part of the last item_index. No need
1965 		 * to lock the sock_tag_list here since it is already locked when
1966 		 * starting the seq_file operation
1967 		 */
1968 		prdebug_full_state_locked(0, "proc ctrl");
1969 	}
1970 
1971 	return 0;
1972 }
1973 
1974 /*
1975  * Delete socket tags, and stat tags associated with a given
1976  * accouting tag and uid.
1977  */
ctrl_cmd_delete(const char * input)1978 static int ctrl_cmd_delete(const char *input)
1979 {
1980 	char cmd;
1981 	int uid_int;
1982 	kuid_t uid;
1983 	uid_t entry_uid;
1984 	tag_t acct_tag;
1985 	tag_t tag;
1986 	int res, argc;
1987 	struct iface_stat *iface_entry;
1988 	struct rb_node *node;
1989 	struct sock_tag *st_entry;
1990 	struct rb_root st_to_free_tree = RB_ROOT;
1991 	struct tag_stat *ts_entry;
1992 	struct tag_counter_set *tcs_entry;
1993 	struct tag_ref *tr_entry;
1994 	struct uid_tag_data *utd_entry;
1995 
1996 	argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid_int);
1997 	uid = make_kuid(&init_user_ns, uid_int);
1998 	CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
1999 		 "user_tag=0x%llx uid=%u\n", input, argc, cmd,
2000 		 acct_tag, uid_int);
2001 	if (argc < 2) {
2002 		res = -EINVAL;
2003 		goto err;
2004 	}
2005 	if (!valid_atag(acct_tag)) {
2006 		pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
2007 		res = -EINVAL;
2008 		goto err;
2009 	}
2010 	if (argc < 3) {
2011 		uid = current_fsuid();
2012 		uid_int = from_kuid(&init_user_ns, uid);
2013 	} else if (!can_impersonate_uid(uid)) {
2014 		pr_info("qtaguid: ctrl_delete(%s): "
2015 			"insufficient priv from pid=%u tgid=%u uid=%u\n",
2016 			input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2017 		res = -EPERM;
2018 		goto err;
2019 	}
2020 
2021 	tag = combine_atag_with_uid(acct_tag, uid_int);
2022 	CT_DEBUG("qtaguid: ctrl_delete(%s): "
2023 		 "looking for tag=0x%llx (uid=%u)\n",
2024 		 input, tag, uid_int);
2025 
2026 	/* Delete socket tags */
2027 	spin_lock_bh(&sock_tag_list_lock);
2028 	spin_lock_bh(&uid_tag_data_tree_lock);
2029 	node = rb_first(&sock_tag_tree);
2030 	while (node) {
2031 		st_entry = rb_entry(node, struct sock_tag, sock_node);
2032 		entry_uid = get_uid_from_tag(st_entry->tag);
2033 		node = rb_next(node);
2034 		if (entry_uid != uid_int)
2035 			continue;
2036 
2037 		CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
2038 			 input, st_entry->tag, entry_uid);
2039 
2040 		if (!acct_tag || st_entry->tag == tag) {
2041 			rb_erase(&st_entry->sock_node, &sock_tag_tree);
2042 			/* Can't sockfd_put() within spinlock, do it later. */
2043 			sock_tag_tree_insert(st_entry, &st_to_free_tree);
2044 			tr_entry = lookup_tag_ref(st_entry->tag, NULL);
2045 			BUG_ON(tr_entry->num_sock_tags <= 0);
2046 			tr_entry->num_sock_tags--;
2047 			/*
2048 			 * TODO: remove if, and start failing.
2049 			 * This is a hack to work around the fact that in some
2050 			 * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
2051 			 * and are trying to work around apps
2052 			 * that didn't open the /dev/xt_qtaguid.
2053 			 */
2054 			if (st_entry->list.next && st_entry->list.prev)
2055 				list_del(&st_entry->list);
2056 		}
2057 	}
2058 	spin_unlock_bh(&uid_tag_data_tree_lock);
2059 	spin_unlock_bh(&sock_tag_list_lock);
2060 
2061 	sock_tag_tree_erase(&st_to_free_tree);
2062 
2063 	/* Delete tag counter-sets */
2064 	spin_lock_bh(&tag_counter_set_list_lock);
2065 	/* Counter sets are only on the uid tag, not full tag */
2066 	tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2067 	if (tcs_entry) {
2068 		CT_DEBUG("qtaguid: ctrl_delete(%s): "
2069 			 "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
2070 			 input,
2071 			 tcs_entry->tn.tag,
2072 			 get_uid_from_tag(tcs_entry->tn.tag),
2073 			 tcs_entry->active_set);
2074 		rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
2075 		kfree(tcs_entry);
2076 	}
2077 	spin_unlock_bh(&tag_counter_set_list_lock);
2078 
2079 	/*
2080 	 * If acct_tag is 0, then all entries belonging to uid are
2081 	 * erased.
2082 	 */
2083 	spin_lock_bh(&iface_stat_list_lock);
2084 	list_for_each_entry(iface_entry, &iface_stat_list, list) {
2085 		spin_lock_bh(&iface_entry->tag_stat_list_lock);
2086 		node = rb_first(&iface_entry->tag_stat_tree);
2087 		while (node) {
2088 			ts_entry = rb_entry(node, struct tag_stat, tn.node);
2089 			entry_uid = get_uid_from_tag(ts_entry->tn.tag);
2090 			node = rb_next(node);
2091 
2092 			CT_DEBUG("qtaguid: ctrl_delete(%s): "
2093 				 "ts tag=0x%llx (uid=%u)\n",
2094 				 input, ts_entry->tn.tag, entry_uid);
2095 
2096 			if (entry_uid != uid_int)
2097 				continue;
2098 			if (!acct_tag || ts_entry->tn.tag == tag) {
2099 				CT_DEBUG("qtaguid: ctrl_delete(%s): "
2100 					 "erase ts: %s 0x%llx %u\n",
2101 					 input, iface_entry->ifname,
2102 					 get_atag_from_tag(ts_entry->tn.tag),
2103 					 entry_uid);
2104 				rb_erase(&ts_entry->tn.node,
2105 					 &iface_entry->tag_stat_tree);
2106 				kfree(ts_entry);
2107 			}
2108 		}
2109 		spin_unlock_bh(&iface_entry->tag_stat_list_lock);
2110 	}
2111 	spin_unlock_bh(&iface_stat_list_lock);
2112 
2113 	/* Cleanup the uid_tag_data */
2114 	spin_lock_bh(&uid_tag_data_tree_lock);
2115 	node = rb_first(&uid_tag_data_tree);
2116 	while (node) {
2117 		utd_entry = rb_entry(node, struct uid_tag_data, node);
2118 		entry_uid = utd_entry->uid;
2119 		node = rb_next(node);
2120 
2121 		CT_DEBUG("qtaguid: ctrl_delete(%s): "
2122 			 "utd uid=%u\n",
2123 			 input, entry_uid);
2124 
2125 		if (entry_uid != uid_int)
2126 			continue;
2127 		/*
2128 		 * Go over the tag_refs, and those that don't have
2129 		 * sock_tags using them are freed.
2130 		 */
2131 		put_tag_ref_tree(tag, utd_entry);
2132 		put_utd_entry(utd_entry);
2133 	}
2134 	spin_unlock_bh(&uid_tag_data_tree_lock);
2135 
2136 	atomic64_inc(&qtu_events.delete_cmds);
2137 	res = 0;
2138 
2139 err:
2140 	return res;
2141 }
2142 
ctrl_cmd_counter_set(const char * input)2143 static int ctrl_cmd_counter_set(const char *input)
2144 {
2145 	char cmd;
2146 	uid_t uid = 0;
2147 	tag_t tag;
2148 	int res, argc;
2149 	struct tag_counter_set *tcs;
2150 	int counter_set;
2151 
2152 	argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
2153 	CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
2154 		 "set=%d uid=%u\n", input, argc, cmd,
2155 		 counter_set, uid);
2156 	if (argc != 3) {
2157 		res = -EINVAL;
2158 		goto err;
2159 	}
2160 	if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
2161 		pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
2162 			input);
2163 		res = -EINVAL;
2164 		goto err;
2165 	}
2166 	if (!can_manipulate_uids()) {
2167 		pr_info("qtaguid: ctrl_counterset(%s): "
2168 			"insufficient priv from pid=%u tgid=%u uid=%u\n",
2169 			input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2170 		res = -EPERM;
2171 		goto err;
2172 	}
2173 
2174 	tag = make_tag_from_uid(uid);
2175 	spin_lock_bh(&tag_counter_set_list_lock);
2176 	tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2177 	if (!tcs) {
2178 		tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
2179 		if (!tcs) {
2180 			spin_unlock_bh(&tag_counter_set_list_lock);
2181 			pr_err("qtaguid: ctrl_counterset(%s): "
2182 			       "failed to alloc counter set\n",
2183 			       input);
2184 			res = -ENOMEM;
2185 			goto err;
2186 		}
2187 		tcs->tn.tag = tag;
2188 		tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
2189 		CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
2190 			 "(uid=%u) set=%d\n",
2191 			 input, tag, get_uid_from_tag(tag), counter_set);
2192 	}
2193 	tcs->active_set = counter_set;
2194 	spin_unlock_bh(&tag_counter_set_list_lock);
2195 	atomic64_inc(&qtu_events.counter_set_changes);
2196 	res = 0;
2197 
2198 err:
2199 	return res;
2200 }
2201 
/*
 * Tag (or retag) a socket with an accounting tag and uid.
 *
 * Command format: "t <sock_fd> [<acct_tag> [<uid>]]". A missing acct_tag
 * defaults to make_atag_from_value(0); a missing uid defaults to the caller's
 * fsuid, and naming a uid requires can_impersonate_uid().
 *
 * A socket that already has a sock_tag gets its tag replaced and the
 * reference counts of the old and new tag_ref adjusted. Otherwise a new
 * sock_tag is allocated, a reference on the sk is taken and kept until the
 * socket is untagged, and the entry is linked into sock_tag_tree (and into the
 * calling process' sock_tag_list when that process has a proc_qtu_data entry).
 *
 * Returns 0 on success, -EINVAL for a malformed command or invalid tag,
 * -EPERM when the caller may not use the requested uid, -ENOMEM when the
 * sock_tag cannot be allocated, or the error reported by sockfd_lookup() /
 * get_tag_ref().
 */
static int ctrl_cmd_tag(const char *input)
{
	char cmd;
	int sock_fd = 0;
	kuid_t uid;
	unsigned int uid_int = 0;
	tag_t acct_tag = make_atag_from_value(0);
	tag_t full_tag;
	struct socket *el_socket;
	int res, argc;
	struct sock_tag *sock_tag_entry;
	struct tag_ref *tag_ref_entry;
	struct uid_tag_data *uid_tag_data_entry;
	struct proc_qtu_data *pqd_entry;

	/* Unassigned args will get defaulted later. */
	argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid_int);
	uid = make_kuid(&init_user_ns, uid_int);
	CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
		 "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
		 acct_tag, uid_int);
	if (argc < 2) {
		res = -EINVAL;
		goto err;
	}
	el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
	if (!el_socket) {
		/* res was filled in by sockfd_lookup() */
		pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
			" sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
			input, sock_fd, res, current->pid, current->tgid,
			from_kuid(&init_user_ns, current_fsuid()));
		goto err;
	}
	CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->sk_refcnt=%d ->sk=%p\n",
		 input, atomic_read(&el_socket->sk->sk_refcnt),
		 el_socket->sk);
	if (argc < 3) {
		acct_tag = make_atag_from_value(0);
	} else if (!valid_atag(acct_tag)) {
		pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
		res = -EINVAL;
		goto err_put;
	}
	CT_DEBUG("qtaguid: ctrl_tag(%s): "
		 "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
		 "ctrl.gid=%u in_group()=%d in_egroup()=%d\n",
		 input, current->pid, current->tgid,
		 from_kuid(&init_user_ns, current_uid()),
		 from_kuid(&init_user_ns, current_euid()),
		 from_kuid(&init_user_ns, current_fsuid()),
		 from_kgid(&init_user_ns, xt_qtaguid_ctrl_file->gid),
		 in_group_p(xt_qtaguid_ctrl_file->gid),
		 in_egroup_p(xt_qtaguid_ctrl_file->gid));
	if (argc < 4) {
		/* No uid given: tag on behalf of the caller itself. */
		uid = current_fsuid();
		uid_int = from_kuid(&init_user_ns, uid);
	} else if (!can_impersonate_uid(uid)) {
		pr_info("qtaguid: ctrl_tag(%s): "
			"insufficient priv from pid=%u tgid=%u uid=%u\n",
			input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
		res = -EPERM;
		goto err_put;
	}
	full_tag = combine_atag_with_uid(acct_tag, uid_int);

	/* Lock order: sock_tag_list_lock, then uid_tag_data_tree_lock. */
	spin_lock_bh(&sock_tag_list_lock);
	spin_lock_bh(&uid_tag_data_tree_lock);
	sock_tag_entry = get_sock_stat_nl(el_socket->sk);
	tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
	if (IS_ERR(tag_ref_entry)) {
		res = PTR_ERR(tag_ref_entry);
		spin_unlock_bh(&uid_tag_data_tree_lock);
		spin_unlock_bh(&sock_tag_list_lock);
		goto err_put;
	}
	/* Account for the new tag first; it is undone below on failure. */
	tag_ref_entry->num_sock_tags++;
	if (sock_tag_entry) {
		struct tag_ref *prev_tag_ref_entry;

		CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
			 "st@%p ...->sk_refcnt=%d\n",
			 input, el_socket->sk, sock_tag_entry,
			 atomic_read(&el_socket->sk->sk_refcnt));
		/* Retag: drop the count held on the previous tag. */
		prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
						    &uid_tag_data_entry);
		BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
		BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
		prev_tag_ref_entry->num_sock_tags--;
		sock_tag_entry->tag = full_tag;
	} else {
		CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
			 input, el_socket->sk);
		/* Both spinlocks are held, hence GFP_ATOMIC. */
		sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
					 GFP_ATOMIC);
		if (!sock_tag_entry) {
			pr_err("qtaguid: ctrl_tag(%s): "
			       "socket tag alloc failed\n",
			       input);
			/* Undo the num_sock_tags increment done above. */
			BUG_ON(tag_ref_entry->num_sock_tags <= 0);
			tag_ref_entry->num_sock_tags--;
			free_tag_ref_from_utd_entry(tag_ref_entry,
						    uid_tag_data_entry);
			spin_unlock_bh(&uid_tag_data_tree_lock);
			spin_unlock_bh(&sock_tag_list_lock);
			res = -ENOMEM;
			goto err_put;
		}
		/*
		 * Hold the sk refcount here to make sure the sk pointer cannot
		 * be freed and reused
		 */
		sock_hold(el_socket->sk);
		sock_tag_entry->sk = el_socket->sk;
		sock_tag_entry->pid = current->tgid;
		sock_tag_entry->tag = combine_atag_with_uid(acct_tag, uid_int);
		pqd_entry = proc_qtu_data_tree_search(
			&proc_qtu_data_tree, current->tgid);
		/*
		 * TODO: remove if, and start failing.
		 * At first, we want to catch user-space code that is not
		 * opening the /dev/xt_qtaguid.
		 */
		if (IS_ERR_OR_NULL(pqd_entry))
			pr_warn_once(
				"qtaguid: %s(): "
				"User space forgot to open /dev/xt_qtaguid? "
				"pid=%u tgid=%u uid=%u\n", __func__,
				current->pid, current->tgid,
				from_kuid(&init_user_ns, current_fsuid()));
		else
			list_add(&sock_tag_entry->list,
				 &pqd_entry->sock_tag_list);

		sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
		atomic64_inc(&qtu_events.sockets_tagged);
	}
	spin_unlock_bh(&uid_tag_data_tree_lock);
	spin_unlock_bh(&sock_tag_list_lock);
	/* We keep the ref to the sk until it is untagged */
	CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->sk_refcnt=%d\n",
		 input, sock_tag_entry,
		 atomic_read(&el_socket->sk->sk_refcnt));
	/* Only the file reference from sockfd_lookup() is dropped here. */
	sockfd_put(el_socket);
	return 0;

err_put:
	CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->sk_refcnt=%d\n",
		 input, atomic_read(&el_socket->sk->sk_refcnt) - 1);
	/* Release the sock_fd that was grabbed by sockfd_lookup(). */
	sockfd_put(el_socket);
	return res;

err:
	CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
	return res;
}
2358 
ctrl_cmd_untag(const char * input)2359 static int ctrl_cmd_untag(const char *input)
2360 {
2361 	char cmd;
2362 	int sock_fd = 0;
2363 	struct socket *el_socket;
2364 	int res, argc;
2365 
2366 	argc = sscanf(input, "%c %d", &cmd, &sock_fd);
2367 	CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
2368 		 input, argc, cmd, sock_fd);
2369 	if (argc < 2) {
2370 		res = -EINVAL;
2371 		return res;
2372 	}
2373 	el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
2374 	if (!el_socket) {
2375 		pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
2376 			" sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2377 			input, sock_fd, res, current->pid, current->tgid,
2378 			from_kuid(&init_user_ns, current_fsuid()));
2379 		return res;
2380 	}
2381 	CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
2382 		 input, atomic_long_read(&el_socket->file->f_count),
2383 		 el_socket->sk);
2384 	res = qtaguid_untag(el_socket, false);
2385 	sockfd_put(el_socket);
2386 	return res;
2387 }
2388 
/*
 * Remove the sock_tag attached to a socket.
 *
 * @el_socket: socket whose ->sk is to be untagged.
 * @kernel: when true, the owning process is taken from the pid recorded in
 *          the sock_tag; otherwise the calling process (current->tgid) is
 *          used.
 *
 * Unlinks the entry from sock_tag_tree and, when possible, from the owning
 * process' sock_tag_list, drops the tag_ref count, releases the sk reference
 * held by the tag and frees the entry.
 *
 * Returns 0 on success, -EINVAL when the socket is not tagged.
 */
int qtaguid_untag(struct socket *el_socket, bool kernel)
{
	pid_t pid;
	struct sock_tag *sock_tag_entry;
	struct tag_ref *tag_ref_entry;
	struct proc_qtu_data *pqd_entry;

	spin_lock_bh(&sock_tag_list_lock);
	sock_tag_entry = get_sock_stat_nl(el_socket->sk);
	if (!sock_tag_entry) {
		spin_unlock_bh(&sock_tag_list_lock);
		return -EINVAL;
	}
	/*
	 * The socket already belongs to the current process
	 * so it can do whatever it wants to it.
	 */
	rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);

	/*
	 * Look up and update the tag_ref under uid_tag_data_tree_lock, like
	 * ctrl_cmd_tag() and ctrl_cmd_delete() do: the cleanup pass of
	 * ctrl_cmd_delete() modifies the per-uid data while holding only that
	 * lock. Lock order matches those callers (sock_tag_list_lock first).
	 */
	spin_lock_bh(&uid_tag_data_tree_lock);
	tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, NULL);
	BUG_ON(!tag_ref_entry);
	BUG_ON(tag_ref_entry->num_sock_tags <= 0);
	if (kernel)
		pid = sock_tag_entry->pid;
	else
		pid = current->tgid;
	pqd_entry = proc_qtu_data_tree_search(
		&proc_qtu_data_tree, pid);
	/*
	 * TODO: remove if, and start failing.
	 * At first, we want to catch user-space code that is not
	 * opening the /dev/xt_qtaguid.
	 */
	if (IS_ERR_OR_NULL(pqd_entry) || !sock_tag_entry->list.next) {
		pr_warn_once("qtaguid: %s(): "
			     "User space forgot to open /dev/xt_qtaguid? "
			     "pid=%u tgid=%u sk_pid=%u, uid=%u\n", __func__,
			     current->pid, current->tgid, sock_tag_entry->pid,
			     from_kuid(&init_user_ns, current_fsuid()));
	} else {
		list_del(&sock_tag_entry->list);
	}
	/*
	 * We don't free tag_ref from the utd_entry here,
	 * only during a cmd_delete().
	 */
	tag_ref_entry->num_sock_tags--;
	spin_unlock_bh(&uid_tag_data_tree_lock);
	spin_unlock_bh(&sock_tag_list_lock);
	/*
	 * Release the sk reference that was taken at tag time.
	 */
	sock_put(sock_tag_entry->sk);
	CT_DEBUG("qtaguid: done. st@%p ...->sk_refcnt=%d\n",
		 sock_tag_entry,
		 atomic_read(&el_socket->sk->sk_refcnt));

	kfree(sock_tag_entry);
	atomic64_inc(&qtu_events.sockets_untagged);

	return 0;
}
2455 
qtaguid_ctrl_parse(const char * input,size_t count)2456 static ssize_t qtaguid_ctrl_parse(const char *input, size_t count)
2457 {
2458 	char cmd;
2459 	ssize_t res;
2460 
2461 	CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
2462 		 input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2463 
2464 	cmd = input[0];
2465 	/* Collect params for commands */
2466 	switch (cmd) {
2467 	case 'd':
2468 		res = ctrl_cmd_delete(input);
2469 		break;
2470 
2471 	case 's':
2472 		res = ctrl_cmd_counter_set(input);
2473 		break;
2474 
2475 	case 't':
2476 		res = ctrl_cmd_tag(input);
2477 		break;
2478 
2479 	case 'u':
2480 		res = ctrl_cmd_untag(input);
2481 		break;
2482 
2483 	default:
2484 		res = -EINVAL;
2485 		goto err;
2486 	}
2487 	if (!res)
2488 		res = count;
2489 err:
2490 	CT_DEBUG("qtaguid: ctrl(%s): res=%zd\n", input, res);
2491 	return res;
2492 }
2493 
2494 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
qtaguid_ctrl_proc_write(struct file * file,const char __user * buffer,size_t count,loff_t * offp)2495 static ssize_t qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2496 				   size_t count, loff_t *offp)
2497 {
2498 	char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2499 
2500 	if (unlikely(module_passive))
2501 		return count;
2502 
2503 	if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2504 		return -EINVAL;
2505 
2506 	if (copy_from_user(input_buf, buffer, count))
2507 		return -EFAULT;
2508 
2509 	input_buf[count] = '\0';
2510 	return qtaguid_ctrl_parse(input_buf, count);
2511 }
2512 
/*
 * Iteration state of the stats proc reader, kept in seq_file->private so that
 * qtaguid_stats_proc_start() can resume after a previous read stopped.
 */
struct proc_print_info {
	struct iface_stat *iface_entry; /* interface being walked, NULL when done */
	int item_index; /* running index printed as the first column */
	tag_t tag; /* tag found by reading to tag_pos */
	off_t tag_pos; /* value of *pos when tag was recorded */
	int tag_item_index; /* item_index when tag was recorded */
};
2520 
/*
 * Emit the column header of the stats file; the column order matches the
 * values printed by pp_stats_line().
 */
static void pp_stats_header(struct seq_file *m)
{
	static const char header[] =
		"idx iface acct_tag_hex uid_tag_int cnt_set "
		"rx_bytes rx_packets "
		"tx_bytes tx_packets "
		"rx_tcp_bytes rx_tcp_packets "
		"rx_udp_bytes rx_udp_packets "
		"rx_other_bytes rx_other_packets "
		"tx_tcp_bytes tx_tcp_packets "
		"tx_udp_bytes tx_udp_packets "
		"tx_other_bytes tx_other_packets\n";

	seq_puts(m, header);
}
2534 
pp_stats_line(struct seq_file * m,struct tag_stat * ts_entry,int cnt_set)2535 static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry,
2536 			 int cnt_set)
2537 {
2538 	struct data_counters *cnts;
2539 	tag_t tag = ts_entry->tn.tag;
2540 	uid_t stat_uid = get_uid_from_tag(tag);
2541 	struct proc_print_info *ppi = m->private;
2542 	/* Detailed tags are not available to everybody */
2543 	if (!can_read_other_uid_stats(make_kuid(&init_user_ns,stat_uid))) {
2544 		CT_DEBUG("qtaguid: stats line: "
2545 			 "%s 0x%llx %u: insufficient priv "
2546 			 "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
2547 			 ppi->iface_entry->ifname,
2548 			 get_atag_from_tag(tag), stat_uid,
2549 			 current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()),
2550 			 from_kgid(&init_user_ns,xt_qtaguid_stats_file->gid));
2551 		return 0;
2552 	}
2553 	ppi->item_index++;
2554 	cnts = &ts_entry->counters;
2555 	seq_printf(m, "%d %s 0x%llx %u %u "
2556 		"%llu %llu "
2557 		"%llu %llu "
2558 		"%llu %llu "
2559 		"%llu %llu "
2560 		"%llu %llu "
2561 		"%llu %llu "
2562 		"%llu %llu "
2563 		"%llu %llu\n",
2564 		ppi->item_index,
2565 		ppi->iface_entry->ifname,
2566 		get_atag_from_tag(tag),
2567 		stat_uid,
2568 		cnt_set,
2569 		dc_sum_bytes(cnts, cnt_set, IFS_RX),
2570 		dc_sum_packets(cnts, cnt_set, IFS_RX),
2571 		dc_sum_bytes(cnts, cnt_set, IFS_TX),
2572 		dc_sum_packets(cnts, cnt_set, IFS_TX),
2573 		cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
2574 		cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
2575 		cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
2576 		cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
2577 		cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
2578 		cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
2579 		cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
2580 		cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
2581 		cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
2582 		cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
2583 		cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
2584 		cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
2585 	return seq_has_overflowed(m) ? -ENOSPC : 1;
2586 }
2587 
pp_sets(struct seq_file * m,struct tag_stat * ts_entry)2588 static bool pp_sets(struct seq_file *m, struct tag_stat *ts_entry)
2589 {
2590 	int ret;
2591 	int counter_set;
2592 	for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2593 	     counter_set++) {
2594 		ret = pp_stats_line(m, ts_entry, counter_set);
2595 		if (ret < 0)
2596 			return false;
2597 	}
2598 	return true;
2599 }
2600 
qtaguid_stats_proc_iface_stat_ptr_valid(struct iface_stat * ptr)2601 static int qtaguid_stats_proc_iface_stat_ptr_valid(struct iface_stat *ptr)
2602 {
2603 	struct iface_stat *iface_entry;
2604 
2605 	if (!ptr)
2606 		return false;
2607 
2608 	list_for_each_entry(iface_entry, &iface_stat_list, list)
2609 		if (iface_entry == ptr)
2610 			return true;
2611 	return false;
2612 }
2613 
qtaguid_stats_proc_next_iface_entry(struct proc_print_info * ppi)2614 static void qtaguid_stats_proc_next_iface_entry(struct proc_print_info *ppi)
2615 {
2616 	spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
2617 	list_for_each_entry_continue(ppi->iface_entry, &iface_stat_list, list) {
2618 		spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
2619 		return;
2620 	}
2621 	ppi->iface_entry = NULL;
2622 }
2623 
/*
 * Advance the stats iteration to the next tag_stat.
 *
 * @v is either SEQ_START_TOKEN (header just shown: begin with the first tag
 * of the current interface) or the tag_stat shown last. Interfaces without
 * further tags are skipped. The tag, position and item index of the returned
 * entry are saved in the private data so a later start can resume from it.
 *
 * Returns the next tag_stat, or NULL when @v is NULL, no interface is left,
 * or the module is passive.
 */
static void *qtaguid_stats_proc_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_print_info *state = m->private;
	struct tag_stat *prev = v;
	struct tag_stat *found;
	struct rb_node *node;

	if (v == NULL) {
		pr_err("qtaguid: %s(): unexpected v: NULL\n", __func__);
		return NULL;
	}

	*pos += 1;

	if (!state->iface_entry || unlikely(module_passive))
		return NULL;

	node = (v == SEQ_START_TOKEN) ?
		rb_first(&state->iface_entry->tag_stat_tree) :
		rb_next(&prev->tn.node);

	/* Current interface exhausted: try the following ones in turn. */
	for (; node == NULL;
	     node = rb_first(&state->iface_entry->tag_stat_tree)) {
		qtaguid_stats_proc_next_iface_entry(state);
		if (state->iface_entry == NULL)
			return NULL;
	}

	found = rb_entry(node, struct tag_stat, tn.node);
	state->tag = found->tn.tag;
	state->tag_pos = *pos;
	state->tag_item_index = state->item_index;
	return found;
}
2658 
/*
 * Begin (or resume) a stats proc read.
 *
 * Takes iface_stat_list_lock on every path and, whenever the private data
 * ends up with a non-NULL iface_entry, that interface's tag_stat_list_lock
 * as well; both are dropped in qtaguid_stats_proc_stop().
 *
 * At *pos == 0 the state is reset and SEQ_START_TOKEN (the header) is
 * returned. Otherwise the remembered interface is re-validated against
 * iface_stat_list and the remembered tag looked up again; NULL is returned if
 * either has disappeared. When *pos differs from the remembered position one
 * qtaguid_stats_proc_next() step is replayed.
 */
static void *qtaguid_stats_proc_start(struct seq_file *m, loff_t *pos)
{
	struct proc_print_info *state = m->private;
	void *item;

	spin_lock_bh(&iface_stat_list_lock);

	if (*pos == 0) {
		state->item_index = 1;
		state->tag_pos = 0;
		if (!list_empty(&iface_stat_list)) {
			state->iface_entry = list_first_entry(&iface_stat_list,
							      struct iface_stat,
							      list);
			spin_lock_bh(&state->iface_entry->tag_stat_list_lock);
		} else {
			state->iface_entry = NULL;
		}
		return SEQ_START_TOKEN;
	}

	if (!qtaguid_stats_proc_iface_stat_ptr_valid(state->iface_entry)) {
		if (state->iface_entry != NULL) {
			pr_err("qtaguid: %s(): iface_entry %p not found\n",
			       __func__, state->iface_entry);
			/* Cleared so that stop does not unlock a stale entry. */
			state->iface_entry = NULL;
		}
		return NULL;
	}

	spin_lock_bh(&state->iface_entry->tag_stat_list_lock);

	if (state->tag_pos == 0) {
		/* seq_read skipped first next call */
		item = SEQ_START_TOKEN;
	} else {
		item = tag_stat_tree_search(&state->iface_entry->tag_stat_tree,
					    state->tag);
		if (item == NULL) {
			pr_info("qtaguid: %s(): tag_stat.tag 0x%llx not found. Abort.\n",
				__func__, state->tag);
			return NULL;
		}
	}

	if (*pos != state->tag_pos) {
		/* seq_read skipped a next call */
		*pos = state->tag_pos;
		return qtaguid_stats_proc_next(m, item, pos);
	}

	/* normal resume */
	state->item_index = state->tag_item_index;
	return item;
}
2713 
qtaguid_stats_proc_stop(struct seq_file * m,void * v)2714 static void qtaguid_stats_proc_stop(struct seq_file *m, void *v)
2715 {
2716 	struct proc_print_info *ppi = m->private;
2717 	if (ppi->iface_entry)
2718 		spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
2719 	spin_unlock_bh(&iface_stat_list_lock);
2720 }
2721 
2722 /*
2723  * Procfs reader to get all tag stats using style "1)" as described in
2724  * fs/proc/generic.c
2725  * Groups all protocols tx/rx bytes.
2726  */
qtaguid_stats_proc_show(struct seq_file * m,void * v)2727 static int qtaguid_stats_proc_show(struct seq_file *m, void *v)
2728 {
2729 	struct tag_stat *ts_entry = v;
2730 
2731 	if (v == SEQ_START_TOKEN)
2732 		pp_stats_header(m);
2733 	else
2734 		pp_sets(m, ts_entry);
2735 
2736 	return 0;
2737 }
2738 
2739 /*------------------------------------------*/
qtudev_open(struct inode * inode,struct file * file)2740 static int qtudev_open(struct inode *inode, struct file *file)
2741 {
2742 	struct uid_tag_data *utd_entry;
2743 	struct proc_qtu_data  *pqd_entry;
2744 	struct proc_qtu_data  *new_pqd_entry;
2745 	int res;
2746 	bool utd_entry_found;
2747 
2748 	if (unlikely(qtu_proc_handling_passive))
2749 		return 0;
2750 
2751 	DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2752 		 current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2753 
2754 	spin_lock_bh(&uid_tag_data_tree_lock);
2755 
2756 	/* Look for existing uid data, or alloc one. */
2757 	utd_entry = get_uid_data(from_kuid(&init_user_ns, current_fsuid()), &utd_entry_found);
2758 	if (IS_ERR_OR_NULL(utd_entry)) {
2759 		res = PTR_ERR(utd_entry);
2760 		goto err_unlock;
2761 	}
2762 
2763 	/* Look for existing PID based proc_data */
2764 	pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
2765 					      current->tgid);
2766 	if (pqd_entry) {
2767 		pr_err("qtaguid: qtudev_open(): %u/%u %u "
2768 		       "%s already opened\n",
2769 		       current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()),
2770 		       QTU_DEV_NAME);
2771 		res = -EBUSY;
2772 		goto err_unlock_free_utd;
2773 	}
2774 
2775 	new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
2776 	if (!new_pqd_entry) {
2777 		pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2778 		       "proc data alloc failed\n",
2779 		       current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2780 		res = -ENOMEM;
2781 		goto err_unlock_free_utd;
2782 	}
2783 	new_pqd_entry->pid = current->tgid;
2784 	INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
2785 	new_pqd_entry->parent_tag_data = utd_entry;
2786 	utd_entry->num_pqd++;
2787 
2788 	proc_qtu_data_tree_insert(new_pqd_entry,
2789 				  &proc_qtu_data_tree);
2790 
2791 	spin_unlock_bh(&uid_tag_data_tree_lock);
2792 	DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
2793 		 from_kuid(&init_user_ns, current_fsuid()), new_pqd_entry);
2794 	file->private_data = new_pqd_entry;
2795 	return 0;
2796 
2797 err_unlock_free_utd:
2798 	if (!utd_entry_found) {
2799 		rb_erase(&utd_entry->node, &uid_tag_data_tree);
2800 		kfree(utd_entry);
2801 	}
2802 err_unlock:
2803 	spin_unlock_bh(&uid_tag_data_tree_lock);
2804 	return res;
2805 }
2806 
qtudev_release(struct inode * inode,struct file * file)2807 static int qtudev_release(struct inode *inode, struct file *file)
2808 {
2809 	struct proc_qtu_data  *pqd_entry = file->private_data;
2810 	struct uid_tag_data  *utd_entry = pqd_entry->parent_tag_data;
2811 	struct sock_tag *st_entry;
2812 	struct rb_root st_to_free_tree = RB_ROOT;
2813 	struct list_head *entry, *next;
2814 	struct tag_ref *tr;
2815 
2816 	if (unlikely(qtu_proc_handling_passive))
2817 		return 0;
2818 
2819 	/*
2820 	 * Do not trust the current->pid, it might just be a kworker cleaning
2821 	 * up after a dead proc.
2822 	 */
2823 	DR_DEBUG("qtaguid: qtudev_release(): "
2824 		 "pid=%u tgid=%u uid=%u "
2825 		 "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
2826 		 current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
2827 		 pqd_entry, pqd_entry->pid, utd_entry,
2828 		 utd_entry->num_active_tags);
2829 
2830 	spin_lock_bh(&sock_tag_list_lock);
2831 	spin_lock_bh(&uid_tag_data_tree_lock);
2832 
2833 	list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
2834 		st_entry = list_entry(entry, struct sock_tag, list);
2835 		DR_DEBUG("qtaguid: %s(): "
2836 			 "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
2837 			 __func__,
2838 			 st_entry, st_entry->sk,
2839 			 current->pid, current->tgid,
2840 			 pqd_entry->parent_tag_data->uid);
2841 
2842 		utd_entry = uid_tag_data_tree_search(
2843 			&uid_tag_data_tree,
2844 			get_uid_from_tag(st_entry->tag));
2845 		BUG_ON(IS_ERR_OR_NULL(utd_entry));
2846 		DR_DEBUG("qtaguid: %s(): "
2847 			 "looking for tag=0x%llx in utd_entry=%p\n", __func__,
2848 			 st_entry->tag, utd_entry);
2849 		tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
2850 					 st_entry->tag);
2851 		BUG_ON(!tr);
2852 		BUG_ON(tr->num_sock_tags <= 0);
2853 		tr->num_sock_tags--;
2854 		free_tag_ref_from_utd_entry(tr, utd_entry);
2855 
2856 		rb_erase(&st_entry->sock_node, &sock_tag_tree);
2857 		list_del(&st_entry->list);
2858 		/* Can't sockfd_put() within spinlock, do it later. */
2859 		sock_tag_tree_insert(st_entry, &st_to_free_tree);
2860 
2861 		/*
2862 		 * Try to free the utd_entry if no other proc_qtu_data is
2863 		 * using it (num_pqd is 0) and it doesn't have active tags
2864 		 * (num_active_tags is 0).
2865 		 */
2866 		put_utd_entry(utd_entry);
2867 	}
2868 
2869 	rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
2870 	BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
2871 	pqd_entry->parent_tag_data->num_pqd--;
2872 	put_utd_entry(pqd_entry->parent_tag_data);
2873 	kfree(pqd_entry);
2874 	file->private_data = NULL;
2875 
2876 	spin_unlock_bh(&uid_tag_data_tree_lock);
2877 	spin_unlock_bh(&sock_tag_list_lock);
2878 
2879 
2880 	sock_tag_tree_erase(&st_to_free_tree);
2881 
2882 	spin_lock_bh(&sock_tag_list_lock);
2883 	prdebug_full_state_locked(0, "%s(): pid=%u tgid=%u", __func__,
2884 			   current->pid, current->tgid);
2885 	spin_unlock_bh(&sock_tag_list_lock);
2886 	return 0;
2887 }
2888 
2889 /*------------------------------------------*/
2890 static const struct file_operations qtudev_fops = {
2891 	.owner = THIS_MODULE,
2892 	.open = qtudev_open,
2893 	.release = qtudev_release,
2894 };
2895 
2896 static struct miscdevice qtu_device = {
2897 	.minor = MISC_DYNAMIC_MINOR,
2898 	.name = QTU_DEV_NAME,
2899 	.fops = &qtudev_fops,
2900 	/* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
2901 };
2902 
2903 static const struct seq_operations proc_qtaguid_ctrl_seqops = {
2904 	.start = qtaguid_ctrl_proc_start,
2905 	.next = qtaguid_ctrl_proc_next,
2906 	.stop = qtaguid_ctrl_proc_stop,
2907 	.show = qtaguid_ctrl_proc_show,
2908 };
2909 
proc_qtaguid_ctrl_open(struct inode * inode,struct file * file)2910 static int proc_qtaguid_ctrl_open(struct inode *inode, struct file *file)
2911 {
2912 	return seq_open_private(file, &proc_qtaguid_ctrl_seqops,
2913 				sizeof(struct proc_ctrl_print_info));
2914 }
2915 
2916 static const struct file_operations proc_qtaguid_ctrl_fops = {
2917 	.open		= proc_qtaguid_ctrl_open,
2918 	.read		= seq_read,
2919 	.write		= qtaguid_ctrl_proc_write,
2920 	.llseek		= seq_lseek,
2921 	.release	= seq_release_private,
2922 };
2923 
2924 static const struct seq_operations proc_qtaguid_stats_seqops = {
2925 	.start = qtaguid_stats_proc_start,
2926 	.next = qtaguid_stats_proc_next,
2927 	.stop = qtaguid_stats_proc_stop,
2928 	.show = qtaguid_stats_proc_show,
2929 };
2930 
proc_qtaguid_stats_open(struct inode * inode,struct file * file)2931 static int proc_qtaguid_stats_open(struct inode *inode, struct file *file)
2932 {
2933 	return seq_open_private(file, &proc_qtaguid_stats_seqops,
2934 				sizeof(struct proc_print_info));
2935 }
2936 
2937 static const struct file_operations proc_qtaguid_stats_fops = {
2938 	.open		= proc_qtaguid_stats_open,
2939 	.read		= seq_read,
2940 	.llseek		= seq_lseek,
2941 	.release	= seq_release_private,
2942 };
2943 
2944 /*------------------------------------------*/
qtaguid_proc_register(struct proc_dir_entry ** res_procdir)2945 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2946 {
2947 	int ret;
2948 	*res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
2949 	if (!*res_procdir) {
2950 		pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
2951 		ret = -ENOMEM;
2952 		goto no_dir;
2953 	}
2954 
2955 	xt_qtaguid_ctrl_file = proc_create_data("ctrl", proc_ctrl_perms,
2956 						*res_procdir,
2957 						&proc_qtaguid_ctrl_fops,
2958 						NULL);
2959 	if (!xt_qtaguid_ctrl_file) {
2960 		pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2961 			" file\n");
2962 		ret = -ENOMEM;
2963 		goto no_ctrl_entry;
2964 	}
2965 
2966 	xt_qtaguid_stats_file = proc_create_data("stats", proc_stats_perms,
2967 						 *res_procdir,
2968 						 &proc_qtaguid_stats_fops,
2969 						 NULL);
2970 	if (!xt_qtaguid_stats_file) {
2971 		pr_err("qtaguid: failed to create xt_qtaguid/stats "
2972 			"file\n");
2973 		ret = -ENOMEM;
2974 		goto no_stats_entry;
2975 	}
2976 	/*
2977 	 * TODO: add support counter hacking
2978 	 * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
2979 	 */
2980 	return 0;
2981 
2982 no_stats_entry:
2983 	remove_proc_entry("ctrl", *res_procdir);
2984 no_ctrl_entry:
2985 	remove_proc_entry("xt_qtaguid", NULL);
2986 no_dir:
2987 	return ret;
2988 }
2989 
2990 static struct xt_match qtaguid_mt_reg __read_mostly = {
2991 	/*
2992 	 * This module masquerades as the "owner" module so that iptables
2993 	 * tools can deal with it.
2994 	 */
2995 	.name       = "owner",
2996 	.revision   = 1,
2997 	.family     = NFPROTO_UNSPEC,
2998 	.match      = qtaguid_mt,
2999 	.matchsize  = sizeof(struct xt_qtaguid_match_info),
3000 	.me         = THIS_MODULE,
3001 };
3002 
qtaguid_mt_init(void)3003 static int __init qtaguid_mt_init(void)
3004 {
3005 	if (qtaguid_proc_register(&xt_qtaguid_procdir)
3006 	    || iface_stat_init(xt_qtaguid_procdir)
3007 	    || xt_register_match(&qtaguid_mt_reg)
3008 	    || misc_register(&qtu_device))
3009 		return -1;
3010 	return 0;
3011 }
3012 
/*
 * TODO: allow unloading of the module.
 * For now stats are permanent.
 * Kconfig forces 'y/n' and never an 'm'.
 */
3018 
3019 module_init(qtaguid_mt_init);
3020 MODULE_AUTHOR("jpa <jpa@google.com>");
3021 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
3022 MODULE_LICENSE("GPL");
3023 MODULE_ALIAS("ipt_owner");
3024 MODULE_ALIAS("ip6t_owner");
3025 MODULE_ALIAS("ipt_qtaguid");
3026 MODULE_ALIAS("ip6t_qtaguid");
3027