/*
 * Kernel iptables module to track stats for packets based on user tags.
 *
 * (C) 2011 Google, Inc
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#ifndef __XT_QTAGUID_INTERNAL_H__
#define __XT_QTAGUID_INTERNAL_H__

#include <linux/types.h>
#include <linux/rbtree.h>
#include <linux/spinlock_types.h>
#include <linux/workqueue.h>

/* Iface handling */
#define IDEBUG_MASK (1<<0)
/* Iptable Matching. Per packet. */
#define MDEBUG_MASK (1<<1)
/* Red-black tree handling. Per packet. */
#define RDEBUG_MASK (1<<2)
/* procfs ctrl/stats handling */
#define CDEBUG_MASK (1<<3)
/* dev and resource tracking */
#define DDEBUG_MASK (1<<4)

/* E.g (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */
#define DEFAULT_DEBUG_MASK 0

/*
 * (Un)Define these *DEBUG to compile out/in the pr_debug calls.
 * All undef: text size ~ 0x3030; all def: ~ 0x4404.
 */
#define IDEBUG
#define MDEBUG
#define RDEBUG
#define CDEBUG
#define DDEBUG

#define MSK_DEBUG(mask, ...) do {                           \
		if (unlikely(qtaguid_debug_mask & (mask)))  \
			pr_debug(__VA_ARGS__);              \
	} while (0)
#ifdef IDEBUG
#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__)
#else
#define IF_DEBUG(...) no_printk(__VA_ARGS__)
#endif
#ifdef MDEBUG
#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__)
#else
#define MT_DEBUG(...) no_printk(__VA_ARGS__)
#endif
#ifdef RDEBUG
#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__)
#else
#define RB_DEBUG(...) no_printk(__VA_ARGS__)
#endif
#ifdef CDEBUG
#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__)
#else
#define CT_DEBUG(...) no_printk(__VA_ARGS__)
#endif
#ifdef DDEBUG
#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__)
#else
#define DR_DEBUG(...) no_printk(__VA_ARGS__)
#endif

extern uint qtaguid_debug_mask;

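/*
 * Usage sketch (illustrative, not part of the original header): with
 * IDEBUG defined, a call such as
 *   IF_DEBUG("qtaguid: iface_stat: create(%s)\n", ifname);
 * expands to a pr_debug() that only fires when the IDEBUG_MASK bit is set
 * in qtaguid_debug_mask (typically exposed by the module as a writable
 * debug_mask parameter). With IDEBUG undefined it becomes no_printk(),
 * which keeps format checking but compiles away. "ifname" here is a
 * hypothetical local used only for the example.
 */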
/*---------------------------------------------------------------------------*/
/*
 * Tags:
 *
 * They represent what the data usage counters will be tracked against.
 * By default a tag is just based on the UID.
 * The UID is used as the base for policing, and cannot be ignored.
 * So a tag will always at least represent a UID (uid_tag).
 *
 * A tag can be augmented with an "accounting tag" which is associated
 * with a UID.
 * User space can set the acct_tag portion of the tag which is then used
 * with sockets: all data belonging to that socket will be counted against the
 * tag. The policing is then based on the tag's uid_tag portion,
 * and stats are collected for the acct_tag portion separately.
 *
 * For example, a given UID could have:
 * a:  {acct_tag=1, uid_tag=10003}
 * b:  {acct_tag=2, uid_tag=10003}
 * c:  {acct_tag=3, uid_tag=10003}
 * d:  {acct_tag=0, uid_tag=10003}
 * a, b, and c represent tags associated with specific sockets.
 * d is for the totals for that uid, including all untagged traffic.
 * Typically d is used with policing/quota rules.
 *
 * We want tag_t big enough to distinguish uid_t and acct_tag.
 * It might become a struct if needed.
 * Nothing should be using it as an int.
 */
typedef uint64_t tag_t;  /* Only used via accessors */

#define TAG_UID_MASK 0xFFFFFFFFULL
#define TAG_ACCT_MASK (~0xFFFFFFFFULL)

static inline int tag_compare(tag_t t1, tag_t t2)
{
	return t1 < t2 ? -1 : t1 == t2 ? 0 : 1;
}

static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
{
	return acct_tag | uid;
}
static inline tag_t make_tag_from_uid(uid_t uid)
{
	return uid;
}
static inline uid_t get_uid_from_tag(tag_t tag)
{
	return tag & TAG_UID_MASK;
}
static inline tag_t get_utag_from_tag(tag_t tag)
{
	return tag & TAG_UID_MASK;
}
static inline tag_t get_atag_from_tag(tag_t tag)
{
	return tag & TAG_ACCT_MASK;
}

static inline bool valid_atag(tag_t tag)
{
	return !(tag & TAG_UID_MASK);
}
static inline tag_t make_atag_from_value(uint32_t value)
{
	return (uint64_t)value << 32;
}
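
/*
 * Worked example (illustrative, not part of the original header):
 * tagging traffic of uid 10003 (0x2713) with accounting tag value 2:
 *
 *   tag_t atag = make_atag_from_value(2);      -> 0x0000000200000000
 *   tag_t tag = combine_atag_with_uid(atag, 10003);
 *                                              -> 0x0000000200002713
 *   get_uid_from_tag(tag)                      -> 10003
 *   get_atag_from_tag(tag)                     -> 0x0000000200000000
 *   get_utag_from_tag(tag)                     -> make_tag_from_uid(10003)
 */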
/*---------------------------------------------------------------------------*/

/*
 * Maximum number of socket tags that a UID is allowed to have active.
 * Multiple processes belonging to the same UID contribute towards this limit.
 * Special UIDs that can impersonate a UID also contribute (e.g. download
 * manager, ...)
 */
#define DEFAULT_MAX_SOCK_TAGS 1024

/*
 * For now we only track 2 sets of counters.
 * The default set is 0.
 * Userspace can activate another set for a given uid being tracked.
 */
#define IFS_MAX_COUNTER_SETS 2

enum ifs_tx_rx {
	IFS_TX,
	IFS_RX,
	IFS_MAX_DIRECTIONS
};

/* For now, TCP, UDP, the rest */
enum ifs_proto {
	IFS_TCP,
	IFS_UDP,
	IFS_PROTO_OTHER,
	IFS_MAX_PROTOS
};

struct byte_packet_counters {
	uint64_t bytes;
	uint64_t packets;
};

struct data_counters {
	struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
};

static inline uint64_t dc_sum_bytes(struct data_counters *counters,
				    int set,
				    enum ifs_tx_rx direction)
{
	return counters->bpc[set][direction][IFS_TCP].bytes
		+ counters->bpc[set][direction][IFS_UDP].bytes
		+ counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
}

static inline uint64_t dc_sum_packets(struct data_counters *counters,
				      int set,
				      enum ifs_tx_rx direction)
{
	return counters->bpc[set][direction][IFS_TCP].packets
		+ counters->bpc[set][direction][IFS_UDP].packets
		+ counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
}


/* Generic tag-keyed node used as a base for rb_tree ops */
struct tag_node {
	struct rb_node node;
	tag_t tag;
};
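
/*
 * Minimal sketch (illustrative, not part of the original header) of how a
 * struct embedding tag_node is looked up in one of the rb-trees below,
 * using the standard kernel rbtree search idiom and tag_compare().
 * The function name is hypothetical.
 */
static inline struct tag_node *example_tag_node_tree_search(struct rb_root *root,
							     tag_t tag)
{
	struct rb_node *node = root->rb_node;

	while (node) {
		struct tag_node *data = rb_entry(node, struct tag_node, node);
		int result = tag_compare(tag, data->tag);

		if (result < 0)
			node = node->rb_left;	/* searched tag sorts lower */
		else if (result > 0)
			node = node->rb_right;	/* searched tag sorts higher */
		else
			return data;		/* exact match */
	}
	return NULL;
}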

struct tag_stat {
	struct tag_node tn;
	struct data_counters counters;
	/*
	 * If this tag is acct_tag based, we need to count against the
	 * matching parent uid_tag.
	 */
	struct data_counters *parent_counters;
};

struct iface_stat {
	struct list_head list;  /* in iface_stat_list */
	char *ifname;
	bool active;
	/* net_dev is only valid for active iface_stat */
	struct net_device *net_dev;

	struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS];
	struct data_counters totals_via_skb;
	/*
	 * We keep the last_known, because some devices reset their counters
	 * just before NETDEV_UP, while some will reset just before
	 * NETDEV_REGISTER (which is more normal).
	 * So now, if the device didn't do a NETDEV_UNREGISTER and we see
	 * its current dev stats smaller than what was previously known, we
	 * assume an UNREGISTER and just use the last_known.
	 */
	struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS];
	/* last_known is usable when last_known_valid is true */
	bool last_known_valid;

	struct proc_dir_entry *proc_ptr;

	struct rb_root tag_stat_tree;
	spinlock_t tag_stat_list_lock;
};
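
/*
 * Illustrative sketch (not part of the original header) of the last_known
 * heuristic described above: if the device counters went backwards without
 * a NETDEV_UNREGISTER, assume the device was silently re-registered and
 * fold the last known values into the accumulated totals. The helper name
 * and the cur_* parameters are hypothetical.
 */
static inline void example_apply_last_known(struct iface_stat *ifs,
					    uint64_t cur_rx_bytes,
					    uint64_t cur_tx_bytes)
{
	if (ifs->last_known_valid &&
	    cur_rx_bytes < ifs->last_known[IFS_RX].bytes) {
		/* Counters restarted: credit what the old device had seen. */
		ifs->totals_via_dev[IFS_RX].bytes +=
			ifs->last_known[IFS_RX].bytes;
		ifs->totals_via_dev[IFS_TX].bytes +=
			ifs->last_known[IFS_TX].bytes;
	}
	ifs->last_known[IFS_RX].bytes = cur_rx_bytes;
	ifs->last_known[IFS_TX].bytes = cur_tx_bytes;
	ifs->last_known_valid = true;
}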

/* This is needed to create proc_dir_entries from atomic context. */
struct iface_stat_work {
	struct work_struct iface_work;
	struct iface_stat *iface_entry;
};

/*
 * Track the tag that this socket is transferring data for, which is not
 * necessarily the uid that owns the socket.
 * This is the tag against which tag_stat.counters will be billed.
 * These structs need to be looked up by sock and pid.
 */
struct sock_tag {
	struct rb_node sock_node;
	struct sock *sk;  /* Only used as a number, never dereferenced */
	/* Used to associate with a given pid */
	struct list_head list;   /* in proc_qtu_data.sock_tag_list */
	pid_t pid;

	tag_t tag;
};

struct qtaguid_event_counts {
	/* Various successful events */
	atomic64_t sockets_tagged;
	atomic64_t sockets_untagged;
	atomic64_t counter_set_changes;
	atomic64_t delete_cmds;
	atomic64_t iface_events;  /* Number of NETDEV_* events handled */

	atomic64_t match_calls;   /* Number of times iptables called mt */
	/* Number of times iptables called mt from pre or post routing hooks */
	atomic64_t match_calls_prepost;
	/*
	 * match_found_sk_*: numbers related to the netfilter matching
	 * function finding a sock for the sk_buff.
	 * Total skbs processed is sum(match_found*).
	 */
	atomic64_t match_found_sk;   /* An sk was already in the sk_buff. */
	/* The connection tracker had or didn't have the sk. */
	atomic64_t match_found_sk_in_ct;
	atomic64_t match_found_no_sk_in_ct;
	/*
	 * No sk could be found. No apparent owner. Could happen with
	 * unsolicited traffic.
	 */
	atomic64_t match_no_sk;
	/*
	 * The file ptr in the sk_socket wasn't there and we couldn't get GID.
	 * This might happen for traffic while the socket is being closed.
	 */
	atomic64_t match_no_sk_gid;
};

/* Track the active counter set (active_set) for the given tag. */
struct tag_counter_set {
	struct tag_node tn;
	int active_set;
};

/*----------------------------------------------*/
/*
 * The qtu uid data is used to track resources that are created directly or
 * indirectly by processes (uid tracked).
 * It is shared by the processes with the same uid.
 * Some of the resources will be counted to prevent further rogue allocations,
 * some will need freeing once the owner process (uid) exits.
 */
struct uid_tag_data {
	struct rb_node node;
	uid_t uid;

	/*
	 * For the uid, how many accounting tags have been set.
	 */
	int num_active_tags;
	/* Track the number of proc_qtu_data that reference it */
	int num_pqd;
	struct rb_root tag_ref_tree;
	/* No tag_node_tree_lock; use uid_tag_data_tree_lock */
};

struct tag_ref {
	struct tag_node tn;

	/*
	 * This tracks the number of active sockets that have a tag on them
	 * which matches this tag_ref.tn.tag.
	 * A tag ref can live on after the sockets are untagged.
	 * A tag ref can only be removed during a tag delete command.
	 */
	int num_sock_tags;
};

struct proc_qtu_data {
	struct rb_node node;
	pid_t pid;

	struct uid_tag_data *parent_tag_data;

	/* Tracks the sock_tags that need freeing upon this proc's death */
	struct list_head sock_tag_list;
	/* No spinlock_t sock_tag_list_lock; use the global one. */
};

/*----------------------------------------------*/
#endif  /* ifndef __XT_QTAGUID_INTERNAL_H__ */