/*
 * Kernel iptables module to track stats for packets based on user tags.
 *
 * (C) 2011 Google, Inc
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#ifndef __XT_QTAGUID_INTERNAL_H__
#define __XT_QTAGUID_INTERNAL_H__

#include <linux/types.h>
#include <linux/rbtree.h>
#include <linux/spinlock_types.h>
#include <linux/workqueue.h>

/* Iface handling */
#define IDEBUG_MASK (1<<0)
/* Iptable Matching. Per packet. */
#define MDEBUG_MASK (1<<1)
/* Red-black tree handling. Per packet. */
#define RDEBUG_MASK (1<<2)
/* procfs ctrl/stats handling */
#define CDEBUG_MASK (1<<3)
/* dev and resource tracking */
#define DDEBUG_MASK (1<<4)

/* E.g. (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */
#define DEFAULT_DEBUG_MASK 0

/*
 * (Un)Define these *DEBUG to compile out/in the pr_debug calls.
 * All undef: text size ~ 0x3030; all def: ~ 0x4404.
 */
#define IDEBUG
#define MDEBUG
#define RDEBUG
#define CDEBUG
#define DDEBUG

#define MSK_DEBUG(mask, ...) do {                           \
		if (unlikely(qtaguid_debug_mask & (mask)))  \
			pr_debug(__VA_ARGS__);              \
	} while (0)
#ifdef IDEBUG
#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__)
#else
#define IF_DEBUG(...) no_printk(__VA_ARGS__)
#endif
#ifdef MDEBUG
#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__)
#else
#define MT_DEBUG(...) no_printk(__VA_ARGS__)
#endif
#ifdef RDEBUG
#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__)
#else
#define RB_DEBUG(...) no_printk(__VA_ARGS__)
#endif
#ifdef CDEBUG
#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__)
#else
#define CT_DEBUG(...) no_printk(__VA_ARGS__)
#endif
#ifdef DDEBUG
#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__)
#else
#define DR_DEBUG(...) no_printk(__VA_ARGS__)
#endif

extern uint qtaguid_debug_mask;
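
/*
 * Illustrative usage, a sketch only: a category's debug text is compiled
 * in by its *DEBUG define above, but only emitted when the matching bit
 * is set in qtaguid_debug_mask at runtime (the .c file is assumed to
 * expose the mask, e.g. as a module parameter), and pr_debug() output is
 * still subject to the kernel's dynamic-debug/DEBUG configuration.
 * "sk" and "uid" below are hypothetical locals, not part of this header.
 *
 *	MT_DEBUG("qtaguid: match sk=%p\n", sk);    needs MDEBUG_MASK set
 *	CT_DEBUG("qtaguid: ctrl uid=%u\n", uid);   needs CDEBUG_MASK set
 */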

/*---------------------------------------------------------------------------*/
/*
 * Tags:
 *
 * They represent what the data usage counters will be tracked against.
 * By default a tag is just based on the UID.
 * The UID is used as the base for policing, and cannot be ignored.
 * So a tag will always at least represent a UID (uid_tag).
 *
 * A tag can be augmented with an "accounting tag" which is associated
 * with a UID.
 * User space can set the acct_tag portion of the tag which is then used
 * with sockets: all data belonging to that socket will be counted against the
 * tag. The policing is then based on the tag's uid_tag portion,
 * and stats are collected for the acct_tag portion separately.
 *
 * For example, for uid_tag 10003 there could be
 *   a: {acct_tag=1, uid_tag=10003}
 *   b: {acct_tag=2, uid_tag=10003}
 *   c: {acct_tag=3, uid_tag=10003}
 *   d: {acct_tag=0, uid_tag=10003}
 * a, b, and c represent tags associated with specific sockets.
 * d is for the totals for that uid, including all untagged traffic.
 * Typically d is used with policing/quota rules.
 *
 * We want tag_t big enough to distinguish uid_t and acct_tag.
 * It might become a struct if needed.
 * Nothing should be using it as an int.
 */
typedef uint64_t tag_t;  /* Only used via accessors */

#define TAG_UID_MASK 0xFFFFFFFFULL
#define TAG_ACCT_MASK (~0xFFFFFFFFULL)

static inline int tag_compare(tag_t t1, tag_t t2)
{
	return t1 < t2 ? -1 : t1 == t2 ? 0 : 1;
}

static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
{
	return acct_tag | uid;
}
static inline tag_t make_tag_from_uid(uid_t uid)
{
	return uid;
}
static inline uid_t get_uid_from_tag(tag_t tag)
{
	return tag & TAG_UID_MASK;
}
static inline tag_t get_utag_from_tag(tag_t tag)
{
	return tag & TAG_UID_MASK;
}
static inline tag_t get_atag_from_tag(tag_t tag)
{
	return tag & TAG_ACCT_MASK;
}

static inline bool valid_atag(tag_t tag)
{
	return !(tag & TAG_UID_MASK);
}
static inline tag_t make_atag_from_value(uint32_t value)
{
	return (uint64_t)value << 32;
}
/*---------------------------------------------------------------------------*/

/*
 * Maximum number of socket tags that a UID is allowed to have active.
 * Multiple processes belonging to the same UID contribute towards this limit.
 * Special UIDs that can impersonate a UID also contribute (e.g. download
 * manager, ...)
 */
#define DEFAULT_MAX_SOCK_TAGS 1024

/*
 * For now we only track 2 sets of counters.
 * The default set is 0.
 * Userspace can activate another set for a given uid being tracked.
 */
#define IFS_MAX_COUNTER_SETS 2

enum ifs_tx_rx {
	IFS_TX,
	IFS_RX,
	IFS_MAX_DIRECTIONS
};

/* For now, TCP, UDP, the rest */
enum ifs_proto {
	IFS_TCP,
	IFS_UDP,
	IFS_PROTO_OTHER,
	IFS_MAX_PROTOS
};

struct byte_packet_counters {
	uint64_t bytes;
	uint64_t packets;
};

struct data_counters {
	struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
};

static inline uint64_t dc_sum_bytes(struct data_counters *counters,
				    int set,
				    enum ifs_tx_rx direction)
{
	return counters->bpc[set][direction][IFS_TCP].bytes
		+ counters->bpc[set][direction][IFS_UDP].bytes
		+ counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
}

static inline uint64_t dc_sum_packets(struct data_counters *counters,
				      int set,
				      enum ifs_tx_rx direction)
{
	return counters->bpc[set][direction][IFS_TCP].packets
		+ counters->bpc[set][direction][IFS_UDP].packets
		+ counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
}
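
/*
 * Illustrative usage (a sketch; the numbers are made up): sum one
 * direction of one counter set across the TCP/UDP/other buckets.
 *
 *	struct data_counters dc = {};
 *
 *	dc.bpc[0][IFS_RX][IFS_TCP].bytes = 1000;
 *	dc.bpc[0][IFS_RX][IFS_UDP].bytes = 500;
 *	dc_sum_bytes(&dc, 0, IFS_RX)     == 1500
 *	dc_sum_packets(&dc, 0, IFS_RX)   == 0, packets were never bumped
 */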

/* Generic X based nodes used as a base for rb_tree ops */
struct tag_node {
	struct rb_node node;
	tag_t tag;
};

struct tag_stat {
	struct tag_node tn;
	struct data_counters counters;
	/*
	 * If this tag is acct_tag based, we need to count against the
	 * matching parent uid_tag.
	 */
	struct data_counters *parent_counters;
};

struct iface_stat {
	struct list_head list;	/* in iface_stat_list */
	char *ifname;
	bool active;
	/* net_dev is only valid for active iface_stat */
	struct net_device *net_dev;

	struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS];
	struct data_counters totals_via_skb;
	/*
	 * We keep the last_known, because some devices reset their counters
	 * just before NETDEV_UP, while some will reset just before
	 * NETDEV_REGISTER (which is more normal).
	 * So now, if the device didn't do a NETDEV_UNREGISTER and we see
	 * its current dev stats smaller than what was previously known, we
	 * assume an UNREGISTER and just use the last_known.
	 */
	struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS];
	/* last_known is usable when last_known_valid is true */
	bool last_known_valid;

	struct proc_dir_entry *proc_ptr;

	struct rb_root tag_stat_tree;
	spinlock_t tag_stat_list_lock;
};
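
/*
 * Illustrative sketch of the last_known fallback described above; the
 * real handling lives in the .c file, and "entry"/"dev_stats" here are
 * hypothetical locals rather than anything defined in this header.
 *
 *	if (entry->last_known_valid &&
 *	    dev_stats->rx_bytes < entry->last_known[IFS_RX].bytes) {
 *		Counters went backwards without a NETDEV_UNREGISTER:
 *		treat it as a reset and fall back to last_known.
 *	}
 */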

/* This is needed to create proc_dir_entries from atomic context. */
struct iface_stat_work {
	struct work_struct iface_work;
	struct iface_stat *iface_entry;
};

/*
 * Track the tag that this socket is transferring data for, which is not
 * necessarily the uid that owns the socket.
 * This is the tag against which tag_stat.counters will be billed.
 * These structs need to be looked up by sock and pid.
 */
struct sock_tag {
	struct rb_node sock_node;
	struct sock *sk;	/* Only used as a number, never dereferenced */
	/* Used to associate with a given pid */
	struct list_head list;	/* in proc_qtu_data.sock_tag_list */
	pid_t pid;

	tag_t tag;
};

struct qtaguid_event_counts {
	/* Various successful events */
	atomic64_t sockets_tagged;
	atomic64_t sockets_untagged;
	atomic64_t counter_set_changes;
	atomic64_t delete_cmds;
	atomic64_t iface_events;	/* Number of NETDEV_* events handled */

	atomic64_t match_calls;		/* Number of times iptables called mt */
	/* Number of times iptables called mt from pre or post routing hooks */
	atomic64_t match_calls_prepost;
	/*
	 * match_found_sk_*: numbers related to the netfilter matching
	 * function finding a sock for the sk_buff.
	 * Total skbs processed is sum(match_found*).
	 */
	atomic64_t match_found_sk;	/* An sk was already in the sk_buff. */
	/* The connection tracker had or didn't have the sk. */
	atomic64_t match_found_sk_in_ct;
	atomic64_t match_found_no_sk_in_ct;
	/*
	 * No sk could be found. No apparent owner. Could happen with
	 * unsolicited traffic.
	 */
	atomic64_t match_no_sk;
	/*
	 * The file ptr in the sk_socket wasn't there and we couldn't get GID.
	 * This might happen for traffic while the socket is being closed.
	 */
	atomic64_t match_no_sk_gid;
};

/* Track which counter set (active_set) is in use for the given tag. */
struct tag_counter_set {
	struct tag_node tn;
	int active_set;
};

/*----------------------------------------------*/
/*
 * The qtu uid data is used to track resources that are created directly or
 * indirectly by processes (tracked by uid).
 * It is shared by the processes with the same uid.
 * Some of the resources are counted to prevent further rogue allocations;
 * some need freeing once the owner process (uid) exits.
 */
struct uid_tag_data {
	struct rb_node node;
	uid_t uid;

	/*
	 * For the uid, how many accounting tags have been set.
	 */
	int num_active_tags;
	/* Track the number of proc_qtu_data that reference it */
	int num_pqd;
	struct rb_root tag_ref_tree;
	/* No tag_node_tree_lock; use uid_tag_data_tree_lock */
};

struct tag_ref {
	struct tag_node tn;

	/*
	 * This tracks the number of active sockets that have a tag on them
	 * which matches this tag_ref.tn.tag.
	 * A tag ref can live on after the sockets are untagged.
	 * A tag ref can only be removed during a tag delete command.
	 */
	int num_sock_tags;
};

struct proc_qtu_data {
	struct rb_node node;
	pid_t pid;

	struct uid_tag_data *parent_tag_data;

	/* Tracks the sock_tags that need freeing upon this proc's death */
	struct list_head sock_tag_list;
	/* No spinlock_t sock_tag_list_lock; use the global one. */
};

/*----------------------------------------------*/
#endif /* ifndef __XT_QTAGUID_INTERNAL_H__ */