/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Author:
 *     Jay Schulist <jschlst@samba.org>
 *
 * Based on the design of:
 *     - The Berkeley Packet Filter
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in sk_chk_filter()
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>

/* No hurry in this branch */
static void *__load_pointer(struct sk_buff *skb, int k)
{
	u8 *ptr = NULL;

	if (k >= SKF_NET_OFF)
		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
	else if (k >= SKF_LL_OFF)
		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;

	if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
		return ptr;
	return NULL;
}

static inline void *load_pointer(struct sk_buff *skb, int k,
				 unsigned int size, void *buffer)
{
	if (k >= 0)
		return skb_header_pointer(skb, k, size, buffer);
	else {
		if (k >= SKF_AD_OFF)
			return NULL;
		return __load_pointer(skb, k);
	}
}

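/*
 * Editorial illustration (not part of this file): how a filter
 * instruction reaches __load_pointer().  A negative k selects one of
 * the special offset ranges.  The instruction below is a hypothetical
 * piece of user-supplied filter code that would load the IPv4
 * protocol byte (offset 9 into the network header) into the
 * accumulator:
 *
 *	struct sock_filter insn = {
 *		.code = BPF_LD | BPF_B | BPF_ABS,
 *		.k    = SKF_NET_OFF + 9,
 *	};
 *
 * Because SKF_NET_OFF + 9 is still below SKF_AD_OFF, load_pointer()
 * falls through to __load_pointer(), which resolves the offset against
 * the network header rather than skb->data.
 */
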
/**
 *	sk_filter - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *
 * Run the filter code and then cut skb->data to the correct size
 * returned by sk_run_filter. If pkt_len is 0 we toss the packet.
 * If skb->len is smaller than pkt_len we keep the whole skb->data.
 * This is the socket level wrapper to sk_run_filter. It returns 0
 * if the packet should be accepted or -EPERM if the packet should
 * be tossed.
 */
int sk_filter(struct sock *sk, struct sk_buff *skb)
{
	int err;
	struct sk_filter *filter;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock_bh();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		unsigned int pkt_len = sk_run_filter(skb, filter->insns,
				filter->len);
		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
	}
	rcu_read_unlock_bh();

	return err;
}
EXPORT_SYMBOL(sk_filter);
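
/*
 * Usage sketch (editorial, not kernel code): protocol receive paths
 * call sk_filter() before queueing an skb to a socket, in the style
 * of sock_queue_rcv_skb().  The caller below is hypothetical:
 *
 *	static int example_queue_rcv(struct sock *sk, struct sk_buff *skb)
 *	{
 *		int err = sk_filter(sk, skb);
 *		if (err)
 *			return err;	the caller then drops the skb
 *		...queue skb to sk's receive queue...
 *		return 0;
 *	}
 */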

/**
 *	sk_run_filter - run a filter on a socket
 *	@skb: buffer to run the filter on
 *	@filter: filter to apply
 *	@flen: length of filter
 *
 * Decode and apply filter instructions to the skb->data.
 * Return the length to keep, 0 for none. skb is the data we are
 * filtering, filter is the array of filter instructions, and
 * flen is the number of filter blocks in the array.
 */
unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
{
	struct sock_filter *fentry;	/* We walk down these */
	void *ptr;
	u32 A = 0;			/* Accumulator */
	u32 X = 0;			/* Index Register */
	u32 mem[BPF_MEMWORDS];		/* Scratch Memory Store */
	u32 tmp;
	int k;
	int pc;

	/*
	 * Process array of filter instructions.
	 */
	for (pc = 0; pc < flen; pc++) {
		fentry = &filter[pc];

		switch (fentry->code) {
		case BPF_ALU|BPF_ADD|BPF_X:
			A += X;
			continue;
		case BPF_ALU|BPF_ADD|BPF_K:
			A += fentry->k;
			continue;
		case BPF_ALU|BPF_SUB|BPF_X:
			A -= X;
			continue;
		case BPF_ALU|BPF_SUB|BPF_K:
			A -= fentry->k;
			continue;
		case BPF_ALU|BPF_MUL|BPF_X:
			A *= X;
			continue;
		case BPF_ALU|BPF_MUL|BPF_K:
			A *= fentry->k;
			continue;
		case BPF_ALU|BPF_DIV|BPF_X:
			if (X == 0)
				return 0;
			A /= X;
			continue;
		case BPF_ALU|BPF_DIV|BPF_K:
			A /= fentry->k;
			continue;
		case BPF_ALU|BPF_AND|BPF_X:
			A &= X;
			continue;
		case BPF_ALU|BPF_AND|BPF_K:
			A &= fentry->k;
			continue;
		case BPF_ALU|BPF_OR|BPF_X:
			A |= X;
			continue;
		case BPF_ALU|BPF_OR|BPF_K:
			A |= fentry->k;
			continue;
		case BPF_ALU|BPF_LSH|BPF_X:
			A <<= X;
			continue;
		case BPF_ALU|BPF_LSH|BPF_K:
			A <<= fentry->k;
			continue;
		case BPF_ALU|BPF_RSH|BPF_X:
			A >>= X;
			continue;
		case BPF_ALU|BPF_RSH|BPF_K:
			A >>= fentry->k;
			continue;
		case BPF_ALU|BPF_NEG:
			A = -A;
			continue;
		case BPF_JMP|BPF_JA:
			pc += fentry->k;
			continue;
		case BPF_JMP|BPF_JGT|BPF_K:
			pc += (A > fentry->k) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JGE|BPF_K:
			pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JEQ|BPF_K:
			pc += (A == fentry->k) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JSET|BPF_K:
			pc += (A & fentry->k) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JGT|BPF_X:
			pc += (A > X) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JGE|BPF_X:
			pc += (A >= X) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JEQ|BPF_X:
			pc += (A == X) ? fentry->jt : fentry->jf;
			continue;
		case BPF_JMP|BPF_JSET|BPF_X:
			pc += (A & X) ? fentry->jt : fentry->jf;
			continue;
		case BPF_LD|BPF_W|BPF_ABS:
			k = fentry->k;
load_w:
			ptr = load_pointer(skb, k, 4, &tmp);
			if (ptr != NULL) {
				A = get_unaligned_be32(ptr);
				continue;
			}
			break;
		case BPF_LD|BPF_H|BPF_ABS:
			k = fentry->k;
load_h:
			ptr = load_pointer(skb, k, 2, &tmp);
			if (ptr != NULL) {
				A = get_unaligned_be16(ptr);
				continue;
			}
			break;
		case BPF_LD|BPF_B|BPF_ABS:
			k = fentry->k;
load_b:
			ptr = load_pointer(skb, k, 1, &tmp);
			if (ptr != NULL) {
				A = *(u8 *)ptr;
				continue;
			}
			break;
		case BPF_LD|BPF_W|BPF_LEN:
			A = skb->len;
			continue;
		case BPF_LDX|BPF_W|BPF_LEN:
			X = skb->len;
			continue;
		case BPF_LD|BPF_W|BPF_IND:
			k = X + fentry->k;
			goto load_w;
		case BPF_LD|BPF_H|BPF_IND:
			k = X + fentry->k;
			goto load_h;
		case BPF_LD|BPF_B|BPF_IND:
			k = X + fentry->k;
			goto load_b;
		case BPF_LDX|BPF_B|BPF_MSH:
			ptr = load_pointer(skb, fentry->k, 1, &tmp);
			if (ptr != NULL) {
				X = (*(u8 *)ptr & 0xf) << 2;
				continue;
			}
			return 0;
		case BPF_LD|BPF_IMM:
			A = fentry->k;
			continue;
		case BPF_LDX|BPF_IMM:
			X = fentry->k;
			continue;
		case BPF_LD|BPF_MEM:
			A = mem[fentry->k];
			continue;
		case BPF_LDX|BPF_MEM:
			X = mem[fentry->k];
			continue;
		case BPF_MISC|BPF_TAX:
			X = A;
			continue;
		case BPF_MISC|BPF_TXA:
			A = X;
			continue;
		case BPF_RET|BPF_K:
			return fentry->k;
		case BPF_RET|BPF_A:
			return A;
		case BPF_ST:
			mem[fentry->k] = A;
			continue;
		case BPF_STX:
			mem[fentry->k] = X;
			continue;
		default:
			WARN_ON(1);
			return 0;
		}

		/*
		 * Handle ancillary data, which are impossible
		 * (or very difficult) to obtain by parsing the
		 * packet contents.
		 */
		switch (k-SKF_AD_OFF) {
		case SKF_AD_PROTOCOL:
			A = ntohs(skb->protocol);
			continue;
		case SKF_AD_PKTTYPE:
			A = skb->pkt_type;
			continue;
		case SKF_AD_IFINDEX:
			A = skb->dev->ifindex;
			continue;
		case SKF_AD_NLATTR: {
			struct nlattr *nla;

			if (skb_is_nonlinear(skb))
				return 0;
			if (A > skb->len - sizeof(struct nlattr))
				return 0;

			nla = nla_find((struct nlattr *)&skb->data[A],
				       skb->len - A, X);
			if (nla)
				A = (void *)nla - (void *)skb->data;
			else
				A = 0;
			continue;
		}
		case SKF_AD_NLATTR_NEST: {
			struct nlattr *nla;

			if (skb_is_nonlinear(skb))
				return 0;
			if (A > skb->len - sizeof(struct nlattr))
				return 0;

			nla = (struct nlattr *)&skb->data[A];
			if (nla->nla_len > skb->len - A)
				return 0;

			nla = nla_find_nested(nla, X);
			if (nla)
				A = (void *)nla - (void *)skb->data;
			else
				A = 0;
			continue;
		}
		default:
			return 0;
		}
	}

	return 0;
}
EXPORT_SYMBOL(sk_run_filter);
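
/*
 * Worked example (editorial): the classic "accept only IPv4" program
 * in this instruction set, assuming the filter sees a link-level frame
 * with the EtherType at offset 12.  Both the array and the call are
 * hypothetical, not kernel code; fields are { code, jt, jf, k }:
 *
 *	static struct sock_filter example_prog[] = {
 *		{ BPF_LD | BPF_H | BPF_ABS, 0, 0, 12 },
 *		{ BPF_JMP | BPF_JEQ | BPF_K, 0, 1, ETH_P_IP },
 *		{ BPF_RET | BPF_K, 0, 0, 0xffff },	accept up to 64K
 *		{ BPF_RET | BPF_K, 0, 0, 0 },		drop
 *	};
 *
 *	unsigned int keep = sk_run_filter(skb, example_prog,
 *					  ARRAY_SIZE(example_prog));
 *
 * If the EtherType equals ETH_P_IP the jump falls through (jt = 0) to
 * the accepting return; otherwise it skips one instruction (jf = 1) to
 * the dropping return.
 */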

/**
 *	sk_chk_filter - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through, kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
int sk_chk_filter(struct sock_filter *filter, int flen)
{
	struct sock_filter *ftest;
	int pc;

	if (flen == 0 || flen > BPF_MAXINSNS)
		return -EINVAL;

	/* check the filter code now */
	for (pc = 0; pc < flen; pc++) {
		ftest = &filter[pc];

		/* Only allow valid instructions */
		switch (ftest->code) {
		case BPF_ALU|BPF_ADD|BPF_K:
		case BPF_ALU|BPF_ADD|BPF_X:
		case BPF_ALU|BPF_SUB|BPF_K:
		case BPF_ALU|BPF_SUB|BPF_X:
		case BPF_ALU|BPF_MUL|BPF_K:
		case BPF_ALU|BPF_MUL|BPF_X:
		case BPF_ALU|BPF_DIV|BPF_X:
		case BPF_ALU|BPF_AND|BPF_K:
		case BPF_ALU|BPF_AND|BPF_X:
		case BPF_ALU|BPF_OR|BPF_K:
		case BPF_ALU|BPF_OR|BPF_X:
		case BPF_ALU|BPF_LSH|BPF_K:
		case BPF_ALU|BPF_LSH|BPF_X:
		case BPF_ALU|BPF_RSH|BPF_K:
		case BPF_ALU|BPF_RSH|BPF_X:
		case BPF_ALU|BPF_NEG:
		case BPF_LD|BPF_W|BPF_ABS:
		case BPF_LD|BPF_H|BPF_ABS:
		case BPF_LD|BPF_B|BPF_ABS:
		case BPF_LD|BPF_W|BPF_LEN:
		case BPF_LD|BPF_W|BPF_IND:
		case BPF_LD|BPF_H|BPF_IND:
		case BPF_LD|BPF_B|BPF_IND:
		case BPF_LD|BPF_IMM:
		case BPF_LDX|BPF_W|BPF_LEN:
		case BPF_LDX|BPF_B|BPF_MSH:
		case BPF_LDX|BPF_IMM:
		case BPF_MISC|BPF_TAX:
		case BPF_MISC|BPF_TXA:
		case BPF_RET|BPF_K:
		case BPF_RET|BPF_A:
			break;

		/* Some instructions need special checks */

		case BPF_ALU|BPF_DIV|BPF_K:
			/* check for division by zero */
			if (ftest->k == 0)
				return -EINVAL;
			break;

		case BPF_LD|BPF_MEM:
		case BPF_LDX|BPF_MEM:
		case BPF_ST:
		case BPF_STX:
			/* check for invalid memory addresses */
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;

		case BPF_JMP|BPF_JA:
			/*
			 * Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
			if (ftest->k >= (unsigned)(flen-pc-1))
				return -EINVAL;
			break;

		case BPF_JMP|BPF_JEQ|BPF_K:
		case BPF_JMP|BPF_JEQ|BPF_X:
		case BPF_JMP|BPF_JGE|BPF_K:
		case BPF_JMP|BPF_JGE|BPF_X:
		case BPF_JMP|BPF_JGT|BPF_K:
		case BPF_JMP|BPF_JGT|BPF_X:
		case BPF_JMP|BPF_JSET|BPF_K:
		case BPF_JMP|BPF_JSET|BPF_X:
			/* for conditionals both must be safe */
			if (pc + ftest->jt + 1 >= flen ||
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
			break;

		default:
			return -EINVAL;
		}
	}

	return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
}
EXPORT_SYMBOL(sk_chk_filter);
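
/*
 * Editorial examples of instructions sk_chk_filter() rejects
 * (hypothetical user input, fields are { code, jt, jf, k }):
 *
 *	{ BPF_ALU | BPF_DIV | BPF_K, 0, 0, 0 },	division by constant zero
 *	{ BPF_ST, 0, 0, BPF_MEMWORDS },		scratch index out of range
 *	{ BPF_LD | BPF_W | BPF_LEN, 0, 0, 0 },	as the last insn: no BPF_RET
 */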

/**
 *	sk_filter_rcu_release - Release a socket filter by rcu_head
 *	@rcu: rcu_head that contains the sk_filter to free
 */
static void sk_filter_rcu_release(struct rcu_head *rcu)
{
	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

	sk_filter_release(fp);
}

static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
{
	unsigned int size = sk_filter_len(fp);

	atomic_sub(size, &sk->sk_omem_alloc);
	call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
}
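
/*
 * Editorial note: the memory accounting above is reversed immediately,
 * but the free itself is deferred through call_rcu_bh() because
 * sk_filter() dereferences sk->sk_filter under rcu_read_lock_bh();
 * a concurrent reader must never see the filter memory vanish mid-run.
 */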

/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct sk_filter *fp, *old_fp;
	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
	int err;

	/* Make sure the new filter is there and in the right amounts. */
	if (fprog->filter == NULL)
		return -EINVAL;

	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;
	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
		sock_kfree_s(sk, fp, fsize+sizeof(*fp));
		return -EFAULT;
	}

	atomic_set(&fp->refcnt, 1);
	fp->len = fprog->len;

	err = sk_chk_filter(fp->insns, fp->len);
	if (err) {
		sk_filter_uncharge(sk, fp);
		return err;
	}

	rcu_read_lock_bh();
	old_fp = rcu_dereference(sk->sk_filter);
	rcu_assign_pointer(sk->sk_filter, fp);
	rcu_read_unlock_bh();

	if (old_fp)
		sk_filter_delayed_uncharge(sk, old_fp);
	return 0;
}
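
/*
 * Userspace sketch (editorial): filters normally arrive here via
 * setsockopt(SO_ATTACH_FILTER).  This fragment reuses the hypothetical
 * example_prog array shown after sk_run_filter() above:
 *
 *	struct sock_fprog fprog = {
 *		.len    = sizeof(example_prog) / sizeof(example_prog[0]),
 *		.filter = example_prog,
 *	};
 *
 *	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
 *		       &fprog, sizeof(fprog)) < 0)
 *		perror("SO_ATTACH_FILTER");
 */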

int sk_detach_filter(struct sock *sk)
{
	int ret = -ENOENT;
	struct sk_filter *filter;

	rcu_read_lock_bh();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		rcu_assign_pointer(sk->sk_filter, NULL);
		sk_filter_delayed_uncharge(sk, filter);
		ret = 0;
	}
	rcu_read_unlock_bh();
	return ret;
}
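
/*
 * Userspace sketch (editorial): detaching is done with
 * setsockopt(SO_DETACH_FILTER).  The option value itself is unused,
 * but a dummy int is traditionally passed to satisfy the generic
 * optlen check in sock_setsockopt():
 *
 *	int dummy = 0;
 *	setsockopt(fd, SOL_SOCKET, SO_DETACH_FILTER,
 *		   &dummy, sizeof(dummy));
 */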