1 /*
2  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that: (1) source code distributions
7  * retain the above copyright notice and this paragraph in its entirety, (2)
8  * distributions including binary code include the above copyright notice and
9  * this paragraph in its entirety in the documentation or other materials
10  * provided with the distribution, and (3) all advertising materials mentioning
11  * features or use of this software display the following acknowledgement:
12  * ``This product includes software developed by the University of California,
13  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14  * the University nor the names of its contributors may be used to endorse
15  * or promote products derived from this software without specific prior
16  * written permission.
17  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20  *
21  *  Optimization module for tcpdump intermediate representation.
22  */
23 
24 #ifdef HAVE_CONFIG_H
25 #include "config.h"
26 #endif
27 
28 #ifdef _WIN32
29 #include <pcap-stdinc.h>
30 #else /* _WIN32 */
31 #if HAVE_INTTYPES_H
32 #include <inttypes.h>
33 #elif HAVE_STDINT_H
34 #include <stdint.h>
35 #endif
36 #ifdef HAVE_SYS_BITYPES_H
37 #include <sys/bitypes.h>
38 #endif
39 #include <sys/types.h>
40 #endif /* _WIN32 */
41 
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <memory.h>
45 #include <string.h>
46 
47 #include <errno.h>
48 
49 #include "pcap-int.h"
50 
51 #include "gencode.h"
52 
53 #ifdef HAVE_OS_PROTO_H
54 #include "os-proto.h"
55 #endif
56 
57 #ifdef BDEBUG
58 int pcap_optimizer_debug;
59 #endif
60 
61 #if defined(MSDOS) && !defined(__DJGPP__)
62 extern int _w32_ffs (int mask);
63 #define ffs _w32_ffs
64 #endif
65 
66 /*
67  * So is the check for _MSC_VER done because MinGW has this?
68  */
69 #if defined(_WIN32) && defined (_MSC_VER)
70 /*
71  * ffs -- vax ffs instruction
72  *
73  * XXX - with versions of VS that have it, use _BitScanForward()?
74  */
75 static int
76 ffs(int mask)
77 {
78 	int bit;
79 
80 	if (mask == 0)
81 		return(0);
82 	for (bit = 1; !(mask & 1); bit++)
83 		mask >>= 1;
84 	return(bit);
85 }
86 #endif
87 
88 /*
89  * Represents a deleted instruction.
90  */
91 #define NOP -1
92 
93 /*
94  * Register numbers for use-def values.
95  * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
96  * location.  A_ATOM is the accumulator and X_ATOM is the index
97  * register.
98  */
99 #define A_ATOM BPF_MEMWORDS
100 #define X_ATOM (BPF_MEMWORDS+1)
101 
102 /*
103  * This define is used to represent *both* the accumulator and
104  * x register in use-def computations.
105  * Currently, the use-def code assumes only one definition per instruction.
106  */
107 #define AX_ATOM N_ATOMS
108 
109 /*
110  * These data structures are used in a Cocke and Schwartz style
111  * value numbering scheme.  Since the flowgraph is acyclic,
112  * exit values can be propagated from a node's predecessors
113  * provided it is uniquely defined.
114  */
115 struct valnode {
116 	int code;
117 	int v0, v1;
118 	int val;
119 	struct valnode *next;
120 };
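/*
 * Editorial note (not part of the original source): each valnode records an
 * operation ('code') applied to the value numbers of its operands ('v0',
 * 'v1') and the number ('val') assigned to the result, so two statements
 * computing the same expression from the same inputs receive the same value
 * number.  See F() below for how the table is consulted.
 */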
121 
122 /* Integer constants mapped with the load immediate opcode. */
123 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
124 
125 struct vmapinfo {
126 	int is_const;
127 	bpf_int32 const_val;
128 };
129 
130 struct _opt_state {
131 	/*
132 	 * A flag to indicate that further optimization is needed.
133 	 * Iterative passes are continued until a given pass yields no
134 	 * branch movement.
135 	 */
136 	int done;
137 
138 	int n_blocks;
139 	struct block **blocks;
140 	int n_edges;
141 	struct edge **edges;
142 
143 	/*
144 	 * A bit vector set representation of the dominators.
145 	 * We round up the set size to the next power of two.
146 	 */
147 	int nodewords;
148 	int edgewords;
149 	struct block **levels;
150 	bpf_u_int32 *space;
151 
152 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
153 /*
154  * True if a is in uset {p}
155  */
156 #define SET_MEMBER(p, a) \
157 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
158 
159 /*
160  * Add 'a' to uset p.
161  */
162 #define SET_INSERT(p, a) \
163 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
164 
165 /*
166  * Delete 'a' from uset p.
167  */
168 #define SET_DELETE(p, a) \
169 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
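/*
 * Worked example (editorial note, assuming bpf_u_int32 is 32 bits wide):
 * BITS_PER_WORD is then 32, so SET_INSERT(p, 37) sets bit 37 % 32 == 5 of
 * word 37 / 32 == 1, i.e. p[1] |= (1 << 5); SET_MEMBER(p, 37) tests that
 * same bit and SET_DELETE(p, 37) clears it.
 */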
170 
171 /*
172  * a := a intersect b
173  */
174 #define SET_INTERSECT(a, b, n)\
175 {\
176 	register bpf_u_int32 *_x = a, *_y = b;\
177 	register int _n = n;\
178 	while (--_n >= 0) *_x++ &= *_y++;\
179 }
180 
181 /*
182  * a := a - b
183  */
184 #define SET_SUBTRACT(a, b, n)\
185 {\
186 	register bpf_u_int32 *_x = a, *_y = b;\
187 	register int _n = n;\
188 	while (--_n >= 0) *_x++ &=~ *_y++;\
189 }
190 
191 /*
192  * a := a union b
193  */
194 #define SET_UNION(a, b, n)\
195 {\
196 	register bpf_u_int32 *_x = a, *_y = b;\
197 	register int _n = n;\
198 	while (--_n >= 0) *_x++ |= *_y++;\
199 }
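/*
 * Editorial note: these three macros walk a set one word at a time.  Because
 * find_dom() and find_edom() seed every non-root set with all ones, the first
 * SET_INTERSECT() applied to such a set simply copies the other operand;
 * later intersections can only narrow it.
 */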
200 
201 	uset all_dom_sets;
202 	uset all_closure_sets;
203 	uset all_edge_sets;
204 
205 #define MODULUS 213
206 	struct valnode *hashtbl[MODULUS];
207 	int curval;
208 	int maxval;
209 
210 	struct vmapinfo *vmap;
211 	struct valnode *vnode_base;
212 	struct valnode *next_vnode;
213 };
214 
215 typedef struct {
216 	/*
217 	 * Some pointers used to convert the basic block form of the code
218 	 * into the array form that BPF requires.  'fstart' will point to
219 	 * the malloc'd array while 'ftail' is used during the recursive
220 	 * traversal.
221 	 */
222 	struct bpf_insn *fstart;
223 	struct bpf_insn *ftail;
224 } conv_state_t;
225 
226 static void opt_init(compiler_state_t *, opt_state_t *, struct icode *);
227 static void opt_cleanup(opt_state_t *);
228 
229 static void intern_blocks(opt_state_t *, struct icode *);
230 
231 static void find_inedges(opt_state_t *, struct block *);
232 #ifdef BDEBUG
233 static void opt_dump(compiler_state_t *, struct icode *);
234 #endif
235 
236 #ifndef MAX
237 #define MAX(a,b) ((a)>(b)?(a):(b))
238 #endif
239 
240 static void
241 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
242 {
243 	int level;
244 
245 	if (isMarked(ic, b))
246 		return;
247 
248 	Mark(ic, b);
249 	b->link = 0;
250 
251 	if (JT(b)) {
252 		find_levels_r(opt_state, ic, JT(b));
253 		find_levels_r(opt_state, ic, JF(b));
254 		level = MAX(JT(b)->level, JF(b)->level) + 1;
255 	} else
256 		level = 0;
257 	b->level = level;
258 	b->link = opt_state->levels[level];
259 	opt_state->levels[level] = b;
260 }
261 
262 /*
263  * Level graph.  The levels go from 0 at the leaves to
264  * N_LEVELS at the root.  The opt_state->levels[] array points to the
265  * first node of the level list, whose elements are linked
266  * with the 'link' field of the struct block.
267  */
268 static void
269 find_levels(opt_state_t *opt_state, struct icode *ic)
270 {
271 	memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
272 	unMarkAll(ic);
273 	find_levels_r(opt_state, ic, ic->root);
274 }
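/*
 * Editorial example: in a graph whose root tests one condition and whose two
 * successors are both RET leaves, find_levels_r() gives each leaf level 0 and
 * the root level 1; opt_state->levels[0] then heads a list linking the two
 * leaves through their 'link' fields.
 */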
275 
276 /*
277  * Find dominator relationships.
278  * Assumes graph has been leveled.
279  */
280 static void
281 find_dom(opt_state_t *opt_state, struct block *root)
282 {
283 	int i;
284 	struct block *b;
285 	bpf_u_int32 *x;
286 
287 	/*
288 	 * Initialize sets to contain all nodes.
289 	 */
290 	x = opt_state->all_dom_sets;
291 	i = opt_state->n_blocks * opt_state->nodewords;
292 	while (--i >= 0)
293 		*x++ = ~0;
294 	/* Root starts off empty. */
295 	for (i = opt_state->nodewords; --i >= 0;)
296 		root->dom[i] = 0;
297 
298 	/* root->level is the highest level number found. */
299 	for (i = root->level; i >= 0; --i) {
300 		for (b = opt_state->levels[i]; b; b = b->link) {
301 			SET_INSERT(b->dom, b->id);
302 			if (JT(b) == 0)
303 				continue;
304 			SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
305 			SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
306 		}
307 	}
308 }
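/*
 * Editorial example: for a diamond root -> {a, b} -> c, every non-root set
 * starts as all ones and root's as empty.  Working down from the root's
 * level, root inserts itself and intersects a's and b's sets down to {root};
 * a and b then add themselves and intersect c's set to {root, a} and finally
 * {root}; when c is reached it adds itself, leaving c dominated only by root
 * and c -- the expected answer, since a and b can each be bypassed.
 */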
309 
310 static void
311 propedom(opt_state_t *opt_state, struct edge *ep)
312 {
313 	SET_INSERT(ep->edom, ep->id);
314 	if (ep->succ) {
315 		SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
316 		SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
317 	}
318 }
319 
320 /*
321  * Compute edge dominators.
322  * Assumes graph has been leveled and predecessors established.
323  */
324 static void
325 find_edom(opt_state_t *opt_state, struct block *root)
326 {
327 	int i;
328 	uset x;
329 	struct block *b;
330 
331 	x = opt_state->all_edge_sets;
332 	for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
333 		x[i] = ~0;
334 
335 	/* root->level is the highest level number found. */
336 	memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
337 	memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
338 	for (i = root->level; i >= 0; --i) {
339 		for (b = opt_state->levels[i]; b != 0; b = b->link) {
340 			propedom(opt_state, &b->et);
341 			propedom(opt_state, &b->ef);
342 		}
343 	}
344 }
345 
346 /*
347  * Find the backwards transitive closure of the flow graph.  These sets
348  * are backwards in the sense that we find the set of nodes that reach
349  * a given node, not the set of nodes that can be reached by a node.
350  *
351  * Assumes graph has been leveled.
352  */
353 static void
354 find_closure(opt_state_t *opt_state, struct block *root)
355 {
356 	int i;
357 	struct block *b;
358 
359 	/*
360 	 * Initialize sets to contain no nodes.
361 	 */
362 	memset((char *)opt_state->all_closure_sets, 0,
363 	      opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
364 
365 	/* root->level is the highest level number found. */
366 	for (i = root->level; i >= 0; --i) {
367 		for (b = opt_state->levels[i]; b; b = b->link) {
368 			SET_INSERT(b->closure, b->id);
369 			if (JT(b) == 0)
370 				continue;
371 			SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
372 			SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
373 		}
374 	}
375 }
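/*
 * Editorial example: for the same diamond root -> {a, b} -> c, the closure
 * sets grow instead of shrink: root's id is unioned into a's and b's sets,
 * theirs into c's, so c's closure ends up as {root, a, b, c} -- every node
 * that can reach c.
 */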
376 
377 /*
378  * Return the register number that is used by s.  If A and X are both
379  * used, return AX_ATOM.  If no register is used, return -1.
380  *
381  * The implementation should probably change to an array access.
382  */
383 static int
384 atomuse(struct stmt *s)
385 {
386 	register int c = s->code;
387 
388 	if (c == NOP)
389 		return -1;
390 
391 	switch (BPF_CLASS(c)) {
392 
393 	case BPF_RET:
394 		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
395 			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
396 
397 	case BPF_LD:
398 	case BPF_LDX:
399 		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
400 			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;
401 
402 	case BPF_ST:
403 		return A_ATOM;
404 
405 	case BPF_STX:
406 		return X_ATOM;
407 
408 	case BPF_JMP:
409 	case BPF_ALU:
410 		if (BPF_SRC(c) == BPF_X)
411 			return AX_ATOM;
412 		return A_ATOM;
413 
414 	case BPF_MISC:
415 		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
416 	}
417 	abort();
418 	/* NOTREACHED */
419 }
420 
421 /*
422  * Return the register number that is defined by 's'.  We assume that
423  * a single stmt cannot define more than one register.  If no register
424  * is defined, return -1.
425  *
426  * The implementation should probably change to an array access.
427  */
428 static int
429 atomdef(struct stmt *s)
430 {
431 	if (s->code == NOP)
432 		return -1;
433 
434 	switch (BPF_CLASS(s->code)) {
435 
436 	case BPF_LD:
437 	case BPF_ALU:
438 		return A_ATOM;
439 
440 	case BPF_LDX:
441 		return X_ATOM;
442 
443 	case BPF_ST:
444 	case BPF_STX:
445 		return s->k;
446 
447 	case BPF_MISC:
448 		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
449 	}
450 	return -1;
451 }
452 
453 /*
454  * Compute the sets of registers used, defined, and killed by 'b'.
455  *
456  * "Used" means that a statement in 'b' uses the register before any
457  * statement in 'b' defines it, i.e. it uses the value left in
458  * that register by a predecessor block of this block.
459  * "Defined" means that a statement in 'b' defines it.
460  * "Killed" means that a statement in 'b' defines it before any
461  * statement in 'b' uses it, i.e. it kills the value left in that
462  * register by a predecessor block of this block.
463  */
464 static void
465 compute_local_ud(struct block *b)
466 {
467 	struct slist *s;
468 	atomset def = 0, use = 0, killed = 0;
469 	int atom;
470 
471 	for (s = b->stmts; s; s = s->next) {
472 		if (s->s.code == NOP)
473 			continue;
474 		atom = atomuse(&s->s);
475 		if (atom >= 0) {
476 			if (atom == AX_ATOM) {
477 				if (!ATOMELEM(def, X_ATOM))
478 					use |= ATOMMASK(X_ATOM);
479 				if (!ATOMELEM(def, A_ATOM))
480 					use |= ATOMMASK(A_ATOM);
481 			}
482 			else if (atom < N_ATOMS) {
483 				if (!ATOMELEM(def, atom))
484 					use |= ATOMMASK(atom);
485 			}
486 			else
487 				abort();
488 		}
489 		atom = atomdef(&s->s);
490 		if (atom >= 0) {
491 			if (!ATOMELEM(use, atom))
492 				killed |= ATOMMASK(atom);
493 			def |= ATOMMASK(atom);
494 		}
495 	}
496 	if (BPF_CLASS(b->s.code) == BPF_JMP) {
497 		/*
498 		 * XXX - what about RET?
499 		 */
500 		atom = atomuse(&b->s);
501 		if (atom >= 0) {
502 			if (atom == AX_ATOM) {
503 				if (!ATOMELEM(def, X_ATOM))
504 					use |= ATOMMASK(X_ATOM);
505 				if (!ATOMELEM(def, A_ATOM))
506 					use |= ATOMMASK(A_ATOM);
507 			}
508 			else if (atom < N_ATOMS) {
509 				if (!ATOMELEM(def, atom))
510 					use |= ATOMMASK(atom);
511 			}
512 			else
513 				abort();
514 		}
515 	}
516 
517 	b->def = def;
518 	b->kill = killed;
519 	b->in_use = use;
520 }
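/*
 * Editorial example: for a block whose statements are "ld M[1]; add #4;
 * st M[2]", memory slot 1 is read before this block writes it, so it lands in
 * 'use'; the accumulator and slot 2 are written before any use of their
 * incoming values, so both land in 'def' and 'kill'.
 */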
521 
522 /*
523  * Assume graph is already leveled.
524  */
525 static void
526 find_ud(opt_state_t *opt_state, struct block *root)
527 {
528 	int i, maxlevel;
529 	struct block *p;
530 
531 	/*
532 	 * root->level is the highest level number found;
533 	 * count down from there.
534 	 */
535 	maxlevel = root->level;
536 	for (i = maxlevel; i >= 0; --i)
537 		for (p = opt_state->levels[i]; p; p = p->link) {
538 			compute_local_ud(p);
539 			p->out_use = 0;
540 		}
541 
542 	for (i = 1; i <= maxlevel; ++i) {
543 		for (p = opt_state->levels[i]; p; p = p->link) {
544 			p->out_use |= JT(p)->in_use | JF(p)->in_use;
545 			p->in_use |= p->out_use &~ p->kill;
546 		}
547 	}
548 }
549 static void
550 init_val(opt_state_t *opt_state)
551 {
552 	opt_state->curval = 0;
553 	opt_state->next_vnode = opt_state->vnode_base;
554 	memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
555 	memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
556 }
557 
558 /* Because we really don't have an IR, this stuff is a little messy. */
559 static int
560 F(opt_state_t *opt_state, int code, int v0, int v1)
561 {
562 	u_int hash;
563 	int val;
564 	struct valnode *p;
565 
566 	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
567 	hash %= MODULUS;
568 
569 	for (p = opt_state->hashtbl[hash]; p; p = p->next)
570 		if (p->code == code && p->v0 == v0 && p->v1 == v1)
571 			return p->val;
572 
573 	val = ++opt_state->curval;
574 	if (BPF_MODE(code) == BPF_IMM &&
575 	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
576 		opt_state->vmap[val].const_val = v0;
577 		opt_state->vmap[val].is_const = 1;
578 	}
579 	p = opt_state->next_vnode++;
580 	p->val = val;
581 	p->code = code;
582 	p->v0 = v0;
583 	p->v1 = v1;
584 	p->next = opt_state->hashtbl[hash];
585 	opt_state->hashtbl[hash] = p;
586 
587 	return val;
588 }
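/*
 * Illustrative sketch (editorial, not compiled and not part of the original
 * file): how the value-numbering helpers above behave.  'opt_state' stands
 * for a state that init_val() has already reset.
 */
#if 0
	int va = K(2);		/* first load of #2: allocates a new number */
	int vb = K(2);		/* same constant: va == vb */
	int vc = K(3);		/* different constant: vc != va, and
				 * vmap[vc].is_const is set with const_val 3 */
#endif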
589 
590 static inline void
591 vstore(struct stmt *s, int *valp, int newval, int alter)
592 {
593 	if (alter && *valp == newval)
594 		s->code = NOP;
595 	else
596 		*valp = newval;
597 }
598 
599 /*
600  * Do constant-folding on binary operators.
601  * (Unary operators are handled elsewhere.)
602  */
603 static void
604 fold_op(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
605     struct stmt *s, int v0, int v1)
606 {
607 	bpf_u_int32 a, b;
608 
609 	a = opt_state->vmap[v0].const_val;
610 	b = opt_state->vmap[v1].const_val;
611 
612 	switch (BPF_OP(s->code)) {
613 	case BPF_ADD:
614 		a += b;
615 		break;
616 
617 	case BPF_SUB:
618 		a -= b;
619 		break;
620 
621 	case BPF_MUL:
622 		a *= b;
623 		break;
624 
625 	case BPF_DIV:
626 		if (b == 0)
627 			bpf_error(cstate, "division by zero");
628 		a /= b;
629 		break;
630 
631 	case BPF_MOD:
632 		if (b == 0)
633 			bpf_error(cstate, "modulus by zero");
634 		a %= b;
635 		break;
636 
637 	case BPF_AND:
638 		a &= b;
639 		break;
640 
641 	case BPF_OR:
642 		a |= b;
643 		break;
644 
645 	case BPF_XOR:
646 		a ^= b;
647 		break;
648 
649 	case BPF_LSH:
650 		a <<= b;
651 		break;
652 
653 	case BPF_RSH:
654 		a >>= b;
655 		break;
656 
657 	default:
658 		abort();
659 	}
660 	s->k = a;
661 	s->code = BPF_LD|BPF_IMM;
662 	opt_state->done = 0;
663 }
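/*
 * Editorial example: if the accumulator's value number is known to map to the
 * constant 6 and the statement is BPF_ALU|BPF_ADD|BPF_K with k == 4,
 * opt_stmt() hands both value numbers to fold_op(), which rewrites the
 * statement in place as BPF_LD|BPF_IMM with k == 10 and clears 'done' so that
 * another pass runs over the result.
 */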
664 
665 static inline struct slist *
666 this_op(struct slist *s)
667 {
668 	while (s != 0 && s->s.code == NOP)
669 		s = s->next;
670 	return s;
671 }
672 
673 static void
674 opt_not(struct block *b)
675 {
676 	struct block *tmp = JT(b);
677 
678 	JT(b) = JF(b);
679 	JF(b) = tmp;
680 }
681 
682 static void
683 opt_peep(opt_state_t *opt_state, struct block *b)
684 {
685 	struct slist *s;
686 	struct slist *next, *last;
687 	int val;
688 
689 	s = b->stmts;
690 	if (s == 0)
691 		return;
692 
693 	last = s;
694 	for (/*empty*/; /*empty*/; s = next) {
695 		/*
696 		 * Skip over nops.
697 		 */
698 		s = this_op(s);
699 		if (s == 0)
700 			break;	/* nothing left in the block */
701 
702 		/*
703 		 * Find the next real instruction after that one
704 		 * (skipping nops).
705 		 */
706 		next = this_op(s->next);
707 		if (next == 0)
708 			break;	/* no next instruction */
709 		last = next;
710 
711 		/*
712 		 * st  M[k]	-->	st  M[k]
713 		 * ldx M[k]		tax
714 		 */
715 		if (s->s.code == BPF_ST &&
716 		    next->s.code == (BPF_LDX|BPF_MEM) &&
717 		    s->s.k == next->s.k) {
718 			opt_state->done = 0;
719 			next->s.code = BPF_MISC|BPF_TAX;
720 		}
721 		/*
722 		 * ld  #k	-->	ldx  #k
723 		 * tax			txa
724 		 */
725 		if (s->s.code == (BPF_LD|BPF_IMM) &&
726 		    next->s.code == (BPF_MISC|BPF_TAX)) {
727 			s->s.code = BPF_LDX|BPF_IMM;
728 			next->s.code = BPF_MISC|BPF_TXA;
729 			opt_state->done = 0;
730 		}
731 		/*
732 		 * This is an ugly special case, but it happens
733 		 * when you say tcp[k] or udp[k] where k is a constant.
734 		 */
735 		if (s->s.code == (BPF_LD|BPF_IMM)) {
736 			struct slist *add, *tax, *ild;
737 
738 			/*
739 			 * Check that X isn't used on exit from this
740 			 * block (which the optimizer might cause).
741 			 * We know the code generator won't generate
742 			 * any local dependencies.
743 			 */
744 			if (ATOMELEM(b->out_use, X_ATOM))
745 				continue;
746 
747 			/*
748 			 * Check that the instruction following the ldi
749 			 * is an addx, or it's an ldxms with an addx
750 			 * following it (with 0 or more nops between the
751 			 * ldxms and addx).
752 			 */
753 			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
754 				add = next;
755 			else
756 				add = this_op(next->next);
757 			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
758 				continue;
759 
760 			/*
761 			 * Check that a tax follows that (with 0 or more
762 			 * nops between them).
763 			 */
764 			tax = this_op(add->next);
765 			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
766 				continue;
767 
768 			/*
769 			 * Check that an ild follows that (with 0 or more
770 			 * nops between them).
771 			 */
772 			ild = this_op(tax->next);
773 			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
774 			    BPF_MODE(ild->s.code) != BPF_IND)
775 				continue;
776 			/*
777 			 * We want to turn this sequence:
778 			 *
779 			 * (004) ldi     #0x2		{s}
780 			 * (005) ldxms   [14]		{next}  -- optional
781 			 * (006) addx			{add}
782 			 * (007) tax			{tax}
783 			 * (008) ild     [x+0]		{ild}
784 			 *
785 			 * into this sequence:
786 			 *
787 			 * (004) nop
788 			 * (005) ldxms   [14]
789 			 * (006) nop
790 			 * (007) nop
791 			 * (008) ild     [x+2]
792 			 *
793 			 * XXX We need to check that X is not
794 			 * subsequently used, because we want to change
795 			 * what'll be in it after this sequence.
796 			 *
797 			 * We know we can eliminate the accumulator
798 			 * modifications earlier in the sequence since
799 			 * it is defined by the last stmt of this sequence
800 			 * (i.e., the last statement of the sequence loads
801 			 * a value into the accumulator, so we can eliminate
802 			 * earlier operations on the accumulator).
803 			 */
804 			ild->s.k += s->s.k;
805 			s->s.code = NOP;
806 			add->s.code = NOP;
807 			tax->s.code = NOP;
808 			opt_state->done = 0;
809 		}
810 	}
811 	/*
812 	 * If the comparison at the end of a block is an equality
813 	 * comparison against a constant, and nobody uses the value
814 	 * we leave in the A register at the end of a block, and
815 	 * the operation preceding the comparison is an arithmetic
816 	 * operation, we can sometimes optimize it away.
817 	 */
818 	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
819 	    !ATOMELEM(b->out_use, A_ATOM)) {
820 	    	/*
821 	    	 * We can optimize away certain subtractions of the
822 	    	 * X register.
823 	    	 */
824 		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
825 			val = b->val[X_ATOM];
826 			if (opt_state->vmap[val].is_const) {
827 				/*
828 				 * If we have a subtract to do a comparison,
829 				 * and the X register is a known constant,
830 				 * we can merge this value into the
831 				 * comparison:
832 				 *
833 				 * sub x  ->	nop
834 				 * jeq #y	jeq #(x+y)
835 				 */
836 				b->s.k += opt_state->vmap[val].const_val;
837 				last->s.code = NOP;
838 				opt_state->done = 0;
839 			} else if (b->s.k == 0) {
840 				/*
841 				 * If the X register isn't a constant,
842 				 * and the comparison in the test is
843 				 * against 0, we can compare with the
844 				 * X register, instead:
845 				 *
846 				 * sub x  ->	nop
847 				 * jeq #0	jeq x
848 				 */
849 				last->s.code = NOP;
850 				b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
851 				opt_state->done = 0;
852 			}
853 		}
854 		/*
855 		 * Likewise, a constant subtract can be simplified:
856 		 *
857 		 * sub #x ->	nop
858 		 * jeq #y ->	jeq #(x+y)
859 		 */
860 		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
861 			last->s.code = NOP;
862 			b->s.k += last->s.k;
863 			opt_state->done = 0;
864 		}
865 		/*
866 		 * And, similarly, a constant AND can be simplified
867 		 * if we're testing against 0, i.e.:
868 		 *
869 		 * and #k	nop
870 		 * jeq #0  ->	jset #k
871 		 */
872 		else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
873 		    b->s.k == 0) {
874 			b->s.k = last->s.k;
875 			b->s.code = BPF_JMP|BPF_K|BPF_JSET;
876 			last->s.code = NOP;
877 			opt_state->done = 0;
878 			opt_not(b);
879 		}
880 	}
881 	/*
882 	 * jset #0        ->   never
883 	 * jset #ffffffff ->   always
884 	 */
885 	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
886 		if (b->s.k == 0)
887 			JT(b) = JF(b);
888 		if ((u_int)b->s.k == 0xffffffffU)
889 			JF(b) = JT(b);
890 	}
891 	/*
892 	 * If we're comparing against the index register, and the index
893 	 * register is a known constant, we can just compare against that
894 	 * constant.
895 	 */
896 	val = b->val[X_ATOM];
897 	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
898 		bpf_int32 v = opt_state->vmap[val].const_val;
899 		b->s.code &= ~BPF_X;
900 		b->s.k = v;
901 	}
902 	/*
903 	 * If the accumulator is a known constant, we can compute the
904 	 * comparison result.
905 	 */
906 	val = b->val[A_ATOM];
907 	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
908 		bpf_int32 v = opt_state->vmap[val].const_val;
909 		switch (BPF_OP(b->s.code)) {
910 
911 		case BPF_JEQ:
912 			v = v == b->s.k;
913 			break;
914 
915 		case BPF_JGT:
916 			v = (unsigned)v > (unsigned)b->s.k;
917 			break;
918 
919 		case BPF_JGE:
920 			v = (unsigned)v >= (unsigned)b->s.k;
921 			break;
922 
923 		case BPF_JSET:
924 			v &= b->s.k;
925 			break;
926 
927 		default:
928 			abort();
929 		}
930 		if (JF(b) != JT(b))
931 			opt_state->done = 0;
932 		if (v)
933 			JF(b) = JT(b);
934 		else
935 			JT(b) = JF(b);
936 	}
937 }
938 
939 /*
940  * Compute the symbolic value of expression of 's', and update
941  * anything it defines in the value table 'val'.  If 'alter' is true,
942  * do various optimizations.  This code would be cleaner if symbolic
943  * evaluation and code transformations weren't folded together.
944  */
945 static void
946 opt_stmt(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
947     struct stmt *s, int val[], int alter)
948 {
949 	int op;
950 	int v;
951 
952 	switch (s->code) {
953 
954 	case BPF_LD|BPF_ABS|BPF_W:
955 	case BPF_LD|BPF_ABS|BPF_H:
956 	case BPF_LD|BPF_ABS|BPF_B:
957 		v = F(opt_state, s->code, s->k, 0L);
958 		vstore(s, &val[A_ATOM], v, alter);
959 		break;
960 
961 	case BPF_LD|BPF_IND|BPF_W:
962 	case BPF_LD|BPF_IND|BPF_H:
963 	case BPF_LD|BPF_IND|BPF_B:
964 		v = val[X_ATOM];
965 		if (alter && opt_state->vmap[v].is_const) {
966 			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
967 			s->k += opt_state->vmap[v].const_val;
968 			v = F(opt_state, s->code, s->k, 0L);
969 			opt_state->done = 0;
970 		}
971 		else
972 			v = F(opt_state, s->code, s->k, v);
973 		vstore(s, &val[A_ATOM], v, alter);
974 		break;
975 
976 	case BPF_LD|BPF_LEN:
977 		v = F(opt_state, s->code, 0L, 0L);
978 		vstore(s, &val[A_ATOM], v, alter);
979 		break;
980 
981 	case BPF_LD|BPF_IMM:
982 		v = K(s->k);
983 		vstore(s, &val[A_ATOM], v, alter);
984 		break;
985 
986 	case BPF_LDX|BPF_IMM:
987 		v = K(s->k);
988 		vstore(s, &val[X_ATOM], v, alter);
989 		break;
990 
991 	case BPF_LDX|BPF_MSH|BPF_B:
992 		v = F(opt_state, s->code, s->k, 0L);
993 		vstore(s, &val[X_ATOM], v, alter);
994 		break;
995 
996 	case BPF_ALU|BPF_NEG:
997 		if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
998 			s->code = BPF_LD|BPF_IMM;
999 			s->k = -opt_state->vmap[val[A_ATOM]].const_val;
1000 			val[A_ATOM] = K(s->k);
1001 		}
1002 		else
1003 			val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1004 		break;
1005 
1006 	case BPF_ALU|BPF_ADD|BPF_K:
1007 	case BPF_ALU|BPF_SUB|BPF_K:
1008 	case BPF_ALU|BPF_MUL|BPF_K:
1009 	case BPF_ALU|BPF_DIV|BPF_K:
1010 	case BPF_ALU|BPF_MOD|BPF_K:
1011 	case BPF_ALU|BPF_AND|BPF_K:
1012 	case BPF_ALU|BPF_OR|BPF_K:
1013 	case BPF_ALU|BPF_XOR|BPF_K:
1014 	case BPF_ALU|BPF_LSH|BPF_K:
1015 	case BPF_ALU|BPF_RSH|BPF_K:
1016 		op = BPF_OP(s->code);
1017 		if (alter) {
1018 			if (s->k == 0) {
1019 				/* don't optimize away "sub #0"
1020 				 * as it may be needed later to
1021 				 * fixup the generated math code */
1022 				if (op == BPF_ADD ||
1023 				    op == BPF_LSH || op == BPF_RSH ||
1024 				    op == BPF_OR || op == BPF_XOR) {
1025 					s->code = NOP;
1026 					break;
1027 				}
1028 				if (op == BPF_MUL || op == BPF_AND) {
1029 					s->code = BPF_LD|BPF_IMM;
1030 					val[A_ATOM] = K(s->k);
1031 					break;
1032 				}
1033 			}
1034 			if (opt_state->vmap[val[A_ATOM]].is_const) {
1035 				fold_op(cstate, ic, opt_state, s, val[A_ATOM], K(s->k));
1036 				val[A_ATOM] = K(s->k);
1037 				break;
1038 			}
1039 		}
1040 		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1041 		break;
1042 
1043 	case BPF_ALU|BPF_ADD|BPF_X:
1044 	case BPF_ALU|BPF_SUB|BPF_X:
1045 	case BPF_ALU|BPF_MUL|BPF_X:
1046 	case BPF_ALU|BPF_DIV|BPF_X:
1047 	case BPF_ALU|BPF_MOD|BPF_X:
1048 	case BPF_ALU|BPF_AND|BPF_X:
1049 	case BPF_ALU|BPF_OR|BPF_X:
1050 	case BPF_ALU|BPF_XOR|BPF_X:
1051 	case BPF_ALU|BPF_LSH|BPF_X:
1052 	case BPF_ALU|BPF_RSH|BPF_X:
1053 		op = BPF_OP(s->code);
1054 		if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1055 			if (opt_state->vmap[val[A_ATOM]].is_const) {
1056 				fold_op(cstate, ic, opt_state, s, val[A_ATOM], val[X_ATOM]);
1057 				val[A_ATOM] = K(s->k);
1058 			}
1059 			else {
1060 				s->code = BPF_ALU|BPF_K|op;
1061 				s->k = opt_state->vmap[val[X_ATOM]].const_val;
1062 				opt_state->done = 0;
1063 				val[A_ATOM] =
1064 					F(opt_state, s->code, val[A_ATOM], K(s->k));
1065 			}
1066 			break;
1067 		}
1068 		/*
1069 		 * Check if we're doing something to an accumulator
1070 		 * that is 0, and simplify.  This may not seem like
1071 		 * much of a simplification but it could open up further
1072 		 * optimizations.
1073 		 * XXX We could also check for mul by 1, etc.
1074 		 */
1075 		if (alter && opt_state->vmap[val[A_ATOM]].is_const
1076 		    && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1077 			if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1078 				s->code = BPF_MISC|BPF_TXA;
1079 				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1080 				break;
1081 			}
1082 			else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1083 				 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1084 				s->code = BPF_LD|BPF_IMM;
1085 				s->k = 0;
1086 				vstore(s, &val[A_ATOM], K(s->k), alter);
1087 				break;
1088 			}
1089 			else if (op == BPF_NEG) {
1090 				s->code = NOP;
1091 				break;
1092 			}
1093 		}
1094 		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1095 		break;
1096 
1097 	case BPF_MISC|BPF_TXA:
1098 		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1099 		break;
1100 
1101 	case BPF_LD|BPF_MEM:
1102 		v = val[s->k];
1103 		if (alter && opt_state->vmap[v].is_const) {
1104 			s->code = BPF_LD|BPF_IMM;
1105 			s->k = opt_state->vmap[v].const_val;
1106 			opt_state->done = 0;
1107 		}
1108 		vstore(s, &val[A_ATOM], v, alter);
1109 		break;
1110 
1111 	case BPF_MISC|BPF_TAX:
1112 		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1113 		break;
1114 
1115 	case BPF_LDX|BPF_MEM:
1116 		v = val[s->k];
1117 		if (alter && opt_state->vmap[v].is_const) {
1118 			s->code = BPF_LDX|BPF_IMM;
1119 			s->k = opt_state->vmap[v].const_val;
1120 			opt_state->done = 0;
1121 		}
1122 		vstore(s, &val[X_ATOM], v, alter);
1123 		break;
1124 
1125 	case BPF_ST:
1126 		vstore(s, &val[s->k], val[A_ATOM], alter);
1127 		break;
1128 
1129 	case BPF_STX:
1130 		vstore(s, &val[s->k], val[X_ATOM], alter);
1131 		break;
1132 	}
1133 }
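/*
 * Editorial example of the BPF_LD|BPF_IND case above: after "ldx #14" the
 * index register's value number maps to the constant 14, so during an
 * altering pass a following "ld [x+0]" is rewritten as the absolute load
 * "ld [14]" and revalued as if it had been written that way, removing the
 * run-time dependency on X.
 */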
1134 
1135 static void
1136 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1137 {
1138 	register int atom;
1139 
1140 	atom = atomuse(s);
1141 	if (atom >= 0) {
1142 		if (atom == AX_ATOM) {
1143 			last[X_ATOM] = 0;
1144 			last[A_ATOM] = 0;
1145 		}
1146 		else
1147 			last[atom] = 0;
1148 	}
1149 	atom = atomdef(s);
1150 	if (atom >= 0) {
1151 		if (last[atom]) {
1152 			opt_state->done = 0;
1153 			last[atom]->code = NOP;
1154 		}
1155 		last[atom] = s;
1156 	}
1157 }
1158 
1159 static void
1160 opt_deadstores(opt_state_t *opt_state, register struct block *b)
1161 {
1162 	register struct slist *s;
1163 	register int atom;
1164 	struct stmt *last[N_ATOMS];
1165 
1166 	memset((char *)last, 0, sizeof last);
1167 
1168 	for (s = b->stmts; s != 0; s = s->next)
1169 		deadstmt(opt_state, &s->s, last);
1170 	deadstmt(opt_state, &b->s, last);
1171 
1172 	for (atom = 0; atom < N_ATOMS; ++atom)
1173 		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1174 			last[atom]->code = NOP;
1175 			opt_state->done = 0;
1176 		}
1177 }
1178 
1179 static void
1180 opt_blk(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
1181     struct block *b, int do_stmts)
1182 {
1183 	struct slist *s;
1184 	struct edge *p;
1185 	int i;
1186 	bpf_int32 aval, xval;
1187 
1188 #if 0
1189 	for (s = b->stmts; s && s->next; s = s->next)
1190 		if (BPF_CLASS(s->s.code) == BPF_JMP) {
1191 			do_stmts = 0;
1192 			break;
1193 		}
1194 #endif
1195 
1196 	/*
1197 	 * Initialize the atom values.
1198 	 */
1199 	p = b->in_edges;
1200 	if (p == 0) {
1201 		/*
1202 		 * We have no predecessors, so everything is undefined
1203 		 * upon entry to this block.
1204 		 */
1205 		memset((char *)b->val, 0, sizeof(b->val));
1206 	} else {
1207 		/*
1208 		 * Inherit values from our predecessors.
1209 		 *
1210 		 * First, get the values from the predecessor along the
1211 		 * first edge leading to this node.
1212 		 */
1213 		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1214 		/*
1215 		 * Now look at all the other nodes leading to this node.
1216 		 * If, for the predecessor along that edge, a register
1217 		 * has a different value from the one we have (i.e.,
1218 		 * control paths are merging, and the merging paths
1219 		 * assign different values to that register), give the
1220 		 * register the undefined value of 0.
1221 		 */
1222 		while ((p = p->next) != NULL) {
1223 			for (i = 0; i < N_ATOMS; ++i)
1224 				if (b->val[i] != p->pred->val[i])
1225 					b->val[i] = 0;
1226 		}
1227 	}
1228 	aval = b->val[A_ATOM];
1229 	xval = b->val[X_ATOM];
1230 	for (s = b->stmts; s; s = s->next)
1231 		opt_stmt(cstate, ic, opt_state, &s->s, b->val, do_stmts);
1232 
1233 	/*
1234 	 * This is a special case: if we don't use anything from this
1235 	 * block, and we load the accumulator or index register with a
1236 	 * value that is already there, or if this block is a return,
1237 	 * eliminate all the statements.
1238 	 *
1239 	 * XXX - what if it does a store?
1240 	 *
1241 	 * XXX - why does it matter whether we use anything from this
1242 	 * block?  If the accumulator or index register doesn't change
1243 	 * its value, isn't that OK even if we use that value?
1244 	 *
1245 	 * XXX - if we load the accumulator with a different value,
1246 	 * and the block ends with a conditional branch, we obviously
1247 	 * can't eliminate it, as the branch depends on that value.
1248 	 * For the index register, the conditional branch only depends
1249 	 * on the index register value if the test is against the index
1250 	 * register value rather than a constant; if nothing uses the
1251 	 * value we put into the index register, and we're not testing
1252 	 * against the index register's value, and there aren't any
1253 	 * other problems that would keep us from eliminating this
1254 	 * block, can we eliminate it?
1255 	 */
1256 	if (do_stmts &&
1257 	    ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
1258 	      xval != 0 && b->val[X_ATOM] == xval) ||
1259 	     BPF_CLASS(b->s.code) == BPF_RET)) {
1260 		if (b->stmts != 0) {
1261 			b->stmts = 0;
1262 			opt_state->done = 0;
1263 		}
1264 	} else {
1265 		opt_peep(opt_state, b);
1266 		opt_deadstores(opt_state, b);
1267 	}
1268 	/*
1269 	 * Set up values for branch optimizer.
1270 	 */
1271 	if (BPF_SRC(b->s.code) == BPF_K)
1272 		b->oval = K(b->s.k);
1273 	else
1274 		b->oval = b->val[X_ATOM];
1275 	b->et.code = b->s.code;
1276 	b->ef.code = -b->s.code;
1277 }
1278 
1279 /*
1280  * Return true if any register that is used on exit from 'succ', has
1281  * an exit value that is different from the corresponding exit value
1282  * from 'b'.
1283  */
1284 static int
1285 use_conflict(struct block *b, struct block *succ)
1286 {
1287 	int atom;
1288 	atomset use = succ->out_use;
1289 
1290 	if (use == 0)
1291 		return 0;
1292 
1293 	for (atom = 0; atom < N_ATOMS; ++atom)
1294 		if (ATOMELEM(use, atom))
1295 			if (b->val[atom] != succ->val[atom])
1296 				return 1;
1297 	return 0;
1298 }
1299 
1300 static struct block *
1301 fold_edge(struct block *child, struct edge *ep)
1302 {
1303 	int sense;
1304 	int aval0, aval1, oval0, oval1;
1305 	int code = ep->code;
1306 
1307 	if (code < 0) {
1308 		code = -code;
1309 		sense = 0;
1310 	} else
1311 		sense = 1;
1312 
1313 	if (child->s.code != code)
1314 		return 0;
1315 
1316 	aval0 = child->val[A_ATOM];
1317 	oval0 = child->oval;
1318 	aval1 = ep->pred->val[A_ATOM];
1319 	oval1 = ep->pred->oval;
1320 
1321 	if (aval0 != aval1)
1322 		return 0;
1323 
1324 	if (oval0 == oval1)
1325 		/*
1326 		 * The operands of the branch instructions are
1327 		 * identical, so the result is true if a true
1328 		 * branch was taken to get here, otherwise false.
1329 		 */
1330 		return sense ? JT(child) : JF(child);
1331 
1332 	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1333 		/*
1334 		 * At this point, we only know the comparison if we
1335 		 * came down the true branch, and it was an equality
1336 		 * comparison with a constant.
1337 		 *
1338 		 * I.e., if we came down the true branch, and the branch
1339 		 * was an equality comparison with a constant, we know the
1340 		 * accumulator contains that constant.  If we came down
1341 		 * the false branch, or the comparison wasn't with a
1342 		 * constant, we don't know what was in the accumulator.
1343 		 *
1344 		 * We rely on the fact that distinct constants have distinct
1345 		 * value numbers.
1346 		 */
1347 		return JF(child);
1348 
1349 	return 0;
1350 }
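/*
 * Editorial example: suppose every path to this edge passes through the true
 * branch of a block that tested "jeq #0x800", and the edge's successor
 * repeats exactly that test on an unchanged accumulator.  fold_edge() then
 * returns the successor's own true branch, and opt_j() can retarget the edge
 * straight past the redundant comparison.
 */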
1351 
1352 static void
1353 opt_j(opt_state_t *opt_state, struct edge *ep)
1354 {
1355 	register int i, k;
1356 	register struct block *target;
1357 
1358 	if (JT(ep->succ) == 0)
1359 		return;
1360 
1361 	if (JT(ep->succ) == JF(ep->succ)) {
1362 		/*
1363 		 * Common branch targets can be eliminated, provided
1364 		 * there is no data dependency.
1365 		 */
1366 		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1367 			opt_state->done = 0;
1368 			ep->succ = JT(ep->succ);
1369 		}
1370 	}
1371 	/*
1372 	 * For each edge dominator that matches the successor of this
1373 	 * edge, promote the edge successor to its grandchild.
1374 	 *
1375 	 * XXX We violate the set abstraction here in favor of a reasonably
1376 	 * efficient loop.
1377 	 */
1378  top:
1379 	for (i = 0; i < opt_state->edgewords; ++i) {
1380 		register bpf_u_int32 x = ep->edom[i];
1381 
1382 		while (x != 0) {
1383 			k = ffs(x) - 1;
1384 			x &=~ (1 << k);
1385 			k += i * BITS_PER_WORD;
1386 
1387 			target = fold_edge(ep->succ, opt_state->edges[k]);
1388 			/*
1389 			 * Check that there is no data dependency between
1390 			 * nodes that will be violated if we move the edge.
1391 			 */
1392 			if (target != 0 && !use_conflict(ep->pred, target)) {
1393 				opt_state->done = 0;
1394 				ep->succ = target;
1395 				if (JT(target) != 0)
1396 					/*
1397 					 * Start over unless we hit a leaf.
1398 					 */
1399 					goto top;
1400 				return;
1401 			}
1402 		}
1403 	}
1404 }
1405 
1406 
1407 static void
1408 or_pullup(opt_state_t *opt_state, struct block *b)
1409 {
1410 	int val, at_top;
1411 	struct block *pull;
1412 	struct block **diffp, **samep;
1413 	struct edge *ep;
1414 
1415 	ep = b->in_edges;
1416 	if (ep == 0)
1417 		return;
1418 
1419 	/*
1420 	 * Make sure each predecessor loads the same value.
1421 	 * XXX why?
1422 	 */
1423 	val = ep->pred->val[A_ATOM];
1424 	for (ep = ep->next; ep != 0; ep = ep->next)
1425 		if (val != ep->pred->val[A_ATOM])
1426 			return;
1427 
1428 	if (JT(b->in_edges->pred) == b)
1429 		diffp = &JT(b->in_edges->pred);
1430 	else
1431 		diffp = &JF(b->in_edges->pred);
1432 
1433 	at_top = 1;
1434 	while (1) {
1435 		if (*diffp == 0)
1436 			return;
1437 
1438 		if (JT(*diffp) != JT(b))
1439 			return;
1440 
1441 		if (!SET_MEMBER((*diffp)->dom, b->id))
1442 			return;
1443 
1444 		if ((*diffp)->val[A_ATOM] != val)
1445 			break;
1446 
1447 		diffp = &JF(*diffp);
1448 		at_top = 0;
1449 	}
1450 	samep = &JF(*diffp);
1451 	while (1) {
1452 		if (*samep == 0)
1453 			return;
1454 
1455 		if (JT(*samep) != JT(b))
1456 			return;
1457 
1458 		if (!SET_MEMBER((*samep)->dom, b->id))
1459 			return;
1460 
1461 		if ((*samep)->val[A_ATOM] == val)
1462 			break;
1463 
1464 		/* XXX Need to check that there are no data dependencies
1465 		   between *diffp and *samep.  Currently, the code generator
1466 		   will not produce such dependencies. */
1467 		samep = &JF(*samep);
1468 	}
1469 #ifdef notdef
1470 	/* XXX This doesn't cover everything. */
1471 	for (i = 0; i < N_ATOMS; ++i)
1472 		if ((*samep)->val[i] != pred->val[i])
1473 			return;
1474 #endif
1475 	/* Pull up the node. */
1476 	pull = *samep;
1477 	*samep = JF(pull);
1478 	JF(pull) = *diffp;
1479 
1480 	/*
1481 	 * At the top of the chain, each predecessor needs to point at the
1482 	 * pulled up node.  Inside the chain, there is only one predecessor
1483 	 * to worry about.
1484 	 */
1485 	if (at_top) {
1486 		for (ep = b->in_edges; ep != 0; ep = ep->next) {
1487 			if (JT(ep->pred) == b)
1488 				JT(ep->pred) = pull;
1489 			else
1490 				JF(ep->pred) = pull;
1491 		}
1492 	}
1493 	else
1494 		*diffp = pull;
1495 
1496 	opt_state->done = 0;
1497 }
1498 
1499 static void
1500 and_pullup(opt_state_t *opt_state, struct block *b)
1501 {
1502 	int val, at_top;
1503 	struct block *pull;
1504 	struct block **diffp, **samep;
1505 	struct edge *ep;
1506 
1507 	ep = b->in_edges;
1508 	if (ep == 0)
1509 		return;
1510 
1511 	/*
1512 	 * Make sure each predecessor loads the same value.
1513 	 */
1514 	val = ep->pred->val[A_ATOM];
1515 	for (ep = ep->next; ep != 0; ep = ep->next)
1516 		if (val != ep->pred->val[A_ATOM])
1517 			return;
1518 
1519 	if (JT(b->in_edges->pred) == b)
1520 		diffp = &JT(b->in_edges->pred);
1521 	else
1522 		diffp = &JF(b->in_edges->pred);
1523 
1524 	at_top = 1;
1525 	while (1) {
1526 		if (*diffp == 0)
1527 			return;
1528 
1529 		if (JF(*diffp) != JF(b))
1530 			return;
1531 
1532 		if (!SET_MEMBER((*diffp)->dom, b->id))
1533 			return;
1534 
1535 		if ((*diffp)->val[A_ATOM] != val)
1536 			break;
1537 
1538 		diffp = &JT(*diffp);
1539 		at_top = 0;
1540 	}
1541 	samep = &JT(*diffp);
1542 	while (1) {
1543 		if (*samep == 0)
1544 			return;
1545 
1546 		if (JF(*samep) != JF(b))
1547 			return;
1548 
1549 		if (!SET_MEMBER((*samep)->dom, b->id))
1550 			return;
1551 
1552 		if ((*samep)->val[A_ATOM] == val)
1553 			break;
1554 
1555 		/* XXX Need to check that there are no data dependencies
1556 		   between diffp and samep.  Currently, the code generator
1557 		   will not produce such dependencies. */
1558 		samep = &JT(*samep);
1559 	}
1560 #ifdef notdef
1561 	/* XXX This doesn't cover everything. */
1562 	for (i = 0; i < N_ATOMS; ++i)
1563 		if ((*samep)->val[i] != pred->val[i])
1564 			return;
1565 #endif
1566 	/* Pull up the node. */
1567 	pull = *samep;
1568 	*samep = JT(pull);
1569 	JT(pull) = *diffp;
1570 
1571 	/*
1572 	 * At the top of the chain, each predecessor needs to point at the
1573 	 * pulled up node.  Inside the chain, there is only one predecessor
1574 	 * to worry about.
1575 	 */
1576 	if (at_top) {
1577 		for (ep = b->in_edges; ep != 0; ep = ep->next) {
1578 			if (JT(ep->pred) == b)
1579 				JT(ep->pred) = pull;
1580 			else
1581 				JF(ep->pred) = pull;
1582 		}
1583 	}
1584 	else
1585 		*diffp = pull;
1586 
1587 	opt_state->done = 0;
1588 }
1589 
1590 static void
1591 opt_blks(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1592     int do_stmts)
1593 {
1594 	int i, maxlevel;
1595 	struct block *p;
1596 
1597 	init_val(opt_state);
1598 	maxlevel = ic->root->level;
1599 
1600 	find_inedges(opt_state, ic->root);
1601 	for (i = maxlevel; i >= 0; --i)
1602 		for (p = opt_state->levels[i]; p; p = p->link)
1603 			opt_blk(cstate, ic, opt_state, p, do_stmts);
1604 
1605 	if (do_stmts)
1606 		/*
1607 		 * No point trying to move branches; it can't possibly
1608 		 * make a difference at this point.
1609 		 */
1610 		return;
1611 
1612 	for (i = 1; i <= maxlevel; ++i) {
1613 		for (p = opt_state->levels[i]; p; p = p->link) {
1614 			opt_j(opt_state, &p->et);
1615 			opt_j(opt_state, &p->ef);
1616 		}
1617 	}
1618 
1619 	find_inedges(opt_state, ic->root);
1620 	for (i = 1; i <= maxlevel; ++i) {
1621 		for (p = opt_state->levels[i]; p; p = p->link) {
1622 			or_pullup(opt_state, p);
1623 			and_pullup(opt_state, p);
1624 		}
1625 	}
1626 }
1627 
1628 static inline void
1629 link_inedge(struct edge *parent, struct block *child)
1630 {
1631 	parent->next = child->in_edges;
1632 	child->in_edges = parent;
1633 }
1634 
1635 static void
1636 find_inedges(opt_state_t *opt_state, struct block *root)
1637 {
1638 	int i;
1639 	struct block *b;
1640 
1641 	for (i = 0; i < opt_state->n_blocks; ++i)
1642 		opt_state->blocks[i]->in_edges = 0;
1643 
1644 	/*
1645 	 * Traverse the graph, adding each edge to the predecessor
1646 	 * list of its successors.  Skip the leaves (i.e. level 0).
1647 	 */
1648 	for (i = root->level; i > 0; --i) {
1649 		for (b = opt_state->levels[i]; b != 0; b = b->link) {
1650 			link_inedge(&b->et, JT(b));
1651 			link_inedge(&b->ef, JF(b));
1652 		}
1653 	}
1654 }
1655 
1656 static void
1657 opt_root(struct block **b)
1658 {
1659 	struct slist *tmp, *s;
1660 
1661 	s = (*b)->stmts;
1662 	(*b)->stmts = 0;
1663 	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1664 		*b = JT(*b);
1665 
1666 	tmp = (*b)->stmts;
1667 	if (tmp != 0)
1668 		sappend(s, tmp);
1669 	(*b)->stmts = s;
1670 
1671 	/*
1672 	 * If the root node is a return, then there is no
1673 	 * point executing any statements (since the bpf machine
1674 	 * has no side effects).
1675 	 */
1676 	if (BPF_CLASS((*b)->s.code) == BPF_RET)
1677 		(*b)->stmts = 0;
1678 }
1679 
1680 static void
1681 opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1682     int do_stmts)
1683 {
1684 
1685 #ifdef BDEBUG
1686 	if (pcap_optimizer_debug > 1) {
1687 		printf("opt_loop(root, %d) begin\n", do_stmts);
1688 		opt_dump(cstate, ic);
1689 	}
1690 #endif
1691 	do {
1692 		opt_state->done = 1;
1693 		find_levels(opt_state, ic);
1694 		find_dom(opt_state, ic->root);
1695 		find_closure(opt_state, ic->root);
1696 		find_ud(opt_state, ic->root);
1697 		find_edom(opt_state, ic->root);
1698 		opt_blks(cstate, opt_state, ic, do_stmts);
1699 #ifdef BDEBUG
1700 		if (pcap_optimizer_debug > 1) {
1701 			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
1702 			opt_dump(cstate, ic);
1703 		}
1704 #endif
1705 	} while (!opt_state->done);
1706 }
1707 
1708 /*
1709  * Optimize the filter code in its dag representation.
1710  */
1711 void
1712 bpf_optimize(compiler_state_t *cstate, struct icode *ic)
1713 {
1714 	opt_state_t opt_state;
1715 
1716 	opt_init(cstate, &opt_state, ic);
1717 	opt_loop(cstate, &opt_state, ic, 0);
1718 	opt_loop(cstate, &opt_state, ic, 1);
1719 	intern_blocks(&opt_state, ic);
1720 #ifdef BDEBUG
1721 	if (pcap_optimizer_debug > 1) {
1722 		printf("after intern_blocks()\n");
1723 		opt_dump(cstate, ic);
1724 	}
1725 #endif
1726 	opt_root(&ic->root);
1727 #ifdef BDEBUG
1728 	if (pcap_optimizer_debug > 1) {
1729 		printf("after opt_root()\n");
1730 		opt_dump(cstate, ic);
1731 	}
1732 #endif
1733 	opt_cleanup(&opt_state);
1734 }
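/*
 * Illustrative sketch (editorial, not compiled, and only an approximation of
 * the real call site in gencode.c): the filter compiler builds the icode DAG
 * first, runs the optimizer only when asked to, and then flattens the result
 * with icode_to_fcode().
 */
#if 0
	/* ...inside the filter compiler, after code generation... */
	if (optimize)
		bpf_optimize(cstate, &cstate->ic);
	fcode = icode_to_fcode(cstate, &cstate->ic, cstate->ic.root, &len);
#endif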
1735 
1736 static void
1737 make_marks(struct icode *ic, struct block *p)
1738 {
1739 	if (!isMarked(ic, p)) {
1740 		Mark(ic, p);
1741 		if (BPF_CLASS(p->s.code) != BPF_RET) {
1742 			make_marks(ic, JT(p));
1743 			make_marks(ic, JF(p));
1744 		}
1745 	}
1746 }
1747 
1748 /*
1749  * Mark code array such that isMarked(ic->cur_mark, i) is true
1750  * only for nodes that are alive.
1751  */
1752 static void
1753 mark_code(struct icode *ic)
1754 {
1755 	ic->cur_mark += 1;
1756 	make_marks(ic, ic->root);
1757 }
1758 
1759 /*
1760  * True iff the two stmt lists load the same value from the packet into
1761  * the accumulator.
1762  */
1763 static int
1764 eq_slist(struct slist *x, struct slist *y)
1765 {
1766 	while (1) {
1767 		while (x && x->s.code == NOP)
1768 			x = x->next;
1769 		while (y && y->s.code == NOP)
1770 			y = y->next;
1771 		if (x == 0)
1772 			return y == 0;
1773 		if (y == 0)
1774 			return x == 0;
1775 		if (x->s.code != y->s.code || x->s.k != y->s.k)
1776 			return 0;
1777 		x = x->next;
1778 		y = y->next;
1779 	}
1780 }
1781 
1782 static inline int
1783 eq_blk(struct block *b0, struct block *b1)
1784 {
1785 	if (b0->s.code == b1->s.code &&
1786 	    b0->s.k == b1->s.k &&
1787 	    b0->et.succ == b1->et.succ &&
1788 	    b0->ef.succ == b1->ef.succ)
1789 		return eq_slist(b0->stmts, b1->stmts);
1790 	return 0;
1791 }
1792 
1793 static void
1794 intern_blocks(opt_state_t *opt_state, struct icode *ic)
1795 {
1796 	struct block *p;
1797 	int i, j;
1798 	int done1; /* don't shadow global */
1799  top:
1800 	done1 = 1;
1801 	for (i = 0; i < opt_state->n_blocks; ++i)
1802 		opt_state->blocks[i]->link = 0;
1803 
1804 	mark_code(ic);
1805 
1806 	for (i = opt_state->n_blocks - 1; --i >= 0; ) {
1807 		if (!isMarked(ic, opt_state->blocks[i]))
1808 			continue;
1809 		for (j = i + 1; j < opt_state->n_blocks; ++j) {
1810 			if (!isMarked(ic, opt_state->blocks[j]))
1811 				continue;
1812 			if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
1813 				opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
1814 					opt_state->blocks[j]->link : opt_state->blocks[j];
1815 				break;
1816 			}
1817 		}
1818 	}
1819 	for (i = 0; i < opt_state->n_blocks; ++i) {
1820 		p = opt_state->blocks[i];
1821 		if (JT(p) == 0)
1822 			continue;
1823 		if (JT(p)->link) {
1824 			done1 = 0;
1825 			JT(p) = JT(p)->link;
1826 		}
1827 		if (JF(p)->link) {
1828 			done1 = 0;
1829 			JF(p) = JF(p)->link;
1830 		}
1831 	}
1832 	if (!done1)
1833 		goto top;
1834 }
1835 
1836 static void
1837 opt_cleanup(opt_state_t *opt_state)
1838 {
1839 	free((void *)opt_state->vnode_base);
1840 	free((void *)opt_state->vmap);
1841 	free((void *)opt_state->edges);
1842 	free((void *)opt_state->space);
1843 	free((void *)opt_state->levels);
1844 	free((void *)opt_state->blocks);
1845 }
1846 
1847 /*
1848  * Return the number of stmts in 's'.
1849  */
1850 static u_int
1851 slength(struct slist *s)
1852 {
1853 	u_int n = 0;
1854 
1855 	for (; s; s = s->next)
1856 		if (s->s.code != NOP)
1857 			++n;
1858 	return n;
1859 }
1860 
1861 /*
1862  * Return the number of nodes reachable by 'p'.
1863  * All nodes should be initially unmarked.
1864  */
1865 static int
1866 count_blocks(struct icode *ic, struct block *p)
1867 {
1868 	if (p == 0 || isMarked(ic, p))
1869 		return 0;
1870 	Mark(ic, p);
1871 	return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
1872 }
1873 
1874 /*
1875  * Do a depth first search on the flow graph, numbering the
1876  * basic blocks, and entering them into the 'blocks' array.
1877  */
1878 static void
1879 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
1880 {
1881 	int n;
1882 
1883 	if (p == 0 || isMarked(ic, p))
1884 		return;
1885 
1886 	Mark(ic, p);
1887 	n = opt_state->n_blocks++;
1888 	p->id = n;
1889 	opt_state->blocks[n] = p;
1890 
1891 	number_blks_r(opt_state, ic, JT(p));
1892 	number_blks_r(opt_state, ic, JF(p));
1893 }
1894 
1895 /*
1896  * Return the number of stmts in the flowgraph reachable by 'p'.
1897  * The nodes should be unmarked before calling.
1898  *
1899  * Note that "stmts" means "instructions", and that this includes
1900  *
1901  *	side-effect statements in 'p' (slength(p->stmts));
1902  *
1903  *	statements in the true branch from 'p' (count_stmts(JT(p)));
1904  *
1905  *	statements in the false branch from 'p' (count_stmts(JF(p)));
1906  *
1907  *	the conditional jump itself (1);
1908  *
1909  *	an extra long jump if the true branch requires it (p->longjt);
1910  *
1911  *	an extra long jump if the false branch requires it (p->longjf).
1912  */
1913 static u_int
1914 count_stmts(struct icode *ic, struct block *p)
1915 {
1916 	u_int n;
1917 
1918 	if (p == 0 || isMarked(ic, p))
1919 		return 0;
1920 	Mark(ic, p);
1921 	n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
1922 	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1923 }
1924 
1925 /*
1926  * Allocate memory.  All allocation is done before optimization
1927  * is begun.  A linear bound on the size of all data structures is computed
1928  * from the total number of blocks and/or statements.
1929  */
1930 static void
1931 opt_init(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic)
1932 {
1933 	bpf_u_int32 *p;
1934 	int i, n, max_stmts;
1935 
1936 	/*
1937 	 * First, count the blocks, so we can malloc an array to map
1938 	 * block number to block.  Then, put the blocks into the array.
1939 	 */
1940 	unMarkAll(ic);
1941 	n = count_blocks(ic, ic->root);
1942 	opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
1943 	if (opt_state->blocks == NULL)
1944 		bpf_error(cstate, "malloc");
1945 	unMarkAll(ic);
1946 	opt_state->n_blocks = 0;
1947 	number_blks_r(opt_state, ic, ic->root);
1948 
1949 	opt_state->n_edges = 2 * opt_state->n_blocks;
1950 	opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
1951 	if (opt_state->edges == NULL)
1952 		bpf_error(cstate, "malloc");
1953 
1954 	/*
1955 	 * The number of levels is bounded by the number of nodes.
1956 	 */
1957 	opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
1958 	if (opt_state->levels == NULL)
1959 		bpf_error(cstate, "malloc");
1960 
1961 	opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1962 	opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
1963 
1964 	/* XXX */
1965 	opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
1966 				 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
1967 	if (opt_state->space == NULL)
1968 		bpf_error(cstate, "malloc");
1969 	p = opt_state->space;
1970 	opt_state->all_dom_sets = p;
1971 	for (i = 0; i < n; ++i) {
1972 		opt_state->blocks[i]->dom = p;
1973 		p += opt_state->nodewords;
1974 	}
1975 	opt_state->all_closure_sets = p;
1976 	for (i = 0; i < n; ++i) {
1977 		opt_state->blocks[i]->closure = p;
1978 		p += opt_state->nodewords;
1979 	}
1980 	opt_state->all_edge_sets = p;
1981 	for (i = 0; i < n; ++i) {
1982 		register struct block *b = opt_state->blocks[i];
1983 
1984 		b->et.edom = p;
1985 		p += opt_state->edgewords;
1986 		b->ef.edom = p;
1987 		p += opt_state->edgewords;
1988 		b->et.id = i;
1989 		opt_state->edges[i] = &b->et;
1990 		b->ef.id = opt_state->n_blocks + i;
1991 		opt_state->edges[opt_state->n_blocks + i] = &b->ef;
1992 		b->et.pred = b;
1993 		b->ef.pred = b;
1994 	}
1995 	max_stmts = 0;
1996 	for (i = 0; i < n; ++i)
1997 		max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
1998 	/*
1999 	 * We allocate at most 3 value numbers per statement,
2000 	 * so this is an upper bound on the number of valnodes
2001 	 * we'll need.
2002 	 */
2003 	opt_state->maxval = 3 * max_stmts;
2004 	opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
2005 	opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
2006 	if (opt_state->vmap == NULL || opt_state->vnode_base == NULL)
2007 		bpf_error(cstate, "malloc");
2008 }
2009 
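/*
 * To illustrate the bit-set sizing above (an example, not code from
 * this file): with a 32-bit bpf_u_int32 and, say, 40 blocks and 80
 * edges, nodewords is 40/32 + 1 = 2 words and edgewords is
 * 80/32 + 1 = 3 words.  The single "space" allocation then carves out
 * one nodewords-sized set per block for dominators, a second per block
 * for closures, and one edgewords-sized set per edge for edge
 * dominators, which is exactly what the loops above hand out.
 */
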
/*
 * This is only used when supporting optimizer debugging.  It is
 * global state, so do *not* do more than one compile in parallel
 * and expect it to provide meaningful information.
 */
#ifdef BDEBUG
int bids[1000];
#endif

/*
 * Returns true if successful.  Returns false if a branch has
 * an offset that is too large.  If so, we have marked that
 * branch so that on a subsequent iteration, it will be treated
 * properly.
 */
static int
convert_code_r(compiler_state_t *cstate, conv_state_t *conv_state,
    struct icode *ic, struct block *p)
{
	struct bpf_insn *dst;
	struct slist *src;
	u_int slen;
	u_int off;
	int extrajmps;		/* number of extra jumps inserted */
	struct slist **offset = NULL;

	if (p == 0 || isMarked(ic, p))
		return (1);
	Mark(ic, p);

	if (convert_code_r(cstate, conv_state, ic, JF(p)) == 0)
		return (0);
	if (convert_code_r(cstate, conv_state, ic, JT(p)) == 0)
		return (0);

	slen = slength(p->stmts);
	dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = (int)(dst - conv_state->fstart);

	/* generate offset[] for convenience  */
	if (slen) {
		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
		if (!offset) {
			bpf_error(cstate, "not enough core");
			/*NOTREACHED*/
		}
	}
	src = p->stmts;
	for (off = 0; off < slen && src; off++) {
#if 0
		printf("off=%d src=%x\n", off, src);
#endif
		offset[off] = src;
		src = src->next;
	}

	off = 0;
	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;

		/* fill block-local relative jump */
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
#if 0
			if (src->s.jt || src->s.jf) {
				bpf_error(cstate, "illegal jmp destination");
				/*NOTREACHED*/
			}
#endif
			goto filled;
		}
		if (off == slen - 2)	/*???*/
			goto filled;

	    {
		u_int i;
		int jt, jf;
		const char *ljerr = "%s for block-local relative jump: off=%d";

#if 0
		printf("code=%x off=%d %x %x\n", src->s.code,
			off, src->s.jt, src->s.jf);
#endif

		if (!src->s.jt || !src->s.jf) {
			bpf_error(cstate, ljerr, "no jmp destination", off);
			/*NOTREACHED*/
		}

		jt = jf = 0;
		for (i = 0; i < slen; i++) {
			if (offset[i] == src->s.jt) {
				if (jt) {
					bpf_error(cstate, ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}

				dst->jt = i - off - 1;
				jt++;
			}
			if (offset[i] == src->s.jf) {
				if (jf) {
					bpf_error(cstate, ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}
				dst->jf = i - off - 1;
				jf++;
			}
		}
		if (!jt || !jf) {
			bpf_error(cstate, ljerr, "no destination found", off);
			/*NOTREACHED*/
		}
	    }
filled:
		++dst;
		++off;
	}
	if (offset)
		free(offset);

#ifdef BDEBUG
	bids[dst - conv_state->fstart] = p->id + 1;
#endif
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		extrajmps = 0;
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjt == 0) {
		    	/* mark this instruction and retry */
			p->longjt++;
			return(0);
		    }
		    /* branch if T to following jump */
		    dst->jt = extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jt = off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjf == 0) {
		    	/* mark this instruction and retry */
			p->longjf++;
			return(0);
		    }
		    /* branch if F to following jump */
		    /* if two jumps are inserted, F goes to second one */
		    dst->jf = extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jf = off;
	}
	return (1);
}


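/*
 * A sketch of the long-branch rewrite performed by convert_code_r()
 * above (illustrative numbers, not code from this file): if a
 * conditional would need jt = 300, the 8-bit field cannot hold it, so
 * the first pass sets p->longjt and returns 0.  On the retry an extra
 * slot has been reserved, and the block's tail comes out roughly as
 *
 *	jeq #k	jt 0	jf ...		true case falls to the next insn
 *	ja 299				which covers the remaining distance
 *
 * the unconditional jump needs one less than the original offset
 * because it sits one slot past the conditional.
 */
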
/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 *
 * This routine does *NOT* leak the memory pointed to by fp.  It *must
 * not* do free(fp) before returning fp; doing so would make no sense,
 * as the BPF array pointed to by the return value of icode_to_fcode()
 * must be valid - it's being returned for use in a bpf_program structure.
 *
 * If it appears that icode_to_fcode() is leaking, the problem is that
 * the program using pcap_compile() is failing to free the memory in
 * the BPF program when it's done - the leak is in the program, not in
 * the routine that happens to be allocating the memory.  (By analogy, if
 * a program calls fopen() without ever calling fclose() on the FILE *,
 * it will leak the FILE structure; the leak is not in fopen(), it's in
 * the program.)  Change the program to use pcap_freecode() when it's
 * done with the filter program.  See the pcap man page.
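 *
 * As a minimal sketch of that usage pattern (the "handle" and the
 * filter string below are placeholders, not code from this file):
 *
 *	struct bpf_program prog;
 *
 *	if (pcap_compile(handle, &prog, "ip src host 1.1.1.1", 1,
 *	    PCAP_NETMASK_UNKNOWN) == -1)
 *		return (-1);
 *	if (pcap_setfilter(handle, &prog) == -1)
 *		return (-1);
 *	pcap_freecode(&prog);	<- releases the instructions that
 *				   icode_to_fcode() allocated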
 */
struct bpf_insn *
icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
    struct block *root, u_int *lenp)
{
	u_int n;
	struct bpf_insn *fp;
	conv_state_t conv_state;

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.
	 */
	while (1) {
	    unMarkAll(ic);
	    n = *lenp = count_stmts(ic, root);

	    fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
	    if (fp == NULL)
		    bpf_error(cstate, "malloc");
	    memset((char *)fp, 0, sizeof(*fp) * n);
	    conv_state.fstart = fp;
	    conv_state.ftail = fp + n;

	    unMarkAll(ic);
	    if (convert_code_r(cstate, &conv_state, ic, root))
		break;
	    free(fp);
	}

	return fp;
}

/*
 * Make a copy of a BPF program and put it in the "fcode" member of
 * a "pcap_t".
 *
 * If we fail to allocate memory for the copy, fill in the "errbuf"
 * member of the "pcap_t" with an error message, and return -1;
 * otherwise, return 0.
 */
int
install_bpf_program(pcap_t *p, struct bpf_program *fp)
{
	size_t prog_size;

	/*
	 * Validate the program.
	 */
	if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
			"BPF program is not valid");
		return (-1);
	}

	/*
	 * Free up any already installed program.
	 */
	pcap_freecode(&p->fcode);

	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
	p->fcode.bf_len = fp->bf_len;
	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
	if (p->fcode.bf_insns == NULL) {
		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
			 "malloc: %s", pcap_strerror(errno));
		return (-1);
	}
	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
	return (0);
}

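/*
 * As a hedged illustration (the function name is made up, not part of
 * libpcap), a capture module that does its filtering in userland can
 * implement its setfilter operation as little more than a call to this
 * routine:
 *
 *	static int
 *	example_setfilter(pcap_t *p, struct bpf_program *fp)
 *	{
 *		return (install_bpf_program(p, fp));
 *	}
 *
 * Because install_bpf_program() copies the instructions, the caller is
 * free to pcap_freecode() or reuse its own bpf_program afterwards.
 */
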
#ifdef BDEBUG
static void
dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
    FILE *out)
{
	int icount, noffset;
	int i;

	if (block == NULL || isMarked(ic, block))
		return;
	Mark(ic, block);

	icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
	noffset = min(block->offset + icount, (int)prog->bf_len);

	fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
	for (i = block->offset; i < noffset; i++) {
		fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
	}
	fprintf(out, "\" tooltip=\"");
	for (i = 0; i < BPF_MEMWORDS; i++)
		if (block->val[i] != 0)
			fprintf(out, "val[%d]=%d ", i, block->val[i]);
	fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
	fprintf(out, "val[X]=%d", block->val[X_ATOM]);
	fprintf(out, "\"");
	if (JT(block) == NULL)
		fprintf(out, ", peripheries=2");
	fprintf(out, "];\n");

	dot_dump_node(ic, JT(block), prog, out);
	dot_dump_node(ic, JF(block), prog, out);
}

static void
dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
{
	if (block == NULL || isMarked(ic, block))
		return;
	Mark(ic, block);

	if (JT(block)) {
		fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
				block->id, JT(block)->id);
		fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
			   block->id, JF(block)->id);
	}
	dot_dump_edge(ic, JT(block), out);
	dot_dump_edge(ic, JF(block), out);
}

/* Output the block CFG using the graphviz/DOT language.
 * For each block the CFG shows the block's code, the value index of
 * each register at block exit, and the jump relationships.
 *
 * Example DOT for the BPF filter `ip src host 1.1.1.1':
    digraph BPF {
    	block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh      [12]\n(001) jeq      #0x800           jt 2	jf 5" tooltip="val[A]=0 val[X]=0"];
    	block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld       [26]\n(003) jeq      #0x1010101       jt 4	jf 5" tooltip="val[A]=0 val[X]=0"];
    	block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret      #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
    	block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret      #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
    	"block0":se -> "block1":n [label="T"];
    	"block0":sw -> "block3":n [label="F"];
    	"block1":se -> "block2":n [label="T"];
    	"block1":sw -> "block3":n [label="F"];
    }
 *
 *  After installing graphviz from http://www.graphviz.org/, save the
 *  output as bpf.dot and run `dot -Tpng -O bpf.dot' to draw the graph.
 */
static void
dot_dump(compiler_state_t *cstate, struct icode *ic)
{
	struct bpf_program f;
	FILE *out = stdout;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);

	fprintf(out, "digraph BPF {\n");
	ic->cur_mark = 0;
	unMarkAll(ic);
	dot_dump_node(ic, ic->root, &f, out);
	ic->cur_mark = 0;
	unMarkAll(ic);
	dot_dump_edge(ic, ic->root, out);
	fprintf(out, "}\n");

	free((char *)f.bf_insns);
}

static void
plain_dump(compiler_state_t *cstate, struct icode *ic)
{
	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
	bpf_dump(&f, 1);
	putchar('\n');
	free((char *)f.bf_insns);
}

static void
opt_dump(compiler_state_t *cstate, struct icode *ic)
{
	/*
	 * If optimizer debugging is enabled, output a DOT graph.
	 * `pcap_optimizer_debug=4' is equivalent to -dddd, following the
	 * -d/-dd/-ddd convention of the tcpdump command line.
	 */
	if (pcap_optimizer_debug > 3)
		dot_dump(cstate, ic);
	else
		plain_dump(cstate, ic);
}
#endif
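
/*
 * A minimal sketch of enabling these dumps, assuming libpcap was built
 * with BDEBUG defined and that the program can reach the global flag
 * directly (the handle and program names are placeholders):
 *
 *	extern int pcap_optimizer_debug;
 *
 *	pcap_optimizer_debug = 4;	values above 3 make opt_dump()
 *					emit the DOT graph; lower
 *					settings fall back to bpf_dump()
 *	pcap_compile(handle, &prog, "ip src host 1.1.1.1", 1,
 *	    PCAP_NETMASK_UNKNOWN);
 */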