// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <stdio.h>

#include "base/logging.h"
#include "sandbox/linux/seccomp-bpf/codegen.h"

namespace {

// Helper function for Traverse().
void TraverseRecursively(std::set<sandbox::Instruction*>* visited,
                         sandbox::Instruction* instruction) {
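  // A conditional jump (any BPF_JMP other than BPF_JA) has two successors,
  // a BPF_RET has none, and every other instruction has exactly one.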
  if (visited->find(instruction) == visited->end()) {
    visited->insert(instruction);
    switch (BPF_CLASS(instruction->code)) {
      case BPF_JMP:
        if (BPF_OP(instruction->code) != BPF_JA) {
          TraverseRecursively(visited, instruction->jf_ptr);
        }
        TraverseRecursively(visited, instruction->jt_ptr);
        break;
      case BPF_RET:
        break;
      default:
        TraverseRecursively(visited, instruction->next);
        break;
    }
  }
}

}  // namespace

namespace sandbox {

CodeGen::CodeGen() : compiled_(false) {}

CodeGen::~CodeGen() {
  for (Instructions::iterator iter = instructions_.begin();
       iter != instructions_.end();
       ++iter) {
    delete *iter;
  }
  for (BasicBlocks::iterator iter = basic_blocks_.begin();
       iter != basic_blocks_.end();
       ++iter) {
    delete *iter;
  }
}

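// Dumps a compiled program to stderr in human-readable form. For loads from
// the seccomp_data "args" array, (k - offsetof(args)) / 8 is the argument
// index and (k - offsetof(args)) % 8 selects the 32-bit half; e.g. an offset
// of args + 12 prints as "Argument 1 (MSB)".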
void CodeGen::PrintProgram(const SandboxBPF::Program& program) {
  for (SandboxBPF::Program::const_iterator iter = program.begin();
       iter != program.end();
       ++iter) {
    int ip = (int)(iter - program.begin());
    fprintf(stderr, "%3d) ", ip);
    switch (BPF_CLASS(iter->code)) {
      case BPF_LD:
        if (iter->code == BPF_LD + BPF_W + BPF_ABS) {
          fprintf(stderr, "LOAD %d  // ", (int)iter->k);
          if (iter->k == offsetof(struct arch_seccomp_data, nr)) {
            fprintf(stderr, "System call number\n");
          } else if (iter->k == offsetof(struct arch_seccomp_data, arch)) {
            fprintf(stderr, "Architecture\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer)) {
            fprintf(stderr, "Instruction pointer (LSB)\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer) +
                         4) {
            fprintf(stderr, "Instruction pointer (MSB)\n");
          } else if (iter->k >= offsetof(struct arch_seccomp_data, args) &&
                     iter->k < offsetof(struct arch_seccomp_data, args) + 48 &&
                     (iter->k - offsetof(struct arch_seccomp_data, args)) % 4 ==
                         0) {
            fprintf(
                stderr,
                "Argument %d (%cSB)\n",
                (int)(iter->k - offsetof(struct arch_seccomp_data, args)) / 8,
                (iter->k - offsetof(struct arch_seccomp_data, args)) % 8 ? 'M'
                                                                         : 'L');
          } else {
            fprintf(stderr, "???\n");
          }
        } else {
          fprintf(stderr, "LOAD ???\n");
        }
        break;
      case BPF_JMP:
        if (BPF_OP(iter->code) == BPF_JA) {
          fprintf(stderr, "JMP %d\n", ip + iter->k + 1);
        } else {
          fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n",
              BPF_OP(iter->code) == BPF_JSET ? "&" :
              BPF_OP(iter->code) == BPF_JEQ ? "==" :
              BPF_OP(iter->code) == BPF_JGE ? ">=" :
              BPF_OP(iter->code) == BPF_JGT ? ">"  : "???",
              (int)iter->k,
              ip + iter->jt + 1, ip + iter->jf + 1);
        }
        break;
      case BPF_RET:
        fprintf(stderr, "RET 0x%x  // ", iter->k);
        if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP) {
          fprintf(stderr, "Trap #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
          fprintf(stderr, "errno = %d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRACE) {
          fprintf(stderr, "Trace #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if (iter->k == SECCOMP_RET_ALLOW) {
          fprintf(stderr, "Allowed\n");
        } else {
          fprintf(stderr, "???\n");
        }
        break;
      case BPF_ALU:
        fprintf(stderr, BPF_OP(iter->code) == BPF_NEG
            ? "A := -A\n" : "A := A %s 0x%x\n",
            BPF_OP(iter->code) == BPF_ADD ? "+"  :
            BPF_OP(iter->code) == BPF_SUB ? "-"  :
            BPF_OP(iter->code) == BPF_MUL ? "*"  :
            BPF_OP(iter->code) == BPF_DIV ? "/"  :
            BPF_OP(iter->code) == BPF_MOD ? "%"  :
            BPF_OP(iter->code) == BPF_OR  ? "|"  :
            BPF_OP(iter->code) == BPF_XOR ? "^"  :
            BPF_OP(iter->code) == BPF_AND ? "&"  :
            BPF_OP(iter->code) == BPF_LSH ? "<<" :
            BPF_OP(iter->code) == BPF_RSH ? ">>" : "???",
            (int)iter->k);
        break;
      default:
        fprintf(stderr, "???\n");
        break;
    }
  }
  return;
}

Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* next) {
  // We can handle non-jumping instructions and "always" jumps. Both of
  // them are followed by exactly one "next" instruction.
  // We allow callers to defer specifying "next", but then they must call
  // "JoinInstructions" later.
  if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
    SANDBOX_DIE(
        "Must provide both \"true\" and \"false\" branch "
        "for a BPF_JMP");
  }
  if (next && BPF_CLASS(code) == BPF_RET) {
    SANDBOX_DIE("Cannot append instructions after a return statement");
  }
  if (BPF_CLASS(code) == BPF_JMP) {
    // "Always" jumps use the "true" branch target only.
    Instruction* insn = new Instruction(code, 0, next, NULL);
    instructions_.push_back(insn);
    return insn;
  } else {
    // Non-jumping instructions do not use any of the branch targets.
    Instruction* insn = new Instruction(code, k, next);
    instructions_.push_back(insn);
    return insn;
  }
}

Instruction* CodeGen::MakeInstruction(uint16_t code, const ErrorCode& err) {
  if (BPF_CLASS(code) != BPF_RET) {
    SANDBOX_DIE("ErrorCodes can only be used in return expressions");
  }
  if (err.error_type_ != ErrorCode::ET_SIMPLE &&
      err.error_type_ != ErrorCode::ET_TRAP) {
    SANDBOX_DIE("ErrorCode is not suitable for returning from a BPF program");
  }
  return MakeInstruction(code, err.err_);
}

Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* jt,
                                      Instruction* jf) {
  // We can handle all conditional jumps. They are followed by both a
  // "true" and a "false" branch.
  if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) {
    SANDBOX_DIE("Expected a BPF_JMP instruction");
  }
  if (!jt && !jf) {
    // We allow callers to defer specifying exactly one of the branch
    // targets. It must then be set later by calling "JoinInstructions".
    SANDBOX_DIE("Branches must jump to a valid instruction");
  }
  Instruction* insn = new Instruction(code, k, jt, jf);
  instructions_.push_back(insn);
  return insn;
}

void CodeGen::JoinInstructions(Instruction* head, Instruction* tail) {
  // Merge two instructions, or set the branch target for an "always" jump.
  // This function should be called if the caller didn't initially provide
  // a value for "next" when creating the instruction.
  if (BPF_CLASS(head->code) == BPF_JMP) {
    if (BPF_OP(head->code) == BPF_JA) {
      if (head->jt_ptr) {
        SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
      }
      head->jt_ptr = tail;
    } else {
      if (!head->jt_ptr && head->jf_ptr) {
        head->jt_ptr = tail;
      } else if (!head->jf_ptr && head->jt_ptr) {
        head->jf_ptr = tail;
      } else {
        SANDBOX_DIE("Cannot append instructions after a jump");
      }
    }
  } else if (BPF_CLASS(head->code) == BPF_RET) {
    SANDBOX_DIE("Cannot append instructions after a return statement");
  } else if (head->next) {
    SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
  } else {
    head->next = tail;
  }
  return;
}
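
// Illustrative sketch (not part of this file): a caller can defer the "next"
// pointer at creation time and link instructions afterwards; "gen" and "err"
// are assumed to exist elsewhere.
//
//   Instruction* load =
//       gen.MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, NULL);  // "next" deferred
//   Instruction* ret = gen.MakeInstruction(BPF_RET + BPF_K, err);
//   gen.JoinInstructions(load, ret);  // "load" now falls through to "ret"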

void CodeGen::Traverse(Instruction* instruction,
                       void (*fnc)(Instruction*, void*),
                       void* aux) {
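  // Note: "fnc" runs once per reachable instruction, but std::set iterates
  // in pointer order, so the visitation order is effectively arbitrary.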
  std::set<Instruction*> visited;
  TraverseRecursively(&visited, instruction);
  for (std::set<Instruction*>::const_iterator iter = visited.begin();
       iter != visited.end();
       ++iter) {
    fnc(*iter, aux);
  }
}

void CodeGen::FindBranchTargets(const Instruction& instructions,
                                BranchTargets* branch_targets) {
  // Follow all possible paths through the "instructions" graph and compute
  // a list of branch targets. This will later be needed to compute the
  // boundaries of basic blocks.
  // We maintain a set of all instructions that we have previously seen. This
  // set ultimately converges on all instructions in the program.
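  // For example, in a diamond-shaped if/else graph the join instruction is
  // reached from both branches and ends up with an incoming-edge count of 2.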
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  for (const Instruction* insn = &instructions; insn;) {
    seen_instructions.insert(insn);
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Found a jump. Increase count of incoming edges for each of the jump
      // targets.
      ++(*branch_targets)[insn->jt_ptr];
      if (BPF_OP(insn->code) != BPF_JA) {
        ++(*branch_targets)[insn->jf_ptr];
        stack.push_back(const_cast<Instruction*>(insn));
      }
      // Start a recursive descent for depth-first traversal.
      if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
        // We haven't seen the "true" branch yet. Traverse it now. We have
        // already remembered the "false" branch on the stack and will
        // traverse it later.
        insn = insn->jt_ptr;
        continue;
      } else {
        // Now try traversing the "false" branch.
        insn = NULL;
      }
    } else {
      // This is a non-jump instruction; just continue to the next instruction
      // (if any). It's OK if "insn" becomes NULL when reaching a return
      // instruction.
      if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
        SANDBOX_DIE(
            "Internal compiler error; return instruction must be at "
            "the end of the BPF program");
      }
      if (seen_instructions.find(insn->next) == seen_instructions.end()) {
        insn = insn->next;
      } else {
        // We have seen this instruction before. That could happen if it is
        // a branch target. No need to continue processing.
        insn = NULL;
      }
    }
    while (!insn && !stack.empty()) {
      // We are done processing all the way to a leaf node; backtrack up the
      // stack to any branches that we haven't processed yet. By definition,
      // this has to be a "false" branch, as we always process the "true"
      // branches right away.
      insn = stack.back();
      stack.pop_back();
      if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
        // We haven't seen the "false" branch yet. So, that's where we'll
        // go now.
        insn = insn->jf_ptr;
      } else {
        // We have seen both the "true" and the "false" branch, continue
        // up the stack.
        if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
          SANDBOX_DIE(
              "Internal compiler error; cannot find all "
              "branch targets");
        }
        insn = NULL;
      }
    }
  }
  return;
}

BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) {
  // Iterate over all the instructions between "head" and "tail" and
  // insert them into a new basic block.
  BasicBlock* bb = new BasicBlock;
  for (;; head = head->next) {
    bb->instructions.push_back(head);
    if (head == tail) {
      break;
    }
    if (BPF_CLASS(head->code) == BPF_JMP) {
      SANDBOX_DIE("Found a jump inside of a basic block");
    }
  }
  basic_blocks_.push_back(bb);
  return bb;
}

void CodeGen::AddBasicBlock(Instruction* head,
                            Instruction* tail,
                            const BranchTargets& branch_targets,
                            TargetsToBlocks* basic_blocks,
                            BasicBlock** firstBlock) {
  // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
  // has not been set before.
  BranchTargets::const_iterator iter = branch_targets.find(head);
  if ((iter == branch_targets.end()) != !*firstBlock ||
      !*firstBlock != basic_blocks->empty()) {
    SANDBOX_DIE(
        "Only the very first basic block should have no "
        "incoming jumps");
  }
  BasicBlock* bb = MakeBasicBlock(head, tail);
  if (!*firstBlock) {
    *firstBlock = bb;
  }
  (*basic_blocks)[head] = bb;
  return;
}

BasicBlock* CodeGen::CutGraphIntoBasicBlocks(
    Instruction* instructions,
    const BranchTargets& branch_targets,
    TargetsToBlocks* basic_blocks) {
  // Textbook implementation of a basic block generator. All basic blocks
  // start with a branch target and end with either a return statement or
  // a jump (or are followed by an instruction that forms the beginning of a
  // new block). Both conditional and "always" jumps are supported.
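  // Equivalently: a block starts at the very first instruction or at a
  // branch target, and ends at a jump, at a return, or just before the next
  // branch target.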
  BasicBlock* first_block = NULL;
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  Instruction* tail = NULL;
  Instruction* head = instructions;
  for (Instruction* insn = head; insn;) {
    if (seen_instructions.find(insn) != seen_instructions.end()) {
      // We somehow went in a circle. This should never be possible. Not even
      // cyclic graphs are supposed to confuse us this much.
      SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
    }
    seen_instructions.insert(insn);
    if (tail && branch_targets.find(insn) != branch_targets.end()) {
      // We reached a branch target. Start a new basic block (this means
      // flushing the previous basic block first).
      AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
      head = insn;
    }
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // We reached a jump instruction; this completes our current basic
      // block. Flush it and continue by traversing both the true and the
      // false branch of the jump. We need to maintain a stack to do so.
      AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
      if (BPF_OP(insn->code) != BPF_JA) {
        stack.push_back(insn->jf_ptr);
      }
      insn = insn->jt_ptr;

      // If we are jumping to an instruction that we have previously
      // processed, we are done with this branch. Continue by backtracking
      // up the stack.
      while (seen_instructions.find(insn) != seen_instructions.end()) {
      backtracking:
        if (stack.empty()) {
          // We successfully traversed all reachable instructions.
          return first_block;
        } else {
          // Going up the stack.
          insn = stack.back();
          stack.pop_back();
        }
      }
      // Starting a new basic block.
      tail = NULL;
      head = insn;
    } else {
      // We found a non-jumping instruction; append it to the current basic
      // block.
      tail = insn;
      insn = insn->next;
      if (!insn) {
        // We reached a return statement, flush the current basic block and
        // backtrack up the stack.
        AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
        goto backtracking;
      }
    }
  }
  return first_block;
}

// We define a comparator that inspects the sequence of instructions in our
// basic block and any blocks referenced by this block. This function can be
// used in a "less" comparator for the purpose of storing pointers to basic
// blocks in STL containers; this gives an easy option to use STL to find
// shared tail sequences of basic blocks.
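// Two blocks compare equal only if their instructions match field-by-field
// and, transitively, their successor blocks compare equal as well; this is
// exactly the condition under which MergeTails() may alias them.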
static int PointerCompare(const BasicBlock* block1,
                          const BasicBlock* block2,
                          const TargetsToBlocks& blocks) {
  // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
  // If we are looking at the exact same block, this is trivial and we don't
  // need to do a full comparison.
  if (block1 == block2) {
    return 0;
  }

  // We compare the sequence of instructions in both basic blocks.
  const Instructions& insns1 = block1->instructions;
  const Instructions& insns2 = block2->instructions;
  // Basic blocks should never be empty.
  CHECK(!insns1.empty());
  CHECK(!insns2.empty());

  Instructions::const_iterator iter1 = insns1.begin();
  Instructions::const_iterator iter2 = insns2.begin();
  for (;; ++iter1, ++iter2) {
    // If we have reached the end of the sequence of instructions in one or
    // both basic blocks, we know the relative ordering between the two blocks
    // and can return.
    if (iter1 == insns1.end() || iter2 == insns2.end()) {
      if (iter1 != insns1.end()) {
        return 1;
      }
      if (iter2 != insns2.end()) {
        return -1;
      }

      // If the two blocks are the same length (and have elementwise-equal code
      // and k fields) and their last instructions are neither a JMP nor a RET
      // (which is the only way we can reach this point), then we must compare
      // their successors.
      Instruction* const insns1_last = insns1.back();
      Instruction* const insns2_last = insns2.back();
      CHECK(BPF_CLASS(insns1_last->code) != BPF_JMP &&
            BPF_CLASS(insns1_last->code) != BPF_RET);

      // Non-jumping instructions will always have a valid next instruction.
      CHECK(insns1_last->next);
      CHECK(insns2_last->next);
      return PointerCompare(blocks.find(insns1_last->next)->second,
                            blocks.find(insns2_last->next)->second,
                            blocks);
    }

    // Compare the individual fields for both instructions.
    const Instruction& insn1 = **iter1;
    const Instruction& insn2 = **iter2;
    if (insn1.code != insn2.code) {
      return insn1.code - insn2.code;
    }
    if (insn1.k != insn2.k) {
      return insn1.k - insn2.k;
    }

    // Sanity check: If we're looking at a JMP or RET instruction, by definition
    // it should be the last instruction of the basic block.
    if (BPF_CLASS(insn1.code) == BPF_JMP || BPF_CLASS(insn1.code) == BPF_RET) {
      CHECK_EQ(insns1.back(), &insn1);
      CHECK_EQ(insns2.back(), &insn2);
    }

    // RET instructions terminate execution, and only JMP instructions use the
    // jt_ptr and jf_ptr fields.  Anything else can continue to the next
    // instruction in the basic block.
    if (BPF_CLASS(insn1.code) == BPF_RET) {
      return 0;
    } else if (BPF_CLASS(insn1.code) != BPF_JMP) {
      continue;
    }

    // Recursively compare the "true" and "false" branches.
    // A well-formed BPF program can't have any cycles, so we know
    // that our recursive algorithm will ultimately terminate.
    // In the unlikely event that the programmer made a mistake and
    // went out of the way to give us a cyclic program, we will crash
    // with a stack overflow. We are OK with that.
    if (BPF_OP(insn1.code) != BPF_JA) {
      int c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
                             blocks.find(insn2.jf_ptr)->second,
                             blocks);
      if (c != 0) {
        return c;
      }
    }
    return PointerCompare(blocks.find(insn1.jt_ptr)->second,
                          blocks.find(insn2.jt_ptr)->second,
                          blocks);
  }
}

void CodeGen::MergeTails(TargetsToBlocks* blocks) {
  // We enter all of our basic blocks into a set using the BasicBlock::Less()
  // comparator. This naturally causes blocks with identical tails of
  // instructions to map to the same entry in the set. Whenever we discover
  // that a particular chain of instructions is already in the set, we merge
  // the basic blocks and update the pointer in the "blocks" map.
  // N.B. We don't merge instructions on a granularity that is finer than
  //      a basic block. In practice, this is sufficiently rare that we don't
  //      incur a big cost.
  //      Similarly, we currently don't merge anything other than tails. In
  //      the future, we might decide to revisit this decision and attempt to
  //      merge arbitrary sub-sequences of instructions.
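  // For example, if several conditional branches each lead to their own copy
  // of an identical "return errno" block, all of those copies collapse into
  // a single shared entry after this pass.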
  BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
  typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set;
  Set seen_basic_blocks(less);
  for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end();
       ++iter) {
    BasicBlock* bb = iter->second;
    Set::const_iterator entry = seen_basic_blocks.find(bb);
    if (entry == seen_basic_blocks.end()) {
      // This is the first time we see this particular sequence of
      // instructions. Enter the basic block into the set of known
      // basic blocks.
      seen_basic_blocks.insert(bb);
    } else {
      // We have previously seen another basic block that defines the same
      // sequence of instructions. Merge the two blocks and update the
      // pointer in the "blocks" map.
      iter->second = *entry;
    }
  }
}

void CodeGen::ComputeIncomingBranches(BasicBlock* block,
                                      const TargetsToBlocks& targets_to_blocks,
                                      IncomingBranches* incoming_branches) {
  // We increment the number of incoming branches each time we encounter a
  // basic block. But we only traverse recursively the very first time we
  // encounter a new block. This is necessary to make topological sorting
  // work correctly.
  if (++(*incoming_branches)[block] == 1) {
    Instruction* last_insn = block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second,
                              targets_to_blocks,
                              incoming_branches);
      if (BPF_OP(last_insn->code) != BPF_JA) {
        ComputeIncomingBranches(
            targets_to_blocks.find(last_insn->jf_ptr)->second,
            targets_to_blocks,
            incoming_branches);
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
                              targets_to_blocks,
                              incoming_branches);
    }
  }
}

void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block,
                                  const TargetsToBlocks& blocks,
                                  BasicBlocks* basic_blocks) {
  // Textbook implementation of a toposort. We keep looking for basic blocks
  // that don't have any incoming branches (initially, this is just the
  // "first_block") and add them to the topologically sorted list of
  // "basic_blocks". As we do so, we remove outgoing branches. This potentially
  // ends up making our descendants eligible for the sorted list. The
  // sorting algorithm terminates when there are no more basic blocks that have
  // no incoming branches. If we didn't move all blocks from the set of
  // "unordered_blocks" to the sorted list of "basic_blocks", there must have
  // been a cyclic dependency. This should never happen in a BPF program, as
  // well-formed BPF programs only ever have forward branches.
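  // (This is Kahn's algorithm: repeatedly emit a block whose incoming-branch
  // count has dropped to zero and decrement the counts of its successors.)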
  IncomingBranches unordered_blocks;
  ComputeIncomingBranches(first_block, blocks, &unordered_blocks);

  std::set<BasicBlock*> heads;
  for (;;) {
    // Move block from "unordered_blocks" to "basic_blocks".
    basic_blocks->push_back(first_block);

    // Inspect the last instruction in the basic block. This is typically
    // either a jump or a return statement. But it could also be a "normal"
    // instruction that is followed by a jump target.
    Instruction* last_insn = first_block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      // Remove outgoing branches. This might end up moving our descendants
      // into the set of "head" nodes that no longer have any incoming
      // branches.
      TargetsToBlocks::const_iterator iter;
      if (BPF_OP(last_insn->code) != BPF_JA) {
        iter = blocks.find(last_insn->jf_ptr);
        if (!--unordered_blocks[iter->second]) {
          heads.insert(iter->second);
        }
      }
      iter = blocks.find(last_insn->jt_ptr);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      // We encountered an instruction that doesn't change code flow. Try to
      // pick the next "first_block" from "last_insn->next", if possible.
      TargetsToBlocks::const_iterator iter;
      iter = blocks.find(last_insn->next);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      } else {
        // Our basic block is supposed to be followed by "last_insn->next",
        // but dependencies prevent this from happening. Insert a BPF_JA
        // instruction to correct the code flow.
        Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next);
        first_block->instructions.push_back(ja);
        last_insn->next = ja;
      }
    }
    if (heads.empty()) {
      if (unordered_blocks.size() != basic_blocks->size()) {
        SANDBOX_DIE("Internal compiler error; cyclic graph detected");
      }
      return;
    }
    // Proceed by picking an arbitrary node from the set of basic blocks that
    // do not have any incoming branches.
    first_block = *heads.begin();
    heads.erase(heads.begin());
  }
}

void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks,
                                   const TargetsToBlocks& targets_to_blocks) {
  // While we previously used pointers in jt_ptr and jf_ptr to link jump
  // instructions to their targets, we now convert these jumps to relative
  // jumps that are suitable for loading the BPF program into the kernel.
  int offset = 0;

  // Since we just completed a toposort, all jump targets are guaranteed to
  // go forward. This means that iterating over the basic blocks in reverse
  // makes it trivial to compute the correct offsets.
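  // Note that bb->offset counts instructions from the start of a block to
  // the end of the program, so a jump whose fall-through position lies
  // "offset" instructions before the end must skip exactly
  // "offset - target->offset" instructions to reach its target block.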
  BasicBlock* bb = NULL;
  BasicBlock* last_bb = NULL;
  for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
       iter != basic_blocks->rend();
       ++iter) {
    last_bb = bb;
    bb = *iter;
    Instruction* insn = bb->instructions.back();
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Basic block ended in a jump instruction. We can now compute the
      // appropriate offsets.
      if (BPF_OP(insn->code) == BPF_JA) {
        // "Always" jumps use the 32-bit "k" field for the offset, instead
        // of the 8-bit "jt" and "jf" fields.
        int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        insn->k = jmp;
        insn->jt = insn->jf = 0;
      } else {
        // The offset computations for conditional jumps are just the same
        // as for "always" jumps.
        int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset;

        // There is an added complication, because conditional relative jumps
        // can only jump at most 255 instructions forward. If we have to jump
        // further, insert an extra "always" jump.
        Instructions::size_type jmp = bb->instructions.size();
        if (jt > 255 || (jt == 255 && jf > 255)) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr);
          bb->instructions.push_back(ja);
          ja->k = jt;
          ja->jt = ja->jf = 0;

          // The newly inserted "always" jump, of course, requires us to adjust
          // the jump targets in the original conditional jump.
          jt = 0;
          ++jf;
        }
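        // E.g. a raw jt of 300 is rewritten so the conditional takes jt = 0
        // (falling through to the new BPF_JA, whose "k" carries the
        // 300-instruction offset), while jf grows by one to hop over the
        // inserted jump.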
        if (jf > 255) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr);
          bb->instructions.insert(bb->instructions.begin() + jmp, ja);
          ja->k = jf;
          ja->jt = ja->jf = 0;

          // Again, we have to adjust the jump targets in the original
          // conditional jump.
          ++jt;
          jf = 0;
        }

        // Now we can finally set the relative jump targets in the conditional
        // jump instruction. Afterwards, we must no longer access the jt_ptr
        // and jf_ptr fields.
        insn->jt = jt;
        insn->jf = jf;
      }
    } else if (BPF_CLASS(insn->code) != BPF_RET &&
               targets_to_blocks.find(insn->next)->second != last_bb) {
      SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
    }

    // Proceed to next basic block.
    offset += bb->instructions.size();
    bb->offset = offset;
  }
  return;
}

void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
                                     SandboxBPF::Program* program) {
  // Our basic blocks have been sorted and relative jump offsets have been
  // computed. The last remaining step is for all the instructions in our
  // basic blocks to be concatenated into a BPF program.
  program->clear();
  for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
       bb_iter != basic_blocks.end();
       ++bb_iter) {
    const BasicBlock& bb = **bb_iter;
    for (Instructions::const_iterator insn_iter = bb.instructions.begin();
         insn_iter != bb.instructions.end();
         ++insn_iter) {
      const Instruction& insn = **insn_iter;
      program->push_back(
          (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k});
    }
  }
  return;
}

void CodeGen::Compile(Instruction* instructions, SandboxBPF::Program* program) {
  if (compiled_) {
    SANDBOX_DIE(
        "Cannot call Compile() multiple times. Create a new code "
        "generator instead");
  }
  compiled_ = true;

  BranchTargets branch_targets;
  FindBranchTargets(*instructions, &branch_targets);
  TargetsToBlocks all_blocks;
  BasicBlock* first_block =
      CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
  MergeTails(&all_blocks);
  BasicBlocks basic_blocks;
  TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
  ComputeRelativeJumps(&basic_blocks, all_blocks);
  ConcatenateBasicBlocks(basic_blocks, program);
  return;
}
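
// Illustrative usage sketch (assumes a CodeGen "gen" and an ErrorCode "err"
// constructed elsewhere): build the instruction DAG bottom-up, then lower it
// into a kernel-loadable program with a single Compile() call.
//
//   Instruction* ret = gen.MakeInstruction(BPF_RET + BPF_K, err);
//   Instruction* head = gen.MakeInstruction(
//       BPF_LD + BPF_W + BPF_ABS, offsetof(struct arch_seccomp_data, nr), ret);
//   SandboxBPF::Program program;
//   gen.Compile(head, &program);
//   gen.PrintProgram(program);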

}  // namespace sandbox