1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <errno.h>
6
7 #include <algorithm>
8 #include <set>
9 #include <vector>
10
11 #include "sandbox/linux/seccomp-bpf/codegen.h"
12 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
13 #include "sandbox/linux/tests/unit_tests.h"
14
15 namespace sandbox {
16
17 class SandboxUnittestHelper : public SandboxBPF {
18 public:
19 typedef SandboxBPF::Program Program;
20 };
21
22 // We want to access some of the private methods in the code generator. We
23 // do so by defining a "friend" that makes these methods public for us.
24 class CodeGenUnittestHelper : public CodeGen {
25 public:
FindBranchTargets(const Instruction & instructions,BranchTargets * branch_targets)26 void FindBranchTargets(const Instruction& instructions,
27 BranchTargets *branch_targets) {
28 CodeGen::FindBranchTargets(instructions, branch_targets);
29 }
30
CutGraphIntoBasicBlocks(Instruction * insns,const BranchTargets & branch_targets,TargetsToBlocks * blocks)31 BasicBlock *CutGraphIntoBasicBlocks(Instruction *insns,
32 const BranchTargets& branch_targets,
33 TargetsToBlocks *blocks) {
34 return CodeGen::CutGraphIntoBasicBlocks(insns, branch_targets, blocks);
35 }
36
MergeTails(TargetsToBlocks * blocks)37 void MergeTails(TargetsToBlocks *blocks) {
38 CodeGen::MergeTails(blocks);
39 }
40 };
41
42 enum { NO_FLAGS = 0x0000,
43 HAS_MERGEABLE_TAILS = 0x0001,
44 };
45
SampleProgramOneInstruction(CodeGen * codegen,int * flags)46 Instruction *SampleProgramOneInstruction(CodeGen *codegen, int *flags) {
47 // Create the most basic valid BPF program:
48 // RET ERR_ALLOWED
49 *flags = NO_FLAGS;
50 return codegen->MakeInstruction(BPF_RET+BPF_K,
51 ErrorCode(ErrorCode::ERR_ALLOWED));
52 }
53
SampleProgramSimpleBranch(CodeGen * codegen,int * flags)54 Instruction *SampleProgramSimpleBranch(CodeGen *codegen, int *flags) {
55 // Create a program with a single branch:
56 // JUMP if eq 42 then $0 else $1
57 // 0: RET EPERM
58 // 1: RET ERR_ALLOWED
59 *flags = NO_FLAGS;
60 return codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42,
61 codegen->MakeInstruction(BPF_RET+BPF_K,
62 ErrorCode(EPERM)),
63 codegen->MakeInstruction(BPF_RET+BPF_K,
64 ErrorCode(ErrorCode::ERR_ALLOWED)));
65 }
66
SampleProgramAtypicalBranch(CodeGen * codegen,int * flags)67 Instruction *SampleProgramAtypicalBranch(CodeGen *codegen, int *flags) {
68 // Create a program with a single branch:
69 // JUMP if eq 42 then $0 else $0
70 // 0: RET ERR_ALLOWED
71
72 // N.B.: As the instructions in both sides of the branch are already
73 // the same object, we do not actually have any "mergeable" branches.
74 // This needs to be reflected in our choice of "flags".
75 *flags = NO_FLAGS;
76
77 Instruction *ret =
78 codegen->MakeInstruction(BPF_RET+BPF_K,
79 ErrorCode(ErrorCode::ERR_ALLOWED));
80 return codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42, ret, ret);
81 }
82
SampleProgramComplex(CodeGen * codegen,int * flags)83 Instruction *SampleProgramComplex(CodeGen *codegen, int *flags) {
84 // Creates a basic BPF program that we'll use to test some of the code:
85 // JUMP if eq 42 the $0 else $1 (insn6)
86 // 0: LD 23 (insn5)
87 // 1: JUMP if eq 42 then $2 else $4 (insn4)
88 // 2: JUMP to $3 (insn1)
89 // 3: LD 42 (insn0)
90 // RET ErrorCode(42) (insn2)
91 // 4: LD 42 (insn3)
92 // RET ErrorCode(42) (insn3+)
93 *flags = HAS_MERGEABLE_TAILS;
94
95 Instruction *insn0 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 42);
96 SANDBOX_ASSERT(insn0);
97 SANDBOX_ASSERT(insn0->code == BPF_LD+BPF_W+BPF_ABS);
98 SANDBOX_ASSERT(insn0->k == 42);
99 SANDBOX_ASSERT(insn0->next == NULL);
100
101 Instruction *insn1 = codegen->MakeInstruction(BPF_JMP+BPF_JA, 0, insn0);
102 SANDBOX_ASSERT(insn1);
103 SANDBOX_ASSERT(insn1->code == BPF_JMP+BPF_JA);
104 SANDBOX_ASSERT(insn1->jt_ptr == insn0);
105
106 Instruction *insn2 = codegen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(42));
107 SANDBOX_ASSERT(insn2);
108 SANDBOX_ASSERT(insn2->code == BPF_RET+BPF_K);
109 SANDBOX_ASSERT(insn2->next == NULL);
110
111 // We explicitly duplicate instructions so that MergeTails() can coalesce
112 // them later.
113 Instruction *insn3 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 42,
114 codegen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(42)));
115
116 Instruction *insn4 = codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42,
117 insn1, insn3);
118 SANDBOX_ASSERT(insn4);
119 SANDBOX_ASSERT(insn4->code == BPF_JMP+BPF_JEQ+BPF_K);
120 SANDBOX_ASSERT(insn4->k == 42);
121 SANDBOX_ASSERT(insn4->jt_ptr == insn1);
122 SANDBOX_ASSERT(insn4->jf_ptr == insn3);
123
124 codegen->JoinInstructions(insn0, insn2);
125 SANDBOX_ASSERT(insn0->next == insn2);
126
127 Instruction *insn5 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
128 23, insn4);
129 SANDBOX_ASSERT(insn5);
130 SANDBOX_ASSERT(insn5->code == BPF_LD+BPF_W+BPF_ABS);
131 SANDBOX_ASSERT(insn5->k == 23);
132 SANDBOX_ASSERT(insn5->next == insn4);
133
134 // Force a basic block that ends in neither a jump instruction nor a return
135 // instruction. It only contains "insn5". This exercises one of the less
136 // common code paths in the topo-sort algorithm.
137 // This also gives us a diamond-shaped pattern in our graph, which stresses
138 // another aspect of the topo-sort algorithm (namely, the ability to
139 // correctly count the incoming branches for subtrees that are not disjunct).
140 Instruction *insn6 = codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42,
141 insn5, insn4);
142
143 return insn6;
144 }
145
ForAllPrograms(void (* test)(CodeGenUnittestHelper *,Instruction *,int))146 void ForAllPrograms(void (*test)(CodeGenUnittestHelper *, Instruction *, int)){
147 Instruction *(*function_table[])(CodeGen *codegen, int *flags) = {
148 SampleProgramOneInstruction,
149 SampleProgramSimpleBranch,
150 SampleProgramAtypicalBranch,
151 SampleProgramComplex,
152 };
153
154 for (size_t i = 0; i < arraysize(function_table); ++i) {
155 CodeGenUnittestHelper codegen;
156 int flags = NO_FLAGS;
157 Instruction *prg = function_table[i](&codegen, &flags);
158 test(&codegen, prg, flags);
159 }
160 }
161
MakeInstruction(CodeGenUnittestHelper * codegen,Instruction * program,int)162 void MakeInstruction(CodeGenUnittestHelper *codegen,
163 Instruction *program, int) {
164 // Nothing to do here
165 }
166
SANDBOX_TEST(CodeGen,MakeInstruction)167 SANDBOX_TEST(CodeGen, MakeInstruction) {
168 ForAllPrograms(MakeInstruction);
169 }
170
FindBranchTargets(CodeGenUnittestHelper * codegen,Instruction * prg,int)171 void FindBranchTargets(CodeGenUnittestHelper *codegen, Instruction *prg, int) {
172 BranchTargets branch_targets;
173 codegen->FindBranchTargets(*prg, &branch_targets);
174
175 // Verifying the general properties that should be true for every
176 // well-formed BPF program.
177 // Perform a depth-first traversal of the BPF program an verify that all
178 // targets of BPF_JMP instructions are represented in the "branch_targets".
179 // At the same time, compute a set of both the branch targets and all the
180 // instructions in the program.
181 std::vector<Instruction *> stack;
182 std::set<Instruction *> all_instructions;
183 std::set<Instruction *> target_instructions;
184 BranchTargets::const_iterator end = branch_targets.end();
185 for (Instruction *insn = prg;;) {
186 all_instructions.insert(insn);
187 if (BPF_CLASS(insn->code) == BPF_JMP) {
188 target_instructions.insert(insn->jt_ptr);
189 SANDBOX_ASSERT(insn->jt_ptr != NULL);
190 SANDBOX_ASSERT(branch_targets.find(insn->jt_ptr) != end);
191 if (BPF_OP(insn->code) != BPF_JA) {
192 target_instructions.insert(insn->jf_ptr);
193 SANDBOX_ASSERT(insn->jf_ptr != NULL);
194 SANDBOX_ASSERT(branch_targets.find(insn->jf_ptr) != end);
195 stack.push_back(insn->jf_ptr);
196 }
197 insn = insn->jt_ptr;
198 } else if (BPF_CLASS(insn->code) == BPF_RET) {
199 SANDBOX_ASSERT(insn->next == NULL);
200 if (stack.empty()) {
201 break;
202 }
203 insn = stack.back();
204 stack.pop_back();
205 } else {
206 SANDBOX_ASSERT(insn->next != NULL);
207 insn = insn->next;
208 }
209 }
210 SANDBOX_ASSERT(target_instructions.size() == branch_targets.size());
211
212 // We can now subtract the set of the branch targets from the set of all
213 // instructions. This gives us a set with the instructions that nobody
214 // ever jumps to. Verify that they are no included in the
215 // "branch_targets" that FindBranchTargets() computed for us.
216 Instructions non_target_instructions(all_instructions.size() -
217 target_instructions.size());
218 set_difference(all_instructions.begin(), all_instructions.end(),
219 target_instructions.begin(), target_instructions.end(),
220 non_target_instructions.begin());
221 for (Instructions::const_iterator iter = non_target_instructions.begin();
222 iter != non_target_instructions.end();
223 ++iter) {
224 SANDBOX_ASSERT(branch_targets.find(*iter) == end);
225 }
226 }
227
SANDBOX_TEST(CodeGen,FindBranchTargets)228 SANDBOX_TEST(CodeGen, FindBranchTargets) {
229 ForAllPrograms(FindBranchTargets);
230 }
231
CutGraphIntoBasicBlocks(CodeGenUnittestHelper * codegen,Instruction * prg,int)232 void CutGraphIntoBasicBlocks(CodeGenUnittestHelper *codegen,
233 Instruction *prg, int) {
234 BranchTargets branch_targets;
235 codegen->FindBranchTargets(*prg, &branch_targets);
236 TargetsToBlocks all_blocks;
237 BasicBlock *first_block =
238 codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks);
239 SANDBOX_ASSERT(first_block != NULL);
240 SANDBOX_ASSERT(first_block->instructions.size() > 0);
241 Instruction *first_insn = first_block->instructions[0];
242
243 // Basic blocks are supposed to start with a branch target and end with
244 // either a jump or a return instruction. It can also end, if the next
245 // instruction forms the beginning of a new basic block. There should be
246 // no other jumps or return instructions in the middle of a basic block.
247 for (TargetsToBlocks::const_iterator bb_iter = all_blocks.begin();
248 bb_iter != all_blocks.end();
249 ++bb_iter) {
250 BasicBlock *bb = bb_iter->second;
251 SANDBOX_ASSERT(bb != NULL);
252 SANDBOX_ASSERT(bb->instructions.size() > 0);
253 Instruction *insn = bb->instructions[0];
254 SANDBOX_ASSERT(insn == first_insn ||
255 branch_targets.find(insn) != branch_targets.end());
256 for (Instructions::const_iterator insn_iter = bb->instructions.begin();;){
257 insn = *insn_iter;
258 if (++insn_iter != bb->instructions.end()) {
259 SANDBOX_ASSERT(BPF_CLASS(insn->code) != BPF_JMP);
260 SANDBOX_ASSERT(BPF_CLASS(insn->code) != BPF_RET);
261 } else {
262 SANDBOX_ASSERT(BPF_CLASS(insn->code) == BPF_JMP ||
263 BPF_CLASS(insn->code) == BPF_RET ||
264 branch_targets.find(insn->next) !=
265 branch_targets.end());
266 break;
267 }
268 SANDBOX_ASSERT(branch_targets.find(*insn_iter) == branch_targets.end());
269 }
270 }
271 }
272
SANDBOX_TEST(CodeGen,CutGraphIntoBasicBlocks)273 SANDBOX_TEST(CodeGen, CutGraphIntoBasicBlocks) {
274 ForAllPrograms(CutGraphIntoBasicBlocks);
275 }
276
MergeTails(CodeGenUnittestHelper * codegen,Instruction * prg,int flags)277 void MergeTails(CodeGenUnittestHelper *codegen, Instruction *prg,
278 int flags) {
279 BranchTargets branch_targets;
280 codegen->FindBranchTargets(*prg, &branch_targets);
281 TargetsToBlocks all_blocks;
282 BasicBlock *first_block =
283 codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks);
284
285 // The shape of our graph and thus the function of our program should
286 // still be unchanged after we run MergeTails(). We verify this by
287 // serializing the graph and verifying that it is still the same.
288 // We also verify that at least some of the edges changed because of
289 // tail merging.
290 std::string graph[2];
291 std::string edges[2];
292
293 // The loop executes twice. After the first run, we call MergeTails() on
294 // our graph.
295 for (int i = 0;;) {
296 // Traverse the entire program in depth-first order.
297 std::vector<BasicBlock *> stack;
298 for (BasicBlock *bb = first_block;;) {
299 // Serialize the instructions in this basic block. In general, we only
300 // need to serialize "code" and "k"; except for a BPF_JA instruction
301 // where "k" isn't set.
302 // The stream of instructions should be unchanged after MergeTails().
303 for (Instructions::const_iterator iter = bb->instructions.begin();
304 iter != bb->instructions.end();
305 ++iter) {
306 graph[i].append(reinterpret_cast<char *>(&(*iter)->code),
307 sizeof((*iter)->code));
308 if (BPF_CLASS((*iter)->code) != BPF_JMP ||
309 BPF_OP((*iter)->code) != BPF_JA) {
310 graph[i].append(reinterpret_cast<char *>(&(*iter)->k),
311 sizeof((*iter)->k));
312 }
313 }
314
315 // Also serialize the addresses the basic blocks as we encounter them.
316 // This will change as basic blocks are coalesed by MergeTails().
317 edges[i].append(reinterpret_cast<char *>(&bb), sizeof(bb));
318
319 // Depth-first traversal of the graph. We only ever need to look at the
320 // very last instruction in the basic block, as that is the only one that
321 // can change code flow.
322 Instruction *insn = bb->instructions.back();
323 if (BPF_CLASS(insn->code) == BPF_JMP) {
324 // For jump instructions, we need to remember the "false" branch while
325 // traversing the "true" branch. This is not necessary for BPF_JA which
326 // only has a single branch.
327 if (BPF_OP(insn->code) != BPF_JA) {
328 stack.push_back(all_blocks[insn->jf_ptr]);
329 }
330 bb = all_blocks[insn->jt_ptr];
331 } else if (BPF_CLASS(insn->code) == BPF_RET) {
332 // After a BPF_RET, see if we need to back track.
333 if (stack.empty()) {
334 break;
335 }
336 bb = stack.back();
337 stack.pop_back();
338 } else {
339 // For "normal" instructions, just follow to the next basic block.
340 bb = all_blocks[insn->next];
341 }
342 }
343
344 // Our loop runs exactly two times.
345 if (++i > 1) {
346 break;
347 }
348 codegen->MergeTails(&all_blocks);
349 }
350 SANDBOX_ASSERT(graph[0] == graph[1]);
351 if (flags & HAS_MERGEABLE_TAILS) {
352 SANDBOX_ASSERT(edges[0] != edges[1]);
353 } else {
354 SANDBOX_ASSERT(edges[0] == edges[1]);
355 }
356 }
357
SANDBOX_TEST(CodeGen,MergeTails)358 SANDBOX_TEST(CodeGen, MergeTails) {
359 ForAllPrograms(MergeTails);
360 }
361
CompileAndCompare(CodeGenUnittestHelper * codegen,Instruction * prg,int)362 void CompileAndCompare(CodeGenUnittestHelper *codegen, Instruction *prg, int) {
363 // TopoSortBasicBlocks() has internal checks that cause it to fail, if it
364 // detects a problem. Typically, if anything goes wrong, this looks to the
365 // TopoSort algorithm as if there had been cycles in the input data.
366 // This provides a pretty good unittest.
367 // We hand-crafted the program returned by SampleProgram() to exercise
368 // several of the more interesting code-paths. See comments in
369 // SampleProgram() for details.
370 // In addition to relying on the internal consistency checks in the compiler,
371 // we also serialize the graph and the resulting BPF program and compare
372 // them. With the exception of BPF_JA instructions that might have been
373 // inserted, both instruction streams should be equivalent.
374 // As Compile() modifies the instructions, we have to serialize the graph
375 // before calling Compile().
376 std::string source;
377 Instructions source_stack;
378 for (const Instruction *insn = prg, *next; insn; insn = next) {
379 if (BPF_CLASS(insn->code) == BPF_JMP) {
380 if (BPF_OP(insn->code) == BPF_JA) {
381 // Do not serialize BPF_JA instructions (see above).
382 next = insn->jt_ptr;
383 continue;
384 } else {
385 source_stack.push_back(insn->jf_ptr);
386 next = insn->jt_ptr;
387 }
388 } else if (BPF_CLASS(insn->code) == BPF_RET) {
389 if (source_stack.empty()) {
390 next = NULL;
391 } else {
392 next = source_stack.back();
393 source_stack.pop_back();
394 }
395 } else {
396 next = insn->next;
397 }
398 // Only serialize "code" and "k". That's all the information we need to
399 // compare. The rest of the information is encoded in the order of
400 // instructions.
401 source.append(reinterpret_cast<const char *>(&insn->code),
402 sizeof(insn->code));
403 source.append(reinterpret_cast<const char *>(&insn->k),
404 sizeof(insn->k));
405 }
406
407 // Compile the program
408 SandboxUnittestHelper::Program bpf;
409 codegen->Compile(prg, &bpf);
410
411 // Serialize the resulting BPF instructions.
412 std::string assembly;
413 std::vector<int> assembly_stack;
414 for (int idx = 0; idx >= 0;) {
415 SANDBOX_ASSERT(idx < (int)bpf.size());
416 struct sock_filter& insn = bpf[idx];
417 if (BPF_CLASS(insn.code) == BPF_JMP) {
418 if (BPF_OP(insn.code) == BPF_JA) {
419 // Do not serialize BPF_JA instructions (see above).
420 idx += insn.k + 1;
421 continue;
422 } else {
423 assembly_stack.push_back(idx + insn.jf + 1);
424 idx += insn.jt + 1;
425 }
426 } else if (BPF_CLASS(insn.code) == BPF_RET) {
427 if (assembly_stack.empty()) {
428 idx = -1;
429 } else {
430 idx = assembly_stack.back();
431 assembly_stack.pop_back();
432 }
433 } else {
434 ++idx;
435 }
436 // Serialize the same information that we serialized before compilation.
437 assembly.append(reinterpret_cast<char *>(&insn.code), sizeof(insn.code));
438 assembly.append(reinterpret_cast<char *>(&insn.k), sizeof(insn.k));
439 }
440 SANDBOX_ASSERT(source == assembly);
441 }
442
SANDBOX_TEST(CodeGen,All)443 SANDBOX_TEST(CodeGen, All) {
444 ForAllPrograms(CompileAndCompare);
445 }
446
447 } // namespace sandbox
448