• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "sandbox/linux/bpf_dsl/codegen.h"
6 
7 #include <stddef.h>
8 #include <stdint.h>
9 
10 #include <map>
11 #include <utility>
12 #include <vector>
13 
14 #include "base/macros.h"
15 #include "base/md5.h"
16 #include "base/strings/string_piece.h"
17 #include "sandbox/linux/system_headers/linux_filter.h"
18 #include "testing/gtest/include/gtest/gtest.h"
19 
20 namespace sandbox {
21 namespace {
22 
23 // Hash provides an abstraction for building "hash trees" from BPF
24 // control flow graphs, and efficiently identifying equivalent graphs.
25 //
26 // For simplicity, we use MD5, because base happens to provide a
27 // convenient API for its use. However, any collision-resistant hash
28 // should suffice.
29 class Hash {
30  public:
31   static const Hash kZero;
32 
Hash()33   Hash() : digest_() {}
34 
Hash(uint16_t code,uint32_t k,const Hash & jt=kZero,const Hash & jf=kZero)35   Hash(uint16_t code,
36        uint32_t k,
37        const Hash& jt = kZero,
38        const Hash& jf = kZero)
39       : digest_() {
40     base::MD5Context ctx;
41     base::MD5Init(&ctx);
42     HashValue(&ctx, code);
43     HashValue(&ctx, k);
44     HashValue(&ctx, jt);
45     HashValue(&ctx, jf);
46     base::MD5Final(&digest_, &ctx);
47   }
48 
49   Hash(const Hash& hash) = default;
50   Hash& operator=(const Hash& rhs) = default;
51 
operator ==(const Hash & lhs,const Hash & rhs)52   friend bool operator==(const Hash& lhs, const Hash& rhs) {
53     return lhs.Base16() == rhs.Base16();
54   }
operator !=(const Hash & lhs,const Hash & rhs)55   friend bool operator!=(const Hash& lhs, const Hash& rhs) {
56     return !(lhs == rhs);
57   }
58 
59  private:
60   template <typename T>
HashValue(base::MD5Context * ctx,const T & value)61   void HashValue(base::MD5Context* ctx, const T& value) {
62     base::MD5Update(ctx,
63                     base::StringPiece(reinterpret_cast<const char*>(&value),
64                                       sizeof(value)));
65   }
66 
Base16() const67   std::string Base16() const {
68     return base::MD5DigestToBase16(digest_);
69   }
70 
71   base::MD5Digest digest_;
72 };
73 
74 const Hash Hash::kZero;
75 
76 // Sanity check that equality and inequality work on Hash as required.
TEST(CodeGen,HashSanity)77 TEST(CodeGen, HashSanity) {
78   std::vector<Hash> hashes;
79 
80   // Push a bunch of logically distinct hashes.
81   hashes.push_back(Hash::kZero);
82   for (int i = 0; i < 4; ++i) {
83     hashes.push_back(Hash(i & 1, i & 2));
84   }
85   for (int i = 0; i < 16; ++i) {
86     hashes.push_back(Hash(i & 1, i & 2, Hash(i & 4, i & 8)));
87   }
88   for (int i = 0; i < 64; ++i) {
89     hashes.push_back(
90         Hash(i & 1, i & 2, Hash(i & 4, i & 8), Hash(i & 16, i & 32)));
91   }
92 
93   for (const Hash& a : hashes) {
94     for (const Hash& b : hashes) {
95       // Hashes should equal themselves, but not equal all others.
96       if (&a == &b) {
97         EXPECT_EQ(a, b);
98       } else {
99         EXPECT_NE(a, b);
100       }
101     }
102   }
103 }
104 
105 // ProgramTest provides a fixture for writing compiling sample
106 // programs with CodeGen and verifying the linearized output matches
107 // the input DAG.
108 class ProgramTest : public ::testing::Test {
109  protected:
ProgramTest()110   ProgramTest() : gen_(), node_hashes_() {}
111 
112   // MakeInstruction calls CodeGen::MakeInstruction() and associated
113   // the returned address with a hash of the instruction.
MakeInstruction(uint16_t code,uint32_t k,CodeGen::Node jt=CodeGen::kNullNode,CodeGen::Node jf=CodeGen::kNullNode)114   CodeGen::Node MakeInstruction(uint16_t code,
115                                 uint32_t k,
116                                 CodeGen::Node jt = CodeGen::kNullNode,
117                                 CodeGen::Node jf = CodeGen::kNullNode) {
118     CodeGen::Node res = gen_.MakeInstruction(code, k, jt, jf);
119     EXPECT_NE(CodeGen::kNullNode, res);
120 
121     Hash digest(code, k, Lookup(jt), Lookup(jf));
122     auto it = node_hashes_.insert(std::make_pair(res, digest));
123     EXPECT_EQ(digest, it.first->second);
124 
125     return res;
126   }
127 
128   // RunTest compiles the program and verifies that the output matches
129   // what is expected.  It should be called at the end of each program
130   // test case.
RunTest(CodeGen::Node head)131   void RunTest(CodeGen::Node head) {
132     // Compile the program
133     CodeGen::Program program = gen_.Compile(head);
134 
135     // Walk the program backwards, and compute the hash for each instruction.
136     std::vector<Hash> prog_hashes(program.size());
137     for (size_t i = program.size(); i > 0; --i) {
138       const sock_filter& insn = program.at(i - 1);
139       Hash& hash = prog_hashes.at(i - 1);
140 
141       if (BPF_CLASS(insn.code) == BPF_JMP) {
142         if (BPF_OP(insn.code) == BPF_JA) {
143           // The compiler adds JA instructions as needed, so skip them.
144           hash = prog_hashes.at(i + insn.k);
145         } else {
146           hash = Hash(insn.code, insn.k, prog_hashes.at(i + insn.jt),
147                       prog_hashes.at(i + insn.jf));
148         }
149       } else if (BPF_CLASS(insn.code) == BPF_RET) {
150         hash = Hash(insn.code, insn.k);
151       } else {
152         hash = Hash(insn.code, insn.k, prog_hashes.at(i));
153       }
154     }
155 
156     EXPECT_EQ(Lookup(head), prog_hashes.at(0));
157   }
158 
159  private:
Lookup(CodeGen::Node next) const160   const Hash& Lookup(CodeGen::Node next) const {
161     if (next == CodeGen::kNullNode) {
162       return Hash::kZero;
163     }
164     auto it = node_hashes_.find(next);
165     if (it == node_hashes_.end()) {
166       ADD_FAILURE() << "No hash found for node " << next;
167       return Hash::kZero;
168     }
169     return it->second;
170   }
171 
172   CodeGen gen_;
173   std::map<CodeGen::Node, Hash> node_hashes_;
174 
175   DISALLOW_COPY_AND_ASSIGN(ProgramTest);
176 };
177 
TEST_F(ProgramTest,OneInstruction)178 TEST_F(ProgramTest, OneInstruction) {
179   // Create the most basic valid BPF program:
180   //    RET 0
181   CodeGen::Node head = MakeInstruction(BPF_RET + BPF_K, 0);
182   RunTest(head);
183 }
184 
TEST_F(ProgramTest,SimpleBranch)185 TEST_F(ProgramTest, SimpleBranch) {
186   // Create a program with a single branch:
187   //    JUMP if eq 42 then $0 else $1
188   // 0: RET 1
189   // 1: RET 0
190   CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42,
191                                        MakeInstruction(BPF_RET + BPF_K, 1),
192                                        MakeInstruction(BPF_RET + BPF_K, 0));
193   RunTest(head);
194 }
195 
TEST_F(ProgramTest,AtypicalBranch)196 TEST_F(ProgramTest, AtypicalBranch) {
197   // Create a program with a single branch:
198   //    JUMP if eq 42 then $0 else $0
199   // 0: RET 0
200 
201   CodeGen::Node ret = MakeInstruction(BPF_RET + BPF_K, 0);
202   CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret);
203 
204   // N.B.: As the instructions in both sides of the branch are already
205   //       the same object, we do not actually have any "mergeable" branches.
206   //       This needs to be reflected in our choice of "flags".
207   RunTest(head);
208 }
209 
TEST_F(ProgramTest,Complex)210 TEST_F(ProgramTest, Complex) {
211   // Creates a basic BPF program that we'll use to test some of the code:
212   //    JUMP if eq 42 the $0 else $1     (insn6)
213   // 0: LD 23                            (insn5)
214   // 1: JUMP if eq 42 then $2 else $4    (insn4)
215   // 2: JUMP to $3                       (insn2)
216   // 3: LD 42                            (insn1)
217   //    RET 42                           (insn0)
218   // 4: LD 42                            (insn3)
219   //    RET 42                           (insn3+)
220   CodeGen::Node insn0 = MakeInstruction(BPF_RET + BPF_K, 42);
221   CodeGen::Node insn1 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, insn0);
222   CodeGen::Node insn2 = insn1;  // Implicit JUMP
223 
224   // We explicitly duplicate instructions to test that they're merged.
225   CodeGen::Node insn3 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42,
226                                         MakeInstruction(BPF_RET + BPF_K, 42));
227   EXPECT_EQ(insn2, insn3);
228 
229   CodeGen::Node insn4 =
230       MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn2, insn3);
231   CodeGen::Node insn5 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 23, insn4);
232 
233   // Force a basic block that ends in neither a jump instruction nor a return
234   // instruction. It only contains "insn5". This exercises one of the less
235   // common code paths in the topo-sort algorithm.
236   // This also gives us a diamond-shaped pattern in our graph, which stresses
237   // another aspect of the topo-sort algorithm (namely, the ability to
238   // correctly count the incoming branches for subtrees that are not disjunct).
239   CodeGen::Node insn6 =
240       MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4);
241 
242   RunTest(insn6);
243 }
244 
TEST_F(ProgramTest,ConfusingTails)245 TEST_F(ProgramTest, ConfusingTails) {
246   // This simple program demonstrates https://crbug.com/351103/
247   // The two "LOAD 0" instructions are blocks of their own. MergeTails() could
248   // be tempted to merge them since they are the same. However, they are
249   // not mergeable because they fall-through to non semantically equivalent
250   // blocks.
251   // Without the fix for this bug, this program should trigger the check in
252   // CompileAndCompare: the serialized graphs from the program and its compiled
253   // version will differ.
254   //
255   //  0) LOAD 1  // ???
256   //  1) if A == 0x1; then JMP 2 else JMP 3
257   //  2) LOAD 0  // System call number
258   //  3) if A == 0x2; then JMP 4 else JMP 5
259   //  4) LOAD 0  // System call number
260   //  5) if A == 0x1; then JMP 6 else JMP 7
261   //  6) RET 0
262   //  7) RET 1
263 
264   CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1);
265   CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0);
266   CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7);
267   CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5);
268   CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
269   CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3);
270   CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
271   CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
272 
273   RunTest(i0);
274 }
275 
TEST_F(ProgramTest,ConfusingTailsBasic)276 TEST_F(ProgramTest, ConfusingTailsBasic) {
277   // Without the fix for https://crbug.com/351103/, (see
278   // SampleProgramConfusingTails()), this would generate a cyclic graph and
279   // crash as the two "LOAD 0" instructions would get merged.
280   //
281   // 0) LOAD 1  // ???
282   // 1) if A == 0x1; then JMP 2 else JMP 3
283   // 2) LOAD 0  // System call number
284   // 3) if A == 0x2; then JMP 4 else JMP 5
285   // 4) LOAD 0  // System call number
286   // 5) RET 1
287 
288   CodeGen::Node i5 = MakeInstruction(BPF_RET + BPF_K, 1);
289   CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5);
290   CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
291   CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3);
292   CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
293   CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
294 
295   RunTest(i0);
296 }
297 
TEST_F(ProgramTest,ConfusingTailsMergeable)298 TEST_F(ProgramTest, ConfusingTailsMergeable) {
299   // This is similar to SampleProgramConfusingTails(), except that
300   // instructions 2 and 4 are now RET instructions.
301   // In PointerCompare(), this exercises the path where two blocks are of the
302   // same length and identical and the last instruction is a JMP or RET, so the
303   // following blocks don't need to be looked at and the blocks are mergeable.
304   //
305   // 0) LOAD 1  // ???
306   // 1) if A == 0x1; then JMP 2 else JMP 3
307   // 2) RET 42
308   // 3) if A == 0x2; then JMP 4 else JMP 5
309   // 4) RET 42
310   // 5) if A == 0x1; then JMP 6 else JMP 7
311   // 6) RET 0
312   // 7) RET 1
313 
314   CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1);
315   CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0);
316   CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7);
317   CodeGen::Node i4 = MakeInstruction(BPF_RET + BPF_K, 42);
318   CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
319   CodeGen::Node i2 = MakeInstruction(BPF_RET + BPF_K, 42);
320   CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
321   CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
322 
323   RunTest(i0);
324 }
325 
TEST_F(ProgramTest,InstructionFolding)326 TEST_F(ProgramTest, InstructionFolding) {
327   // Check that simple instructions are folded as expected.
328   CodeGen::Node a = MakeInstruction(BPF_RET + BPF_K, 0);
329   EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0));
330   CodeGen::Node b = MakeInstruction(BPF_RET + BPF_K, 1);
331   EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0));
332   EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1));
333   EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1));
334 
335   // Check that complex sequences are folded too.
336   CodeGen::Node c =
337       MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0,
338                       MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b));
339   EXPECT_EQ(c, MakeInstruction(
340                    BPF_LD + BPF_W + BPF_ABS, 0,
341                    MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b)));
342 
343   RunTest(c);
344 }
345 
TEST_F(ProgramTest,FarBranches)346 TEST_F(ProgramTest, FarBranches) {
347   // BPF instructions use 8-bit fields for branch offsets, which means
348   // branch targets must be within 255 instructions of the branch
349   // instruction. CodeGen abstracts away this detail by inserting jump
350   // instructions as needed, which we test here by generating programs
351   // that should trigger any interesting boundary conditions.
352 
353   // Populate with 260 initial instruction nodes.
354   std::vector<CodeGen::Node> nodes;
355   nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0));
356   for (size_t i = 1; i < 260; ++i) {
357     nodes.push_back(
358         MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back()));
359   }
360 
361   // Exhaustively test branch offsets near BPF's limits.
362   for (size_t jt = 250; jt < 260; ++jt) {
363     for (size_t jf = 250; jf < 260; ++jf) {
364       nodes.push_back(MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0,
365                                       nodes.rbegin()[jt], nodes.rbegin()[jf]));
366       RunTest(nodes.back());
367     }
368   }
369 }
370 
TEST_F(ProgramTest,JumpReuse)371 TEST_F(ProgramTest, JumpReuse) {
372   // As a code size optimization, we try to reuse jumps when possible
373   // instead of emitting new ones. Here we make sure that optimization
374   // is working as intended.
375   //
376   // NOTE: To simplify testing, we rely on implementation details
377   // about what CodeGen::Node values indicate (i.e., vector indices),
378   // but CodeGen users should treat them as opaque values.
379 
380   // Populate with 260 initial instruction nodes.
381   std::vector<CodeGen::Node> nodes;
382   nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0));
383   for (size_t i = 1; i < 260; ++i) {
384     nodes.push_back(
385         MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back()));
386   }
387 
388   // Branching to nodes[0] and nodes[1] should require 3 new
389   // instructions: two far jumps plus the branch itself.
390   CodeGen::Node one =
391       MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0, nodes[0], nodes[1]);
392   EXPECT_EQ(nodes.back() + 3, one);  // XXX: Implementation detail!
393   RunTest(one);
394 
395   // Branching again to the same target nodes should require only one
396   // new instruction, as we can reuse the previous branch's jumps.
397   CodeGen::Node two =
398       MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, nodes[0], nodes[1]);
399   EXPECT_EQ(one + 1, two);  // XXX: Implementation detail!
400   RunTest(two);
401 }
402 
403 }  // namespace
404 }  // namespace sandbox
405