// Copyright 2021 Code Intelligence GmbH
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "sanitizer_hooks_with_pc.h"

#include <cstddef>
#include <cstdint>
// libFuzzer's compare hooks obtain the caller's address from the compiler
// builtin __builtin_return_address. Since Java code always invokes the hooks
// from the same native function, this builtin would always return the same
// value. Internally, the libFuzzer hooks call through to the always inlined
// HandleCmp and thus can't be mimicked without patching libFuzzer.
//
// We solve this problem via an inline assembly trampoline construction that
// translates a runtime argument `fake_pc` in the range [0, 512) into a call to
// a hook with a fake return address whose lower 9 bits are `fake_pc` up to a
// constant shift. This is achieved by pushing a return address pointing into a
// sled of 512 ret instructions at offset `fake_pc` onto the stack and then
// jumping directly to the address of the hook.
//
// Note: We only set the lowest 9 bits of the return address since only these
// bits are used by the libFuzzer value profiling mode for integer compares,
// see
// https://github.com/llvm/llvm-project/blob/704d92607d26e696daba596b72cb70effe79a872/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp#L390
// as well as
// https://github.com/llvm/llvm-project/blob/704d92607d26e696daba596b72cb70effe79a872/compiler-rt/lib/fuzzer/FuzzerValueBitMap.h#L34
// ValueProfileMap.AddValue() truncates its argument to 16 bits and the PC is
// shifted to the left by log_2(128) = 7, which means that only the lowest
// 16 - 7 = 9 bits of the return address matter. String compare hooks use the
// lowest 12 bits, but take the return address as an argument and thus don't
// require the indirection through a trampoline.

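// A hedged sanity check of the arithmetic described above (the constants
// 0x1234 and 42 are made up for illustration): two PCs that agree in their
// lowest 9 bits produce identical value profile entries once the PC is
// shifted left by 7 and the result is truncated to 16 bits.
static_assert((0x1234u * 128u + 42u) % (1u << 16) ==
                  ((0x1234u & 0x1FFu) * 128u + 42u) % (1u << 16),
              "only the lowest 9 bits of the PC reach the value profile map");
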
#define REPEAT_8(a) a a a a a a a a

#define REPEAT_512(a) REPEAT_8(REPEAT_8(REPEAT_8(a)))

// The first four registers used to pass arguments according to the
// platform-specific x64 calling convention.
#ifdef _WIN64
#define REG_1 "rcx"
#define REG_2 "rdx"
#define REG_3 "r8"
#define REG_4 "r9"
#else
#define REG_1 "rdi"
#define REG_2 "rsi"
#define REG_3 "rdx"
#define REG_4 "rcx"
#endif

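// For example (illustration only): on System V x64 (Linux, macOS), a call
// trampoline(arg1, arg2, func, fake_pc) arrives with arg1 in rdi, arg2 in rsi,
// func in rdx and fake_pc in rcx; under the Win64 convention the same
// arguments arrive in rcx, rdx, r8 and r9.
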
// Call the function at address `func` with arguments `arg1` and `arg2` while
// ensuring that the return address is `fake_pc` up to a globally constant
// offset.
__attribute__((noinline)) void trampoline(uint64_t arg1, uint64_t arg2,
                                          void *func, uint16_t fake_pc) {
  // arg1 and arg2 have to be forwarded according to the x64 calling
  // convention. We also fix func and fake_pc to their registers so that we can
  // safely use rax below.
  [[maybe_unused]] register uint64_t arg1_loc asm(REG_1) = arg1;
  [[maybe_unused]] register uint64_t arg2_loc asm(REG_2) = arg2;
  [[maybe_unused]] register void *func_loc asm(REG_3) = func;
  [[maybe_unused]] register uint64_t fake_pc_loc asm(REG_4) = fake_pc;
  asm volatile goto(
      // Load the RIP-relative address of the end of this function and push it
      // onto the stack so that the ret sled eventually returns here.
      "lea %l[end_of_function](%%rip), %%rax \n\t"
      "push %%rax \n\t"
      // Load the RIP-relative address of the ret sled into rax.
      "lea ret_sled(%%rip), %%rax \n\t"
      // Add the offset of the fake_pc-th ret.
      "add %[fake_pc], %%rax \n\t"
      // Push the fake return address pointing to that ret. The hook will
      // return to it and then immediately return to the end of this function.
      "push %%rax \n\t"
      // Call func with the fake return address on the stack.
      // Function arguments arg1 and arg2 are passed unchanged in the registers
      // REG_1 and REG_2 as governed by the platform's x64 calling convention.
      "jmp *%[func] \n\t"
      // Append a sled of 2^9 = 512 ret instructions.
      "ret_sled: \n\t" REPEAT_512("ret \n\t")
      :
      : "r"(arg1_loc),
        "r"(arg2_loc), [func] "r"(func_loc), [fake_pc] "r"(fake_pc_loc)
      : "memory"
      : end_of_function);

end_of_function:
  return;
}
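
// Example (illustration only): trampoline(1, 2, hook, 5) calls hook(1, 2) with
// a return address of ret_sled + 5, so __builtin_return_address(0) inside the
// hook equals fake_pc = 5 up to the constant address of ret_sled.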

namespace {
uintptr_t trampoline_offset = 0;
}  // namespace

void set_trampoline_offset() {
  // Stores the additive inverse of the current return address modulo 0x200u in
  // trampoline_offset.
  trampoline_offset =
      0x200u -
      (reinterpret_cast<uintptr_t>(__builtin_return_address(0)) & 0x1FFu);
}

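// A hedged sanity check of the offset arithmetic (0x0AB is a made-up low-bit
// pattern for the calibration return address): the stored offset cancels
// those bits modulo 0x200.
static_assert(((0x0ABu + (0x200u - 0x0ABu)) & 0x1FFu) == 0u,
              "trampoline_offset cancels the low 9 bits of the caller PC");
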
// Computes the additive shift that needs to be applied to the caller PC by
// caller_pc_to_fake_pc to make the caller PC and the resulting fake return
// address agree in their lowest 9 bits. This offset is constant for each
// binary, but may vary based on code generation specifics. By calibrating the
// trampoline, the fuzzer behavior is fully determined by the seed.
void CalibrateTrampoline() {
  trampoline(0, 0, reinterpret_cast<void *>(&set_trampoline_offset), 0);
}

// Masks any address down to its lower 9 bits, adjusting for the trampoline
// shift.
__attribute__((always_inline)) inline uint16_t caller_pc_to_fake_pc(
    const void *caller_pc) {
  return (reinterpret_cast<uintptr_t>(caller_pc) + trampoline_offset) & 0x1FFu;
}

// The original hooks exposed by libFuzzer. All of these get the caller's
// address via __builtin_return_address(0).
extern "C" {
void __sanitizer_cov_trace_cmp4(uint32_t arg1, uint32_t arg2);
void __sanitizer_cov_trace_cmp8(uint64_t arg1, uint64_t arg2);
void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases);
void __sanitizer_cov_trace_div4(uint32_t val);
void __sanitizer_cov_trace_div8(uint64_t val);
void __sanitizer_cov_trace_gep(uintptr_t idx);
void __sanitizer_cov_trace_pc_indir(uintptr_t callee);
}

void __sanitizer_cov_trace_cmp4_with_pc(void *caller_pc, uint32_t arg1,
                                        uint32_t arg2) {
  void *trace_cmp4 = reinterpret_cast<void *>(&__sanitizer_cov_trace_cmp4);
  auto fake_pc = caller_pc_to_fake_pc(caller_pc);
  trampoline(static_cast<uint64_t>(arg1), static_cast<uint64_t>(arg2),
             trace_cmp4, fake_pc);
}

void __sanitizer_cov_trace_cmp8_with_pc(void *caller_pc, uint64_t arg1,
                                        uint64_t arg2) {
  void *trace_cmp8 = reinterpret_cast<void *>(&__sanitizer_cov_trace_cmp8);
  auto fake_pc = caller_pc_to_fake_pc(caller_pc);
  trampoline(static_cast<uint64_t>(arg1), static_cast<uint64_t>(arg2),
             trace_cmp8, fake_pc);
}

void __sanitizer_cov_trace_switch_with_pc(void *caller_pc, uint64_t val,
                                          uint64_t *cases) {
  void *trace_switch = reinterpret_cast<void *>(&__sanitizer_cov_trace_switch);
  auto fake_pc = caller_pc_to_fake_pc(caller_pc);
  trampoline(static_cast<uint64_t>(val), reinterpret_cast<uint64_t>(cases),
             trace_switch, fake_pc);
}

void __sanitizer_cov_trace_div4_with_pc(void *caller_pc, uint32_t val) {
  void *trace_div4 = reinterpret_cast<void *>(&__sanitizer_cov_trace_div4);
  auto fake_pc = caller_pc_to_fake_pc(caller_pc);
  trampoline(static_cast<uint64_t>(val), 0, trace_div4, fake_pc);
}

void __sanitizer_cov_trace_div8_with_pc(void *caller_pc, uint64_t val) {
  void *trace_div8 = reinterpret_cast<void *>(&__sanitizer_cov_trace_div8);
  auto fake_pc = caller_pc_to_fake_pc(caller_pc);
  trampoline(static_cast<uint64_t>(val), 0, trace_div8, fake_pc);
}

void __sanitizer_cov_trace_gep_with_pc(void *caller_pc, uintptr_t idx) {
  void *trace_gep = reinterpret_cast<void *>(&__sanitizer_cov_trace_gep);
  auto fake_pc = caller_pc_to_fake_pc(caller_pc);
  trampoline(static_cast<uint64_t>(idx), 0, trace_gep, fake_pc);
}

void __sanitizer_cov_trace_pc_indir_with_pc(void *caller_pc, uintptr_t callee) {
  void *trace_pc_indir =
      reinterpret_cast<void *>(&__sanitizer_cov_trace_pc_indir);
  auto fake_pc = caller_pc_to_fake_pc(caller_pc);
  trampoline(static_cast<uint64_t>(callee), 0, trace_pc_indir, fake_pc);
}

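// Hedged usage sketch: how a native bridge might drive one of these hooks. The
// function below is hypothetical and not part of the original file; it assumes
// the managed runtime supplies a small per-call-site id in [0, 512) that
// stands in for the caller PC.
extern "C" void TraceCmpIntWithId(uint32_t arg1, uint32_t arg2,
                                  uint32_t call_site_id) {
  // The id is used directly as the fake caller PC; after masking, only its
  // lowest 9 bits influence libFuzzer's value profile.
  __sanitizer_cov_trace_cmp4_with_pc(
      reinterpret_cast<void *>(static_cast<uintptr_t>(call_site_id)), arg1,
      arg2);
}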