1 // Copyright 2022 Code Intelligence GmbH
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 /*
16  * Dynamically exported definitions of fuzzer hooks and libc functions that
17  * forward to the symbols provided by the jazzer_driver JNI library once it has
18  * been loaded.
19  */
20 
21 #define _GNU_SOURCE  // for RTLD_NEXT
22 #include <dlfcn.h>
23 #include <stdatomic.h>
24 #include <stddef.h>
25 #include <stdint.h>
26 #ifdef __APPLE__
27 // Using dyld's interpose feature requires knowing the addresses of libc
28 // functions.
29 #include <string.h>
30 #endif
31 
// libFuzzer's value profiling indexes a hash table with the lowest bits of the
// caller's return address. On Apple arm64 every instruction is exactly 32 bits
// wide, so the two lowest bits of a return address are always zero; shifting
// them away raises the entropy of the bits that end up being used. Everywhere
// else the return address is passed through unchanged.
#if !(defined(__APPLE__) && defined(__arm64__))
#define GET_CALLER_PC() __builtin_return_address(0)
#else
#define GET_CALLER_PC() \
  ((void *)(((uintptr_t)__builtin_return_address(0)) >> 2))
#endif
// Branch-prediction hints: the condition is normalized to 0 or 1 and the
// compiler is told which of the two values to expect.
#define LIKELY(x) __builtin_expect((x) != 0, 1)
#define UNLIKELY(x) __builtin_expect((x) != 0, 0)
44 
// Strips the parentheses from a parenthesized list: UNWRAP_VA_ARGS(foo, bar)
// expands to `foo, bar`. The macros below write `UNWRAP_VA_ARGS params` with
// `params` bound to something like `(foo, bar)`, which lets a comma-separated
// list travel through a single macro parameter.
#define UNWRAP_VA_ARGS(...) __VA_ARGS__
48 
// Emits, for the libc function `name`:
//  - a file-local atomic function pointer `<name>_hook` (initially NULL), and
//  - a dynamic, global symbol __sanitizer_weak_hook_<name> that forwards its
//    arguments to that pointer, which INIT_LIBC_HOOK fills with the local
//    symbol of the same name from the jazzer_driver shared library loaded in
//    the JVM. While the pointer is still NULL the call does nothing.
// `ret` is the return type of `name`, `params` its parenthesized parameter list
// and `args` the matching parenthesized list of argument names.
#define DEFINE_LIBC_HOOK(name, ret, params, args)                           \
  typedef void (*name##_hook_t)(void *, UNWRAP_VA_ARGS params, ret);        \
  static _Atomic name##_hook_t name##_hook;                                 \
                                                                            \
  __attribute__((visibility("default"))) void __sanitizer_weak_hook_##name( \
      void *called_pc, UNWRAP_VA_ARGS params, ret result) {                 \
    const name##_hook_t driver_hook =                                       \
        atomic_load_explicit(&name##_hook, memory_order_relaxed);           \
    if (UNLIKELY(driver_hook == NULL)) {                                    \
      return;                                                               \
    }                                                                       \
    driver_hook(called_pc, UNWRAP_VA_ARGS args, result);                    \
  }
64 
// Looks up __sanitizer_weak_hook_<name> in the library referred to by `handle`
// and stores the result in the `<name>_hook` pointer declared by
// DEFINE_LIBC_HOOK. Expands to an expression; the caller supplies the
// terminating semicolon.
#define INIT_LIBC_HOOK(handle, name)                                \
  atomic_store_explicit(                                            \
      &name##_hook,                                                 \
      (name##_hook_t)dlsym(handle, "__sanitizer_weak_hook_" #name), \
      memory_order_seq_cst)
67 
68 #ifdef __linux__
69 // Alternate definitions for libc functions mimicking those that libFuzzer would
70 // provide if it were linked into the JVM. All these functions invoke the real
71 // libc function loaded from the next library in search order (either libc
72 // itself or a sanitizer's interceptor).
73 //
// Function pointers have to be loaded and stored atomically because libc
// functions may be invoked from different threads, but we do not need any
// synchronization guarantees - in the worst case, we will non-deterministically
// lose a few hook invocations.
78 
// Emits the hook forwarder for `name` (see DEFINE_LIBC_HOOK) plus an exported
// definition of `name` itself. That definition resolves the next `name` in
// library search order via dlsym(RTLD_NEXT, ...) whenever no pointer has been
// cached yet, calls it, reports the arguments and the result together with the
// caller's PC to __sanitizer_weak_hook_<name>, and returns the result.
#define DEFINE_LIBC_INTERCEPTOR(name, ret, params, args)                   \
  DEFINE_LIBC_HOOK(name, ret, params, args)                                \
                                                                           \
  typedef ret (*name##_t)(UNWRAP_VA_ARGS params);                          \
  static _Atomic name##_t name##_real;                                     \
                                                                           \
  __attribute__((visibility("default"))) ret name(UNWRAP_VA_ARGS params) { \
    void *const caller_pc = GET_CALLER_PC();                               \
    name##_t next_in_chain =                                               \
        atomic_load_explicit(&name##_real, memory_order_relaxed);          \
    if (UNLIKELY(!next_in_chain)) {                                        \
      next_in_chain = dlsym(RTLD_NEXT, #name);                             \
      atomic_store_explicit(&name##_real, next_in_chain,                   \
                            memory_order_relaxed);                         \
    }                                                                      \
    ret outcome = next_in_chain(UNWRAP_VA_ARGS args);                      \
    __sanitizer_weak_hook_##name(caller_pc, UNWRAP_VA_ARGS args, outcome); \
    return outcome;                                                        \
  }
98 
99 #elif __APPLE__
// The macOS namespace concept makes it impossible to override symbols in
// shared library dependencies simply by defining them. Instead, the dynamic
// linker's interpose feature is used to request that one function, identified
// by its address, is replaced by another at runtime.
104 
// One entry for the dynamic linker's interpose feature. Both members hold
// function addresses converted to integers.
struct interpose_entry {
  const uintptr_t interceptor;  // address of the replacement function
  const uintptr_t func;         // address of the function being replaced
};
typedef struct interpose_entry interpose_t;
109 
// Emits an interpose_t entry into the __DATA,__interpose section that pairs
// the address of `replacement` with the address of `replacee`. The expansion
// already ends in a semicolon.
#define INTERPOSE(replacement, replacee)                         \
  __attribute__((used)) static interpose_t _interpose_##replacee \
      __attribute__((section("__DATA,__interpose"))) = {         \
          .interceptor = (uintptr_t)&replacement,                \
          .func = (uintptr_t)&replacee};
114 
// Emits the hook forwarder for `name` (see DEFINE_LIBC_HOOK), an exported
// function interposed_<name> that calls `name`, reports the arguments and the
// result together with the caller's PC to __sanitizer_weak_hook_<name> and
// returns the result, and an INTERPOSE entry pairing interposed_<name> with
// `name`.
#define DEFINE_LIBC_INTERCEPTOR(name, ret, params, args)               \
  DEFINE_LIBC_HOOK(name, ret, params, args)                            \
                                                                       \
  __attribute__((visibility("default")))                               \
  ret interposed_##name(UNWRAP_VA_ARGS params) {                       \
    void *const caller_pc = GET_CALLER_PC();                           \
    ret outcome = name(UNWRAP_VA_ARGS args);                           \
    __sanitizer_weak_hook_##name(caller_pc, UNWRAP_VA_ARGS args,       \
                                 outcome);                             \
    return outcome;                                                    \
  }                                                                    \
                                                                       \
  INTERPOSE(interposed_##name, name)
127 #else
128 // TODO: Use https://github.com/microsoft/Detours to add Windows support.
129 #error "jazzer_preload is not supported on this OS"
130 #endif
131 
// One interceptor (and the matching __sanitizer_weak_hook_* forwarder) per
// intercepted libc function. Each invocation passes the function name, its
// return type, its parenthesized parameter list and the parenthesized list of
// argument names. The expansions need no trailing semicolon.

// Memory and string comparisons returning int.
DEFINE_LIBC_INTERCEPTOR(bcmp, int, (const void *s1, const void *s2, size_t n),
                        (s1, s2, n))
DEFINE_LIBC_INTERCEPTOR(memcmp, int, (const void *s1, const void *s2, size_t n),
                        (s1, s2, n))
DEFINE_LIBC_INTERCEPTOR(strncmp, int,
                        (const char *s1, const char *s2, size_t n), (s1, s2, n))
DEFINE_LIBC_INTERCEPTOR(strncasecmp, int,
                        (const char *s1, const char *s2, size_t n), (s1, s2, n))
DEFINE_LIBC_INTERCEPTOR(strcmp, int, (const char *s1, const char *s2), (s1, s2))
DEFINE_LIBC_INTERCEPTOR(strcasecmp, int, (const char *s1, const char *s2),
                        (s1, s2))
// Searches returning a pointer.
DEFINE_LIBC_INTERCEPTOR(strstr, char *, (const char *s1, const char *s2),
                        (s1, s2))
DEFINE_LIBC_INTERCEPTOR(strcasestr, char *, (const char *s1, const char *s2),
                        (s1, s2))
DEFINE_LIBC_INTERCEPTOR(memmem, void *,
                        (const void *s1, size_t n1, const void *s2, size_t n2),
                        (s1, n1, s2, n2))
150 
151 // Native libraries instrumented for fuzzing include references to fuzzer hooks
152 // that are resolved by the dynamic linker. We need to route these to the
153 // corresponding local symbols in the Jazzer driver JNI library.
154 // The __sanitizer_cov_trace_* family of functions is only invoked from code
155 // compiled with -fsanitize=fuzzer. We can assume that the Jazzer JNI library
156 // has been loaded before any such code, which necessarily belongs to the fuzz
157 // target, is executed and thus don't need NULL checks.
// Emits a file-local atomic function pointer trace_<name>_with_pc and an
// exported __sanitizer_cov_trace_<name> that prepends the caller's PC to its
// arguments and calls through that pointer. The pointer is not checked for
// NULL, so INIT_TRACE_HOOK must have stored a valid function before the first
// call.
#define DEFINE_TRACE_HOOK(name, params, args)                                \
  typedef void (*trace_##name##_t)(void *, UNWRAP_VA_ARGS params);           \
  static _Atomic trace_##name##_t trace_##name##_with_pc;                    \
                                                                             \
  __attribute__((visibility("default"))) void __sanitizer_cov_trace_##name(  \
      UNWRAP_VA_ARGS params) {                                               \
    void *const caller_pc = GET_CALLER_PC();                                 \
    const trace_##name##_t with_pc_impl =                                    \
        atomic_load_explicit(&trace_##name##_with_pc, memory_order_relaxed); \
    with_pc_impl(caller_pc, UNWRAP_VA_ARGS args);                            \
  }
168 
// Looks up __sanitizer_cov_trace_<name>_with_pc in the library referred to by
// `handle` and stores the result in the trace_<name>_with_pc pointer declared
// by DEFINE_TRACE_HOOK. Expands to an expression; the caller supplies the
// terminating semicolon.
#define INIT_TRACE_HOOK(handle, name)                                     \
  atomic_store_explicit(                                                  \
      &trace_##name##_with_pc,                                            \
      (trace_##name##_t)dlsym(handle,                                     \
                              "__sanitizer_cov_trace_" #name "_with_pc"), \
      memory_order_seq_cst)
172 
173 DEFINE_TRACE_HOOK(cmp1, (uint8_t arg1, uint8_t arg2), (arg1, arg2));
174 DEFINE_TRACE_HOOK(cmp2, (uint16_t arg1, uint16_t arg2), (arg1, arg2));
175 DEFINE_TRACE_HOOK(cmp4, (uint32_t arg1, uint32_t arg2), (arg1, arg2));
176 DEFINE_TRACE_HOOK(cmp8, (uint64_t arg1, uint64_t arg2), (arg1, arg2));
177 
178 DEFINE_TRACE_HOOK(const_cmp1, (uint8_t arg1, uint8_t arg2), (arg1, arg2));
179 DEFINE_TRACE_HOOK(const_cmp2, (uint16_t arg1, uint16_t arg2), (arg1, arg2));
180 DEFINE_TRACE_HOOK(const_cmp4, (uint32_t arg1, uint32_t arg2), (arg1, arg2));
181 DEFINE_TRACE_HOOK(const_cmp8, (uint64_t arg1, uint64_t arg2), (arg1, arg2));
182 
183 DEFINE_TRACE_HOOK(switch, (uint64_t val, uint64_t *cases), (val, cases));
184 
185 DEFINE_TRACE_HOOK(div4, (uint32_t arg), (arg))
186 DEFINE_TRACE_HOOK(div8, (uint64_t arg), (arg))
187 
188 DEFINE_TRACE_HOOK(gep, (uintptr_t arg), (arg))
189 
190 DEFINE_TRACE_HOOK(pc_indir, (uintptr_t arg), (arg))
191 
// Signatures of the two coverage registration callbacks, followed by the
// atomic slots through which the exported __sanitizer_cov_*_init functions
// call them. Both slots are NULL until jazzer_preload_init stores into them.
typedef void (*cov_8bit_counters_init_t)(uint8_t *, uint8_t *);
typedef void (*cov_pcs_init_t)(const uintptr_t *, const uintptr_t *);

static _Atomic cov_8bit_counters_init_t cov_8bit_counters_init = NULL;
static _Atomic cov_pcs_init_t cov_pcs_init = NULL;
196 
__sanitizer_cov_8bit_counters_init(uint8_t * start,uint8_t * end)197 __attribute__((visibility("default"))) void __sanitizer_cov_8bit_counters_init(
198     uint8_t *start, uint8_t *end) {
199   cov_8bit_counters_init_t init =
200       atomic_load_explicit(&cov_8bit_counters_init, memory_order_relaxed);
201   init(start, end);
202 }
203 
__sanitizer_cov_pcs_init(const uintptr_t * pcs_beg,const uintptr_t * pcs_end)204 __attribute__((visibility("default"))) void __sanitizer_cov_pcs_init(
205     const uintptr_t *pcs_beg, const uintptr_t *pcs_end) {
206   cov_pcs_init_t init =
207       atomic_load_explicit(&cov_pcs_init, memory_order_relaxed);
208   init(pcs_beg, pcs_end);
209 }
210 
// Exported, per-thread variable that starts out as 0.
// TODO: This is never updated and thus doesn't provide any information to the
//  fuzzer.
_Thread_local uintptr_t __sancov_lowest_stack
    __attribute__((visibility("default"))) = 0;
215 
jazzer_preload_init(void * handle)216 __attribute__((visibility("default"))) void jazzer_preload_init(void *handle) {
217   INIT_LIBC_HOOK(handle, bcmp);
218   INIT_LIBC_HOOK(handle, memcmp);
219   INIT_LIBC_HOOK(handle, strncmp);
220   INIT_LIBC_HOOK(handle, strcmp);
221   INIT_LIBC_HOOK(handle, strncasecmp);
222   INIT_LIBC_HOOK(handle, strcasecmp);
223   INIT_LIBC_HOOK(handle, strstr);
224   INIT_LIBC_HOOK(handle, strcasestr);
225   INIT_LIBC_HOOK(handle, memmem);
226 
227   INIT_TRACE_HOOK(handle, cmp1);
228   INIT_TRACE_HOOK(handle, cmp2);
229   INIT_TRACE_HOOK(handle, cmp4);
230   INIT_TRACE_HOOK(handle, cmp8);
231 
232   INIT_TRACE_HOOK(handle, const_cmp1);
233   INIT_TRACE_HOOK(handle, const_cmp2);
234   INIT_TRACE_HOOK(handle, const_cmp4);
235   INIT_TRACE_HOOK(handle, const_cmp8);
236 
237   INIT_TRACE_HOOK(handle, switch);
238 
239   INIT_TRACE_HOOK(handle, div4);
240   INIT_TRACE_HOOK(handle, div8);
241 
242   INIT_TRACE_HOOK(handle, gep);
243 
244   INIT_TRACE_HOOK(handle, pc_indir);
245 
246   atomic_store(&cov_8bit_counters_init,
247                dlsym(handle, "__sanitizer_cov_8bit_counters_init"));
248   atomic_store(&cov_pcs_init, dlsym(handle, "__sanitizer_cov_pcs_init"));
249 }
250