1 // Copyright 2021 Code Intelligence GmbH
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "jvm_tooling.h"
16
17 #include <fstream>
18 #include <iostream>
19 #include <memory>
20 #include <utility>
21 #include <vector>
22
23 #include "absl/strings/str_format.h"
24 #include "absl/strings/str_join.h"
25 #include "absl/strings/str_replace.h"
26 #include "absl/strings/str_split.h"
27 #include "coverage_tracker.h"
28 #include "gflags/gflags.h"
29 #include "glog/logging.h"
30 #include "libfuzzer_callbacks.h"
31 #include "signal_handler.h"
32 #include "tools/cpp/runfiles/runfiles.h"
33 #include "utils.h"
34
35 DEFINE_string(cp, ".",
36 "the classpath to use for fuzzing. Behaves analogously to java's "
37 "-cp (separator is ':' on Linux/macOS and ';' on Windows, escape "
38 "it with '\\').");
39 DEFINE_string(jvm_args, "",
40 "arguments passed to the JVM (separator is ':' on Linux/macOS "
41 "and ';' on Windows, escape it with '\\')");
42 DEFINE_string(additional_jvm_args, "",
43 "additional arguments passed to the JVM (separator is ':' on "
44 "Linux/macOS and ';' on Windows). Use this option to set further "
45 "JVM args that should not "
46 "interfere with those provided via --jvm_args.");
47 DEFINE_string(agent_path, "", "location of the fuzzing instrumentation agent");
48
49 // Arguments that are passed to the instrumentation agent.
50 // The instrumentation agent takes arguments in the form
51 // <option_1>=<option_1_val>,<option_2>=<option_2_val>,... To not expose this
52 // format to the user the available options are defined here as flags and
53 // combined during the initialization of the JVM.
54 DEFINE_string(instrumentation_includes, "",
55 "list of glob patterns for classes that will be instrumented for "
56 "fuzzing. Separated by colon \":\"");
57 DEFINE_string(instrumentation_excludes, "",
58 "list of glob patterns for classes that will not be instrumented "
59 "for fuzzing. Separated by colon \":\"");
60
61 DEFINE_string(custom_hook_includes, "",
62 "list of glob patterns for classes that will only be "
63 "instrumented using custom hooks. Separated by colon \":\"");
64 DEFINE_string(custom_hook_excludes, "",
65 "list of glob patterns for classes that will not be instrumented "
66 "using custom hooks. Separated by colon \":\"");
67 DEFINE_string(custom_hooks, "",
68 "list of classes containing custom instrumentation hooks. "
69 "Separated by colon \":\"");
70 DEFINE_string(
71 trace, "",
72 "list of instrumentation to perform separated by colon \":\". "
73 "Available options are cov, cmp, div, gep, all. These options "
74 "correspond to the \"-fsanitize-coverage=trace-*\" flags in clang.");
75 DEFINE_string(
76 id_sync_file, "",
77 "path to a file that should be used to synchronize coverage IDs "
78 "between parallel fuzzing processes. Defaults to a temporary file "
79 "created for this purpose if running in parallel.");
80 DEFINE_string(
81 dump_classes_dir, "",
82 "path to a directory in which Jazzer should dump the instrumented classes");
83
84 DEFINE_bool(hooks, true,
85 "Use JVM hooks to provide coverage information to the fuzzer. The "
86 "fuzzer uses the coverage information to perform smarter input "
87 "selection and mutation. If set to false no "
88 "coverage information will be processed. This can be useful for "
89 "running a regression test on non-instrumented bytecode.");
90
// Separator between the entries of list-valued flags and between class path
// entries. Deliberately a string literal (not a char) so that it can be
// concatenated with adjacent string literals at compile time.
#if defined(_WIN32)
#define ARG_SEPARATOR ";"
#else
#define ARG_SEPARATOR ":"
#endif
96
97 // Called by the agent when
98 // com.code_intelligence.jazzer.instrumentor.ClassInstrumentor is initialized.
99 // This only happens when FLAGS_hooks is true.
JNI_OnLoad_jazzer_initialize(JavaVM * vm,void *)100 extern "C" JNIEXPORT jint JNICALL JNI_OnLoad_jazzer_initialize(JavaVM *vm,
101 void *) {
102 if (!FLAGS_hooks) {
103 LOG(ERROR) << "JNI_OnLoad_jazzer_initialize called with --nohooks";
104 exit(1);
105 }
106 JNIEnv *env = nullptr;
107 jint result = vm->GetEnv(reinterpret_cast<void **>(&env), JNI_VERSION_1_8);
108 if (result != JNI_OK) {
109 LOG(FATAL) << "Failed to get JNI environment";
110 exit(1);
111 }
112 jazzer::registerFuzzerCallbacks(*env);
113 jazzer::CoverageTracker::Setup(*env);
114 jazzer::SignalHandler::Setup(*env);
115 return JNI_VERSION_1_8;
116 }
117
namespace {
// Runfiles-relative location of the agent jar when running from a Bazel tree.
constexpr const char *kAgentBazelRunfilesPath =
    "jazzer/agent/jazzer_agent_deploy.jar";
// Bare file name of the agent jar; used for lookups next to the driver binary
// and in error messages.
constexpr const char *kAgentFileName = "jazzer_agent_deploy.jar";
// Slash-separated (JNI style) name of the Java helper class used for stack
// trace dumping and exception processing.
constexpr char kExceptionUtilsClassName[] =
    "com/code_intelligence/jazzer/runtime/ExceptionUtils";
}  // namespace
124
125 namespace jazzer {
126
DumpJvmStackTraces()127 void DumpJvmStackTraces() {
128 JavaVM *vm;
129 jsize num_vms;
130 JNI_GetCreatedJavaVMs(&vm, 1, &num_vms);
131 if (num_vms != 1) {
132 return;
133 }
134 JNIEnv *env = nullptr;
135 if (vm->AttachCurrentThread(reinterpret_cast<void **>(&env), nullptr) !=
136 JNI_OK) {
137 return;
138 }
139 jclass exceptionUtils = env->FindClass(kExceptionUtilsClassName);
140 if (env->ExceptionCheck()) {
141 env->ExceptionDescribe();
142 return;
143 }
144 jmethodID dumpStack =
145 env->GetStaticMethodID(exceptionUtils, "dumpAllStackTraces", "()V");
146 if (env->ExceptionCheck()) {
147 env->ExceptionDescribe();
148 return;
149 }
150 env->CallStaticVoidMethod(exceptionUtils, dumpStack);
151 if (env->ExceptionCheck()) {
152 env->ExceptionDescribe();
153 return;
154 }
155 // Do not detach as we may be the main thread (but the JVM exits anyway).
156 }
157
dirFromFullPath(const std::string & path)158 std::string dirFromFullPath(const std::string &path) {
159 const auto pos = path.rfind(kPathSeparator);
160 if (pos != std::string::npos) {
161 return path.substr(0, pos);
162 }
163 return "";
164 }
165
166 // getInstrumentorAgentPath searches for the fuzzing instrumentation agent and
167 // returns the location if it is found. Otherwise it calls exit(0).
getInstrumentorAgentPath(const std::string & executable_path)168 std::string getInstrumentorAgentPath(const std::string &executable_path) {
169 // User provided agent location takes precedence.
170 if (!FLAGS_agent_path.empty()) {
171 if (std::ifstream(FLAGS_agent_path).good()) return FLAGS_agent_path;
172 LOG(ERROR) << "Could not find " << kAgentFileName << " at \""
173 << FLAGS_agent_path << "\"";
174 exit(1);
175 }
176 // First check if we are running inside the Bazel tree and use the agent
177 // runfile.
178 {
179 using bazel::tools::cpp::runfiles::Runfiles;
180 std::string error;
181 std::unique_ptr<Runfiles> runfiles(
182 Runfiles::Create(executable_path, &error));
183 if (runfiles != nullptr) {
184 auto bazel_path = runfiles->Rlocation(kAgentBazelRunfilesPath);
185 if (!bazel_path.empty() && std::ifstream(bazel_path).good())
186 return bazel_path;
187 }
188 }
189
190 // If the agent is not in the bazel path we look next to the jazzer_driver
191 // binary.
192 const auto dir = dirFromFullPath(executable_path);
193 auto agent_path =
194 absl::StrFormat("%s%c%s", dir, kPathSeparator, kAgentFileName);
195 if (std::ifstream(agent_path).good()) return agent_path;
196 LOG(ERROR) << "Could not find " << kAgentFileName
197 << ". Please provide "
198 "the pathname via the --agent_path flag.";
199 exit(1);
200 }
201
agentArgsFromFlags()202 std::string agentArgsFromFlags() {
203 std::vector<std::string> args;
204 for (const auto &flag_pair :
205 std::vector<std::pair<std::string, const std::string &>>{
206 // {<agent option>, <ref to glog flag> }
207 {"instrumentation_includes", FLAGS_instrumentation_includes},
208 {"instrumentation_excludes", FLAGS_instrumentation_excludes},
209 {"custom_hooks", FLAGS_custom_hooks},
210 {"custom_hook_includes", FLAGS_custom_hook_includes},
211 {"custom_hook_excludes", FLAGS_custom_hook_excludes},
212 {"trace", FLAGS_trace},
213 {"id_sync_file", FLAGS_id_sync_file},
214 {"dump_classes_dir", FLAGS_dump_classes_dir},
215 }) {
216 if (!flag_pair.second.empty()) {
217 args.push_back(flag_pair.first + "=" + flag_pair.second);
218 }
219 }
220 return absl::StrJoin(args, ",");
221 }
222
223 // Splits a string at the ARG_SEPARATOR unless it is escaped with a backslash.
224 // Backslash itself can be escaped with another backslash.
splitEscaped(const std::string & str)225 std::vector<std::string> splitEscaped(const std::string &str) {
226 // Protect \\ and \<separator> against splitting.
227 const std::string BACKSLASH_BACKSLASH_REPLACEMENT =
228 "%%JAZZER_BACKSLASH_BACKSLASH_REPLACEMENT%%";
229 const std::string BACKSLASH_SEPARATOR_REPLACEMENT =
230 "%%JAZZER_BACKSLASH_SEPARATOR_REPLACEMENT%%";
231 std::string protected_str =
232 absl::StrReplaceAll(str, {{"\\\\", BACKSLASH_BACKSLASH_REPLACEMENT}});
233 protected_str = absl::StrReplaceAll(
234 protected_str, {{"\\" ARG_SEPARATOR, BACKSLASH_SEPARATOR_REPLACEMENT}});
235
236 std::vector<std::string> parts = absl::StrSplit(protected_str, ARG_SEPARATOR);
237 std::transform(parts.begin(), parts.end(), parts.begin(),
238 [&BACKSLASH_SEPARATOR_REPLACEMENT,
239 &BACKSLASH_BACKSLASH_REPLACEMENT](const std::string &part) {
240 return absl::StrReplaceAll(
241 part,
242 {
243 {BACKSLASH_SEPARATOR_REPLACEMENT, ARG_SEPARATOR},
244 {BACKSLASH_BACKSLASH_REPLACEMENT, "\\"},
245 });
246 });
247
248 return parts;
249 }
250
JVM(const std::string & executable_path)251 JVM::JVM(const std::string &executable_path) {
252 // combine class path from command line flags and JAVA_FUZZER_CLASSPATH env
253 // variable
254 std::string class_path = absl::StrFormat("-Djava.class.path=%s", FLAGS_cp);
255 const auto class_path_from_env = std::getenv("JAVA_FUZZER_CLASSPATH");
256 if (class_path_from_env) {
257 class_path += absl::StrFormat(ARG_SEPARATOR "%s", class_path_from_env);
258 }
259 class_path += absl::StrFormat(ARG_SEPARATOR "%s",
260 getInstrumentorAgentPath(executable_path));
261 LOG(INFO) << "got class path " << class_path;
262
263 std::vector<JavaVMOption> options;
264 options.push_back(
265 JavaVMOption{.optionString = const_cast<char *>(class_path.c_str())});
266 // Set the maximum heap size to a value that is slightly smaller than
267 // libFuzzer's default rss_limit_mb. This prevents erroneous oom reports.
268 options.push_back(JavaVMOption{.optionString = (char *)"-Xmx1800m"});
269 options.push_back(JavaVMOption{.optionString = (char *)"-enableassertions"});
270 // Preserve and emit stack trace information even on hot paths.
271 // This may hurt performance, but also helps find flaky bugs.
272 options.push_back(
273 JavaVMOption{.optionString = (char *)"-XX:-OmitStackTraceInFastThrow"});
274 // Optimize GC for high throughput rather than low latency.
275 options.push_back(JavaVMOption{.optionString = (char *)"-XX:+UseParallelGC"});
276
277 // add additional jvm options set through command line flags
278 std::vector<std::string> jvm_args;
279 if (!FLAGS_jvm_args.empty()) {
280 jvm_args = splitEscaped(FLAGS_jvm_args);
281 }
282 for (const auto &arg : jvm_args) {
283 options.push_back(
284 JavaVMOption{.optionString = const_cast<char *>(arg.c_str())});
285 }
286 std::vector<std::string> additional_jvm_args;
287 if (!FLAGS_additional_jvm_args.empty()) {
288 additional_jvm_args = splitEscaped(FLAGS_additional_jvm_args);
289 }
290 for (const auto &arg : additional_jvm_args) {
291 options.push_back(
292 JavaVMOption{.optionString = const_cast<char *>(arg.c_str())});
293 }
294
295 std::string agent_jvm_arg;
296 if (FLAGS_hooks) {
297 agent_jvm_arg = absl::StrFormat("-javaagent:%s=%s",
298 getInstrumentorAgentPath(executable_path),
299 agentArgsFromFlags());
300 options.push_back(JavaVMOption{
301 .optionString = const_cast<char *>(agent_jvm_arg.c_str())});
302 }
303
304 JavaVMInitArgs jvm_init_args = {.version = JNI_VERSION_1_8,
305 .nOptions = (int)options.size(),
306 .options = options.data(),
307 .ignoreUnrecognized = JNI_FALSE};
308
309 auto ret = JNI_CreateJavaVM(&jvm_, (void **)&env_, &jvm_init_args);
310 if (ret != JNI_OK) {
311 throw std::runtime_error(
312 absl::StrFormat("JNI_CreateJavaVM returned code %d", ret));
313 }
314 }
315
GetEnv() const316 JNIEnv &JVM::GetEnv() const { return *env_; }
317
~JVM()318 JVM::~JVM() { jvm_->DestroyJavaVM(); }
319
FindClass(std::string class_name) const320 jclass JVM::FindClass(std::string class_name) const {
321 auto &env = GetEnv();
322 std::replace(class_name.begin(), class_name.end(), '.', '/');
323 const auto ret = env.FindClass(class_name.c_str());
324 if (ret == nullptr) {
325 if (env.ExceptionCheck()) {
326 env.ExceptionDescribe();
327 throw std::runtime_error(
328 absl::StrFormat("Could not find class %s", class_name));
329 } else {
330 throw std::runtime_error(absl::StrFormat(
331 "Java class '%s' not found without exception", class_name));
332 }
333 }
334 return ret;
335 }
336
GetStaticMethodID(jclass jclass,const std::string & jmethod,const std::string & signature,bool is_required) const337 jmethodID JVM::GetStaticMethodID(jclass jclass, const std::string &jmethod,
338 const std::string &signature,
339 bool is_required) const {
340 auto &env = GetEnv();
341 const auto ret =
342 env.GetStaticMethodID(jclass, jmethod.c_str(), signature.c_str());
343 if (ret == nullptr) {
344 if (is_required) {
345 if (env.ExceptionCheck()) {
346 env.ExceptionDescribe();
347 }
348 throw std::runtime_error(
349 absl::StrFormat("Static method '%s' not found", jmethod));
350 } else {
351 LOG(INFO) << "did not find method " << jmethod << " with signature "
352 << signature;
353 env.ExceptionClear();
354 }
355 }
356 return ret;
357 }
358
GetMethodID(jclass jclass,const std::string & jmethod,const std::string & signature) const359 jmethodID JVM::GetMethodID(jclass jclass, const std::string &jmethod,
360 const std::string &signature) const {
361 auto &env = GetEnv();
362 const auto ret = env.GetMethodID(jclass, jmethod.c_str(), signature.c_str());
363 if (ret == nullptr) {
364 if (env.ExceptionCheck()) {
365 env.ExceptionDescribe();
366 }
367 throw std::runtime_error(absl::StrFormat("Method '%s' not found", jmethod));
368 }
369 return ret;
370 }
371
GetStaticFieldID(jclass class_id,const std::string & field_name,const std::string & type) const372 jfieldID JVM::GetStaticFieldID(jclass class_id, const std::string &field_name,
373 const std::string &type) const {
374 auto &env = GetEnv();
375 const auto ret =
376 env.GetStaticFieldID(class_id, field_name.c_str(), type.c_str());
377 if (ret == nullptr) {
378 if (env.ExceptionCheck()) {
379 env.ExceptionDescribe();
380 }
381 throw std::runtime_error(
382 absl::StrFormat("Field '%s' not found", field_name));
383 }
384 return ret;
385 }
386
ExceptionPrinter(JVM & jvm)387 ExceptionPrinter::ExceptionPrinter(JVM &jvm)
388 : jvm_(jvm),
389 string_writer_class_(jvm.FindClass("java/io/StringWriter")),
390 string_writer_constructor_(
391 jvm.GetMethodID(string_writer_class_, "<init>", "()V")),
392 string_writer_to_string_method_(jvm.GetMethodID(
393 string_writer_class_, "toString", "()Ljava/lang/String;")),
394 print_writer_class_(jvm.FindClass("java/io/PrintWriter")),
395 print_writer_constructor_(jvm.GetMethodID(print_writer_class_, "<init>",
396 "(Ljava/io/Writer;)V")) {
397 auto throwable_class = jvm.FindClass("java/lang/Throwable");
398 print_stack_trace_method_ = jvm.GetMethodID(
399 throwable_class, "printStackTrace", "(Ljava/io/PrintWriter;)V");
400 if (FLAGS_hooks) {
401 exception_utils_ = jvm.FindClass(kExceptionUtilsClassName);
402 compute_dedup_token_method_ = jvm.GetStaticMethodID(
403 exception_utils_, "computeDedupToken", "(Ljava/lang/Throwable;)J");
404 preprocess_throwable_method_ =
405 jvm.GetStaticMethodID(exception_utils_, "preprocessThrowable",
406 "(Ljava/lang/Throwable;)Ljava/lang/Throwable;");
407 }
408 }
409
410 // The JNI way of writing:
411 // StringWriter stringWriter = new StringWriter();
412 // PrintWriter printWriter = new PrintWriter(stringWriter);
413 // e.printStackTrace(printWriter);
414 // return stringWriter.toString();
getStackTrace(jthrowable exception) const415 std::string ExceptionPrinter::getStackTrace(jthrowable exception) const {
416 auto &env = jvm_.GetEnv();
417 if (exception == nullptr) {
418 return "";
419 }
420
421 auto string_writer =
422 env.NewObject(string_writer_class_, string_writer_constructor_);
423 if (string_writer == nullptr) {
424 env.ExceptionDescribe();
425 return "";
426 }
427 auto print_writer = env.NewObject(print_writer_class_,
428 print_writer_constructor_, string_writer);
429 if (print_writer == nullptr) {
430 env.ExceptionDescribe();
431 return "";
432 }
433
434 env.CallVoidMethod(exception, print_stack_trace_method_, print_writer);
435 env.DeleteLocalRef(print_writer);
436 if (env.ExceptionCheck()) {
437 env.ExceptionDescribe();
438 return "";
439 }
440 auto exception_string_object = reinterpret_cast<jstring>(
441 env.CallObjectMethod(string_writer, string_writer_to_string_method_));
442 env.DeleteLocalRef(string_writer);
443 if (env.ExceptionCheck()) {
444 env.ExceptionDescribe();
445 return "";
446 }
447
448 auto char_pointer = env.GetStringUTFChars(exception_string_object, nullptr);
449 std::string exception_string(char_pointer);
450 env.ReleaseStringUTFChars(exception_string_object, char_pointer);
451 env.DeleteLocalRef(exception_string_object);
452 return exception_string;
453 }
454
preprocessException(jthrowable exception) const455 jthrowable ExceptionPrinter::preprocessException(jthrowable exception) const {
456 if (exception == nullptr) return nullptr;
457 auto &env = jvm_.GetEnv();
458 if (!FLAGS_hooks || !preprocess_throwable_method_) return exception;
459 auto processed_exception = (jthrowable)(env.CallStaticObjectMethod(
460 exception_utils_, preprocess_throwable_method_, exception));
461 if (env.ExceptionCheck()) {
462 env.ExceptionDescribe();
463 return exception;
464 }
465 return processed_exception;
466 }
467
computeDedupToken(jthrowable exception) const468 jlong ExceptionPrinter::computeDedupToken(jthrowable exception) const {
469 auto &env = jvm_.GetEnv();
470 if (!FLAGS_hooks || exception == nullptr ||
471 compute_dedup_token_method_ == nullptr)
472 return 0;
473 const auto dedup_token = env.CallStaticLongMethod(
474 exception_utils_, compute_dedup_token_method_, exception);
475 if (env.ExceptionCheck()) {
476 env.ExceptionDescribe();
477 return 0;
478 }
479 return dedup_token;
480 }
481
482 } // namespace jazzer
483