• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 Code Intelligence GmbH
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "jvm_tooling.h"
16 
17 #include <fstream>
18 #include <iostream>
19 #include <memory>
20 #include <utility>
21 #include <vector>
22 
23 #include "absl/strings/str_format.h"
24 #include "absl/strings/str_join.h"
25 #include "absl/strings/str_replace.h"
26 #include "absl/strings/str_split.h"
27 #include "coverage_tracker.h"
28 #include "gflags/gflags.h"
29 #include "glog/logging.h"
30 #include "libfuzzer_callbacks.h"
31 #include "signal_handler.h"
32 #include "tools/cpp/runfiles/runfiles.h"
33 #include "utils.h"
34 
35 DEFINE_string(cp, ".",
36               "the classpath to use for fuzzing. Behaves analogously to java's "
37               "-cp (separator is ':' on Linux/macOS and ';' on Windows, escape "
38               "it with '\\').");
39 DEFINE_string(jvm_args, "",
40               "arguments passed to the JVM (separator is ':' on Linux/macOS "
41               "and ';' on Windows, escape it with '\\')");
42 DEFINE_string(additional_jvm_args, "",
43               "additional arguments passed to the JVM (separator is ':' on "
44               "Linux/macOS and ';' on Windows). Use this option to set further "
45               "JVM args that should not "
46               "interfere with those provided via --jvm_args.");
47 DEFINE_string(agent_path, "", "location of the fuzzing instrumentation agent");
48 
49 // Arguments that are passed to the instrumentation agent.
50 // The instrumentation agent takes arguments in the form
51 // <option_1>=<option_1_val>,<option_2>=<option_2_val>,... To not expose this
52 // format to the user, the available options are defined here as flags and
53 // combined during the initialization of the JVM.
// agentArgsFromFlags() forwards every flag below that has a non-empty value
// under an option of the same name; flags left empty are omitted.
54 DEFINE_string(instrumentation_includes, "",
55               "list of glob patterns for classes that will be instrumented for "
56               "fuzzing. Separated by colon \":\"");
57 DEFINE_string(instrumentation_excludes, "",
58               "list of glob patterns for classes that will not be instrumented "
59               "for fuzzing. Separated by colon \":\"");
60 
61 DEFINE_string(custom_hook_includes, "",
62               "list of glob patterns for classes that will only be "
63               "instrumented using custom hooks. Separated by colon \":\"");
64 DEFINE_string(custom_hook_excludes, "",
65               "list of glob patterns for classes that will not be instrumented "
66               "using custom hooks. Separated by colon \":\"");
67 DEFINE_string(custom_hooks, "",
68               "list of classes containing custom instrumentation hooks. "
69               "Separated by colon \":\"");
70 DEFINE_string(
71     trace, "",
72     "list of instrumentation to perform separated by colon \":\". "
73     "Available options are cov, cmp, div, gep, all. These options "
74     "correspond to the \"-fsanitize-coverage=trace-*\" flags in clang.");
75 DEFINE_string(
76     id_sync_file, "",
77     "path to a file that should be used to synchronize coverage IDs "
78     "between parallel fuzzing processes. Defaults to a temporary file "
79     "created for this purpose if running in parallel.");
80 DEFINE_string(
81     dump_classes_dir, "",
82     "path to a directory in which Jazzer should dump the instrumented classes");
83 
// Switch for the instrumentation agent: the JVM constructor only adds the
// -javaagent option when this flag is true, and ExceptionPrinter only looks up
// the ExceptionUtils helper methods in that case.
84 DEFINE_bool(hooks, true,
85             "Use JVM hooks to provide coverage information to the fuzzer. The "
86             "fuzzer uses the coverage information to perform smarter input "
87             "selection and mutation. If set to false no "
88             "coverage information will be processed. This can be useful for "
89             "running a regression test on non-instrumented bytecode.");
90 
// Separator at which splitEscaped splits list-valued flag values and with
// which the JVM constructor joins class path entries: ";" on Windows, ":"
// everywhere else.
91 #ifdef _WIN32
92 #define ARG_SEPARATOR ";"
93 #else
94 #define ARG_SEPARATOR ":"
95 #endif
96 
97 // Called by the agent when
98 // com.code_intelligence.jazzer.instrumentor.ClassInstrumentor is initialized.
99 // This only happens when FLAGS_hooks is true.
JNI_OnLoad_jazzer_initialize(JavaVM * vm,void *)100 extern "C" JNIEXPORT jint JNICALL JNI_OnLoad_jazzer_initialize(JavaVM *vm,
101                                                                void *) {
102   if (!FLAGS_hooks) {
103     LOG(ERROR) << "JNI_OnLoad_jazzer_initialize called with --nohooks";
104     exit(1);
105   }
106   JNIEnv *env = nullptr;
107   jint result = vm->GetEnv(reinterpret_cast<void **>(&env), JNI_VERSION_1_8);
108   if (result != JNI_OK) {
109     LOG(FATAL) << "Failed to get JNI environment";
110     exit(1);
111   }
112   jazzer::registerFuzzerCallbacks(*env);
113   jazzer::CoverageTracker::Setup(*env);
114   jazzer::SignalHandler::Setup(*env);
115   return JNI_VERSION_1_8;
116 }
117 
118 namespace {
// Runfiles path under which getInstrumentorAgentPath looks up the agent jar
// when running inside the Bazel tree.
119 constexpr auto kAgentBazelRunfilesPath = "jazzer/agent/jazzer_agent_deploy.jar";
// File name of the agent jar that is searched for next to the driver binary
// and named in error messages.
120 constexpr auto kAgentFileName = "jazzer_agent_deploy.jar";
// Slash-separated name, as passed to JNI FindClass, of the Java helper class
// providing dumpAllStackTraces, computeDedupToken and preprocessThrowable.
121 constexpr const char kExceptionUtilsClassName[] =
122     "com/code_intelligence/jazzer/runtime/ExceptionUtils";
123 }  // namespace
124 
125 namespace jazzer {
126 
DumpJvmStackTraces()127 void DumpJvmStackTraces() {
128   JavaVM *vm;
129   jsize num_vms;
130   JNI_GetCreatedJavaVMs(&vm, 1, &num_vms);
131   if (num_vms != 1) {
132     return;
133   }
134   JNIEnv *env = nullptr;
135   if (vm->AttachCurrentThread(reinterpret_cast<void **>(&env), nullptr) !=
136       JNI_OK) {
137     return;
138   }
139   jclass exceptionUtils = env->FindClass(kExceptionUtilsClassName);
140   if (env->ExceptionCheck()) {
141     env->ExceptionDescribe();
142     return;
143   }
144   jmethodID dumpStack =
145       env->GetStaticMethodID(exceptionUtils, "dumpAllStackTraces", "()V");
146   if (env->ExceptionCheck()) {
147     env->ExceptionDescribe();
148     return;
149   }
150   env->CallStaticVoidMethod(exceptionUtils, dumpStack);
151   if (env->ExceptionCheck()) {
152     env->ExceptionDescribe();
153     return;
154   }
155   // Do not detach as we may be the main thread (but the JVM exits anyway).
156 }
157 
dirFromFullPath(const std::string & path)158 std::string dirFromFullPath(const std::string &path) {
159   const auto pos = path.rfind(kPathSeparator);
160   if (pos != std::string::npos) {
161     return path.substr(0, pos);
162   }
163   return "";
164 }
165 
166 // getInstrumentorAgentPath searches for the fuzzing instrumentation agent and
167 // returns the location if it is found. Otherwise it calls exit(0).
getInstrumentorAgentPath(const std::string & executable_path)168 std::string getInstrumentorAgentPath(const std::string &executable_path) {
169   // User provided agent location takes precedence.
170   if (!FLAGS_agent_path.empty()) {
171     if (std::ifstream(FLAGS_agent_path).good()) return FLAGS_agent_path;
172     LOG(ERROR) << "Could not find " << kAgentFileName << " at \""
173                << FLAGS_agent_path << "\"";
174     exit(1);
175   }
176   // First check if we are running inside the Bazel tree and use the agent
177   // runfile.
178   {
179     using bazel::tools::cpp::runfiles::Runfiles;
180     std::string error;
181     std::unique_ptr<Runfiles> runfiles(
182         Runfiles::Create(executable_path, &error));
183     if (runfiles != nullptr) {
184       auto bazel_path = runfiles->Rlocation(kAgentBazelRunfilesPath);
185       if (!bazel_path.empty() && std::ifstream(bazel_path).good())
186         return bazel_path;
187     }
188   }
189 
190   // If the agent is not in the bazel path we look next to the jazzer_driver
191   // binary.
192   const auto dir = dirFromFullPath(executable_path);
193   auto agent_path =
194       absl::StrFormat("%s%c%s", dir, kPathSeparator, kAgentFileName);
195   if (std::ifstream(agent_path).good()) return agent_path;
196   LOG(ERROR) << "Could not find " << kAgentFileName
197              << ". Please provide "
198                 "the pathname via the --agent_path flag.";
199   exit(1);
200 }
201 
agentArgsFromFlags()202 std::string agentArgsFromFlags() {
203   std::vector<std::string> args;
204   for (const auto &flag_pair :
205        std::vector<std::pair<std::string, const std::string &>>{
206            // {<agent option>, <ref to glog flag> }
207            {"instrumentation_includes", FLAGS_instrumentation_includes},
208            {"instrumentation_excludes", FLAGS_instrumentation_excludes},
209            {"custom_hooks", FLAGS_custom_hooks},
210            {"custom_hook_includes", FLAGS_custom_hook_includes},
211            {"custom_hook_excludes", FLAGS_custom_hook_excludes},
212            {"trace", FLAGS_trace},
213            {"id_sync_file", FLAGS_id_sync_file},
214            {"dump_classes_dir", FLAGS_dump_classes_dir},
215        }) {
216     if (!flag_pair.second.empty()) {
217       args.push_back(flag_pair.first + "=" + flag_pair.second);
218     }
219   }
220   return absl::StrJoin(args, ",");
221 }
222 
223 // Splits a string at the ARG_SEPARATOR unless it is escaped with a backslash.
224 // Backslash itself can be escaped with another backslash.
splitEscaped(const std::string & str)225 std::vector<std::string> splitEscaped(const std::string &str) {
226   // Protect \\ and \<separator> against splitting.
227   const std::string BACKSLASH_BACKSLASH_REPLACEMENT =
228       "%%JAZZER_BACKSLASH_BACKSLASH_REPLACEMENT%%";
229   const std::string BACKSLASH_SEPARATOR_REPLACEMENT =
230       "%%JAZZER_BACKSLASH_SEPARATOR_REPLACEMENT%%";
231   std::string protected_str =
232       absl::StrReplaceAll(str, {{"\\\\", BACKSLASH_BACKSLASH_REPLACEMENT}});
233   protected_str = absl::StrReplaceAll(
234       protected_str, {{"\\" ARG_SEPARATOR, BACKSLASH_SEPARATOR_REPLACEMENT}});
235 
236   std::vector<std::string> parts = absl::StrSplit(protected_str, ARG_SEPARATOR);
237   std::transform(parts.begin(), parts.end(), parts.begin(),
238                  [&BACKSLASH_SEPARATOR_REPLACEMENT,
239                   &BACKSLASH_BACKSLASH_REPLACEMENT](const std::string &part) {
240                    return absl::StrReplaceAll(
241                        part,
242                        {
243                            {BACKSLASH_SEPARATOR_REPLACEMENT, ARG_SEPARATOR},
244                            {BACKSLASH_BACKSLASH_REPLACEMENT, "\\"},
245                        });
246                  });
247 
248   return parts;
249 }
250 
JVM(const std::string & executable_path)251 JVM::JVM(const std::string &executable_path) {
252   // combine class path from command line flags and JAVA_FUZZER_CLASSPATH env
253   // variable
254   std::string class_path = absl::StrFormat("-Djava.class.path=%s", FLAGS_cp);
255   const auto class_path_from_env = std::getenv("JAVA_FUZZER_CLASSPATH");
256   if (class_path_from_env) {
257     class_path += absl::StrFormat(ARG_SEPARATOR "%s", class_path_from_env);
258   }
259   class_path += absl::StrFormat(ARG_SEPARATOR "%s",
260                                 getInstrumentorAgentPath(executable_path));
261   LOG(INFO) << "got class path " << class_path;
262 
263   std::vector<JavaVMOption> options;
264   options.push_back(
265       JavaVMOption{.optionString = const_cast<char *>(class_path.c_str())});
266   // Set the maximum heap size to a value that is slightly smaller than
267   // libFuzzer's default rss_limit_mb. This prevents erroneous oom reports.
268   options.push_back(JavaVMOption{.optionString = (char *)"-Xmx1800m"});
269   options.push_back(JavaVMOption{.optionString = (char *)"-enableassertions"});
270   // Preserve and emit stack trace information even on hot paths.
271   // This may hurt performance, but also helps find flaky bugs.
272   options.push_back(
273       JavaVMOption{.optionString = (char *)"-XX:-OmitStackTraceInFastThrow"});
274   // Optimize GC for high throughput rather than low latency.
275   options.push_back(JavaVMOption{.optionString = (char *)"-XX:+UseParallelGC"});
276 
277   // add additional jvm options set through command line flags
278   std::vector<std::string> jvm_args;
279   if (!FLAGS_jvm_args.empty()) {
280     jvm_args = splitEscaped(FLAGS_jvm_args);
281   }
282   for (const auto &arg : jvm_args) {
283     options.push_back(
284         JavaVMOption{.optionString = const_cast<char *>(arg.c_str())});
285   }
286   std::vector<std::string> additional_jvm_args;
287   if (!FLAGS_additional_jvm_args.empty()) {
288     additional_jvm_args = splitEscaped(FLAGS_additional_jvm_args);
289   }
290   for (const auto &arg : additional_jvm_args) {
291     options.push_back(
292         JavaVMOption{.optionString = const_cast<char *>(arg.c_str())});
293   }
294 
295   std::string agent_jvm_arg;
296   if (FLAGS_hooks) {
297     agent_jvm_arg = absl::StrFormat("-javaagent:%s=%s",
298                                     getInstrumentorAgentPath(executable_path),
299                                     agentArgsFromFlags());
300     options.push_back(JavaVMOption{
301         .optionString = const_cast<char *>(agent_jvm_arg.c_str())});
302   }
303 
304   JavaVMInitArgs jvm_init_args = {.version = JNI_VERSION_1_8,
305                                   .nOptions = (int)options.size(),
306                                   .options = options.data(),
307                                   .ignoreUnrecognized = JNI_FALSE};
308 
309   auto ret = JNI_CreateJavaVM(&jvm_, (void **)&env_, &jvm_init_args);
310   if (ret != JNI_OK) {
311     throw std::runtime_error(
312         absl::StrFormat("JNI_CreateJavaVM returned code %d", ret));
313   }
314 }
315 
GetEnv() const316 JNIEnv &JVM::GetEnv() const { return *env_; }
317 
~JVM()318 JVM::~JVM() { jvm_->DestroyJavaVM(); }
319 
FindClass(std::string class_name) const320 jclass JVM::FindClass(std::string class_name) const {
321   auto &env = GetEnv();
322   std::replace(class_name.begin(), class_name.end(), '.', '/');
323   const auto ret = env.FindClass(class_name.c_str());
324   if (ret == nullptr) {
325     if (env.ExceptionCheck()) {
326       env.ExceptionDescribe();
327       throw std::runtime_error(
328           absl::StrFormat("Could not find class %s", class_name));
329     } else {
330       throw std::runtime_error(absl::StrFormat(
331           "Java class '%s' not found without exception", class_name));
332     }
333   }
334   return ret;
335 }
336 
GetStaticMethodID(jclass jclass,const std::string & jmethod,const std::string & signature,bool is_required) const337 jmethodID JVM::GetStaticMethodID(jclass jclass, const std::string &jmethod,
338                                  const std::string &signature,
339                                  bool is_required) const {
340   auto &env = GetEnv();
341   const auto ret =
342       env.GetStaticMethodID(jclass, jmethod.c_str(), signature.c_str());
343   if (ret == nullptr) {
344     if (is_required) {
345       if (env.ExceptionCheck()) {
346         env.ExceptionDescribe();
347       }
348       throw std::runtime_error(
349           absl::StrFormat("Static method '%s' not found", jmethod));
350     } else {
351       LOG(INFO) << "did not find method " << jmethod << " with signature "
352                 << signature;
353       env.ExceptionClear();
354     }
355   }
356   return ret;
357 }
358 
GetMethodID(jclass jclass,const std::string & jmethod,const std::string & signature) const359 jmethodID JVM::GetMethodID(jclass jclass, const std::string &jmethod,
360                            const std::string &signature) const {
361   auto &env = GetEnv();
362   const auto ret = env.GetMethodID(jclass, jmethod.c_str(), signature.c_str());
363   if (ret == nullptr) {
364     if (env.ExceptionCheck()) {
365       env.ExceptionDescribe();
366     }
367     throw std::runtime_error(absl::StrFormat("Method '%s' not found", jmethod));
368   }
369   return ret;
370 }
371 
GetStaticFieldID(jclass class_id,const std::string & field_name,const std::string & type) const372 jfieldID JVM::GetStaticFieldID(jclass class_id, const std::string &field_name,
373                                const std::string &type) const {
374   auto &env = GetEnv();
375   const auto ret =
376       env.GetStaticFieldID(class_id, field_name.c_str(), type.c_str());
377   if (ret == nullptr) {
378     if (env.ExceptionCheck()) {
379       env.ExceptionDescribe();
380     }
381     throw std::runtime_error(
382         absl::StrFormat("Field '%s' not found", field_name));
383   }
384   return ret;
385 }
386 
ExceptionPrinter(JVM & jvm)387 ExceptionPrinter::ExceptionPrinter(JVM &jvm)
388     : jvm_(jvm),
389       string_writer_class_(jvm.FindClass("java/io/StringWriter")),
390       string_writer_constructor_(
391           jvm.GetMethodID(string_writer_class_, "<init>", "()V")),
392       string_writer_to_string_method_(jvm.GetMethodID(
393           string_writer_class_, "toString", "()Ljava/lang/String;")),
394       print_writer_class_(jvm.FindClass("java/io/PrintWriter")),
395       print_writer_constructor_(jvm.GetMethodID(print_writer_class_, "<init>",
396                                                 "(Ljava/io/Writer;)V")) {
397   auto throwable_class = jvm.FindClass("java/lang/Throwable");
398   print_stack_trace_method_ = jvm.GetMethodID(
399       throwable_class, "printStackTrace", "(Ljava/io/PrintWriter;)V");
400   if (FLAGS_hooks) {
401     exception_utils_ = jvm.FindClass(kExceptionUtilsClassName);
402     compute_dedup_token_method_ = jvm.GetStaticMethodID(
403         exception_utils_, "computeDedupToken", "(Ljava/lang/Throwable;)J");
404     preprocess_throwable_method_ =
405         jvm.GetStaticMethodID(exception_utils_, "preprocessThrowable",
406                               "(Ljava/lang/Throwable;)Ljava/lang/Throwable;");
407   }
408 }
409 
410 // The JNI way of writing:
411 //    StringWriter stringWriter = new StringWriter();
412 //    PrintWriter printWriter = new PrintWriter(stringWriter);
413 //    e.printStackTrace(printWriter);
414 //    return stringWriter.toString();
getStackTrace(jthrowable exception) const415 std::string ExceptionPrinter::getStackTrace(jthrowable exception) const {
416   auto &env = jvm_.GetEnv();
417   if (exception == nullptr) {
418     return "";
419   }
420 
421   auto string_writer =
422       env.NewObject(string_writer_class_, string_writer_constructor_);
423   if (string_writer == nullptr) {
424     env.ExceptionDescribe();
425     return "";
426   }
427   auto print_writer = env.NewObject(print_writer_class_,
428                                     print_writer_constructor_, string_writer);
429   if (print_writer == nullptr) {
430     env.ExceptionDescribe();
431     return "";
432   }
433 
434   env.CallVoidMethod(exception, print_stack_trace_method_, print_writer);
435   env.DeleteLocalRef(print_writer);
436   if (env.ExceptionCheck()) {
437     env.ExceptionDescribe();
438     return "";
439   }
440   auto exception_string_object = reinterpret_cast<jstring>(
441       env.CallObjectMethod(string_writer, string_writer_to_string_method_));
442   env.DeleteLocalRef(string_writer);
443   if (env.ExceptionCheck()) {
444     env.ExceptionDescribe();
445     return "";
446   }
447 
448   auto char_pointer = env.GetStringUTFChars(exception_string_object, nullptr);
449   std::string exception_string(char_pointer);
450   env.ReleaseStringUTFChars(exception_string_object, char_pointer);
451   env.DeleteLocalRef(exception_string_object);
452   return exception_string;
453 }
454 
preprocessException(jthrowable exception) const455 jthrowable ExceptionPrinter::preprocessException(jthrowable exception) const {
456   if (exception == nullptr) return nullptr;
457   auto &env = jvm_.GetEnv();
458   if (!FLAGS_hooks || !preprocess_throwable_method_) return exception;
459   auto processed_exception = (jthrowable)(env.CallStaticObjectMethod(
460       exception_utils_, preprocess_throwable_method_, exception));
461   if (env.ExceptionCheck()) {
462     env.ExceptionDescribe();
463     return exception;
464   }
465   return processed_exception;
466 }
467 
computeDedupToken(jthrowable exception) const468 jlong ExceptionPrinter::computeDedupToken(jthrowable exception) const {
469   auto &env = jvm_.GetEnv();
470   if (!FLAGS_hooks || exception == nullptr ||
471       compute_dedup_token_method_ == nullptr)
472     return 0;
473   const auto dedup_token = env.CallStaticLongMethod(
474       exception_utils_, compute_dedup_token_method_, exception);
475   if (env.ExceptionCheck()) {
476     env.ExceptionDescribe();
477     return 0;
478   }
479   return dedup_token;
480 }
481 
482 }  // namespace jazzer
483