#!/usr/bin/env python

# ################################################################
# Copyright (c) Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ##########################################################################

import argparse
import contextlib
import os
import re
import shlex
import shutil
import subprocess
import sys
import tempfile


def abs_join(a, *p):
    """Join the path components and return the absolute, normalized path."""
    return os.path.abspath(os.path.join(a, *p))


class InputType(object):
    """Kind of data a fuzz target consumes."""
    RAW_DATA = 1
    COMPRESSED_DATA = 2
    DICTIONARY_DATA = 3


class FrameType(object):
    """Kind of compressed unit a fuzz target operates on."""
    ZSTD = 1
    BLOCK = 2


class TargetInfo(object):
    """Pairs a target's input type with its frame type."""

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        self.input_type = input_type
        self.frame_type = frame_type


# Constants
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
    'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
    'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
    'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
    'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
    'huf_round_trip': TargetInfo(InputType.RAW_DATA),
    'huf_decompress': TargetInfo(InputType.RAW_DATA),
}
TARGETS = list(TARGET_INFO.keys())
ALL_TARGETS = TARGETS + ['all']
FUZZ_RNG_SEED_SIZE = 4

# Standard environment variables
CC = os.environ.get('CC', 'cc')
CXX = os.environ.get('CXX', 'c++')
CPPFLAGS = os.environ.get('CPPFLAGS', '')
CFLAGS = os.environ.get('CFLAGS', '-O3')
CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
LDFLAGS = os.environ.get('LDFLAGS', '')
MFLAGS = os.environ.get('MFLAGS', '-j')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz')
DECODECORPUS = os.environ.get('DECODECORPUS',
                              abs_join(FUZZ_DIR, '..', 'decodecorpus'))
ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))

# Sanitizer environment variables
MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')


def create(r):
    """Return the absolute path of directory r, creating it if missing."""
    d = os.path.abspath(r)
    if not os.path.isdir(d):
        os.makedirs(d)
    return d


def check(r):
    """Return the absolute path of r if it is a directory, else None."""
    d = os.path.abspath(r)
    if not os.path.isdir(d):
        return None
    return d


@contextlib.contextmanager
def tmpdir():
    """Yield a fresh temporary directory and remove it on exit."""
    dirpath = tempfile.mkdtemp()
    try:
        yield dirpath
    finally:
        shutil.rmtree(dirpath, ignore_errors=True)


def parse_targets(in_targets):
    """
    Expand and validate a list of target names.

    Empty names are skipped and 'all' expands to every entry of TARGETS.
    Duplicates are dropped and the first-seen order is kept, so the result
    is deterministic. Raises RuntimeError for an unknown target name.
    """
    targets = []
    seen = set()
    for target in in_targets:
        if not target:
            continue
        if target == 'all':
            expanded = TARGETS
        elif target in TARGET_INFO:
            expanded = [target]
        else:
            raise RuntimeError('{} is not a valid target'.format(target))
        for name in expanded:
            if name not in seen:
                seen.add(name)
                targets.append(name)
    return targets


def targets_parser(args, description):
    """
    Parse a command line that takes zero or more TARGETs.

    args[0] is consumed as the program name. Unrecognized arguments are
    stored in the returned namespace as `extra`, and TARGET is replaced by
    the validated list from parse_targets().
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    args.TARGET = parse_targets(args.TARGET)

    return args


def parse_env_flags(args, flags):
    """
    Look for flags set by environment variables.

    Scans the `flags` string for -fsanitize=/-fno-sanitize= options and
    updates args.asan, args.msan, args.ubsan and args.sanitize accordingly.
    Raises RuntimeError if a sanitizer is both enabled and disabled.
    """
    san_flags = ','.join(re.findall('-fsanitize=((?:[a-z]+,?)+)', flags))
    nosan_flags = ','.join(re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags))

    def set_sanitizer(sanitizer, default, san, nosan):
        if sanitizer in san and sanitizer in nosan:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=sanitizer))
        if sanitizer in san:
            return True
        if sanitizer in nosan:
            return False
        return default

    san = set(san_flags.split(','))
    nosan = set(nosan_flags.split(','))

    args.asan = set_sanitizer('address', args.asan, san, nosan)
    args.msan = set_sanitizer('memory', args.msan, san, nosan)
    args.ubsan = set_sanitizer('undefined', args.ubsan, san, nosan)

    args.sanitize = args.asan or args.msan or args.ubsan

    return args


def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Returns (compiler, version) where compiler is 'clang', 'gcc' or None and
    version is a 3-tuple of ints (None when the compiler is not recognized).
    Raises RuntimeError if cc and cxx disagree on the compiler family or if
    no X.Y.Z version can be found in the `cc --version` output.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    compiler = None
    version = None
    # The banner may contain non-ASCII bytes; never fail just to print it.
    print("{} --version:\n{}".format(
        cc, cc_version_bytes.decode('ascii', errors='replace')))
    if b'clang' in cc_version_bytes:
        if b'clang' not in cxx_version_bytes:
            raise RuntimeError(
                '{} is clang but {} is not'.format(cc, cxx))
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes or b'GCC' in cc_version_bytes:
        if not (b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes):
            raise RuntimeError(
                '{} is gcc but {} is not'.format(cc, cxx))
        compiler = 'gcc'
    if compiler is not None:
        version_regex = rb'([0-9]+)\.([0-9]+)\.([0-9]+)'
        version_match = re.search(version_regex, cc_version_bytes)
        if version_match is None:
            raise RuntimeError(
                'Unable to parse the version of {}'.format(cc))
        version = tuple(int(version_match.group(i)) for i in range(1, 4))
    return compiler, version


def overflow_ubsan_flags(cc, cxx):
    """Return the UBSAN overflow checks to disable for this compiler."""
    compiler, version = compiler_version(cc, cxx)
    if compiler == 'gcc' and version < (8, 0, 0):
        return ['-fno-sanitize=signed-integer-overflow']
    if compiler == 'gcc' or (compiler == 'clang' and version >= (5, 0, 0)):
        return ['-fno-sanitize=pointer-overflow']
    return []


def build_parser(args):
    """
    Parse the arguments of the `build` command.

    args[0] is consumed as the program name. Sanitizer settings found in the
    cppflags/cflags/cxxflags/ldflags are merged in via parse_env_flags().
    Raises RuntimeError on inconsistent option combinations.
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{})".format(LIB_FUZZING_ENGINE)))

    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    parser.add_argument(
        '--enable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_true',
        help='Enable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    parser.add_argument(
        '--msan-extra-cppflags',
        dest='msan_extra_cppflags',
        type=str,
        default=MSAN_EXTRA_CPPFLAGS,
        help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
        format(MSAN_EXTRA_CPPFLAGS))
    parser.add_argument(
        '--msan-extra-cflags',
        dest='msan_extra_cflags',
        type=str,
        default=MSAN_EXTRA_CFLAGS,
        help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
            MSAN_EXTRA_CFLAGS))
    parser.add_argument(
        '--msan-extra-cxxflags',
        dest='msan_extra_cxxflags',
        type=str,
        default=MSAN_EXTRA_CXXFLAGS,
        help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
        format(MSAN_EXTRA_CXXFLAGS))
    parser.add_argument(
        '--msan-extra-ldflags',
        dest='msan_extra_ldflags',
        type=str,
        default=MSAN_EXTRA_LDFLAGS,
        help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
        format(MSAN_EXTRA_LDFLAGS))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(
            FUZZ_RNG_SEED_SIZE))
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--cc',
        dest='cc',
        type=str,
        default=CC,
        help="CC (default: $CC='{}')".format(CC))
    parser.add_argument(
        '--cxx',
        dest='cxx',
        type=str,
        default=CXX,
        help="CXX (default: $CXX='{}')".format(CXX))
    parser.add_argument(
        '--cppflags',
        dest='cppflags',
        type=str,
        default=CPPFLAGS,
        help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
    parser.add_argument(
        '--cflags',
        dest='cflags',
        type=str,
        default=CFLAGS,
        help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
    parser.add_argument(
        '--cxxflags',
        dest='cxxflags',
        type=str,
        default=CXXFLAGS,
        help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
    parser.add_argument(
        '--ldflags',
        dest='ldflags',
        type=str,
        default=LDFLAGS,
        help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.ubsan_pointer_overflow and not args.ubsan:
        raise RuntimeError('--enable-ubsan-pointer-overflow requires UBSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args


def build(args):
    """
    Run `make clean` followed by `make` with the requested flags/targets.

    Returns 1 if argument parsing fails and 0 after a successful build.
    A failing make invocation raises subprocess.CalledProcessError.
    """
    try:
        args = build_parser(args)
    except Exception as e:
        print(e)
        return 1
    # The compilation flags we are setting
    targets = args.TARGET
    cc = args.cc
    cxx = args.cxx
    cppflags = shlex.split(args.cppflags)
    cflags = shlex.split(args.cflags)
    ldflags = shlex.split(args.ldflags)
    cxxflags = shlex.split(args.cxxflags)
    mflags = shlex.split(args.mflags)
    # Flags to be added to both cflags and cxxflags
    common_flags = []

    cppflags += [
        '-DDEBUGLEVEL={}'.format(args.debug),
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access),
        '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size),
    ]

    # Set flags for options
    assert not (args.fuzzer and args.coverage)
    if args.coverage:
        common_flags += [
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp'
        ]
    if args.fuzzer:
        common_flags += ['-fsanitize=fuzzer']
        args.lib_fuzzing_engine = ''

    mflags += ['LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine)]

    if args.sanitize_recover:
        recover_flags = ['-fsanitize-recover=all']
    else:
        recover_flags = ['-fno-sanitize-recover=all']
    if args.sanitize:
        common_flags += recover_flags

    if args.msan:
        msan_flags = ['-fsanitize=memory']
        if args.msan_track_origins:
            msan_flags += ['-fsanitize-memory-track-origins']
        common_flags += msan_flags
        # Append extra MSAN flags (it might require special setup)
        cppflags += [args.msan_extra_cppflags]
        cflags += [args.msan_extra_cflags]
        cxxflags += [args.msan_extra_cxxflags]
        ldflags += [args.msan_extra_ldflags]

    if args.asan:
        common_flags += ['-fsanitize=address']

    if args.ubsan:
        ubsan_flags = ['-fsanitize=undefined']
        if not args.ubsan_pointer_overflow:
            ubsan_flags += overflow_ubsan_flags(cc, cxx)
        common_flags += ubsan_flags

    if args.stateful_fuzzing:
        cppflags += ['-DSTATEFUL_FUZZING']

    if args.fuzzing_mode:
        cppflags += ['-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION']

    if args.lib_fuzzing_engine == 'libregression.a':
        targets = ['libregression.a'] + targets

    # Append the common flags
    cflags += common_flags
    cxxflags += common_flags

    # Prepare the flags for Make
    cc_str = "CC={}".format(cc)
    cxx_str = "CXX={}".format(cxx)
    cppflags_str = "CPPFLAGS={}".format(' '.join(cppflags))
    cflags_str = "CFLAGS={}".format(' '.join(cflags))
    cxxflags_str = "CXXFLAGS={}".format(' '.join(cxxflags))
    ldflags_str = "LDFLAGS={}".format(' '.join(ldflags))

    # Print the flags
    print('MFLAGS={}'.format(' '.join(mflags)))
    print(cc_str)
    print(cxx_str)
    print(cppflags_str)
    print(cflags_str)
    print(cxxflags_str)
    print(ldflags_str)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)
    build_cmd = [
        'make',
        cc_str,
        cxx_str,
        cppflags_str,
        cflags_str,
        cxxflags_str,
        ldflags_str,
    ] + mflags + targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0


def libfuzzer_parser(args):
    """
    Parse the arguments of the `libfuzzer` command.

    args[0] is consumed as the program name; unrecognized arguments are kept
    in `extra`. Raises RuntimeError if TARGET is not a known target.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    parser.add_argument(
        '--artifact',
        type=str,
        help='Override the default artifact dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-crash')))
    parser.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    return args


def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """
    Run the libFuzzer binary for `target`.

    Directories default to CORPORA_DIR/<target>, <target>-crash and
    <target>-seed. The corpora and artifact directories are created when
    missing; the seed directory is only used if it already exists.
    Raises subprocess.CalledProcessError if the fuzzer exits non-zero.
    """
    if corpora is None:
        corpora = abs_join(CORPORA_DIR, target)
    if artifact is None:
        artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
    if seed is None:
        seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    if extra_args is None:
        extra_args = []

    target = abs_join(FUZZ_DIR, target)

    corpora = [create(corpora)]
    artifact = create(artifact)
    seed = check(seed)

    corpora += [artifact]
    if seed is not None:
        corpora += [seed]

    cmd = [target, '-artifact_prefix={}/'.format(artifact)]
    cmd += corpora + extra_args
    print(' '.join(cmd))
    subprocess.check_call(cmd)


def libfuzzer_cmd(args):
    """Entry point of the `libfuzzer` command; returns the exit status."""
    try:
        args = libfuzzer_parser(args)
    except Exception as e:
        print(e)
        return 1
    libfuzzer(args.TARGET, args.corpora, args.artifact, args.seed, args.extra)
    return 0


def afl_parser(args):
    """
    Parse the arguments of the `afl` command.

    args[0] is consumed as the program name; unrecognized arguments are kept
    in `extra`. Missing --corpora/--output are filled in with defaults under
    CORPORA_DIR. Raises RuntimeError if TARGET is not a known target.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    parser.add_argument(
        '--output',
        type=str,
        help='Override the default AFL output dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-afl')))
    parser.add_argument(
        '--afl-fuzz',
        type=str,
        default=AFL_FUZZ,
        help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ))
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    if not args.corpora:
        args.corpora = abs_join(CORPORA_DIR, args.TARGET)
    if not args.output:
        args.output = abs_join(CORPORA_DIR, '{}-afl'.format(args.TARGET))

    return args


def afl(args):
    """
    Entry point of the `afl` command.

    Returns 1 if argument parsing fails, otherwise 0; the exit status of
    afl-fuzz itself is not checked.
    """
    try:
        args = afl_parser(args)
    except Exception as e:
        print(e)
        return 1
    target = abs_join(FUZZ_DIR, args.TARGET)

    corpora = create(args.corpora)
    output = create(args.output)

    cmd = [args.afl_fuzz, '-i', corpora, '-o', output] + args.extra
    cmd += [target, '@@']
    print(' '.join(cmd))
    subprocess.call(cmd)
    return 0


def regression(args):
    """
    Entry point of the `regression` command.

    Runs each target binary on its corpora directory (created if missing).
    Returns 1 if argument parsing fails and 0 on success; a failing target
    raises subprocess.CalledProcessError.
    """
    try:
        description = """
        Runs one or more regression tests.
        The fuzzer should have been built with with
        LIB_FUZZING_ENGINE='libregression.a'.
        Takes input from CORPORA.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1
    for target in args.TARGET:
        corpora = create(abs_join(CORPORA_DIR, target))
        target = abs_join(FUZZ_DIR, target)
        cmd = [target, corpora]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0


def gen_parser(args):
    """
    Parse the arguments of the `gen` command.

    args[0] is consumed as the program name; unrecognized arguments are kept
    in `extra`. Raises RuntimeError if TARGET is unknown or if the
    decodecorpus binary does not exist.
    """
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--number',
        '-n',
        type=int,
        default=100,
        help='Number of samples to generate')
    parser.add_argument(
        '--max-size-log',
        type=int,
        default=18,
        help='Maximum sample size to generate')
    parser.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    parser.add_argument(
        '--decodecorpus',
        type=str,
        default=DECODECORPUS,
        help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    parser.add_argument(
        '--zstd',
        type=str,
        default=ZSTD,
        help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
    parser.add_argument(
        '--fuzz-rng-seed-size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)"
    )
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    if not args.seed:
        args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET))

    if not os.path.isfile(args.decodecorpus):
        raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
                           format(args.decodecorpus, abs_join(FUZZ_DIR, '..')))

    return args


def gen(args):
    """
    Entry point of the `gen` command.

    Runs decodecorpus into temporary directories, optionally trains
    dictionaries with zstd (for dictionary targets), then copies the chosen
    samples into the seed directory. Returns 1 if argument parsing fails and
    0 on success; failing subprocesses raise subprocess.CalledProcessError.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    with tmpdir() as compressed, \
            tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        # Dictionary targets use at least 1000 samples as training input.
        if info.input_type == InputType.DICTIONARY_DATA:
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            cmd += [
                '--gen-blocks',
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            min_dict_size_log = 9
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir, '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the samples over into the seed directory.
        # NOTE(review): the command description says samples are prepended
        # with FUZZ_RNG_SEED_SIZE random bytes, but this loop copies the
        # bytes unchanged and --fuzz-rng-seed-size is unused here. Confirm
        # which one is intended.
        for name in os.listdir(samples):
            samplename = abs_join(samples, name)
            outname = abs_join(seed, name)
            with open(samplename, 'rb') as sample:
                with open(outname, 'wb') as out:
                    CHUNK_SIZE = 131072
                    chunk = sample.read(CHUNK_SIZE)
                    while len(chunk) > 0:
                        out.write(chunk)
                        chunk = sample.read(CHUNK_SIZE)
    return 0


def minimize(args):
    """
    Entry point of the `minimize` command.

    For each target, runs libfuzzer with -merge=1 into
    CORPORA_DIR/<target>_seed_corpus and then copies in any crash files that
    are not already present. Returns 1 if argument parsing fails, else 0.
    """
    try:
        description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Merge the corpus + anything else into the seed_corpus
        corpus = abs_join(CORPORA_DIR, target)
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        extra_args = [corpus, "-merge=1"] + args.extra
        libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
        seeds = set(os.listdir(seed_corpus))
        # Copy all crashes directly into the seed_corpus if not already present
        crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
        for crash in os.listdir(crashes):
            if crash not in seeds:
                shutil.copy(abs_join(crashes, crash), seed_corpus)
                seeds.add(crash)
    return 0


def zip_cmd(args):
    """
    Entry point of the `zip` command.

    Zips CORPORA_DIR/<target>_seed_corpus into <target>_seed_corpus.zip for
    each target. Returns 1 if argument parsing fails, else 0.
    """
    try:
        description = """
        Zips up the seed corpus.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Zip the seed_corpus
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        zip_file = "{}.zip".format(seed_corpus)
        cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."]
        print(' '.join(cmd))
        subprocess.check_call(cmd, cwd=seed_corpus)
    return 0


def list_cmd(args):
    """Entry point of the `list` command: print one target per line."""
    print("\n".join(TARGETS))
    return 0


def short_help(args):
    """Print the one-line usage message using args[0] as program name."""
    name = args[0]
    print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name))


def help(args):
    """Print the full top-level help message."""
    short_help(args)
    print("\tfuzzing helpers (select a command and pass -h for help)\n")
    print("Options:")
    print("\t-h, --help\tPrint this message")
    print("")
    print("Commands:")
    print("\tbuild\t\tBuild a fuzzer")
    print("\tlibfuzzer\tRun a libFuzzer fuzzer")
    print("\tafl\t\tRun an AFL fuzzer")
    print("\tregression\tRun a regression test")
    print("\tgen\t\tGenerate a seed corpus for a fuzzer")
    print("\tminimize\tMinimize the test corpora")
    print("\tzip\t\tZip the minimized corpora up")
    print("\tlist\t\tList the available targets")


def main():
    """Dispatch sys.argv to the selected sub-command; return exit status."""
    # Work on a copy so that sys.argv itself is left untouched.
    args = list(sys.argv)
    if len(args) < 2:
        help(args)
        return 1
    if args[1] in ('-h', '--help', '-H'):
        help(args)
        return 1
    command = args.pop(1)
    args[0] = "{} {}".format(args[0], command)
    commands = {
        "build": build,
        "libfuzzer": libfuzzer_cmd,
        "regression": regression,
        "afl": afl,
        "gen": gen,
        "minimize": minimize,
        "zip": zip_cmd,
        "list": list_cmd,
    }
    handler = commands.get(command)
    if handler is None:
        short_help(args)
        print("Error: No such command {} (pass -h for help)".format(command))
        return 1
    return handler(args)


if __name__ == "__main__":
    sys.exit(main())