#!/usr/bin/env python

# ################################################################
# Copyright (c) 2016-2020, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ##########################################################################

"""
Command-line helper for building and running the fuzz targets.

Run with a COMMAND (build, libfuzzer, afl, regression, gen, minimize, zip,
list); every command accepts -h for its own help.
"""

import argparse
import contextlib
import os
import re
import shlex
import shutil
import subprocess
import sys
import tempfile


def abs_join(a, *p):
    """
    Joins the path components and returns the absolute, normalized path.
    """
    return os.path.abspath(os.path.join(a, *p))


class InputType(object):
    """
    The kind of input a fuzz target consumes.
    """
    RAW_DATA = 1
    COMPRESSED_DATA = 2
    DICTIONARY_DATA = 3


class FrameType(object):
    """
    The kind of data decodecorpus is asked to generate for a target.
    """
    ZSTD = 1
    BLOCK = 2


class TargetInfo(object):
    """
    Describes a fuzz target: its input type and its frame type.
    """
    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        self.input_type = input_type
        self.frame_type = frame_type


# Constants
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
    'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
    'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
    'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
}
TARGETS = list(TARGET_INFO.keys())
ALL_TARGETS = TARGETS + ['all']
FUZZ_RNG_SEED_SIZE = 4

# Standard environment variables
CC = os.environ.get('CC', 'cc')
CXX = os.environ.get('CXX', 'c++')
CPPFLAGS = os.environ.get('CPPFLAGS', '')
CFLAGS = os.environ.get('CFLAGS', '-O3')
CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
LDFLAGS = os.environ.get('LDFLAGS', '')
MFLAGS = os.environ.get('MFLAGS', '-j')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz')
DECODECORPUS = os.environ.get('DECODECORPUS',
                              abs_join(FUZZ_DIR, '..', 'decodecorpus'))
ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))

# Sanitizer environment variables
MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')


def create(r):
    """
    Returns the absolute path of r, creating the directory if it is missing.
    """
    d = os.path.abspath(r)
    if not os.path.isdir(d):
        os.makedirs(d)
    return d


def check(r):
    """
    Returns the absolute path of r if it is an existing directory, else None.
    """
    d = os.path.abspath(r)
    if not os.path.isdir(d):
        return None
    return d


@contextlib.contextmanager
def tmpdir():
    """
    Context manager yielding a fresh temporary directory.
    The directory is removed on exit; removal errors are ignored.
    """
    dirpath = tempfile.mkdtemp()
    try:
        yield dirpath
    finally:
        shutil.rmtree(dirpath, ignore_errors=True)


def parse_targets(in_targets):
    """
    Expands and validates a list of target names.
    Empty names are skipped and 'all' expands to every entry of TARGETS.
    Returns the de-duplicated targets as a sorted list, so that the order
    in which targets are processed is deterministic.
    Raises RuntimeError for a name that is not a known target.
    """
    targets = set()
    for target in in_targets:
        if not target:
            continue
        if target == 'all':
            targets = targets.union(TARGETS)
        elif target in TARGETS:
            targets.add(target)
        else:
            raise RuntimeError('{} is not a valid target'.format(target))
    return sorted(targets)


def targets_parser(args, description):
    """
    Parses a command line that consists of zero or more TARGETs.
    args[0] is consumed as the program name. Unrecognized arguments are
    stored in args.extra, and args.TARGET is expanded with parse_targets().
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    args.TARGET = parse_targets(args.TARGET)

    return args


def parse_env_flags(args, flags):
    """
    Look for flags set by environment variables.
    Scans flags for -fsanitize= and -fno-sanitize= options and updates
    args.asan, args.msan and args.ubsan accordingly (the existing value is
    kept when a sanitizer is not mentioned), then sets args.sanitize to
    whether any of the three is enabled.
    Raises RuntimeError if a sanitizer is both enabled and disabled.
    """
    san_flags = ','.join(re.findall('-fsanitize=((?:[a-z]+,?)+)', flags))
    nosan_flags = ','.join(re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags))

    def set_sanitizer(sanitizer, default, san, nosan):
        if sanitizer in san and sanitizer in nosan:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=sanitizer))
        if sanitizer in san:
            return True
        if sanitizer in nosan:
            return False
        return default

    san = set(san_flags.split(','))
    nosan = set(nosan_flags.split(','))

    args.asan = set_sanitizer('address', args.asan, san, nosan)
    args.msan = set_sanitizer('memory', args.msan, san, nosan)
    args.ubsan = set_sanitizer('undefined', args.ubsan, san, nosan)

    args.sanitize = args.asan or args.msan or args.ubsan

    return args


def parse_compiler_version(version_bytes):
    """
    Extracts the first 'X.Y.Z' version number from --version output.
    Returns a tuple of three ints, or None if no version number is found.
    """
    # Each group must wrap the whole digit run: '([0-9])+' would only keep
    # the last digit of every component (10.0.1 would parse as (0, 0, 1)).
    version_regex = br'([0-9]+)\.([0-9]+)\.([0-9]+)'
    version_match = re.search(version_regex, version_bytes)
    if version_match is None:
        return None
    return tuple(int(version_match.group(i)) for i in range(1, 4))


def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.
    Returns (compiler, version) where compiler is 'clang', 'gcc' or None,
    and version is a 3-tuple of ints or None when it can't be determined.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    compiler = None
    version = None
    if b'clang' in cc_version_bytes:
        assert(b'clang' in cxx_version_bytes)
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes:
        assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes)
        compiler = 'gcc'
    if compiler is not None:
        version = parse_compiler_version(cc_version_bytes)
    return compiler, version


def overflow_ubsan_flags(cc, cxx):
    """
    Returns the flags that disable the UBSAN overflow check for the
    detected compiler, or an empty list when none applies.
    """
    compiler, version = compiler_version(cc, cxx)
    if compiler == 'gcc':
        return ['-fno-sanitize=signed-integer-overflow']
    # version is None when the compiler's version could not be parsed
    if compiler == 'clang' and version is not None and version >= (5, 0, 0):
        return ['-fno-sanitize=pointer-overflow']
    return []


def build_parser(args):
    """
    Parses and validates the command line of the 'build' command.
    args[0] is consumed as the program name. Sanitizers requested through
    the compiler/linker flags are folded in with parse_env_flags().
    Raises RuntimeError when the selected options are inconsistent.
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{}')".format(
                  LIB_FUZZING_ENGINE)))

    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    parser.add_argument(
        '--enable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_true',
        help='Enable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    parser.add_argument(
        '--msan-extra-cppflags',
        dest='msan_extra_cppflags',
        type=str,
        default=MSAN_EXTRA_CPPFLAGS,
        help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
        format(MSAN_EXTRA_CPPFLAGS))
    parser.add_argument(
        '--msan-extra-cflags',
        dest='msan_extra_cflags',
        type=str,
        default=MSAN_EXTRA_CFLAGS,
        help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
            MSAN_EXTRA_CFLAGS))
    parser.add_argument(
        '--msan-extra-cxxflags',
        dest='msan_extra_cxxflags',
        type=str,
        default=MSAN_EXTRA_CXXFLAGS,
        help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
        format(MSAN_EXTRA_CXXFLAGS))
    parser.add_argument(
        '--msan-extra-ldflags',
        dest='msan_extra_ldflags',
        type=str,
        default=MSAN_EXTRA_LDFLAGS,
        help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
        format(MSAN_EXTRA_LDFLAGS))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(
            FUZZ_RNG_SEED_SIZE))
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--cc',
        dest='cc',
        type=str,
        default=CC,
        help="CC (default: $CC='{}')".format(CC))
    parser.add_argument(
        '--cxx',
        dest='cxx',
        type=str,
        default=CXX,
        help="CXX (default: $CXX='{}')".format(CXX))
    parser.add_argument(
        '--cppflags',
        dest='cppflags',
        type=str,
        default=CPPFLAGS,
        help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
    parser.add_argument(
        '--cflags',
        dest='cflags',
        type=str,
        default=CFLAGS,
        help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
    parser.add_argument(
        '--cxxflags',
        dest='cxxflags',
        type=str,
        default=CXXFLAGS,
        help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
    parser.add_argument(
        '--ldflags',
        dest='ldflags',
        type=str,
        default=LDFLAGS,
        help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.ubsan_pointer_overflow and not args.ubsan:
        raise RuntimeError('--enable-ubsan-pointer-overflow requires UBSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args


def build(args):
    """
    Implements the 'build' command: runs 'make clean' and then 'make' with
    the flags derived from the command line.
    Returns 0 on success and 1 if the arguments are invalid. A failing make
    invocation raises subprocess.CalledProcessError.
    """
    try:
        args = build_parser(args)
    except Exception as e:
        print(e)
        return 1
    # The compilation flags we are setting
    targets = args.TARGET
    cc = args.cc
    cxx = args.cxx
    cppflags = shlex.split(args.cppflags)
    cflags = shlex.split(args.cflags)
    ldflags = shlex.split(args.ldflags)
    cxxflags = shlex.split(args.cxxflags)
    mflags = shlex.split(args.mflags)
    # Flags to be added to both cflags and cxxflags
    common_flags = []

    cppflags += [
        '-DDEBUGLEVEL={}'.format(args.debug),
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access),
        '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size),
    ]

    # Set flags for options
    assert not (args.fuzzer and args.coverage)
    if args.coverage:
        common_flags += [
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp'
        ]
    if args.fuzzer:
        common_flags += ['-fsanitize=fuzzer']
        args.lib_fuzzing_engine = ''

    mflags += ['LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine)]

    if args.sanitize_recover:
        recover_flags = ['-fsanitize-recover=all']
    else:
        recover_flags = ['-fno-sanitize-recover=all']
    if args.sanitize:
        common_flags += recover_flags

    if args.msan:
        msan_flags = ['-fsanitize=memory']
        if args.msan_track_origins:
            msan_flags += ['-fsanitize-memory-track-origins']
        common_flags += msan_flags
        # Append extra MSAN flags (it might require special setup).
        # Split them like the other flag strings so that unset (empty)
        # values don't add empty arguments.
        cppflags += shlex.split(args.msan_extra_cppflags)
        cflags += shlex.split(args.msan_extra_cflags)
        cxxflags += shlex.split(args.msan_extra_cxxflags)
        ldflags += shlex.split(args.msan_extra_ldflags)

    if args.asan:
        common_flags += ['-fsanitize=address']

    if args.ubsan:
        ubsan_flags = ['-fsanitize=undefined']
        if not args.ubsan_pointer_overflow:
            ubsan_flags += overflow_ubsan_flags(cc, cxx)
        common_flags += ubsan_flags

    if args.stateful_fuzzing:
        cppflags += ['-DSTATEFUL_FUZZING']

    if args.fuzzing_mode:
        cppflags += ['-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION']

    if args.lib_fuzzing_engine == 'libregression.a':
        targets = ['libregression.a'] + targets

    # Append the common flags
    cflags += common_flags
    cxxflags += common_flags

    # Prepare the flags for Make
    cc_str = "CC={}".format(cc)
    cxx_str = "CXX={}".format(cxx)
    cppflags_str = "CPPFLAGS={}".format(' '.join(cppflags))
    cflags_str = "CFLAGS={}".format(' '.join(cflags))
    cxxflags_str = "CXXFLAGS={}".format(' '.join(cxxflags))
    ldflags_str = "LDFLAGS={}".format(' '.join(ldflags))

    # Print the flags
    print('MFLAGS={}'.format(' '.join(mflags)))
    print(cc_str)
    print(cxx_str)
    print(cppflags_str)
    print(cflags_str)
    print(cxxflags_str)
    print(ldflags_str)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)
    build_cmd = [
        'make',
        cc_str,
        cxx_str,
        cppflags_str,
        cflags_str,
        cxxflags_str,
        ldflags_str,
    ] + mflags + targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0


def libfuzzer_parser(args):
    """
    Parses the command line of the 'libfuzzer' command.
    args[0] is consumed as the program name and unrecognized arguments are
    stored in args.extra.
    Raises RuntimeError if TARGET is not a known target.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    parser.add_argument(
        '--artifact',
        type=str,
        help='Override the default artifact dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-crash')))
    parser.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    return args


def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """
    Runs the libFuzzer binary for target.
    The corpora and artifact directories are created if missing; the seed
    directory is only passed to the fuzzer when it already exists. Each
    directory defaults to a target-specific directory under CORPORA_DIR.
    extra_args are appended to the fuzzer's command line.
    Raises subprocess.CalledProcessError if the fuzzer exits non-zero.
    """
    if corpora is None:
        corpora = abs_join(CORPORA_DIR, target)
    if artifact is None:
        artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
    if seed is None:
        seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    if extra_args is None:
        extra_args = []

    target = abs_join(FUZZ_DIR, target)

    corpora = [create(corpora)]
    artifact = create(artifact)
    seed = check(seed)

    corpora += [artifact]
    if seed is not None:
        corpora += [seed]

    cmd = [target, '-artifact_prefix={}/'.format(artifact)]
    cmd += corpora + extra_args
    print(' '.join(cmd))
    subprocess.check_call(cmd)


def libfuzzer_cmd(args):
    """
    Implements the 'libfuzzer' command.
    Returns 0 on success and 1 if the arguments are invalid.
    """
    try:
        args = libfuzzer_parser(args)
    except Exception as e:
        print(e)
        return 1
    libfuzzer(args.TARGET, args.corpora, args.artifact, args.seed, args.extra)
    return 0


def afl_parser(args):
    """
    Parses the command line of the 'afl' command.
    args[0] is consumed as the program name and unrecognized arguments are
    stored in args.extra. args.corpora and args.output are filled in with
    target-specific directories under CORPORA_DIR when not given.
    Raises RuntimeError if TARGET is not a known target.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    parser.add_argument(
        '--output',
        type=str,
        help='Override the default AFL output dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-afl')))
    parser.add_argument(
        '--afl-fuzz',
        type=str,
        default=AFL_FUZZ,
        help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ))
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    if not args.corpora:
        args.corpora = abs_join(CORPORA_DIR, args.TARGET)
    if not args.output:
        args.output = abs_join(CORPORA_DIR, '{}-afl'.format(args.TARGET))

    return args


def afl(args):
    """
    Implements the 'afl' command: runs afl-fuzz on the target binary.
    Returns 0 once afl-fuzz has exited and 1 if the arguments are invalid.
    """
    try:
        args = afl_parser(args)
    except Exception as e:
        print(e)
        return 1
    target = abs_join(FUZZ_DIR, args.TARGET)

    corpora = create(args.corpora)
    output = create(args.output)

    cmd = [args.afl_fuzz, '-i', corpora, '-o', output] + args.extra
    cmd += [target, '@@']
    print(' '.join(cmd))
    # The exit status of afl-fuzz is not checked.
    subprocess.call(cmd)
    return 0


def regression(args):
    """
    Implements the 'regression' command: runs every selected target binary
    on its corpora directory (created if missing).
    Returns 0 on success and 1 if the arguments are invalid. A failing
    target raises subprocess.CalledProcessError.
    """
    try:
        description = """
        Runs one or more regression tests.
        The fuzzer should have been built with with
        LIB_FUZZING_ENGINE='libregression.a'.
        Takes input from CORPORA.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1
    for target in args.TARGET:
        corpora = create(abs_join(CORPORA_DIR, target))
        target = abs_join(FUZZ_DIR, target)
        cmd = [target, corpora]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0


def gen_parser(args):
    """
    Parses the command line of the 'gen' command.
    args[0] is consumed as the program name and unrecognized arguments are
    stored in args.extra. args.seed defaults to the target's seed directory
    under CORPORA_DIR.
    Raises RuntimeError if TARGET is not a known target or if the
    decodecorpus binary is not a file.
    """
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--number',
        '-n',
        type=int,
        default=100,
        help='Number of samples to generate')
    parser.add_argument(
        '--max-size-log',
        type=int,
        default=18,
        help='Maximum sample size to generate')
    parser.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    parser.add_argument(
        '--decodecorpus',
        type=str,
        default=DECODECORPUS,
        help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    parser.add_argument(
        '--zstd',
        type=str,
        default=ZSTD,
        help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
    parser.add_argument(
        '--fuzz-rng-seed-size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)"
    )
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    if not args.seed:
        args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET))

    if not os.path.isfile(args.decodecorpus):
        raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
                           format(args.decodecorpus, abs_join(FUZZ_DIR, '..')))

    return args


def gen(args):
    """
    Implements the 'gen' command: generates samples with decodecorpus and
    copies the ones matching the target's input type into the seed
    directory. For dictionary targets, dictionaries are trained with zstd
    from the generated samples and those are used instead.
    Returns 0 on success and 1 if the arguments are invalid. A failing
    subprocess raises subprocess.CalledProcessError.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    with tmpdir() as compressed, tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        # Dictionary targets generate at least 1000 samples, which are
        # used below as the training set.
        if info.input_type == InputType.DICTIONARY_DATA:
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            cmd += [
                '--gen-blocks',
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            min_dict_size_log = 9
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            # Train one dictionary per power-of-two maximum size
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir, '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the samples over to the seed directory. The contents are
        # copied unchanged: no RNG seed is prepended, and the
        # --fuzz-rng-seed-size option is not used here.
        for name in os.listdir(samples):
            samplename = abs_join(samples, name)
            outname = abs_join(seed, name)
            shutil.copyfile(samplename, outname)
    return 0


def minimize(args):
    """
    Implements the 'minimize' command: for every selected target, merges
    its corpus into TARGET_seed_corpus with libFuzzer and then copies the
    target's crashes that are not already present into the seed corpus.
    Returns 0 on success and 1 if the arguments are invalid.
    """
    try:
        description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Merge the corpus + anything else into the seed_corpus
        corpus = abs_join(CORPORA_DIR, target)
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        extra_args = [corpus, "-merge=1"] + args.extra
        libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
        seeds = set(os.listdir(seed_corpus))
        # Copy all crashes directly into the seed_corpus if not already present
        crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
        for crash in os.listdir(crashes):
            if crash not in seeds:
                shutil.copy(abs_join(crashes, crash), seed_corpus)
                seeds.add(crash)
    return 0


def zip_cmd(args):
    """
    Implements the 'zip' command: zips TARGET_seed_corpus into
    TARGET_seed_corpus.zip for every selected target.
    Returns 0 on success and 1 if the arguments are invalid. A failing zip
    raises subprocess.CalledProcessError.
    """
    try:
        description = """
        Zips up the seed corpus.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Zip the seed_corpus
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        zip_file = "{}.zip".format(seed_corpus)
        cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."]
        print(' '.join(cmd))
        subprocess.check_call(cmd, cwd=seed_corpus)
    return 0


def list_cmd(args):
    """
    Implements the 'list' command: prints one target name per line.
    args is unused. Returns 0.
    """
    print("\n".join(TARGETS))
    return 0


def short_help(args):
    """
    Prints the one-line usage message, using args[0] as the program name.
    """
    name = args[0]
    print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name))


def help(args):
    """
    Prints the usage message followed by the list of commands.
    """
    short_help(args)
    print("\tfuzzing helpers (select a command and pass -h for help)\n")
    print("Options:")
    print("\t-h, --help\tPrint this message")
    print("")
    print("Commands:")
    print("\tbuild\t\tBuild a fuzzer")
    print("\tlibfuzzer\tRun a libFuzzer fuzzer")
    print("\tafl\t\tRun an AFL fuzzer")
    print("\tregression\tRun a regression test")
    print("\tgen\t\tGenerate a seed corpus for a fuzzer")
    print("\tminimize\tMinimize the test corpora")
    print("\tzip\t\tZip the minimized corpora up")
    print("\tlist\t\tList the available targets")


def main():
    """
    Dispatches to the command named by the first command line argument.
    Returns the exit status of the process.
    """
    # Work on a copy so that sys.argv itself is left untouched
    args = list(sys.argv)
    if len(args) < 2:
        help(args)
        return 1
    if args[1] in ('-h', '--help', '-H'):
        help(args)
        return 1
    command = args.pop(1)
    # The sub-parsers use args[0] as their program name
    args[0] = "{} {}".format(args[0], command)
    commands = {
        "build": build,
        "libfuzzer": libfuzzer_cmd,
        "regression": regression,
        "afl": afl,
        "gen": gen,
        "minimize": minimize,
        "zip": zip_cmd,
        "list": list_cmd,
    }
    handler = commands.get(command)
    if handler is None:
        short_help(args)
        print("Error: No such command {} (pass -h for help)".format(command))
        return 1
    return handler(args)


if __name__ == "__main__":
    sys.exit(main())