#!/usr/bin/env python

# ################################################################
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ##########################################################################

"""Command-line helpers for building and running the fuzz targets.

Run with a COMMAND (build, libfuzzer, afl, regression, gen, minimize, zip,
list) and pass -h after the command for its options.
"""

import argparse
import contextlib
import os
import re
import shlex
import shutil
import subprocess
import sys
import tempfile


def abs_join(a, *p):
    """Join the path components and return the result as an absolute path."""
    return os.path.abspath(os.path.join(a, *p))


class InputType(object):
    """Kind of input data a fuzz target consumes."""
    RAW_DATA = 1
    COMPRESSED_DATA = 2
    DICTIONARY_DATA = 3


class FrameType(object):
    """Kind of framing used when generating samples for a target."""
    ZSTD = 1
    BLOCK = 2


class TargetInfo(object):
    """Input type and frame type associated with one fuzz target."""

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        self.input_type = input_type
        self.frame_type = frame_type


# Constants
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
    'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
    'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
    'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
    'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
    'huf_round_trip': TargetInfo(InputType.RAW_DATA),
    'huf_decompress': TargetInfo(InputType.RAW_DATA),
    'decompress_cross_format': TargetInfo(InputType.RAW_DATA),
    'generate_sequences': TargetInfo(InputType.RAW_DATA),
}
TARGETS = list(TARGET_INFO.keys())
ALL_TARGETS = TARGETS + ['all']
FUZZ_RNG_SEED_SIZE = 4

# Standard environment variables
CC = os.environ.get('CC', 'cc')
CXX = os.environ.get('CXX', 'c++')
CPPFLAGS = os.environ.get('CPPFLAGS', '')
CFLAGS = os.environ.get('CFLAGS', '-O3')
CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
LDFLAGS = os.environ.get('LDFLAGS', '')
MFLAGS = os.environ.get('MFLAGS', '-j')
THIRD_PARTY_SEQ_PROD_OBJ = os.environ.get('THIRD_PARTY_SEQ_PROD_OBJ', '')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz')
DECODECORPUS = os.environ.get('DECODECORPUS',
                              abs_join(FUZZ_DIR, '..', 'decodecorpus'))
ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))

# Sanitizer environment variables
MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')


def create(r):
    """Return the absolute path of directory r, creating it if missing."""
    d = os.path.abspath(r)
    if not os.path.isdir(d):
        os.makedirs(d)
    return d


def check(r):
    """Return the absolute path of r if it is a directory, else None."""
    d = os.path.abspath(r)
    if not os.path.isdir(d):
        return None
    return d


@contextlib.contextmanager
def tmpdir():
    """Yield a fresh temporary directory that is removed on exit."""
    dirpath = tempfile.mkdtemp()
    try:
        yield dirpath
    finally:
        shutil.rmtree(dirpath, ignore_errors=True)


def parse_targets(in_targets):
    """
    Expand and validate a list of target names.

    Empty names are skipped and 'all' expands to every entry of TARGETS.
    Returns the de-duplicated targets as a sorted list so that callers
    process them in a deterministic order.
    Raises RuntimeError for a name that is not a known target.
    """
    targets = set()
    for target in in_targets:
        if not target:
            continue
        if target == 'all':
            targets = targets.union(TARGETS)
        elif target in TARGETS:
            targets.add(target)
        else:
            raise RuntimeError('{} is not a valid target'.format(target))
    return sorted(targets)


def targets_parser(args, description):
    """
    Parse a command line that takes zero or more TARGETs.

    args[0] is consumed as the program name. Unrecognized arguments are
    stored in the returned namespace as `extra`, and `TARGET` is replaced by
    the validated target list.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    args.TARGET = parse_targets(args.TARGET)

    return args


def parse_env_flags(args, flags):
    """
    Look for flags set by environment variables.

    Scans `flags` for -fsanitize= / -fno-sanitize= options and updates
    args.asan, args.msan and args.ubsan accordingly (the existing value is
    kept when a sanitizer is not mentioned). Also sets args.sanitize to
    whether any of the three is enabled.
    Raises RuntimeError if a sanitizer is both enabled and disabled.
    """
    san_flags = ','.join(re.findall(r'-fsanitize=((?:[a-z]+,?)+)', flags))
    nosan_flags = ','.join(re.findall(r'-fno-sanitize=((?:[a-z]+,?)+)', flags))

    def set_sanitizer(sanitizer, default, san, nosan):
        if sanitizer in san and sanitizer in nosan:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=sanitizer))
        if sanitizer in san:
            return True
        if sanitizer in nosan:
            return False
        return default

    san = set(san_flags.split(','))
    nosan = set(nosan_flags.split(','))

    args.asan = set_sanitizer('address', args.asan, san, nosan)
    args.msan = set_sanitizer('memory', args.msan, san, nosan)
    args.ubsan = set_sanitizer('undefined', args.ubsan, san, nosan)

    args.sanitize = args.asan or args.msan or args.ubsan

    return args


def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Returns (compiler, version) where compiler is 'clang', 'gcc' or None and
    version is a (major, minor, patch) tuple of ints, or None when the
    compiler is not recognized or no X.Y.Z version appears in its output.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    compiler = None
    version = None
    # The banner is only echoed for the user, so never fail on odd bytes.
    print("{} --version:\n{}".format(
        cc, cc_version_bytes.decode('utf-8', 'replace')))
    if b'clang' in cc_version_bytes:
        assert b'clang' in cxx_version_bytes
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes or b'GCC' in cc_version_bytes:
        assert b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes
        compiler = 'gcc'
    if compiler is not None:
        version_regex = rb'([0-9]+)\.([0-9]+)\.([0-9]+)'
        version_match = re.search(version_regex, cc_version_bytes)
        if version_match is not None:
            version = tuple(int(part) for part in version_match.groups())
    return compiler, version


def overflow_ubsan_flags(cc, cxx):
    """
    Return the -fno-sanitize flags that disable UBSAN overflow checks.

    The flag depends on the detected compiler and version; an empty list is
    returned when the compiler or its version cannot be determined.
    """
    compiler, version = compiler_version(cc, cxx)
    if compiler is None or version is None:
        return []
    if compiler == 'gcc' and version < (8, 0, 0):
        return ['-fno-sanitize=signed-integer-overflow']
    if compiler == 'gcc' or (compiler == 'clang' and version >= (5, 0, 0)):
        return ['-fno-sanitize=pointer-overflow']
    return []


def build_parser(args):
    """
    Parse the arguments of the `build` command.

    args[0] is consumed as the program name. Sanitizer settings found in the
    compiler/linker flags are merged in via parse_env_flags().
    Raises RuntimeError when the sanitizer options are inconsistent.
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{}')".format(LIB_FUZZING_ENGINE)))

    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    parser.add_argument(
        '--disable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_false',
        help='Disable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    parser.add_argument(
        '--msan-extra-cppflags',
        dest='msan_extra_cppflags',
        type=str,
        default=MSAN_EXTRA_CPPFLAGS,
        help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
        format(MSAN_EXTRA_CPPFLAGS))
    parser.add_argument(
        '--msan-extra-cflags',
        dest='msan_extra_cflags',
        type=str,
        default=MSAN_EXTRA_CFLAGS,
        help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
            MSAN_EXTRA_CFLAGS))
    parser.add_argument(
        '--msan-extra-cxxflags',
        dest='msan_extra_cxxflags',
        type=str,
        default=MSAN_EXTRA_CXXFLAGS,
        help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
        format(MSAN_EXTRA_CXXFLAGS))
    parser.add_argument(
        '--msan-extra-ldflags',
        dest='msan_extra_ldflags',
        type=str,
        default=MSAN_EXTRA_LDFLAGS,
        help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
        format(MSAN_EXTRA_LDFLAGS))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(
            FUZZ_RNG_SEED_SIZE))
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--custom-seq-prod',
        dest='third_party_seq_prod_obj',
        type=str,
        default=THIRD_PARTY_SEQ_PROD_OBJ,
        help='Path to an object file with symbols for fuzzing your sequence producer plugin.')
    parser.add_argument(
        '--cc',
        dest='cc',
        type=str,
        default=CC,
        help="CC (default: $CC='{}')".format(CC))
    parser.add_argument(
        '--cxx',
        dest='cxx',
        type=str,
        default=CXX,
        help="CXX (default: $CXX='{}')".format(CXX))
    parser.add_argument(
        '--cppflags',
        dest='cppflags',
        type=str,
        default=CPPFLAGS,
        help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
    parser.add_argument(
        '--cflags',
        dest='cflags',
        type=str,
        default=CFLAGS,
        help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
    parser.add_argument(
        '--cxxflags',
        dest='cxxflags',
        type=str,
        default=CXXFLAGS,
        help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
    parser.add_argument(
        '--ldflags',
        dest='ldflags',
        type=str,
        default=LDFLAGS,
        help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args


def build(args):
    """
    Run `make clean` and then build the requested targets with make.

    Returns 1 if the arguments are invalid and 0 on success; a failing make
    invocation raises subprocess.CalledProcessError.
    """
    try:
        args = build_parser(args)
    except Exception as e:
        print(e)
        return 1
    # The compilation flags we are setting
    targets = args.TARGET
    cc = args.cc
    cxx = args.cxx
    cppflags = shlex.split(args.cppflags)
    cflags = shlex.split(args.cflags)
    ldflags = shlex.split(args.ldflags)
    cxxflags = shlex.split(args.cxxflags)
    mflags = shlex.split(args.mflags)
    # Flags to be added to both cflags and cxxflags
    common_flags = [
        '-Wno-error=declaration-after-statement',
        '-Wno-error=c++-compat',
        '-Wno-error=deprecated'  # C files are sometimes compiled with CXX
    ]

    cppflags += [
        '-DDEBUGLEVEL={}'.format(args.debug),
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access),
        '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size),
    ]

    # Set flags for options
    assert not (args.fuzzer and args.coverage)
    if args.coverage:
        common_flags += [
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp'
        ]
    if args.fuzzer:
        common_flags += ['-fsanitize=fuzzer']
        args.lib_fuzzing_engine = ''

    mflags += ['LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine)]

    if args.sanitize_recover:
        recover_flags = ['-fsanitize-recover=all']
    else:
        recover_flags = ['-fno-sanitize-recover=all']
    if args.sanitize:
        common_flags += recover_flags

    if args.msan:
        msan_flags = ['-fsanitize=memory']
        if args.msan_track_origins:
            msan_flags += ['-fsanitize-memory-track-origins']
        common_flags += msan_flags
        # Append extra MSAN flags (it might require special setup)
        cppflags += [args.msan_extra_cppflags]
        cflags += [args.msan_extra_cflags]
        cxxflags += [args.msan_extra_cxxflags]
        ldflags += [args.msan_extra_ldflags]

    if args.asan:
        common_flags += ['-fsanitize=address']

    if args.ubsan:
        ubsan_flags = ['-fsanitize=undefined']
        if not args.ubsan_pointer_overflow:
            ubsan_flags += overflow_ubsan_flags(cc, cxx)
        common_flags += ubsan_flags

    if args.stateful_fuzzing:
        cppflags += ['-DSTATEFUL_FUZZING']

    if args.third_party_seq_prod_obj:
        cppflags += ['-DFUZZ_THIRD_PARTY_SEQ_PROD']
        mflags += ['THIRD_PARTY_SEQ_PROD_OBJ={}'.format(args.third_party_seq_prod_obj)]

    if args.fuzzing_mode:
        cppflags += ['-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION']

    if args.lib_fuzzing_engine == 'libregression.a':
        targets = ['libregression.a'] + targets

    # Append the common flags
    cflags += common_flags
    cxxflags += common_flags

    # Prepare the flags for Make
    cc_str = "CC={}".format(cc)
    cxx_str = "CXX={}".format(cxx)
    cppflags_str = "CPPFLAGS={}".format(' '.join(cppflags))
    cflags_str = "CFLAGS={}".format(' '.join(cflags))
    cxxflags_str = "CXXFLAGS={}".format(' '.join(cxxflags))
    ldflags_str = "LDFLAGS={}".format(' '.join(ldflags))

    # Print the flags
    print('MFLAGS={}'.format(' '.join(mflags)))
    print(cc_str)
    print(cxx_str)
    print(cppflags_str)
    print(cflags_str)
    print(cxxflags_str)
    print(ldflags_str)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)
    build_cmd = [
        'make',
        '-j',
        cc_str,
        cxx_str,
        cppflags_str,
        cflags_str,
        cxxflags_str,
        ldflags_str,
    ] + mflags + targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0


def libfuzzer_parser(args):
    """
    Parse the arguments of the `libfuzzer` command.

    args[0] is consumed as the program name; unrecognized arguments are kept
    in `extra`. Raises RuntimeError for an unknown TARGET.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    parser.add_argument(
        '--artifact',
        type=str,
        help='Override the default artifact dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-crash')))
    parser.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    return args


def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """
    Run the libFuzzer binary for `target`.

    Directories default to CORPORA_DIR/<target>, <target>-crash and
    <target>-seed. The corpora and artifact directories are created when
    missing; the seed directory is only passed along if it already exists.
    A failing fuzzer raises subprocess.CalledProcessError.
    """
    if corpora is None:
        corpora = abs_join(CORPORA_DIR, target)
    if artifact is None:
        artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
    if seed is None:
        seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    if extra_args is None:
        extra_args = []

    target = abs_join(FUZZ_DIR, target)

    corpora = [create(corpora)]
    artifact = create(artifact)
    seed = check(seed)

    corpora += [artifact]
    if seed is not None:
        corpora += [seed]

    cmd = [target, '-artifact_prefix={}/'.format(artifact)]
    cmd += corpora + extra_args
    print(' '.join(cmd))
    subprocess.check_call(cmd)


def libfuzzer_cmd(args):
    """Entry point of the `libfuzzer` command; returns the exit status."""
    try:
        args = libfuzzer_parser(args)
    except Exception as e:
        print(e)
        return 1
    libfuzzer(args.TARGET, args.corpora, args.artifact, args.seed, args.extra)
    return 0


def afl_parser(args):
    """
    Parse the arguments of the `afl` command.

    args[0] is consumed as the program name; unrecognized arguments are kept
    in `extra`. Missing --corpora / --output values are filled in with the
    per-target defaults. Raises RuntimeError for an unknown TARGET.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    parser.add_argument(
        '--output',
        type=str,
        help='Override the default AFL output dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-afl')))
    parser.add_argument(
        '--afl-fuzz',
        type=str,
        default=AFL_FUZZ,
        help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ))
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    if not args.corpora:
        args.corpora = abs_join(CORPORA_DIR, args.TARGET)
    if not args.output:
        args.output = abs_join(CORPORA_DIR, '{}-afl'.format(args.TARGET))

    return args


def afl(args):
    """Entry point of the `afl` command; returns 1 only on bad arguments."""
    try:
        args = afl_parser(args)
    except Exception as e:
        print(e)
        return 1
    target = abs_join(FUZZ_DIR, args.TARGET)

    corpora = create(args.corpora)
    output = create(args.output)

    cmd = [args.afl_fuzz, '-i', corpora, '-o', output] + args.extra
    cmd += [target, '@@']
    print(' '.join(cmd))
    # NOTE(review): the exit status of afl-fuzz is ignored here, presumably
    # because the job is normally stopped by the user - confirm.
    subprocess.call(cmd)
    return 0


def regression(args):
    """
    Entry point of the `regression` command.

    Runs each requested target binary on its corpora directory. Returns 1 on
    bad arguments and 0 on success; a failing target raises
    subprocess.CalledProcessError.
    """
    try:
        description = """
        Runs one or more regression tests.
        The fuzzer should have been built with
        LIB_FUZZING_ENGINE='libregression.a'.
        Takes input from CORPORA.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1
    for target in args.TARGET:
        corpora = create(abs_join(CORPORA_DIR, target))
        target = abs_join(FUZZ_DIR, target)
        cmd = [target, corpora]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0


def gen_parser(args):
    """
    Parse the arguments of the `gen` command.

    args[0] is consumed as the program name; unrecognized arguments are kept
    in `extra`. Raises RuntimeError for an unknown TARGET or when the
    decodecorpus binary does not exist.
    """
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--number',
        '-n',
        type=int,
        default=100,
        help='Number of samples to generate')
    parser.add_argument(
        '--max-size-log',
        type=int,
        default=18,
        help='Maximum sample size to generate')
    parser.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    parser.add_argument(
        '--decodecorpus',
        type=str,
        default=DECODECORPUS,
        help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    parser.add_argument(
        '--zstd',
        type=str,
        default=ZSTD,
        help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
    parser.add_argument(
        '--fuzz-rng-seed-size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)"
    )
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    if not args.seed:
        args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET))

    if not os.path.isfile(args.decodecorpus):
        raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
                           format(args.decodecorpus, abs_join(FUZZ_DIR, '..')))

    return args


def gen(args):
    """
    Entry point of the `gen` command.

    Runs decodecorpus into temporary directories, selects the compressed,
    decompressed or trained-dictionary output according to the target's
    input type, and copies those files into the seed directory.
    Returns 1 on bad arguments and 0 on success; a failing helper binary
    raises subprocess.CalledProcessError.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    with tmpdir() as compressed, tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        # Dictionary targets always generate at least 1000 samples.
        if info.input_type == InputType.DICTIONARY_DATA:
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            cmd += [
                '--gen-blocks',
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            min_dict_size_log = 9
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir, '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the samples over unchanged. Note that
        # args.fuzz_rng_seed_size is accepted by the parser but is not used
        # here: no RNG seed bytes are written to the output files.
        CHUNK_SIZE = 131072
        for name in os.listdir(samples):
            samplename = abs_join(samples, name)
            outname = abs_join(seed, name)
            with open(samplename, 'rb') as sample:
                with open(outname, 'wb') as out:
                    shutil.copyfileobj(sample, out, CHUNK_SIZE)
    return 0


def minimize(args):
    """
    Entry point of the `minimize` command.

    For each target, runs libFuzzer with -merge=1 into
    CORPORA_DIR/<target>_seed_corpus and then copies in any crash files that
    are not already present. Returns 1 on bad arguments and 0 on success.
    """
    try:
        description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Merge the corpus + anything else into the seed_corpus
        corpus = abs_join(CORPORA_DIR, target)
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        extra_args = [corpus, "-merge=1"] + args.extra
        libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
        seeds = set(os.listdir(seed_corpus))
        # Copy all crashes directly into the seed_corpus if not already present
        crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
        for crash in os.listdir(crashes):
            if crash not in seeds:
                shutil.copy(abs_join(crashes, crash), seed_corpus)
                seeds.add(crash)
    return 0


def zip_cmd(args):
    """
    Entry point of the `zip` command.

    Zips CORPORA_DIR/<target>_seed_corpus into <target>_seed_corpus.zip for
    each target. Returns 1 on bad arguments and 0 on success; a failing zip
    raises subprocess.CalledProcessError.
    """
    try:
        description = """
        Zips up the seed corpus.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Zip the seed_corpus
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        zip_file = "{}.zip".format(seed_corpus)
        cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."]
        print(' '.join(cmd))
        subprocess.check_call(cmd, cwd=seed_corpus)
    return 0


def list_cmd(args):
    """Entry point of the `list` command: print every target name."""
    print("\n".join(TARGETS))
    return 0


def short_help(args):
    """Print the one-line usage message; args[0] is the program name."""
    name = args[0]
    print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name))


def help(args):
    """Print the usage message followed by the list of commands."""
    short_help(args)
    print("\tfuzzing helpers (select a command and pass -h for help)\n")
    print("Options:")
    print("\t-h, --help\tPrint this message")
    print("")
    print("Commands:")
    print("\tbuild\t\tBuild a fuzzer")
    print("\tlibfuzzer\tRun a libFuzzer fuzzer")
    print("\tafl\t\tRun an AFL fuzzer")
    print("\tregression\tRun a regression test")
    print("\tgen\t\tGenerate a seed corpus for a fuzzer")
    print("\tminimize\tMinimize the test corpora")
    print("\tzip\t\tZip the minimized corpora up")
    print("\tlist\t\tList the available targets")


def main():
    """
    Dispatch sys.argv to the selected command and return its exit status.

    The command name is removed from the argument list and folded into
    args[0] so that each sub-parser reports "<script> <command>" as its
    program name.
    """
    args = sys.argv
    if len(args) < 2:
        help(args)
        return 1
    if args[1] == '-h' or args[1] == '--help' or args[1] == '-H':
        help(args)
        return 1
    command = args.pop(1)
    args[0] = "{} {}".format(args[0], command)
    if command == "build":
        return build(args)
    if command == "libfuzzer":
        return libfuzzer_cmd(args)
    if command == "regression":
        return regression(args)
    if command == "afl":
        return afl(args)
    if command == "gen":
        return gen(args)
    if command == "minimize":
        return minimize(args)
    if command == "zip":
        return zip_cmd(args)
    if command == "list":
        return list_cmd(args)
    short_help(args)
    print("Error: No such command {} (pass -h for help)".format(command))
    return 1


if __name__ == "__main__":
    sys.exit(main())