• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2
3# ################################################################
4# Copyright (c) Meta Platforms, Inc. and affiliates.
5# All rights reserved.
6#
7# This source code is licensed under both the BSD-style license (found in the
8# LICENSE file in the root directory of this source tree) and the GPLv2 (found
9# in the COPYING file in the root directory of this source tree).
10# You may select, at your option, one of the above-listed licenses.
11# ##########################################################################
12
13import argparse
14import contextlib
15import os
16import re
17import shlex
18import shutil
19import subprocess
20import sys
21import tempfile
22
23
def abs_join(a, *p):
    """Join `a` with the path components in `p` and return the absolute path."""
    joined = os.path.join(a, *p)
    return os.path.abspath(joined)
26
27
class InputType(object):
    """Enumerates the kinds of input data associated with a fuzz target."""
    # Plain integer tags; compared with == by the code that consumes them.
    RAW_DATA, COMPRESSED_DATA, DICTIONARY_DATA = 1, 2, 3
32
33
class FrameType(object):
    """Enumerates the frame formats associated with a fuzz target."""
    # Plain integer tags; compared with == by the code that consumes them.
    ZSTD, BLOCK = 1, 2
37
38
class TargetInfo(object):
    """
    Record describing a fuzz target.

    Attributes:
        input_type: one of the InputType constants.
        frame_type: one of the FrameType constants (FrameType.ZSTD unless
            given).
    """

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        self.input_type, self.frame_type = input_type, frame_type
43
44
# Constants
# Directory containing this script; target binaries and corpora live under it.
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
# Maps every known fuzz target name to its TargetInfo.
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
    'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
    'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
    'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
    'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
    'huf_round_trip': TargetInfo(InputType.RAW_DATA),
    'huf_decompress': TargetInfo(InputType.RAW_DATA),
    'decompress_cross_format': TargetInfo(InputType.RAW_DATA),
    'generate_sequences': TargetInfo(InputType.RAW_DATA),
}
# Names of all targets, plus the pseudo-target 'all' accepted on the CLI.
TARGETS = list(TARGET_INFO)
ALL_TARGETS = TARGETS + ['all']
FUZZ_RNG_SEED_SIZE = 4
74
# Standard environment variables
# Each constant is read from the environment once, at import time, and falls
# back to the default given as the second argument.
CC = os.getenv('CC', 'cc')
CXX = os.getenv('CXX', 'c++')
CPPFLAGS = os.getenv('CPPFLAGS', '')
CFLAGS = os.getenv('CFLAGS', '-O3')
CXXFLAGS = os.getenv('CXXFLAGS', CFLAGS)  # defaults to the C flags
LDFLAGS = os.getenv('LDFLAGS', '')
MFLAGS = os.getenv('MFLAGS', '-j')
THIRD_PARTY_SEQ_PROD_OBJ = os.getenv('THIRD_PARTY_SEQ_PROD_OBJ', '')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.getenv('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.getenv('AFL_FUZZ', 'afl-fuzz')
DECODECORPUS = os.getenv('DECODECORPUS',
                         abs_join(FUZZ_DIR, '..', 'decodecorpus'))
ZSTD = os.getenv('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))

# Sanitizer environment variables
MSAN_EXTRA_CPPFLAGS = os.getenv('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.getenv('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.getenv('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.getenv('MSAN_EXTRA_LDFLAGS', '')
97
98
def create(r):
    """
    Return the absolute path of `r`, creating the directory (and any missing
    parents) when it does not already exist.
    """
    path = os.path.abspath(r)
    if os.path.isdir(path):
        return path
    os.makedirs(path)
    return path
104
105
def check(r):
    """
    Return the absolute path of `r` if it is an existing directory, else None.
    """
    path = os.path.abspath(r)
    return path if os.path.isdir(path) else None
111
112
@contextlib.contextmanager
def tmpdir():
    """
    Context manager yielding the path of a freshly created temporary
    directory, which is deleted when the context exits.
    """
    scratch = tempfile.mkdtemp()
    try:
        yield scratch
    finally:
        # Best-effort cleanup: errors while removing the tree are ignored.
        shutil.rmtree(scratch, ignore_errors=True)
120
121
def parse_targets(in_targets):
    """
    Expand and validate a sequence of target names.

    Falsy entries are skipped and 'all' expands to every name in TARGETS.
    Returns the de-duplicated names as a list (built from a set, so the order
    is unspecified). Raises RuntimeError for a name that is not in TARGETS.
    """
    selected = set()
    for name in in_targets:
        if not name:
            continue
        if name == 'all':
            selected = selected.union(TARGETS)
            continue
        if name not in TARGETS:
            raise RuntimeError('{} is not a valid target'.format(name))
        selected.add(name)
    return list(selected)
134
135
def targets_parser(args, description):
    """
    Parse a command line that consists of zero or more TARGET names.

    `args[0]` is removed from `args` and used as the program name.
    Unrecognized arguments are kept in `.extra` on the returned namespace, and
    `.TARGET` is replaced by the validated list from parse_targets().
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(
        ', '.join(ALL_TARGETS))
    parser.add_argument('TARGET', nargs='*', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown
    parsed.TARGET = parse_targets(parsed.TARGET)
    return parsed
149
150
def parse_env_flags(args, flags):
    """
    Update the sanitizer switches on `args` from a string of compiler flags.

    Every -fsanitize=... and -fno-sanitize=... list found in `flags` is
    collected. args.asan, args.msan and args.ubsan are forced on or off when
    'address', 'memory' or 'undefined' is listed, and left alone otherwise.
    args.sanitize is set to whether any of the three ends up enabled.

    Raises RuntimeError when the same sanitizer is both enabled and disabled.
    Returns `args`.
    """
    enabled = set(','.join(
        re.findall('-fsanitize=((?:[a-z]+,?)+)', flags)).split(','))
    disabled = set(','.join(
        re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags)).split(','))

    def resolve(sanitizer, default):
        turned_on = sanitizer in enabled
        turned_off = sanitizer in disabled
        if turned_on and turned_off:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=sanitizer))
        if turned_on:
            return True
        if turned_off:
            return False
        return default

    args.asan = resolve('address', args.asan)
    args.msan = resolve('memory', args.msan)
    args.ubsan = resolve('undefined', args.ubsan)
    args.sanitize = args.asan or args.msan or args.ubsan
    return args
178
179
def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Runs `cc --version` and `cxx --version` and prints the output of the
    former.

    Returns a (compiler, version) tuple. `compiler` is 'clang', 'gcc', or None
    when the output matches neither; `version` is a (major, minor, patch)
    tuple of ints, or None when `compiler` is None.

    Raises RuntimeError if `cxx` does not belong to the same compiler family
    as `cc`, or if no x.y.z version number is found in the output of `cc`.
    Raises subprocess.CalledProcessError / OSError if a compiler cannot be
    run.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    # Decode leniently: the banner may contain non-ASCII bytes (e.g. localized
    # messages) and this is only printed for information.
    print("{} --version:\n{}".format(
        cc, cc_version_bytes.decode('utf-8', 'replace')))

    if b'clang' in cc_version_bytes:
        compiler = 'clang'
        cxx_matches = b'clang' in cxx_version_bytes
    elif b'gcc' in cc_version_bytes or b'GCC' in cc_version_bytes:
        compiler = 'gcc'
        cxx_matches = (b'gcc' in cxx_version_bytes or
                       b'g++' in cxx_version_bytes)
    else:
        return None, None

    # Explicit checks rather than assert, which is stripped under -O.
    if not cxx_matches:
        raise RuntimeError(
            'CC={} looks like {} but CXX={} does not'.format(
                cc, compiler, cxx))

    version_match = re.search(br'([0-9]+)\.([0-9]+)\.([0-9]+)',
                              cc_version_bytes)
    if version_match is None:
        raise RuntimeError(
            'Unable to find a version number in the output of '
            '{} --version'.format(cc))
    version = tuple(int(version_match.group(i)) for i in range(1, 4))
    return compiler, version
201
202
def overflow_ubsan_flags(cc, cxx):
    """
    Return the -fno-sanitize flag that turns off the overflow UBSAN check
    for the detected compiler, as a list (empty when none applies).
    """
    compiler, version = compiler_version(cc, cxx)
    using_gcc = compiler == 'gcc'
    # NOTE(review): gcc older than 8 is given signed-integer-overflow instead
    # of pointer-overflow, presumably because it lacks the latter - confirm.
    if using_gcc and version < (8, 0, 0):
        return ['-fno-sanitize=signed-integer-overflow']
    if using_gcc:
        return ['-fno-sanitize=pointer-overflow']
    if compiler == 'clang' and version >= (5, 0, 0):
        return ['-fno-sanitize=pointer-overflow']
    return []
210
211
def build_parser(args):
    """
    Parse the command line of the `build` command.

    `args[0]` is removed from `args` and used as the program name. Most
    options default to the module-level values read from the environment.
    After parsing, -fsanitize/-fno-sanitize settings found in the combined
    CPPFLAGS/CFLAGS/CXXFLAGS/LDFLAGS are folded into the sanitizer switches
    by parse_env_flags().

    Returns the argparse namespace.
    Raises RuntimeError for inconsistent sanitizer options.
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{}')".format(LIB_FUZZING_ENGINE)))

    # Coverage instrumentation and the clang fuzzer are mutually exclusive.
    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    parser.add_argument(
        '--disable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_false',
        help='Disable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    parser.add_argument(
        '--msan-extra-cppflags',
        dest='msan_extra_cppflags',
        type=str,
        default=MSAN_EXTRA_CPPFLAGS,
        help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
        format(MSAN_EXTRA_CPPFLAGS))
    parser.add_argument(
        '--msan-extra-cflags',
        dest='msan_extra_cflags',
        type=str,
        default=MSAN_EXTRA_CFLAGS,
        help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
            MSAN_EXTRA_CFLAGS))
    parser.add_argument(
        '--msan-extra-cxxflags',
        dest='msan_extra_cxxflags',
        type=str,
        default=MSAN_EXTRA_CXXFLAGS,
        help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
        format(MSAN_EXTRA_CXXFLAGS))
    parser.add_argument(
        '--msan-extra-ldflags',
        dest='msan_extra_ldflags',
        type=str,
        default=MSAN_EXTRA_LDFLAGS,
        help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
        format(MSAN_EXTRA_LDFLAGS))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(
            FUZZ_RNG_SEED_SIZE))
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--custom-seq-prod',
        dest='third_party_seq_prod_obj',
        type=str,
        default=THIRD_PARTY_SEQ_PROD_OBJ,
        help='Path to an object file with symbols for fuzzing your sequence producer plugin.')
    parser.add_argument(
        '--cc',
        dest='cc',
        type=str,
        default=CC,
        help="CC (default: $CC='{}')".format(CC))
    parser.add_argument(
        '--cxx',
        dest='cxx',
        type=str,
        default=CXX,
        help="CXX (default: $CXX='{}')".format(CXX))
    parser.add_argument(
        '--cppflags',
        dest='cppflags',
        type=str,
        default=CPPFLAGS,
        help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
    parser.add_argument(
        '--cflags',
        dest='cflags',
        type=str,
        default=CFLAGS,
        help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
    parser.add_argument(
        '--cxxflags',
        dest='cxxflags',
        type=str,
        default=CXXFLAGS,
        help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
    parser.add_argument(
        '--ldflags',
        dest='ldflags',
        type=str,
        default=LDFLAGS,
        help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    # Sanitizers requested through the flag strings override the switches.
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args
392
393
def build(args):
    """
    Run the `build` command: `make clean`, then `make` the requested targets
    with compiler and flag variables derived from the parsed options.

    Returns 0 on success, or 1 (after printing the error) when option parsing
    raises. A failing make invocation raises subprocess.CalledProcessError.
    """
    try:
        opts = build_parser(args)
    except Exception as err:
        print(err)
        return 1

    # The compilation flags we are setting
    cc, cxx = opts.cc, opts.cxx
    targets = opts.TARGET
    cppflags = shlex.split(opts.cppflags)
    cflags = shlex.split(opts.cflags)
    ldflags = shlex.split(opts.ldflags)
    cxxflags = shlex.split(opts.cxxflags)
    mflags = shlex.split(opts.mflags)

    # Flags shared by CFLAGS and CXXFLAGS
    shared_flags = [
        '-Wno-error=declaration-after-statement',
        '-Wno-error=c++-compat',
        '-Wno-error=deprecated',  # C files are sometimes compiled with CXX
    ]

    cppflags.extend([
        '-DDEBUGLEVEL={}'.format(opts.debug),
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(opts.memory_access),
        '-DFUZZ_RNG_SEED_SIZE={}'.format(opts.fuzz_rng_seed_size),
    ])

    # Fuzzing instrumentation
    assert not (opts.fuzzer and opts.coverage)
    if opts.coverage:
        shared_flags.append(
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp')
    if opts.fuzzer:
        shared_flags.append('-fsanitize=fuzzer')
        # -fsanitize=fuzzer supplies the engine, so none is passed to make.
        opts.lib_fuzzing_engine = ''

    mflags.append('LIB_FUZZING_ENGINE={}'.format(opts.lib_fuzzing_engine))

    # Sanitizers
    if opts.sanitize:
        if opts.sanitize_recover:
            shared_flags.append('-fsanitize-recover=all')
        else:
            shared_flags.append('-fno-sanitize-recover=all')

    if opts.msan:
        shared_flags.append('-fsanitize=memory')
        if opts.msan_track_origins:
            shared_flags.append('-fsanitize-memory-track-origins')
        # Append extra MSAN flags (it might require special setup)
        cppflags.append(opts.msan_extra_cppflags)
        cflags.append(opts.msan_extra_cflags)
        cxxflags.append(opts.msan_extra_cxxflags)
        ldflags.append(opts.msan_extra_ldflags)

    if opts.asan:
        shared_flags.append('-fsanitize=address')

    if opts.ubsan:
        ubsan_flags = ['-fsanitize=undefined']
        if not opts.ubsan_pointer_overflow:
            ubsan_flags.extend(overflow_ubsan_flags(cc, cxx))
        shared_flags.extend(ubsan_flags)

    # Preprocessor defines and make variables for the remaining options
    if opts.stateful_fuzzing:
        cppflags.append('-DSTATEFUL_FUZZING')

    if opts.third_party_seq_prod_obj:
        cppflags.append('-DFUZZ_THIRD_PARTY_SEQ_PROD')
        mflags.append('THIRD_PARTY_SEQ_PROD_OBJ={}'.format(
            opts.third_party_seq_prod_obj))

    if opts.fuzzing_mode:
        cppflags.append('-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')

    if opts.lib_fuzzing_engine == 'libregression.a':
        # The regression driver is built first, ahead of the targets.
        targets = ['libregression.a'] + targets

    cflags.extend(shared_flags)
    cxxflags.extend(shared_flags)

    # Variable assignments handed to make, in the order they are printed
    make_vars = [
        "CC={}".format(cc),
        "CXX={}".format(cxx),
        "CPPFLAGS={}".format(' '.join(cppflags)),
        "CFLAGS={}".format(' '.join(cflags)),
        "CXXFLAGS={}".format(' '.join(cxxflags)),
        "LDFLAGS={}".format(' '.join(ldflags)),
    ]

    print('MFLAGS={}'.format(' '.join(mflags)))
    for assignment in make_vars:
        print(assignment)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)

    build_cmd = ['make', '-j'] + make_vars + mflags + targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0
512
513
def libfuzzer_parser(args):
    """
    Parse the command line of the `libfuzzer` command.

    `args[0]` is removed from `args` and used as the program name.
    Unrecognized arguments are stored in `.extra` on the returned namespace.
    Raises RuntimeError when TARGET is not a known target.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)

    corpora_help = 'Override the default corpora dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET'))
    parser.add_argument('--corpora', type=str, help=corpora_help)

    artifact_help = 'Override the default artifact dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET-crash'))
    parser.add_argument('--artifact', type=str, help=artifact_help)

    seed_help = 'Override the default seed dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET-seed'))
    parser.add_argument('--seed', type=str, help=seed_help)

    target_help = 'Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))
    parser.add_argument('TARGET', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    target = parsed.TARGET
    if target and target not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(target))

    return parsed
552
553
def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """
    Run the fuzzer binary FUZZ_DIR/<target>.

    Directories that are not given default to <target>, <target>-crash and
    <target>-seed under CORPORA_DIR. The corpora and artifact directories are
    created if missing; the seed directory is only used when it exists. The
    binary is invoked with -artifact_prefix pointing into the artifact
    directory, followed by the corpora, artifact and (optional) seed
    directories and then `extra_args`.

    Raises subprocess.CalledProcessError when the fuzzer exits non-zero.
    """
    if corpora is None:
        corpora = abs_join(CORPORA_DIR, target)
    if artifact is None:
        artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
    if seed is None:
        seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    trailing = [] if extra_args is None else extra_args

    binary = abs_join(FUZZ_DIR, target)

    corpora_dir = create(corpora)
    artifact_dir = create(artifact)
    seed_dir = check(seed)

    inputs = [corpora_dir, artifact_dir]
    if seed_dir is not None:
        inputs.append(seed_dir)

    cmd = [binary, '-artifact_prefix={}/'.format(artifact_dir)]
    cmd = cmd + inputs + trailing
    print(' '.join(cmd))
    subprocess.check_call(cmd)
578
579
def libfuzzer_cmd(args):
    """
    Run the `libfuzzer` command.

    Returns 0 after the fuzzer ran, or 1 (after printing the error) when
    argument parsing raises.
    """
    try:
        parsed = libfuzzer_parser(args)
    except Exception as err:
        print(err)
        return 1
    libfuzzer(
        parsed.TARGET,
        corpora=parsed.corpora,
        artifact=parsed.artifact,
        seed=parsed.seed,
        extra_args=parsed.extra)
    return 0
588
589
def afl_parser(args):
    """
    Parse the command line of the `afl` command.

    `args[0]` is removed from `args` and used as the program name.
    Unrecognized arguments are stored in `.extra`. When not given, `.corpora`
    and `.output` default to <TARGET> and <TARGET>-afl under CORPORA_DIR.
    Raises RuntimeError when TARGET is not a known target.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)

    corpora_help = 'Override the default corpora dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET'))
    parser.add_argument('--corpora', type=str, help=corpora_help)

    output_help = 'Override the default AFL output dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET-afl'))
    parser.add_argument('--output', type=str, help=output_help)

    parser.add_argument(
        '--afl-fuzz',
        type=str,
        default=AFL_FUZZ,
        help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ))

    target_help = 'Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))
    parser.add_argument('TARGET', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    target = parsed.TARGET
    if target and target not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(target))

    # Fill in the per-target default directories.
    parsed.corpora = parsed.corpora or abs_join(CORPORA_DIR, target)
    parsed.output = parsed.output or abs_join(
        CORPORA_DIR, '{}-afl'.format(target))

    return parsed
631
632
def afl(args):
    """
    Run the `afl` command: launch afl-fuzz on FUZZ_DIR/<TARGET>.

    The corpora and output directories are created if missing. Returns 1
    (after printing the error) when argument parsing raises, otherwise 0; the
    exit status of afl-fuzz itself is not checked.
    """
    try:
        parsed = afl_parser(args)
    except Exception as err:
        print(err)
        return 1

    binary = abs_join(FUZZ_DIR, parsed.TARGET)
    input_dir = create(parsed.corpora)
    output_dir = create(parsed.output)

    cmd = [parsed.afl_fuzz, '-i', input_dir, '-o', output_dir]
    cmd.extend(parsed.extra)
    cmd.extend([binary, '@@'])
    print(' '.join(cmd))
    subprocess.call(cmd)
    return 0
649
650
def regression(args):
    """
    Run the `regression` command: execute each requested target binary on its
    corpus directory under CORPORA_DIR (created if missing).

    Returns 0 on success, or 1 (after printing the error) when argument
    parsing raises. A failing target raises subprocess.CalledProcessError.
    """
    try:
        description = """
        Runs one or more regression tests.
        The fuzzer should have been built with
        LIB_FUZZING_ENGINE='libregression.a'.
        Takes input from CORPORA.
        """
        parsed = targets_parser(args, description)
    except Exception as err:
        print(err)
        return 1

    for name in parsed.TARGET:
        corpus_dir = create(abs_join(CORPORA_DIR, name))
        binary = abs_join(FUZZ_DIR, name)
        cmd = [binary, corpus_dir]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0
670
671
def gen_parser(args):
    """
    Parse the command line of the `gen` command.

    `args[0]` is removed from `args` and used as the program name.
    Unrecognized arguments are stored in `.extra`. When --seed is not given,
    `.seed` defaults to <TARGET>-seed under CORPORA_DIR.

    Raises RuntimeError when TARGET is not a known target or when the
    decodecorpus binary does not exist.
    """
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--number',
        '-n',
        type=int,
        default=100,
        help='Number of samples to generate')
    parser.add_argument(
        '--max-size-log',
        type=int,
        default=18,
        help='Maximum sample size to generate')
    parser.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    parser.add_argument(
        '--decodecorpus',
        type=str,
        default=DECODECORPUS,
        help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    parser.add_argument(
        '--zstd',
        type=str,
        default=ZSTD,
        help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
    parser.add_argument(
        '--fuzz-rng-seed-size',
        type=int,
        # Share the module-level default instead of repeating the literal.
        default=FUZZ_RNG_SEED_SIZE,
        help="FUZZ_RNG_SEED_SIZE used to generate the samples (must match)"
    )
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    if not args.seed:
        args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET))

    if not os.path.isfile(args.decodecorpus):
        raise RuntimeError("{} is not a file; run 'make -C {} decodecorpus'".
                           format(args.decodecorpus, abs_join(FUZZ_DIR, '..')))

    return args
734
735
def gen(args):
    """
    Run the `gen` command: generate a seed corpus for TARGET.

    decodecorpus is run to produce compressed and decompressed samples in
    temporary directories. Depending on the target's input type the
    decompressed samples, the compressed samples, or dictionaries trained
    from the decompressed samples with `zstd --train` are then copied into
    the seed directory (created if missing).

    Returns 0 on success, or 1 (after printing the error) when argument
    parsing raises. A failing decodecorpus or zstd invocation raises
    subprocess.CalledProcessError.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    with tmpdir() as compressed, tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        # Dictionary targets train on the samples, so generate at least 1000.
        if info.input_type == InputType.DICTIONARY_DATA:
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            # The block size log is capped at 17 for block targets.
            cmd += [
                '--gen-blocks',
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            # Train one dictionary per power-of-two size, starting at 2**9.
            min_dict_size_log = 9
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir, '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the samples into the seed directory unchanged.
        # NOTE(review): no RNG seed bytes are prepended here and
        # args.fuzz_rng_seed_size is unused, although the parser description
        # says the samples are prepended with a seed - confirm which is
        # intended.
        for name in os.listdir(samples):
            shutil.copyfile(abs_join(samples, name), abs_join(seed, name))
    return 0
805
806
def minimize(args):
    """
    Run the `minimize` command for each requested target.

    The target's corpus is merged into <target>_seed_corpus under CORPORA_DIR
    by running the fuzzer with -merge=1, then every file in <target>-crash
    that is not already present by name is copied into the seed corpus.

    Returns 0 on success, or 1 (after printing the error) when argument
    parsing raises. A failing fuzzer run raises
    subprocess.CalledProcessError.
    """
    try:
        description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Merge the corpus + anything else into the seed_corpus
        corpus = abs_join(CORPORA_DIR, target)
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        extra_args = [corpus, "-merge=1"] + args.extra
        libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
        seeds = set(os.listdir(seed_corpus))
        # Copy all crashes directly into the seed_corpus if not already present
        crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
        for crash in os.listdir(crashes):
            if crash not in seeds:
                shutil.copy(abs_join(crashes, crash), seed_corpus)
                seeds.add(crash)
    # Explicit success status, matching the other commands.
    return 0
831
832
def zip_cmd(args):
    """
    Run the `zip` command: for each requested target, zip the files of
    <target>_seed_corpus under CORPORA_DIR into <target>_seed_corpus.zip next
    to it, using the external `zip` program.

    Returns 0 on success, or 1 (after printing the error) when argument
    parsing raises. A failing zip invocation raises
    subprocess.CalledProcessError.
    """
    try:
        description = """
        Zips up the seed corpus.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Zip the seed_corpus
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        zip_file = "{}.zip".format(seed_corpus)
        # zip runs inside the corpus directory; zip_file is an absolute path.
        cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."]
        print(' '.join(cmd))
        subprocess.check_call(cmd, cwd=seed_corpus)
    # Explicit success status, matching the other commands.
    return 0
850
851
def list_cmd(args):
    """
    Run the `list` command: print every known target name, one per line.

    `args` is accepted for uniformity with the other commands and is unused.
    Returns 0.
    """
    print("\n".join(TARGETS))
    # Explicit success status, matching the other commands.
    return 0
854
855
def short_help(args):
    """Print the one-line usage banner, taking the program name from args[0]."""
    usage = "Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(args[0])
    print(usage)
859
860
def help(args):
    """Print the usage banner followed by the option and command summary."""
    short_help(args)
    summary = [
        "\tfuzzing helpers (select a command and pass -h for help)\n",
        "Options:",
        "\t-h, --help\tPrint this message",
        "",
        "Commands:",
        "\tbuild\t\tBuild a fuzzer",
        "\tlibfuzzer\tRun a libFuzzer fuzzer",
        "\tafl\t\tRun an AFL fuzzer",
        "\tregression\tRun a regression test",
        "\tgen\t\tGenerate a seed corpus for a fuzzer",
        "\tminimize\tMinimize the test corpora",
        "\tzip\t\tZip the minimized corpora up",
        "\tlist\t\tList the available targets",
    ]
    for line in summary:
        print(line)
876
877
def main():
    """
    Dispatch sys.argv to the selected command.

    The command name is removed from sys.argv and folded into argv[0] so the
    per-command parsers show it as part of the program name. Returns the
    command's result, or 1 when help is printed or the command is unknown.
    """
    argv = sys.argv
    if len(argv) < 2 or argv[1] in ('-h', '--help', '-H'):
        help(argv)
        return 1

    command = argv.pop(1)
    argv[0] = "{} {}".format(argv[0], command)

    handlers = {
        "build": build,
        "libfuzzer": libfuzzer_cmd,
        "regression": regression,
        "afl": afl,
        "gen": gen,
        "minimize": minimize,
        "zip": zip_cmd,
        "list": list_cmd,
    }
    handler = handlers.get(command)
    if handler is None:
        short_help(argv)
        print("Error: No such command {} (pass -h for help)".format(command))
        return 1
    return handler(argv)


if __name__ == "__main__":
    sys.exit(main())
911