• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2
3# ################################################################
4# Copyright (c) Facebook, Inc.
5# All rights reserved.
6#
7# This source code is licensed under both the BSD-style license (found in the
8# LICENSE file in the root directory of this source tree) and the GPLv2 (found
9# in the COPYING file in the root directory of this source tree).
10# You may select, at your option, one of the above-listed licenses.
11# ##########################################################################
12
13import argparse
14import contextlib
15import os
16import re
17import shlex
18import shutil
19import subprocess
20import sys
21import tempfile
22
23
def abs_join(a, *p):
    """
    Join `a` with the path components in `p` and return the result as an
    absolute, normalized path.
    """
    joined = os.path.join(a, *p)
    return os.path.abspath(joined)
26
27
class InputType(object):
    """
    Kind of input data a fuzz target consumes.
    """
    RAW_DATA, COMPRESSED_DATA, DICTIONARY_DATA = 1, 2, 3
32
33
class FrameType(object):
    """
    Framing of the data a fuzz target works on (zstd frames or raw blocks).
    """
    ZSTD, BLOCK = 1, 2
37
38
class TargetInfo(object):
    """
    Describes a fuzz target: the InputType it consumes and the FrameType it
    operates on (zstd frames unless stated otherwise).
    """

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        self.input_type, self.frame_type = input_type, frame_type
43
44
# Constants
# Directory containing this script; fuzz target binaries are looked up here.
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
# Root directory holding the per-target corpus, crash, seed and AFL dirs.
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
# Maps each fuzz target name to the kind of input (and frame type) it uses.
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
    'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
    'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
    'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
    'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
    'huf_round_trip': TargetInfo(InputType.RAW_DATA),
    'huf_decompress': TargetInfo(InputType.RAW_DATA),
}
# Names of all known fuzz targets.
TARGETS = list(TARGET_INFO.keys())
# 'all' is accepted on the command line as shorthand for every target.
ALL_TARGETS = TARGETS + ['all']
# Default value of FUZZ_RNG_SEED_SIZE.
FUZZ_RNG_SEED_SIZE = 4

# Standard environment variables
# Each value is read once at import time; the literal is the fallback used
# when the variable is not set.
CC = os.environ.get('CC', 'cc')
CXX = os.environ.get('CXX', 'c++')
CPPFLAGS = os.environ.get('CPPFLAGS', '')
CFLAGS = os.environ.get('CFLAGS', '-O3')
# CXXFLAGS falls back to whatever CFLAGS resolved to.
CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
LDFLAGS = os.environ.get('LDFLAGS', '')
MFLAGS = os.environ.get('MFLAGS', '-j')

# Fuzzing environment variables
# 'libregression.a' is also added as a make target by build() when selected.
LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz')
# Default binary locations are resolved relative to this script's directory.
DECODECORPUS = os.environ.get('DECODECORPUS',
                              abs_join(FUZZ_DIR, '..', 'decodecorpus'))
ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))

# Sanitizer environment variables
# Extra flags appended by build() only when MSAN is enabled.
MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')
94
95
def create(r):
    """
    Return the absolute path of `r`, creating the directory (including any
    missing parents) when it does not exist yet.
    """
    path = os.path.abspath(r)
    if os.path.isdir(path):
        return path
    os.makedirs(path)
    return path
101
102
def check(r):
    """
    Return the absolute path of `r` if it is an existing directory,
    otherwise None.
    """
    path = os.path.abspath(r)
    return path if os.path.isdir(path) else None
108
109
@contextlib.contextmanager
def tmpdir():
    """
    Context manager yielding the path of a fresh temporary directory.
    The directory tree is removed on exit; removal errors are ignored.
    """
    scratch = tempfile.mkdtemp()
    try:
        yield scratch
    finally:
        # Best-effort cleanup, also when the body raised
        shutil.rmtree(scratch, ignore_errors=True)
117
118
def parse_targets(in_targets):
    """
    Validate and de-duplicate a list of target names.
    Empty names are skipped and 'all' expands to every entry of TARGETS.
    Raises RuntimeError for an unknown name.
    The order of the returned list is unspecified.
    """
    selected = set()
    for name in in_targets:
        if not name:
            continue
        if name == 'all':
            selected = selected.union(TARGETS)
            continue
        if name not in TARGETS:
            raise RuntimeError('{} is not a valid target'.format(name))
        selected.add(name)
    return list(selected)
131
132
def targets_parser(args, description):
    """
    Parse a command line that consists of zero or more TARGET names.
    `args[0]` is consumed as the program name. Unrecognized arguments are
    stored in `extra` and TARGET is validated through parse_targets().
    """
    prog = args.pop(0)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(
        ', '.join(ALL_TARGETS))

    parser = argparse.ArgumentParser(prog=prog, description=description)
    parser.add_argument('TARGET', nargs='*', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown
    parsed.TARGET = parse_targets(parsed.TARGET)
    return parsed
146
147
def parse_env_flags(args, flags):
    """
    Update the sanitizer switches of `args` from compiler flags.

    Sanitizers named in -fsanitize= options are turned on, those named in
    -fno-sanitize= options are turned off, anything else keeps the value
    already present in `args`. Sets `args.sanitize` when any of ASAN, MSAN or
    UBSAN ends up enabled and returns `args`.
    Raises RuntimeError when a sanitizer is both enabled and disabled.
    """
    enabled = set(','.join(
        re.findall('-fsanitize=((?:[a-z]+,?)+)', flags)).split(','))
    disabled = set(','.join(
        re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags)).split(','))

    def resolve(sanitizer, default):
        turned_on = sanitizer in enabled
        turned_off = sanitizer in disabled
        if turned_on and turned_off:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=sanitizer))
        if turned_on:
            return True
        return False if turned_off else default

    args.asan = resolve('address', args.asan)
    args.msan = resolve('memory', args.msan)
    args.ubsan = resolve('undefined', args.ubsan)
    args.sanitize = args.asan or args.msan or args.ubsan
    return args
175
176
def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Runs `cc --version` and `cxx --version` and prints the output of the
    former. Returns a `(compiler, version)` tuple where `compiler` is
    'clang' or 'gcc' and `version` is a 3-tuple of ints taken from the first
    x.y.z number in the output of `cc`. Returns `(None, None)` when `cc` is
    neither clang nor gcc.
    Raises RuntimeError when `cxx` does not belong to the same compiler
    family as `cc`, or when no x.y.z version can be found for a recognized
    compiler.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    # The banner is printed for diagnostics only, so bytes outside ASCII
    # are replaced instead of aborting the build with a decode error.
    print("{} --version:\n{}".format(
        cc, cc_version_bytes.decode('ascii', 'replace')))

    if b'clang' in cc_version_bytes:
        if b'clang' not in cxx_version_bytes:
            raise RuntimeError(
                '{} is clang but {} is not'.format(cc, cxx))
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes or b'GCC' in cc_version_bytes:
        if not (b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes):
            raise RuntimeError(
                '{} is gcc but {} is not'.format(cc, cxx))
        compiler = 'gcc'
    else:
        return None, None

    # Raw bytes literal so that `\.` reaches the regex engine untouched
    version_match = re.search(br'([0-9]+)\.([0-9]+)\.([0-9]+)',
                              cc_version_bytes)
    if version_match is None:
        raise RuntimeError(
            'Unable to determine the version of {}'.format(cc))
    version = tuple(int(part) for part in version_match.groups())
    return compiler, version
198
199
def overflow_ubsan_flags(cc, cxx):
    """
    Return the -fno-sanitize flags that turn off an overflow check of UBSAN
    for the detected compiler: signed-integer-overflow for gcc before 8.0.0,
    pointer-overflow for other gcc versions and for clang 5.0.0 or newer,
    and nothing for any other compiler.
    """
    compiler, version = compiler_version(cc, cxx)
    is_gcc = compiler == 'gcc'
    if is_gcc and version < (8, 0, 0):
        return ['-fno-sanitize=signed-integer-overflow']
    if is_gcc or (compiler == 'clang' and version >= (5, 0, 0)):
        return ['-fno-sanitize=pointer-overflow']
    return []
207
208
def build_parser(args):
    """
    Parse the command line of the `build` command.

    `args[0]` is consumed as the program name. Most options default to the
    module-level values read from the environment. After parsing, the
    sanitizer switches are additionally derived from -fsanitize= and
    -fno-sanitize= options found in the cppflags, cflags, cxxflags and
    ldflags (see parse_env_flags).

    Returns the parsed namespace. TARGET is returned as given on the command
    line; it is not validated here.
    Raises RuntimeError when the sanitizer options are inconsistent.
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{}')".format(
                  LIB_FUZZING_ENGINE)))

    # Coverage instrumentation and -fsanitize=fuzzer exclude each other
    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    parser.add_argument(
        '--enable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_true',
        help='Enable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    parser.add_argument(
        '--msan-extra-cppflags',
        dest='msan_extra_cppflags',
        type=str,
        default=MSAN_EXTRA_CPPFLAGS,
        help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
        format(MSAN_EXTRA_CPPFLAGS))
    parser.add_argument(
        '--msan-extra-cflags',
        dest='msan_extra_cflags',
        type=str,
        default=MSAN_EXTRA_CFLAGS,
        help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
            MSAN_EXTRA_CFLAGS))
    parser.add_argument(
        '--msan-extra-cxxflags',
        dest='msan_extra_cxxflags',
        type=str,
        default=MSAN_EXTRA_CXXFLAGS,
        help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
        format(MSAN_EXTRA_CXXFLAGS))
    parser.add_argument(
        '--msan-extra-ldflags',
        dest='msan_extra_ldflags',
        type=str,
        default=MSAN_EXTRA_LDFLAGS,
        help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
        format(MSAN_EXTRA_LDFLAGS))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(
            FUZZ_RNG_SEED_SIZE))
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--cc',
        dest='cc',
        type=str,
        default=CC,
        help="CC (default: $CC='{}')".format(CC))
    parser.add_argument(
        '--cxx',
        dest='cxx',
        type=str,
        default=CXX,
        help="CXX (default: $CXX='{}')".format(CXX))
    parser.add_argument(
        '--cppflags',
        dest='cppflags',
        type=str,
        default=CPPFLAGS,
        help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
    parser.add_argument(
        '--cflags',
        dest='cflags',
        type=str,
        default=CFLAGS,
        help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
    parser.add_argument(
        '--cxxflags',
        dest='cxxflags',
        type=str,
        default=CXXFLAGS,
        help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
    parser.add_argument(
        '--ldflags',
        dest='ldflags',
        type=str,
        default=LDFLAGS,
        help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    # Sanitizers requested through the compiler flags count as well
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.ubsan_pointer_overflow and not args.ubsan:
        raise RuntimeError('--enable-ubsan-pointer-overflow requires UBSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args
385
386
def build(args):
    """
    Implements the `build` command: assemble the compiler, linker and make
    flags from the parsed options, print them, then run `make clean` followed
    by `make` for the requested targets.

    Returns 1 when the arguments cannot be parsed and 0 after a successful
    build. A failing make invocation raises subprocess.CalledProcessError.
    """
    try:
        args = build_parser(args)
    except Exception as e:
        print(e)
        return 1

    # The compilation flags we are setting
    targets = args.TARGET
    cc, cxx = args.cc, args.cxx
    cppflags = shlex.split(args.cppflags)
    cflags = shlex.split(args.cflags)
    ldflags = shlex.split(args.ldflags)
    cxxflags = shlex.split(args.cxxflags)
    mflags = shlex.split(args.mflags)
    # Flags shared by cflags and cxxflags
    common_flags = []

    cppflags.extend([
        '-DDEBUGLEVEL={}'.format(args.debug),
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access),
        '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size),
    ])

    # Fuzzing engine selection
    assert not (args.fuzzer and args.coverage)
    if args.coverage:
        common_flags.append(
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp')
    if args.fuzzer:
        common_flags.append('-fsanitize=fuzzer')
        # -fsanitize=fuzzer brings its own engine
        args.lib_fuzzing_engine = ''
    mflags.append('LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine))

    # Sanitizers
    if args.sanitize:
        if args.sanitize_recover:
            common_flags.append('-fsanitize-recover=all')
        else:
            common_flags.append('-fno-sanitize-recover=all')

    if args.msan:
        common_flags.append('-fsanitize=memory')
        if args.msan_track_origins:
            common_flags.append('-fsanitize-memory-track-origins')
        # Append extra MSAN flags (it might require special setup)
        cppflags.append(args.msan_extra_cppflags)
        cflags.append(args.msan_extra_cflags)
        cxxflags.append(args.msan_extra_cxxflags)
        ldflags.append(args.msan_extra_ldflags)

    if args.asan:
        common_flags.append('-fsanitize=address')

    if args.ubsan:
        common_flags.append('-fsanitize=undefined')
        if not args.ubsan_pointer_overflow:
            common_flags.extend(overflow_ubsan_flags(cc, cxx))

    # Preprocessor switches
    if args.stateful_fuzzing:
        cppflags.append('-DSTATEFUL_FUZZING')
    if args.fuzzing_mode:
        cppflags.append('-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')

    # The regression driver has to be built before the targets that use it
    if args.lib_fuzzing_engine == 'libregression.a':
        targets = ['libregression.a'] + targets

    cflags.extend(common_flags)
    cxxflags.extend(common_flags)

    # Variable assignments handed to make, in the order they are printed
    make_vars = [
        "CC={}".format(cc),
        "CXX={}".format(cxx),
        "CPPFLAGS={}".format(' '.join(cppflags)),
        "CFLAGS={}".format(' '.join(cflags)),
        "CXXFLAGS={}".format(' '.join(cxxflags)),
        "LDFLAGS={}".format(' '.join(ldflags)),
    ]

    print('MFLAGS={}'.format(' '.join(mflags)))
    for assignment in make_vars:
        print(assignment)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)

    build_cmd = ['make'] + make_vars + mflags + targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0
496
497
def libfuzzer_parser(args):
    """
    Parse the command line of the `libfuzzer` command.
    `args[0]` is consumed as the program name. Unrecognized arguments are
    collected in `extra`. Raises RuntimeError for an unknown TARGET.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)

    # (flag, help template, directory name shown as the default)
    overrides = (
        ('--corpora',
         'Override the default corpora dir (default: {})',
         'TARGET'),
        ('--artifact',
         'Override the default artifact dir (default: {})',
         'TARGET-crash'),
        ('--seed',
         'Override the default seed dir (default: {})',
         'TARGET-seed'),
    )
    for flag, help_template, default_name in overrides:
        parser.add_argument(
            flag,
            type=str,
            help=help_template.format(abs_join(CORPORA_DIR, default_name)))
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))
    return parsed
536
537
def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """
    Run the fuzzer binary `target` found in FUZZ_DIR.

    Directories that are not given default to CORPORA_DIR/<target>,
    CORPORA_DIR/<target>-crash and CORPORA_DIR/<target>-seed. The corpora and
    artifact directories are created when missing; the seed directory is only
    passed along when it already exists. `extra_args` is appended to the
    command line. Raises subprocess.CalledProcessError when the fuzzer exits
    with a non-zero status.
    """
    if corpora is None:
        corpora = abs_join(CORPORA_DIR, target)
    if artifact is None:
        artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
    if seed is None:
        seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    if extra_args is None:
        extra_args = []

    binary = abs_join(FUZZ_DIR, target)

    corpus_dir = create(corpora)
    artifact = create(artifact)
    seed = check(seed)

    # The artifact directory doubles as an input directory
    inputs = [corpus_dir, artifact]
    if seed is not None:
        inputs.append(seed)

    cmd = [binary, '-artifact_prefix={}/'.format(artifact)]
    cmd += inputs + extra_args
    print(' '.join(cmd))
    subprocess.check_call(cmd)
562
563
def libfuzzer_cmd(args):
    """
    Implements the `libfuzzer` command.
    Returns 1 when the arguments cannot be parsed, 0 once the fuzzer has run.
    """
    try:
        parsed = libfuzzer_parser(args)
    except Exception as e:
        print(e)
        return 1
    libfuzzer(
        parsed.TARGET,
        parsed.corpora,
        parsed.artifact,
        parsed.seed,
        parsed.extra)
    return 0
572
573
def afl_parser(args):
    """
    Parse the command line of the `afl` command.
    `args[0]` is consumed as the program name. Unrecognized arguments are
    collected in `extra`. Missing corpora and output directories default to
    CORPORA_DIR/<TARGET> and CORPORA_DIR/<TARGET>-afl.
    Raises RuntimeError for an unknown TARGET.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)
    add = parser.add_argument
    add('--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    add('--output',
        type=str,
        help='Override the default AFL output dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-afl')))
    add('--afl-fuzz',
        type=str,
        default=AFL_FUZZ,
        help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ))
    add('TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    # Fall back to the per-target directories below CORPORA_DIR
    if not parsed.corpora:
        parsed.corpora = abs_join(CORPORA_DIR, parsed.TARGET)
    if not parsed.output:
        parsed.output = abs_join(CORPORA_DIR, '{}-afl'.format(parsed.TARGET))

    return parsed
615
616
def afl(args):
    """
    Implements the `afl` command: run afl-fuzz on a target, creating the
    input and output directories when they are missing.
    Returns 1 when the arguments cannot be parsed and 0 otherwise; the exit
    status of afl-fuzz itself is not checked.
    """
    try:
        args = afl_parser(args)
    except Exception as e:
        print(e)
        return 1

    binary = abs_join(FUZZ_DIR, args.TARGET)
    in_dir = create(args.corpora)
    out_dir = create(args.output)

    cmd = [args.afl_fuzz, '-i', in_dir, '-o', out_dir]
    cmd.extend(args.extra)
    # '@@' is passed to afl-fuzz right after the target binary
    cmd.extend([binary, '@@'])
    print(' '.join(cmd))
    subprocess.call(cmd)
    return 0
633
634
def regression(args):
    """
    Implements the `regression` command: run every requested target binary
    once with its corpus directory as the only argument.
    Returns 1 when the arguments cannot be parsed and 0 when all targets
    succeed. A failing target raises subprocess.CalledProcessError.
    """
    try:
        description = """
        Runs one or more regression tests.
        The fuzzer should have been built with with
        LIB_FUZZING_ENGINE='libregression.a'.
        Takes input from CORPORA.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for name in args.TARGET:
        corpus_dir = create(abs_join(CORPORA_DIR, name))
        binary = abs_join(FUZZ_DIR, name)
        cmd = [binary, corpus_dir]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0
654
655
def gen_parser(args):
    """
    Parse the command line of the `gen` command.
    `args[0]` is consumed as the program name. Unrecognized arguments are
    collected in `extra`. A missing seed directory defaults to
    CORPORA_DIR/<TARGET>-seed.
    Raises RuntimeError for an unknown TARGET or when the decodecorpus binary
    is not a file.
    """
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)
    add = parser.add_argument
    add('--number', '-n',
        type=int,
        default=100,
        help='Number of samples to generate')
    add('--max-size-log',
        type=int,
        default=18,
        help='Maximum sample size to generate')
    add('--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    add('--decodecorpus',
        type=str,
        default=DECODECORPUS,
        help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    add('--zstd',
        type=str,
        default=ZSTD,
        help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
    add('--fuzz-rng-seed-size',
        type=int,
        default=4,
        help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)")
    add('TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    if not parsed.seed:
        parsed.seed = abs_join(CORPORA_DIR, '{}-seed'.format(parsed.TARGET))

    # decodecorpus is not built by this script
    if not os.path.isfile(parsed.decodecorpus):
        raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
                           format(parsed.decodecorpus,
                                  abs_join(FUZZ_DIR, '..')))

    return parsed
718
719
def gen(args):
    """
    Implements the `gen` command: generate samples with decodecorpus and copy
    the ones matching the input type of the target into the seed directory.

    Raw-data targets receive the decompressed samples, compressed-data
    targets the compressed ones. For dictionary targets, dictionaries of
    several sizes are trained with `zstd --train` on the decompressed
    samples, and at least 1000 samples are generated for the training.

    Returns 1 when the arguments cannot be parsed and 0 on success. A failing
    decodecorpus or zstd invocation raises subprocess.CalledProcessError.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    with tmpdir() as compressed, tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        # Dictionary training gets at least 1000 samples
        if info.input_type == InputType.DICTIONARY_DATA:
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            cmd += [
                '--gen-blocks',
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            # Train one dictionary per power-of-two size, starting at 512
            # bytes; at least one dictionary is always produced.
            min_dict_size_log = 9
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir, '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the samples over to the seed directory.
        # NOTE(review): the samples are copied unchanged; no RNG seed bytes
        # are prepended and args.fuzz_rng_seed_size is not used here, even
        # though the command description mentions it. Confirm this is the
        # intended behavior.
        CHUNK_SIZE = 131072
        for name in os.listdir(samples):
            samplename = abs_join(samples, name)
            outname = abs_join(seed, name)
            with open(samplename, 'rb') as sample:
                with open(outname, 'wb') as out:
                    shutil.copyfileobj(sample, out, CHUNK_SIZE)
    return 0
789
790
def minimize(args):
    """
    Implements the `minimize` command: for every requested target, merge its
    corpus into CORPORA_DIR/<target>_seed_corpus with libfuzzer's -merge=1 and
    then copy over the crash files that are not part of it yet.
    Returns 1 when the arguments cannot be parsed.
    """
    try:
        description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for name in args.TARGET:
        # Merge the corpus + anything else into the seed_corpus
        corpus_dir = abs_join(CORPORA_DIR, name)
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(name))
        merge_args = [corpus_dir, "-merge=1"] + args.extra
        libfuzzer(name, corpora=seed_corpus, extra_args=merge_args)
        known = set(os.listdir(seed_corpus))

        # Copy all crashes directly into the seed_corpus if not already present
        crash_dir = abs_join(CORPORA_DIR, '{}-crash'.format(name))
        for crash in os.listdir(crash_dir):
            if crash in known:
                continue
            shutil.copy(abs_join(crash_dir, crash), seed_corpus)
            known.add(crash)
815
816
def zip_cmd(args):
    """
    Implements the `zip` command: archive CORPORA_DIR/<target>_seed_corpus
    into <target>_seed_corpus.zip next to it for every requested target.
    Returns 1 when the arguments cannot be parsed. A failing zip invocation
    raises subprocess.CalledProcessError.
    """
    try:
        description = """
        Zips up the seed corpus.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for name in args.TARGET:
        corpus_dir = abs_join(CORPORA_DIR, "{}_seed_corpus".format(name))
        archive = "{}.zip".format(corpus_dir)
        # zip runs inside the corpus directory and archives "."
        cmd = ["zip", "-r", "-q", "-j", "-9", archive, "."]
        print(' '.join(cmd))
        subprocess.check_call(cmd, cwd=corpus_dir)
834
835
def list_cmd(args):
    """
    Implements the `list` command: print every known target, one per line.
    `args` is not used.
    """
    listing = "\n".join(TARGETS)
    print(listing)
838
839
def short_help(args):
    """
    Print the one-line usage message followed by an empty line, using
    `args[0]` as the program name.
    """
    usage = "Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(args[0])
    print(usage)
843
844
def help(args):
    """
    Print the usage message followed by the list of options and commands.
    """
    short_help(args)
    lines = [
        "\tfuzzing helpers (select a command and pass -h for help)\n",
        "Options:",
        "\t-h, --help\tPrint this message",
        "",
        "Commands:",
        "\tbuild\t\tBuild a fuzzer",
        "\tlibfuzzer\tRun a libFuzzer fuzzer",
        "\tafl\t\tRun an AFL fuzzer",
        "\tregression\tRun a regression test",
        "\tgen\t\tGenerate a seed corpus for a fuzzer",
        "\tminimize\tMinimize the test corpora",
        "\tzip\t\tZip the minimized corpora up",
        "\tlist\t\tList the available targets",
    ]
    print("\n".join(lines))
860
861
def main():
    """
    Entry point: dispatch `sys.argv` to the command named by its first
    argument and return that command's result.
    Returns 1 after printing help, and 1 for an unknown command.
    """
    args = sys.argv
    if len(args) < 2 or args[1] in ('-h', '--help', '-H'):
        help(args)
        return 1

    # Fold the command into the program name used in usage messages
    command = args.pop(1)
    args[0] = "{} {}".format(args[0], command)

    commands = {
        "build": build,
        "libfuzzer": libfuzzer_cmd,
        "regression": regression,
        "afl": afl,
        "gen": gen,
        "minimize": minimize,
        "zip": zip_cmd,
        "list": list_cmd,
    }
    handler = commands.get(command)
    if handler is not None:
        return handler(args)

    short_help(args)
    print("Error: No such command {} (pass -h for help)".format(command))
    return 1
891
892
# Run main() only when executed as a script; its return value becomes the
# exit status of the process.
if __name__ == "__main__":
    sys.exit(main())
895