#!/usr/bin/env python3
# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
Script for generating a .proto file and a conversion .cc file for a
templated-library-based JavaScript parser fuzzer.
"""

import sys


def ParseWord(word_string):
  # Every part of the word is either a string surrounded by "" or a
  # placeholder $<number>.
  word_string = word_string.strip()

  parts = []
  while len(word_string) > 0:
    if word_string[0] == '"':
      # A quoted literal: take everything up to the closing quote.
      end_ix = 1 + word_string[1:].index('"')
      parts.append(word_string[1:end_ix])
      word_string = word_string[(end_ix + 1):]
    elif word_string[0] == '$':
      # A placeholder: store the index of the inner token it refers to.
      if ' ' in word_string:
        end_ix = word_string.index(' ')
      else:
        end_ix = len(word_string)
      parts.append(int(word_string[1:end_ix]))
      word_string = word_string[end_ix:]
    else:
      assert False, 'Unexpected character in dictionary word: ' + word_string
    word_string = word_string.lstrip()
  return parts


def GenerateProtoContents(words):
  # One proto enum value per dictionary word.
  contents = ''
  for ix in range(len(words)):
    contents += '    token_value_' + str(ix) + ' = ' + str(ix) + ';\n'
  return contents


def GenerateConversionContents(words):
  # One switch case per dictionary word. Placeholders become recursive
  # token_to_string calls on the corresponding inner tokens.
  contents = ''
  ix = 0
  for word in words:
    contents += '    case ' + str(ix) + ':\n'
    max_part = -1
    first = True
    building_string = ''
    for part in word:
      if not first:
        building_string += ' + std::string(" ") + '
      if isinstance(part, str):
        building_string += 'std::string("' + part + '")'
      else:
        if part > max_part:
          max_part = part
        building_string += ('token_to_string(token.inner_tokens(' + str(part) +
                            '), depth)')
      first = False
    if max_part >= 0:
      # Guard against protos that don't carry enough inner tokens.
      contents += ('      if (token.inner_tokens().size() < ' +
                   str(max_part + 1) + ') return std::string("");\n')
    contents += '      return ' + building_string + ';\n'
    ix += 1
  return contents


def ReadDictionary(filename):
  with open(filename) as input_file:
    lines = input_file.readlines()
  words = []
  for line in lines:
    # Skip comment lines; ParseWord returns [] for blank lines.
    if not line.startswith('#'):
      word = ParseWord(line)
      if len(word) > 0:
        words.append(word)
  return words


def main(argv):
  output_proto_file = argv[1]
  output_cc_file = argv[2]
  input_dict_file = argv[3]

  words = ReadDictionary(input_dict_file)

  proto_header = ('// Generated by generate_javascript_parser_proto.py.\n'
                  '\n'
                  'syntax = "proto2";\n'
                  'package javascript_parser_proto_fuzzer;\n'
                  '\n'
                  'message Token {\n'
                  '  enum Value {\n')

  proto_footer = ('  }\n'
                  '  required Value value = 1;\n'
                  '  repeated Token inner_tokens = 2;\n'
                  '}\n'
                  '\n'
                  'message Source {\n'
                  '  required bool is_module = 1;\n'
                  '  repeated Token tokens = 2;\n'
                  '}\n')

  proto_contents = proto_header + GenerateProtoContents(words) + proto_footer

  with open(output_proto_file, 'w') as f:
    f.write(proto_contents)

  conversion_header = (
      '// Generated by generate_javascript_parser_proto.py.\n'
      '\n'
      '#include "testing/libfuzzer/fuzzers/'
      'javascript_parser_proto_to_string.h"\n'
      '\n'
      '// Bound calls to token_to_string to prevent memory usage from growing\n'
      '// too much.\n'
      'const int kMaxRecursiveDepth = 9;\n'
      '\n'
      'std::string token_to_string(\n'
      '    const javascript_parser_proto_fuzzer::Token& token, int depth)'
      ' {\n'
      '  if (++depth == kMaxRecursiveDepth) return std::string("");\n'
      '  switch(token.value()) {\n')

  conversion_footer = ('    default: break;\n'
                       '  }\n'
                       '  return std::string("");\n'
                       '}\n')

  conversion_contents = (conversion_header + GenerateConversionContents(words) +
                         conversion_footer)

  with open(output_cc_file, 'w') as f:
    f.write(conversion_contents)


if __name__ == '__main__':
  main(sys.argv)
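
# ---------------------------------------------------------------------------
# Illustrative sketch only: the dictionary entry and file names below are
# hypothetical, not taken from any real dictionary file.
#
# A non-comment input line such as
#
#   "typeof" $0
#
# is parsed by ParseWord() into ['typeof', 0]. If it is word number 5,
# GenerateConversionContents() emits:
#
#     case 5:
#       if (token.inner_tokens().size() < 1) return std::string("");
#       return std::string("typeof") + std::string(" ") + token_to_string(token.inner_tokens(0), depth);
#
# Invocation, with the argument order used by main() above:
#
#   generate_javascript_parser_proto.py out.proto out.cc javascript_parser.dict
# ---------------------------------------------------------------------------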