#!/usr/bin/env python3
# Copyright 2020 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Helper tool to generate cross-compiled syscall and constant tables to JSON.

This script takes the LLVM IR of libconstants.gen.c and libsyscalls.gen.c and
generates the `constants.json` file with that. LLVM IR files are moderately
architecture-neutral (at least for this case).
"""

import argparse
import collections
import json
import re
import sys


# Matches the definition of a NUL-terminated string constant; captures the
# symbol (e.g. `@.str.5`) and the string contents without the trailing NUL.
_STRING_CONSTANT_RE = re.compile(r'(@[a-zA-Z0-9.]+) = .*c"([^"\\]+)\\00".*')
# Matches one `{ ... }` initializer of a constant_entry / syscall_entry struct
# and captures the text between the braces.
_TABLE_ENTRY_RE = re.compile(
    r"%struct.(?:constant|syscall)_entry\s*{\s*([^}]+)\s*}"
)
# This looks something like
#
#  i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.5, i32 0, i32 0), i32 5
#
# For arm-v7a. What we are interested in are the @.str.x and the very last
# number.
_TABLE_ENTRY_CONTENTS = re.compile(r".*?(null|@[a-zA-Z0-9.]+).* (-?\d+)")

# pylint: disable=line-too-long
# When testing clang-r458909, we found a new constant_entry pattern:
#   %struct.constant_entry { ptr @.str.894, i32 ptrtoint (ptr @.str.895 to i32) },
# For the same constant, current clang-r458507 generates:
#   %struct.constant_entry { i8* getelementptr inbounds
#    ([19 x i8], [19 x i8]* @.str.894, i32 0, i32 0),
#    i32 ptrtoint ([9 x i8]* @.str.895 to i32) },
# This is for a char* constant defined in linux-x86/libconstants.gen.c:
#   { "FS_KEY_DESC_PREFIX", (unsigned long) FS_KEY_DESC_PREFIX },
# and FS_KEY_DESC_PREFIX is defined as a char* "fscrypt:"
# Current output for that constant in constants.json is:
#   "FS_KEY_DESC_PREFIX": 0,
# but that value does not seem to be useful or accurate.
# So here we define a pattern to ignore such pointer constants:
# pylint: enable=line-too-long
_IGNORED_ENTRY_CONTENTS = re.compile(r".*? ptrto.* \(.*\)")

# The constants can only carry numeric values, so the architecture name is
# derived from MINIJAIL_ARCH_NR through this hardcoded mapping.
_ARCH_NAMES = {
    0xC000003E: "x86_64",
    0x40000003: "x86",
    0xC00000B7: "arm64",
    0x40000028: "arm",
    0xC00000F3: "riscv64",
}

ParseResults = collections.namedtuple(
    "ParseResults", ["table_name", "table_entries"]
)

HELP_EPILOG = """\
Generate LLVM IR: clang -S -emit-llvm libconstants.gen.c libsyscalls.gen.c
"""


def parse_llvm_ir(ir):
    """Parses a single LLVM IR file.

    String constant definitions are collected as they are encountered, and
    table entries are resolved against the ones seen so far, so the string
    constants must appear before the table line that references them.

    Args:
        ir: An iterable of text lines (e.g. an open text file).

    Returns:
        A ParseResults whose `table_name` is "syscalls" or "constants" (or ""
        when no table line was found) and whose `table_entries` maps each
        entry name to its integer value, in table order.

    Raises:
        ValueError: A table entry matched neither the expected entry pattern
            nor the ignored pointer-constant pattern.
        KeyError: A table entry refers to a string constant that has not been
            defined earlier in `ir`.
    """
    string_constants = collections.OrderedDict()
    table_entries = collections.OrderedDict()
    table_name = ""
    for line in ir:
        string_constant_match = _STRING_CONSTANT_RE.match(line)
        if string_constant_match:
            symbol, text = string_constant_match.groups()
            string_constants[symbol] = text
            continue

        is_syscall_table = "@syscall_table" in line
        if not is_syscall_table and "@constant_table" not in line:
            continue

        table_name = "syscalls" if is_syscall_table else "constants"
        for entry in _TABLE_ENTRY_RE.findall(line):
            contents = _TABLE_ENTRY_CONTENTS.match(entry)
            if not contents:
                # Pointer-valued constants have no usable number; skip them.
                if _IGNORED_ENTRY_CONTENTS.match(entry):
                    continue
                raise ValueError("Failed to parse table entry %r" % entry)
            name, value = contents.groups()
            if name == "null":
                # This is the end-of-table marker.
                break
            table_entries[string_constants[name]] = int(value)

    return ParseResults(table_name=table_name, table_entries=table_entries)


def _close_unless_std_stream(stream):
    """Closes `stream` unless it is the process-wide stdin or stdout."""
    # argparse.FileType hands back sys.stdin / sys.stdout for "-"; those are
    # not ours to close.
    if stream is not sys.stdin and stream is not sys.stdout:
        stream.close()


def _build_constants_json(ir_files):
    """Parses every IR file and assembles the dictionary to serialize."""
    constants_json = {}
    for ir in ir_files:
        try:
            parse_results = parse_llvm_ir(ir)
        finally:
            _close_unless_std_stream(ir)
        constants_json[parse_results.table_name] = parse_results.table_entries

    # Populate the top-level fields.
    constants_json["arch_nr"] = constants_json["constants"]["MINIJAIL_ARCH_NR"]
    constants_json["bits"] = constants_json["constants"]["MINIJAIL_ARCH_BITS"]

    # It is a bit more complicated to generate the arch_name, since the
    # constants can only output numeric values. Use a hardcoded mapping
    # instead.
    arch_nr = constants_json["arch_nr"]
    if arch_nr not in _ARCH_NAMES:
        raise ValueError("Unknown architecture: 0x%08X" % arch_nr)
    constants_json["arch_name"] = _ARCH_NAMES[arch_nr]
    return constants_json


def main(argv=None):
    """Main entrypoint.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            `sys.argv[1:]` when None.

    Returns:
        0 on success.

    Raises:
        ValueError: A table entry could not be parsed, or MINIJAIL_ARCH_NR is
            not one of the known architectures.
        KeyError: No constants table was parsed, or it lacks MINIJAIL_ARCH_NR
            or MINIJAIL_ARCH_BITS.
    """

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description=__doc__, epilog=HELP_EPILOG)
    parser.add_argument(
        "--output",
        help="The path of the generated constants.json file.",
        type=argparse.FileType("w"),
        required=True,
    )
    parser.add_argument(
        "llvm_ir_files",
        help="An LLVM IR file with one of the {constants,syscall} table.",
        metavar="llvm_ir_file",
        nargs="+",
        type=argparse.FileType("r"),
    )
    opts = parser.parse_args(argv)

    try:
        constants_json = _build_constants_json(opts.llvm_ir_files)
        # NOTE(review): indent string kept exactly as found; confirm the
        # intended width if the output formatting matters to consumers.
        json.dump(constants_json, opts.output, indent=" ")
    finally:
        # Close explicitly so the output is flushed when main() returns
        # rather than whenever the file object happens to be finalized.
        _close_unless_std_stream(opts.output)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))