• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# Copyright 2020 The ChromiumOS Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Helper tool to generate cross-compiled syscall and constant tables to JSON.
7
8This script takes the LLVM IR of libconstants.gen.c and libsyscalls.gen.c and
9generates the `constants.json` file with that. LLVM IR files are moderately
10architecture-neutral (at least for this case).
11"""
12
13import argparse
14import collections
15import json
16import re
17import sys
18
19
20_STRING_CONSTANT_RE = re.compile(r'(@[a-zA-Z0-9.]+) = .*c"([^"\\]+)\\00".*')
21_TABLE_ENTRY_RE = re.compile(
22    r"%struct.(?:constant|syscall)_entry\s*{\s*([^}]+)\s*}"
23)
24# This looks something like
25#
26#  i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.5, i32 0, i32 0), i32 5
27#
28# For arm-v7a. What we are interested in are the @.str.x and the very last
29# number.
30_TABLE_ENTRY_CONTENTS = re.compile(r".*?(null|@[a-zA-Z0-9.]+).* (-?\d+)")
31
32# pylint: disable=line-too-long
33# When testing clang-r458909, we found a new constant_entry pattern:
34#   %struct.constant_entry { ptr @.str.894, i32 ptrtoint (ptr @.str.895 to i32) },
35# For the same constant, current clang-r458507 generates:
36#   %struct.constant_entry { i8* getelementptr inbounds
37#    ([19 x i8], [19 x i8]* @.str.894, i32 0, i32 0),
38#    i32 ptrtoint ([9 x i8]* @.str.895 to i32) },
39# This is for a char* constant defined in linux-x86/libconstants.gen.c:
40#   { "FS_KEY_DESC_PREFIX", (unsigned long) FS_KEY_DESC_PREFIX },
41# and FS_KEY_DESC_PREFIX is defined as a char* "fscrypt:"
42# Current output for that constant in constants.json is:
43#   "FS_KEY_DESC_PREFIX": 0,
44# but that value does not seem to be useful or accurate.
45# So here we define a pattern to ignore such pointer constants:
46# pylint: enable=line-too-long
47_IGNORED_ENTRY_CONTENTS = re.compile(r".*? ptrto.* \(.*\)")
48
49ParseResults = collections.namedtuple(
50    "ParseResults", ["table_name", "table_entries"]
51)
52
53HELP_EPILOG = """\
54Generate LLVM IR: clang -S -emit-llvm libconstants.gen.c libsyscalls.gen.c
55"""
56
57
def parse_llvm_ir(ir):
    """Extracts the syscall or constant table from a single LLVM IR file.

    Args:
        ir: An iterable of text lines of LLVM IR, e.g. an open file.

    Returns:
        A ParseResults. `table_name` is "syscalls" or "constants" depending
        on which table was seen last, or "" if no table line was found.
        `table_entries` maps each entry's name to its integer value, in the
        order the entries appear.

    Raises:
        ValueError: A table entry could not be parsed and is not one of the
            deliberately ignored pointer constants.
        KeyError: A table entry refers to a string constant that was not
            defined on an earlier line.
    """
    names_by_symbol = collections.OrderedDict()
    entries = collections.OrderedDict()
    table_name = ""
    for line in ir:
        # String constants are recorded so that table entries, which only
        # reference them by symbol, can be resolved to names later on.
        str_match = _STRING_CONSTANT_RE.match(line)
        if str_match is not None:
            symbol, text = str_match.groups()
            names_by_symbol[symbol] = text
            continue

        is_syscall_table = "@syscall_table" in line
        if not is_syscall_table and "@constant_table" not in line:
            continue

        table_name = "syscalls" if is_syscall_table else "constants"
        for raw_entry in _TABLE_ENTRY_RE.findall(line):
            parsed = _TABLE_ENTRY_CONTENTS.match(raw_entry)
            if parsed is None:
                if _IGNORED_ENTRY_CONTENTS.match(raw_entry) is None:
                    raise ValueError(
                        "Failed to parse table entry %r" % raw_entry
                    )
                # Pointer-valued constants are skipped on purpose.
                continue
            symbol, number = parsed.groups()
            if symbol == "null":
                # This is the end-of-table marker.
                break
            entries[names_by_symbol[symbol]] = int(number)

    return ParseResults(table_name=table_name, table_entries=entries)
89
90
def main(argv=None):
    """Main entrypoint.

    Parses every LLVM IR file given on the command line, stores each parsed
    table under its table name, derives the top-level `arch_nr`, `bits` and
    `arch_name` fields from the constants table, and writes the result as
    JSON to the file given by --output.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            `sys.argv[1:]` when None.

    Returns:
        0 on success.

    Raises:
        KeyError: No constants table was parsed, or it lacks the
            MINIJAIL_ARCH_NR or MINIJAIL_ARCH_BITS entry.
        ValueError: MINIJAIL_ARCH_NR is not one of the known architectures.
    """

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description=__doc__, epilog=HELP_EPILOG)
    parser.add_argument(
        "--output",
        help="The path of the generated constants.json file.",
        type=argparse.FileType("w"),
        required=True,
    )
    parser.add_argument(
        "llvm_ir_files",
        help="An LLVM IR file with one of the {constants,syscall} table.",
        metavar="llvm_ir_file",
        nargs="+",
        type=argparse.FileType("r"),
    )
    opts = parser.parse_args(argv)

    # It is a bit more complicated to generate the arch_name, since the
    # constants can only output numeric values. Use a hardcoded mapping from
    # MINIJAIL_ARCH_NR to the architecture name instead.
    arch_names = {
        0xC000003E: "x86_64",
        0x40000003: "x86",
        0xC00000B7: "arm64",
        0x40000028: "arm",
        0xC00000F3: "riscv64",
    }

    try:
        constants_json = {}
        for ir in opts.llvm_ir_files:
            parse_results = parse_llvm_ir(ir)
            constants_json[parse_results.table_name] = (
                parse_results.table_entries
            )

        # Populate the top-level fields.
        constants = constants_json["constants"]
        constants_json["arch_nr"] = constants["MINIJAIL_ARCH_NR"]
        constants_json["bits"] = constants["MINIJAIL_ARCH_BITS"]

        arch_name = arch_names.get(constants_json["arch_nr"])
        if arch_name is None:
            raise ValueError(
                "Unknown architecture: 0x%08X" % constants_json["arch_nr"]
            )
        constants_json["arch_name"] = arch_name

        json.dump(constants_json, opts.output, indent="  ")
    finally:
        # argparse.FileType opens the files but never closes them. Close them
        # here so the output is flushed before main() returns and no handles
        # leak when main() is called from other code. "-" maps to the
        # process's stdin/stdout, which must stay open.
        for handle in opts.llvm_ir_files + [opts.output]:
            if handle is not sys.stdin and handle is not sys.stdout:
                handle.close()
    return 0
141
142
if __name__ == "__main__":
    # main() reads sys.argv[1:] itself when it is called without arguments.
    sys.exit(main())
145