• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Generate opcode metadata.
2Reads the instruction definitions from bytecodes.c.
3Writes the metadata to pycore_opcode_metadata.h by default.
4"""
5
6import argparse
7import os.path
8import sys
9
10from analyzer import (
11    Analysis,
12    Instruction,
13    analyze_files,
14    Skip,
15    Uop,
16)
17from generators_common import (
18    DEFAULT_INPUT,
19    ROOT,
20    write_header,
21    cflags,
22    StackOffset,
23)
24from cwriter import CWriter
25from typing import TextIO
26from stack import get_stack_effect
27
# Constants used instead of size for macro expansions.
# Note: 1, 2, 4 must match actual cache entry sizes.
# Every name/value pair is written out as a C `#define` by
# generate_oparg_macros(), and generate_expansion_table() stores some of these
# values in the `size` slot of an expansion entry in place of a part's size.
OPARG_KINDS = {
    "OPARG_FULL": 0,
    "OPARG_CACHE_1": 1,
    "OPARG_CACHE_2": 2,
    "OPARG_CACHE_4": 4,
    "OPARG_TOP": 5,
    "OPARG_BOTTOM": 6,
    "OPARG_SAVE_RETURN_OFFSET": 7,
    # Skip 8 as the other powers of 2 are sizes
    "OPARG_REPLACED": 9,
}

# Names of the per-opcode flags. The list index is the bit position:
# generate_flag_macros() defines HAS_<name>_FLAG as (1 << index), so the order
# of this list determines the generated bit values.
# NOTE(review): generate_metadata_table() declares the `flags` field as
# int16_t; with 15 entries the highest bit in use is 1 << 14. Confirm the
# field width before appending another flag.
FLAGS = [
    "ARG",
    "CONST",
    "NAME",
    "JUMP",
    "FREE",
    "LOCAL",
    "EVAL_BREAK",
    "DEOPT",
    "ERROR",
    "ESCAPES",
    "EXIT",
    "PURE",
    "PASSTHROUGH",
    "OPARG_AND_1",
    "ERROR_NO_POP",
]
59
60
def generate_flag_macros(out: CWriter) -> None:
    """Emit the flag bit masks and the matching flag-test macros.

    For every name in FLAGS this writes `HAS_<name>_FLAG`, whose value is
    one bit selected by the name's position in the list, and then
    `OPCODE_HAS_<name>(OP)`, which masks that bit out of the opcode's
    metadata entry. All masks are written before any test macro, followed
    by one blank line.
    """
    for bit, name in enumerate(FLAGS):
        mask = 1 << bit
        out.emit(f"#define HAS_{name}_FLAG ({mask})\n")
    for name in FLAGS:
        out.emit(
            f"#define OPCODE_HAS_{name}(OP) "
            f"(_PyOpcode_opcode_metadata[OP].flags & (HAS_{name}_FLAG))\n"
        )
    out.emit("\n")
69
70
def generate_oparg_macros(out: CWriter) -> None:
    """Emit one `#define NAME VALUE` line per OPARG_KINDS entry, then a blank line.

    Entries are written in the dictionary's own iteration order.
    """
    defines = [f"#define {kind} {number}\n" for kind, number in OPARG_KINDS.items()]
    for line in defines:
        out.emit(line)
    out.emit("\n")
75
76
def emit_stack_effect_function(
    out: CWriter, direction: str, data: list[tuple[str, str]]
) -> None:
    """Emit the C function `_PyOpcode_num_<direction>(opcode, oparg)`.

    An `extern` declaration is written first; the definition follows inside
    an `#ifdef NEED_OPCODE_METADATA` guard. The body is a `switch` with one
    `case` per `(name, effect)` pair in `data`, taken in the order given,
    each returning `effect` verbatim. The `default` branch returns -1.
    """
    function = f"_PyOpcode_num_{direction}"
    out.emit(f"extern int {function}(int opcode, int oparg);\n")
    out.emit("#ifdef NEED_OPCODE_METADATA\n")
    out.emit(f"int {function}(int opcode, int oparg)  {{\n")
    out.emit("switch(opcode) {\n")
    for opcode_name, expression in data:
        out.emit(f"case {opcode_name}:\n")
        out.emit(f"    return {expression};\n")
    # Fixed tail: default branch, closing braces for switch and function,
    # and the end of the preprocessor guard.
    for line in ("default:\n", "    return -1;\n", "}\n", "}\n\n", "#endif\n\n"):
        out.emit(line)
92
93
def generate_stack_effect_functions(analysis: Analysis, out: CWriter) -> None:
    """Emit the `popped` and `pushed` stack-effect functions.

    For each instruction the stack effect is obtained from
    get_stack_effect(). The popped count is the negated base offset and the
    pushed count is the top offset minus the base offset, both rendered with
    `to_c()`. Each function's cases are sorted before being emitted.
    """
    rows: list[tuple[str, str, str]] = []
    for inst in analysis.instructions.values():
        effect = get_stack_effect(inst)
        n_popped = (-effect.base_offset).to_c()
        n_pushed = (effect.top_offset - effect.base_offset).to_c()
        rows.append((inst.name, n_popped, n_pushed))
    popped_cases = sorted((name, popped) for name, popped, _ in rows)
    pushed_cases = sorted((name, pushed) for name, _, pushed in rows)
    emit_stack_effect_function(out, "popped", popped_cases)
    emit_stack_effect_function(out, "pushed", pushed_cases)
105
106
def generate_is_pseudo(analysis: Analysis, out: CWriter) -> None:
    """Write the IS_PSEUDO_INSTR macro.

    The macro body is an `||` chain comparing OP against every key of
    `analysis.pseudos`, terminated by a literal `0` so the chain stays
    valid even when there are no pseudo instructions.
    """
    comparisons = [f"((OP) == {pseudo}) || \\\n" for pseudo in analysis.pseudos]
    pieces = [
        "\n\n#define IS_PSEUDO_INSTR(OP)  ( \\\n",
        *comparisons,
        "0",
        ")\n\n",
    ]
    for piece in pieces:
        out.emit(piece)
114
115
def get_format(inst: Instruction) -> str:
    """Return the INSTR_FMT_* name describing the layout of `inst`.

    The name starts with `INSTR_FMT_IB` when the instruction uses its oparg
    and `INSTR_FMT_IX` otherwise. An instruction larger than one code unit
    gets a `C` appended, and then one `0` for each unit beyond the second.
    """
    prefix = "INSTR_FMT_IB" if inst.properties.oparg else "INSTR_FMT_IX"
    cache_marker = "C" if inst.size > 1 else ""
    # A non-positive repeat count yields "", so sizes 1 and 2 add no padding.
    padding = "0" * (inst.size - 2)
    return prefix + cache_marker + padding
125
126
def generate_instruction_formats(analysis: Analysis, out: CWriter) -> None:
    """Emit `enum InstructionFormat` covering every format in use.

    The distinct get_format() results over all instructions are sorted and
    numbered consecutively starting at 1.
    """
    distinct = {get_format(inst) for inst in analysis.instructions.values()}
    out.emit("enum InstructionFormat {\n")
    for number, fmt in enumerate(sorted(distinct), start=1):
        out.emit(f"{fmt} = {number},\n")
    out.emit("};\n\n")
139
140
def generate_deopt_table(analysis: Analysis, out: CWriter) -> None:
    """Emit the `_PyOpcode_Deopt[256]` table.

    Each instruction maps to the name of its family when it has one and to
    itself otherwise. `INSTRUMENTED_LINE` is added explicitly, mapping to
    itself. Entries are emitted in sorted order inside an
    `#ifdef NEED_OPCODE_METADATA` guard.
    """
    out.emit("extern const uint8_t _PyOpcode_Deopt[256];\n")
    out.emit("#ifdef NEED_OPCODE_METADATA\n")
    out.emit("const uint8_t _PyOpcode_Deopt[256] = {\n")
    entries: list[tuple[str, str]] = [
        (inst.name, inst.name if inst.family is None else inst.family.name)
        for inst in analysis.instructions.values()
    ]
    entries.append(("INSTRUMENTED_LINE", "INSTRUMENTED_LINE"))
    entries.sort()
    for opcode, target in entries:
        out.emit(f"[{opcode}] = {target},\n")
    out.emit("};\n\n")
    out.emit("#endif // NEED_OPCODE_METADATA\n\n")
156
157
def generate_cache_table(analysis: Analysis, out: CWriter) -> None:
    """Emit the `_PyOpcode_Caches[256]` table.

    An entry with value `size - 1` is written only for instructions that
    are larger than one unit, are not named `INSTRUMENTED...`, and either
    have no family or share their name with their family. Entries follow
    the iteration order of `analysis.instructions`.
    """
    out.emit("extern const uint8_t _PyOpcode_Caches[256];\n")
    out.emit("#ifdef NEED_OPCODE_METADATA\n")
    out.emit("const uint8_t _PyOpcode_Caches[256] = {\n")
    for inst in analysis.instructions.values():
        # Family members whose name differs from the family's are skipped.
        names_own_family = not inst.family or inst.family.name == inst.name
        instrumented = inst.name.startswith("INSTRUMENTED")
        if names_own_family and not instrumented and inst.size > 1:
            out.emit(f"[{inst.name}] = {inst.size - 1},\n")
    out.emit("};\n")
    out.emit("#endif\n\n")
171
172
def generate_name_table(analysis: Analysis, out: CWriter) -> None:
    """Emit the `_PyOpcode_OpName` table mapping opcodes to their names.

    The table has 256 slots plus one per pseudo instruction. It lists every
    instruction name, every pseudo name and `INSTRUMENTED_LINE`, in sorted
    order, inside an `#ifdef NEED_OPCODE_METADATA` guard.
    """
    slots = 256 + len(analysis.pseudos)
    declaration = f"const char *_PyOpcode_OpName[{slots}]"
    out.emit(f"extern {declaration};\n")
    out.emit("#ifdef NEED_OPCODE_METADATA\n")
    out.emit(f"{declaration} = {{\n")
    all_names = sorted(
        [*analysis.instructions, *analysis.pseudos, "INSTRUMENTED_LINE"]
    )
    for opname in all_names:
        out.emit(f'[{opname}] = "{opname}",\n')
    out.emit("};\n")
    out.emit("#endif\n\n")
184
185
def generate_metadata_table(analysis: Analysis, out: CWriter) -> None:
    """Emit `struct opcode_metadata` and the `_PyOpcode_opcode_metadata` table.

    The table has 256 slots plus one per pseudo instruction. Instructions
    come first, sorted by name, each with its get_format() value and its
    cflags() expression. Pseudo instructions follow, also sorted by name,
    with format -1 and their cflags() expression OR-ed with `<flag>_FLAG`
    for every entry of `pseudo.flags`.
    """

    def by_name(item):  # sort key shared by both loops
        return item.name

    slots = 256 + len(analysis.pseudos)
    declaration = f"const struct opcode_metadata _PyOpcode_opcode_metadata[{slots}]"
    out.emit("struct opcode_metadata {\n")
    out.emit("uint8_t valid_entry;\n")
    out.emit("int8_t instr_format;\n")
    out.emit("int16_t flags;\n")
    out.emit("};\n\n")
    out.emit(f"extern {declaration};\n")
    out.emit("#ifdef NEED_OPCODE_METADATA\n")
    out.emit(f"{declaration} = {{\n")
    for inst in sorted(analysis.instructions.values(), key=by_name):
        opname = inst.name
        layout = get_format(inst)
        flag_expr = cflags(inst.properties)
        out.emit(f"[{opname}] = {{ true, {layout}, {flag_expr} }},\n")
    for pseudo in sorted(analysis.pseudos.values(), key=by_name):
        base = cflags(pseudo.properties)
        # A base of "0" means "no flags": it is dropped as soon as the
        # pseudo contributes a flag of its own, and kept otherwise.
        terms = [] if base == "0" else [base]
        terms.extend(f"{flag}_FLAG" for flag in pseudo.flags)
        flag_expr = " | ".join(terms) if terms else "0"
        out.emit(f"[{pseudo.name}] = {{ true, -1, {flag_expr} }},\n")
    out.emit("};\n")
    out.emit("#endif\n\n")
214
215
def generate_expansion_table(analysis: Analysis, out: CWriter) -> None:
    """Emit the `_PyOpcode_macro_expansion` table and its supporting definitions.

    The function first builds, per instruction (sorted by name), a list of
    `(uop name, size, offset)` entries:

    * Super-instructions (`inst.is_super()`) have their name split on `_`
      into two halves, each of which must be an instruction with exactly
      one part. They expand to those two parts, tagged OPARG_TOP and
      OPARG_BOTTOM, with offset 0.
    * Instructions rejected by is_viable_expansion() get no table entry.
    * Otherwise every `Uop` part contributes one entry, except parts
      annotated "specializing". `size` is the part's size unless it is
      overridden by an OPARG_KINDS value (see the inline comments).

    It then emits `MAX_UOP_PER_EXPANSION` (the longest expansion found),
    `struct opcode_macro_expansion`, and the 256-slot table itself, with
    the definition inside an `#ifdef NEED_OPCODE_METADATA` guard.

    Raises:
        AssertionError: a super-instruction name does not have four
            `_`-separated pieces, a half does not name a known instruction,
            or a half has more than one part.
        ValueError: from `max()` when no instruction produced a table entry.
    """
    expansions_table: dict[str, list[tuple[str, int, int]]] = {}
    for inst in sorted(analysis.instructions.values(), key=lambda t: t.name):
        offset: int = 0  # Cache effect offset
        expansions: list[tuple[str, int, int]] = []  # [(name, size, offset), ...]
        if inst.is_super():
            # The name is expected to look like A_B_C_D, where A_B and C_D
            # are the two instructions being combined.
            pieces = inst.name.split("_")
            assert len(pieces) == 4, f"{inst.name} doesn't look like a super-instr"
            name1 = "_".join(pieces[:2])
            name2 = "_".join(pieces[2:])
            assert name1 in analysis.instructions, f"{name1} doesn't match any instr"
            assert name2 in analysis.instructions, f"{name2} doesn't match any instr"
            instr1 = analysis.instructions[name1]
            instr2 = analysis.instructions[name2]
            assert (
                len(instr1.parts) == 1
            ), f"{name1} is not a good superinstruction part"
            assert (
                len(instr2.parts) == 1
            ), f"{name2} is not a good superinstruction part"
            # The size slot carries an oparg kind here rather than a size.
            expansions.append((instr1.parts[0].name, OPARG_KINDS["OPARG_TOP"], 0))
            expansions.append((instr2.parts[0].name, OPARG_KINDS["OPARG_BOTTOM"], 0))
        elif not is_viable_expansion(inst):
            continue
        else:
            for part in inst.parts:
                size = part.size
                if part.name == "_SAVE_RETURN_OFFSET":
                    size = OPARG_KINDS["OPARG_SAVE_RETURN_OFFSET"]
                if isinstance(part, Uop):
                    # Skip specializations
                    # NOTE(review): this `continue` also bypasses the
                    # `offset += part.size` below, so a specializing uop's
                    # size is not counted in later offsets. Confirm that
                    # is intended.
                    if "specializing" in part.annotations:
                        continue
                    if "replaced" in part.annotations:
                        size = OPARG_KINDS["OPARG_REPLACED"]
                    # An entry whose size is 0 records offset 0.
                    expansions.append((part.name, size, offset if size else 0))
                # Advance by the part's real size, not the possibly
                # overridden `size`; this runs for non-Uop parts as well.
                offset += part.size
        expansions_table[inst.name] = expansions
    # The widest expansion determines the length of the C `uops` array.
    max_uops = max(len(ex) for ex in expansions_table.values())
    out.emit(f"#define MAX_UOP_PER_EXPANSION {max_uops}\n")
    out.emit("struct opcode_macro_expansion {\n")
    out.emit("int nuops;\n")
    out.emit(
        "struct { int16_t uop; int8_t size; int8_t offset; } uops[MAX_UOP_PER_EXPANSION];\n"
    )
    out.emit("};\n")
    out.emit(
        "extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256];\n\n"
    )
    out.emit("#ifdef NEED_OPCODE_METADATA\n")
    out.emit("const struct opcode_macro_expansion\n")
    out.emit("_PyOpcode_macro_expansion[256] = {\n")
    # Rows come out in insertion order, i.e. sorted by instruction name.
    for inst_name, expansions in expansions_table.items():
        uops = [
            f"{{ {name}, {size}, {offset} }}" for (name, size, offset) in expansions
        ]
        out.emit(
            f'[{inst_name}] = {{ .nuops = {len(expansions)}, .uops = {{ {", ".join(uops)} }} }},\n'
        )
    out.emit("};\n")
    out.emit("#endif // NEED_OPCODE_METADATA\n\n")
277
278
def is_viable_expansion(inst: Instruction) -> bool:
    """Return True if all of the instruction's parts are viable for tier 2.

    Only `Uop` parts are examined, and those annotated "specializing" or
    "replaced" are ignored. Any remaining uop whose tier is 1, or whose
    `is_viable()` is false, makes the instruction non-expandable.
    """

    def blocks_expansion(part) -> bool:
        # Non-uop parts and skipped annotations never block expansion.
        if not isinstance(part, Uop):
            return False
        if "specializing" in part.annotations or "replaced" in part.annotations:
            return False
        return part.properties.tier == 1 or not part.is_viable()

    return not any(blocks_expansion(part) for part in inst.parts)
291
292
def generate_extra_cases(analysis: Analysis, out: CWriter) -> None:
    """Emit the EXTRA_CASES macro.

    The macro expands to a `case` label for every number in 0..255 that is
    not a value of `analysis.opmap`, in ascending order, ending with a
    lone `;`.
    """
    assigned = set(analysis.opmap.values())
    unassigned = [number for number in range(256) if number not in assigned]
    out.emit("#define EXTRA_CASES \\\n")
    for number in unassigned:
        out.emit(f"    case {number}: \\\n")
    out.emit("        ;\n")
300
301
def generate_pseudo_targets(analysis: Analysis, out: CWriter) -> None:
    """Emit the `_PyOpcode_PseudoTargets` table and `is_pseudo_target()`.

    The table has one row per pseudo instruction, indexed by
    `<pseudo name>-256`. A row lists the names of the pseudo's targets and
    is padded with `0` to one slot more than the longest target list, so
    every row ends in at least one `0`. The emitted C helper scans a row
    until it reaches a zero entry.

    Raises:
        ValueError: from `max()` when `analysis.pseudos` is empty.
    """
    pseudos = analysis.pseudos
    table_size = len(pseudos)
    max_targets = max(len(pseudo.targets) for pseudo in pseudos.values())
    row_width = max_targets + 1
    declaration = f"const struct pseudo_targets _PyOpcode_PseudoTargets[{table_size}]"
    out.emit("struct pseudo_targets {\n")
    out.emit(f"uint8_t targets[{row_width}];\n")
    out.emit("};\n")
    out.emit(f"extern {declaration};\n")
    out.emit("#ifdef NEED_OPCODE_METADATA\n")
    out.emit(f"{declaration} = {{\n")
    for pseudo in pseudos.values():
        names = [target.name for target in pseudo.targets]
        padding = ["0"] * (row_width - len(names))
        cells = ", ".join(names + padding)
        out.emit(f"[{pseudo.name}-256] = {{ {{ {cells} }} }},\n")
    out.emit("};\n\n")
    out.emit("#endif // NEED_OPCODE_METADATA\n")
    out.emit("static inline bool\n")
    out.emit("is_pseudo_target(int pseudo, int target) {\n")
    out.emit(f"if (pseudo < 256 || pseudo >= {256 + table_size}) {{\n")
    out.emit("return false;\n")
    out.emit("}\n")
    out.emit(
        "for (int i = 0; _PyOpcode_PseudoTargets[pseudo-256].targets[i]; i++) {\n"
    )
    out.emit(
        "if (_PyOpcode_PseudoTargets[pseudo-256].targets[i] == target) return true;\n"
    )
    out.emit("}\n")
    out.emit("return false;\n")
    out.emit("}\n\n")
336
337
def generate_opcode_metadata(
    filenames: list[str], analysis: Analysis, outfile: TextIO
) -> None:
    """Write the complete opcode metadata header to `outfile`.

    After the file header, everything is emitted inside the
    `Py_CORE_OPCODE_METADATA_H` include guard: the Py_BUILD_CORE check, the
    includes, the IS_PSEUDO_INSTR and IS_VALID_OPCODE macros, and then each
    generated section in a fixed order.
    """
    write_header(__file__, filenames, outfile)
    out = CWriter(outfile, 0, False)
    with out.header_guard("Py_CORE_OPCODE_METADATA_H"):
        preamble = (
            "#ifndef Py_BUILD_CORE\n",
            '#  error "this header requires Py_BUILD_CORE define"\n',
            "#endif\n\n",
            "#include <stdbool.h>              // bool\n",
            '#include "opcode_ids.h"\n',
        )
        for line in preamble:
            out.emit(line)
        generate_is_pseudo(analysis, out)
        out.emit('#include "pycore_uop_ids.h"\n')
        generate_stack_effect_functions(analysis, out)
        generate_instruction_formats(analysis, out)
        # Valid opcodes span the 256 real slots plus one per pseudo.
        opcode_limit = 256 + len(analysis.pseudos)
        out.emit("#define IS_VALID_OPCODE(OP) \\\n")
        out.emit(f"    (((OP) >= 0) && ((OP) < {opcode_limit}) && \\\n")
        out.emit("     (_PyOpcode_opcode_metadata[(OP)].valid_entry))\n\n")
        generate_flag_macros(out)
        generate_oparg_macros(out)
        # The remaining sections all take (analysis, out); order is kept.
        table_generators = (
            generate_metadata_table,
            generate_expansion_table,
            generate_name_table,
            generate_cache_table,
            generate_deopt_table,
            generate_extra_cases,
            generate_pseudo_targets,
        )
        for generate in table_generators:
            generate(analysis, out)
366
367
# Command-line interface. The parser and its arguments are created at import
# time (module level), not inside the `__main__` guard below.
arg_parser = argparse.ArgumentParser(
    description="Generate the header file with opcode metadata.",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)


# Default location of the generated header, under ROOT.
DEFAULT_OUTPUT = ROOT / "Include/internal/pycore_opcode_metadata.h"


# -o/--output: path the generated code is written to.
arg_parser.add_argument(
    "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
)

# Positional inputs: REMAINDER gathers every remaining command-line argument
# into a list, which is empty when none are given.
arg_parser.add_argument(
    "input", nargs=argparse.REMAINDER, help="Instruction definition file(s)"
)
384
if __name__ == "__main__":
    # Script entry point: analyze the input files and write the header.
    cli = arg_parser.parse_args()
    sources = cli.input
    if not sources:
        # No inputs on the command line: use the default definition file.
        sources.append(DEFAULT_INPUT)
    analysis = analyze_files(sources)
    with open(cli.output, "w") as header:
        generate_opcode_metadata(sources, analysis, header)
392