"""Generate opcode metadata.
Reads the instruction definitions from bytecodes.c.
Writes the metadata to pycore_opcode_metadata.h by default.
"""

import argparse
import os.path
import sys

from analyzer import (
    Analysis,
    Instruction,
    analyze_files,
    Skip,
    Uop,
)
from generators_common import (
    DEFAULT_INPUT,
    ROOT,
    write_header,
    cflags,
    StackOffset,
)
from cwriter import CWriter
from typing import TextIO
from stack import get_stack_effect

# NOTE(review): this module was restored from a whitespace-collapsed copy.
# Runs of spaces inside the emitted C string literals (e.g. the indent in
# front of "return" or "case") are reproduced exactly as that copy showed
# them -- confirm against the checked-in generated header.

# Constants used instead of size for macro expansions.
# Note: 1, 2, 4 must match actual cache entry sizes.
OPARG_KINDS = {
    "OPARG_FULL": 0,
    "OPARG_CACHE_1": 1,
    "OPARG_CACHE_2": 2,
    "OPARG_CACHE_4": 4,
    "OPARG_TOP": 5,
    "OPARG_BOTTOM": 6,
    "OPARG_SAVE_RETURN_OFFSET": 7,
    # Skip 8 as the other powers of 2 are sizes
    "OPARG_REPLACED": 9,
}

# Flag names, in bit order: the i-th entry becomes bit (1 << i) of the
# per-opcode `flags` field (see generate_flag_macros).
FLAGS = [
    "ARG",
    "CONST",
    "NAME",
    "JUMP",
    "FREE",
    "LOCAL",
    "EVAL_BREAK",
    "DEOPT",
    "ERROR",
    "ESCAPES",
    "EXIT",
    "PURE",
    "PASSTHROUGH",
    "OPARG_AND_1",
    "ERROR_NO_POP",
]


def generate_flag_macros(out: CWriter) -> None:
    """Emit the HAS_<flag>_FLAG masks and the OPCODE_HAS_<flag>(OP) tests.

    Each entry of FLAGS gets the bit matching its list position; the test
    macros read the `flags` member of `_PyOpcode_opcode_metadata[OP]`.
    """
    for bit, flag in enumerate(FLAGS):
        out.emit(f"#define HAS_{flag}_FLAG ({1<<bit})\n")
    for flag in FLAGS:
        out.emit(
            f"#define OPCODE_HAS_{flag}(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_{flag}_FLAG))\n"
        )
    out.emit("\n")


def generate_oparg_macros(out: CWriter) -> None:
    """Emit one `#define NAME VALUE` line per entry of OPARG_KINDS."""
    for name, value in OPARG_KINDS.items():
        out.emit(f"#define {name} {value}\n")
    out.emit("\n")


def emit_stack_effect_function(
    out: CWriter, direction: str, data: list[tuple[str, str]]
) -> None:
    """Emit the C function `_PyOpcode_num_<direction>(opcode, oparg)`.

    An `extern` declaration is always emitted; the definition is wrapped in
    `#ifdef NEED_OPCODE_METADATA`.  The body is a `switch` with one `case`
    per `(name, effect)` pair in *data* returning the C expression *effect*,
    and a `default` returning -1.
    """
    out.emit(f"extern int _PyOpcode_num_{direction}(int opcode, int oparg);\n")
    out.emit("#ifdef NEED_OPCODE_METADATA\n")
    out.emit(f"int _PyOpcode_num_{direction}(int opcode, int oparg) {{\n")
    out.emit("switch(opcode) {\n")
    for name, effect in data:
        out.emit(f"case {name}:\n")
        out.emit(f" return {effect};\n")
    out.emit("default:\n")
    out.emit(" return -1;\n")
    out.emit("}\n")
    out.emit("}\n\n")
    out.emit("#endif\n\n")


def generate_stack_effect_functions(analysis: Analysis, out: CWriter) -> None:
    """Emit `_PyOpcode_num_popped` and `_PyOpcode_num_pushed`.

    For every instruction the popped count is the negated base offset of its
    stack effect and the pushed count is `top_offset - base_offset`, both
    rendered with `to_c()`.  Cases are emitted sorted by instruction name.
    """
    popped_data: list[tuple[str, str]] = []
    pushed_data: list[tuple[str, str]] = []
    for inst in analysis.instructions.values():
        stack = get_stack_effect(inst)
        popped = (-stack.base_offset).to_c()
        pushed = (stack.top_offset - stack.base_offset).to_c()
        popped_data.append((inst.name, popped))
        pushed_data.append((inst.name, pushed))
    emit_stack_effect_function(out, "popped", sorted(popped_data))
    emit_stack_effect_function(out, "pushed", sorted(pushed_data))


def generate_is_pseudo(analysis: Analysis, out: CWriter) -> None:
    """Write the IS_PSEUDO_INSTR macro"""
    out.emit("\n\n#define IS_PSEUDO_INSTR(OP) ( \\\n")
    for op in analysis.pseudos:
        out.emit(f"((OP) == {op}) || \\\n")
    # Trailing "0" terminates the `||` chain (and is the whole expression
    # when there are no pseudo instructions).
    out.emit("0")
    out.emit(")\n\n")


def get_format(inst: Instruction) -> str:
    """Return the INSTR_FMT_* name describing *inst*.

    The name starts with `INSTR_FMT_IB` when the instruction uses its oparg
    and `INSTR_FMT_IX` otherwise.  When `inst.size > 1` a `C` is appended,
    followed by `inst.size - 2` zeros.
    """
    fmt = "INSTR_FMT_IB" if inst.properties.oparg else "INSTR_FMT_IX"
    if inst.size > 1:
        fmt += "C"
    # A non-positive repeat count yields "", so this is a no-op for size <= 2.
    fmt += "0" * (inst.size - 2)
    return fmt


def generate_instruction_formats(analysis: Analysis, out: CWriter) -> None:
    """Emit `enum InstructionFormat` with one member per distinct format.

    Members are listed in sorted order and numbered from 1.
    """
    # Compute the set of all instruction formats.
    formats: set[str] = {
        get_format(inst) for inst in analysis.instructions.values()
    }
    # Generate an enum for it
    out.emit("enum InstructionFormat {\n")
    for format_id, format_name in enumerate(sorted(formats), start=1):
        out.emit(f"{format_name} = {format_id},\n")
    out.emit("};\n\n")


def generate_deopt_table(analysis: Analysis, out: CWriter) -> None:
    """Emit the `_PyOpcode_Deopt[256]` table.

    Each instruction maps to the name of its family when it has one and to
    itself otherwise.  `INSTRUMENTED_LINE` is added explicitly, mapping to
    itself.  Entries are emitted sorted by instruction name.
    """
    out.emit("extern const uint8_t _PyOpcode_Deopt[256];\n")
    out.emit("#ifdef NEED_OPCODE_METADATA\n")
    out.emit("const uint8_t _PyOpcode_Deopt[256] = {\n")
    deopts: list[tuple[str, str]] = []
    for inst in analysis.instructions.values():
        deopt = inst.name
        if inst.family is not None:
            deopt = inst.family.name
        deopts.append((inst.name, deopt))
    deopts.append(("INSTRUMENTED_LINE", "INSTRUMENTED_LINE"))
    for name, deopt in sorted(deopts):
        out.emit(f"[{name}] = {deopt},\n")
    out.emit("};\n\n")
    out.emit("#endif // NEED_OPCODE_METADATA\n\n")


def generate_cache_table(analysis: Analysis, out: CWriter) -> None:
    """Emit the `_PyOpcode_Caches[256]` table.

    An entry of `inst.size - 1` is written for every instruction with
    `size > 1`, except instructions that belong to a family under a
    different name and instructions whose name starts with `INSTRUMENTED`.
    """
    out.emit("extern const uint8_t _PyOpcode_Caches[256];\n")
    out.emit("#ifdef NEED_OPCODE_METADATA\n")
    out.emit("const uint8_t _PyOpcode_Caches[256] = {\n")
    for inst in analysis.instructions.values():
        if inst.family and inst.family.name != inst.name:
            continue
        if inst.name.startswith("INSTRUMENTED"):
            continue
        if inst.size > 1:
            out.emit(f"[{inst.name}] = {inst.size-1},\n")
    out.emit("};\n")
    out.emit("#endif\n\n")


def generate_name_table(analysis: Analysis, out: CWriter) -> None:
    """Emit the `_PyOpcode_OpName` table mapping opcodes to their names.

    The table has 256 slots plus one per pseudo instruction and covers all
    instructions, all pseudo instructions and `INSTRUMENTED_LINE`, sorted by
    name.
    """
    table_size = 256 + len(analysis.pseudos)
    out.emit(f"extern const char *_PyOpcode_OpName[{table_size}];\n")
    out.emit("#ifdef NEED_OPCODE_METADATA\n")
    out.emit(f"const char *_PyOpcode_OpName[{table_size}] = {{\n")
    names = list(analysis.instructions) + list(analysis.pseudos)
    names.append("INSTRUMENTED_LINE")
    for name in sorted(names):
        out.emit(f'[{name}] = "{name}",\n')
    out.emit("};\n")
    out.emit("#endif\n\n")


def generate_metadata_table(analysis: Analysis, out: CWriter) -> None:
    """Emit `struct opcode_metadata` and the `_PyOpcode_opcode_metadata` table.

    Real instructions get their format from `get_format` and their flags
    from `cflags(inst.properties)`.  Pseudo instructions get format -1 and
    the flags from `cflags(pseudo.properties)` OR-ed with `<flag>_FLAG` for
    every entry of `pseudo.flags`.  Both groups are emitted sorted by name.
    """
    table_size = 256 + len(analysis.pseudos)
    out.emit("struct opcode_metadata {\n")
    out.emit("uint8_t valid_entry;\n")
    out.emit("int8_t instr_format;\n")
    out.emit("int16_t flags;\n")
    out.emit("};\n\n")
    out.emit(
        f"extern const struct opcode_metadata _PyOpcode_opcode_metadata[{table_size}];\n"
    )
    out.emit("#ifdef NEED_OPCODE_METADATA\n")
    out.emit(
        f"const struct opcode_metadata _PyOpcode_opcode_metadata[{table_size}] = {{\n"
    )
    for inst in sorted(analysis.instructions.values(), key=lambda t: t.name):
        out.emit(
            f"[{inst.name}] = {{ true, {get_format(inst)}, {cflags(inst.properties)} }},\n"
        )
    for pseudo in sorted(analysis.pseudos.values(), key=lambda t: t.name):
        flags = cflags(pseudo.properties)
        for flag in pseudo.flags:
            # A bare "0" is replaced rather than OR-ed with, so the emitted
            # expression never reads "0 | X_FLAG".
            if flags == "0":
                flags = f"{flag}_FLAG"
            else:
                flags += f" | {flag}_FLAG"
        out.emit(f"[{pseudo.name}] = {{ true, -1, {flags} }},\n")
    out.emit("};\n")
    out.emit("#endif\n\n")


def generate_expansion_table(analysis: Analysis, out: CWriter) -> None:
    """Emit `struct opcode_macro_expansion` and `_PyOpcode_macro_expansion`.

    For each instruction (visited in name order) a list of
    `(uop name, size, offset)` triples is built:

    * super-instructions (`inst.is_super()`) are split on their name into two
      single-part instructions, tagged OPARG_TOP and OPARG_BOTTOM;
    * instructions rejected by `is_viable_expansion` are left out;
    * otherwise every `Uop` part that is not annotated "specializing"
      contributes one triple.

    `MAX_UOP_PER_EXPANSION` is the length of the longest list.

    Raises:
        ValueError: from `max()` if no instruction produced an expansion.
    """
    expansions_table: dict[str, list[tuple[str, int, int]]] = {}
    for inst in sorted(analysis.instructions.values(), key=lambda t: t.name):
        offset: int = 0  # Cache effect offset
        expansions: list[tuple[str, int, int]] = []  # [(name, size, offset), ...]
        if inst.is_super():
            pieces = inst.name.split("_")
            assert len(pieces) == 4, f"{inst.name} doesn't look like a super-instr"
            name1 = "_".join(pieces[:2])
            name2 = "_".join(pieces[2:])
            assert name1 in analysis.instructions, f"{name1} doesn't match any instr"
            assert name2 in analysis.instructions, f"{name2} doesn't match any instr"
            instr1 = analysis.instructions[name1]
            instr2 = analysis.instructions[name2]
            assert (
                len(instr1.parts) == 1
            ), f"{name1} is not a good superinstruction part"
            assert (
                len(instr2.parts) == 1
            ), f"{name2} is not a good superinstruction part"
            expansions.append((instr1.parts[0].name, OPARG_KINDS["OPARG_TOP"], 0))
            expansions.append((instr2.parts[0].name, OPARG_KINDS["OPARG_BOTTOM"], 0))
        elif not is_viable_expansion(inst):
            continue
        else:
            for part in inst.parts:
                size = part.size
                if part.name == "_SAVE_RETURN_OFFSET":
                    size = OPARG_KINDS["OPARG_SAVE_RETURN_OFFSET"]
                if isinstance(part, Uop):
                    # Skip specializations
                    if "specializing" in part.annotations:
                        continue
                    if "replaced" in part.annotations:
                        size = OPARG_KINDS["OPARG_REPLACED"]
                    # The offset is only recorded when size is non-zero.
                    expansions.append((part.name, size, offset if size else 0))
                # Advance by the part's real size, not the OPARG_* kind that
                # may have replaced `size` above.
                offset += part.size
        expansions_table[inst.name] = expansions
    max_uops = max(len(ex) for ex in expansions_table.values())
    out.emit(f"#define MAX_UOP_PER_EXPANSION {max_uops}\n")
    out.emit("struct opcode_macro_expansion {\n")
    out.emit("int nuops;\n")
    out.emit(
        "struct { int16_t uop; int8_t size; int8_t offset; } uops[MAX_UOP_PER_EXPANSION];\n"
    )
    out.emit("};\n")
    out.emit(
        "extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256];\n\n"
    )
    out.emit("#ifdef NEED_OPCODE_METADATA\n")
    out.emit("const struct opcode_macro_expansion\n")
    out.emit("_PyOpcode_macro_expansion[256] = {\n")
    for inst_name, expansions in expansions_table.items():
        uops = [
            f"{{ {name}, {size}, {offset} }}" for (name, size, offset) in expansions
        ]
        out.emit(
            f'[{inst_name}] = {{ .nuops = {len(expansions)}, .uops = {{ {", ".join(uops)} }} }},\n'
        )
    out.emit("};\n")
    out.emit("#endif // NEED_OPCODE_METADATA\n\n")


def is_viable_expansion(inst: Instruction) -> bool:
    "An instruction can be expanded if all its parts are viable for tier 2"
    for part in inst.parts:
        if isinstance(part, Uop):
            # Skip specializing and replaced uops
            if "specializing" in part.annotations:
                continue
            if "replaced" in part.annotations:
                continue
            if part.properties.tier == 1 or not part.is_viable():
                return False
    return True


def generate_extra_cases(analysis: Analysis, out: CWriter) -> None:
    """Emit the EXTRA_CASES macro.

    The macro holds one `case N:` label for every value in 0..255 that is
    not among the values of `analysis.opmap`.
    """
    out.emit("#define EXTRA_CASES \\\n")
    valid_opcodes = set(analysis.opmap.values())
    for op in range(256):
        if op not in valid_opcodes:
            out.emit(f" case {op}: \\\n")
    out.emit(" ;\n")


def generate_pseudo_targets(analysis: Analysis, out: CWriter) -> None:
    """Emit `_PyOpcode_PseudoTargets` and the `is_pseudo_target()` helper.

    Every pseudo instruction gets a row, indexed by `<name>-256`, listing the
    names of its targets.  Rows are padded with "0" to `max_targets + 1`
    entries, so each row ends with at least one 0; the emitted
    `is_pseudo_target()` loop stops at the first 0.

    Raises:
        ValueError: from `max()` if there are no pseudo instructions.
    """
    table_size = len(analysis.pseudos)
    max_targets = max(len(pseudo.targets) for pseudo in analysis.pseudos.values())
    out.emit("struct pseudo_targets {\n")
    out.emit(f"uint8_t targets[{max_targets + 1}];\n")
    out.emit("};\n")
    out.emit(
        f"extern const struct pseudo_targets _PyOpcode_PseudoTargets[{table_size}];\n"
    )
    out.emit("#ifdef NEED_OPCODE_METADATA\n")
    out.emit(
        f"const struct pseudo_targets _PyOpcode_PseudoTargets[{table_size}] = {{\n"
    )
    for pseudo in analysis.pseudos.values():
        targets = ["0"] * (max_targets + 1)
        for i, target in enumerate(pseudo.targets):
            targets[i] = target.name
        out.emit(f"[{pseudo.name}-256] = {{ {{ {', '.join(targets)} }} }},\n")
    out.emit("};\n\n")
    out.emit("#endif // NEED_OPCODE_METADATA\n")
    out.emit("static inline bool\n")
    out.emit("is_pseudo_target(int pseudo, int target) {\n")
    out.emit(f"if (pseudo < 256 || pseudo >= {256+table_size}) {{\n")
    out.emit("return false;\n")
    out.emit("}\n")
    out.emit(
        "for (int i = 0; _PyOpcode_PseudoTargets[pseudo-256].targets[i]; i++) {\n"
    )
    out.emit(
        "if (_PyOpcode_PseudoTargets[pseudo-256].targets[i] == target) return true;\n"
    )
    out.emit("}\n")
    out.emit("return false;\n")
    out.emit("}\n\n")


def generate_opcode_metadata(
    filenames: list[str], analysis: Analysis, outfile: TextIO
) -> None:
    """Write the complete opcode metadata header to *outfile*.

    Args:
        filenames: Input definition files, passed on to `write_header`.
        analysis: The analysed instruction definitions.
        outfile: Open text stream receiving the generated C header.
    """
    write_header(__file__, filenames, outfile)
    out = CWriter(outfile, 0, False)
    with out.header_guard("Py_CORE_OPCODE_METADATA_H"):
        out.emit("#ifndef Py_BUILD_CORE\n")
        out.emit('# error "this header requires Py_BUILD_CORE define"\n')
        out.emit("#endif\n\n")
        out.emit("#include <stdbool.h> // bool\n")
        out.emit('#include "opcode_ids.h"\n')
        generate_is_pseudo(analysis, out)
        out.emit('#include "pycore_uop_ids.h"\n')
        generate_stack_effect_functions(analysis, out)
        generate_instruction_formats(analysis, out)
        table_size = 256 + len(analysis.pseudos)
        out.emit("#define IS_VALID_OPCODE(OP) \\\n")
        out.emit(f" (((OP) >= 0) && ((OP) < {table_size}) && \\\n")
        out.emit(" (_PyOpcode_opcode_metadata[(OP)].valid_entry))\n\n")
        generate_flag_macros(out)
        generate_oparg_macros(out)
        generate_metadata_table(analysis, out)
        generate_expansion_table(analysis, out)
        generate_name_table(analysis, out)
        generate_cache_table(analysis, out)
        generate_deopt_table(analysis, out)
        generate_extra_cases(analysis, out)
        generate_pseudo_targets(analysis, out)


arg_parser = argparse.ArgumentParser(
    description="Generate the header file with opcode metadata.",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)


DEFAULT_OUTPUT = ROOT / "Include/internal/pycore_opcode_metadata.h"


arg_parser.add_argument(
    "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
)

# Positional input files; everything left on the command line is collected.
arg_parser.add_argument(
    "input", nargs=argparse.REMAINDER, help="Instruction definition file(s)"
)

if __name__ == "__main__":
    args = arg_parser.parse_args()
    # Fall back to the default definition file when none was given.
    if not args.input:
        args.input.append(DEFAULT_INPUT)
    data = analyze_files(args.input)
    with open(args.output, "w") as outfile:
        generate_opcode_metadata(args.input, data, outfile)