1
2 #include <stdbool.h>
3
4 #include "Python.h"
5 #include "pycore_flowgraph.h"
6 #include "pycore_compile.h"
7 #include "pycore_pymem.h" // _PyMem_IsPtrFreed()
8
9 #include "pycore_opcode_utils.h"
10 #include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
11
12
13 #undef SUCCESS
14 #undef ERROR
15 #define SUCCESS 0
16 #define ERROR -1
17
18 #define RETURN_IF_ERROR(X) \
19 if ((X) == -1) { \
20 return ERROR; \
21 }
22
23 #define DEFAULT_BLOCK_SIZE 16
24
25 typedef _Py_SourceLocation location;
26 typedef _PyJumpTargetLabel jump_target_label;
27
28 typedef struct _PyCfgInstruction {
29 int i_opcode;
30 int i_oparg;
31 _Py_SourceLocation i_loc;
32 struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
33 struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
34 } cfg_instr;
35
36 typedef struct _PyCfgBasicblock {
37 /* Each basicblock in a compilation unit is linked via b_list in the
       reverse order that the blocks are allocated. b_list points to the next
39 block in this list, not to be confused with b_next, which is next by
40 control flow. */
41 struct _PyCfgBasicblock *b_list;
42 /* The label of this block if it is a jump target, -1 otherwise */
43 _PyJumpTargetLabel b_label;
44 /* Exception stack at start of block, used by assembler to create the exception handling table */
45 struct _PyCfgExceptStack *b_exceptstack;
46 /* pointer to an array of instructions, initially NULL */
47 cfg_instr *b_instr;
48 /* If b_next is non-NULL, it is a pointer to the next
49 block reached by normal control flow. */
50 struct _PyCfgBasicblock *b_next;
51 /* number of instructions used */
52 int b_iused;
53 /* length of instruction array (b_instr) */
54 int b_ialloc;
55 /* Used by add_checks_for_loads_of_unknown_variables */
56 uint64_t b_unsafe_locals_mask;
57 /* Number of predecessors that a block has. */
58 int b_predecessors;
59 /* depth of stack upon entry of block, computed by stackdepth() */
60 int b_startdepth;
61 /* Basic block is an exception handler that preserves lasti */
62 unsigned b_preserve_lasti : 1;
63 /* Used by compiler passes to mark whether they have visited a basic block. */
64 unsigned b_visited : 1;
65 /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
66 unsigned b_except_handler : 1;
67 /* b_cold is true if this block is not perf critical (like an exception handler) */
68 unsigned b_cold : 1;
69 /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
70 unsigned b_warm : 1;
71 } basicblock;
72
73
74 struct _PyCfgBuilder {
75 /* The entryblock, at which control flow begins. All blocks of the
76 CFG are reachable through the b_next links */
77 struct _PyCfgBasicblock *g_entryblock;
78 /* Pointer to the most recently allocated block. By following
79 b_list links, you can reach all allocated blocks. */
80 struct _PyCfgBasicblock *g_block_list;
81 /* pointer to the block currently being constructed */
82 struct _PyCfgBasicblock *g_curblock;
83 /* label for the next instruction to be placed */
84 _PyJumpTargetLabel g_current_label;
85 };
86
87 typedef struct _PyCfgBuilder cfg_builder;
88
89 static const jump_target_label NO_LABEL = {-1};
90
91 #define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
92 #define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
93
94 #define LOCATION(LNO, END_LNO, COL, END_COL) \
95 ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
96
97 static inline int
is_block_push(cfg_instr *i)
99 {
100 assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
101 return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
102 }
103
104 static inline int
is_jump(cfg_instr *i)
106 {
107 return OPCODE_HAS_JUMP(i->i_opcode);
108 }
109
110 /* One arg*/
111 #define INSTR_SET_OP1(I, OP, ARG) \
112 do { \
113 assert(OPCODE_HAS_ARG(OP)); \
114 cfg_instr *_instr__ptr_ = (I); \
115 _instr__ptr_->i_opcode = (OP); \
116 _instr__ptr_->i_oparg = (ARG); \
117 } while (0);
118
119 /* No args*/
120 #define INSTR_SET_OP0(I, OP) \
121 do { \
122 assert(!OPCODE_HAS_ARG(OP)); \
123 cfg_instr *_instr__ptr_ = (I); \
124 _instr__ptr_->i_opcode = (OP); \
125 _instr__ptr_->i_oparg = 0; \
126 } while (0);
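/* For example, INSTR_SET_OP0(instr, NOP) rewrites *instr in place as a NOP,
   and INSTR_SET_OP1(instr, LOAD_CONST, index) rewrites it as LOAD_CONST index.
   Neither macro touches i_loc, so the original source location is kept. */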
127
128 /***** Blocks *****/
129
130 /* Returns the offset of the next instruction in the current block's
131 b_instr array. Resizes the b_instr as necessary.
132 Returns -1 on failure.
133 */
134 static int
basicblock_next_instr(basicblock *b)
136 {
137 assert(b != NULL);
138 RETURN_IF_ERROR(
139 _PyCompile_EnsureArrayLargeEnough(
140 b->b_iused + 1,
141 (void**)&b->b_instr,
142 &b->b_ialloc,
143 DEFAULT_BLOCK_SIZE,
144 sizeof(cfg_instr)));
145 return b->b_iused++;
146 }
147
148 static cfg_instr *
basicblock_last_instr(const basicblock *b) {
150 assert(b->b_iused >= 0);
151 if (b->b_iused > 0) {
152 assert(b->b_instr != NULL);
153 return &b->b_instr[b->b_iused - 1];
154 }
155 return NULL;
156 }
157
158 /* Allocate a new block and return a pointer to it.
159 Returns NULL on error.
160 */
161
162 static basicblock *
cfg_builder_new_block(cfg_builder *g)
164 {
165 basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
166 if (b == NULL) {
167 PyErr_NoMemory();
168 return NULL;
169 }
170 /* Extend the singly linked list of blocks with new block. */
171 b->b_list = g->g_block_list;
172 g->g_block_list = b;
173 b->b_label = NO_LABEL;
174 return b;
175 }
176
177 static int
basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
179 {
180 assert(IS_WITHIN_OPCODE_RANGE(opcode));
181 assert(!IS_ASSEMBLER_OPCODE(opcode));
182 assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
183 assert(0 <= oparg && oparg < (1 << 30));
184
185 int off = basicblock_next_instr(b);
186 if (off < 0) {
187 return ERROR;
188 }
189 cfg_instr *i = &b->b_instr[off];
190 i->i_opcode = opcode;
191 i->i_oparg = oparg;
192 i->i_target = NULL;
193 i->i_loc = loc;
194
195 return SUCCESS;
196 }
197
198 static int
basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
200 {
201 cfg_instr *last = basicblock_last_instr(b);
202 if (last && is_jump(last)) {
203 return ERROR;
204 }
205
206 RETURN_IF_ERROR(
207 basicblock_addop(b, opcode, target->b_label.id, loc));
208 last = basicblock_last_instr(b);
209 assert(last && last->i_opcode == opcode);
210 last->i_target = target;
211 return SUCCESS;
212 }
213
214 static inline int
basicblock_append_instructions(basicblock *to, basicblock *from)
216 {
217 for (int i = 0; i < from->b_iused; i++) {
218 int n = basicblock_next_instr(to);
219 if (n < 0) {
220 return ERROR;
221 }
222 to->b_instr[n] = from->b_instr[i];
223 }
224 return SUCCESS;
225 }
226
227 static inline int
basicblock_nofallthrough(const basicblock *b) {
229 cfg_instr *last = basicblock_last_instr(b);
230 return (last &&
231 (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
232 IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
233 }
234
235 #define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
236 #define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
237
238 static basicblock *
copy_basicblock(cfg_builder *g, basicblock *block)
240 {
241 /* Cannot copy a block if it has a fallthrough, since
242 * a block can only have one fallthrough predecessor.
243 */
244 assert(BB_NO_FALLTHROUGH(block));
245 basicblock *result = cfg_builder_new_block(g);
246 if (result == NULL) {
247 return NULL;
248 }
249 if (basicblock_append_instructions(result, block) < 0) {
250 return NULL;
251 }
252 return result;
253 }
254
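/* Insert *instr at position pos in the block, shifting the following
   instructions up by one slot. The instruction array is grown via
   basicblock_next_instr() if needed. */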
255 static int
basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
257 RETURN_IF_ERROR(basicblock_next_instr(block));
258 for (int i = block->b_iused - 1; i > pos; i--) {
259 block->b_instr[i] = block->b_instr[i-1];
260 }
261 block->b_instr[pos] = *instr;
262 return SUCCESS;
263 }
264
265 /* For debugging purposes only */
266 #if 0
267 static void
268 dump_instr(cfg_instr *i)
269 {
270 const char *jump = is_jump(i) ? "jump " : "";
271
272 char arg[128];
273
274 *arg = '\0';
275 if (OPCODE_HAS_ARG(i->i_opcode)) {
276 sprintf(arg, "arg: %d ", i->i_oparg);
277 }
278 if (HAS_TARGET(i->i_opcode)) {
279 sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
280 }
281 fprintf(stderr, "line: %d, %s (%d) %s%s\n",
282 i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
283 }
284
285 static inline int
286 basicblock_returns(const basicblock *b) {
287 cfg_instr *last = basicblock_last_instr(b);
288 return last && (last->i_opcode == RETURN_VALUE || last->i_opcode == RETURN_CONST);
289 }
290
291 static void
292 dump_basicblock(const basicblock *b)
293 {
294 const char *b_return = basicblock_returns(b) ? "return " : "";
295 fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
296 b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
297 b->b_startdepth, b->b_predecessors, b_return);
298 if (b->b_instr) {
299 int i;
300 for (i = 0; i < b->b_iused; i++) {
301 fprintf(stderr, " [%02d] ", i);
302 dump_instr(b->b_instr + i);
303 }
304 }
305 }
306
307 void
308 _PyCfgBuilder_DumpGraph(const basicblock *entryblock)
309 {
310 for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
311 dump_basicblock(b);
312 }
313 }
314
315 #endif
316
317
318 /***** CFG construction and modification *****/
319
320 static basicblock *
cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
322 {
323 assert(block != NULL);
324 g->g_curblock->b_next = block;
325 g->g_curblock = block;
326 return block;
327 }
328
329 static inline int
basicblock_exits_scope(const basicblock *b) {
331 cfg_instr *last = basicblock_last_instr(b);
332 return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
333 }
334
335 static inline int
basicblock_has_eval_break(const basicblock *b) {
337 for (int i = 0; i < b->b_iused; i++) {
338 if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
339 return true;
340 }
341 }
342 return false;
343 }
344
345 static bool
cfg_builder_current_block_is_terminated(cfg_builder *g)
347 {
348 cfg_instr *last = basicblock_last_instr(g->g_curblock);
349 if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
350 return true;
351 }
352 if (IS_LABEL(g->g_current_label)) {
353 if (last || IS_LABEL(g->g_curblock->b_label)) {
354 return true;
355 }
356 else {
357 /* current block is empty, label it */
358 g->g_curblock->b_label = g->g_current_label;
359 g->g_current_label = NO_LABEL;
360 }
361 }
362 return false;
363 }
364
365 static int
cfg_builder_maybe_start_new_block(cfg_builder *g)
367 {
368 if (cfg_builder_current_block_is_terminated(g)) {
369 basicblock *b = cfg_builder_new_block(g);
370 if (b == NULL) {
371 return ERROR;
372 }
373 b->b_label = g->g_current_label;
374 g->g_current_label = NO_LABEL;
375 cfg_builder_use_next_block(g, b);
376 }
377 return SUCCESS;
378 }
379
380 #ifndef NDEBUG
381 static bool
cfg_builder_check(cfg_builder *g)
383 {
384 assert(g->g_entryblock->b_iused > 0);
385 for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
386 assert(!_PyMem_IsPtrFreed(block));
387 if (block->b_instr != NULL) {
388 assert(block->b_ialloc > 0);
389 assert(block->b_iused >= 0);
390 assert(block->b_ialloc >= block->b_iused);
391 }
392 else {
393 assert (block->b_iused == 0);
394 assert (block->b_ialloc == 0);
395 }
396 }
397 return true;
398 }
399 #endif
400
401 static int
init_cfg_builder(cfg_builder *g)
403 {
404 g->g_block_list = NULL;
405 basicblock *block = cfg_builder_new_block(g);
406 if (block == NULL) {
407 return ERROR;
408 }
409 g->g_curblock = g->g_entryblock = block;
410 g->g_current_label = NO_LABEL;
411 return SUCCESS;
412 }
413
414 cfg_builder *
_PyCfgBuilder_New(void)
416 {
417 cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
418 if (g == NULL) {
419 PyErr_NoMemory();
420 return NULL;
421 }
422 memset(g, 0, sizeof(cfg_builder));
423 if (init_cfg_builder(g) < 0) {
424 PyMem_Free(g);
425 return NULL;
426 }
427 return g;
428 }
429
430 void
_PyCfgBuilder_Free(cfg_builder *g)
432 {
433 if (g == NULL) {
434 return;
435 }
436 assert(cfg_builder_check(g));
437 basicblock *b = g->g_block_list;
438 while (b != NULL) {
439 if (b->b_instr) {
440 PyMem_Free((void *)b->b_instr);
441 }
442 basicblock *next = b->b_list;
443 PyMem_Free((void *)b);
444 b = next;
445 }
446 PyMem_Free(g);
447 }
448
449 int
_PyCfgBuilder_CheckSize(cfg_builder *g)
451 {
452 int nblocks = 0;
453 for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
454 nblocks++;
455 }
456 if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
457 PyErr_NoMemory();
458 return ERROR;
459 }
460 return SUCCESS;
461 }
462
463 int
_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
465 {
466 g->g_current_label = lbl;
467 return cfg_builder_maybe_start_new_block(g);
468 }
469
470 int
_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
472 {
473 RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
474 return basicblock_addop(g->g_curblock, opcode, oparg, loc);
475 }
476
477
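/* Skip over blocks that contain no instructions and return the first
   non-empty block (or NULL if none follows). */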
478 static basicblock *
next_nonempty_block(basicblock *b)
480 {
481 while (b && b->b_iused == 0) {
482 b = b->b_next;
483 }
484 return b;
485 }
486
487 /***** debugging helpers *****/
488
489 #ifndef NDEBUG
490 static int remove_redundant_nops(cfg_builder *g);
491
492 static bool
no_redundant_nops(cfg_builder *g) {
494 if (remove_redundant_nops(g) != 0) {
495 return false;
496 }
497 return true;
498 }
499
500 static bool
no_redundant_jumps(cfg_builder *g) {
502 for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
503 cfg_instr *last = basicblock_last_instr(b);
504 if (last != NULL) {
505 if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
506 basicblock *next = next_nonempty_block(b->b_next);
507 basicblock *jump_target = next_nonempty_block(last->i_target);
508 if (jump_target == next) {
509 assert(next);
510 if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
511 assert(0);
512 return false;
513 }
514 }
515 }
516 }
517 }
518 return true;
519 }
520
521 #endif
522
523 /***** CFG preprocessing (jump targets and exceptions) *****/
524
525 static int
normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
527 cfg_instr *last = basicblock_last_instr(b);
528 if (last == NULL || !is_jump(last) ||
529 IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
530 return SUCCESS;
531 }
532 assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
533
534 bool is_forward = last->i_target->b_visited == 0;
535 if (is_forward) {
536 return SUCCESS;
537 }
538
539 int reversed_opcode = 0;
540 switch(last->i_opcode) {
541 case POP_JUMP_IF_NOT_NONE:
542 reversed_opcode = POP_JUMP_IF_NONE;
543 break;
544 case POP_JUMP_IF_NONE:
545 reversed_opcode = POP_JUMP_IF_NOT_NONE;
546 break;
547 case POP_JUMP_IF_FALSE:
548 reversed_opcode = POP_JUMP_IF_TRUE;
549 break;
550 case POP_JUMP_IF_TRUE:
551 reversed_opcode = POP_JUMP_IF_FALSE;
552 break;
553 }
554 /* transform 'conditional jump T' to
555 * 'reversed_jump b_next' followed by 'jump_backwards T'
556 */
557
558 basicblock *target = last->i_target;
559 basicblock *backwards_jump = cfg_builder_new_block(g);
560 if (backwards_jump == NULL) {
561 return ERROR;
562 }
563 RETURN_IF_ERROR(
564 basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
565 last->i_opcode = reversed_opcode;
566 last->i_target = b->b_next;
567
568 backwards_jump->b_cold = b->b_cold;
569 backwards_jump->b_next = b->b_next;
570 b->b_next = backwards_jump;
571 return SUCCESS;
572 }
573
574
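/* Ensure that conditional jumps only jump forward. A conditional jump to an
   already-visited (i.e. earlier) block is rewritten as the reversed condition
   jumping to the fallthrough block, followed by an unconditional backwards
   JUMP to the original target (see normalize_jumps_in_block). */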
575 static int
normalize_jumps(cfg_builder *g)
577 {
578 basicblock *entryblock = g->g_entryblock;
579 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
580 b->b_visited = 0;
581 }
582 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
583 b->b_visited = 1;
584 RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
585 }
586 return SUCCESS;
587 }
588
589 static int
check_cfg(cfg_builder *g) {
591 for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
592 /* Raise SystemError if jump or exit is not last instruction in the block. */
593 for (int i = 0; i < b->b_iused; i++) {
594 int opcode = b->b_instr[i].i_opcode;
595 assert(!IS_ASSEMBLER_OPCODE(opcode));
596 if (IS_TERMINATOR_OPCODE(opcode)) {
597 if (i != b->b_iused - 1) {
598 PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
599 return ERROR;
600 }
601 }
602 }
603 }
604 return SUCCESS;
605 }
606
607 static int
get_max_label(basicblock *entryblock)
609 {
610 int lbl = -1;
611 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
612 if (b->b_label.id > lbl) {
613 lbl = b->b_label.id;
614 }
615 }
616 return lbl;
617 }
618
619 /* Calculate the actual jump target from the target_label */
620 static int
translate_jump_labels_to_targets(basicblock *entryblock)
622 {
623 int max_label = get_max_label(entryblock);
624 size_t mapsize = sizeof(basicblock *) * (max_label + 1);
625 basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
626 if (!label2block) {
627 PyErr_NoMemory();
628 return ERROR;
629 }
630 memset(label2block, 0, mapsize);
631 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
632 if (b->b_label.id >= 0) {
633 label2block[b->b_label.id] = b;
634 }
635 }
636 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
637 for (int i = 0; i < b->b_iused; i++) {
638 cfg_instr *instr = &b->b_instr[i];
639 assert(instr->i_target == NULL);
640 if (HAS_TARGET(instr->i_opcode)) {
641 int lbl = instr->i_oparg;
642 assert(lbl >= 0 && lbl <= max_label);
643 instr->i_target = label2block[lbl];
644 assert(instr->i_target != NULL);
645 assert(instr->i_target->b_label.id == lbl);
646 }
647 }
648 }
649 PyMem_Free(label2block);
650 return SUCCESS;
651 }
652
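/* Mark the target block of every block-push instruction (SETUP_*) as an
   exception handler. */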
653 static int
mark_except_handlers(basicblock *entryblock) {
655 #ifndef NDEBUG
656 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
657 assert(!b->b_except_handler);
658 }
659 #endif
660 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
661 for (int i=0; i < b->b_iused; i++) {
662 cfg_instr *instr = &b->b_instr[i];
663 if (is_block_push(instr)) {
664 instr->i_target->b_except_handler = 1;
665 }
666 }
667 }
668 return SUCCESS;
669 }
670
671
672 struct _PyCfgExceptStack {
673 basicblock *handlers[CO_MAXBLOCKS+2];
674 int depth;
675 };
676
677
678 static basicblock *
push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
680 assert(is_block_push(setup));
681 int opcode = setup->i_opcode;
682 basicblock * target = setup->i_target;
683 if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
684 target->b_preserve_lasti = 1;
685 }
686 assert(stack->depth <= CO_MAXBLOCKS);
687 stack->handlers[++stack->depth] = target;
688 return target;
689 }
690
691 static basicblock *
pop_except_block(struct _PyCfgExceptStack *stack) {
693 assert(stack->depth > 0);
694 return stack->handlers[--stack->depth];
695 }
696
697 static basicblock *
except_stack_top(struct _PyCfgExceptStack *stack) {
699 return stack->handlers[stack->depth];
700 }
701
702 static struct _PyCfgExceptStack *
make_except_stack(void) {
704 struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
705 if (new == NULL) {
706 PyErr_NoMemory();
707 return NULL;
708 }
709 new->depth = 0;
710 new->handlers[0] = NULL;
711 return new;
712 }
713
714 static struct _PyCfgExceptStack *
copy_except_stack(struct _PyCfgExceptStack *stack) {
716 struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
717 if (copy == NULL) {
718 PyErr_NoMemory();
719 return NULL;
720 }
721 memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
722 return copy;
723 }
724
725 static basicblock**
make_cfg_traversal_stack(basicblock *entryblock) {
727 int nblocks = 0;
728 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
729 b->b_visited = 0;
730 nblocks++;
731 }
732 basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
733 if (!stack) {
734 PyErr_NoMemory();
735 }
736 return stack;
737 }
738
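/* Record 'depth' as the stack depth on entry to block b and push b onto the
   traversal stack if it has not been assigned a depth yet. Reaching the same
   block with two different depths makes the CFG invalid. */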
739 Py_LOCAL_INLINE(int)
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
741 {
742 if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
743 PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
744 return ERROR;
745 }
746 if (b->b_startdepth < depth && b->b_startdepth < 100) {
747 assert(b->b_startdepth < 0);
748 b->b_startdepth = depth;
749 *(*sp)++ = b;
750 }
751 return SUCCESS;
752 }
753
754 /* Find the flow path that needs the largest stack. We assume that
755 * cycles in the flow graph have no net effect on the stack depth.
756 */
757 static int
calculate_stackdepth(cfg_builder *g)
759 {
760 basicblock *entryblock = g->g_entryblock;
761 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
762 b->b_startdepth = INT_MIN;
763 }
764 basicblock **stack = make_cfg_traversal_stack(entryblock);
765 if (!stack) {
766 return ERROR;
767 }
768
769
770 int stackdepth = -1;
771 int maxdepth = 0;
772 basicblock **sp = stack;
773 if (stackdepth_push(&sp, entryblock, 0) < 0) {
774 goto error;
775 }
776 while (sp != stack) {
777 basicblock *b = *--sp;
778 int depth = b->b_startdepth;
779 assert(depth >= 0);
780 basicblock *next = b->b_next;
781 for (int i = 0; i < b->b_iused; i++) {
782 cfg_instr *instr = &b->b_instr[i];
783 int effect = PyCompile_OpcodeStackEffectWithJump(
784 instr->i_opcode, instr->i_oparg, 0);
785 if (effect == PY_INVALID_STACK_EFFECT) {
786 PyErr_Format(PyExc_SystemError,
787 "Invalid stack effect for opcode=%d, arg=%i",
788 instr->i_opcode, instr->i_oparg);
789 goto error;
790 }
791 int new_depth = depth + effect;
792 if (new_depth < 0) {
793 PyErr_Format(PyExc_ValueError,
794 "Invalid CFG, stack underflow");
795 goto error;
796 }
797 if (new_depth > maxdepth) {
798 maxdepth = new_depth;
799 }
800 if (HAS_TARGET(instr->i_opcode)) {
801 effect = PyCompile_OpcodeStackEffectWithJump(
802 instr->i_opcode, instr->i_oparg, 1);
803 if (effect == PY_INVALID_STACK_EFFECT) {
804 PyErr_Format(PyExc_SystemError,
805 "Invalid stack effect for opcode=%d, arg=%i",
806 instr->i_opcode, instr->i_oparg);
807 goto error;
808 }
809 int target_depth = depth + effect;
810 assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
811 if (target_depth > maxdepth) {
812 maxdepth = target_depth;
813 }
814 if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
815 goto error;
816 }
817 }
818 depth = new_depth;
819 assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
820 if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
821 IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
822 {
823 /* remaining code is dead */
824 next = NULL;
825 break;
826 }
827 }
828 if (next != NULL) {
829 assert(BB_HAS_FALLTHROUGH(b));
830 if (stackdepth_push(&sp, next, depth) < 0) {
831 goto error;
832 }
833 }
834 }
835 stackdepth = maxdepth;
836 error:
837 PyMem_Free(stack);
838 return stackdepth;
839 }
840
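/* Compute instr->i_except for every instruction: the innermost exception
   handler block that is active at that point. This walks the CFG with a
   work list, propagating a stack of pushed exception blocks
   (struct _PyCfgExceptStack) from block to block. */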
841 static int
label_exception_targets(basicblock *entryblock) {
843 basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
844 if (todo_stack == NULL) {
845 return ERROR;
846 }
847 struct _PyCfgExceptStack *except_stack = make_except_stack();
848 if (except_stack == NULL) {
849 PyMem_Free(todo_stack);
850 PyErr_NoMemory();
851 return ERROR;
852 }
853 except_stack->depth = 0;
854 todo_stack[0] = entryblock;
855 entryblock->b_visited = 1;
856 entryblock->b_exceptstack = except_stack;
857 basicblock **todo = &todo_stack[1];
858 basicblock *handler = NULL;
859 while (todo > todo_stack) {
860 todo--;
861 basicblock *b = todo[0];
862 assert(b->b_visited == 1);
863 except_stack = b->b_exceptstack;
864 assert(except_stack != NULL);
865 b->b_exceptstack = NULL;
866 handler = except_stack_top(except_stack);
867 int last_yield_except_depth = -1;
868 for (int i = 0; i < b->b_iused; i++) {
869 cfg_instr *instr = &b->b_instr[i];
870 if (is_block_push(instr)) {
871 if (!instr->i_target->b_visited) {
872 struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
873 if (copy == NULL) {
874 goto error;
875 }
876 instr->i_target->b_exceptstack = copy;
877 todo[0] = instr->i_target;
878 instr->i_target->b_visited = 1;
879 todo++;
880 }
881 handler = push_except_block(except_stack, instr);
882 }
883 else if (instr->i_opcode == POP_BLOCK) {
884 handler = pop_except_block(except_stack);
885 INSTR_SET_OP0(instr, NOP);
886 }
887 else if (is_jump(instr)) {
888 instr->i_except = handler;
889 assert(i == b->b_iused -1);
890 if (!instr->i_target->b_visited) {
891 if (BB_HAS_FALLTHROUGH(b)) {
892 struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
893 if (copy == NULL) {
894 goto error;
895 }
896 instr->i_target->b_exceptstack = copy;
897 }
898 else {
899 instr->i_target->b_exceptstack = except_stack;
900 except_stack = NULL;
901 }
902 todo[0] = instr->i_target;
903 instr->i_target->b_visited = 1;
904 todo++;
905 }
906 }
907 else if (instr->i_opcode == YIELD_VALUE) {
908 instr->i_except = handler;
909 last_yield_except_depth = except_stack->depth;
910 }
911 else if (instr->i_opcode == RESUME) {
912 instr->i_except = handler;
913 if (instr->i_oparg != RESUME_AT_FUNC_START) {
914 assert(last_yield_except_depth >= 0);
915 if (last_yield_except_depth == 1) {
916 instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
917 }
918 last_yield_except_depth = -1;
919 }
920 }
921 else {
922 instr->i_except = handler;
923 }
924 }
925 if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
926 assert(except_stack != NULL);
927 b->b_next->b_exceptstack = except_stack;
928 todo[0] = b->b_next;
929 b->b_next->b_visited = 1;
930 todo++;
931 }
932 else if (except_stack != NULL) {
933 PyMem_Free(except_stack);
934 }
935 }
936 #ifdef Py_DEBUG
937 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
938 assert(b->b_exceptstack == NULL);
939 }
940 #endif
941 PyMem_Free(todo_stack);
942 return SUCCESS;
943 error:
944 PyMem_Free(todo_stack);
945 PyMem_Free(except_stack);
946 return ERROR;
947 }
948
949 /***** CFG optimizations *****/
950
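/* Compute b_predecessors with a depth-first traversal from the entry block,
   then empty out every block that was never reached. */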
951 static int
remove_unreachable(basicblock *entryblock) {
953 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
954 b->b_predecessors = 0;
955 }
956 basicblock **stack = make_cfg_traversal_stack(entryblock);
957 if (stack == NULL) {
958 return ERROR;
959 }
960 basicblock **sp = stack;
961 entryblock->b_predecessors = 1;
962 *sp++ = entryblock;
963 entryblock->b_visited = 1;
964 while (sp > stack) {
965 basicblock *b = *(--sp);
966 if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
967 if (!b->b_next->b_visited) {
968 assert(b->b_next->b_predecessors == 0);
969 *sp++ = b->b_next;
970 b->b_next->b_visited = 1;
971 }
972 b->b_next->b_predecessors++;
973 }
974 for (int i = 0; i < b->b_iused; i++) {
975 basicblock *target;
976 cfg_instr *instr = &b->b_instr[i];
977 if (is_jump(instr) || is_block_push(instr)) {
978 target = instr->i_target;
979 if (!target->b_visited) {
980 *sp++ = target;
981 target->b_visited = 1;
982 }
983 target->b_predecessors++;
984 }
985 }
986 }
987 PyMem_Free(stack);
988
989 /* Delete unreachable instructions */
990 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
991 if (b->b_predecessors == 0) {
992 b->b_iused = 0;
993 b->b_except_handler = 0;
994 }
995 }
996 return SUCCESS;
997 }
998
999 static int
basicblock_remove_redundant_nops(basicblock *bb) {
1001 /* Remove NOPs when legal to do so. */
1002 int dest = 0;
1003 int prev_lineno = -1;
1004 for (int src = 0; src < bb->b_iused; src++) {
1005 int lineno = bb->b_instr[src].i_loc.lineno;
1006 if (bb->b_instr[src].i_opcode == NOP) {
1007 /* Eliminate no-op if it doesn't have a line number */
1008 if (lineno < 0) {
1009 continue;
1010 }
1011 /* or, if the previous instruction had the same line number. */
1012 if (prev_lineno == lineno) {
1013 continue;
1014 }
1015 /* or, if the next instruction has same line number or no line number */
1016 if (src < bb->b_iused - 1) {
1017 int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1018 if (next_lineno == lineno) {
1019 continue;
1020 }
1021 if (next_lineno < 0) {
1022 bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1023 continue;
1024 }
1025 }
1026 else {
1027 basicblock *next = next_nonempty_block(bb->b_next);
1028 /* or if last instruction in BB and next BB has same line number */
1029 if (next) {
1030 location next_loc = NO_LOCATION;
1031 for (int next_i=0; next_i < next->b_iused; next_i++) {
1032 cfg_instr *instr = &next->b_instr[next_i];
1033 if (instr->i_opcode == NOP && instr->i_loc.lineno == NO_LOCATION.lineno) {
1034 /* Skip over NOPs without location, they will be removed */
1035 continue;
1036 }
1037 next_loc = instr->i_loc;
1038 break;
1039 }
1040 if (lineno == next_loc.lineno) {
1041 continue;
1042 }
1043 }
1044 }
1045
1046 }
1047 if (dest != src) {
1048 bb->b_instr[dest] = bb->b_instr[src];
1049 }
1050 dest++;
1051 prev_lineno = lineno;
1052 }
1053 assert(dest <= bb->b_iused);
1054 int num_removed = bb->b_iused - dest;
1055 bb->b_iused = dest;
1056 return num_removed;
1057 }
1058
1059 static int
remove_redundant_nops(cfg_builder *g) {
1061 int changes = 0;
1062 for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1063 int change = basicblock_remove_redundant_nops(b);
1064 RETURN_IF_ERROR(change);
1065 changes += change;
1066 }
1067 return changes;
1068 }
1069
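/* Remove redundant NOPs as well as instruction pairs that cancel out, such as
   LOAD_CONST followed by POP_TOP, or COPY 1 followed by POP_TOP. Repeats until
   a full pass makes no more changes. */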
1070 static int
remove_redundant_nops_and_pairs(basicblock *entryblock)
1072 {
1073 bool done = false;
1074
1075 while (! done) {
1076 done = true;
1077 cfg_instr *prev_instr = NULL;
1078 cfg_instr *instr = NULL;
1079 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1080 RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1081 if (IS_LABEL(b->b_label)) {
1082 /* this block is a jump target, forget instr */
1083 instr = NULL;
1084 }
1085 for (int i = 0; i < b->b_iused; i++) {
1086 prev_instr = instr;
1087 instr = &b->b_instr[i];
1088 int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1089 int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1090 int opcode = instr->i_opcode;
1091 bool is_redundant_pair = false;
1092 if (opcode == POP_TOP) {
1093 if (prev_opcode == LOAD_CONST) {
1094 is_redundant_pair = true;
1095 }
1096 else if (prev_opcode == COPY && prev_oparg == 1) {
1097 is_redundant_pair = true;
1098 }
1099 }
1100 if (is_redundant_pair) {
1101 INSTR_SET_OP0(prev_instr, NOP);
1102 INSTR_SET_OP0(instr, NOP);
1103 done = false;
1104 }
1105 }
1106 if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1107 instr = NULL;
1108 }
1109 }
1110 }
1111 return SUCCESS;
1112 }
1113
1114 static int
remove_redundant_jumps(cfg_builder *g) {
1116 /* If a non-empty block ends with a jump instruction, check if the next
1117 * non-empty block reached through normal flow control is the target
1118 * of that jump. If it is, then the jump instruction is redundant and
1119 * can be deleted.
1120 *
1121 * Return the number of changes applied, or -1 on error.
1122 */
1123
1124 int changes = 0;
1125 for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1126 cfg_instr *last = basicblock_last_instr(b);
1127 if (last == NULL) {
1128 continue;
1129 }
1130 assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1131 if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1132 basicblock* jump_target = next_nonempty_block(last->i_target);
1133 if (jump_target == NULL) {
1134 PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1135 return ERROR;
1136 }
1137 basicblock *next = next_nonempty_block(b->b_next);
1138 if (jump_target == next) {
1139 changes++;
1140 INSTR_SET_OP0(last, NOP);
1141 }
1142 }
1143 }
1144
1145 return changes;
1146 }
1147
1148 static inline bool
basicblock_has_no_lineno(basicblock *b) {
1150 for (int i = 0; i < b->b_iused; i++) {
1151 if (b->b_instr[i].i_loc.lineno >= 0) {
1152 return false;
1153 }
1154 }
1155 return true;
1156 }
1157
1158 /* Maximum size of basic block that should be copied in optimizer */
1159 #define MAX_COPY_SIZE 4
1160
1161 /* If this block ends with an unconditional jump to a small exit block or
1162 * a block that has no line numbers (and no fallthrough), then
1163 * remove the jump and extend this block with the target.
1164 * Returns 1 if extended, 0 if no change, and -1 on error.
1165 */
1166 static int
basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1168 cfg_instr *last = basicblock_last_instr(bb);
1169 if (last == NULL) {
1170 return 0;
1171 }
1172 if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1173 return 0;
1174 }
1175 basicblock *target = last->i_target;
1176 bool small_exit_block = (basicblock_exits_scope(target) &&
1177 target->b_iused <= MAX_COPY_SIZE);
1178 bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1179 !BB_HAS_FALLTHROUGH(target));
1180 if (small_exit_block || no_lineno_no_fallthrough) {
1181 assert(is_jump(last));
1182 int removed_jump_opcode = last->i_opcode;
1183 INSTR_SET_OP0(last, NOP);
1184 RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1185 if (no_lineno_no_fallthrough) {
1186 last = basicblock_last_instr(bb);
1187 if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1188 removed_jump_opcode == JUMP)
1189 {
1190 /* Make sure we don't lose eval breaker checks */
1191 last->i_opcode = JUMP;
1192 }
1193 }
1194 target->b_predecessors--;
1195 return 1;
1196 }
1197 return 0;
1198 }
1199
1200 static int
inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1202 bool changes;
1203 do {
1204 changes = false;
1205 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1206 int res = basicblock_inline_small_or_no_lineno_blocks(b);
1207 RETURN_IF_ERROR(res);
1208 if (res) {
1209 changes = true;
1210 }
1211 }
1212 } while(changes); /* every change removes a jump, ensuring convergence */
1213 return changes;
1214 }
1215
1216 // Attempt to eliminate jumps to jumps by updating inst to jump to
1217 // target->i_target using the provided opcode. Return whether or not the
1218 // optimization was successful.
1219 static bool
jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1221 {
1222 assert(is_jump(inst));
1223 assert(is_jump(target));
1224 assert(inst == basicblock_last_instr(bb));
1225 // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1226 // changes (and we fall into an infinite loop):
1227 if (inst->i_target != target->i_target) {
1228 /* Change inst to NOP and append a jump to target->i_target. The
1229 * NOP will be removed later if it's not needed for the lineno.
1230 */
1231 INSTR_SET_OP0(inst, NOP);
1232
1233 RETURN_IF_ERROR(
1234 basicblock_add_jump(
1235 bb, opcode, target->i_target, target->i_loc));
1236
1237 return true;
1238 }
1239 return false;
1240 }
1241
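/* Return a new reference to the constant pushed by (opcode, oparg), looked up
   in co_consts. Only LOAD_CONST is handled here; anything else is reported as
   a SystemError. */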
1242 static PyObject*
get_const_value(int opcode, int oparg, PyObject *co_consts)
1244 {
1245 PyObject *constant = NULL;
1246 assert(OPCODE_HAS_CONST(opcode));
1247 if (opcode == LOAD_CONST) {
1248 constant = PyList_GET_ITEM(co_consts, oparg);
1249 }
1250
1251 if (constant == NULL) {
1252 PyErr_SetString(PyExc_SystemError,
1253 "Internal error: failed to get value of a constant");
1254 return NULL;
1255 }
1256 return Py_NewRef(constant);
1257 }
1258
1259 // Steals a reference to newconst.
1260 static int
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
1262 {
1263 if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1264 Py_DECREF(newconst);
1265 return -1;
1266 }
1267
1268 Py_ssize_t index;
1269 for (index = 0; index < PyList_GET_SIZE(consts); index++) {
1270 if (PyList_GET_ITEM(consts, index) == newconst) {
1271 break;
1272 }
1273 }
1274 if (index == PyList_GET_SIZE(consts)) {
1275 if ((size_t)index >= (size_t)INT_MAX - 1) {
1276 PyErr_SetString(PyExc_OverflowError, "too many constants");
1277 Py_DECREF(newconst);
1278 return -1;
1279 }
1280 if (PyList_Append(consts, newconst)) {
1281 Py_DECREF(newconst);
1282 return -1;
1283 }
1284 }
1285 Py_DECREF(newconst);
1286 return (int)index;
1287 }
1288
1289 /* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1290 with LOAD_CONST (c1, c2, ... cn).
1291 The consts table must still be in list form so that the
1292 new constant (c1, c2, ... cn) can be appended.
   Called with inst pointing to the first LOAD_CONST.
1294 */
1295 static int
fold_tuple_on_constants(PyObject *const_cache,
                        cfg_instr *inst,
                        int n, PyObject *consts)
1299 {
1300 /* Pre-conditions */
1301 assert(PyDict_CheckExact(const_cache));
1302 assert(PyList_CheckExact(consts));
1303 assert(inst[n].i_opcode == BUILD_TUPLE);
1304 assert(inst[n].i_oparg == n);
1305
1306 for (int i = 0; i < n; i++) {
1307 if (!OPCODE_HAS_CONST(inst[i].i_opcode)) {
1308 return SUCCESS;
1309 }
1310 }
1311
    /* Build up the new tuple of constants */
1313 PyObject *newconst = PyTuple_New(n);
1314 if (newconst == NULL) {
1315 return ERROR;
1316 }
1317 for (int i = 0; i < n; i++) {
1318 int op = inst[i].i_opcode;
1319 int arg = inst[i].i_oparg;
1320 PyObject *constant = get_const_value(op, arg, consts);
1321 if (constant == NULL) {
1322 return ERROR;
1323 }
1324 PyTuple_SET_ITEM(newconst, i, constant);
1325 }
1326 int index = add_const(newconst, consts, const_cache);
1327 if (index < 0) {
1328 return ERROR;
1329 }
1330 for (int i = 0; i < n; i++) {
1331 INSTR_SET_OP0(&inst[i], NOP);
1332 }
1333 INSTR_SET_OP1(&inst[n], LOAD_CONST, index);
1334 return SUCCESS;
1335 }
1336
1337 #define VISITED (-1)
1338
1339 // Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
1340 // same effect.
1341 static int
swaptimize(basicblock *block, int *ix)
1343 {
1344 // NOTE: "./python -m test test_patma" serves as a good, quick stress test
1345 // for this function. Make sure to blow away cached *.pyc files first!
1346 assert(*ix < block->b_iused);
1347 cfg_instr *instructions = &block->b_instr[*ix];
1348 // Find the length of the current sequence of SWAPs and NOPs, and record the
1349 // maximum depth of the stack manipulations:
1350 assert(instructions[0].i_opcode == SWAP);
1351 int depth = instructions[0].i_oparg;
1352 int len = 0;
1353 int more = false;
1354 int limit = block->b_iused - *ix;
1355 while (++len < limit) {
1356 int opcode = instructions[len].i_opcode;
1357 if (opcode == SWAP) {
1358 depth = Py_MAX(depth, instructions[len].i_oparg);
1359 more = true;
1360 }
1361 else if (opcode != NOP) {
1362 break;
1363 }
1364 }
1365 // It's already optimal if there's only one SWAP:
1366 if (!more) {
1367 return SUCCESS;
1368 }
1369 // Create an array with elements {0, 1, 2, ..., depth - 1}:
1370 int *stack = PyMem_Malloc(depth * sizeof(int));
1371 if (stack == NULL) {
1372 PyErr_NoMemory();
1373 return ERROR;
1374 }
1375 for (int i = 0; i < depth; i++) {
1376 stack[i] = i;
1377 }
1378 // Simulate the combined effect of these instructions by "running" them on
1379 // our "stack":
1380 for (int i = 0; i < len; i++) {
1381 if (instructions[i].i_opcode == SWAP) {
1382 int oparg = instructions[i].i_oparg;
1383 int top = stack[0];
1384 // SWAPs are 1-indexed:
1385 stack[0] = stack[oparg - 1];
1386 stack[oparg - 1] = top;
1387 }
1388 }
1389 // Now we can begin! Our approach here is based on a solution to a closely
1390 // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
1391 // think of this algorithm as determining the steps needed to efficiently
1392 // "un-shuffle" our stack. By performing the moves in *reverse* order,
1393 // though, we can efficiently *shuffle* it! For this reason, we will be
1394 // replacing instructions starting from the *end* of the run. Since the
1395 // solution is optimal, we don't need to worry about running out of space:
1396 int current = len - 1;
1397 for (int i = 0; i < depth; i++) {
1398 // Skip items that have already been visited, or just happen to be in
1399 // the correct location:
1400 if (stack[i] == VISITED || stack[i] == i) {
1401 continue;
1402 }
1403 // Okay, we've found an item that hasn't been visited. It forms a cycle
1404 // with other items; traversing the cycle and swapping each item with
1405 // the next will put them all in the correct place. The weird
1406 // loop-and-a-half is necessary to insert 0 into every cycle, since we
1407 // can only swap from that position:
1408 int j = i;
1409 while (true) {
1410 // Skip the actual swap if our item is zero, since swapping the top
1411 // item with itself is pointless:
1412 if (j) {
1413 assert(0 <= current);
1414 // SWAPs are 1-indexed:
1415 instructions[current].i_opcode = SWAP;
1416 instructions[current--].i_oparg = j + 1;
1417 }
1418 if (stack[j] == VISITED) {
1419 // Completed the cycle:
1420 assert(j == i);
1421 break;
1422 }
1423 int next_j = stack[j];
1424 stack[j] = VISITED;
1425 j = next_j;
1426 }
1427 }
1428 // NOP out any unused instructions:
1429 while (0 <= current) {
1430 INSTR_SET_OP0(&instructions[current--], NOP);
1431 }
1432 PyMem_Free(stack);
1433 *ix += len - 1;
1434 return SUCCESS;
1435 }
1436
1437
1438 // This list is pretty small, since it's only okay to reorder opcodes that:
1439 // - can't affect control flow (like jumping or raising exceptions)
1440 // - can't invoke arbitrary code (besides finalizers)
1441 // - only touch the TOS (and pop it when finished)
1442 #define SWAPPABLE(opcode) \
1443 ((opcode) == STORE_FAST || \
1444 (opcode) == STORE_FAST_MAYBE_NULL || \
1445 (opcode) == POP_TOP)
1446
1447 #define STORES_TO(instr) \
1448 (((instr).i_opcode == STORE_FAST || \
1449 (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
1450 ? (instr).i_oparg : -1)
1451
1452 static int
next_swappable_instruction(basicblock *block, int i, int lineno)
1454 {
1455 while (++i < block->b_iused) {
1456 cfg_instr *instruction = &block->b_instr[i];
1457 if (0 <= lineno && instruction->i_loc.lineno != lineno) {
1458 // Optimizing across this instruction could cause user-visible
1459 // changes in the names bound between line tracing events!
1460 return -1;
1461 }
1462 if (instruction->i_opcode == NOP) {
1463 continue;
1464 }
1465 if (SWAPPABLE(instruction->i_opcode)) {
1466 return i;
1467 }
1468 return -1;
1469 }
1470 return -1;
1471 }
1472
1473 // Attempt to apply SWAPs statically by swapping *instructions* rather than
1474 // stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
1475 // with the more efficient NOP, STORE_FAST(42), POP_TOP.
1476 static void
apply_static_swaps(basicblock *block, int i)
1478 {
1479 // SWAPs are to our left, and potential swaperands are to our right:
1480 for (; 0 <= i; i--) {
1481 assert(i < block->b_iused);
1482 cfg_instr *swap = &block->b_instr[i];
1483 if (swap->i_opcode != SWAP) {
1484 if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
1485 // Nope, but we know how to handle these. Keep looking:
1486 continue;
1487 }
1488 // We can't reason about what this instruction does. Bail:
1489 return;
1490 }
1491 int j = next_swappable_instruction(block, i, -1);
1492 if (j < 0) {
1493 return;
1494 }
1495 int k = j;
1496 int lineno = block->b_instr[j].i_loc.lineno;
1497 for (int count = swap->i_oparg - 1; 0 < count; count--) {
1498 k = next_swappable_instruction(block, k, lineno);
1499 if (k < 0) {
1500 return;
1501 }
1502 }
1503 // The reordering is not safe if the two instructions to be swapped
1504 // store to the same location, or if any intervening instruction stores
1505 // to the same location as either of them.
1506 int store_j = STORES_TO(block->b_instr[j]);
1507 int store_k = STORES_TO(block->b_instr[k]);
1508 if (store_j >= 0 || store_k >= 0) {
1509 if (store_j == store_k) {
1510 return;
1511 }
1512 for (int idx = j + 1; idx < k; idx++) {
1513 int store_idx = STORES_TO(block->b_instr[idx]);
1514 if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
1515 return;
1516 }
1517 }
1518 }
1519
1520 // Success!
1521 INSTR_SET_OP0(swap, NOP);
1522 cfg_instr temp = block->b_instr[j];
1523 block->b_instr[j] = block->b_instr[k];
1524 block->b_instr[k] = temp;
1525 }
1526 }
1527
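/* Peephole-optimize sequences that start with LOAD_CONST: conditional jumps on
   a constant become unconditional or disappear, 'is None' tests become
   POP_JUMP_IF_(NOT_)NONE, LOAD_CONST + RETURN_VALUE becomes RETURN_CONST, and
   TO_BOOL of a constant is folded to a LOAD_CONST of True or False. */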
1528 static int
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
1530 {
1531 assert(PyDict_CheckExact(const_cache));
1532 assert(PyList_CheckExact(consts));
1533 int opcode = 0;
1534 int oparg = 0;
1535 for (int i = 0; i < bb->b_iused; i++) {
1536 cfg_instr *inst = &bb->b_instr[i];
1537 bool is_copy_of_load_const = (opcode == LOAD_CONST &&
1538 inst->i_opcode == COPY &&
1539 inst->i_oparg == 1);
1540 if (! is_copy_of_load_const) {
1541 opcode = inst->i_opcode;
1542 oparg = inst->i_oparg;
1543 }
1544 assert(!IS_ASSEMBLER_OPCODE(opcode));
1545 if (opcode != LOAD_CONST) {
1546 continue;
1547 }
1548 int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
1549 switch(nextop) {
1550 case POP_JUMP_IF_FALSE:
1551 case POP_JUMP_IF_TRUE:
1552 {
1553 /* Remove LOAD_CONST const; conditional jump */
1554 PyObject* cnt = get_const_value(opcode, oparg, consts);
1555 if (cnt == NULL) {
1556 return ERROR;
1557 }
1558 int is_true = PyObject_IsTrue(cnt);
1559 Py_DECREF(cnt);
1560 if (is_true == -1) {
1561 return ERROR;
1562 }
1563 INSTR_SET_OP0(inst, NOP);
1564 int jump_if_true = nextop == POP_JUMP_IF_TRUE;
1565 if (is_true == jump_if_true) {
1566 bb->b_instr[i+1].i_opcode = JUMP;
1567 }
1568 else {
1569 INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
1570 }
1571 break;
1572 }
1573 case IS_OP:
1574 {
1575 // Fold to POP_JUMP_IF_NONE:
1576 // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
1577 // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
1578 // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
1579 // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
1580 // Fold to POP_JUMP_IF_NOT_NONE:
1581 // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
1582 // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
1583 // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
1584 // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
1585 PyObject *cnt = get_const_value(opcode, oparg, consts);
1586 if (cnt == NULL) {
1587 return ERROR;
1588 }
1589 if (!Py_IsNone(cnt)) {
1590 Py_DECREF(cnt);
1591 break;
1592 }
1593 if (bb->b_iused <= i + 2) {
1594 break;
1595 }
1596 cfg_instr *is_instr = &bb->b_instr[i + 1];
1597 cfg_instr *jump_instr = &bb->b_instr[i + 2];
1598 // Get rid of TO_BOOL regardless:
1599 if (jump_instr->i_opcode == TO_BOOL) {
1600 INSTR_SET_OP0(jump_instr, NOP);
1601 if (bb->b_iused <= i + 3) {
1602 break;
1603 }
1604 jump_instr = &bb->b_instr[i + 3];
1605 }
1606 bool invert = is_instr->i_oparg;
1607 if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
1608 invert = !invert;
1609 }
1610 else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
1611 break;
1612 }
1613 INSTR_SET_OP0(inst, NOP);
1614 INSTR_SET_OP0(is_instr, NOP);
1615 jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
1616 : POP_JUMP_IF_NONE;
1617 break;
1618 }
1619 case RETURN_VALUE:
1620 {
1621 INSTR_SET_OP0(inst, NOP);
1622 INSTR_SET_OP1(&bb->b_instr[++i], RETURN_CONST, oparg);
1623 break;
1624 }
1625 case TO_BOOL:
1626 {
1627 PyObject *cnt = get_const_value(opcode, oparg, consts);
1628 if (cnt == NULL) {
1629 return ERROR;
1630 }
1631 int is_true = PyObject_IsTrue(cnt);
1632 Py_DECREF(cnt);
1633 if (is_true == -1) {
1634 return ERROR;
1635 }
1636 cnt = PyBool_FromLong(is_true);
1637 int index = add_const(cnt, consts, const_cache);
1638 if (index < 0) {
1639 return ERROR;
1640 }
1641 INSTR_SET_OP0(inst, NOP);
1642 INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
1643 break;
1644 }
1645 }
1646 }
1647 return SUCCESS;
1648 }
1649
1650 static int
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
1652 for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1653 RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
1654 }
1655 return SUCCESS;
1656 }
1657
1658 static int
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
1660 {
1661 assert(PyDict_CheckExact(const_cache));
1662 assert(PyList_CheckExact(consts));
1663 cfg_instr nop;
1664 INSTR_SET_OP0(&nop, NOP);
1665 for (int i = 0; i < bb->b_iused; i++) {
1666 cfg_instr *inst = &bb->b_instr[i];
1667 cfg_instr *target;
1668 int opcode = inst->i_opcode;
1669 int oparg = inst->i_oparg;
1670 if (HAS_TARGET(opcode)) {
1671 assert(inst->i_target->b_iused > 0);
1672 target = &inst->i_target->b_instr[0];
1673 assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
1674 }
1675 else {
1676 target = &nop;
1677 }
1678 int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
1679 assert(!IS_ASSEMBLER_OPCODE(opcode));
1680 switch (opcode) {
1681 /* Try to fold tuples of constants.
1682 Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
1683 Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
1684 Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
1685 case BUILD_TUPLE:
1686 if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
1687 switch(oparg) {
1688 case 1:
1689 INSTR_SET_OP0(inst, NOP);
1690 INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
1691 continue;
1692 case 2:
1693 case 3:
1694 INSTR_SET_OP0(inst, NOP);
1695 bb->b_instr[i+1].i_opcode = SWAP;
1696 continue;
1697 }
1698 }
1699 if (i >= oparg) {
1700 if (fold_tuple_on_constants(const_cache, inst-oparg, oparg, consts)) {
1701 goto error;
1702 }
1703 }
1704 break;
1705 case POP_JUMP_IF_NOT_NONE:
1706 case POP_JUMP_IF_NONE:
1707 switch (target->i_opcode) {
1708 case JUMP:
1709 i -= jump_thread(bb, inst, target, inst->i_opcode);
1710 }
1711 break;
1712 case POP_JUMP_IF_FALSE:
1713 switch (target->i_opcode) {
1714 case JUMP:
1715 i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
1716 }
1717 break;
1718 case POP_JUMP_IF_TRUE:
1719 switch (target->i_opcode) {
1720 case JUMP:
1721 i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
1722 }
1723 break;
1724 case JUMP:
1725 case JUMP_NO_INTERRUPT:
1726 switch (target->i_opcode) {
1727 case JUMP:
1728 i -= jump_thread(bb, inst, target, JUMP);
1729 continue;
1730 case JUMP_NO_INTERRUPT:
1731 i -= jump_thread(bb, inst, target, opcode);
1732 continue;
1733 }
1734 break;
1735 case FOR_ITER:
1736 if (target->i_opcode == JUMP) {
1737 /* This will not work now because the jump (at target) could
1738 * be forward or backward and FOR_ITER only jumps forward. We
1739 * can re-enable this if ever we implement a backward version
1740 * of FOR_ITER.
1741 */
1742 /*
1743 i -= jump_thread(bb, inst, target, FOR_ITER);
1744 */
1745 }
1746 break;
1747 case STORE_FAST:
1748 if (opcode == nextop &&
1749 oparg == bb->b_instr[i+1].i_oparg &&
1750 bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
1751 bb->b_instr[i].i_opcode = POP_TOP;
1752 bb->b_instr[i].i_oparg = 0;
1753 }
1754 break;
1755 case SWAP:
1756 if (oparg == 1) {
1757 INSTR_SET_OP0(inst, NOP);
1758 }
1759 break;
1760 case LOAD_GLOBAL:
1761 if (nextop == PUSH_NULL && (oparg & 1) == 0) {
1762 INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
1763 INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
1764 }
1765 break;
1766 case COMPARE_OP:
1767 if (nextop == TO_BOOL) {
1768 INSTR_SET_OP0(inst, NOP);
1769 INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
1770 continue;
1771 }
1772 break;
1773 case CONTAINS_OP:
1774 case IS_OP:
1775 if (nextop == TO_BOOL) {
1776 INSTR_SET_OP0(inst, NOP);
1777 INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
1778 continue;
1779 }
1780 break;
1781 case TO_BOOL:
1782 if (nextop == TO_BOOL) {
1783 INSTR_SET_OP0(inst, NOP);
1784 continue;
1785 }
1786 break;
1787 case UNARY_NOT:
1788 if (nextop == TO_BOOL) {
1789 INSTR_SET_OP0(inst, NOP);
1790 INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
1791 continue;
1792 }
1793 if (nextop == UNARY_NOT) {
1794 INSTR_SET_OP0(inst, NOP);
1795 INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
1796 continue;
1797 }
1798 break;
1799 }
1800 }
1801
1802 for (int i = 0; i < bb->b_iused; i++) {
1803 cfg_instr *inst = &bb->b_instr[i];
1804 if (inst->i_opcode == SWAP) {
1805 if (swaptimize(bb, &i) < 0) {
1806 goto error;
1807 }
1808 apply_static_swaps(bb, i);
1809 }
1810 }
1811 return SUCCESS;
1812 error:
1813 return ERROR;
1814 }
1815
1816 static int resolve_line_numbers(cfg_builder *g, int firstlineno);
1817
1818 static int
remove_redundant_nops_and_jumps(cfg_builder *g)
1820 {
1821 int removed_nops, removed_jumps;
1822 do {
1823 /* Convergence is guaranteed because the number of
1824 * redundant jumps and nops only decreases.
1825 */
1826 removed_nops = remove_redundant_nops(g);
1827 RETURN_IF_ERROR(removed_nops);
1828 removed_jumps = remove_redundant_jumps(g);
1829 RETURN_IF_ERROR(removed_jumps);
1830 } while(removed_nops + removed_jumps > 0);
1831 return SUCCESS;
1832 }
1833
1834 /* Perform optimizations on a control flow graph.
1835 The consts object should still be in list form to allow new constants
1836 to be appended.
1837
   Code transformations that reduce code size initially fill the gaps with
1839 NOPs. Later those NOPs are removed.
1840 */
1841 static int
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
1843 {
1844 assert(PyDict_CheckExact(const_cache));
1845 RETURN_IF_ERROR(check_cfg(g));
1846 RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
1847 RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
1848 RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
1849 RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
1850 for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1851 RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
1852 }
1853 RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
1854 RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
1855 RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
1856 assert(no_redundant_jumps(g));
1857 return SUCCESS;
1858 }
1859
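/* Merge two adjacent instructions into the given superinstruction, packing the
   two opargs into one (four bits each). Skipped if the instructions are on
   different lines or either oparg does not fit in four bits. */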
1860 static void
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
1862 {
1863 int32_t line1 = inst1->i_loc.lineno;
1864 int32_t line2 = inst2->i_loc.lineno;
1865 /* Skip if instructions are on different lines */
1866 if (line1 >= 0 && line2 >= 0 && line1 != line2) {
1867 return;
1868 }
1869 if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
1870 return;
1871 }
1872 INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
1873 INSTR_SET_OP0(inst2, NOP);
1874 }
1875
1876 static int
1877 insert_superinstructions(cfg_builder *g)
1878 {
1879 for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1880
1881 for (int i = 0; i < b->b_iused; i++) {
1882 cfg_instr *inst = &b->b_instr[i];
1883 int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
1884 switch(inst->i_opcode) {
1885 case LOAD_FAST:
1886 if (nextop == LOAD_FAST) {
1887 make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
1888 }
1889 break;
1890 case STORE_FAST:
1891 switch (nextop) {
1892 case LOAD_FAST:
1893 make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
1894 break;
1895 case STORE_FAST:
1896 make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
1897 break;
1898 }
1899 break;
1900 }
1901 }
1902 }
1903 int res = remove_redundant_nops(g);
1904 assert(no_redundant_nops(g));
1905 return res;
1906 }
1907
1908 // helper functions for add_checks_for_loads_of_unknown_variables
1909 static inline void
1910 maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
1911 {
1912 // Push b if the unsafe mask is giving us any new information.
1913 // To avoid overflowing the stack, only allow each block once.
1914 // Use b->b_visited=1 to mean that b is currently on the stack.
1915 uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
1916 if (b->b_unsafe_locals_mask != both) {
1917 b->b_unsafe_locals_mask = both;
1918 // More work left to do.
1919 if (!b->b_visited) {
1920 // not on the stack, so push it.
1921 *(*sp)++ = b;
1922 b->b_visited = 1;
1923 }
1924 }
1925 }
1926
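/* Scan one basicblock, tracking in a bitmask which of the first 64 locals
 * may still be unbound. A LOAD_FAST of a possibly-unbound local is rewritten
 * to LOAD_FAST_CHECK; the resulting mask is then propagated to the block's
 * fallthrough, jump and exception successors via maybe_push(). */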
1927 static void
1928 scan_block_for_locals(basicblock *b, basicblock ***sp)
1929 {
1930 // bit i is set if local i is potentially uninitialized
1931 uint64_t unsafe_mask = b->b_unsafe_locals_mask;
1932 for (int i = 0; i < b->b_iused; i++) {
1933 cfg_instr *instr = &b->b_instr[i];
1934 assert(instr->i_opcode != EXTENDED_ARG);
1935 if (instr->i_except != NULL) {
1936 maybe_push(instr->i_except, unsafe_mask, sp);
1937 }
1938 if (instr->i_oparg >= 64) {
1939 continue;
1940 }
1941 assert(instr->i_oparg >= 0);
1942 uint64_t bit = (uint64_t)1 << instr->i_oparg;
1943 switch (instr->i_opcode) {
1944 case DELETE_FAST:
1945 case LOAD_FAST_AND_CLEAR:
1946 case STORE_FAST_MAYBE_NULL:
1947 unsafe_mask |= bit;
1948 break;
1949 case STORE_FAST:
1950 unsafe_mask &= ~bit;
1951 break;
1952 case LOAD_FAST_CHECK:
1953 // If this doesn't raise, then the local is defined.
1954 unsafe_mask &= ~bit;
1955 break;
1956 case LOAD_FAST:
1957 if (unsafe_mask & bit) {
1958 instr->i_opcode = LOAD_FAST_CHECK;
1959 }
1960 unsafe_mask &= ~bit;
1961 break;
1962 }
1963 }
1964 if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1965 maybe_push(b->b_next, unsafe_mask, sp);
1966 }
1967 cfg_instr *last = basicblock_last_instr(b);
1968 if (last && is_jump(last)) {
1969 assert(last->i_target != NULL);
1970 maybe_push(last->i_target, unsafe_mask, sp);
1971 }
1972 }
1973
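/* Handle locals with index >= 64, for which no cross-block bitmask is kept.
 * Each such local's state is a block number: it equals the current block's
 * number only if the local is known to be bound at this point in the block. */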
1974 static int
1975 fast_scan_many_locals(basicblock *entryblock, int nlocals)
1976 {
1977 assert(nlocals > 64);
1978 Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
1979 if (states == NULL) {
1980 PyErr_NoMemory();
1981 return ERROR;
1982 }
1983 Py_ssize_t blocknum = 0;
1984 // state[i - 64] == blocknum if local i is guaranteed to
1985 // be initialized, i.e., if it has had a previous LOAD_FAST or
1986 // STORE_FAST within that basicblock (not followed by
1987 // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
1988 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1989 blocknum++;
1990 for (int i = 0; i < b->b_iused; i++) {
1991 cfg_instr *instr = &b->b_instr[i];
1992 assert(instr->i_opcode != EXTENDED_ARG);
1993 int arg = instr->i_oparg;
1994 if (arg < 64) {
1995 continue;
1996 }
1997 assert(arg >= 0);
1998 switch (instr->i_opcode) {
1999 case DELETE_FAST:
2000 case LOAD_FAST_AND_CLEAR:
2001 case STORE_FAST_MAYBE_NULL:
2002 states[arg - 64] = blocknum - 1;
2003 break;
2004 case STORE_FAST:
2005 states[arg - 64] = blocknum;
2006 break;
2007 case LOAD_FAST:
2008 if (states[arg - 64] != blocknum) {
2009 instr->i_opcode = LOAD_FAST_CHECK;
2010 }
2011 states[arg - 64] = blocknum;
2012 break;
2013 Py_UNREACHABLE();
2014 }
2015 }
2016 }
2017 PyMem_Free(states);
2018 return SUCCESS;
2019 }
2020
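/* Drop constants that are never referenced by the bytecode:
 * 1. mark the consts whose index appears in a const-loading instruction,
 * 2. compact the used consts to the front of the list and truncate it,
 * 3. remap every const oparg through reverse_index_map to its new index. */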
2021 static int
2022 remove_unused_consts(basicblock *entryblock, PyObject *consts)
2023 {
2024 assert(PyList_CheckExact(consts));
2025 Py_ssize_t nconsts = PyList_GET_SIZE(consts);
2026 if (nconsts == 0) {
2027 return SUCCESS; /* nothing to do */
2028 }
2029
2030 Py_ssize_t *index_map = NULL;
2031 Py_ssize_t *reverse_index_map = NULL;
2032 int err = ERROR;
2033
2034 index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
2035 if (index_map == NULL) {
2036 goto end;
2037 }
2038 for (Py_ssize_t i = 1; i < nconsts; i++) {
2039 index_map[i] = -1;
2040 }
2041     // The first constant may be the docstring; always keep it.
2042 index_map[0] = 0;
2043
2044 /* mark used consts */
2045 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2046 for (int i = 0; i < b->b_iused; i++) {
2047 if (OPCODE_HAS_CONST(b->b_instr[i].i_opcode)) {
2048 int index = b->b_instr[i].i_oparg;
2049 index_map[index] = index;
2050 }
2051 }
2052 }
2053 /* now index_map[i] == i if consts[i] is used, -1 otherwise */
2054 /* condense consts */
2055 Py_ssize_t n_used_consts = 0;
2056 for (int i = 0; i < nconsts; i++) {
2057 if (index_map[i] != -1) {
2058 assert(index_map[i] == i);
2059 index_map[n_used_consts++] = index_map[i];
2060 }
2061 }
2062 if (n_used_consts == nconsts) {
2063 /* nothing to do */
2064 err = SUCCESS;
2065 goto end;
2066 }
2067
2068 /* move all used consts to the beginning of the consts list */
2069 assert(n_used_consts < nconsts);
2070 for (Py_ssize_t i = 0; i < n_used_consts; i++) {
2071 Py_ssize_t old_index = index_map[i];
2072 assert(i <= old_index && old_index < nconsts);
2073 if (i != old_index) {
2074 PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
2075 assert(value != NULL);
2076 PyList_SetItem(consts, i, Py_NewRef(value));
2077 }
2078 }
2079
2080 /* truncate the consts list at its new size */
2081 if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
2082 goto end;
2083 }
2084 /* adjust const indices in the bytecode */
2085 reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
2086 if (reverse_index_map == NULL) {
2087 goto end;
2088 }
2089 for (Py_ssize_t i = 0; i < nconsts; i++) {
2090 reverse_index_map[i] = -1;
2091 }
2092 for (Py_ssize_t i = 0; i < n_used_consts; i++) {
2093 assert(index_map[i] != -1);
2094 assert(reverse_index_map[index_map[i]] == -1);
2095 reverse_index_map[index_map[i]] = i;
2096 }
2097
2098 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2099 for (int i = 0; i < b->b_iused; i++) {
2100 if (OPCODE_HAS_CONST(b->b_instr[i].i_opcode)) {
2101 int index = b->b_instr[i].i_oparg;
2102 assert(reverse_index_map[index] >= 0);
2103 assert(reverse_index_map[index] < n_used_consts);
2104 b->b_instr[i].i_oparg = (int)reverse_index_map[index];
2105 }
2106 }
2107 }
2108
2109 err = SUCCESS;
2110 end:
2111 PyMem_Free(index_map);
2112 PyMem_Free(reverse_index_map);
2113 return err;
2114 }
2115
2116
2117
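/* Rewrite LOAD_FAST into LOAD_FAST_CHECK wherever the local may still be
 * unbound, so the interpreter performs the runtime check only where needed. */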
2118 static int
2119 add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
2120 int nlocals,
2121 int nparams)
2122 {
2123 if (nlocals == 0) {
2124 return SUCCESS;
2125 }
2126 if (nlocals > 64) {
2127 // To avoid O(nlocals**2) compilation, locals beyond the first
2128 // 64 are only analyzed one basicblock at a time: initialization
2129 // info is not passed between basicblocks.
2130 if (fast_scan_many_locals(entryblock, nlocals) < 0) {
2131 return ERROR;
2132 }
2133 nlocals = 64;
2134 }
2135 basicblock **stack = make_cfg_traversal_stack(entryblock);
2136 if (stack == NULL) {
2137 return ERROR;
2138 }
2139 basicblock **sp = stack;
2140
2141 // First origin of being uninitialized:
2142 // The non-parameter locals in the entry block.
2143 uint64_t start_mask = 0;
2144 for (int i = nparams; i < nlocals; i++) {
2145 start_mask |= (uint64_t)1 << i;
2146 }
2147 maybe_push(entryblock, start_mask, &sp);
2148
2149 // Second origin of being uninitialized:
2150     // There could be a DELETE_FAST somewhere, so
2151 // be sure to scan each basicblock at least once.
2152 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2153 scan_block_for_locals(b, &sp);
2154 }
2155 // Now propagate the uncertainty from the origins we found: Use
2156 // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
2157 while (sp > stack) {
2158 basicblock *b = *--sp;
2159 // mark as no longer on stack
2160 b->b_visited = 0;
2161 scan_block_for_locals(b, &sp);
2162 }
2163 PyMem_Free(stack);
2164 return SUCCESS;
2165 }
2166
2167
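/* A block is "warm" if it is reachable from the entry block by normal
 * (non-exceptional) control flow. This is a depth-first search over
 * fallthrough and jump edges, using b_visited to avoid revisiting blocks. */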
2168 static int
2169 mark_warm(basicblock *entryblock) {
2170 basicblock **stack = make_cfg_traversal_stack(entryblock);
2171 if (stack == NULL) {
2172 return ERROR;
2173 }
2174 basicblock **sp = stack;
2175
2176 *sp++ = entryblock;
2177 entryblock->b_visited = 1;
2178 while (sp > stack) {
2179 basicblock *b = *(--sp);
2180 assert(!b->b_except_handler);
2181 b->b_warm = 1;
2182 basicblock *next = b->b_next;
2183 if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
2184 *sp++ = next;
2185 next->b_visited = 1;
2186 }
2187 for (int i=0; i < b->b_iused; i++) {
2188 cfg_instr *instr = &b->b_instr[i];
2189 if (is_jump(instr) && !instr->i_target->b_visited) {
2190 *sp++ = instr->i_target;
2191 instr->i_target->b_visited = 1;
2192 }
2193 }
2194 }
2195 PyMem_Free(stack);
2196 return SUCCESS;
2197 }
2198
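/* Mark as "cold" every block reachable only via exception handling:
 * first mark the warm blocks, then flood-fill from each exception handler
 * block, stopping at blocks that are already warm. */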
2199 static int
2200 mark_cold(basicblock *entryblock) {
2201 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2202 assert(!b->b_cold && !b->b_warm);
2203 }
2204 if (mark_warm(entryblock) < 0) {
2205 return ERROR;
2206 }
2207
2208 basicblock **stack = make_cfg_traversal_stack(entryblock);
2209 if (stack == NULL) {
2210 return ERROR;
2211 }
2212
2213 basicblock **sp = stack;
2214 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2215 if (b->b_except_handler) {
2216 assert(!b->b_warm);
2217 *sp++ = b;
2218 b->b_visited = 1;
2219 }
2220 }
2221
2222 while (sp > stack) {
2223 basicblock *b = *(--sp);
2224 b->b_cold = 1;
2225 basicblock *next = b->b_next;
2226 if (next && BB_HAS_FALLTHROUGH(b)) {
2227 if (!next->b_warm && !next->b_visited) {
2228 *sp++ = next;
2229 next->b_visited = 1;
2230 }
2231 }
2232 for (int i = 0; i < b->b_iused; i++) {
2233 cfg_instr *instr = &b->b_instr[i];
2234 if (is_jump(instr)) {
2235 assert(i == b->b_iused - 1);
2236 basicblock *target = b->b_instr[i].i_target;
2237 if (!target->b_warm && !target->b_visited) {
2238 *sp++ = target;
2239 target->b_visited = 1;
2240 }
2241 }
2242 }
2243 }
2244 PyMem_Free(stack);
2245 return SUCCESS;
2246 }
2247
2248
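/* Move cold (exception-handling) blocks to the end of the block list so the
 * hot path stays contiguous. Where a cold block would fall through into a
 * warm one, a block with an explicit JUMP_NO_INTERRUPT is inserted first. */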
2249 static int
2250 push_cold_blocks_to_end(cfg_builder *g) {
2251 basicblock *entryblock = g->g_entryblock;
2252 if (entryblock->b_next == NULL) {
2253 /* single basicblock, no need to reorder */
2254 return SUCCESS;
2255 }
2256 RETURN_IF_ERROR(mark_cold(entryblock));
2257
2258 int next_lbl = get_max_label(g->g_entryblock) + 1;
2259
2260 /* If we have a cold block with fallthrough to a warm block, add */
2261 /* an explicit jump instead of fallthrough */
2262 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2263 if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
2264 basicblock *explicit_jump = cfg_builder_new_block(g);
2265 if (explicit_jump == NULL) {
2266 return ERROR;
2267 }
2268 if (!IS_LABEL(b->b_next->b_label)) {
2269 b->b_next->b_label.id = next_lbl++;
2270 }
2271 basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
2272 NO_LOCATION);
2273 explicit_jump->b_cold = 1;
2274 explicit_jump->b_next = b->b_next;
2275 explicit_jump->b_predecessors = 1;
2276 b->b_next = explicit_jump;
2277
2278 /* set target */
2279 cfg_instr *last = basicblock_last_instr(explicit_jump);
2280 last->i_target = explicit_jump->b_next;
2281 }
2282 }
2283
2284 assert(!entryblock->b_cold); /* First block can't be cold */
2285 basicblock *cold_blocks = NULL;
2286 basicblock *cold_blocks_tail = NULL;
2287
2288 basicblock *b = entryblock;
2289 while(b->b_next) {
2290 assert(!b->b_cold);
2291 while (b->b_next && !b->b_next->b_cold) {
2292 b = b->b_next;
2293 }
2294 if (b->b_next == NULL) {
2295 /* no more cold blocks */
2296 break;
2297 }
2298
2299 /* b->b_next is the beginning of a cold streak */
2300 assert(!b->b_cold && b->b_next->b_cold);
2301
2302 basicblock *b_end = b->b_next;
2303 while (b_end->b_next && b_end->b_next->b_cold) {
2304 b_end = b_end->b_next;
2305 }
2306
2307 /* b_end is the end of the cold streak */
2308 assert(b_end && b_end->b_cold);
2309 assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
2310
2311 if (cold_blocks == NULL) {
2312 cold_blocks = b->b_next;
2313 }
2314 else {
2315 cold_blocks_tail->b_next = b->b_next;
2316 }
2317 cold_blocks_tail = b_end;
2318 b->b_next = b_end->b_next;
2319 b_end->b_next = NULL;
2320 }
2321 assert(b != NULL && b->b_next == NULL);
2322 b->b_next = cold_blocks;
2323
2324 if (cold_blocks != NULL) {
2325 RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2326 }
2327 return SUCCESS;
2328 }
2329
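/* Replace pseudo instructions that only exist during compilation:
 * block-push markers become NOPs, LOAD_CLOSURE becomes LOAD_FAST and
 * STORE_FAST_MAYBE_NULL becomes STORE_FAST. */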
2330 static int
2331 convert_pseudo_ops(cfg_builder *g)
2332 {
2333 basicblock *entryblock = g->g_entryblock;
2334 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2335 for (int i = 0; i < b->b_iused; i++) {
2336 cfg_instr *instr = &b->b_instr[i];
2337 if (is_block_push(instr)) {
2338 INSTR_SET_OP0(instr, NOP);
2339 }
2340 else if (instr->i_opcode == LOAD_CLOSURE) {
2341 assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
2342 instr->i_opcode = LOAD_FAST;
2343 }
2344 else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
2345 assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
2346 instr->i_opcode = STORE_FAST;
2347 }
2348 }
2349 }
2350 return remove_redundant_nops_and_jumps(g);
2351 }
2352
2353 static inline bool
2354 is_exit_or_eval_check_without_lineno(basicblock *b) {
2355 if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
2356 return basicblock_has_no_lineno(b);
2357 }
2358 else {
2359 return false;
2360 }
2361 }
2362
2363
2364 /* PEP 626 mandates that the f_lineno of a frame is correct
2365 * after a frame terminates. It would be prohibitively expensive
2366 * to continuously update the f_lineno field at runtime,
2367  * so we make sure that all exiting instructions (raises and returns)
2368 * have a valid line number, allowing us to compute f_lineno lazily.
2369 * We can do this by duplicating the exit blocks without line number
2370 * so that none have more than one predecessor. We can then safely
2371 * copy the line number from the sole predecessor block.
2372 */
2373 static int
2374 duplicate_exits_without_lineno(cfg_builder *g)
2375 {
2376 int next_lbl = get_max_label(g->g_entryblock) + 1;
2377
2378 /* Copy all exit blocks without line number that are targets of a jump.
2379 */
2380 basicblock *entryblock = g->g_entryblock;
2381 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2382 cfg_instr *last = basicblock_last_instr(b);
2383 if (last == NULL) {
2384 continue;
2385 }
2386 if (is_jump(last)) {
2387 basicblock *target = next_nonempty_block(last->i_target);
2388 if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
2389 basicblock *new_target = copy_basicblock(g, target);
2390 if (new_target == NULL) {
2391 return ERROR;
2392 }
2393 new_target->b_instr[0].i_loc = last->i_loc;
2394 last->i_target = new_target;
2395 target->b_predecessors--;
2396 new_target->b_predecessors = 1;
2397 new_target->b_next = target->b_next;
2398 new_target->b_label.id = next_lbl++;
2399 target->b_next = new_target;
2400 }
2401 }
2402 }
2403
2404 /* Any remaining reachable exit blocks without line number can only be reached by
2405 * fall through, and thus can only have a single predecessor */
2406 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2407 if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
2408 if (is_exit_or_eval_check_without_lineno(b->b_next)) {
2409 cfg_instr *last = basicblock_last_instr(b);
2410 assert(last != NULL);
2411 b->b_next->b_instr[0].i_loc = last->i_loc;
2412 }
2413 }
2414 }
2415 return SUCCESS;
2416 }
2417
2418
2419 /* If an instruction has no line number, but its predecessor in the BB does,
2420 * then copy the line number. If a successor block has no line number, and only
2421 * one predecessor, then inherit the line number.
2422 * This ensures that all exit blocks (with one predecessor) receive a line number.
2423 * Also reduces the size of the line number table,
2424 * but has no impact on the generated line number events.
2425 */
2426 static void
2427 propagate_line_numbers(basicblock *entryblock) {
2428 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2429 cfg_instr *last = basicblock_last_instr(b);
2430 if (last == NULL) {
2431 continue;
2432 }
2433
2434 location prev_location = NO_LOCATION;
2435 for (int i = 0; i < b->b_iused; i++) {
2436 if (b->b_instr[i].i_loc.lineno < 0) {
2437 b->b_instr[i].i_loc = prev_location;
2438 }
2439 else {
2440 prev_location = b->b_instr[i].i_loc;
2441 }
2442 }
2443 if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
2444 if (b->b_next->b_iused > 0) {
2445 if (b->b_next->b_instr[0].i_loc.lineno < 0) {
2446 b->b_next->b_instr[0].i_loc = prev_location;
2447 }
2448 }
2449 }
2450 if (is_jump(last)) {
2451 basicblock *target = last->i_target;
2452 if (target->b_predecessors == 1) {
2453 if (target->b_instr[0].i_loc.lineno < 0) {
2454 target->b_instr[0].i_loc = prev_location;
2455 }
2456 }
2457 }
2458 }
2459 }
2460
2461 static int
2462 resolve_line_numbers(cfg_builder *g, int firstlineno)
2463 {
2464 RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
2465 propagate_line_numbers(g->g_entryblock);
2466 return SUCCESS;
2467 }
2468
2469 int
2470 _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
2471 int nlocals, int nparams, int firstlineno)
2472 {
2473 assert(cfg_builder_check(g));
2474 /** Preprocessing **/
2475 /* Map labels to targets and mark exception handlers */
2476 RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
2477 RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
2478 RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
2479
2480 /** Optimization **/
2481 RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
2482 RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
2483 RETURN_IF_ERROR(
2484 add_checks_for_loads_of_uninitialized_variables(
2485 g->g_entryblock, nlocals, nparams));
2486 RETURN_IF_ERROR(insert_superinstructions(g));
2487
2488 RETURN_IF_ERROR(push_cold_blocks_to_end(g));
2489 RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2490 return SUCCESS;
2491 }
2492
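/* Build the mapping from cell/free variable index to its slot in the
 * localsplus array. By default offset i maps to nlocals + i; a cell that
 * shares a name with an entry in u_varnames (an argument cell) maps to that
 * variable's offset instead. */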
2493 static int *
2494 build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
2495 {
2496 int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
2497 int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
2498 int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
2499
2500 int noffsets = ncellvars + nfreevars;
2501 int *fixed = PyMem_New(int, noffsets);
2502 if (fixed == NULL) {
2503 PyErr_NoMemory();
2504 return NULL;
2505 }
2506 for (int i = 0; i < noffsets; i++) {
2507 fixed[i] = nlocals + i;
2508 }
2509
2510 PyObject *varname, *cellindex;
2511 Py_ssize_t pos = 0;
2512 while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
2513 PyObject *varindex;
2514 if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
2515 goto error;
2516 }
2517 if (varindex == NULL) {
2518 continue;
2519 }
2520
2521 int argoffset = PyLong_AsInt(varindex);
2522 Py_DECREF(varindex);
2523 if (argoffset == -1 && PyErr_Occurred()) {
2524 goto error;
2525 }
2526
2527 int oldindex = PyLong_AsInt(cellindex);
2528 if (oldindex == -1 && PyErr_Occurred()) {
2529 goto error;
2530 }
2531 fixed[oldindex] = argoffset;
2532 }
2533 return fixed;
2534
2535 error:
2536 PyMem_Free(fixed);
2537 return NULL;
2538 }
2539
2540 #define IS_GENERATOR(CF) \
2541 ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
2542
2543 static int
2544 insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
2545 int *fixed, int nfreevars, int code_flags)
2546 {
2547 assert(umd->u_firstlineno > 0);
2548
2549 /* Add the generator prefix instructions. */
2550 if (IS_GENERATOR(code_flags)) {
2551 /* Note that RETURN_GENERATOR + POP_TOP have a net stack effect
2552 * of 0. This is because RETURN_GENERATOR pushes an element
2553 * with _PyFrame_StackPush before switching stacks.
2554 */
2555
2556 location loc = LOCATION(umd->u_firstlineno, umd->u_firstlineno, -1, -1);
2557 cfg_instr make_gen = {
2558 .i_opcode = RETURN_GENERATOR,
2559 .i_oparg = 0,
2560 .i_loc = loc,
2561 .i_target = NULL,
2562 };
2563 RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &make_gen));
2564 cfg_instr pop_top = {
2565 .i_opcode = POP_TOP,
2566 .i_oparg = 0,
2567 .i_loc = loc,
2568 .i_target = NULL,
2569 };
2570 RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 1, &pop_top));
2571 }
2572
2573 /* Set up cells for any variable that escapes, to be put in a closure. */
2574 const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
2575 if (ncellvars) {
2576 // umd->u_cellvars has the cells out of order so we sort them
2577 // before adding the MAKE_CELL instructions. Note that we
2578 // adjust for arg cells, which come first.
2579 const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
2580 int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
2581 if (sorted == NULL) {
2582 PyErr_NoMemory();
2583 return ERROR;
2584 }
2585 for (int i = 0; i < ncellvars; i++) {
2586 sorted[fixed[i]] = i + 1;
2587 }
2588 for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
2589 int oldindex = sorted[i] - 1;
2590 if (oldindex == -1) {
2591 continue;
2592 }
2593 cfg_instr make_cell = {
2594 .i_opcode = MAKE_CELL,
2595                 // This will get fixed in fix_cell_offsets().
2596 .i_oparg = oldindex,
2597 .i_loc = NO_LOCATION,
2598 .i_target = NULL,
2599 };
2600 if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
2601 PyMem_RawFree(sorted);
2602 return ERROR;
2603 }
2604 ncellsused += 1;
2605 }
2606 PyMem_RawFree(sorted);
2607 }
2608
2609 if (nfreevars) {
2610 cfg_instr copy_frees = {
2611 .i_opcode = COPY_FREE_VARS,
2612 .i_oparg = nfreevars,
2613 .i_loc = NO_LOCATION,
2614 .i_target = NULL,
2615 };
2616         RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
2617 }
2618
2619 return SUCCESS;
2620 }
2621
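/* Rewrite the opargs of cell/free variable instructions from their old
 * cellvars/freevars indices to the final localsplus offsets in fixedmap.
 * Returns the number of cell slots dropped because they alias arguments. */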
2622 static int
2623 fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
2624 {
2625 int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
2626 int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
2627 int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
2628 int noffsets = ncellvars + nfreevars;
2629
2630 // First deal with duplicates (arg cells).
2631 int numdropped = 0;
2632 for (int i = 0; i < noffsets ; i++) {
2633 if (fixedmap[i] == i + nlocals) {
2634 fixedmap[i] -= numdropped;
2635 }
2636 else {
2637 // It was a duplicate (cell/arg).
2638 numdropped += 1;
2639 }
2640 }
2641
2642 // Then update offsets, either relative to locals or by cell2arg.
2643 for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2644 for (int i = 0; i < b->b_iused; i++) {
2645 cfg_instr *inst = &b->b_instr[i];
2646 // This is called before extended args are generated.
2647 assert(inst->i_opcode != EXTENDED_ARG);
2648 int oldoffset = inst->i_oparg;
2649 switch(inst->i_opcode) {
2650 case MAKE_CELL:
2651 case LOAD_CLOSURE:
2652 case LOAD_DEREF:
2653 case STORE_DEREF:
2654 case DELETE_DEREF:
2655 case LOAD_FROM_DICT_OR_DEREF:
2656 assert(oldoffset >= 0);
2657 assert(oldoffset < noffsets);
2658 assert(fixedmap[oldoffset] >= 0);
2659 inst->i_oparg = fixedmap[oldoffset];
2660 }
2661 }
2662 }
2663
2664 return numdropped;
2665 }
2666
2667 static int
2668 prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g, int code_flags)
2669 {
2670 assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
2671 assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
2672 assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
2673 int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
2674 int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
2675 int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
2676 assert(INT_MAX - nlocals - ncellvars > 0);
2677 assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
2678 int nlocalsplus = nlocals + ncellvars + nfreevars;
2679 int* cellfixedoffsets = build_cellfixedoffsets(umd);
2680 if (cellfixedoffsets == NULL) {
2681 return ERROR;
2682 }
2683
2684 // This must be called before fix_cell_offsets().
2685 if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars, code_flags)) {
2686 PyMem_Free(cellfixedoffsets);
2687 return ERROR;
2688 }
2689
2690 int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
2691 PyMem_Free(cellfixedoffsets); // At this point we're done with it.
2692 cellfixedoffsets = NULL;
2693 if (numdropped < 0) {
2694 return ERROR;
2695 }
2696
2697 nlocalsplus -= numdropped;
2698 return nlocalsplus;
2699 }
2700
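/* Flatten the CFG into a linear instruction sequence: assign a fresh label
 * to every block, emit each block's instructions, rewrite jump opargs to the
 * target block's label, and record exception handler info per instruction. */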
2701 int
2702 _PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
2703 {
2704 int lbl = 0;
2705 for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2706 b->b_label = (jump_target_label){lbl};
2707 lbl += 1;
2708 }
2709 for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2710 RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
2711 for (int i = 0; i < b->b_iused; i++) {
2712 cfg_instr *instr = &b->b_instr[i];
2713 if (HAS_TARGET(instr->i_opcode)) {
2714 /* Set oparg to the label id (it will later be mapped to an offset) */
2715 instr->i_oparg = instr->i_target->b_label.id;
2716 }
2717 RETURN_IF_ERROR(
2718 _PyInstructionSequence_Addop(
2719 seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
2720
2721 _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
2722 if (instr->i_except != NULL) {
2723 hi->h_label = instr->i_except->b_label.id;
2724 hi->h_startdepth = instr->i_except->b_startdepth;
2725 hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
2726 }
2727 else {
2728 hi->h_label = -1;
2729 }
2730 }
2731 }
2732 return SUCCESS;
2733 }
2734
2735
2736 int
2737 _PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
2738 _PyCompile_CodeUnitMetadata *umd, int code_flags,
2739 int *stackdepth, int *nlocalsplus,
2740 _PyInstructionSequence *seq)
2741 {
2742 *stackdepth = calculate_stackdepth(g);
2743 if (*stackdepth < 0) {
2744 return ERROR;
2745 }
2746
2747 /* prepare_localsplus adds instructions for generators that push
2748 * and pop an item on the stack. This assertion makes sure there
2749 * is space on the stack for that.
2750 * It should always be true, because a generator must have at
2751 * least one expression or call to INTRINSIC_STOPITERATION_ERROR,
2752  * which requires stack space.
2753 */
2754 assert(!(IS_GENERATOR(code_flags) && *stackdepth == 0));
2755
2756 *nlocalsplus = prepare_localsplus(umd, g, code_flags);
2757 if (*nlocalsplus < 0) {
2758 return ERROR;
2759 }
2760
2761 RETURN_IF_ERROR(convert_pseudo_ops(g));
2762
2763 /* Order of basic blocks must have been determined by now */
2764
2765 RETURN_IF_ERROR(normalize_jumps(g));
2766 assert(no_redundant_jumps(g));
2767
2768 /* Can't modify the bytecode after computing jump offsets. */
2769 if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
2770 return ERROR;
2771 }
2772
2773 return SUCCESS;
2774 }
2775
2776 /* This is used by _PyCompile_Assemble to fill in the jump and exception
2777  * targets in a synthetic CFG (which is not the output of the builtin compiler).
2778 */
2779 int
2780 _PyCfg_JumpLabelsToTargets(cfg_builder *g)
2781 {
2782 RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
2783 RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
2784 return SUCCESS;
2785 }
2786