1 
2 #include <stdbool.h>
3 
4 #include "Python.h"
5 #include "pycore_flowgraph.h"
6 #include "pycore_compile.h"
7 #include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
8 
9 #include "pycore_opcode_utils.h"
10 #include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
11 
12 
13 #undef SUCCESS
14 #undef ERROR
15 #define SUCCESS 0
16 #define ERROR -1
17 
18 #define RETURN_IF_ERROR(X)  \
19     if ((X) == -1) {        \
20         return ERROR;       \
21     }
22 
23 #define DEFAULT_BLOCK_SIZE 16
24 
25 typedef _Py_SourceLocation location;
26 typedef _PyJumpTargetLabel jump_target_label;
27 
28 typedef struct _PyCfgInstruction {
29     int i_opcode;
30     int i_oparg;
31     _Py_SourceLocation i_loc;
32     struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
33     struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
34 } cfg_instr;
35 
36 typedef struct _PyCfgBasicblock {
37     /* Each basicblock in a compilation unit is linked via b_list in the
38        reverse order that the blocks are allocated.  b_list points to the next
39        block in this list, not to be confused with b_next, which is next by
40        control flow. */
41     struct _PyCfgBasicblock *b_list;
42     /* The label of this block if it is a jump target, -1 otherwise */
43     _PyJumpTargetLabel b_label;
44     /* Exception stack at start of block, used by assembler to create the exception handling table */
45     struct _PyCfgExceptStack *b_exceptstack;
46     /* pointer to an array of instructions, initially NULL */
47     cfg_instr *b_instr;
48     /* If b_next is non-NULL, it is a pointer to the next
49        block reached by normal control flow. */
50     struct _PyCfgBasicblock *b_next;
51     /* number of instructions used */
52     int b_iused;
53     /* length of instruction array (b_instr) */
54     int b_ialloc;
55     /* Used by add_checks_for_loads_of_unknown_variables */
56     uint64_t b_unsafe_locals_mask;
57     /* Number of predecessors that a block has. */
58     int b_predecessors;
59     /* depth of stack upon entry of block, computed by stackdepth() */
60     int b_startdepth;
61     /* Basic block is an exception handler that preserves lasti */
62     unsigned b_preserve_lasti : 1;
63     /* Used by compiler passes to mark whether they have visited a basic block. */
64     unsigned b_visited : 1;
65     /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
66     unsigned b_except_handler : 1;
67     /* b_cold is true if this block is not perf critical (like an exception handler) */
68     unsigned b_cold : 1;
69     /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
70     unsigned b_warm : 1;
71 } basicblock;
72 
73 
74 struct _PyCfgBuilder {
75     /* The entryblock, at which control flow begins. All blocks of the
76        CFG are reachable through the b_next links */
77     struct _PyCfgBasicblock *g_entryblock;
78     /* Pointer to the most recently allocated block.  By following
79        b_list links, you can reach all allocated blocks. */
80     struct _PyCfgBasicblock *g_block_list;
81     /* pointer to the block currently being constructed */
82     struct _PyCfgBasicblock *g_curblock;
83     /* label for the next instruction to be placed */
84     _PyJumpTargetLabel g_current_label;
85 };
86 
87 typedef struct _PyCfgBuilder cfg_builder;
88 
89 static const jump_target_label NO_LABEL = {-1};
90 
91 #define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
92 #define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
93 
94 #define LOCATION(LNO, END_LNO, COL, END_COL) \
95     ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
96 
97 static inline int
98 is_block_push(cfg_instr *i)
99 {
100     assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
101     return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
102 }
103 
104 static inline int
105 is_jump(cfg_instr *i)
106 {
107     return OPCODE_HAS_JUMP(i->i_opcode);
108 }
109 
110 /* One arg*/
111 #define INSTR_SET_OP1(I, OP, ARG) \
112     do { \
113         assert(OPCODE_HAS_ARG(OP)); \
114         cfg_instr *_instr__ptr_ = (I); \
115         _instr__ptr_->i_opcode = (OP); \
116         _instr__ptr_->i_oparg = (ARG); \
117     } while (0);
118 
119 /* No args*/
120 #define INSTR_SET_OP0(I, OP) \
121     do { \
122         assert(!OPCODE_HAS_ARG(OP)); \
123         cfg_instr *_instr__ptr_ = (I); \
124         _instr__ptr_->i_opcode = (OP); \
125         _instr__ptr_->i_oparg = 0; \
126     } while (0);
127 
128 /***** Blocks *****/
129 
130 /* Returns the offset of the next instruction in the current block's
131    b_instr array.  Resizes the b_instr array as necessary.
132    Returns -1 on failure.
133 */
134 static int
135 basicblock_next_instr(basicblock *b)
136 {
137     assert(b != NULL);
138     RETURN_IF_ERROR(
139         _PyCompile_EnsureArrayLargeEnough(
140             b->b_iused + 1,
141             (void**)&b->b_instr,
142             &b->b_ialloc,
143             DEFAULT_BLOCK_SIZE,
144             sizeof(cfg_instr)));
145     return b->b_iused++;
146 }
147 
148 static cfg_instr *
149 basicblock_last_instr(const basicblock *b) {
150     assert(b->b_iused >= 0);
151     if (b->b_iused > 0) {
152         assert(b->b_instr != NULL);
153         return &b->b_instr[b->b_iused - 1];
154     }
155     return NULL;
156 }
157 
158 /* Allocate a new block and return a pointer to it.
159    Returns NULL on error.
160 */
161 
162 static basicblock *
163 cfg_builder_new_block(cfg_builder *g)
164 {
165     basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
166     if (b == NULL) {
167         PyErr_NoMemory();
168         return NULL;
169     }
170     /* Extend the singly linked list of blocks with new block. */
171     b->b_list = g->g_block_list;
172     g->g_block_list = b;
173     b->b_label = NO_LABEL;
174     return b;
175 }
176 
177 static int
178 basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
179 {
180     assert(IS_WITHIN_OPCODE_RANGE(opcode));
181     assert(!IS_ASSEMBLER_OPCODE(opcode));
182     assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
183     assert(0 <= oparg && oparg < (1 << 30));
184 
185     int off = basicblock_next_instr(b);
186     if (off < 0) {
187         return ERROR;
188     }
189     cfg_instr *i = &b->b_instr[off];
190     i->i_opcode = opcode;
191     i->i_oparg = oparg;
192     i->i_target = NULL;
193     i->i_loc = loc;
194 
195     return SUCCESS;
196 }
197 
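/* Append a jump with the given opcode from block b to `target`, storing the
   target's label id as the oparg and recording the target block pointer.
   It is an error if b already ends in a jump. */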
198 static int
199 basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
200 {
201     cfg_instr *last = basicblock_last_instr(b);
202     if (last && is_jump(last)) {
203         return ERROR;
204     }
205 
206     RETURN_IF_ERROR(
207         basicblock_addop(b, opcode, target->b_label.id, loc));
208     last = basicblock_last_instr(b);
209     assert(last && last->i_opcode == opcode);
210     last->i_target = target;
211     return SUCCESS;
212 }
213 
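/* Copy all instructions of `from` onto the end of `to`,
   growing to->b_instr as needed. */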
214 static inline int
215 basicblock_append_instructions(basicblock *to, basicblock *from)
216 {
217     for (int i = 0; i < from->b_iused; i++) {
218         int n = basicblock_next_instr(to);
219         if (n < 0) {
220             return ERROR;
221         }
222         to->b_instr[n] = from->b_instr[i];
223     }
224     return SUCCESS;
225 }
226 
227 static inline int
228 basicblock_nofallthrough(const basicblock *b) {
229     cfg_instr *last = basicblock_last_instr(b);
230     return (last &&
231             (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
232              IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
233 }
234 
235 #define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
236 #define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
237 
238 static basicblock *
239 copy_basicblock(cfg_builder *g, basicblock *block)
240 {
241     /* Cannot copy a block if it has a fallthrough, since
242      * a block can only have one fallthrough predecessor.
243      */
244     assert(BB_NO_FALLTHROUGH(block));
245     basicblock *result = cfg_builder_new_block(g);
246     if (result == NULL) {
247         return NULL;
248     }
249     if (basicblock_append_instructions(result, block) < 0) {
250         return NULL;
251     }
252     return result;
253 }
254 
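/* Insert a copy of *instr at index pos in the block, shifting the
   following instructions up by one slot. */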
255 static int
256 basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
257     RETURN_IF_ERROR(basicblock_next_instr(block));
258     for (int i = block->b_iused - 1; i > pos; i--) {
259         block->b_instr[i] = block->b_instr[i-1];
260     }
261     block->b_instr[pos] = *instr;
262     return SUCCESS;
263 }
264 
265 /* For debugging purposes only */
266 #if 0
267 static void
268 dump_instr(cfg_instr *i)
269 {
270     const char *jump = is_jump(i) ? "jump " : "";
271 
272     char arg[128];
273 
274     *arg = '\0';
275     if (OPCODE_HAS_ARG(i->i_opcode)) {
276         sprintf(arg, "arg: %d ", i->i_oparg);
277     }
278     if (HAS_TARGET(i->i_opcode)) {
279         sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
280     }
281     fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
282                     i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
283 }
284 
285 static inline int
286 basicblock_returns(const basicblock *b) {
287     cfg_instr *last = basicblock_last_instr(b);
288     return last && (last->i_opcode == RETURN_VALUE || last->i_opcode == RETURN_CONST);
289 }
290 
291 static void
292 dump_basicblock(const basicblock *b)
293 {
294     const char *b_return = basicblock_returns(b) ? "return " : "";
295     fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
296         b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
297         b->b_startdepth, b->b_predecessors, b_return);
298     if (b->b_instr) {
299         int i;
300         for (i = 0; i < b->b_iused; i++) {
301             fprintf(stderr, "  [%02d] ", i);
302             dump_instr(b->b_instr + i);
303         }
304     }
305 }
306 
307 void
308 _PyCfgBuilder_DumpGraph(const basicblock *entryblock)
309 {
310     for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
311         dump_basicblock(b);
312     }
313 }
314 
315 #endif
316 
317 
318 /***** CFG construction and modification *****/
319 
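/* Make `block` the fallthrough successor of the current block,
   and the new current block. */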
320 static basicblock *
321 cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
322 {
323     assert(block != NULL);
324     g->g_curblock->b_next = block;
325     g->g_curblock = block;
326     return block;
327 }
328 
329 static inline int
330 basicblock_exits_scope(const basicblock *b) {
331     cfg_instr *last = basicblock_last_instr(b);
332     return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
333 }
334 
335 static inline int
336 basicblock_has_eval_break(const basicblock *b) {
337     for (int i = 0; i < b->b_iused; i++) {
338         if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
339             return true;
340         }
341     }
342     return false;
343 }
344 
345 static bool
346 cfg_builder_current_block_is_terminated(cfg_builder *g)
347 {
348     cfg_instr *last = basicblock_last_instr(g->g_curblock);
349     if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
350         return true;
351     }
352     if (IS_LABEL(g->g_current_label)) {
353         if (last || IS_LABEL(g->g_curblock->b_label)) {
354             return true;
355         }
356         else {
357             /* current block is empty, label it */
358             g->g_curblock->b_label = g->g_current_label;
359             g->g_current_label = NO_LABEL;
360         }
361     }
362     return false;
363 }
364 
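/* If the current block is terminated (or a pending label needs a fresh
   jump target), allocate a new block, give it the pending label and
   make it the current block. */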
365 static int
366 cfg_builder_maybe_start_new_block(cfg_builder *g)
367 {
368     if (cfg_builder_current_block_is_terminated(g)) {
369         basicblock *b = cfg_builder_new_block(g);
370         if (b == NULL) {
371             return ERROR;
372         }
373         b->b_label = g->g_current_label;
374         g->g_current_label = NO_LABEL;
375         cfg_builder_use_next_block(g, b);
376     }
377     return SUCCESS;
378 }
379 
380 #ifndef NDEBUG
381 static bool
382 cfg_builder_check(cfg_builder *g)
383 {
384     assert(g->g_entryblock->b_iused > 0);
385     for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
386         assert(!_PyMem_IsPtrFreed(block));
387         if (block->b_instr != NULL) {
388             assert(block->b_ialloc > 0);
389             assert(block->b_iused >= 0);
390             assert(block->b_ialloc >= block->b_iused);
391         }
392         else {
393             assert (block->b_iused == 0);
394             assert (block->b_ialloc == 0);
395         }
396     }
397     return true;
398 }
399 #endif
400 
401 static int
402 init_cfg_builder(cfg_builder *g)
403 {
404     g->g_block_list = NULL;
405     basicblock *block = cfg_builder_new_block(g);
406     if (block == NULL) {
407         return ERROR;
408     }
409     g->g_curblock = g->g_entryblock = block;
410     g->g_current_label = NO_LABEL;
411     return SUCCESS;
412 }
413 
414 cfg_builder *
415 _PyCfgBuilder_New(void)
416 {
417     cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
418     if (g == NULL) {
419         PyErr_NoMemory();
420         return NULL;
421     }
422     memset(g, 0, sizeof(cfg_builder));
423     if (init_cfg_builder(g) < 0) {
424         PyMem_Free(g);
425         return NULL;
426     }
427     return g;
428 }
429 
430 void
431 _PyCfgBuilder_Free(cfg_builder *g)
432 {
433     if (g == NULL) {
434         return;
435     }
436     assert(cfg_builder_check(g));
437     basicblock *b = g->g_block_list;
438     while (b != NULL) {
439         if (b->b_instr) {
440             PyMem_Free((void *)b->b_instr);
441         }
442         basicblock *next = b->b_list;
443         PyMem_Free((void *)b);
444         b = next;
445     }
446     PyMem_Free(g);
447 }
448 
449 int
450 _PyCfgBuilder_CheckSize(cfg_builder *g)
451 {
452     int nblocks = 0;
453     for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
454         nblocks++;
455     }
456     if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
457         PyErr_NoMemory();
458         return ERROR;
459     }
460     return SUCCESS;
461 }
462 
463 int
464 _PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
465 {
466     g->g_current_label = lbl;
467     return cfg_builder_maybe_start_new_block(g);
468 }
469 
470 int
471 _PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
472 {
473     RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
474     return basicblock_addop(g->g_curblock, opcode, oparg, loc);
475 }
476 
477 
478 static basicblock *
479 next_nonempty_block(basicblock *b)
480 {
481     while (b && b->b_iused == 0) {
482         b = b->b_next;
483     }
484     return b;
485 }
486 
487 /***** debugging helpers *****/
488 
489 #ifndef NDEBUG
490 static int remove_redundant_nops(cfg_builder *g);
491 
492 static bool
493 no_redundant_nops(cfg_builder *g) {
494     if (remove_redundant_nops(g) != 0) {
495         return false;
496     }
497     return true;
498 }
499 
500 static bool
501 no_redundant_jumps(cfg_builder *g) {
502     for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
503         cfg_instr *last = basicblock_last_instr(b);
504         if (last != NULL) {
505             if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
506                 basicblock *next = next_nonempty_block(b->b_next);
507                 basicblock *jump_target = next_nonempty_block(last->i_target);
508                 if (jump_target == next) {
509                     assert(next);
510                     if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
511                         assert(0);
512                         return false;
513                     }
514                 }
515             }
516         }
517     }
518     return true;
519 }
520 
521 #endif
522 
523 /***** CFG preprocessing (jump targets and exceptions) *****/
524 
525 static int
526 normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
527     cfg_instr *last = basicblock_last_instr(b);
528     if (last == NULL || !is_jump(last) ||
529         IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
530         return SUCCESS;
531     }
532     assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
533 
534     bool is_forward = last->i_target->b_visited == 0;
535     if (is_forward) {
536         return SUCCESS;
537     }
538 
539     int reversed_opcode = 0;
540     switch(last->i_opcode) {
541         case POP_JUMP_IF_NOT_NONE:
542             reversed_opcode = POP_JUMP_IF_NONE;
543             break;
544         case POP_JUMP_IF_NONE:
545             reversed_opcode = POP_JUMP_IF_NOT_NONE;
546             break;
547         case POP_JUMP_IF_FALSE:
548             reversed_opcode = POP_JUMP_IF_TRUE;
549             break;
550         case POP_JUMP_IF_TRUE:
551             reversed_opcode = POP_JUMP_IF_FALSE;
552             break;
553     }
554     /* transform 'conditional jump T' to
555      * 'reversed_jump b_next' followed by 'jump_backwards T'
556      */
557 
558     basicblock *target = last->i_target;
559     basicblock *backwards_jump = cfg_builder_new_block(g);
560     if (backwards_jump == NULL) {
561         return ERROR;
562     }
563     RETURN_IF_ERROR(
564         basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
565     last->i_opcode = reversed_opcode;
566     last->i_target = b->b_next;
567 
568     backwards_jump->b_cold = b->b_cold;
569     backwards_jump->b_next = b->b_next;
570     b->b_next = backwards_jump;
571     return SUCCESS;
572 }
573 
574 
575 static int
576 normalize_jumps(cfg_builder *g)
577 {
578     basicblock *entryblock = g->g_entryblock;
579     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
580         b->b_visited = 0;
581     }
582     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
583         b->b_visited = 1;
584         RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
585     }
586     return SUCCESS;
587 }
588 
589 static int
590 check_cfg(cfg_builder *g) {
591     for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
592         /* Raise SystemError if jump or exit is not last instruction in the block. */
593         for (int i = 0; i < b->b_iused; i++) {
594             int opcode = b->b_instr[i].i_opcode;
595             assert(!IS_ASSEMBLER_OPCODE(opcode));
596             if (IS_TERMINATOR_OPCODE(opcode)) {
597                 if (i != b->b_iused - 1) {
598                     PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
599                     return ERROR;
600                 }
601             }
602         }
603     }
604     return SUCCESS;
605 }
606 
607 static int
608 get_max_label(basicblock *entryblock)
609 {
610     int lbl = -1;
611     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
612         if (b->b_label.id > lbl) {
613             lbl = b->b_label.id;
614         }
615     }
616     return lbl;
617 }
618 
619 /* Calculate the actual jump target from the target_label */
620 static int
621 translate_jump_labels_to_targets(basicblock *entryblock)
622 {
623     int max_label = get_max_label(entryblock);
624     size_t mapsize = sizeof(basicblock *) * (max_label + 1);
625     basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
626     if (!label2block) {
627         PyErr_NoMemory();
628         return ERROR;
629     }
630     memset(label2block, 0, mapsize);
631     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
632         if (b->b_label.id >= 0) {
633             label2block[b->b_label.id] = b;
634         }
635     }
636     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
637         for (int i = 0; i < b->b_iused; i++) {
638             cfg_instr *instr = &b->b_instr[i];
639             assert(instr->i_target == NULL);
640             if (HAS_TARGET(instr->i_opcode)) {
641                 int lbl = instr->i_oparg;
642                 assert(lbl >= 0 && lbl <= max_label);
643                 instr->i_target = label2block[lbl];
644                 assert(instr->i_target != NULL);
645                 assert(instr->i_target->b_label.id == lbl);
646             }
647         }
648     }
649     PyMem_Free(label2block);
650     return SUCCESS;
651 }
652 
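/* Mark the target of every block-push (SETUP_*) instruction
   as an exception handler block. */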
653 static int
654 mark_except_handlers(basicblock *entryblock) {
655 #ifndef NDEBUG
656     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
657         assert(!b->b_except_handler);
658     }
659 #endif
660     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
661         for (int i=0; i < b->b_iused; i++) {
662             cfg_instr *instr = &b->b_instr[i];
663             if (is_block_push(instr)) {
664                 instr->i_target->b_except_handler = 1;
665             }
666         }
667     }
668     return SUCCESS;
669 }
670 
671 
672 struct _PyCfgExceptStack {
673     basicblock *handlers[CO_MAXBLOCKS+2];
674     int depth;
675 };
676 
677 
678 static basicblock *
679 push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
680     assert(is_block_push(setup));
681     int opcode = setup->i_opcode;
682     basicblock * target = setup->i_target;
683     if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
684         target->b_preserve_lasti = 1;
685     }
686     assert(stack->depth <= CO_MAXBLOCKS);
687     stack->handlers[++stack->depth] = target;
688     return target;
689 }
690 
691 static basicblock *
692 pop_except_block(struct _PyCfgExceptStack *stack) {
693     assert(stack->depth > 0);
694     return stack->handlers[--stack->depth];
695 }
696 
697 static basicblock *
698 except_stack_top(struct _PyCfgExceptStack *stack) {
699     return stack->handlers[stack->depth];
700 }
701 
702 static struct _PyCfgExceptStack *
703 make_except_stack(void) {
704     struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
705     if (new == NULL) {
706         PyErr_NoMemory();
707         return NULL;
708     }
709     new->depth = 0;
710     new->handlers[0] = NULL;
711     return new;
712 }
713 
714 static struct _PyCfgExceptStack *
715 copy_except_stack(struct _PyCfgExceptStack *stack) {
716     struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
717     if (copy == NULL) {
718         PyErr_NoMemory();
719         return NULL;
720     }
721     memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
722     return copy;
723 }
724 
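/* Allocate a work stack with room for every block in the CFG, clearing
   all b_visited flags along the way.  Returns NULL (with an exception
   set) on allocation failure. */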
725 static basicblock**
726 make_cfg_traversal_stack(basicblock *entryblock) {
727     int nblocks = 0;
728     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
729         b->b_visited = 0;
730         nblocks++;
731     }
732     basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
733     if (!stack) {
734         PyErr_NoMemory();
735     }
736     return stack;
737 }
738 
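/* Record the stack depth on entry to block b, and push b onto the
   traversal stack the first time it is seen.  Report an error if a
   conflicting depth has already been recorded for b. */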
739 Py_LOCAL_INLINE(int)
740 stackdepth_push(basicblock ***sp, basicblock *b, int depth)
741 {
742     if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
743         PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
744         return ERROR;
745     }
746     if (b->b_startdepth < depth && b->b_startdepth < 100) {
747         assert(b->b_startdepth < 0);
748         b->b_startdepth = depth;
749         *(*sp)++ = b;
750     }
751     return SUCCESS;
752 }
753 
754 /* Find the flow path that needs the largest stack.  We assume that
755  * cycles in the flow graph have no net effect on the stack depth.
756  */
757 static int
758 calculate_stackdepth(cfg_builder *g)
759 {
760     basicblock *entryblock = g->g_entryblock;
761     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
762         b->b_startdepth = INT_MIN;
763     }
764     basicblock **stack = make_cfg_traversal_stack(entryblock);
765     if (!stack) {
766         return ERROR;
767     }
768 
769 
770     int stackdepth = -1;
771     int maxdepth = 0;
772     basicblock **sp = stack;
773     if (stackdepth_push(&sp, entryblock, 0) < 0) {
774         goto error;
775     }
776     while (sp != stack) {
777         basicblock *b = *--sp;
778         int depth = b->b_startdepth;
779         assert(depth >= 0);
780         basicblock *next = b->b_next;
781         for (int i = 0; i < b->b_iused; i++) {
782             cfg_instr *instr = &b->b_instr[i];
783             int effect = PyCompile_OpcodeStackEffectWithJump(
784                              instr->i_opcode, instr->i_oparg, 0);
785             if (effect == PY_INVALID_STACK_EFFECT) {
786                 PyErr_Format(PyExc_SystemError,
787                              "Invalid stack effect for opcode=%d, arg=%i",
788                              instr->i_opcode, instr->i_oparg);
789                 goto error;
790             }
791             int new_depth = depth + effect;
792             if (new_depth < 0) {
793                PyErr_Format(PyExc_ValueError,
794                             "Invalid CFG, stack underflow");
795                goto error;
796             }
797             if (new_depth > maxdepth) {
798                 maxdepth = new_depth;
799             }
800             if (HAS_TARGET(instr->i_opcode)) {
801                 effect = PyCompile_OpcodeStackEffectWithJump(
802                              instr->i_opcode, instr->i_oparg, 1);
803                 if (effect == PY_INVALID_STACK_EFFECT) {
804                     PyErr_Format(PyExc_SystemError,
805                                  "Invalid stack effect for opcode=%d, arg=%i",
806                                  instr->i_opcode, instr->i_oparg);
807                     goto error;
808                 }
809                 int target_depth = depth + effect;
810                 assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
811                 if (target_depth > maxdepth) {
812                     maxdepth = target_depth;
813                 }
814                 if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
815                     goto error;
816                 }
817             }
818             depth = new_depth;
819             assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
820             if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
821                 IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
822             {
823                 /* remaining code is dead */
824                 next = NULL;
825                 break;
826             }
827         }
828         if (next != NULL) {
829             assert(BB_HAS_FALLTHROUGH(b));
830             if (stackdepth_push(&sp, next, depth) < 0) {
831                 goto error;
832             }
833         }
834     }
835     stackdepth = maxdepth;
836 error:
837     PyMem_Free(stack);
838     return stackdepth;
839 }
840 
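/* Traverse the CFG, simulating the stack of active exception handlers,
   and set each instruction's i_except to the handler that is active
   when it executes.  POP_BLOCK instructions are replaced by NOPs. */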
841 static int
842 label_exception_targets(basicblock *entryblock) {
843     basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
844     if (todo_stack == NULL) {
845         return ERROR;
846     }
847     struct _PyCfgExceptStack *except_stack = make_except_stack();
848     if (except_stack == NULL) {
849         PyMem_Free(todo_stack);
850         PyErr_NoMemory();
851         return ERROR;
852     }
853     except_stack->depth = 0;
854     todo_stack[0] = entryblock;
855     entryblock->b_visited = 1;
856     entryblock->b_exceptstack = except_stack;
857     basicblock **todo = &todo_stack[1];
858     basicblock *handler = NULL;
859     while (todo > todo_stack) {
860         todo--;
861         basicblock *b = todo[0];
862         assert(b->b_visited == 1);
863         except_stack = b->b_exceptstack;
864         assert(except_stack != NULL);
865         b->b_exceptstack = NULL;
866         handler = except_stack_top(except_stack);
867         int last_yield_except_depth = -1;
868         for (int i = 0; i < b->b_iused; i++) {
869             cfg_instr *instr = &b->b_instr[i];
870             if (is_block_push(instr)) {
871                 if (!instr->i_target->b_visited) {
872                     struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
873                     if (copy == NULL) {
874                         goto error;
875                     }
876                     instr->i_target->b_exceptstack = copy;
877                     todo[0] = instr->i_target;
878                     instr->i_target->b_visited = 1;
879                     todo++;
880                 }
881                 handler = push_except_block(except_stack, instr);
882             }
883             else if (instr->i_opcode == POP_BLOCK) {
884                 handler = pop_except_block(except_stack);
885                 INSTR_SET_OP0(instr, NOP);
886             }
887             else if (is_jump(instr)) {
888                 instr->i_except = handler;
889                 assert(i == b->b_iused -1);
890                 if (!instr->i_target->b_visited) {
891                     if (BB_HAS_FALLTHROUGH(b)) {
892                         struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
893                         if (copy == NULL) {
894                             goto error;
895                         }
896                         instr->i_target->b_exceptstack = copy;
897                     }
898                     else {
899                         instr->i_target->b_exceptstack = except_stack;
900                         except_stack = NULL;
901                     }
902                     todo[0] = instr->i_target;
903                     instr->i_target->b_visited = 1;
904                     todo++;
905                 }
906             }
907             else if (instr->i_opcode == YIELD_VALUE) {
908                 instr->i_except = handler;
909                 last_yield_except_depth = except_stack->depth;
910             }
911             else if (instr->i_opcode == RESUME) {
912                 instr->i_except = handler;
913                 if (instr->i_oparg != RESUME_AT_FUNC_START) {
914                     assert(last_yield_except_depth >= 0);
915                     if (last_yield_except_depth == 1) {
916                         instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
917                     }
918                     last_yield_except_depth = -1;
919                 }
920             }
921             else {
922                 instr->i_except = handler;
923             }
924         }
925         if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
926             assert(except_stack != NULL);
927             b->b_next->b_exceptstack = except_stack;
928             todo[0] = b->b_next;
929             b->b_next->b_visited = 1;
930             todo++;
931         }
932         else if (except_stack != NULL) {
933            PyMem_Free(except_stack);
934         }
935     }
936 #ifdef Py_DEBUG
937     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
938         assert(b->b_exceptstack == NULL);
939     }
940 #endif
941     PyMem_Free(todo_stack);
942     return SUCCESS;
943 error:
944     PyMem_Free(todo_stack);
945     PyMem_Free(except_stack);
946     return ERROR;
947 }
948 
949 /***** CFG optimizations *****/
950 
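/* Compute predecessor counts with a depth-first traversal from the
   entry block, then empty out every block that was never reached. */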
951 static int
952 remove_unreachable(basicblock *entryblock) {
953     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
954         b->b_predecessors = 0;
955     }
956     basicblock **stack = make_cfg_traversal_stack(entryblock);
957     if (stack == NULL) {
958         return ERROR;
959     }
960     basicblock **sp = stack;
961     entryblock->b_predecessors = 1;
962     *sp++ = entryblock;
963     entryblock->b_visited = 1;
964     while (sp > stack) {
965         basicblock *b = *(--sp);
966         if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
967             if (!b->b_next->b_visited) {
968                 assert(b->b_next->b_predecessors == 0);
969                 *sp++ = b->b_next;
970                 b->b_next->b_visited = 1;
971             }
972             b->b_next->b_predecessors++;
973         }
974         for (int i = 0; i < b->b_iused; i++) {
975             basicblock *target;
976             cfg_instr *instr = &b->b_instr[i];
977             if (is_jump(instr) || is_block_push(instr)) {
978                 target = instr->i_target;
979                 if (!target->b_visited) {
980                     *sp++ = target;
981                     target->b_visited = 1;
982                 }
983                 target->b_predecessors++;
984             }
985         }
986     }
987     PyMem_Free(stack);
988 
989     /* Delete unreachable instructions */
990     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
991        if (b->b_predecessors == 0) {
992             b->b_iused = 0;
993             b->b_except_handler = 0;
994        }
995     }
996     return SUCCESS;
997 }
998 
999 static int
1000 basicblock_remove_redundant_nops(basicblock *bb) {
1001     /* Remove NOPs when legal to do so. */
1002     int dest = 0;
1003     int prev_lineno = -1;
1004     for (int src = 0; src < bb->b_iused; src++) {
1005         int lineno = bb->b_instr[src].i_loc.lineno;
1006         if (bb->b_instr[src].i_opcode == NOP) {
1007             /* Eliminate no-op if it doesn't have a line number */
1008             if (lineno < 0) {
1009                 continue;
1010             }
1011             /* or, if the previous instruction had the same line number. */
1012             if (prev_lineno == lineno) {
1013                 continue;
1014             }
1015             /* or, if the next instruction has same line number or no line number */
1016             if (src < bb->b_iused - 1) {
1017                 int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1018                 if (next_lineno == lineno) {
1019                     continue;
1020                 }
1021                 if (next_lineno < 0) {
1022                     bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1023                     continue;
1024                 }
1025             }
1026             else {
1027                 basicblock *next = next_nonempty_block(bb->b_next);
1028                 /* or if last instruction in BB and next BB has same line number */
1029                 if (next) {
1030                     location next_loc = NO_LOCATION;
1031                     for (int next_i=0; next_i < next->b_iused; next_i++) {
1032                         cfg_instr *instr = &next->b_instr[next_i];
1033                         if (instr->i_opcode == NOP && instr->i_loc.lineno == NO_LOCATION.lineno) {
1034                             /* Skip over NOPs without location, they will be removed */
1035                             continue;
1036                         }
1037                         next_loc = instr->i_loc;
1038                         break;
1039                     }
1040                     if (lineno == next_loc.lineno) {
1041                         continue;
1042                     }
1043                 }
1044             }
1045 
1046         }
1047         if (dest != src) {
1048             bb->b_instr[dest] = bb->b_instr[src];
1049         }
1050         dest++;
1051         prev_lineno = lineno;
1052     }
1053     assert(dest <= bb->b_iused);
1054     int num_removed = bb->b_iused - dest;
1055     bb->b_iused = dest;
1056     return num_removed;
1057 }
1058 
1059 static int
1060 remove_redundant_nops(cfg_builder *g) {
1061     int changes = 0;
1062     for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1063         int change = basicblock_remove_redundant_nops(b);
1064         RETURN_IF_ERROR(change);
1065         changes += change;
1066     }
1067     return changes;
1068 }
1069 
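/* Remove redundant NOPs, and NOP out push/pop pairs (a LOAD_CONST or
   COPY 1 immediately followed by POP_TOP), repeating until no more
   changes are found. */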
1070 static int
1071 remove_redundant_nops_and_pairs(basicblock *entryblock)
1072 {
1073     bool done = false;
1074 
1075     while (! done) {
1076         done = true;
1077         cfg_instr *prev_instr = NULL;
1078         cfg_instr *instr = NULL;
1079         for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1080             RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1081             if (IS_LABEL(b->b_label)) {
1082                 /* this block is a jump target, forget instr */
1083                 instr = NULL;
1084             }
1085             for (int i = 0; i < b->b_iused; i++) {
1086                 prev_instr = instr;
1087                 instr = &b->b_instr[i];
1088                 int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1089                 int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1090                 int opcode = instr->i_opcode;
1091                 bool is_redundant_pair = false;
1092                 if (opcode == POP_TOP) {
1093                    if (prev_opcode == LOAD_CONST) {
1094                        is_redundant_pair = true;
1095                    }
1096                    else if (prev_opcode == COPY && prev_oparg == 1) {
1097                        is_redundant_pair = true;
1098                    }
1099                 }
1100                 if (is_redundant_pair) {
1101                     INSTR_SET_OP0(prev_instr, NOP);
1102                     INSTR_SET_OP0(instr, NOP);
1103                     done = false;
1104                 }
1105             }
1106             if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1107                 instr = NULL;
1108             }
1109         }
1110     }
1111     return SUCCESS;
1112 }
1113 
1114 static int
1115 remove_redundant_jumps(cfg_builder *g) {
1116     /* If a non-empty block ends with a jump instruction, check if the next
1117      * non-empty block reached through normal flow control is the target
1118      * of that jump. If it is, then the jump instruction is redundant and
1119      * can be deleted.
1120      *
1121      * Return the number of changes applied, or -1 on error.
1122      */
1123 
1124     int changes = 0;
1125     for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1126         cfg_instr *last = basicblock_last_instr(b);
1127         if (last == NULL) {
1128             continue;
1129         }
1130         assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1131         if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1132             basicblock* jump_target = next_nonempty_block(last->i_target);
1133             if (jump_target == NULL) {
1134                 PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1135                 return ERROR;
1136             }
1137             basicblock *next = next_nonempty_block(b->b_next);
1138             if (jump_target == next) {
1139                 changes++;
1140                 INSTR_SET_OP0(last, NOP);
1141             }
1142         }
1143     }
1144 
1145     return changes;
1146 }
1147 
1148 static inline bool
1149 basicblock_has_no_lineno(basicblock *b) {
1150     for (int i = 0; i < b->b_iused; i++) {
1151         if (b->b_instr[i].i_loc.lineno >= 0) {
1152             return false;
1153         }
1154     }
1155     return true;
1156 }
1157 
1158 /* Maximum size of basic block that should be copied in optimizer */
1159 #define MAX_COPY_SIZE 4
1160 
1161 /* If this block ends with an unconditional jump to a small exit block or
1162  * a block that has no line numbers (and no fallthrough), then
1163  * remove the jump and extend this block with the target.
1164  * Returns 1 if extended, 0 if no change, and -1 on error.
1165  */
1166 static int
1167 basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1168     cfg_instr *last = basicblock_last_instr(bb);
1169     if (last == NULL) {
1170         return 0;
1171     }
1172     if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1173         return 0;
1174     }
1175     basicblock *target = last->i_target;
1176     bool small_exit_block = (basicblock_exits_scope(target) &&
1177                              target->b_iused <= MAX_COPY_SIZE);
1178     bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1179                                      !BB_HAS_FALLTHROUGH(target));
1180     if (small_exit_block || no_lineno_no_fallthrough) {
1181         assert(is_jump(last));
1182         int removed_jump_opcode = last->i_opcode;
1183         INSTR_SET_OP0(last, NOP);
1184         RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1185         if (no_lineno_no_fallthrough) {
1186             last = basicblock_last_instr(bb);
1187             if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1188                 removed_jump_opcode == JUMP)
1189             {
1190                 /* Make sure we don't lose eval breaker checks */
1191                 last->i_opcode = JUMP;
1192             }
1193         }
1194         target->b_predecessors--;
1195         return 1;
1196     }
1197     return 0;
1198 }
1199 
1200 static int
1201 inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1202     bool changes;
1203     do {
1204         changes = false;
1205         for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1206             int res = basicblock_inline_small_or_no_lineno_blocks(b);
1207             RETURN_IF_ERROR(res);
1208             if (res) {
1209                 changes = true;
1210             }
1211         }
1212     } while(changes); /* every change removes a jump, ensuring convergence */
1213     return changes;
1214 }
1215 
1216 // Attempt to eliminate jumps to jumps by updating inst to jump to
1217 // target->i_target using the provided opcode. Return whether or not the
1218 // optimization was successful.
1219 static bool
1220 jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1221 {
1222     assert(is_jump(inst));
1223     assert(is_jump(target));
1224     assert(inst == basicblock_last_instr(bb));
1225     // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1226     // changes (and we fall into an infinite loop):
1227     if (inst->i_target != target->i_target) {
1228         /* Change inst to NOP and append a jump to target->i_target. The
1229          * NOP will be removed later if it's not needed for the lineno.
1230          */
1231         INSTR_SET_OP0(inst, NOP);
1232 
1233         RETURN_IF_ERROR(
1234             basicblock_add_jump(
1235                 bb, opcode, target->i_target, target->i_loc));
1236 
1237         return true;
1238     }
1239     return false;
1240 }
1241 
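/* Return a new reference to the constant loaded by (opcode, oparg),
   looked up in the co_consts list.  Only LOAD_CONST is handled here;
   anything else is reported as an internal error. */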
1242 static PyObject*
1243 get_const_value(int opcode, int oparg, PyObject *co_consts)
1244 {
1245     PyObject *constant = NULL;
1246     assert(OPCODE_HAS_CONST(opcode));
1247     if (opcode == LOAD_CONST) {
1248         constant = PyList_GET_ITEM(co_consts, oparg);
1249     }
1250 
1251     if (constant == NULL) {
1252         PyErr_SetString(PyExc_SystemError,
1253                         "Internal error: failed to get value of a constant");
1254         return NULL;
1255     }
1256     return Py_NewRef(constant);
1257 }
1258 
1259 // Steals a reference to newconst.
1260 static int
1261 add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
1262 {
1263     if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1264         Py_DECREF(newconst);
1265         return -1;
1266     }
1267 
1268     Py_ssize_t index;
1269     for (index = 0; index < PyList_GET_SIZE(consts); index++) {
1270         if (PyList_GET_ITEM(consts, index) == newconst) {
1271             break;
1272         }
1273     }
1274     if (index == PyList_GET_SIZE(consts)) {
1275         if ((size_t)index >= (size_t)INT_MAX - 1) {
1276             PyErr_SetString(PyExc_OverflowError, "too many constants");
1277             Py_DECREF(newconst);
1278             return -1;
1279         }
1280         if (PyList_Append(consts, newconst)) {
1281             Py_DECREF(newconst);
1282             return -1;
1283         }
1284     }
1285     Py_DECREF(newconst);
1286     return (int)index;
1287 }
1288 
1289 /* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1290    with    LOAD_CONST (c1, c2, ... cn).
1291    The consts table must still be in list form so that the
1292    new constant (c1, c2, ... cn) can be appended.
1293    Called with inst pointing to the first LOAD_CONST.
1294 */
1295 static int
1296 fold_tuple_on_constants(PyObject *const_cache,
1297                         cfg_instr *inst,
1298                         int n, PyObject *consts)
1299 {
1300     /* Pre-conditions */
1301     assert(PyDict_CheckExact(const_cache));
1302     assert(PyList_CheckExact(consts));
1303     assert(inst[n].i_opcode == BUILD_TUPLE);
1304     assert(inst[n].i_oparg == n);
1305 
1306     for (int i = 0; i < n; i++) {
1307         if (!OPCODE_HAS_CONST(inst[i].i_opcode)) {
1308             return SUCCESS;
1309         }
1310     }
1311 
1312     /* Buildup new tuple of constants */
1313     PyObject *newconst = PyTuple_New(n);
1314     if (newconst == NULL) {
1315         return ERROR;
1316     }
1317     for (int i = 0; i < n; i++) {
1318         int op = inst[i].i_opcode;
1319         int arg = inst[i].i_oparg;
1320         PyObject *constant = get_const_value(op, arg, consts);
1321         if (constant == NULL) {
1322             return ERROR;
1323         }
1324         PyTuple_SET_ITEM(newconst, i, constant);
1325     }
1326     int index = add_const(newconst, consts, const_cache);
1327     if (index < 0) {
1328         return ERROR;
1329     }
1330     for (int i = 0; i < n; i++) {
1331         INSTR_SET_OP0(&inst[i], NOP);
1332     }
1333     INSTR_SET_OP1(&inst[n], LOAD_CONST, index);
1334     return SUCCESS;
1335 }
1336 
1337 #define VISITED (-1)
1338 
1339 // Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
1340 // same effect.
1341 static int
1342 swaptimize(basicblock *block, int *ix)
1343 {
1344     // NOTE: "./python -m test test_patma" serves as a good, quick stress test
1345     // for this function. Make sure to blow away cached *.pyc files first!
1346     assert(*ix < block->b_iused);
1347     cfg_instr *instructions = &block->b_instr[*ix];
1348     // Find the length of the current sequence of SWAPs and NOPs, and record the
1349     // maximum depth of the stack manipulations:
1350     assert(instructions[0].i_opcode == SWAP);
1351     int depth = instructions[0].i_oparg;
1352     int len = 0;
1353     int more = false;
1354     int limit = block->b_iused - *ix;
1355     while (++len < limit) {
1356         int opcode = instructions[len].i_opcode;
1357         if (opcode == SWAP) {
1358             depth = Py_MAX(depth, instructions[len].i_oparg);
1359             more = true;
1360         }
1361         else if (opcode != NOP) {
1362             break;
1363         }
1364     }
1365     // It's already optimal if there's only one SWAP:
1366     if (!more) {
1367         return SUCCESS;
1368     }
1369     // Create an array with elements {0, 1, 2, ..., depth - 1}:
1370     int *stack = PyMem_Malloc(depth * sizeof(int));
1371     if (stack == NULL) {
1372         PyErr_NoMemory();
1373         return ERROR;
1374     }
1375     for (int i = 0; i < depth; i++) {
1376         stack[i] = i;
1377     }
1378     // Simulate the combined effect of these instructions by "running" them on
1379     // our "stack":
1380     for (int i = 0; i < len; i++) {
1381         if (instructions[i].i_opcode == SWAP) {
1382             int oparg = instructions[i].i_oparg;
1383             int top = stack[0];
1384             // SWAPs are 1-indexed:
1385             stack[0] = stack[oparg - 1];
1386             stack[oparg - 1] = top;
1387         }
1388     }
1389     // Now we can begin! Our approach here is based on a solution to a closely
1390     // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
1391     // think of this algorithm as determining the steps needed to efficiently
1392     // "un-shuffle" our stack. By performing the moves in *reverse* order,
1393     // though, we can efficiently *shuffle* it! For this reason, we will be
1394     // replacing instructions starting from the *end* of the run. Since the
1395     // solution is optimal, we don't need to worry about running out of space:
1396     int current = len - 1;
1397     for (int i = 0; i < depth; i++) {
1398         // Skip items that have already been visited, or just happen to be in
1399         // the correct location:
1400         if (stack[i] == VISITED || stack[i] == i) {
1401             continue;
1402         }
1403         // Okay, we've found an item that hasn't been visited. It forms a cycle
1404         // with other items; traversing the cycle and swapping each item with
1405         // the next will put them all in the correct place. The weird
1406         // loop-and-a-half is necessary to insert 0 into every cycle, since we
1407         // can only swap from that position:
1408         int j = i;
1409         while (true) {
1410             // Skip the actual swap if our item is zero, since swapping the top
1411             // item with itself is pointless:
1412             if (j) {
1413                 assert(0 <= current);
1414                 // SWAPs are 1-indexed:
1415                 instructions[current].i_opcode = SWAP;
1416                 instructions[current--].i_oparg = j + 1;
1417             }
1418             if (stack[j] == VISITED) {
1419                 // Completed the cycle:
1420                 assert(j == i);
1421                 break;
1422             }
1423             int next_j = stack[j];
1424             stack[j] = VISITED;
1425             j = next_j;
1426         }
1427     }
1428     // NOP out any unused instructions:
1429     while (0 <= current) {
1430         INSTR_SET_OP0(&instructions[current--], NOP);
1431     }
1432     PyMem_Free(stack);
1433     *ix += len - 1;
1434     return SUCCESS;
1435 }
1436 
1437 
1438 // This list is pretty small, since it's only okay to reorder opcodes that:
1439 // - can't affect control flow (like jumping or raising exceptions)
1440 // - can't invoke arbitrary code (besides finalizers)
1441 // - only touch the TOS (and pop it when finished)
1442 #define SWAPPABLE(opcode) \
1443     ((opcode) == STORE_FAST || \
1444      (opcode) == STORE_FAST_MAYBE_NULL || \
1445      (opcode) == POP_TOP)
1446 
1447 #define STORES_TO(instr) \
1448     (((instr).i_opcode == STORE_FAST || \
1449       (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
1450      ? (instr).i_oparg : -1)
1451 
1452 static int
1453 next_swappable_instruction(basicblock *block, int i, int lineno)
1454 {
1455     while (++i < block->b_iused) {
1456         cfg_instr *instruction = &block->b_instr[i];
1457         if (0 <= lineno && instruction->i_loc.lineno != lineno) {
1458             // Optimizing across this instruction could cause user-visible
1459             // changes in the names bound between line tracing events!
1460             return -1;
1461         }
1462         if (instruction->i_opcode == NOP) {
1463             continue;
1464         }
1465         if (SWAPPABLE(instruction->i_opcode)) {
1466             return i;
1467         }
1468         return -1;
1469     }
1470     return -1;
1471 }
1472 
1473 // Attempt to apply SWAPs statically by swapping *instructions* rather than
1474 // stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
1475 // with the more efficient NOP, STORE_FAST(42), POP_TOP.
1476 static void
1477 apply_static_swaps(basicblock *block, int i)
1478 {
1479     // SWAPs are to our left, and potential swaperands are to our right:
1480     for (; 0 <= i; i--) {
1481         assert(i < block->b_iused);
1482         cfg_instr *swap = &block->b_instr[i];
1483         if (swap->i_opcode != SWAP) {
1484             if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
1485                 // Nope, but we know how to handle these. Keep looking:
1486                 continue;
1487             }
1488             // We can't reason about what this instruction does. Bail:
1489             return;
1490         }
1491         int j = next_swappable_instruction(block, i, -1);
1492         if (j < 0) {
1493             return;
1494         }
1495         int k = j;
1496         int lineno = block->b_instr[j].i_loc.lineno;
1497         for (int count = swap->i_oparg - 1; 0 < count; count--) {
1498             k = next_swappable_instruction(block, k, lineno);
1499             if (k < 0) {
1500                 return;
1501             }
1502         }
1503         // The reordering is not safe if the two instructions to be swapped
1504         // store to the same location, or if any intervening instruction stores
1505         // to the same location as either of them.
1506         int store_j = STORES_TO(block->b_instr[j]);
1507         int store_k = STORES_TO(block->b_instr[k]);
1508         if (store_j >= 0 || store_k >= 0) {
1509             if (store_j == store_k) {
1510                 return;
1511             }
1512             for (int idx = j + 1; idx < k; idx++) {
1513                 int store_idx = STORES_TO(block->b_instr[idx]);
1514                 if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
1515                     return;
1516                 }
1517             }
1518         }
1519 
1520         // Success!
1521         INSTR_SET_OP0(swap, NOP);
1522         cfg_instr temp = block->b_instr[j];
1523         block->b_instr[j] = block->b_instr[k];
1524         block->b_instr[k] = temp;
1525     }
1526 }
1527 
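/* Peephole optimizations for sequences that start with LOAD_CONST:
   fold away conditional jumps on a constant, and rewrite `is None`
   tests that feed a conditional jump into POP_JUMP_IF_(NOT_)NONE,
   among other rewrites. */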
1528 static int
1529 basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
1530 {
1531     assert(PyDict_CheckExact(const_cache));
1532     assert(PyList_CheckExact(consts));
1533     int opcode = 0;
1534     int oparg = 0;
1535     for (int i = 0; i < bb->b_iused; i++) {
1536         cfg_instr *inst = &bb->b_instr[i];
1537         bool is_copy_of_load_const = (opcode == LOAD_CONST &&
1538                                       inst->i_opcode == COPY &&
1539                                       inst->i_oparg == 1);
1540         if (! is_copy_of_load_const) {
1541             opcode = inst->i_opcode;
1542             oparg = inst->i_oparg;
1543         }
1544         assert(!IS_ASSEMBLER_OPCODE(opcode));
1545         if (opcode != LOAD_CONST) {
1546             continue;
1547         }
1548         int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
1549         switch(nextop) {
1550             case POP_JUMP_IF_FALSE:
1551             case POP_JUMP_IF_TRUE:
1552             {
1553                 /* Remove LOAD_CONST const; conditional jump */
1554                 PyObject* cnt = get_const_value(opcode, oparg, consts);
1555                 if (cnt == NULL) {
1556                     return ERROR;
1557                 }
1558                 int is_true = PyObject_IsTrue(cnt);
1559                 Py_DECREF(cnt);
1560                 if (is_true == -1) {
1561                     return ERROR;
1562                 }
1563                 INSTR_SET_OP0(inst, NOP);
1564                 int jump_if_true = nextop == POP_JUMP_IF_TRUE;
1565                 if (is_true == jump_if_true) {
1566                     bb->b_instr[i+1].i_opcode = JUMP;
1567                 }
1568                 else {
1569                     INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
1570                 }
1571                 break;
1572             }
1573             case IS_OP:
1574             {
1575                 // Fold to POP_JUMP_IF_NONE:
1576                 // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
1577                 // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
1578                 // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
1579                 // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
1580                 // Fold to POP_JUMP_IF_NOT_NONE:
1581                 // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
1582                 // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
1583                 // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
1584                 // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
1585                 PyObject *cnt = get_const_value(opcode, oparg, consts);
1586                 if (cnt == NULL) {
1587                     return ERROR;
1588                 }
1589                 if (!Py_IsNone(cnt)) {
1590                     Py_DECREF(cnt);
1591                     break;
1592                 }
1593                 if (bb->b_iused <= i + 2) {
1594                     break;
1595                 }
1596                 cfg_instr *is_instr = &bb->b_instr[i + 1];
1597                 cfg_instr *jump_instr = &bb->b_instr[i + 2];
1598                 // Get rid of TO_BOOL regardless:
1599                 if (jump_instr->i_opcode == TO_BOOL) {
1600                     INSTR_SET_OP0(jump_instr, NOP);
1601                     if (bb->b_iused <= i + 3) {
1602                         break;
1603                     }
1604                     jump_instr = &bb->b_instr[i + 3];
1605                 }
1606                 bool invert = is_instr->i_oparg;
1607                 if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
1608                     invert = !invert;
1609                 }
1610                 else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
1611                     break;
1612                 }
1613                 INSTR_SET_OP0(inst, NOP);
1614                 INSTR_SET_OP0(is_instr, NOP);
1615                 jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
1616                                               : POP_JUMP_IF_NONE;
1617                 break;
1618             }
1619             case RETURN_VALUE:
1620             {
1621                 INSTR_SET_OP0(inst, NOP);
1622                 INSTR_SET_OP1(&bb->b_instr[++i], RETURN_CONST, oparg);
1623                 break;
1624             }
1625             case TO_BOOL:
1626             {
1627                 PyObject *cnt = get_const_value(opcode, oparg, consts);
1628                 if (cnt == NULL) {
1629                     return ERROR;
1630                 }
1631                 int is_true = PyObject_IsTrue(cnt);
1632                 Py_DECREF(cnt);
1633                 if (is_true == -1) {
1634                     return ERROR;
1635                 }
1636                 cnt = PyBool_FromLong(is_true);
1637                 int index = add_const(cnt, consts, const_cache);
1638                 if (index < 0) {
1639                     return ERROR;
1640                 }
1641                 INSTR_SET_OP0(inst, NOP);
1642                 INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
1643                 break;
1644             }
1645         }
1646     }
1647     return SUCCESS;
1648 }
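/* Illustrative before/after sketches of the folds above (constants and
 * labels are made up for the example):
 *
 *     LOAD_CONST True; POP_JUMP_IF_TRUE  L   =>  NOP; JUMP L
 *     LOAD_CONST True; POP_JUMP_IF_FALSE L   =>  NOP; NOP
 *     LOAD_CONST None; IS_OP(0); POP_JUMP_IF_TRUE L
 *                                            =>  NOP; NOP; POP_JUMP_IF_NONE L
 *     LOAD_CONST c; RETURN_VALUE             =>  NOP; RETURN_CONST c
 */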
1649 
1650 static int
1651 optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
1652     for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1653         RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
1654     }
1655     return SUCCESS;
1656 }
1657 
1658 static int
1659 optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
1660 {
1661     assert(PyDict_CheckExact(const_cache));
1662     assert(PyList_CheckExact(consts));
1663     cfg_instr nop;
1664     INSTR_SET_OP0(&nop, NOP);
1665     for (int i = 0; i < bb->b_iused; i++) {
1666         cfg_instr *inst = &bb->b_instr[i];
1667         cfg_instr *target;
1668         int opcode = inst->i_opcode;
1669         int oparg = inst->i_oparg;
1670         if (HAS_TARGET(opcode)) {
1671             assert(inst->i_target->b_iused > 0);
1672             target = &inst->i_target->b_instr[0];
1673             assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
1674         }
1675         else {
1676             target = &nop;
1677         }
1678         int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
1679         assert(!IS_ASSEMBLER_OPCODE(opcode));
1680         switch (opcode) {
1681             /* Try to fold tuples of constants.
1682                Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
1683                Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
1684                Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
1685             case BUILD_TUPLE:
1686                 if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
1687                     switch(oparg) {
1688                         case 1:
1689                             INSTR_SET_OP0(inst, NOP);
1690                             INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
1691                             continue;
1692                         case 2:
1693                         case 3:
1694                             INSTR_SET_OP0(inst, NOP);
1695                             bb->b_instr[i+1].i_opcode = SWAP;
1696                             continue;
1697                     }
1698                 }
1699                 if (i >= oparg) {
1700                     if (fold_tuple_on_constants(const_cache, inst-oparg, oparg, consts)) {
1701                         goto error;
1702                     }
1703                 }
1704                 break;
1705             case POP_JUMP_IF_NOT_NONE:
1706             case POP_JUMP_IF_NONE:
1707                 switch (target->i_opcode) {
1708                     case JUMP:
1709                         i -= jump_thread(bb, inst, target, inst->i_opcode);
1710                 }
1711                 break;
1712             case POP_JUMP_IF_FALSE:
1713                 switch (target->i_opcode) {
1714                     case JUMP:
1715                         i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
1716                 }
1717                 break;
1718             case POP_JUMP_IF_TRUE:
1719                 switch (target->i_opcode) {
1720                     case JUMP:
1721                         i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
1722                 }
1723                 break;
1724             case JUMP:
1725             case JUMP_NO_INTERRUPT:
1726                 switch (target->i_opcode) {
1727                     case JUMP:
1728                         i -= jump_thread(bb, inst, target, JUMP);
1729                         continue;
1730                     case JUMP_NO_INTERRUPT:
1731                         i -= jump_thread(bb, inst, target, opcode);
1732                         continue;
1733                 }
1734                 break;
1735             case FOR_ITER:
1736                 if (target->i_opcode == JUMP) {
1737                     /* This will not work now because the jump (at target) could
1738                      * be forward or backward and FOR_ITER only jumps forward. We
1739                      * can re-enable this if ever we implement a backward version
1740                      * of FOR_ITER.
1741                      */
1742                     /*
1743                     i -= jump_thread(bb, inst, target, FOR_ITER);
1744                     */
1745                 }
1746                 break;
1747             case STORE_FAST:
1748                 if (opcode == nextop &&
1749                     oparg == bb->b_instr[i+1].i_oparg &&
1750                     bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
1751                     bb->b_instr[i].i_opcode = POP_TOP;
1752                     bb->b_instr[i].i_oparg = 0;
1753                 }
1754                 break;
1755             case SWAP:
1756                 if (oparg == 1) {
1757                     INSTR_SET_OP0(inst, NOP);
1758                 }
1759                 break;
1760             case LOAD_GLOBAL:
1761                 if (nextop == PUSH_NULL && (oparg & 1) == 0) {
1762                     INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
1763                     INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
1764                 }
1765                 break;
1766             case COMPARE_OP:
1767                 if (nextop == TO_BOOL) {
1768                     INSTR_SET_OP0(inst, NOP);
1769                     INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
1770                     continue;
1771                 }
1772                 break;
1773             case CONTAINS_OP:
1774             case IS_OP:
1775                 if (nextop == TO_BOOL) {
1776                     INSTR_SET_OP0(inst, NOP);
1777                     INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
1778                     continue;
1779                 }
1780                 break;
1781             case TO_BOOL:
1782                 if (nextop == TO_BOOL) {
1783                     INSTR_SET_OP0(inst, NOP);
1784                     continue;
1785                 }
1786                 break;
1787             case UNARY_NOT:
1788                 if (nextop == TO_BOOL) {
1789                     INSTR_SET_OP0(inst, NOP);
1790                     INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
1791                     continue;
1792                 }
1793                 if (nextop == UNARY_NOT) {
1794                     INSTR_SET_OP0(inst, NOP);
1795                     INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
1796                     continue;
1797                 }
1798                 break;
1799         }
1800     }
1801 
1802     for (int i = 0; i < bb->b_iused; i++) {
1803         cfg_instr *inst = &bb->b_instr[i];
1804         if (inst->i_opcode == SWAP) {
1805             if (swaptimize(bb, &i) < 0) {
1806                 goto error;
1807             }
1808             apply_static_swaps(bb, i);
1809         }
1810     }
1811     return SUCCESS;
1812 error:
1813     return ERROR;
1814 }
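/* A rough sketch of the jump threading performed above: when a conditional
 * jump targets a block that begins with an unconditional JUMP, the
 * conditional jump is retargeted directly at the final destination, e.g.
 *
 *     POP_JUMP_IF_FALSE L1              POP_JUMP_IF_FALSE L2
 *     ...                        =>     ...
 *   L1: JUMP L2                       L1: JUMP L2
 *
 * The intermediate block may then become unreachable and is cleaned up by a
 * later pass.
 */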
1815 
1816 static int resolve_line_numbers(cfg_builder *g, int firstlineno);
1817 
1818 static int
1819 remove_redundant_nops_and_jumps(cfg_builder *g)
1820 {
1821     int removed_nops, removed_jumps;
1822     do {
1823         /* Convergence is guaranteed because the number of
1824          * redundant jumps and nops only decreases.
1825          */
1826         removed_nops = remove_redundant_nops(g);
1827         RETURN_IF_ERROR(removed_nops);
1828         removed_jumps = remove_redundant_jumps(g);
1829         RETURN_IF_ERROR(removed_jumps);
1830     } while(removed_nops + removed_jumps > 0);
1831     return SUCCESS;
1832 }
1833 
1834 /* Perform optimizations on a control flow graph.
1835    The consts object should still be in list form to allow new constants
1836    to be appended.
1837 
1838    Code transformations that reduce code size initially fill the gaps with
1839    NOPs.  Later those NOPs are removed.
1840 */
1841 static int
1842 optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
1843 {
1844     assert(PyDict_CheckExact(const_cache));
1845     RETURN_IF_ERROR(check_cfg(g));
1846     RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
1847     RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
1848     RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
1849     RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
1850     for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1851         RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
1852     }
1853     RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
1854     RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
1855     RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
1856     assert(no_redundant_jumps(g));
1857     return SUCCESS;
1858 }
1859 
1860 static void
1861 make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
1862 {
1863     int32_t line1 = inst1->i_loc.lineno;
1864     int32_t line2 = inst2->i_loc.lineno;
1865     /* Skip if instructions are on different lines */
1866     if (line1 >= 0 && line2 >= 0 && line1 != line2) {
1867         return;
1868     }
1869     if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
1870         return;
1871     }
1872     INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
1873     INSTR_SET_OP0(inst2, NOP);
1874 }
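/* For illustration (opargs chosen arbitrarily): LOAD_FAST(3) followed by
 * LOAD_FAST(5) on the same line becomes LOAD_FAST_LOAD_FAST((3 << 4) | 5),
 * i.e. oparg 0x35, and the second instruction becomes a NOP. The fold only
 * applies when both opargs fit in 4 bits, as checked above.
 */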
1875 
1876 static int
1877 insert_superinstructions(cfg_builder *g)
1878 {
1879     for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1880 
1881         for (int i = 0; i < b->b_iused; i++) {
1882             cfg_instr *inst = &b->b_instr[i];
1883             int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
1884             switch(inst->i_opcode) {
1885                 case LOAD_FAST:
1886                     if (nextop == LOAD_FAST) {
1887                         make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
1888                     }
1889                     break;
1890                 case STORE_FAST:
1891                     switch (nextop) {
1892                         case LOAD_FAST:
1893                             make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
1894                             break;
1895                         case STORE_FAST:
1896                             make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
1897                             break;
1898                     }
1899                     break;
1900             }
1901         }
1902     }
1903     int res = remove_redundant_nops(g);
1904     assert(no_redundant_nops(g));
1905     return res;
1906 }
1907 
1908 // helper functions for add_checks_for_loads_of_unknown_variables
1909 static inline void
1910 maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
1911 {
1912     // Push b if the unsafe mask is giving us any new information.
1913     // To avoid overflowing the stack, only allow each block once.
1914     // Use b->b_visited=1 to mean that b is currently on the stack.
1915     uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
1916     if (b->b_unsafe_locals_mask != both) {
1917         b->b_unsafe_locals_mask = both;
1918         // More work left to do.
1919         if (!b->b_visited) {
1920             // not on the stack, so push it.
1921             *(*sp)++ = b;
1922             b->b_visited = 1;
1923         }
1924     }
1925 }
1926 
1927 static void
1928 scan_block_for_locals(basicblock *b, basicblock ***sp)
1929 {
1930     // bit i is set if local i is potentially uninitialized
1931     uint64_t unsafe_mask = b->b_unsafe_locals_mask;
1932     for (int i = 0; i < b->b_iused; i++) {
1933         cfg_instr *instr = &b->b_instr[i];
1934         assert(instr->i_opcode != EXTENDED_ARG);
1935         if (instr->i_except != NULL) {
1936             maybe_push(instr->i_except, unsafe_mask, sp);
1937         }
1938         if (instr->i_oparg >= 64) {
1939             continue;
1940         }
1941         assert(instr->i_oparg >= 0);
1942         uint64_t bit = (uint64_t)1 << instr->i_oparg;
1943         switch (instr->i_opcode) {
1944             case DELETE_FAST:
1945             case LOAD_FAST_AND_CLEAR:
1946             case STORE_FAST_MAYBE_NULL:
1947                 unsafe_mask |= bit;
1948                 break;
1949             case STORE_FAST:
1950                 unsafe_mask &= ~bit;
1951                 break;
1952             case LOAD_FAST_CHECK:
1953                 // If this doesn't raise, then the local is defined.
1954                 unsafe_mask &= ~bit;
1955                 break;
1956             case LOAD_FAST:
1957                 if (unsafe_mask & bit) {
1958                     instr->i_opcode = LOAD_FAST_CHECK;
1959                 }
1960                 unsafe_mask &= ~bit;
1961                 break;
1962         }
1963     }
1964     if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1965         maybe_push(b->b_next, unsafe_mask, sp);
1966     }
1967     cfg_instr *last = basicblock_last_instr(b);
1968     if (last && is_jump(last)) {
1969         assert(last->i_target != NULL);
1970         maybe_push(last->i_target, unsafe_mask, sp);
1971     }
1972 }
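/* Sketch of how the mask evolves for a hypothetical local x with bit i:
 * a STORE_FAST(x) clears bit i, a later DELETE_FAST(x) sets it again, and a
 * subsequent LOAD_FAST(x) then sees the bit set and is rewritten to
 * LOAD_FAST_CHECK, so the interpreter raises UnboundLocalError instead of
 * reading a possibly-NULL slot.
 */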
1973 
1974 static int
1975 fast_scan_many_locals(basicblock *entryblock, int nlocals)
1976 {
1977     assert(nlocals > 64);
1978     Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
1979     if (states == NULL) {
1980         PyErr_NoMemory();
1981         return ERROR;
1982     }
1983     Py_ssize_t blocknum = 0;
1984     // state[i - 64] == blocknum if local i is guaranteed to
1985     // be initialized, i.e., if it has had a previous LOAD_FAST or
1986     // STORE_FAST within that basicblock (not followed by
1987     // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
1988     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1989         blocknum++;
1990         for (int i = 0; i < b->b_iused; i++) {
1991             cfg_instr *instr = &b->b_instr[i];
1992             assert(instr->i_opcode != EXTENDED_ARG);
1993             int arg = instr->i_oparg;
1994             if (arg < 64) {
1995                 continue;
1996             }
1997             assert(arg >= 0);
1998             switch (instr->i_opcode) {
1999                 case DELETE_FAST:
2000                 case LOAD_FAST_AND_CLEAR:
2001                 case STORE_FAST_MAYBE_NULL:
2002                     states[arg - 64] = blocknum - 1;
2003                     break;
2004                 case STORE_FAST:
2005                     states[arg - 64] = blocknum;
2006                     break;
2007                 case LOAD_FAST:
2008                     if (states[arg - 64] != blocknum) {
2009                         instr->i_opcode = LOAD_FAST_CHECK;
2010                     }
2011                     states[arg - 64] = blocknum;
2012                     break;
2013                     Py_UNREACHABLE();
2014             }
2015         }
2016     }
2017     PyMem_Free(states);
2018     return SUCCESS;
2019 }
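/* In the per-block scheme above, states[arg - 64] == blocknum means "local
 * arg is known to be bound at this point of the current block". As a
 * hypothetical trace: a LOAD_FAST seen before any STORE_FAST in the block
 * finds a stale block number and is rewritten to LOAD_FAST_CHECK; once the
 * local has been stored (or check-loaded), later LOAD_FASTs in the same
 * block are left alone.
 */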
2020 
2021 static int
2022 remove_unused_consts(basicblock *entryblock, PyObject *consts)
2023 {
2024     assert(PyList_CheckExact(consts));
2025     Py_ssize_t nconsts = PyList_GET_SIZE(consts);
2026     if (nconsts == 0) {
2027         return SUCCESS;  /* nothing to do */
2028     }
2029 
2030     Py_ssize_t *index_map = NULL;
2031     Py_ssize_t *reverse_index_map = NULL;
2032     int err = ERROR;
2033 
2034     index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
2035     if (index_map == NULL) {
2036         goto end;
2037     }
2038     for (Py_ssize_t i = 1; i < nconsts; i++) {
2039         index_map[i] = -1;
2040     }
2041     // The first constant may be the docstring; keep it always.
2042     index_map[0] = 0;
2043 
2044     /* mark used consts */
2045     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2046         for (int i = 0; i < b->b_iused; i++) {
2047             if (OPCODE_HAS_CONST(b->b_instr[i].i_opcode)) {
2048                 int index = b->b_instr[i].i_oparg;
2049                 index_map[index] = index;
2050             }
2051         }
2052     }
2053     /* now index_map[i] == i if consts[i] is used, -1 otherwise */
2054     /* condense consts */
2055     Py_ssize_t n_used_consts = 0;
2056     for (int i = 0; i < nconsts; i++) {
2057         if (index_map[i] != -1) {
2058             assert(index_map[i] == i);
2059             index_map[n_used_consts++] = index_map[i];
2060         }
2061     }
2062     if (n_used_consts == nconsts) {
2063         /* nothing to do */
2064         err = SUCCESS;
2065         goto end;
2066     }
2067 
2068     /* move all used consts to the beginning of the consts list */
2069     assert(n_used_consts < nconsts);
2070     for (Py_ssize_t i = 0; i < n_used_consts; i++) {
2071         Py_ssize_t old_index = index_map[i];
2072         assert(i <= old_index && old_index < nconsts);
2073         if (i != old_index) {
2074             PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
2075             assert(value != NULL);
2076             PyList_SetItem(consts, i, Py_NewRef(value));
2077         }
2078     }
2079 
2080     /* truncate the consts list at its new size */
2081     if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
2082         goto end;
2083     }
2084     /* adjust const indices in the bytecode */
2085     reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
2086     if (reverse_index_map == NULL) {
2087         goto end;
2088     }
2089     for (Py_ssize_t i = 0; i < nconsts; i++) {
2090         reverse_index_map[i] = -1;
2091     }
2092     for (Py_ssize_t i = 0; i < n_used_consts; i++) {
2093         assert(index_map[i] != -1);
2094         assert(reverse_index_map[index_map[i]] == -1);
2095         reverse_index_map[index_map[i]] = i;
2096     }
2097 
2098     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2099         for (int i = 0; i < b->b_iused; i++) {
2100             if (OPCODE_HAS_CONST(b->b_instr[i].i_opcode)) {
2101                 int index = b->b_instr[i].i_oparg;
2102                 assert(reverse_index_map[index] >= 0);
2103                 assert(reverse_index_map[index] < n_used_consts);
2104                 b->b_instr[i].i_oparg = (int)reverse_index_map[index];
2105             }
2106         }
2107     }
2108 
2109     err = SUCCESS;
2110 end:
2111     PyMem_Free(index_map);
2112     PyMem_Free(reverse_index_map);
2113     return err;
2114 }
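/* Worked example (made-up constants): with consts == [doc, 1, 2, 3] and only
 * consts[0] and consts[3] referenced by the bytecode, index_map is condensed
 * to [0, 3], consts is rewritten to [doc, 3], reverse_index_map becomes
 * [0, -1, -1, 1], and every instruction that used oparg 3 is patched to
 * oparg 1.
 */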
2115 
2116 
2117 
2118 static int
2119 add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
2120                                                 int nlocals,
2121                                                 int nparams)
2122 {
2123     if (nlocals == 0) {
2124         return SUCCESS;
2125     }
2126     if (nlocals > 64) {
2127         // To avoid O(nlocals**2) compilation, locals beyond the first
2128         // 64 are only analyzed one basicblock at a time: initialization
2129         // info is not passed between basicblocks.
2130         if (fast_scan_many_locals(entryblock, nlocals) < 0) {
2131             return ERROR;
2132         }
2133         nlocals = 64;
2134     }
2135     basicblock **stack = make_cfg_traversal_stack(entryblock);
2136     if (stack == NULL) {
2137         return ERROR;
2138     }
2139     basicblock **sp = stack;
2140 
2141     // First origin of being uninitialized:
2142     // The non-parameter locals in the entry block.
2143     uint64_t start_mask = 0;
2144     for (int i = nparams; i < nlocals; i++) {
2145         start_mask |= (uint64_t)1 << i;
2146     }
2147     maybe_push(entryblock, start_mask, &sp);
2148 
2149     // Second origin of being uninitialized:
2150     // There could be DELETE_FAST somewhere, so
2151     // be sure to scan each basicblock at least once.
2152     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2153         scan_block_for_locals(b, &sp);
2154     }
2155     // Now propagate the uncertainty from the origins we found: Use
2156     // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
2157     while (sp > stack) {
2158         basicblock *b = *--sp;
2159         // mark as no longer on stack
2160         b->b_visited = 0;
2161         scan_block_for_locals(b, &sp);
2162     }
2163     PyMem_Free(stack);
2164     return SUCCESS;
2165 }
2166 
2167 
2168 static int
2169 mark_warm(basicblock *entryblock) {
2170     basicblock **stack = make_cfg_traversal_stack(entryblock);
2171     if (stack == NULL) {
2172         return ERROR;
2173     }
2174     basicblock **sp = stack;
2175 
2176     *sp++ = entryblock;
2177     entryblock->b_visited = 1;
2178     while (sp > stack) {
2179         basicblock *b = *(--sp);
2180         assert(!b->b_except_handler);
2181         b->b_warm = 1;
2182         basicblock *next = b->b_next;
2183         if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
2184             *sp++ = next;
2185             next->b_visited = 1;
2186         }
2187         for (int i=0; i < b->b_iused; i++) {
2188             cfg_instr *instr = &b->b_instr[i];
2189             if (is_jump(instr) && !instr->i_target->b_visited) {
2190                 *sp++ = instr->i_target;
2191                 instr->i_target->b_visited = 1;
2192             }
2193         }
2194     }
2195     PyMem_Free(stack);
2196     return SUCCESS;
2197 }
2198 
2199 static int
2200 mark_cold(basicblock *entryblock) {
2201     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2202         assert(!b->b_cold && !b->b_warm);
2203     }
2204     if (mark_warm(entryblock) < 0) {
2205         return ERROR;
2206     }
2207 
2208     basicblock **stack = make_cfg_traversal_stack(entryblock);
2209     if (stack == NULL) {
2210         return ERROR;
2211     }
2212 
2213     basicblock **sp = stack;
2214     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2215         if (b->b_except_handler) {
2216             assert(!b->b_warm);
2217             *sp++ = b;
2218             b->b_visited = 1;
2219         }
2220     }
2221 
2222     while (sp > stack) {
2223         basicblock *b = *(--sp);
2224         b->b_cold = 1;
2225         basicblock *next = b->b_next;
2226         if (next && BB_HAS_FALLTHROUGH(b)) {
2227             if (!next->b_warm && !next->b_visited) {
2228                 *sp++ = next;
2229                 next->b_visited = 1;
2230             }
2231         }
2232         for (int i = 0; i < b->b_iused; i++) {
2233             cfg_instr *instr = &b->b_instr[i];
2234             if (is_jump(instr)) {
2235                 assert(i == b->b_iused - 1);
2236                 basicblock *target = b->b_instr[i].i_target;
2237                 if (!target->b_warm && !target->b_visited) {
2238                     *sp++ = target;
2239                     target->b_visited = 1;
2240                 }
2241             }
2242         }
2243     }
2244     PyMem_Free(stack);
2245     return SUCCESS;
2246 }
2247 
2248 
2249 static int
2250 push_cold_blocks_to_end(cfg_builder *g) {
2251     basicblock *entryblock = g->g_entryblock;
2252     if (entryblock->b_next == NULL) {
2253         /* single basicblock, no need to reorder */
2254         return SUCCESS;
2255     }
2256     RETURN_IF_ERROR(mark_cold(entryblock));
2257 
2258     int next_lbl = get_max_label(g->g_entryblock) + 1;
2259 
2260     /* If we have a cold block with fallthrough to a warm block, add */
2261     /* an explicit jump instead of fallthrough */
2262     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2263         if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
2264             basicblock *explicit_jump = cfg_builder_new_block(g);
2265             if (explicit_jump == NULL) {
2266                 return ERROR;
2267             }
2268             if (!IS_LABEL(b->b_next->b_label)) {
2269                 b->b_next->b_label.id = next_lbl++;
2270             }
2271             basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
2272                              NO_LOCATION);
2273             explicit_jump->b_cold = 1;
2274             explicit_jump->b_next = b->b_next;
2275             explicit_jump->b_predecessors = 1;
2276             b->b_next = explicit_jump;
2277 
2278             /* set target */
2279             cfg_instr *last = basicblock_last_instr(explicit_jump);
2280             last->i_target = explicit_jump->b_next;
2281         }
2282     }
2283 
2284     assert(!entryblock->b_cold);  /* First block can't be cold */
2285     basicblock *cold_blocks = NULL;
2286     basicblock *cold_blocks_tail = NULL;
2287 
2288     basicblock *b = entryblock;
2289     while(b->b_next) {
2290         assert(!b->b_cold);
2291         while (b->b_next && !b->b_next->b_cold) {
2292             b = b->b_next;
2293         }
2294         if (b->b_next == NULL) {
2295             /* no more cold blocks */
2296             break;
2297         }
2298 
2299         /* b->b_next is the beginning of a cold streak */
2300         assert(!b->b_cold && b->b_next->b_cold);
2301 
2302         basicblock *b_end = b->b_next;
2303         while (b_end->b_next && b_end->b_next->b_cold) {
2304             b_end = b_end->b_next;
2305         }
2306 
2307         /* b_end is the end of the cold streak */
2308         assert(b_end && b_end->b_cold);
2309         assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
2310 
2311         if (cold_blocks == NULL) {
2312             cold_blocks = b->b_next;
2313         }
2314         else {
2315             cold_blocks_tail->b_next = b->b_next;
2316         }
2317         cold_blocks_tail = b_end;
2318         b->b_next = b_end->b_next;
2319         b_end->b_next = NULL;
2320     }
2321     assert(b != NULL && b->b_next == NULL);
2322     b->b_next = cold_blocks;
2323 
2324     if (cold_blocks != NULL) {
2325         RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2326     }
2327     return SUCCESS;
2328 }
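/* Rough sketch of the reordering (block names made up): given the chain
 * warm1 -> cold1 -> cold2 -> warm2, the cold streak is spliced out and
 * appended after the last warm block. Because cold2 used to fall through
 * into warm2, an explicit cold block with JUMP_NO_INTERRUPT to warm2 is
 * inserted first, so the result is roughly
 * warm1 -> warm2 -> ... -> cold1 -> cold2 -> JUMP_NO_INTERRUPT(warm2).
 */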
2329 
2330 static int
2331 convert_pseudo_ops(cfg_builder *g)
2332 {
2333     basicblock *entryblock = g->g_entryblock;
2334     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2335         for (int i = 0; i < b->b_iused; i++) {
2336             cfg_instr *instr = &b->b_instr[i];
2337             if (is_block_push(instr)) {
2338                 INSTR_SET_OP0(instr, NOP);
2339             }
2340             else if (instr->i_opcode == LOAD_CLOSURE) {
2341                 assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
2342                 instr->i_opcode = LOAD_FAST;
2343             }
2344             else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
2345                 assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
2346                 instr->i_opcode = STORE_FAST;
2347             }
2348         }
2349     }
2350     return remove_redundant_nops_and_jumps(g);
2351 }
2352 
2353 static inline bool
2354 is_exit_or_eval_check_without_lineno(basicblock *b) {
2355     if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
2356         return basicblock_has_no_lineno(b);
2357     }
2358     else {
2359         return false;
2360     }
2361 }
2362 
2363 
2364 /* PEP 626 mandates that the f_lineno of a frame is correct
2365  * after a frame terminates. It would be prohibitively expensive
2366  * to continuously update the f_lineno field at runtime,
2367  * so we make sure that all exiting instructions (raises and returns)
2368  * have a valid line number, allowing us to compute f_lineno lazily.
2369  * We can do this by duplicating the exit blocks without line number
2370  * so that none have more than one predecessor. We can then safely
2371  * copy the line number from the sole predecessor block.
2372  */
2373 static int
2374 duplicate_exits_without_lineno(cfg_builder *g)
2375 {
2376     int next_lbl = get_max_label(g->g_entryblock) + 1;
2377 
2378     /* Copy all exit blocks without line number that are targets of a jump.
2379      */
2380     basicblock *entryblock = g->g_entryblock;
2381     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2382         cfg_instr *last = basicblock_last_instr(b);
2383         if (last == NULL) {
2384             continue;
2385         }
2386         if (is_jump(last)) {
2387             basicblock *target = next_nonempty_block(last->i_target);
2388             if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
2389                 basicblock *new_target = copy_basicblock(g, target);
2390                 if (new_target == NULL) {
2391                     return ERROR;
2392                 }
2393                 new_target->b_instr[0].i_loc = last->i_loc;
2394                 last->i_target = new_target;
2395                 target->b_predecessors--;
2396                 new_target->b_predecessors = 1;
2397                 new_target->b_next = target->b_next;
2398                 new_target->b_label.id = next_lbl++;
2399                 target->b_next = new_target;
2400             }
2401         }
2402     }
2403 
2404     /* Any remaining reachable exit blocks without line number can only be reached by
2405      * fall through, and thus can only have a single predecessor */
2406     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2407         if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
2408             if (is_exit_or_eval_check_without_lineno(b->b_next)) {
2409                 cfg_instr *last = basicblock_last_instr(b);
2410                 assert(last != NULL);
2411                 b->b_next->b_instr[0].i_loc = last->i_loc;
2412             }
2413         }
2414     }
2415     return SUCCESS;
2416 }
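/* Hypothetical example: if two different branches jump to one shared
 * RETURN_VALUE block that has no line number, that block is copied so each
 * jump gets its own private exit block, and each copy takes its line number
 * from the jump instruction that reaches it.
 */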
2417 
2418 
2419 /* If an instruction has no line number, but its predecessor in the BB does,
2420  * then copy the line number. If a successor block has no line number, and only
2421  * one predecessor, then inherit the line number.
2422  * This ensures that all exit blocks (with one predecessor) receive a line number.
2423  * Also reduces the size of the line number table,
2424  * but has no impact on the generated line number events.
2425  */
2426 static void
2427 propagate_line_numbers(basicblock *entryblock) {
2428     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2429         cfg_instr *last = basicblock_last_instr(b);
2430         if (last == NULL) {
2431             continue;
2432         }
2433 
2434         location prev_location = NO_LOCATION;
2435         for (int i = 0; i < b->b_iused; i++) {
2436             if (b->b_instr[i].i_loc.lineno < 0) {
2437                 b->b_instr[i].i_loc = prev_location;
2438             }
2439             else {
2440                 prev_location = b->b_instr[i].i_loc;
2441             }
2442         }
2443         if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
2444             if (b->b_next->b_iused > 0) {
2445                 if (b->b_next->b_instr[0].i_loc.lineno < 0) {
2446                     b->b_next->b_instr[0].i_loc = prev_location;
2447                 }
2448             }
2449         }
2450         if (is_jump(last)) {
2451             basicblock *target = last->i_target;
2452             if (target->b_predecessors == 1) {
2453                 if (target->b_instr[0].i_loc.lineno < 0) {
2454                     target->b_instr[0].i_loc = prev_location;
2455                 }
2456             }
2457         }
2458     }
2459 }
2460 
2461 static int
2462 resolve_line_numbers(cfg_builder *g, int firstlineno)
2463 {
2464     RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
2465     propagate_line_numbers(g->g_entryblock);
2466     return SUCCESS;
2467 }
2468 
2469 int
2470 _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
2471                         int nlocals, int nparams, int firstlineno)
2472 {
2473     assert(cfg_builder_check(g));
2474     /** Preprocessing **/
2475     /* Map labels to targets and mark exception handlers */
2476     RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
2477     RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
2478     RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
2479 
2480     /** Optimization **/
2481     RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
2482     RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
2483     RETURN_IF_ERROR(
2484         add_checks_for_loads_of_uninitialized_variables(
2485             g->g_entryblock, nlocals, nparams));
2486     RETURN_IF_ERROR(insert_superinstructions(g));
2487 
2488     RETURN_IF_ERROR(push_cold_blocks_to_end(g));
2489     RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2490     return SUCCESS;
2491 }
2492 
2493 static int *
2494 build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
2495 {
2496     int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
2497     int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
2498     int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
2499 
2500     int noffsets = ncellvars + nfreevars;
2501     int *fixed = PyMem_New(int, noffsets);
2502     if (fixed == NULL) {
2503         PyErr_NoMemory();
2504         return NULL;
2505     }
2506     for (int i = 0; i < noffsets; i++) {
2507         fixed[i] = nlocals + i;
2508     }
2509 
2510     PyObject *varname, *cellindex;
2511     Py_ssize_t pos = 0;
2512     while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
2513         PyObject *varindex;
2514         if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
2515             goto error;
2516         }
2517         if (varindex == NULL) {
2518             continue;
2519         }
2520 
2521         int argoffset = PyLong_AsInt(varindex);
2522         Py_DECREF(varindex);
2523         if (argoffset == -1 && PyErr_Occurred()) {
2524             goto error;
2525         }
2526 
2527         int oldindex = PyLong_AsInt(cellindex);
2528         if (oldindex == -1 && PyErr_Occurred()) {
2529             goto error;
2530         }
2531         fixed[oldindex] = argoffset;
2532     }
2533     return fixed;
2534 
2535 error:
2536     PyMem_Free(fixed);
2537     return NULL;
2538 }
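/* The resulting map: by default cell/free variable i is assigned the slot
 * nlocals + i. If a cell variable is also a parameter (it appears in
 * u_varnames), its cell is instead mapped onto that argument's existing
 * local slot; for a hypothetical function whose first argument escapes into
 * a closure, the corresponding entry in fixed[] is 0 rather than nlocals.
 */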
2539 
2540 #define IS_GENERATOR(CF) \
2541     ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
2542 
2543 static int
2544 insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
2545                            int *fixed, int nfreevars, int code_flags)
2546 {
2547     assert(umd->u_firstlineno > 0);
2548 
2549     /* Add the generator prefix instructions. */
2550     if (IS_GENERATOR(code_flags)) {
2551         /* Note that RETURN_GENERATOR + POP_TOP have a net stack effect
2552          * of 0. This is because RETURN_GENERATOR pushes an element
2553          * with _PyFrame_StackPush before switching stacks.
2554          */
2555 
2556         location loc = LOCATION(umd->u_firstlineno, umd->u_firstlineno, -1, -1);
2557         cfg_instr make_gen = {
2558             .i_opcode = RETURN_GENERATOR,
2559             .i_oparg = 0,
2560             .i_loc = loc,
2561             .i_target = NULL,
2562         };
2563         RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &make_gen));
2564         cfg_instr pop_top = {
2565             .i_opcode = POP_TOP,
2566             .i_oparg = 0,
2567             .i_loc = loc,
2568             .i_target = NULL,
2569         };
2570         RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 1, &pop_top));
2571     }
2572 
2573     /* Set up cells for any variable that escapes, to be put in a closure. */
2574     const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
2575     if (ncellvars) {
2576         // umd->u_cellvars has the cells out of order so we sort them
2577         // before adding the MAKE_CELL instructions.  Note that we
2578         // adjust for arg cells, which come first.
2579         const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
2580         int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
2581         if (sorted == NULL) {
2582             PyErr_NoMemory();
2583             return ERROR;
2584         }
2585         for (int i = 0; i < ncellvars; i++) {
2586             sorted[fixed[i]] = i + 1;
2587         }
2588         for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
2589             int oldindex = sorted[i] - 1;
2590             if (oldindex == -1) {
2591                 continue;
2592             }
2593             cfg_instr make_cell = {
2594                 .i_opcode = MAKE_CELL,
2595                 // This will get fixed in offset_derefs().
2596                 .i_oparg = oldindex,
2597                 .i_loc = NO_LOCATION,
2598                 .i_target = NULL,
2599             };
2600             if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
2601                 PyMem_RawFree(sorted);
2602                 return ERROR;
2603             }
2604             ncellsused += 1;
2605         }
2606         PyMem_RawFree(sorted);
2607     }
2608 
2609     if (nfreevars) {
2610         cfg_instr copy_frees = {
2611             .i_opcode = COPY_FREE_VARS,
2612             .i_oparg = nfreevars,
2613             .i_loc = NO_LOCATION,
2614             .i_target = NULL,
2615         };
2616         RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
2617     }
2618 
2619     return SUCCESS;
2620 }
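/* Given the insertion order above, a code unit that needs all three kinds of
 * prefix would start with (sketch only):
 *
 *     COPY_FREE_VARS(nfreevars); MAKE_CELL(...); ...; RETURN_GENERATOR; POP_TOP
 *
 * followed by the original entry block body; COPY_FREE_VARS is inserted last
 * but at index 0, so it executes first.
 */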
2621 
2622 static int
2623 fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
2624 {
2625     int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
2626     int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
2627     int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
2628     int noffsets = ncellvars + nfreevars;
2629 
2630     // First deal with duplicates (arg cells).
2631     int numdropped = 0;
2632     for (int i = 0; i < noffsets ; i++) {
2633         if (fixedmap[i] == i + nlocals) {
2634             fixedmap[i] -= numdropped;
2635         }
2636         else {
2637             // It was a duplicate (cell/arg).
2638             numdropped += 1;
2639         }
2640     }
2641 
2642     // Then update offsets, either relative to locals or by cell2arg.
2643     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2644         for (int i = 0; i < b->b_iused; i++) {
2645             cfg_instr *inst = &b->b_instr[i];
2646             // This is called before extended args are generated.
2647             assert(inst->i_opcode != EXTENDED_ARG);
2648             int oldoffset = inst->i_oparg;
2649             switch(inst->i_opcode) {
2650                 case MAKE_CELL:
2651                 case LOAD_CLOSURE:
2652                 case LOAD_DEREF:
2653                 case STORE_DEREF:
2654                 case DELETE_DEREF:
2655                 case LOAD_FROM_DICT_OR_DEREF:
2656                     assert(oldoffset >= 0);
2657                     assert(oldoffset < noffsets);
2658                     assert(fixedmap[oldoffset] >= 0);
2659                     inst->i_oparg = fixedmap[oldoffset];
2660             }
2661         }
2662     }
2663 
2664     return numdropped;
2665 }
2666 
2667 static int
2668 prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g, int code_flags)
2669 {
2670     assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
2671     assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
2672     assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
2673     int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
2674     int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
2675     int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
2676     assert(INT_MAX - nlocals - ncellvars > 0);
2677     assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
2678     int nlocalsplus = nlocals + ncellvars + nfreevars;
2679     int* cellfixedoffsets = build_cellfixedoffsets(umd);
2680     if (cellfixedoffsets == NULL) {
2681         return ERROR;
2682     }
2683 
2684     // This must be called before fix_cell_offsets().
2685     if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars, code_flags)) {
2686         PyMem_Free(cellfixedoffsets);
2687         return ERROR;
2688     }
2689 
2690     int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
2691     PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
2692     cellfixedoffsets = NULL;
2693     if (numdropped < 0) {
2694         return ERROR;
2695     }
2696 
2697     nlocalsplus -= numdropped;
2698     return nlocalsplus;
2699 }
2700 
2701 int
2702 _PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
2703 {
2704     int lbl = 0;
2705     for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2706         b->b_label = (jump_target_label){lbl};
2707         lbl += 1;
2708     }
2709     for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2710         RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
2711         for (int i = 0; i < b->b_iused; i++) {
2712             cfg_instr *instr = &b->b_instr[i];
2713             if (HAS_TARGET(instr->i_opcode)) {
2714                 /* Set oparg to the label id (it will later be mapped to an offset) */
2715                 instr->i_oparg = instr->i_target->b_label.id;
2716             }
2717             RETURN_IF_ERROR(
2718                 _PyInstructionSequence_Addop(
2719                     seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
2720 
2721             _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
2722             if (instr->i_except != NULL) {
2723                 hi->h_label = instr->i_except->b_label.id;
2724                 hi->h_startdepth = instr->i_except->b_startdepth;
2725                 hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
2726             }
2727             else {
2728                 hi->h_label = -1;
2729             }
2730         }
2731     }
2732     return SUCCESS;
2733 }
2734 
2735 
2736 int
2737 _PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
2738                                      _PyCompile_CodeUnitMetadata *umd, int code_flags,
2739                                      int *stackdepth, int *nlocalsplus,
2740                                      _PyInstructionSequence *seq)
2741 {
2742     *stackdepth = calculate_stackdepth(g);
2743     if (*stackdepth < 0) {
2744         return ERROR;
2745     }
2746 
2747     /* prepare_localsplus adds instructions for generators that push
2748      * and pop an item on the stack. This assertion makes sure there
2749      * is space on the stack for that.
2750      * It should always be true, because a generator must have at
2751      * least one expression or call to INTRINSIC_STOPITERATION_ERROR,
2752  * which requires stack space.
2753      */
2754     assert(!(IS_GENERATOR(code_flags) && *stackdepth == 0));
2755 
2756     *nlocalsplus = prepare_localsplus(umd, g, code_flags);
2757     if (*nlocalsplus < 0) {
2758         return ERROR;
2759     }
2760 
2761     RETURN_IF_ERROR(convert_pseudo_ops(g));
2762 
2763     /* Order of basic blocks must have been determined by now */
2764 
2765     RETURN_IF_ERROR(normalize_jumps(g));
2766     assert(no_redundant_jumps(g));
2767 
2768     /* Can't modify the bytecode after computing jump offsets. */
2769     if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
2770         return ERROR;
2771     }
2772 
2773     return SUCCESS;
2774 }
2775 
2776 /* This is used by _PyCompile_Assemble to fill in the jump and exception
2777  * targets in a synthetic CFG (which is not the output of the builtin compiler).
2778  */
2779 int
2780 _PyCfg_JumpLabelsToTargets(cfg_builder *g)
2781 {
2782     RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
2783     RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
2784     return SUCCESS;
2785 }
2786