• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * This file compiles an abstract syntax tree (AST) into Python bytecode.
3  *
4  * The primary entry point is _PyAST_Compile(), which returns a
5  * PyCodeObject.  The compiler makes several passes to build the code
6  * object:
7  *   1. Checks for future statements.  See future.c
8  *   2. Builds a symbol table.  See symtable.c.
9  *   3. Generate code for basic blocks.  See compiler_mod() in this file.
10  *   4. Assemble the basic blocks into final code.  See assemble() in
11  *      this file.
12  *   5. Optimize the byte code (peephole optimizations).
13  *
14  * Note that compiler_mod() suggests module, but the module ast type
15  * (mod_ty) has cases for expressions and interactive statements.
16  *
17  * CAUTION: The VISIT_* macros abort the current function when they
18  * encounter a problem. So don't invoke them when there is memory
19  * which needs to be released. Code blocks are OK, as the compiler
20  * structure takes care of releasing those.  Use the arena to manage
21  * objects.
22  */
23 
24 #include <stdbool.h>
25 
26 #include "Python.h"
27 #include "pycore_ast.h"           // _PyAST_GetDocString()
28 #include "pycore_compile.h"       // _PyFuture_FromAST()
29 #include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
30 #include "pycore_long.h"          // _PyLong_GetZero()
31 #include "pycore_symtable.h"      // PySTEntryObject
32 
33 #define NEED_OPCODE_JUMP_TABLES
34 #include "opcode.h"               // EXTENDED_ARG
35 #include "wordcode_helpers.h"     // instrsize()
36 
37 
/* Initial allocation sizes for the compiler's growable arrays.
   DEFAULT_BLOCK_SIZE is the initial capacity (in instructions) of a
   basic block's b_instr array (see compiler_next_instr()); the others
   are presumably used by later passes — confirm against assemble(). */
#define DEFAULT_BLOCK_SIZE 16
#define DEFAULT_BLOCKS 8
#define DEFAULT_CODE_SIZE 128
#define DEFAULT_LNOTAB_SIZE 16

/* Kinds of comprehension, passed as the `type` argument of the
   comprehension-generator helpers declared below. */
#define COMP_GENEXP   0
#define COMP_LISTCOMP 1
#define COMP_SETCOMP  2
#define COMP_DICTCOMP 3
47 
48 /* A soft limit for stack use, to avoid excessive
49  * memory use for large constants, etc.
50  *
51  * The value 30 is plucked out of thin air.
52  * Code that could use more stack than this is
53  * rare, so the exact value is unimportant.
54  */
55 #define STACK_USE_GUIDELINE 30
56 
57 /* If we exceed this limit, it should
58  * be considered a compiler bug.
59  * Currently it should be impossible
60  * to exceed STACK_USE_GUIDELINE * 100,
61  * as 100 is the maximum parse depth.
62  * For performance reasons we will
63  * want to reduce this to a
64  * few hundred in the future.
65  *
66  * NOTE: Whatever MAX_ALLOWED_STACK_USE is
67  * set to, it should never restrict what Python
68  * we can write, just how we compile it.
69  */
70 #define MAX_ALLOWED_STACK_USE (STACK_USE_GUIDELINE * 100)
71 
/* True when the compiler may emit `await` at module top level: the
   PyCF_ALLOW_TOP_LEVEL_AWAIT compile flag is set AND the current
   compilation unit is a module block. */
#define IS_TOP_LEVEL_AWAIT(c) ( \
        (c->c_flags->cf_flags & PyCF_ALLOW_TOP_LEVEL_AWAIT) \
        && (c->u->u_ste->ste_type == ModuleBlock))
75 
/* A single bytecode instruction in the compiler's intermediate form. */
struct instr {
    unsigned char i_opcode;       /* opcode number */
    int i_oparg;                  /* argument value (not yet size-limited;
                                     wide values are handled at assembly) */
    struct basicblock_ *i_target; /* target block (if jump instruction) */
    int i_lineno;                 /* source line associated with this instr */
};
82 
#define LOG_BITS_PER_INT 5
#define MASK_LOW_LOG_BITS 31

static inline int
is_bit_set_in_table(uint32_t *table, int bitindex) {
    /* Test one bit of a packed bit table.
     *
     * The table stores 256 bits in eight 32-bit words.  The high bits
     * of bitindex select the word; the low five bits select the bit
     * within that word.
     */
    uint32_t selected_word = table[bitindex >> LOG_BITS_PER_INT];
    uint32_t shifted = selected_word >> (bitindex & MASK_LOW_LOG_BITS);
    return (int)(shifted & 1u);
}
96 
97 static inline int
is_relative_jump(struct instr * i)98 is_relative_jump(struct instr *i)
99 {
100     return is_bit_set_in_table(_PyOpcode_RelativeJump, i->i_opcode);
101 }
102 
103 static inline int
is_jump(struct instr * i)104 is_jump(struct instr *i)
105 {
106     return is_bit_set_in_table(_PyOpcode_Jump, i->i_opcode);
107 }
108 
typedef struct basicblock_ {
    /* Each basicblock in a compilation unit is linked via b_list in the
       reverse order that the block are allocated.  b_list points to the next
       block, not to be confused with b_next, which is next by control flow. */
    struct basicblock_ *b_list;
    /* number of instructions used */
    int b_iused;
    /* length of instruction array (b_instr) */
    int b_ialloc;
    /* pointer to an array of instructions, initially NULL */
    struct instr *b_instr;
    /* If b_next is non-NULL, it is a pointer to the next
       block reached by normal control flow. */
    struct basicblock_ *b_next;
    /* b_return is true if a RETURN_VALUE opcode is inserted. */
    unsigned b_return : 1;
    /* Number of predecessors that a block has. */
    int b_predecessors;
    /* Basic block has no fall through (it ends with a return, raise or jump) */
    unsigned b_nofallthrough : 1;
    /* Basic block exits scope (it ends with a return or raise) */
    unsigned b_exit : 1;
    /* Used by compiler passes to mark whether they have visited a basic block. */
    unsigned b_visited : 1;
    /* depth of stack upon entry of block, computed by stackdepth() */
    int b_startdepth;
    /* instruction offset for block, computed by assemble_jump_offsets() */
    int b_offset;
} basicblock;
138 
/* fblockinfo tracks the current frame block.

A frame block is used to handle loops, try/except, and try/finally.
It's called a frame block to distinguish it from a basic block in the
compiler IR.
*/

/* Kinds of frame block that may need unwinding on break/continue/return. */
enum fblocktype { WHILE_LOOP, FOR_LOOP, TRY_EXCEPT, FINALLY_TRY, FINALLY_END,
                  WITH, ASYNC_WITH, HANDLER_CLEANUP, POP_VALUE, EXCEPTION_HANDLER,
                  ASYNC_COMPREHENSION_GENERATOR };

struct fblockinfo {
    enum fblocktype fb_type;   /* which kind of construct this is */
    basicblock *fb_block;      /* block where the construct begins */
    /* (optional) type-specific exit or cleanup block */
    basicblock *fb_exit;
    /* (optional) additional information required for unwinding */
    void *fb_datum;
};
158 
/* Kinds of compilation scope, stored in compiler_unit.u_scope_type. */
enum {
    COMPILER_SCOPE_MODULE,          /* module / top-level code */
    COMPILER_SCOPE_CLASS,           /* class body */
    COMPILER_SCOPE_FUNCTION,        /* function body */
    COMPILER_SCOPE_ASYNC_FUNCTION,  /* async function body */
    COMPILER_SCOPE_LAMBDA,          /* lambda expression */
    COMPILER_SCOPE_COMPREHENSION,   /* comprehension / generator expression */
};
167 
/* The following items change on entry and exit of code blocks.
   They must be saved and restored when returning to a block.
*/
struct compiler_unit {
    PySTEntryObject *u_ste;     /* symbol table entry for this scope */

    PyObject *u_name;           /* name of this scope (function/class/module) */
    PyObject *u_qualname;  /* dot-separated qualified name (lazy) */
    int u_scope_type;           /* one of the COMPILER_SCOPE_* values */

    /* The following fields are dicts that map objects to
       the index of them in co_XXX.      The index is used as
       the argument for opcodes that refer to those collections.
    */
    PyObject *u_consts;    /* all constants */
    PyObject *u_names;     /* all names */
    PyObject *u_varnames;  /* local variables */
    PyObject *u_cellvars;  /* cell variables */
    PyObject *u_freevars;  /* free variables */

    PyObject *u_private;        /* for private name mangling */

    Py_ssize_t u_argcount;        /* number of arguments for block */
    Py_ssize_t u_posonlyargcount;        /* number of positional only arguments for block */
    Py_ssize_t u_kwonlyargcount; /* number of keyword only arguments for block */
    /* Pointer to the most recently allocated block.  By following b_list
       members, you can reach all early allocated blocks. */
    basicblock *u_blocks;
    basicblock *u_curblock; /* pointer to current block */

    int u_nfblocks;                            /* number of frame blocks in use */
    struct fblockinfo u_fblock[CO_MAXBLOCKS];  /* stack of active frame blocks */

    int u_firstlineno; /* the first lineno of the block */
    int u_lineno;          /* the lineno for the current stmt */
    int u_col_offset;      /* the offset of the current stmt */
    int u_end_lineno;      /* the end line of the current stmt */
    int u_end_col_offset;  /* the end offset of the current stmt */
};
207 
/* This struct captures the global state of a compilation.

The u pointer points to the current compilation unit, while units
for enclosing blocks are stored in c_stack.     The u and c_stack are
managed by compiler_enter_scope() and compiler_exit_scope().

Note that we don't track recursion levels during compilation - the
task of detecting and rejecting excessive levels of nesting is
handled by the symbol analysis pass.

*/

struct compiler {
    PyObject *c_filename;       /* filename the code is compiled from */
    struct symtable *c_st;      /* symbol table built by _PySymtable_Build() */
    PyFutureFeatures *c_future; /* pointer to module's __future__ */
    PyCompilerFlags *c_flags;   /* compile flags (merged with future features) */

    int c_optimize;              /* optimization level */
    int c_interactive;           /* true if in interactive mode */
    int c_nestlevel;             /* current scope nesting depth */
    PyObject *c_const_cache;     /* Python dict holding all constants,
                                    including names tuple */
    struct compiler_unit *u; /* compiler state for current block */
    PyObject *c_stack;           /* Python list holding compiler_unit ptrs */
    PyArena *c_arena;            /* pointer to memory allocation arena */
};
235 
/* Per-pattern state threaded through the match-statement compiler. */
typedef struct {
    // A list of strings corresponding to name captures. It is used to track:
    // - Repeated name assignments in the same pattern.
    // - Different name assignments in alternatives.
    // - The order of name assignments in alternatives.
    PyObject *stores;
    // If 0, any name captures against our subject will raise.
    int allow_irrefutable;
    // An array of blocks to jump to on failure. Jumping to fail_pop[i] will pop
    // i items off of the stack. The end result looks like this (with each block
    // falling through to the next):
    // fail_pop[4]: POP_TOP
    // fail_pop[3]: POP_TOP
    // fail_pop[2]: POP_TOP
    // fail_pop[1]: POP_TOP
    // fail_pop[0]: NOP
    basicblock **fail_pop;
    // The current length of fail_pop.
    Py_ssize_t fail_pop_size;
    // The number of items on top of the stack that need to *stay* on top of the
    // stack. Variable captures go beneath these. All of them will be popped on
    // failure.
    Py_ssize_t on_top;
} pattern_context;
260 
261 static int compiler_enter_scope(struct compiler *, identifier, int, void *, int);
262 static void compiler_free(struct compiler *);
263 static basicblock *compiler_new_block(struct compiler *);
264 static int compiler_next_instr(basicblock *);
265 static int compiler_addop(struct compiler *, int);
266 static int compiler_addop_i(struct compiler *, int, Py_ssize_t);
267 static int compiler_addop_j(struct compiler *, int, basicblock *);
268 static int compiler_addop_j_noline(struct compiler *, int, basicblock *);
269 static int compiler_error(struct compiler *, const char *, ...);
270 static int compiler_warn(struct compiler *, const char *, ...);
271 static int compiler_nameop(struct compiler *, identifier, expr_context_ty);
272 
273 static PyCodeObject *compiler_mod(struct compiler *, mod_ty);
274 static int compiler_visit_stmt(struct compiler *, stmt_ty);
275 static int compiler_visit_keyword(struct compiler *, keyword_ty);
276 static int compiler_visit_expr(struct compiler *, expr_ty);
277 static int compiler_augassign(struct compiler *, stmt_ty);
278 static int compiler_annassign(struct compiler *, stmt_ty);
279 static int compiler_subscript(struct compiler *, expr_ty);
280 static int compiler_slice(struct compiler *, expr_ty);
281 
282 static int inplace_binop(operator_ty);
283 static int are_all_items_const(asdl_expr_seq *, Py_ssize_t, Py_ssize_t);
284 
285 
286 static int compiler_with(struct compiler *, stmt_ty, int);
287 static int compiler_async_with(struct compiler *, stmt_ty, int);
288 static int compiler_async_for(struct compiler *, stmt_ty);
289 static int compiler_call_helper(struct compiler *c, int n,
290                                 asdl_expr_seq *args,
291                                 asdl_keyword_seq *keywords);
292 static int compiler_try_except(struct compiler *, stmt_ty);
293 static int compiler_set_qualname(struct compiler *);
294 
295 static int compiler_sync_comprehension_generator(
296                                       struct compiler *c,
297                                       asdl_comprehension_seq *generators, int gen_index,
298                                       int depth,
299                                       expr_ty elt, expr_ty val, int type);
300 
301 static int compiler_async_comprehension_generator(
302                                       struct compiler *c,
303                                       asdl_comprehension_seq *generators, int gen_index,
304                                       int depth,
305                                       expr_ty elt, expr_ty val, int type);
306 
307 static int compiler_pattern(struct compiler *, pattern_ty, pattern_context *);
308 static int compiler_match(struct compiler *, stmt_ty);
309 static int compiler_pattern_subpattern(struct compiler *, pattern_ty,
310                                        pattern_context *);
311 
312 static PyCodeObject *assemble(struct compiler *, int addNone);
/* Interned name objects, created lazily by _PyAST_Compile(). */
static PyObject *__doc__, *__annotations__;
314 
315 #define CAPSULE_NAME "compile.c compiler unit"
316 
317 PyObject *
_Py_Mangle(PyObject * privateobj,PyObject * ident)318 _Py_Mangle(PyObject *privateobj, PyObject *ident)
319 {
320     /* Name mangling: __private becomes _classname__private.
321        This is independent from how the name is used. */
322     PyObject *result;
323     size_t nlen, plen, ipriv;
324     Py_UCS4 maxchar;
325     if (privateobj == NULL || !PyUnicode_Check(privateobj) ||
326         PyUnicode_READ_CHAR(ident, 0) != '_' ||
327         PyUnicode_READ_CHAR(ident, 1) != '_') {
328         Py_INCREF(ident);
329         return ident;
330     }
331     nlen = PyUnicode_GET_LENGTH(ident);
332     plen = PyUnicode_GET_LENGTH(privateobj);
333     /* Don't mangle __id__ or names with dots.
334 
335        The only time a name with a dot can occur is when
336        we are compiling an import statement that has a
337        package name.
338 
339        TODO(jhylton): Decide whether we want to support
340        mangling of the module name, e.g. __M.X.
341     */
342     if ((PyUnicode_READ_CHAR(ident, nlen-1) == '_' &&
343          PyUnicode_READ_CHAR(ident, nlen-2) == '_') ||
344         PyUnicode_FindChar(ident, '.', 0, nlen, 1) != -1) {
345         Py_INCREF(ident);
346         return ident; /* Don't mangle __whatever__ */
347     }
348     /* Strip leading underscores from class name */
349     ipriv = 0;
350     while (PyUnicode_READ_CHAR(privateobj, ipriv) == '_')
351         ipriv++;
352     if (ipriv == plen) {
353         Py_INCREF(ident);
354         return ident; /* Don't mangle if class is just underscores */
355     }
356     plen -= ipriv;
357 
358     if (plen + nlen >= PY_SSIZE_T_MAX - 1) {
359         PyErr_SetString(PyExc_OverflowError,
360                         "private identifier too large to be mangled");
361         return NULL;
362     }
363 
364     maxchar = PyUnicode_MAX_CHAR_VALUE(ident);
365     if (PyUnicode_MAX_CHAR_VALUE(privateobj) > maxchar)
366         maxchar = PyUnicode_MAX_CHAR_VALUE(privateobj);
367 
368     result = PyUnicode_New(1 + nlen + plen, maxchar);
369     if (!result)
370         return 0;
371     /* ident = "_" + priv[ipriv:] + ident # i.e. 1+plen+nlen bytes */
372     PyUnicode_WRITE(PyUnicode_KIND(result), PyUnicode_DATA(result), 0, '_');
373     if (PyUnicode_CopyCharacters(result, 1, privateobj, ipriv, plen) < 0) {
374         Py_DECREF(result);
375         return NULL;
376     }
377     if (PyUnicode_CopyCharacters(result, plen+1, ident, 0, nlen) < 0) {
378         Py_DECREF(result);
379         return NULL;
380     }
381     assert(_PyUnicode_CheckConsistency(result, 1));
382     return result;
383 }
384 
385 static int
compiler_init(struct compiler * c)386 compiler_init(struct compiler *c)
387 {
388     memset(c, 0, sizeof(struct compiler));
389 
390     c->c_const_cache = PyDict_New();
391     if (!c->c_const_cache) {
392         return 0;
393     }
394 
395     c->c_stack = PyList_New(0);
396     if (!c->c_stack) {
397         Py_CLEAR(c->c_const_cache);
398         return 0;
399     }
400 
401     return 1;
402 }
403 
/* Compile an AST to a code object.  This is the primary public entry
   point of the module (see the file header).  Returns a new reference
   to a PyCodeObject, or NULL with an exception set. */
PyCodeObject *
_PyAST_Compile(mod_ty mod, PyObject *filename, PyCompilerFlags *flags,
               int optimize, PyArena *arena)
{
    struct compiler c;
    PyCodeObject *co = NULL;
    PyCompilerFlags local_flags = _PyCompilerFlags_INIT;
    int merged;

    /* Lazily intern the name objects used throughout compilation. */
    if (!__doc__) {
        __doc__ = PyUnicode_InternFromString("__doc__");
        if (!__doc__)
            return NULL;
    }
    if (!__annotations__) {
        __annotations__ = PyUnicode_InternFromString("__annotations__");
        if (!__annotations__)
            return NULL;
    }
    if (!compiler_init(&c))
        return NULL;
    Py_INCREF(filename);
    c.c_filename = filename;
    c.c_arena = arena;
    /* Pass 1: scan for future statements (see file header). */
    c.c_future = _PyFuture_FromAST(mod, filename);
    if (c.c_future == NULL)
        goto finally;
    if (!flags) {
        flags = &local_flags;
    }
    /* Merge __future__ features and caller-supplied flags so both views
       agree; note this also writes back into the caller's flags. */
    merged = c.c_future->ff_features | flags->cf_flags;
    c.c_future->ff_features = merged;
    flags->cf_flags = merged;
    c.c_flags = flags;
    /* -1 means "use the interpreter's configured optimization level". */
    c.c_optimize = (optimize == -1) ? _Py_GetConfig()->optimization_level : optimize;
    c.c_nestlevel = 0;

    _PyASTOptimizeState state;
    state.optimize = c.c_optimize;
    state.ff_features = merged;

    /* AST-level optimizations (e.g. constant folding) happen in place. */
    if (!_PyAST_Optimize(mod, arena, &state)) {
        goto finally;
    }

    /* Pass 2: build the symbol table (see file header). */
    c.c_st = _PySymtable_Build(mod, filename, c.c_future);
    if (c.c_st == NULL) {
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_SystemError, "no symtable");
        goto finally;
    }

    /* Passes 3-5: code generation, assembly, peephole optimization. */
    co = compiler_mod(&c, mod);

 finally:
    compiler_free(&c);
    assert(co || PyErr_Occurred());
    return co;
}
463 
464 static void
compiler_free(struct compiler * c)465 compiler_free(struct compiler *c)
466 {
467     if (c->c_st)
468         _PySymtable_Free(c->c_st);
469     if (c->c_future)
470         PyObject_Free(c->c_future);
471     Py_XDECREF(c->c_filename);
472     Py_DECREF(c->c_const_cache);
473     Py_DECREF(c->c_stack);
474 }
475 
476 static PyObject *
list2dict(PyObject * list)477 list2dict(PyObject *list)
478 {
479     Py_ssize_t i, n;
480     PyObject *v, *k;
481     PyObject *dict = PyDict_New();
482     if (!dict) return NULL;
483 
484     n = PyList_Size(list);
485     for (i = 0; i < n; i++) {
486         v = PyLong_FromSsize_t(i);
487         if (!v) {
488             Py_DECREF(dict);
489             return NULL;
490         }
491         k = PyList_GET_ITEM(list, i);
492         if (PyDict_SetItem(dict, k, v) < 0) {
493             Py_DECREF(v);
494             Py_DECREF(dict);
495             return NULL;
496         }
497         Py_DECREF(v);
498     }
499     return dict;
500 }
501 
502 /* Return new dict containing names from src that match scope(s).
503 
504 src is a symbol table dictionary.  If the scope of a name matches
505 either scope_type or flag is set, insert it into the new dict.  The
506 values are integers, starting at offset and increasing by one for
507 each key.
508 */
509 
510 static PyObject *
dictbytype(PyObject * src,int scope_type,int flag,Py_ssize_t offset)511 dictbytype(PyObject *src, int scope_type, int flag, Py_ssize_t offset)
512 {
513     Py_ssize_t i = offset, scope, num_keys, key_i;
514     PyObject *k, *v, *dest = PyDict_New();
515     PyObject *sorted_keys;
516 
517     assert(offset >= 0);
518     if (dest == NULL)
519         return NULL;
520 
521     /* Sort the keys so that we have a deterministic order on the indexes
522        saved in the returned dictionary.  These indexes are used as indexes
523        into the free and cell var storage.  Therefore if they aren't
524        deterministic, then the generated bytecode is not deterministic.
525     */
526     sorted_keys = PyDict_Keys(src);
527     if (sorted_keys == NULL)
528         return NULL;
529     if (PyList_Sort(sorted_keys) != 0) {
530         Py_DECREF(sorted_keys);
531         return NULL;
532     }
533     num_keys = PyList_GET_SIZE(sorted_keys);
534 
535     for (key_i = 0; key_i < num_keys; key_i++) {
536         /* XXX this should probably be a macro in symtable.h */
537         long vi;
538         k = PyList_GET_ITEM(sorted_keys, key_i);
539         v = PyDict_GetItemWithError(src, k);
540         assert(v && PyLong_Check(v));
541         vi = PyLong_AS_LONG(v);
542         scope = (vi >> SCOPE_OFFSET) & SCOPE_MASK;
543 
544         if (scope == scope_type || vi & flag) {
545             PyObject *item = PyLong_FromSsize_t(i);
546             if (item == NULL) {
547                 Py_DECREF(sorted_keys);
548                 Py_DECREF(dest);
549                 return NULL;
550             }
551             i++;
552             if (PyDict_SetItem(dest, k, item) < 0) {
553                 Py_DECREF(sorted_keys);
554                 Py_DECREF(item);
555                 Py_DECREF(dest);
556                 return NULL;
557             }
558             Py_DECREF(item);
559         }
560     }
561     Py_DECREF(sorted_keys);
562     return dest;
563 }
564 
565 static void
compiler_unit_check(struct compiler_unit * u)566 compiler_unit_check(struct compiler_unit *u)
567 {
568     basicblock *block;
569     for (block = u->u_blocks; block != NULL; block = block->b_list) {
570         assert(!_PyMem_IsPtrFreed(block));
571         if (block->b_instr != NULL) {
572             assert(block->b_ialloc > 0);
573             assert(block->b_iused >= 0);
574             assert(block->b_ialloc >= block->b_iused);
575         }
576         else {
577             assert (block->b_iused == 0);
578             assert (block->b_ialloc == 0);
579         }
580     }
581 }
582 
583 static void
compiler_unit_free(struct compiler_unit * u)584 compiler_unit_free(struct compiler_unit *u)
585 {
586     basicblock *b, *next;
587 
588     compiler_unit_check(u);
589     b = u->u_blocks;
590     while (b != NULL) {
591         if (b->b_instr)
592             PyObject_Free((void *)b->b_instr);
593         next = b->b_list;
594         PyObject_Free((void *)b);
595         b = next;
596     }
597     Py_CLEAR(u->u_ste);
598     Py_CLEAR(u->u_name);
599     Py_CLEAR(u->u_qualname);
600     Py_CLEAR(u->u_consts);
601     Py_CLEAR(u->u_names);
602     Py_CLEAR(u->u_varnames);
603     Py_CLEAR(u->u_freevars);
604     Py_CLEAR(u->u_cellvars);
605     Py_CLEAR(u->u_private);
606     PyObject_Free(u);
607 }
608 
/* Create a new compilation unit for the scope identified by `key`
   (a symbol-table key) and make it current, pushing the previous unit
   onto c->c_stack.  Returns 1 on success, 0 on failure with an
   exception set.  On failure the partially built unit is freed and
   the compiler state is unchanged. */
static int
compiler_enter_scope(struct compiler *c, identifier name,
                     int scope_type, void *key, int lineno)
{
    struct compiler_unit *u;
    basicblock *block;

    u = (struct compiler_unit *)PyObject_Calloc(1, sizeof(
                                            struct compiler_unit));
    if (!u) {
        PyErr_NoMemory();
        return 0;
    }
    u->u_scope_type = scope_type;
    u->u_argcount = 0;
    u->u_posonlyargcount = 0;
    u->u_kwonlyargcount = 0;
    /* Look up the symtable entry produced by the earlier analysis pass. */
    u->u_ste = PySymtable_Lookup(c->c_st, key);
    if (!u->u_ste) {
        compiler_unit_free(u);
        return 0;
    }
    Py_INCREF(name);
    u->u_name = name;
    /* Locals keep symtable order; cell vars get indices starting at 0. */
    u->u_varnames = list2dict(u->u_ste->ste_varnames);
    u->u_cellvars = dictbytype(u->u_ste->ste_symbols, CELL, 0, 0);
    if (!u->u_varnames || !u->u_cellvars) {
        compiler_unit_free(u);
        return 0;
    }
    if (u->u_ste->ste_needs_class_closure) {
        /* Cook up an implicit __class__ cell. */
        _Py_IDENTIFIER(__class__);
        PyObject *name;
        int res;
        assert(u->u_scope_type == COMPILER_SCOPE_CLASS);
        assert(PyDict_GET_SIZE(u->u_cellvars) == 0);
        name = _PyUnicode_FromId(&PyId___class__);
        if (!name) {
            compiler_unit_free(u);
            return 0;
        }
        res = PyDict_SetItem(u->u_cellvars, name, _PyLong_GetZero());
        if (res < 0) {
            compiler_unit_free(u);
            return 0;
        }
    }

    /* Free vars are numbered after the cell vars (shared index space). */
    u->u_freevars = dictbytype(u->u_ste->ste_symbols, FREE, DEF_FREE_CLASS,
                               PyDict_GET_SIZE(u->u_cellvars));
    if (!u->u_freevars) {
        compiler_unit_free(u);
        return 0;
    }

    u->u_blocks = NULL;
    u->u_nfblocks = 0;
    u->u_firstlineno = lineno;
    u->u_lineno = 0;
    u->u_col_offset = 0;
    u->u_end_lineno = 0;
    u->u_end_col_offset = 0;
    u->u_consts = PyDict_New();
    if (!u->u_consts) {
        compiler_unit_free(u);
        return 0;
    }
    u->u_names = PyDict_New();
    if (!u->u_names) {
        compiler_unit_free(u);
        return 0;
    }

    u->u_private = NULL;

    /* Push the old compiler_unit on the stack. */
    if (c->u) {
        PyObject *capsule = PyCapsule_New(c->u, CAPSULE_NAME, NULL);
        if (!capsule || PyList_Append(c->c_stack, capsule) < 0) {
            Py_XDECREF(capsule);
            compiler_unit_free(u);
            return 0;
        }
        Py_DECREF(capsule);
        /* Inherit the private-name mangling context from the parent. */
        u->u_private = c->u->u_private;
        Py_XINCREF(u->u_private);
    }
    c->u = u;

    c->c_nestlevel++;

    /* Every unit starts with one (empty) current block.
       NOTE: from here on, failures leave `u` installed as c->u; it is
       released later by compiler_exit_scope()/compiler_free() paths. */
    block = compiler_new_block(c);
    if (block == NULL)
        return 0;
    c->u->u_curblock = block;

    if (u->u_scope_type != COMPILER_SCOPE_MODULE) {
        if (!compiler_set_qualname(c))
            return 0;
    }

    return 1;
}
713 
/* Free the current compilation unit and restore its parent from the
   top of c->c_stack (or set c->u to NULL at the outermost scope).
   Preserves any exception already in flight across the list mutation. */
static void
compiler_exit_scope(struct compiler *c)
{
    // Don't call PySequence_DelItem() with an exception raised
    PyObject *exc_type, *exc_val, *exc_tb;
    PyErr_Fetch(&exc_type, &exc_val, &exc_tb);

    c->c_nestlevel--;
    compiler_unit_free(c->u);
    /* Restore c->u to the parent unit. */
    Py_ssize_t n = PyList_GET_SIZE(c->c_stack) - 1;
    if (n >= 0) {
        PyObject *capsule = PyList_GET_ITEM(c->c_stack, n);
        c->u = (struct compiler_unit *)PyCapsule_GetPointer(capsule, CAPSULE_NAME);
        assert(c->u);
        /* we are deleting from a list so this really shouldn't fail */
        if (PySequence_DelItem(c->c_stack, n) < 0) {
            _PyErr_WriteUnraisableMsg("on removing the last compiler "
                                      "stack item", NULL);
        }
        compiler_unit_check(c->u);
    }
    else {
        c->u = NULL;
    }

    PyErr_Restore(exc_type, exc_val, exc_tb);
}
742 
/* Compute and store the current unit's dotted qualified name
   (u_qualname), e.g. "Outer.<locals>.inner".  Assumes the parent unit
   (if any) already has its u_qualname set.  Returns 1 on success,
   0 on failure with an exception set. */
static int
compiler_set_qualname(struct compiler *c)
{
    _Py_static_string(dot, ".");
    _Py_static_string(dot_locals, ".<locals>");
    Py_ssize_t stack_size;
    struct compiler_unit *u = c->u;
    PyObject *name, *base, *dot_str, *dot_locals_str;

    base = NULL;
    stack_size = PyList_GET_SIZE(c->c_stack);
    assert(stack_size >= 1);
    if (stack_size > 1) {
        int scope, force_global = 0;
        struct compiler_unit *parent;
        PyObject *mangled, *capsule;

        /* The enclosing unit is the top of the saved-unit stack. */
        capsule = PyList_GET_ITEM(c->c_stack, stack_size - 1);
        parent = (struct compiler_unit *)PyCapsule_GetPointer(capsule, CAPSULE_NAME);
        assert(parent);

        if (u->u_scope_type == COMPILER_SCOPE_FUNCTION
            || u->u_scope_type == COMPILER_SCOPE_ASYNC_FUNCTION
            || u->u_scope_type == COMPILER_SCOPE_CLASS) {
            /* A name declared `global` in the parent scope is not
               qualified by the enclosing scopes at all. */
            assert(u->u_name);
            mangled = _Py_Mangle(parent->u_private, u->u_name);
            if (!mangled)
                return 0;
            scope = _PyST_GetScope(parent->u_ste, mangled);
            Py_DECREF(mangled);
            assert(scope != GLOBAL_IMPLICIT);
            if (scope == GLOBAL_EXPLICIT)
                force_global = 1;
        }

        if (!force_global) {
            if (parent->u_scope_type == COMPILER_SCOPE_FUNCTION
                || parent->u_scope_type == COMPILER_SCOPE_ASYNC_FUNCTION
                || parent->u_scope_type == COMPILER_SCOPE_LAMBDA) {
                /* Names nested in a function get a ".<locals>" segment. */
                dot_locals_str = _PyUnicode_FromId(&dot_locals);
                if (dot_locals_str == NULL)
                    return 0;
                base = PyUnicode_Concat(parent->u_qualname, dot_locals_str);
                if (base == NULL)
                    return 0;
            }
            else {
                Py_INCREF(parent->u_qualname);
                base = parent->u_qualname;
            }
        }
    }

    if (base != NULL) {
        /* qualname = base + "." + u_name */
        dot_str = _PyUnicode_FromId(&dot);
        if (dot_str == NULL) {
            Py_DECREF(base);
            return 0;
        }
        name = PyUnicode_Concat(base, dot_str);
        Py_DECREF(base);
        if (name == NULL)
            return 0;
        PyUnicode_Append(&name, u->u_name);
        if (name == NULL)
            return 0;
    }
    else {
        Py_INCREF(u->u_name);
        name = u->u_name;
    }
    u->u_qualname = name;

    return 1;
}
818 
819 
820 /* Allocate a new block and return a pointer to it.
821    Returns NULL on error.
822 */
823 
824 static basicblock *
compiler_new_block(struct compiler * c)825 compiler_new_block(struct compiler *c)
826 {
827     basicblock *b;
828     struct compiler_unit *u;
829 
830     u = c->u;
831     b = (basicblock *)PyObject_Calloc(1, sizeof(basicblock));
832     if (b == NULL) {
833         PyErr_NoMemory();
834         return NULL;
835     }
836     /* Extend the singly linked list of blocks with new block. */
837     b->b_list = u->u_blocks;
838     u->u_blocks = b;
839     return b;
840 }
841 
842 static basicblock *
compiler_next_block(struct compiler * c)843 compiler_next_block(struct compiler *c)
844 {
845     basicblock *block = compiler_new_block(c);
846     if (block == NULL)
847         return NULL;
848     c->u->u_curblock->b_next = block;
849     c->u->u_curblock = block;
850     return block;
851 }
852 
853 static basicblock *
compiler_use_next_block(struct compiler * c,basicblock * block)854 compiler_use_next_block(struct compiler *c, basicblock *block)
855 {
856     assert(block != NULL);
857     c->u->u_curblock->b_next = block;
858     c->u->u_curblock = block;
859     return block;
860 }
861 
862 static basicblock *
compiler_copy_block(struct compiler * c,basicblock * block)863 compiler_copy_block(struct compiler *c, basicblock *block)
864 {
865     /* Cannot copy a block if it has a fallthrough, since
866      * a block can only have one fallthrough predecessor.
867      */
868     assert(block->b_nofallthrough);
869     basicblock *result = compiler_new_block(c);
870     if (result == NULL) {
871         return NULL;
872     }
873     for (int i = 0; i < block->b_iused; i++) {
874         int n = compiler_next_instr(result);
875         if (n < 0) {
876             return NULL;
877         }
878         result->b_instr[n] = block->b_instr[i];
879     }
880     result->b_exit = block->b_exit;
881     result->b_nofallthrough = 1;
882     return result;
883 }
884 
885 /* Returns the offset of the next instruction in the current block's
886    b_instr array.  Resizes the b_instr as necessary.
887    Returns -1 on failure.
888 */
889 
static int
compiler_next_instr(basicblock *b)
{
    assert(b != NULL);
    if (b->b_instr == NULL) {
        /* First instruction in this block: allocate the initial,
           zero-filled instruction array. */
        b->b_instr = (struct instr *)PyObject_Calloc(
                         DEFAULT_BLOCK_SIZE, sizeof(struct instr));
        if (b->b_instr == NULL) {
            PyErr_NoMemory();
            return -1;
        }
        b->b_ialloc = DEFAULT_BLOCK_SIZE;
    }
    else if (b->b_iused == b->b_ialloc) {
        /* Array is full: double its capacity. */
        struct instr *tmp;
        size_t oldsize, newsize;
        oldsize = b->b_ialloc * sizeof(struct instr);
        newsize = oldsize << 1;

        /* The shift above wraps (well-defined for unsigned types) when
           oldsize exceeds SIZE_MAX/2; reject that case here. */
        if (oldsize > (SIZE_MAX >> 1)) {
            PyErr_NoMemory();
            return -1;
        }

        if (newsize == 0) {
            PyErr_NoMemory();
            return -1;
        }
        b->b_ialloc <<= 1;
        tmp = (struct instr *)PyObject_Realloc(
                                        (void *)b->b_instr, newsize);
        if (tmp == NULL) {
            PyErr_NoMemory();
            return -1;
        }
        b->b_instr = tmp;
        /* Zero the newly grown half so it matches the Calloc above. */
        memset((char *)b->b_instr + oldsize, 0, newsize - oldsize);
    }
    return b->b_iused++;
}
930 
931 /* Set the line number and column offset for the following instructions.
932 
933    The line number is reset in the following cases:
934    - when entering a new scope
935    - on each statement
936    - on each expression and sub-expression
937    - before the "except" and "finally" clauses
938 */
939 
/* Record the source location (line and column range) that subsequently
   emitted instructions will carry.  Wrapped in do/while(0) so the four
   assignments form a single statement: the previous unbraced expansion
   meant `if (cond) SET_LOC(c, x);` guarded only the first assignment. */
#define SET_LOC(c, x)                                   \
    do {                                                \
        (c)->u->u_lineno = (x)->lineno;                 \
        (c)->u->u_col_offset = (x)->col_offset;         \
        (c)->u->u_end_lineno = (x)->end_lineno;         \
        (c)->u->u_end_col_offset = (x)->end_col_offset; \
    } while (0)
945 
946 /* Return the stack effect of opcode with argument oparg.
947 
948    Some opcodes have different stack effect when jump to the target and
949    when not jump. The 'jump' parameter specifies the case:
950 
951    * 0 -- when not jump
952    * 1 -- when jump
953    * -1 -- maximal
954  */
static int
stack_effect(int opcode, int oparg, int jump)
{
    switch (opcode) {
        case NOP:
        case EXTENDED_ARG:
            return 0;

        /* Stack manipulation */
        case POP_TOP:
            return -1;
        case ROT_TWO:
        case ROT_THREE:
        case ROT_FOUR:
            return 0;
        case DUP_TOP:
            return 1;
        case DUP_TOP_TWO:
            return 2;

        /* Unary operators */
        case UNARY_POSITIVE:
        case UNARY_NEGATIVE:
        case UNARY_NOT:
        case UNARY_INVERT:
            return 0;

        /* Comprehension accumulators: pop the value being added;
           the container stays on the stack. */
        case SET_ADD:
        case LIST_APPEND:
            return -1;
        case MAP_ADD:
            return -2;

        /* Binary operators */
        case BINARY_POWER:
        case BINARY_MULTIPLY:
        case BINARY_MATRIX_MULTIPLY:
        case BINARY_MODULO:
        case BINARY_ADD:
        case BINARY_SUBTRACT:
        case BINARY_SUBSCR:
        case BINARY_FLOOR_DIVIDE:
        case BINARY_TRUE_DIVIDE:
            return -1;
        case INPLACE_FLOOR_DIVIDE:
        case INPLACE_TRUE_DIVIDE:
            return -1;

        case INPLACE_ADD:
        case INPLACE_SUBTRACT:
        case INPLACE_MULTIPLY:
        case INPLACE_MATRIX_MULTIPLY:
        case INPLACE_MODULO:
            return -1;
        case STORE_SUBSCR:
            return -3;
        case DELETE_SUBSCR:
            return -2;

        case BINARY_LSHIFT:
        case BINARY_RSHIFT:
        case BINARY_AND:
        case BINARY_XOR:
        case BINARY_OR:
            return -1;
        case INPLACE_POWER:
            return -1;
        case GET_ITER:
            return 0;

        case PRINT_EXPR:
            return -1;
        case LOAD_BUILD_CLASS:
            return 1;
        case INPLACE_LSHIFT:
        case INPLACE_RSHIFT:
        case INPLACE_AND:
        case INPLACE_XOR:
        case INPLACE_OR:
            return -1;

        case SETUP_WITH:
            /* 1 in the normal flow.
             * Restore the stack position and push 6 values before jumping to
             * the handler if an exception is raised. */
            return jump ? 6 : 1;
        case RETURN_VALUE:
            return -1;
        case IMPORT_STAR:
            return -1;
        case SETUP_ANNOTATIONS:
            return 0;
        case YIELD_VALUE:
            return 0;
        case YIELD_FROM:
            return -1;
        case POP_BLOCK:
            return 0;
        case POP_EXCEPT:
            return -3;

        case STORE_NAME:
            return -1;
        case DELETE_NAME:
            return 0;
        /* Pops the sequence and pushes its oparg items. */
        case UNPACK_SEQUENCE:
            return oparg-1;
        /* oparg encodes counts before (low byte) and after (high byte)
           the starred target; the list for the star adds one more. */
        case UNPACK_EX:
            return (oparg&0xFF) + (oparg>>8);
        case FOR_ITER:
            /* -1 at end of iterator, 1 if continue iterating. */
            return jump > 0 ? -1 : 1;

        case STORE_ATTR:
            return -2;
        case DELETE_ATTR:
            return -1;
        case STORE_GLOBAL:
            return -1;
        case DELETE_GLOBAL:
            return 0;
        case LOAD_CONST:
            return 1;
        case LOAD_NAME:
            return 1;
        /* Pop oparg elements, push the new container. */
        case BUILD_TUPLE:
        case BUILD_LIST:
        case BUILD_SET:
        case BUILD_STRING:
            return 1-oparg;
        /* Pop oparg key/value pairs, push the new dict. */
        case BUILD_MAP:
            return 1 - 2*oparg;
        /* Pop oparg values plus the keys tuple, push the new dict. */
        case BUILD_CONST_KEY_MAP:
            return -oparg;
        case LOAD_ATTR:
            return 0;
        case COMPARE_OP:
        case IS_OP:
        case CONTAINS_OP:
            return -1;
        case JUMP_IF_NOT_EXC_MATCH:
            return -2;
        case IMPORT_NAME:
            return -1;
        case IMPORT_FROM:
            return 1;

        /* Jumps */
        case JUMP_FORWARD:
        case JUMP_ABSOLUTE:
            return 0;

        /* TOS is kept on the jump path, popped on fallthrough. */
        case JUMP_IF_TRUE_OR_POP:
        case JUMP_IF_FALSE_OR_POP:
            return jump ? 0 : -1;

        case POP_JUMP_IF_FALSE:
        case POP_JUMP_IF_TRUE:
            return -1;

        case LOAD_GLOBAL:
            return 1;

        /* Exception handling */
        case SETUP_FINALLY:
            /* 0 in the normal flow.
             * Restore the stack position and push 6 values before jumping to
             * the handler if an exception is raised. */
            return jump ? 6 : 0;
        case RERAISE:
            return -3;

        case WITH_EXCEPT_START:
            return 1;

        case LOAD_FAST:
            return 1;
        case STORE_FAST:
            return -1;
        case DELETE_FAST:
            return 0;

        case RAISE_VARARGS:
            return -oparg;

        /* Functions and calls */
        case CALL_FUNCTION:
            return -oparg;
        case CALL_METHOD:
            return -oparg-1;
        case CALL_FUNCTION_KW:
            return -oparg-1;
        case CALL_FUNCTION_EX:
            return -1 - ((oparg & 0x01) != 0);
        /* Each flag bit in oparg means one extra item (defaults,
           kwdefaults, annotations, closure) is consumed. */
        case MAKE_FUNCTION:
            return -1 - ((oparg & 0x01) != 0) - ((oparg & 0x02) != 0) -
                ((oparg & 0x04) != 0) - ((oparg & 0x08) != 0);
        case BUILD_SLICE:
            if (oparg == 3)
                return -2;
            else
                return -1;

        /* Closures */
        case LOAD_CLOSURE:
            return 1;
        case LOAD_DEREF:
        case LOAD_CLASSDEREF:
            return 1;
        case STORE_DEREF:
            return -1;
        case DELETE_DEREF:
            return 0;

        /* Iterators and generators */
        case GET_AWAITABLE:
            return 0;
        case SETUP_ASYNC_WITH:
            /* 0 in the normal flow.
             * Restore the stack position to the position before the result
             * of __aenter__ and push 6 values before jumping to the handler
             * if an exception is raised. */
            return jump ? -1 + 6 : 0;
        case BEFORE_ASYNC_WITH:
            return 1;
        case GET_AITER:
            return 0;
        case GET_ANEXT:
            return 1;
        case GET_YIELD_FROM_ITER:
            return 0;
        case END_ASYNC_FOR:
            return -7;
        case FORMAT_VALUE:
            /* If there's a fmt_spec on the stack, we go from 2->1,
               else 1->1. */
            return (oparg & FVS_MASK) == FVS_HAVE_SPEC ? -1 : 0;
        case LOAD_METHOD:
            return 1;
        case LOAD_ASSERTION_ERROR:
            return 1;
        case LIST_TO_TUPLE:
            return 0;
        case GEN_START:
            return -1;
        case LIST_EXTEND:
        case SET_UPDATE:
        case DICT_MERGE:
        case DICT_UPDATE:
            return -1;
        case COPY_DICT_WITHOUT_KEYS:
            return 0;
        case MATCH_CLASS:
            return -1;
        case GET_LEN:
        case MATCH_MAPPING:
        case MATCH_SEQUENCE:
            return 1;
        case MATCH_KEYS:
            return 2;
        case ROT_N:
            return 0;
        default:
            return PY_INVALID_STACK_EFFECT;
    }
    return PY_INVALID_STACK_EFFECT; /* not reachable */
}
1222 
/* Public API: stack effect of (opcode, oparg) for an explicit jump/no-jump
   case.  See stack_effect() for the meaning of *jump*. */
int
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
{
    int effect = stack_effect(opcode, oparg, jump);
    return effect;
}
1228 
/* Public API: maximal stack effect of (opcode, oparg) over both the
   jump and no-jump paths (jump == -1). */
int
PyCompile_OpcodeStackEffect(int opcode, int oparg)
{
    int effect = stack_effect(opcode, oparg, -1);
    return effect;
}
1234 
1235 /* Add an opcode with no argument.
1236    Returns 0 on failure, 1 on success.
1237 */
1238 
1239 static int
compiler_addop_line(struct compiler * c,int opcode,int line)1240 compiler_addop_line(struct compiler *c, int opcode, int line)
1241 {
1242     basicblock *b;
1243     struct instr *i;
1244     int off;
1245     assert(!HAS_ARG(opcode));
1246     off = compiler_next_instr(c->u->u_curblock);
1247     if (off < 0)
1248         return 0;
1249     b = c->u->u_curblock;
1250     i = &b->b_instr[off];
1251     i->i_opcode = opcode;
1252     i->i_oparg = 0;
1253     if (opcode == RETURN_VALUE)
1254         b->b_return = 1;
1255     i->i_lineno = line;
1256     return 1;
1257 }
1258 
1259 static int
compiler_addop(struct compiler * c,int opcode)1260 compiler_addop(struct compiler *c, int opcode)
1261 {
1262     return compiler_addop_line(c, opcode, c->u->u_lineno);
1263 }
1264 
/* Append an artificial argument-less instruction: line number -1 marks it
   as having no corresponding source location. */
static int
compiler_addop_noline(struct compiler *c, int opcode)
{
    const int no_line = -1;
    return compiler_addop_line(c, opcode, no_line);
}
1270 
1271 
1272 static Py_ssize_t
compiler_add_o(PyObject * dict,PyObject * o)1273 compiler_add_o(PyObject *dict, PyObject *o)
1274 {
1275     PyObject *v;
1276     Py_ssize_t arg;
1277 
1278     v = PyDict_GetItemWithError(dict, o);
1279     if (!v) {
1280         if (PyErr_Occurred()) {
1281             return -1;
1282         }
1283         arg = PyDict_GET_SIZE(dict);
1284         v = PyLong_FromSsize_t(arg);
1285         if (!v) {
1286             return -1;
1287         }
1288         if (PyDict_SetItem(dict, o, v) < 0) {
1289             Py_DECREF(v);
1290             return -1;
1291         }
1292         Py_DECREF(v);
1293     }
1294     else
1295         arg = PyLong_AsLong(v);
1296     return arg;
1297 }
1298 
1299 // Merge const *o* recursively and return constant key object.
static PyObject*
merge_consts_recursive(struct compiler *c, PyObject *o)
{
    // None and Ellipsis are singleton, and key is the singleton.
    // No need to merge object and key.
    if (o == Py_None || o == Py_Ellipsis) {
        Py_INCREF(o);
        return o;
    }

    PyObject *key = _PyCode_ConstantKey(o);
    if (key == NULL) {
        return NULL;
    }

    // t is borrowed reference
    PyObject *t = PyDict_SetDefault(c->c_const_cache, key, key);
    if (t != key) {
        // o is registered in c_const_cache.  Just use it.
        Py_XINCREF(t);
        Py_DECREF(key);
        return t;
    }

    // We registered o in c_const_cache.
    // When o is a tuple or frozenset, we want to merge its
    // items too.
    if (PyTuple_CheckExact(o)) {
        Py_ssize_t len = PyTuple_GET_SIZE(o);
        for (Py_ssize_t i = 0; i < len; i++) {
            PyObject *item = PyTuple_GET_ITEM(o, i);
            PyObject *u = merge_consts_recursive(c, item);
            if (u == NULL) {
                Py_DECREF(key);
                return NULL;
            }

            // See _PyCode_ConstantKey()
            PyObject *v;  // borrowed
            if (PyTuple_CheckExact(u)) {
                // u is a (key-info, object) pair; the object is item 1.
                v = PyTuple_GET_ITEM(u, 1);
            }
            else {
                v = u;
            }
            if (v != item) {
                // Replace the tuple slot in place with the merged object.
                Py_INCREF(v);
                PyTuple_SET_ITEM(o, i, v);
                Py_DECREF(item);
            }

            Py_DECREF(u);
        }
    }
    else if (PyFrozenSet_CheckExact(o)) {
        // *key* is tuple. And its first item is frozenset of
        // constant keys.
        // See _PyCode_ConstantKey() for detail.
        assert(PyTuple_CheckExact(key));
        assert(PyTuple_GET_SIZE(key) == 2);

        Py_ssize_t len = PySet_GET_SIZE(o);
        if (len == 0) {  // empty frozenset should not be re-created.
            return key;
        }
        // Collect the merged members into a tuple, then rebuild the set.
        PyObject *tuple = PyTuple_New(len);
        if (tuple == NULL) {
            Py_DECREF(key);
            return NULL;
        }
        Py_ssize_t i = 0, pos = 0;
        PyObject *item;
        Py_hash_t hash;
        while (_PySet_NextEntry(o, &pos, &item, &hash)) {
            PyObject *k = merge_consts_recursive(c, item);
            if (k == NULL) {
                Py_DECREF(tuple);
                Py_DECREF(key);
                return NULL;
            }
            PyObject *u;
            if (PyTuple_CheckExact(k)) {
                u = PyTuple_GET_ITEM(k, 1);
                Py_INCREF(u);
                Py_DECREF(k);
            }
            else {
                u = k;
            }
            PyTuple_SET_ITEM(tuple, i, u);  // Steals reference of u.
            i++;
        }

        // Instead of rewriting o, we create new frozenset and embed in the
        // key tuple.  Caller should get merged frozenset from the key tuple.
        PyObject *new = PyFrozenSet_New(tuple);
        Py_DECREF(tuple);
        if (new == NULL) {
            Py_DECREF(key);
            return NULL;
        }
        assert(PyTuple_GET_ITEM(key, 1) == o);
        Py_DECREF(o);
        PyTuple_SET_ITEM(key, 1, new);
    }

    return key;
}
1408 
1409 static Py_ssize_t
compiler_add_const(struct compiler * c,PyObject * o)1410 compiler_add_const(struct compiler *c, PyObject *o)
1411 {
1412     PyObject *key = merge_consts_recursive(c, o);
1413     if (key == NULL) {
1414         return -1;
1415     }
1416 
1417     Py_ssize_t arg = compiler_add_o(c->u->u_consts, key);
1418     Py_DECREF(key);
1419     return arg;
1420 }
1421 
1422 static int
compiler_addop_load_const(struct compiler * c,PyObject * o)1423 compiler_addop_load_const(struct compiler *c, PyObject *o)
1424 {
1425     Py_ssize_t arg = compiler_add_const(c, o);
1426     if (arg < 0)
1427         return 0;
1428     return compiler_addop_i(c, LOAD_CONST, arg);
1429 }
1430 
1431 static int
compiler_addop_o(struct compiler * c,int opcode,PyObject * dict,PyObject * o)1432 compiler_addop_o(struct compiler *c, int opcode, PyObject *dict,
1433                      PyObject *o)
1434 {
1435     Py_ssize_t arg = compiler_add_o(dict, o);
1436     if (arg < 0)
1437         return 0;
1438     return compiler_addop_i(c, opcode, arg);
1439 }
1440 
1441 static int
compiler_addop_name(struct compiler * c,int opcode,PyObject * dict,PyObject * o)1442 compiler_addop_name(struct compiler *c, int opcode, PyObject *dict,
1443                     PyObject *o)
1444 {
1445     Py_ssize_t arg;
1446 
1447     PyObject *mangled = _Py_Mangle(c->u->u_private, o);
1448     if (!mangled)
1449         return 0;
1450     arg = compiler_add_o(dict, mangled);
1451     Py_DECREF(mangled);
1452     if (arg < 0)
1453         return 0;
1454     return compiler_addop_i(c, opcode, arg);
1455 }
1456 
1457 /* Add an opcode with an integer argument.
1458    Returns 0 on failure, 1 on success.
1459 */
1460 
1461 static int
compiler_addop_i_line(struct compiler * c,int opcode,Py_ssize_t oparg,int lineno)1462 compiler_addop_i_line(struct compiler *c, int opcode, Py_ssize_t oparg, int lineno)
1463 {
1464     struct instr *i;
1465     int off;
1466 
1467     /* oparg value is unsigned, but a signed C int is usually used to store
1468        it in the C code (like Python/ceval.c).
1469 
1470        Limit to 32-bit signed C int (rather than INT_MAX) for portability.
1471 
1472        The argument of a concrete bytecode instruction is limited to 8-bit.
1473        EXTENDED_ARG is used for 16, 24, and 32-bit arguments. */
1474     assert(HAS_ARG(opcode));
1475     assert(0 <= oparg && oparg <= 2147483647);
1476 
1477     off = compiler_next_instr(c->u->u_curblock);
1478     if (off < 0)
1479         return 0;
1480     i = &c->u->u_curblock->b_instr[off];
1481     i->i_opcode = opcode;
1482     i->i_oparg = Py_SAFE_DOWNCAST(oparg, Py_ssize_t, int);
1483     i->i_lineno = lineno;
1484     return 1;
1485 }
1486 
1487 static int
compiler_addop_i(struct compiler * c,int opcode,Py_ssize_t oparg)1488 compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg)
1489 {
1490     return compiler_addop_i_line(c, opcode, oparg, c->u->u_lineno);
1491 }
1492 
1493 static int
compiler_addop_i_noline(struct compiler * c,int opcode,Py_ssize_t oparg)1494 compiler_addop_i_noline(struct compiler *c, int opcode, Py_ssize_t oparg)
1495 {
1496     return compiler_addop_i_line(c, opcode, oparg, -1);
1497 }
1498 
/* Append jump instruction *opcode* targeting basic block *target* to
   block *b*, tagged with source line *lineno*.
   Returns 0 on failure, 1 on success.

   Bug fix: the instruction pointer used to be computed as
   &b->b_instr[off] BEFORE checking off < 0, indexing the array with -1
   on allocation failure (out-of-bounds pointer arithmetic, undefined
   behavior).  Check the offset first. */
static int add_jump_to_block(basicblock *b, int opcode, int lineno, basicblock *target)
{
    assert(HAS_ARG(opcode));
    assert(b != NULL);
    assert(target != NULL);

    int off = compiler_next_instr(b);
    if (off < 0) {
        return 0;
    }
    struct instr *i = &b->b_instr[off];
    i->i_opcode = opcode;
    i->i_target = target;
    i->i_lineno = lineno;
    return 1;
}
1515 
1516 static int
compiler_addop_j(struct compiler * c,int opcode,basicblock * b)1517 compiler_addop_j(struct compiler *c, int opcode, basicblock *b)
1518 {
1519     return add_jump_to_block(c->u->u_curblock, opcode, c->u->u_lineno, b);
1520 }
1521 
1522 static int
compiler_addop_j_noline(struct compiler * c,int opcode,basicblock * b)1523 compiler_addop_j_noline(struct compiler *c, int opcode, basicblock *b)
1524 {
1525     return add_jump_to_block(c->u->u_curblock, opcode, -1, b);
1526 }
1527 
1528 /* NEXT_BLOCK() creates an implicit jump from the current block
1529    to the new block.
1530 
1531    The returns inside this macro make it impossible to decref objects
1532    created in the local function. Local objects should use the arena.
1533 */
/* NOTE(review): these macros expand to a bare `{ ... }` containing a
   `return 0;`, so an invocation under an unbraced if/else is fragile
   (classic dangling-else hazard).  Converting them to do/while(0) would
   be the standard fix, but requires confirming every call site ends with
   a semicolon -- TODO verify before changing. */
#define NEXT_BLOCK(C) { \
    if (compiler_next_block((C)) == NULL) \
        return 0; \
}

/* Emit opcode OP (no argument); on failure, return 0 from the caller. */
#define ADDOP(C, OP) { \
    if (!compiler_addop((C), (OP))) \
        return 0; \
}

/* As ADDOP, but the instruction carries no source line (artificial). */
#define ADDOP_NOLINE(C, OP) { \
    if (!compiler_addop_noline((C), (OP))) \
        return 0; \
}

/* As ADDOP, but on failure also leave the scope entered by the caller.
   NOTE: expands the literal identifier `c` (not the parameter C), so it
   relies on every call site naming its compiler variable `c`. */
#define ADDOP_IN_SCOPE(C, OP) { \
    if (!compiler_addop((C), (OP))) { \
        compiler_exit_scope(c); \
        return 0; \
    } \
}

/* Emit LOAD_CONST for O (borrowed reference; interned in co_consts). */
#define ADDOP_LOAD_CONST(C, O) { \
    if (!compiler_addop_load_const((C), (O))) \
        return 0; \
}

/* Same as ADDOP_LOAD_CONST, but steals a reference. */
#define ADDOP_LOAD_CONST_NEW(C, O) { \
    PyObject *__new_const = (O); \
    if (__new_const == NULL) { \
        return 0; \
    } \
    if (!compiler_addop_load_const((C), __new_const)) { \
        Py_DECREF(__new_const); \
        return 0; \
    } \
    Py_DECREF(__new_const); \
}

/* Emit OP whose argument indexes O in the unit table u_<TYPE>. */
#define ADDOP_O(C, OP, O, TYPE) { \
    assert((OP) != LOAD_CONST); /* use ADDOP_LOAD_CONST */ \
    if (!compiler_addop_o((C), (OP), (C)->u->u_ ## TYPE, (O))) \
        return 0; \
}

/* Same as ADDOP_O, but steals a reference. */
#define ADDOP_N(C, OP, O, TYPE) { \
    assert((OP) != LOAD_CONST); /* use ADDOP_LOAD_CONST_NEW */ \
    if (!compiler_addop_o((C), (OP), (C)->u->u_ ## TYPE, (O))) { \
        Py_DECREF((O)); \
        return 0; \
    } \
    Py_DECREF((O)); \
}

/* Emit OP for name O (mangled) indexed in the unit table u_<TYPE>. */
#define ADDOP_NAME(C, OP, O, TYPE) { \
    if (!compiler_addop_name((C), (OP), (C)->u->u_ ## TYPE, (O))) \
        return 0; \
}

/* Emit OP with integer argument O. */
#define ADDOP_I(C, OP, O) { \
    if (!compiler_addop_i((C), (OP), (O))) \
        return 0; \
}

/* As ADDOP_I, but the instruction carries no source line (artificial). */
#define ADDOP_I_NOLINE(C, OP, O) { \
    if (!compiler_addop_i_noline((C), (OP), (O))) \
        return 0; \
}

/* Emit jump OP targeting basic block O. */
#define ADDOP_JUMP(C, OP, O) { \
    if (!compiler_addop_j((C), (OP), (O))) \
        return 0; \
}

/* Add a jump with no line number.
 * Used for artificial jumps that have no corresponding
 * token in the source code. */
#define ADDOP_JUMP_NOLINE(C, OP, O) { \
    if (!compiler_addop_j_noline((C), (OP), (O))) \
        return 0; \
}

/* Emit the COMPARE_OP (or specialized) instruction for AST cmpop CMP. */
#define ADDOP_COMPARE(C, CMP) { \
    if (!compiler_addcompare((C), (cmpop_ty)(CMP))) \
        return 0; \
}
1622 
1623 /* VISIT and VISIT_SEQ takes an ASDL type as their second argument.  They use
1624    the ASDL name to synthesize the name of the C type and the visit function.
1625 */
1626 
/* Compile AST node V of kind TYPE; on failure, return 0 from the caller. */
#define VISIT(C, TYPE, V) {\
    if (!compiler_visit_ ## TYPE((C), (V))) \
        return 0; \
}

/* As VISIT, but on failure also leave the scope entered by the caller.
   NOTE: expands the literal identifier `c` (not the parameter C), so it
   relies on every call site naming its compiler variable `c`. */
#define VISIT_IN_SCOPE(C, TYPE, V) {\
    if (!compiler_visit_ ## TYPE((C), (V))) { \
        compiler_exit_scope(c); \
        return 0; \
    } \
}

/* Compile slice node V in expression context CTX. */
#define VISIT_SLICE(C, V, CTX) {\
    if (!compiler_visit_slice((C), (V), (CTX))) \
        return 0; \
}

/* Compile each element of the asdl sequence SEQ in order. */
#define VISIT_SEQ(C, TYPE, SEQ) { \
    int _i; \
    asdl_ ## TYPE ## _seq *seq = (SEQ); /* avoid variable capture */ \
    for (_i = 0; _i < asdl_seq_LEN(seq); _i++) { \
        TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, _i); \
        if (!compiler_visit_ ## TYPE((C), elt)) \
            return 0; \
    } \
}

/* As VISIT_SEQ, but leave the current scope on failure (uses `c`, see
   VISIT_IN_SCOPE). */
#define VISIT_SEQ_IN_SCOPE(C, TYPE, SEQ) { \
    int _i; \
    asdl_ ## TYPE ## _seq *seq = (SEQ); /* avoid variable capture */ \
    for (_i = 0; _i < asdl_seq_LEN(seq); _i++) { \
        TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, _i); \
        if (!compiler_visit_ ## TYPE((C), elt)) { \
            compiler_exit_scope(c); \
            return 0; \
        } \
    } \
}

/* Return 0 from the caller if expression X is false. */
#define RETURN_IF_FALSE(X)  \
    if (!(X)) {             \
        return 0;           \
    }
1670 
1671 /* Search if variable annotations are present statically in a block. */
1672 
1673 static int
find_ann(asdl_stmt_seq * stmts)1674 find_ann(asdl_stmt_seq *stmts)
1675 {
1676     int i, j, res = 0;
1677     stmt_ty st;
1678 
1679     for (i = 0; i < asdl_seq_LEN(stmts); i++) {
1680         st = (stmt_ty)asdl_seq_GET(stmts, i);
1681         switch (st->kind) {
1682         case AnnAssign_kind:
1683             return 1;
1684         case For_kind:
1685             res = find_ann(st->v.For.body) ||
1686                   find_ann(st->v.For.orelse);
1687             break;
1688         case AsyncFor_kind:
1689             res = find_ann(st->v.AsyncFor.body) ||
1690                   find_ann(st->v.AsyncFor.orelse);
1691             break;
1692         case While_kind:
1693             res = find_ann(st->v.While.body) ||
1694                   find_ann(st->v.While.orelse);
1695             break;
1696         case If_kind:
1697             res = find_ann(st->v.If.body) ||
1698                   find_ann(st->v.If.orelse);
1699             break;
1700         case With_kind:
1701             res = find_ann(st->v.With.body);
1702             break;
1703         case AsyncWith_kind:
1704             res = find_ann(st->v.AsyncWith.body);
1705             break;
1706         case Try_kind:
1707             for (j = 0; j < asdl_seq_LEN(st->v.Try.handlers); j++) {
1708                 excepthandler_ty handler = (excepthandler_ty)asdl_seq_GET(
1709                     st->v.Try.handlers, j);
1710                 if (find_ann(handler->v.ExceptHandler.body)) {
1711                     return 1;
1712                 }
1713             }
1714             res = find_ann(st->v.Try.body) ||
1715                   find_ann(st->v.Try.finalbody) ||
1716                   find_ann(st->v.Try.orelse);
1717             break;
1718         default:
1719             res = 0;
1720         }
1721         if (res) {
1722             break;
1723         }
1724     }
1725     return res;
1726 }
1727 
1728 /*
1729  * Frame block handling functions
1730  */
1731 
1732 static int
compiler_push_fblock(struct compiler * c,enum fblocktype t,basicblock * b,basicblock * exit,void * datum)1733 compiler_push_fblock(struct compiler *c, enum fblocktype t, basicblock *b,
1734                      basicblock *exit, void *datum)
1735 {
1736     struct fblockinfo *f;
1737     if (c->u->u_nfblocks >= CO_MAXBLOCKS) {
1738         return compiler_error(c, "too many statically nested blocks");
1739     }
1740     f = &c->u->u_fblock[c->u->u_nfblocks++];
1741     f->fb_type = t;
1742     f->fb_block = b;
1743     f->fb_exit = exit;
1744     f->fb_datum = datum;
1745     return 1;
1746 }
1747 
1748 static void
compiler_pop_fblock(struct compiler * c,enum fblocktype t,basicblock * b)1749 compiler_pop_fblock(struct compiler *c, enum fblocktype t, basicblock *b)
1750 {
1751     struct compiler_unit *u = c->u;
1752     assert(u->u_nfblocks > 0);
1753     u->u_nfblocks--;
1754     assert(u->u_fblock[u->u_nfblocks].fb_type == t);
1755     assert(u->u_fblock[u->u_nfblocks].fb_block == b);
1756 }
1757 
/* Emit a call of the callable on top of the stack with three None
   arguments, i.e. TOS(None, None, None) -- used to invoke a context
   manager's __exit__/__aexit__ during unwinding.
   Returns 0 on failure, 1 on success. */
static int
compiler_call_exit_with_nones(struct compiler *c) {
    ADDOP_LOAD_CONST(c, Py_None);
    ADDOP(c, DUP_TOP);
    ADDOP(c, DUP_TOP);
    ADDOP_I(c, CALL_FUNCTION, 3);
    return 1;
}
1766 
1767 /* Unwind a frame block.  If preserve_tos is true, the TOS before
1768  * popping the blocks will be restored afterwards, unless another
1769  * return, break or continue is found. In which case, the TOS will
1770  * be popped.
1771  */
static int
compiler_unwind_fblock(struct compiler *c, struct fblockinfo *info,
                       int preserve_tos)
{
    switch (info->fb_type) {
        /* Nothing on the value stack to clean up for these. */
        case WHILE_LOOP:
        case EXCEPTION_HANDLER:
        case ASYNC_COMPREHENSION_GENERATOR:
            return 1;

        case FOR_LOOP:
            /* Pop the iterator */
            if (preserve_tos) {
                ADDOP(c, ROT_TWO);
            }
            ADDOP(c, POP_TOP);
            return 1;

        case TRY_EXCEPT:
            ADDOP(c, POP_BLOCK);
            return 1;

        case FINALLY_TRY:
            /* This POP_BLOCK gets the line number of the unwinding statement */
            ADDOP(c, POP_BLOCK);
            if (preserve_tos) {
                /* Protect the preserved value while the finally body runs. */
                if (!compiler_push_fblock(c, POP_VALUE, NULL, NULL, NULL)) {
                    return 0;
                }
            }
            /* Emit the finally block */
            VISIT_SEQ(c, stmt, info->fb_datum);
            if (preserve_tos) {
                compiler_pop_fblock(c, POP_VALUE, NULL);
            }
            /* The finally block should appear to execute after the
             * statement causing the unwinding, so make the unwinding
             * instruction artificial */
            c->u->u_lineno = -1;
            return 1;

        case FINALLY_END:
            /* Discard the three exception values pushed for the handler. */
            if (preserve_tos) {
                ADDOP(c, ROT_FOUR);
            }
            ADDOP(c, POP_TOP);
            ADDOP(c, POP_TOP);
            ADDOP(c, POP_TOP);
            if (preserve_tos) {
                ADDOP(c, ROT_FOUR);
            }
            ADDOP(c, POP_EXCEPT);
            return 1;

        case WITH:
        case ASYNC_WITH:
            SET_LOC(c, (stmt_ty)info->fb_datum);
            ADDOP(c, POP_BLOCK);
            if (preserve_tos) {
                ADDOP(c, ROT_TWO);
            }
            /* Call __exit__/__aexit__ with (None, None, None). */
            if(!compiler_call_exit_with_nones(c)) {
                return 0;
            }
            if (info->fb_type == ASYNC_WITH) {
                /* Await the result of __aexit__. */
                ADDOP(c, GET_AWAITABLE);
                ADDOP_LOAD_CONST(c, Py_None);
                ADDOP(c, YIELD_FROM);
            }
            ADDOP(c, POP_TOP);
            /* The exit block should appear to execute after the
             * statement causing the unwinding, so make the unwinding
             * instruction artificial */
            c->u->u_lineno = -1;
            return 1;

        case HANDLER_CLEANUP:
            /* fb_datum, when set, is the `except ... as name` binding. */
            if (info->fb_datum) {
                ADDOP(c, POP_BLOCK);
            }
            if (preserve_tos) {
                ADDOP(c, ROT_FOUR);
            }
            ADDOP(c, POP_EXCEPT);
            if (info->fb_datum) {
                /* Clear the bound exception name: name = None; del name. */
                ADDOP_LOAD_CONST(c, Py_None);
                compiler_nameop(c, info->fb_datum, Store);
                compiler_nameop(c, info->fb_datum, Del);
            }
            return 1;

        case POP_VALUE:
            if (preserve_tos) {
                ADDOP(c, ROT_TWO);
            }
            ADDOP(c, POP_TOP);
            return 1;
    }
    Py_UNREACHABLE();
}
1872 
1873 /** Unwind block stack. If loop is not NULL, then stop when the first loop is encountered. */
static int
compiler_unwind_fblock_stack(struct compiler *c, int preserve_tos, struct fblockinfo **loop) {
    /* Recursively unwind every frame block on the current unit's stack,
       innermost first.  When 'loop' is non-NULL, stop at the first
       enclosing WHILE_LOOP/FOR_LOOP and report it through *loop instead
       of unwinding it (for break/continue).  Returns 0 on error.
       On success the fblock stack is left exactly as it was found:
       each entry is popped before its unwind code is emitted and pushed
       back afterwards. */
    if (c->u->u_nfblocks == 0) {
        return 1;
    }
    struct fblockinfo *top = &c->u->u_fblock[c->u->u_nfblocks-1];
    if (loop != NULL && (top->fb_type == WHILE_LOOP || top->fb_type == FOR_LOOP)) {
        *loop = top;
        return 1;
    }
    /* Copy the entry before popping: compiler_unwind_fblock can itself
       push an fblock (see FINALLY_TRY), reusing the vacated slot. */
    struct fblockinfo copy = *top;
    c->u->u_nfblocks--;
    if (!compiler_unwind_fblock(c, &copy, preserve_tos)) {
        return 0;
    }
    if (!compiler_unwind_fblock_stack(c, preserve_tos, loop)) {
        return 0;
    }
    /* Restore the popped entry so the stack is unchanged overall. */
    c->u->u_fblock[c->u->u_nfblocks] = copy;
    c->u->u_nfblocks++;
    return 1;
}
1896 
1897 /* Compile a sequence of statements, checking for a docstring
1898    and for annotations. */
1899 
static int
compiler_body(struct compiler *c, asdl_stmt_seq *stmts)
{
    /* Emit code for the statement sequence 'stmts'.  Handles two pieces
       of per-scope bookkeeping first: SETUP_ANNOTATIONS when any
       annotated assignment occurs in the body, and the docstring
       (first statement, stored into __doc__) unless compiling with -OO.
       Returns 0 on error, 1 on success. */
    int i = 0;
    stmt_ty st;
    PyObject *docstring;

    /* Set current line number to the line number of first statement.
       This way line number for SETUP_ANNOTATIONS will always
       coincide with the line number of first "real" statement in module.
       If body is empty, then lineno will be set later in assemble. */
    if (c->u->u_scope_type == COMPILER_SCOPE_MODULE && asdl_seq_LEN(stmts)) {
        st = (stmt_ty)asdl_seq_GET(stmts, 0);
        SET_LOC(c, st);
    }
    /* Every annotated class and module should have __annotations__. */
    if (find_ann(stmts)) {
        ADDOP(c, SETUP_ANNOTATIONS);
    }
    if (!asdl_seq_LEN(stmts))
        return 1;
    /* if not -OO mode, set docstring */
    if (c->c_optimize < 2) {
        docstring = _PyAST_GetDocString(stmts);
        if (docstring) {
            i = 1;  /* skip the docstring statement in the loop below */
            st = (stmt_ty)asdl_seq_GET(stmts, 0);
            assert(st->kind == Expr_kind);
            VISIT(c, expr, st->v.Expr.value);
            if (!compiler_nameop(c, __doc__, Store))
                return 0;
        }
    }
    for (; i < asdl_seq_LEN(stmts); i++)
        VISIT(c, stmt, (stmt_ty)asdl_seq_GET(stmts, i));
    return 1;
}
1937 
1938 static PyCodeObject *
compiler_mod(struct compiler * c,mod_ty mod)1939 compiler_mod(struct compiler *c, mod_ty mod)
1940 {
1941     PyCodeObject *co;
1942     int addNone = 1;
1943     static PyObject *module;
1944     if (!module) {
1945         module = PyUnicode_InternFromString("<module>");
1946         if (!module)
1947             return NULL;
1948     }
1949     /* Use 0 for firstlineno initially, will fixup in assemble(). */
1950     if (!compiler_enter_scope(c, module, COMPILER_SCOPE_MODULE, mod, 1))
1951         return NULL;
1952     switch (mod->kind) {
1953     case Module_kind:
1954         if (!compiler_body(c, mod->v.Module.body)) {
1955             compiler_exit_scope(c);
1956             return 0;
1957         }
1958         break;
1959     case Interactive_kind:
1960         if (find_ann(mod->v.Interactive.body)) {
1961             ADDOP(c, SETUP_ANNOTATIONS);
1962         }
1963         c->c_interactive = 1;
1964         VISIT_SEQ_IN_SCOPE(c, stmt, mod->v.Interactive.body);
1965         break;
1966     case Expression_kind:
1967         VISIT_IN_SCOPE(c, expr, mod->v.Expression.body);
1968         addNone = 0;
1969         break;
1970     default:
1971         PyErr_Format(PyExc_SystemError,
1972                      "module kind %d should not be possible",
1973                      mod->kind);
1974         return 0;
1975     }
1976     co = assemble(c, addNone);
1977     compiler_exit_scope(c);
1978     return co;
1979 }
1980 
1981 /* The test for LOCAL must come before the test for FREE in order to
1982    handle classes where name is both local and free.  The local var is
1983    a method and the free var is a free var referenced within a method.
1984 */
1985 
1986 static int
get_ref_type(struct compiler * c,PyObject * name)1987 get_ref_type(struct compiler *c, PyObject *name)
1988 {
1989     int scope;
1990     if (c->u->u_scope_type == COMPILER_SCOPE_CLASS &&
1991         _PyUnicode_EqualToASCIIString(name, "__class__"))
1992         return CELL;
1993     scope = _PyST_GetScope(c->u->u_ste, name);
1994     if (scope == 0) {
1995         PyErr_Format(PyExc_SystemError,
1996                      "_PyST_GetScope(name=%R) failed: "
1997                      "unknown scope in unit %S (%R); "
1998                      "symbols: %R; locals: %R; globals: %R",
1999                      name,
2000                      c->u->u_name, c->u->u_ste->ste_id,
2001                      c->u->u_ste->ste_symbols, c->u->u_varnames, c->u->u_names);
2002         return -1;
2003     }
2004     return scope;
2005 }
2006 
2007 static int
compiler_lookup_arg(PyObject * dict,PyObject * name)2008 compiler_lookup_arg(PyObject *dict, PyObject *name)
2009 {
2010     PyObject *v;
2011     v = PyDict_GetItemWithError(dict, name);
2012     if (v == NULL)
2013         return -1;
2014     return PyLong_AS_LONG(v);
2015 }
2016 
static int
compiler_make_closure(struct compiler *c, PyCodeObject *co, Py_ssize_t flags,
                      PyObject *qualname)
{
    /* Emit code that builds a function object from 'co': push the
       closure tuple (when the code has free variables), the code
       object and the qualified name, then MAKE_FUNCTION with 'flags'
       describing what else the caller already pushed.
       Returns 0 on error, 1 on success. */
    Py_ssize_t i, free = PyCode_GetNumFree(co);
    if (qualname == NULL)
        qualname = co->co_name;

    if (free) {
        for (i = 0; i < free; ++i) {
            /* Bypass com_addop_varname because it will generate
               LOAD_DEREF but LOAD_CLOSURE is needed.
            */
            PyObject *name = PyTuple_GET_ITEM(co->co_freevars, i);

            /* Special case: If a class contains a method with a
               free variable that has the same name as a method,
               the name will be considered free *and* local in the
               class.  It should be handled by the closure, as
               well as by the normal name lookup logic.
            */
            int reftype = get_ref_type(c, name);
            if (reftype == -1) {
                return 0;
            }
            int arg;
            if (reftype == CELL) {
                arg = compiler_lookup_arg(c->u->u_cellvars, name);
            }
            else {
                arg = compiler_lookup_arg(c->u->u_freevars, name);
            }
            if (arg == -1) {
                PyErr_Format(PyExc_SystemError,
                    "compiler_lookup_arg(name=%R) with reftype=%d failed in %S; "
                    "freevars of code %S: %R",
                    name,
                    reftype,
                    c->u->u_name,
                    co->co_name,
                    co->co_freevars);
                return 0;
            }
            ADDOP_I(c, LOAD_CLOSURE, arg);
        }
        flags |= 0x08;  /* MAKE_FUNCTION flag: closure tuple on the stack */
        ADDOP_I(c, BUILD_TUPLE, free);
    }
    ADDOP_LOAD_CONST(c, (PyObject*)co);
    ADDOP_LOAD_CONST(c, qualname);
    ADDOP_I(c, MAKE_FUNCTION, flags);
    return 1;
}
2070 
2071 static int
compiler_decorators(struct compiler * c,asdl_expr_seq * decos)2072 compiler_decorators(struct compiler *c, asdl_expr_seq* decos)
2073 {
2074     int i;
2075 
2076     if (!decos)
2077         return 1;
2078 
2079     for (i = 0; i < asdl_seq_LEN(decos); i++) {
2080         VISIT(c, expr, (expr_ty)asdl_seq_GET(decos, i));
2081     }
2082     return 1;
2083 }
2084 
static int
compiler_visit_kwonlydefaults(struct compiler *c, asdl_arg_seq *kwonlyargs,
                              asdl_expr_seq *kw_defaults)
{
    /* Push a dict of keyword-only default values.

       Return 0 on error, -1 if no dict pushed, 1 if a dict is pushed.
       */
    int i;
    PyObject *keys = NULL;  /* list of mangled names that have a default */

    for (i = 0; i < asdl_seq_LEN(kwonlyargs); i++) {
        arg_ty arg = asdl_seq_GET(kwonlyargs, i);
        expr_ty default_ = asdl_seq_GET(kw_defaults, i);
        /* kw_defaults entries are NULL for args without a default */
        if (default_) {
            PyObject *mangled = _Py_Mangle(c->u->u_private, arg->arg);
            if (!mangled) {
                goto error;
            }
            if (keys == NULL) {
                keys = PyList_New(1);
                if (keys == NULL) {
                    Py_DECREF(mangled);
                    return 0;
                }
                PyList_SET_ITEM(keys, 0, mangled);  /* steals mangled ref */
            }
            else {
                int res = PyList_Append(keys, mangled);
                Py_DECREF(mangled);
                if (res == -1) {
                    goto error;
                }
            }
            /* Evaluate the default value onto the stack. */
            if (!compiler_visit_expr(c, default_)) {
                goto error;
            }
        }
    }
    if (keys != NULL) {
        Py_ssize_t default_count = PyList_GET_SIZE(keys);
        PyObject *keys_tuple = PyList_AsTuple(keys);
        Py_DECREF(keys);
        /* NOTE(review): keys_tuple is NULL if PyList_AsTuple failed;
           assumes ADDOP_LOAD_CONST_NEW reports a NULL argument — confirm. */
        ADDOP_LOAD_CONST_NEW(c, keys_tuple);
        ADDOP_I(c, BUILD_CONST_KEY_MAP, default_count);
        assert(default_count > 0);
        return 1;
    }
    else {
        return -1;
    }

error:
    Py_XDECREF(keys);
    return 0;
}
2141 
static int
compiler_visit_annexpr(struct compiler *c, expr_ty annotation)
{
    /* Push the annotation's source text as a string constant; used when
       "from __future__ import annotations" is in effect (see
       compiler_visit_argannotation).  Returns 0 on error. */
    ADDOP_LOAD_CONST_NEW(c, _PyAST_ExprAsUnicode(annotation));
    return 1;
}
2148 
static int
compiler_visit_argannotation(struct compiler *c, identifier id,
    expr_ty annotation, Py_ssize_t *annotations_len)
{
    /* Push the (mangled) name and the value of one annotation, if
       present, and advance *annotations_len by 2 (name + value).
       Returns 0 on error, 1 on success (including "no annotation"). */
    if (!annotation) {
        return 1;
    }

    PyObject *mangled = _Py_Mangle(c->u->u_private, id);
    if (!mangled) {
        return 0;
    }
    ADDOP_LOAD_CONST(c, mangled);
    Py_DECREF(mangled);

    if (c->c_future->ff_features & CO_FUTURE_ANNOTATIONS) {
        /* Future-annotations mode: push the source text, not the value. */
        VISIT(c, annexpr, annotation)
    }
    else {
        VISIT(c, expr, annotation);
    }
    *annotations_len += 2;
    return 1;
}
2173 
2174 static int
compiler_visit_argannotations(struct compiler * c,asdl_arg_seq * args,Py_ssize_t * annotations_len)2175 compiler_visit_argannotations(struct compiler *c, asdl_arg_seq* args,
2176                               Py_ssize_t *annotations_len)
2177 {
2178     int i;
2179     for (i = 0; i < asdl_seq_LEN(args); i++) {
2180         arg_ty arg = (arg_ty)asdl_seq_GET(args, i);
2181         if (!compiler_visit_argannotation(
2182                         c,
2183                         arg->arg,
2184                         arg->annotation,
2185                         annotations_len))
2186             return 0;
2187     }
2188     return 1;
2189 }
2190 
static int
compiler_visit_annotations(struct compiler *c, arguments_ty args,
                           expr_ty returns)
{
    /* Push arg annotation names and values.
       The expressions are evaluated out-of-order wrt the source code.

       Return 0 on error, -1 if no annotations pushed, 1 if annotations
       are pushed.
       */
    static identifier return_str;  /* cached, interned "return" key */
    Py_ssize_t annotations_len = 0;

    if (!compiler_visit_argannotations(c, args->args, &annotations_len))
        return 0;
    if (!compiler_visit_argannotations(c, args->posonlyargs, &annotations_len))
        return 0;
    if (args->vararg && args->vararg->annotation &&
        !compiler_visit_argannotation(c, args->vararg->arg,
                                     args->vararg->annotation, &annotations_len))
        return 0;
    if (!compiler_visit_argannotations(c, args->kwonlyargs, &annotations_len))
        return 0;
    if (args->kwarg && args->kwarg->annotation &&
        !compiler_visit_argannotation(c, args->kwarg->arg,
                                     args->kwarg->annotation, &annotations_len))
        return 0;

    if (!return_str) {
        return_str = PyUnicode_InternFromString("return");
        if (!return_str)
            return 0;
    }
    /* The return annotation is pushed last, under the key "return". */
    if (!compiler_visit_argannotation(c, return_str, returns, &annotations_len)) {
        return 0;
    }

    if (annotations_len) {
        /* Collect the alternating name/value pairs into one tuple. */
        ADDOP_I(c, BUILD_TUPLE, annotations_len);
        return 1;
    }

    return -1;
}
2234 
static int
compiler_visit_defaults(struct compiler *c, arguments_ty args)
{
    /* Evaluate all positional default values and collect them into a
       single tuple on the stack.  Returns 0 on error, 1 on success. */
    VISIT_SEQ(c, expr, args->defaults);
    ADDOP_I(c, BUILD_TUPLE, asdl_seq_LEN(args->defaults));
    return 1;
}
2242 
2243 static Py_ssize_t
compiler_default_arguments(struct compiler * c,arguments_ty args)2244 compiler_default_arguments(struct compiler *c, arguments_ty args)
2245 {
2246     Py_ssize_t funcflags = 0;
2247     if (args->defaults && asdl_seq_LEN(args->defaults) > 0) {
2248         if (!compiler_visit_defaults(c, args))
2249             return -1;
2250         funcflags |= 0x01;
2251     }
2252     if (args->kwonlyargs) {
2253         int res = compiler_visit_kwonlydefaults(c, args->kwonlyargs,
2254                                                 args->kw_defaults);
2255         if (res == 0) {
2256             return -1;
2257         }
2258         else if (res > 0) {
2259             funcflags |= 0x02;
2260         }
2261     }
2262     return funcflags;
2263 }
2264 
2265 static int
forbidden_name(struct compiler * c,identifier name,expr_context_ty ctx)2266 forbidden_name(struct compiler *c, identifier name, expr_context_ty ctx)
2267 {
2268 
2269     if (ctx == Store && _PyUnicode_EqualToASCIIString(name, "__debug__")) {
2270         compiler_error(c, "cannot assign to __debug__");
2271         return 1;
2272     }
2273     if (ctx == Del && _PyUnicode_EqualToASCIIString(name, "__debug__")) {
2274         compiler_error(c, "cannot delete __debug__");
2275         return 1;
2276     }
2277     return 0;
2278 }
2279 
2280 static int
compiler_check_debug_one_arg(struct compiler * c,arg_ty arg)2281 compiler_check_debug_one_arg(struct compiler *c, arg_ty arg)
2282 {
2283     if (arg != NULL) {
2284         if (forbidden_name(c, arg->arg, Store))
2285             return 0;
2286     }
2287     return 1;
2288 }
2289 
2290 static int
compiler_check_debug_args_seq(struct compiler * c,asdl_arg_seq * args)2291 compiler_check_debug_args_seq(struct compiler *c, asdl_arg_seq *args)
2292 {
2293     if (args != NULL) {
2294         for (Py_ssize_t i = 0, n = asdl_seq_LEN(args); i < n; i++) {
2295             if (!compiler_check_debug_one_arg(c, asdl_seq_GET(args, i)))
2296                 return 0;
2297         }
2298     }
2299     return 1;
2300 }
2301 
2302 static int
compiler_check_debug_args(struct compiler * c,arguments_ty args)2303 compiler_check_debug_args(struct compiler *c, arguments_ty args)
2304 {
2305     if (!compiler_check_debug_args_seq(c, args->posonlyargs))
2306         return 0;
2307     if (!compiler_check_debug_args_seq(c, args->args))
2308         return 0;
2309     if (!compiler_check_debug_one_arg(c, args->vararg))
2310         return 0;
2311     if (!compiler_check_debug_args_seq(c, args->kwonlyargs))
2312         return 0;
2313     if (!compiler_check_debug_one_arg(c, args->kwarg))
2314         return 0;
2315     return 1;
2316 }
2317 
static int
compiler_function(struct compiler *c, stmt_ty s, int is_async)
{
    /* Compile a FunctionDef or AsyncFunctionDef statement: evaluate
       decorators, defaults and annotations in the enclosing scope,
       compile the body in a new scope, emit MAKE_FUNCTION, apply the
       decorators, and bind the result to the function's name.
       Returns 0 on error, 1 on success. */
    PyCodeObject *co;
    PyObject *qualname, *docstring = NULL;
    arguments_ty args;
    expr_ty returns;
    identifier name;
    asdl_expr_seq* decos;
    asdl_stmt_seq *body;
    Py_ssize_t i, funcflags;
    int annotations;
    int scope_type;
    int firstlineno;

    if (is_async) {
        assert(s->kind == AsyncFunctionDef_kind);

        args = s->v.AsyncFunctionDef.args;
        returns = s->v.AsyncFunctionDef.returns;
        decos = s->v.AsyncFunctionDef.decorator_list;
        name = s->v.AsyncFunctionDef.name;
        body = s->v.AsyncFunctionDef.body;

        scope_type = COMPILER_SCOPE_ASYNC_FUNCTION;
    } else {
        assert(s->kind == FunctionDef_kind);

        args = s->v.FunctionDef.args;
        returns = s->v.FunctionDef.returns;
        decos = s->v.FunctionDef.decorator_list;
        name = s->v.FunctionDef.name;
        body = s->v.FunctionDef.body;

        scope_type = COMPILER_SCOPE_FUNCTION;
    }

    /* __debug__ must not appear as a parameter name */
    if (!compiler_check_debug_args(c, args))
        return 0;

    /* Decorator expressions are evaluated first, outermost first. */
    if (!compiler_decorators(c, decos))
        return 0;

    /* The function "starts" at the first decorator's line, if any. */
    firstlineno = s->lineno;
    if (asdl_seq_LEN(decos)) {
        firstlineno = ((expr_ty)asdl_seq_GET(decos, 0))->lineno;
    }

    /* funcflags collects the MAKE_FUNCTION flag bits. */
    funcflags = compiler_default_arguments(c, args);
    if (funcflags == -1) {
        return 0;
    }

    annotations = compiler_visit_annotations(c, args, returns);
    if (annotations == 0) {
        return 0;
    }
    else if (annotations > 0) {
        funcflags |= 0x04;  /* annotations tuple is on the stack */
    }

    if (!compiler_enter_scope(c, name, scope_type, (void *)s, firstlineno)) {
        return 0;
    }

    /* if not -OO mode, add docstring */
    if (c->c_optimize < 2) {
        docstring = _PyAST_GetDocString(body);
    }
    /* The docstring (or None) is always added as a constant first. */
    if (compiler_add_const(c, docstring ? docstring : Py_None) < 0) {
        compiler_exit_scope(c);
        return 0;
    }

    c->u->u_argcount = asdl_seq_LEN(args->args);
    c->u->u_posonlyargcount = asdl_seq_LEN(args->posonlyargs);
    c->u->u_kwonlyargcount = asdl_seq_LEN(args->kwonlyargs);
    /* Skip the docstring statement when one was consumed above. */
    for (i = docstring ? 1 : 0; i < asdl_seq_LEN(body); i++) {
        VISIT_IN_SCOPE(c, stmt, (stmt_ty)asdl_seq_GET(body, i));
    }
    co = assemble(c, 1);
    /* Grab the qualname before leaving the scope that owns it. */
    qualname = c->u->u_qualname;
    Py_INCREF(qualname);
    compiler_exit_scope(c);
    if (co == NULL) {
        Py_XDECREF(qualname);
        Py_XDECREF(co);
        return 0;
    }

    /* Emit code creating the function object in the enclosing scope. */
    if (!compiler_make_closure(c, co, funcflags, qualname)) {
        Py_DECREF(qualname);
        Py_DECREF(co);
        return 0;
    }
    Py_DECREF(qualname);
    Py_DECREF(co);

    /* decorators: apply innermost-last, one call each */
    for (i = 0; i < asdl_seq_LEN(decos); i++) {
        ADDOP_I(c, CALL_FUNCTION, 1);
    }

    return compiler_nameop(c, name, Store);
}
2423 
static int
compiler_class(struct compiler *c, stmt_ty s)
{
    /* Compile a ClassDef statement.  Returns 0 on error, 1 on success. */
    PyCodeObject *co;
    PyObject *str;
    int i, firstlineno;
    asdl_expr_seq *decos = s->v.ClassDef.decorator_list;

    if (!compiler_decorators(c, decos))
        return 0;

    /* The class "starts" at the first decorator's line, if any. */
    firstlineno = s->lineno;
    if (asdl_seq_LEN(decos)) {
        firstlineno = ((expr_ty)asdl_seq_GET(decos, 0))->lineno;
    }

    /* ultimately generate code for:
         <name> = __build_class__(<func>, <name>, *<bases>, **<keywords>)
       where:
         <func> is a zero arg function/closure created from the class body.
            It mutates its locals to build the class namespace.
         <name> is the class name
         <bases> is the positional arguments and *varargs argument
         <keywords> is the keyword arguments and **kwds argument
       This borrows from compiler_call.
    */

    /* 1. compile the class body into a code object */
    if (!compiler_enter_scope(c, s->v.ClassDef.name,
                              COMPILER_SCOPE_CLASS, (void *)s, firstlineno))
        return 0;
    /* this block represents what we do in the new scope */
    {
        /* use the class name for name mangling */
        Py_INCREF(s->v.ClassDef.name);
        Py_XSETREF(c->u->u_private, s->v.ClassDef.name);
        /* load (global) __name__ ... */
        str = PyUnicode_InternFromString("__name__");
        if (!str || !compiler_nameop(c, str, Load)) {
            Py_XDECREF(str);
            compiler_exit_scope(c);
            return 0;
        }
        Py_DECREF(str);
        /* ... and store it as __module__ */
        str = PyUnicode_InternFromString("__module__");
        if (!str || !compiler_nameop(c, str, Store)) {
            Py_XDECREF(str);
            compiler_exit_scope(c);
            return 0;
        }
        Py_DECREF(str);
        assert(c->u->u_qualname);
        /* store the unit's qualified name as __qualname__ */
        ADDOP_LOAD_CONST(c, c->u->u_qualname);
        str = PyUnicode_InternFromString("__qualname__");
        if (!str || !compiler_nameop(c, str, Store)) {
            Py_XDECREF(str);
            compiler_exit_scope(c);
            return 0;
        }
        Py_DECREF(str);
        /* compile the body proper */
        if (!compiler_body(c, s->v.ClassDef.body)) {
            compiler_exit_scope(c);
            return 0;
        }
        /* The following code is artificial */
        c->u->u_lineno = -1;
        /* Return __classcell__ if it is referenced, otherwise return None */
        if (c->u->u_ste->ste_needs_class_closure) {
            /* Store __classcell__ into class namespace & return it */
            str = PyUnicode_InternFromString("__class__");
            if (str == NULL) {
                compiler_exit_scope(c);
                return 0;
            }
            i = compiler_lookup_arg(c->u->u_cellvars, str);
            Py_DECREF(str);
            if (i < 0) {
                compiler_exit_scope(c);
                return 0;
            }
            /* the __class__ cell is expected to be the only cellvar */
            assert(i == 0);

            ADDOP_I(c, LOAD_CLOSURE, i);
            ADDOP(c, DUP_TOP);
            str = PyUnicode_InternFromString("__classcell__");
            if (!str || !compiler_nameop(c, str, Store)) {
                Py_XDECREF(str);
                compiler_exit_scope(c);
                return 0;
            }
            Py_DECREF(str);
        }
        else {
            /* No methods referenced __class__, so just return None */
            assert(PyDict_GET_SIZE(c->u->u_cellvars) == 0);
            ADDOP_LOAD_CONST(c, Py_None);
        }
        ADDOP_IN_SCOPE(c, RETURN_VALUE);
        /* create the code object */
        co = assemble(c, 1);
    }
    /* leave the new scope */
    compiler_exit_scope(c);
    if (co == NULL)
        return 0;

    /* 2. load the 'build_class' function */
    ADDOP(c, LOAD_BUILD_CLASS);

    /* 3. load a function (or closure) made from the code object */
    if (!compiler_make_closure(c, co, 0, NULL)) {
        Py_DECREF(co);
        return 0;
    }
    Py_DECREF(co);

    /* 4. load class name */
    ADDOP_LOAD_CONST(c, s->v.ClassDef.name);

    /* 5. generate the rest of the code for the call */
    if (!compiler_call_helper(c, 2, s->v.ClassDef.bases, s->v.ClassDef.keywords))
        return 0;

    /* 6. apply decorators */
    for (i = 0; i < asdl_seq_LEN(decos); i++) {
        ADDOP_I(c, CALL_FUNCTION, 1);
    }

    /* 7. store into <name> */
    if (!compiler_nameop(c, s->v.ClassDef.name, Store))
        return 0;
    return 1;
}
2559 
/* Return 0 if the expression is a constant value other than a named
   singleton (None, True, False, Ellipsis).  Return 1 otherwise. */
2562 static int
check_is_arg(expr_ty e)2563 check_is_arg(expr_ty e)
2564 {
2565     if (e->kind != Constant_kind) {
2566         return 1;
2567     }
2568     PyObject *value = e->v.Constant.value;
2569     return (value == Py_None
2570          || value == Py_False
2571          || value == Py_True
2572          || value == Py_Ellipsis);
2573 }
2574 
/* Check operands of identity checks ("is" and "is not").
   Emit a warning if any operand is a constant except named singletons.
   Return 0 on error.
 */
2579 static int
check_compare(struct compiler * c,expr_ty e)2580 check_compare(struct compiler *c, expr_ty e)
2581 {
2582     Py_ssize_t i, n;
2583     int left = check_is_arg(e->v.Compare.left);
2584     n = asdl_seq_LEN(e->v.Compare.ops);
2585     for (i = 0; i < n; i++) {
2586         cmpop_ty op = (cmpop_ty)asdl_seq_GET(e->v.Compare.ops, i);
2587         int right = check_is_arg((expr_ty)asdl_seq_GET(e->v.Compare.comparators, i));
2588         if (op == Is || op == IsNot) {
2589             if (!right || !left) {
2590                 const char *msg = (op == Is)
2591                         ? "\"is\" with a literal. Did you mean \"==\"?"
2592                         : "\"is not\" with a literal. Did you mean \"!=\"?";
2593                 return compiler_warn(c, msg);
2594             }
2595         }
2596         left = right;
2597     }
2598     return 1;
2599 }
2600 
static int compiler_addcompare(struct compiler *c, cmpop_ty op)
{
    /* Emit the instruction implementing a single comparison operator:
       IS_OP / CONTAINS_OP for identity and membership tests,
       COMPARE_OP with the matching rich-comparison code otherwise.
       Returns 0 on error, 1 on success. */
    switch (op) {
    case Is:
        ADDOP_I(c, IS_OP, 0);
        return 1;
    case IsNot:
        ADDOP_I(c, IS_OP, 1);
        return 1;
    case In:
        ADDOP_I(c, CONTAINS_OP, 0);
        return 1;
    case NotIn:
        ADDOP_I(c, CONTAINS_OP, 1);
        return 1;
    default:
        break;
    }

    int cmp;
    switch (op) {
    case Eq:  cmp = Py_EQ; break;
    case NotEq: cmp = Py_NE; break;
    case Lt:  cmp = Py_LT; break;
    case LtE: cmp = Py_LE; break;
    case Gt:  cmp = Py_GT; break;
    case GtE: cmp = Py_GE; break;
    default:
        Py_UNREACHABLE();
    }
    ADDOP_I(c, COMPARE_OP, cmp);
    return 1;
}
2641 
2642 
2643 
static int
compiler_jump_if(struct compiler *c, expr_ty e, basicblock *next, int cond)
{
    /* Emit code that evaluates 'e' for its truth value and jumps to
       'next' when that value equals 'cond'.  Short-circuits "not",
       and/or chains, conditional expressions and chained comparisons
       instead of materializing an intermediate bool.
       Returns 0 on error, 1 on success. */
    switch (e->kind) {
    case UnaryOp_kind:
        /* "not x": flip the jump sense rather than emit a negation. */
        if (e->v.UnaryOp.op == Not)
            return compiler_jump_if(c, e->v.UnaryOp.operand, next, !cond);
        /* fallback to general implementation */
        break;
    case BoolOp_kind: {
        asdl_expr_seq *s = e->v.BoolOp.values;
        Py_ssize_t i, n = asdl_seq_LEN(s) - 1;
        assert(n >= 0);
        int cond2 = e->v.BoolOp.op == Or;
        basicblock *next2 = next;
        /* When the operator's short-circuit sense differs from 'cond'
           (e.g. "or" tested for false), the early operands must jump
           past the rest of the chain, not to 'next' — give them their
           own landing block. */
        if (!cond2 != !cond) {
            next2 = compiler_new_block(c);
            if (next2 == NULL)
                return 0;
        }
        for (i = 0; i < n; ++i) {
            if (!compiler_jump_if(c, (expr_ty)asdl_seq_GET(s, i), next2, cond2))
                return 0;
        }
        /* The last operand decides the whole expression. */
        if (!compiler_jump_if(c, (expr_ty)asdl_seq_GET(s, n), next, cond))
            return 0;
        if (next2 != next)
            compiler_use_next_block(c, next2);
        return 1;
    }
    case IfExp_kind: {
        basicblock *end, *next2;
        end = compiler_new_block(c);
        if (end == NULL)
            return 0;
        next2 = compiler_new_block(c);
        if (next2 == NULL)
            return 0;
        /* The test selects a branch; each branch jumps on its own value. */
        if (!compiler_jump_if(c, e->v.IfExp.test, next2, 0))
            return 0;
        if (!compiler_jump_if(c, e->v.IfExp.body, next, cond))
            return 0;
        ADDOP_JUMP_NOLINE(c, JUMP_FORWARD, end);
        compiler_use_next_block(c, next2);
        if (!compiler_jump_if(c, e->v.IfExp.orelse, next, cond))
            return 0;
        compiler_use_next_block(c, end);
        return 1;
    }
    case Compare_kind: {
        Py_ssize_t i, n = asdl_seq_LEN(e->v.Compare.ops) - 1;
        if (n > 0) {
            /* Chained comparison (a < b < c ...): a false intermediate
               result jumps to 'cleanup', which pops the leftover
               operand kept for the next link. */
            if (!check_compare(c, e)) {
                return 0;
            }
            basicblock *cleanup = compiler_new_block(c);
            if (cleanup == NULL)
                return 0;
            VISIT(c, expr, e->v.Compare.left);
            for (i = 0; i < n; i++) {
                VISIT(c, expr,
                    (expr_ty)asdl_seq_GET(e->v.Compare.comparators, i));
                /* Keep a copy of the right operand for the next link. */
                ADDOP(c, DUP_TOP);
                ADDOP(c, ROT_THREE);
                ADDOP_COMPARE(c, asdl_seq_GET(e->v.Compare.ops, i));
                ADDOP_JUMP(c, POP_JUMP_IF_FALSE, cleanup);
                NEXT_BLOCK(c);
            }
            VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Compare.comparators, n));
            ADDOP_COMPARE(c, asdl_seq_GET(e->v.Compare.ops, n));
            ADDOP_JUMP(c, cond ? POP_JUMP_IF_TRUE : POP_JUMP_IF_FALSE, next);
            NEXT_BLOCK(c);
            basicblock *end = compiler_new_block(c);
            if (end == NULL)
                return 0;
            ADDOP_JUMP_NOLINE(c, JUMP_FORWARD, end);
            compiler_use_next_block(c, cleanup);
            ADDOP(c, POP_TOP);
            /* A false intermediate result makes the whole chain false,
               so jump to 'next' only when testing for false. */
            if (!cond) {
                ADDOP_JUMP_NOLINE(c, JUMP_FORWARD, next);
            }
            compiler_use_next_block(c, end);
            return 1;
        }
        /* fallback to general implementation */
        break;
    }
    default:
        /* fallback to general implementation */
        break;
    }

    /* general implementation */
    VISIT(c, expr, e);
    ADDOP_JUMP(c, cond ? POP_JUMP_IF_TRUE : POP_JUMP_IF_FALSE, next);
    NEXT_BLOCK(c);
    return 1;
}
2742 
2743 static int
compiler_ifexp(struct compiler * c,expr_ty e)2744 compiler_ifexp(struct compiler *c, expr_ty e)
2745 {
2746     basicblock *end, *next;
2747 
2748     assert(e->kind == IfExp_kind);
2749     end = compiler_new_block(c);
2750     if (end == NULL)
2751         return 0;
2752     next = compiler_new_block(c);
2753     if (next == NULL)
2754         return 0;
2755     if (!compiler_jump_if(c, e->v.IfExp.test, next, 0))
2756         return 0;
2757     VISIT(c, expr, e->v.IfExp.body);
2758     ADDOP_JUMP_NOLINE(c, JUMP_FORWARD, end);
2759     compiler_use_next_block(c, next);
2760     VISIT(c, expr, e->v.IfExp.orelse);
2761     compiler_use_next_block(c, end);
2762     return 1;
2763 }
2764 
static int
compiler_lambda(struct compiler *c, expr_ty e)
{
    /* Compile a lambda expression: compile the body inside a new
       COMPILER_SCOPE_LAMBDA scope, assemble it into a code object, then
       emit the closure-construction sequence in the enclosing scope.
       Returns 1 on success, 0 on error. */
    PyCodeObject *co;
    PyObject *qualname;
    /* Interned "<lambda>" name, created on first use and deliberately
       kept alive for the life of the process. */
    static identifier name;
    Py_ssize_t funcflags;
    arguments_ty args = e->v.Lambda.args;
    assert(e->kind == Lambda_kind);

    if (!compiler_check_debug_args(c, args))
        return 0;

    if (!name) {
        name = PyUnicode_InternFromString("<lambda>");
        if (!name)
            return 0;
    }

    /* Flags describing the defaults/annotations pushed for the closure
       (see compiler_default_arguments); -1 signals an error. */
    funcflags = compiler_default_arguments(c, args);
    if (funcflags == -1) {
        return 0;
    }

    if (!compiler_enter_scope(c, name, COMPILER_SCOPE_LAMBDA,
                              (void *)e, e->lineno))
        return 0;

    /* Make None the first constant, so the lambda can't have a
       docstring. */
    if (compiler_add_const(c, Py_None) < 0)
        return 0;

    c->u->u_argcount = asdl_seq_LEN(args->args);
    c->u->u_posonlyargcount = asdl_seq_LEN(args->posonlyargs);
    c->u->u_kwonlyargcount = asdl_seq_LEN(args->kwonlyargs);
    VISIT_IN_SCOPE(c, expr, e->v.Lambda.body);
    if (c->u->u_ste->ste_generator) {
        /* Generator lambda: no RETURN_VALUE is appended. */
        co = assemble(c, 0);
    }
    else {
        /* Ordinary lambda: the body's result is on the stack; return it. */
        ADDOP_IN_SCOPE(c, RETURN_VALUE);
        co = assemble(c, 1);
    }
    /* Grab a strong reference to the qualified name before
       compiler_exit_scope() tears down the unit that owns it. */
    qualname = c->u->u_qualname;
    Py_INCREF(qualname);
    compiler_exit_scope(c);
    if (co == NULL) {
        Py_DECREF(qualname);
        return 0;
    }

    if (!compiler_make_closure(c, co, funcflags, qualname)) {
        Py_DECREF(qualname);
        Py_DECREF(co);
        return 0;
    }
    Py_DECREF(qualname);
    Py_DECREF(co);

    return 1;
}
2827 
2828 static int
compiler_if(struct compiler * c,stmt_ty s)2829 compiler_if(struct compiler *c, stmt_ty s)
2830 {
2831     basicblock *end, *next;
2832     assert(s->kind == If_kind);
2833     end = compiler_new_block(c);
2834     if (end == NULL) {
2835         return 0;
2836     }
2837     if (asdl_seq_LEN(s->v.If.orelse)) {
2838         next = compiler_new_block(c);
2839         if (next == NULL) {
2840             return 0;
2841         }
2842     }
2843     else {
2844         next = end;
2845     }
2846     if (!compiler_jump_if(c, s->v.If.test, next, 0)) {
2847         return 0;
2848     }
2849     VISIT_SEQ(c, stmt, s->v.If.body);
2850     if (asdl_seq_LEN(s->v.If.orelse)) {
2851         ADDOP_JUMP_NOLINE(c, JUMP_FORWARD, end);
2852         compiler_use_next_block(c, next);
2853         VISIT_SEQ(c, stmt, s->v.If.orelse);
2854     }
2855     compiler_use_next_block(c, end);
2856     return 1;
2857 }
2858 
static int
compiler_for(struct compiler *c, stmt_ty s)
{
    /* Compile `for target in iter: body [else: orelse]`.
     *
     * Layout:
     *   start:   FOR_ITER -> cleanup (iterator exhausted)
     *   body:    <store target>; <body>; JUMP_ABSOLUTE -> start
     *   cleanup: <orelse>
     *   end:
     *
     * Returns 1 on success, 0 on error.
     */
    basicblock *start, *body, *cleanup, *end;

    start = compiler_new_block(c);
    body = compiler_new_block(c);
    cleanup = compiler_new_block(c);
    end = compiler_new_block(c);
    if (start == NULL || body == NULL || end == NULL || cleanup == NULL) {
        return 0;
    }
    /* Register the loop so break/continue can find it; `end` is the
       break target (fb_exit). */
    if (!compiler_push_fblock(c, FOR_LOOP, start, end, NULL)) {
        return 0;
    }
    VISIT(c, expr, s->v.For.iter);
    ADDOP(c, GET_ITER);
    compiler_use_next_block(c, start);
    ADDOP_JUMP(c, FOR_ITER, cleanup);
    compiler_use_next_block(c, body);
    VISIT(c, expr, s->v.For.target);
    VISIT_SEQ(c, stmt, s->v.For.body);
    /* Mark jump as artificial */
    c->u->u_lineno = -1;
    ADDOP_JUMP(c, JUMP_ABSOLUTE, start);
    compiler_use_next_block(c, cleanup);

    compiler_pop_fblock(c, FOR_LOOP, start);

    VISIT_SEQ(c, stmt, s->v.For.orelse);
    compiler_use_next_block(c, end);
    return 1;
}
2892 
2893 
2894 static int
compiler_async_for(struct compiler * c,stmt_ty s)2895 compiler_async_for(struct compiler *c, stmt_ty s)
2896 {
2897     basicblock *start, *except, *end;
2898     if (IS_TOP_LEVEL_AWAIT(c)){
2899         c->u->u_ste->ste_coroutine = 1;
2900     } else if (c->u->u_scope_type != COMPILER_SCOPE_ASYNC_FUNCTION) {
2901         return compiler_error(c, "'async for' outside async function");
2902     }
2903 
2904     start = compiler_new_block(c);
2905     except = compiler_new_block(c);
2906     end = compiler_new_block(c);
2907 
2908     if (start == NULL || except == NULL || end == NULL) {
2909         return 0;
2910     }
2911     VISIT(c, expr, s->v.AsyncFor.iter);
2912     ADDOP(c, GET_AITER);
2913 
2914     compiler_use_next_block(c, start);
2915     if (!compiler_push_fblock(c, FOR_LOOP, start, end, NULL)) {
2916         return 0;
2917     }
2918     /* SETUP_FINALLY to guard the __anext__ call */
2919     ADDOP_JUMP(c, SETUP_FINALLY, except);
2920     ADDOP(c, GET_ANEXT);
2921     ADDOP_LOAD_CONST(c, Py_None);
2922     ADDOP(c, YIELD_FROM);
2923     ADDOP(c, POP_BLOCK);  /* for SETUP_FINALLY */
2924 
2925     /* Success block for __anext__ */
2926     VISIT(c, expr, s->v.AsyncFor.target);
2927     VISIT_SEQ(c, stmt, s->v.AsyncFor.body);
2928     ADDOP_JUMP(c, JUMP_ABSOLUTE, start);
2929 
2930     compiler_pop_fblock(c, FOR_LOOP, start);
2931 
2932     /* Except block for __anext__ */
2933     compiler_use_next_block(c, except);
2934 
2935     /* Use same line number as the iterator,
2936      * as the END_ASYNC_FOR succeeds the `for`, not the body. */
2937     SET_LOC(c, s->v.AsyncFor.iter);
2938     ADDOP(c, END_ASYNC_FOR);
2939 
2940     /* `else` block */
2941     VISIT_SEQ(c, stmt, s->v.For.orelse);
2942 
2943     compiler_use_next_block(c, end);
2944 
2945     return 1;
2946 }
2947 
static int
compiler_while(struct compiler *c, stmt_ty s)
{
    /* Compile `while test: body [else: orelse]`.
       The test is compiled twice: once at loop entry (jumping to
       `anchor` when false) and once, inverted, at the bottom of the
       body (jumping back to `body` while true).  Returns 1 on success,
       0 on error. */
    basicblock *loop, *body, *end, *anchor = NULL;
    loop = compiler_new_block(c);
    body = compiler_new_block(c);
    anchor = compiler_new_block(c);
    end = compiler_new_block(c);
    if (loop == NULL || body == NULL || anchor == NULL || end == NULL) {
        return 0;
    }
    compiler_use_next_block(c, loop);
    /* Register the loop so break/continue can target it. */
    if (!compiler_push_fblock(c, WHILE_LOOP, loop, end, NULL)) {
        return 0;
    }
    /* Entry test: skip the whole body when initially false. */
    if (!compiler_jump_if(c, s->v.While.test, anchor, 0)) {
        return 0;
    }

    compiler_use_next_block(c, body);
    VISIT_SEQ(c, stmt, s->v.While.body);
    /* Attribute the bottom-of-loop test to the `while` statement line. */
    SET_LOC(c, s);
    if (!compiler_jump_if(c, s->v.While.test, body, 1)) {
        return 0;
    }

    compiler_pop_fblock(c, WHILE_LOOP, loop);

    compiler_use_next_block(c, anchor);
    if (s->v.While.orelse) {
        VISIT_SEQ(c, stmt, s->v.While.orelse);
    }
    compiler_use_next_block(c, end);

    return 1;
}
2984 
static int
compiler_return(struct compiler *c, stmt_ty s)
{
    /* Compile a `return` statement: unwind any active fblocks, push the
       return value, and emit RETURN_VALUE.  Returns 1 on success, 0 on
       error. */
    /* A non-constant value must be evaluated *before* unwinding and
       kept on the stack across it; a constant can be loaded afterwards. */
    int preserve_tos = ((s->v.Return.value != NULL) &&
                        (s->v.Return.value->kind != Constant_kind));
    if (c->u->u_ste->ste_type != FunctionBlock)
        return compiler_error(c, "'return' outside function");
    if (s->v.Return.value != NULL &&
        c->u->u_ste->ste_coroutine && c->u->u_ste->ste_generator)
    {
            return compiler_error(
                c, "'return' with value in async generator");
    }
    if (preserve_tos) {
        VISIT(c, expr, s->v.Return.value);
    } else {
        /* Emit instruction with line number for return value */
        if (s->v.Return.value != NULL) {
            SET_LOC(c, s->v.Return.value);
            ADDOP(c, NOP);
        }
    }
    /* Make sure the `return` line itself also gets an instruction. */
    if (s->v.Return.value == NULL || s->v.Return.value->lineno != s->lineno) {
        SET_LOC(c, s);
        ADDOP(c, NOP);
    }

    if (!compiler_unwind_fblock_stack(c, preserve_tos, NULL))
        return 0;
    if (s->v.Return.value == NULL) {
        ADDOP_LOAD_CONST(c, Py_None);
    }
    else if (!preserve_tos) {
        /* Constant return value: load it after the unwind. */
        ADDOP_LOAD_CONST(c, s->v.Return.value->v.Constant.value);
    }
    ADDOP(c, RETURN_VALUE);
    NEXT_BLOCK(c);

    return 1;
}
3025 
static int
compiler_break(struct compiler *c)
{
    /* Compile `break`: unwind inner fblocks up to and including the
       innermost loop, then jump to the loop's exit block.  Returns 1
       on success, 0 on error (including `break` outside a loop). */
    struct fblockinfo *loop = NULL;
    /* Emit instruction with line number */
    ADDOP(c, NOP);
    /* Unwind down to (but not including) the innermost loop. */
    if (!compiler_unwind_fblock_stack(c, 0, &loop)) {
        return 0;
    }
    if (loop == NULL) {
        return compiler_error(c, "'break' outside loop");
    }
    /* Unlike `continue`, `break` also unwinds the loop fblock itself. */
    if (!compiler_unwind_fblock(c, loop, 0)) {
        return 0;
    }
    ADDOP_JUMP(c, JUMP_ABSOLUTE, loop->fb_exit);
    NEXT_BLOCK(c);
    return 1;
}
3045 
3046 static int
compiler_continue(struct compiler * c)3047 compiler_continue(struct compiler *c)
3048 {
3049     struct fblockinfo *loop = NULL;
3050     /* Emit instruction with line number */
3051     ADDOP(c, NOP);
3052     if (!compiler_unwind_fblock_stack(c, 0, &loop)) {
3053         return 0;
3054     }
3055     if (loop == NULL) {
3056         return compiler_error(c, "'continue' not properly in loop");
3057     }
3058     ADDOP_JUMP(c, JUMP_ABSOLUTE, loop->fb_block);
3059     NEXT_BLOCK(c)
3060     return 1;
3061 }
3062 
3063 
3064 /* Code generated for "try: <body> finally: <finalbody>" is as follows:
3065 
3066         SETUP_FINALLY           L
3067         <code for body>
3068         POP_BLOCK
3069         <code for finalbody>
3070         JUMP E
3071     L:
3072         <code for finalbody>
3073     E:
3074 
3075    The special instructions use the block stack.  Each block
3076    stack entry contains the instruction that created it (here
3077    SETUP_FINALLY), the level of the value stack at the time the
3078    block stack entry was created, and a label (here L).
3079 
3080    SETUP_FINALLY:
3081     Pushes the current value stack level and the label
3082     onto the block stack.
3083    POP_BLOCK:
    Pops an entry from the block stack.
3085 
3086    The block stack is unwound when an exception is raised:
3087    when a SETUP_FINALLY entry is found, the raised and the caught
3088    exceptions are pushed onto the value stack (and the exception
3089    condition is cleared), and the interpreter jumps to the label
3090    gotten from the block stack.
3091 */
3092 
static int
compiler_try_finally(struct compiler *c, stmt_ty s)
{
    /* Compile `try: ... finally: ...` (see the layout comment above).
       The finally suite is emitted twice: once on the normal
       fall-through path and once on the exceptional path that ends in
       RERAISE.  Returns 1 on success, 0 on error. */
    basicblock *body, *end, *exit;

    body = compiler_new_block(c);
    end = compiler_new_block(c);
    exit = compiler_new_block(c);
    if (body == NULL || end == NULL || exit == NULL)
        return 0;

    /* `try` block */
    ADDOP_JUMP(c, SETUP_FINALLY, end);
    compiler_use_next_block(c, body);
    /* The finalbody is recorded on the fblock so it is available while
       unwinding (break/continue/return inside the body). */
    if (!compiler_push_fblock(c, FINALLY_TRY, body, end, s->v.Try.finalbody))
        return 0;
    if (s->v.Try.handlers && asdl_seq_LEN(s->v.Try.handlers)) {
        /* try/except/finally: the except part is compiled inside. */
        if (!compiler_try_except(c, s))
            return 0;
    }
    else {
        VISIT_SEQ(c, stmt, s->v.Try.body);
    }
    ADDOP_NOLINE(c, POP_BLOCK);
    compiler_pop_fblock(c, FINALLY_TRY, body);
    /* Normal path: run the finally suite, then skip its second copy. */
    VISIT_SEQ(c, stmt, s->v.Try.finalbody);
    ADDOP_JUMP_NOLINE(c, JUMP_FORWARD, exit);
    /* `finally` block */
    compiler_use_next_block(c, end);
    if (!compiler_push_fblock(c, FINALLY_END, end, NULL, NULL))
        return 0;
    VISIT_SEQ(c, stmt, s->v.Try.finalbody);
    compiler_pop_fblock(c, FINALLY_END, end);
    /* Exceptional path: re-raise the in-flight exception. */
    ADDOP_I(c, RERAISE, 0);
    compiler_use_next_block(c, exit);
    return 1;
}
3130 
3131 /*
3132    Code generated for "try: S except E1 as V1: S1 except E2 as V2: S2 ...":
3133    (The contents of the value stack is shown in [], with the top
3134    at the right; 'tb' is trace-back info, 'val' the exception's
3135    associated value, and 'exc' the exception.)
3136 
3137    Value stack          Label   Instruction     Argument
3138    []                           SETUP_FINALLY   L1
3139    []                           <code for S>
3140    []                           POP_BLOCK
3141    []                           JUMP_FORWARD    L0
3142 
3143    [tb, val, exc]       L1:     DUP                             )
3144    [tb, val, exc, exc]          <evaluate E1>                   )
3145    [tb, val, exc, exc, E1]      JUMP_IF_NOT_EXC_MATCH L2        ) only if E1
3146    [tb, val, exc]               POP
3147    [tb, val]                    <assign to V1>  (or POP if no V1)
3148    [tb]                         POP
3149    []                           <code for S1>
3150                                 JUMP_FORWARD    L0
3151 
3152    [tb, val, exc]       L2:     DUP
3153    .............................etc.......................
3154 
3155    [tb, val, exc]       Ln+1:   RERAISE     # re-raise exception
3156 
3157    []                   L0:     <next statement>
3158 
3159    Of course, parts are not generated if Vi or Ei is not present.
3160 */
static int
compiler_try_except(struct compiler *c, stmt_ty s)
{
    /* Compile `try: ... except ...: ... [else: ...]` (see the stack
       layout comment above).  Returns 1 on success, 0 on error. */
    basicblock *body, *orelse, *except, *end;
    Py_ssize_t i, n;

    body = compiler_new_block(c);
    except = compiler_new_block(c);
    orelse = compiler_new_block(c);
    end = compiler_new_block(c);
    if (body == NULL || except == NULL || orelse == NULL || end == NULL)
        return 0;
    ADDOP_JUMP(c, SETUP_FINALLY, except);
    compiler_use_next_block(c, body);
    if (!compiler_push_fblock(c, TRY_EXCEPT, body, NULL, NULL))
        return 0;
    VISIT_SEQ(c, stmt, s->v.Try.body);
    compiler_pop_fblock(c, TRY_EXCEPT, body);
    ADDOP_NOLINE(c, POP_BLOCK);
    ADDOP_JUMP_NOLINE(c, JUMP_FORWARD, orelse);
    n = asdl_seq_LEN(s->v.Try.handlers);
    compiler_use_next_block(c, except);
    /* Runtime will push a block here, so we need to account for that */
    if (!compiler_push_fblock(c, EXCEPTION_HANDLER, NULL, NULL, NULL))
        return 0;
    for (i = 0; i < n; i++) {
        excepthandler_ty handler = (excepthandler_ty)asdl_seq_GET(
            s->v.Try.handlers, i);
        SET_LOC(c, handler);
        /* A bare `except:` matches everything; later handlers would be
           unreachable. */
        if (!handler->v.ExceptHandler.type && i < n-1)
            return compiler_error(c, "default 'except:' must be last");
        /* From here on, `except` names the *next* handler's block. */
        except = compiler_new_block(c);
        if (except == NULL)
            return 0;
        if (handler->v.ExceptHandler.type) {
            ADDOP(c, DUP_TOP);
            VISIT(c, expr, handler->v.ExceptHandler.type);
            ADDOP_JUMP(c, JUMP_IF_NOT_EXC_MATCH, except);
            NEXT_BLOCK(c);
        }
        ADDOP(c, POP_TOP);
        if (handler->v.ExceptHandler.name) {
            /* `except E as name:` — bind the exception, and wrap the
               handler body in a finally that unbinds it again. */
            basicblock *cleanup_end, *cleanup_body;

            cleanup_end = compiler_new_block(c);
            cleanup_body = compiler_new_block(c);
            if (cleanup_end == NULL || cleanup_body == NULL) {
                return 0;
            }

            compiler_nameop(c, handler->v.ExceptHandler.name, Store);
            ADDOP(c, POP_TOP);

            /*
              try:
                  # body
              except type as name:
                  try:
                      # body
                  finally:
                      name = None # in case body contains "del name"
                      del name
            */

            /* second try: */
            ADDOP_JUMP(c, SETUP_FINALLY, cleanup_end);
            compiler_use_next_block(c, cleanup_body);
            if (!compiler_push_fblock(c, HANDLER_CLEANUP, cleanup_body, NULL, handler->v.ExceptHandler.name))
                return 0;

            /* second # body */
            VISIT_SEQ(c, stmt, handler->v.ExceptHandler.body);
            compiler_pop_fblock(c, HANDLER_CLEANUP, cleanup_body);
            /* name = None; del name; # Mark as artificial */
            c->u->u_lineno = -1;
            ADDOP(c, POP_BLOCK);
            ADDOP(c, POP_EXCEPT);
            ADDOP_LOAD_CONST(c, Py_None);
            compiler_nameop(c, handler->v.ExceptHandler.name, Store);
            compiler_nameop(c, handler->v.ExceptHandler.name, Del);
            ADDOP_JUMP(c, JUMP_FORWARD, end);

            /* except: */
            compiler_use_next_block(c, cleanup_end);

            /* name = None; del name; # Mark as artificial */
            c->u->u_lineno = -1;
            ADDOP_LOAD_CONST(c, Py_None);
            compiler_nameop(c, handler->v.ExceptHandler.name, Store);
            compiler_nameop(c, handler->v.ExceptHandler.name, Del);

            ADDOP_I(c, RERAISE, 1);
        }
        else {
            /* Handler without `as name`: discard the exception state
               and run the body directly. */
            basicblock *cleanup_body;

            cleanup_body = compiler_new_block(c);
            if (!cleanup_body)
                return 0;

            ADDOP(c, POP_TOP);
            ADDOP(c, POP_TOP);
            compiler_use_next_block(c, cleanup_body);
            if (!compiler_push_fblock(c, HANDLER_CLEANUP, cleanup_body, NULL, NULL))
                return 0;
            VISIT_SEQ(c, stmt, handler->v.ExceptHandler.body);
            compiler_pop_fblock(c, HANDLER_CLEANUP, cleanup_body);
            c->u->u_lineno = -1;
            ADDOP(c, POP_EXCEPT);
            ADDOP_JUMP(c, JUMP_FORWARD, end);
        }
        compiler_use_next_block(c, except);
    }
    compiler_pop_fblock(c, EXCEPTION_HANDLER, NULL);
    /* Mark as artificial */
    c->u->u_lineno = -1;
    /* No handler matched: re-raise the exception. */
    ADDOP_I(c, RERAISE, 0);
    compiler_use_next_block(c, orelse);
    VISIT_SEQ(c, stmt, s->v.Try.orelse);
    compiler_use_next_block(c, end);
    return 1;
}
3283 
3284 static int
compiler_try(struct compiler * c,stmt_ty s)3285 compiler_try(struct compiler *c, stmt_ty s) {
3286     if (s->v.Try.finalbody && asdl_seq_LEN(s->v.Try.finalbody))
3287         return compiler_try_finally(c, s);
3288     else
3289         return compiler_try_except(c, s);
3290 }
3291 
3292 
static int
compiler_import_as(struct compiler *c, identifier name, identifier asname)
{
    /* The IMPORT_NAME opcode was already generated.  This function
       merely needs to bind the result to a name.

       If there is a dot in name, we need to split it and emit a
       IMPORT_FROM for each name.
    */
    Py_ssize_t len = PyUnicode_GET_LENGTH(name);
    Py_ssize_t dot = PyUnicode_FindChar(name, '.', 0, len, 1);
    if (dot == -2)
        return 0;  /* PyUnicode_FindChar error */
    if (dot != -1) {
        /* Consume the base module name to get the first attribute */
        while (1) {
            Py_ssize_t pos = dot + 1;
            PyObject *attr;
            dot = PyUnicode_FindChar(name, '.', pos, len, 1);
            if (dot == -2)
                return 0;
            /* Attribute segment between this dot and the next (or end). */
            attr = PyUnicode_Substring(name, pos, (dot != -1) ? dot : len);
            if (!attr)
                return 0;
            /* NOTE: no DECREF of attr here — ADDOP_N is expected to
               consume the reference. */
            ADDOP_N(c, IMPORT_FROM, attr, names);
            if (dot == -1) {
                break;
            }
            /* Drop the intermediate module, keep the new attribute. */
            ADDOP(c, ROT_TWO);
            ADDOP(c, POP_TOP);
        }
        if (!compiler_nameop(c, asname, Store)) {
            return 0;
        }
        /* Discard the top-level module still on the stack. */
        ADDOP(c, POP_TOP);
        return 1;
    }
    /* No dot: simply bind the module to asname. */
    return compiler_nameop(c, asname, Store);
}
3332 
static int
compiler_import(struct compiler *c, stmt_ty s)
{
    /* The Import node stores a module name like a.b.c as a single
       string.  This is convenient for all cases except
         import a.b.c as d
       where we need to parse that string to extract the individual
       module names.
       XXX Perhaps change the representation to make this case simpler?
     */
    Py_ssize_t i, n = asdl_seq_LEN(s->v.Import.names);

    PyObject *zero = _PyLong_GetZero();  // borrowed reference
    for (i = 0; i < n; i++) {
        alias_ty alias = (alias_ty)asdl_seq_GET(s->v.Import.names, i);
        int r;

        /* Push level 0 and fromlist None for IMPORT_NAME
           (same operand layout as compiler_from_import). */
        ADDOP_LOAD_CONST(c, zero);
        ADDOP_LOAD_CONST(c, Py_None);
        ADDOP_NAME(c, IMPORT_NAME, alias->name, names);

        if (alias->asname) {
            /* import a.b.c as d: walk the attributes and bind `d`. */
            r = compiler_import_as(c, alias->name, alias->asname);
            if (!r)
                return r;
        }
        else {
            /* import a.b.c: bind the top-level package name `a`. */
            identifier tmp = alias->name;
            Py_ssize_t dot = PyUnicode_FindChar(
                alias->name, '.', 0, PyUnicode_GET_LENGTH(alias->name), 1);
            if (dot != -1) {
                tmp = PyUnicode_Substring(alias->name, 0, dot);
                if (tmp == NULL)
                    return 0;
            }
            r = compiler_nameop(c, tmp, Store);
            if (dot != -1) {
                /* tmp is a fresh substring only in the dotted case. */
                Py_DECREF(tmp);
            }
            if (!r)
                return r;
        }
    }
    return 1;
}
3378 
static int
compiler_from_import(struct compiler *c, stmt_ty s)
{
    /* Compile `from M import a, b as c` / `from . import x` /
       `from M import *`.  Returns 1 on success, 0 on error. */
    Py_ssize_t i, n = asdl_seq_LEN(s->v.ImportFrom.names);
    PyObject *names;
    /* Cached "" used as the module name for purely relative imports;
       created once and kept for the life of the process. */
    static PyObject *empty_string;

    if (!empty_string) {
        empty_string = PyUnicode_FromString("");
        if (!empty_string)
            return 0;
    }

    /* Relative-import level (0 for absolute imports). */
    ADDOP_LOAD_CONST_NEW(c, PyLong_FromLong(s->v.ImportFrom.level));

    names = PyTuple_New(n);
    if (!names)
        return 0;

    /* build up the names */
    for (i = 0; i < n; i++) {
        alias_ty alias = (alias_ty)asdl_seq_GET(s->v.ImportFrom.names, i);
        Py_INCREF(alias->name);
        PyTuple_SET_ITEM(names, i, alias->name);
    }

    /* `from __future__ import ...` must appear before other statements;
       ff_lineno was recorded by the future-statement pass (future.c). */
    if (s->lineno > c->c_future->ff_lineno && s->v.ImportFrom.module &&
        _PyUnicode_EqualToASCIIString(s->v.ImportFrom.module, "__future__")) {
        Py_DECREF(names);
        return compiler_error(c, "from __future__ imports must occur "
                              "at the beginning of the file");
    }
    /* ADDOP_LOAD_CONST_NEW consumes the reference to `names`. */
    ADDOP_LOAD_CONST_NEW(c, names);

    if (s->v.ImportFrom.module) {
        ADDOP_NAME(c, IMPORT_NAME, s->v.ImportFrom.module, names);
    }
    else {
        /* No module: relative import like `from . import x`. */
        ADDOP_NAME(c, IMPORT_NAME, empty_string, names);
    }
    for (i = 0; i < n; i++) {
        alias_ty alias = (alias_ty)asdl_seq_GET(s->v.ImportFrom.names, i);
        identifier store_name;

        if (i == 0 && PyUnicode_READ_CHAR(alias->name, 0) == '*') {
            /* `from M import *` is the whole statement. */
            assert(n == 1);
            ADDOP(c, IMPORT_STAR);
            return 1;
        }

        ADDOP_NAME(c, IMPORT_FROM, alias->name, names);
        store_name = alias->name;
        if (alias->asname)
            store_name = alias->asname;

        if (!compiler_nameop(c, store_name, Store)) {
            return 0;
        }
    }
    /* remove imported module */
    ADDOP(c, POP_TOP);
    return 1;
}
3442 
static int
compiler_assert(struct compiler *c, stmt_ty s)
{
    /* Compile `assert test[, msg]`.  When optimizations are enabled
       (c_optimize), the assert compiles to nothing.  Returns 1 on
       success, 0 on error. */
    basicblock *end;

    /* Always emit a warning if the test is a non-zero length tuple */
    if ((s->v.Assert.test->kind == Tuple_kind &&
        asdl_seq_LEN(s->v.Assert.test->v.Tuple.elts) > 0) ||
        (s->v.Assert.test->kind == Constant_kind &&
         PyTuple_Check(s->v.Assert.test->v.Constant.value) &&
         PyTuple_Size(s->v.Assert.test->v.Constant.value) > 0))
    {
        if (!compiler_warn(c, "assertion is always true, "
                              "perhaps remove parentheses?"))
        {
            return 0;
        }
    }
    if (c->c_optimize)
        return 1;
    end = compiler_new_block(c);
    if (end == NULL)
        return 0;
    /* Jump straight to `end` when the assertion holds. */
    if (!compiler_jump_if(c, s->v.Assert.test, end, 1))
        return 0;
    ADDOP(c, LOAD_ASSERTION_ERROR);
    if (s->v.Assert.msg) {
        /* Raise AssertionError(msg) instead of a bare AssertionError. */
        VISIT(c, expr, s->v.Assert.msg);
        ADDOP_I(c, CALL_FUNCTION, 1);
    }
    ADDOP_I(c, RAISE_VARARGS, 1);
    compiler_use_next_block(c, end);
    return 1;
}
3477 
3478 static int
compiler_visit_stmt_expr(struct compiler * c,expr_ty value)3479 compiler_visit_stmt_expr(struct compiler *c, expr_ty value)
3480 {
3481     if (c->c_interactive && c->c_nestlevel <= 1) {
3482         VISIT(c, expr, value);
3483         ADDOP(c, PRINT_EXPR);
3484         return 1;
3485     }
3486 
3487     if (value->kind == Constant_kind) {
3488         /* ignore constant statement */
3489         ADDOP(c, NOP);
3490         return 1;
3491     }
3492 
3493     VISIT(c, expr, value);
3494     /* Mark POP_TOP as artificial */
3495     c->u->u_lineno = -1;
3496     ADDOP(c, POP_TOP);
3497     return 1;
3498 }
3499 
static int
compiler_visit_stmt(struct compiler *c, stmt_ty s)
{
    /* Central statement dispatcher: emit code for one statement,
       delegating to the per-kind compiler_* helpers.  Returns 1 on
       success, 0 on error. */
    Py_ssize_t i, n;

    /* Always assign a lineno to the next instruction for a stmt. */
    SET_LOC(c, s);

    switch (s->kind) {
    case FunctionDef_kind:
        return compiler_function(c, s, 0);
    case ClassDef_kind:
        return compiler_class(c, s);
    case Return_kind:
        return compiler_return(c, s);
    case Delete_kind:
        VISIT_SEQ(c, expr, s->v.Delete.targets)
        break;
    case Assign_kind:
        /* a = b = c = value: evaluate the value once, DUP it for every
           target except the last. */
        n = asdl_seq_LEN(s->v.Assign.targets);
        VISIT(c, expr, s->v.Assign.value);
        for (i = 0; i < n; i++) {
            if (i < n - 1)
                ADDOP(c, DUP_TOP);
            VISIT(c, expr,
                  (expr_ty)asdl_seq_GET(s->v.Assign.targets, i));
        }
        break;
    case AugAssign_kind:
        return compiler_augassign(c, s);
    case AnnAssign_kind:
        return compiler_annassign(c, s);
    case For_kind:
        return compiler_for(c, s);
    case While_kind:
        return compiler_while(c, s);
    case If_kind:
        return compiler_if(c, s);
    case Match_kind:
        return compiler_match(c, s);
    case Raise_kind:
        /* n counts the operands pushed for RAISE_VARARGS (0, 1 or 2). */
        n = 0;
        if (s->v.Raise.exc) {
            VISIT(c, expr, s->v.Raise.exc);
            n++;
            if (s->v.Raise.cause) {
                VISIT(c, expr, s->v.Raise.cause);
                n++;
            }
        }
        ADDOP_I(c, RAISE_VARARGS, (int)n);
        NEXT_BLOCK(c);
        break;
    case Try_kind:
        return compiler_try(c, s);
    case Assert_kind:
        return compiler_assert(c, s);
    case Import_kind:
        return compiler_import(c, s);
    case ImportFrom_kind:
        return compiler_from_import(c, s);
    case Global_kind:
    case Nonlocal_kind:
        /* No code to emit: scope effects were handled by the symbol
           table pass (see symtable.c). */
        break;
    case Expr_kind:
        return compiler_visit_stmt_expr(c, s->v.Expr.value);
    case Pass_kind:
        ADDOP(c, NOP);
        break;
    case Break_kind:
        return compiler_break(c);
    case Continue_kind:
        return compiler_continue(c);
    case With_kind:
        return compiler_with(c, s, 0);
    case AsyncFunctionDef_kind:
        return compiler_function(c, s, 1);
    case AsyncWith_kind:
        return compiler_async_with(c, s, 0);
    case AsyncFor_kind:
        return compiler_async_for(c, s);
    }

    return 1;
}
3585 
3586 static int
unaryop(unaryop_ty op)3587 unaryop(unaryop_ty op)
3588 {
3589     switch (op) {
3590     case Invert:
3591         return UNARY_INVERT;
3592     case Not:
3593         return UNARY_NOT;
3594     case UAdd:
3595         return UNARY_POSITIVE;
3596     case USub:
3597         return UNARY_NEGATIVE;
3598     default:
3599         PyErr_Format(PyExc_SystemError,
3600             "unary op %d should not be possible", op);
3601         return 0;
3602     }
3603 }
3604 
3605 static int
binop(operator_ty op)3606 binop(operator_ty op)
3607 {
3608     switch (op) {
3609     case Add:
3610         return BINARY_ADD;
3611     case Sub:
3612         return BINARY_SUBTRACT;
3613     case Mult:
3614         return BINARY_MULTIPLY;
3615     case MatMult:
3616         return BINARY_MATRIX_MULTIPLY;
3617     case Div:
3618         return BINARY_TRUE_DIVIDE;
3619     case Mod:
3620         return BINARY_MODULO;
3621     case Pow:
3622         return BINARY_POWER;
3623     case LShift:
3624         return BINARY_LSHIFT;
3625     case RShift:
3626         return BINARY_RSHIFT;
3627     case BitOr:
3628         return BINARY_OR;
3629     case BitXor:
3630         return BINARY_XOR;
3631     case BitAnd:
3632         return BINARY_AND;
3633     case FloorDiv:
3634         return BINARY_FLOOR_DIVIDE;
3635     default:
3636         PyErr_Format(PyExc_SystemError,
3637             "binary op %d should not be possible", op);
3638         return 0;
3639     }
3640 }
3641 
3642 static int
inplace_binop(operator_ty op)3643 inplace_binop(operator_ty op)
3644 {
3645     switch (op) {
3646     case Add:
3647         return INPLACE_ADD;
3648     case Sub:
3649         return INPLACE_SUBTRACT;
3650     case Mult:
3651         return INPLACE_MULTIPLY;
3652     case MatMult:
3653         return INPLACE_MATRIX_MULTIPLY;
3654     case Div:
3655         return INPLACE_TRUE_DIVIDE;
3656     case Mod:
3657         return INPLACE_MODULO;
3658     case Pow:
3659         return INPLACE_POWER;
3660     case LShift:
3661         return INPLACE_LSHIFT;
3662     case RShift:
3663         return INPLACE_RSHIFT;
3664     case BitOr:
3665         return INPLACE_OR;
3666     case BitXor:
3667         return INPLACE_XOR;
3668     case BitAnd:
3669         return INPLACE_AND;
3670     case FloorDiv:
3671         return INPLACE_FLOOR_DIVIDE;
3672     default:
3673         PyErr_Format(PyExc_SystemError,
3674             "inplace binary op %d should not be possible", op);
3675         return 0;
3676     }
3677 }
3678 
static int
compiler_nameop(struct compiler *c, identifier name, expr_context_ty ctx)
{
    /* Emit the load/store/delete opcode for `name` in context `ctx`.
       The symbol table scope of the (mangled) name selects between the
       fast-local, global, cell/free, and generic-name opcode families.
       Returns 1 on success, 0 on error (with an exception set). */
    int op, scope;
    Py_ssize_t arg;
    enum { OP_FAST, OP_GLOBAL, OP_DEREF, OP_NAME } optype;

    PyObject *dict = c->u->u_names;
    PyObject *mangled;

    /* None/True/False are compiled as constants elsewhere, never as names. */
    assert(!_PyUnicode_EqualToASCIIString(name, "None") &&
           !_PyUnicode_EqualToASCIIString(name, "True") &&
           !_PyUnicode_EqualToASCIIString(name, "False"));

    if (forbidden_name(c, name, ctx))
        return 0;

    /* Apply private-name mangling (e.g. __x -> _Class__x) before the
       symbol-table lookup; the lookup key must match what symtable.c used. */
    mangled = _Py_Mangle(c->u->u_private, name);
    if (!mangled)
        return 0;

    op = 0;
    optype = OP_NAME;
    scope = _PyST_GetScope(c->u->u_ste, mangled);
    switch (scope) {
    case FREE:
        dict = c->u->u_freevars;
        optype = OP_DEREF;
        break;
    case CELL:
        dict = c->u->u_cellvars;
        optype = OP_DEREF;
        break;
    case LOCAL:
        /* Fast locals only exist inside functions; at module/class level
           a "local" is accessed through the namespace dict (OP_NAME). */
        if (c->u->u_ste->ste_type == FunctionBlock)
            optype = OP_FAST;
        break;
    case GLOBAL_IMPLICIT:
        if (c->u->u_ste->ste_type == FunctionBlock)
            optype = OP_GLOBAL;
        break;
    case GLOBAL_EXPLICIT:
        optype = OP_GLOBAL;
        break;
    default:
        /* scope can be 0 */
        break;
    }

    /* XXX Leave assert here, but handle __doc__ and the like better */
    assert(scope || PyUnicode_READ_CHAR(name, 0) == '_');

    switch (optype) {
    case OP_DEREF:
        switch (ctx) {
        case Load:
            /* Inside a class body a free variable may shadow a class
               attribute; LOAD_CLASSDEREF checks the class namespace first. */
            op = (c->u->u_ste->ste_type == ClassBlock) ? LOAD_CLASSDEREF : LOAD_DEREF;
            break;
        case Store: op = STORE_DEREF; break;
        case Del: op = DELETE_DEREF; break;
        }
        break;
    case OP_FAST:
        /* NOTE: ADDOP_N takes over the reference to `mangled` — hence no
           Py_DECREF on this early-return path, unlike the tail below. */
        switch (ctx) {
        case Load: op = LOAD_FAST; break;
        case Store: op = STORE_FAST; break;
        case Del: op = DELETE_FAST; break;
        }
        ADDOP_N(c, op, mangled, varnames);
        return 1;
    case OP_GLOBAL:
        switch (ctx) {
        case Load: op = LOAD_GLOBAL; break;
        case Store: op = STORE_GLOBAL; break;
        case Del: op = DELETE_GLOBAL; break;
        }
        break;
    case OP_NAME:
        switch (ctx) {
        case Load: op = LOAD_NAME; break;
        case Store: op = STORE_NAME; break;
        case Del: op = DELETE_NAME; break;
        }
        break;
    }

    assert(op);
    /* Intern the name in the appropriate per-code-object table (names,
       varnames, freevars or cellvars) and use its index as the oparg. */
    arg = compiler_add_o(dict, mangled);
    Py_DECREF(mangled);
    if (arg < 0)
        return 0;
    return compiler_addop_i(c, op, arg);
}
3772 
3773 static int
compiler_boolop(struct compiler * c,expr_ty e)3774 compiler_boolop(struct compiler *c, expr_ty e)
3775 {
3776     basicblock *end;
3777     int jumpi;
3778     Py_ssize_t i, n;
3779     asdl_expr_seq *s;
3780 
3781     assert(e->kind == BoolOp_kind);
3782     if (e->v.BoolOp.op == And)
3783         jumpi = JUMP_IF_FALSE_OR_POP;
3784     else
3785         jumpi = JUMP_IF_TRUE_OR_POP;
3786     end = compiler_new_block(c);
3787     if (end == NULL)
3788         return 0;
3789     s = e->v.BoolOp.values;
3790     n = asdl_seq_LEN(s) - 1;
3791     assert(n >= 0);
3792     for (i = 0; i < n; ++i) {
3793         VISIT(c, expr, (expr_ty)asdl_seq_GET(s, i));
3794         ADDOP_JUMP(c, jumpi, end);
3795         basicblock *next = compiler_new_block(c);
3796         if (next == NULL) {
3797             return 0;
3798         }
3799         compiler_use_next_block(c, next);
3800     }
3801     VISIT(c, expr, (expr_ty)asdl_seq_GET(s, n));
3802     compiler_use_next_block(c, end);
3803     return 1;
3804 }
3805 
static int
starunpack_helper(struct compiler *c, asdl_expr_seq *elts, int pushed,
                  int build, int add, int extend, int tuple)
{
    /* Build a list/set/tuple from `elts`, which may contain starred
       (iterable-unpacking) elements.  `pushed` items are already on the
       stack; `build`/`add`/`extend` are the container-specific opcodes
       (e.g. BUILD_LIST/LIST_APPEND/LIST_EXTEND); `tuple` requests a
       final LIST_TO_TUPLE conversion.  Returns 1 on success, 0 on error. */
    Py_ssize_t n = asdl_seq_LEN(elts);
    /* Constant folding: if everything is a constant, materialize a tuple
       at compile time and load it as a single constant. */
    if (n > 2 && are_all_items_const(elts, 0, n)) {
        PyObject *folded = PyTuple_New(n);
        if (folded == NULL) {
            return 0;
        }
        PyObject *val;
        for (Py_ssize_t i = 0; i < n; i++) {
            val = ((expr_ty)asdl_seq_GET(elts, i))->v.Constant.value;
            Py_INCREF(val);
            PyTuple_SET_ITEM(folded, i, val);
        }
        if (tuple) {
            ADDOP_LOAD_CONST_NEW(c, folded);
        } else {
            if (add == SET_ADD) {
                /* Sets need a hashable constant: fold to a frozenset. */
                Py_SETREF(folded, PyFrozenSet_New(folded));
                if (folded == NULL) {
                    return 0;
                }
            }
            /* Build an empty container and extend it with the constant. */
            ADDOP_I(c, build, pushed);
            ADDOP_LOAD_CONST_NEW(c, folded);
            ADDOP_I(c, extend, 1);
        }
        return 1;
    }

    /* With many elements, build incrementally to limit stack depth. */
    int big = n+pushed > STACK_USE_GUIDELINE;
    int seen_star = 0;
    for (Py_ssize_t i = 0; i < n; i++) {
        expr_ty elt = asdl_seq_GET(elts, i);
        if (elt->kind == Starred_kind) {
            seen_star = 1;
        }
    }
    /* Simple case: no unpacking and few elements — push them all and
       emit a single BUILD_* instruction. */
    if (!seen_star && !big) {
        for (Py_ssize_t i = 0; i < n; i++) {
            expr_ty elt = asdl_seq_GET(elts, i);
            VISIT(c, expr, elt);
        }
        if (tuple) {
            ADDOP_I(c, BUILD_TUPLE, n+pushed);
        } else {
            ADDOP_I(c, build, n+pushed);
        }
        return 1;
    }
    /* General case: create the container (immediately when big, or
       lazily at the first starred element) and append/extend into it. */
    int sequence_built = 0;
    if (big) {
        ADDOP_I(c, build, pushed);
        sequence_built = 1;
    }
    for (Py_ssize_t i = 0; i < n; i++) {
        expr_ty elt = asdl_seq_GET(elts, i);
        if (elt->kind == Starred_kind) {
            if (sequence_built == 0) {
                /* Collapse the items pushed so far into the container. */
                ADDOP_I(c, build, i+pushed);
                sequence_built = 1;
            }
            VISIT(c, expr, elt->v.Starred.value);
            ADDOP_I(c, extend, 1);
        }
        else {
            VISIT(c, expr, elt);
            if (sequence_built) {
                ADDOP_I(c, add, 1);
            }
        }
    }
    assert(sequence_built);
    if (tuple) {
        ADDOP(c, LIST_TO_TUPLE);
    }
    return 1;
}
3886 
static int
unpack_helper(struct compiler *c, asdl_expr_seq *elts)
{
    /* Emit the unpack opcode for an assignment target list.  Without a
       starred target this is UNPACK_SEQUENCE n; with one it is UNPACK_EX,
       whose oparg packs the count of targets before the star in the low
       byte and the count after it in the remaining bits. */
    Py_ssize_t n = asdl_seq_LEN(elts);
    int seen_star = 0;
    for (Py_ssize_t i = 0; i < n; i++) {
        expr_ty elt = asdl_seq_GET(elts, i);
        if (elt->kind == Starred_kind && !seen_star) {
            /* Both counts must fit in the UNPACK_EX oparg encoding. */
            if ((i >= (1 << 8)) ||
                (n-i-1 >= (INT_MAX >> 8)))
                return compiler_error(c,
                    "too many expressions in "
                    "star-unpacking assignment");
            ADDOP_I(c, UNPACK_EX, (i + ((n-i-1) << 8)));
            seen_star = 1;
        }
        else if (elt->kind == Starred_kind) {
            /* A second star is a syntax error. */
            return compiler_error(c,
                "multiple starred expressions in assignment");
        }
    }
    if (!seen_star) {
        ADDOP_I(c, UNPACK_SEQUENCE, n);
    }
    return 1;
}
3913 
3914 static int
assignment_helper(struct compiler * c,asdl_expr_seq * elts)3915 assignment_helper(struct compiler *c, asdl_expr_seq *elts)
3916 {
3917     Py_ssize_t n = asdl_seq_LEN(elts);
3918     RETURN_IF_FALSE(unpack_helper(c, elts));
3919     for (Py_ssize_t i = 0; i < n; i++) {
3920         expr_ty elt = asdl_seq_GET(elts, i);
3921         VISIT(c, expr, elt->kind != Starred_kind ? elt : elt->v.Starred.value);
3922     }
3923     return 1;
3924 }
3925 
3926 static int
compiler_list(struct compiler * c,expr_ty e)3927 compiler_list(struct compiler *c, expr_ty e)
3928 {
3929     asdl_expr_seq *elts = e->v.List.elts;
3930     if (e->v.List.ctx == Store) {
3931         return assignment_helper(c, elts);
3932     }
3933     else if (e->v.List.ctx == Load) {
3934         return starunpack_helper(c, elts, 0, BUILD_LIST,
3935                                  LIST_APPEND, LIST_EXTEND, 0);
3936     }
3937     else
3938         VISIT_SEQ(c, expr, elts);
3939     return 1;
3940 }
3941 
3942 static int
compiler_tuple(struct compiler * c,expr_ty e)3943 compiler_tuple(struct compiler *c, expr_ty e)
3944 {
3945     asdl_expr_seq *elts = e->v.Tuple.elts;
3946     if (e->v.Tuple.ctx == Store) {
3947         return assignment_helper(c, elts);
3948     }
3949     else if (e->v.Tuple.ctx == Load) {
3950         return starunpack_helper(c, elts, 0, BUILD_LIST,
3951                                  LIST_APPEND, LIST_EXTEND, 1);
3952     }
3953     else
3954         VISIT_SEQ(c, expr, elts);
3955     return 1;
3956 }
3957 
3958 static int
compiler_set(struct compiler * c,expr_ty e)3959 compiler_set(struct compiler *c, expr_ty e)
3960 {
3961     return starunpack_helper(c, e->v.Set.elts, 0, BUILD_SET,
3962                              SET_ADD, SET_UPDATE, 0);
3963 }
3964 
3965 static int
are_all_items_const(asdl_expr_seq * seq,Py_ssize_t begin,Py_ssize_t end)3966 are_all_items_const(asdl_expr_seq *seq, Py_ssize_t begin, Py_ssize_t end)
3967 {
3968     Py_ssize_t i;
3969     for (i = begin; i < end; i++) {
3970         expr_ty key = (expr_ty)asdl_seq_GET(seq, i);
3971         if (key == NULL || key->kind != Constant_kind)
3972             return 0;
3973     }
3974     return 1;
3975 }
3976 
static int
compiler_subdict(struct compiler *c, expr_ty e, Py_ssize_t begin, Py_ssize_t end)
{
    /* Build a dict from the key/value pairs e->v.Dict[begin:end] (a run
       with no `**` unpacking).  Uses BUILD_CONST_KEY_MAP when all keys
       are constants, incremental MAP_ADD when the run is large, and a
       single BUILD_MAP otherwise.  Returns 1 on success, 0 on error. */
    Py_ssize_t i, n = end - begin;
    PyObject *keys, *key;
    int big = n*2 > STACK_USE_GUIDELINE;
    if (n > 1 && !big && are_all_items_const(e->v.Dict.keys, begin, end)) {
        /* Push only the values; the keys are folded into one tuple const. */
        for (i = begin; i < end; i++) {
            VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Dict.values, i));
        }
        keys = PyTuple_New(n);
        if (keys == NULL) {
            return 0;
        }
        for (i = begin; i < end; i++) {
            key = ((expr_ty)asdl_seq_GET(e->v.Dict.keys, i))->v.Constant.value;
            Py_INCREF(key);
            PyTuple_SET_ITEM(keys, i - begin, key);
        }
        /* ADDOP_LOAD_CONST_NEW takes over the reference to `keys`. */
        ADDOP_LOAD_CONST_NEW(c, keys);
        ADDOP_I(c, BUILD_CONST_KEY_MAP, n);
        return 1;
    }
    if (big) {
        /* Large run: start with an empty dict and insert pairwise to
           keep stack usage bounded. */
        ADDOP_I(c, BUILD_MAP, 0);
    }
    for (i = begin; i < end; i++) {
        VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Dict.keys, i));
        VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Dict.values, i));
        if (big) {
            ADDOP_I(c, MAP_ADD, 1);
        }
    }
    if (!big) {
        /* Small run: all pairs are on the stack; build in one opcode. */
        ADDOP_I(c, BUILD_MAP, n);
    }
    return 1;
}
4015 
static int
compiler_dict(struct compiler *c, expr_ty e)
{
    /* Compile a dict display, which may mix ordinary `key: value` pairs
       with `**mapping` unpackings (represented by a NULL key).  Runs of
       plain pairs are compiled via compiler_subdict() and merged with
       DICT_UPDATE.  Returns 1 on success, 0 on error. */
    Py_ssize_t i, n, elements;
    int have_dict;           /* has a dict been pushed on the stack yet? */
    int is_unpacking = 0;
    n = asdl_seq_LEN(e->v.Dict.values);
    have_dict = 0;
    elements = 0;            /* length of the current run of plain pairs */
    for (i = 0; i < n; i++) {
        is_unpacking = (expr_ty)asdl_seq_GET(e->v.Dict.keys, i) == NULL;
        if (is_unpacking) {
            /* Flush the pending run of plain pairs before unpacking. */
            if (elements) {
                if (!compiler_subdict(c, e, i - elements, i)) {
                    return 0;
                }
                if (have_dict) {
                    ADDOP_I(c, DICT_UPDATE, 1);
                }
                have_dict = 1;
                elements = 0;
            }
            if (have_dict == 0) {
                /* Need a dict to update into. */
                ADDOP_I(c, BUILD_MAP, 0);
                have_dict = 1;
            }
            VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Dict.values, i));
            ADDOP_I(c, DICT_UPDATE, 1);
        }
        else {
            /* Flush early if the run would use too much stack. */
            if (elements*2 > STACK_USE_GUIDELINE) {
                if (!compiler_subdict(c, e, i - elements, i + 1)) {
                    return 0;
                }
                if (have_dict) {
                    ADDOP_I(c, DICT_UPDATE, 1);
                }
                have_dict = 1;
                elements = 0;
            }
            else {
                elements++;
            }
        }
    }
    /* Flush any trailing run of plain pairs. */
    if (elements) {
        if (!compiler_subdict(c, e, n - elements, n)) {
            return 0;
        }
        if (have_dict) {
            ADDOP_I(c, DICT_UPDATE, 1);
        }
        have_dict = 1;
    }
    if (!have_dict) {
        /* Empty display: {} */
        ADDOP_I(c, BUILD_MAP, 0);
    }
    return 1;
}
4075 
static int
compiler_compare(struct compiler *c, expr_ty e)
{
    /* Compile a comparison.  A single comparison emits one COMPARE op;
       a chained comparison (a < b < c) evaluates each middle operand
       once, duplicating it for the next link, and short-circuits to a
       cleanup block that discards the leftover operand on failure. */
    Py_ssize_t i, n;

    if (!check_compare(c, e)) {
        return 0;
    }
    VISIT(c, expr, e->v.Compare.left);
    assert(asdl_seq_LEN(e->v.Compare.ops) > 0);
    n = asdl_seq_LEN(e->v.Compare.ops) - 1;
    if (n == 0) {
        /* Simple two-operand comparison. */
        VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Compare.comparators, 0));
        ADDOP_COMPARE(c, asdl_seq_GET(e->v.Compare.ops, 0));
    }
    else {
        basicblock *cleanup = compiler_new_block(c);
        if (cleanup == NULL)
            return 0;
        for (i = 0; i < n; i++) {
            VISIT(c, expr,
                (expr_ty)asdl_seq_GET(e->v.Compare.comparators, i));
            /* Keep a copy of the right operand as the next left operand:
               stack [left, right] -> [right, left, right]. */
            ADDOP(c, DUP_TOP);
            ADDOP(c, ROT_THREE);
            ADDOP_COMPARE(c, asdl_seq_GET(e->v.Compare.ops, i));
            ADDOP_JUMP(c, JUMP_IF_FALSE_OR_POP, cleanup);
            NEXT_BLOCK(c);
        }
        /* Last link needs no duplicate of its right operand. */
        VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Compare.comparators, n));
        ADDOP_COMPARE(c, asdl_seq_GET(e->v.Compare.ops, n));
        basicblock *end = compiler_new_block(c);
        if (end == NULL)
            return 0;
        ADDOP_JUMP_NOLINE(c, JUMP_FORWARD, end);
        /* On early failure, drop the duplicated operand beneath the
           False result: [dup, False] -> [False]. */
        compiler_use_next_block(c, cleanup);
        ADDOP(c, ROT_TWO);
        ADDOP(c, POP_TOP);
        compiler_use_next_block(c, end);
    }
    return 1;
}
4117 
4118 static PyTypeObject *
infer_type(expr_ty e)4119 infer_type(expr_ty e)
4120 {
4121     switch (e->kind) {
4122     case Tuple_kind:
4123         return &PyTuple_Type;
4124     case List_kind:
4125     case ListComp_kind:
4126         return &PyList_Type;
4127     case Dict_kind:
4128     case DictComp_kind:
4129         return &PyDict_Type;
4130     case Set_kind:
4131     case SetComp_kind:
4132         return &PySet_Type;
4133     case GeneratorExp_kind:
4134         return &PyGen_Type;
4135     case Lambda_kind:
4136         return &PyFunction_Type;
4137     case JoinedStr_kind:
4138     case FormattedValue_kind:
4139         return &PyUnicode_Type;
4140     case Constant_kind:
4141         return Py_TYPE(e->v.Constant.value);
4142     default:
4143         return NULL;
4144     }
4145 }
4146 
4147 static int
check_caller(struct compiler * c,expr_ty e)4148 check_caller(struct compiler *c, expr_ty e)
4149 {
4150     switch (e->kind) {
4151     case Constant_kind:
4152     case Tuple_kind:
4153     case List_kind:
4154     case ListComp_kind:
4155     case Dict_kind:
4156     case DictComp_kind:
4157     case Set_kind:
4158     case SetComp_kind:
4159     case GeneratorExp_kind:
4160     case JoinedStr_kind:
4161     case FormattedValue_kind:
4162         return compiler_warn(c, "'%.200s' object is not callable; "
4163                                 "perhaps you missed a comma?",
4164                                 infer_type(e)->tp_name);
4165     default:
4166         return 1;
4167     }
4168 }
4169 
static int
check_subscripter(struct compiler *c, expr_ty e)
{
    /* Warn when `e` is being subscripted but is a kind of object that
       never supports subscription — usually a missing comma.  Returns 0
       only if the warning was escalated to an error. */
    PyObject *v;

    switch (e->kind) {
    case Constant_kind:
        /* Only warn for constant types that are never subscriptable;
           strings, bytes and tuples, for instance, are fine. */
        v = e->v.Constant.value;
        if (!(v == Py_None || v == Py_Ellipsis ||
              PyLong_Check(v) || PyFloat_Check(v) || PyComplex_Check(v) ||
              PyAnySet_Check(v)))
        {
            return 1;
        }
        /* fall through */
    case Set_kind:
    case SetComp_kind:
    case GeneratorExp_kind:
    case Lambda_kind:
        return compiler_warn(c, "'%.200s' object is not subscriptable; "
                                "perhaps you missed a comma?",
                                infer_type(e)->tp_name);
    default:
        return 1;
    }
}
4196 
static int
check_index(struct compiler *c, expr_ty e, expr_ty s)
{
    /* Warn when sequence `e` is indexed by `s` and `s` can statically
       be seen to be neither an integer nor a slice — usually a missing
       comma.  Returns 0 only if the warning was escalated to an error. */
    PyObject *v;

    PyTypeObject *index_type = infer_type(s);
    if (index_type == NULL
        || PyType_FastSubclass(index_type, Py_TPFLAGS_LONG_SUBCLASS)
        || index_type == &PySlice_Type) {
        /* Unknown type, int subclass, or slice: plausibly valid. */
        return 1;
    }

    switch (e->kind) {
    case Constant_kind:
        /* Only sequence-like constants take index arguments. */
        v = e->v.Constant.value;
        if (!(PyUnicode_Check(v) || PyBytes_Check(v) || PyTuple_Check(v))) {
            return 1;
        }
        /* fall through */
    case Tuple_kind:
    case List_kind:
    case ListComp_kind:
    case JoinedStr_kind:
    case FormattedValue_kind:
        return compiler_warn(c, "%.200s indices must be integers or slices, "
                                "not %.200s; "
                                "perhaps you missed a comma?",
                                infer_type(e)->tp_name,
                                index_type->tp_name);
    default:
        return 1;
    }
}
4230 
// Return 1 if the method call was optimized, -1 if not, and 0 on error.
static int
maybe_optimize_method_call(struct compiler *c, expr_ty e)
{
    /* Fast path for `obj.meth(args...)`: emit LOAD_METHOD/CALL_METHOD
       instead of LOAD_ATTR/CALL_FUNCTION.  Only applies to plain
       attribute calls with no keywords, no starred args, and an
       argument count within the stack-use guideline. */
    Py_ssize_t argsl, i;
    expr_ty meth = e->v.Call.func;
    asdl_expr_seq *args = e->v.Call.args;

    /* Check that the call node is an attribute access, and that
       the call doesn't have keyword parameters. */
    if (meth->kind != Attribute_kind || meth->v.Attribute.ctx != Load ||
            asdl_seq_LEN(e->v.Call.keywords)) {
        return -1;
    }
    /* Check that there aren't too many arguments */
    argsl = asdl_seq_LEN(args);
    if (argsl >= STACK_USE_GUIDELINE) {
        return -1;
    }
    /* Check that there are no *varargs types of arguments. */
    for (i = 0; i < argsl; i++) {
        expr_ty elt = asdl_seq_GET(args, i);
        if (elt->kind == Starred_kind) {
            return -1;
        }
    }

    /* Alright, we can optimize the code. */
    VISIT(c, expr, meth->v.Attribute.value);
    /* Attribute the LOAD_METHOD..CALL_METHOD range to the line where the
       attribute access ends, then restore the previous line number. */
    int old_lineno = c->u->u_lineno;
    c->u->u_lineno = meth->end_lineno;
    ADDOP_NAME(c, LOAD_METHOD, meth->v.Attribute.attr, names);
    VISIT_SEQ(c, expr, e->v.Call.args);
    ADDOP_I(c, CALL_METHOD, asdl_seq_LEN(e->v.Call.args));
    c->u->u_lineno = old_lineno;
    return 1;
}
4268 
static int
validate_keywords(struct compiler *c, asdl_keyword_seq *keywords)
{
    /* Validate the keyword arguments of a call: reject forbidden names
       and duplicated keyword names.  NULL `arg` entries (`**` unpacking)
       are skipped.  NOTE: unlike most helpers in this file, this one
       returns 0 on success and -1 on error. */
    Py_ssize_t nkeywords = asdl_seq_LEN(keywords);
    for (Py_ssize_t i = 0; i < nkeywords; i++) {
        keyword_ty key = ((keyword_ty)asdl_seq_GET(keywords, i));
        if (key->arg == NULL) {
            continue;
        }
        if (forbidden_name(c, key->arg, Store)) {
            return -1;
        }
        /* O(n^2) duplicate scan; keyword lists are small in practice. */
        for (Py_ssize_t j = i + 1; j < nkeywords; j++) {
            keyword_ty other = ((keyword_ty)asdl_seq_GET(keywords, j));
            /* PyUnicode_Compare() == 0 means the names are equal. */
            if (other->arg && !PyUnicode_Compare(key->arg, other->arg)) {
                /* Point the error at the duplicate occurrence. */
                SET_LOC(c, other);
                compiler_error(c, "keyword argument repeated: %U", key->arg);
                return -1;
            }
        }
    }
    return 0;
}
4292 
4293 static int
compiler_call(struct compiler * c,expr_ty e)4294 compiler_call(struct compiler *c, expr_ty e)
4295 {
4296     int ret = maybe_optimize_method_call(c, e);
4297     if (ret >= 0) {
4298         return ret;
4299     }
4300     if (!check_caller(c, e->v.Call.func)) {
4301         return 0;
4302     }
4303     VISIT(c, expr, e->v.Call.func);
4304     return compiler_call_helper(c, 0,
4305                                 e->v.Call.args,
4306                                 e->v.Call.keywords);
4307 }
4308 
4309 static int
compiler_joined_str(struct compiler * c,expr_ty e)4310 compiler_joined_str(struct compiler *c, expr_ty e)
4311 {
4312 
4313     Py_ssize_t value_count = asdl_seq_LEN(e->v.JoinedStr.values);
4314     if (value_count > STACK_USE_GUIDELINE) {
4315         ADDOP_LOAD_CONST_NEW(c, _PyUnicode_FromASCII("", 0));
4316         PyObject *join = _PyUnicode_FromASCII("join", 4);
4317         if (join == NULL) {
4318             return 0;
4319         }
4320         ADDOP_NAME(c, LOAD_METHOD, join, names);
4321         Py_DECREF(join);
4322         ADDOP_I(c, BUILD_LIST, 0);
4323         for (Py_ssize_t i = 0; i < asdl_seq_LEN(e->v.JoinedStr.values); i++) {
4324             VISIT(c, expr, asdl_seq_GET(e->v.JoinedStr.values, i));
4325             ADDOP_I(c, LIST_APPEND, 1);
4326         }
4327         ADDOP_I(c, CALL_METHOD, 1);
4328     }
4329     else {
4330         VISIT_SEQ(c, expr, e->v.JoinedStr.values);
4331         if (asdl_seq_LEN(e->v.JoinedStr.values) != 1) {
4332             ADDOP_I(c, BUILD_STRING, asdl_seq_LEN(e->v.JoinedStr.values));
4333         }
4334     }
4335     return 1;
4336 }
4337 
/* Used to implement f-strings. Format a single value. */
static int
compiler_formatted_value(struct compiler *c, expr_ty e)
{
    /* Compile one {...} replacement field of an f-string into
       <value> [<format_spec>] FORMAT_VALUE.
       Returns 1 on success, 0 on error. */

    /* Our oparg encodes 2 pieces of information: the conversion
       character, and whether or not a format_spec was provided.

       Convert the conversion char to 3 bits:
           : 000  0x0  FVC_NONE   The default if nothing specified.
       !s  : 001  0x1  FVC_STR
       !r  : 010  0x2  FVC_REPR
       !a  : 011  0x3  FVC_ASCII

       next bit is whether or not we have a format spec:
       yes : 100  0x4
       no  : 000  0x0
    */

    int conversion = e->v.FormattedValue.conversion;
    int oparg;

    /* The expression to be formatted. */
    VISIT(c, expr, e->v.FormattedValue.value);

    /* -1 means no explicit conversion was given in the source. */
    switch (conversion) {
    case 's': oparg = FVC_STR;   break;
    case 'r': oparg = FVC_REPR;  break;
    case 'a': oparg = FVC_ASCII; break;
    case -1:  oparg = FVC_NONE;  break;
    default:
        PyErr_Format(PyExc_SystemError,
                     "Unrecognized conversion character %d", conversion);
        return 0;
    }
    if (e->v.FormattedValue.format_spec) {
        /* Evaluate the format spec, and update our opcode arg. */
        VISIT(c, expr, e->v.FormattedValue.format_spec);
        oparg |= FVS_HAVE_SPEC;
    }

    /* And push our opcode and oparg */
    ADDOP_I(c, FORMAT_VALUE, oparg);

    return 1;
}
4383 
static int
compiler_subkwargs(struct compiler *c, asdl_keyword_seq *keywords, Py_ssize_t begin, Py_ssize_t end)
{
    /* Build a dict from keywords[begin:end] (a run of explicit keyword
       arguments, no `**` unpacking).  Mirrors compiler_subdict(): uses
       BUILD_CONST_KEY_MAP when possible, incremental MAP_ADD when the
       run is large, a single BUILD_MAP otherwise.  Returns 1 on
       success, 0 on error. */
    Py_ssize_t i, n = end - begin;
    keyword_ty kw;
    PyObject *keys, *key;
    assert(n > 0);
    int big = n*2 > STACK_USE_GUIDELINE;
    if (n > 1 && !big) {
        /* Push only the values; keyword names become one tuple constant. */
        for (i = begin; i < end; i++) {
            kw = asdl_seq_GET(keywords, i);
            VISIT(c, expr, kw->value);
        }
        keys = PyTuple_New(n);
        if (keys == NULL) {
            return 0;
        }
        for (i = begin; i < end; i++) {
            key = ((keyword_ty) asdl_seq_GET(keywords, i))->arg;
            Py_INCREF(key);
            PyTuple_SET_ITEM(keys, i - begin, key);
        }
        /* ADDOP_LOAD_CONST_NEW takes over the reference to `keys`. */
        ADDOP_LOAD_CONST_NEW(c, keys);
        ADDOP_I(c, BUILD_CONST_KEY_MAP, n);
        return 1;
    }
    if (big) {
        /* Large run: empty dict plus pairwise MAP_ADD keeps the stack
           bounded; NOLINE avoids attributing synthetic ops to a line. */
        ADDOP_I_NOLINE(c, BUILD_MAP, 0);
    }
    for (i = begin; i < end; i++) {
        kw = asdl_seq_GET(keywords, i);
        ADDOP_LOAD_CONST(c, kw->arg);
        VISIT(c, expr, kw->value);
        if (big) {
            ADDOP_I_NOLINE(c, MAP_ADD, 1);
        }
    }
    if (!big) {
        ADDOP_I(c, BUILD_MAP, n);
    }
    return 1;
}
4426 
/* shared code between compiler_call and compiler_class */
static int
compiler_call_helper(struct compiler *c,
                     int n, /* Args already pushed */
                     asdl_expr_seq *args,
                     asdl_keyword_seq *keywords)
{
    /* Emit the calling sequence for the callable already on the stack.
       Uses CALL_FUNCTION / CALL_FUNCTION_KW when there is no star or
       double-star unpacking and the argument count is modest; otherwise
       falls back to CALL_FUNCTION_EX with a packed args tuple and
       (optionally) a merged kwargs dict.  Returns 1 on success, 0 on
       error. */
    Py_ssize_t i, nseen, nelts, nkwelts;

    if (validate_keywords(c, keywords) == -1) {
        return 0;
    }

    nelts = asdl_seq_LEN(args);
    nkwelts = asdl_seq_LEN(keywords);

    /* Too many arguments for the flat calling convention's stack use. */
    if (nelts + nkwelts*2 > STACK_USE_GUIDELINE) {
         goto ex_call;
    }
    for (i = 0; i < nelts; i++) {
        expr_ty elt = asdl_seq_GET(args, i);
        if (elt->kind == Starred_kind) {
            goto ex_call;
        }
    }
    for (i = 0; i < nkwelts; i++) {
        keyword_ty kw = asdl_seq_GET(keywords, i);
        if (kw->arg == NULL) {
            /* NULL arg means `**` unpacking. */
            goto ex_call;
        }
    }

    /* No * or ** args, so can use faster calling sequence */
    for (i = 0; i < nelts; i++) {
        expr_ty elt = asdl_seq_GET(args, i);
        assert(elt->kind != Starred_kind);
        VISIT(c, expr, elt);
    }
    if (nkwelts) {
        PyObject *names;
        VISIT_SEQ(c, keyword, keywords);
        /* Keyword names are passed to CALL_FUNCTION_KW as one tuple
           constant sitting on top of the values. */
        names = PyTuple_New(nkwelts);
        if (names == NULL) {
            return 0;
        }
        for (i = 0; i < nkwelts; i++) {
            keyword_ty kw = asdl_seq_GET(keywords, i);
            Py_INCREF(kw->arg);
            PyTuple_SET_ITEM(names, i, kw->arg);
        }
        /* ADDOP_LOAD_CONST_NEW takes over the reference to `names`. */
        ADDOP_LOAD_CONST_NEW(c, names);
        ADDOP_I(c, CALL_FUNCTION_KW, n + nelts + nkwelts);
        return 1;
    }
    else {
        ADDOP_I(c, CALL_FUNCTION, n + nelts);
        return 1;
    }

ex_call:

    /* Do positional arguments. */
    if (n ==0 && nelts == 1 && ((expr_ty)asdl_seq_GET(args, 0))->kind == Starred_kind) {
        /* f(*iterable): push the iterable directly, no repacking needed. */
        VISIT(c, expr, ((expr_ty)asdl_seq_GET(args, 0))->v.Starred.value);
    }
    else if (starunpack_helper(c, args, n, BUILD_LIST,
                                 LIST_APPEND, LIST_EXTEND, 1) == 0) {
        return 0;
    }
    /* Then keyword arguments */
    if (nkwelts) {
        /* Has a new dict been pushed */
        int have_dict = 0;

        nseen = 0;  /* the number of keyword arguments on the stack following */
        for (i = 0; i < nkwelts; i++) {
            keyword_ty kw = asdl_seq_GET(keywords, i);
            if (kw->arg == NULL) {
                /* A keyword argument unpacking. */
                if (nseen) {
                    /* Flush the pending run of explicit keywords first. */
                    if (!compiler_subkwargs(c, keywords, i - nseen, i)) {
                        return 0;
                    }
                    if (have_dict) {
                        ADDOP_I(c, DICT_MERGE, 1);
                    }
                    have_dict = 1;
                    nseen = 0;
                }
                if (!have_dict) {
                    ADDOP_I(c, BUILD_MAP, 0);
                    have_dict = 1;
                }
                VISIT(c, expr, kw->value);
                /* DICT_MERGE (not DICT_UPDATE) raises on duplicate keys. */
                ADDOP_I(c, DICT_MERGE, 1);
            }
            else {
                nseen++;
            }
        }
        if (nseen) {
            /* Pack up any trailing keyword arguments. */
            if (!compiler_subkwargs(c, keywords, nkwelts - nseen, nkwelts)) {
                return 0;
            }
            if (have_dict) {
                ADDOP_I(c, DICT_MERGE, 1);
            }
            have_dict = 1;
        }
        assert(have_dict);
    }
    /* oparg bit 0 tells CALL_FUNCTION_EX whether a kwargs dict follows
       the args tuple on the stack. */
    ADDOP_I(c, CALL_FUNCTION_EX, nkwelts > 0);
    return 1;
}
4542 
4543 
4544 /* List and set comprehensions and generator expressions work by creating a
4545   nested function to perform the actual iteration. This means that the
4546   iteration variables don't leak into the current scope.
4547   The defined function is called immediately following its definition, with the
4548   result of that call being the result of the expression.
4549   The LC/SC version returns the populated container, while the GE version is
4550   flagged in symtable.c as a generator, so it returns the generator object
4551   when the function is called.
4552 
4553   Possible cleanups:
4554     - iterate over the generator sequence instead of using recursion
4555 */
4556 
4557 
4558 static int
compiler_comprehension_generator(struct compiler * c,asdl_comprehension_seq * generators,int gen_index,int depth,expr_ty elt,expr_ty val,int type)4559 compiler_comprehension_generator(struct compiler *c,
4560                                  asdl_comprehension_seq *generators, int gen_index,
4561                                  int depth,
4562                                  expr_ty elt, expr_ty val, int type)
4563 {
4564     comprehension_ty gen;
4565     gen = (comprehension_ty)asdl_seq_GET(generators, gen_index);
4566     if (gen->is_async) {
4567         return compiler_async_comprehension_generator(
4568             c, generators, gen_index, depth, elt, val, type);
4569     } else {
4570         return compiler_sync_comprehension_generator(
4571             c, generators, gen_index, depth, elt, val, type);
4572     }
4573 }
4574 
/* Compile one (non-async) `for ... in ... [if ...]` clause of a
 * comprehension, recursing via compiler_comprehension_generator() for
 * any remaining clauses and emitting the element append / yield for
 * the innermost one.  `depth` tracks how many loop iterators are on
 * the value stack beneath the container being built (used as the
 * oparg of LIST_APPEND/SET_ADD/MAP_ADD).
 * Returns 1 on success, 0 on error.
 */
static int
compiler_sync_comprehension_generator(struct compiler *c,
                                      asdl_comprehension_seq *generators, int gen_index,
                                      int depth,
                                      expr_ty elt, expr_ty val, int type)
{
    /* generate code for the iterator, then each of the ifs,
       and then write to the element */

    comprehension_ty gen;
    basicblock *start, *anchor, *skip, *if_cleanup;
    Py_ssize_t i, n;

    start = compiler_new_block(c);
    skip = compiler_new_block(c);
    if_cleanup = compiler_new_block(c);
    anchor = compiler_new_block(c);

    if (start == NULL || skip == NULL || if_cleanup == NULL ||
        anchor == NULL)
        return 0;

    gen = (comprehension_ty)asdl_seq_GET(generators, gen_index);

    if (gen_index == 0) {
        /* Receive outermost iter as an implicit argument */
        c->u->u_argcount = 1;
        ADDOP_I(c, LOAD_FAST, 0);
    }
    else {
        /* Sub-iter - calculate on the fly */
        /* Fast path for the temporary variable assignment idiom:
             for y in [f(x)]
         */
        asdl_expr_seq *elts;
        switch (gen->iter->kind) {
            case List_kind:
                elts = gen->iter->v.List.elts;
                break;
            case Tuple_kind:
                elts = gen->iter->v.Tuple.elts;
                break;
            default:
                elts = NULL;
        }
        if (asdl_seq_LEN(elts) == 1) {
            expr_ty elt = asdl_seq_GET(elts, 0);
            if (elt->kind != Starred_kind) {
                /* Push the single value directly; `start == NULL`
                   below means "no loop is emitted for this clause". */
                VISIT(c, expr, elt);
                start = NULL;
            }
        }
        if (start) {
            /* General case: evaluate the iterable and get its iterator. */
            VISIT(c, expr, gen->iter);
            ADDOP(c, GET_ITER);
        }
    }
    if (start) {
        /* A real loop: FOR_ITER pushes the next value each iteration
           and jumps to `anchor` on exhaustion. */
        depth++;
        compiler_use_next_block(c, start);
        ADDOP_JUMP(c, FOR_ITER, anchor);
        NEXT_BLOCK(c);
    }
    VISIT(c, expr, gen->target);

    /* XXX this needs to be cleaned up...a lot! */
    n = asdl_seq_LEN(gen->ifs);
    for (i = 0; i < n; i++) {
        /* Each `if` filter jumps to `if_cleanup` (the next iteration)
           when false. */
        expr_ty e = (expr_ty)asdl_seq_GET(gen->ifs, i);
        if (!compiler_jump_if(c, e, if_cleanup, 0))
            return 0;
        NEXT_BLOCK(c);
    }

    if (++gen_index < asdl_seq_LEN(generators))
        if (!compiler_comprehension_generator(c,
                                              generators, gen_index, depth,
                                              elt, val, type))
        return 0;

    /* only append after the last for generator */
    if (gen_index >= asdl_seq_LEN(generators)) {
        /* comprehension specific code */
        switch (type) {
        case COMP_GENEXP:
            VISIT(c, expr, elt);
            ADDOP(c, YIELD_VALUE);
            ADDOP(c, POP_TOP);
            break;
        case COMP_LISTCOMP:
            VISIT(c, expr, elt);
            ADDOP_I(c, LIST_APPEND, depth + 1);
            break;
        case COMP_SETCOMP:
            VISIT(c, expr, elt);
            ADDOP_I(c, SET_ADD, depth + 1);
            break;
        case COMP_DICTCOMP:
            /* With '{k: v}', k is evaluated before v, so we do
               the same. */
            VISIT(c, expr, elt);
            VISIT(c, expr, val);
            ADDOP_I(c, MAP_ADD, depth + 1);
            break;
        default:
            return 0;
        }

        compiler_use_next_block(c, skip);
    }
    compiler_use_next_block(c, if_cleanup);
    if (start) {
        /* Close the loop; `anchor` is where FOR_ITER exits to. */
        ADDOP_JUMP(c, JUMP_ABSOLUTE, start);
        compiler_use_next_block(c, anchor);
    }

    return 1;
}
4693 
/* Compile one `async for ... in ... [if ...]` clause of a
 * comprehension.  The loop is driven by GET_ANEXT/YIELD_FROM inside a
 * SETUP_FINALLY region; exhaustion is signalled by StopAsyncIteration,
 * which END_ASYNC_FOR consumes at the `except` block.
 * Returns 1 on success, 0 on error.
 */
static int
compiler_async_comprehension_generator(struct compiler *c,
                                      asdl_comprehension_seq *generators, int gen_index,
                                      int depth,
                                      expr_ty elt, expr_ty val, int type)
{
    comprehension_ty gen;
    basicblock *start, *if_cleanup, *except;
    Py_ssize_t i, n;
    start = compiler_new_block(c);
    except = compiler_new_block(c);
    if_cleanup = compiler_new_block(c);

    if (start == NULL || if_cleanup == NULL || except == NULL) {
        return 0;
    }

    gen = (comprehension_ty)asdl_seq_GET(generators, gen_index);

    if (gen_index == 0) {
        /* Receive outermost iter as an implicit argument */
        c->u->u_argcount = 1;
        ADDOP_I(c, LOAD_FAST, 0);
    }
    else {
        /* Sub-iter - calculate on the fly */
        VISIT(c, expr, gen->iter);
        ADDOP(c, GET_AITER);
    }

    compiler_use_next_block(c, start);
    /* Runtime will push a block here, so we need to account for that */
    if (!compiler_push_fblock(c, ASYNC_COMPREHENSION_GENERATOR, start,
                              NULL, NULL)) {
        return 0;
    }

    /* Await the next item: __anext__() awaited via YIELD_FROM, with a
       finally region so StopAsyncIteration lands at `except`. */
    ADDOP_JUMP(c, SETUP_FINALLY, except);
    ADDOP(c, GET_ANEXT);
    ADDOP_LOAD_CONST(c, Py_None);
    ADDOP(c, YIELD_FROM);
    ADDOP(c, POP_BLOCK);
    VISIT(c, expr, gen->target);

    n = asdl_seq_LEN(gen->ifs);
    for (i = 0; i < n; i++) {
        /* `if` filters jump to `if_cleanup` (next iteration) when false. */
        expr_ty e = (expr_ty)asdl_seq_GET(gen->ifs, i);
        if (!compiler_jump_if(c, e, if_cleanup, 0))
            return 0;
        NEXT_BLOCK(c);
    }

    depth++;
    if (++gen_index < asdl_seq_LEN(generators))
        if (!compiler_comprehension_generator(c,
                                              generators, gen_index, depth,
                                              elt, val, type))
        return 0;

    /* only append after the last for generator */
    if (gen_index >= asdl_seq_LEN(generators)) {
        /* comprehension specific code */
        switch (type) {
        case COMP_GENEXP:
            VISIT(c, expr, elt);
            ADDOP(c, YIELD_VALUE);
            ADDOP(c, POP_TOP);
            break;
        case COMP_LISTCOMP:
            VISIT(c, expr, elt);
            ADDOP_I(c, LIST_APPEND, depth + 1);
            break;
        case COMP_SETCOMP:
            VISIT(c, expr, elt);
            ADDOP_I(c, SET_ADD, depth + 1);
            break;
        case COMP_DICTCOMP:
            /* With '{k: v}', k is evaluated before v, so we do
               the same. */
            VISIT(c, expr, elt);
            VISIT(c, expr, val);
            ADDOP_I(c, MAP_ADD, depth + 1);
            break;
        default:
            return 0;
        }
    }
    compiler_use_next_block(c, if_cleanup);
    ADDOP_JUMP(c, JUMP_ABSOLUTE, start);

    compiler_pop_fblock(c, ASYNC_COMPREHENSION_GENERATOR, start);

    /* Loop exit: END_ASYNC_FOR handles the StopAsyncIteration raised
       by an exhausted async iterator. */
    compiler_use_next_block(c, except);
    ADDOP(c, END_ASYNC_FOR);

    return 1;
}
4791 
/* Compile a comprehension or generator expression of kind `type`
 * (COMP_GENEXP/LISTCOMP/SETCOMP/DICTCOMP): enter a new compilation
 * scope, compile the generator clauses into a nested code object,
 * then — back in the enclosing scope — make the closure, evaluate the
 * outermost iterable, and call the new function with it.
 * `val` is only non-NULL for dict comprehensions (elt is the key).
 * Returns 1 on success, 0 with an exception set on error.
 */
static int
compiler_comprehension(struct compiler *c, expr_ty e, int type,
                       identifier name, asdl_comprehension_seq *generators, expr_ty elt,
                       expr_ty val)
{
    PyCodeObject *co = NULL;
    comprehension_ty outermost;
    PyObject *qualname = NULL;
    int is_async_generator = 0;
    int top_level_await = IS_TOP_LEVEL_AWAIT(c);


    /* Whether the *enclosing* scope is already a coroutine, recorded
       before entering the comprehension's own scope. */
    int is_async_function = c->u->u_ste->ste_coroutine;

    outermost = (comprehension_ty) asdl_seq_GET(generators, 0);
    if (!compiler_enter_scope(c, name, COMPILER_SCOPE_COMPREHENSION,
                              (void *)e, e->lineno))
    {
        goto error;
    }
    SET_LOC(c, e);

    /* After entering the scope, ste_coroutine reflects whether the
       comprehension itself contains async constructs. */
    is_async_generator = c->u->u_ste->ste_coroutine;

    if (is_async_generator && !is_async_function && type != COMP_GENEXP  && !top_level_await) {
        compiler_error(c, "asynchronous comprehension outside of "
                          "an asynchronous function");
        goto error_in_scope;
    }

    if (type != COMP_GENEXP) {
        /* Non-generator comprehensions start with an empty container
           on the stack, which the clause code appends into. */
        int op;
        switch (type) {
        case COMP_LISTCOMP:
            op = BUILD_LIST;
            break;
        case COMP_SETCOMP:
            op = BUILD_SET;
            break;
        case COMP_DICTCOMP:
            op = BUILD_MAP;
            break;
        default:
            PyErr_Format(PyExc_SystemError,
                         "unknown comprehension type %d", type);
            goto error_in_scope;
        }

        ADDOP_I(c, op, 0);
    }

    if (!compiler_comprehension_generator(c, generators, 0, 0, elt,
                                          val, type))
        goto error_in_scope;

    if (type != COMP_GENEXP) {
        ADDOP(c, RETURN_VALUE);
    }

    co = assemble(c, 1);
    qualname = c->u->u_qualname;
    Py_INCREF(qualname);
    compiler_exit_scope(c);
    if (top_level_await && is_async_generator){
        /* Propagate coroutine-ness back to the enclosing scope when
           top-level await is allowed. */
        c->u->u_ste->ste_coroutine = 1;
    }
    if (co == NULL)
        goto error;

    if (!compiler_make_closure(c, co, 0, qualname)) {
        goto error;
    }
    Py_DECREF(qualname);
    Py_DECREF(co);

    /* The outermost iterable is evaluated in the enclosing scope and
       passed as the sole argument to the comprehension function. */
    VISIT(c, expr, outermost->iter);

    if (outermost->is_async) {
        ADDOP(c, GET_AITER);
    } else {
        ADDOP(c, GET_ITER);
    }

    ADDOP_I(c, CALL_FUNCTION, 1);

    if (is_async_generator && type != COMP_GENEXP) {
        /* Async list/set/dict comprehensions return an awaitable
           that must be awaited to get the container. */
        ADDOP(c, GET_AWAITABLE);
        ADDOP_LOAD_CONST(c, Py_None);
        ADDOP(c, YIELD_FROM);
    }

    return 1;
error_in_scope:
    compiler_exit_scope(c);
error:
    Py_XDECREF(qualname);
    Py_XDECREF(co);
    return 0;
}
4891 
4892 static int
compiler_genexp(struct compiler * c,expr_ty e)4893 compiler_genexp(struct compiler *c, expr_ty e)
4894 {
4895     static identifier name;
4896     if (!name) {
4897         name = PyUnicode_InternFromString("<genexpr>");
4898         if (!name)
4899             return 0;
4900     }
4901     assert(e->kind == GeneratorExp_kind);
4902     return compiler_comprehension(c, e, COMP_GENEXP, name,
4903                                   e->v.GeneratorExp.generators,
4904                                   e->v.GeneratorExp.elt, NULL);
4905 }
4906 
4907 static int
compiler_listcomp(struct compiler * c,expr_ty e)4908 compiler_listcomp(struct compiler *c, expr_ty e)
4909 {
4910     static identifier name;
4911     if (!name) {
4912         name = PyUnicode_InternFromString("<listcomp>");
4913         if (!name)
4914             return 0;
4915     }
4916     assert(e->kind == ListComp_kind);
4917     return compiler_comprehension(c, e, COMP_LISTCOMP, name,
4918                                   e->v.ListComp.generators,
4919                                   e->v.ListComp.elt, NULL);
4920 }
4921 
4922 static int
compiler_setcomp(struct compiler * c,expr_ty e)4923 compiler_setcomp(struct compiler *c, expr_ty e)
4924 {
4925     static identifier name;
4926     if (!name) {
4927         name = PyUnicode_InternFromString("<setcomp>");
4928         if (!name)
4929             return 0;
4930     }
4931     assert(e->kind == SetComp_kind);
4932     return compiler_comprehension(c, e, COMP_SETCOMP, name,
4933                                   e->v.SetComp.generators,
4934                                   e->v.SetComp.elt, NULL);
4935 }
4936 
4937 
4938 static int
compiler_dictcomp(struct compiler * c,expr_ty e)4939 compiler_dictcomp(struct compiler *c, expr_ty e)
4940 {
4941     static identifier name;
4942     if (!name) {
4943         name = PyUnicode_InternFromString("<dictcomp>");
4944         if (!name)
4945             return 0;
4946     }
4947     assert(e->kind == DictComp_kind);
4948     return compiler_comprehension(c, e, COMP_DICTCOMP, name,
4949                                   e->v.DictComp.generators,
4950                                   e->v.DictComp.key, e->v.DictComp.value);
4951 }
4952 
4953 
/* Emit code that evaluates the value of a keyword argument.  Only the
 * value is compiled here; NOTE(review): the keyword name itself
 * appears to be handled by the call-compilation code — confirm at the
 * call sites.  Returns 1 on success (VISIT aborts on error).
 */
static int
compiler_visit_keyword(struct compiler *c, keyword_ty k)
{
    VISIT(c, expr, k->value);
    return 1;
}
4960 
/* Test whether expression is constant.  For constants, report
   whether they are true or false.

   Return values: 1 for true, 0 for false, -1 for non-constant.

   NOTE(review): this comment describes a constant-folding helper, not
   the compiler_with_except_finish() function that follows; it appears
   to have been orphaned — confirm and relocate or remove it.
 */
4966 
/* Emit the tail of the exception path of a with / async-with block.
 * On entry the truthiness of the __exit__/__aexit__ result is on top
 * of the stack (pushed by WITH_EXCEPT_START, possibly awaited by the
 * caller).  If it is true the exception is swallowed: pop the three
 * exception values and the exception state; otherwise re-raise.
 * Returns 1 on success, 0 on error.
 */
static int
compiler_with_except_finish(struct compiler *c) {
    basicblock *exit;
    exit = compiler_new_block(c);
    if (exit == NULL)
        return 0;
    ADDOP_JUMP(c, POP_JUMP_IF_TRUE, exit);
    NEXT_BLOCK(c);
    /* __exit__ returned a false value: propagate the exception. */
    ADDOP_I(c, RERAISE, 1);
    compiler_use_next_block(c, exit);
    /* __exit__ returned a true value: discard the exception. */
    ADDOP(c, POP_TOP);
    ADDOP(c, POP_TOP);
    ADDOP(c, POP_TOP);
    ADDOP(c, POP_EXCEPT);
    ADDOP(c, POP_TOP);
    return 1;
}
4984 
4985 /*
4986    Implements the async with statement.
4987 
   The semantics outlined in PEP 492 are as follows:
4989 
4990    async with EXPR as VAR:
4991        BLOCK
4992 
4993    It is implemented roughly as:
4994 
4995    context = EXPR
4996    exit = context.__aexit__  # not calling it
4997    value = await context.__aenter__()
4998    try:
4999        VAR = value  # if VAR present in the syntax
5000        BLOCK
5001    finally:
5002        if an exception was raised:
5003            exc = copy of (exception, instance, traceback)
5004        else:
5005            exc = (None, None, None)
5006        if not (await exit(*exc)):
5007            raise
5008  */
/* Compile item `pos` of an `async with` statement, recursing for the
 * remaining items so each context manager gets its own
 * SETUP_ASYNC_WITH region.  Outside an async function this is only
 * permitted when top-level await is enabled.
 * Returns 1 on success, 0 on error.
 */
static int
compiler_async_with(struct compiler *c, stmt_ty s, int pos)
{
    basicblock *block, *final, *exit;
    withitem_ty item = asdl_seq_GET(s->v.AsyncWith.items, pos);

    assert(s->kind == AsyncWith_kind);
    if (IS_TOP_LEVEL_AWAIT(c)){
        /* Top-level await: mark the enclosing scope as a coroutine. */
        c->u->u_ste->ste_coroutine = 1;
    } else if (c->u->u_scope_type != COMPILER_SCOPE_ASYNC_FUNCTION){
        return compiler_error(c, "'async with' outside async function");
    }

    block = compiler_new_block(c);
    final = compiler_new_block(c);
    exit = compiler_new_block(c);
    if (!block || !final || !exit)
        return 0;

    /* Evaluate EXPR */
    VISIT(c, expr, item->context_expr);

    /* Await context.__aenter__(); __aexit__ stays on the stack. */
    ADDOP(c, BEFORE_ASYNC_WITH);
    ADDOP(c, GET_AWAITABLE);
    ADDOP_LOAD_CONST(c, Py_None);
    ADDOP(c, YIELD_FROM);

    ADDOP_JUMP(c, SETUP_ASYNC_WITH, final);

    /* SETUP_ASYNC_WITH pushes a finally block. */
    compiler_use_next_block(c, block);
    if (!compiler_push_fblock(c, ASYNC_WITH, block, final, s)) {
        return 0;
    }

    if (item->optional_vars) {
        VISIT(c, expr, item->optional_vars);
    }
    else {
    /* Discard result from context.__aenter__() */
        ADDOP(c, POP_TOP);
    }

    pos++;
    if (pos == asdl_seq_LEN(s->v.AsyncWith.items))
        /* BLOCK code */
        VISIT_SEQ(c, stmt, s->v.AsyncWith.body)
    else if (!compiler_async_with(c, s, pos))
            return 0;

    compiler_pop_fblock(c, ASYNC_WITH, block);
    ADDOP(c, POP_BLOCK);
    /* End of body; start the cleanup */

    /* For successful outcome:
     * call __exit__(None, None, None)
     */
    SET_LOC(c, s);
    if(!compiler_call_exit_with_nones(c))
        return 0;
    /* Await the __aexit__ call and discard its result. */
    ADDOP(c, GET_AWAITABLE);
    ADDOP_LOAD_CONST(c, Py_None);
    ADDOP(c, YIELD_FROM);

    ADDOP(c, POP_TOP);

    ADDOP_JUMP(c, JUMP_ABSOLUTE, exit);

    /* For exceptional outcome: */
    compiler_use_next_block(c, final);
    ADDOP(c, WITH_EXCEPT_START);
    ADDOP(c, GET_AWAITABLE);
    ADDOP_LOAD_CONST(c, Py_None);
    ADDOP(c, YIELD_FROM);
    compiler_with_except_finish(c);

    compiler_use_next_block(c, exit);
    return 1;
}
5088 
5089 
5090 /*
5091    Implements the with statement from PEP 343.
5092    with EXPR as VAR:
5093        BLOCK
5094    is implemented as:
5095         <code for EXPR>
5096         SETUP_WITH  E
5097         <code to store to VAR> or POP_TOP
5098         <code for BLOCK>
5099         LOAD_CONST (None, None, None)
5100         CALL_FUNCTION_EX 0
5101         JUMP_FORWARD  EXIT
5102     E:  WITH_EXCEPT_START (calls EXPR.__exit__)
5103         POP_JUMP_IF_TRUE T:
5104         RERAISE
5105     T:  POP_TOP * 3 (remove exception from stack)
5106         POP_EXCEPT
5107         POP_TOP
5108     EXIT:
5109  */
5110 
/* Compile item `pos` of a `with` statement (PEP 343), recursing for
 * the remaining items so each context manager gets its own SETUP_WITH
 * region.  Returns 1 on success, 0 on error.
 */
static int
compiler_with(struct compiler *c, stmt_ty s, int pos)
{
    basicblock *block, *final, *exit;
    withitem_ty item = asdl_seq_GET(s->v.With.items, pos);

    assert(s->kind == With_kind);

    block = compiler_new_block(c);
    final = compiler_new_block(c);
    exit = compiler_new_block(c);
    if (!block || !final || !exit)
        return 0;

    /* Evaluate EXPR */
    VISIT(c, expr, item->context_expr);
    /* Will push bound __exit__ */
    ADDOP_JUMP(c, SETUP_WITH, final);

    /* SETUP_WITH pushes a finally block. */
    compiler_use_next_block(c, block);
    if (!compiler_push_fblock(c, WITH, block, final, s)) {
        return 0;
    }

    if (item->optional_vars) {
        VISIT(c, expr, item->optional_vars);
    }
    else {
    /* Discard result from context.__enter__() */
        ADDOP(c, POP_TOP);
    }

    pos++;
    if (pos == asdl_seq_LEN(s->v.With.items))
        /* BLOCK code */
        VISIT_SEQ(c, stmt, s->v.With.body)
    else if (!compiler_with(c, s, pos))
            return 0;


    /* Mark all following code as artificial */
    c->u->u_lineno = -1;
    ADDOP(c, POP_BLOCK);
    compiler_pop_fblock(c, WITH, block);

    /* End of body; start the cleanup. */

    /* For successful outcome:
     * call __exit__(None, None, None)
     */
    SET_LOC(c, s);
    if (!compiler_call_exit_with_nones(c))
        return 0;
    ADDOP(c, POP_TOP);
    ADDOP_JUMP(c, JUMP_FORWARD, exit);

    /* For exceptional outcome: */
    compiler_use_next_block(c, final);
    ADDOP(c, WITH_EXCEPT_START);
    compiler_with_except_finish(c);

    compiler_use_next_block(c, exit);
    return 1;
}
5176 
/* Generate code for expression `e`, dispatching on its AST node kind.
 * The caller (compiler_visit_expr) has already set the source
 * location for `e`.  Returns 1 on success, 0 on error.
 */
static int
compiler_visit_expr1(struct compiler *c, expr_ty e)
{
    switch (e->kind) {
    case NamedExpr_kind:
        /* Walrus operator: duplicate the value, store one copy to the
           target, leave the other as the expression's result. */
        VISIT(c, expr, e->v.NamedExpr.value);
        ADDOP(c, DUP_TOP);
        VISIT(c, expr, e->v.NamedExpr.target);
        break;
    case BoolOp_kind:
        return compiler_boolop(c, e);
    case BinOp_kind:
        VISIT(c, expr, e->v.BinOp.left);
        VISIT(c, expr, e->v.BinOp.right);
        ADDOP(c, binop(e->v.BinOp.op));
        break;
    case UnaryOp_kind:
        VISIT(c, expr, e->v.UnaryOp.operand);
        ADDOP(c, unaryop(e->v.UnaryOp.op));
        break;
    case Lambda_kind:
        return compiler_lambda(c, e);
    case IfExp_kind:
        return compiler_ifexp(c, e);
    case Dict_kind:
        return compiler_dict(c, e);
    case Set_kind:
        return compiler_set(c, e);
    case GeneratorExp_kind:
        return compiler_genexp(c, e);
    case ListComp_kind:
        return compiler_listcomp(c, e);
    case SetComp_kind:
        return compiler_setcomp(c, e);
    case DictComp_kind:
        return compiler_dictcomp(c, e);
    case Yield_kind:
        if (c->u->u_ste->ste_type != FunctionBlock)
            return compiler_error(c, "'yield' outside function");
        if (e->v.Yield.value) {
            VISIT(c, expr, e->v.Yield.value);
        }
        else {
            /* Bare `yield` yields None. */
            ADDOP_LOAD_CONST(c, Py_None);
        }
        ADDOP(c, YIELD_VALUE);
        break;
    case YieldFrom_kind:
        if (c->u->u_ste->ste_type != FunctionBlock)
            return compiler_error(c, "'yield' outside function");

        if (c->u->u_scope_type == COMPILER_SCOPE_ASYNC_FUNCTION)
            return compiler_error(c, "'yield from' inside async function");

        VISIT(c, expr, e->v.YieldFrom.value);
        ADDOP(c, GET_YIELD_FROM_ITER);
        ADDOP_LOAD_CONST(c, Py_None);
        ADDOP(c, YIELD_FROM);
        break;
    case Await_kind:
        /* `await` is valid in async functions, in comprehensions
           (checked again by the comprehension compiler), and at the
           top level when top-level await is enabled. */
        if (!IS_TOP_LEVEL_AWAIT(c)){
            if (c->u->u_ste->ste_type != FunctionBlock){
                return compiler_error(c, "'await' outside function");
            }

            if (c->u->u_scope_type != COMPILER_SCOPE_ASYNC_FUNCTION &&
                    c->u->u_scope_type != COMPILER_SCOPE_COMPREHENSION){
                return compiler_error(c, "'await' outside async function");
            }
        }

        VISIT(c, expr, e->v.Await.value);
        ADDOP(c, GET_AWAITABLE);
        ADDOP_LOAD_CONST(c, Py_None);
        ADDOP(c, YIELD_FROM);
        break;
    case Compare_kind:
        return compiler_compare(c, e);
    case Call_kind:
        return compiler_call(c, e);
    case Constant_kind:
        ADDOP_LOAD_CONST(c, e->v.Constant.value);
        break;
    case JoinedStr_kind:
        return compiler_joined_str(c, e);
    case FormattedValue_kind:
        return compiler_formatted_value(c, e);
    /* The following exprs can be assignment targets. */
    case Attribute_kind:
        VISIT(c, expr, e->v.Attribute.value);
        switch (e->v.Attribute.ctx) {
        case Load:
        {
            /* Attribute the LOAD_ATTR to the end of the expression
               (the attribute name), then restore the line number. */
            int old_lineno = c->u->u_lineno;
            c->u->u_lineno = e->end_lineno;
            ADDOP_NAME(c, LOAD_ATTR, e->v.Attribute.attr, names);
            c->u->u_lineno = old_lineno;
            break;
        }
        case Store:
            if (forbidden_name(c, e->v.Attribute.attr, e->v.Attribute.ctx)) {
                return 0;
            }
            int old_lineno = c->u->u_lineno;
            c->u->u_lineno = e->end_lineno;
            ADDOP_NAME(c, STORE_ATTR, e->v.Attribute.attr, names);
            c->u->u_lineno = old_lineno;
            break;
        case Del:
            ADDOP_NAME(c, DELETE_ATTR, e->v.Attribute.attr, names);
            break;
        }
        break;
    case Subscript_kind:
        return compiler_subscript(c, e);
    case Starred_kind:
        switch (e->v.Starred.ctx) {
        case Store:
            /* In all legitimate cases, the Starred node was already replaced
             * by compiler_list/compiler_tuple. XXX: is that okay? */
            return compiler_error(c,
                "starred assignment target must be in a list or tuple");
        default:
            return compiler_error(c,
                "can't use starred expression here");
        }
        break;
    case Slice_kind:
        return compiler_slice(c, e);
    case Name_kind:
        return compiler_nameop(c, e->v.Name.id, e->v.Name.ctx);
    /* child nodes of List and Tuple will have expr_context set */
    case List_kind:
        return compiler_list(c, e);
    case Tuple_kind:
        return compiler_tuple(c, e);
    }
    return 1;
}
5316 
5317 static int
compiler_visit_expr(struct compiler * c,expr_ty e)5318 compiler_visit_expr(struct compiler *c, expr_ty e)
5319 {
5320     int old_lineno = c->u->u_lineno;
5321     int old_end_lineno = c->u->u_end_lineno;
5322     int old_col_offset = c->u->u_col_offset;
5323     int old_end_col_offset = c->u->u_end_col_offset;
5324     SET_LOC(c, e);
5325     int res = compiler_visit_expr1(c, e);
5326     c->u->u_lineno = old_lineno;
5327     c->u->u_end_lineno = old_end_lineno;
5328     c->u->u_col_offset = old_col_offset;
5329     c->u->u_end_col_offset = old_end_col_offset;
5330     return res;
5331 }
5332 
/* Compile an augmented assignment (`target op= value`): load the
 * target — duplicating object/subscript references so they are only
 * evaluated once — evaluate the value, apply the in-place binary op,
 * then store back through the duplicated references.
 * Returns 1 on success, 0 on error.
 */
static int
compiler_augassign(struct compiler *c, stmt_ty s)
{
    assert(s->kind == AugAssign_kind);
    expr_ty e = s->v.AugAssign.target;

    int old_lineno = c->u->u_lineno;
    int old_end_lineno = c->u->u_end_lineno;
    int old_col_offset = c->u->u_col_offset;
    int old_end_col_offset = c->u->u_end_col_offset;
    SET_LOC(c, e);

    switch (e->kind) {
    case Attribute_kind:
        VISIT(c, expr, e->v.Attribute.value);
        ADDOP(c, DUP_TOP);
        /* Shadows the function-level old_lineno on purpose; restored
           two lines below, so the outer save/restore is unaffected. */
        int old_lineno = c->u->u_lineno;
        c->u->u_lineno = e->end_lineno;
        ADDOP_NAME(c, LOAD_ATTR, e->v.Attribute.attr, names);
        c->u->u_lineno = old_lineno;
        break;
    case Subscript_kind:
        VISIT(c, expr, e->v.Subscript.value);
        VISIT(c, expr, e->v.Subscript.slice);
        ADDOP(c, DUP_TOP_TWO);
        ADDOP(c, BINARY_SUBSCR);
        break;
    case Name_kind:
        if (!compiler_nameop(c, e->v.Name.id, Load))
            return 0;
        break;
    default:
        PyErr_Format(PyExc_SystemError,
            "invalid node type (%d) for augmented assignment",
            e->kind);
        return 0;
    }

    /* Restore the statement's location before compiling the value. */
    c->u->u_lineno = old_lineno;
    c->u->u_end_lineno = old_end_lineno;
    c->u->u_col_offset = old_col_offset;
    c->u->u_end_col_offset = old_end_col_offset;

    VISIT(c, expr, s->v.AugAssign.value);
    ADDOP(c, inplace_binop(s->v.AugAssign.op));

    SET_LOC(c, e);

    switch (e->kind) {
    case Attribute_kind:
        c->u->u_lineno = e->end_lineno;
        ADDOP(c, ROT_TWO);
        ADDOP_NAME(c, STORE_ATTR, e->v.Attribute.attr, names);
        break;
    case Subscript_kind:
        ADDOP(c, ROT_THREE);
        ADDOP(c, STORE_SUBSCR);
        break;
    case Name_kind:
        return compiler_nameop(c, e->v.Name.id, Store);
    default:
        Py_UNREACHABLE();
    }
    return 1;
}
5398 
/* Evaluate expression `e` for its side effects only and discard the
 * result; used to verify names in annotations are defined at runtime.
 * Returns 1 on success (VISIT/ADDOP abort on error).
 */
static int
check_ann_expr(struct compiler *c, expr_ty e)
{
    VISIT(c, expr, e);
    ADDOP(c, POP_TOP);
    return 1;
}
5406 
5407 static int
check_annotation(struct compiler * c,stmt_ty s)5408 check_annotation(struct compiler *c, stmt_ty s)
5409 {
5410     /* Annotations of complex targets does not produce anything
5411        under annotations future */
5412     if (c->c_future->ff_features & CO_FUTURE_ANNOTATIONS) {
5413         return 1;
5414     }
5415 
5416     /* Annotations are only evaluated in a module or class. */
5417     if (c->u->u_scope_type == COMPILER_SCOPE_MODULE ||
5418         c->u->u_scope_type == COMPILER_SCOPE_CLASS) {
5419         return check_ann_expr(c, s->v.AnnAssign.annotation);
5420     }
5421     return 1;
5422 }
5423 
5424 static int
check_ann_subscr(struct compiler * c,expr_ty e)5425 check_ann_subscr(struct compiler *c, expr_ty e)
5426 {
5427     /* We check that everything in a subscript is defined at runtime. */
5428     switch (e->kind) {
5429     case Slice_kind:
5430         if (e->v.Slice.lower && !check_ann_expr(c, e->v.Slice.lower)) {
5431             return 0;
5432         }
5433         if (e->v.Slice.upper && !check_ann_expr(c, e->v.Slice.upper)) {
5434             return 0;
5435         }
5436         if (e->v.Slice.step && !check_ann_expr(c, e->v.Slice.step)) {
5437             return 0;
5438         }
5439         return 1;
5440     case Tuple_kind: {
5441         /* extended slice */
5442         asdl_expr_seq *elts = e->v.Tuple.elts;
5443         Py_ssize_t i, n = asdl_seq_LEN(elts);
5444         for (i = 0; i < n; i++) {
5445             if (!check_ann_subscr(c, asdl_seq_GET(elts, i))) {
5446                 return 0;
5447             }
5448         }
5449         return 1;
5450     }
5451     default:
5452         return check_ann_expr(c, e);
5453     }
5454 }
5455 
/* Compile an annotated assignment (`target: annotation [= value]`).
 * The assignment itself, if present, is performed first.  A simple
 * Name target in module or class scope additionally stores the
 * annotation into __annotations__; for complex targets the annotation
 * is merely evaluated (skipped entirely under
 * `from __future__ import annotations` — see check_annotation()).
 * Returns 1 on success, 0 on error.
 */
static int
compiler_annassign(struct compiler *c, stmt_ty s)
{
    expr_ty targ = s->v.AnnAssign.target;
    PyObject* mangled;

    assert(s->kind == AnnAssign_kind);

    /* We perform the actual assignment first. */
    if (s->v.AnnAssign.value) {
        VISIT(c, expr, s->v.AnnAssign.value);
        VISIT(c, expr, targ);
    }
    switch (targ->kind) {
    case Name_kind:
        if (forbidden_name(c, targ->v.Name.id, Store))
            return 0;
        /* If we have a simple name in a module or class, store annotation. */
        if (s->v.AnnAssign.simple &&
            (c->u->u_scope_type == COMPILER_SCOPE_MODULE ||
             c->u->u_scope_type == COMPILER_SCOPE_CLASS)) {
            if (c->c_future->ff_features & CO_FUTURE_ANNOTATIONS) {
                /* Stringified annotation under the annotations future. */
                VISIT(c, annexpr, s->v.AnnAssign.annotation)
            }
            else {
                VISIT(c, expr, s->v.AnnAssign.annotation);
            }
            /* __annotations__[mangled_name] = annotation */
            ADDOP_NAME(c, LOAD_NAME, __annotations__, names);
            mangled = _Py_Mangle(c->u->u_private, targ->v.Name.id);
            ADDOP_LOAD_CONST_NEW(c, mangled);
            ADDOP(c, STORE_SUBSCR);
        }
        break;
    case Attribute_kind:
        if (forbidden_name(c, targ->v.Attribute.attr, Store))
            return 0;
        /* Without a value, still evaluate the object expression for
           its side effects / NameError checks. */
        if (!s->v.AnnAssign.value &&
            !check_ann_expr(c, targ->v.Attribute.value)) {
            return 0;
        }
        break;
    case Subscript_kind:
        if (!s->v.AnnAssign.value &&
            (!check_ann_expr(c, targ->v.Subscript.value) ||
             !check_ann_subscr(c, targ->v.Subscript.slice))) {
                return 0;
        }
        break;
    default:
        PyErr_Format(PyExc_SystemError,
                     "invalid node type (%d) for annotated assignment",
                     targ->kind);
            return 0;
    }
    /* Annotation is evaluated last. */
    if (!s->v.AnnAssign.simple && !check_annotation(c, s)) {
        return 0;
    }
    return 1;
}
5516 
5517 /* Raises a SyntaxError and returns 0.
5518    If something goes wrong, a different exception may be raised.
5519 */
5520 
5521 static int
compiler_error(struct compiler * c,const char * format,...)5522 compiler_error(struct compiler *c, const char *format, ...)
5523 {
5524     va_list vargs;
5525 #ifdef HAVE_STDARG_PROTOTYPES
5526     va_start(vargs, format);
5527 #else
5528     va_start(vargs);
5529 #endif
5530     PyObject *msg = PyUnicode_FromFormatV(format, vargs);
5531     va_end(vargs);
5532     if (msg == NULL) {
5533         return 0;
5534     }
5535     PyObject *loc = PyErr_ProgramTextObject(c->c_filename, c->u->u_lineno);
5536     if (loc == NULL) {
5537         Py_INCREF(Py_None);
5538         loc = Py_None;
5539     }
5540     PyObject *args = Py_BuildValue("O(OiiOii)", msg, c->c_filename,
5541                                    c->u->u_lineno, c->u->u_col_offset + 1, loc,
5542                                    c->u->u_end_lineno, c->u->u_end_col_offset + 1);
5543     Py_DECREF(msg);
5544     if (args == NULL) {
5545         goto exit;
5546     }
5547     PyErr_SetObject(PyExc_SyntaxError, args);
5548  exit:
5549     Py_DECREF(loc);
5550     Py_XDECREF(args);
5551     return 0;
5552 }
5553 
5554 /* Emits a SyntaxWarning and returns 1 on success.
5555    If a SyntaxWarning raised as error, replaces it with a SyntaxError
5556    and returns 0.
5557 */
5558 static int
compiler_warn(struct compiler * c,const char * format,...)5559 compiler_warn(struct compiler *c, const char *format, ...)
5560 {
5561     va_list vargs;
5562 #ifdef HAVE_STDARG_PROTOTYPES
5563     va_start(vargs, format);
5564 #else
5565     va_start(vargs);
5566 #endif
5567     PyObject *msg = PyUnicode_FromFormatV(format, vargs);
5568     va_end(vargs);
5569     if (msg == NULL) {
5570         return 0;
5571     }
5572     if (PyErr_WarnExplicitObject(PyExc_SyntaxWarning, msg, c->c_filename,
5573                                  c->u->u_lineno, NULL, NULL) < 0)
5574     {
5575         if (PyErr_ExceptionMatches(PyExc_SyntaxWarning)) {
5576             /* Replace the SyntaxWarning exception with a SyntaxError
5577                to get a more accurate error report */
5578             PyErr_Clear();
5579             assert(PyUnicode_AsUTF8(msg) != NULL);
5580             compiler_error(c, PyUnicode_AsUTF8(msg));
5581         }
5582         Py_DECREF(msg);
5583         return 0;
5584     }
5585     Py_DECREF(msg);
5586     return 1;
5587 }
5588 
5589 static int
compiler_subscript(struct compiler * c,expr_ty e)5590 compiler_subscript(struct compiler *c, expr_ty e)
5591 {
5592     expr_context_ty ctx = e->v.Subscript.ctx;
5593     int op = 0;
5594 
5595     if (ctx == Load) {
5596         if (!check_subscripter(c, e->v.Subscript.value)) {
5597             return 0;
5598         }
5599         if (!check_index(c, e->v.Subscript.value, e->v.Subscript.slice)) {
5600             return 0;
5601         }
5602     }
5603 
5604     switch (ctx) {
5605         case Load:    op = BINARY_SUBSCR; break;
5606         case Store:   op = STORE_SUBSCR; break;
5607         case Del:     op = DELETE_SUBSCR; break;
5608     }
5609     assert(op);
5610     VISIT(c, expr, e->v.Subscript.value);
5611     VISIT(c, expr, e->v.Subscript.slice);
5612     ADDOP(c, op);
5613     return 1;
5614 }
5615 
5616 static int
compiler_slice(struct compiler * c,expr_ty s)5617 compiler_slice(struct compiler *c, expr_ty s)
5618 {
5619     int n = 2;
5620     assert(s->kind == Slice_kind);
5621 
5622     /* only handles the cases where BUILD_SLICE is emitted */
5623     if (s->v.Slice.lower) {
5624         VISIT(c, expr, s->v.Slice.lower);
5625     }
5626     else {
5627         ADDOP_LOAD_CONST(c, Py_None);
5628     }
5629 
5630     if (s->v.Slice.upper) {
5631         VISIT(c, expr, s->v.Slice.upper);
5632     }
5633     else {
5634         ADDOP_LOAD_CONST(c, Py_None);
5635     }
5636 
5637     if (s->v.Slice.step) {
5638         n++;
5639         VISIT(c, expr, s->v.Slice.step);
5640     }
5641     ADDOP_I(c, BUILD_SLICE, n);
5642     return 1;
5643 }
5644 
5645 
5646 // PEP 634: Structural Pattern Matching
5647 
5648 // To keep things simple, all compiler_pattern_* and pattern_helper_* routines
5649 // follow the convention of consuming TOS (the subject for the given pattern)
5650 // and calling jump_to_fail_pop on failure (no match).
5651 
5652 // When calling into these routines, it's important that pc->on_top be kept
5653 // updated to reflect the current number of items that we are using on the top
5654 // of the stack: they will be popped on failure, and any name captures will be
5655 // stored *underneath* them on success. This lets us defer all names stores
5656 // until the *entire* pattern matches.
5657 
// True if N is the wildcard pattern "_" (a nameless MatchAs).
#define WILDCARD_CHECK(N) \
    ((N)->kind == MatchAs_kind && !(N)->v.MatchAs.name)

// True if N is the starred wildcard pattern "*_" (a nameless MatchStar).
#define WILDCARD_STAR_CHECK(N) \
    ((N)->kind == MatchStar_kind && !(N)->v.MatchStar.name)

// Limit permitted subexpressions, even if the parser & AST validator let them through
#define MATCH_VALUE_EXPR(N) \
    ((N)->kind == Constant_kind || (N)->kind == Attribute_kind)
5667 
5668 // Allocate or resize pc->fail_pop to allow for n items to be popped on failure.
5669 static int
ensure_fail_pop(struct compiler * c,pattern_context * pc,Py_ssize_t n)5670 ensure_fail_pop(struct compiler *c, pattern_context *pc, Py_ssize_t n)
5671 {
5672     Py_ssize_t size = n + 1;
5673     if (size <= pc->fail_pop_size) {
5674         return 1;
5675     }
5676     Py_ssize_t needed = sizeof(basicblock*) * size;
5677     basicblock **resized = PyObject_Realloc(pc->fail_pop, needed);
5678     if (resized == NULL) {
5679         PyErr_NoMemory();
5680         return 0;
5681     }
5682     pc->fail_pop = resized;
5683     while (pc->fail_pop_size < size) {
5684         basicblock *new_block;
5685         RETURN_IF_FALSE(new_block = compiler_new_block(c));
5686         pc->fail_pop[pc->fail_pop_size++] = new_block;
5687     }
5688     return 1;
5689 }
5690 
// Use op to jump to the correct fail_pop block.
static int
jump_to_fail_pop(struct compiler *c, pattern_context *pc, int op)
{
    // Pop any items on the top of the stack, plus any objects we were going to
    // capture on success:
    Py_ssize_t pops = pc->on_top + PyList_GET_SIZE(pc->stores);
    // Make sure a block that pops exactly `pops` items exists, then jump to it:
    RETURN_IF_FALSE(ensure_fail_pop(c, pc, pops));
    ADDOP_JUMP(c, op, pc->fail_pop[pops]);
    NEXT_BLOCK(c);
    return 1;
}
5703 
// Build all of the fail_pop blocks and reset fail_pop.
static int
emit_and_reset_fail_pop(struct compiler *c, pattern_context *pc)
{
    if (!pc->fail_pop_size) {
        // Nothing was ever allocated; just start a fresh block.
        assert(pc->fail_pop == NULL);
        NEXT_BLOCK(c);
        return 1;
    }
    // Emit the blocks from highest index down: each fail_pop[i] emits one
    // POP_TOP and falls through into fail_pop[i-1], so landing on
    // fail_pop[i] pops exactly i items.
    while (--pc->fail_pop_size) {
        compiler_use_next_block(c, pc->fail_pop[pc->fail_pop_size]);
        if (!compiler_addop(c, POP_TOP)) {
            // Free the array before bailing out, and zero the size so the
            // state stays consistent.
            pc->fail_pop_size = 0;
            PyObject_Free(pc->fail_pop);
            pc->fail_pop = NULL;
            return 0;
        }
    }
    // fail_pop[0] pops nothing; it's just the "failed" landing pad.
    compiler_use_next_block(c, pc->fail_pop[0]);
    PyObject_Free(pc->fail_pop);
    pc->fail_pop = NULL;
    return 1;
}
5727 
// Raise a SyntaxError for a name bound more than once within a single
// pattern.  Always returns 0 (compiler_error's failure result).
static int
compiler_error_duplicate_store(struct compiler *c, identifier n)
{
    return compiler_error(c, "multiple assignments to name %R in pattern", n);
}
5733 
5734 static int
pattern_helper_store_name(struct compiler * c,identifier n,pattern_context * pc)5735 pattern_helper_store_name(struct compiler *c, identifier n, pattern_context *pc)
5736 {
5737     if (n == NULL) {
5738         ADDOP(c, POP_TOP);
5739         return 1;
5740     }
5741     if (forbidden_name(c, n, Store)) {
5742         return 0;
5743     }
5744     // Can't assign to the same name twice:
5745     int duplicate = PySequence_Contains(pc->stores, n);
5746     if (duplicate < 0) {
5747         return 0;
5748     }
5749     if (duplicate) {
5750         return compiler_error_duplicate_store(c, n);
5751     }
5752     // Rotate this object underneath any items we need to preserve:
5753     ADDOP_I(c, ROT_N, pc->on_top + PyList_GET_SIZE(pc->stores) + 1);
5754     return !PyList_Append(pc->stores, n);
5755 }
5756 
5757 
5758 static int
pattern_unpack_helper(struct compiler * c,asdl_pattern_seq * elts)5759 pattern_unpack_helper(struct compiler *c, asdl_pattern_seq *elts)
5760 {
5761     Py_ssize_t n = asdl_seq_LEN(elts);
5762     int seen_star = 0;
5763     for (Py_ssize_t i = 0; i < n; i++) {
5764         pattern_ty elt = asdl_seq_GET(elts, i);
5765         if (elt->kind == MatchStar_kind && !seen_star) {
5766             if ((i >= (1 << 8)) ||
5767                 (n-i-1 >= (INT_MAX >> 8)))
5768                 return compiler_error(c,
5769                     "too many expressions in "
5770                     "star-unpacking sequence pattern");
5771             ADDOP_I(c, UNPACK_EX, (i + ((n-i-1) << 8)));
5772             seen_star = 1;
5773         }
5774         else if (elt->kind == MatchStar_kind) {
5775             return compiler_error(c,
5776                 "multiple starred expressions in sequence pattern");
5777         }
5778     }
5779     if (!seen_star) {
5780         ADDOP_I(c, UNPACK_SEQUENCE, n);
5781     }
5782     return 1;
5783 }
5784 
5785 static int
pattern_helper_sequence_unpack(struct compiler * c,asdl_pattern_seq * patterns,Py_ssize_t star,pattern_context * pc)5786 pattern_helper_sequence_unpack(struct compiler *c, asdl_pattern_seq *patterns,
5787                                Py_ssize_t star, pattern_context *pc)
5788 {
5789     RETURN_IF_FALSE(pattern_unpack_helper(c, patterns));
5790     Py_ssize_t size = asdl_seq_LEN(patterns);
5791     // We've now got a bunch of new subjects on the stack. They need to remain
5792     // there after each subpattern match:
5793     pc->on_top += size;
5794     for (Py_ssize_t i = 0; i < size; i++) {
5795         // One less item to keep track of each time we loop through:
5796         pc->on_top--;
5797         pattern_ty pattern = asdl_seq_GET(patterns, i);
5798         RETURN_IF_FALSE(compiler_pattern_subpattern(c, pattern, pc));
5799     }
5800     return 1;
5801 }
5802 
// Like pattern_helper_sequence_unpack, but uses BINARY_SUBSCR instead of
// UNPACK_SEQUENCE / UNPACK_EX. This is more efficient for patterns with a
// starred wildcard like [first, *_] / [first, *_, last] / [*_, last] / etc.
static int
pattern_helper_sequence_subscr(struct compiler *c, asdl_pattern_seq *patterns,
                               Py_ssize_t star, pattern_context *pc)
{
    // We need to keep the subject around for extracting elements:
    pc->on_top++;
    Py_ssize_t size = asdl_seq_LEN(patterns);
    for (Py_ssize_t i = 0; i < size; i++) {
        pattern_ty pattern = asdl_seq_GET(patterns, i);
        if (WILDCARD_CHECK(pattern)) {
            // "_" matches anything; no need to index the subject.
            continue;
        }
        if (i == star) {
            // This strategy is only used when the starred pattern is a
            // wildcard ("*_"), so it captures nothing either:
            assert(WILDCARD_STAR_CHECK(pattern));
            continue;
        }
        ADDOP(c, DUP_TOP);
        if (i < star) {
            // Before the star: a plain nonnegative index works.
            ADDOP_LOAD_CONST_NEW(c, PyLong_FromSsize_t(i));
        }
        else {
            // The subject may not support negative indexing! Compute a
            // nonnegative index: len(subject) - (size - i)
            ADDOP(c, GET_LEN);
            ADDOP_LOAD_CONST_NEW(c, PyLong_FromSsize_t(size - i));
            ADDOP(c, BINARY_SUBTRACT);
        }
        ADDOP(c, BINARY_SUBSCR);
        RETURN_IF_FALSE(compiler_pattern_subpattern(c, pattern, pc));
    }
    // Pop the subject, we're done with it:
    pc->on_top--;
    ADDOP(c, POP_TOP);
    return 1;
}
5841 
5842 // Like compiler_pattern, but turn off checks for irrefutability.
5843 static int
compiler_pattern_subpattern(struct compiler * c,pattern_ty p,pattern_context * pc)5844 compiler_pattern_subpattern(struct compiler *c, pattern_ty p, pattern_context *pc)
5845 {
5846     int allow_irrefutable = pc->allow_irrefutable;
5847     pc->allow_irrefutable = 1;
5848     RETURN_IF_FALSE(compiler_pattern(c, p, pc));
5849     pc->allow_irrefutable = allow_irrefutable;
5850     return 1;
5851 }
5852 
5853 static int
compiler_pattern_as(struct compiler * c,pattern_ty p,pattern_context * pc)5854 compiler_pattern_as(struct compiler *c, pattern_ty p, pattern_context *pc)
5855 {
5856     assert(p->kind == MatchAs_kind);
5857     if (p->v.MatchAs.pattern == NULL) {
5858         // An irrefutable match:
5859         if (!pc->allow_irrefutable) {
5860             if (p->v.MatchAs.name) {
5861                 const char *e = "name capture %R makes remaining patterns unreachable";
5862                 return compiler_error(c, e, p->v.MatchAs.name);
5863             }
5864             const char *e = "wildcard makes remaining patterns unreachable";
5865             return compiler_error(c, e);
5866         }
5867         return pattern_helper_store_name(c, p->v.MatchAs.name, pc);
5868     }
5869     // Need to make a copy for (possibly) storing later:
5870     pc->on_top++;
5871     ADDOP(c, DUP_TOP);
5872     RETURN_IF_FALSE(compiler_pattern(c, p->v.MatchAs.pattern, pc));
5873     // Success! Store it:
5874     pc->on_top--;
5875     RETURN_IF_FALSE(pattern_helper_store_name(c, p->v.MatchAs.name, pc));
5876     return 1;
5877 }
5878 
5879 static int
compiler_pattern_star(struct compiler * c,pattern_ty p,pattern_context * pc)5880 compiler_pattern_star(struct compiler *c, pattern_ty p, pattern_context *pc)
5881 {
5882     assert(p->kind == MatchStar_kind);
5883     RETURN_IF_FALSE(pattern_helper_store_name(c, p->v.MatchStar.name, pc));
5884     return 1;
5885 }
5886 
5887 static int
validate_kwd_attrs(struct compiler * c,asdl_identifier_seq * attrs,asdl_pattern_seq * patterns)5888 validate_kwd_attrs(struct compiler *c, asdl_identifier_seq *attrs, asdl_pattern_seq* patterns)
5889 {
5890     // Any errors will point to the pattern rather than the arg name as the
5891     // parser is only supplying identifiers rather than Name or keyword nodes
5892     Py_ssize_t nattrs = asdl_seq_LEN(attrs);
5893     for (Py_ssize_t i = 0; i < nattrs; i++) {
5894         identifier attr = ((identifier)asdl_seq_GET(attrs, i));
5895         SET_LOC(c, ((pattern_ty) asdl_seq_GET(patterns, i)));
5896         if (forbidden_name(c, attr, Store)) {
5897             return -1;
5898         }
5899         for (Py_ssize_t j = i + 1; j < nattrs; j++) {
5900             identifier other = ((identifier)asdl_seq_GET(attrs, j));
5901             if (!PyUnicode_Compare(attr, other)) {
5902                 SET_LOC(c, ((pattern_ty) asdl_seq_GET(patterns, j)));
5903                 compiler_error(c, "attribute name repeated in class pattern: %U", attr);
5904                 return -1;
5905             }
5906         }
5907     }
5908     return 0;
5909 }
5910 
// Compile a class pattern like "Point(0, y=1)".  MATCH_CLASS extracts the
// positional and keyword attributes into a tuple, then each sub-pattern is
// matched against the corresponding extracted value.  Consumes TOS (the
// subject); jumps to a fail_pop block on failure.
static int
compiler_pattern_class(struct compiler *c, pattern_ty p, pattern_context *pc)
{
    assert(p->kind == MatchClass_kind);
    asdl_pattern_seq *patterns = p->v.MatchClass.patterns;
    asdl_identifier_seq *kwd_attrs = p->v.MatchClass.kwd_attrs;
    asdl_pattern_seq *kwd_patterns = p->v.MatchClass.kwd_patterns;
    Py_ssize_t nargs = asdl_seq_LEN(patterns);
    Py_ssize_t nattrs = asdl_seq_LEN(kwd_attrs);
    Py_ssize_t nkwd_patterns = asdl_seq_LEN(kwd_patterns);
    if (nattrs != nkwd_patterns) {
        // AST validator shouldn't let this happen, but if it does,
        // just fail, don't crash out of the interpreter
        const char * e = "kwd_attrs (%d) / kwd_patterns (%d) length mismatch in class pattern";
        return compiler_error(c, e, nattrs, nkwd_patterns);
    }
    if (INT_MAX < nargs || INT_MAX < nargs + nattrs - 1) {
        const char *e = "too many sub-patterns in class pattern %R";
        return compiler_error(c, e, p->v.MatchClass.cls);
    }
    if (nattrs) {
        // Reject forbidden or repeated keyword attribute names up front:
        RETURN_IF_FALSE(!validate_kwd_attrs(c, kwd_attrs, kwd_patterns));
        SET_LOC(c, p);  // validate_kwd_attrs moved the location; restore it
    }
    VISIT(c, expr, p->v.MatchClass.cls);
    // Build the tuple of keyword attribute names for MATCH_CLASS:
    PyObject *attr_names;
    RETURN_IF_FALSE(attr_names = PyTuple_New(nattrs));
    Py_ssize_t i;
    for (i = 0; i < nattrs; i++) {
        PyObject *name = asdl_seq_GET(kwd_attrs, i);
        Py_INCREF(name);
        PyTuple_SET_ITEM(attr_names, i, name);
    }
    ADDOP_LOAD_CONST_NEW(c, attr_names);
    ADDOP_I(c, MATCH_CLASS, nargs);
    // TOS is now a tuple of (nargs + nattrs) attributes. Preserve it:
    pc->on_top++;
    RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE));
    for (i = 0; i < nargs + nattrs; i++) {
        pattern_ty pattern;
        if (i < nargs) {
            // Positional:
            pattern = asdl_seq_GET(patterns, i);
        }
        else {
            // Keyword:
            pattern = asdl_seq_GET(kwd_patterns, i - nargs);
        }
        if (WILDCARD_CHECK(pattern)) {
            // "_" always matches; skip the extraction and subpattern.
            continue;
        }
        // Get the i-th attribute, and match it against the i-th pattern:
        ADDOP(c, DUP_TOP);
        ADDOP_LOAD_CONST_NEW(c, PyLong_FromSsize_t(i));
        ADDOP(c, BINARY_SUBSCR);
        RETURN_IF_FALSE(compiler_pattern_subpattern(c, pattern, pc));
    }
    // Success! Pop the tuple of attributes:
    pc->on_top--;
    ADDOP(c, POP_TOP);
    return 1;
}
5973 
// Compile a mapping pattern like {"x": p, CONST.q: r, **rest}.  Consumes TOS
// (the subject); jumps to a fail_pop block if the subject isn't a mapping,
// is too short, or any key/sub-pattern fails to match.  Keys must be
// literals or attribute lookups; duplicate literal keys are a SyntaxError.
static int
compiler_pattern_mapping(struct compiler *c, pattern_ty p, pattern_context *pc)
{
    assert(p->kind == MatchMapping_kind);
    asdl_expr_seq *keys = p->v.MatchMapping.keys;
    asdl_pattern_seq *patterns = p->v.MatchMapping.patterns;
    Py_ssize_t size = asdl_seq_LEN(keys);
    Py_ssize_t npatterns = asdl_seq_LEN(patterns);
    if (size != npatterns) {
        // AST validator shouldn't let this happen, but if it does,
        // just fail, don't crash out of the interpreter
        const char * e = "keys (%d) / patterns (%d) length mismatch in mapping pattern";
        return compiler_error(c, e, size, npatterns);
    }
    // We have a double-star target if "rest" is set
    PyObject *star_target = p->v.MatchMapping.rest;
    // We need to keep the subject on top during the mapping and length checks:
    pc->on_top++;
    ADDOP(c, MATCH_MAPPING);
    RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE));
    if (!size && !star_target) {
        // If the pattern is just "{}", we're done! Pop the subject:
        pc->on_top--;
        ADDOP(c, POP_TOP);
        return 1;
    }
    if (size) {
        // If the pattern has any keys in it, perform a length check:
        ADDOP(c, GET_LEN);
        ADDOP_LOAD_CONST_NEW(c, PyLong_FromSsize_t(size));
        ADDOP_COMPARE(c, GtE);
        RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE));
    }
    if (INT_MAX < size - 1) {
        return compiler_error(c, "too many sub-patterns in mapping pattern");
    }
    // Collect all of the keys into a tuple for MATCH_KEYS and
    // COPY_DICT_WITHOUT_KEYS. They can either be dotted names or literals:

    // Maintaining a set of Constant_kind kind keys allows us to raise a
    // SyntaxError in the case of duplicates.
    PyObject *seen = PySet_New(NULL);
    if (seen == NULL) {
        return 0;
    }

    // NOTE: goto error on failure in the loop below to avoid leaking `seen`
    for (Py_ssize_t i = 0; i < size; i++) {
        expr_ty key = asdl_seq_GET(keys, i);
        if (key == NULL) {
            // A NULL key marks "**rest" in the AST; it must be expressed via
            // the rest field instead of appearing in keys:
            const char *e = "can't use NULL keys in MatchMapping "
                            "(set 'rest' parameter instead)";
            SET_LOC(c, ((pattern_ty) asdl_seq_GET(patterns, i)));
            compiler_error(c, e);
            goto error;
        }

        if (key->kind == Constant_kind) {
            // Literal key: reject duplicates using the `seen` set.
            int in_seen = PySet_Contains(seen, key->v.Constant.value);
            if (in_seen < 0) {
                goto error;
            }
            if (in_seen) {
                const char *e = "mapping pattern checks duplicate key (%R)";
                compiler_error(c, e, key->v.Constant.value);
                goto error;
            }
            if (PySet_Add(seen, key->v.Constant.value)) {
                goto error;
            }
        }

        else if (key->kind != Attribute_kind) {
            const char *e = "mapping pattern keys may only match literals and attribute lookups";
            compiler_error(c, e);
            goto error;
        }
        if (!compiler_visit_expr(c, key)) {
            goto error;
        }
    }

    // all keys have been checked; there are no duplicates
    Py_DECREF(seen);

    ADDOP_I(c, BUILD_TUPLE, size);
    ADDOP(c, MATCH_KEYS);
    // There's now a tuple of keys and a tuple of values on top of the subject:
    pc->on_top += 2;
    RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE));
    // So far so good. Use that tuple of values on the stack to match
    // sub-patterns against:
    for (Py_ssize_t i = 0; i < size; i++) {
        pattern_ty pattern = asdl_seq_GET(patterns, i);
        if (WILDCARD_CHECK(pattern)) {
            // "_" always matches; skip the extraction and subpattern.
            continue;
        }
        ADDOP(c, DUP_TOP);
        ADDOP_LOAD_CONST_NEW(c, PyLong_FromSsize_t(i));
        ADDOP(c, BINARY_SUBSCR);
        RETURN_IF_FALSE(compiler_pattern_subpattern(c, pattern, pc));
    }
    // If we get this far, it's a match! We're done with the tuple of values,
    // and whatever happens next should consume the tuple of keys underneath it:
    pc->on_top -= 2;
    ADDOP(c, POP_TOP);
    if (star_target) {
        // If we have a starred name, bind a dict of remaining items to it:
        ADDOP(c, COPY_DICT_WITHOUT_KEYS);
        RETURN_IF_FALSE(pattern_helper_store_name(c, star_target, pc));
    }
    else {
        // Otherwise, we don't care about this tuple of keys anymore:
        ADDOP(c, POP_TOP);
    }
    // Pop the subject:
    pc->on_top--;
    ADDOP(c, POP_TOP);
    return 1;

error:
    Py_DECREF(seen);
    return 0;
}
6098 
// Compile an or-pattern like "a | b | c".  Each alternative is tried in order
// against a copy of the subject; the first match jumps to `end`.  Every
// alternative must bind the same set of names as the first ("control")
// alternative; captures are rotated into control's order so that the final
// stores line up no matter which alternative matched.
static int
compiler_pattern_or(struct compiler *c, pattern_ty p, pattern_context *pc)
{
    assert(p->kind == MatchOr_kind);
    basicblock *end;
    RETURN_IF_FALSE(end = compiler_new_block(c));
    Py_ssize_t size = asdl_seq_LEN(p->v.MatchOr.patterns);
    assert(size > 1);
    // We're going to be messing with pc. Keep the original info handy:
    pattern_context old_pc = *pc;
    Py_INCREF(pc->stores);
    // control is the list of names bound by the first alternative. It is used
    // for checking different name bindings in alternatives, and for correcting
    // the order in which extracted elements are placed on the stack.
    PyObject *control = NULL;
    // NOTE: We can't use returning macros anymore! goto error on error.
    for (Py_ssize_t i = 0; i < size; i++) {
        pattern_ty alt = asdl_seq_GET(p->v.MatchOr.patterns, i);
        SET_LOC(c, alt);
        // Each alternative gets a fresh stores list and fresh fail_pop state:
        PyObject *pc_stores = PyList_New(0);
        if (pc_stores == NULL) {
            goto error;
        }
        Py_SETREF(pc->stores, pc_stores);
        // An irrefutable sub-pattern must be last, if it is allowed at all:
        pc->allow_irrefutable = (i == size - 1) && old_pc.allow_irrefutable;
        pc->fail_pop = NULL;
        pc->fail_pop_size = 0;
        pc->on_top = 0;
        // Match this alternative against a copy of the subject:
        if (!compiler_addop(c, DUP_TOP) || !compiler_pattern(c, alt, pc)) {
            goto error;
        }
        // Success!
        Py_ssize_t nstores = PyList_GET_SIZE(pc->stores);
        if (!i) {
            // This is the first alternative, so save its stores as a "control"
            // for the others (they can't bind a different set of names, and
            // might need to be reordered):
            assert(control == NULL);
            control = pc->stores;
            Py_INCREF(control);
        }
        else if (nstores != PyList_GET_SIZE(control)) {
            goto diff;
        }
        else if (nstores) {
            // There were captures. Check to see if we differ from control:
            Py_ssize_t icontrol = nstores;
            while (icontrol--) {
                PyObject *name = PyList_GET_ITEM(control, icontrol);
                Py_ssize_t istores = PySequence_Index(pc->stores, name);
                if (istores < 0) {
                    PyErr_Clear();
                    goto diff;
                }
                if (icontrol != istores) {
                    // Reorder the names on the stack to match the order of the
                    // names in control. There's probably a better way of doing
                    // this; the current solution is potentially very
                    // inefficient when each alternative subpattern binds lots
                    // of names in different orders. It's fine for reasonable
                    // cases, though.
                    assert(istores < icontrol);
                    Py_ssize_t rotations = istores + 1;
                    // Perform the same rotation on pc->stores:
                    PyObject *rotated = PyList_GetSlice(pc->stores, 0,
                                                        rotations);
                    if (rotated == NULL ||
                        PyList_SetSlice(pc->stores, 0, rotations, NULL) ||
                        PyList_SetSlice(pc->stores, icontrol - istores,
                                        icontrol - istores, rotated))
                    {
                        Py_XDECREF(rotated);
                        goto error;
                    }
                    Py_DECREF(rotated);
                    // That just did:
                    // rotated = pc_stores[:rotations]
                    // del pc_stores[:rotations]
                    // pc_stores[icontrol-istores:icontrol-istores] = rotated
                    // Do the same thing to the stack, using several ROT_Ns:
                    while (rotations--) {
                        if (!compiler_addop_i(c, ROT_N, icontrol + 1)) {
                            goto error;
                        }
                    }
                }
            }
        }
        assert(control);
        // This alternative matched: jump past the remaining alternatives,
        // then emit this alternative's fail_pop blocks for the next one:
        if (!compiler_addop_j(c, JUMP_FORWARD, end) ||
            !compiler_next_block(c) ||
            !emit_and_reset_fail_pop(c, pc))
        {
            goto error;
        }
    }
    // All alternatives failed. Restore the caller's pattern_context:
    Py_DECREF(pc->stores);
    *pc = old_pc;
    Py_INCREF(pc->stores);
    // Need to NULL this for the PyObject_Free call in the error block.
    old_pc.fail_pop = NULL;
    // No match. Pop the remaining copy of the subject and fail:
    if (!compiler_addop(c, POP_TOP) || !jump_to_fail_pop(c, pc, JUMP_FORWARD)) {
        goto error;
    }
    compiler_use_next_block(c, end);
    Py_ssize_t nstores = PyList_GET_SIZE(control);
    // There's a bunch of stuff on the stack between any where the new stores
    // are and where they need to be:
    // - The other stores.
    // - A copy of the subject.
    // - Anything else that may be on top of the stack.
    // - Any previous stores we've already stashed away on the stack.
    Py_ssize_t nrots = nstores + 1 + pc->on_top + PyList_GET_SIZE(pc->stores);
    for (Py_ssize_t i = 0; i < nstores; i++) {
        // Rotate this capture to its proper place on the stack:
        if (!compiler_addop_i(c, ROT_N, nrots)) {
            goto error;
        }
        // Update the list of previous stores with this new name, checking for
        // duplicates:
        PyObject *name = PyList_GET_ITEM(control, i);
        int dupe = PySequence_Contains(pc->stores, name);
        if (dupe < 0) {
            goto error;
        }
        if (dupe) {
            compiler_error_duplicate_store(c, name);
            goto error;
        }
        if (PyList_Append(pc->stores, name)) {
            goto error;
        }
    }
    Py_DECREF(old_pc.stores);
    Py_DECREF(control);
    // NOTE: Returning macros are safe again.
    // Pop the copy of the subject:
    ADDOP(c, POP_TOP);
    return 1;
diff:
    compiler_error(c, "alternative patterns bind different names");
error:
    PyObject_Free(old_pc.fail_pop);
    Py_DECREF(old_pc.stores);
    Py_XDECREF(control);
    return 0;
}
6248 
6249 
// Compile a sequence pattern like "[a, b, *rest]".  Consumes TOS (the
// subject).  Chooses between three strategies: all-wildcard patterns need
// only the type/length checks; a starred *wildcard* allows direct indexing
// of the subject; otherwise the subject is fully unpacked.
static int
compiler_pattern_sequence(struct compiler *c, pattern_ty p, pattern_context *pc)
{
    assert(p->kind == MatchSequence_kind);
    asdl_pattern_seq *patterns = p->v.MatchSequence.patterns;
    Py_ssize_t size = asdl_seq_LEN(patterns);
    Py_ssize_t star = -1;
    int only_wildcard = 1;
    int star_wildcard = 0;
    // Find a starred name, if it exists. There may be at most one:
    for (Py_ssize_t i = 0; i < size; i++) {
        pattern_ty pattern = asdl_seq_GET(patterns, i);
        if (pattern->kind == MatchStar_kind) {
            if (star >= 0) {
                const char *e = "multiple starred names in sequence pattern";
                return compiler_error(c, e);
            }
            star_wildcard = WILDCARD_STAR_CHECK(pattern);
            only_wildcard &= star_wildcard;
            star = i;
            continue;
        }
        only_wildcard &= WILDCARD_CHECK(pattern);
    }
    // We need to keep the subject on top during the sequence and length checks:
    pc->on_top++;
    ADDOP(c, MATCH_SEQUENCE);
    RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE));
    if (star < 0) {
        // No star: len(subject) == size
        ADDOP(c, GET_LEN);
        ADDOP_LOAD_CONST_NEW(c, PyLong_FromSsize_t(size));
        ADDOP_COMPARE(c, Eq);
        RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE));
    }
    else if (size > 1) {
        // Star: len(subject) >= size - 1
        ADDOP(c, GET_LEN);
        ADDOP_LOAD_CONST_NEW(c, PyLong_FromSsize_t(size - 1));
        ADDOP_COMPARE(c, GtE);
        RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE));
    }
    // Whatever comes next should consume the subject:
    pc->on_top--;
    if (only_wildcard) {
        // Patterns like: [] / [_] / [_, _] / [*_] / [_, *_] / [_, _, *_] / etc.
        ADDOP(c, POP_TOP);
    }
    else if (star_wildcard) {
        RETURN_IF_FALSE(pattern_helper_sequence_subscr(c, patterns, star, pc));
    }
    else {
        RETURN_IF_FALSE(pattern_helper_sequence_unpack(c, patterns, star, pc));
    }
    return 1;
}
6306 
static int
compiler_pattern_value(struct compiler *c, pattern_ty p, pattern_context *pc)
{
    /* Compile a MatchValue pattern: compare the subject on top of the
       stack against a literal or attribute-lookup value using ==, and
       jump to the fail-pop cleanup on mismatch. */
    assert(p->kind == MatchValue_kind);
    expr_ty value = p->v.MatchValue.value;
    if (!MATCH_VALUE_EXPR(value)) {
        const char *e = "patterns may only match literals and attribute lookups";
        return compiler_error(c, e);
    }
    VISIT(c, expr, value);
    ADDOP_COMPARE(c, Eq);
    RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE));
    return 1;
}
6321 
static int
compiler_pattern_singleton(struct compiler *c, pattern_ty p, pattern_context *pc)
{
    /* Compile a MatchSingleton pattern (None / True / False).  These are
       compared with "is", not "==", since they must match by identity. */
    assert(p->kind == MatchSingleton_kind);
    ADDOP_LOAD_CONST(c, p->v.MatchSingleton.value);
    ADDOP_COMPARE(c, Is);
    RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE));
    return 1;
}
6331 
static int
compiler_pattern(struct compiler *c, pattern_ty p, pattern_context *pc)
{
    /* Dispatch compilation of a single pattern node by kind.
       Returns 1 on success, 0 on error with an exception set. */
    SET_LOC(c, p);
    switch (p->kind) {
        case MatchValue_kind:
            return compiler_pattern_value(c, p, pc);
        case MatchSingleton_kind:
            return compiler_pattern_singleton(c, p, pc);
        case MatchSequence_kind:
            return compiler_pattern_sequence(c, p, pc);
        case MatchMapping_kind:
            return compiler_pattern_mapping(c, p, pc);
        case MatchClass_kind:
            return compiler_pattern_class(c, p, pc);
        case MatchStar_kind:
            return compiler_pattern_star(c, p, pc);
        case MatchAs_kind:
            return compiler_pattern_as(c, p, pc);
        case MatchOr_kind:
            return compiler_pattern_or(c, p, pc);
    }
    // AST validator shouldn't let this happen, but if it does,
    // just fail, don't crash out of the interpreter
    // NOTE(review): a format argument is passed here -- confirm that
    // compiler_error() accepts printf-style varargs in this codebase.
    const char *e = "invalid match pattern node in AST (kind=%d)";
    return compiler_error(c, e, p->kind);
}
6359 
static int
compiler_match_inner(struct compiler *c, stmt_ty s, pattern_context *pc)
{
    /* Compile a "match" statement: evaluate the subject, then try each
       case's pattern in order.  Returns 1 on success, 0 with an exception
       set on error.  pc->fail_pop is owned by the caller, which frees it
       regardless of how this function exits. */
    VISIT(c, expr, s->v.Match.subject);
    basicblock *end;
    RETURN_IF_FALSE(end = compiler_new_block(c));
    Py_ssize_t cases = asdl_seq_LEN(s->v.Match.cases);
    assert(cases > 0);
    match_case_ty m = asdl_seq_GET(s->v.Match.cases, cases - 1);
    // A trailing wildcard case gets special treatment (see below):
    int has_default = WILDCARD_CHECK(m->pattern) && 1 < cases;
    for (Py_ssize_t i = 0; i < cases - has_default; i++) {
        m = asdl_seq_GET(s->v.Match.cases, i);
        SET_LOC(c, m->pattern);
        // Only copy the subject if we're *not* on the last case:
        if (i != cases - has_default - 1) {
            ADDOP(c, DUP_TOP);
        }
        RETURN_IF_FALSE(pc->stores = PyList_New(0));
        // Irrefutable cases must be either guarded, last, or both:
        pc->allow_irrefutable = m->guard != NULL || i == cases - 1;
        pc->fail_pop = NULL;
        pc->fail_pop_size = 0;
        pc->on_top = 0;
        // NOTE: Can't use returning macros here (they'll leak pc->stores)!
        if (!compiler_pattern(c, m->pattern, pc)) {
            Py_DECREF(pc->stores);
            return 0;
        }
        assert(!pc->on_top);
        // It's a match! Store all of the captured names (they're on the stack).
        Py_ssize_t nstores = PyList_GET_SIZE(pc->stores);
        for (Py_ssize_t n = 0; n < nstores; n++) {
            PyObject *name = PyList_GET_ITEM(pc->stores, n);
            if (!compiler_nameop(c, name, Store)) {
                Py_DECREF(pc->stores);
                return 0;
            }
        }
        Py_DECREF(pc->stores);
        // NOTE: Returning macros are safe again.
        if (m->guard) {
            // A failed guard falls through to this case's fail-pop cleanup:
            RETURN_IF_FALSE(ensure_fail_pop(c, pc, 0));
            RETURN_IF_FALSE(compiler_jump_if(c, m->guard, pc->fail_pop[0], 0));
        }
        // Success! Pop the subject off, we're done with it:
        if (i != cases - has_default - 1) {
            ADDOP(c, POP_TOP);
        }
        VISIT_SEQ(c, stmt, m->body);
        ADDOP_JUMP(c, JUMP_FORWARD, end);
        // If the pattern fails to match, we want the line number of the
        // cleanup to be associated with the failed pattern, not the last line
        // of the body
        SET_LOC(c, m->pattern);
        RETURN_IF_FALSE(emit_and_reset_fail_pop(c, pc));
    }
    if (has_default) {
        // A trailing "case _" is common, and lets us save a bit of redundant
        // pushing and popping in the loop above:
        m = asdl_seq_GET(s->v.Match.cases, cases - 1);
        SET_LOC(c, m->pattern);
        if (cases == 1) {
            // No matches. Done with the subject:
            ADDOP(c, POP_TOP);
        }
        else {
            // Show line coverage for default case (it doesn't create bytecode)
            ADDOP(c, NOP);
        }
        if (m->guard) {
            RETURN_IF_FALSE(compiler_jump_if(c, m->guard, end, 0));
        }
        VISIT_SEQ(c, stmt, m->body);
    }
    compiler_use_next_block(c, end);
    return 1;
}
6437 
6438 static int
compiler_match(struct compiler * c,stmt_ty s)6439 compiler_match(struct compiler *c, stmt_ty s)
6440 {
6441     pattern_context pc;
6442     pc.fail_pop = NULL;
6443     int result = compiler_match_inner(c, s, &pc);
6444     PyObject_Free(pc.fail_pop);
6445     return result;
6446 }
6447 
6448 #undef WILDCARD_CHECK
6449 #undef WILDCARD_STAR_CHECK
6450 
6451 /* End of the compiler section, beginning of the assembler section */
6452 
6453 /* do depth-first search of basic block graph, starting with block.
6454    post records the block indices in post-order.
6455 
6456    XXX must handle implicit jumps from one block to next
6457 */
6458 
struct assembler {
    PyObject *a_bytecode;  /* bytes object holding the emitted bytecode */
    int a_offset;              /* write offset into bytecode, in code units */
    int a_nblocks;             /* number of reachable blocks */
    PyObject *a_lnotab;    /* bytes object holding the line number table */
    int a_lnotab_off;      /* write offset into lnotab, in bytes */
    int a_prevlineno;     /* lineno of last emitted line in line table */
    int a_lineno;          /* lineno of last emitted instruction */
    int a_lineno_start;    /* bytecode start offset of current lineno */
    basicblock *a_entry;   /* entry block; emission follows b_next order */
};
6470 
Py_LOCAL_INLINE(void)
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
{
    /* Record that block b is entered with the given stack depth and, if it
       has not been assigned a depth yet, push it onto the work stack whose
       top pointer is *sp (advanced on push). */
    assert(b->b_startdepth < 0 || b->b_startdepth == depth);
    /* b_startdepth is INT_MIN while unvisited (see stackdepth()).  The
       "< 100" bound prevents re-pushing visited blocks; NOTE(review):
       presumably a soft cap related to the compiler's stack-use limit --
       confirm its exact intent. */
    if (b->b_startdepth < depth && b->b_startdepth < 100) {
        assert(b->b_startdepth < 0);
        b->b_startdepth = depth;
        *(*sp)++ = b;
    }
}
6481 
6482 /* Find the flow path that needs the largest stack.  We assume that
6483  * cycles in the flow graph have no net effect on the stack depth.
6484  */
6485 static int
stackdepth(struct compiler * c)6486 stackdepth(struct compiler *c)
6487 {
6488     basicblock *b, *entryblock = NULL;
6489     basicblock **stack, **sp;
6490     int nblocks = 0, maxdepth = 0;
6491     for (b = c->u->u_blocks; b != NULL; b = b->b_list) {
6492         b->b_startdepth = INT_MIN;
6493         entryblock = b;
6494         nblocks++;
6495     }
6496     assert(entryblock!= NULL);
6497     stack = (basicblock **)PyObject_Malloc(sizeof(basicblock *) * nblocks);
6498     if (!stack) {
6499         PyErr_NoMemory();
6500         return -1;
6501     }
6502 
6503     sp = stack;
6504     if (c->u->u_ste->ste_generator || c->u->u_ste->ste_coroutine) {
6505         stackdepth_push(&sp, entryblock, 1);
6506     } else {
6507         stackdepth_push(&sp, entryblock, 0);
6508     }
6509     while (sp != stack) {
6510         b = *--sp;
6511         int depth = b->b_startdepth;
6512         assert(depth >= 0);
6513         basicblock *next = b->b_next;
6514         for (int i = 0; i < b->b_iused; i++) {
6515             struct instr *instr = &b->b_instr[i];
6516             int effect = stack_effect(instr->i_opcode, instr->i_oparg, 0);
6517             if (effect == PY_INVALID_STACK_EFFECT) {
6518                 PyErr_Format(PyExc_SystemError,
6519                              "compiler stack_effect(opcode=%d, arg=%i) failed",
6520                              instr->i_opcode, instr->i_oparg);
6521                 return -1;
6522             }
6523             int new_depth = depth + effect;
6524             if (new_depth > maxdepth) {
6525                 maxdepth = new_depth;
6526             }
6527             assert(depth >= 0); /* invalid code or bug in stackdepth() */
6528             if (is_jump(instr)) {
6529                 effect = stack_effect(instr->i_opcode, instr->i_oparg, 1);
6530                 assert(effect != PY_INVALID_STACK_EFFECT);
6531                 int target_depth = depth + effect;
6532                 if (target_depth > maxdepth) {
6533                     maxdepth = target_depth;
6534                 }
6535                 assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
6536                 stackdepth_push(&sp, instr->i_target, target_depth);
6537             }
6538             depth = new_depth;
6539             if (instr->i_opcode == JUMP_ABSOLUTE ||
6540                 instr->i_opcode == JUMP_FORWARD ||
6541                 instr->i_opcode == RETURN_VALUE ||
6542                 instr->i_opcode == RAISE_VARARGS ||
6543                 instr->i_opcode == RERAISE)
6544             {
6545                 /* remaining code is dead */
6546                 next = NULL;
6547                 break;
6548             }
6549         }
6550         if (next != NULL) {
6551             assert(b->b_nofallthrough == 0);
6552             stackdepth_push(&sp, next, depth);
6553         }
6554     }
6555     PyObject_Free(stack);
6556     return maxdepth;
6557 }
6558 
static int
assemble_init(struct assembler *a, int nblocks, int firstlineno)
{
    /* Initialize the assembler: allocate the bytecode and lnotab buffers
       and seed line tracking with the code object's first line number.
       Returns 1 on success, 0 with an exception set on failure. */
    memset(a, 0, sizeof(struct assembler));
    a->a_prevlineno = a->a_lineno = firstlineno;
    a->a_lnotab = NULL;
    a->a_bytecode = PyBytes_FromStringAndSize(NULL, DEFAULT_CODE_SIZE);
    if (a->a_bytecode == NULL) {
        goto error;
    }
    a->a_lnotab = PyBytes_FromStringAndSize(NULL, DEFAULT_LNOTAB_SIZE);
    if (a->a_lnotab == NULL) {
        goto error;
    }
    /* Guard against overflow in any later nblocks-sized allocation. */
    if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
        PyErr_NoMemory();
        goto error;
    }
    return 1;
error:
    Py_XDECREF(a->a_bytecode);
    Py_XDECREF(a->a_lnotab);
    return 0;
}
6583 
6584 static void
assemble_free(struct assembler * a)6585 assemble_free(struct assembler *a)
6586 {
6587     Py_XDECREF(a->a_bytecode);
6588     Py_XDECREF(a->a_lnotab);
6589 }
6590 
6591 static int
blocksize(basicblock * b)6592 blocksize(basicblock *b)
6593 {
6594     int i;
6595     int size = 0;
6596 
6597     for (i = 0; i < b->b_iused; i++)
6598         size += instrsize(b->b_instr[i].i_oparg);
6599     return size;
6600 }
6601 
6602 static int
assemble_emit_linetable_pair(struct assembler * a,int bdelta,int ldelta)6603 assemble_emit_linetable_pair(struct assembler *a, int bdelta, int ldelta)
6604 {
6605     Py_ssize_t len = PyBytes_GET_SIZE(a->a_lnotab);
6606     if (a->a_lnotab_off + 2 >= len) {
6607         if (_PyBytes_Resize(&a->a_lnotab, len * 2) < 0)
6608             return 0;
6609     }
6610     unsigned char *lnotab = (unsigned char *) PyBytes_AS_STRING(a->a_lnotab);
6611     lnotab += a->a_lnotab_off;
6612     a->a_lnotab_off += 2;
6613     *lnotab++ = bdelta;
6614     *lnotab++ = ldelta;
6615     return 1;
6616 }
6617 
6618 /* Appends a range to the end of the line number table. See
6619  *  Objects/lnotab_notes.txt for the description of the line number table. */
6620 
static int
assemble_line_range(struct assembler *a)
{
    /* Close out the current line's range: emit (bytecode delta, line delta)
       pairs covering the code emitted since a_lineno_start.  Deltas too
       large for one byte are split across multiple pairs.  Returns 1 on
       success, 0 on allocation failure. */
    int ldelta, bdelta;
    bdelta =  (a->a_offset - a->a_lineno_start) * sizeof(_Py_CODEUNIT);
    if (bdelta == 0) {
        return 1;
    }
    if (a->a_lineno < 0) {
        /* -128 marks "no line number" for this range. */
        ldelta = -128;
    }
    else {
        ldelta = a->a_lineno - a->a_prevlineno;
        a->a_prevlineno = a->a_lineno;
        /* Split oversized line deltas into zero-bytecode chunks. */
        while (ldelta > 127) {
            if (!assemble_emit_linetable_pair(a, 0, 127)) {
                return 0;
            }
            ldelta -= 127;
        }
        while (ldelta < -127) {
            if (!assemble_emit_linetable_pair(a, 0, -127)) {
                return 0;
            }
            ldelta += 127;
        }
    }
    assert(-128 <= ldelta && ldelta < 128);
    /* Split oversized bytecode deltas into 254-byte chunks; only the first
       pair carries the line delta (or -128 when there is no line). */
    while (bdelta > 254) {
        if (!assemble_emit_linetable_pair(a, 254, ldelta)) {
            return 0;
        }
        ldelta = a->a_lineno < 0 ? -128 : 0;
        bdelta -= 254;
    }
    if (!assemble_emit_linetable_pair(a, bdelta, ldelta)) {
        return 0;
    }
    a->a_lineno_start = a->a_offset;
    return 1;
}
6662 
6663 static int
assemble_lnotab(struct assembler * a,struct instr * i)6664 assemble_lnotab(struct assembler *a, struct instr *i)
6665 {
6666     if (i->i_lineno == a->a_lineno) {
6667         return 1;
6668     }
6669     if (!assemble_line_range(a)) {
6670         return 0;
6671     }
6672     a->a_lineno = i->i_lineno;
6673     return 1;
6674 }
6675 
6676 
6677 /* assemble_emit()
6678    Extend the bytecode with a new instruction.
6679    Update lnotab if necessary.
6680 */
6681 
static int
assemble_emit(struct assembler *a, struct instr *i)
{
    /* Append instruction i to the bytecode buffer, doubling the buffer
       when full, and update the line number table.  Returns 1 on success,
       0 on failure. */
    int size, arg = 0;
    Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode);
    _Py_CODEUNIT *code;

    arg = i->i_oparg;
    /* Size in code units, including any EXTENDED_ARG prefixes. */
    size = instrsize(arg);
    if (i->i_lineno && !assemble_lnotab(a, i))
        return 0;
    if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) {
        if (len > PY_SSIZE_T_MAX / 2)
            /* NOTE(review): fails without setting an exception here --
               confirm callers surface this as an error. */
            return 0;
        if (_PyBytes_Resize(&a->a_bytecode, len * 2) < 0)
            return 0;
    }
    code = (_Py_CODEUNIT *)PyBytes_AS_STRING(a->a_bytecode) + a->a_offset;
    a->a_offset += size;
    write_op_arg(code, i->i_opcode, arg, size);
    return 1;
}
6704 
6705 static void
normalize_jumps(struct assembler * a)6706 normalize_jumps(struct assembler *a)
6707 {
6708     for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
6709         b->b_visited = 0;
6710     }
6711     for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
6712         b->b_visited = 1;
6713         if (b->b_iused == 0) {
6714             continue;
6715         }
6716         struct instr *last = &b->b_instr[b->b_iused-1];
6717         if (last->i_opcode == JUMP_ABSOLUTE) {
6718             if (last->i_target->b_visited == 0) {
6719                 last->i_opcode = JUMP_FORWARD;
6720             }
6721         }
6722         if (last->i_opcode == JUMP_FORWARD) {
6723             if (last->i_target->b_visited == 1) {
6724                 last->i_opcode = JUMP_ABSOLUTE;
6725             }
6726         }
6727     }
6728 }
6729 
6730 static void
assemble_jump_offsets(struct assembler * a,struct compiler * c)6731 assemble_jump_offsets(struct assembler *a, struct compiler *c)
6732 {
6733     basicblock *b;
6734     int bsize, totsize, extended_arg_recompile;
6735     int i;
6736 
6737     /* Compute the size of each block and fixup jump args.
6738        Replace block pointer with position in bytecode. */
6739     do {
6740         totsize = 0;
6741         for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
6742             bsize = blocksize(b);
6743             b->b_offset = totsize;
6744             totsize += bsize;
6745         }
6746         extended_arg_recompile = 0;
6747         for (b = c->u->u_blocks; b != NULL; b = b->b_list) {
6748             bsize = b->b_offset;
6749             for (i = 0; i < b->b_iused; i++) {
6750                 struct instr *instr = &b->b_instr[i];
6751                 int isize = instrsize(instr->i_oparg);
6752                 /* Relative jumps are computed relative to
6753                    the instruction pointer after fetching
6754                    the jump instruction.
6755                 */
6756                 bsize += isize;
6757                 if (is_jump(instr)) {
6758                     instr->i_oparg = instr->i_target->b_offset;
6759                     if (is_relative_jump(instr)) {
6760                         instr->i_oparg -= bsize;
6761                     }
6762                     if (instrsize(instr->i_oparg) != isize) {
6763                         extended_arg_recompile = 1;
6764                     }
6765                 }
6766             }
6767         }
6768 
6769     /* XXX: This is an awful hack that could hurt performance, but
6770         on the bright side it should work until we come up
6771         with a better solution.
6772 
6773         The issue is that in the first loop blocksize() is called
6774         which calls instrsize() which requires i_oparg be set
6775         appropriately. There is a bootstrap problem because
6776         i_oparg is calculated in the second loop above.
6777 
6778         So we loop until we stop seeing new EXTENDED_ARGs.
6779         The only EXTENDED_ARGs that could be popping up are
6780         ones in jump instructions.  So this should converge
6781         fairly quickly.
6782     */
6783     } while (extended_arg_recompile);
6784 }
6785 
6786 static PyObject *
dict_keys_inorder(PyObject * dict,Py_ssize_t offset)6787 dict_keys_inorder(PyObject *dict, Py_ssize_t offset)
6788 {
6789     PyObject *tuple, *k, *v;
6790     Py_ssize_t i, pos = 0, size = PyDict_GET_SIZE(dict);
6791 
6792     tuple = PyTuple_New(size);
6793     if (tuple == NULL)
6794         return NULL;
6795     while (PyDict_Next(dict, &pos, &k, &v)) {
6796         i = PyLong_AS_LONG(v);
6797         Py_INCREF(k);
6798         assert((i - offset) < size);
6799         assert((i - offset) >= 0);
6800         PyTuple_SET_ITEM(tuple, i - offset, k);
6801     }
6802     return tuple;
6803 }
6804 
6805 static PyObject *
consts_dict_keys_inorder(PyObject * dict)6806 consts_dict_keys_inorder(PyObject *dict)
6807 {
6808     PyObject *consts, *k, *v;
6809     Py_ssize_t i, pos = 0, size = PyDict_GET_SIZE(dict);
6810 
6811     consts = PyList_New(size);   /* PyCode_Optimize() requires a list */
6812     if (consts == NULL)
6813         return NULL;
6814     while (PyDict_Next(dict, &pos, &k, &v)) {
6815         i = PyLong_AS_LONG(v);
6816         /* The keys of the dictionary can be tuples wrapping a constant.
6817          * (see compiler_add_o and _PyCode_ConstantKey). In that case
6818          * the object we want is always second. */
6819         if (PyTuple_CheckExact(k)) {
6820             k = PyTuple_GET_ITEM(k, 1);
6821         }
6822         Py_INCREF(k);
6823         assert(i < size);
6824         assert(i >= 0);
6825         PyList_SET_ITEM(consts, i, k);
6826     }
6827     return consts;
6828 }
6829 
6830 static int
compute_code_flags(struct compiler * c)6831 compute_code_flags(struct compiler *c)
6832 {
6833     PySTEntryObject *ste = c->u->u_ste;
6834     int flags = 0;
6835     if (ste->ste_type == FunctionBlock) {
6836         flags |= CO_NEWLOCALS | CO_OPTIMIZED;
6837         if (ste->ste_nested)
6838             flags |= CO_NESTED;
6839         if (ste->ste_generator && !ste->ste_coroutine)
6840             flags |= CO_GENERATOR;
6841         if (!ste->ste_generator && ste->ste_coroutine)
6842             flags |= CO_COROUTINE;
6843         if (ste->ste_generator && ste->ste_coroutine)
6844             flags |= CO_ASYNC_GENERATOR;
6845         if (ste->ste_varargs)
6846             flags |= CO_VARARGS;
6847         if (ste->ste_varkeywords)
6848             flags |= CO_VARKEYWORDS;
6849     }
6850 
6851     /* (Only) inherit compilerflags in PyCF_MASK */
6852     flags |= (c->c_flags->cf_flags & PyCF_MASK);
6853 
6854     if ((IS_TOP_LEVEL_AWAIT(c)) &&
6855          ste->ste_coroutine &&
6856          !ste->ste_generator) {
6857         flags |= CO_COROUTINE;
6858     }
6859 
6860     return flags;
6861 }
6862 
6863 // Merge *obj* with constant cache.
6864 // Unlike merge_consts_recursive(), this function doesn't work recursively.
6865 static int
merge_const_one(struct compiler * c,PyObject ** obj)6866 merge_const_one(struct compiler *c, PyObject **obj)
6867 {
6868     PyObject *key = _PyCode_ConstantKey(*obj);
6869     if (key == NULL) {
6870         return 0;
6871     }
6872 
6873     // t is borrowed reference
6874     PyObject *t = PyDict_SetDefault(c->c_const_cache, key, key);
6875     Py_DECREF(key);
6876     if (t == NULL) {
6877         return 0;
6878     }
6879     if (t == key) {  // obj is new constant.
6880         return 1;
6881     }
6882 
6883     if (PyTuple_CheckExact(t)) {
6884         // t is still borrowed reference
6885         t = PyTuple_GET_ITEM(t, 1);
6886     }
6887 
6888     Py_INCREF(t);
6889     Py_DECREF(*obj);
6890     *obj = t;
6891     return 1;
6892 }
6893 
6894 static PyCodeObject *
makecode(struct compiler * c,struct assembler * a,PyObject * consts)6895 makecode(struct compiler *c, struct assembler *a, PyObject *consts)
6896 {
6897     PyCodeObject *co = NULL;
6898     PyObject *names = NULL;
6899     PyObject *varnames = NULL;
6900     PyObject *name = NULL;
6901     PyObject *freevars = NULL;
6902     PyObject *cellvars = NULL;
6903     Py_ssize_t nlocals;
6904     int nlocals_int;
6905     int flags;
6906     int posorkeywordargcount, posonlyargcount, kwonlyargcount, maxdepth;
6907 
6908     names = dict_keys_inorder(c->u->u_names, 0);
6909     varnames = dict_keys_inorder(c->u->u_varnames, 0);
6910     if (!names || !varnames) {
6911         goto error;
6912     }
6913     cellvars = dict_keys_inorder(c->u->u_cellvars, 0);
6914     if (!cellvars)
6915         goto error;
6916     freevars = dict_keys_inorder(c->u->u_freevars, PyTuple_GET_SIZE(cellvars));
6917     if (!freevars)
6918         goto error;
6919 
6920     if (!merge_const_one(c, &names) ||
6921             !merge_const_one(c, &varnames) ||
6922             !merge_const_one(c, &cellvars) ||
6923             !merge_const_one(c, &freevars))
6924     {
6925         goto error;
6926     }
6927 
6928     nlocals = PyDict_GET_SIZE(c->u->u_varnames);
6929     assert(nlocals < INT_MAX);
6930     nlocals_int = Py_SAFE_DOWNCAST(nlocals, Py_ssize_t, int);
6931 
6932     flags = compute_code_flags(c);
6933     if (flags < 0)
6934         goto error;
6935 
6936     consts = PyList_AsTuple(consts); /* PyCode_New requires a tuple */
6937     if (consts == NULL) {
6938         goto error;
6939     }
6940     if (!merge_const_one(c, &consts)) {
6941         Py_DECREF(consts);
6942         goto error;
6943     }
6944 
6945     posonlyargcount = Py_SAFE_DOWNCAST(c->u->u_posonlyargcount, Py_ssize_t, int);
6946     posorkeywordargcount = Py_SAFE_DOWNCAST(c->u->u_argcount, Py_ssize_t, int);
6947     kwonlyargcount = Py_SAFE_DOWNCAST(c->u->u_kwonlyargcount, Py_ssize_t, int);
6948     maxdepth = stackdepth(c);
6949     if (maxdepth < 0) {
6950         Py_DECREF(consts);
6951         goto error;
6952     }
6953     if (maxdepth > MAX_ALLOWED_STACK_USE) {
6954         PyErr_Format(PyExc_SystemError,
6955                      "excessive stack use: stack is %d deep",
6956                      maxdepth);
6957         Py_DECREF(consts);
6958         goto error;
6959     }
6960     co = PyCode_NewWithPosOnlyArgs(posonlyargcount+posorkeywordargcount,
6961                                    posonlyargcount, kwonlyargcount, nlocals_int,
6962                                    maxdepth, flags, a->a_bytecode, consts, names,
6963                                    varnames, freevars, cellvars, c->c_filename,
6964                                    c->u->u_name, c->u->u_firstlineno, a->a_lnotab);
6965     Py_DECREF(consts);
6966  error:
6967     Py_XDECREF(names);
6968     Py_XDECREF(varnames);
6969     Py_XDECREF(name);
6970     Py_XDECREF(freevars);
6971     Py_XDECREF(cellvars);
6972     return co;
6973 }
6974 
6975 
/* For debugging purposes only */
#if 0
static void
dump_instr(struct instr *i)
{
    /* Print one instruction to stderr: line, opcode, arg, jump kind. */
    const char *jrel = (is_relative_jump(i)) ? "jrel " : "";
    const char *jabs = (is_jump(i) && !is_relative_jump(i))? "jabs " : "";

    char arg[128];

    *arg = '\0';
    if (HAS_ARG(i->i_opcode)) {
        sprintf(arg, "arg: %d ", i->i_oparg);
    }
    fprintf(stderr, "line: %d, opcode: %d %s%s%s\n",
                    i->i_lineno, i->i_opcode, arg, jabs, jrel);
}

static void
dump_basicblock(const basicblock *b)
{
    /* Print a block summary followed by each of its instructions. */
    const char *b_return = b->b_return ? "return " : "";
    fprintf(stderr, "used: %d, depth: %d, offset: %d %s\n",
        b->b_iused, b->b_startdepth, b->b_offset, b_return);
    if (b->b_instr) {
        int i;
        for (i = 0; i < b->b_iused; i++) {
            fprintf(stderr, "  [%02d] ", i);
            dump_instr(b->b_instr + i);
        }
    }
}
#endif
7009 
7010 
7011 static int
7012 normalize_basic_block(basicblock *bb);
7013 
7014 static int
7015 optimize_cfg(struct compiler *c, struct assembler *a, PyObject *consts);
7016 
7017 static int
7018 trim_unused_consts(struct compiler *c, struct assembler *a, PyObject *consts);
7019 
7020 /* Duplicates exit BBs, so that line numbers can be propagated to them */
7021 static int
7022 duplicate_exits_without_lineno(struct compiler *c);
7023 
7024 static int
7025 extend_block(basicblock *bb);
7026 
7027 static int
insert_generator_prefix(struct compiler * c,basicblock * entryblock)7028 insert_generator_prefix(struct compiler *c, basicblock *entryblock) {
7029 
7030     int flags = compute_code_flags(c);
7031     if (flags < 0) {
7032         return -1;
7033     }
7034     int kind;
7035     if (flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) {
7036         if (flags & CO_COROUTINE) {
7037             kind = 1;
7038         }
7039         else if (flags & CO_ASYNC_GENERATOR) {
7040             kind = 2;
7041         }
7042         else {
7043             kind = 0;
7044         }
7045     }
7046     else {
7047         return 0;
7048     }
7049     if (compiler_next_instr(entryblock) < 0) {
7050         return -1;
7051     }
7052     for (int i = entryblock->b_iused-1; i > 0; i--) {
7053         entryblock->b_instr[i] = entryblock->b_instr[i-1];
7054     }
7055     entryblock->b_instr[0].i_opcode = GEN_START;
7056     entryblock->b_instr[0].i_oparg = kind;
7057     entryblock->b_instr[0].i_lineno = -1;
7058     entryblock->b_instr[0].i_target = NULL;
7059     return 0;
7060 }
7061 
7062 /* Make sure that all returns have a line number, even if early passes
7063  * have failed to propagate a correct line number.
7064  * The resulting line number may not be correct according to PEP 626,
7065  * but should be "good enough", and no worse than in older versions. */
7066 static void
guarantee_lineno_for_exits(struct assembler * a,int firstlineno)7067 guarantee_lineno_for_exits(struct assembler *a, int firstlineno) {
7068     int lineno = firstlineno;
7069     assert(lineno > 0);
7070     for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
7071         if (b->b_iused == 0) {
7072             continue;
7073         }
7074         struct instr *last = &b->b_instr[b->b_iused-1];
7075         if (last->i_lineno < 0) {
7076             if (last->i_opcode == RETURN_VALUE) {
7077                 for (int i = 0; i < b->b_iused; i++) {
7078                     assert(b->b_instr[i].i_lineno < 0);
7079 
7080                     b->b_instr[i].i_lineno = lineno;
7081                 }
7082             }
7083         }
7084         else {
7085             lineno = last->i_lineno;
7086         }
7087     }
7088 }
7089 
7090 static void
7091 propagate_line_numbers(struct assembler *a);
7092 
7093 static PyCodeObject *
assemble(struct compiler * c,int addNone)7094 assemble(struct compiler *c, int addNone)
7095 {
7096     basicblock *b, *entryblock;
7097     struct assembler a;
7098     int j, nblocks;
7099     PyCodeObject *co = NULL;
7100     PyObject *consts = NULL;
7101 
7102     /* Make sure every block that falls off the end returns None.
7103        XXX NEXT_BLOCK() isn't quite right, because if the last
7104        block ends with a jump or return b_next shouldn't set.
7105      */
7106     if (!c->u->u_curblock->b_return) {
7107         c->u->u_lineno = -1;
7108         if (addNone)
7109             ADDOP_LOAD_CONST(c, Py_None);
7110         ADDOP(c, RETURN_VALUE);
7111     }
7112 
7113     for (basicblock *b = c->u->u_blocks; b != NULL; b = b->b_list) {
7114         if (normalize_basic_block(b)) {
7115             return NULL;
7116         }
7117     }
7118 
7119     for (basicblock *b = c->u->u_blocks; b != NULL; b = b->b_list) {
7120         if (extend_block(b)) {
7121             return NULL;
7122         }
7123     }
7124 
7125     nblocks = 0;
7126     entryblock = NULL;
7127     for (b = c->u->u_blocks; b != NULL; b = b->b_list) {
7128         nblocks++;
7129         entryblock = b;
7130     }
7131     assert(entryblock != NULL);
7132 
7133     if (insert_generator_prefix(c, entryblock)) {
7134         goto error;
7135     }
7136 
7137     /* Set firstlineno if it wasn't explicitly set. */
7138     if (!c->u->u_firstlineno) {
7139         if (entryblock->b_instr && entryblock->b_instr->i_lineno)
7140             c->u->u_firstlineno = entryblock->b_instr->i_lineno;
7141        else
7142             c->u->u_firstlineno = 1;
7143     }
7144 
7145     if (!assemble_init(&a, nblocks, c->u->u_firstlineno))
7146         goto error;
7147     a.a_entry = entryblock;
7148     a.a_nblocks = nblocks;
7149 
7150     consts = consts_dict_keys_inorder(c->u->u_consts);
7151     if (consts == NULL) {
7152         goto error;
7153     }
7154 
7155     if (optimize_cfg(c, &a, consts)) {
7156         goto error;
7157     }
7158     if (duplicate_exits_without_lineno(c)) {
7159         return NULL;
7160     }
7161     if (trim_unused_consts(c, &a, consts)) {
7162         goto error;
7163     }
7164     propagate_line_numbers(&a);
7165     guarantee_lineno_for_exits(&a, c->u->u_firstlineno);
7166 
7167     /* Order of basic blocks must have been determined by now */
7168     normalize_jumps(&a);
7169 
7170     /* Can't modify the bytecode after computing jump offsets. */
7171     assemble_jump_offsets(&a, c);
7172 
7173     /* Emit code. */
7174     for(b = entryblock; b != NULL; b = b->b_next) {
7175         for (j = 0; j < b->b_iused; j++)
7176             if (!assemble_emit(&a, &b->b_instr[j]))
7177                 goto error;
7178     }
7179     if (!assemble_line_range(&a)) {
7180         return 0;
7181     }
7182 
7183     if (_PyBytes_Resize(&a.a_lnotab, a.a_lnotab_off) < 0) {
7184         goto error;
7185     }
7186     if (!merge_const_one(c, &a.a_lnotab)) {
7187         goto error;
7188     }
7189     if (_PyBytes_Resize(&a.a_bytecode, a.a_offset * sizeof(_Py_CODEUNIT)) < 0) {
7190         goto error;
7191     }
7192     if (!merge_const_one(c, &a.a_bytecode)) {
7193         goto error;
7194     }
7195 
7196     co = makecode(c, &a, consts);
7197  error:
7198     Py_XDECREF(consts);
7199     assemble_free(&a);
7200     return co;
7201 }
7202 
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
   with    LOAD_CONST (c1, c2, ... cn).
   The consts table must still be in list form so that the
   new constant (c1, c2, ... cn) can be appended.
   Called with codestr pointing to the first LOAD_CONST.

   Returns 0 on success — including the no-op case where not every
   preceding instruction is a LOAD_CONST — and -1 with an exception
   set on failure.
*/
static int
fold_tuple_on_constants(struct compiler *c,
                        struct instr *inst,
                        int n, PyObject *consts)
{
    /* Pre-conditions */
    assert(PyList_CheckExact(consts));
    assert(inst[n].i_opcode == BUILD_TUPLE);
    assert(inst[n].i_oparg == n);

    /* Bail out (successfully) unless all n operands are constants. */
    for (int i = 0; i < n; i++) {
        if (inst[i].i_opcode != LOAD_CONST) {
            return 0;
        }
    }

    /* Buildup new tuple of constants */
    PyObject *newconst = PyTuple_New(n);
    if (newconst == NULL) {
        return -1;
    }
    for (int i = 0; i < n; i++) {
        int arg = inst[i].i_oparg;
        PyObject *constant = PyList_GET_ITEM(consts, arg);
        Py_INCREF(constant);
        PyTuple_SET_ITEM(newconst, i, constant);
    }
    /* Deduplicate the tuple through the constant cache; this may
       replace newconst in place with an equal, already-interned one. */
    if (merge_const_one(c, &newconst) == 0) {
        Py_DECREF(newconst);
        return -1;
    }

    /* Find the tuple in the consts list, appending it if it is new. */
    Py_ssize_t index;
    for (index = 0; index < PyList_GET_SIZE(consts); index++) {
        if (PyList_GET_ITEM(consts, index) == newconst) {
            break;
        }
    }
    if (index == PyList_GET_SIZE(consts)) {
        /* i_oparg is an int, so the new index must fit in one. */
        if ((size_t)index >= (size_t)INT_MAX - 1) {
            Py_DECREF(newconst);
            PyErr_SetString(PyExc_OverflowError, "too many constants");
            return -1;
        }
        if (PyList_Append(consts, newconst)) {
            Py_DECREF(newconst);
            return -1;
        }
    }
    Py_DECREF(newconst);
    /* Turn the n LOAD_CONSTs into NOPs (cleaned up later) and rewrite
       BUILD_TUPLE into a single LOAD_CONST of the folded tuple. */
    for (int i = 0; i < n; i++) {
        inst[i].i_opcode = NOP;
    }
    inst[n].i_opcode = LOAD_CONST;
    inst[n].i_oparg = (int)index;
    return 0;
}
7266 
7267 
7268 // Eliminate n * ROT_N(n).
7269 static void
fold_rotations(struct instr * inst,int n)7270 fold_rotations(struct instr *inst, int n)
7271 {
7272     for (int i = 0; i < n; i++) {
7273         int rot;
7274         switch (inst[i].i_opcode) {
7275             case ROT_N:
7276                 rot = inst[i].i_oparg;
7277                 break;
7278             case ROT_FOUR:
7279                 rot = 4;
7280                 break;
7281             case ROT_THREE:
7282                 rot = 3;
7283                 break;
7284             case ROT_TWO:
7285                 rot = 2;
7286                 break;
7287             default:
7288                 return;
7289         }
7290         if (rot != n) {
7291             return;
7292         }
7293     }
7294     for (int i = 0; i < n; i++) {
7295         inst[i].i_opcode = NOP;
7296     }
7297 }
7298 
7299 // Attempt to eliminate jumps to jumps by updating inst to jump to
7300 // target->i_target using the provided opcode. Return whether or not the
7301 // optimization was successful.
7302 static bool
jump_thread(struct instr * inst,struct instr * target,int opcode)7303 jump_thread(struct instr *inst, struct instr *target, int opcode)
7304 {
7305     assert(is_jump(inst));
7306     assert(is_jump(target));
7307     // bpo-45773: If inst->i_target == target->i_target, then nothing actually
7308     // changes (and we fall into an infinite loop):
7309     if (inst->i_lineno == target->i_lineno &&
7310         inst->i_target != target->i_target)
7311     {
7312         inst->i_target = target->i_target;
7313         inst->i_opcode = opcode;
7314         return true;
7315     }
7316     return false;
7317 }
7318 
7319 /* Maximum size of basic block that should be copied in optimizer */
7320 #define MAX_COPY_SIZE 4
7321 
/* Optimization */
/* Peephole-optimize the instructions of one basic block.
 *
 * consts must still be a list (so folded tuples can be appended by
 * fold_tuple_on_constants). Removed instructions are overwritten with
 * NOPs, which clean_basic_block() strips later.
 * Returns 0 on success, -1 with an exception set on error.
 */
static int
optimize_basic_block(struct compiler *c, basicblock *bb, PyObject *consts)
{
    assert(PyList_CheckExact(consts));
    /* Dummy target used when the current instruction is not a jump, so
       the switch below can inspect target->i_opcode unconditionally. */
    struct instr nop;
    nop.i_opcode = NOP;
    struct instr *target;
    for (int i = 0; i < bb->b_iused; i++) {
        struct instr *inst = &bb->b_instr[i];
        int oparg = inst->i_oparg;
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
        if (is_jump(inst)) {
            /* Skip over empty basic blocks. */
            while (inst->i_target->b_iused == 0) {
                inst->i_target = inst->i_target->b_next;
            }
            target = &inst->i_target->b_instr[0];
        }
        else {
            target = &nop;
        }
        /* NOTE: the "i -= jump_thread(...)" pattern below decrements i
           exactly when a jump was threaded, re-running the loop on the
           same instruction so chains of jumps collapse in one pass. */
        switch (inst->i_opcode) {
            /* Remove LOAD_CONST const; conditional jump */
            case LOAD_CONST:
            {
                PyObject* cnt;
                int is_true;
                int jump_if_true;
                switch(nextop) {
                    case POP_JUMP_IF_FALSE:
                    case POP_JUMP_IF_TRUE:
                        /* The truthiness of the constant decides the
                           branch at compile time: either the pair
                           becomes an unconditional jump, or both
                           instructions disappear. */
                        cnt = PyList_GET_ITEM(consts, oparg);
                        is_true = PyObject_IsTrue(cnt);
                        if (is_true == -1) {
                            goto error;
                        }
                        inst->i_opcode = NOP;
                        jump_if_true = nextop == POP_JUMP_IF_TRUE;
                        if (is_true == jump_if_true) {
                            bb->b_instr[i+1].i_opcode = JUMP_ABSOLUTE;
                            bb->b_nofallthrough = 1;
                        }
                        else {
                            bb->b_instr[i+1].i_opcode = NOP;
                        }
                        break;
                    case JUMP_IF_FALSE_OR_POP:
                    case JUMP_IF_TRUE_OR_POP:
                        /* If the jump is taken, the constant stays on
                           the stack, so the LOAD_CONST must be kept in
                           that case. */
                        cnt = PyList_GET_ITEM(consts, oparg);
                        is_true = PyObject_IsTrue(cnt);
                        if (is_true == -1) {
                            goto error;
                        }
                        jump_if_true = nextop == JUMP_IF_TRUE_OR_POP;
                        if (is_true == jump_if_true) {
                            bb->b_instr[i+1].i_opcode = JUMP_ABSOLUTE;
                            bb->b_nofallthrough = 1;
                        }
                        else {
                            inst->i_opcode = NOP;
                            bb->b_instr[i+1].i_opcode = NOP;
                        }
                        break;
                }
                break;
            }

                /* Try to fold tuples of constants.
                   Skip over BUILD_SEQN 1 UNPACK_SEQN 1.
                   Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2.
                   Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2. */
            case BUILD_TUPLE:
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
                    switch(oparg) {
                        case 1:
                            inst->i_opcode = NOP;
                            bb->b_instr[i+1].i_opcode = NOP;
                            break;
                        case 2:
                            inst->i_opcode = ROT_TWO;
                            bb->b_instr[i+1].i_opcode = NOP;
                            break;
                        case 3:
                            inst->i_opcode = ROT_THREE;
                            bb->b_instr[i+1].i_opcode = ROT_TWO;
                    }
                    break;
                }
                /* Only fold when the oparg preceding instructions are
                   all inside this block (i >= oparg). */
                if (i >= oparg) {
                    if (fold_tuple_on_constants(c, inst-oparg, oparg, consts)) {
                        goto error;
                    }
                }
                break;

                /* Simplify conditional jump to conditional jump where the
                   result of the first test implies the success of a similar
                   test or the failure of the opposite test.
                   Arises in code like:
                   "a and b or c"
                   "(a and b) and c"
                   "(a or b) or c"
                   "(a or b) and c"
                   x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_FALSE_OR_POP z
                      -->  x:JUMP_IF_FALSE_OR_POP z
                   x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_TRUE_OR_POP z
                      -->  x:POP_JUMP_IF_FALSE y+1
                   where y+1 is the instruction following the second test.
                */
            case JUMP_IF_FALSE_OR_POP:
                switch (target->i_opcode) {
                    case POP_JUMP_IF_FALSE:
                        i -= jump_thread(inst, target, POP_JUMP_IF_FALSE);
                        break;
                    case JUMP_ABSOLUTE:
                    case JUMP_FORWARD:
                    case JUMP_IF_FALSE_OR_POP:
                        i -= jump_thread(inst, target, JUMP_IF_FALSE_OR_POP);
                        break;
                    case JUMP_IF_TRUE_OR_POP:
                    case POP_JUMP_IF_TRUE:
                        if (inst->i_lineno == target->i_lineno) {
                            // We don't need to bother checking for loops here,
                            // since a block's b_next cannot point to itself:
                            assert(inst->i_target != inst->i_target->b_next);
                            inst->i_opcode = POP_JUMP_IF_FALSE;
                            inst->i_target = inst->i_target->b_next;
                            --i;
                        }
                        break;
                }
                break;
            case JUMP_IF_TRUE_OR_POP:
                /* Mirror image of the JUMP_IF_FALSE_OR_POP case above. */
                switch (target->i_opcode) {
                    case POP_JUMP_IF_TRUE:
                        i -= jump_thread(inst, target, POP_JUMP_IF_TRUE);
                        break;
                    case JUMP_ABSOLUTE:
                    case JUMP_FORWARD:
                    case JUMP_IF_TRUE_OR_POP:
                        i -= jump_thread(inst, target, JUMP_IF_TRUE_OR_POP);
                        break;
                    case JUMP_IF_FALSE_OR_POP:
                    case POP_JUMP_IF_FALSE:
                        if (inst->i_lineno == target->i_lineno) {
                            // We don't need to bother checking for loops here,
                            // since a block's b_next cannot point to itself:
                            assert(inst->i_target != inst->i_target->b_next);
                            inst->i_opcode = POP_JUMP_IF_TRUE;
                            inst->i_target = inst->i_target->b_next;
                            --i;
                        }
                        break;
                }
                break;
            case POP_JUMP_IF_FALSE:
                switch (target->i_opcode) {
                    case JUMP_ABSOLUTE:
                    case JUMP_FORWARD:
                        i -= jump_thread(inst, target, POP_JUMP_IF_FALSE);
                }
                break;
            case POP_JUMP_IF_TRUE:
                switch (target->i_opcode) {
                    case JUMP_ABSOLUTE:
                    case JUMP_FORWARD:
                        i -= jump_thread(inst, target, POP_JUMP_IF_TRUE);
                }
                break;
            case JUMP_ABSOLUTE:
            case JUMP_FORWARD:
                switch (target->i_opcode) {
                    case JUMP_ABSOLUTE:
                    case JUMP_FORWARD:
                        i -= jump_thread(inst, target, JUMP_ABSOLUTE);
                }
                break;
            case FOR_ITER:
                if (target->i_opcode == JUMP_FORWARD) {
                    i -= jump_thread(inst, target, FOR_ITER);
                }
                break;
            case ROT_N:
                /* Canonicalize small rotations, then try to remove a
                   run of n rotations of n items (a stack no-op). */
                switch (oparg) {
                    case 0:
                    case 1:
                        inst->i_opcode = NOP;
                        continue;
                    case 2:
                        inst->i_opcode = ROT_TWO;
                        break;
                    case 3:
                        inst->i_opcode = ROT_THREE;
                        break;
                    case 4:
                        inst->i_opcode = ROT_FOUR;
                        break;
                }
                if (i >= oparg - 1) {
                    fold_rotations(inst - oparg + 1, oparg);
                }
                break;
        }
    }
    return 0;
error:
    return -1;
}
7531 
7532 /* If this block ends with an unconditional jump to an exit block,
7533  * then remove the jump and extend this block with the target.
7534  */
7535 static int
extend_block(basicblock * bb)7536 extend_block(basicblock *bb) {
7537     if (bb->b_iused == 0) {
7538         return 0;
7539     }
7540     struct instr *last = &bb->b_instr[bb->b_iused-1];
7541     if (last->i_opcode != JUMP_ABSOLUTE && last->i_opcode != JUMP_FORWARD) {
7542         return 0;
7543     }
7544     if (last->i_target->b_exit && last->i_target->b_iused <= MAX_COPY_SIZE) {
7545         basicblock *to_copy = last->i_target;
7546         last->i_opcode = NOP;
7547         for (int i = 0; i < to_copy->b_iused; i++) {
7548             int index = compiler_next_instr(bb);
7549             if (index < 0) {
7550                 return -1;
7551             }
7552             bb->b_instr[index] = to_copy->b_instr[i];
7553         }
7554         bb->b_exit = 1;
7555     }
7556     return 0;
7557 }
7558 
/* Remove NOP instructions from a block whenever doing so cannot change
 * the sequence of line-number events the block produces.
 *
 * prev_lineno is the line number of the last instruction executed
 * before this block (-1 if unknown); it seeds the duplicate-line test.
 * Surviving instructions are compacted in place and b_iused is shrunk.
 */
static void
clean_basic_block(basicblock *bb, int prev_lineno) {
    /* Remove NOPs when legal to do so. */
    int dest = 0;
    for (int src = 0; src < bb->b_iused; src++) {
        int lineno = bb->b_instr[src].i_lineno;
        if (bb->b_instr[src].i_opcode == NOP) {
            /* Eliminate no-op if it doesn't have a line number */
            if (lineno < 0) {
                continue;
            }
            /* or, if the previous instruction had the same line number. */
            if (prev_lineno == lineno) {
                continue;
            }
            /* or, if the next instruction has same line number or no line number */
            if (src < bb->b_iused - 1) {
                int next_lineno = bb->b_instr[src+1].i_lineno;
                if (next_lineno < 0 || next_lineno == lineno) {
                    /* Transfer the NOP's line number to its successor
                       so the line event is not lost. */
                    bb->b_instr[src+1].i_lineno = lineno;
                    continue;
                }
            }
            else {
                /* The NOP is the last instruction: compare against the
                   first instruction of the next non-empty block. */
                basicblock* next = bb->b_next;
                while (next && next->b_iused == 0) {
                    next = next->b_next;
                }
                /* or if last instruction in BB and next BB has same line number */
                if (next) {
                    if (lineno == next->b_instr[0].i_lineno) {
                        continue;
                    }
                }
            }

        }
        /* Keep this instruction, compacting the array as we go. */
        if (dest != src) {
            bb->b_instr[dest] = bb->b_instr[src];
        }
        dest++;
        prev_lineno = lineno;
    }
    assert(dest <= bb->b_iused);
    bb->b_iused = dest;
}
7605 
/* Mark a block as exit and/or nofallthrough based on its instructions,
 * and make every jump point at a non-empty block.
 * Returns 0 on success; raises SystemError and returns -1 if a jump is
 * not the last instruction of its block (malformed CFG).
 */
static int
normalize_basic_block(basicblock *bb) {
    /* Mark blocks as exit and/or nofallthrough.
     Raise SystemError if CFG is malformed. */
    for (int i = 0; i < bb->b_iused; i++) {
        switch(bb->b_instr[i].i_opcode) {
            /* Instructions that leave the frame: the block is an exit
               and never falls through to b_next. */
            case RETURN_VALUE:
            case RAISE_VARARGS:
            case RERAISE:
                bb->b_exit = 1;
                bb->b_nofallthrough = 1;
                break;
            /* Unconditional jumps never fall through... */
            case JUMP_ABSOLUTE:
            case JUMP_FORWARD:
                bb->b_nofallthrough = 1;
                /* fall through */
            /* ...and every jump must be the block's last instruction. */
            case POP_JUMP_IF_FALSE:
            case POP_JUMP_IF_TRUE:
            case JUMP_IF_FALSE_OR_POP:
            case JUMP_IF_TRUE_OR_POP:
            case FOR_ITER:
                if (i != bb->b_iused-1) {
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
                    return -1;
                }
                /* Skip over empty basic blocks. */
                while (bb->b_instr[i].i_target->b_iused == 0) {
                    bb->b_instr[i].i_target = bb->b_instr[i].i_target->b_next;
                }

        }
    }
    return 0;
}
7640 
7641 static int
mark_reachable(struct assembler * a)7642 mark_reachable(struct assembler *a) {
7643     basicblock **stack, **sp;
7644     sp = stack = (basicblock **)PyObject_Malloc(sizeof(basicblock *) * a->a_nblocks);
7645     if (stack == NULL) {
7646         return -1;
7647     }
7648     a->a_entry->b_predecessors = 1;
7649     *sp++ = a->a_entry;
7650     while (sp > stack) {
7651         basicblock *b = *(--sp);
7652         if (b->b_next && !b->b_nofallthrough) {
7653             if (b->b_next->b_predecessors == 0) {
7654                 *sp++ = b->b_next;
7655             }
7656             b->b_next->b_predecessors++;
7657         }
7658         for (int i = 0; i < b->b_iused; i++) {
7659             basicblock *target;
7660             if (is_jump(&b->b_instr[i])) {
7661                 target = b->b_instr[i].i_target;
7662                 if (target->b_predecessors == 0) {
7663                     *sp++ = target;
7664                 }
7665                 target->b_predecessors++;
7666             }
7667         }
7668     }
7669     PyObject_Free(stack);
7670     return 0;
7671 }
7672 
7673 static void
eliminate_empty_basic_blocks(basicblock * entry)7674 eliminate_empty_basic_blocks(basicblock *entry) {
7675     /* Eliminate empty blocks */
7676     for (basicblock *b = entry; b != NULL; b = b->b_next) {
7677         basicblock *next = b->b_next;
7678         if (next) {
7679             while (next->b_iused == 0 && next->b_next) {
7680                 next = next->b_next;
7681             }
7682             b->b_next = next;
7683         }
7684     }
7685     for (basicblock *b = entry; b != NULL; b = b->b_next) {
7686         if (b->b_iused == 0) {
7687             continue;
7688         }
7689         if (is_jump(&b->b_instr[b->b_iused-1])) {
7690             basicblock *target = b->b_instr[b->b_iused-1].i_target;
7691             while (target->b_iused == 0) {
7692                 target = target->b_next;
7693             }
7694             b->b_instr[b->b_iused-1].i_target = target;
7695         }
7696     }
7697 }
7698 
7699 
/* If an instruction has no line number, but its predecessor in the BB does,
 * then copy the line number. If a successor block has no line number, and only
 * one predecessor, then inherit the line number.
 * This ensures that all exit blocks (with one predecessor) receive a line number.
 * Also reduces the size of the line number table,
 * but has no impact on the generated line number events.
 */
static void
propagate_line_numbers(struct assembler *a) {
    for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
        if (b->b_iused == 0) {
            continue;
        }
        /* Forward-fill line numbers within the block. */
        int prev_lineno = -1;
        for (int i = 0; i < b->b_iused; i++) {
            if (b->b_instr[i].i_lineno < 0) {
                b->b_instr[i].i_lineno = prev_lineno;
            }
            else {
                prev_lineno = b->b_instr[i].i_lineno;
            }
        }
        /* Let a sole-predecessor fall-through successor inherit the
           final line number of this block. */
        if (!b->b_nofallthrough && b->b_next->b_predecessors == 1) {
            assert(b->b_next->b_iused);
            if (b->b_next->b_instr[0].i_lineno < 0) {
                b->b_next->b_instr[0].i_lineno = prev_lineno;
            }
        }
        /* Likewise for a sole-predecessor jump target. */
        if (is_jump(&b->b_instr[b->b_iused-1])) {
            switch (b->b_instr[b->b_iused-1].i_opcode) {
                /* Note: Only actual jumps, not exception handlers */
                case SETUP_ASYNC_WITH:
                case SETUP_WITH:
                case SETUP_FINALLY:
                    continue;
            }
            basicblock *target = b->b_instr[b->b_iused-1].i_target;
            if (target->b_predecessors == 1) {
                if (target->b_instr[0].i_lineno < 0) {
                    target->b_instr[0].i_lineno = prev_lineno;
                }
            }
        }
    }
}
7745 
/* Perform optimizations on a control flow graph.
   The consts object should still be in list form to allow new constants
   to be appended.

   All transformations keep the code size the same or smaller.
   For those that reduce size, the gaps are initially filled with
   NOPs.  Later those NOPs are removed.

   Returns 0 on success, -1 with an exception set on error.
*/

static int
optimize_cfg(struct compiler *c, struct assembler *a, PyObject *consts)
{
    /* Pass 1: peephole-optimize each block, then strip removable NOPs. */
    for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
        if (optimize_basic_block(c, b, consts)) {
            return -1;
        }
        clean_basic_block(b, -1);
        /* Predecessor counts have not been computed yet. */
        assert(b->b_predecessors == 0);
    }
    /* Pass 2: inline small exit blocks that are jump targets. */
    for (basicblock *b = c->u->u_blocks; b != NULL; b = b->b_list) {
        if (extend_block(b)) {
            return -1;
        }
    }
    /* Pass 3: compute predecessor counts from the entry block. */
    if (mark_reachable(a)) {
        return -1;
    }
    /* Delete unreachable instructions */
    for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
       if (b->b_predecessors == 0) {
            b->b_iused = 0;
            b->b_nofallthrough = 0;
       }
    }
    /* Pass 4: re-clean each block, now seeding the duplicate-line test
       with the last line number of its fall-through predecessor. */
    basicblock *pred = NULL;
    for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
        int prev_lineno = -1;
        if (pred && pred->b_iused) {
            prev_lineno = pred->b_instr[pred->b_iused-1].i_lineno;
        }
        clean_basic_block(b, prev_lineno);
        pred = b->b_nofallthrough ? NULL : b;
    }
    eliminate_empty_basic_blocks(a->a_entry);
    /* Delete jump instructions made redundant by previous step. If a non-empty
       block ends with a jump instruction, check if the next non-empty block
       reached through normal flow control is the target of that jump. If it
       is, then the jump instruction is redundant and can be deleted.
    */
    int maybe_empty_blocks = 0;
    for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
        if (b->b_iused > 0) {
            struct instr *b_last_instr = &b->b_instr[b->b_iused - 1];
            if (b_last_instr->i_opcode == JUMP_ABSOLUTE ||
                b_last_instr->i_opcode == JUMP_FORWARD) {
                if (b_last_instr->i_target == b->b_next) {
                    assert(b->b_next->b_iused);
                    b->b_nofallthrough = 0;
                    b_last_instr->i_opcode = NOP;
                    clean_basic_block(b, -1);
                    maybe_empty_blocks = 1;
                }
            }
        }
    }
    if (maybe_empty_blocks) {
        eliminate_empty_basic_blocks(a->a_entry);
    }
    return 0;
}
7816 
7817 // Remove trailing unused constants.
7818 static int
trim_unused_consts(struct compiler * c,struct assembler * a,PyObject * consts)7819 trim_unused_consts(struct compiler *c, struct assembler *a, PyObject *consts)
7820 {
7821     assert(PyList_CheckExact(consts));
7822 
7823     // The first constant may be docstring; keep it always.
7824     int max_const_index = 0;
7825     for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
7826         for (int i = 0; i < b->b_iused; i++) {
7827             if (b->b_instr[i].i_opcode == LOAD_CONST &&
7828                     b->b_instr[i].i_oparg > max_const_index) {
7829                 max_const_index = b->b_instr[i].i_oparg;
7830             }
7831         }
7832     }
7833     if (max_const_index+1 < PyList_GET_SIZE(consts)) {
7834         //fprintf(stderr, "removing trailing consts: max=%d, size=%d\n",
7835         //        max_const_index, (int)PyList_GET_SIZE(consts));
7836         if (PyList_SetSlice(consts, max_const_index+1,
7837                             PyList_GET_SIZE(consts), NULL) < 0) {
7838             return 1;
7839         }
7840     }
7841     return 0;
7842 }
7843 
7844 static inline int
is_exit_without_lineno(basicblock * b)7845 is_exit_without_lineno(basicblock *b) {
7846     return b->b_exit && b->b_instr[0].i_lineno < 0;
7847 }
7848 
/* PEP 626 mandates that the f_lineno of a frame is correct
 * after a frame terminates. It would be prohibitively expensive
 * to continuously update the f_lineno field at runtime,
 * so we make sure that all exiting instruction (raises and returns)
 * have a valid line number, allowing us to compute f_lineno lazily.
 * We can do this by duplicating the exit blocks without line number
 * so that none have more than one predecessor. We can then safely
 * copy the line number from the sole predecessor block.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
static int
duplicate_exits_without_lineno(struct compiler *c)
{
    /* Copy all exit blocks without line number that are targets of a jump.
     */
    for (basicblock *b = c->u->u_blocks; b != NULL; b = b->b_list) {
        if (b->b_iused > 0 && is_jump(&b->b_instr[b->b_iused-1])) {
            switch (b->b_instr[b->b_iused-1].i_opcode) {
                /* Note: Only actual jumps, not exception handlers */
                case SETUP_ASYNC_WITH:
                case SETUP_WITH:
                case SETUP_FINALLY:
                    continue;
            }
            basicblock *target = b->b_instr[b->b_iused-1].i_target;
            if (is_exit_without_lineno(target) && target->b_predecessors > 1) {
                /* Clone the exit block so this jump gets a private copy
                   that can take the jump's own line number. */
                basicblock *new_target = compiler_copy_block(c, target);
                if (new_target == NULL) {
                    return -1;
                }
                new_target->b_instr[0].i_lineno = b->b_instr[b->b_iused-1].i_lineno;
                b->b_instr[b->b_iused-1].i_target = new_target;
                /* Move one predecessor from the old target to the clone
                   and splice the clone in right after the original. */
                target->b_predecessors--;
                new_target->b_predecessors = 1;
                new_target->b_next = target->b_next;
                target->b_next = new_target;
            }
        }
    }
    /* Eliminate empty blocks */
    for (basicblock *b = c->u->u_blocks; b != NULL; b = b->b_list) {
        while (b->b_next && b->b_next->b_iused == 0) {
            b->b_next = b->b_next->b_next;
        }
    }
    /* Any remaining reachable exit blocks without line number can only be reached by
     * fall through, and thus can only have a single predecessor */
    for (basicblock *b = c->u->u_blocks; b != NULL; b = b->b_list) {
        if (!b->b_nofallthrough && b->b_next && b->b_iused > 0) {
            if (is_exit_without_lineno(b->b_next)) {
                assert(b->b_next->b_iused > 0);
                /* Copy the line number from the sole (fall-through)
                   predecessor's last instruction. */
                b->b_next->b_instr[0].i_lineno = b->b_instr[b->b_iused-1].i_lineno;
            }
        }
    }
    return 0;
}
7905 
7906 
/* Retained for API compatibility.
 * Optimization is now done in optimize_cfg */

PyObject *
PyCode_Optimize(PyObject *code, PyObject* Py_UNUSED(consts),
                PyObject *Py_UNUSED(names), PyObject *Py_UNUSED(lnotab_obj))
{
    /* No-op: return the bytecode unchanged, with a new reference
       for the caller. */
    Py_INCREF(code);
    return code;
}
7917