• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #ifdef _Py_TIER2
2 
3 #include "Python.h"
4 #include "opcode.h"
5 #include "pycore_interp.h"
6 #include "pycore_backoff.h"
7 #include "pycore_bitutils.h"        // _Py_popcount32()
8 #include "pycore_object.h"          // _PyObject_GC_UNTRACK()
9 #include "pycore_opcode_metadata.h" // _PyOpcode_OpName[]
10 #include "pycore_opcode_utils.h"  // MAX_REAL_OPCODE
11 #include "pycore_optimizer.h"     // _Py_uop_analyze_and_optimize()
12 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
13 #include "pycore_uop_ids.h"
14 #include "pycore_jit.h"
15 #include <stdbool.h>
16 #include <stdint.h>
17 #include <stddef.h>
18 
19 #define NEED_OPCODE_METADATA
20 #include "pycore_uop_metadata.h" // Uop tables
21 #undef NEED_OPCODE_METADATA
22 
23 #define MAX_EXECUTORS_SIZE 256
24 
25 #ifdef Py_DEBUG
26 static int
base_opcode(PyCodeObject * code,int offset)27 base_opcode(PyCodeObject *code, int offset)
28 {
29     int opcode = _Py_GetBaseOpcode(code, offset);
30     if (opcode == ENTER_EXECUTOR) {
31         int oparg = _PyCode_CODE(code)[offset].op.arg;
32         _PyExecutorObject *ex = code->co_executors->executors[oparg];
33         return ex->vm_data.opcode;
34     }
35     return opcode;
36 }
37 #endif
38 
39 static bool
has_space_for_executor(PyCodeObject * code,_Py_CODEUNIT * instr)40 has_space_for_executor(PyCodeObject *code, _Py_CODEUNIT *instr)
41 {
42     if (instr->op.code == ENTER_EXECUTOR) {
43         return true;
44     }
45     if (code->co_executors == NULL) {
46         return true;
47     }
48     return code->co_executors->size < MAX_EXECUTORS_SIZE;
49 }
50 
51 static int32_t
get_index_for_executor(PyCodeObject * code,_Py_CODEUNIT * instr)52 get_index_for_executor(PyCodeObject *code, _Py_CODEUNIT *instr)
53 {
54     if (instr->op.code == ENTER_EXECUTOR) {
55         return instr->op.arg;
56     }
57     _PyExecutorArray *old = code->co_executors;
58     int size = 0;
59     int capacity = 0;
60     if (old != NULL) {
61         size = old->size;
62         capacity = old->capacity;
63         assert(size < MAX_EXECUTORS_SIZE);
64     }
65     assert(size <= capacity);
66     if (size == capacity) {
67         /* Array is full. Grow array */
68         int new_capacity = capacity ? capacity * 2 : 4;
69         _PyExecutorArray *new = PyMem_Realloc(
70             old,
71             offsetof(_PyExecutorArray, executors) +
72             new_capacity * sizeof(_PyExecutorObject *));
73         if (new == NULL) {
74             return -1;
75         }
76         new->capacity = new_capacity;
77         new->size = size;
78         code->co_executors = new;
79     }
80     assert(size < code->co_executors->capacity);
81     return size;
82 }
83 
84 static void
insert_executor(PyCodeObject * code,_Py_CODEUNIT * instr,int index,_PyExecutorObject * executor)85 insert_executor(PyCodeObject *code, _Py_CODEUNIT *instr, int index, _PyExecutorObject *executor)
86 {
87     Py_INCREF(executor);
88     if (instr->op.code == ENTER_EXECUTOR) {
89         assert(index == instr->op.arg);
90         _Py_ExecutorDetach(code->co_executors->executors[index]);
91     }
92     else {
93         assert(code->co_executors->size == index);
94         assert(code->co_executors->capacity > index);
95         code->co_executors->size++;
96     }
97     executor->vm_data.opcode = instr->op.code;
98     executor->vm_data.oparg = instr->op.arg;
99     executor->vm_data.code = code;
100     executor->vm_data.index = (int)(instr - _PyCode_CODE(code));
101     code->co_executors->executors[index] = executor;
102     assert(index < MAX_EXECUTORS_SIZE);
103     instr->op.code = ENTER_EXECUTOR;
104     instr->op.arg = index;
105 }
106 
107 
108 static int
never_optimize(_PyOptimizerObject * self,_PyInterpreterFrame * frame,_Py_CODEUNIT * instr,_PyExecutorObject ** exec,int Py_UNUSED (stack_entries))109 never_optimize(
110     _PyOptimizerObject* self,
111     _PyInterpreterFrame *frame,
112     _Py_CODEUNIT *instr,
113     _PyExecutorObject **exec,
114     int Py_UNUSED(stack_entries))
115 {
116     // This may be called if the optimizer is reset
117     return 0;
118 }
119 
120 PyTypeObject _PyDefaultOptimizer_Type = {
121     PyVarObject_HEAD_INIT(&PyType_Type, 0)
122     .tp_name = "noop_optimizer",
123     .tp_basicsize = sizeof(_PyOptimizerObject),
124     .tp_itemsize = 0,
125     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
126 };
127 
128 static _PyOptimizerObject _PyOptimizer_Default = {
129     PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type)
130     .optimize = never_optimize,
131 };
132 
133 _PyOptimizerObject *
_Py_GetOptimizer(void)134 _Py_GetOptimizer(void)
135 {
136     PyInterpreterState *interp = _PyInterpreterState_GET();
137     if (interp->optimizer == &_PyOptimizer_Default) {
138         return NULL;
139     }
140     Py_INCREF(interp->optimizer);
141     return interp->optimizer;
142 }
143 
144 static _PyExecutorObject *
145 make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFilter *dependencies);
146 
147 static int
148 init_cold_exit_executor(_PyExecutorObject *executor, int oparg);
149 
150 /* It is impossible for the number of exits to reach 1/4 of the total length,
151  * as the number of exits cannot reach 1/3 of the number of non-exits, due to
152  * the presence of CHECK_VALIDITY checks and instructions to produce the values
153  * being checked in exits. */
154 #define COLD_EXIT_COUNT (UOP_MAX_TRACE_LENGTH/4)
155 
156 static int cold_exits_initialized = 0;
157 static _PyExecutorObject COLD_EXITS[COLD_EXIT_COUNT] = { 0 };
158 
159 static const _PyBloomFilter EMPTY_FILTER = { 0 };
160 
161 _PyOptimizerObject *
_Py_SetOptimizer(PyInterpreterState * interp,_PyOptimizerObject * optimizer)162 _Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer)
163 {
164     if (optimizer == NULL) {
165         optimizer = &_PyOptimizer_Default;
166     }
167     else if (cold_exits_initialized == 0) {
168         cold_exits_initialized = 1;
169         for (int i = 0; i < COLD_EXIT_COUNT; i++) {
170             if (init_cold_exit_executor(&COLD_EXITS[i], i)) {
171                 return NULL;
172             }
173         }
174     }
175     _PyOptimizerObject *old = interp->optimizer;
176     if (old == NULL) {
177         old = &_PyOptimizer_Default;
178     }
179     Py_INCREF(optimizer);
180     interp->optimizer = optimizer;
181     return old;
182 }
183 
184 int
_Py_SetTier2Optimizer(_PyOptimizerObject * optimizer)185 _Py_SetTier2Optimizer(_PyOptimizerObject *optimizer)
186 {
187     PyInterpreterState *interp = _PyInterpreterState_GET();
188     _PyOptimizerObject *old = _Py_SetOptimizer(interp, optimizer);
189     Py_XDECREF(old);
190     return old == NULL ? -1 : 0;
191 }
192 
193 /* Returns 1 if optimized, 0 if not optimized, and -1 for an error.
194  * If optimized, *executor_ptr contains a new reference to the executor
195  */
196 int
_PyOptimizer_Optimize(_PyInterpreterFrame * frame,_Py_CODEUNIT * start,PyObject ** stack_pointer,_PyExecutorObject ** executor_ptr)197 _PyOptimizer_Optimize(
198     _PyInterpreterFrame *frame, _Py_CODEUNIT *start,
199     PyObject **stack_pointer, _PyExecutorObject **executor_ptr)
200 {
201     PyCodeObject *code = _PyFrame_GetCode(frame);
202     assert(PyCode_Check(code));
203     PyInterpreterState *interp = _PyInterpreterState_GET();
204     if (!has_space_for_executor(code, start)) {
205         return 0;
206     }
207     _PyOptimizerObject *opt = interp->optimizer;
208     int err = opt->optimize(opt, frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
209     if (err <= 0) {
210         return err;
211     }
212     assert(*executor_ptr != NULL);
213     int index = get_index_for_executor(code, start);
214     if (index < 0) {
215         /* Out of memory. Don't raise and assume that the
216          * error will show up elsewhere.
217          *
218          * If an optimizer has already produced an executor,
219          * it might get confused by the executor disappearing,
220          * but there is not much we can do about that here. */
221         Py_DECREF(*executor_ptr);
222         return 0;
223     }
224     insert_executor(code, start, index, *executor_ptr);
225     assert((*executor_ptr)->vm_data.valid);
226     return 1;
227 }
228 
229 _PyExecutorObject *
_Py_GetExecutor(PyCodeObject * code,int offset)230 _Py_GetExecutor(PyCodeObject *code, int offset)
231 {
232     int code_len = (int)Py_SIZE(code);
233     for (int i = 0 ; i < code_len;) {
234         if (_PyCode_CODE(code)[i].op.code == ENTER_EXECUTOR && i*2 == offset) {
235             int oparg = _PyCode_CODE(code)[i].op.arg;
236             _PyExecutorObject *res = code->co_executors->executors[oparg];
237             Py_INCREF(res);
238             return res;
239         }
240         i += _PyInstruction_GetLength(code, i);
241     }
242     PyErr_SetString(PyExc_ValueError, "no executor at given byte offset");
243     return NULL;
244 }
245 
246 static PyObject *
is_valid(PyObject * self,PyObject * Py_UNUSED (ignored))247 is_valid(PyObject *self, PyObject *Py_UNUSED(ignored))
248 {
249     return PyBool_FromLong(((_PyExecutorObject *)self)->vm_data.valid);
250 }
251 
252 static PyObject *
get_opcode(PyObject * self,PyObject * Py_UNUSED (ignored))253 get_opcode(PyObject *self, PyObject *Py_UNUSED(ignored))
254 {
255     return PyLong_FromUnsignedLong(((_PyExecutorObject *)self)->vm_data.opcode);
256 }
257 
258 static PyObject *
get_oparg(PyObject * self,PyObject * Py_UNUSED (ignored))259 get_oparg(PyObject *self, PyObject *Py_UNUSED(ignored))
260 {
261     return PyLong_FromUnsignedLong(((_PyExecutorObject *)self)->vm_data.oparg);
262 }
263 
264 static PyMethodDef executor_methods[] = {
265     { "is_valid", is_valid, METH_NOARGS, NULL },
266     { "get_opcode", get_opcode, METH_NOARGS, NULL },
267     { "get_oparg", get_oparg, METH_NOARGS, NULL },
268     { NULL, NULL },
269 };
270 
271 ///////////////////// Experimental UOp Optimizer /////////////////////
272 
273 static int executor_clear(_PyExecutorObject *executor);
274 static void unlink_executor(_PyExecutorObject *executor);
275 
276 static void
uop_dealloc(_PyExecutorObject * self)277 uop_dealloc(_PyExecutorObject *self) {
278     _PyObject_GC_UNTRACK(self);
279     assert(self->vm_data.code == NULL);
280     unlink_executor(self);
281 #ifdef _Py_JIT
282     _PyJIT_Free(self);
283 #endif
284     PyObject_GC_Del(self);
285 }
286 
287 const char *
_PyUOpName(int index)288 _PyUOpName(int index)
289 {
290     if (index < 0 || index > MAX_UOP_ID) {
291         return NULL;
292     }
293     return _PyOpcode_uop_name[index];
294 }
295 
296 #ifdef Py_DEBUG
297 void
_PyUOpPrint(const _PyUOpInstruction * uop)298 _PyUOpPrint(const _PyUOpInstruction *uop)
299 {
300     const char *name = _PyUOpName(uop->opcode);
301     if (name == NULL) {
302         printf("<uop %d>", uop->opcode);
303     }
304     else {
305         printf("%s", name);
306     }
307     switch(uop->format) {
308         case UOP_FORMAT_TARGET:
309             printf(" (%d, target=%d, operand=%#" PRIx64,
310                 uop->oparg,
311                 uop->target,
312                 (uint64_t)uop->operand);
313             break;
314         case UOP_FORMAT_JUMP:
315             printf(" (%d, jump_target=%d, operand=%#" PRIx64,
316                 uop->oparg,
317                 uop->jump_target,
318                 (uint64_t)uop->operand);
319             break;
320         case UOP_FORMAT_EXIT:
321             printf(" (%d, exit_index=%d, operand=%#" PRIx64,
322                 uop->oparg,
323                 uop->exit_index,
324                 (uint64_t)uop->operand);
325             break;
326         default:
327             printf(" (%d, Unknown format)", uop->oparg);
328     }
329     if (_PyUop_Flags[uop->opcode] & HAS_ERROR_FLAG) {
330         printf(", error_target=%d", uop->error_target);
331     }
332 
333     printf(")");
334 }
335 #endif
336 
337 static Py_ssize_t
uop_len(_PyExecutorObject * self)338 uop_len(_PyExecutorObject *self)
339 {
340     return self->code_size;
341 }
342 
343 static PyObject *
uop_item(_PyExecutorObject * self,Py_ssize_t index)344 uop_item(_PyExecutorObject *self, Py_ssize_t index)
345 {
346     Py_ssize_t len = uop_len(self);
347     if (index < 0 || index >= len) {
348         PyErr_SetNone(PyExc_IndexError);
349         return NULL;
350     }
351     const char *name = _PyUOpName(self->trace[index].opcode);
352     if (name == NULL) {
353         name = "<nil>";
354     }
355     PyObject *oname = _PyUnicode_FromASCII(name, strlen(name));
356     if (oname == NULL) {
357         return NULL;
358     }
359     PyObject *oparg = PyLong_FromUnsignedLong(self->trace[index].oparg);
360     if (oparg == NULL) {
361         Py_DECREF(oname);
362         return NULL;
363     }
364     PyObject *target = PyLong_FromUnsignedLong(self->trace[index].target);
365     if (oparg == NULL) {
366         Py_DECREF(oparg);
367         Py_DECREF(oname);
368         return NULL;
369     }
370     PyObject *operand = PyLong_FromUnsignedLongLong(self->trace[index].operand);
371     if (operand == NULL) {
372         Py_DECREF(target);
373         Py_DECREF(oparg);
374         Py_DECREF(oname);
375         return NULL;
376     }
377     PyObject *args[4] = { oname, oparg, target, operand };
378     return _PyTuple_FromArraySteal(args, 4);
379 }
380 
381 PySequenceMethods uop_as_sequence = {
382     .sq_length = (lenfunc)uop_len,
383     .sq_item = (ssizeargfunc)uop_item,
384 };
385 
386 static int
executor_traverse(PyObject * o,visitproc visit,void * arg)387 executor_traverse(PyObject *o, visitproc visit, void *arg)
388 {
389     _PyExecutorObject *executor = (_PyExecutorObject *)o;
390     for (uint32_t i = 0; i < executor->exit_count; i++) {
391         Py_VISIT(executor->exits[i].executor);
392     }
393     return 0;
394 }
395 
396 static PyObject *
get_jit_code(PyObject * self,PyObject * Py_UNUSED (ignored))397 get_jit_code(PyObject *self, PyObject *Py_UNUSED(ignored))
398 {
399 #ifndef _Py_JIT
400     PyErr_SetString(PyExc_RuntimeError, "JIT support not enabled.");
401     return NULL;
402 #else
403     _PyExecutorObject *executor = (_PyExecutorObject *)self;
404     if (executor->jit_code == NULL || executor->jit_size == 0) {
405         Py_RETURN_NONE;
406     }
407     return PyBytes_FromStringAndSize(executor->jit_code, executor->jit_size);
408 #endif
409 }
410 
411 static PyMethodDef uop_executor_methods[] = {
412     { "is_valid", is_valid, METH_NOARGS, NULL },
413     { "get_jit_code", get_jit_code, METH_NOARGS, NULL},
414     { "get_opcode", get_opcode, METH_NOARGS, NULL },
415     { "get_oparg", get_oparg, METH_NOARGS, NULL },
416     { NULL, NULL },
417 };
418 
419 static int
executor_is_gc(PyObject * o)420 executor_is_gc(PyObject *o)
421 {
422     return !_Py_IsImmortal(o);
423 }
424 
425 PyTypeObject _PyUOpExecutor_Type = {
426     PyVarObject_HEAD_INIT(&PyType_Type, 0)
427     .tp_name = "uop_executor",
428     .tp_basicsize = offsetof(_PyExecutorObject, exits),
429     .tp_itemsize = 1,
430     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC,
431     .tp_dealloc = (destructor)uop_dealloc,
432     .tp_as_sequence = &uop_as_sequence,
433     .tp_methods = uop_executor_methods,
434     .tp_traverse = executor_traverse,
435     .tp_clear = (inquiry)executor_clear,
436     .tp_is_gc = executor_is_gc,
437 };
438 
439 /* TO DO -- Generate these tables */
440 static const uint16_t
441 _PyUOp_Replacements[MAX_UOP_ID + 1] = {
442     [_ITER_JUMP_RANGE] = _GUARD_NOT_EXHAUSTED_RANGE,
443     [_ITER_JUMP_LIST] = _GUARD_NOT_EXHAUSTED_LIST,
444     [_ITER_JUMP_TUPLE] = _GUARD_NOT_EXHAUSTED_TUPLE,
445     [_FOR_ITER] = _FOR_ITER_TIER_TWO,
446 };
447 
448 static const uint8_t
449 is_for_iter_test[MAX_UOP_ID + 1] = {
450     [_GUARD_NOT_EXHAUSTED_RANGE] = 1,
451     [_GUARD_NOT_EXHAUSTED_LIST] = 1,
452     [_GUARD_NOT_EXHAUSTED_TUPLE] = 1,
453     [_FOR_ITER_TIER_TWO] = 1,
454 };
455 
456 static const uint16_t
457 BRANCH_TO_GUARD[4][2] = {
458     [POP_JUMP_IF_FALSE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_TRUE_POP,
459     [POP_JUMP_IF_FALSE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_FALSE_POP,
460     [POP_JUMP_IF_TRUE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_FALSE_POP,
461     [POP_JUMP_IF_TRUE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_TRUE_POP,
462     [POP_JUMP_IF_NONE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_NOT_NONE_POP,
463     [POP_JUMP_IF_NONE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_NONE_POP,
464     [POP_JUMP_IF_NOT_NONE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_NONE_POP,
465     [POP_JUMP_IF_NOT_NONE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_NOT_NONE_POP,
466 };
467 
468 
469 #define CONFIDENCE_RANGE 1000
470 #define CONFIDENCE_CUTOFF 333
471 
472 #ifdef Py_DEBUG
473 #define DPRINTF(level, ...) \
474     if (lltrace >= (level)) { printf(__VA_ARGS__); }
475 #else
476 #define DPRINTF(level, ...)
477 #endif
478 
479 
480 static inline int
add_to_trace(_PyUOpInstruction * trace,int trace_length,uint16_t opcode,uint16_t oparg,uint64_t operand,uint32_t target)481 add_to_trace(
482     _PyUOpInstruction *trace,
483     int trace_length,
484     uint16_t opcode,
485     uint16_t oparg,
486     uint64_t operand,
487     uint32_t target)
488 {
489     trace[trace_length].opcode = opcode;
490     trace[trace_length].format = UOP_FORMAT_TARGET;
491     trace[trace_length].target = target;
492     trace[trace_length].oparg = oparg;
493     trace[trace_length].operand = operand;
494     return trace_length + 1;
495 }
496 
497 #ifdef Py_DEBUG
498 #define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \
499     assert(trace_length < max_length); \
500     trace_length = add_to_trace(trace, trace_length, (OPCODE), (OPARG), (OPERAND), (TARGET)); \
501     if (lltrace >= 2) { \
502         printf("%4d ADD_TO_TRACE: ", trace_length); \
503         _PyUOpPrint(&trace[trace_length-1]); \
504         printf("\n"); \
505     }
506 #else
507 #define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \
508     assert(trace_length < max_length); \
509     trace_length = add_to_trace(trace, trace_length, (OPCODE), (OPARG), (OPERAND), (TARGET));
510 #endif
511 
512 #define INSTR_IP(INSTR, CODE) \
513     ((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive)))
514 
515 // Reserve space for n uops
516 #define RESERVE_RAW(n, opname) \
517     if (trace_length + (n) > max_length) { \
518         DPRINTF(2, "No room for %s (need %d, got %d)\n", \
519                 (opname), (n), max_length - trace_length); \
520         OPT_STAT_INC(trace_too_long); \
521         goto done; \
522     }
523 
524 // Reserve space for N uops, plus 3 for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE
525 #define RESERVE(needed) RESERVE_RAW((needed) + 3, _PyUOpName(opcode))
526 
527 // Trace stack operations (used by _PUSH_FRAME, _POP_FRAME)
528 #define TRACE_STACK_PUSH() \
529     if (trace_stack_depth >= TRACE_STACK_SIZE) { \
530         DPRINTF(2, "Trace stack overflow\n"); \
531         OPT_STAT_INC(trace_stack_overflow); \
532         trace_length = 0; \
533         goto done; \
534     } \
535     assert(func == NULL || func->func_code == (PyObject *)code); \
536     trace_stack[trace_stack_depth].func = func; \
537     trace_stack[trace_stack_depth].code = code; \
538     trace_stack[trace_stack_depth].instr = instr; \
539     trace_stack_depth++;
540 #define TRACE_STACK_POP() \
541     if (trace_stack_depth <= 0) { \
542         Py_FatalError("Trace stack underflow\n"); \
543     } \
544     trace_stack_depth--; \
545     func = trace_stack[trace_stack_depth].func; \
546     code = trace_stack[trace_stack_depth].code; \
547     assert(func == NULL || func->func_code == (PyObject *)code); \
548     instr = trace_stack[trace_stack_depth].instr;
549 
550 /* Returns the length of the trace on success,
551  * 0 if it failed to produce a worthwhile trace,
552  * and -1 on an error.
553  */
554 static int
translate_bytecode_to_trace(_PyInterpreterFrame * frame,_Py_CODEUNIT * instr,_PyUOpInstruction * trace,int buffer_size,_PyBloomFilter * dependencies)555 translate_bytecode_to_trace(
556     _PyInterpreterFrame *frame,
557     _Py_CODEUNIT *instr,
558     _PyUOpInstruction *trace,
559     int buffer_size,
560     _PyBloomFilter *dependencies)
561 {
562     bool progress_needed = true;
563     PyCodeObject *code = _PyFrame_GetCode(frame);
564     PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj;
565     assert(PyFunction_Check(func));
566     PyCodeObject *initial_code = code;
567     _Py_BloomFilter_Add(dependencies, initial_code);
568     _Py_CODEUNIT *initial_instr = instr;
569     int trace_length = 0;
570     // Leave space for possible trailing _EXIT_TRACE
571     int max_length = buffer_size-2;
572     struct {
573         PyFunctionObject *func;
574         PyCodeObject *code;
575         _Py_CODEUNIT *instr;
576     } trace_stack[TRACE_STACK_SIZE];
577     int trace_stack_depth = 0;
578     int confidence = CONFIDENCE_RANGE;  // Adjusted by branch instructions
579 
580 #ifdef Py_DEBUG
581     char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
582     int lltrace = 0;
583     if (python_lltrace != NULL && *python_lltrace >= '0') {
584         lltrace = *python_lltrace - '0';  // TODO: Parse an int and all that
585     }
586 #endif
587 
588     DPRINTF(2,
589             "Optimizing %s (%s:%d) at byte offset %d\n",
590             PyUnicode_AsUTF8(code->co_qualname),
591             PyUnicode_AsUTF8(code->co_filename),
592             code->co_firstlineno,
593             2 * INSTR_IP(initial_instr, code));
594     ADD_TO_TRACE(_START_EXECUTOR, 0, (uintptr_t)instr, INSTR_IP(instr, code));
595     uint32_t target = 0;
596 
597 top:  // Jump here after _PUSH_FRAME or likely branches
598     for (;;) {
599         target = INSTR_IP(instr, code);
600         // Need space for _DEOPT
601         max_length--;
602 
603         uint32_t opcode = instr->op.code;
604         uint32_t oparg = instr->op.arg;
605 
606         DPRINTF(2, "%d: %s(%d)\n", target, _PyOpcode_OpName[opcode], oparg);
607 
608         if (opcode == ENTER_EXECUTOR) {
609             assert(oparg < 256);
610             _PyExecutorObject *executor = code->co_executors->executors[oparg];
611             opcode = executor->vm_data.opcode;
612             DPRINTF(2, "  * ENTER_EXECUTOR -> %s\n",  _PyOpcode_OpName[opcode]);
613             oparg = executor->vm_data.oparg;
614         }
615 
616         if (opcode == EXTENDED_ARG) {
617             instr++;
618             opcode = instr->op.code;
619             oparg = (oparg << 8) | instr->op.arg;
620             if (opcode == EXTENDED_ARG) {
621                 instr--;
622                 goto done;
623             }
624         }
625         assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
626         RESERVE_RAW(2, "_CHECK_VALIDITY_AND_SET_IP");
627         ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, (uintptr_t)instr, target);
628 
629         /* Special case the first instruction,
630          * so that we can guarantee forward progress */
631         if (progress_needed) {
632             progress_needed = false;
633             if (opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_NO_INTERRUPT) {
634                 instr += 1 + _PyOpcode_Caches[opcode] - (int32_t)oparg;
635                 initial_instr = instr;
636                 if (opcode == JUMP_BACKWARD) {
637                     ADD_TO_TRACE(_TIER2_RESUME_CHECK, 0, 0, target);
638                 }
639                 continue;
640             }
641             else {
642                 if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) {
643                     opcode = _PyOpcode_Deopt[opcode];
644                 }
645                 assert(!OPCODE_HAS_EXIT(opcode));
646                 assert(!OPCODE_HAS_DEOPT(opcode));
647             }
648         }
649 
650         if (OPCODE_HAS_EXIT(opcode)) {
651             // Make space for exit code
652             max_length--;
653         }
654         if (OPCODE_HAS_ERROR(opcode)) {
655             // Make space for error code
656             max_length--;
657         }
658         switch (opcode) {
659             case POP_JUMP_IF_NONE:
660             case POP_JUMP_IF_NOT_NONE:
661             case POP_JUMP_IF_FALSE:
662             case POP_JUMP_IF_TRUE:
663             {
664                 RESERVE(1);
665                 int counter = instr[1].cache;
666                 int bitcount = _Py_popcount32(counter);
667                 int jump_likely = bitcount > 8;
668                 /* If bitcount is 8 (half the jumps were taken), adjust confidence by 50%.
669                    If it's 16 or 0 (all or none were taken), adjust by 10%
670                    (since the future is still somewhat uncertain).
671                    For values in between, adjust proportionally. */
672                 if (jump_likely) {
673                     confidence = confidence * (bitcount + 2) / 20;
674                 }
675                 else {
676                     confidence = confidence * (18 - bitcount) / 20;
677                 }
678                 uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_likely];
679                 DPRINTF(2, "%d: %s(%d): counter=%04x, bitcount=%d, likely=%d, confidence=%d, uopcode=%s\n",
680                         target, _PyOpcode_OpName[opcode], oparg,
681                         counter, bitcount, jump_likely, confidence, _PyUOpName(uopcode));
682                 if (confidence < CONFIDENCE_CUTOFF) {
683                     DPRINTF(2, "Confidence too low (%d < %d)\n", confidence, CONFIDENCE_CUTOFF);
684                     OPT_STAT_INC(low_confidence);
685                     goto done;
686                 }
687                 _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
688                 _Py_CODEUNIT *target_instr = next_instr + oparg;
689                 if (jump_likely) {
690                     DPRINTF(2, "Jump likely (%04x = %d bits), continue at byte offset %d\n",
691                             instr[1].cache, bitcount, 2 * INSTR_IP(target_instr, code));
692                     instr = target_instr;
693                     ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(next_instr, code));
694                     goto top;
695                 }
696                 ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(target_instr, code));
697                 break;
698             }
699 
700             case JUMP_BACKWARD:
701             case JUMP_BACKWARD_NO_INTERRUPT:
702             {
703                 _Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[opcode] - (int)oparg;
704                 if (target == initial_instr) {
705                     /* We have looped round to the start */
706                     RESERVE(1);
707                     ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
708                 }
709                 else {
710                     OPT_STAT_INC(inner_loop);
711                     DPRINTF(2, "JUMP_BACKWARD not to top ends trace\n");
712                 }
713                 goto done;
714             }
715 
716             case JUMP_FORWARD:
717             {
718                 RESERVE(0);
719                 // This will emit two _SET_IP instructions; leave it to the optimizer
720                 instr += oparg;
721                 break;
722             }
723 
724             case RESUME:
725                 /* Use a special tier 2 version of RESUME_CHECK to allow traces to
726                  *  start with RESUME_CHECK */
727                 ADD_TO_TRACE(_TIER2_RESUME_CHECK, 0, 0, target);
728                 break;
729 
730             default:
731             {
732                 const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
733                 if (expansion->nuops > 0) {
734                     // Reserve space for nuops (+ _SET_IP + _EXIT_TRACE)
735                     int nuops = expansion->nuops;
736                     RESERVE(nuops + 1); /* One extra for exit */
737                     int16_t last_op = expansion->uops[nuops-1].uop;
738                     if (last_op == _POP_FRAME || last_op == _RETURN_GENERATOR || last_op == _YIELD_VALUE) {
739                         // Check for trace stack underflow now:
740                         // We can't bail e.g. in the middle of
741                         // LOAD_CONST + _POP_FRAME.
742                         if (trace_stack_depth == 0) {
743                             DPRINTF(2, "Trace stack underflow\n");
744                             OPT_STAT_INC(trace_stack_underflow);
745                             goto done;
746                         }
747                     }
748                     uint32_t orig_oparg = oparg;  // For OPARG_TOP/BOTTOM
749                     for (int i = 0; i < nuops; i++) {
750                         oparg = orig_oparg;
751                         uint32_t uop = expansion->uops[i].uop;
752                         uint64_t operand = 0;
753                         // Add one to account for the actual opcode/oparg pair:
754                         int offset = expansion->uops[i].offset + 1;
755                         switch (expansion->uops[i].size) {
756                             case OPARG_FULL:
757                                 assert(opcode != JUMP_BACKWARD_NO_INTERRUPT && opcode != JUMP_BACKWARD);
758                                 break;
759                             case OPARG_CACHE_1:
760                                 operand = read_u16(&instr[offset].cache);
761                                 break;
762                             case OPARG_CACHE_2:
763                                 operand = read_u32(&instr[offset].cache);
764                                 break;
765                             case OPARG_CACHE_4:
766                                 operand = read_u64(&instr[offset].cache);
767                                 break;
768                             case OPARG_TOP:  // First half of super-instr
769                                 oparg = orig_oparg >> 4;
770                                 break;
771                             case OPARG_BOTTOM:  // Second half of super-instr
772                                 oparg = orig_oparg & 0xF;
773                                 break;
774                             case OPARG_SAVE_RETURN_OFFSET:  // op=_SAVE_RETURN_OFFSET; oparg=return_offset
775                                 oparg = offset;
776                                 assert(uop == _SAVE_RETURN_OFFSET);
777                                 break;
778                             case OPARG_REPLACED:
779                                 uop = _PyUOp_Replacements[uop];
780                                 assert(uop != 0);
781 #ifdef Py_DEBUG
782                                 {
783                                     uint32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + (oparg > 255);
784                                     uint32_t jump_target = next_inst + oparg;
785                                     assert(base_opcode(code, jump_target) == END_FOR ||
786                                         base_opcode(code, jump_target) == INSTRUMENTED_END_FOR);
787                                     assert(base_opcode(code, jump_target+1) == POP_TOP);
788                                 }
789 #endif
790                                 break;
791                             default:
792                                 fprintf(stderr,
793                                         "opcode=%d, oparg=%d; nuops=%d, i=%d; size=%d, offset=%d\n",
794                                         opcode, oparg, nuops, i,
795                                         expansion->uops[i].size,
796                                         expansion->uops[i].offset);
797                                 Py_FatalError("garbled expansion");
798                         }
799 
800                         if (uop == _POP_FRAME || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE) {
801                             TRACE_STACK_POP();
802                             /* Set the operand to the function or code object returned to,
803                              * to assist optimization passes. (See _PUSH_FRAME below.)
804                              */
805                             if (func != NULL) {
806                                 operand = (uintptr_t)func;
807                             }
808                             else if (code != NULL) {
809                                 operand = (uintptr_t)code | 1;
810                             }
811                             else {
812                                 operand = 0;
813                             }
814                             ADD_TO_TRACE(uop, oparg, operand, target);
815                             DPRINTF(2,
816                                 "Returning to %s (%s:%d) at byte offset %d\n",
817                                 PyUnicode_AsUTF8(code->co_qualname),
818                                 PyUnicode_AsUTF8(code->co_filename),
819                                 code->co_firstlineno,
820                                 2 * INSTR_IP(instr, code));
821                             goto top;
822                         }
823 
824                         if (uop == _PUSH_FRAME) {
825                             assert(i + 1 == nuops);
826                             int func_version_offset =
827                                 offsetof(_PyCallCache, func_version)/sizeof(_Py_CODEUNIT)
828                                 // Add one to account for the actual opcode/oparg pair:
829                                 + 1;
830                             uint32_t func_version = read_u32(&instr[func_version_offset].cache);
831                             PyCodeObject *new_code = NULL;
832                             PyFunctionObject *new_func =
833                                 _PyFunction_LookupByVersion(func_version, (PyObject **) &new_code);
834                             DPRINTF(2, "Function: version=%#x; new_func=%p, new_code=%p\n",
835                                     (int)func_version, new_func, new_code);
836                             if (new_code != NULL) {
837                                 if (new_code == code) {
838                                     // Recursive call, bail (we could be here forever).
839                                     DPRINTF(2, "Bailing on recursive call to %s (%s:%d)\n",
840                                             PyUnicode_AsUTF8(new_code->co_qualname),
841                                             PyUnicode_AsUTF8(new_code->co_filename),
842                                             new_code->co_firstlineno);
843                                     OPT_STAT_INC(recursive_call);
844                                     ADD_TO_TRACE(uop, oparg, 0, target);
845                                     ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
846                                     goto done;
847                                 }
848                                 if (new_code->co_version != func_version) {
849                                     // func.__code__ was updated.
850                                     // Perhaps it may happen again, so don't bother tracing.
851                                     // TODO: Reason about this -- is it better to bail or not?
852                                     DPRINTF(2, "Bailing because co_version != func_version\n");
853                                     ADD_TO_TRACE(uop, oparg, 0, target);
854                                     ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
855                                     goto done;
856                                 }
857                                 if (opcode == FOR_ITER_GEN) {
858                                     DPRINTF(2, "Bailing due to dynamic target\n");
859                                     ADD_TO_TRACE(uop, oparg, 0, target);
860                                     ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0);
861                                     goto done;
862                                 }
863                                 // Increment IP to the return address
864                                 instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1;
865                                 TRACE_STACK_PUSH();
866                                 _Py_BloomFilter_Add(dependencies, new_code);
867                                 /* Set the operand to the callee's function or code object,
868                                  * to assist optimization passes.
869                                  * We prefer setting it to the function (for remove_globals())
870                                  * but if that's not available but the code is available,
871                                  * use the code, setting the low bit so the optimizer knows.
872                                  */
873                                 if (new_func != NULL) {
874                                     operand = (uintptr_t)new_func;
875                                 }
876                                 else if (new_code != NULL) {
877                                     operand = (uintptr_t)new_code | 1;
878                                 }
879                                 else {
880                                     operand = 0;
881                                 }
882                                 ADD_TO_TRACE(uop, oparg, operand, target);
883                                 code = new_code;
884                                 func = new_func;
885                                 instr = _PyCode_CODE(code);
886                                 DPRINTF(2,
887                                     "Continuing in %s (%s:%d) at byte offset %d\n",
888                                     PyUnicode_AsUTF8(code->co_qualname),
889                                     PyUnicode_AsUTF8(code->co_filename),
890                                     code->co_firstlineno,
891                                     2 * INSTR_IP(instr, code));
892                                 goto top;
893                             }
894                             DPRINTF(2, "Bail, new_code == NULL\n");
895                             ADD_TO_TRACE(uop, oparg, 0, target);
896                             ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0);
897                             goto done;
898                         }
899 
900                         // All other instructions
901                         ADD_TO_TRACE(uop, oparg, operand, target);
902                     }
903                     break;
904                 }
905                 DPRINTF(2, "Unsupported opcode %s\n", _PyOpcode_OpName[opcode]);
906                 OPT_UNSUPPORTED_OPCODE(opcode);
907                 goto done;  // Break out of loop
908             }  // End default
909 
910         }  // End switch (opcode)
911 
912         instr++;
913         // Add cache size for opcode
914         instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
915     }  // End for (;;)
916 
917 done:
918     while (trace_stack_depth > 0) {
919         TRACE_STACK_POP();
920     }
921     assert(code == initial_code);
922     // Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE
923     if (progress_needed || trace_length < 5) {
924         OPT_STAT_INC(trace_too_short);
925         DPRINTF(2,
926                 "No trace for %s (%s:%d) at byte offset %d (%s)\n",
927                 PyUnicode_AsUTF8(code->co_qualname),
928                 PyUnicode_AsUTF8(code->co_filename),
929                 code->co_firstlineno,
930                 2 * INSTR_IP(initial_instr, code),
931                 progress_needed ? "no progress" : "too short");
932         return 0;
933     }
934     if (trace[trace_length-1].opcode != _JUMP_TO_TOP) {
935         ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
936     }
937     DPRINTF(1,
938             "Created a proto-trace for %s (%s:%d) at byte offset %d -- length %d\n",
939             PyUnicode_AsUTF8(code->co_qualname),
940             PyUnicode_AsUTF8(code->co_filename),
941             code->co_firstlineno,
942             2 * INSTR_IP(initial_instr, code),
943             trace_length);
944     OPT_HIST(trace_length, trace_length_hist);
945     return trace_length;
946 }
947 
948 #undef RESERVE
949 #undef RESERVE_RAW
950 #undef INSTR_IP
951 #undef ADD_TO_TRACE
952 #undef DPRINTF
953 
954 #define UNSET_BIT(array, bit) (array[(bit)>>5] &= ~(1<<((bit)&31)))
955 #define SET_BIT(array, bit) (array[(bit)>>5] |= (1<<((bit)&31)))
956 #define BIT_IS_SET(array, bit) (array[(bit)>>5] & (1<<((bit)&31)))
957 
958 /* Count the number of unused uops and exits
959 */
960 static int
count_exits(_PyUOpInstruction * buffer,int length)961 count_exits(_PyUOpInstruction *buffer, int length)
962 {
963     int exit_count = 0;
964     for (int i = 0; i < length; i++) {
965         int opcode = buffer[i].opcode;
966         if (opcode == _EXIT_TRACE || opcode == _DYNAMIC_EXIT) {
967             exit_count++;
968         }
969     }
970     return exit_count;
971 }
972 
make_exit(_PyUOpInstruction * inst,int opcode,int target)973 static void make_exit(_PyUOpInstruction *inst, int opcode, int target)
974 {
975     inst->opcode = opcode;
976     inst->oparg = 0;
977     inst->operand = 0;
978     inst->format = UOP_FORMAT_TARGET;
979     inst->target = target;
980 }
981 
982 /* Convert implicit exits, errors and deopts
983  * into explicit ones. */
984 static int
prepare_for_execution(_PyUOpInstruction * buffer,int length)985 prepare_for_execution(_PyUOpInstruction *buffer, int length)
986 {
987     int32_t current_jump = -1;
988     int32_t current_jump_target = -1;
989     int32_t current_error = -1;
990     int32_t current_error_target = -1;
991     int32_t current_popped = -1;
992     int32_t current_exit_op = -1;
993     /* Leaving in NOPs slows down the interpreter and messes up the stats */
994     _PyUOpInstruction *copy_to = &buffer[0];
995     for (int i = 0; i < length; i++) {
996         _PyUOpInstruction *inst = &buffer[i];
997         if (inst->opcode != _NOP) {
998             if (copy_to != inst) {
999                 *copy_to = *inst;
1000             }
1001             copy_to++;
1002         }
1003     }
1004     length = (int)(copy_to - buffer);
1005     int next_spare = length;
1006     for (int i = 0; i < length; i++) {
1007         _PyUOpInstruction *inst = &buffer[i];
1008         int opcode = inst->opcode;
1009         int32_t target = (int32_t)uop_get_target(inst);
1010         if (_PyUop_Flags[opcode] & (HAS_EXIT_FLAG | HAS_DEOPT_FLAG)) {
1011             uint16_t exit_op = (_PyUop_Flags[opcode] & HAS_EXIT_FLAG) ?
1012                 _EXIT_TRACE : _DEOPT;
1013             int32_t jump_target = target;
1014             if (is_for_iter_test[opcode]) {
1015                 /* Target the POP_TOP immediately after the END_FOR,
1016                  * leaving only the iterator on the stack. */
1017                 int extended_arg = inst->oparg > 255;
1018                 int32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + extended_arg;
1019                 jump_target = next_inst + inst->oparg + 1;
1020             }
1021             if (jump_target != current_jump_target || current_exit_op != exit_op) {
1022                 make_exit(&buffer[next_spare], exit_op, jump_target);
1023                 current_exit_op = exit_op;
1024                 current_jump_target = jump_target;
1025                 current_jump = next_spare;
1026                 next_spare++;
1027             }
1028             buffer[i].jump_target = current_jump;
1029             buffer[i].format = UOP_FORMAT_JUMP;
1030         }
1031         if (_PyUop_Flags[opcode] & HAS_ERROR_FLAG) {
1032             int popped = (_PyUop_Flags[opcode] & HAS_ERROR_NO_POP_FLAG) ?
1033                 0 : _PyUop_num_popped(opcode, inst->oparg);
1034             if (target != current_error_target || popped != current_popped) {
1035                 current_popped = popped;
1036                 current_error = next_spare;
1037                 current_error_target = target;
1038                 make_exit(&buffer[next_spare], _ERROR_POP_N, 0);
1039                 buffer[next_spare].oparg = popped;
1040                 buffer[next_spare].operand = target;
1041                 next_spare++;
1042             }
1043             buffer[i].error_target = current_error;
1044             if (buffer[i].format == UOP_FORMAT_TARGET) {
1045                 buffer[i].format = UOP_FORMAT_JUMP;
1046                 buffer[i].jump_target = 0;
1047             }
1048         }
1049     }
1050     return next_spare;
1051 }
1052 
1053 /* Executor side exits */
1054 
1055 static _PyExecutorObject *
allocate_executor(int exit_count,int length)1056 allocate_executor(int exit_count, int length)
1057 {
1058     int size = exit_count*sizeof(_PyExitData) + length*sizeof(_PyUOpInstruction);
1059     _PyExecutorObject *res = PyObject_GC_NewVar(_PyExecutorObject, &_PyUOpExecutor_Type, size);
1060     if (res == NULL) {
1061         return NULL;
1062     }
1063     res->trace = (_PyUOpInstruction *)(res->exits + exit_count);
1064     res->code_size = length;
1065     res->exit_count = exit_count;
1066     return res;
1067 }
1068 
1069 #ifdef Py_DEBUG
1070 
1071 #define CHECK(PRED) \
1072 if (!(PRED)) { \
1073     printf(#PRED " at %d\n", i); \
1074     assert(0); \
1075 }
1076 
1077 static int
target_unused(int opcode)1078 target_unused(int opcode)
1079 {
1080     return (_PyUop_Flags[opcode] & (HAS_ERROR_FLAG | HAS_EXIT_FLAG | HAS_DEOPT_FLAG)) == 0;
1081 }
1082 
1083 static void
sanity_check(_PyExecutorObject * executor)1084 sanity_check(_PyExecutorObject *executor)
1085 {
1086     for (uint32_t i = 0; i < executor->exit_count; i++) {
1087         _PyExitData *exit = &executor->exits[i];
1088         CHECK(exit->target < (1 << 25));
1089     }
1090     bool ended = false;
1091     uint32_t i = 0;
1092     CHECK(executor->trace[0].opcode == _START_EXECUTOR || executor->trace[0].opcode == _COLD_EXIT);
1093     for (; i < executor->code_size; i++) {
1094         const _PyUOpInstruction *inst = &executor->trace[i];
1095         uint16_t opcode = inst->opcode;
1096         CHECK(opcode <= MAX_UOP_ID);
1097         CHECK(_PyOpcode_uop_name[opcode] != NULL);
1098         switch(inst->format) {
1099             case UOP_FORMAT_TARGET:
1100                 CHECK(target_unused(opcode));
1101                 break;
1102             case UOP_FORMAT_EXIT:
1103                 CHECK(opcode == _EXIT_TRACE);
1104                 CHECK(inst->exit_index < executor->exit_count);
1105                 break;
1106             case UOP_FORMAT_JUMP:
1107                 CHECK(inst->jump_target < executor->code_size);
1108                 break;
1109             case UOP_FORMAT_UNUSED:
1110                 CHECK(0);
1111                 break;
1112         }
1113         if (_PyUop_Flags[opcode] & HAS_ERROR_FLAG) {
1114             CHECK(inst->format == UOP_FORMAT_JUMP);
1115             CHECK(inst->error_target < executor->code_size);
1116         }
1117         if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE || opcode == _COLD_EXIT) {
1118             ended = true;
1119             i++;
1120             break;
1121         }
1122     }
1123     CHECK(ended);
1124     for (; i < executor->code_size; i++) {
1125         const _PyUOpInstruction *inst = &executor->trace[i];
1126         uint16_t opcode = inst->opcode;
1127         CHECK(
1128             opcode == _DEOPT ||
1129             opcode == _EXIT_TRACE ||
1130             opcode == _ERROR_POP_N);
1131         if (opcode == _EXIT_TRACE) {
1132             CHECK(inst->format == UOP_FORMAT_EXIT);
1133         }
1134     }
1135 }
1136 
1137 #undef CHECK
1138 #endif
1139 
1140 /* Makes an executor from a buffer of uops.
1141  * Account for the buffer having gaps and NOPs by computing a "used"
1142  * bit vector and only copying the used uops. Here "used" means reachable
1143  * and not a NOP.
1144  */
1145 static _PyExecutorObject *
make_executor_from_uops(_PyUOpInstruction * buffer,int length,const _PyBloomFilter * dependencies)1146 make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFilter *dependencies)
1147 {
1148     int exit_count = count_exits(buffer, length);
1149     _PyExecutorObject *executor = allocate_executor(exit_count, length);
1150     if (executor == NULL) {
1151         return NULL;
1152     }
1153 
1154     /* Initialize exits */
1155     assert(exit_count < COLD_EXIT_COUNT);
1156     for (int i = 0; i < exit_count; i++) {
1157         executor->exits[i].executor = &COLD_EXITS[i];
1158         executor->exits[i].temperature = initial_temperature_backoff_counter();
1159     }
1160     int next_exit = exit_count-1;
1161     _PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length];
1162     assert(buffer[0].opcode == _START_EXECUTOR);
1163     buffer[0].operand = (uint64_t)executor;
1164     for (int i = length-1; i >= 0; i--) {
1165         int opcode = buffer[i].opcode;
1166         dest--;
1167         *dest = buffer[i];
1168         assert(opcode != _POP_JUMP_IF_FALSE && opcode != _POP_JUMP_IF_TRUE);
1169         if (opcode == _EXIT_TRACE) {
1170             executor->exits[next_exit].target = buffer[i].target;
1171             dest->exit_index = next_exit;
1172             dest->format = UOP_FORMAT_EXIT;
1173             next_exit--;
1174         }
1175         if (opcode == _DYNAMIC_EXIT) {
1176             executor->exits[next_exit].target = 0;
1177             dest->oparg = next_exit;
1178             next_exit--;
1179         }
1180     }
1181     assert(next_exit == -1);
1182     assert(dest == executor->trace);
1183     assert(dest->opcode == _START_EXECUTOR);
1184     _Py_ExecutorInit(executor, dependencies);
1185 #ifdef Py_DEBUG
1186     char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
1187     int lltrace = 0;
1188     if (python_lltrace != NULL && *python_lltrace >= '0') {
1189         lltrace = *python_lltrace - '0';  // TODO: Parse an int and all that
1190     }
1191     if (lltrace >= 2) {
1192         printf("Optimized trace (length %d):\n", length);
1193         for (int i = 0; i < length; i++) {
1194             printf("%4d OPTIMIZED: ", i);
1195             _PyUOpPrint(&executor->trace[i]);
1196             printf("\n");
1197         }
1198     }
1199     sanity_check(executor);
1200 #endif
1201 #ifdef _Py_JIT
1202     executor->jit_code = NULL;
1203     executor->jit_side_entry = NULL;
1204     executor->jit_size = 0;
1205     if (_PyJIT_Compile(executor, executor->trace, length)) {
1206         Py_DECREF(executor);
1207         return NULL;
1208     }
1209 #endif
1210     _PyObject_GC_TRACK(executor);
1211     return executor;
1212 }
1213 
1214 static int
init_cold_exit_executor(_PyExecutorObject * executor,int oparg)1215 init_cold_exit_executor(_PyExecutorObject *executor, int oparg)
1216 {
1217     _Py_SetImmortalUntracked((PyObject *)executor);
1218     Py_SET_TYPE(executor, &_PyUOpExecutor_Type);
1219     executor->trace = (_PyUOpInstruction *)executor->exits;
1220     executor->code_size = 1;
1221     executor->exit_count = 0;
1222     _PyUOpInstruction *inst = (_PyUOpInstruction *)&executor->trace[0];
1223     inst->opcode = _COLD_EXIT;
1224     inst->oparg = oparg;
1225     executor->vm_data.valid = true;
1226     executor->vm_data.linked = false;
1227     for (int i = 0; i < BLOOM_FILTER_WORDS; i++) {
1228         assert(executor->vm_data.bloom.bits[i] == 0);
1229     }
1230 #ifdef Py_DEBUG
1231     sanity_check(executor);
1232 #endif
1233 #ifdef _Py_JIT
1234     executor->jit_code = NULL;
1235     executor->jit_side_entry = NULL;
1236     executor->jit_size = 0;
1237     if (_PyJIT_Compile(executor, executor->trace, 1)) {
1238         return -1;
1239     }
1240 #endif
1241     return 0;
1242 }
1243 
1244 #ifdef Py_STATS
1245 /* Returns the effective trace length.
1246  * Ignores NOPs and trailing exit and error handling.*/
effective_trace_length(_PyUOpInstruction * buffer,int length)1247 int effective_trace_length(_PyUOpInstruction *buffer, int length)
1248 {
1249     int nop_count = 0;
1250     for (int i = 0; i < length; i++) {
1251         int opcode = buffer[i].opcode;
1252         if (opcode == _NOP) {
1253             nop_count++;
1254         }
1255         if (opcode == _EXIT_TRACE ||
1256             opcode == _JUMP_TO_TOP ||
1257             opcode == _COLD_EXIT) {
1258             return i+1-nop_count;
1259         }
1260     }
1261     Py_FatalError("No terminating instruction");
1262     Py_UNREACHABLE();
1263 }
1264 #endif
1265 
1266 static int
uop_optimize(_PyOptimizerObject * self,_PyInterpreterFrame * frame,_Py_CODEUNIT * instr,_PyExecutorObject ** exec_ptr,int curr_stackentries)1267 uop_optimize(
1268     _PyOptimizerObject *self,
1269     _PyInterpreterFrame *frame,
1270     _Py_CODEUNIT *instr,
1271     _PyExecutorObject **exec_ptr,
1272     int curr_stackentries)
1273 {
1274     _PyBloomFilter dependencies;
1275     _Py_BloomFilter_Init(&dependencies);
1276     _PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH];
1277     OPT_STAT_INC(attempts);
1278     int length = translate_bytecode_to_trace(frame, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies);
1279     if (length <= 0) {
1280         // Error or nothing translated
1281         return length;
1282     }
1283     assert(length < UOP_MAX_TRACE_LENGTH);
1284     OPT_STAT_INC(traces_created);
1285     char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE");
1286     if (env_var == NULL || *env_var == '\0' || *env_var > '0') {
1287         length = _Py_uop_analyze_and_optimize(frame, buffer,
1288                                            length,
1289                                            curr_stackentries, &dependencies);
1290         if (length <= 0) {
1291             return length;
1292         }
1293     }
1294     assert(length < UOP_MAX_TRACE_LENGTH);
1295     assert(length >= 1);
1296     /* Fix up */
1297     for (int pc = 0; pc < length; pc++) {
1298         int opcode = buffer[pc].opcode;
1299         int oparg = buffer[pc].oparg;
1300         if (_PyUop_Flags[opcode] & HAS_OPARG_AND_1_FLAG) {
1301             buffer[pc].opcode = opcode + 1 + (oparg & 1);
1302         }
1303         else if (oparg < _PyUop_Replication[opcode]) {
1304             buffer[pc].opcode = opcode + oparg + 1;
1305         }
1306         else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) {
1307             break;
1308         }
1309         assert(_PyOpcode_uop_name[buffer[pc].opcode]);
1310         assert(strncmp(_PyOpcode_uop_name[buffer[pc].opcode], _PyOpcode_uop_name[opcode], strlen(_PyOpcode_uop_name[opcode])) == 0);
1311     }
1312     OPT_HIST(effective_trace_length(buffer, length), optimized_trace_length_hist);
1313     length = prepare_for_execution(buffer, length);
1314     assert(length <= UOP_MAX_TRACE_LENGTH);
1315     _PyExecutorObject *executor = make_executor_from_uops(buffer, length,  &dependencies);
1316     if (executor == NULL) {
1317         return -1;
1318     }
1319     assert(length <= UOP_MAX_TRACE_LENGTH);
1320     *exec_ptr = executor;
1321     return 1;
1322 }
1323 
1324 static void
uop_opt_dealloc(PyObject * self)1325 uop_opt_dealloc(PyObject *self) {
1326     PyObject_Free(self);
1327 }
1328 
1329 PyTypeObject _PyUOpOptimizer_Type = {
1330     PyVarObject_HEAD_INIT(&PyType_Type, 0)
1331     .tp_name = "uop_optimizer",
1332     .tp_basicsize = sizeof(_PyOptimizerObject),
1333     .tp_itemsize = 0,
1334     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
1335     .tp_dealloc = uop_opt_dealloc,
1336 };
1337 
1338 PyObject *
_PyOptimizer_NewUOpOptimizer(void)1339 _PyOptimizer_NewUOpOptimizer(void)
1340 {
1341     _PyOptimizerObject *opt = PyObject_New(_PyOptimizerObject, &_PyUOpOptimizer_Type);
1342     if (opt == NULL) {
1343         return NULL;
1344     }
1345     opt->optimize = uop_optimize;
1346     return (PyObject *)opt;
1347 }
1348 
1349 static void
counter_dealloc(_PyExecutorObject * self)1350 counter_dealloc(_PyExecutorObject *self) {
1351     /* The optimizer is the operand of the second uop. */
1352     PyObject *opt = (PyObject *)self->trace[1].operand;
1353     Py_DECREF(opt);
1354     uop_dealloc(self);
1355 }
1356 
1357 PyTypeObject _PyCounterExecutor_Type = {
1358     PyVarObject_HEAD_INIT(&PyType_Type, 0)
1359     .tp_name = "counting_executor",
1360     .tp_basicsize = offsetof(_PyExecutorObject, exits),
1361     .tp_itemsize = 1,
1362     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC,
1363     .tp_dealloc = (destructor)counter_dealloc,
1364     .tp_methods = executor_methods,
1365     .tp_traverse = executor_traverse,
1366     .tp_clear = (inquiry)executor_clear,
1367 };
1368 
1369 static int
1370 counter_optimize(
1371     _PyOptimizerObject* self,
1372     _PyInterpreterFrame *frame,
1373     _Py_CODEUNIT *instr,
1374     _PyExecutorObject **exec_ptr,
1375     int Py_UNUSED(curr_stackentries)
1376 )
1377 {
1378     PyCodeObject *code = _PyFrame_GetCode(frame);
1379     int oparg = instr->op.arg;
1380     while (instr->op.code == EXTENDED_ARG) {
1381         instr++;
1382         oparg = (oparg << 8) | instr->op.arg;
1383     }
1384     if (instr->op.code != JUMP_BACKWARD) {
1385         /* Counter optimizer can only handle backward edges */
1386         return 0;
1387     }
1388     _Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[JUMP_BACKWARD] - oparg;
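    /* Descriptive note: the "trace" built below is a fixed four-uop stub:
     * _START_EXECUTOR, _LOAD_CONST_INLINE_BORROW with the optimizer itself as
     * its operand (read back in counter_dealloc above), a counter increment,
     * and _EXIT_TRACE targeting the JUMP_BACKWARD destination computed above. */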
1389     _PyUOpInstruction buffer[4] = {
1390         { .opcode = _START_EXECUTOR, .jump_target = 3, .format=UOP_FORMAT_JUMP },
1391         { .opcode = _LOAD_CONST_INLINE_BORROW, .operand = (uintptr_t)self },
1392         { .opcode = _INTERNAL_INCREMENT_OPT_COUNTER },
1393         { .opcode = _EXIT_TRACE, .target = (uint32_t)(target - _PyCode_CODE(code)), .format=UOP_FORMAT_TARGET }
1394     };
1395     _PyExecutorObject *executor = make_executor_from_uops(buffer, 4, &EMPTY_FILTER);
1396     if (executor == NULL) {
1397         return -1;
1398     }
1399     Py_INCREF(self);
1400     Py_SET_TYPE(executor, &_PyCounterExecutor_Type);
1401     *exec_ptr = executor;
1402     return 1;
1403 }
1404 
1405 static PyObject *
1406 counter_get_counter(PyObject *self, PyObject *args)
1407 {
1408     return PyLong_FromLongLong(((_PyCounterOptimizerObject *)self)->count);
1409 }
1410 
1411 static PyMethodDef counter_optimizer_methods[] = {
1412     { "get_count", counter_get_counter, METH_NOARGS, NULL },
1413     { NULL, NULL },
1414 };
1415 
1416 PyTypeObject _PyCounterOptimizer_Type = {
1417     PyVarObject_HEAD_INIT(&PyType_Type, 0)
1418     .tp_name = "Counter optimizer",
1419     .tp_basicsize = sizeof(_PyCounterOptimizerObject),
1420     .tp_itemsize = 0,
1421     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
1422     .tp_methods = counter_optimizer_methods,
1423     .tp_dealloc = (destructor)PyObject_Del,
1424 };
1425 
1426 PyObject *
1427 _PyOptimizer_NewCounter(void)
1428 {
1429     _PyCounterOptimizerObject *opt = (_PyCounterOptimizerObject *)_PyObject_New(&_PyCounterOptimizer_Type);
1430     if (opt == NULL) {
1431         return NULL;
1432     }
1433     opt->base.optimize = counter_optimize;
1434     opt->count = 0;
1435     return (PyObject *)opt;
1436 }
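/* Usage sketch (illustrative only, not part of this file): the counter
 * optimizer simply counts how many times execution enters the executors it
 * creates for JUMP_BACKWARD sites.
 *
 *     PyObject *opt = _PyOptimizer_NewCounter();
 *     if (opt != NULL) {
 *         // ... install it as the interpreter's optimizer and run some code ...
 *         long long hits = ((_PyCounterOptimizerObject *)opt)->count;
 *         Py_DECREF(opt);
 *     }
 *
 * The same value is exposed to Python via the optimizer's get_count() method
 * defined above.
 */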
1437 
1438 
1439 /*****************************************
1440  *        Executor management
1441  ****************************************/
1442 
1443 /* We use a bloomfilter with k = 6, m = 256
1444  * The choice of k and the following constants
1445  * could do with a more rigorous analysis,
1446  * but here is a simple analysis:
1447  *
1448  * We want to keep the false positive rate low.
1449  * For n = 5 (a trace depends on 5 objects),
1450  * we expect 30 bits set, giving a false positive
1451  * rate of (30/256)**6 == 2.6e-6 which is plenty
1452  * good enough.
1453  *
1454  * However with n = 10 we expect 60 bits set (worst case),
1455  * giving a false positive rate of (60/256)**6 == 1.6e-4
1456  *
1457  * We choose k = 6, rather than a higher number as
1458  * it means the false positive rate grows slower for high n.
1459  *
1460  * n = 5, k = 6 => fp = 2.6e-6
1461  * n = 5, k = 8 => fp = 3.5e-7
1462  * n = 10, k = 6 => fp = 1.6e-4
1463  * n = 10, k = 8 => fp = 0.9e-4
1464  * n = 15, k = 6 => fp = 0.18%
1465  * n = 15, k = 8 => fp = 0.23%
1466  * n = 20, k = 6 => fp = 1.1%
1467  * n = 20, k = 8 => fp = 2.3%
1468  *
1469  * The above analysis assumes perfect hash functions,
1470  * but those don't exist, so the real false positive
1471  * rates may be worse.
1472  */
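/* A rough sanity check of the numbers above (illustrative reasoning only):
 * assuming uniform, independent hash bits, inserting n objects sets at most
 * k*n of the m = 256 bits, so the false positive rate is bounded by roughly
 *
 *     fp <= (k*n / m) ** k
 *
 * e.g. n = 5, k = 6 gives (30/256)**6, about 2.6e-6, matching the table. */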
1473 
1474 #define K 6
1475 
1476 #define SEED 20221211
1477 
1478 /* TO DO -- Use more modern hash functions with better distribution of bits */
1479 static uint64_t
1480 address_to_hash(void *ptr) {
1481     assert(ptr != NULL);
1482     uint64_t uhash = SEED;
1483     uintptr_t addr = (uintptr_t)ptr;
1484     for (int i = 0; i < SIZEOF_VOID_P; i++) {
1485         uhash ^= addr & 255;
1486         uhash *= (uint64_t)PyHASH_MULTIPLIER;
1487         addr >>= 8;
1488     }
1489     return uhash;
1490 }
1491 
1492 void
1493 _Py_BloomFilter_Init(_PyBloomFilter *bloom)
1494 {
1495     for (int i = 0; i < BLOOM_FILTER_WORDS; i++) {
1496         bloom->bits[i] = 0;
1497     }
1498 }
1499 
1500 /* We want K hash functions that each set 1 bit.
1501  * A hash function that sets 1 bit in M bits can be trivially
1502  * derived from a log2(M) bit hash function.
1503  * So we extract 8 (log2(256)) bits at a time from
1504  * the 64-bit hash. */
1505 void
1506 _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *ptr)
1507 {
1508     uint64_t hash = address_to_hash(ptr);
1509     assert(K <= 8);
1510     for (int i = 0; i < K; i++) {
1511         uint8_t bits = hash & 255;
1512         bloom->bits[bits >> 5] |= (1 << (bits&31));
1513         hash >>= 8;
1514     }
1515 }
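/* Example (illustrative values): an 8-bit slice equal to 155 (0b10011011)
 * selects word 155 >> 5 == 4 and bit 155 & 31 == 27, i.e. bit 27 of
 * bloom->bits[4]; the 256-bit filter is thus addressed as 32-bit words. */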
1516 
1517 static bool
1518 bloom_filter_may_contain(_PyBloomFilter *bloom, _PyBloomFilter *hashes)
1519 {
1520     for (int i = 0; i < BLOOM_FILTER_WORDS; i++) {
1521         if ((bloom->bits[i] & hashes->bits[i]) != hashes->bits[i]) {
1522             return false;
1523         }
1524     }
1525     return true;
1526 }
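/* The loop above is a per-word subset test: every bit set in `hashes` must
 * also be set in `bloom`. For a single word (illustrative values),
 * bloom == 0b1011 may contain hashes == 0b0011 (1011 & 0011 == 0011), but
 * definitely does not contain hashes == 0b0100 (1011 & 0100 == 0000). */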
1527 
1528 static void
1529 link_executor(_PyExecutorObject *executor)
1530 {
1531     PyInterpreterState *interp = _PyInterpreterState_GET();
1532     _PyExecutorLinkListNode *links = &executor->vm_data.links;
1533     _PyExecutorObject *head = interp->executor_list_head;
1534     if (head == NULL) {
1535         interp->executor_list_head = executor;
1536         links->previous = NULL;
1537         links->next = NULL;
1538     }
1539     else {
1540         assert(head->vm_data.links.previous == NULL);
1541         links->previous = NULL;
1542         links->next = head;
1543         head->vm_data.links.previous = executor;
1544         interp->executor_list_head = executor;
1545     }
1546     executor->vm_data.linked = true;
1547     /* executor_list_head must be first in list */
1548     assert(interp->executor_list_head->vm_data.links.previous == NULL);
1549 }
1550 
1551 static void
1552 unlink_executor(_PyExecutorObject *executor)
1553 {
1554     if (!executor->vm_data.linked) {
1555         return;
1556     }
1557     _PyExecutorLinkListNode *links = &executor->vm_data.links;
1558     assert(executor->vm_data.valid);
1559     _PyExecutorObject *next = links->next;
1560     _PyExecutorObject *prev = links->previous;
1561     if (next != NULL) {
1562         next->vm_data.links.previous = prev;
1563     }
1564     if (prev != NULL) {
1565         prev->vm_data.links.next = next;
1566     }
1567     else {
1568         // prev == NULL implies that executor is the list head
1569         PyInterpreterState *interp = PyInterpreterState_Get();
1570         assert(interp->executor_list_head == executor);
1571         interp->executor_list_head = next;
1572     }
1573     executor->vm_data.linked = false;
1574 }
1575 
1576 /* This must be called by optimizers before using the executor */
1577 void
1578 _Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter *dependency_set)
1579 {
1580     executor->vm_data.valid = true;
1581     for (int i = 0; i < BLOOM_FILTER_WORDS; i++) {
1582         executor->vm_data.bloom.bits[i] = dependency_set->bits[i];
1583     }
1584     link_executor(executor);
1585 }
1586 
1587 /* Detaches the executor from the code object (if any) that
1588  * holds a reference to it */
1589 void
1590 _Py_ExecutorDetach(_PyExecutorObject *executor)
1591 {
1592     PyCodeObject *code = executor->vm_data.code;
1593     if (code == NULL) {
1594         return;
1595     }
1596     _Py_CODEUNIT *instruction = &_PyCode_CODE(code)[executor->vm_data.index];
1597     assert(instruction->op.code == ENTER_EXECUTOR);
1598     int index = instruction->op.arg;
1599     assert(code->co_executors->executors[index] == executor);
1600     instruction->op.code = executor->vm_data.opcode;
1601     instruction->op.arg = executor->vm_data.oparg;
1602     executor->vm_data.code = NULL;
1603     code->co_executors->executors[index] = NULL;
1604     Py_DECREF(executor);
1605 }
1606 
1607 static int
1608 executor_clear(_PyExecutorObject *executor)
1609 {
1610     if (!executor->vm_data.valid) {
1611         return 0;
1612     }
1613     assert(executor->vm_data.valid == 1);
1614     unlink_executor(executor);
1615     executor->vm_data.valid = 0;
1616     /* It is possible for an executor to form a reference
1617      * cycle with itself, so decref'ing a side exit could
1618      * free the executor unless we hold a strong reference to it
1619      */
1620     Py_INCREF(executor);
1621     for (uint32_t i = 0; i < executor->exit_count; i++) {
1622         const _PyExecutorObject *cold = &COLD_EXITS[i];
1623         const _PyExecutorObject *side = executor->exits[i].executor;
1624         executor->exits[i].temperature = initial_unreachable_backoff_counter();
1625         if (side != cold) {
1626             executor->exits[i].executor = cold;
1627             Py_DECREF(side);
1628         }
1629     }
1630     _Py_ExecutorDetach(executor);
1631     Py_DECREF(executor);
1632     return 0;
1633 }
1634 
1635 void
1636 _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj)
1637 {
1638     assert(executor->vm_data.valid);
1639     _Py_BloomFilter_Add(&executor->vm_data.bloom, obj);
1640 }
1641 
1642 /* Invalidate all executors that depend on `obj`
1643  * May cause other executors to be invalidated as well
1644  */
1645 void
1646 _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation)
1647 {
1648     _PyBloomFilter obj_filter;
1649     _Py_BloomFilter_Init(&obj_filter);
1650     _Py_BloomFilter_Add(&obj_filter, obj);
1651     /* Walk the list of executors */
1652     /* TO DO -- Use a tree to avoid traversing as many objects */
1653     bool no_memory = false;
1654     PyObject *invalidate = PyList_New(0);
1655     if (invalidate == NULL) {
1656         PyErr_Clear();
1657         no_memory = true;
1658     }
1659     /* Clearing an executor can deallocate others, so we need to make a list of
1660      * executors to invalidate first */
1661     for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
1662         assert(exec->vm_data.valid);
1663         _PyExecutorObject *next = exec->vm_data.links.next;
1664         if (bloom_filter_may_contain(&exec->vm_data.bloom, &obj_filter)) {
1665             unlink_executor(exec);
1666             if (no_memory) {
1667                 exec->vm_data.valid = 0;
1668             } else {
1669                 if (PyList_Append(invalidate, (PyObject *)exec) < 0) {
1670                     PyErr_Clear();
1671                     no_memory = true;
1672                     exec->vm_data.valid = 0;
1673                 }
1674             }
1675             if (is_invalidation) {
1676                 OPT_STAT_INC(executors_invalidated);
1677             }
1678         }
1679         exec = next;
1680     }
1681     if (invalidate != NULL) {
1682         for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) {
1683             _PyExecutorObject *exec = (_PyExecutorObject *)PyList_GET_ITEM(invalidate, i);
1684             executor_clear(exec);
1685         }
1686         Py_DECREF(invalidate);
1687     }
1688     return;
1689 }
1690 
1691 /* Invalidate all executors */
1692 void
1693 _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation)
1694 {
1695     while (interp->executor_list_head) {
1696         _PyExecutorObject *executor = interp->executor_list_head;
1697         assert(executor->vm_data.valid == 1 && executor->vm_data.linked == 1);
1698         if (executor->vm_data.code) {
1699             // Clear the entire code object so its co_executors array can be freed:
1700             _PyCode_Clear_Executors(executor->vm_data.code);
1701         }
1702         else {
1703             executor_clear(executor);
1704         }
1705         if (is_invalidation) {
1706             OPT_STAT_INC(executors_invalidated);
1707         }
1708     }
1709 }
1710 
1711 #endif /* _Py_TIER2 */
1712