• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #ifdef _Py_TIER2
2 
3 /*
4  * This file contains the support code for CPython's uops optimizer.
5  * It also performs some simple optimizations.
6  * It performs a traditional data-flow analysis[1] over the trace of uops.
7  * Using the information gained, it chooses to emit, or skip certain instructions
8  * if possible.
9  *
10  * [1] For information on data-flow analysis, please see
11  * https://clang.llvm.org/docs/DataFlowAnalysisIntro.html
12  *
13  * */
14 #include "Python.h"
15 #include "opcode.h"
16 #include "pycore_dict.h"
17 #include "pycore_interp.h"
18 #include "pycore_opcode_metadata.h"
19 #include "pycore_opcode_utils.h"
20 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
21 #include "pycore_uop_metadata.h"
22 #include "pycore_dict.h"
23 #include "pycore_long.h"
24 #include "pycore_optimizer.h"
25 #include "pycore_object.h"
26 #include "pycore_dict.h"
27 #include "pycore_function.h"
28 #include "pycore_uop_metadata.h"
29 #include "pycore_uop_ids.h"
30 #include "pycore_range.h"
31 
32 #include <stdarg.h>
33 #include <stdbool.h>
34 #include <stdint.h>
35 #include <stddef.h>
36 
37 #ifdef Py_DEBUG
38     extern const char *_PyUOpName(int index);
39     extern void _PyUOpPrint(const _PyUOpInstruction *uop);
40     static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG";
get_lltrace(void)41     static inline int get_lltrace(void) {
42         char *uop_debug = Py_GETENV(DEBUG_ENV);
43         int lltrace = 0;
44         if (uop_debug != NULL && *uop_debug >= '0') {
45             lltrace = *uop_debug - '0';  // TODO: Parse an int and all that
46         }
47         return lltrace;
48     }
49     #define DPRINTF(level, ...) \
50     if (get_lltrace() >= (level)) { printf(__VA_ARGS__); }
51 #else
52     #define DPRINTF(level, ...)
53 #endif
54 
55 
56 
57 static inline bool
op_is_end(uint32_t opcode)58 op_is_end(uint32_t opcode)
59 {
60     return opcode == _EXIT_TRACE || opcode == _JUMP_TO_TOP;
61 }
62 
63 static int
get_mutations(PyObject * dict)64 get_mutations(PyObject* dict) {
65     assert(PyDict_CheckExact(dict));
66     PyDictObject *d = (PyDictObject *)dict;
67     return (d->ma_version_tag >> DICT_MAX_WATCHERS) & ((1 << DICT_WATCHED_MUTATION_BITS)-1);
68 }
69 
70 static void
increment_mutations(PyObject * dict)71 increment_mutations(PyObject* dict) {
72     assert(PyDict_CheckExact(dict));
73     PyDictObject *d = (PyDictObject *)dict;
74     d->ma_version_tag += (1 << DICT_MAX_WATCHERS);
75 }
76 
77 /* The first two dict watcher IDs are reserved for CPython,
78  * so we don't need to check that they haven't been used */
79 #define BUILTINS_WATCHER_ID 0
80 #define GLOBALS_WATCHER_ID  1
81 
82 static int
globals_watcher_callback(PyDict_WatchEvent event,PyObject * dict,PyObject * key,PyObject * new_value)83 globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict,
84                          PyObject* key, PyObject* new_value)
85 {
86     RARE_EVENT_STAT_INC(watched_globals_modification);
87     assert(get_mutations(dict) < _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS);
88     _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), dict, 1);
89     increment_mutations(dict);
90     PyDict_Unwatch(GLOBALS_WATCHER_ID, dict);
91     return 0;
92 }
93 
94 static PyObject *
convert_global_to_const(_PyUOpInstruction * inst,PyObject * obj)95 convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj)
96 {
97     assert(inst->opcode == _LOAD_GLOBAL_MODULE || inst->opcode == _LOAD_GLOBAL_BUILTINS || inst->opcode == _LOAD_ATTR_MODULE);
98     assert(PyDict_CheckExact(obj));
99     PyDictObject *dict = (PyDictObject *)obj;
100     assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE);
101     PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys);
102     assert(inst->operand <= UINT16_MAX);
103     if ((int)inst->operand >= dict->ma_keys->dk_nentries) {
104         return NULL;
105     }
106     PyObject *res = entries[inst->operand].me_value;
107     if (res == NULL) {
108         return NULL;
109     }
110     if (_Py_IsImmortal(res)) {
111         inst->opcode = (inst->oparg & 1) ? _LOAD_CONST_INLINE_BORROW_WITH_NULL : _LOAD_CONST_INLINE_BORROW;
112     }
113     else {
114         inst->opcode = (inst->oparg & 1) ? _LOAD_CONST_INLINE_WITH_NULL : _LOAD_CONST_INLINE;
115     }
116     inst->operand = (uint64_t)res;
117     return res;
118 }
119 
120 static int
incorrect_keys(_PyUOpInstruction * inst,PyObject * obj)121 incorrect_keys(_PyUOpInstruction *inst, PyObject *obj)
122 {
123     if (!PyDict_CheckExact(obj)) {
124         return 1;
125     }
126     PyDictObject *dict = (PyDictObject *)obj;
127     if (dict->ma_keys->dk_version != inst->operand) {
128         return 1;
129     }
130     return 0;
131 }
132 
133 /* Returns 1 if successfully optimized
134  *         0 if the trace is not suitable for optimization (yet)
135  *        -1 if there was an error. */
136 static int
remove_globals(_PyInterpreterFrame * frame,_PyUOpInstruction * buffer,int buffer_size,_PyBloomFilter * dependencies)137 remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
138                int buffer_size, _PyBloomFilter *dependencies)
139 {
140     PyInterpreterState *interp = _PyInterpreterState_GET();
141     PyObject *builtins = frame->f_builtins;
142     if (builtins != interp->builtins) {
143         OPT_STAT_INC(remove_globals_builtins_changed);
144         return 1;
145     }
146     PyObject *globals = frame->f_globals;
147     PyFunctionObject *function = (PyFunctionObject *)frame->f_funcobj;
148     assert(PyFunction_Check(function));
149     assert(function->func_builtins == builtins);
150     assert(function->func_globals == globals);
151     uint32_t function_version = _PyFunction_GetVersionForCurrentState(function);
152     /* In order to treat globals as constants, we need to
153      * know that the globals dict is the one we expected, and
154      * that it hasn't changed
155      * In order to treat builtins as constants,  we need to
156      * know that the builtins dict is the one we expected, and
157      * that it hasn't changed and that the global dictionary's
158      * keys have not changed */
159 
160     /* These values represent stacks of booleans (one bool per bit).
161      * Pushing a frame shifts left, popping a frame shifts right. */
162     uint32_t function_checked = 0;
163     uint32_t builtins_watched = 0;
164     uint32_t globals_watched = 0;
165     uint32_t prechecked_function_version = 0;
166     if (interp->dict_state.watchers[GLOBALS_WATCHER_ID] == NULL) {
167         interp->dict_state.watchers[GLOBALS_WATCHER_ID] = globals_watcher_callback;
168     }
169     for (int pc = 0; pc < buffer_size; pc++) {
170         _PyUOpInstruction *inst = &buffer[pc];
171         int opcode = inst->opcode;
172         switch(opcode) {
173             case _GUARD_BUILTINS_VERSION:
174                 if (incorrect_keys(inst, builtins)) {
175                     OPT_STAT_INC(remove_globals_incorrect_keys);
176                     return 0;
177                 }
178                 if (interp->rare_events.builtin_dict >= _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS) {
179                     continue;
180                 }
181                 if ((builtins_watched & 1) == 0) {
182                     PyDict_Watch(BUILTINS_WATCHER_ID, builtins);
183                     builtins_watched |= 1;
184                 }
185                 if (function_checked & 1) {
186                     buffer[pc].opcode = NOP;
187                 }
188                 else {
189                     buffer[pc].opcode = _CHECK_FUNCTION;
190                     buffer[pc].operand = function_version;
191                     function_checked |= 1;
192                 }
193                 break;
194             case _GUARD_GLOBALS_VERSION:
195                 if (incorrect_keys(inst, globals)) {
196                     OPT_STAT_INC(remove_globals_incorrect_keys);
197                     return 0;
198                 }
199                 uint64_t watched_mutations = get_mutations(globals);
200                 if (watched_mutations >= _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS) {
201                     continue;
202                 }
203                 if ((globals_watched & 1) == 0) {
204                     PyDict_Watch(GLOBALS_WATCHER_ID, globals);
205                     _Py_BloomFilter_Add(dependencies, globals);
206                     globals_watched |= 1;
207                 }
208                 if (function_checked & 1) {
209                     buffer[pc].opcode = NOP;
210                 }
211                 else {
212                     buffer[pc].opcode = _CHECK_FUNCTION;
213                     buffer[pc].operand = function_version;
214                     function_checked |= 1;
215                 }
216                 break;
217             case _LOAD_GLOBAL_BUILTINS:
218                 if (function_checked & globals_watched & builtins_watched & 1) {
219                     convert_global_to_const(inst, builtins);
220                 }
221                 break;
222             case _LOAD_GLOBAL_MODULE:
223                 if (function_checked & globals_watched & 1) {
224                     convert_global_to_const(inst, globals);
225                 }
226                 break;
227             case _PUSH_FRAME:
228             {
229                 builtins_watched <<= 1;
230                 globals_watched <<= 1;
231                 function_checked <<= 1;
232                 uint64_t operand = buffer[pc].operand;
233                 if (operand == 0 || (operand & 1)) {
234                     // It's either a code object or NULL, so bail
235                     return 1;
236                 }
237                 PyFunctionObject *func = (PyFunctionObject *)operand;
238                 if (func == NULL) {
239                     return 1;
240                 }
241                 assert(PyFunction_Check(func));
242                 function_version = func->func_version;
243                 if (prechecked_function_version == function_version) {
244                     function_checked |= 1;
245                 }
246                 prechecked_function_version = 0;
247                 globals = func->func_globals;
248                 builtins = func->func_builtins;
249                 if (builtins != interp->builtins) {
250                     OPT_STAT_INC(remove_globals_builtins_changed);
251                     return 1;
252                 }
253                 break;
254             }
255             case _POP_FRAME:
256             {
257                 builtins_watched >>= 1;
258                 globals_watched >>= 1;
259                 function_checked >>= 1;
260                 uint64_t operand = buffer[pc].operand;
261                 if (operand == 0 || (operand & 1)) {
262                     // It's either a code object or NULL, so bail
263                     return 1;
264                 }
265                 PyFunctionObject *func = (PyFunctionObject *)operand;
266                 if (func == NULL) {
267                     return 1;
268                 }
269                 assert(PyFunction_Check(func));
270                 function_version = func->func_version;
271                 globals = func->func_globals;
272                 builtins = func->func_builtins;
273                 break;
274             }
275             case _CHECK_FUNCTION_EXACT_ARGS:
276                 prechecked_function_version = (uint32_t)buffer[pc].operand;
277                 break;
278             default:
279                 if (op_is_end(opcode)) {
280                     return 1;
281                 }
282                 break;
283         }
284     }
285     return 0;
286 }
287 
288 
289 
290 #define STACK_LEVEL()     ((int)(stack_pointer - ctx->frame->stack))
291 
292 #define GETLOCAL(idx)          ((ctx->frame->locals[idx]))
293 
294 #define REPLACE_OP(INST, OP, ARG, OPERAND)    \
295     INST->opcode = OP;            \
296     INST->oparg = ARG;            \
297     INST->operand = OPERAND;
298 
299 #define OUT_OF_SPACE_IF_NULL(EXPR)     \
300     do {                               \
301         if ((EXPR) == NULL) {          \
302             goto out_of_space;         \
303         }                              \
304     } while (0);
305 
306 #define _LOAD_ATTR_NOT_NULL \
307     do {                    \
308     OUT_OF_SPACE_IF_NULL(attr = _Py_uop_sym_new_not_null(ctx)); \
309     OUT_OF_SPACE_IF_NULL(null = _Py_uop_sym_new_null(ctx)); \
310     } while (0);
311 
312 
313 /* Shortened forms for convenience, used in optimizer_bytecodes.c */
314 #define sym_is_not_null _Py_uop_sym_is_not_null
315 #define sym_is_const _Py_uop_sym_is_const
316 #define sym_get_const _Py_uop_sym_get_const
317 #define sym_new_unknown _Py_uop_sym_new_unknown
318 #define sym_new_not_null _Py_uop_sym_new_not_null
319 #define sym_new_type _Py_uop_sym_new_type
320 #define sym_is_null _Py_uop_sym_is_null
321 #define sym_new_const _Py_uop_sym_new_const
322 #define sym_new_null _Py_uop_sym_new_null
323 #define sym_has_type _Py_uop_sym_has_type
324 #define sym_get_type _Py_uop_sym_get_type
325 #define sym_matches_type _Py_uop_sym_matches_type
326 #define sym_set_null _Py_uop_sym_set_null
327 #define sym_set_non_null _Py_uop_sym_set_non_null
328 #define sym_set_type _Py_uop_sym_set_type
329 #define sym_set_const _Py_uop_sym_set_const
330 #define sym_is_bottom _Py_uop_sym_is_bottom
331 #define sym_truthiness _Py_uop_sym_truthiness
332 #define frame_new _Py_uop_frame_new
333 #define frame_pop _Py_uop_frame_pop
334 
335 static int
optimize_to_bool(_PyUOpInstruction * this_instr,_Py_UOpsContext * ctx,_Py_UopsSymbol * value,_Py_UopsSymbol ** result_ptr)336 optimize_to_bool(
337     _PyUOpInstruction *this_instr,
338     _Py_UOpsContext *ctx,
339     _Py_UopsSymbol *value,
340     _Py_UopsSymbol **result_ptr)
341 {
342     if (sym_matches_type(value, &PyBool_Type)) {
343         REPLACE_OP(this_instr, _NOP, 0, 0);
344         *result_ptr = value;
345         return 1;
346     }
347     int truthiness = sym_truthiness(value);
348     if (truthiness >= 0) {
349         PyObject *load = truthiness ? Py_True : Py_False;
350         REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)load);
351         *result_ptr = sym_new_const(ctx, load);
352         return 1;
353     }
354     return 0;
355 }
356 
357 static void
eliminate_pop_guard(_PyUOpInstruction * this_instr,bool exit)358 eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit)
359 {
360     REPLACE_OP(this_instr, _POP_TOP, 0, 0);
361     if (exit) {
362         REPLACE_OP((this_instr+1), _EXIT_TRACE, 0, 0);
363         this_instr[1].target = this_instr->target;
364     }
365 }
366 
367 /* _PUSH_FRAME/_POP_FRAME's operand can be 0, a PyFunctionObject *, or a
368  * PyCodeObject *. Retrieve the code object if possible.
369  */
370 static PyCodeObject *
get_code(_PyUOpInstruction * op)371 get_code(_PyUOpInstruction *op)
372 {
373     assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME || op->opcode == _RETURN_GENERATOR);
374     PyCodeObject *co = NULL;
375     uint64_t operand = op->operand;
376     if (operand == 0) {
377         return NULL;
378     }
379     if (operand & 1) {
380         co = (PyCodeObject *)(operand & ~1);
381     }
382     else {
383         PyFunctionObject *func = (PyFunctionObject *)operand;
384         assert(PyFunction_Check(func));
385         co = (PyCodeObject *)func->func_code;
386     }
387     assert(PyCode_Check(co));
388     return co;
389 }
390 
391 /* 1 for success, 0 for not ready, cannot error at the moment. */
392 static int
optimize_uops(PyCodeObject * co,_PyUOpInstruction * trace,int trace_len,int curr_stacklen,_PyBloomFilter * dependencies)393 optimize_uops(
394     PyCodeObject *co,
395     _PyUOpInstruction *trace,
396     int trace_len,
397     int curr_stacklen,
398     _PyBloomFilter *dependencies
399 )
400 {
401 
402     _Py_UOpsContext context;
403     _Py_UOpsContext *ctx = &context;
404     uint32_t opcode = UINT16_MAX;
405     int curr_space = 0;
406     int max_space = 0;
407     _PyUOpInstruction *first_valid_check_stack = NULL;
408     _PyUOpInstruction *corresponding_check_stack = NULL;
409 
410     if (_Py_uop_abstractcontext_init(ctx) < 0) {
411         goto out_of_space;
412     }
413     _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, curr_stacklen, NULL, 0);
414     if (frame == NULL) {
415         return -1;
416     }
417     ctx->curr_frame_depth++;
418     ctx->frame = frame;
419 
420     _PyUOpInstruction *this_instr = NULL;
421     for (int i = 0; i < trace_len; i++) {
422         this_instr = &trace[i];
423 
424         int oparg = this_instr->oparg;
425         opcode = this_instr->opcode;
426         _Py_UopsSymbol **stack_pointer = ctx->frame->stack_pointer;
427 
428 #ifdef Py_DEBUG
429         if (get_lltrace() >= 3) {
430             printf("%4d abs: ", (int)(this_instr - trace));
431             _PyUOpPrint(this_instr);
432             printf(" ");
433         }
434 #endif
435 
436         switch (opcode) {
437 
438 #include "optimizer_cases.c.h"
439 
440             default:
441                 DPRINTF(1, "\nUnknown opcode in abstract interpreter\n");
442                 Py_UNREACHABLE();
443         }
444         assert(ctx->frame != NULL);
445         DPRINTF(3, " stack_level %d\n", STACK_LEVEL());
446         ctx->frame->stack_pointer = stack_pointer;
447         assert(STACK_LEVEL() >= 0);
448     }
449     Py_UNREACHABLE();
450 
451 out_of_space:
452     DPRINTF(3, "\n");
453     DPRINTF(1, "Out of space in abstract interpreter\n");
454     goto done;
455 error:
456     DPRINTF(3, "\n");
457     DPRINTF(1, "Encountered error in abstract interpreter\n");
458     if (opcode <= MAX_UOP_ID) {
459         OPT_ERROR_IN_OPCODE(opcode);
460     }
461     _Py_uop_abstractcontext_fini(ctx);
462     return -1;
463 
464 hit_bottom:
465     // Attempted to push a "bottom" (contradition) symbol onto the stack.
466     // This means that the abstract interpreter has hit unreachable code.
467     // We *could* generate an _EXIT_TRACE or _FATAL_ERROR here, but hitting
468     // bottom indicates type instability, so we are probably better off
469     // retrying later.
470     DPRINTF(3, "\n");
471     DPRINTF(1, "Hit bottom in abstract interpreter\n");
472     _Py_uop_abstractcontext_fini(ctx);
473     return 0;
474 done:
475     /* Either reached the end or cannot optimize further, but there
476      * would be no benefit in retrying later */
477     _Py_uop_abstractcontext_fini(ctx);
478     if (first_valid_check_stack != NULL) {
479         assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE);
480         assert(max_space > 0);
481         assert(max_space <= INT_MAX);
482         assert(max_space <= INT32_MAX);
483         first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND;
484         first_valid_check_stack->operand = max_space;
485     }
486     return trace_len;
487 }
488 
489 
490 static int
remove_unneeded_uops(_PyUOpInstruction * buffer,int buffer_size)491 remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
492 {
493     /* Remove _SET_IP and _CHECK_VALIDITY where possible.
494      * _SET_IP is needed if the following instruction escapes or
495      * could error. _CHECK_VALIDITY is needed if the previous
496      * instruction could have escaped. */
497     int last_set_ip = -1;
498     bool may_have_escaped = true;
499     for (int pc = 0; pc < buffer_size; pc++) {
500         int opcode = buffer[pc].opcode;
501         switch (opcode) {
502             case _START_EXECUTOR:
503                 may_have_escaped = false;
504                 break;
505             case _SET_IP:
506                 buffer[pc].opcode = _NOP;
507                 last_set_ip = pc;
508                 break;
509             case _CHECK_VALIDITY:
510                 if (may_have_escaped) {
511                     may_have_escaped = false;
512                 }
513                 else {
514                     buffer[pc].opcode = _NOP;
515                 }
516                 break;
517             case _CHECK_VALIDITY_AND_SET_IP:
518                 if (may_have_escaped) {
519                     may_have_escaped = false;
520                     buffer[pc].opcode = _CHECK_VALIDITY;
521                 }
522                 else {
523                     buffer[pc].opcode = _NOP;
524                 }
525                 last_set_ip = pc;
526                 break;
527             case _POP_TOP:
528             {
529                 _PyUOpInstruction *last = &buffer[pc-1];
530                 while (last->opcode == _NOP) {
531                     last--;
532                 }
533                 if (last->opcode == _LOAD_CONST_INLINE  ||
534                     last->opcode == _LOAD_CONST_INLINE_BORROW ||
535                     last->opcode == _LOAD_FAST ||
536                     last->opcode == _COPY
537                 ) {
538                     last->opcode = _NOP;
539                     buffer[pc].opcode = _NOP;
540                 }
541                 if (last->opcode == _REPLACE_WITH_TRUE) {
542                     last->opcode = _NOP;
543                 }
544                 break;
545             }
546             case _JUMP_TO_TOP:
547             case _EXIT_TRACE:
548                 return pc + 1;
549             default:
550             {
551                 /* _PUSH_FRAME doesn't escape or error, but it
552                  * does need the IP for the return address */
553                 bool needs_ip = opcode == _PUSH_FRAME;
554                 if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) {
555                     needs_ip = true;
556                     may_have_escaped = true;
557                 }
558                 if (needs_ip && last_set_ip >= 0) {
559                     if (buffer[last_set_ip].opcode == _CHECK_VALIDITY) {
560                         buffer[last_set_ip].opcode = _CHECK_VALIDITY_AND_SET_IP;
561                     }
562                     else {
563                         assert(buffer[last_set_ip].opcode == _NOP);
564                         buffer[last_set_ip].opcode = _SET_IP;
565                     }
566                     last_set_ip = -1;
567                 }
568             }
569         }
570     }
571     Py_UNREACHABLE();
572 }
573 
574 //  0 - failure, no error raised, just fall back to Tier 1
575 // -1 - failure, and raise error
576 //  > 0 - length of optimized trace
577 int
_Py_uop_analyze_and_optimize(_PyInterpreterFrame * frame,_PyUOpInstruction * buffer,int length,int curr_stacklen,_PyBloomFilter * dependencies)578 _Py_uop_analyze_and_optimize(
579     _PyInterpreterFrame *frame,
580     _PyUOpInstruction *buffer,
581     int length,
582     int curr_stacklen,
583     _PyBloomFilter *dependencies
584 )
585 {
586     OPT_STAT_INC(optimizer_attempts);
587 
588     int err = remove_globals(frame, buffer, length, dependencies);
589     if (err <= 0) {
590         return err;
591     }
592 
593     length = optimize_uops(
594         _PyFrame_GetCode(frame), buffer,
595         length, curr_stacklen, dependencies);
596 
597     if (length <= 0) {
598         return length;
599     }
600 
601     length = remove_unneeded_uops(buffer, length);
602     assert(length > 0);
603 
604     OPT_STAT_INC(optimizer_successes);
605     return length;
606 }
607 
608 #endif /* _Py_TIER2 */
609