1 #ifdef _Py_TIER2
2
3 /*
4 * This file contains the support code for CPython's uops optimizer.
5 * It also performs some simple optimizations.
6 * It performs a traditional data-flow analysis[1] over the trace of uops.
7 * Using the information gained, it chooses to emit, or skip certain instructions
8 * if possible.
9 *
10 * [1] For information on data-flow analysis, please see
11 * https://clang.llvm.org/docs/DataFlowAnalysisIntro.html
12 *
13 * */
14 #include "Python.h"
15 #include "opcode.h"
16 #include "pycore_dict.h"
17 #include "pycore_interp.h"
18 #include "pycore_opcode_metadata.h"
19 #include "pycore_opcode_utils.h"
20 #include "pycore_pystate.h" // _PyInterpreterState_GET()
21 #include "pycore_uop_metadata.h"
22 #include "pycore_dict.h"
23 #include "pycore_long.h"
24 #include "pycore_optimizer.h"
25 #include "pycore_object.h"
26 #include "pycore_dict.h"
27 #include "pycore_function.h"
28 #include "pycore_uop_metadata.h"
29 #include "pycore_uop_ids.h"
30 #include "pycore_range.h"
31
32 #include <stdarg.h>
33 #include <stdbool.h>
34 #include <stdint.h>
35 #include <stddef.h>
36
37 #ifdef Py_DEBUG
38 extern const char *_PyUOpName(int index);
39 extern void _PyUOpPrint(const _PyUOpInstruction *uop);
40 static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG";
get_lltrace(void)41 static inline int get_lltrace(void) {
42 char *uop_debug = Py_GETENV(DEBUG_ENV);
43 int lltrace = 0;
44 if (uop_debug != NULL && *uop_debug >= '0') {
45 lltrace = *uop_debug - '0'; // TODO: Parse an int and all that
46 }
47 return lltrace;
48 }
49 #define DPRINTF(level, ...) \
50 if (get_lltrace() >= (level)) { printf(__VA_ARGS__); }
51 #else
52 #define DPRINTF(level, ...)
53 #endif
54
55
56
57 static inline bool
op_is_end(uint32_t opcode)58 op_is_end(uint32_t opcode)
59 {
60 return opcode == _EXIT_TRACE || opcode == _JUMP_TO_TOP;
61 }
62
63 static int
get_mutations(PyObject * dict)64 get_mutations(PyObject* dict) {
65 assert(PyDict_CheckExact(dict));
66 PyDictObject *d = (PyDictObject *)dict;
67 return (d->ma_version_tag >> DICT_MAX_WATCHERS) & ((1 << DICT_WATCHED_MUTATION_BITS)-1);
68 }
69
70 static void
increment_mutations(PyObject * dict)71 increment_mutations(PyObject* dict) {
72 assert(PyDict_CheckExact(dict));
73 PyDictObject *d = (PyDictObject *)dict;
74 d->ma_version_tag += (1 << DICT_MAX_WATCHERS);
75 }
76
77 /* The first two dict watcher IDs are reserved for CPython,
78 * so we don't need to check that they haven't been used */
79 #define BUILTINS_WATCHER_ID 0
80 #define GLOBALS_WATCHER_ID 1
81
82 static int
globals_watcher_callback(PyDict_WatchEvent event,PyObject * dict,PyObject * key,PyObject * new_value)83 globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict,
84 PyObject* key, PyObject* new_value)
85 {
86 RARE_EVENT_STAT_INC(watched_globals_modification);
87 assert(get_mutations(dict) < _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS);
88 _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), dict, 1);
89 increment_mutations(dict);
90 PyDict_Unwatch(GLOBALS_WATCHER_ID, dict);
91 return 0;
92 }
93
94 static PyObject *
convert_global_to_const(_PyUOpInstruction * inst,PyObject * obj)95 convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj)
96 {
97 assert(inst->opcode == _LOAD_GLOBAL_MODULE || inst->opcode == _LOAD_GLOBAL_BUILTINS || inst->opcode == _LOAD_ATTR_MODULE);
98 assert(PyDict_CheckExact(obj));
99 PyDictObject *dict = (PyDictObject *)obj;
100 assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE);
101 PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys);
102 assert(inst->operand <= UINT16_MAX);
103 if ((int)inst->operand >= dict->ma_keys->dk_nentries) {
104 return NULL;
105 }
106 PyObject *res = entries[inst->operand].me_value;
107 if (res == NULL) {
108 return NULL;
109 }
110 if (_Py_IsImmortal(res)) {
111 inst->opcode = (inst->oparg & 1) ? _LOAD_CONST_INLINE_BORROW_WITH_NULL : _LOAD_CONST_INLINE_BORROW;
112 }
113 else {
114 inst->opcode = (inst->oparg & 1) ? _LOAD_CONST_INLINE_WITH_NULL : _LOAD_CONST_INLINE;
115 }
116 inst->operand = (uint64_t)res;
117 return res;
118 }
119
120 static int
incorrect_keys(_PyUOpInstruction * inst,PyObject * obj)121 incorrect_keys(_PyUOpInstruction *inst, PyObject *obj)
122 {
123 if (!PyDict_CheckExact(obj)) {
124 return 1;
125 }
126 PyDictObject *dict = (PyDictObject *)obj;
127 if (dict->ma_keys->dk_version != inst->operand) {
128 return 1;
129 }
130 return 0;
131 }
132
133 /* Returns 1 if successfully optimized
134 * 0 if the trace is not suitable for optimization (yet)
135 * -1 if there was an error. */
136 static int
remove_globals(_PyInterpreterFrame * frame,_PyUOpInstruction * buffer,int buffer_size,_PyBloomFilter * dependencies)137 remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
138 int buffer_size, _PyBloomFilter *dependencies)
139 {
140 PyInterpreterState *interp = _PyInterpreterState_GET();
141 PyObject *builtins = frame->f_builtins;
142 if (builtins != interp->builtins) {
143 OPT_STAT_INC(remove_globals_builtins_changed);
144 return 1;
145 }
146 PyObject *globals = frame->f_globals;
147 PyFunctionObject *function = (PyFunctionObject *)frame->f_funcobj;
148 assert(PyFunction_Check(function));
149 assert(function->func_builtins == builtins);
150 assert(function->func_globals == globals);
151 uint32_t function_version = _PyFunction_GetVersionForCurrentState(function);
152 /* In order to treat globals as constants, we need to
153 * know that the globals dict is the one we expected, and
154 * that it hasn't changed
155 * In order to treat builtins as constants, we need to
156 * know that the builtins dict is the one we expected, and
157 * that it hasn't changed and that the global dictionary's
158 * keys have not changed */
159
160 /* These values represent stacks of booleans (one bool per bit).
161 * Pushing a frame shifts left, popping a frame shifts right. */
162 uint32_t function_checked = 0;
163 uint32_t builtins_watched = 0;
164 uint32_t globals_watched = 0;
165 uint32_t prechecked_function_version = 0;
166 if (interp->dict_state.watchers[GLOBALS_WATCHER_ID] == NULL) {
167 interp->dict_state.watchers[GLOBALS_WATCHER_ID] = globals_watcher_callback;
168 }
169 for (int pc = 0; pc < buffer_size; pc++) {
170 _PyUOpInstruction *inst = &buffer[pc];
171 int opcode = inst->opcode;
172 switch(opcode) {
173 case _GUARD_BUILTINS_VERSION:
174 if (incorrect_keys(inst, builtins)) {
175 OPT_STAT_INC(remove_globals_incorrect_keys);
176 return 0;
177 }
178 if (interp->rare_events.builtin_dict >= _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS) {
179 continue;
180 }
181 if ((builtins_watched & 1) == 0) {
182 PyDict_Watch(BUILTINS_WATCHER_ID, builtins);
183 builtins_watched |= 1;
184 }
185 if (function_checked & 1) {
186 buffer[pc].opcode = NOP;
187 }
188 else {
189 buffer[pc].opcode = _CHECK_FUNCTION;
190 buffer[pc].operand = function_version;
191 function_checked |= 1;
192 }
193 break;
194 case _GUARD_GLOBALS_VERSION:
195 if (incorrect_keys(inst, globals)) {
196 OPT_STAT_INC(remove_globals_incorrect_keys);
197 return 0;
198 }
199 uint64_t watched_mutations = get_mutations(globals);
200 if (watched_mutations >= _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS) {
201 continue;
202 }
203 if ((globals_watched & 1) == 0) {
204 PyDict_Watch(GLOBALS_WATCHER_ID, globals);
205 _Py_BloomFilter_Add(dependencies, globals);
206 globals_watched |= 1;
207 }
208 if (function_checked & 1) {
209 buffer[pc].opcode = NOP;
210 }
211 else {
212 buffer[pc].opcode = _CHECK_FUNCTION;
213 buffer[pc].operand = function_version;
214 function_checked |= 1;
215 }
216 break;
217 case _LOAD_GLOBAL_BUILTINS:
218 if (function_checked & globals_watched & builtins_watched & 1) {
219 convert_global_to_const(inst, builtins);
220 }
221 break;
222 case _LOAD_GLOBAL_MODULE:
223 if (function_checked & globals_watched & 1) {
224 convert_global_to_const(inst, globals);
225 }
226 break;
227 case _PUSH_FRAME:
228 {
229 builtins_watched <<= 1;
230 globals_watched <<= 1;
231 function_checked <<= 1;
232 uint64_t operand = buffer[pc].operand;
233 if (operand == 0 || (operand & 1)) {
234 // It's either a code object or NULL, so bail
235 return 1;
236 }
237 PyFunctionObject *func = (PyFunctionObject *)operand;
238 if (func == NULL) {
239 return 1;
240 }
241 assert(PyFunction_Check(func));
242 function_version = func->func_version;
243 if (prechecked_function_version == function_version) {
244 function_checked |= 1;
245 }
246 prechecked_function_version = 0;
247 globals = func->func_globals;
248 builtins = func->func_builtins;
249 if (builtins != interp->builtins) {
250 OPT_STAT_INC(remove_globals_builtins_changed);
251 return 1;
252 }
253 break;
254 }
255 case _POP_FRAME:
256 {
257 builtins_watched >>= 1;
258 globals_watched >>= 1;
259 function_checked >>= 1;
260 uint64_t operand = buffer[pc].operand;
261 if (operand == 0 || (operand & 1)) {
262 // It's either a code object or NULL, so bail
263 return 1;
264 }
265 PyFunctionObject *func = (PyFunctionObject *)operand;
266 if (func == NULL) {
267 return 1;
268 }
269 assert(PyFunction_Check(func));
270 function_version = func->func_version;
271 globals = func->func_globals;
272 builtins = func->func_builtins;
273 break;
274 }
275 case _CHECK_FUNCTION_EXACT_ARGS:
276 prechecked_function_version = (uint32_t)buffer[pc].operand;
277 break;
278 default:
279 if (op_is_end(opcode)) {
280 return 1;
281 }
282 break;
283 }
284 }
285 return 0;
286 }
287
288
289
290 #define STACK_LEVEL() ((int)(stack_pointer - ctx->frame->stack))
291
292 #define GETLOCAL(idx) ((ctx->frame->locals[idx]))
293
294 #define REPLACE_OP(INST, OP, ARG, OPERAND) \
295 INST->opcode = OP; \
296 INST->oparg = ARG; \
297 INST->operand = OPERAND;
298
299 #define OUT_OF_SPACE_IF_NULL(EXPR) \
300 do { \
301 if ((EXPR) == NULL) { \
302 goto out_of_space; \
303 } \
304 } while (0);
305
306 #define _LOAD_ATTR_NOT_NULL \
307 do { \
308 OUT_OF_SPACE_IF_NULL(attr = _Py_uop_sym_new_not_null(ctx)); \
309 OUT_OF_SPACE_IF_NULL(null = _Py_uop_sym_new_null(ctx)); \
310 } while (0);
311
312
313 /* Shortened forms for convenience, used in optimizer_bytecodes.c */
314 #define sym_is_not_null _Py_uop_sym_is_not_null
315 #define sym_is_const _Py_uop_sym_is_const
316 #define sym_get_const _Py_uop_sym_get_const
317 #define sym_new_unknown _Py_uop_sym_new_unknown
318 #define sym_new_not_null _Py_uop_sym_new_not_null
319 #define sym_new_type _Py_uop_sym_new_type
320 #define sym_is_null _Py_uop_sym_is_null
321 #define sym_new_const _Py_uop_sym_new_const
322 #define sym_new_null _Py_uop_sym_new_null
323 #define sym_has_type _Py_uop_sym_has_type
324 #define sym_get_type _Py_uop_sym_get_type
325 #define sym_matches_type _Py_uop_sym_matches_type
326 #define sym_set_null _Py_uop_sym_set_null
327 #define sym_set_non_null _Py_uop_sym_set_non_null
328 #define sym_set_type _Py_uop_sym_set_type
329 #define sym_set_const _Py_uop_sym_set_const
330 #define sym_is_bottom _Py_uop_sym_is_bottom
331 #define sym_truthiness _Py_uop_sym_truthiness
332 #define frame_new _Py_uop_frame_new
333 #define frame_pop _Py_uop_frame_pop
334
335 static int
optimize_to_bool(_PyUOpInstruction * this_instr,_Py_UOpsContext * ctx,_Py_UopsSymbol * value,_Py_UopsSymbol ** result_ptr)336 optimize_to_bool(
337 _PyUOpInstruction *this_instr,
338 _Py_UOpsContext *ctx,
339 _Py_UopsSymbol *value,
340 _Py_UopsSymbol **result_ptr)
341 {
342 if (sym_matches_type(value, &PyBool_Type)) {
343 REPLACE_OP(this_instr, _NOP, 0, 0);
344 *result_ptr = value;
345 return 1;
346 }
347 int truthiness = sym_truthiness(value);
348 if (truthiness >= 0) {
349 PyObject *load = truthiness ? Py_True : Py_False;
350 REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)load);
351 *result_ptr = sym_new_const(ctx, load);
352 return 1;
353 }
354 return 0;
355 }
356
357 static void
eliminate_pop_guard(_PyUOpInstruction * this_instr,bool exit)358 eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit)
359 {
360 REPLACE_OP(this_instr, _POP_TOP, 0, 0);
361 if (exit) {
362 REPLACE_OP((this_instr+1), _EXIT_TRACE, 0, 0);
363 this_instr[1].target = this_instr->target;
364 }
365 }
366
367 /* _PUSH_FRAME/_POP_FRAME's operand can be 0, a PyFunctionObject *, or a
368 * PyCodeObject *. Retrieve the code object if possible.
369 */
370 static PyCodeObject *
get_code(_PyUOpInstruction * op)371 get_code(_PyUOpInstruction *op)
372 {
373 assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME || op->opcode == _RETURN_GENERATOR);
374 PyCodeObject *co = NULL;
375 uint64_t operand = op->operand;
376 if (operand == 0) {
377 return NULL;
378 }
379 if (operand & 1) {
380 co = (PyCodeObject *)(operand & ~1);
381 }
382 else {
383 PyFunctionObject *func = (PyFunctionObject *)operand;
384 assert(PyFunction_Check(func));
385 co = (PyCodeObject *)func->func_code;
386 }
387 assert(PyCode_Check(co));
388 return co;
389 }
390
391 /* 1 for success, 0 for not ready, cannot error at the moment. */
392 static int
optimize_uops(PyCodeObject * co,_PyUOpInstruction * trace,int trace_len,int curr_stacklen,_PyBloomFilter * dependencies)393 optimize_uops(
394 PyCodeObject *co,
395 _PyUOpInstruction *trace,
396 int trace_len,
397 int curr_stacklen,
398 _PyBloomFilter *dependencies
399 )
400 {
401
402 _Py_UOpsContext context;
403 _Py_UOpsContext *ctx = &context;
404 uint32_t opcode = UINT16_MAX;
405 int curr_space = 0;
406 int max_space = 0;
407 _PyUOpInstruction *first_valid_check_stack = NULL;
408 _PyUOpInstruction *corresponding_check_stack = NULL;
409
410 if (_Py_uop_abstractcontext_init(ctx) < 0) {
411 goto out_of_space;
412 }
413 _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, curr_stacklen, NULL, 0);
414 if (frame == NULL) {
415 return -1;
416 }
417 ctx->curr_frame_depth++;
418 ctx->frame = frame;
419
420 _PyUOpInstruction *this_instr = NULL;
421 for (int i = 0; i < trace_len; i++) {
422 this_instr = &trace[i];
423
424 int oparg = this_instr->oparg;
425 opcode = this_instr->opcode;
426 _Py_UopsSymbol **stack_pointer = ctx->frame->stack_pointer;
427
428 #ifdef Py_DEBUG
429 if (get_lltrace() >= 3) {
430 printf("%4d abs: ", (int)(this_instr - trace));
431 _PyUOpPrint(this_instr);
432 printf(" ");
433 }
434 #endif
435
436 switch (opcode) {
437
438 #include "optimizer_cases.c.h"
439
440 default:
441 DPRINTF(1, "\nUnknown opcode in abstract interpreter\n");
442 Py_UNREACHABLE();
443 }
444 assert(ctx->frame != NULL);
445 DPRINTF(3, " stack_level %d\n", STACK_LEVEL());
446 ctx->frame->stack_pointer = stack_pointer;
447 assert(STACK_LEVEL() >= 0);
448 }
449 Py_UNREACHABLE();
450
451 out_of_space:
452 DPRINTF(3, "\n");
453 DPRINTF(1, "Out of space in abstract interpreter\n");
454 goto done;
455 error:
456 DPRINTF(3, "\n");
457 DPRINTF(1, "Encountered error in abstract interpreter\n");
458 if (opcode <= MAX_UOP_ID) {
459 OPT_ERROR_IN_OPCODE(opcode);
460 }
461 _Py_uop_abstractcontext_fini(ctx);
462 return -1;
463
464 hit_bottom:
465 // Attempted to push a "bottom" (contradition) symbol onto the stack.
466 // This means that the abstract interpreter has hit unreachable code.
467 // We *could* generate an _EXIT_TRACE or _FATAL_ERROR here, but hitting
468 // bottom indicates type instability, so we are probably better off
469 // retrying later.
470 DPRINTF(3, "\n");
471 DPRINTF(1, "Hit bottom in abstract interpreter\n");
472 _Py_uop_abstractcontext_fini(ctx);
473 return 0;
474 done:
475 /* Either reached the end or cannot optimize further, but there
476 * would be no benefit in retrying later */
477 _Py_uop_abstractcontext_fini(ctx);
478 if (first_valid_check_stack != NULL) {
479 assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE);
480 assert(max_space > 0);
481 assert(max_space <= INT_MAX);
482 assert(max_space <= INT32_MAX);
483 first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND;
484 first_valid_check_stack->operand = max_space;
485 }
486 return trace_len;
487 }
488
489
490 static int
remove_unneeded_uops(_PyUOpInstruction * buffer,int buffer_size)491 remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
492 {
493 /* Remove _SET_IP and _CHECK_VALIDITY where possible.
494 * _SET_IP is needed if the following instruction escapes or
495 * could error. _CHECK_VALIDITY is needed if the previous
496 * instruction could have escaped. */
497 int last_set_ip = -1;
498 bool may_have_escaped = true;
499 for (int pc = 0; pc < buffer_size; pc++) {
500 int opcode = buffer[pc].opcode;
501 switch (opcode) {
502 case _START_EXECUTOR:
503 may_have_escaped = false;
504 break;
505 case _SET_IP:
506 buffer[pc].opcode = _NOP;
507 last_set_ip = pc;
508 break;
509 case _CHECK_VALIDITY:
510 if (may_have_escaped) {
511 may_have_escaped = false;
512 }
513 else {
514 buffer[pc].opcode = _NOP;
515 }
516 break;
517 case _CHECK_VALIDITY_AND_SET_IP:
518 if (may_have_escaped) {
519 may_have_escaped = false;
520 buffer[pc].opcode = _CHECK_VALIDITY;
521 }
522 else {
523 buffer[pc].opcode = _NOP;
524 }
525 last_set_ip = pc;
526 break;
527 case _POP_TOP:
528 {
529 _PyUOpInstruction *last = &buffer[pc-1];
530 while (last->opcode == _NOP) {
531 last--;
532 }
533 if (last->opcode == _LOAD_CONST_INLINE ||
534 last->opcode == _LOAD_CONST_INLINE_BORROW ||
535 last->opcode == _LOAD_FAST ||
536 last->opcode == _COPY
537 ) {
538 last->opcode = _NOP;
539 buffer[pc].opcode = _NOP;
540 }
541 if (last->opcode == _REPLACE_WITH_TRUE) {
542 last->opcode = _NOP;
543 }
544 break;
545 }
546 case _JUMP_TO_TOP:
547 case _EXIT_TRACE:
548 return pc + 1;
549 default:
550 {
551 /* _PUSH_FRAME doesn't escape or error, but it
552 * does need the IP for the return address */
553 bool needs_ip = opcode == _PUSH_FRAME;
554 if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) {
555 needs_ip = true;
556 may_have_escaped = true;
557 }
558 if (needs_ip && last_set_ip >= 0) {
559 if (buffer[last_set_ip].opcode == _CHECK_VALIDITY) {
560 buffer[last_set_ip].opcode = _CHECK_VALIDITY_AND_SET_IP;
561 }
562 else {
563 assert(buffer[last_set_ip].opcode == _NOP);
564 buffer[last_set_ip].opcode = _SET_IP;
565 }
566 last_set_ip = -1;
567 }
568 }
569 }
570 }
571 Py_UNREACHABLE();
572 }
573
574 // 0 - failure, no error raised, just fall back to Tier 1
575 // -1 - failure, and raise error
576 // > 0 - length of optimized trace
577 int
_Py_uop_analyze_and_optimize(_PyInterpreterFrame * frame,_PyUOpInstruction * buffer,int length,int curr_stacklen,_PyBloomFilter * dependencies)578 _Py_uop_analyze_and_optimize(
579 _PyInterpreterFrame *frame,
580 _PyUOpInstruction *buffer,
581 int length,
582 int curr_stacklen,
583 _PyBloomFilter *dependencies
584 )
585 {
586 OPT_STAT_INC(optimizer_attempts);
587
588 int err = remove_globals(frame, buffer, length, dependencies);
589 if (err <= 0) {
590 return err;
591 }
592
593 length = optimize_uops(
594 _PyFrame_GetCode(frame), buffer,
595 length, curr_stacklen, dependencies);
596
597 if (length <= 0) {
598 return length;
599 }
600
601 length = remove_unneeded_uops(buffer, length);
602 assert(length > 0);
603
604 OPT_STAT_INC(optimizer_successes);
605 return length;
606 }
607
608 #endif /* _Py_TIER2 */
609