1 #ifndef Py_INTERNAL_OPTIMIZER_H
2 #define Py_INTERNAL_OPTIMIZER_H
3 #ifdef __cplusplus
4 extern "C" {
5 #endif
6
7 #ifndef Py_BUILD_CORE
8 # error "this header requires Py_BUILD_CORE define"
9 #endif
10
11 #include "pycore_uop_ids.h"
12 #include <stdbool.h>
13
14
15 typedef struct _PyExecutorLinkListNode {
16 struct _PyExecutorObject *next;
17 struct _PyExecutorObject *previous;
18 } _PyExecutorLinkListNode;
19
20
21 /* Bloom filter with m = 256
22 * https://en.wikipedia.org/wiki/Bloom_filter */
23 #define BLOOM_FILTER_WORDS 8
24
25 typedef struct _bloom_filter {
26 uint32_t bits[BLOOM_FILTER_WORDS];
27 } _PyBloomFilter;
28
29 typedef struct {
30 uint8_t opcode;
31 uint8_t oparg;
32 uint8_t valid;
33 uint8_t linked;
34 int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below).
35 _PyBloomFilter bloom;
36 _PyExecutorLinkListNode links;
37 PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR).
38 } _PyVMData;
39
40 #define UOP_FORMAT_TARGET 0
41 #define UOP_FORMAT_EXIT 1
42 #define UOP_FORMAT_JUMP 2
43 #define UOP_FORMAT_UNUSED 3
44
45 /* Depending on the format,
46 * the 32 bits between the oparg and operand are:
47 * UOP_FORMAT_TARGET:
48 * uint32_t target;
49 * UOP_FORMAT_EXIT
50 * uint16_t exit_index;
51 * uint16_t error_target;
52 * UOP_FORMAT_JUMP
53 * uint16_t jump_target;
54 * uint16_t error_target;
55 */
56 typedef struct {
57 uint16_t opcode:14;
58 uint16_t format:2;
59 uint16_t oparg;
60 union {
61 uint32_t target;
62 struct {
63 union {
64 uint16_t exit_index;
65 uint16_t jump_target;
66 };
67 uint16_t error_target;
68 };
69 };
70 uint64_t operand; // A cache entry
71 } _PyUOpInstruction;
72
uop_get_target(const _PyUOpInstruction * inst)73 static inline uint32_t uop_get_target(const _PyUOpInstruction *inst)
74 {
75 assert(inst->format == UOP_FORMAT_TARGET);
76 return inst->target;
77 }
78
uop_get_exit_index(const _PyUOpInstruction * inst)79 static inline uint16_t uop_get_exit_index(const _PyUOpInstruction *inst)
80 {
81 assert(inst->format == UOP_FORMAT_EXIT);
82 return inst->exit_index;
83 }
84
uop_get_jump_target(const _PyUOpInstruction * inst)85 static inline uint16_t uop_get_jump_target(const _PyUOpInstruction *inst)
86 {
87 assert(inst->format == UOP_FORMAT_JUMP);
88 return inst->jump_target;
89 }
90
uop_get_error_target(const _PyUOpInstruction * inst)91 static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst)
92 {
93 assert(inst->format != UOP_FORMAT_TARGET);
94 return inst->error_target;
95 }
96
97 typedef struct _exit_data {
98 uint32_t target;
99 _Py_BackoffCounter temperature;
100 const struct _PyExecutorObject *executor;
101 } _PyExitData;
102
103 typedef struct _PyExecutorObject {
104 PyObject_VAR_HEAD
105 const _PyUOpInstruction *trace;
106 _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */
107 uint32_t exit_count;
108 uint32_t code_size;
109 size_t jit_size;
110 void *jit_code;
111 void *jit_side_entry;
112 _PyExitData exits[1];
113 } _PyExecutorObject;
114
115 typedef struct _PyOptimizerObject _PyOptimizerObject;
116
117 /* Should return > 0 if a new executor is created. O if no executor is produced and < 0 if an error occurred. */
118 typedef int (*optimize_func)(
119 _PyOptimizerObject* self, struct _PyInterpreterFrame *frame,
120 _Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr,
121 int curr_stackentries);
122
123 struct _PyOptimizerObject {
124 PyObject_HEAD
125 optimize_func optimize;
126 /* Data needed by the optimizer goes here, but is opaque to the VM */
127 };
128
129 /** Test support **/
130 typedef struct {
131 _PyOptimizerObject base;
132 int64_t count;
133 } _PyCounterOptimizerObject;
134
135 _PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer);
136
137 PyAPI_FUNC(int) _Py_SetTier2Optimizer(_PyOptimizerObject* optimizer);
138
139 PyAPI_FUNC(_PyOptimizerObject *) _Py_GetOptimizer(void);
140
141 PyAPI_FUNC(_PyExecutorObject *) _Py_GetExecutor(PyCodeObject *code, int offset);
142
143 void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
144 void _Py_ExecutorDetach(_PyExecutorObject *);
145 void _Py_BloomFilter_Init(_PyBloomFilter *);
146 void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj);
147 PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj);
148 /* For testing */
149 PyAPI_FUNC(PyObject *) _PyOptimizer_NewCounter(void);
150 PyAPI_FUNC(PyObject *) _PyOptimizer_NewUOpOptimizer(void);
151
152 #define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3
153 #define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6
154
155 #ifdef _Py_TIER2
156 PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation);
157 PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation);
158 #else
159 # define _Py_Executors_InvalidateDependency(A, B, C) ((void)0)
160 # define _Py_Executors_InvalidateAll(A, B) ((void)0)
161 #endif
162
163
164 // This is the length of the trace we project initially.
165 #define UOP_MAX_TRACE_LENGTH 800
166
167 #define TRACE_STACK_SIZE 5
168
169 int _Py_uop_analyze_and_optimize(struct _PyInterpreterFrame *frame,
170 _PyUOpInstruction *trace, int trace_len, int curr_stackentries,
171 _PyBloomFilter *dependencies);
172
173 extern PyTypeObject _PyCounterExecutor_Type;
174 extern PyTypeObject _PyCounterOptimizer_Type;
175 extern PyTypeObject _PyDefaultOptimizer_Type;
176 extern PyTypeObject _PyUOpExecutor_Type;
177 extern PyTypeObject _PyUOpOptimizer_Type;
178
179 /* Symbols */
180 /* See explanation in optimizer_symbols.c */
181
182 struct _Py_UopsSymbol {
183 int flags; // 0 bits: Top; 2 or more bits: Bottom
184 PyTypeObject *typ; // Borrowed reference
185 PyObject *const_val; // Owned reference (!)
186 };
187
188 // Holds locals, stack, locals, stack ... co_consts (in that order)
189 #define MAX_ABSTRACT_INTERP_SIZE 4096
190
191 #define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5)
192
193 // Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
194 #define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2)
195
196 typedef struct _Py_UopsSymbol _Py_UopsSymbol;
197
198 struct _Py_UOpsAbstractFrame {
199 // Max stacklen
200 int stack_len;
201 int locals_len;
202
203 _Py_UopsSymbol **stack_pointer;
204 _Py_UopsSymbol **stack;
205 _Py_UopsSymbol **locals;
206 };
207
208 typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
209
210 typedef struct ty_arena {
211 int ty_curr_number;
212 int ty_max_number;
213 _Py_UopsSymbol arena[TY_ARENA_SIZE];
214 } ty_arena;
215
216 struct _Py_UOpsContext {
217 PyObject_HEAD
218 // The current "executing" frame.
219 _Py_UOpsAbstractFrame *frame;
220 _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
221 int curr_frame_depth;
222
223 // Arena for the symbolic types.
224 ty_arena t_arena;
225
226 _Py_UopsSymbol **n_consumed;
227 _Py_UopsSymbol **limit;
228 _Py_UopsSymbol *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
229 };
230
231 typedef struct _Py_UOpsContext _Py_UOpsContext;
232
233 extern bool _Py_uop_sym_is_null(_Py_UopsSymbol *sym);
234 extern bool _Py_uop_sym_is_not_null(_Py_UopsSymbol *sym);
235 extern bool _Py_uop_sym_is_const(_Py_UopsSymbol *sym);
236 extern PyObject *_Py_uop_sym_get_const(_Py_UopsSymbol *sym);
237 extern _Py_UopsSymbol *_Py_uop_sym_new_unknown(_Py_UOpsContext *ctx);
238 extern _Py_UopsSymbol *_Py_uop_sym_new_not_null(_Py_UOpsContext *ctx);
239 extern _Py_UopsSymbol *_Py_uop_sym_new_type(
240 _Py_UOpsContext *ctx, PyTypeObject *typ);
241 extern _Py_UopsSymbol *_Py_uop_sym_new_const(_Py_UOpsContext *ctx, PyObject *const_val);
242 extern _Py_UopsSymbol *_Py_uop_sym_new_null(_Py_UOpsContext *ctx);
243 extern bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym);
244 extern bool _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ);
245 extern bool _Py_uop_sym_set_null(_Py_UopsSymbol *sym);
246 extern bool _Py_uop_sym_set_non_null(_Py_UopsSymbol *sym);
247 extern bool _Py_uop_sym_set_type(_Py_UopsSymbol *sym, PyTypeObject *typ);
248 extern bool _Py_uop_sym_set_const(_Py_UopsSymbol *sym, PyObject *const_val);
249 extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym);
250 extern int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym);
251 extern PyTypeObject *_Py_uop_sym_get_type(_Py_UopsSymbol *sym);
252
253
254 extern int _Py_uop_abstractcontext_init(_Py_UOpsContext *ctx);
255 extern void _Py_uop_abstractcontext_fini(_Py_UOpsContext *ctx);
256
257 extern _Py_UOpsAbstractFrame *_Py_uop_frame_new(
258 _Py_UOpsContext *ctx,
259 PyCodeObject *co,
260 int curr_stackentries,
261 _Py_UopsSymbol **args,
262 int arg_len);
263 extern int _Py_uop_frame_pop(_Py_UOpsContext *ctx);
264
265 PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored);
266
267 PyAPI_FUNC(int) _PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **exec_ptr);
268
269 #ifdef __cplusplus
270 }
271 #endif
272 #endif /* !Py_INTERNAL_OPTIMIZER_H */
273