• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #ifndef Py_INTERNAL_OPTIMIZER_H
2 #define Py_INTERNAL_OPTIMIZER_H
3 #ifdef __cplusplus
4 extern "C" {
5 #endif
6 
7 #ifndef Py_BUILD_CORE
8 #  error "this header requires Py_BUILD_CORE define"
9 #endif
10 
11 #include "pycore_uop_ids.h"
12 #include <stdbool.h>
13 
14 
15 typedef struct _PyExecutorLinkListNode {
16     struct _PyExecutorObject *next;
17     struct _PyExecutorObject *previous;
18 } _PyExecutorLinkListNode;
19 
20 
21 /* Bloom filter with m = 256
22  * https://en.wikipedia.org/wiki/Bloom_filter */
23 #define BLOOM_FILTER_WORDS 8
24 
25 typedef struct _bloom_filter {
26     uint32_t bits[BLOOM_FILTER_WORDS];
27 } _PyBloomFilter;
28 
29 typedef struct {
30     uint8_t opcode;
31     uint8_t oparg;
32     uint8_t valid;
33     uint8_t linked;
34     int index;           // Index of ENTER_EXECUTOR (if code isn't NULL, below).
35     _PyBloomFilter bloom;
36     _PyExecutorLinkListNode links;
37     PyCodeObject *code;  // Weak (NULL if no corresponding ENTER_EXECUTOR).
38 } _PyVMData;
39 
40 #define UOP_FORMAT_TARGET 0
41 #define UOP_FORMAT_EXIT 1
42 #define UOP_FORMAT_JUMP 2
43 #define UOP_FORMAT_UNUSED 3
44 
45 /* Depending on the format,
46  * the 32 bits between the oparg and operand are:
47  * UOP_FORMAT_TARGET:
48  *    uint32_t target;
49  * UOP_FORMAT_EXIT
50  *    uint16_t exit_index;
51  *    uint16_t error_target;
52  * UOP_FORMAT_JUMP
53  *    uint16_t jump_target;
54  *    uint16_t error_target;
55  */
56 typedef struct {
57     uint16_t opcode:14;
58     uint16_t format:2;
59     uint16_t oparg;
60     union {
61         uint32_t target;
62         struct {
63             union {
64                 uint16_t exit_index;
65                 uint16_t jump_target;
66             };
67             uint16_t error_target;
68         };
69     };
70     uint64_t operand;  // A cache entry
71 } _PyUOpInstruction;
72 
uop_get_target(const _PyUOpInstruction * inst)73 static inline uint32_t uop_get_target(const _PyUOpInstruction *inst)
74 {
75     assert(inst->format == UOP_FORMAT_TARGET);
76     return inst->target;
77 }
78 
uop_get_exit_index(const _PyUOpInstruction * inst)79 static inline uint16_t uop_get_exit_index(const _PyUOpInstruction *inst)
80 {
81     assert(inst->format == UOP_FORMAT_EXIT);
82     return inst->exit_index;
83 }
84 
uop_get_jump_target(const _PyUOpInstruction * inst)85 static inline uint16_t uop_get_jump_target(const _PyUOpInstruction *inst)
86 {
87     assert(inst->format == UOP_FORMAT_JUMP);
88     return inst->jump_target;
89 }
90 
uop_get_error_target(const _PyUOpInstruction * inst)91 static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst)
92 {
93     assert(inst->format != UOP_FORMAT_TARGET);
94     return inst->error_target;
95 }
96 
97 typedef struct _exit_data {
98     uint32_t target;
99     _Py_BackoffCounter temperature;
100     const struct _PyExecutorObject *executor;
101 } _PyExitData;
102 
103 typedef struct _PyExecutorObject {
104     PyObject_VAR_HEAD
105     const _PyUOpInstruction *trace;
106     _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */
107     uint32_t exit_count;
108     uint32_t code_size;
109     size_t jit_size;
110     void *jit_code;
111     void *jit_side_entry;
112     _PyExitData exits[1];
113 } _PyExecutorObject;
114 
115 typedef struct _PyOptimizerObject _PyOptimizerObject;
116 
117 /* Should return > 0 if a new executor is created. O if no executor is produced and < 0 if an error occurred. */
118 typedef int (*optimize_func)(
119     _PyOptimizerObject* self, struct _PyInterpreterFrame *frame,
120     _Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr,
121     int curr_stackentries);
122 
123 struct _PyOptimizerObject {
124     PyObject_HEAD
125     optimize_func optimize;
126     /* Data needed by the optimizer goes here, but is opaque to the VM */
127 };
128 
129 /** Test support **/
130 typedef struct {
131     _PyOptimizerObject base;
132     int64_t count;
133 } _PyCounterOptimizerObject;
134 
135 _PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer);
136 
137 PyAPI_FUNC(int) _Py_SetTier2Optimizer(_PyOptimizerObject* optimizer);
138 
139 PyAPI_FUNC(_PyOptimizerObject *) _Py_GetOptimizer(void);
140 
141 PyAPI_FUNC(_PyExecutorObject *) _Py_GetExecutor(PyCodeObject *code, int offset);
142 
143 void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
144 void _Py_ExecutorDetach(_PyExecutorObject *);
145 void _Py_BloomFilter_Init(_PyBloomFilter *);
146 void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj);
147 PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj);
148 /* For testing */
149 PyAPI_FUNC(PyObject *) _PyOptimizer_NewCounter(void);
150 PyAPI_FUNC(PyObject *) _PyOptimizer_NewUOpOptimizer(void);
151 
152 #define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3
153 #define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6
154 
155 #ifdef _Py_TIER2
156 PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation);
157 PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation);
158 #else
159 #  define _Py_Executors_InvalidateDependency(A, B, C) ((void)0)
160 #  define _Py_Executors_InvalidateAll(A, B) ((void)0)
161 #endif
162 
163 
164 // This is the length of the trace we project initially.
165 #define UOP_MAX_TRACE_LENGTH 800
166 
167 #define TRACE_STACK_SIZE 5
168 
169 int _Py_uop_analyze_and_optimize(struct _PyInterpreterFrame *frame,
170     _PyUOpInstruction *trace, int trace_len, int curr_stackentries,
171     _PyBloomFilter *dependencies);
172 
173 extern PyTypeObject _PyCounterExecutor_Type;
174 extern PyTypeObject _PyCounterOptimizer_Type;
175 extern PyTypeObject _PyDefaultOptimizer_Type;
176 extern PyTypeObject _PyUOpExecutor_Type;
177 extern PyTypeObject _PyUOpOptimizer_Type;
178 
179 /* Symbols */
180 /* See explanation in optimizer_symbols.c */
181 
182 struct _Py_UopsSymbol {
183     int flags;  // 0 bits: Top; 2 or more bits: Bottom
184     PyTypeObject *typ;  // Borrowed reference
185     PyObject *const_val;  // Owned reference (!)
186 };
187 
188 // Holds locals, stack, locals, stack ... co_consts (in that order)
189 #define MAX_ABSTRACT_INTERP_SIZE 4096
190 
191 #define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5)
192 
193 // Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
194 #define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2)
195 
196 typedef struct _Py_UopsSymbol _Py_UopsSymbol;
197 
198 struct _Py_UOpsAbstractFrame {
199     // Max stacklen
200     int stack_len;
201     int locals_len;
202 
203     _Py_UopsSymbol **stack_pointer;
204     _Py_UopsSymbol **stack;
205     _Py_UopsSymbol **locals;
206 };
207 
208 typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
209 
210 typedef struct ty_arena {
211     int ty_curr_number;
212     int ty_max_number;
213     _Py_UopsSymbol arena[TY_ARENA_SIZE];
214 } ty_arena;
215 
216 struct _Py_UOpsContext {
217     PyObject_HEAD
218     // The current "executing" frame.
219     _Py_UOpsAbstractFrame *frame;
220     _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
221     int curr_frame_depth;
222 
223     // Arena for the symbolic types.
224     ty_arena t_arena;
225 
226     _Py_UopsSymbol **n_consumed;
227     _Py_UopsSymbol **limit;
228     _Py_UopsSymbol *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
229 };
230 
231 typedef struct _Py_UOpsContext _Py_UOpsContext;
232 
233 extern bool _Py_uop_sym_is_null(_Py_UopsSymbol *sym);
234 extern bool _Py_uop_sym_is_not_null(_Py_UopsSymbol *sym);
235 extern bool _Py_uop_sym_is_const(_Py_UopsSymbol *sym);
236 extern PyObject *_Py_uop_sym_get_const(_Py_UopsSymbol *sym);
237 extern _Py_UopsSymbol *_Py_uop_sym_new_unknown(_Py_UOpsContext *ctx);
238 extern _Py_UopsSymbol *_Py_uop_sym_new_not_null(_Py_UOpsContext *ctx);
239 extern _Py_UopsSymbol *_Py_uop_sym_new_type(
240     _Py_UOpsContext *ctx, PyTypeObject *typ);
241 extern _Py_UopsSymbol *_Py_uop_sym_new_const(_Py_UOpsContext *ctx, PyObject *const_val);
242 extern _Py_UopsSymbol *_Py_uop_sym_new_null(_Py_UOpsContext *ctx);
243 extern bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym);
244 extern bool _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ);
245 extern bool _Py_uop_sym_set_null(_Py_UopsSymbol *sym);
246 extern bool _Py_uop_sym_set_non_null(_Py_UopsSymbol *sym);
247 extern bool _Py_uop_sym_set_type(_Py_UopsSymbol *sym, PyTypeObject *typ);
248 extern bool _Py_uop_sym_set_const(_Py_UopsSymbol *sym, PyObject *const_val);
249 extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym);
250 extern int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym);
251 extern PyTypeObject *_Py_uop_sym_get_type(_Py_UopsSymbol *sym);
252 
253 
254 extern int _Py_uop_abstractcontext_init(_Py_UOpsContext *ctx);
255 extern void _Py_uop_abstractcontext_fini(_Py_UOpsContext *ctx);
256 
257 extern _Py_UOpsAbstractFrame *_Py_uop_frame_new(
258     _Py_UOpsContext *ctx,
259     PyCodeObject *co,
260     int curr_stackentries,
261     _Py_UopsSymbol **args,
262     int arg_len);
263 extern int _Py_uop_frame_pop(_Py_UOpsContext *ctx);
264 
265 PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored);
266 
267 PyAPI_FUNC(int) _PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **exec_ptr);
268 
269 #ifdef __cplusplus
270 }
271 #endif
272 #endif /* !Py_INTERNAL_OPTIMIZER_H */
273