1 /*
2 * This file implements a data structure representing a sequence of
3 * instructions, which is used by different parts of the compilation
4 * pipeline.
5 */
6
7
8 #include <stdbool.h>
9
10 #include "Python.h"
11
12 #include "pycore_compile.h" // _PyCompile_EnsureArrayLargeEnough
13 #include "pycore_opcode_utils.h"
14 #include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
15
16 typedef _PyInstruction instruction;
17 typedef _PyInstructionSequence instr_sequence;
18 typedef _Py_SourceLocation location;
19
20 #define INITIAL_INSTR_SEQUENCE_SIZE 100
21 #define INITIAL_INSTR_SEQUENCE_LABELS_MAP_SIZE 10
22
23 #include "clinic/instruction_sequence.c.h"
24
25 #undef SUCCESS
26 #undef ERROR
27 #define SUCCESS 0
28 #define ERROR -1
29
30 #define RETURN_IF_ERROR(X) \
31 if ((X) == -1) { \
32 return ERROR; \
33 }
34
35 static int
instr_sequence_next_inst(instr_sequence * seq)36 instr_sequence_next_inst(instr_sequence *seq) {
37 assert(seq->s_instrs != NULL || seq->s_used == 0);
38
39 RETURN_IF_ERROR(
40 _PyCompile_EnsureArrayLargeEnough(seq->s_used + 1,
41 (void**)&seq->s_instrs,
42 &seq->s_allocated,
43 INITIAL_INSTR_SEQUENCE_SIZE,
44 sizeof(instruction)));
45 assert(seq->s_allocated >= 0);
46 assert(seq->s_used < seq->s_allocated);
47 return seq->s_used++;
48 }
49
50 _PyJumpTargetLabel
_PyInstructionSequence_NewLabel(instr_sequence * seq)51 _PyInstructionSequence_NewLabel(instr_sequence *seq)
52 {
53 _PyJumpTargetLabel lbl = {++seq->s_next_free_label};
54 return lbl;
55 }
56
57 int
_PyInstructionSequence_UseLabel(instr_sequence * seq,int lbl)58 _PyInstructionSequence_UseLabel(instr_sequence *seq, int lbl)
59 {
60 int old_size = seq->s_labelmap_size;
61 RETURN_IF_ERROR(
62 _PyCompile_EnsureArrayLargeEnough(lbl,
63 (void**)&seq->s_labelmap,
64 &seq->s_labelmap_size,
65 INITIAL_INSTR_SEQUENCE_LABELS_MAP_SIZE,
66 sizeof(int)));
67
68 for(int i = old_size; i < seq->s_labelmap_size; i++) {
69 seq->s_labelmap[i] = -111; /* something weird, for debugging */
70 }
71 seq->s_labelmap[lbl] = seq->s_used; /* label refers to the next instruction */
72 return SUCCESS;
73 }
74
75 int
_PyInstructionSequence_ApplyLabelMap(instr_sequence * instrs)76 _PyInstructionSequence_ApplyLabelMap(instr_sequence *instrs)
77 {
78 if (instrs->s_labelmap == NULL) {
79 /* Already applied - nothing to do */
80 return SUCCESS;
81 }
82 /* Replace labels by offsets in the code */
83 for (int i=0; i < instrs->s_used; i++) {
84 instruction *instr = &instrs->s_instrs[i];
85 if (HAS_TARGET(instr->i_opcode)) {
86 assert(instr->i_oparg < instrs->s_labelmap_size);
87 instr->i_oparg = instrs->s_labelmap[instr->i_oparg];
88 }
89 _PyExceptHandlerInfo *hi = &instr->i_except_handler_info;
90 if (hi->h_label >= 0) {
91 assert(hi->h_label < instrs->s_labelmap_size);
92 hi->h_label = instrs->s_labelmap[hi->h_label];
93 }
94 }
95 /* Clear label map so it's never used again */
96 PyMem_Free(instrs->s_labelmap);
97 instrs->s_labelmap = NULL;
98 instrs->s_labelmap_size = 0;
99 return SUCCESS;
100 }
101
102 #define MAX_OPCODE 511
103
104 int
_PyInstructionSequence_Addop(instr_sequence * seq,int opcode,int oparg,location loc)105 _PyInstructionSequence_Addop(instr_sequence *seq, int opcode, int oparg,
106 location loc)
107 {
108 assert(0 <= opcode && opcode <= MAX_OPCODE);
109 assert(IS_WITHIN_OPCODE_RANGE(opcode));
110 assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
111 assert(0 <= oparg && oparg < (1 << 30));
112
113 int idx = instr_sequence_next_inst(seq);
114 RETURN_IF_ERROR(idx);
115 instruction *ci = &seq->s_instrs[idx];
116 ci->i_opcode = opcode;
117 ci->i_oparg = oparg;
118 ci->i_loc = loc;
119 return SUCCESS;
120 }
121
122 int
_PyInstructionSequence_InsertInstruction(instr_sequence * seq,int pos,int opcode,int oparg,location loc)123 _PyInstructionSequence_InsertInstruction(instr_sequence *seq, int pos,
124 int opcode, int oparg, location loc)
125 {
126 assert(pos >= 0 && pos <= seq->s_used);
127 int last_idx = instr_sequence_next_inst(seq);
128 RETURN_IF_ERROR(last_idx);
129 for (int i=last_idx-1; i >= pos; i--) {
130 seq->s_instrs[i+1] = seq->s_instrs[i];
131 }
132 instruction *ci = &seq->s_instrs[pos];
133 ci->i_opcode = opcode;
134 ci->i_oparg = oparg;
135 ci->i_loc = loc;
136
137 /* fix the labels map */
138 for(int lbl=0; lbl < seq->s_labelmap_size; lbl++) {
139 if (seq->s_labelmap[lbl] >= pos) {
140 seq->s_labelmap[lbl]++;
141 }
142 }
143 return SUCCESS;
144 }
145
146 int
_PyInstructionSequence_AddNested(instr_sequence * seq,instr_sequence * nested)147 _PyInstructionSequence_AddNested(instr_sequence *seq, instr_sequence *nested)
148 {
149 if (seq->s_nested == NULL) {
150 seq->s_nested = PyList_New(0);
151 if (seq->s_nested == NULL) {
152 return ERROR;
153 }
154 }
155 if (PyList_Append(seq->s_nested, (PyObject*)nested) < 0) {
156 return ERROR;
157 }
158 return SUCCESS;
159 }
160
161 void
PyInstructionSequence_Fini(instr_sequence * seq)162 PyInstructionSequence_Fini(instr_sequence *seq) {
163 Py_XDECREF(seq->s_nested);
164
165 PyMem_Free(seq->s_labelmap);
166 seq->s_labelmap = NULL;
167
168 PyMem_Free(seq->s_instrs);
169 seq->s_instrs = NULL;
170 }
171
172 /*[clinic input]
173 class InstructionSequenceType "_PyInstructionSequence *" "&_PyInstructionSequence_Type"
174 [clinic start generated code]*/
175 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=589963e07480390f]*/
176
177 static _PyInstructionSequence*
inst_seq_create(void)178 inst_seq_create(void)
179 {
180 _PyInstructionSequence *seq;
181 seq = PyObject_GC_New(_PyInstructionSequence, &_PyInstructionSequence_Type);
182 if (seq == NULL) {
183 return NULL;
184 }
185 seq->s_instrs = NULL;
186 seq->s_allocated = 0;
187 seq->s_used = 0;
188 seq->s_next_free_label = 0;
189 seq->s_labelmap = NULL;
190 seq->s_labelmap_size = 0;
191 seq->s_nested = NULL;
192
193 PyObject_GC_Track(seq);
194 return seq;
195 }
196
197 PyObject*
_PyInstructionSequence_New(void)198 _PyInstructionSequence_New(void)
199 {
200 _PyInstructionSequence *seq = inst_seq_create();
201 if (seq == NULL) {
202 return NULL;
203 }
204 return (PyObject*)seq;
205 }
206
207 /*[clinic input]
208 @classmethod
209 InstructionSequenceType.__new__ as inst_seq_new
210
211 Create a new InstructionSequence object.
212 [clinic start generated code]*/
213
214 static PyObject *
inst_seq_new_impl(PyTypeObject * type)215 inst_seq_new_impl(PyTypeObject *type)
216 /*[clinic end generated code: output=98881de92c8876f6 input=b393150146849c74]*/
217 {
218 return (PyObject*)inst_seq_create();
219 }
220
221 /*[clinic input]
222 InstructionSequenceType.use_label
223
224 label: int
225
226 Place label at current location.
227 [clinic start generated code]*/
228
229 static PyObject *
InstructionSequenceType_use_label_impl(_PyInstructionSequence * self,int label)230 InstructionSequenceType_use_label_impl(_PyInstructionSequence *self,
231 int label)
232 /*[clinic end generated code: output=4c06bbacb2854755 input=da55f49bb91841f3]*/
233
234 {
235 if (_PyInstructionSequence_UseLabel(self, label) < 0) {
236 return NULL;
237 }
238 Py_RETURN_NONE;
239 }
240
241 /*[clinic input]
242 InstructionSequenceType.addop
243
244 opcode: int
245 oparg: int
246 lineno: int
247 col_offset: int
248 end_lineno: int
249 end_col_offset: int
250
251 Append an instruction.
252 [clinic start generated code]*/
253
254 static PyObject *
InstructionSequenceType_addop_impl(_PyInstructionSequence * self,int opcode,int oparg,int lineno,int col_offset,int end_lineno,int end_col_offset)255 InstructionSequenceType_addop_impl(_PyInstructionSequence *self, int opcode,
256 int oparg, int lineno, int col_offset,
257 int end_lineno, int end_col_offset)
258 /*[clinic end generated code: output=af0cc22c048dfbf3 input=012762ac88198713]*/
259 {
260 _Py_SourceLocation loc = {lineno, col_offset, end_lineno, end_col_offset};
261 if (_PyInstructionSequence_Addop(self, opcode, oparg, loc) < 0) {
262 return NULL;
263 }
264 Py_RETURN_NONE;
265 }
266
267 /*[clinic input]
268 InstructionSequenceType.new_label -> int
269
270 Return a new label.
271 [clinic start generated code]*/
272
273 static int
InstructionSequenceType_new_label_impl(_PyInstructionSequence * self)274 InstructionSequenceType_new_label_impl(_PyInstructionSequence *self)
275 /*[clinic end generated code: output=dcb0589e4f5bf4bd input=c66040b9897bc327]*/
276 {
277 _PyJumpTargetLabel lbl = _PyInstructionSequence_NewLabel(self);
278 return lbl.id;
279 }
280
281 /*[clinic input]
282 InstructionSequenceType.add_nested
283
284 nested: object
285
286 Add a nested sequence.
287 [clinic start generated code]*/
288
289 static PyObject *
InstructionSequenceType_add_nested_impl(_PyInstructionSequence * self,PyObject * nested)290 InstructionSequenceType_add_nested_impl(_PyInstructionSequence *self,
291 PyObject *nested)
292 /*[clinic end generated code: output=14540fad459f7971 input=f2c482568b3b3c0f]*/
293 {
294 if (!_PyInstructionSequence_Check(nested)) {
295 PyErr_Format(PyExc_TypeError,
296 "expected an instruction sequence, not %T",
297 Py_TYPE(nested));
298 return NULL;
299 }
300 if (_PyInstructionSequence_AddNested(self, (_PyInstructionSequence*)nested) < 0) {
301 return NULL;
302 }
303 Py_RETURN_NONE;
304 }
305
306 /*[clinic input]
307 InstructionSequenceType.get_nested
308
309 Add a nested sequence.
310 [clinic start generated code]*/
311
312 static PyObject *
InstructionSequenceType_get_nested_impl(_PyInstructionSequence * self)313 InstructionSequenceType_get_nested_impl(_PyInstructionSequence *self)
314 /*[clinic end generated code: output=f415112c292630cb input=e429e474c57b95b4]*/
315 {
316 if (self->s_nested == NULL) {
317 return PyList_New(0);
318 }
319 return Py_NewRef(self->s_nested);
320 }
321
322 /*[clinic input]
323 InstructionSequenceType.get_instructions
324
325 Return the instructions as a list of tuples or labels.
326 [clinic start generated code]*/
327
328 static PyObject *
InstructionSequenceType_get_instructions_impl(_PyInstructionSequence * self)329 InstructionSequenceType_get_instructions_impl(_PyInstructionSequence *self)
330 /*[clinic end generated code: output=23f4f3f894c301b3 input=fbadb5dadb611291]*/
331 {
332 if (_PyInstructionSequence_ApplyLabelMap(self) < 0) {
333 return NULL;
334 }
335 PyObject *instructions = PyList_New(0);
336 if (instructions == NULL) {
337 return NULL;
338 }
339 for (int i = 0; i < self->s_used; i++) {
340 instruction *instr = &self->s_instrs[i];
341 location loc = instr->i_loc;
342 PyObject *inst_tuple;
343
344 if (OPCODE_HAS_ARG(instr->i_opcode)) {
345 inst_tuple = Py_BuildValue(
346 "(iiiiii)", instr->i_opcode, instr->i_oparg,
347 loc.lineno, loc.end_lineno,
348 loc.col_offset, loc.end_col_offset);
349 }
350 else {
351 inst_tuple = Py_BuildValue(
352 "(iOiiii)", instr->i_opcode, Py_None,
353 loc.lineno, loc.end_lineno,
354 loc.col_offset, loc.end_col_offset);
355 }
356 if (inst_tuple == NULL) {
357 goto error;
358 }
359
360 int res = PyList_Append(instructions, inst_tuple);
361 Py_DECREF(inst_tuple);
362 if (res != 0) {
363 goto error;
364 }
365 }
366 return instructions;
367 error:
368 Py_XDECREF(instructions);
369 return NULL;
370 }
371
372 static PyMethodDef inst_seq_methods[] = {
373 INSTRUCTIONSEQUENCETYPE_ADDOP_METHODDEF
374 INSTRUCTIONSEQUENCETYPE_NEW_LABEL_METHODDEF
375 INSTRUCTIONSEQUENCETYPE_USE_LABEL_METHODDEF
376 INSTRUCTIONSEQUENCETYPE_ADD_NESTED_METHODDEF
377 INSTRUCTIONSEQUENCETYPE_GET_NESTED_METHODDEF
378 INSTRUCTIONSEQUENCETYPE_GET_INSTRUCTIONS_METHODDEF
379 {NULL, NULL, 0, NULL},
380 };
381
382 static PyMemberDef inst_seq_memberlist[] = {
383 {NULL} /* Sentinel */
384 };
385
386 static PyGetSetDef inst_seq_getsetters[] = {
387 {NULL} /* Sentinel */
388 };
389
390 static void
inst_seq_dealloc(_PyInstructionSequence * seq)391 inst_seq_dealloc(_PyInstructionSequence *seq)
392 {
393 PyObject_GC_UnTrack(seq);
394 Py_TRASHCAN_BEGIN(seq, inst_seq_dealloc)
395 PyInstructionSequence_Fini(seq);
396 PyObject_GC_Del(seq);
397 Py_TRASHCAN_END
398 }
399
400 static int
inst_seq_traverse(_PyInstructionSequence * seq,visitproc visit,void * arg)401 inst_seq_traverse(_PyInstructionSequence *seq, visitproc visit, void *arg)
402 {
403 Py_VISIT(seq->s_nested);
404 return 0;
405 }
406
407 static int
inst_seq_clear(_PyInstructionSequence * seq)408 inst_seq_clear(_PyInstructionSequence *seq)
409 {
410 Py_CLEAR(seq->s_nested);
411 return 0;
412 }
413
414 PyTypeObject _PyInstructionSequence_Type = {
415 PyVarObject_HEAD_INIT(&PyType_Type, 0)
416 "InstructionSequence",
417 sizeof(_PyInstructionSequence),
418 0,
419 (destructor)inst_seq_dealloc, /*tp_dealloc*/
420 0, /*tp_vectorcall_offset*/
421 0, /*tp_getattr*/
422 0, /*tp_setattr*/
423 0, /*tp_as_async*/
424 0, /*tp_repr*/
425 0, /*tp_as_number*/
426 0, /*tp_as_sequence*/
427 0, /*tp_as_mapping*/
428 0, /* tp_hash */
429 0, /* tp_call */
430 0, /* tp_str */
431 PyObject_GenericGetAttr, /* tp_getattro */
432 0, /* tp_setattro */
433 0, /* tp_as_buffer */
434 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
435 inst_seq_new__doc__, /* tp_doc */
436 (traverseproc)inst_seq_traverse, /* tp_traverse */
437 (inquiry)inst_seq_clear, /* tp_clear */
438 0, /* tp_richcompare */
439 0, /* tp_weaklistoffset */
440 0, /* tp_iter */
441 0, /* tp_iternext */
442 inst_seq_methods, /* tp_methods */
443 inst_seq_memberlist, /* tp_members */
444 inst_seq_getsetters, /* tp_getset */
445 0, /* tp_base */
446 0, /* tp_dict */
447 0, /* tp_descr_get */
448 0, /* tp_descr_set */
449 0, /* tp_dictoffset */
450 0, /* tp_init */
451 0, /* tp_alloc */
452 inst_seq_new, /* tp_new */
453 };
454