• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Linearize - walk the statement tree (but _not_ the expressions)
3  * to generate a linear version of it and the basic blocks.
4  *
5  * NOTE! We're not interested in the actual sub-expressions yet,
6  * even though they can generate conditional branches and
7  * subroutine calls. That's all "local" behaviour.
8  *
9  * Copyright (C) 2004 Linus Torvalds
10  * Copyright (C) 2004 Christopher Li
11  */
12 
13 #include <string.h>
14 #include <stdarg.h>
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <assert.h>
18 
19 #include "parse.h"
20 #include "expression.h"
21 #include "linearize.h"
22 #include "optimize.h"
23 #include "flow.h"
24 #include "target.h"
25 
26 static pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt);
27 static pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr);
28 
29 static pseudo_t add_cast(struct entrypoint *ep, struct symbol *to, struct symbol *from, int op, pseudo_t src);
30 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right);
31 static pseudo_t linearize_one_symbol(struct entrypoint *ep, struct symbol *sym);
32 
33 static pseudo_t cast_pseudo(struct entrypoint *ep, pseudo_t src, struct symbol *from, struct symbol *to);
34 
35 struct pseudo void_pseudo = {};
36 
37 static struct position current_pos;
38 
39 ALLOCATOR(pseudo_user, "pseudo_user");
40 
alloc_instruction(int opcode,int size)41 static struct instruction *alloc_instruction(int opcode, int size)
42 {
43 	struct instruction * insn = __alloc_instruction(0);
44 	insn->opcode = opcode;
45 	insn->size = size;
46 	insn->pos = current_pos;
47 	return insn;
48 }
49 
type_size(struct symbol * type)50 static inline int type_size(struct symbol *type)
51 {
52 	return type ? type->bit_size > 0 ? type->bit_size : 0 : 0;
53 }
54 
alloc_typed_instruction(int opcode,struct symbol * type)55 static struct instruction *alloc_typed_instruction(int opcode, struct symbol *type)
56 {
57 	struct instruction *insn = alloc_instruction(opcode, type_size(type));
58 	insn->type = type;
59 	return insn;
60 }
61 
alloc_entrypoint(void)62 static struct entrypoint *alloc_entrypoint(void)
63 {
64 	return __alloc_entrypoint(0);
65 }
66 
alloc_basic_block(struct entrypoint * ep,struct position pos)67 static struct basic_block *alloc_basic_block(struct entrypoint *ep, struct position pos)
68 {
69 	static int nr;
70 	struct basic_block *bb = __alloc_basic_block(0);
71 	bb->pos = pos;
72 	bb->ep = ep;
73 	bb->nr = nr++;
74 	return bb;
75 }
76 
alloc_multijmp(struct basic_block * target,long long begin,long long end)77 static struct multijmp *alloc_multijmp(struct basic_block *target, long long begin, long long end)
78 {
79 	struct multijmp *multijmp = __alloc_multijmp(0);
80 	multijmp->target = target;
81 	multijmp->begin = begin;
82 	multijmp->end = end;
83 	return multijmp;
84 }
85 
show_label(struct basic_block * bb)86 const char *show_label(struct basic_block *bb)
87 {
88 	static int n;
89 	static char buffer[4][16];
90 	char *buf = buffer[3 & ++n];
91 
92 	if (!bb)
93 		return ".L???";
94 	snprintf(buf, 64, ".L%u", bb->nr);
95 	return buf;
96 }
97 
show_pseudo(pseudo_t pseudo)98 const char *show_pseudo(pseudo_t pseudo)
99 {
100 	static int n;
101 	static char buffer[4][64];
102 	char *buf;
103 	int i;
104 
105 	if (!pseudo)
106 		return "no pseudo";
107 	if (pseudo == VOID)
108 		return "VOID";
109 	buf = buffer[3 & ++n];
110 	switch(pseudo->type) {
111 	case PSEUDO_SYM: {
112 		struct symbol *sym = pseudo->sym;
113 		struct expression *expr;
114 
115 		if (!sym) {
116 			snprintf(buf, 64, "<bad symbol>");
117 			break;
118 		}
119 		if (sym->bb_target) {
120 			snprintf(buf, 64, "%s", show_label(sym->bb_target));
121 			break;
122 		}
123 		if (sym->ident) {
124 			snprintf(buf, 64, "%s", show_ident(sym->ident));
125 			break;
126 		}
127 		expr = sym->initializer;
128 		snprintf(buf, 64, "<anon symbol:%p>", verbose ? sym : NULL);
129 		if (expr) {
130 			switch (expr->type) {
131 			case EXPR_VALUE:
132 				snprintf(buf, 64, "<symbol value: %lld>", expr->value);
133 				break;
134 			case EXPR_STRING:
135 				return show_string(expr->string);
136 			default:
137 				break;
138 			}
139 		}
140 		break;
141 	}
142 	case PSEUDO_REG:
143 		i = snprintf(buf, 64, "%%r%d", pseudo->nr);
144 		if (pseudo->ident)
145 			sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
146 		break;
147 	case PSEUDO_VAL: {
148 		long long value = pseudo->value;
149 		if (value > 1000 || value < -1000)
150 			snprintf(buf, 64, "$%#llx", value);
151 		else
152 			snprintf(buf, 64, "$%lld", value);
153 		break;
154 	}
155 	case PSEUDO_ARG:
156 		snprintf(buf, 64, "%%arg%d", pseudo->nr);
157 		break;
158 	case PSEUDO_PHI:
159 		i = snprintf(buf, 64, "%%phi%d", pseudo->nr);
160 		if (pseudo->ident)
161 			sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
162 		break;
163 	case PSEUDO_UNDEF:
164 		return "UNDEF";
165 	default:
166 		snprintf(buf, 64, "<bad pseudo type %d>", pseudo->type);
167 	}
168 	return buf;
169 }
170 
171 static const char *opcodes[] = {
172 	[OP_BADOP] = "bad_op",
173 
174 	/* Fn entrypoint */
175 	[OP_ENTRY] = "<entry-point>",
176 
177 	/* Terminator */
178 	[OP_RET] = "ret",
179 	[OP_BR] = "br",
180 	[OP_CBR] = "cbr",
181 	[OP_SWITCH] = "switch",
182 	[OP_UNREACH] = "unreachable",
183 	[OP_COMPUTEDGOTO] = "jmp *",
184 
185 	/* Binary */
186 	[OP_ADD] = "add",
187 	[OP_SUB] = "sub",
188 	[OP_MUL] = "mul",
189 	[OP_DIVU] = "divu",
190 	[OP_DIVS] = "divs",
191 	[OP_MODU] = "modu",
192 	[OP_MODS] = "mods",
193 	[OP_SHL] = "shl",
194 	[OP_LSR] = "lsr",
195 	[OP_ASR] = "asr",
196 
197 	/* Floating-point Binary */
198 	[OP_FADD] = "fadd",
199 	[OP_FSUB] = "fsub",
200 	[OP_FMUL] = "fmul",
201 	[OP_FDIV] = "fdiv",
202 
203 	/* Logical */
204 	[OP_AND] = "and",
205 	[OP_OR] = "or",
206 	[OP_XOR] = "xor",
207 
208 	/* Binary comparison */
209 	[OP_SET_EQ] = "seteq",
210 	[OP_SET_NE] = "setne",
211 	[OP_SET_LE] = "setle",
212 	[OP_SET_GE] = "setge",
213 	[OP_SET_LT] = "setlt",
214 	[OP_SET_GT] = "setgt",
215 	[OP_SET_B] = "setb",
216 	[OP_SET_A] = "seta",
217 	[OP_SET_BE] = "setbe",
218 	[OP_SET_AE] = "setae",
219 
220 	/* floating-point comparison */
221 	[OP_FCMP_ORD] = "fcmpord",
222 	[OP_FCMP_OEQ] = "fcmpoeq",
223 	[OP_FCMP_ONE] = "fcmpone",
224 	[OP_FCMP_OLE] = "fcmpole",
225 	[OP_FCMP_OGE] = "fcmpoge",
226 	[OP_FCMP_OLT] = "fcmpolt",
227 	[OP_FCMP_OGT] = "fcmpogt",
228 	[OP_FCMP_UEQ] = "fcmpueq",
229 	[OP_FCMP_UNE] = "fcmpune",
230 	[OP_FCMP_ULE] = "fcmpule",
231 	[OP_FCMP_UGE] = "fcmpuge",
232 	[OP_FCMP_ULT] = "fcmpult",
233 	[OP_FCMP_UGT] = "fcmpugt",
234 	[OP_FCMP_UNO] = "fcmpuno",
235 
236 	/* Uni */
237 	[OP_NOT] = "not",
238 	[OP_NEG] = "neg",
239 	[OP_FNEG] = "fneg",
240 
241 	/* Special three-input */
242 	[OP_SEL] = "select",
243 	[OP_FMADD] = "fmadd",
244 
245 	/* Memory */
246 	[OP_LOAD] = "load",
247 	[OP_STORE] = "store",
248 	[OP_LABEL] = "label",
249 	[OP_SETVAL] = "set",
250 	[OP_SETFVAL] = "setfval",
251 	[OP_SYMADDR] = "symaddr",
252 
253 	/* Other */
254 	[OP_PHI] = "phi",
255 	[OP_PHISOURCE] = "phisrc",
256 	[OP_SEXT] = "sext",
257 	[OP_ZEXT] = "zext",
258 	[OP_TRUNC] = "trunc",
259 	[OP_FCVTU] = "fcvtu",
260 	[OP_FCVTS] = "fcvts",
261 	[OP_UCVTF] = "ucvtf",
262 	[OP_SCVTF] = "scvtf",
263 	[OP_FCVTF] = "fcvtf",
264 	[OP_UTPTR] = "utptr",
265 	[OP_PTRTU] = "ptrtu",
266 	[OP_PTRCAST] = "ptrcast",
267 	[OP_INLINED_CALL] = "# call",
268 	[OP_CALL] = "call",
269 	[OP_SLICE] = "slice",
270 	[OP_NOP] = "nop",
271 	[OP_DEATHNOTE] = "dead",
272 	[OP_ASM] = "asm",
273 
274 	/* Sparse tagging (line numbers, context, whatever) */
275 	[OP_CONTEXT] = "context",
276 	[OP_RANGE] = "range-check",
277 
278 	[OP_COPY] = "copy",
279 };
280 
show_asm_constraints(char * buf,const char * sep,struct asm_constraint_list * list)281 static char *show_asm_constraints(char *buf, const char *sep, struct asm_constraint_list *list)
282 {
283 	struct asm_constraint *entry;
284 
285 	FOR_EACH_PTR(list, entry) {
286 		buf += sprintf(buf, "%s\"%s\"", sep, entry->constraint);
287 		if (entry->pseudo)
288 			buf += sprintf(buf, " (%s)", show_pseudo(entry->pseudo));
289 		if (entry->ident)
290 			buf += sprintf(buf, " [%s]", show_ident(entry->ident));
291 		sep = ", ";
292 	} END_FOR_EACH_PTR(entry);
293 	return buf;
294 }
295 
show_asm(char * buf,struct instruction * insn)296 static char *show_asm(char *buf, struct instruction *insn)
297 {
298 	struct asm_rules *rules = insn->asm_rules;
299 
300 	buf += sprintf(buf, "\"%s\"", insn->string);
301 	buf = show_asm_constraints(buf, "\n\t\tout: ", rules->outputs);
302 	buf = show_asm_constraints(buf, "\n\t\tin: ", rules->inputs);
303 	buf = show_asm_constraints(buf, "\n\t\tclobber: ", rules->clobbers);
304 	return buf;
305 }
306 
show_instruction(struct instruction * insn)307 const char *show_instruction(struct instruction *insn)
308 {
309 	int opcode = insn->opcode;
310 	static char buffer[4096];
311 	char *buf;
312 
313 	buf = buffer;
314 	if (!insn->bb)
315 		buf += sprintf(buf, "# ");
316 
317 	if (opcode < ARRAY_SIZE(opcodes)) {
318 		const char *op = opcodes[opcode];
319 		if (!op)
320 			buf += sprintf(buf, "opcode:%d", opcode);
321 		else
322 			buf += sprintf(buf, "%s", op);
323 		if (insn->size)
324 			buf += sprintf(buf, ".%d", insn->size);
325 		memset(buf, ' ', 20);
326 		buf++;
327 	}
328 
329 	if (buf < buffer + 12)
330 		buf = buffer + 12;
331 	switch (opcode) {
332 	case OP_RET:
333 		if (insn->src && insn->src != VOID)
334 			buf += sprintf(buf, "%s", show_pseudo(insn->src));
335 		break;
336 
337 	case OP_CBR:
338 		buf += sprintf(buf, "%s, %s, %s", show_pseudo(insn->cond), show_label(insn->bb_true), show_label(insn->bb_false));
339 		break;
340 
341 	case OP_BR:
342 		buf += sprintf(buf, "%s", show_label(insn->bb_true));
343 		break;
344 
345 	case OP_LABEL:
346 		buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
347 		buf += sprintf(buf, "%s", show_label(insn->bb_true));
348 		break;
349 
350 	case OP_SETVAL: {
351 		struct expression *expr = insn->val;
352 		buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
353 
354 		if (!expr) {
355 			buf += sprintf(buf, "%s", "<none>");
356 			break;
357 		}
358 
359 		switch (expr->type) {
360 		case EXPR_VALUE:
361 			buf += sprintf(buf, "%lld", expr->value);
362 			break;
363 		case EXPR_FVALUE:
364 			buf += sprintf(buf, "%Le", expr->fvalue);
365 			break;
366 		case EXPR_STRING:
367 			buf += sprintf(buf, "%.40s", show_string(expr->string));
368 			break;
369 		case EXPR_SYMBOL:
370 			buf += sprintf(buf, "%s", show_ident(expr->symbol->ident));
371 			break;
372 		case EXPR_LABEL:
373 			buf += sprintf(buf, "%s", show_label(expr->symbol->bb_target));
374 			break;
375 		default:
376 			buf += sprintf(buf, "SETVAL EXPR TYPE %d", expr->type);
377 		}
378 		break;
379 	}
380 	case OP_SETFVAL:
381 		buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
382 		buf += sprintf(buf, "%Le", insn->fvalue);
383 		break;
384 
385 	case OP_SWITCH: {
386 		struct multijmp *jmp;
387 		buf += sprintf(buf, "%s", show_pseudo(insn->cond));
388 		FOR_EACH_PTR(insn->multijmp_list, jmp) {
389 			if (jmp->begin == jmp->end)
390 				buf += sprintf(buf, ", %lld -> %s", jmp->begin, show_label(jmp->target));
391 			else if (jmp->begin < jmp->end)
392 				buf += sprintf(buf, ", %lld ... %lld -> %s", jmp->begin, jmp->end, show_label(jmp->target));
393 			else
394 				buf += sprintf(buf, ", default -> %s", show_label(jmp->target));
395 		} END_FOR_EACH_PTR(jmp);
396 		break;
397 	}
398 	case OP_COMPUTEDGOTO: {
399 		struct multijmp *jmp;
400 		buf += sprintf(buf, "%s", show_pseudo(insn->src));
401 		FOR_EACH_PTR(insn->multijmp_list, jmp) {
402 			buf += sprintf(buf, ", %s", show_label(jmp->target));
403 		} END_FOR_EACH_PTR(jmp);
404 		break;
405 	}
406 	case OP_UNREACH:
407 		break;
408 
409 	case OP_PHISOURCE: {
410 		buf += sprintf(buf, "%s <- %s    ", show_pseudo(insn->target), show_pseudo(insn->phi_src));
411 		break;
412 	}
413 
414 	case OP_PHI: {
415 		pseudo_t phi;
416 		const char *s = " <-";
417 		buf += sprintf(buf, "%s", show_pseudo(insn->target));
418 		FOR_EACH_PTR(insn->phi_list, phi) {
419 			if (phi == VOID && !verbose)
420 				continue;
421 			buf += sprintf(buf, "%s %s", s, show_pseudo(phi));
422 			s = ",";
423 		} END_FOR_EACH_PTR(phi);
424 		break;
425 	}
426 	case OP_LOAD:
427 		buf += sprintf(buf, "%s <- %lld[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
428 		break;
429 	case OP_STORE:
430 		buf += sprintf(buf, "%s -> %lld[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
431 		break;
432 	case OP_INLINED_CALL:
433 	case OP_CALL: {
434 		struct pseudo *arg;
435 		if (insn->target && insn->target != VOID)
436 			buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
437 		buf += sprintf(buf, "%s", show_pseudo(insn->func));
438 		FOR_EACH_PTR(insn->arguments, arg) {
439 			buf += sprintf(buf, ", %s", show_pseudo(arg));
440 		} END_FOR_EACH_PTR(arg);
441 		break;
442 	}
443 	case OP_SEXT: case OP_ZEXT:
444 	case OP_TRUNC:
445 	case OP_FCVTU: case OP_FCVTS:
446 	case OP_UCVTF: case OP_SCVTF:
447 	case OP_FCVTF:
448 	case OP_UTPTR:
449 	case OP_PTRTU:
450 	case OP_PTRCAST:
451 		buf += sprintf(buf, "%s <- (%d) %s",
452 			show_pseudo(insn->target),
453 			type_size(insn->orig_type),
454 			show_pseudo(insn->src));
455 		break;
456 	case OP_BINARY ... OP_BINARY_END:
457 	case OP_FPCMP ... OP_FPCMP_END:
458 	case OP_BINCMP ... OP_BINCMP_END:
459 		buf += sprintf(buf, "%s <- %s, %s", show_pseudo(insn->target), show_pseudo(insn->src1), show_pseudo(insn->src2));
460 		break;
461 
462 	case OP_SEL:
463 	case OP_FMADD:
464 		buf += sprintf(buf, "%s <- %s, %s, %s", show_pseudo(insn->target),
465 			show_pseudo(insn->src1), show_pseudo(insn->src2), show_pseudo(insn->src3));
466 		break;
467 
468 	case OP_SLICE:
469 		buf += sprintf(buf, "%s <- (%d) %s, %d", show_pseudo(insn->target), type_size(insn->orig_type), show_pseudo(insn->src), insn->from);
470 		break;
471 
472 	case OP_NOT: case OP_NEG:
473 	case OP_FNEG:
474 	case OP_SYMADDR:
475 		buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
476 		break;
477 
478 	case OP_CONTEXT:
479 		buf += sprintf(buf, "%s%d", insn->check ? "check: " : "", insn->increment);
480 		break;
481 	case OP_RANGE:
482 		buf += sprintf(buf, "%s between %s..%s", show_pseudo(insn->src1), show_pseudo(insn->src2), show_pseudo(insn->src3));
483 		break;
484 	case OP_NOP:
485 		buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
486 		break;
487 	case OP_DEATHNOTE:
488 		buf += sprintf(buf, "%s", show_pseudo(insn->target));
489 		break;
490 	case OP_ASM:
491 		buf = show_asm(buf, insn);
492 		break;
493 	case OP_COPY:
494 		buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src));
495 		break;
496 	default:
497 		break;
498 	}
499 
500 	if (buf >= buffer + sizeof(buffer))
501 		die("instruction buffer overflowed %td\n", buf - buffer);
502 	do { --buf; } while (*buf == ' ');
503 	*++buf = 0;
504 	return buffer;
505 }
506 
show_bb(struct basic_block * bb)507 void show_bb(struct basic_block *bb)
508 {
509 	struct instruction *insn;
510 
511 	printf("%s:\n", show_label(bb));
512 	if (verbose) {
513 		pseudo_t needs, defines;
514 		printf("%s:%d\n", stream_name(bb->pos.stream), bb->pos.line);
515 
516 		FOR_EACH_PTR(bb->needs, needs) {
517 			struct instruction *def = needs->def;
518 			if (def->opcode != OP_PHI) {
519 				printf("  **uses %s (from %s)**\n", show_pseudo(needs), show_label(def->bb));
520 			} else {
521 				pseudo_t phi;
522 				const char *sep = " ";
523 				printf("  **uses %s (from", show_pseudo(needs));
524 				FOR_EACH_PTR(def->phi_list, phi) {
525 					if (phi == VOID)
526 						continue;
527 					printf("%s(%s:%s)", sep, show_pseudo(phi), show_label(phi->def->bb));
528 					sep = ", ";
529 				} END_FOR_EACH_PTR(phi);
530 				printf(")**\n");
531 			}
532 		} END_FOR_EACH_PTR(needs);
533 
534 		FOR_EACH_PTR(bb->defines, defines) {
535 			printf("  **defines %s **\n", show_pseudo(defines));
536 		} END_FOR_EACH_PTR(defines);
537 
538 		if (bb->parents) {
539 			struct basic_block *from;
540 			FOR_EACH_PTR(bb->parents, from) {
541 				printf("  **from %s (%s:%d:%d)**\n", show_label(from),
542 					stream_name(from->pos.stream), from->pos.line, from->pos.pos);
543 			} END_FOR_EACH_PTR(from);
544 		}
545 
546 		if (bb->children) {
547 			struct basic_block *to;
548 			FOR_EACH_PTR(bb->children, to) {
549 				printf("  **to %s (%s:%d:%d)**\n", show_label(to),
550 					stream_name(to->pos.stream), to->pos.line, to->pos.pos);
551 			} END_FOR_EACH_PTR(to);
552 		}
553 	}
554 
555 	FOR_EACH_PTR(bb->insns, insn) {
556 		if (!insn->bb && verbose < 2)
557 			continue;
558 		printf("\t%s\n", show_instruction(insn));
559 	} END_FOR_EACH_PTR(insn);
560 	if (!bb_terminated(bb))
561 		printf("\tEND\n");
562 }
563 
564 ///
565 // show BB of non-removed instruction
show_insn_bb(struct instruction * insn)566 void show_insn_bb(struct instruction *insn)
567 {
568 	if (!insn || !insn->bb)
569 		return;
570 	show_bb(insn->bb);
571 }
572 
show_symbol_usage(pseudo_t pseudo)573 static void show_symbol_usage(pseudo_t pseudo)
574 {
575 	struct pseudo_user *pu;
576 
577 	if (pseudo) {
578 		FOR_EACH_PTR(pseudo->users, pu) {
579 			printf("\t%s\n", show_instruction(pu->insn));
580 		} END_FOR_EACH_PTR(pu);
581 	}
582 }
583 
show_entry(struct entrypoint * ep)584 void show_entry(struct entrypoint *ep)
585 {
586 	struct symbol *sym;
587 	struct basic_block *bb;
588 
589 	printf("%s:\n", show_ident(ep->name->ident));
590 
591 	if (verbose) {
592 		printf("ep %p: %s\n", ep, show_ident(ep->name->ident));
593 
594 		FOR_EACH_PTR(ep->syms, sym) {
595 			if (!sym->pseudo)
596 				continue;
597 			if (!sym->pseudo->users)
598 				continue;
599 			printf("   sym: %p %s\n", sym, show_ident(sym->ident));
600 			if (sym->ctype.modifiers & (MOD_EXTERN | MOD_STATIC | MOD_ADDRESSABLE))
601 				printf("\texternal visibility\n");
602 			show_symbol_usage(sym->pseudo);
603 		} END_FOR_EACH_PTR(sym);
604 
605 		printf("\n");
606 	}
607 
608 	FOR_EACH_PTR(ep->bbs, bb) {
609 		if (!bb)
610 			continue;
611 		if (!bb->parents && !bb->children && !bb->insns && verbose < 2)
612 			continue;
613 		show_bb(bb);
614 		printf("\n");
615 	} END_FOR_EACH_PTR(bb);
616 
617 	printf("\n");
618 }
619 
620 ///
621 // show the function containing the instruction but only if not already removed.
show_insn_entry(struct instruction * insn)622 void show_insn_entry(struct instruction *insn)
623 {
624 	if (!insn || !insn->bb || !insn->bb->ep)
625 		return;
626 	show_entry(insn->bb->ep);
627 }
628 
bind_label(struct symbol * label,struct basic_block * bb,struct position pos)629 static void bind_label(struct symbol *label, struct basic_block *bb, struct position pos)
630 {
631 	if (label->bb_target)
632 		warning(pos, "label '%s' already bound", show_ident(label->ident));
633 	label->bb_target = bb;
634 }
635 
get_bound_block(struct entrypoint * ep,struct symbol * label)636 static struct basic_block * get_bound_block(struct entrypoint *ep, struct symbol *label)
637 {
638 	struct basic_block *bb = label->bb_target;
639 
640 	if (!bb) {
641 		bb = alloc_basic_block(ep, label->pos);
642 		label->bb_target = bb;
643 	}
644 	return bb;
645 }
646 
finish_block(struct entrypoint * ep)647 static void finish_block(struct entrypoint *ep)
648 {
649 	struct basic_block *src = ep->active;
650 	if (bb_reachable(src))
651 		ep->active = NULL;
652 }
653 
add_goto(struct entrypoint * ep,struct basic_block * dst)654 static void add_goto(struct entrypoint *ep, struct basic_block *dst)
655 {
656 	struct basic_block *src = ep->active;
657 	if (bb_reachable(src)) {
658 		struct instruction *br = alloc_instruction(OP_BR, 0);
659 		br->bb_true = dst;
660 		add_bb(&dst->parents, src);
661 		add_bb(&src->children, dst);
662 		br->bb = src;
663 		add_instruction(&src->insns, br);
664 		ep->active = NULL;
665 	}
666 }
667 
add_one_insn(struct entrypoint * ep,struct instruction * insn)668 static void add_one_insn(struct entrypoint *ep, struct instruction *insn)
669 {
670 	struct basic_block *bb = ep->active;
671 
672 	if (bb_reachable(bb)) {
673 		insn->bb = bb;
674 		add_instruction(&bb->insns, insn);
675 	}
676 }
677 
add_unreachable(struct entrypoint * ep)678 static void add_unreachable(struct entrypoint *ep)
679 {
680 	struct instruction *insn = alloc_instruction(OP_UNREACH, 0);
681 	add_one_insn(ep, insn);
682 	ep->active = NULL;
683 }
684 
set_activeblock(struct entrypoint * ep,struct basic_block * bb)685 static void set_activeblock(struct entrypoint *ep, struct basic_block *bb)
686 {
687 	if (!bb_terminated(ep->active))
688 		add_goto(ep, bb);
689 
690 	ep->active = bb;
691 	if (bb_reachable(bb))
692 		add_bb(&ep->bbs, bb);
693 }
694 
insert_select(struct basic_block * bb,struct instruction * br,struct instruction * phi_node,pseudo_t if_true,pseudo_t if_false)695 void insert_select(struct basic_block *bb, struct instruction *br, struct instruction *phi_node, pseudo_t if_true, pseudo_t if_false)
696 {
697 	pseudo_t target;
698 	struct instruction *select;
699 
700 	select = alloc_typed_instruction(OP_SEL, phi_node->type);
701 
702 	assert(br->cond);
703 	use_pseudo(select, br->cond, &select->src1);
704 
705 	target = phi_node->target;
706 	assert(target->def == phi_node);
707 	select->target = target;
708 	target->def = select;
709 
710 	use_pseudo(select, if_true, &select->src2);
711 	use_pseudo(select, if_false, &select->src3);
712 
713 	insert_last_instruction(bb, select);
714 }
715 
bb_empty(struct basic_block * bb)716 static inline int bb_empty(struct basic_block *bb)
717 {
718 	return !bb->insns;
719 }
720 
721 /* Add a label to the currently active block, return new active block */
add_label(struct entrypoint * ep,struct symbol * label)722 static struct basic_block * add_label(struct entrypoint *ep, struct symbol *label)
723 {
724 	struct basic_block *bb = label->bb_target;
725 
726 	if (bb) {
727 		set_activeblock(ep, bb);
728 		return bb;
729 	}
730 	bb = ep->active;
731 	if (!bb_reachable(bb) || !bb_empty(bb)) {
732 		bb = alloc_basic_block(ep, label->pos);
733 		set_activeblock(ep, bb);
734 	}
735 	label->bb_target = bb;
736 	return bb;
737 }
738 
add_branch(struct entrypoint * ep,pseudo_t cond,struct basic_block * bb_true,struct basic_block * bb_false)739 static void add_branch(struct entrypoint *ep, pseudo_t cond, struct basic_block *bb_true, struct basic_block *bb_false)
740 {
741 	struct basic_block *bb = ep->active;
742 	struct instruction *br;
743 
744 	if (bb_reachable(bb)) {
745 		br = alloc_instruction(OP_CBR, 0);
746 		use_pseudo(br, cond, &br->cond);
747 		br->bb_true = bb_true;
748 		br->bb_false = bb_false;
749 		add_bb(&bb_true->parents, bb);
750 		add_bb(&bb_false->parents, bb);
751 		add_bb(&bb->children, bb_true);
752 		add_bb(&bb->children, bb_false);
753 		add_one_insn(ep, br);
754 	}
755 }
756 
alloc_pseudo(struct instruction * def)757 pseudo_t alloc_pseudo(struct instruction *def)
758 {
759 	static int nr = 0;
760 	struct pseudo * pseudo = __alloc_pseudo(0);
761 	pseudo->type = PSEUDO_REG;
762 	pseudo->nr = ++nr;
763 	pseudo->def = def;
764 	return pseudo;
765 }
766 
symbol_pseudo(struct entrypoint * ep,struct symbol * sym)767 static pseudo_t symbol_pseudo(struct entrypoint *ep, struct symbol *sym)
768 {
769 	pseudo_t pseudo;
770 
771 	if (!sym)
772 		return VOID;
773 
774 	pseudo = sym->pseudo;
775 	if (!pseudo) {
776 		pseudo = __alloc_pseudo(0);
777 		pseudo->nr = -1;
778 		pseudo->type = PSEUDO_SYM;
779 		pseudo->sym = sym;
780 		pseudo->ident = sym->ident;
781 		sym->pseudo = pseudo;
782 		add_pseudo(&ep->accesses, pseudo);
783 	}
784 	/* Symbol pseudos have neither nr nor def */
785 	return pseudo;
786 }
787 
value_pseudo(long long val)788 pseudo_t value_pseudo(long long val)
789 {
790 #define MAX_VAL_HASH 64
791 	static struct pseudo_list *prev[MAX_VAL_HASH];
792 	int hash = val & (MAX_VAL_HASH-1);
793 	struct pseudo_list **list = prev + hash;
794 	pseudo_t pseudo;
795 
796 	FOR_EACH_PTR(*list, pseudo) {
797 		if (pseudo->value == val)
798 			return pseudo;
799 	} END_FOR_EACH_PTR(pseudo);
800 
801 	pseudo = __alloc_pseudo(0);
802 	pseudo->type = PSEUDO_VAL;
803 	pseudo->value = val;
804 	add_pseudo(list, pseudo);
805 
806 	/* Value pseudos have neither nr, usage nor def */
807 	return pseudo;
808 }
809 
undef_pseudo(void)810 pseudo_t undef_pseudo(void)
811 {
812 	pseudo_t pseudo = __alloc_pseudo(0);
813 	pseudo->type = PSEUDO_UNDEF;
814 	return pseudo;
815 }
816 
argument_pseudo(struct entrypoint * ep,int nr)817 static pseudo_t argument_pseudo(struct entrypoint *ep, int nr)
818 {
819 	pseudo_t pseudo = __alloc_pseudo(0);
820 	struct instruction *entry = ep->entry;
821 
822 	pseudo->type = PSEUDO_ARG;
823 	pseudo->nr = nr;
824 	pseudo->def = entry;
825 	add_pseudo(&entry->arg_list, pseudo);
826 
827 	/* Argument pseudos have neither usage nor def */
828 	return pseudo;
829 }
830 
alloc_phisrc(pseudo_t pseudo,struct symbol * type)831 struct instruction *alloc_phisrc(pseudo_t pseudo, struct symbol *type)
832 {
833 	struct instruction *insn = alloc_typed_instruction(OP_PHISOURCE, type);
834 	pseudo_t phi = __alloc_pseudo(0);
835 	static int nr = 0;
836 
837 	phi->type = PSEUDO_PHI;
838 	phi->nr = ++nr;
839 	phi->def = insn;
840 
841 	use_pseudo(insn, pseudo, &insn->phi_src);
842 	insn->target = phi;
843 	return insn;
844 }
845 
alloc_phi(struct basic_block * source,pseudo_t pseudo,struct symbol * type)846 pseudo_t alloc_phi(struct basic_block *source, pseudo_t pseudo, struct symbol *type)
847 {
848 	struct instruction *insn;
849 
850 	if (!source)
851 		return VOID;
852 
853 	insn = alloc_phisrc(pseudo, type);
854 	insn->bb = source;
855 	add_instruction(&source->insns, insn);
856 	return insn->target;
857 }
858 
alloc_phi_node(struct basic_block * bb,struct symbol * type,struct ident * ident)859 struct instruction *alloc_phi_node(struct basic_block *bb, struct symbol *type, struct ident *ident)
860 {
861 	struct instruction *phi_node = alloc_typed_instruction(OP_PHI, type);
862 	pseudo_t phi;
863 
864 	phi = alloc_pseudo(phi_node);
865 	phi->ident = ident;
866 	phi->def = phi_node;
867 	phi_node->target = phi;
868 	phi_node->bb = bb;
869 	return phi_node;
870 }
871 
add_phi_node(struct basic_block * bb,struct instruction * phi_node)872 void add_phi_node(struct basic_block *bb, struct instruction *phi_node)
873 {
874 	struct instruction *insn;
875 
876 	FOR_EACH_PTR(bb->insns, insn) {
877 		enum opcode op = insn->opcode;
878 		if (op == OP_PHI)
879 			continue;
880 		INSERT_CURRENT(phi_node, insn);
881 		return;
882 	} END_FOR_EACH_PTR(insn);
883 
884 	// FIXME
885 	add_instruction(&bb->insns, phi_node);
886 }
887 
insert_phi_node(struct basic_block * bb,struct symbol * var)888 struct instruction *insert_phi_node(struct basic_block *bb, struct symbol *var)
889 {
890 	struct instruction *phi_node = alloc_phi_node(bb, var, var->ident);
891 	add_phi_node(bb, phi_node);
892 	return phi_node;
893 }
894 
895 /*
896  * We carry the "access_data" structure around for any accesses,
897  * which simplifies things a lot. It contains all the access
898  * information in one place.
899  */
900 struct access_data {
901 	struct symbol *type;		// ctype
902 	struct symbol *btype;		// base type of bitfields
903 	pseudo_t address;		// pseudo containing address ..
904 	long long offset;		// byte offset
905 };
906 
linearize_simple_address(struct entrypoint * ep,struct expression * addr,struct access_data * ad)907 static int linearize_simple_address(struct entrypoint *ep,
908 	struct expression *addr,
909 	struct access_data *ad)
910 {
911 	if (addr->type == EXPR_SYMBOL) {
912 		linearize_one_symbol(ep, addr->symbol);
913 		ad->address = symbol_pseudo(ep, addr->symbol);
914 		return 1;
915 	}
916 	if (addr->type == EXPR_BINOP) {
917 		if (addr->right->type == EXPR_VALUE) {
918 			if (addr->op == '+') {
919 				ad->offset += get_expression_value(addr->right);
920 				return linearize_simple_address(ep, addr->left, ad);
921 			}
922 		}
923 	}
924 	ad->address = linearize_expression(ep, addr);
925 	return 1;
926 }
927 
bitfield_base_type(struct symbol * sym)928 static struct symbol *bitfield_base_type(struct symbol *sym)
929 {
930 	struct symbol *base = sym;
931 
932 	if (sym) {
933 		if (sym->type == SYM_NODE)
934 			base = base->ctype.base_type;
935 		if (base->type == SYM_BITFIELD) {
936 			base = base->ctype.base_type;
937 			if (sym->packed) {
938 				int size = bits_to_bytes(sym->bit_offset + sym->bit_size);
939 				sym = __alloc_symbol(0);
940 				*sym = *base;
941 				sym->bit_size = bytes_to_bits(size);
942 				return sym;
943 			}
944 			return base;
945 		}
946 	}
947 	return sym;
948 }
949 
linearize_address_gen(struct entrypoint * ep,struct expression * expr,struct access_data * ad)950 static int linearize_address_gen(struct entrypoint *ep,
951 	struct expression *expr,
952 	struct access_data *ad)
953 {
954 	struct symbol *ctype = expr->ctype;
955 
956 	if (!ctype)
957 		return 0;
958 	ad->type = ctype;
959 	if (expr->type == EXPR_PREOP && expr->op == '*')
960 		return linearize_simple_address(ep, expr->unop, ad);
961 
962 	warning(expr->pos, "generating address of non-lvalue (%d)", expr->type);
963 	return 0;
964 }
965 
add_load(struct entrypoint * ep,struct access_data * ad)966 static pseudo_t add_load(struct entrypoint *ep, struct access_data *ad)
967 {
968 	struct instruction *insn;
969 	pseudo_t new;
970 
971 	if (!ep->active)
972 		return VOID;
973 
974 	insn = alloc_typed_instruction(OP_LOAD, ad->btype);
975 	new = alloc_pseudo(insn);
976 
977 	insn->target = new;
978 	insn->offset = ad->offset;
979 	insn->is_volatile = ad->type && (ad->type->ctype.modifiers & MOD_VOLATILE);
980 	use_pseudo(insn, ad->address, &insn->src);
981 	add_one_insn(ep, insn);
982 	return new;
983 }
984 
add_store(struct entrypoint * ep,struct access_data * ad,pseudo_t value)985 static void add_store(struct entrypoint *ep, struct access_data *ad, pseudo_t value)
986 {
987 	struct basic_block *bb = ep->active;
988 	struct instruction *store;
989 
990 	if (!bb)
991 		return;
992 
993 	store = alloc_typed_instruction(OP_STORE, ad->btype);
994 	store->offset = ad->offset;
995 	store->is_volatile = ad->type && (ad->type->ctype.modifiers & MOD_VOLATILE);
996 	use_pseudo(store, value, &store->target);
997 	use_pseudo(store, ad->address, &store->src);
998 	add_one_insn(ep, store);
999 }
1000 
linearize_bitfield_insert(struct entrypoint * ep,pseudo_t ori,pseudo_t val,struct symbol * ctype,struct symbol * btype)1001 static pseudo_t linearize_bitfield_insert(struct entrypoint *ep,
1002 	pseudo_t ori, pseudo_t val, struct symbol *ctype, struct symbol *btype)
1003 {
1004 	unsigned int shift = ctype->bit_offset;
1005 	unsigned int size = ctype->bit_size;
1006 	unsigned long long mask = ((1ULL << size) - 1);
1007 	unsigned long long smask= bits_mask(btype->bit_size);
1008 
1009 	val = add_cast(ep, btype, ctype, OP_ZEXT, val);
1010 	if (shift) {
1011 		val = add_binary_op(ep, btype, OP_SHL, val, value_pseudo(shift));
1012 		mask <<= shift;
1013 	}
1014 	ori = add_binary_op(ep, btype, OP_AND, ori, value_pseudo(~mask & smask));
1015 	val = add_binary_op(ep, btype, OP_OR, ori, val);
1016 
1017 	return val;
1018 }
1019 
linearize_store_gen(struct entrypoint * ep,pseudo_t value,struct access_data * ad)1020 static pseudo_t linearize_store_gen(struct entrypoint *ep,
1021 		pseudo_t value,
1022 		struct access_data *ad)
1023 {
1024 	struct symbol *ctype = ad->type;
1025 	struct symbol *btype;
1026 	pseudo_t store = value;
1027 
1028 	if (!ep->active)
1029 		return VOID;
1030 
1031 	btype = ad->btype = bitfield_base_type(ctype);
1032 	if (type_size(btype) != type_size(ctype)) {
1033 		pseudo_t orig = add_load(ep, ad);
1034 		store = linearize_bitfield_insert(ep, orig, value, ctype, btype);
1035 	}
1036 	add_store(ep, ad, store);
1037 	return value;
1038 }
1039 
taint_undefined_behaviour(struct instruction * insn)1040 static void taint_undefined_behaviour(struct instruction *insn)
1041 {
1042 	pseudo_t src2;
1043 
1044 	switch (insn->opcode) {
1045 	case OP_LSR:
1046 	case OP_ASR:
1047 	case OP_SHL:
1048 		src2 = insn->src2;
1049 		if (src2->type != PSEUDO_VAL)
1050 			break;
1051 		if ((unsigned long long)src2->value >= insn->size)
1052 			insn->tainted = 1;
1053 		break;
1054 	}
1055 }
1056 
add_binary_op(struct entrypoint * ep,struct symbol * ctype,int op,pseudo_t left,pseudo_t right)1057 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right)
1058 {
1059 	struct instruction *insn = alloc_typed_instruction(op, ctype);
1060 	pseudo_t target = alloc_pseudo(insn);
1061 	insn->target = target;
1062 	use_pseudo(insn, left, &insn->src1);
1063 	use_pseudo(insn, right, &insn->src2);
1064 	add_one_insn(ep, insn);
1065 	return target;
1066 }
1067 
add_cmp_op(struct entrypoint * ep,struct symbol * ctype,int op,struct symbol * itype,pseudo_t left,pseudo_t right)1068 static pseudo_t add_cmp_op(struct entrypoint *ep, struct symbol *ctype, int op, struct symbol *itype, pseudo_t left, pseudo_t right)
1069 {
1070 	pseudo_t target = add_binary_op(ep, ctype, op, left, right);
1071 	target->def->itype = itype;
1072 	return target;
1073 }
1074 
add_setval(struct entrypoint * ep,struct symbol * ctype,struct expression * val)1075 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val)
1076 {
1077 	struct instruction *insn = alloc_typed_instruction(OP_SETVAL, ctype);
1078 	pseudo_t target = alloc_pseudo(insn);
1079 	insn->target = target;
1080 	insn->val = val;
1081 	add_one_insn(ep, insn);
1082 	return target;
1083 }
1084 
add_setfval(struct entrypoint * ep,struct symbol * ctype,long double fval)1085 static pseudo_t add_setfval(struct entrypoint *ep, struct symbol *ctype, long double fval)
1086 {
1087 	struct instruction *insn = alloc_typed_instruction(OP_SETFVAL, ctype);
1088 	pseudo_t target = alloc_pseudo(insn);
1089 	insn->target = target;
1090 	insn->fvalue = fval;
1091 	add_one_insn(ep, insn);
1092 	return target;
1093 }
1094 
add_symbol_address(struct entrypoint * ep,struct expression * expr)1095 static pseudo_t add_symbol_address(struct entrypoint *ep, struct expression *expr)
1096 {
1097 	struct instruction *insn = alloc_typed_instruction(OP_SYMADDR, expr->ctype);
1098 	pseudo_t target = alloc_pseudo(insn);
1099 
1100 	insn->target = target;
1101 	use_pseudo(insn, symbol_pseudo(ep, expr->symbol), &insn->src);
1102 	add_one_insn(ep, insn);
1103 	return target;
1104 }
1105 
linearize_bitfield_extract(struct entrypoint * ep,pseudo_t val,struct symbol * ctype,struct symbol * btype)1106 static pseudo_t linearize_bitfield_extract(struct entrypoint *ep,
1107 		pseudo_t val, struct symbol *ctype, struct symbol *btype)
1108 {
1109 	unsigned int off = ctype->bit_offset;
1110 
1111 	if (off) {
1112 		pseudo_t shift = value_pseudo(off);
1113 		val = add_binary_op(ep, btype, OP_LSR, val, shift);
1114 	}
1115 	val = cast_pseudo(ep, val, btype, ctype);
1116 	return val;
1117 }
1118 
linearize_load_gen(struct entrypoint * ep,struct access_data * ad)1119 static pseudo_t linearize_load_gen(struct entrypoint *ep, struct access_data *ad)
1120 {
1121 	struct symbol *ctype = ad->type;
1122 	struct symbol *btype;
1123 	pseudo_t new;
1124 
1125 	if (!ep->active)
1126 		return VOID;
1127 
1128 	btype = ad->btype = bitfield_base_type(ctype);
1129 	new = add_load(ep, ad);
1130 	if (ctype->bit_size != type_size(btype))
1131 		new = linearize_bitfield_extract(ep, new, ctype, btype);
1132 	return new;
1133 }
1134 
linearize_access(struct entrypoint * ep,struct expression * expr)1135 static pseudo_t linearize_access(struct entrypoint *ep, struct expression *expr)
1136 {
1137 	struct access_data ad = { NULL, };
1138 	pseudo_t value;
1139 
1140 	if (!linearize_address_gen(ep, expr, &ad))
1141 		return VOID;
1142 	value = linearize_load_gen(ep, &ad);
1143 	return value;
1144 }
1145 
linearize_inc_dec(struct entrypoint * ep,struct expression * expr,int postop)1146 static pseudo_t linearize_inc_dec(struct entrypoint *ep, struct expression *expr, int postop)
1147 {
1148 	struct access_data ad = { NULL, };
1149 	pseudo_t old, new, one;
1150 	int op = expr->op == SPECIAL_INCREMENT ? OP_ADD : OP_SUB;
1151 
1152 	if (!linearize_address_gen(ep, expr->unop, &ad))
1153 		return VOID;
1154 
1155 	old = linearize_load_gen(ep, &ad);
1156 	op = opcode_float(op, expr->ctype);
1157 	if (is_float_type(expr->ctype))
1158 		one = add_setfval(ep, expr->ctype, expr->op_value);
1159 	else
1160 		one = value_pseudo(expr->op_value);
1161 	if (ad.btype != ad.type)
1162 		old = cast_pseudo(ep, old, ad.type, ad.btype);
1163 	new = add_binary_op(ep, ad.btype, op, old, one);
1164 	if (ad.btype != ad.type)
1165 		new = cast_pseudo(ep, new, ad.btype, ad.type);
1166 	linearize_store_gen(ep, new, &ad);
1167 	return postop ? old : new;
1168 }
1169 
add_unop(struct entrypoint * ep,struct symbol * ctype,int op,pseudo_t src)1170 static pseudo_t add_unop(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t src)
1171 {
1172 	struct instruction *insn = alloc_typed_instruction(op, ctype);
1173 	pseudo_t new = alloc_pseudo(insn);
1174 
1175 	insn->target = new;
1176 	use_pseudo(insn, src, &insn->src1);
1177 	add_one_insn(ep, insn);
1178 	return new;
1179 }
1180 
add_cast(struct entrypoint * ep,struct symbol * to,struct symbol * from,int op,pseudo_t src)1181 static pseudo_t add_cast(struct entrypoint *ep, struct symbol *to,
1182 			 struct symbol *from, int op, pseudo_t src)
1183 {
1184 	pseudo_t new = add_unop(ep, to, op, src);
1185 	new->def->orig_type = from;
1186 	return new;
1187 }
1188 
linearize_slice(struct entrypoint * ep,struct expression * expr)1189 static pseudo_t linearize_slice(struct entrypoint *ep, struct expression *expr)
1190 {
1191 	pseudo_t pre = linearize_expression(ep, expr->base);
1192 	struct instruction *insn = alloc_typed_instruction(OP_SLICE, expr->ctype);
1193 	pseudo_t new = alloc_pseudo(insn);
1194 
1195 	insn->target = new;
1196 	insn->from = expr->r_bitpos;
1197 	insn->orig_type = expr->base->ctype;
1198 	use_pseudo(insn, pre, &insn->src);
1199 	add_one_insn(ep, insn);
1200 	return new;
1201 }
1202 
linearize_regular_preop(struct entrypoint * ep,struct expression * expr)1203 static pseudo_t linearize_regular_preop(struct entrypoint *ep, struct expression *expr)
1204 {
1205 	pseudo_t pre = linearize_expression(ep, expr->unop);
1206 	struct symbol *ctype = expr->ctype;
1207 	switch (expr->op) {
1208 	case '+':
1209 		return pre;
1210 	case '!': {
1211 		pseudo_t zero = value_pseudo(0);
1212 		return add_cmp_op(ep, ctype, OP_SET_EQ, expr->unop->ctype, pre, zero);
1213 	}
1214 	case '~':
1215 		return add_unop(ep, ctype, OP_NOT, pre);
1216 	case '-':
1217 		return add_unop(ep, ctype, opcode_float(OP_NEG, ctype), pre);
1218 	}
1219 	return VOID;
1220 }
1221 
linearize_preop(struct entrypoint * ep,struct expression * expr)1222 static pseudo_t linearize_preop(struct entrypoint *ep, struct expression *expr)
1223 {
1224 	/*
1225 	 * '*' is an lvalue access, and is fundamentally different
1226 	 * from an arithmetic operation. Maybe it should have an
1227 	 * expression type of its own..
1228 	 */
1229 	if (expr->op == '*')
1230 		return linearize_access(ep, expr);
1231 	if (expr->op == SPECIAL_INCREMENT || expr->op == SPECIAL_DECREMENT)
1232 		return linearize_inc_dec(ep, expr, 0);
1233 	return linearize_regular_preop(ep, expr);
1234 }
1235 
linearize_postop(struct entrypoint * ep,struct expression * expr)1236 static pseudo_t linearize_postop(struct entrypoint *ep, struct expression *expr)
1237 {
1238 	return linearize_inc_dec(ep, expr, 1);
1239 }
1240 
1241 /*
1242  * Casts to pointers are "less safe" than other casts, since
1243  * they imply type-unsafe accesses. "void *" is a special
1244  * case, since you can't access through it anyway without another
1245  * cast.
1246  */
1247 enum mtype {
1248 	MTYPE_UINT,
1249 	MTYPE_SINT,
1250 	MTYPE_PTR,
1251 	MTYPE_VPTR,	// TODO: must be removed ?
1252 	MTYPE_FLOAT,
1253 	MTYPE_BAD,
1254 };
1255 
get_mtype(struct symbol * s)1256 static enum mtype get_mtype(struct symbol *s)
1257 {
1258 	int sign = (s->ctype.modifiers & MOD_SIGNED) ? 1 : 0;
1259 
1260 retry:	switch (s->type) {
1261 	case SYM_NODE:
1262 		s = s->ctype.base_type;
1263 		goto retry;
1264 	case SYM_PTR:
1265 		if (s->ctype.base_type == &void_ctype)
1266 			return MTYPE_VPTR;
1267 		return MTYPE_PTR;
1268 	case SYM_BITFIELD:
1269 	case SYM_RESTRICT:
1270 	case SYM_FOULED:
1271 	case SYM_ENUM:
1272 		s = s->ctype.base_type;
1273 		/* fall-through */
1274 	case_int:
1275 		return sign ? MTYPE_SINT : MTYPE_UINT;
1276 	case SYM_BASETYPE:
1277 		if (s->ctype.base_type == &fp_type)
1278 			return MTYPE_FLOAT;
1279 		if (s->ctype.base_type == &int_type)
1280 			goto case_int;
1281 		/* fall-through */
1282 	default:
1283 		return MTYPE_BAD;
1284 	}
1285 }
1286 
get_cast_opcode(struct symbol * dst,struct symbol * src)1287 static int get_cast_opcode(struct symbol *dst, struct symbol *src)
1288 {
1289 	enum mtype stype = get_mtype(src);
1290 	enum mtype dtype = get_mtype(dst);
1291 
1292 	switch (dtype) {
1293 	case MTYPE_FLOAT:
1294 		switch (stype) {
1295 		case MTYPE_FLOAT:
1296 			if (dst->bit_size == src->bit_size)
1297 				return OP_NOP;
1298 			return OP_FCVTF;
1299 		case MTYPE_UINT:
1300 			return OP_UCVTF;
1301 		case MTYPE_SINT:
1302 			return OP_SCVTF;
1303 		default:
1304 			return OP_BADOP;
1305 		}
1306 	case MTYPE_PTR:
1307 		switch (stype) {
1308 		case MTYPE_UINT:
1309 		case MTYPE_SINT:
1310 			return OP_UTPTR;
1311 		case MTYPE_PTR:
1312 		case MTYPE_VPTR:
1313 			return OP_PTRCAST;
1314 		default:
1315 			return OP_BADOP;
1316 		}
1317 	case MTYPE_VPTR:
1318 		switch (stype) {
1319 		case MTYPE_PTR:
1320 		case MTYPE_VPTR:
1321 		case MTYPE_UINT:
1322 			stype = MTYPE_UINT;
1323 			/* fall through */
1324 		case MTYPE_SINT:
1325 			break;
1326 		default:
1327 			return OP_BADOP;
1328 		}
1329 		/* fall through */
1330 	case MTYPE_UINT:
1331 	case MTYPE_SINT:
1332 		switch (stype) {
1333 		case MTYPE_FLOAT:
1334 			return dtype == MTYPE_UINT ? OP_FCVTU : OP_FCVTS;
1335 		case MTYPE_PTR:
1336 			return OP_PTRTU;
1337 		case MTYPE_VPTR:
1338 		case MTYPE_UINT:
1339 		case MTYPE_SINT:
1340 			if (dst->bit_size ==src->bit_size)
1341 				return OP_NOP;
1342 			if (dst->bit_size  < src->bit_size)
1343 				return OP_TRUNC;
1344 			return stype == MTYPE_SINT ? OP_SEXT : OP_ZEXT;
1345 		default:
1346 			return OP_BADOP;
1347 		}
1348 		/* fall through */
1349 	default:
1350 		if (src->type == SYM_NODE)
1351 			src = src->ctype.base_type;
1352 		if (dst->type == SYM_NODE)
1353 			dst = dst->ctype.base_type;
1354 		if (src == dst)
1355 			return OP_NOP;
1356 		return OP_BADOP;
1357 	}
1358 }
1359 
cast_pseudo(struct entrypoint * ep,pseudo_t src,struct symbol * from,struct symbol * to)1360 static pseudo_t cast_pseudo(struct entrypoint *ep, pseudo_t src, struct symbol *from, struct symbol *to)
1361 {
1362 	const struct position pos = current_pos;
1363 	pseudo_t result;
1364 	struct instruction *insn;
1365 	int opcode;
1366 
1367 	if (src == VOID)
1368 		return VOID;
1369 	if (!from || !to)
1370 		return VOID;
1371 	if (from->bit_size < 0 || to->bit_size < 0)
1372 		return VOID;
1373 	opcode = get_cast_opcode(to, from);
1374 	switch (opcode) {
1375 	case OP_NOP:
1376 		return src;
1377 	case OP_UTPTR:
1378 		if (from->bit_size == to->bit_size)
1379 			break;
1380 		if (src == value_pseudo(0))
1381 			break;
1382 		if (Wint_to_pointer_cast)
1383 			warning(pos, "non size-preserving integer to pointer cast");
1384 		src = cast_pseudo(ep, src, from, size_t_ctype);
1385 		from = size_t_ctype;
1386 		break;
1387 	case OP_PTRTU:
1388 		if (from->bit_size == to->bit_size)
1389 			break;
1390 		if (Wpointer_to_int_cast)
1391 			warning(pos, "non size-preserving pointer to integer cast");
1392 		src = cast_pseudo(ep, src, from, size_t_ctype);
1393 		return cast_pseudo(ep, src, size_t_ctype, to);
1394 	case OP_BADOP:
1395 		return VOID;
1396 	default:
1397 		break;
1398 	}
1399 	insn = alloc_typed_instruction(opcode, to);
1400 	result = alloc_pseudo(insn);
1401 	insn->target = result;
1402 	insn->orig_type = from;
1403 	use_pseudo(insn, src, &insn->src);
1404 	add_one_insn(ep, insn);
1405 	return result;
1406 }
1407 
map_opcode(int opcode,struct symbol * ctype)1408 static int map_opcode(int opcode, struct symbol *ctype)
1409 {
1410 	if (ctype && is_float_type(ctype))
1411 		return opcode_table[opcode].to_float;
1412 	if (ctype && (ctype->ctype.modifiers & MOD_SIGNED)) {
1413 		switch(opcode) {
1414 		case OP_DIVU: case OP_MODU: case OP_LSR:
1415 			opcode++;
1416 		}
1417 	}
1418 	return opcode;
1419 }
1420 
add_convert_to_bool(struct entrypoint * ep,pseudo_t src,struct symbol * type)1421 static inline pseudo_t add_convert_to_bool(struct entrypoint *ep, pseudo_t src, struct symbol *type)
1422 {
1423 	pseudo_t zero;
1424 	int op;
1425 
1426 	if (!type || src == VOID)
1427 		return VOID;
1428 	if (is_bool_type(type))
1429 		return src;
1430 	if (src->type == PSEUDO_VAL && (src->value == 0 || src->value == 1))
1431 		return src;
1432 	if (is_float_type(type)) {
1433 		zero = add_setfval(ep, type, 0.0);
1434 		op = map_opcode(OP_SET_NE, type);
1435 	} else {
1436 		zero = value_pseudo(0);
1437 		op = OP_SET_NE;
1438 	}
1439 	return add_cmp_op(ep, &bool_ctype, op, type, src, zero);
1440 }
1441 
linearize_expression_to_bool(struct entrypoint * ep,struct expression * expr)1442 static pseudo_t linearize_expression_to_bool(struct entrypoint *ep, struct expression *expr)
1443 {
1444 	pseudo_t dst;
1445 	dst = linearize_expression(ep, expr);
1446 	dst = add_convert_to_bool(ep, dst, expr->ctype);
1447 	return dst;
1448 }
1449 
linearize_assignment(struct entrypoint * ep,struct expression * expr)1450 static pseudo_t linearize_assignment(struct entrypoint *ep, struct expression *expr)
1451 {
1452 	struct access_data ad = { NULL, };
1453 	struct expression *target = expr->left;
1454 	struct expression *src = expr->right;
1455 	struct symbol *ctype;
1456 	pseudo_t value;
1457 
1458 	value = linearize_expression(ep, src);
1459 	if (!target || !linearize_address_gen(ep, target, &ad))
1460 		return value;
1461 	if (expr->op != '=') {
1462 		pseudo_t oldvalue = linearize_load_gen(ep, &ad);
1463 		pseudo_t dst;
1464 		static const int op_trans[] = {
1465 			[SPECIAL_ADD_ASSIGN - SPECIAL_BASE] = OP_ADD,
1466 			[SPECIAL_SUB_ASSIGN - SPECIAL_BASE] = OP_SUB,
1467 			[SPECIAL_MUL_ASSIGN - SPECIAL_BASE] = OP_MUL,
1468 			[SPECIAL_DIV_ASSIGN - SPECIAL_BASE] = OP_DIVU,
1469 			[SPECIAL_MOD_ASSIGN - SPECIAL_BASE] = OP_MODU,
1470 			[SPECIAL_SHL_ASSIGN - SPECIAL_BASE] = OP_SHL,
1471 			[SPECIAL_SHR_ASSIGN - SPECIAL_BASE] = OP_LSR,
1472 			[SPECIAL_AND_ASSIGN - SPECIAL_BASE] = OP_AND,
1473 			[SPECIAL_OR_ASSIGN  - SPECIAL_BASE] = OP_OR,
1474 			[SPECIAL_XOR_ASSIGN - SPECIAL_BASE] = OP_XOR
1475 		};
1476 		int opcode;
1477 
1478 		if (!src)
1479 			return VOID;
1480 
1481 		ctype = src->ctype;
1482 		oldvalue = cast_pseudo(ep, oldvalue, target->ctype, ctype);
1483 		opcode = map_opcode(op_trans[expr->op - SPECIAL_BASE], ctype);
1484 		dst = add_binary_op(ep, ctype, opcode, oldvalue, value);
1485 		taint_undefined_behaviour(dst->def);
1486 		value = cast_pseudo(ep, dst, ctype, expr->ctype);
1487 	}
1488 	value = linearize_store_gen(ep, value, &ad);
1489 	return value;
1490 }
1491 
linearize_call_expression(struct entrypoint * ep,struct expression * expr)1492 static pseudo_t linearize_call_expression(struct entrypoint *ep, struct expression *expr)
1493 {
1494 	struct expression *arg, *fn;
1495 	struct instruction *insn;
1496 	pseudo_t retval, call;
1497 	struct ctype *ctype = NULL;
1498 	struct symbol *fntype;
1499 	struct context *context;
1500 
1501 	if (!expr->ctype)
1502 		return VOID;
1503 
1504 	fn = expr->fn;
1505 	fntype = fn->ctype;
1506 
1507 	// handle builtins
1508 	if (fntype->op && fntype->op->linearize) {
1509 		retval = fntype->op->linearize(ep, expr);
1510 		if (retval)
1511 			return retval;
1512 	}
1513 
1514 	ctype = &fntype->ctype;
1515 
1516 	insn = alloc_typed_instruction(OP_CALL, expr->ctype);
1517 	add_symbol(&insn->fntypes, fntype);
1518 	FOR_EACH_PTR(expr->args, arg) {
1519 		pseudo_t new = linearize_expression(ep, arg);
1520 		use_pseudo(insn, new, add_pseudo(&insn->arguments, new));
1521 		add_symbol(&insn->fntypes, arg->ctype);
1522 	} END_FOR_EACH_PTR(arg);
1523 
1524 	if (fn->type == EXPR_PREOP && fn->op == '*' && is_func_type(fn->ctype))
1525 		fn = fn->unop;
1526 
1527 	if (fn->type == EXPR_SYMBOL) {
1528 		call = symbol_pseudo(ep, fn->symbol);
1529 	} else {
1530 		call = linearize_expression(ep, fn);
1531 	}
1532 	use_pseudo(insn, call, &insn->func);
1533 	retval = VOID;
1534 	if (expr->ctype != &void_ctype)
1535 		retval = alloc_pseudo(insn);
1536 	insn->target = retval;
1537 	add_one_insn(ep, insn);
1538 
1539 	if (ctype) {
1540 		FOR_EACH_PTR(ctype->contexts, context) {
1541 			int in = context->in;
1542 			int out = context->out;
1543 			int check = 0;
1544 			int context_diff;
1545 			if (in < 0) {
1546 				check = 1;
1547 				in = 0;
1548 			}
1549 			if (out < 0) {
1550 				check = 0;
1551 				out = 0;
1552 			}
1553 			context_diff = out - in;
1554 			if (check || context_diff) {
1555 				insn = alloc_instruction(OP_CONTEXT, 0);
1556 				insn->increment = context_diff;
1557 				insn->check = check;
1558 				insn->context_expr = context->context;
1559 				add_one_insn(ep, insn);
1560 			}
1561 		} END_FOR_EACH_PTR(context);
1562 
1563 		if (ctype->modifiers & MOD_NORETURN)
1564 			add_unreachable(ep);
1565 	}
1566 
1567 	return retval;
1568 }
1569 
linearize_binop_bool(struct entrypoint * ep,struct expression * expr)1570 static pseudo_t linearize_binop_bool(struct entrypoint *ep, struct expression *expr)
1571 {
1572 	pseudo_t src1, src2, dst;
1573 	int op = (expr->op == SPECIAL_LOGICAL_OR) ? OP_OR : OP_AND;
1574 
1575 	src1 = linearize_expression_to_bool(ep, expr->left);
1576 	src2 = linearize_expression_to_bool(ep, expr->right);
1577 	dst = add_binary_op(ep, &bool_ctype, op, src1, src2);
1578 	if (expr->ctype != &bool_ctype)
1579 		dst = cast_pseudo(ep, dst, &bool_ctype, expr->ctype);
1580 	return dst;
1581 }
1582 
linearize_binop(struct entrypoint * ep,struct expression * expr)1583 static pseudo_t linearize_binop(struct entrypoint *ep, struct expression *expr)
1584 {
1585 	pseudo_t src1, src2, dst;
1586 	static const int opcode[] = {
1587 		['+'] = OP_ADD, ['-'] = OP_SUB,
1588 		['*'] = OP_MUL, ['/'] = OP_DIVU,
1589 		['%'] = OP_MODU, ['&'] = OP_AND,
1590 		['|'] = OP_OR,  ['^'] = OP_XOR,
1591 		[SPECIAL_LEFTSHIFT] = OP_SHL,
1592 		[SPECIAL_RIGHTSHIFT] = OP_LSR,
1593 	};
1594 	int op;
1595 
1596 	src1 = linearize_expression(ep, expr->left);
1597 	src2 = linearize_expression(ep, expr->right);
1598 	op = map_opcode(opcode[expr->op], expr->ctype);
1599 	dst = add_binary_op(ep, expr->ctype, op, src1, src2);
1600 	taint_undefined_behaviour(dst->def);
1601 	return dst;
1602 }
1603 
1604 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1605 
1606 static pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1607 
linearize_select(struct entrypoint * ep,struct expression * expr)1608 static pseudo_t linearize_select(struct entrypoint *ep, struct expression *expr)
1609 {
1610 	pseudo_t cond, valt, valf, res;
1611 	struct instruction *insn;
1612 
1613 	valt = linearize_expression(ep, expr->cond_true);
1614 	valf = linearize_expression(ep, expr->cond_false);
1615 	cond = linearize_expression(ep, expr->conditional);
1616 
1617 	insn = alloc_typed_instruction(OP_SEL, expr->ctype);
1618 	if (!expr->cond_true)
1619 		valt = cond;
1620 	use_pseudo(insn, cond, &insn->src1);
1621 	use_pseudo(insn, valt, &insn->src2);
1622 	use_pseudo(insn, valf, &insn->src3);
1623 
1624 	res = alloc_pseudo(insn);
1625 	insn->target = res;
1626 	add_one_insn(ep, insn);
1627 	return res;
1628 }
1629 
add_join_conditional(struct entrypoint * ep,struct expression * expr,pseudo_t phi1,pseudo_t phi2)1630 static pseudo_t add_join_conditional(struct entrypoint *ep, struct expression *expr,
1631 				     pseudo_t phi1, pseudo_t phi2)
1632 {
1633 	pseudo_t target;
1634 	struct instruction *phi_node;
1635 
1636 	if (phi1 == VOID)
1637 		return (phi2 == VOID) ? phi2 : phi2->def->src;
1638 	if (phi2 == VOID)
1639 		return (phi1 == VOID) ? phi1 : phi1->def->src;
1640 
1641 	phi_node = alloc_typed_instruction(OP_PHI, expr->ctype);
1642 	link_phi(phi_node, phi1);
1643 	link_phi(phi_node, phi2);
1644 	phi_node->target = target = alloc_pseudo(phi_node);
1645 	add_one_insn(ep, phi_node);
1646 	return target;
1647 }
1648 
linearize_short_conditional(struct entrypoint * ep,struct expression * expr,struct expression * cond,struct expression * expr_false)1649 static pseudo_t linearize_short_conditional(struct entrypoint *ep, struct expression *expr,
1650 					    struct expression *cond,
1651 					    struct expression *expr_false)
1652 {
1653 	pseudo_t src1, src2;
1654 	struct basic_block *bb_false;
1655 	struct basic_block *merge;
1656 	pseudo_t phi1, phi2;
1657 
1658 	if (!expr_false || !ep->active)
1659 		return VOID;
1660 
1661 	bb_false = alloc_basic_block(ep, expr_false->pos);
1662 	merge = alloc_basic_block(ep, expr->pos);
1663 
1664 	src1 = linearize_expression(ep, cond);
1665 	phi1 = alloc_phi(ep->active, src1, expr->ctype);
1666 	add_branch(ep, src1, merge, bb_false);
1667 
1668 	set_activeblock(ep, bb_false);
1669 	src2 = linearize_expression(ep, expr_false);
1670 	phi2 = alloc_phi(ep->active, src2, expr->ctype);
1671 	set_activeblock(ep, merge);
1672 
1673 	return add_join_conditional(ep, expr, phi1, phi2);
1674 }
1675 
linearize_conditional(struct entrypoint * ep,struct expression * expr,struct expression * cond,struct expression * expr_true,struct expression * expr_false)1676 static pseudo_t linearize_conditional(struct entrypoint *ep, struct expression *expr,
1677 				      struct expression *cond,
1678 				      struct expression *expr_true,
1679 				      struct expression *expr_false)
1680 {
1681 	pseudo_t src1, src2;
1682 	pseudo_t phi1, phi2;
1683 	struct basic_block *bb_true, *bb_false, *merge;
1684 
1685 	if (!cond || !expr_true || !expr_false || !ep->active)
1686 		return VOID;
1687 	bb_true = alloc_basic_block(ep, expr_true->pos);
1688 	bb_false = alloc_basic_block(ep, expr_false->pos);
1689 	merge = alloc_basic_block(ep, expr->pos);
1690 
1691 	linearize_cond_branch(ep, cond, bb_true, bb_false);
1692 
1693 	set_activeblock(ep, bb_true);
1694 	src1 = linearize_expression(ep, expr_true);
1695 	phi1 = alloc_phi(ep->active, src1, expr->ctype);
1696 	add_goto(ep, merge);
1697 
1698 	set_activeblock(ep, bb_false);
1699 	src2 = linearize_expression(ep, expr_false);
1700 	phi2 = alloc_phi(ep->active, src2, expr->ctype);
1701 	set_activeblock(ep, merge);
1702 
1703 	return add_join_conditional(ep, expr, phi1, phi2);
1704 }
1705 
insert_phis(struct basic_block * bb,pseudo_t src,struct symbol * ctype,struct instruction * node)1706 static void insert_phis(struct basic_block *bb, pseudo_t src, struct symbol *ctype,
1707 	struct instruction *node)
1708 {
1709 	struct basic_block *parent;
1710 
1711 	FOR_EACH_PTR(bb->parents, parent) {
1712 		struct instruction *phisrc = alloc_phisrc(src, ctype);
1713 		insert_last_instruction(parent, phisrc);
1714 		link_phi(node, phisrc->target);
1715 	} END_FOR_EACH_PTR(parent);
1716 }
1717 
linearize_logical(struct entrypoint * ep,struct expression * expr)1718 static pseudo_t linearize_logical(struct entrypoint *ep, struct expression *expr)
1719 {
1720 	struct symbol *ctype = expr->ctype;
1721 	struct basic_block *other, *merge;
1722 	struct instruction *node;
1723 	pseudo_t src1, src2, phi2;
1724 
1725 	if (!ep->active || !expr->left || !expr->right)
1726 		return VOID;
1727 
1728 	other = alloc_basic_block(ep, expr->right->pos);
1729 	merge = alloc_basic_block(ep, expr->pos);
1730 	node = alloc_phi_node(merge, ctype, NULL);
1731 
1732 	// LHS and its shortcut
1733 	if (expr->op == SPECIAL_LOGICAL_OR) {
1734 		linearize_cond_branch(ep, expr->left, merge, other);
1735 		src1 = value_pseudo(1);
1736 	} else {
1737 		linearize_cond_branch(ep, expr->left, other, merge);
1738 		src1 = value_pseudo(0);
1739 	}
1740 	insert_phis(merge, src1, ctype, node);
1741 
1742 	// RHS
1743 	set_activeblock(ep, other);
1744 	src2 = linearize_expression_to_bool(ep, expr->right);
1745 	src2 = cast_pseudo(ep, src2, &bool_ctype, ctype);
1746 	phi2 = alloc_phi(ep->active, src2, ctype);
1747 	link_phi(node, phi2);
1748 
1749 	// join
1750 	set_activeblock(ep, merge);
1751 	add_instruction(&merge->insns, node);
1752 	return node->target;
1753 }
1754 
linearize_compare(struct entrypoint * ep,struct expression * expr)1755 static pseudo_t linearize_compare(struct entrypoint *ep, struct expression *expr)
1756 {
1757 	static const int cmpop[] = {
1758 		['>'] = OP_SET_GT, ['<'] = OP_SET_LT,
1759 		[SPECIAL_EQUAL] = OP_SET_EQ,
1760 		[SPECIAL_NOTEQUAL] = OP_SET_NE,
1761 		[SPECIAL_GTE] = OP_SET_GE,
1762 		[SPECIAL_LTE] = OP_SET_LE,
1763 		[SPECIAL_UNSIGNED_LT] = OP_SET_B,
1764 		[SPECIAL_UNSIGNED_GT] = OP_SET_A,
1765 		[SPECIAL_UNSIGNED_LTE] = OP_SET_BE,
1766 		[SPECIAL_UNSIGNED_GTE] = OP_SET_AE,
1767 	};
1768 	struct symbol *itype = expr->right->ctype;
1769 	int op = opcode_float(cmpop[expr->op], itype);
1770 	pseudo_t src1 = linearize_expression(ep, expr->left);
1771 	pseudo_t src2 = linearize_expression(ep, expr->right);
1772 	pseudo_t dst = add_cmp_op(ep, expr->ctype, op, itype, src1, src2);
1773 	return dst;
1774 }
1775 
1776 
linearize_cond_branch(struct entrypoint * ep,struct expression * expr,struct basic_block * bb_true,struct basic_block * bb_false)1777 static pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1778 {
1779 	pseudo_t cond;
1780 
1781 	if (!expr || !valid_type(expr->ctype) || !bb_reachable(ep->active))
1782 		return VOID;
1783 
1784 	switch (expr->type) {
1785 
1786 	case EXPR_STRING:
1787 	case EXPR_VALUE:
1788 		add_goto(ep, expr->value ? bb_true : bb_false);
1789 		break;
1790 	case EXPR_FVALUE:
1791 		add_goto(ep, expr->fvalue ? bb_true : bb_false);
1792 		break;
1793 	case EXPR_LOGICAL:
1794 		linearize_logical_branch(ep, expr, bb_true, bb_false);
1795 		break;
1796 	case EXPR_COMPARE:
1797 		cond = linearize_compare(ep, expr);
1798 		add_branch(ep, cond, bb_true, bb_false);
1799 		break;
1800 	case EXPR_PREOP:
1801 		if (expr->op == '!')
1802 			return linearize_cond_branch(ep, expr->unop, bb_false, bb_true);
1803 		/* fall through */
1804 	default:
1805 		cond = linearize_expression_to_bool(ep, expr);
1806 		add_branch(ep, cond, bb_true, bb_false);
1807 		break;
1808 	}
1809 	return VOID;
1810 }
1811 
1812 
linearize_logical_branch(struct entrypoint * ep,struct expression * expr,struct basic_block * bb_true,struct basic_block * bb_false)1813 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1814 {
1815 	struct basic_block *next = alloc_basic_block(ep, expr->pos);
1816 
1817 	if (expr->op == SPECIAL_LOGICAL_OR)
1818 		linearize_cond_branch(ep, expr->left, bb_true, next);
1819 	else
1820 		linearize_cond_branch(ep, expr->left, next, bb_false);
1821 	set_activeblock(ep, next);
1822 	linearize_cond_branch(ep, expr->right, bb_true, bb_false);
1823 	return VOID;
1824 }
1825 
linearize_cast(struct entrypoint * ep,struct expression * expr)1826 static pseudo_t linearize_cast(struct entrypoint *ep, struct expression *expr)
1827 {
1828 	pseudo_t src;
1829 	struct expression *orig = expr->cast_expression;
1830 
1831 	if (!orig)
1832 		return VOID;
1833 
1834 	src = linearize_expression(ep, orig);
1835 	return cast_pseudo(ep, src, orig->ctype, expr->ctype);
1836 }
1837 
linearize_initializer(struct entrypoint * ep,struct expression * initializer,struct access_data * ad)1838 static pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *ad)
1839 {
1840 	switch (initializer->type) {
1841 	case EXPR_INITIALIZER: {
1842 		struct expression *expr;
1843 		FOR_EACH_PTR(initializer->expr_list, expr) {
1844 			linearize_initializer(ep, expr, ad);
1845 		} END_FOR_EACH_PTR(expr);
1846 		break;
1847 	}
1848 	case EXPR_POS:
1849 		ad->offset = initializer->init_offset;
1850 		linearize_initializer(ep, initializer->init_expr, ad);
1851 		break;
1852 	default: {
1853 		pseudo_t value = linearize_expression(ep, initializer);
1854 		ad->type = initializer->ctype;
1855 		linearize_store_gen(ep, value, ad);
1856 		return value;
1857 	}
1858 	}
1859 
1860 	return VOID;
1861 }
1862 
linearize_argument(struct entrypoint * ep,struct symbol * arg,int nr)1863 static void linearize_argument(struct entrypoint *ep, struct symbol *arg, int nr)
1864 {
1865 	struct access_data ad = { NULL, };
1866 
1867 	ad.type = arg;
1868 	ad.address = symbol_pseudo(ep, arg);
1869 	linearize_store_gen(ep, argument_pseudo(ep, nr), &ad);
1870 }
1871 
linearize_expression(struct entrypoint * ep,struct expression * expr)1872 static pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr)
1873 {
1874 	if (!expr || !valid_type(expr->ctype))
1875 		return VOID;
1876 
1877 	current_pos = expr->pos;
1878 	switch (expr->type) {
1879 	case EXPR_SYMBOL:
1880 		linearize_one_symbol(ep, expr->symbol);
1881 		return add_symbol_address(ep, expr);
1882 
1883 	case EXPR_VALUE:
1884 		return value_pseudo(expr->value);
1885 
1886 	case EXPR_STRING:
1887 	case EXPR_LABEL:
1888 		return add_setval(ep, expr->ctype, expr);
1889 
1890 	case EXPR_FVALUE:
1891 		return add_setfval(ep, expr->ctype, expr->fvalue);
1892 
1893 	case EXPR_STATEMENT:
1894 		return linearize_statement(ep, expr->statement);
1895 
1896 	case EXPR_CALL:
1897 		return linearize_call_expression(ep, expr);
1898 
1899 	case EXPR_BINOP:
1900 		if (expr->op == SPECIAL_LOGICAL_AND || expr->op == SPECIAL_LOGICAL_OR)
1901 			return linearize_binop_bool(ep, expr);
1902 		return linearize_binop(ep, expr);
1903 
1904 	case EXPR_LOGICAL:
1905 		return linearize_logical(ep, expr);
1906 
1907 	case EXPR_COMPARE:
1908 		return  linearize_compare(ep, expr);
1909 
1910 	case EXPR_SELECT:
1911 		return	linearize_select(ep, expr);
1912 
1913 	case EXPR_CONDITIONAL:
1914 		if (!expr->cond_true)
1915 			return linearize_short_conditional(ep, expr, expr->conditional, expr->cond_false);
1916 
1917 		return  linearize_conditional(ep, expr, expr->conditional,
1918 					      expr->cond_true, expr->cond_false);
1919 
1920 	case EXPR_COMMA:
1921 		linearize_expression(ep, expr->left);
1922 		return linearize_expression(ep, expr->right);
1923 
1924 	case EXPR_ASSIGNMENT:
1925 		return linearize_assignment(ep, expr);
1926 
1927 	case EXPR_PREOP:
1928 		return linearize_preop(ep, expr);
1929 
1930 	case EXPR_POSTOP:
1931 		return linearize_postop(ep, expr);
1932 
1933 	case EXPR_CAST:
1934 	case EXPR_FORCE_CAST:
1935 	case EXPR_IMPLIED_CAST:
1936 		return linearize_cast(ep, expr);
1937 
1938 	case EXPR_SLICE:
1939 		return linearize_slice(ep, expr);
1940 
1941 	case EXPR_INITIALIZER:
1942 	case EXPR_POS:
1943 		warning(expr->pos, "unexpected initializer expression (%d %d)", expr->type, expr->op);
1944 		return VOID;
1945 	default:
1946 		warning(expr->pos, "unknown expression (%d %d)", expr->type, expr->op);
1947 		return VOID;
1948 	}
1949 	return VOID;
1950 }
1951 
linearize_one_symbol(struct entrypoint * ep,struct symbol * sym)1952 static pseudo_t linearize_one_symbol(struct entrypoint *ep, struct symbol *sym)
1953 {
1954 	struct access_data ad = { NULL, };
1955 	pseudo_t value;
1956 
1957 	if (!sym || !sym->initializer || sym->initialized)
1958 		return VOID;
1959 
1960 	/* We need to output these puppies some day too.. */
1961 	if (sym->ctype.modifiers & (MOD_STATIC | MOD_TOPLEVEL))
1962 		return VOID;
1963 
1964 	sym->initialized = 1;
1965 	ad.address = symbol_pseudo(ep, sym);
1966 
1967 	if (sym->initializer && !is_scalar_type(sym)) {
1968 		// default zero initialization [6.7.9.21]
1969 		// FIXME: this init the whole aggregate while
1970 		// only the existing fields need to be initialized.
1971 		// FIXME: this init the whole aggregate even if
1972 		// all fields arelater  explicitely initialized.
1973 		ad.type = sym;
1974 		ad.address = symbol_pseudo(ep, sym);
1975 		linearize_store_gen(ep, value_pseudo(0), &ad);
1976 	}
1977 
1978 	value = linearize_initializer(ep, sym->initializer, &ad);
1979 	return value;
1980 }
1981 
linearize_compound_statement(struct entrypoint * ep,struct statement * stmt)1982 static pseudo_t linearize_compound_statement(struct entrypoint *ep, struct statement *stmt)
1983 {
1984 	pseudo_t pseudo;
1985 	struct statement *s;
1986 
1987 	pseudo = VOID;
1988 	FOR_EACH_PTR(stmt->stmts, s) {
1989 		pseudo = linearize_statement(ep, s);
1990 	} END_FOR_EACH_PTR(s);
1991 
1992 	return pseudo;
1993 }
1994 
add_return(struct entrypoint * ep,struct basic_block * bb,struct symbol * ctype,pseudo_t src)1995 static void add_return(struct entrypoint *ep, struct basic_block *bb, struct symbol *ctype, pseudo_t src)
1996 {
1997 	struct instruction *phi_node = first_instruction(bb->insns);
1998 	pseudo_t phi;
1999 	if (!phi_node) {
2000 		phi_node = alloc_typed_instruction(OP_PHI, ctype);
2001 		phi_node->target = alloc_pseudo(phi_node);
2002 		phi_node->bb = bb;
2003 		add_instruction(&bb->insns, phi_node);
2004 	}
2005 	phi = alloc_phi(ep->active, src, ctype);
2006 	phi->ident = &return_ident;
2007 	link_phi(phi_node, phi);
2008 }
2009 
linearize_fn_statement(struct entrypoint * ep,struct statement * stmt)2010 static pseudo_t linearize_fn_statement(struct entrypoint *ep, struct statement *stmt)
2011 {
2012 	struct instruction *phi_node;
2013 	struct basic_block *bb;
2014 	pseudo_t pseudo;
2015 
2016 	pseudo = linearize_compound_statement(ep, stmt);
2017 	if (!is_void_type(stmt->ret)) {			// non-void function
2018 		struct basic_block *active = ep->active;
2019 		if (active && !bb_terminated(active)) {	// missing return
2020 			struct basic_block *bb_ret;
2021 			bb_ret = get_bound_block(ep, stmt->ret);
2022 			add_return(ep, bb_ret, stmt->ret, undef_pseudo());
2023 		}
2024 	}
2025 	bb = add_label(ep, stmt->ret);
2026 	phi_node = first_instruction(bb->insns);
2027 	if (phi_node)
2028 		pseudo = phi_node->target;
2029 	return pseudo;
2030 }
2031 
linearize_inlined_call(struct entrypoint * ep,struct statement * stmt)2032 static pseudo_t linearize_inlined_call(struct entrypoint *ep, struct statement *stmt)
2033 {
2034 	struct instruction *insn = alloc_instruction(OP_INLINED_CALL, 0);
2035 	struct statement *args = stmt->args;
2036 	struct basic_block *bb;
2037 	pseudo_t pseudo;
2038 
2039 	if (args) {
2040 		struct symbol *sym;
2041 
2042 		concat_symbol_list(args->declaration, &ep->syms);
2043 		FOR_EACH_PTR(args->declaration, sym) {
2044 			pseudo_t value = linearize_one_symbol(ep, sym);
2045 			add_pseudo(&insn->arguments, value);
2046 		} END_FOR_EACH_PTR(sym);
2047 	}
2048 
2049 	pseudo = linearize_fn_statement(ep, stmt);
2050 	insn->target = pseudo;
2051 
2052 	insn->func = symbol_pseudo(ep, stmt->inline_fn);
2053 	bb = ep->active;
2054 	if (!bb->insns)
2055 		bb->pos = stmt->pos;
2056 	add_one_insn(ep, insn);
2057 	return pseudo;
2058 }
2059 
linearize_context(struct entrypoint * ep,struct statement * stmt)2060 static pseudo_t linearize_context(struct entrypoint *ep, struct statement *stmt)
2061 {
2062 	struct instruction *insn = alloc_instruction(OP_CONTEXT, 0);
2063 	struct expression *expr = stmt->expression;
2064 
2065 	insn->increment = get_expression_value(expr);
2066 	insn->context_expr = stmt->context;
2067 	add_one_insn(ep, insn);
2068 	return VOID;
2069 }
2070 
linearize_range(struct entrypoint * ep,struct statement * stmt)2071 static pseudo_t linearize_range(struct entrypoint *ep, struct statement *stmt)
2072 {
2073 	struct instruction *insn = alloc_instruction(OP_RANGE, 0);
2074 
2075 	use_pseudo(insn, linearize_expression(ep, stmt->range_expression), &insn->src1);
2076 	use_pseudo(insn, linearize_expression(ep, stmt->range_low), &insn->src2);
2077 	use_pseudo(insn, linearize_expression(ep, stmt->range_high), &insn->src3);
2078 	add_one_insn(ep, insn);
2079 	return VOID;
2080 }
2081 
2082 ALLOCATOR(asm_rules, "asm rules");
2083 ALLOCATOR(asm_constraint, "asm constraints");
2084 
add_asm_rule(struct instruction * insn,struct asm_constraint_list ** list,struct asm_operand * op,pseudo_t pseudo)2085 static void add_asm_rule(struct instruction *insn, struct asm_constraint_list **list, struct asm_operand *op, pseudo_t pseudo)
2086 {
2087 	struct asm_constraint *rule = __alloc_asm_constraint(0);
2088 	rule->is_memory = op->is_memory;
2089 	rule->ident = op->name;
2090 	rule->constraint = op->constraint ? op->constraint->string->data : "";
2091 	use_pseudo(insn, pseudo, &rule->pseudo);
2092 	add_ptr_list(list, rule);
2093 }
2094 
add_asm_input(struct entrypoint * ep,struct instruction * insn,struct asm_operand * op)2095 static void add_asm_input(struct entrypoint *ep, struct instruction *insn, struct asm_operand *op)
2096 {
2097 	pseudo_t pseudo = linearize_expression(ep, op->expr);
2098 
2099 	add_asm_rule(insn, &insn->asm_rules->inputs, op, pseudo);
2100 }
2101 
add_asm_output_address(struct entrypoint * ep,struct instruction * insn,struct asm_operand * op)2102 static void add_asm_output_address(struct entrypoint *ep, struct instruction *insn, struct asm_operand *op)
2103 {
2104 	pseudo_t pseudo;
2105 
2106 	if (!op->is_memory)
2107 		return;
2108 
2109 	pseudo = linearize_expression(ep, op->expr);
2110 	add_asm_rule(insn, &insn->asm_rules->outputs, op, pseudo);
2111 	insn->output_memory = 1;
2112 }
2113 
add_asm_output(struct entrypoint * ep,struct instruction * insn,struct asm_operand * op)2114 static void add_asm_output(struct entrypoint *ep, struct instruction *insn, struct asm_operand *op)
2115 {
2116 	struct access_data ad = { NULL, };
2117 	pseudo_t pseudo;
2118 
2119 	if (op->is_memory)
2120 		return;
2121 
2122 	if (!linearize_address_gen(ep, op->expr, &ad))
2123 		return;
2124 	pseudo = alloc_pseudo(insn);
2125 	linearize_store_gen(ep, pseudo, &ad);
2126 
2127 	add_asm_rule(insn, &insn->asm_rules->outputs, op, pseudo);
2128 }
2129 
linearize_asm_statement(struct entrypoint * ep,struct statement * stmt)2130 static pseudo_t linearize_asm_statement(struct entrypoint *ep, struct statement *stmt)
2131 {
2132 	struct instruction *insn;
2133 	struct expression *expr, *clob;
2134 	struct asm_rules *rules;
2135 	struct asm_operand *op;
2136 
2137 	insn = alloc_instruction(OP_ASM, 0);
2138 	expr = stmt->asm_string;
2139 	if (!expr || expr->type != EXPR_STRING) {
2140 		warning(stmt->pos, "expected string in inline asm");
2141 		return VOID;
2142 	}
2143 	insn->string = expr->string->data;
2144 
2145 	rules = __alloc_asm_rules(0);
2146 	insn->asm_rules = rules;
2147 
2148 	/* Gather the inputs.. */
2149 	FOR_EACH_PTR(stmt->asm_inputs, op) {
2150 		add_asm_input(ep, insn, op);
2151 	} END_FOR_EACH_PTR(op);
2152 
2153 	/* ... and the addresses for memory outputs */
2154 	FOR_EACH_PTR(stmt->asm_outputs, op) {
2155 		add_asm_output_address(ep, insn, op);
2156 	} END_FOR_EACH_PTR(op);
2157 
2158 	add_one_insn(ep, insn);
2159 
2160 	/* Assign the outputs */
2161 	FOR_EACH_PTR(stmt->asm_outputs, op) {
2162 		add_asm_output(ep, insn, op);
2163 	} END_FOR_EACH_PTR(op);
2164 
2165 	/* and finally, look if it clobbers memory */
2166 	FOR_EACH_PTR(stmt->asm_clobbers, clob) {
2167 		if (!strcmp(clob->string->data, "memory"))
2168 			insn->clobber_memory = 1;
2169 	} END_FOR_EACH_PTR(clob);
2170 
2171 	return VOID;
2172 }
2173 
multijmp_cmp(const void * _a,const void * _b)2174 static int multijmp_cmp(const void *_a, const void *_b)
2175 {
2176 	const struct multijmp *a = _a;
2177 	const struct multijmp *b = _b;
2178 
2179 	// "default" case?
2180 	if (a->begin > a->end) {
2181 		if (b->begin > b->end)
2182 			return 0;
2183 		return 1;
2184 	}
2185 	if (b->begin > b->end)
2186 		return -1;
2187 	if (a->begin == b->begin) {
2188 		if (a->end == b->end)
2189 			return 0;
2190 		return (a->end < b->end) ? -1 : 1;
2191 	}
2192 	return a->begin < b->begin ? -1 : 1;
2193 }
2194 
sort_switch_cases(struct instruction * insn)2195 static void sort_switch_cases(struct instruction *insn)
2196 {
2197 	sort_list((struct ptr_list **)&insn->multijmp_list, multijmp_cmp);
2198 }
2199 
linearize_declaration(struct entrypoint * ep,struct statement * stmt)2200 static pseudo_t linearize_declaration(struct entrypoint *ep, struct statement *stmt)
2201 {
2202 	struct symbol *sym;
2203 
2204 	concat_symbol_list(stmt->declaration, &ep->syms);
2205 
2206 	FOR_EACH_PTR(stmt->declaration, sym) {
2207 		linearize_one_symbol(ep, sym);
2208 	} END_FOR_EACH_PTR(sym);
2209 	return VOID;
2210 }
2211 
linearize_return(struct entrypoint * ep,struct statement * stmt)2212 static pseudo_t linearize_return(struct entrypoint *ep, struct statement *stmt)
2213 {
2214 	struct expression *expr = stmt->expression;
2215 	struct symbol *ret = stmt->ret_target;
2216 	struct basic_block *bb_return = get_bound_block(ep, ret);
2217 	struct basic_block *active;
2218 	pseudo_t src = linearize_expression(ep, expr);
2219 	active = ep->active;
2220 	if (active && !is_void_type(ret)) {
2221 		add_return(ep, bb_return, ret, src);
2222 	}
2223 	add_goto(ep, bb_return);
2224 	return VOID;
2225 }
2226 
linearize_switch(struct entrypoint * ep,struct statement * stmt)2227 static pseudo_t linearize_switch(struct entrypoint *ep, struct statement *stmt)
2228 {
2229 	struct symbol *sym;
2230 	struct instruction *switch_ins;
2231 	struct basic_block *switch_end = alloc_basic_block(ep, stmt->pos);
2232 	struct basic_block *active, *default_case;
2233 	struct expression *expr = stmt->switch_expression;
2234 	struct multijmp *jmp;
2235 	pseudo_t pseudo;
2236 
2237 	if (!expr || !expr->ctype)
2238 		return VOID;
2239 	pseudo = linearize_expression(ep, expr);
2240 	active = ep->active;
2241 	if (!active) {
2242 		active = alloc_basic_block(ep, stmt->pos);
2243 		set_activeblock(ep, active);
2244 	}
2245 
2246 	switch_ins = alloc_typed_instruction(OP_SWITCH, expr->ctype);
2247 	use_pseudo(switch_ins, pseudo, &switch_ins->cond);
2248 	add_one_insn(ep, switch_ins);
2249 	finish_block(ep);
2250 
2251 	default_case = NULL;
2252 	FOR_EACH_PTR(stmt->switch_case->symbol_list, sym) {
2253 		struct statement *case_stmt = sym->stmt;
2254 		struct basic_block *bb_case = get_bound_block(ep, sym);
2255 
2256 		if (!case_stmt->case_expression) {
2257 			default_case = bb_case;
2258 			continue;
2259 		} else if (case_stmt->case_expression->type != EXPR_VALUE) {
2260 			continue;
2261 		} else {
2262 			struct expression *case_to = case_stmt->case_to;
2263 			long long begin, end;
2264 
2265 			begin = end = case_stmt->case_expression->value;
2266 			if (case_to && case_to->type == EXPR_VALUE)
2267 				end = case_to->value;
2268 			if (begin > end)
2269 				jmp = alloc_multijmp(bb_case, end, begin);
2270 			else
2271 				jmp = alloc_multijmp(bb_case, begin, end);
2272 
2273 		}
2274 		add_multijmp(&switch_ins->multijmp_list, jmp);
2275 		add_bb(&bb_case->parents, active);
2276 		add_bb(&active->children, bb_case);
2277 	} END_FOR_EACH_PTR(sym);
2278 
2279 	bind_label(stmt->switch_break, switch_end, stmt->pos);
2280 
2281 	/* And linearize the actual statement */
2282 	linearize_statement(ep, stmt->switch_statement);
2283 	set_activeblock(ep, switch_end);
2284 
2285 	if (!default_case)
2286 		default_case = switch_end;
2287 
2288 	jmp = alloc_multijmp(default_case, 1, 0);
2289 	add_multijmp(&switch_ins->multijmp_list, jmp);
2290 	add_bb(&default_case->parents, active);
2291 	add_bb(&active->children, default_case);
2292 	sort_switch_cases(switch_ins);
2293 
2294 	return VOID;
2295 }
2296 
linearize_iterator(struct entrypoint * ep,struct statement * stmt)2297 static pseudo_t linearize_iterator(struct entrypoint *ep, struct statement *stmt)
2298 {
2299 	struct statement  *pre_statement = stmt->iterator_pre_statement;
2300 	struct expression *pre_condition = stmt->iterator_pre_condition;
2301 	struct statement  *statement = stmt->iterator_statement;
2302 	struct statement  *post_statement = stmt->iterator_post_statement;
2303 	struct expression *post_condition = stmt->iterator_post_condition;
2304 	struct basic_block *loop_top, *loop_body, *loop_continue, *loop_end;
2305 	struct symbol *sym;
2306 
2307 	FOR_EACH_PTR(stmt->iterator_syms, sym) {
2308 		linearize_one_symbol(ep, sym);
2309 	} END_FOR_EACH_PTR(sym);
2310 	concat_symbol_list(stmt->iterator_syms, &ep->syms);
2311 	linearize_statement(ep, pre_statement);
2312 
2313 	loop_body = loop_top = alloc_basic_block(ep, stmt->pos);
2314 	loop_continue = alloc_basic_block(ep, stmt->pos);
2315 	loop_end = alloc_basic_block(ep, stmt->pos);
2316 
2317 	/* An empty post-condition means that it's the same as the pre-condition */
2318 	if (!post_condition) {
2319 		loop_top = alloc_basic_block(ep, stmt->pos);
2320 		set_activeblock(ep, loop_top);
2321 	}
2322 
2323 	if (pre_condition)
2324 			linearize_cond_branch(ep, pre_condition, loop_body, loop_end);
2325 
2326 	bind_label(stmt->iterator_continue, loop_continue, stmt->pos);
2327 	bind_label(stmt->iterator_break, loop_end, stmt->pos);
2328 
2329 	set_activeblock(ep, loop_body);
2330 	linearize_statement(ep, statement);
2331 	add_goto(ep, loop_continue);
2332 
2333 	set_activeblock(ep, loop_continue);
2334 	linearize_statement(ep, post_statement);
2335 	if (!post_condition)
2336 		add_goto(ep, loop_top);
2337 	else
2338 		linearize_cond_branch(ep, post_condition, loop_top, loop_end);
2339 	set_activeblock(ep, loop_end);
2340 
2341 	return VOID;
2342 }
2343 
linearize_statement(struct entrypoint * ep,struct statement * stmt)2344 static pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt)
2345 {
2346 	struct basic_block *bb;
2347 
2348 	if (!stmt)
2349 		return VOID;
2350 
2351 	bb = ep->active;
2352 	if (bb && !bb->insns)
2353 		bb->pos = stmt->pos;
2354 	current_pos = stmt->pos;
2355 
2356 	switch (stmt->type) {
2357 	case STMT_NONE:
2358 		break;
2359 
2360 	case STMT_DECLARATION:
2361 		return linearize_declaration(ep, stmt);
2362 
2363 	case STMT_CONTEXT:
2364 		return linearize_context(ep, stmt);
2365 
2366 	case STMT_RANGE:
2367 		return linearize_range(ep, stmt);
2368 
2369 	case STMT_EXPRESSION:
2370 		return linearize_expression(ep, stmt->expression);
2371 
2372 	case STMT_ASM:
2373 		return linearize_asm_statement(ep, stmt);
2374 
2375 	case STMT_RETURN:
2376 		return linearize_return(ep, stmt);
2377 
2378 	case STMT_CASE: {
2379 		add_label(ep, stmt->case_label);
2380 		linearize_statement(ep, stmt->case_statement);
2381 		break;
2382 	}
2383 
2384 	case STMT_LABEL: {
2385 		struct symbol *label = stmt->label_identifier;
2386 
2387 		if (label->used) {
2388 			add_label(ep, label);
2389 		}
2390 		return linearize_statement(ep, stmt->label_statement);
2391 	}
2392 
2393 	case STMT_GOTO: {
2394 		struct symbol *sym;
2395 		struct expression *expr;
2396 		struct instruction *goto_ins;
2397 		struct basic_block *active;
2398 		pseudo_t pseudo;
2399 
2400 		active = ep->active;
2401 		if (!bb_reachable(active))
2402 			break;
2403 
2404 		if (stmt->goto_label) {
2405 			add_goto(ep, get_bound_block(ep, stmt->goto_label));
2406 			break;
2407 		}
2408 
2409 		expr = stmt->goto_expression;
2410 		if (!expr)
2411 			break;
2412 
2413 		/* This can happen as part of simplification */
2414 		if (expr->type == EXPR_LABEL) {
2415 			add_goto(ep, get_bound_block(ep, expr->label_symbol));
2416 			break;
2417 		}
2418 
2419 		pseudo = linearize_expression(ep, expr);
2420 		goto_ins = alloc_instruction(OP_COMPUTEDGOTO, 0);
2421 		use_pseudo(goto_ins, pseudo, &goto_ins->src);
2422 		add_one_insn(ep, goto_ins);
2423 
2424 		FOR_EACH_PTR(stmt->target_list, sym) {
2425 			struct basic_block *bb_computed = get_bound_block(ep, sym);
2426 			struct multijmp *jmp = alloc_multijmp(bb_computed, 1, 0);
2427 			add_multijmp(&goto_ins->multijmp_list, jmp);
2428 			add_bb(&bb_computed->parents, ep->active);
2429 			add_bb(&active->children, bb_computed);
2430 		} END_FOR_EACH_PTR(sym);
2431 
2432 		finish_block(ep);
2433 		break;
2434 	}
2435 
2436 	case STMT_COMPOUND:
2437 		if (stmt->inline_fn)
2438 			return linearize_inlined_call(ep, stmt);
2439 		return linearize_compound_statement(ep, stmt);
2440 
2441 	/*
2442 	 * This could take 'likely/unlikely' into account, and
2443 	 * switch the arms around appropriately..
2444 	 */
2445 	case STMT_IF: {
2446 		struct basic_block *bb_true, *bb_false, *endif;
2447  		struct expression *cond = stmt->if_conditional;
2448 
2449 		bb_true = alloc_basic_block(ep, stmt->pos);
2450 		bb_false = endif = alloc_basic_block(ep, stmt->pos);
2451 
2452 		// If the condition is invalid, the following
2453 		// statement(s) are not evaluated.
2454 		if (!cond || !valid_type(cond->ctype))
2455 			return VOID;
2456  		linearize_cond_branch(ep, cond, bb_true, bb_false);
2457 
2458 		set_activeblock(ep, bb_true);
2459  		linearize_statement(ep, stmt->if_true);
2460 
2461  		if (stmt->if_false) {
2462 			endif = alloc_basic_block(ep, stmt->pos);
2463 			add_goto(ep, endif);
2464 			set_activeblock(ep, bb_false);
2465  			linearize_statement(ep, stmt->if_false);
2466 		}
2467 		set_activeblock(ep, endif);
2468 		break;
2469 	}
2470 
2471 	case STMT_SWITCH:
2472 		return linearize_switch(ep, stmt);
2473 
2474 	case STMT_ITERATOR:
2475 		return linearize_iterator(ep, stmt);
2476 
2477 	default:
2478 		break;
2479 	}
2480 	return VOID;
2481 }
2482 
check_tainted_insn(struct instruction * insn)2483 static void check_tainted_insn(struct instruction *insn)
2484 {
2485 	unsigned long long uval;
2486 	long long sval;
2487 	pseudo_t src2;
2488 
2489 	switch (insn->opcode) {
2490 	case OP_DIVU: case OP_DIVS:
2491 	case OP_MODU: case OP_MODS:
2492 		if (insn->src2 == value_pseudo(0))
2493 			warning(insn->pos, "divide by zero");
2494 		break;
2495 	case OP_SHL: case OP_LSR: case OP_ASR:
2496 		src2 = insn->src2;
2497 		if (src2->type != PSEUDO_VAL)
2498 			break;
2499 		uval = src2->value;
2500 		if (uval < insn->size)
2501 			break;
2502 		sval = sign_extend(uval, insn->size);
2503 		if (Wshift_count_negative && sval < 0)
2504 			warning(insn->pos, "shift count is negative (%lld)", sval);
2505 		else if (Wshift_count_overflow)
2506 			warning(insn->pos, "shift too big (%llu) for type %s", uval, show_typename(insn->type));
2507 	}
2508 }
2509 
2510 ///
2511 // issue warnings after all possible DCE
late_warnings(struct entrypoint * ep)2512 static void late_warnings(struct entrypoint *ep)
2513 {
2514 	struct basic_block *bb;
2515 	FOR_EACH_PTR(ep->bbs, bb) {
2516 		struct instruction *insn;
2517 		FOR_EACH_PTR(bb->insns, insn) {
2518 			if (!insn->bb)
2519 				continue;
2520 			if (insn->tainted)
2521 				check_tainted_insn(insn);
2522 			switch (insn->opcode) {
2523 			case OP_LOAD:
2524 				// Check for illegal offsets.
2525 				check_access(insn);
2526 				break;
2527 			}
2528 		} END_FOR_EACH_PTR(insn);
2529 	} END_FOR_EACH_PTR(bb);
2530 }
2531 
linearize_fn(struct symbol * sym,struct symbol * base_type)2532 static struct entrypoint *linearize_fn(struct symbol *sym, struct symbol *base_type)
2533 {
2534 	struct statement *stmt = base_type->stmt;
2535 	struct entrypoint *ep;
2536 	struct basic_block *bb;
2537 	struct symbol *ret_type;
2538 	struct symbol *arg;
2539 	struct instruction *entry;
2540 	struct instruction *ret;
2541 	pseudo_t result;
2542 	int i;
2543 
2544 	if (!stmt || sym->bogus_linear)
2545 		return NULL;
2546 
2547 	ep = alloc_entrypoint();
2548 	ep->name = sym;
2549 	sym->ep = ep;
2550 	bb = alloc_basic_block(ep, sym->pos);
2551 	set_activeblock(ep, bb);
2552 
2553 	if (stmt->type == STMT_ASM) {	// top-level asm
2554 		linearize_asm_statement(ep, stmt);
2555 		return ep;
2556 	}
2557 
2558 	entry = alloc_instruction(OP_ENTRY, 0);
2559 	add_one_insn(ep, entry);
2560 	ep->entry = entry;
2561 
2562 	concat_symbol_list(base_type->arguments, &ep->syms);
2563 
2564 	/* FIXME!! We should do something else about varargs.. */
2565 	i = 0;
2566 	FOR_EACH_PTR(base_type->arguments, arg) {
2567 		linearize_argument(ep, arg, ++i);
2568 	} END_FOR_EACH_PTR(arg);
2569 
2570 	result = linearize_fn_statement(ep, stmt);
2571 	ret_type = base_type->ctype.base_type;
2572 	ret = alloc_typed_instruction(OP_RET, ret_type);
2573 	if (type_size(ret_type) > 0)
2574 		use_pseudo(ret, result, &ret->src);
2575 	add_one_insn(ep, ret);
2576 
2577 	optimize(ep);
2578 	late_warnings(ep);
2579 	return ep;
2580 }
2581 
linearize_symbol(struct symbol * sym)2582 struct entrypoint *linearize_symbol(struct symbol *sym)
2583 {
2584 	struct symbol *base_type;
2585 
2586 	if (!sym)
2587 		return NULL;
2588 	current_pos = sym->pos;
2589 	base_type = sym->ctype.base_type;
2590 	if (!base_type)
2591 		return NULL;
2592 	if (base_type->type == SYM_FN)
2593 		return linearize_fn(sym, base_type);
2594 	return NULL;
2595 }
2596 
2597 /*
2598  * Builtin functions
2599  */
2600 
linearize_fma(struct entrypoint * ep,struct expression * expr)2601 static pseudo_t linearize_fma(struct entrypoint *ep, struct expression *expr)
2602 {
2603 	struct instruction *insn = alloc_typed_instruction(OP_FMADD, expr->ctype);
2604 	struct expression *arg;
2605 
2606 	PREPARE_PTR_LIST(expr->args, arg);
2607 		use_pseudo(insn, linearize_expression(ep, arg), &insn->src1);
2608 		NEXT_PTR_LIST(arg)
2609 		use_pseudo(insn, linearize_expression(ep, arg), &insn->src2);
2610 		NEXT_PTR_LIST(arg)
2611 		use_pseudo(insn, linearize_expression(ep, arg), &insn->src3);
2612 	FINISH_PTR_LIST(arg);
2613 
2614 	add_one_insn(ep, insn);
2615 	return insn->target = alloc_pseudo(insn);
2616 }
2617 
linearize_isdigit(struct entrypoint * ep,struct expression * expr)2618 static pseudo_t linearize_isdigit(struct entrypoint *ep, struct expression *expr)
2619 {
2620 	struct instruction *insn;
2621 	pseudo_t src;
2622 
2623 	insn = alloc_typed_instruction(OP_SUB, &int_ctype);
2624 	src = linearize_expression(ep, first_expression(expr->args));
2625 	use_pseudo(insn, src, &insn->src1);
2626 	insn->src2 = value_pseudo('0');
2627 	src = insn->target = alloc_pseudo(insn);
2628 	add_one_insn(ep, insn);
2629 
2630 	insn = alloc_typed_instruction(OP_SET_BE, &int_ctype);
2631 	use_pseudo(insn, src, &insn->src1);
2632 	insn->src2 = value_pseudo(9);
2633 	insn->target = alloc_pseudo(insn);
2634 	insn->itype = &int_ctype;
2635 	add_one_insn(ep, insn);
2636 
2637 	return insn->target;
2638 }
2639 
linearize_unreachable(struct entrypoint * ep,struct expression * exp)2640 static pseudo_t linearize_unreachable(struct entrypoint *ep, struct expression *exp)
2641 {
2642 	add_unreachable(ep);
2643 	return VOID;
2644 }
2645 
2646 static struct sym_init {
2647 	const char *name;
2648 	pseudo_t (*linearize)(struct entrypoint *, struct expression*);
2649 	struct symbol_op op;
2650 } builtins_table[] = {
2651 	// must be declared in builtin.c:declare_builtins[]
2652 	{ "__builtin_fma", linearize_fma },
2653 	{ "__builtin_fmaf", linearize_fma },
2654 	{ "__builtin_fmal", linearize_fma },
2655 	{ "__builtin_isdigit", linearize_isdigit },
2656 	{ "__builtin_unreachable", linearize_unreachable },
2657 	{ }
2658 };
2659 
init_linearized_builtins(int stream)2660 void init_linearized_builtins(int stream)
2661 {
2662 	struct sym_init *ptr;
2663 
2664 	for (ptr = builtins_table; ptr->name; ptr++) {
2665 		struct symbol *sym;
2666 		sym = create_symbol(stream, ptr->name, SYM_NODE, NS_SYMBOL);
2667 		if (!sym->op)
2668 			sym->op = &ptr->op;
2669 		sym->op->type |= KW_BUILTIN;
2670 		sym->op->linearize = ptr->linearize;
2671 	}
2672 }
2673