• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Do C preprocessing, based on a token list gathered by
3  * the tokenizer.
4  *
5  * This may not be the smartest preprocessor on the planet.
6  *
7  * Copyright (C) 2003 Transmeta Corp.
8  *               2003-2004 Linus Torvalds
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  */
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <stdarg.h>
31 #include <stddef.h>
32 #include <string.h>
33 #include <ctype.h>
34 #include <unistd.h>
35 #include <fcntl.h>
36 #include <limits.h>
37 #include <time.h>
38 
39 #include "lib.h"
40 #include "allocate.h"
41 #include "parse.h"
42 #include "token.h"
43 #include "symbol.h"
44 #include "expression.h"
45 #include "scope.h"
46 
/* Preprocessor-wide state shared by the functions in this file. */
static struct ident_list *macros;	/* only needed for -dD */
static int false_nesting;		/* while non-zero, collect_arg() discards tokens */
static int counter_macro;		/* next value produced for __COUNTER__ */
static int include_level;		/* expand_include_level() reports this minus one */
static int expanding;			/* 1 while expand_one_symbol() is inside expand() */
52 
#define INCLUDEPATHS 300
/*
 * NULL-terminated list of directories searched for included files.
 * Slot 0 is rewritten at include time (directory of the including file,
 * or "" for includes handled with how == 2 in handle_include_path()).
 */
const char *includepath[INCLUDEPATHS+1] = {
	[0] = "",
	[1] = "/usr/include",
	[2] = "/usr/local/include",
	[3] = NULL,
};
60 
61 static const char **quote_includepath = includepath;
62 static const char **angle_includepath = includepath + 1;
63 static const char **isys_includepath   = includepath + 1;
64 static const char **sys_includepath   = includepath + 1;
65 static const char **dirafter_includepath = includepath + 3;
66 
/*
 * Mark 'stream' dirty.  On the transition from clean to dirty, the
 * ->protect identifier is dropped unless an #ifndef is currently being
 * tracked in ->ifndef.
 *
 * 'stream' is parenthesized so that any pointer-valued expression (for
 * example '&s') can be passed; note that it is still evaluated several
 * times, so it must not have side effects.
 */
#define dirty_stream(stream)					\
	do {							\
		if (!(stream)->dirty) {				\
			(stream)->dirty = 1;			\
			if (!(stream)->ifndef)			\
				(stream)->protect = NULL;	\
		}						\
	} while(0)
75 
/*
 * Called when a conditional group ends.  If the group being closed is the
 * one recorded in ->ifndef (it equals ->top_if), stop tracking it; then
 * either drop ->protect (nothing made the stream dirty) or, when ->protect
 * is set, reset ->dirty to 0.
 *
 * 'stream' is parenthesized so that any pointer-valued expression can be
 * passed; it is still evaluated several times, so avoid side effects.
 */
#define end_group(stream)						\
	do {								\
		if ((stream)->ifndef == (stream)->top_if) {		\
			(stream)->ifndef = NULL;			\
			if (!(stream)->dirty)				\
				(stream)->protect = NULL;		\
			else if ((stream)->protect)			\
				(stream)->dirty = 0;			\
		}							\
	} while(0)
86 
/*
 * Reset the tracking fields of 'stream' after a conditional nesting error:
 * the stream becomes dirty and both ->ifndef and ->protect are cleared.
 *
 * 'stream' is parenthesized so that any pointer-valued expression can be
 * passed; it is still evaluated several times, so avoid side effects.
 */
#define nesting_error(stream)			\
	do {					\
		(stream)->dirty = 1;		\
		(stream)->ifndef = NULL;	\
		(stream)->protect = NULL;	\
	} while(0)
93 
alloc_token(struct position * pos)94 static struct token *alloc_token(struct position *pos)
95 {
96 	struct token *token = __alloc_token(0);
97 
98 	token->pos.stream = pos->stream;
99 	token->pos.line = pos->line;
100 	token->pos.pos = pos->pos;
101 	token->pos.whitespace = 1;
102 	return token;
103 }
104 
/* Forward declaration: expand the macro 'sym' at '*list'. */
static int expand(struct token **list, struct symbol *sym);
107 
replace_with_string(struct token * token,const char * str)108 static void replace_with_string(struct token *token, const char *str)
109 {
110 	int size = strlen(str) + 1;
111 	struct string *s = __alloc_string(size);
112 
113 	s->length = size;
114 	memcpy(s->data, str, size);
115 	token_type(token) = TOKEN_STRING;
116 	token->string = s;
117 }
118 
replace_with_integer(struct token * token,unsigned int val)119 static void replace_with_integer(struct token *token, unsigned int val)
120 {
121 	char *buf = __alloc_bytes(11);
122 	sprintf(buf, "%u", val);
123 	token_type(token) = TOKEN_NUMBER;
124 	token->number = buf;
125 }
126 
lookup_macro(struct ident * ident)127 static struct symbol *lookup_macro(struct ident *ident)
128 {
129 	struct symbol *sym = lookup_symbol(ident, NS_MACRO | NS_UNDEF);
130 	if (sym && sym->namespace != NS_MACRO)
131 		sym = NULL;
132 	return sym;
133 }
134 
token_defined(struct token * token)135 static int token_defined(struct token *token)
136 {
137 	if (token_type(token) == TOKEN_IDENT) {
138 		struct symbol *sym = lookup_macro(token->ident);
139 		if (sym) {
140 			sym->used_in = file_scope;
141 			return 1;
142 		}
143 		return 0;
144 	}
145 
146 	sparse_error(token->pos, "expected preprocessor identifier");
147 	return 0;
148 }
149 
replace_with_bool(struct token * token,bool val)150 static void replace_with_bool(struct token *token, bool val)
151 {
152 	static const char *string[] = { "0", "1" };
153 
154 	token_type(token) = TOKEN_NUMBER;
155 	token->number = string[val];
156 }
157 
/* Replace 'token' with "1" or "0" according to token_defined(). */
static void replace_with_defined(struct token *token)
{
	int defined = token_defined(token);

	replace_with_bool(token, defined);
}
162 
expand_line(struct token * token)163 static void expand_line(struct token *token)
164 {
165 	replace_with_integer(token, token->pos.line);
166 }
167 
expand_file(struct token * token)168 static void expand_file(struct token *token)
169 {
170 	replace_with_string(token, stream_name(token->pos.stream));
171 }
172 
expand_basefile(struct token * token)173 static void expand_basefile(struct token *token)
174 {
175 	replace_with_string(token, base_filename);
176 }
177 
/* Timestamp sampled once, on first use, and shared by expand_date()/expand_time(). */
static time_t t = 0;

/* Replace 'token' with a string holding the date in "%b %e %Y" form. */
static void expand_date(struct token *token)
{
	/* month name (3) + ' ' + day (2) + ' ' + year (4) + NUL */
	static char buffer[12];

	if (t == 0)
		time(&t);
	strftime(buffer, sizeof(buffer), "%b %e %Y", localtime(&t));
	replace_with_string(token, buffer);
}
188 
expand_time(struct token * token)189 static void expand_time(struct token *token)
190 {
191 	static char buffer[9]; /* __TIME__: 2 + ':' + 2 + ':' + 2 + '\0' */
192 
193 	if (!t)
194 		time(&t);
195 	strftime(buffer, 9, "%T", localtime(&t));
196 	replace_with_string(token, buffer);
197 }
198 
expand_counter(struct token * token)199 static void expand_counter(struct token *token)
200 {
201 	replace_with_integer(token, counter_macro++);
202 }
203 
expand_include_level(struct token * token)204 static void expand_include_level(struct token *token)
205 {
206 	replace_with_integer(token, include_level - 1);
207 }
208 
expand_one_symbol(struct token ** list)209 static int expand_one_symbol(struct token **list)
210 {
211 	struct token *token = *list;
212 	struct symbol *sym;
213 
214 	if (token->pos.noexpand)
215 		return 1;
216 
217 	sym = lookup_macro(token->ident);
218 	if (!sym)
219 		return 1;
220 	if (sym->expand_simple) {
221 		sym->expand_simple(token);
222 		return 1;
223 	} else {
224 		int rc;
225 
226 		sym->used_in = file_scope;
227 		expanding = 1;
228 		rc = expand(list, sym);
229 		expanding = 0;
230 		return rc;
231 	}
232 }
233 
scan_next(struct token ** where)234 static inline struct token *scan_next(struct token **where)
235 {
236 	struct token *token = *where;
237 	if (token_type(token) != TOKEN_UNTAINT)
238 		return token;
239 	do {
240 		token->ident->tainted = 0;
241 		token = token->next;
242 	} while (token_type(token) == TOKEN_UNTAINT);
243 	*where = token;
244 	return token;
245 }
246 
expand_list(struct token ** list)247 static void expand_list(struct token **list)
248 {
249 	struct token *next;
250 	while (!eof_token(next = scan_next(list))) {
251 		if (token_type(next) != TOKEN_IDENT || expand_one_symbol(list))
252 			list = &next->next;
253 	}
254 }
255 
256 static void preprocessor_line(struct stream *stream, struct token **line);
257 
collect_arg(struct token * prev,int vararg,struct position * pos,int count)258 static struct token *collect_arg(struct token *prev, int vararg, struct position *pos, int count)
259 {
260 	struct stream *stream = input_streams + prev->pos.stream;
261 	struct token **p = &prev->next;
262 	struct token *next;
263 	int nesting = 0;
264 
265 	while (!eof_token(next = scan_next(p))) {
266 		if (next->pos.newline && match_op(next, '#')) {
267 			if (!next->pos.noexpand) {
268 				preprocessor_line(stream, p);
269 				__free_token(next);	/* Free the '#' token */
270 				continue;
271 			}
272 		}
273 		switch (token_type(next)) {
274 		case TOKEN_STREAMEND:
275 		case TOKEN_STREAMBEGIN:
276 			*p = &eof_token_entry;
277 			return next;
278 		case TOKEN_STRING:
279 		case TOKEN_WIDE_STRING:
280 			if (count > 1)
281 				next->string->immutable = 1;
282 			break;
283 		}
284 		if (false_nesting) {
285 			*p = next->next;
286 			__free_token(next);
287 			continue;
288 		}
289 		if (match_op(next, '(')) {
290 			nesting++;
291 		} else if (match_op(next, ')')) {
292 			if (!nesting--)
293 				break;
294 		} else if (match_op(next, ',') && !nesting && !vararg) {
295 			break;
296 		}
297 		next->pos.stream = pos->stream;
298 		next->pos.line = pos->line;
299 		next->pos.pos = pos->pos;
300 		next->pos.newline = 0;
301 		p = &next->next;
302 	}
303 	*p = &eof_token_entry;
304 	return next;
305 }
306 
307 /*
308  * We store arglist as <counter> [arg1] <number of uses for arg1> ... eof
309  */
310 
/* Per-argument bookkeeping for one macro invocation. */
struct arg {
	struct token *arg;	/* tokens as collected; NULL when absent or handed over to 'expanded' */
	struct token *expanded;	/* macro-expanded form, built by expand_arguments() */
	struct token *str;	/* stringified form, built by expand_arguments() */
	int n_normal;		/* uses still to be substituted in expanded form */
	int n_quoted;		/* uses still to be substituted in unexpanded form */
	int n_str;		/* uses still to be substituted in stringified form */
};
319 
collect_arguments(struct token * start,struct token * arglist,struct arg * args,struct token * what)320 static int collect_arguments(struct token *start, struct token *arglist, struct arg *args, struct token *what)
321 {
322 	int wanted = arglist->count.normal;
323 	struct token *next = NULL;
324 	int count = 0;
325 
326 	arglist = arglist->next;	/* skip counter */
327 
328 	if (!wanted) {
329 		next = collect_arg(start, 0, &what->pos, 0);
330 		if (eof_token(next))
331 			goto Eclosing;
332 		if (!eof_token(start->next) || !match_op(next, ')')) {
333 			count++;
334 			goto Emany;
335 		}
336 	} else {
337 		for (count = 0; count < wanted; count++) {
338 			struct argcount *p = &arglist->next->count;
339 			next = collect_arg(start, p->vararg, &what->pos, p->normal);
340 			if (eof_token(next))
341 				goto Eclosing;
342 			if (p->vararg && wanted == 1 && eof_token(start->next))
343 				break;
344 			arglist = arglist->next->next;
345 			args[count].arg = start->next;
346 			args[count].n_normal = p->normal;
347 			args[count].n_quoted = p->quoted;
348 			args[count].n_str = p->str;
349 			if (match_op(next, ')')) {
350 				count++;
351 				break;
352 			}
353 			start = next;
354 		}
355 		if (count == wanted && !match_op(next, ')'))
356 			goto Emany;
357 		if (count == wanted - 1) {
358 			struct argcount *p = &arglist->next->count;
359 			if (!p->vararg)
360 				goto Efew;
361 			args[count].arg = NULL;
362 			args[count].n_normal = p->normal;
363 			args[count].n_quoted = p->quoted;
364 			args[count].n_str = p->str;
365 		}
366 		if (count < wanted - 1)
367 			goto Efew;
368 	}
369 	what->next = next->next;
370 	return 1;
371 
372 Efew:
373 	sparse_error(what->pos, "macro \"%s\" requires %d arguments, but only %d given",
374 		show_token(what), wanted, count);
375 	goto out;
376 Emany:
377 	while (match_op(next, ',')) {
378 		next = collect_arg(next, 0, &what->pos, 0);
379 		count++;
380 	}
381 	if (eof_token(next))
382 		goto Eclosing;
383 	sparse_error(what->pos, "macro \"%s\" passed %d arguments, but takes just %d",
384 		show_token(what), count, wanted);
385 	goto out;
386 Eclosing:
387 	sparse_error(what->pos, "unterminated argument list invoking macro \"%s\"",
388 		show_token(what));
389 out:
390 	what->next = next->next;
391 	return 0;
392 }
393 
dup_list(struct token * list)394 static struct token *dup_list(struct token *list)
395 {
396 	struct token *res = NULL;
397 	struct token **p = &res;
398 
399 	while (!eof_token(list)) {
400 		struct token *newtok = __alloc_token(0);
401 		*newtok = *list;
402 		*p = newtok;
403 		p = &newtok->next;
404 		list = list->next;
405 	}
406 	return res;
407 }
408 
show_token_sequence(struct token * token,int quote)409 static const char *show_token_sequence(struct token *token, int quote)
410 {
411 	static char buffer[MAX_STRING];
412 	char *ptr = buffer;
413 	int whitespace = 0;
414 
415 	if (!token && !quote)
416 		return "<none>";
417 	while (!eof_token(token)) {
418 		const char *val = quote ? quote_token(token) : show_token(token);
419 		int len = strlen(val);
420 
421 		if (ptr + whitespace + len >= buffer + sizeof(buffer)) {
422 			sparse_error(token->pos, "too long token expansion");
423 			break;
424 		}
425 
426 		if (whitespace)
427 			*ptr++ = ' ';
428 		memcpy(ptr, val, len);
429 		ptr += len;
430 		token = token->next;
431 		whitespace = token->pos.whitespace;
432 	}
433 	*ptr = 0;
434 	return buffer;
435 }
436 
stringify(struct token * arg)437 static struct token *stringify(struct token *arg)
438 {
439 	const char *s = show_token_sequence(arg, 1);
440 	int size = strlen(s)+1;
441 	struct token *token = __alloc_token(0);
442 	struct string *string = __alloc_string(size);
443 
444 	memcpy(string->data, s, size);
445 	string->length = size;
446 	token->pos = arg->pos;
447 	token_type(token) = TOKEN_STRING;
448 	token->string = string;
449 	token->next = &eof_token_entry;
450 	return token;
451 }
452 
expand_arguments(int count,struct arg * args)453 static void expand_arguments(int count, struct arg *args)
454 {
455 	int i;
456 	for (i = 0; i < count; i++) {
457 		struct token *arg = args[i].arg;
458 		if (!arg)
459 			arg = &eof_token_entry;
460 		if (args[i].n_str)
461 			args[i].str = stringify(arg);
462 		if (args[i].n_normal) {
463 			if (!args[i].n_quoted) {
464 				args[i].expanded = arg;
465 				args[i].arg = NULL;
466 			} else if (eof_token(arg)) {
467 				args[i].expanded = arg;
468 			} else {
469 				args[i].expanded = dup_list(arg);
470 			}
471 			expand_list(&args[i].expanded);
472 		}
473 	}
474 }
475 
476 /*
477  * Possibly valid combinations:
478  *  - ident + ident -> ident
479  *  - ident + number -> ident unless number contains '.', '+' or '-'.
480  *  - 'L' + char constant -> wide char constant
481  *  - 'L' + string literal -> wide string literal
482  *  - number + number -> number
483  *  - number + ident -> number
484  *  - number + '.' -> number
485  *  - number + '+' or '-' -> number, if number used to end on [eEpP].
486  *  - '.' + number -> number, if number used to start with a digit.
487  *  - special + special -> either special or an error.
488  */
combine(struct token * left,struct token * right,char * p)489 static enum token_type combine(struct token *left, struct token *right, char *p)
490 {
491 	int len;
492 	enum token_type t1 = token_type(left), t2 = token_type(right);
493 
494 	if (t1 != TOKEN_IDENT && t1 != TOKEN_NUMBER && t1 != TOKEN_SPECIAL)
495 		return TOKEN_ERROR;
496 
497 	if (t1 == TOKEN_IDENT && left->ident == &L_ident) {
498 		if (t2 >= TOKEN_CHAR && t2 < TOKEN_WIDE_CHAR)
499 			return t2 + TOKEN_WIDE_CHAR - TOKEN_CHAR;
500 		if (t2 == TOKEN_STRING)
501 			return TOKEN_WIDE_STRING;
502 	}
503 
504 	if (t2 != TOKEN_IDENT && t2 != TOKEN_NUMBER && t2 != TOKEN_SPECIAL)
505 		return TOKEN_ERROR;
506 
507 	strcpy(p, show_token(left));
508 	strcat(p, show_token(right));
509 	len = strlen(p);
510 
511 	if (len >= 256)
512 		return TOKEN_ERROR;
513 
514 	if (t1 == TOKEN_IDENT) {
515 		if (t2 == TOKEN_SPECIAL)
516 			return TOKEN_ERROR;
517 		if (t2 == TOKEN_NUMBER && strpbrk(p, "+-."))
518 			return TOKEN_ERROR;
519 		return TOKEN_IDENT;
520 	}
521 
522 	if (t1 == TOKEN_NUMBER) {
523 		if (t2 == TOKEN_SPECIAL) {
524 			switch (right->special) {
525 			case '.':
526 				break;
527 			case '+': case '-':
528 				if (strchr("eEpP", p[len - 2]))
529 					break;
530 			default:
531 				return TOKEN_ERROR;
532 			}
533 		}
534 		return TOKEN_NUMBER;
535 	}
536 
537 	if (p[0] == '.' && isdigit((unsigned char)p[1]))
538 		return TOKEN_NUMBER;
539 
540 	return TOKEN_SPECIAL;
541 }
542 
merge(struct token * left,struct token * right)543 static int merge(struct token *left, struct token *right)
544 {
545 	static char buffer[512];
546 	enum token_type res = combine(left, right, buffer);
547 	int n;
548 
549 	switch (res) {
550 	case TOKEN_IDENT:
551 		left->ident = built_in_ident(buffer);
552 		left->pos.noexpand = 0;
553 		return 1;
554 
555 	case TOKEN_NUMBER:
556 		token_type(left) = TOKEN_NUMBER;	/* could be . + num */
557 		left->number = xstrdup(buffer);
558 		return 1;
559 
560 	case TOKEN_SPECIAL:
561 		if (buffer[2] && buffer[3])
562 			break;
563 		for (n = SPECIAL_BASE; n < SPECIAL_ARG_SEPARATOR; n++) {
564 			if (!memcmp(buffer, combinations[n-SPECIAL_BASE], 3)) {
565 				left->special = n;
566 				return 1;
567 			}
568 		}
569 		break;
570 
571 	case TOKEN_WIDE_CHAR:
572 	case TOKEN_WIDE_STRING:
573 		token_type(left) = res;
574 		left->pos.noexpand = 0;
575 		left->string = right->string;
576 		return 1;
577 
578 	case TOKEN_WIDE_CHAR_EMBEDDED_0 ... TOKEN_WIDE_CHAR_EMBEDDED_3:
579 		token_type(left) = res;
580 		left->pos.noexpand = 0;
581 		memcpy(left->embedded, right->embedded, 4);
582 		return 1;
583 
584 	default:
585 		;
586 	}
587 	sparse_error(left->pos, "'##' failed: concatenation is not a valid token");
588 	return 0;
589 }
590 
dup_token(struct token * token,struct position * streampos)591 static struct token *dup_token(struct token *token, struct position *streampos)
592 {
593 	struct token *alloc = alloc_token(streampos);
594 	token_type(alloc) = token_type(token);
595 	alloc->pos.newline = token->pos.newline;
596 	alloc->pos.whitespace = token->pos.whitespace;
597 	alloc->number = token->number;
598 	alloc->pos.noexpand = token->pos.noexpand;
599 	return alloc;
600 }
601 
copy(struct token ** where,struct token * list,int * count)602 static struct token **copy(struct token **where, struct token *list, int *count)
603 {
604 	int need_copy = --*count;
605 	while (!eof_token(list)) {
606 		struct token *token;
607 		if (need_copy)
608 			token = dup_token(list, &list->pos);
609 		else
610 			token = list;
611 		if (token_type(token) == TOKEN_IDENT && token->ident->tainted)
612 			token->pos.noexpand = 1;
613 		*where = token;
614 		where = &token->next;
615 		list = list->next;
616 	}
617 	*where = &eof_token_entry;
618 	return where;
619 }
620 
handle_kludge(struct token ** p,struct arg * args)621 static int handle_kludge(struct token **p, struct arg *args)
622 {
623 	struct token *t = (*p)->next->next;
624 	while (1) {
625 		struct arg *v = &args[t->argnum];
626 		if (token_type(t->next) != TOKEN_CONCAT) {
627 			if (v->arg) {
628 				/* ignore the first ## */
629 				*p = (*p)->next;
630 				return 0;
631 			}
632 			/* skip the entire thing */
633 			*p = t;
634 			return 1;
635 		}
636 		if (v->arg && !eof_token(v->arg))
637 			return 0; /* no magic */
638 		t = t->next->next;
639 	}
640 }
641 
substitute(struct token ** list,struct token * body,struct arg * args)642 static struct token **substitute(struct token **list, struct token *body, struct arg *args)
643 {
644 	struct position *base_pos = &(*list)->pos;
645 	int *count;
646 	enum {Normal, Placeholder, Concat} state = Normal;
647 
648 	for (; !eof_token(body); body = body->next) {
649 		struct token *added, *arg;
650 		struct token **tail;
651 		struct token *t;
652 
653 		switch (token_type(body)) {
654 		case TOKEN_GNU_KLUDGE:
655 			/*
656 			 * GNU kludge: if we had <comma>##<vararg>, behaviour
657 			 * depends on whether we had enough arguments to have
658 			 * a vararg.  If we did, ## is just ignored.  Otherwise
659 			 * both , and ## are ignored.  Worse, there can be
660 			 * an arbitrary number of ##<arg> in between; if all of
661 			 * those are empty, we act as if they hadn't been there,
662 			 * otherwise we act as if the kludge didn't exist.
663 			 */
664 			t = body;
665 			if (handle_kludge(&body, args)) {
666 				if (state == Concat)
667 					state = Normal;
668 				else
669 					state = Placeholder;
670 				continue;
671 			}
672 			added = dup_token(t, base_pos);
673 			token_type(added) = TOKEN_SPECIAL;
674 			tail = &added->next;
675 			break;
676 
677 		case TOKEN_STR_ARGUMENT:
678 			arg = args[body->argnum].str;
679 			count = &args[body->argnum].n_str;
680 			goto copy_arg;
681 
682 		case TOKEN_QUOTED_ARGUMENT:
683 			arg = args[body->argnum].arg;
684 			count = &args[body->argnum].n_quoted;
685 			if (!arg || eof_token(arg)) {
686 				if (state == Concat)
687 					state = Normal;
688 				else
689 					state = Placeholder;
690 				continue;
691 			}
692 			goto copy_arg;
693 
694 		case TOKEN_MACRO_ARGUMENT:
695 			arg = args[body->argnum].expanded;
696 			count = &args[body->argnum].n_normal;
697 			if (eof_token(arg)) {
698 				state = Normal;
699 				continue;
700 			}
701 		copy_arg:
702 			tail = copy(&added, arg, count);
703 			added->pos.newline = body->pos.newline;
704 			added->pos.whitespace = body->pos.whitespace;
705 			break;
706 
707 		case TOKEN_CONCAT:
708 			if (state == Placeholder)
709 				state = Normal;
710 			else
711 				state = Concat;
712 			continue;
713 
714 		case TOKEN_IDENT:
715 			added = dup_token(body, base_pos);
716 			if (added->ident->tainted)
717 				added->pos.noexpand = 1;
718 			tail = &added->next;
719 			break;
720 
721 		default:
722 			added = dup_token(body, base_pos);
723 			tail = &added->next;
724 			break;
725 		}
726 
727 		/*
728 		 * if we got to doing real concatenation, we already have
729 		 * added something into the list, so containing_token() is OK.
730 		 */
731 		if (state == Concat && merge(containing_token(list), added)) {
732 			*list = added->next;
733 			if (tail != &added->next)
734 				list = tail;
735 		} else {
736 			*list = added;
737 			list = tail;
738 		}
739 		state = Normal;
740 	}
741 	*list = &eof_token_entry;
742 	return list;
743 }
744 
expand(struct token ** list,struct symbol * sym)745 static int expand(struct token **list, struct symbol *sym)
746 {
747 	struct token *last;
748 	struct token *token = *list;
749 	struct ident *expanding = token->ident;
750 	struct token **tail;
751 	struct token *expansion = sym->expansion;
752 	int nargs = sym->arglist ? sym->arglist->count.normal : 0;
753 	struct arg args[nargs];
754 
755 	if (expanding->tainted) {
756 		token->pos.noexpand = 1;
757 		return 1;
758 	}
759 
760 	if (sym->arglist) {
761 		if (!match_op(scan_next(&token->next), '('))
762 			return 1;
763 		if (!collect_arguments(token->next, sym->arglist, args, token))
764 			return 1;
765 		expand_arguments(nargs, args);
766 	}
767 
768 	if (sym->expand)
769 		return sym->expand(token, args) ? 0 : 1;
770 
771 	expanding->tainted = 1;
772 
773 	last = token->next;
774 	tail = substitute(list, expansion, args);
775 	/*
776 	 * Note that it won't be eof - at least TOKEN_UNTAINT will be there.
777 	 * We still can lose the newline flag if the sucker expands to nothing,
778 	 * but the price of dealing with that is probably too high (we'd need
779 	 * to collect the flags during scan_next())
780 	 */
781 	(*list)->pos.newline = token->pos.newline;
782 	(*list)->pos.whitespace = token->pos.whitespace;
783 	*tail = last;
784 
785 	return 0;
786 }
787 
token_name_sequence(struct token * token,int endop,struct token * start)788 static const char *token_name_sequence(struct token *token, int endop, struct token *start)
789 {
790 	static char buffer[256];
791 	char *ptr = buffer;
792 
793 	while (!eof_token(token) && !match_op(token, endop)) {
794 		int len;
795 		const char *val = token->string->data;
796 		if (token_type(token) != TOKEN_STRING)
797 			val = show_token(token);
798 		len = strlen(val);
799 		memcpy(ptr, val, len);
800 		ptr += len;
801 		token = token->next;
802 	}
803 	*ptr = 0;
804 	if (endop && !match_op(token, endop))
805 		sparse_error(start->pos, "expected '>' at end of filename");
806 	return buffer;
807 }
808 
already_tokenized(const char * path)809 static int already_tokenized(const char *path)
810 {
811 	int stream, next;
812 
813 	for (stream = *hash_stream(path); stream >= 0 ; stream = next) {
814 		struct stream *s = input_streams + stream;
815 
816 		next = s->next_stream;
817 		if (s->once) {
818 			if (strcmp(path, s->name))
819 				continue;
820 			return 1;
821 		}
822 		if (s->constant != CONSTANT_FILE_YES)
823 			continue;
824 		if (strcmp(path, s->name))
825 			continue;
826 		if (s->protect && !lookup_macro(s->protect))
827 			continue;
828 		return 1;
829 	}
830 	return 0;
831 }
832 
/* Handle include of header files.
 * The relevant options are made compatible with gcc. The only options that
 * are not supported are -iwithprefix and friends.
 *
 * The following sets of include paths are known:
 * quote_includepath:	Paths to search when using #include "file.h"
 * angle_includepath:	Paths to search when using #include <file.h>
 * isys_includepath:	Paths specified with -isystem, come before the
 *			built-in system include paths. Gcc would suppress
 *			warnings from system headers. Here we separate
 *			them from the angle_ ones to keep search ordering.
 *
 * sys_includepath:	Built-in include paths.
 * dirafter_includepath: Paths added with -idirafter.
 *
 * The above is implemented as one array with pointers
 *                         +--------------+
 * quote_includepath --->  |              |
 *                         +--------------+
 *                         |              |
 *                         +--------------+
 * angle_includepath --->  |              |
 *                         +--------------+
 * isys_includepath  --->  |              |
 *                         +--------------+
 * sys_includepath   --->  |              |
 *                         +--------------+
 * dirafter_includepath -> |              |
 *                         +--------------+
 *
 * -I dir inserts dir just before isys_includepath and moves the rest.
 * -I- turns all dirs previously specified with -I into quote dirs only, and
 *   angle_includepath is set equal to isys_includepath.
 * -nostdinc removes all sys dirs by storing NULL in the entry pointed
 *   to by sys_includepath. Note that this will reset all built-in dirs
 *   and those added before -nostdinc by -isystem and -idirafter.
 * -isystem dir adds dir where isys_includepath points, making this dir the
 *   first system dir.
 * -idirafter dir adds dir to the end of the list.
 */
873 
set_stream_include_path(struct stream * stream)874 static void set_stream_include_path(struct stream *stream)
875 {
876 	const char *path = stream->path;
877 	if (!path) {
878 		const char *p = strrchr(stream->name, '/');
879 		path = "";
880 		if (p) {
881 			int len = p - stream->name + 1;
882 			char *m = malloc(len+1);
883 			/* This includes the final "/" */
884 			memcpy(m, stream->name, len);
885 			m[len] = 0;
886 			path = m;
887 			/* normalize this path */
888 			while (path[0] == '.' && path[1] == '/') {
889 				path += 2;
890 				while (path[0] == '/')
891 					path++;
892 			}
893 		}
894 		stream->path = path;
895 	}
896 	includepath[0] = path;
897 }
898 
try_include(struct position pos,const char * path,const char * filename,int flen,struct token ** where,const char ** next_path)899 static int try_include(struct position pos, const char *path, const char *filename, int flen, struct token **where, const char **next_path)
900 {
901 	int fd;
902 	int plen = strlen(path);
903 	static char fullname[PATH_MAX];
904 
905 	memcpy(fullname, path, plen);
906 	if (plen && path[plen-1] != '/') {
907 		fullname[plen] = '/';
908 		plen++;
909 	}
910 	memcpy(fullname+plen, filename, flen);
911 	if (already_tokenized(fullname))
912 		return 1;
913 	fd = open(fullname, O_RDONLY);
914 	if (fd >= 0) {
915 		char *streamname = xmemdup(fullname, plen + flen);
916 		*where = tokenize(&pos, streamname, fd, *where, next_path);
917 		close(fd);
918 		return 1;
919 	}
920 	return 0;
921 }
922 
do_include_path(const char ** pptr,struct token ** list,struct token * token,const char * filename,int flen)923 static int do_include_path(const char **pptr, struct token **list, struct token *token, const char *filename, int flen)
924 {
925 	const char *path;
926 
927 	while ((path = *pptr++) != NULL) {
928 		if (!try_include(token->pos, path, filename, flen, list, pptr))
929 			continue;
930 		return 1;
931 	}
932 	return 0;
933 }
934 
free_preprocessor_line(struct token * token)935 static int free_preprocessor_line(struct token *token)
936 {
937 	while (token_type(token) != TOKEN_EOF) {
938 		struct token *free = token;
939 		token = token->next;
940 		__free_token(free);
941 	};
942 	return 1;
943 }
944 
handle_include_path(struct stream * stream,struct token ** list,struct token * token,int how)945 static int handle_include_path(struct stream *stream, struct token **list, struct token *token, int how)
946 {
947 	const char *filename;
948 	struct token *next;
949 	const char **path;
950 	int expect;
951 	int flen;
952 
953 	next = token->next;
954 	expect = '>';
955 	if (!match_op(next, '<')) {
956 		expand_list(&token->next);
957 		expect = 0;
958 		next = token;
959 		if (match_op(token->next, '<')) {
960 			next = token->next;
961 			expect = '>';
962 		}
963 	}
964 
965 	token = next->next;
966 	filename = token_name_sequence(token, expect, token);
967 	flen = strlen(filename) + 1;
968 
969 	/* Absolute path? */
970 	if (filename[0] == '/') {
971 		if (try_include(token->pos, "", filename, flen, list, includepath))
972 			return 0;
973 		goto out;
974 	}
975 
976 	switch (how) {
977 	case 1:
978 		path = stream->next_path;
979 		break;
980 	case 2:
981 		includepath[0] = "";
982 		path = includepath;
983 		break;
984 	default:
985 		/* Dir of input file is first dir to search for quoted includes */
986 		set_stream_include_path(stream);
987 		path = expect ? angle_includepath : quote_includepath;
988 		break;
989 	}
990 	/* Check the standard include paths.. */
991 	if (do_include_path(path, list, token, filename, flen))
992 		return 0;
993 out:
994 	error_die(token->pos, "unable to open '%s'", filename);
995 }
996 
/* Include with the default search order (how == 0). */
static int handle_include(struct stream *stream, struct token **list, struct token *token)
{
	const int how = 0;

	return handle_include_path(stream, list, token, how);
}
1001 
/* Include continuing the search from the stream's next_path (how == 1). */
static int handle_include_next(struct stream *stream, struct token **list, struct token *token)
{
	const int how = 1;

	return handle_include_path(stream, list, token, how);
}
1006 
/* Include searching all of includepath[] with slot 0 reset to "" (how == 2). */
static int handle_argv_include(struct stream *stream, struct token **list, struct token *token)
{
	const int how = 2;

	return handle_include_path(stream, list, token, how);
}
1011 
token_different(struct token * t1,struct token * t2)1012 static int token_different(struct token *t1, struct token *t2)
1013 {
1014 	int different;
1015 
1016 	if (token_type(t1) != token_type(t2))
1017 		return 1;
1018 
1019 	switch (token_type(t1)) {
1020 	case TOKEN_IDENT:
1021 		different = t1->ident != t2->ident;
1022 		break;
1023 	case TOKEN_ARG_COUNT:
1024 	case TOKEN_UNTAINT:
1025 	case TOKEN_CONCAT:
1026 	case TOKEN_GNU_KLUDGE:
1027 		different = 0;
1028 		break;
1029 	case TOKEN_NUMBER:
1030 		different = strcmp(t1->number, t2->number);
1031 		break;
1032 	case TOKEN_SPECIAL:
1033 		different = t1->special != t2->special;
1034 		break;
1035 	case TOKEN_MACRO_ARGUMENT:
1036 	case TOKEN_QUOTED_ARGUMENT:
1037 	case TOKEN_STR_ARGUMENT:
1038 		different = t1->argnum != t2->argnum;
1039 		break;
1040 	case TOKEN_CHAR_EMBEDDED_0 ... TOKEN_CHAR_EMBEDDED_3:
1041 	case TOKEN_WIDE_CHAR_EMBEDDED_0 ... TOKEN_WIDE_CHAR_EMBEDDED_3:
1042 		different = memcmp(t1->embedded, t2->embedded, 4);
1043 		break;
1044 	case TOKEN_CHAR:
1045 	case TOKEN_WIDE_CHAR:
1046 	case TOKEN_STRING:
1047 	case TOKEN_WIDE_STRING: {
1048 		struct string *s1, *s2;
1049 
1050 		s1 = t1->string;
1051 		s2 = t2->string;
1052 		different = 1;
1053 		if (s1->length != s2->length)
1054 			break;
1055 		different = memcmp(s1->data, s2->data, s1->length);
1056 		break;
1057 	}
1058 	default:
1059 		different = 1;
1060 		break;
1061 	}
1062 	return different;
1063 }
1064 
token_list_different(struct token * list1,struct token * list2)1065 static int token_list_different(struct token *list1, struct token *list2)
1066 {
1067 	for (;;) {
1068 		if (list1 == list2)
1069 			return 0;
1070 		if (!list1 || !list2)
1071 			return 1;
1072 		if (token_different(list1, list2))
1073 			return 1;
1074 		list1 = list1->next;
1075 		list2 = list2->next;
1076 	}
1077 }
1078 
set_arg_count(struct token * token)1079 static inline void set_arg_count(struct token *token)
1080 {
1081 	token_type(token) = TOKEN_ARG_COUNT;
1082 	token->count.normal = token->count.quoted =
1083 	token->count.str = token->count.vararg = 0;
1084 }
1085 
/*
 * Parse the parameter list of a function-like macro definition.
 *
 * @list: the token the list starts with (the caller passes the '('
 *        that directly follows the macro name).
 *
 * The list is rewritten in place: @list itself and every ',' / ')'
 * separator (or the "..." of a named variadic parameter) is turned
 * into a TOKEN_ARG_COUNT token, so the result is the leading count
 * token followed by (identifier, count token) pairs, terminated by
 * eof_token_entry.  list->count.normal ends up holding the number of
 * parameters.  This is the layout try_arg() walks.
 *
 * Returns the first token after the closing ')' on success, or NULL
 * after reporting an error.
 */
static struct token *parse_arguments(struct token *list)
{
	struct token *arg = list->next, *next = list;
	struct argcount *count = &list->count;

	set_arg_count(list);

	/* empty parameter list: "()" */
	if (match_op(arg, ')')) {
		next = arg->next;
		list->next = &eof_token_entry;
		return next;
	}

	while (token_type(arg) == TOKEN_IDENT) {
		/* __VA_ARGS__ may not be used as a parameter name */
		if (arg->ident == &__VA_ARGS___ident)
			goto Eva_args;
		/* the counter wrapping back to zero means too many parameters */
		if (!++count->normal)
			goto Eargs;
		next = arg->next;

		/* "name," - more parameters follow */
		if (match_op(next, ',')) {
			set_arg_count(next);
			arg = next->next;
			continue;
		}

		/* "name)" - end of the list; the ')' becomes the last count token */
		if (match_op(next, ')')) {
			set_arg_count(next);
			next = next->next;
			arg->next->next = &eof_token_entry;
			return next;
		}

		/* normal cases are finished here */

		/*
		 * "name...)" - named variadic parameter.  The "..." token
		 * becomes the count token (flagged vararg) and the ')' is
		 * cut out of the list.
		 */
		if (match_op(next, SPECIAL_ELLIPSIS)) {
			if (match_op(next->next, ')')) {
				set_arg_count(next);
				next->count.vararg = 1;
				next = next->next;
				arg->next->next = &eof_token_entry;
				return next->next;
			}

			arg = next;
			goto Enotclosed;
		}

		if (eof_token(next)) {
			goto Enotclosed;
		} else {
			arg = next;
			goto Ebadstuff;
		}
	}

	/*
	 * "...)" - anonymous variadic parameter: the "..." token is
	 * rewritten into the identifier __VA_ARGS__ and counted as a
	 * regular parameter whose count token is flagged vararg.
	 */
	if (match_op(arg, SPECIAL_ELLIPSIS)) {
		next = arg->next;
		token_type(arg) = TOKEN_IDENT;
		arg->ident = &__VA_ARGS___ident;
		if (!match_op(next, ')'))
			goto Enotclosed;
		if (!++count->normal)
			goto Eargs;
		set_arg_count(next);
		next->count.vararg = 1;
		next = next->next;
		arg->next->next = &eof_token_entry;
		return next;
	}

	/* line ended inside the list: report at the last token seen */
	if (eof_token(arg)) {
		arg = next;
		goto Enotclosed;
	}
	if (match_op(arg, ','))
		goto Emissing;
	else
		goto Ebadstuff;


	/* error exits: 'arg' is the token the message is attached to */
Emissing:
	sparse_error(arg->pos, "parameter name missing");
	return NULL;
Ebadstuff:
	sparse_error(arg->pos, "\"%s\" may not appear in macro parameter list",
		show_token(arg));
	return NULL;
Enotclosed:
	sparse_error(arg->pos, "missing ')' in macro parameter list");
	return NULL;
Eva_args:
	sparse_error(arg->pos, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
	return NULL;
Eargs:
	sparse_error(arg->pos, "too many arguments in macro definition");
	return NULL;
}
1184 
try_arg(struct token * token,enum token_type type,struct token * arglist)1185 static int try_arg(struct token *token, enum token_type type, struct token *arglist)
1186 {
1187 	struct ident *ident = token->ident;
1188 	int nr;
1189 
1190 	if (!arglist || token_type(token) != TOKEN_IDENT)
1191 		return 0;
1192 
1193 	arglist = arglist->next;
1194 
1195 	for (nr = 0; !eof_token(arglist); nr++, arglist = arglist->next->next) {
1196 		if (arglist->ident == ident) {
1197 			struct argcount *count = &arglist->next->count;
1198 			int n;
1199 
1200 			token->argnum = nr;
1201 			token_type(token) = type;
1202 			switch (type) {
1203 			case TOKEN_MACRO_ARGUMENT:
1204 				n = ++count->normal;
1205 				break;
1206 			case TOKEN_QUOTED_ARGUMENT:
1207 				n = ++count->quoted;
1208 				break;
1209 			default:
1210 				n = ++count->str;
1211 			}
1212 			if (n)
1213 				return count->vararg ? 2 : 1;
1214 			/*
1215 			 * XXX - need saner handling of that
1216 			 * (>= 1024 instances of argument)
1217 			 */
1218 			token_type(token) = TOKEN_ERROR;
1219 			return -1;
1220 		}
1221 	}
1222 	return 0;
1223 }
1224 
handle_hash(struct token ** p,struct token * arglist)1225 static struct token *handle_hash(struct token **p, struct token *arglist)
1226 {
1227 	struct token *token = *p;
1228 	if (arglist) {
1229 		struct token *next = token->next;
1230 		if (!try_arg(next, TOKEN_STR_ARGUMENT, arglist))
1231 			goto Equote;
1232 		next->pos.whitespace = token->pos.whitespace;
1233 		__free_token(token);
1234 		token = *p = next;
1235 	} else {
1236 		token->pos.noexpand = 1;
1237 	}
1238 	return token;
1239 
1240 Equote:
1241 	sparse_error(token->pos, "'#' is not followed by a macro parameter");
1242 	return NULL;
1243 }
1244 
/*
 * Process a chain of '##' concatenations in a macro body.
 *
 * On entry token->next is ##.  Every operand of the chain that names a
 * macro parameter is converted into a TOKEN_QUOTED_ARGUMENT, runs of
 * consecutive '##' are collapsed into one, and each remaining '##'
 * becomes a TOKEN_CONCAT token.
 *
 * Returns the last operand of the chain, or NULL after reporting an
 * error (a '##' at the end of the body, or a failure in handle_hash()).
 */
static struct token *handle_hashhash(struct token *token, struct token *arglist)
{
	/* candidate for the GNU kludge marker: last operand not absorbed by it */
	struct token *last = token;
	struct token *concat;
	/*
	 * state: 1 while the previous operand was a ',' (and parameters
	 * followed it), then whatever try_arg() returned for the parameter
	 * that follows; 2 means "comma followed by a vararg parameter".
	 * NOTE(review): relies on match_op() yielding exactly 1 on a match.
	 */
	int state = match_op(token, ',');

	try_arg(token, TOKEN_QUOTED_ARGUMENT, arglist);

	while (1) {
		struct token *t;
		int is_arg;

		/* eat duplicate ## */
		concat = token->next;
		while (match_op(t = concat->next, SPECIAL_HASHHASH)) {
			token->next = t;
			__free_token(concat);
			concat = t;
		}
		token_type(concat) = TOKEN_CONCAT;

		/* nothing to the right of the ## */
		if (eof_token(t))
			goto Econcat;

		/* right operand is "#param": let handle_hash() replace it */
		if (match_op(t, '#')) {
			t = handle_hash(&concat->next, arglist);
			if (!t)
				return NULL;
		}

		is_arg = try_arg(t, TOKEN_QUOTED_ARGUMENT, arglist);

		if (state == 1 && is_arg) {
			/* ", ## param": remember what kind of parameter it was */
			state = is_arg;
		} else {
			last = t;
			state = match_op(t, ',');
		}

		token = t;
		if (!match_op(token->next, SPECIAL_HASHHASH))
			break;
	}
	/* handle GNU ,##__VA_ARGS__ kludge, in all its weirdness */
	if (state == 2)
		token_type(last) = TOKEN_GNU_KLUDGE;
	return token;

Econcat:
	sparse_error(concat->pos, "'##' cannot appear at the ends of macro expansion");
	return NULL;
}
1298 
/*
 * Prepare the replacement list of a macro definition.
 *
 * @expansion: first token of the macro body
 * @arglist:   parameter list as built by parse_arguments(), or NULL
 * @name:      identifier of the macro being defined
 *
 * Uses of macro parameters are converted into argument tokens, '#' and
 * '##' are processed by handle_hash() / handle_hashhash(), string
 * tokens are marked immutable, and a TOKEN_UNTAINT token carrying @name
 * is inserted just before the terminating EOF token.
 *
 * Returns the (possibly replaced) head of the body, or NULL after
 * reporting an error.
 */
static struct token *parse_expansion(struct token *expansion, struct token *arglist, struct ident *name)
{
	struct token *token = expansion;
	struct token **p;

	/* a body may not start with ## */
	if (match_op(token, SPECIAL_HASHHASH))
		goto Econcat;

	/*
	 * 'p' is the link pointing at the current token, so that
	 * handle_hash() can substitute the token in place (including the
	 * head of the list, via &expansion).
	 */
	for (p = &expansion; !eof_token(token); p = &token->next, token = *p) {
		if (match_op(token, '#')) {
			token = handle_hash(p, arglist);
			if (!token)
				return NULL;
		}
		if (match_op(token->next, SPECIAL_HASHHASH)) {
			/* continues after the last operand of the ## chain */
			token = handle_hashhash(token, arglist);
			if (!token)
				return NULL;
		} else {
			try_arg(token, TOKEN_MACRO_ARGUMENT, arglist);
		}
		switch (token_type(token)) {
		case TOKEN_ERROR:
			/* set by try_arg() when a use counter overflowed */
			goto Earg;

		case TOKEN_STRING:
		case TOKEN_WIDE_STRING:
			token->string->immutable = 1;
			break;
		}
	}
	/* here *p is the EOF token: splice the untaint marker in front of it */
	token = alloc_token(&expansion->pos);
	token_type(token) = TOKEN_UNTAINT;
	token->ident = name;
	token->next = *p;
	*p = token;
	return expansion;

Econcat:
	sparse_error(token->pos, "'##' cannot appear at the ends of macro expansion");
	return NULL;
Earg:
	sparse_error(token->pos, "too many instances of argument in body");
	return NULL;
}
1344 
/*
 * Install a macro definition.
 *
 * @pos:       position recorded in a newly allocated symbol and used
 *             for the redefinition warning
 * @token:     the directive token; freed when the definition is
 *             installed.  May be NULL (predefine() passes NULL).
 * @name:      identifier being defined
 * @arglist:   parsed parameter list, NULL for object-like macros
 * @expansion: macro body, run through parse_expansion() here
 * @attr:      strength of the definition (SYM_ATTR_*); a definition
 *             weaker than the existing symbol's attr is ignored
 *
 * Returns 0 when @expansion / @arglist have been stored in the symbol,
 * and 1 otherwise (parse error, ignored definition, or a definition
 * identical to the existing one).
 * NOTE(review): presumably a non-zero return tells the caller that the
 * directive line may be freed - confirm against the directive dispatcher.
 */
static int do_define(struct position pos, struct token *token, struct ident *name,
		     struct token *arglist, struct token *expansion, int attr)
{
	struct symbol *sym;
	int ret = 1;

	expansion = parse_expansion(expansion, arglist, name);
	if (!expansion)
		return 1;

	sym = lookup_symbol(name, NS_MACRO | NS_UNDEF);
	if (sym) {
		int clean;

		/* existing symbol is stronger than this definition: keep it */
		if (attr < sym->attr)
			goto out;

		/* same strength and currently defined (not #undef'ed) */
		clean = (attr == sym->attr && sym->namespace == NS_MACRO);

		if (token_list_different(sym->expansion, expansion) ||
		    token_list_different(sym->arglist, arglist)) {
			ret = 0;
			if ((clean && attr == SYM_ATTR_NORMAL)
					|| sym->used_in == file_scope) {
				warning(pos, "preprocessor token %.*s redefined",
						name->len, name->name);
				info(sym->pos, "this was the original definition");
			}
		} else if (clean)
			/* identical redefinition: nothing to do */
			goto out;
	}

	/* no symbol yet, or the existing one belongs to another scope */
	if (!sym || sym->scope != file_scope) {
		sym = alloc_symbol(pos, SYM_NODE);
		bind_symbol(sym, name, NS_MACRO);
		add_ident(&macros, name);
		ret = 0;
	}

	if (!ret) {
		sym->expansion = expansion;
		sym->arglist = arglist;
		if (token) /* Free the "define" token, but not the rest of the line */
			__free_token(token);
	}

	/* (re)activate the symbol with the new strength */
	sym->namespace = NS_MACRO;
	sym->used_in = NULL;
	sym->attr = attr;
out:
	return ret;
}
1397 
1398 ///
1399 // predefine a macro with a printf-formatted value
1400 // @name: the name of the macro
1401 // @weak: 0/1 for a normal or a weak define
1402 // @fmt: the printf format followed by it's arguments.
1403 //
1404 // The type of the value is automatically infered:
1405 // TOKEN_NUMBER if it starts by a digit, TOKEN_IDENT otherwise.
1406 // If @fmt is null or empty, the macro is defined with an empty definition.
predefine(const char * name,int weak,const char * fmt,...)1407 void predefine(const char *name, int weak, const char *fmt, ...)
1408 {
1409 	struct ident *ident = built_in_ident(name);
1410 	struct token *value = &eof_token_entry;
1411 	int attr = weak ? SYM_ATTR_WEAK : SYM_ATTR_NORMAL;
1412 
1413 	if (fmt && fmt[0]) {
1414 		static char buf[256];
1415 		va_list ap;
1416 
1417 		va_start(ap, fmt);
1418 		vsnprintf(buf, sizeof(buf), fmt, ap);
1419 		va_end(ap);
1420 
1421 		value = __alloc_token(0);
1422 		if (isdigit((unsigned char)buf[0])) {
1423 			token_type(value) = TOKEN_NUMBER;
1424 			value->number = xstrdup(buf);
1425 		} else {
1426 			token_type(value) = TOKEN_IDENT;
1427 			value->ident = built_in_ident(buf);
1428 		}
1429 		value->pos.whitespace = 1;
1430 		value->next = &eof_token_entry;
1431 	}
1432 
1433 	do_define(value->pos, NULL, ident, NULL, value, attr);
1434 }
1435 
1436 ///
1437 // like predefine() but only if one of the non-standard dialect is chosen
predefine_nostd(const char * name)1438 void predefine_nostd(const char *name)
1439 {
1440 	if ((standard & STANDARD_GNU) || (standard == STANDARD_NONE))
1441 		predefine(name, 1, "1");
1442 }
1443 
/*
 * Build a macro name from a printf-style format and define it to 1.
 *
 * @fmt:  format of the macro name
 * @weak: passed through to predefine()
 * @ap:   arguments for @fmt
 *
 * The name is formatted into a 256-byte static buffer; vsnprintf()
 * cuts off anything longer.
 */
static void predefine_fmt(const char *fmt, int weak, va_list ap)
{
	static char macro_name[256];

	vsnprintf(macro_name, sizeof macro_name, fmt, ap);
	predefine(macro_name, weak, "1");
}
1451 
/*
 * Define the macro whose name is given by the printf-style @fmt to 1,
 * passing weak == 0 to predefine_fmt().
 */
void predefine_strong(const char *fmt, ...)
{
	va_list args;
	const int weak = 0;

	va_start(args, fmt);
	predefine_fmt(fmt, weak, args);
	va_end(args);
}
1460 
/*
 * Define the macro whose name is given by the printf-style @fmt to 1,
 * passing weak == 1 to predefine_fmt().
 */
void predefine_weak(const char *fmt, ...)
{
	va_list args;
	const int weak = 1;

	va_start(args, fmt);
	predefine_fmt(fmt, weak, args);
	va_end(args);
}
1469 
do_handle_define(struct stream * stream,struct token ** line,struct token * token,int attr)1470 static int do_handle_define(struct stream *stream, struct token **line, struct token *token, int attr)
1471 {
1472 	struct token *arglist, *expansion;
1473 	struct token *left = token->next;
1474 	struct ident *name;
1475 
1476 	if (token_type(left) != TOKEN_IDENT) {
1477 		sparse_error(token->pos, "expected identifier to 'define'");
1478 		return 1;
1479 	}
1480 
1481 	name = left->ident;
1482 
1483 	arglist = NULL;
1484 	expansion = left->next;
1485 	if (!expansion->pos.whitespace) {
1486 		if (match_op(expansion, '(')) {
1487 			arglist = expansion;
1488 			expansion = parse_arguments(expansion);
1489 			if (!expansion)
1490 				return 1;
1491 		} else if (!eof_token(expansion)) {
1492 			warning(expansion->pos,
1493 				"no whitespace before object-like macro body");
1494 		}
1495 	}
1496 
1497 	return do_define(left->pos, token, name, arglist, expansion, attr);
1498 }
1499 
handle_define(struct stream * stream,struct token ** line,struct token * token)1500 static int handle_define(struct stream *stream, struct token **line, struct token *token)
1501 {
1502 	return do_handle_define(stream, line, token, SYM_ATTR_NORMAL);
1503 }
1504 
handle_weak_define(struct stream * stream,struct token ** line,struct token * token)1505 static int handle_weak_define(struct stream *stream, struct token **line, struct token *token)
1506 {
1507 	return do_handle_define(stream, line, token, SYM_ATTR_WEAK);
1508 }
1509 
handle_strong_define(struct stream * stream,struct token ** line,struct token * token)1510 static int handle_strong_define(struct stream *stream, struct token **line, struct token *token)
1511 {
1512 	return do_handle_define(stream, line, token, SYM_ATTR_STRONG);
1513 }
1514 
do_handle_undef(struct stream * stream,struct token ** line,struct token * token,int attr)1515 static int do_handle_undef(struct stream *stream, struct token **line, struct token *token, int attr)
1516 {
1517 	struct token *left = token->next;
1518 	struct symbol *sym;
1519 
1520 	if (token_type(left) != TOKEN_IDENT) {
1521 		sparse_error(token->pos, "expected identifier to 'undef'");
1522 		return 1;
1523 	}
1524 
1525 	sym = lookup_symbol(left->ident, NS_MACRO | NS_UNDEF);
1526 	if (sym) {
1527 		if (attr < sym->attr)
1528 			return 1;
1529 		if (attr == sym->attr && sym->namespace == NS_UNDEF)
1530 			return 1;
1531 	} else if (attr <= SYM_ATTR_NORMAL)
1532 		return 1;
1533 
1534 	if (!sym || sym->scope != file_scope) {
1535 		sym = alloc_symbol(left->pos, SYM_NODE);
1536 		bind_symbol(sym, left->ident, NS_MACRO);
1537 	}
1538 
1539 	sym->namespace = NS_UNDEF;
1540 	sym->used_in = NULL;
1541 	sym->attr = attr;
1542 
1543 	return 1;
1544 }
1545 
handle_undef(struct stream * stream,struct token ** line,struct token * token)1546 static int handle_undef(struct stream *stream, struct token **line, struct token *token)
1547 {
1548 	return do_handle_undef(stream, line, token, SYM_ATTR_NORMAL);
1549 }
1550 
handle_strong_undef(struct stream * stream,struct token ** line,struct token * token)1551 static int handle_strong_undef(struct stream *stream, struct token **line, struct token *token)
1552 {
1553 	return do_handle_undef(stream, line, token, SYM_ATTR_STRONG);
1554 }
1555 
preprocessor_if(struct stream * stream,struct token * token,int cond)1556 static int preprocessor_if(struct stream *stream, struct token *token, int cond)
1557 {
1558 	token_type(token) = false_nesting ? TOKEN_SKIP_GROUPS : TOKEN_IF;
1559 	free_preprocessor_line(token->next);
1560 	token->next = stream->top_if;
1561 	stream->top_if = token;
1562 	if (false_nesting || cond != 1)
1563 		false_nesting++;
1564 	return 0;
1565 }
1566 
handle_ifdef(struct stream * stream,struct token ** line,struct token * token)1567 static int handle_ifdef(struct stream *stream, struct token **line, struct token *token)
1568 {
1569 	struct token *next = token->next;
1570 	int arg;
1571 	if (token_type(next) == TOKEN_IDENT) {
1572 		arg = token_defined(next);
1573 	} else {
1574 		dirty_stream(stream);
1575 		if (!false_nesting)
1576 			sparse_error(token->pos, "expected preprocessor identifier");
1577 		arg = -1;
1578 	}
1579 	return preprocessor_if(stream, token, arg);
1580 }
1581 
handle_ifndef(struct stream * stream,struct token ** line,struct token * token)1582 static int handle_ifndef(struct stream *stream, struct token **line, struct token *token)
1583 {
1584 	struct token *next = token->next;
1585 	int arg;
1586 	if (token_type(next) == TOKEN_IDENT) {
1587 		if (!stream->dirty && !stream->ifndef) {
1588 			if (!stream->protect) {
1589 				stream->ifndef = token;
1590 				stream->protect = next->ident;
1591 			} else if (stream->protect == next->ident) {
1592 				stream->ifndef = token;
1593 				stream->dirty = 1;
1594 			}
1595 		}
1596 		arg = !token_defined(next);
1597 	} else {
1598 		dirty_stream(stream);
1599 		if (!false_nesting)
1600 			sparse_error(token->pos, "expected preprocessor identifier");
1601 		arg = -1;
1602 	}
1603 
1604 	return preprocessor_if(stream, token, arg);
1605 }
1606 
/*
 * Expression handling for #if and #elif; it differs from normal expansion
 * due to special treatment of "defined".
 *
 * @where: link pointing at the first token of the expression
 *
 * The token list is first rewritten in place by a small state machine:
 *   state 0: ordinary tokens; identifiers go through expand_one_symbol(),
 *            and identifiers that remain become TOKEN_ZERO_IDENT
 *   state 1: just saw "defined"
 *   state 2: saw "defined ("
 *   state 3: saw "defined ( <identifier>", a ')' is expected
 * The rewritten list is then parsed with constant_expression().
 *
 * Returns 1 if the expression evaluates to a non-zero value, else 0.
 */
static int expression_value(struct token **where)
{
	struct expression *expr;
	struct token *p;
	/*
	 * 'list' is the link pointing at the token under inspection;
	 * 'beginning' is the link that pointed at the "defined" keyword,
	 * so that its replacement can be spliced in at that place.
	 */
	struct token **list = where, **beginning = NULL;
	long long value;
	int state = 0;

	while (!eof_token(p = scan_next(list))) {
		switch (state) {
		case 0:
			if (token_type(p) != TOKEN_IDENT)
				break;
			if (p->ident == &defined_ident) {
				state = 1;
				beginning = list;
				break;
			}
			/*
			 * A zero return restarts the scan at the same link.
			 * NOTE(review): presumably zero means the identifier
			 * was replaced by its expansion - confirm.
			 */
			if (!expand_one_symbol(list))
				continue;
			if (token_type(p) != TOKEN_IDENT)
				break;
			token_type(p) = TOKEN_ZERO_IDENT;
			break;
		case 1:
			if (match_op(p, '(')) {
				state = 2;
			} else {
				/* "defined X": X takes the place of "defined" */
				state = 0;
				replace_with_defined(p);
				*beginning = p;
			}
			break;
		case 2:
			/* "defined ( X": X takes the place of "defined (" */
			if (token_type(p) == TOKEN_IDENT)
				state = 3;
			else
				state = 0;
			replace_with_defined(p);
			*beginning = p;
			break;
		case 3:
			state = 0;
			if (!match_op(p, ')'))
				sparse_error(p->pos, "missing ')' after \"defined\"");
			/* unlink this token, whether or not it was the ')' */
			*list = p->next;
			continue;
		}
		list = &p->next;
	}

	p = constant_expression(*where, &expr);
	if (!eof_token(p))
		sparse_error(p->pos, "garbage at end: %s", show_token_sequence(p, 0));
	value = get_expression_value(expr);
	return value != 0;
}
1668 
handle_if(struct stream * stream,struct token ** line,struct token * token)1669 static int handle_if(struct stream *stream, struct token **line, struct token *token)
1670 {
1671 	int value = 0;
1672 	if (!false_nesting)
1673 		value = expression_value(&token->next);
1674 
1675 	dirty_stream(stream);
1676 	return preprocessor_if(stream, token, value);
1677 }
1678 
handle_elif(struct stream * stream,struct token ** line,struct token * token)1679 static int handle_elif(struct stream * stream, struct token **line, struct token *token)
1680 {
1681 	struct token *top_if = stream->top_if;
1682 	end_group(stream);
1683 
1684 	if (!top_if) {
1685 		nesting_error(stream);
1686 		sparse_error(token->pos, "unmatched #elif within stream");
1687 		return 1;
1688 	}
1689 
1690 	if (token_type(top_if) == TOKEN_ELSE) {
1691 		nesting_error(stream);
1692 		sparse_error(token->pos, "#elif after #else");
1693 		if (!false_nesting)
1694 			false_nesting = 1;
1695 		return 1;
1696 	}
1697 
1698 	dirty_stream(stream);
1699 	if (token_type(top_if) != TOKEN_IF)
1700 		return 1;
1701 	if (false_nesting) {
1702 		false_nesting = 0;
1703 		if (!expression_value(&token->next))
1704 			false_nesting = 1;
1705 	} else {
1706 		false_nesting = 1;
1707 		token_type(top_if) = TOKEN_SKIP_GROUPS;
1708 	}
1709 	return 1;
1710 }
1711 
handle_else(struct stream * stream,struct token ** line,struct token * token)1712 static int handle_else(struct stream *stream, struct token **line, struct token *token)
1713 {
1714 	struct token *top_if = stream->top_if;
1715 	end_group(stream);
1716 
1717 	if (!top_if) {
1718 		nesting_error(stream);
1719 		sparse_error(token->pos, "unmatched #else within stream");
1720 		return 1;
1721 	}
1722 
1723 	if (token_type(top_if) == TOKEN_ELSE) {
1724 		nesting_error(stream);
1725 		sparse_error(token->pos, "#else after #else");
1726 	}
1727 	if (false_nesting) {
1728 		if (token_type(top_if) == TOKEN_IF)
1729 			false_nesting = 0;
1730 	} else {
1731 		false_nesting = 1;
1732 	}
1733 	token_type(top_if) = TOKEN_ELSE;
1734 	return 1;
1735 }
1736 
handle_endif(struct stream * stream,struct token ** line,struct token * token)1737 static int handle_endif(struct stream *stream, struct token **line, struct token *token)
1738 {
1739 	struct token *top_if = stream->top_if;
1740 	end_group(stream);
1741 	if (!top_if) {
1742 		nesting_error(stream);
1743 		sparse_error(token->pos, "unmatched #endif in stream");
1744 		return 1;
1745 	}
1746 	if (false_nesting)
1747 		false_nesting--;
1748 	stream->top_if = top_if->next;
1749 	__free_token(top_if);
1750 	return 1;
1751 }
1752 
handle_warning(struct stream * stream,struct token ** line,struct token * token)1753 static int handle_warning(struct stream *stream, struct token **line, struct token *token)
1754 {
1755 	warning(token->pos, "%s", show_token_sequence(token->next, 0));
1756 	return 1;
1757 }
1758 
handle_error(struct stream * stream,struct token ** line,struct token * token)1759 static int handle_error(struct stream *stream, struct token **line, struct token *token)
1760 {
1761 	sparse_error(token->pos, "%s", show_token_sequence(token->next, 0));
1762 	return 1;
1763 }
1764 
handle_nostdinc(struct stream * stream,struct token ** line,struct token * token)1765 static int handle_nostdinc(struct stream *stream, struct token **line, struct token *token)
1766 {
1767 	/*
1768 	 * Do we have any non-system includes?
1769 	 * Clear them out if so..
1770 	 */
1771 	*sys_includepath = NULL;
1772 	return 1;
1773 }
1774 
/*
 * Adjust the include path section pointers for an insertion.
 *
 * @where: address of the section pointer the new entry is inserted at;
 *         *where is the slot that is about to receive the new entry.
 *
 * Every section pointer that is at or after the insertion slot is moved
 * up by one slot.  The pointers are checked in the order dirafter, sys,
 * isys, angle, and the walk stops right after the pointer that @where
 * itself refers to has been moved.
 */
static inline void update_inc_ptrs(const char ***where)
{

	if (*where <= dirafter_includepath) {
		dirafter_includepath++;
		/* If this was the entry that we prepend, don't
		 * raise the lower entries, even if they are at
		 * the same level. */
		if (where == &dirafter_includepath)
			return;
	}
	if (*where <= sys_includepath) {
		sys_includepath++;
		if (where == &sys_includepath)
			return;
	}
	if (*where <= isys_includepath) {
		isys_includepath++;
		if (where == &isys_includepath)
			return;
	}

	/* angle_includepath is actually never updated, since we
	 * don't support -iquote right now. May change some day. */
	if (*where <= angle_includepath) {
		angle_includepath++;
		if (where == &angle_includepath)
			return;
	}
}
1805 
1806 /* Add a path before 'where' and update the pointers associated with the
1807  * includepath array */
add_path_entry(struct token * token,const char * path,const char *** where)1808 static void add_path_entry(struct token *token, const char *path,
1809 	const char ***where)
1810 {
1811 	const char **dst;
1812 	const char *next;
1813 
1814 	/* Need one free entry.. */
1815 	if (includepath[INCLUDEPATHS-2])
1816 		error_die(token->pos, "too many include path entries");
1817 
1818 	/* check that this is not a duplicate */
1819 	dst = includepath;
1820 	while (*dst) {
1821 		if (strcmp(*dst, path) == 0)
1822 			return;
1823 		dst++;
1824 	}
1825 	next = path;
1826 	dst = *where;
1827 
1828 	update_inc_ptrs(where);
1829 
1830 	/*
1831 	 * Move them all up starting at dst,
1832 	 * insert the new entry..
1833 	 */
1834 	do {
1835 		const char *tmp = *dst;
1836 		*dst = next;
1837 		next = tmp;
1838 		dst++;
1839 	} while (next);
1840 }
1841 
handle_add_include(struct stream * stream,struct token ** line,struct token * token)1842 static int handle_add_include(struct stream *stream, struct token **line, struct token *token)
1843 {
1844 	for (;;) {
1845 		token = token->next;
1846 		if (eof_token(token))
1847 			return 1;
1848 		if (token_type(token) != TOKEN_STRING) {
1849 			warning(token->pos, "expected path string");
1850 			return 1;
1851 		}
1852 		add_path_entry(token, token->string->data, &isys_includepath);
1853 	}
1854 }
1855 
handle_add_isystem(struct stream * stream,struct token ** line,struct token * token)1856 static int handle_add_isystem(struct stream *stream, struct token **line, struct token *token)
1857 {
1858 	for (;;) {
1859 		token = token->next;
1860 		if (eof_token(token))
1861 			return 1;
1862 		if (token_type(token) != TOKEN_STRING) {
1863 			sparse_error(token->pos, "expected path string");
1864 			return 1;
1865 		}
1866 		add_path_entry(token, token->string->data, &sys_includepath);
1867 	}
1868 }
1869 
handle_add_system(struct stream * stream,struct token ** line,struct token * token)1870 static int handle_add_system(struct stream *stream, struct token **line, struct token *token)
1871 {
1872 	for (;;) {
1873 		token = token->next;
1874 		if (eof_token(token))
1875 			return 1;
1876 		if (token_type(token) != TOKEN_STRING) {
1877 			sparse_error(token->pos, "expected path string");
1878 			return 1;
1879 		}
1880 		add_path_entry(token, token->string->data, &dirafter_includepath);
1881 	}
1882 }
1883 
1884 /* Add to end on includepath list - no pointer updates */
add_dirafter_entry(struct token * token,const char * path)1885 static void add_dirafter_entry(struct token *token, const char *path)
1886 {
1887 	const char **dst = includepath;
1888 
1889 	/* Need one free entry.. */
1890 	if (includepath[INCLUDEPATHS-2])
1891 		error_die(token->pos, "too many include path entries");
1892 
1893 	/* Add to the end */
1894 	while (*dst)
1895 		dst++;
1896 	*dst = path;
1897 	dst++;
1898 	*dst = NULL;
1899 }
1900 
handle_add_dirafter(struct stream * stream,struct token ** line,struct token * token)1901 static int handle_add_dirafter(struct stream *stream, struct token **line, struct token *token)
1902 {
1903 	for (;;) {
1904 		token = token->next;
1905 		if (eof_token(token))
1906 			return 1;
1907 		if (token_type(token) != TOKEN_STRING) {
1908 			sparse_error(token->pos, "expected path string");
1909 			return 1;
1910 		}
1911 		add_dirafter_entry(token, token->string->data);
1912 	}
1913 }
1914 
handle_split_include(struct stream * stream,struct token ** line,struct token * token)1915 static int handle_split_include(struct stream *stream, struct token **line, struct token *token)
1916 {
1917 	/*
1918 	 * -I-
1919 	 *  From info gcc:
1920 	 *  Split the include path.  Any directories specified with `-I'
1921 	 *  options before `-I-' are searched only for headers requested with
1922 	 *  `#include "FILE"'; they are not searched for `#include <FILE>'.
1923 	 *  If additional directories are specified with `-I' options after
1924 	 *  the `-I-', those directories are searched for all `#include'
1925 	 *  directives.
1926 	 *  In addition, `-I-' inhibits the use of the directory of the current
1927 	 *  file directory as the first search directory for `#include "FILE"'.
1928 	 */
1929 	quote_includepath = includepath+1;
1930 	angle_includepath = sys_includepath;
1931 	return 1;
1932 }
1933 
1934 /*
1935  * We replace "#pragma xxx" with "__pragma__" in the token
1936  * stream. Just as an example.
1937  *
1938  * We'll just #define that away for now, but the theory here
1939  * is that we can use this to insert arbitrary token sequences
1940  * to turn the pragmas into internal front-end sequences for
1941  * when we actually start caring about them.
1942  *
1943  * So eventually this will turn into some kind of extended
1944  * __attribute__() like thing, except called __pragma__(xxx).
1945  */
handle_pragma(struct stream * stream,struct token ** line,struct token * token)1946 static int handle_pragma(struct stream *stream, struct token **line, struct token *token)
1947 {
1948 	struct token *next = *line;
1949 
1950 	if (match_ident(token->next, &once_ident) && eof_token(token->next->next)) {
1951 		stream->once = 1;
1952 		return 1;
1953 	}
1954 	token->ident = &pragma_ident;
1955 	token->pos.newline = 1;
1956 	token->pos.whitespace = 1;
1957 	token->pos.pos = 1;
1958 	*line = token;
1959 	token->next = next;
1960 	return 0;
1961 }
1962 
/*
 * Handler for "line".  The directive is accepted and ignored for now:
 * none of the arguments are looked at.  Always returns 1.
 */
static int handle_line(struct stream *stream, struct token **line, struct token *token)
{
	(void)stream;
	(void)line;
	(void)token;

	return 1;
}
1970 
/*
 * Handler for "ident".  The directive is accepted and ignored: none of
 * the arguments are looked at.  Always returns 1.
 */
static int handle_ident(struct stream *stream, struct token **line, struct token *token)
{
	(void)stream;
	(void)line;
	(void)token;

	return 1;
}
1975 
handle_nondirective(struct stream * stream,struct token ** line,struct token * token)1976 static int handle_nondirective(struct stream *stream, struct token **line, struct token *token)
1977 {
1978 	sparse_error(token->pos, "unrecognized preprocessor line '%s'", show_token_sequence(token, 0));
1979 	return 1;
1980 }
1981 
expand_has_attribute(struct token * token,struct arg * args)1982 static bool expand_has_attribute(struct token *token, struct arg *args)
1983 {
1984 	struct token *arg = args[0].expanded;
1985 	struct symbol *sym;
1986 
1987 	if (token_type(arg) != TOKEN_IDENT) {
1988 		sparse_error(arg->pos, "identifier expected");
1989 		return false;
1990 	}
1991 
1992 	sym = lookup_symbol(arg->ident, NS_KEYWORD);
1993 	replace_with_bool(token, sym && sym->op && sym->op->attribute);
1994 	return true;
1995 }
1996 
expand_has_builtin(struct token * token,struct arg * args)1997 static bool expand_has_builtin(struct token *token, struct arg *args)
1998 {
1999 	struct token *arg = args[0].expanded;
2000 	struct symbol *sym;
2001 
2002 	if (token_type(arg) != TOKEN_IDENT) {
2003 		sparse_error(arg->pos, "identifier expected");
2004 		return false;
2005 	}
2006 
2007 	sym = lookup_symbol(arg->ident, NS_SYMBOL);
2008 	replace_with_bool(token, sym && sym->builtin);
2009 	return true;
2010 }
2011 
expand_has_extension(struct token * token,struct arg * args)2012 static bool expand_has_extension(struct token *token, struct arg *args)
2013 {
2014 	struct token *arg = args[0].expanded;
2015 	struct ident *ident;
2016 	bool val = false;
2017 
2018 	if (token_type(arg) != TOKEN_IDENT) {
2019 		sparse_error(arg->pos, "identifier expected");
2020 		return false;
2021 	}
2022 
2023 	ident = arg->ident;
2024 	if (ident == &c_alignas_ident)
2025 		val = true;
2026 	else if (ident == &c_alignof_ident)
2027 		val = true;
2028 	else if (ident == &c_generic_selections_ident)
2029 		val = true;
2030 	else if (ident == &c_static_assert_ident)
2031 		val = true;
2032 
2033 	replace_with_bool(token, val);
2034 	return 1;
2035 }
2036 
expand_has_feature(struct token * token,struct arg * args)2037 static bool expand_has_feature(struct token *token, struct arg *args)
2038 {
2039 	struct token *arg = args[0].expanded;
2040 	struct ident *ident;
2041 	bool val = false;
2042 
2043 	if (token_type(arg) != TOKEN_IDENT) {
2044 		sparse_error(arg->pos, "identifier expected");
2045 		return false;
2046 	}
2047 
2048 	ident = arg->ident;
2049 	if (standard >= STANDARD_C11) {
2050 		if (ident == &c_alignas_ident)
2051 			val = true;
2052 		else if (ident == &c_alignof_ident)
2053 			val = true;
2054 		else if (ident == &c_generic_selections_ident)
2055 			val = true;
2056 		else if (ident == &c_static_assert_ident)
2057 			val = true;
2058 	}
2059 
2060 	replace_with_bool(token, val);
2061 	return 1;
2062 }
2063 
create_arglist(struct symbol * sym,int count)2064 static void create_arglist(struct symbol *sym, int count)
2065 {
2066 	struct token *token;
2067 	struct token **next;
2068 
2069 	if (!count)
2070 		return;
2071 
2072 	token = __alloc_token(0);
2073 	token_type(token) = TOKEN_ARG_COUNT;
2074 	token->count.normal = count;
2075 	sym->arglist = token;
2076 	next = &token->next;
2077 
2078 	while (count--) {
2079 		struct token *id, *uses;
2080 		id = __alloc_token(0);
2081 		token_type(id) = TOKEN_IDENT;
2082 		uses = __alloc_token(0);
2083 		token_type(uses) = TOKEN_ARG_COUNT;
2084 		uses->count.normal = 1;
2085 
2086 		*next = id;
2087 		id->next = uses;
2088 		next = &uses->next;
2089 	}
2090 	*next = &eof_token_entry;
2091 }
2092 
init_preprocessor(void)2093 static void init_preprocessor(void)
2094 {
2095 	int i;
2096 	int stream = init_stream(NULL, "preprocessor", -1, includepath);
2097 	static struct {
2098 		const char *name;
2099 		int (*handler)(struct stream *, struct token **, struct token *);
2100 	} normal[] = {
2101 		{ "define",		handle_define },
2102 		{ "weak_define",	handle_weak_define },
2103 		{ "strong_define",	handle_strong_define },
2104 		{ "undef",		handle_undef },
2105 		{ "strong_undef",	handle_strong_undef },
2106 		{ "warning",		handle_warning },
2107 		{ "error",		handle_error },
2108 		{ "include",		handle_include },
2109 		{ "include_next",	handle_include_next },
2110 		{ "pragma",		handle_pragma },
2111 		{ "line",		handle_line },
2112 		{ "ident",		handle_ident },
2113 
2114 		// our internal preprocessor tokens
2115 		{ "nostdinc",	   handle_nostdinc },
2116 		{ "add_include",   handle_add_include },
2117 		{ "add_isystem",   handle_add_isystem },
2118 		{ "add_system",    handle_add_system },
2119 		{ "add_dirafter",  handle_add_dirafter },
2120 		{ "split_include", handle_split_include },
2121 		{ "argv_include",  handle_argv_include },
2122 	}, special[] = {
2123 		{ "ifdef",	handle_ifdef },
2124 		{ "ifndef",	handle_ifndef },
2125 		{ "else",	handle_else },
2126 		{ "endif",	handle_endif },
2127 		{ "if",		handle_if },
2128 		{ "elif",	handle_elif },
2129 	};
2130 	static struct {
2131 		const char *name;
2132 		void (*expand_simple)(struct token *);
2133 		bool (*expand)(struct token *, struct arg *args);
2134 	} dynamic[] = {
2135 		{ "__LINE__",		expand_line },
2136 		{ "__FILE__",		expand_file },
2137 		{ "__BASE_FILE__",	expand_basefile },
2138 		{ "__DATE__",		expand_date },
2139 		{ "__TIME__",		expand_time },
2140 		{ "__COUNTER__",	expand_counter },
2141 		{ "__INCLUDE_LEVEL__",	expand_include_level },
2142 		{ "__has_attribute",	NULL, expand_has_attribute },
2143 		{ "__has_builtin",	NULL, expand_has_builtin },
2144 		{ "__has_extension",	NULL, expand_has_extension },
2145 		{ "__has_feature",	NULL, expand_has_feature },
2146 	};
2147 
2148 	for (i = 0; i < ARRAY_SIZE(normal); i++) {
2149 		struct symbol *sym;
2150 		sym = create_symbol(stream, normal[i].name, SYM_PREPROCESSOR, NS_PREPROCESSOR);
2151 		sym->handler = normal[i].handler;
2152 		sym->normal = 1;
2153 	}
2154 	for (i = 0; i < ARRAY_SIZE(special); i++) {
2155 		struct symbol *sym;
2156 		sym = create_symbol(stream, special[i].name, SYM_PREPROCESSOR, NS_PREPROCESSOR);
2157 		sym->handler = special[i].handler;
2158 		sym->normal = 0;
2159 	}
2160 	for (i = 0; i < ARRAY_SIZE(dynamic); i++) {
2161 		struct symbol *sym;
2162 		sym = create_symbol(stream, dynamic[i].name, SYM_NODE, NS_MACRO);
2163 		sym->expand_simple = dynamic[i].expand_simple;
2164 		if ((sym->expand = dynamic[i].expand) != NULL)
2165 			create_arglist(sym, 1);
2166 	}
2167 
2168 	counter_macro = 0;
2169 }
2170 
handle_preprocessor_line(struct stream * stream,struct token ** line,struct token * start)2171 static void handle_preprocessor_line(struct stream *stream, struct token **line, struct token *start)
2172 {
2173 	int (*handler)(struct stream *, struct token **, struct token *);
2174 	struct token *token = start->next;
2175 	int is_normal = 1;
2176 	int is_cond = 0;	// is one of {is,ifdef,ifndef,elif,else,endif}
2177 
2178 	if (eof_token(token))
2179 		return;
2180 
2181 	if (token_type(token) == TOKEN_IDENT) {
2182 		struct symbol *sym = lookup_symbol(token->ident, NS_PREPROCESSOR);
2183 		if (sym) {
2184 			handler = sym->handler;
2185 			is_normal = sym->normal;
2186 			is_cond = !sym->normal;
2187 		} else {
2188 			handler = handle_nondirective;
2189 		}
2190 	} else if (token_type(token) == TOKEN_NUMBER) {
2191 		handler = handle_line;
2192 	} else {
2193 		handler = handle_nondirective;
2194 	}
2195 
2196 	if (is_normal) {
2197 		dirty_stream(stream);
2198 		if (false_nesting)
2199 			goto out;
2200 	}
2201 
2202 	if (expanding) {
2203 		if (!is_cond || Wpedantic)
2204 			warning(start->pos, "directive in macro's argument list");
2205 	}
2206 	if (!handler(stream, line, token))	/* all set */
2207 		return;
2208 
2209 out:
2210 	free_preprocessor_line(token);
2211 }
2212 
preprocessor_line(struct stream * stream,struct token ** line)2213 static void preprocessor_line(struct stream *stream, struct token **line)
2214 {
2215 	struct token *start = *line, *next;
2216 	struct token **tp = &start->next;
2217 
2218 	for (;;) {
2219 		next = *tp;
2220 		if (next->pos.newline)
2221 			break;
2222 		tp = &next->next;
2223 	}
2224 	*line = next;
2225 	*tp = &eof_token_entry;
2226 	handle_preprocessor_line(stream, line, start);
2227 }
2228 
do_preprocess(struct token ** list)2229 static void do_preprocess(struct token **list)
2230 {
2231 	struct token *next;
2232 
2233 	while (!eof_token(next = scan_next(list))) {
2234 		struct stream *stream = input_streams + next->pos.stream;
2235 
2236 		if (next->pos.newline && match_op(next, '#')) {
2237 			if (!next->pos.noexpand) {
2238 				preprocessor_line(stream, list);
2239 				__free_token(next);	/* Free the '#' token */
2240 				continue;
2241 			}
2242 		}
2243 
2244 		switch (token_type(next)) {
2245 		case TOKEN_STREAMEND:
2246 			if (stream->top_if) {
2247 				nesting_error(stream);
2248 				sparse_error(stream->top_if->pos, "unterminated preprocessor conditional");
2249 				stream->top_if = NULL;
2250 				false_nesting = 0;
2251 			}
2252 			if (!stream->dirty)
2253 				stream->constant = CONSTANT_FILE_YES;
2254 			*list = next->next;
2255 			include_level--;
2256 			continue;
2257 		case TOKEN_STREAMBEGIN:
2258 			*list = next->next;
2259 			include_level++;
2260 			continue;
2261 
2262 		default:
2263 			dirty_stream(stream);
2264 			if (false_nesting) {
2265 				*list = next->next;
2266 				__free_token(next);
2267 				continue;
2268 			}
2269 
2270 			if (token_type(next) != TOKEN_IDENT ||
2271 			    expand_one_symbol(list))
2272 				list = &next->next;
2273 		}
2274 	}
2275 }
2276 
preprocess(struct token * token)2277 struct token * preprocess(struct token *token)
2278 {
2279 	preprocessing = 1;
2280 	init_preprocessor();
2281 	do_preprocess(&token);
2282 
2283 	// Drop all expressions from preprocessing, they're not used any more.
2284 	// This is not true when we have multiple files, though ;/
2285 	// clear_expression_alloc();
2286 	preprocessing = 0;
2287 
2288 	return token;
2289 }
2290 
is_VA_ARGS_token(struct token * token)2291 static int is_VA_ARGS_token(struct token *token)
2292 {
2293 	return (token_type(token) == TOKEN_IDENT) &&
2294 		(token->ident == &__VA_ARGS___ident);
2295 }
2296 
dump_macro(struct symbol * sym)2297 static void dump_macro(struct symbol *sym)
2298 {
2299 	int nargs = sym->arglist ? sym->arglist->count.normal : 0;
2300 	struct token *args[nargs];
2301 	struct token *token;
2302 
2303 	printf("#define %s", show_ident(sym->ident));
2304 	token = sym->arglist;
2305 	if (token) {
2306 		const char *sep = "";
2307 		int narg = 0;
2308 		putchar('(');
2309 		for (; !eof_token(token); token = token->next) {
2310 			if (token_type(token) == TOKEN_ARG_COUNT)
2311 				continue;
2312 			if (is_VA_ARGS_token(token))
2313 				printf("%s...", sep);
2314 			else
2315 				printf("%s%s", sep, show_token(token));
2316 			args[narg++] = token;
2317 			sep = ",";
2318 		}
2319 		putchar(')');
2320 	}
2321 
2322 	token = sym->expansion;
2323 	while (token_type(token) != TOKEN_UNTAINT) {
2324 		struct token *next = token->next;
2325 		if (token->pos.whitespace)
2326 			putchar(' ');
2327 		switch (token_type(token)) {
2328 		case TOKEN_CONCAT:
2329 			printf("##");
2330 			break;
2331 		case TOKEN_STR_ARGUMENT:
2332 			printf("#");
2333 			/* fall-through */
2334 		case TOKEN_QUOTED_ARGUMENT:
2335 		case TOKEN_MACRO_ARGUMENT:
2336 			token = args[token->argnum];
2337 			/* fall-through */
2338 		default:
2339 			printf("%s", show_token(token));
2340 		}
2341 		token = next;
2342 	}
2343 	putchar('\n');
2344 }
2345 
dump_macro_definitions(void)2346 void dump_macro_definitions(void)
2347 {
2348 	struct ident *name;
2349 
2350 	FOR_EACH_PTR(macros, name) {
2351 		struct symbol *sym = lookup_macro(name);
2352 		if (sym)
2353 			dump_macro(sym);
2354 	} END_FOR_EACH_PTR(name);
2355 }
2356