from collections import namedtuple
import shlex
import os
import re

from ..common import util, info


# A backslash immediately followed by the platform line separator.
CONTINUATION = '\\' + os.linesep

# An identifier is a run of word characters containing at least one letter.
IDENTIFIER = r'(?:\w*[a-zA-Z]\w*)'
IDENTIFIER_RE = re.compile('^' + IDENTIFIER + '$')


def _coerce_str(value):
    """Return str(value) stripped of whitespace, or '' for any falsy value."""
    if not value:
        return ''
    return str(value).strip()


#############################
# directives

# NOTE: The patterns below are compiled with re.VERBOSE, so whitespace
# inside them (including the space in "__DATE __") is not significant.

DIRECTIVE_START = r'''
    (?:
      ^ \s*
      [#] \s*
      )'''
DIRECTIVE_TEXT = r'''
    (?:
      (?: \s+ ( .*\S ) )?
      \s* $
      )'''
DIRECTIVE = rf'''
    (?:
      {DIRECTIVE_START}
      (
        include |
        error | warning |
        pragma |
        define | undef |
        if | ifdef | ifndef | elseif | else | endif |
        __FILE__ | __LINE__ | __DATE __ | __TIME__ | __TIMESTAMP__
        )
      {DIRECTIVE_TEXT}
      )'''
#       (?:
#        [^\\\n] |
#        \\ [^\n] |
#        \\ \n
#        )+
#      ) \n
#     )'''
DIRECTIVE_RE = re.compile(DIRECTIVE, re.VERBOSE)

DEFINE = rf'''
    (?:
      {DIRECTIVE_START} define \s+
      (?:
        ( \w*[a-zA-Z]\w* )
        (?: \s* [(] ([^)]*) [)] )?
        )
      {DIRECTIVE_TEXT}
      )'''
DEFINE_RE = re.compile(DEFINE, re.VERBOSE)


def parse_directive(line):
    """Return the appropriate directive for the given line.

    The line is stripped, any whitespace between a leading "#" and the
    directive name is removed, and runs of spaces are collapsed to a
    single space before the text is handed to _parse_directive().

    Raises ValueError if the line is not a supported directive.
    """
    line = line.strip()
    if line.startswith('#'):
        line = '#' + line[1:].lstrip()
    # Collapse each run of spaces in one pass.  (A loop that replaces
    # a single space with a single space would never terminate.)
    directive = re.sub(' {2,}', ' ', line)
    return _parse_directive(directive)


def _parse_directive(line):
    """Return the directive object for the (already normalized) line.

    "define" lines produce a Macro (when a non-empty parameter list is
    present) or a Constant; these are returned without being validated.
    Every other supported directive is returned as an Include,
    IfDirective, or OtherDirective after validate() has been called.

    Raises ValueError if the line is not a supported directive, if
    required text is missing, or if text is present where none is
    allowed.
    """
    m = DEFINE_RE.match(line)
    if m:
        name, args, text = m.groups()
        # NOTE(review): "#define NAME()" captures an empty parameter list,
        # which is falsy, so it is returned as a Constant -- confirm
        # that this is intended.
        if args:
            args = [a.strip() for a in args.split(',')]
            return Macro(name, args, text)
        else:
            return Constant(name, text)

    m = DIRECTIVE_RE.match(line)
    if not m:
        raise ValueError(f'unsupported directive {line!r}')
    kind, text = m.groups()
    if not text:
        # Only "else" and "endif" may appear without any text.
        if kind not in ('else', 'endif'):
            raise ValueError(f'missing text in directive {line!r}')
    elif kind in ('else', 'endif', 'define'):
        # A "define" only gets here if DEFINE_RE rejected it
        # (e.g. the name is not an identifier).
        raise ValueError(f'unexpected text in directive {line!r}')
    if kind == 'include':
        directive = Include(text)
    elif kind in IfDirective.KINDS:
        directive = IfDirective(kind, text)
    else:
        directive = OtherDirective(kind, text)
    directive.validate()
    return directive


class PreprocessorDirective(util._NTBase):
    """The base class for directives.

    Subclasses combine this class with a namedtuple whose first field
    is "kind".
    """

    __slots__ = ()

    # Every directive kind that may be represented.
    KINDS = frozenset([
        'include',
        'pragma',
        'error', 'warning',
        'define', 'undef',
        'if', 'ifdef', 'ifndef', 'elseif', 'else', 'endif',
        '__FILE__', '__DATE__', '__LINE__', '__TIME__', '__TIMESTAMP__',
        ])

    @property
    def text(self):
        """The non-blank fields after "kind", space-joined (or None)."""
        return ' '.join(v for v in self[1:] if v and v.strip()) or None

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data).

        Raises TypeError if "kind" is missing and ValueError if it is
        not one of KINDS.
        """
        super().validate()

        if not self.kind:
            raise TypeError('missing kind')
        elif self.kind not in self.KINDS:
            raise ValueError(f'unsupported kind {self.kind!r}')

        # text can be anything, including None.
class Constant(PreprocessorDirective,
               namedtuple('Constant', 'kind name value')):
    """A single "constant" directive ("define")."""

    __slots__ = ()

    def __new__(cls, name, value=None):
        # "kind" is always "define"; blank name/value are stored as None.
        self = super().__new__(
            cls,
            'define',
            name=_coerce_str(name) or None,
            value=_coerce_str(value) or None,
        )
        return self

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data).

        Raises TypeError if the name is missing and ValueError if it is
        not an identifier.
        """
        super().validate()

        if not self.name:
            raise TypeError('missing name')
        elif not IDENTIFIER_RE.match(self.name):
            raise ValueError(f'name must be identifier, got {self.name!r}')

        # value can be anything, including None


class Macro(PreprocessorDirective,
            namedtuple('Macro', 'kind name args body')):
    """A single "macro" directive ("define")."""

    __slots__ = ()

    def __new__(cls, name, args, body=None):
        # "args" must be a string or an iterable of strings (or "empty").
        if isinstance(args, str):
            args = [v.strip() for v in args.split(',')]
        if args:
            # Blank entries are kept as None so that validate() can
            # report them.
            args = tuple(_coerce_str(a) or None for a in args)
        self = super().__new__(
            cls,
            kind='define',
            name=_coerce_str(name) or None,
            args=args if args else (),
            body=_coerce_str(body) or None,
        )
        return self

    @property
    def text(self):
        """The macro as it follows "define": "name(args)" plus any body."""
        if self.body:
            return f'{self.name}({", ".join(self.args)}) {self.body}'
        else:
            return f'{self.name}({", ".join(self.args)})'

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data).

        Raises TypeError if the name is missing and ValueError if the
        name or any arg is not an identifier (or an arg is missing).
        """
        super().validate()

        if not self.name:
            raise TypeError('missing name')
        elif not IDENTIFIER_RE.match(self.name):
            raise ValueError(f'name must be identifier, got {self.name!r}')

        for arg in self.args:
            if not arg:
                raise ValueError(f'missing arg in {self.args}')
            elif not IDENTIFIER_RE.match(arg):
                raise ValueError(f'arg must be identifier, got {arg!r}')

        # body can be anything, including None


class IfDirective(PreprocessorDirective,
                  namedtuple('IfDirective', 'kind condition')):
    """A single conditional directive (e.g. "if", "ifdef").

    This only includes directives that actually provide conditions.  The
    related directives "else" and "endif" are covered by OtherDirective
    instead.
    """

    __slots__ = ()

    KINDS = frozenset([
        'if',
        'ifdef',
        'ifndef',
        'elseif',
        ])

    @classmethod
    def _condition_from_raw(cls, raw, kind):
        """Return the normalized condition string (or None if blank).

        For "ifdef" and "ifndef" the raw text is wrapped in
        "defined(...)" and "! defined(...)" respectively.
        """
        #return Condition.from_raw(raw, _kind=kind)
        condition = _coerce_str(raw)
        if not condition:
            return None

        if kind == 'ifdef':
            condition = f'defined({condition})'
        elif kind == 'ifndef':
            condition = f'! defined({condition})'

        return condition

    def __new__(cls, kind, condition):
        kind = _coerce_str(kind)
        self = super().__new__(
            cls,
            kind=kind or None,
            condition=cls._condition_from_raw(condition, kind),
        )
        return self

    @property
    def text(self):
        """The condition as originally written in the directive.

        This undoes the wrapping applied by _condition_from_raw().
        It is None if there is no condition.
        """
        if not self.condition:
            # Nothing to unwrap; slicing None would raise TypeError.
            return None
        if self.kind == 'ifdef':
            return self.condition[len('defined('):-1]
        elif self.kind == 'ifndef':
            return self.condition[len('! defined('):-1]
        else:
            return self.condition
            #return str(self.condition)

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data).

        Raises TypeError if the condition is missing.
        """
        super().validate()

        if not self.condition:
            raise TypeError('missing condition')
        #else:
        #    for cond in self.condition:
        #        if not cond:
        #            raise ValueError(f'missing condition in {self.condition}')
        #        cond.validate()
        #    if self.kind in ('ifdef', 'ifndef'):
        #        if len(self.condition) != 1:
        #            raise ValueError('too many condition')
        #        if self.kind == 'ifdef':
        #            if not self.condition[0].startswith('defined '):
        #                raise ValueError('bad condition')
        #        else:
        #            if not self.condition[0].startswith('! defined '):
        #                raise ValueError('bad condition')


class Include(PreprocessorDirective,
              namedtuple('Include', 'kind file')):
    """A single "include" directive.

    Supported "file" values follow either the bracket style
    (<stdio>) or the double-quote style ("spam.h").
    """

    __slots__ = ()

    def __new__(cls, file):
        self = super().__new__(
            cls,
            kind='include',
            file=_coerce_str(file) or None,
        )
        return self

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data).

        Raises TypeError if the file is missing.
        """
        super().validate()

        if not self.file:
            raise TypeError('missing file')


class OtherDirective(PreprocessorDirective,
                     namedtuple('OtherDirective', 'kind text')):
    """A single directive not covered by another class.

    This includes the "else", "endif", and "undef" directives, which are
    otherwise inherently related to the directives covered by the
    Constant, Macro, and IfDirective classes.

    Note that all directives must have a text value, except for "else"
    and "endif" (which must have no text).
    """

    __slots__ = ()

    KINDS = PreprocessorDirective.KINDS - {'include', 'define'} - IfDirective.KINDS

    def __new__(cls, kind, text):
        self = super().__new__(
            cls,
            kind=_coerce_str(kind) or None,
            text=_coerce_str(text) or None,
        )
        return self

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data).

        Raises ValueError if "else"/"endif" has text and TypeError if
        any other kind has none.
        """
        super().validate()

        if self.text:
            if self.kind in ('else', 'endif'):
                raise ValueError('unexpected text in directive')
        elif self.kind not in ('else', 'endif'):
            raise TypeError('missing text')


#############################
# iterating lines

def _recompute_conditions(directive, ifstack):
    """Update "ifstack" in place for the directive; return the conditions.

    Each "ifstack" entry is a (negated, active) pair for one open
    conditional block: "negated" holds the conditions of the earlier
    branches of that block and "active" is the condition of the current
    branch (None for "else").

    The returned tuple has, for every open block from the outermost in,
    "! (cond)" for each negated condition followed by the active one.
    """
    if directive.kind in ('if', 'ifdef', 'ifndef'):
        # A new (nested) block.
        ifstack.append(
                ([], directive.condition))
    elif directive.kind == 'elseif':
        if ifstack:
            negated, active = ifstack.pop()
            if active:
                negated.append(active)
        else:
            # There is no open block; start from scratch.
            negated = []
        ifstack.append(
                (negated, directive.condition))
    elif directive.kind == 'else':
        if ifstack:
            negated, active = ifstack.pop()
            if active:
                negated.append(active)
            ifstack.append(
                    (negated, None))
    elif directive.kind == 'endif':
        if ifstack:
            ifstack.pop()

    conditions = []
    for negated, active in ifstack:
        for condition in negated:
            conditions.append(f'! ({condition})')
        if active:
            conditions.append(active)
    return tuple(conditions)


def _iter_clean_lines(lines):
    """Yield (lno, line) with continuations joined and comments removed.

    "lno" is the 1-based number of the last source line that
    contributed to the yielded line.  Each comment is replaced by a
    single space.

    Raises Exception if a "/*" comment is never closed.
    """
    lines = iter(enumerate(lines, 1))
    for lno, line in lines:
        # Handle line continuations.
        while line.endswith(CONTINUATION):
            try:
                lno, _line = next(lines)
            except StopIteration:
                break
            line = line[:-len(CONTINUATION)] + ' ' + _line

        # Deal with comments.
        after = line
        line = ''
        while True:
            # Look for a comment.
            before, begin, remainder = after.partition('/*')
            if '//' in before:
                # The rest of the line is a comment.
                before, _, _ = before.partition('//')
                line += before + ' '  # per the C99 spec
                break
            line += before
            if not begin:
                break
            line += ' '  # per the C99 spec

            # Go until we find the end of the comment.
            _, end, after = remainder.partition('*/')
            while not end:
                try:
                    lno, remainder = next(lines)
                except StopIteration:
                    # Drop the StopIteration context; it is not
                    # relevant to the caller.
                    raise Exception('unterminated comment') from None
                _, end, after = remainder.partition('*/')

        yield lno, line


def iter_lines(lines, *,
               _iter_clean_lines=_iter_clean_lines,
               _parse_directive=_parse_directive,
               _recompute_conditions=_recompute_conditions,
               ):
    """Yield (lno, line, directive, active conditions) for each given line.

    This is effectively a subset of the operations taking place in
    translation phases 2-4 from the C99 spec (ISO/IEC 9899:TC2); see
    section 5.1.1.2.  Line continuations are removed and comments
    replaced with a single space.  (In both cases "lno" will be the last
    line involved.)  Otherwise each line is returned as-is.

    "lno" is the (1-indexed) line number for the line.

    "directive" will be a PreprocessorDirective or None, depending on
    whether or not there is a directive on the line.

    "active conditions" is the set of preprocessor conditions (e.g.
    "defined()") under which the current line of code will be included
    in compilation.  That set is derived from every conditional
    directive block (e.g. "if defined()", "ifdef", "else") containing
    that line.  That includes nested directives.  Note that the
    current line does not affect the active conditions for itself.
    It only impacts subsequent lines.  That applies to directives
    that close blocks (e.g. "endif") just as much as conditional
    directives.  Also note that "else" and "elseif" directives
    update the active conditions (for later lines), rather than
    adding to them.
    """
    ifstack = []
    conditions = ()
    for lno, line in _iter_clean_lines(lines):
        stripped = line.strip()
        if not stripped.startswith('#'):
            yield lno, line, None, conditions
            continue

        directive = '#' + stripped[1:].lstrip()
        # Collapse each run of spaces in one pass.  (A loop that replaces
        # a single space with a single space would never terminate.)
        directive = re.sub(' {2,}', ' ', directive)
        directive = _parse_directive(directive)
        yield lno, line, directive, conditions

        # Only these directives change the conditions for later lines.
        if (directive.kind in ('else', 'endif')
                or isinstance(directive, IfDirective)):
            conditions = _recompute_conditions(directive, ifstack)


#############################
# running (platform-specific?)

def _gcc(filename, *,
         _get_argv=(lambda: _get_gcc_argv()),
         _run=util.run_cmd,
         ):
    """Return the result of running the compiler with "-E" on the file."""
    argv = _get_argv()
    argv.extend([
            '-E', filename,
            ])
    output = _run(argv)
    return output


def _get_gcc_argv(*,
                  _open=open,
                  _run=util.run_cmd,
                  ):
    """Return the compiler command and flags, as reported by make.

    A helper makefile is written out and make is asked to print the
    CC and PY_CORE_CFLAGS variables of "Makefile", one per line.
    """
    # NOTE(review): this is a fixed, predictable path in /tmp -- consider
    # a private temporary file.
    with _open('/tmp/print.mk', 'w') as tmpfile:
        tmpfile.write('print-%:\n')
        #tmpfile.write('\t@echo $* = $($*)\n')
        tmpfile.write('\t@echo $($*)\n')
    argv = ['/usr/bin/make',
            '-f', 'Makefile',
            '-f', '/tmp/print.mk',
            'print-CC',
            'print-PY_CORE_CFLAGS',
            ]
    output = _run(argv)
    # Exactly two lines of output are expected (one per variable).
    gcc, cflags = output.strip().splitlines()
    argv = shlex.split(gcc.strip())
    cflags = shlex.split(cflags.strip())
    return argv + cflags


def run(filename, *,
        _gcc=_gcc,
        ):
    """Return the text of the given file after running the preprocessor."""
    return _gcc(filename)