# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Generates a syntax tree from a Mojo IDL file."""

import imp
import os.path
import sys

def _GetDirAbove(dirname):
  """Returns the directory "above" this file containing |dirname| (which must
  also be "above" this file)."""
  path = os.path.abspath(__file__)
  # Strip one trailing path component per iteration until the component just
  # removed is |dirname|; what remains is the directory containing it.
  while True:
    path, tail = os.path.split(path)
    # An empty |tail| means we ran out of components without finding |dirname|.
    assert tail
    if tail == dirname:
      return path

# Prefer an importable "ply"; otherwise fall back to the copy expected under
# the "third_party" directory next to the enclosing "mojo" directory.
# NOTE(review): |imp| is deprecated since Python 3.4 and removed in Python
# 3.12, so this probe needs replacing before this file can run there.
try:
  imp.find_module("ply")
except ImportError:
  sys.path.append(os.path.join(_GetDirAbove("mojo"), "third_party"))
from ply import lex
from ply import yacc

from ..error import Error
from . import ast
from .lexer import Lexer


_MAX_ORDINAL_VALUE = 0xffffffff
_MAX_ARRAY_SIZE = 0xffffffff


class ParseError(Error):
  """Class for errors from the parser."""

  def __init__(self, filename, message, lineno=None, snippet=None):
    # The offending source line (if any) is passed along as the only addendum.
    Error.__init__(self, filename, message, lineno=lineno,
                   addenda=([snippet] if snippet else None))


# We have methods which look like they could be functions:
# pylint: disable=R0201
class Parser(object):
  """Grammar for Mojo IDL files, written as PLY yacc rule methods.

  The docstring of each |p_*| method is the grammar rule that the method
  handles, so those docstrings must contain nothing but grammar; explanatory
  notes for rule methods are written as comments instead.
  """

  def __init__(self, lexer, source, filename):
    """Stores the token list, the source text and the filename.

    Args:
      lexer: Object providing a |tokens| attribute.
      source: The source text being parsed (used for error snippets).
      filename: The filename reported in errors and recorded in AST nodes.
    """
    self.tokens = lexer.tokens
    self.source = source
    self.filename = filename

  # Names of functions
  #
  # In general, we name functions after the left-hand-side of the rule(s) that
  # they handle. E.g., |p_foo_bar| for a rule |foo_bar : ...|.
  #
  # There may be multiple functions handling rules for the same left-hand-side;
  # then we name the functions |p_foo_bar_N| (for left-hand-side |foo_bar|),
  # where N is a number (numbered starting from 1). Note that using multiple
  # functions is actually more efficient than having single functions handle
  # multiple rules (and, e.g., distinguishing them by examining |len(p)|).
  #
  # It's also possible to have a function handling multiple rules with different
  # left-hand-sides. We do not do this.
  #
  # See http://www.dabeaz.com/ply/ply.html#ply_nn25 for more details.

  # TODO(vtl): Get rid of the braces in the module "statement". (Consider
  # renaming "module" -> "package".) Then we'll be able to have a single rule
  # for root (by making module "optional").
  def p_root_1(self, p):
    """root : """
    # Empty file: no module, no imports, no definitions.
    p[0] = ast.Mojom(None, ast.ImportList(), [])

  def p_root_2(self, p):
    """root : root module"""
    # At most one "module" statement, and it must come before everything else.
    if p[1].module is not None:
      raise ParseError(self.filename,
                       "Multiple \"module\" statements not allowed:",
                       p[2].lineno, snippet=self._GetSnippet(p[2].lineno))
    if p[1].import_list.items or p[1].definition_list:
      raise ParseError(
          self.filename,
          "\"module\" statements must precede imports and definitions:",
          p[2].lineno, snippet=self._GetSnippet(p[2].lineno))
    p[0] = p[1]
    p[0].module = p[2]

  def p_root_3(self, p):
    """root : root import"""
    # Imports are only accepted before the first definition.
    if p[1].definition_list:
      raise ParseError(self.filename,
                       "\"import\" statements must precede definitions:",
                       p[2].lineno, snippet=self._GetSnippet(p[2].lineno))
    p[0] = p[1]
    p[0].import_list.Append(p[2])

  def p_root_4(self, p):
    """root : root definition"""
    p[0] = p[1]
    p[0].definition_list.append(p[2])

  def p_import(self, p):
    """import : IMPORT STRING_LITERAL SEMI"""
    # 'eval' the literal to strip the quotes.
    # TODO(vtl): This eval is dubious. We should unquote/unescape ourselves.
    p[0] = ast.Import(eval(p[2]), filename=self.filename, lineno=p.lineno(2))

  def p_module(self, p):
    """module : attribute_section MODULE identifier_wrapped SEMI"""
    p[0] = ast.Module(p[3], p[1], filename=self.filename, lineno=p.lineno(2))

  def p_definition(self, p):
    """definition : struct
                  | union
                  | interface
                  | enum
                  | const"""
    p[0] = p[1]

  def p_attribute_section_1(self, p):
    """attribute_section : """
    # No brackets at all yields None (as opposed to an empty attribute list).
    p[0] = None

  def p_attribute_section_2(self, p):
    """attribute_section : LBRACKET attribute_list RBRACKET"""
    p[0] = p[2]

  def p_attribute_list_1(self, p):
    """attribute_list : """
    p[0] = ast.AttributeList()

  def p_attribute_list_2(self, p):
    """attribute_list : nonempty_attribute_list"""
    p[0] = p[1]

  def p_nonempty_attribute_list_1(self, p):
    """nonempty_attribute_list : attribute"""
    p[0] = ast.AttributeList(p[1])

  def p_nonempty_attribute_list_2(self, p):
    """nonempty_attribute_list : nonempty_attribute_list COMMA attribute"""
    p[0] = p[1]
    p[0].Append(p[3])

  def p_attribute_1(self, p):
    """attribute : NAME EQUALS evaled_literal
                 | NAME EQUALS NAME"""
    p[0] = ast.Attribute(p[1], p[3], filename=self.filename, lineno=p.lineno(1))

  def p_attribute_2(self, p):
    """attribute : NAME"""
    # A bare attribute name is recorded with the value True.
    p[0] = ast.Attribute(p[1], True, filename=self.filename, lineno=p.lineno(1))

  def p_evaled_literal(self, p):
    """evaled_literal : literal"""
    # 'eval' the literal to strip the quotes. Handle keywords "true" and "false"
    # specially since they cannot directly be evaluated to python boolean
    # values.
    # NOTE(review): Like the eval in |p_import|, this evaluates text taken from
    # the file being parsed; it should eventually be replaced by explicit
    # unquoting/number parsing.
    if p[1] == "true":
      p[0] = True
    elif p[1] == "false":
      p[0] = False
    else:
      try:
        p[0] = eval(p[1])
      except NameError:
        # |literal| also admits the DEFAULT keyword, whose text is a bare name
        # and hence not evaluable. Report it as a parse error rather than
        # leaking a NameError to the caller.
        raise ParseError(self.filename,
                         "Invalid attribute value %r" % p[1])

  def p_struct_1(self, p):
    """struct : attribute_section STRUCT NAME LBRACE struct_body RBRACE SEMI"""
    p[0] = ast.Struct(p[3], p[1], p[5])

  def p_struct_2(self, p):
    """struct : attribute_section STRUCT NAME SEMI"""
    # A struct declared without a body gets None as its body.
    p[0] = ast.Struct(p[3], p[1], None)

  def p_struct_body_1(self, p):
    """struct_body : """
    p[0] = ast.StructBody()

  def p_struct_body_2(self, p):
    """struct_body : struct_body const
                   | struct_body enum
                   | struct_body struct_field"""
    p[0] = p[1]
    p[0].Append(p[2])

  def p_struct_field(self, p):
    """struct_field : attribute_section typename NAME ordinal default SEMI"""
    # Arguments in order: name, attributes, ordinal, typename, default.
    p[0] = ast.StructField(p[3], p[1], p[4], p[2], p[5])

  def p_union(self, p):
    """union : attribute_section UNION NAME LBRACE union_body RBRACE SEMI"""
    p[0] = ast.Union(p[3], p[1], p[5])

  def p_union_body_1(self, p):
    """union_body : """
    p[0] = ast.UnionBody()

  def p_union_body_2(self, p):
    """union_body : union_body union_field"""
    p[0] = p[1]
    p[0].Append(p[2])

  def p_union_field(self, p):
    """union_field : attribute_section typename NAME ordinal SEMI"""
    # Arguments in order: name, attributes, ordinal, typename.
    p[0] = ast.UnionField(p[3], p[1], p[4], p[2])

  def p_default_1(self, p):
    """default : """
    p[0] = None

  def p_default_2(self, p):
    """default : EQUALS constant"""
    p[0] = p[2]

  def p_interface(self, p):
    """interface : attribute_section INTERFACE NAME LBRACE interface_body \
                       RBRACE SEMI"""
    p[0] = ast.Interface(p[3], p[1], p[5])

  def p_interface_body_1(self, p):
    """interface_body : """
    p[0] = ast.InterfaceBody()

  def p_interface_body_2(self, p):
    """interface_body : interface_body const
                      | interface_body enum
                      | interface_body method"""
    p[0] = p[1]
    p[0].Append(p[2])

  def p_response_1(self, p):
    """response : """
    # No response clause yields None (as opposed to an empty parameter list).
    p[0] = None

  def p_response_2(self, p):
    """response : RESPONSE LPAREN parameter_list RPAREN"""
    p[0] = p[3]

  def p_method(self, p):
    """method : attribute_section NAME ordinal LPAREN parameter_list RPAREN \
                    response SEMI"""
    # Arguments in order: name, attributes, ordinal, parameters, response.
    p[0] = ast.Method(p[2], p[1], p[3], p[5], p[7])

  def p_parameter_list_1(self, p):
    """parameter_list : """
    p[0] = ast.ParameterList()

  def p_parameter_list_2(self, p):
    """parameter_list : nonempty_parameter_list"""
    p[0] = p[1]

  def p_nonempty_parameter_list_1(self, p):
    """nonempty_parameter_list : parameter"""
    p[0] = ast.ParameterList(p[1])

  def p_nonempty_parameter_list_2(self, p):
    """nonempty_parameter_list : nonempty_parameter_list COMMA parameter"""
    p[0] = p[1]
    p[0].Append(p[3])

  def p_parameter(self, p):
    """parameter : attribute_section typename NAME ordinal"""
    # Arguments in order: name, attributes, ordinal, typename.
    p[0] = ast.Parameter(p[3], p[1], p[4], p[2],
                         filename=self.filename, lineno=p.lineno(3))

  def p_typename(self, p):
    """typename : nonnullable_typename QSTN
                | nonnullable_typename"""
    # Type names are built up as plain strings; a trailing "?" marks the QSTN
    # form.
    if len(p) == 2:
      p[0] = p[1]
    else:
      p[0] = p[1] + "?"

  def p_nonnullable_typename(self, p):
    """nonnullable_typename : basictypename
                            | array
                            | fixed_array
                            | associative_array
                            | interfacerequest"""
    p[0] = p[1]

  def p_basictypename(self, p):
    """basictypename : identifier
                     | ASSOCIATED identifier
                     | handletype"""
    # The ASSOCIATED form is encoded as "asso<identifier>".
    if len(p) == 2:
      p[0] = p[1]
    else:
      p[0] = "asso<" + p[2] + ">"

  def p_handletype(self, p):
    """handletype : HANDLE
                  | HANDLE LANGLE NAME RANGLE"""
    if len(p) == 2:
      p[0] = p[1]
    else:
      if p[3] not in ('data_pipe_consumer',
                      'data_pipe_producer',
                      'message_pipe',
                      'shared_buffer'):
        # Note: We don't enable tracking of line numbers for everything, so we
        # can't use |p.lineno(3)|.
        raise ParseError(self.filename, "Invalid handle type %r:" % p[3],
                         lineno=p.lineno(1),
                         snippet=self._GetSnippet(p.lineno(1)))
      p[0] = "handle<" + p[3] + ">"

  def p_array(self, p):
    """array : ARRAY LANGLE typename RANGLE"""
    # Encoded as the element type followed by "[]".
    p[0] = p[3] + "[]"

  def p_fixed_array(self, p):
    """fixed_array : ARRAY LANGLE typename COMMA INT_CONST_DEC RANGLE"""
    # The size must be in the range [1, _MAX_ARRAY_SIZE].
    value = int(p[5])
    if value == 0 or value > _MAX_ARRAY_SIZE:
      raise ParseError(self.filename, "Fixed array size %d invalid:" % value,
                       lineno=p.lineno(5),
                       snippet=self._GetSnippet(p.lineno(5)))
    # Encoded as the element type followed by "[size]", using the size text as
    # written in the source.
    p[0] = p[3] + "[" + p[5] + "]"

  def p_associative_array(self, p):
    """associative_array : MAP LANGLE identifier COMMA typename RANGLE"""
    # Encoded as "value_type{key_type}".
    p[0] = p[5] + "{" + p[3] + "}"

  def p_interfacerequest(self, p):
    """interfacerequest : identifier AMP
                        | ASSOCIATED identifier AMP"""
    # Encoded as "identifier&", wrapped in "asso<...>" for the ASSOCIATED form.
    if len(p) == 3:
      p[0] = p[1] + "&"
    else:
      p[0] = "asso<" + p[2] + "&>"

  def p_ordinal_1(self, p):
    """ordinal : """
    p[0] = None

  def p_ordinal_2(self, p):
    """ordinal : ORDINAL"""
    # Skip the first character of the token text; the rest is the decimal
    # ordinal value.
    value = int(p[1][1:])
    if value > _MAX_ORDINAL_VALUE:
      raise ParseError(self.filename, "Ordinal value %d too large:" % value,
                       lineno=p.lineno(1),
                       snippet=self._GetSnippet(p.lineno(1)))
    p[0] = ast.Ordinal(value, filename=self.filename, lineno=p.lineno(1))

  def p_enum_1(self, p):
    """enum : attribute_section ENUM NAME LBRACE enum_value_list \
                  RBRACE SEMI
            | attribute_section ENUM NAME LBRACE nonempty_enum_value_list \
                  COMMA RBRACE SEMI"""
    # The second alternative permits a trailing comma after the last value; in
    # both alternatives the value list is p[5].
    p[0] = ast.Enum(p[3], p[1], p[5], filename=self.filename,
                    lineno=p.lineno(2))

  def p_enum_2(self, p):
    """enum : attribute_section ENUM NAME SEMI"""
    # An enum declared without a body gets None as its value list.
    p[0] = ast.Enum(p[3], p[1], None, filename=self.filename,
                    lineno=p.lineno(2))

  def p_enum_value_list_1(self, p):
    """enum_value_list : """
    p[0] = ast.EnumValueList()

  def p_enum_value_list_2(self, p):
    """enum_value_list : nonempty_enum_value_list"""
    p[0] = p[1]

  def p_nonempty_enum_value_list_1(self, p):
    """nonempty_enum_value_list : enum_value"""
    p[0] = ast.EnumValueList(p[1])

  def p_nonempty_enum_value_list_2(self, p):
    """nonempty_enum_value_list : nonempty_enum_value_list COMMA enum_value"""
    p[0] = p[1]
    p[0].Append(p[3])

  def p_enum_value(self, p):
    """enum_value : attribute_section NAME
                  | attribute_section NAME EQUALS int
                  | attribute_section NAME EQUALS identifier_wrapped"""
    # |len(p) == 5| identifies the alternatives with an explicit value.
    p[0] = ast.EnumValue(p[2], p[1], p[4] if len(p) == 5 else None,
                         filename=self.filename, lineno=p.lineno(2))

  def p_const(self, p):
    """const : CONST typename NAME EQUALS constant SEMI"""
    # Arguments in order: name, typename, value.
    p[0] = ast.Const(p[3], p[2], p[5])

  def p_constant(self, p):
    """constant : literal
                | identifier_wrapped"""
    p[0] = p[1]

  def p_identifier_wrapped(self, p):
    """identifier_wrapped : identifier"""
    # Tag the identifier so that it can be told apart from literal text.
    p[0] = ('IDENTIFIER', p[1])

  # TODO(vtl): Make this produce a "wrapped" identifier (probably as an
  # |ast.Identifier|, to be added) and get rid of identifier_wrapped.
  def p_identifier(self, p):
    """identifier : NAME
                  | NAME DOT identifier"""
    # Concatenate all the pieces (including the dot) into a single string.
    p[0] = ''.join(p[1:])

  def p_literal(self, p):
    """literal : int
               | float
               | TRUE
               | FALSE
               | DEFAULT
               | STRING_LITERAL"""
    p[0] = p[1]

  def p_int(self, p):
    """int : int_const
           | PLUS int_const
           | MINUS int_const"""
    # Keep the value as text, with the sign (if any) prepended.
    p[0] = ''.join(p[1:])

  def p_int_const(self, p):
    """int_const : INT_CONST_DEC
                 | INT_CONST_HEX"""
    p[0] = p[1]

  def p_float(self, p):
    """float : FLOAT_CONST
             | PLUS FLOAT_CONST
             | MINUS FLOAT_CONST"""
    # Keep the value as text, with the sign (if any) prepended.
    p[0] = ''.join(p[1:])

  def p_error(self, e):
    # Syntax error handler: |e| is the offending token, or None at end of
    # input. Always raises ParseError.
    if e is None:
      # Unexpected EOF.
      # TODO(vtl): Can we figure out what's missing?
      raise ParseError(self.filename, "Unexpected end of file")

    raise ParseError(self.filename, "Unexpected %r:" % e.value, lineno=e.lineno,
                     snippet=self._GetSnippet(e.lineno))

  def _GetSnippet(self, lineno):
    """Returns line |lineno| (1-based) of the source text."""
    return self.source.split('\n')[lineno - 1]


def Parse(source, filename):
  """Parse source file to AST.

  Args:
    source: The source text as a str.
    filename: The filename that |source| originates from.

  Returns:
    The AST as a mojom.parse.ast.Mojom object.

  Raises:
    ParseError: if |source| does not conform to the grammar in |Parser|.
  """
  lexer = Lexer(filename)
  parser = Parser(lexer, source, filename)

  # Use the lexer/parser objects returned by PLY explicitly rather than relying
  # on the module-level |lex.lexer| and |yacc.parse| globals that these calls
  # also set.
  ply_lexer = lex.lex(object=lexer)
  ply_parser = yacc.yacc(module=parser, debug=0, write_tables=0)

  tree = ply_parser.parse(source, lexer=ply_lexer)
  return tree