# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Generates a syntax tree from a Mojo IDL file."""

import os.path
import sys

# Make the bundled 'third_party' directory (seven levels above this file)
# importable so that |ply| can be found.
_current_dir = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(
    1, os.path.join(_current_dir, *([os.pardir] * 7 + ['third_party'])))
from ply import lex
from ply import yacc

from ..error import Error
from . import ast
from .lexer import Lexer


# Largest ordinal value accepted by |Parser.p_ordinal_2|.
_MAX_ORDINAL_VALUE = 0xffffffff
# Largest fixed-array size accepted by |Parser.p_fixed_array|.
_MAX_ARRAY_SIZE = 0xffffffff


class ParseError(Error):
  """Class for errors from the parser."""

  def __init__(self, filename, message, lineno=None, snippet=None):
    """Initializes the error.

    Args:
      filename: The name of the file being parsed.
      message: The error message.
      lineno: Optional line number the error refers to.
      snippet: Optional source line; if truthy it is forwarded to |Error| as
          the single entry of |addenda|, otherwise |addenda| is None.
    """
    Error.__init__(self, filename, message, lineno=lineno,
                   addenda=([snippet] if snippet else None))


# We have methods which look like they could be functions:
# pylint: disable=R0201
class Parser(object):
  # NOTE: This object is handed to |yacc.yacc(module=...)| (see |Parse|). The
  # docstrings of the |p_*| methods are the grammar rules that ply reads, so
  # they must contain nothing but grammar; explanatory text for those methods
  # lives in '#' comments instead.

  def __init__(self, lexer, source, filename):
    """Initializes the parser.

    Args:
      lexer: Object providing the |tokens| attribute used by the grammar.
      source: The full source text (used to extract error snippets).
      filename: The filename reported in errors and stored on AST nodes.
    """
    self.tokens = lexer.tokens
    self.source = source
    self.filename = filename

  # Names of functions
  #
  # In general, we name functions after the left-hand-side of the rule(s) that
  # they handle. E.g., |p_foo_bar| for a rule |foo_bar : ...|.
  #
  # There may be multiple functions handling rules for the same left-hand-side;
  # then we name the functions |p_foo_bar_N| (for left-hand-side |foo_bar|),
  # where N is a number (numbered starting from 1). Note that using multiple
  # functions is actually more efficient than having single functions handle
  # multiple rules (and, e.g., distinguishing them by examining |len(p)|).
  #
  # It's also possible to have a function handling multiple rules with different
  # left-hand-sides. We do not do this.
  #
  # See http://www.dabeaz.com/ply/ply.html#ply_nn25 for more details.

  # TODO(vtl): Get rid of the braces in the module "statement". (Consider
  # renaming "module" -> "package".) Then we'll be able to have a single rule
  # for root (by making module "optional").
  def p_root_1(self, p):
    """root : """
    # Empty file: no module, empty import list, no definitions.
    p[0] = ast.Mojom(None, ast.ImportList(), [])

  # At most one "module" statement is allowed, and it must come before any
  # import or definition.
  def p_root_2(self, p):
    """root : root module"""
    if p[1].module is not None:
      raise ParseError(self.filename,
                       "Multiple \"module\" statements not allowed:",
                       p[2].lineno, snippet=self._GetSnippet(p[2].lineno))
    if p[1].import_list.items or p[1].definition_list:
      raise ParseError(
          self.filename,
          "\"module\" statements must precede imports and definitions:",
          p[2].lineno, snippet=self._GetSnippet(p[2].lineno))
    p[0] = p[1]
    p[0].module = p[2]

  # Imports must come before any definition.
  def p_root_3(self, p):
    """root : root import"""
    if p[1].definition_list:
      raise ParseError(self.filename,
                       "\"import\" statements must precede definitions:",
                       p[2].lineno, snippet=self._GetSnippet(p[2].lineno))
    p[0] = p[1]
    p[0].import_list.Append(p[2])

  def p_root_4(self, p):
    """root : root definition"""
    p[0] = p[1]
    p[0].definition_list.append(p[2])

  def p_import(self, p):
    """import : attribute_section IMPORT STRING_LITERAL SEMI"""
    # 'eval' the literal to strip the quotes.
    # TODO(vtl): This eval is dubious. We should unquote/unescape ourselves.
    p[0] = ast.Import(p[1], self._EvalLiteral(p[3], lineno=p.lineno(2)),
                      filename=self.filename, lineno=p.lineno(2))

  def p_module(self, p):
    """module : attribute_section MODULE identifier_wrapped SEMI"""
    p[0] = ast.Module(p[3], p[1], filename=self.filename, lineno=p.lineno(2))

  def p_definition(self, p):
    """definition : struct
                  | union
                  | interface
                  | enum
                  | const"""
    p[0] = p[1]

  # An absent attribute section yields None; an empty "[]" yields an empty
  # |ast.AttributeList| (see |p_attribute_list_1|).
  def p_attribute_section_1(self, p):
    """attribute_section : """
    p[0] = None

  def p_attribute_section_2(self, p):
    """attribute_section : LBRACKET attribute_list RBRACKET"""
    p[0] = p[2]

  def p_attribute_list_1(self, p):
    """attribute_list : """
    p[0] = ast.AttributeList()

  def p_attribute_list_2(self, p):
    """attribute_list : nonempty_attribute_list"""
    p[0] = p[1]

  def p_nonempty_attribute_list_1(self, p):
    """nonempty_attribute_list : attribute"""
    p[0] = ast.AttributeList(p[1])

  def p_nonempty_attribute_list_2(self, p):
    """nonempty_attribute_list : nonempty_attribute_list COMMA attribute"""
    p[0] = p[1]
    p[0].Append(p[3])

  # The value is either an evaluated literal or, for |NAME EQUALS NAME|, the
  # right-hand name as a plain string.
  def p_attribute_1(self, p):
    """attribute : NAME EQUALS evaled_literal
                 | NAME EQUALS NAME"""
    p[0] = ast.Attribute(p[1], p[3], filename=self.filename, lineno=p.lineno(1))

  # A bare attribute name is given the value True.
  def p_attribute_2(self, p):
    """attribute : NAME"""
    p[0] = ast.Attribute(p[1], True, filename=self.filename, lineno=p.lineno(1))

  def p_evaled_literal(self, p):
    """evaled_literal : literal"""
    # 'eval' the literal to strip the quotes. Handle keywords "true" and "false"
    # specially since they cannot directly be evaluated to python boolean
    # values.
    if p[1] == "true":
      p[0] = True
    elif p[1] == "false":
      p[0] = False
    else:
      # |literal| also admits DEFAULT, which is not a Python expression;
      # |_EvalLiteral| reports that as a ParseError rather than letting a raw
      # NameError escape. No line number is available for this non-terminal.
      p[0] = self._EvalLiteral(p[1])

  def p_struct_1(self, p):
    """struct : attribute_section STRUCT NAME LBRACE struct_body RBRACE SEMI"""
    p[0] = ast.Struct(p[3], p[1], p[5])

  # A struct declared without a body gets None in place of a body.
  def p_struct_2(self, p):
    """struct : attribute_section STRUCT NAME SEMI"""
    p[0] = ast.Struct(p[3], p[1], None)

  def p_struct_body_1(self, p):
    """struct_body : """
    p[0] = ast.StructBody()

  def p_struct_body_2(self, p):
    """struct_body : struct_body const
                   | struct_body enum
                   | struct_body struct_field"""
    p[0] = p[1]
    p[0].Append(p[2])

  # Constructor arguments, in order: NAME, attribute_section, ordinal,
  # typename, default.
  def p_struct_field(self, p):
    """struct_field : attribute_section typename NAME ordinal default SEMI"""
    p[0] = ast.StructField(p[3], p[1], p[4], p[2], p[5])

  def p_union(self, p):
    """union : attribute_section UNION NAME LBRACE union_body RBRACE SEMI"""
    p[0] = ast.Union(p[3], p[1], p[5])

  def p_union_body_1(self, p):
    """union_body : """
    p[0] = ast.UnionBody()

  def p_union_body_2(self, p):
    """union_body : union_body union_field"""
    p[0] = p[1]
    p[0].Append(p[2])

  # Constructor arguments, in order: NAME, attribute_section, ordinal,
  # typename.
  def p_union_field(self, p):
    """union_field : attribute_section typename NAME ordinal SEMI"""
    p[0] = ast.UnionField(p[3], p[1], p[4], p[2])

  def p_default_1(self, p):
    """default : """
    p[0] = None

  def p_default_2(self, p):
    """default : EQUALS constant"""
    p[0] = p[2]

  def p_interface(self, p):
    """interface : attribute_section INTERFACE NAME LBRACE interface_body \
                       RBRACE SEMI"""
    p[0] = ast.Interface(p[3], p[1], p[5])

  def p_interface_body_1(self, p):
    """interface_body : """
    p[0] = ast.InterfaceBody()

  def p_interface_body_2(self, p):
    """interface_body : interface_body const
                      | interface_body enum
                      | interface_body method"""
    p[0] = p[1]
    p[0].Append(p[2])

  # A method without a response clause gets None; one with a response gets
  # the response's parameter list (possibly empty).
  def p_response_1(self, p):
    """response : """
    p[0] = None

  def p_response_2(self, p):
    """response : RESPONSE LPAREN parameter_list RPAREN"""
    p[0] = p[3]

  # Constructor arguments, in order: NAME, attribute_section, ordinal,
  # parameter_list, response.
  def p_method(self, p):
    """method : attribute_section NAME ordinal LPAREN parameter_list RPAREN \
                    response SEMI"""
    p[0] = ast.Method(p[2], p[1], p[3], p[5], p[7])

  def p_parameter_list_1(self, p):
    """parameter_list : """
    p[0] = ast.ParameterList()

  def p_parameter_list_2(self, p):
    """parameter_list : nonempty_parameter_list"""
    p[0] = p[1]

  def p_nonempty_parameter_list_1(self, p):
    """nonempty_parameter_list : parameter"""
    p[0] = ast.ParameterList(p[1])

  def p_nonempty_parameter_list_2(self, p):
    """nonempty_parameter_list : nonempty_parameter_list COMMA parameter"""
    p[0] = p[1]
    p[0].Append(p[3])

  # Constructor arguments, in order: NAME, attribute_section, ordinal,
  # typename.
  def p_parameter(self, p):
    """parameter : attribute_section typename NAME ordinal"""
    p[0] = ast.Parameter(p[3], p[1], p[4], p[2],
                         filename=self.filename, lineno=p.lineno(3))

  # Type names are represented as plain strings. A nullable type is the
  # non-nullable type's string with "?" appended.
  def p_typename(self, p):
    """typename : nonnullable_typename QSTN
                | nonnullable_typename"""
    if len(p) == 2:
      p[0] = p[1]
    else:
      p[0] = p[1] + "?"

  def p_nonnullable_typename(self, p):
    """nonnullable_typename : basictypename
                            | array
                            | fixed_array
                            | associative_array
                            | interfacerequest"""
    p[0] = p[1]

  # |ASSOCIATED identifier| is encoded as "asso<identifier>".
  def p_basictypename(self, p):
    """basictypename : identifier
                     | ASSOCIATED identifier
                     | handletype"""
    if len(p) == 2:
      p[0] = p[1]
    else:
      p[0] = "asso<" + p[2] + ">"

  # A bare HANDLE is passed through unchanged; a specialized handle is encoded
  # as "handle<kind>", where kind must be one of the four names below.
  def p_handletype(self, p):
    """handletype : HANDLE
                  | HANDLE LANGLE NAME RANGLE"""
    if len(p) == 2:
      p[0] = p[1]
    else:
      if p[3] not in ('data_pipe_consumer',
                      'data_pipe_producer',
                      'message_pipe',
                      'shared_buffer'):
        # Note: We don't enable tracking of line numbers for everything, so we
        # can't use |p.lineno(3)|.
        raise ParseError(self.filename, "Invalid handle type %r:" % p[3],
                         lineno=p.lineno(1),
                         snippet=self._GetSnippet(p.lineno(1)))
      p[0] = "handle<" + p[3] + ">"

  # array<T> is encoded as "T[]".
  def p_array(self, p):
    """array : ARRAY LANGLE typename RANGLE"""
    p[0] = p[3] + "[]"

  # array<T, N> is encoded as "T[N]"; N must be in [1, _MAX_ARRAY_SIZE].
  def p_fixed_array(self, p):
    """fixed_array : ARRAY LANGLE typename COMMA INT_CONST_DEC RANGLE"""
    value = int(p[5])
    if value == 0 or value > _MAX_ARRAY_SIZE:
      raise ParseError(self.filename, "Fixed array size %d invalid:" % value,
                       lineno=p.lineno(5),
                       snippet=self._GetSnippet(p.lineno(5)))
    p[0] = p[3] + "[" + p[5] + "]"

  # map<K, V> is encoded as "V{K}" (value type first, key in braces).
  def p_associative_array(self, p):
    """associative_array : MAP LANGLE identifier COMMA typename RANGLE"""
    p[0] = p[5] + "{" + p[3] + "}"

  # |identifier AMP| is encoded as "identifier&"; the associated form is
  # encoded as "asso<identifier&>".
  def p_interfacerequest(self, p):
    """interfacerequest : identifier AMP
                        | ASSOCIATED identifier AMP"""
    if len(p) == 3:
      p[0] = p[1] + "&"
    else:
      p[0] = "asso<" + p[2] + "&>"

  def p_ordinal_1(self, p):
    """ordinal : """
    p[0] = None

  def p_ordinal_2(self, p):
    """ordinal : ORDINAL"""
    # The first character of the token is skipped (presumably the "@" marker;
    # the token definition lives in the lexer) and the rest is the value.
    value = int(p[1][1:])
    if value > _MAX_ORDINAL_VALUE:
      raise ParseError(self.filename, "Ordinal value %d too large:" % value,
                       lineno=p.lineno(1),
                       snippet=self._GetSnippet(p.lineno(1)))
    p[0] = ast.Ordinal(value, filename=self.filename, lineno=p.lineno(1))

  # The second alternative permits a trailing comma after the last enum value.
  # In both alternatives the value list is p[5].
  def p_enum_1(self, p):
    """enum : attribute_section ENUM NAME LBRACE enum_value_list \
                  RBRACE SEMI
            | attribute_section ENUM NAME LBRACE nonempty_enum_value_list \
                  COMMA RBRACE SEMI"""
    p[0] = ast.Enum(p[3], p[1], p[5], filename=self.filename,
                    lineno=p.lineno(2))

  # An enum declared without a body gets None in place of a value list.
  def p_enum_2(self, p):
    """enum : attribute_section ENUM NAME SEMI"""
    p[0] = ast.Enum(p[3], p[1], None, filename=self.filename,
                    lineno=p.lineno(2))

  def p_enum_value_list_1(self, p):
    """enum_value_list : """
    p[0] = ast.EnumValueList()

  def p_enum_value_list_2(self, p):
    """enum_value_list : nonempty_enum_value_list"""
    p[0] = p[1]

  def p_nonempty_enum_value_list_1(self, p):
    """nonempty_enum_value_list : enum_value"""
    p[0] = ast.EnumValueList(p[1])

  def p_nonempty_enum_value_list_2(self, p):
    """nonempty_enum_value_list : nonempty_enum_value_list COMMA enum_value"""
    p[0] = p[1]
    p[0].Append(p[3])

  # len(p) == 5 exactly when an "= value" part is present; otherwise the
  # enum value's value is None.
  def p_enum_value(self, p):
    """enum_value : attribute_section NAME
                  | attribute_section NAME EQUALS int
                  | attribute_section NAME EQUALS identifier_wrapped"""
    p[0] = ast.EnumValue(p[2], p[1], p[4] if len(p) == 5 else None,
                         filename=self.filename, lineno=p.lineno(2))

  # Constructor arguments, in order: NAME, attribute_section, typename,
  # constant.
  def p_const(self, p):
    """const : attribute_section CONST typename NAME EQUALS constant SEMI"""
    p[0] = ast.Const(p[4], p[1], p[3], p[6])

  def p_constant(self, p):
    """constant : literal
                | identifier_wrapped"""
    p[0] = p[1]

  # Wraps the identifier string in an ('IDENTIFIER', name) tuple so that it
  # can be told apart from a literal, which is a plain string.
  def p_identifier_wrapped(self, p):
    """identifier_wrapped : identifier"""
    p[0] = ('IDENTIFIER', p[1])

  # TODO(vtl): Make this produce a "wrapped" identifier (probably as an
  # |ast.Identifier|, to be added) and get rid of identifier_wrapped.
  def p_identifier(self, p):
    """identifier : NAME
                  | NAME DOT identifier"""
    # Concatenate all right-hand-side pieces, yielding e.g. "a.b.c".
    p[0] = ''.join(p[1:])

  # Literals are kept as their source text (not converted to Python values).
  def p_literal(self, p):
    """literal : int
               | float
               | TRUE
               | FALSE
               | DEFAULT
               | STRING_LITERAL"""
    p[0] = p[1]

  def p_int(self, p):
    """int : int_const
           | PLUS int_const
           | MINUS int_const"""
    # Keep any sign together with the digits as a single string.
    p[0] = ''.join(p[1:])

  def p_int_const(self, p):
    """int_const : INT_CONST_DEC
                 | INT_CONST_HEX"""
    p[0] = p[1]

  def p_float(self, p):
    """float : FLOAT_CONST
             | PLUS FLOAT_CONST
             | MINUS FLOAT_CONST"""
    # Keep any sign together with the digits as a single string.
    p[0] = ''.join(p[1:])

  # Syntax-error hook called by ply; |e| is the offending token, or None when
  # the input ended unexpectedly. Always raises ParseError.
  def p_error(self, e):
    if e is None:
      # Unexpected EOF.
      # TODO(vtl): Can we figure out what's missing?
      raise ParseError(self.filename, "Unexpected end of file")

    raise ParseError(self.filename, "Unexpected %r:" % e.value, lineno=e.lineno,
                     snippet=self._GetSnippet(e.lineno))

  def _EvalLiteral(self, text, lineno=None):
    """Evaluates the source text of a literal as a Python expression.

    NOTE(review): using eval() on text taken from the input file is dubious
    (see the TODO in |p_import|); it is kept here only to preserve existing
    behavior and should be replaced by explicit unquoting/unescaping.

    Args:
      text: The literal's source text.
      lineno: Optional line number to report if evaluation fails.

    Returns:
      The Python value produced by evaluating |text|.

    Raises:
      ParseError: If |text| is not a valid Python expression (e.g., the
          keyword "default", or a string escape Python does not accept).
    """
    try:
      return eval(text)
    except (NameError, SyntaxError, ValueError):
      snippet = self._GetSnippet(lineno)
      message = "Invalid literal %r" % text
      if snippet:
        # Match the other messages, which end in ":" when a snippet follows.
        message += ":"
      raise ParseError(self.filename, message, lineno=lineno, snippet=snippet)

  def _GetSnippet(self, lineno):
    """Returns line |lineno| (1-based) of the source text.

    Args:
      lineno: The 1-based line number, or None/0 if unknown.

    Returns:
      The requested line without its trailing newline, or None if |lineno| is
      unknown or outside the source, so that a bad line number never replaces
      the real error with an IndexError or a misleading snippet.
    """
    lines = self.source.split('\n')
    if not lineno or not 0 < lineno <= len(lines):
      return None
    return lines[lineno - 1]


def Parse(source, filename):
  """Parse source file to AST.

  Args:
    source: The source text as a str.
    filename: The filename that |source| originates from.

  Returns:
    The AST as a mojom.parse.ast.Mojom object.

  Raises:
    ParseError: If |source| is rejected by the grammar actions in |Parser|.
  """
  lexer = Lexer(filename)
  parser = Parser(lexer, source, filename)

  # Use the lexer and parser objects that ply returns instead of relying on
  # the module-level |lex.lexer| / |yacc.parse| globals, which are overwritten
  # by every call.
  ply_lexer = lex.lex(object=lexer)
  ply_parser = yacc.yacc(module=parser, debug=0, write_tables=0)

  return ply_parser.parse(source, lexer=ply_lexer)