• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import re
2import sys
3
4from lark import *
5from lark.visitors import *
6
# Lark grammar for the Meson build language, used by meson2python() below.
#
# This grammar is derived from:
# https://mesonbuild.com/Syntax.html#grammar
#
# Lowercase names are parser rules and uppercase names are terminals. A
# leading `?` on a rule asks Lark to inline the rule when it has exactly one
# child, and a `.1` suffix on a terminal raises its priority.
#
# NOTE(review): `binary_literal` and `build_definition` are defined but not
# referenced by any other rule in this grammar (`integer_literal` only lists
# the decimal, octal and hex forms) -- confirm whether that is intentional.
# NOTE(review): `DASH` and `MINUS` are both defined with the pattern /-/.
meson_grammar = r"""
    ?start: (statement | COMMENT | NEWLINE)*

    ?additive_expression: multiplicative_expression | (additive_expression additive_operator multiplicative_expression)
    additive_operator: PLUS | MINUS
    argument_list: positional_arguments [COMMA keyword_arguments] [COMMA] | keyword_arguments
    array_literal: LBRACKET [expression_list] RBRACKET
    ?assignment_statement: assignment_expression
    assignment_expression: expression assignment_operator expression
    assignment_operator: EQUALS | PLUS_EQUALS
    binary_literal: "0b" BINARY_NUMBER
    BINARY_NUMBER: /[01]+/
    boolean_literal: TRUE | FALSE
    build_definition: (NEWLINE | statement)*
    condition: expression
    ?conditional_expression: logical_or_expression | (logical_or_expression "?" expression ":" expression)
    decimal_literal: DECIMAL_NUMBER
    DECIMAL_NUMBER: /[0-9][0-9]*/
    dictionary_literal: LBRACE [key_value_list] RBRACE
    ?equality_expression: relational_expression | (equality_expression equality_operator relational_expression)
    equality_operator: DOUBLE_EQUAL | NOT_EQUAL
    ?expression: conditional_expression | logical_or_expression
    expression_list: expression (COMMA expression)* COMMA?
    ?expression_statement: expression
    ?function_expression: id_expression LPAREN [argument_list] RPAREN
    hex_literal: "0x" HEX_NUMBER
    HEX_NUMBER: /[a-fA-F0-9]+/
    id_expression: IDENTIFIER
    IDENTIFIER: /[a-zA-Z_][a-zA-Z_0-9]*/
    identifier_list: id_expression (COMMA id_expression)*
    integer_literal: decimal_literal | octal_literal | hex_literal
    iteration_statement: FOREACH identifier_list COLON expression NEWLINE (statement | jump_statement)* ENDFOREACH
    jump_statement: (BREAK | CONTINUE) NEWLINE
    key_value_item: expression COLON expression
    key_value_list: key_value_item (COMMA key_value_item)* COMMA?
    keyword_item: id_expression ":" expression
    keyword_arguments: keyword_item (COMMA keyword_item)* COMMA?
    ?literal: integer_literal | string_literal | boolean_literal | array_literal | dictionary_literal
    ?logical_and_expression: equality_expression | (logical_and_expression AND ["\\"] equality_expression)
    ?logical_or_expression: logical_and_expression | (logical_or_expression OR ["\\"] logical_and_expression)
    ?method_expression: postfix_expression ["\\"] DOT function_expression
    ?multiplicative_expression: unary_expression | (multiplicative_expression multiplicative_operator unary_expression)
    multiplicative_operator: ASTERISK | SLASH | PERCENT
    octal_literal: "0o" OCTAL_NUMBER
    OCTAL_NUMBER: /[0-7]+/
    positional_arguments: expression (COMMA expression)*
    postfix_expression: primary_expression | subscript_expression | function_expression | method_expression
    ?primary_expression: literal | (LPAREN expression RPAREN) | id_expression
    ?relational_expression: additive_expression | (relational_expression relational_operator additive_expression)
    relational_operator: GREATER | LESSTHAN | GREATER_OR_EQUAL | LESSTHAN_OR_EQUAL | IN | (NOT IN)
    selection_statement: IF condition NEWLINE (statement)* (ELIF condition NEWLINE (statement)*)* [ELSE NEWLINE (statement)*] ENDIF
    statement: (expression_statement | selection_statement | iteration_statement | assignment_statement) NEWLINE
    string_literal: STRING_SIMPLE_VALUE | STRING_MULTILINE_VALUE
    ?subscript_expression: postfix_expression LBRACKET expression RBRACKET
    ?unary_expression: postfix_expression | (unary_operator unary_expression)
    unary_operator: NOT | DASH

    AND: /and/
    ASTERISK: /\*/
    BREAK: /break/
    CONTINUE: /continue/
    COLON: /:/
    COMMA: /,/
    DASH: /-/
    DOT: /\./
    DOUBLE_EQUAL: /==/
    EQUALS: /=/
    FOREACH: /foreach/
    GREATER: />/
    GREATER_OR_EQUAL: />=/
    # Raise priorities to avoid elif parsed as a statement
    ELIF.1: /elif/
    ELSE.1: /else/
    ENDIF.1: /endif/
    ENDFOREACH: /endforeach/
    FALSE: /false/
    IF: /if /
    IN: / in /
    LBRACKET: /\[/
    NOT: /not /
    NOT_EQUAL: /!=/
    RBRACKET: /\]/
    LESSTHAN: /</
    LESSTHAN_OR_EQUAL: /<=/
    LBRACE: /{/
    LPAREN: /\(/
    RBRACE: /}/
    RPAREN: /\)/
    OR: /or/
    PERCENT: /%/
    PLUS: /\+/
    MINUS: /-/
    PLUS_EQUALS: /\+=/
    NEWLINE: ( / *\r?\n/ | COMMENT )+
    COMMENT: / *\#.*\n/
    SLASH: /\//
    STRING_SIMPLE_VALUE: /'(.*\\')*.*?'/
    STRING_MULTILINE_VALUE: /'''.*?'''/s
    TRUE: /true/

    %import common.WS

    %ignore WS
    # Comments would be nice to keep, but parsing fails end-of-line comments
    %ignore COMMENT
"""
115
116
class TreeToCode(Interpreter):
  """Render a parsed Meson tree as Python source text.

  Each method named after a grammar rule receives that rule's subtree and
  returns the Python text generated for it. Rules without a dedicated method
  are handled by `__default__`, which concatenates the rendered children in
  order.
  """

  # Indentation prefix for the statements currently being emitted; it grows
  # and shrinks by two spaces via more_indent() / less_indent().
  indent = ''

  # (pattern, replacement) pairs applied, in this order, to every rendered
  # child of a postfix expression to map Meson methods to Python spellings.
  _METHOD_REWRITES = (
      (re.compile(r'(.+)\.to_int\(\)'), r'int(\g<1>)'),
      (re.compile(r'(.+)\.to_string\(\)'), r'str(\g<1>)'),
      (re.compile(r'(.+)\.length\(\)'), r'len(\g<1>)'),
      (re.compile(r'(.+)\.to_upper\(\)'), r'\g<1>.upper()'),
      (
          re.compile(r'(.+)\.underscorify\(\)'),
          r"\g<1>.replace('.', '_').replace('/', '_')",
      ),
  )

  def _render_children(self, children):
    """Return the concatenated text of `children`.

    Subtrees are rendered through visit(), tokens are used verbatim, and
    None entries are skipped.
    """
    parts = []
    for child in children:
      if isinstance(child, Tree):
        parts.append(self.visit(child))
      elif child is not None:
        parts.append(child)
    return ''.join(parts)

  def statement(self, tree):
    """Render a statement prefixed with the current indentation."""
    body = self._render_children(tree.children)
    return self.indent + body

  def jump_statement(self, tree):
    """Render `break` / `continue` prefixed with the current indentation.

    Jump statements sit directly in a foreach body rather than inside a
    `statement` node, so they have to add the indentation themselves.
    """
    body = self._render_children(tree.children)
    return self.indent + body

  def more_indent(self):
    """Increase the current indentation by one level (two spaces)."""
    self.indent += '  '

  def less_indent(self):
    """Decrease the current indentation by one level (two spaces)."""
    self.indent = self.indent[:-2]

  def logical_and_expression(self, tree):
    """Render `lhs and rhs`, ensuring spaces around 'and'."""
    assert len(tree.children) == 3
    lhs = self.visit(tree.children[0])
    rhs = self.visit(tree.children[2])
    return lhs + ' and ' + rhs

  def logical_or_expression(self, tree):
    """Render `lhs or rhs`, ensuring spaces around 'or'."""
    assert len(tree.children) == 3
    lhs = self.visit(tree.children[0])
    rhs = self.visit(tree.children[2])
    return lhs + ' or ' + rhs

  def conditional_expression(self, tree):
    """Render Meson `A ? B : C` as Python `B if A else C`."""
    assert len(tree.children) == 3
    condition = self.visit(tree.children[0])
    if_true = self.visit(tree.children[1])
    if_false = self.visit(tree.children[2])
    return if_true + ' if ' + condition + ' else ' + if_false

  def assignment_expression(self, tree):
    """Render an assignment, special-casing `+=` with a literal right side.

    `+=` with a dictionary literal becomes `|=`. `+=` with a string literal
    is wrapped in a list when the target is a list at run time, so the same
    output works for both list and string targets.
    """
    assert len(tree.children) == 3
    lhs = self.visit(tree.children[0])
    operator = self.visit(tree.children[1])
    rhs = self.visit(tree.children[2])
    if operator == '+=':
      if rhs.startswith('{'):
        # Convert += to |= for dictionaries
        return lhs + ' |= ' + rhs
      if rhs.startswith("'"):
        # Handle literal string append to list or string
        return (
            lhs + ' += [' + rhs + '] if isinstance(' + lhs + ', list) else '
            + rhs
        )
    return lhs + operator + rhs

  def iteration_statement(self, tree):
    """Render `foreach ... endforeach` as a Python `for` loop.

    When more than one loop variable is given, the iterable is suffixed with
    `.items()`.
    """
    # Children: FOREACH, identifier_list, COLON, expression, NEWLINE,
    # body..., ENDFOREACH.
    identifier_list = self.visit(tree.children[1])
    iterable = self.visit(tree.children[3])
    header = 'for ' + identifier_list + ' in ' + iterable
    header += '.items():\n' if ',' in identifier_list else ':\n'
    self.more_indent()
    body = self._render_children(tree.children[5:-1])
    self.less_indent()
    return header + body

  def selection_statement(self, tree):
    """Render `if` / `elif` / `else` / `endif` as a Python `if` statement.

    A branch without any statement gets a `noop()` call as its body so the
    generated Python stays syntactically valid.
    """
    children = tree.children
    out = ''
    index = 0
    while index < len(children):
      prefix = children[index]
      index += 1
      if prefix is None:
        continue
      if isinstance(prefix, Tree):
        sys.exit('unexpected prefix: ' + prefix.pretty())
      if prefix.lstrip(' ').startswith('endif'):
        break

      if prefix.startswith('if'):
        condition = self.visit(children[index])
        index += 1
        # Skip indent here because all statements are prepended with the
        # indentation
        out += 'if ' + condition + ':\n'
      elif prefix.startswith('elif'):
        condition = self.visit(children[index])
        index += 1
        out += self.indent + 'elif ' + condition + ':\n'
      elif prefix.startswith('else'):
        out += self.indent + 'else:\n'
      else:
        sys.exit('Not a prefix: ' + prefix)

      # Step over the NEWLINE token that ends the branch header.
      index += 1

      statement_count = 0
      self.more_indent()
      # The branch body is the run of subtrees up to the next token.
      while index < len(children) and isinstance(children[index], Tree):
        out += self.visit(children[index])
        index += 1
        statement_count += 1
      if statement_count == 0:
        out += self.indent + 'noop()\n'
      self.less_indent()

    return out

  def postfix_expression(self, tree):
    """Render a postfix expression, rewriting selected Meson method calls."""
    parts = []
    for child in tree.children:
      if isinstance(child, Tree):
        rendered = self.visit(child)
        for pattern, replacement in self._METHOD_REWRITES:
          rendered = pattern.sub(replacement, rendered)
        parts.append(rendered)
      elif child is not None:
        parts.append(child)
    return ''.join(parts)

  def function_expression(self, tree):
    """Render a function call.

    `import` is renamed to `module_import`, and `contains(args)` is emitted
    as `count(args) > 0`.
    """
    assert len(tree.children) == 4
    identifier = self.visit(tree.children[0])
    if identifier == 'import':
      identifier = 'module_import'
    lparen = tree.children[1]
    # The argument list is optional; anything that is not a subtree renders
    # as an empty argument list.
    args = (
        self.visit(tree.children[2])
        if isinstance(tree.children[2], Tree)
        else ''
    )
    rparen = tree.children[3]
    if identifier == 'contains':
      return 'count' + lparen + args + rparen + ' > 0'
    return identifier + lparen + args + rparen

  def multiplicative_expression(self, tree):
    """Render `*`, `/` and `%` expressions.

    `/` is emitted as a run-time choice between string concatenation (when
    the left operand is a str) and numeric division.
    """
    assert len(tree.children) == 3
    lhs = self.visit(tree.children[0])
    operator = self.visit(tree.children[1])
    rhs = self.visit(tree.children[2])
    # Slash used mostly to concatenate strings
    if operator == '/':
      return (
          '(' + lhs + ' + ' + rhs + ') if isinstance(' + lhs + ', str) else ('
          + lhs + ' / ' + rhs + ')'
      )
    return lhs + operator + rhs

  def keyword_item(self, tree):
    """Render a keyword argument, switching from `name: value` to `name=value`."""
    name = self.visit(tree.children[0])
    value = self.visit(tree.children[1])
    return name + '=' + value

  def boolean_literal(self, tree):
    """Render `true` / `false` as `True` / `False`; exit on anything else."""
    assert len(tree.children) == 1
    value = tree.children[0]
    if value == 'true':
      return 'True'
    if value == 'false':
      return 'False'
    sys.exit('Unhandled value: ' + value)

  def string_literal(self, tree):
    """Render a string literal, converting `@N@` placeholders to `{N}`.

    The index is kept so that repeated or reordered placeholders (for
    example '@1@ @0@') still select the right argument of str.format().
    """
    assert len(tree.children) == 1
    return re.sub(r'@([0-9]+)@', r'{\1}', tree.children[0])

  def __default__(self, tree):
    """Render any rule without a dedicated method by joining its children."""
    return self._render_children(tree.children)
329
330
def meson2python(file_name):
  """Convert the Meson file at `file_name` to Python source text.

  Args:
    file_name: Path of the Meson build file to read.

  Returns:
    The generated Python code as a string.
  """
  meson_parser = Lark(meson_grammar, parser='earley')
  # Read with an explicit encoding so the result does not depend on the
  # locale's default, and close the file before the parsing work starts.
  with open(file_name, encoding='utf-8') as f:
    meson_source = f.read()
  tree = meson_parser.parse(meson_source)
  return TreeToCode().visit(tree)
338
339
if __name__ == '__main__':
  # Fail with a usage message instead of an IndexError traceback when the
  # input path is missing.
  if len(sys.argv) < 2:
    sys.exit('usage: ' + sys.argv[0] + ' <meson-file>')
  # NOTE(review): the generated Python text is discarded here (neither
  # printed nor written), so running this as a script only checks that the
  # file converts without error -- confirm that this is intended.
  meson2python(sys.argv[1])
342