• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4import dataclasses
5import os
6import re
7from typing import List
8from typing import Optional
9
10import java_types
11
# Regex fragment matching zero or more Java modifier keywords, each followed
# by whitespace (e.g. "public static "). It contains only non-capturing groups,
# so embedding it in a larger pattern does not shift that pattern's group
# numbering.
_MODIFIER_KEYWORDS = r'(?:(?:%s)\s+)*' % '|'.join((
    'abstract',
    'default',
    'final',
    'native',
    'private',
    'protected',
    'public',
    'static',
    'synchronized',
))
23
24
class ParseError(Exception):
  """Raised when a Java source file or javap output cannot be parsed.

  |suffix| is appended to the message whenever the error is rendered with
  str(), which lets callers attach context (such as the file name) after the
  exception has been created.
  """
  suffix = ''

  def __str__(self):
    message = super().__str__()
    return message + self.suffix
30
31
@dataclasses.dataclass(order=True)
class ParsedNative:
  """A Java native method found while parsing a source file.

  Instances compare (and therefore sort) field by field in declaration order.
  """
  # Method name. For natives declared outside an @NativeMethods interface the
  # parser stores the name without its "native" prefix.
  name: str
  # Return type and parameter list of the method.
  signature: java_types.JavaSignature
  # Value of the @NativeClassQualifiedName annotation, or None when the method
  # does not carry one.
  native_class_name: Optional[str]
  # True when the declaration includes the "static" qualifier. Only set by the
  # non-proxy parser; proxy methods keep the default.
  static: bool = False
38
39
@dataclasses.dataclass(order=True)
class ParsedCalledByNative:
  """A Java method or constructor that native code calls.

  Instances compare (and therefore sort) field by field in declaration order.
  """
  # Class that declares the method. When parsing sources this is the file's
  # outer class, or the nested class named by @CalledByNative("Inner").
  java_class: java_types.JavaClass
  # Method name; constructors are recorded as "<init>".
  name: str
  # Return type and parameter list of the method.
  signature: java_types.JavaSignature
  # True when the method's modifiers include "static".
  static: bool
  # True for methods annotated with @CalledByNativeUnchecked.
  unchecked: bool = False
47
48
@dataclasses.dataclass(order=True)
class ParsedConstantField:
  """A numeric "public static final" field extracted from javap output.

  Instances order by |name| first and |value| second.
  """
  # Field name as written in the javap output.
  name: str
  # The constant's value, kept as the string of digits that was parsed.
  value: str
53
54
@dataclasses.dataclass
class ParsedFile:
  """Everything the parser extracted from one .java file or javap dump."""
  # The file name that was passed to the parse function.
  filename: str
  # Resolver rooted at the file's outer class.
  type_resolver: java_types.TypeResolver
  # Methods declared inside the @NativeMethods interface (empty if none).
  proxy_methods: List[ParsedNative]
  # "native" methods declared outside an @NativeMethods interface.
  non_proxy_methods: List[ParsedNative]
  # Methods and constructors that native code calls.
  called_by_natives: List[ParsedCalledByNative]
  # Numeric constants; only populated when parsing javap output.
  constant_fields: List[ParsedConstantField]
  # The @NativeMethods interface as a class nested in the outer class.
  proxy_interface: Optional[java_types.JavaClass] = None
  # "public" when the @NativeMethods interface is declared public, else None.
  proxy_visibility: Optional[str] = None
  module_name: Optional[str] = None  # E.g. @NativeMethods("module_name")
  jni_namespace: Optional[str] = None  # E.g. @JNINamespace("content")
67
68
@dataclasses.dataclass
class _ParsedProxyNatives:
  """Intermediate result of parsing a file's @NativeMethods interface."""
  # Name of the interface annotated with @NativeMethods.
  interface_name: str
  # "public" if the interface is declared public, otherwise None.
  visibility: Optional[str]
  # Argument of @NativeMethods("..."), or None when the annotation has none.
  module_name: Optional[str]
  # Methods declared in the interface body, sorted.
  methods: List[ParsedNative]
75
76
# Alternatives, in order: a "//" comment up to the end of its line, a "/* */"
# comment, a character literal, and a double-quoted string. Literals are
# matched so that comment markers inside them are consumed as part of the
# literal instead of being treated as the start of a comment.
_COMMENT_REMOVER_REGEX = re.compile(
    r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
    re.DOTALL | re.MULTILINE)


def _remove_comments(contents):
  """Returns |contents| with line and block comments deleted.

  String and character literals are left untouched, even when they contain
  '//' or '/*'.
  """

  def _drop_if_comment(match):
    token = match.group(0)
    # Only the two comment alternatives can start with '/'; literals start
    # with a quote and are emitted verbatim.
    return '' if token.startswith('/') else token

  return _COMMENT_REMOVER_REGEX.sub(_drop_if_comment, contents)
97
98
# Matches an innermost <...> group that sits on a single line. This also eats
# text between comparison operators on the same line, which is acceptable for
# this parser.
_GENERICS_REGEX = re.compile(r'<[^<>\n]*>')


def _remove_generics(value):
  """Returns |value| with all (possibly nested) Java generics removed."""
  stripped = value
  num_removed = 1
  # Each pass deletes the innermost groups only, so repeat until a pass finds
  # nothing left to delete.
  while num_removed:
    stripped, num_removed = _GENERICS_REGEX.subn('', stripped)
  return stripped
110
111
# Matches a "package foo.bar;" declaration at the start of a line; group 1 is
# the dotted package name. Written as a raw string so that \s and \S reach the
# regex engine instead of being treated as (invalid) string escapes.
_PACKAGE_REGEX = re.compile(r'^package\s+(\S+?);', flags=re.MULTILINE)


def _parse_package(contents):
  """Returns the dotted package name declared in |contents|.

  Raises:
    ParseError: If no line begins with a package declaration.
  """
  match = _PACKAGE_REGEX.search(contents)
  if match is None:
    raise ParseError('Unable to find "package" line')
  return match.group(1)
120
121
# Matches a line containing a class, interface or enum declaration.
#   Group 1: the text on the line that precedes the declaration.
#   Group 2: the declared type's simple name.
# The trailing [^"]*?$ means the remainder of the line may not contain a
# double quote.
_CLASSES_REGEX = re.compile(
    r'^(.*?)(?:\b(?:public|protected|private)?\b)\s*'
    r'(?:\b(?:static|abstract|final|sealed)\s+)*'
    r'\b(?:class|interface|enum)\s+(\w+?)\b[^"]*?$',
    flags=re.MULTILINE)
127
128
# Does not handle doubly-nested classes.
def _parse_java_classes(contents):
  """Finds the outer class and the classes nested within it.

  The first declaration found is taken to be the outer class; every later
  declaration is recorded as a class nested directly inside it.

  Returns:
    A tuple of (outer JavaClass, list of nested JavaClass).

  Raises:
    ParseError: If there is no package line or no class declaration.
  """
  package_path = _parse_package(contents).replace('.', '/')
  top_level = None
  inner_classes = []
  for match in _CLASSES_REGEX.finditer(contents):
    text_before, simple_name = match.group(1), match.group(2)
    # An odd number of quotes before the keyword means the match sits inside a
    # string, e.g. an annotation like @Foo("contains the words class Bar").
    if text_before.count('"') % 2:
      continue
    if top_level is not None:
      inner_classes.append(top_level.make_nested(simple_name))
    else:
      top_level = java_types.JavaClass(f'{package_path}/{simple_name}')

  if top_level is None:
    raise ParseError('No classes found.')

  return top_level, inner_classes
148
149
# Supports only @Foo and @Foo("value"). Group 1 is the annotation name and
# group 2 the quoted value (None when the annotation has no argument).
_ANNOTATION_REGEX = re.compile(r'@([\w.]+)(?:\(\s*"(.*?)\"\s*\))?\s*')


def _parse_annotations(value):
  """Splits annotations off |value|.

  Returns:
    A tuple of (dict mapping annotation name to its string argument or None,
    the text that follows the last annotation).
  """
  matches = list(_ANNOTATION_REGEX.finditer(value))
  annotations = {m.group(1): m.group(2) for m in matches}
  remainder_start = matches[-1].end() if matches else 0
  return annotations, value[remainder_start:]
162
163
def _parse_type(type_resolver, value):
  """Converts a Java type string such as '@Foo int[]' into a JavaType.

  Leading annotations are recorded on the result, each trailing '[]' adds one
  array dimension, and non-primitive names are looked up via |type_resolver|.
  """
  annotations, type_name = _parse_annotations(value)

  array_dimensions = 0
  while type_name.endswith('[]'):
    type_name = type_name[:-2]
    array_dimensions += 1

  is_primitive = type_name in java_types.PRIMITIVES
  return java_types.JavaType(
      array_dimensions=array_dimensions,
      primitive_name=type_name if is_primitive else None,
      java_class=None if is_primitive else type_resolver.resolve(type_name),
      annotations=annotations)
183
184
# Matches the "final" keyword together with the whitespace character after it.
_FINAL_REGEX = re.compile(r'\bfinal\s')


def _parse_param_list(type_resolver, value) -> java_types.JavaParamList:
  """Parses the text between a method's parentheses into a JavaParamList.

  Parameters are separated on commas; within each one, the text after the last
  space is the parameter name and everything before it is the type.
  """
  if not value or value.isspace():
    return java_types.EMPTY_PARAM_LIST

  def _to_param(declaration):
    type_str, _, name = declaration.strip().rpartition(' ')
    type_str = type_str.rstrip()
    # Varargs ("Foo... args") are treated as an array parameter.
    if type_str.endswith('...'):
      type_str = type_str[:-3] + '[]'
    return java_types.JavaParam(_parse_type(type_resolver, type_str), name)

  without_final = _FINAL_REGEX.sub('', value)
  return java_types.JavaParamList(
      [_to_param(declaration) for declaration in without_final.split(',')])
206
207
# Matches an interface annotated with @NativeMethods, optionally with a quoted
# module name. Named groups:
#   module_name:    argument of @NativeMethods("..."), if present.
#   visibility:     "public" when the interface is declared public.
#   interface_name: name of the interface.
#   interface_body: text between the interface's braces (lazy match, so it
#                   ends at the first closing brace that allows a match).
_NATIVE_METHODS_INTERFACE_REGEX = re.compile(
    r'@NativeMethods(?:\(\s*"(?P<module_name>\w+)"\s*\))?[\S\s]+?'
    r'(?P<visibility>public)?\s*\binterface\s*'
    r'(?P<interface_name>\w*)\s*{(?P<interface_body>(\s*.*)+?\s*)}')

# Matches one method declaration inside the interface body. Groups are
# (annotations/modifiers/return type, method name, parameter text); DOTALL
# lets a declaration span several lines.
_PROXY_NATIVE_REGEX = re.compile(r'\s*(.*?)\s+(\w+)\((.*?)\);', flags=re.DOTALL)

# Matches the "public" keyword together with the whitespace character after it.
_PUBLIC_REGEX = re.compile(r'\bpublic\s')
216
217
def _parse_proxy_natives(type_resolver, contents):
  """Parses the @NativeMethods interface in |contents|, if there is one.

  Returns:
    A _ParsedProxyNatives with its methods sorted, or None when the file has
    no @NativeMethods interface.

  Raises:
    ParseError: If there is more than one such interface, or the interface
      declares no methods.
  """
  interfaces = list(_NATIVE_METHODS_INTERFACE_REGEX.finditer(contents))
  if not interfaces:
    return None
  if len(interfaces) > 1:
    raise ParseError(
        'Multiple @NativeMethod interfaces in one class is not supported.')
  (interface,) = interfaces

  def _to_native(method_match):
    preamble, name, params_part = method_match.groups()
    # What is left of the preamble after dropping "public" and the
    # annotations is the return type.
    annotations, return_type_part = _parse_annotations(
        _PUBLIC_REGEX.sub('', preamble))
    params = _parse_param_list(type_resolver, params_part)
    return_type = _parse_type(type_resolver, return_type_part)
    return ParsedNative(
        name=name,
        signature=java_types.JavaSignature.from_params(return_type, params),
        native_class_name=annotations.get('NativeClassQualifiedName'))

  body = interface.group('interface_body')
  methods = [_to_native(m) for m in _PROXY_NATIVE_REGEX.finditer(body)]
  if not methods:
    raise ParseError('Found no methods within @NativeMethod interface.')
  methods.sort()

  return _ParsedProxyNatives(interface_name=interface.group('interface_name'),
                             visibility=interface.group('visibility'),
                             module_name=interface.group('module_name'),
                             methods=methods)
249
250
# Matches a "native" method declaration whose name starts with "native", e.g.
#   @NativeClassQualifiedName("Foo") private static native int nativeBar(int a);
# Named groups:
#   native_class_name: argument of @NativeClassQualifiedName, if present.
#   qualifiers:        up to two words (or just whitespace) preceding "native".
#   return_type:       the token following "native".
#   name:              method name, including its "native" prefix.
#   params:            text between the parentheses (may span lines).
_NON_PROXY_NATIVES_REGEX = re.compile(
    r'(@NativeClassQualifiedName'
    r'\(\"(?P<native_class_name>\S*?)\"\)\s+)?'
    r'(?P<qualifiers>\w+\s\w+|\w+|\s+)\s*native\s+'
    r'(?P<return_type>\S*)\s+'
    r'(?P<name>native\w+)\((?P<params>.*?)\);', re.DOTALL)
257
258
def _parse_non_proxy_natives(type_resolver, contents):
  """Parses "native" methods declared outside an @NativeMethods interface.

  Only methods whose names start with "native" are recognized; the recorded
  name has that prefix removed.

  Returns:
    A sorted list of ParsedNative.
  """
  prefix_len = len('native')
  ret = []
  for match in _NON_PROXY_NATIVES_REGEX.finditer(contents):
    # The regex guarantees that the name starts with "native". Strip only that
    # prefix: str.replace() would also delete later occurrences and turn
    # nativeSetAlternativeName into "SetAlterName".
    name = match.group('name')[prefix_len:]
    return_type = _parse_type(type_resolver, match.group('return_type'))
    params = _parse_param_list(type_resolver, match.group('params'))
    signature = java_types.JavaSignature.from_params(return_type, params)
    ret.append(
        ParsedNative(name=name,
                     signature=signature,
                     native_class_name=match.group('native_class_name'),
                     static='static' in match.group('qualifiers')))
  ret.sort()
  return ret
275
276
# Regex to match a string like "@CalledByNative public void foo(int bar)".
# Named groups:
#   Unchecked:   "Unchecked", "ForTesting" or "" (the annotation's suffix).
#   annotation:  quoted argument, as in @CalledByNative("Inner"), if present.
#   modifiers:   the run of modifier keywords before the return type.
#   return_type: return type; empty for constructors.
#   name:        method (or constructor) name.
#   params:      text between the parentheses.
_CALLED_BY_NATIVE_REGEX = re.compile(
    r'@CalledByNative((?P<Unchecked>(?:Unchecked)?|ForTesting))'
    r'(?:\("(?P<annotation>.*)"\))?'
    r'(?:\s+@\w+(?:\(.*\))?)*'  # Ignore any other annotations.
    r'\s+(?P<modifiers>' + _MODIFIER_KEYWORDS + r')' +
    r'(?:\s*@\w+)?'  # Ignore annotations in return types.
    r'\s*(?P<return_type>\S*?)'
    r'\s*(?P<name>\w+)'
    r'\s*\((?P<params>[^\)]*)\)')
287
288
def _parse_called_by_natives(type_resolver, contents):
  """Parses every @CalledByNative method and constructor in |contents|.

  Returns:
    A sorted list of ParsedCalledByNative.

  Raises:
    ParseError: If an @CalledByNative annotation is present that the regex
      could not match to a method signature.
  """
  parsed = []
  for match in _CALLED_BY_NATIVE_REGEX.finditer(contents):
    groups = match.groupdict()
    method_name = groups['name']
    if groups['return_type']:
      return_type = _parse_type(type_resolver, groups['return_type'])
    else:
      # No return type means this is a constructor.
      return_type = java_types.VOID
      method_name = '<init>'

    params = _parse_param_list(type_resolver, groups['params'])
    signature = java_types.JavaSignature.from_params(return_type, params)

    # @CalledByNative("Inner") places the method on a nested class.
    owner = type_resolver.java_class
    if groups['annotation']:
      owner = owner.make_nested(groups['annotation'])

    parsed.append(
        ParsedCalledByNative(java_class=owner,
                             name=method_name,
                             signature=signature,
                             static='static' in groups['modifiers'],
                             unchecked='Unchecked' in groups['Unchecked']))

  # Check for any @CalledByNative occurrences that were not matched: remove
  # everything that did match and see whether the annotation still appears.
  leftover_lines = _CALLED_BY_NATIVE_REGEX.sub('', contents).splitlines()
  first_unparsed = next(
      (i for i, line in enumerate(leftover_lines) if '@CalledByNative' in line),
      None)
  if first_unparsed is not None:
    context = '\n'.join(leftover_lines[first_unparsed:first_unparsed + 5])
    raise ParseError('Could not parse @CalledByNative method signature:\n' +
                     context)

  parsed.sort()
  return parsed
324
325
# Group 1 is the imported name. Names may not contain whitespace or '*', which
# is what excludes "import static ..." and wildcard imports.
_IMPORT_REGEX = re.compile(r'^import\s+([^\s*]+);', flags=re.MULTILINE)
# Splits a dotted name at the first component that starts with an upper-case
# letter: group 1 is the package, group 2 the (possibly nested) class name.
_IMPORT_CLASS_NAME_REGEX = re.compile(r'^(.*?)\.([A-Z].*)')


def _parse_imports(contents):
  """Yields a JavaClass for each plain class import in |contents|.

  Static imports and wildcard imports are skipped by the regex, as are names
  in which no class component can be identified.
  """
  for qualified_name in _IMPORT_REGEX.findall(contents):
    split = _IMPORT_CLASS_NAME_REGEX.match(qualified_name)
    if split is None:
      continue
    package, class_name = split.groups()
    package_path = package.replace('.', '/')
    # Nested classes are separated from their outer class with '$'.
    nested_name = class_name.replace('.', '$')
    yield java_types.JavaClass(f'{package_path}/{nested_name}')
339
340
# Matches @JNINamespace("ns"); group 1 is the namespace. Written as a raw
# string so that \( and \) reach the regex engine instead of being treated as
# (invalid) string escapes.
_JNI_NAMESPACE_REGEX = re.compile(r'@JNINamespace\("(.*?)"\)')


def _parse_jni_namespace(contents):
  """Returns the argument of the @JNINamespace annotation in |contents|.

  Returns:
    The namespace string, or '' when the annotation is absent.

  Raises:
    ParseError: If the annotation appears more than once.
  """
  namespaces = _JNI_NAMESPACE_REGEX.findall(contents)
  if not namespaces:
    return ''
  if len(namespaces) > 1:
    raise ParseError('Found multiple @JNINamespace annotations.')
  return namespaces[0]
351
352
def _do_parse(filename, *, package_prefix):
  """Parses the Java source file at |filename| into a ParsedFile.

  Args:
    filename: Path to a .java file. Its base name (without extension) must
      equal the name of the first class declared in the file.
    package_prefix: When truthy, applied to the outer class and all nested
      classes via make_prefixed().

  Returns:
    A ParsedFile. Its constant_fields list is always empty.

  Raises:
    ParseError: If the file has no package line or class, the class name does
      not match the file name, or any of the individual parsers fails.
  """
  assert not filename.endswith('.kt'), (
      f'Found {filename}, but Kotlin is not supported by JNI generator.')
  # Read as UTF-8 explicitly; without an encoding, open() uses the platform's
  # locale and the result would vary between machines.
  with open(filename, encoding='utf-8') as f:
    contents = f.read()
  # Comments and generics are stripped up front so that the regexes below do
  # not have to account for them.
  contents = _remove_comments(contents)
  contents = _remove_generics(contents)

  outer_class, nested_classes = _parse_java_classes(contents)

  expected_name = os.path.splitext(os.path.basename(filename))[0]
  if outer_class.name != expected_name:
    raise ParseError(
        f'Found class "{outer_class.name}" but expected "{expected_name}".')

  if package_prefix:
    outer_class = outer_class.make_prefixed(package_prefix)
    nested_classes = [c.make_prefixed(package_prefix) for c in nested_classes]

  type_resolver = java_types.TypeResolver(outer_class)
  for java_class in _parse_imports(contents):
    type_resolver.add_import(java_class)
  for java_class in nested_classes:
    type_resolver.add_nested_class(java_class)

  parsed_proxy_natives = _parse_proxy_natives(type_resolver, contents)
  jni_namespace = _parse_jni_namespace(contents)

  non_proxy_methods = _parse_non_proxy_natives(type_resolver, contents)
  called_by_natives = _parse_called_by_natives(type_resolver, contents)

  ret = ParsedFile(filename=filename,
                   jni_namespace=jni_namespace,
                   type_resolver=type_resolver,
                   proxy_methods=[],
                   non_proxy_methods=non_proxy_methods,
                   called_by_natives=called_by_natives,
                   constant_fields=[])

  # The proxy fields keep their defaults unless an @NativeMethods interface
  # was found.
  if parsed_proxy_natives:
    ret.module_name = parsed_proxy_natives.module_name
    ret.proxy_interface = outer_class.make_nested(
        parsed_proxy_natives.interface_name)
    ret.proxy_visibility = parsed_proxy_natives.visibility
    ret.proxy_methods = parsed_proxy_natives.methods

  return ret
400
401
def parse_java_file(filename, *, package_prefix=None):
  """Parses a .java file and returns the resulting ParsedFile.

  Any ParseError raised along the way is re-raised with the file name appended
  to its message.
  """
  try:
    parsed = _do_parse(filename, package_prefix=package_prefix)
  except ParseError as error:
    error.suffix = f' (when parsing {filename})'
    raise
  return parsed
408
409
# Matches "class <name>" or "interface <name>"; group 1 is the name.
_JAVAP_CLASS_REGEX = re.compile(r'\b(?:class|interface) (\S+)')
# Matches an indented "public static final <type> <name> = <digits>;" line;
# groups are (name, digits). Only non-negative integer literals match.
_JAVAP_FINAL_FIELD_REGEX = re.compile(
    r'^\s+public static final \S+ (.*?) = (\d+);', flags=re.MULTILINE)
# Matches a method line followed by its "descriptor:" line; groups are
# (modifier keywords, the token immediately before the opening parenthesis,
# descriptor text).
_JAVAP_METHOD_REGEX = re.compile(
    rf'^\s*({_MODIFIER_KEYWORDS}).*?(\S+?)\(.*\n\s+descriptor: (.*)',
    flags=re.MULTILINE)
416
417
def parse_javap(filename, contents):
  """Builds a ParsedFile from javap output.

  Args:
    filename: Stored on the returned ParsedFile; not opened here.
    contents: The javap output to parse.

  Returns:
    A ParsedFile whose called_by_natives and constant_fields are populated and
    sorted; its native-method lists are empty.

  Raises:
    ParseError: If no class or interface declaration is found.
  """
  contents = _remove_generics(contents)
  class_match = _JAVAP_CLASS_REGEX.search(contents)
  if class_match is None:
    raise ParseError('Could not find java class in javap output')
  java_class = java_types.JavaClass(class_match.group(1).replace('.', '/'))
  type_resolver = java_types.TypeResolver(java_class)

  constant_fields = sorted(
      ParsedConstantField(name=field_name, value=field_value)
      for field_name, field_value in _JAVAP_FINAL_FIELD_REGEX.findall(contents))

  called_by_natives = []
  for method_match in _JAVAP_METHOD_REGEX.finditer(contents):
    modifiers, method_name, descriptor = method_match.groups()
    # A method named after the class itself is a constructor.
    is_constructor = method_name == java_class.full_name_with_dots
    called_by_natives.append(
        ParsedCalledByNative(
            java_class=java_class,
            name='<init>' if is_constructor else method_name,
            signature=java_types.JavaSignature.from_descriptor(descriptor),
            static='static' in modifiers))
  called_by_natives.sort()

  # Although javac will not allow multiple methods with no args and different
  # return types, Class.class has just that, and it breaks with our
  # name-mangling logic which assumes this cannot happen.
  if java_class.full_name_with_slashes == 'java/lang/Class':

    def _returns_type_descriptor(method):
      return_type = method.signature.return_type
      return 'TypeDescriptor' in return_type.non_array_full_name_with_slashes

    called_by_natives = [
        m for m in called_by_natives if not _returns_type_descriptor(m)
    ]

  return ParsedFile(filename=filename,
                    type_resolver=type_resolver,
                    proxy_methods=[],
                    non_proxy_methods=[],
                    called_by_natives=called_by_natives,
                    constant_fields=constant_fields)
461