# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Regex-based parsing of Java sources and javap output for JNI generation."""
import dataclasses
import os
import re
from typing import List
from typing import Optional

import java_types

# Regex fragment matching zero or more Java modifier keywords, each followed
# by whitespace. It is non-capturing so that callers can wrap it in a group.
_MODIFIER_KEYWORDS = (r'(?:(?:' + '|'.join([
    'abstract',
    'default',
    'final',
    'native',
    'private',
    'protected',
    'public',
    'static',
    'synchronized',
]) + r')\s+)*')


class ParseError(Exception):
    """Raised when the input cannot be parsed.

    |suffix| is appended to the message by __str__(), which lets a caller
    attach context (e.g. the file being parsed) after the error was raised.
    """
    suffix = ''

    def __str__(self):
        return super().__str__() + self.suffix


@dataclasses.dataclass(order=True)
class ParsedNative:
    """A native method declaration found in a Java source file."""
    # Method name as stored by the parser.
    name: str
    signature: java_types.JavaSignature
    # Value of @NativeClassQualifiedName("..."), or None when the method does
    # not carry that annotation.
    native_class_name: Optional[str]
    static: bool = False


@dataclasses.dataclass(order=True)
class ParsedCalledByNative:
    """A Java method (or constructor) that is callable from native code."""
    java_class: java_types.JavaClass
    # Method name; constructors are recorded as '<init>'.
    name: str
    signature: java_types.JavaSignature
    static: bool
    unchecked: bool = False


@dataclasses.dataclass(order=True)
class ParsedConstantField:
    """A constant field, stored as its name and its textual value."""
    name: str
    value: str


@dataclasses.dataclass
class ParsedFile:
    """Everything extracted from a single parsed input."""
    filename: str
    type_resolver: java_types.TypeResolver
    proxy_methods: List[ParsedNative]
    non_proxy_methods: List[ParsedNative]
    called_by_natives: List[ParsedCalledByNative]
    constant_fields: List[ParsedConstantField]
    proxy_interface: Optional[java_types.JavaClass] = None
    proxy_visibility: Optional[str] = None
    module_name: Optional[str] = None  # E.g. @NativeMethods("module_name")
    jni_namespace: Optional[str] = None  # E.g. @JNINamespace("content")


@dataclasses.dataclass
class _ParsedProxyNatives:
    """Intermediate result of parsing a @NativeMethods interface."""
    interface_name: str
    # 'public' when the interface is declared public, otherwise None.
    visibility: Optional[str]
    # The @NativeMethods("...") argument, or None when it has no argument.
    module_name: Optional[str]
    methods: List[ParsedNative]


# Match single line comments, multiline comments, character literals, and
# double-quoted strings.
79_COMMENT_REMOVER_REGEX = re.compile( 80 r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', 81 re.DOTALL | re.MULTILINE) 82 83 84def _remove_comments(contents): 85 # We need to support both inline and block comments, and we need to handle 86 # strings that contain '//' or '/*'. 87 def replacer(match): 88 # Replace matches that are comments with nothing; return literals/strings 89 # unchanged. 90 s = match.group(0) 91 if s.startswith('/'): 92 return '' 93 else: 94 return s 95 96 return _COMMENT_REMOVER_REGEX.sub(replacer, contents) 97 98 99# This will also break lines with comparison operators, but we don't care. 100_GENERICS_REGEX = re.compile(r'<[^<>\n]*>') 101 102 103def _remove_generics(value): 104 """Strips Java generics from a string.""" 105 while True: 106 ret = _GENERICS_REGEX.sub('', value) 107 if len(ret) == len(value): 108 return ret 109 value = ret 110 111 112_PACKAGE_REGEX = re.compile('^package\s+(\S+?);', flags=re.MULTILINE) 113 114 115def _parse_package(contents): 116 match = _PACKAGE_REGEX.search(contents) 117 if not match: 118 raise ParseError('Unable to find "package" line') 119 return match.group(1) 120 121 122_CLASSES_REGEX = re.compile( 123 r'^(.*?)(?:\b(?:public|protected|private)?\b)\s*' 124 r'(?:\b(?:static|abstract|final|sealed)\s+)*' 125 r'\b(?:class|interface|enum)\s+(\w+?)\b[^"]*?$', 126 flags=re.MULTILINE) 127 128 129# Does not handle doubly-nested classes. 
def _parse_java_classes(contents):
    """Finds the outer class and the classes nested within it.

    The first class-like declaration is taken as the outer class; every
    later one is recorded as nested directly inside it, so doubly-nested
    classes are not represented correctly.

    Returns:
      A tuple of (outer_class, nested_classes).

    Raises:
      ParseError: If there is no "package" line or no class is found.
    """
    package = _parse_package(contents).replace('.', '/')
    outer_class = None
    nested_classes = []
    for m in _CLASSES_REGEX.finditer(contents):
        preamble, class_name = m.groups()
        # Ignore annotations like @Foo("contains the words class Bar")
        if preamble.count('"') % 2 != 0:
            continue
        if outer_class is None:
            outer_class = java_types.JavaClass(f'{package}/{class_name}')
        else:
            nested_classes.append(outer_class.make_nested(class_name))

    if outer_class is None:
        raise ParseError('No classes found.')

    return outer_class, nested_classes


# Supports only @Foo and @Foo("value").
_ANNOTATION_REGEX = re.compile(r'@([\w.]+)(?:\(\s*"(.*?)\"\s*\))?\s*')


def _parse_annotations(value):
    """Splits annotations off |value|.

    Returns:
      A tuple of (annotations, remainder). |annotations| maps each
      annotation name to its quoted argument, or to None when it has none.
      |remainder| is the text following the last annotation matched.
    """
    annotations = {}
    last_idx = 0
    for m in _ANNOTATION_REGEX.finditer(value):
        annotations[m.group(1)] = m.group(2)
        last_idx = m.end()

    return annotations, value[last_idx:]


def _parse_type(type_resolver, value):
    """Parses a string into a JavaType.

    Annotations are split off first, then each trailing "[]" adds one array
    dimension. Names that are not in java_types.PRIMITIVES are passed to
    |type_resolver|.resolve().
    """
    annotations, value = _parse_annotations(value)
    array_dimensions = 0
    while value.endswith('[]'):
        array_dimensions += 1
        value = value[:-2]

    if value in java_types.PRIMITIVES:
        primitive_name = value
        java_class = None
    else:
        primitive_name = None
        java_class = type_resolver.resolve(value)

    return java_types.JavaType(array_dimensions=array_dimensions,
                               primitive_name=primitive_name,
                               java_class=java_class,
                               annotations=annotations)


_FINAL_REGEX = re.compile(r'\bfinal\s')


def _parse_param_list(type_resolver, value) -> java_types.JavaParamList:
    """Parses the text between a method's parentheses into a JavaParamList.

    Returns java_types.EMPTY_PARAM_LIST when |value| is empty or only
    whitespace. Parameters are split on ','.
    """
    if not value or value.isspace():
        return java_types.EMPTY_PARAM_LIST
    params = []
    value = _FINAL_REGEX.sub('', value)
    for param_str in value.split(','):
        # The parameter name is whatever follows the last space; everything
        # before it is the (possibly annotated) type.
        type_str, _, param_name = param_str.strip().rpartition(' ')
        type_str = type_str.rstrip()

        # Handle varargs.
        if type_str.endswith('...'):
            type_str = type_str[:-3] + '[]'

        param_type = _parse_type(type_resolver, type_str)
        params.append(java_types.JavaParam(param_type, param_name))

    return java_types.JavaParamList(params)


_NATIVE_METHODS_INTERFACE_REGEX = re.compile(
    r'@NativeMethods(?:\(\s*"(?P<module_name>\w+)"\s*\))?[\S\s]+?'
    r'(?P<visibility>public)?\s*\binterface\s*'
    r'(?P<interface_name>\w*)\s*{(?P<interface_body>(\s*.*)+?\s*)}')

_PROXY_NATIVE_REGEX = re.compile(r'\s*(.*?)\s+(\w+)\((.*?)\);',
                                 flags=re.DOTALL)

_PUBLIC_REGEX = re.compile(r'\bpublic\s')


def _parse_proxy_natives(type_resolver, contents):
    """Parses the @NativeMethods interface in |contents|, if there is one.

    Returns:
      A _ParsedProxyNatives with its methods sorted, or None when there is
      no @NativeMethods interface.

    Raises:
      ParseError: If there are several @NativeMethods interfaces, or the
        interface declares no methods.
    """
    matches = list(_NATIVE_METHODS_INTERFACE_REGEX.finditer(contents))
    if not matches:
        return None
    if len(matches) > 1:
        raise ParseError(
            'Multiple @NativeMethod interfaces in one class is not supported.')

    match = matches[0]
    ret = _ParsedProxyNatives(interface_name=match.group('interface_name'),
                              visibility=match.group('visibility'),
                              module_name=match.group('module_name'),
                              methods=[])
    interface_body = match.group('interface_body')

    for m in _PROXY_NATIVE_REGEX.finditer(interface_body):
        preamble, name, params_part = m.groups()
        preamble = _PUBLIC_REGEX.sub('', preamble)
        # What remains of the preamble after its annotations is the return
        # type.
        annotations, return_type_part = _parse_annotations(preamble)
        params = _parse_param_list(type_resolver, params_part)
        return_type = _parse_type(type_resolver, return_type_part)
        signature = java_types.JavaSignature.from_params(return_type, params)
        ret.methods.append(
            ParsedNative(
                name=name,
                signature=signature,
                native_class_name=annotations.get('NativeClassQualifiedName')))
    if not ret.methods:
        raise ParseError('Found no methods within @NativeMethod interface.')
    ret.methods.sort()
    return ret


_NON_PROXY_NATIVES_REGEX = re.compile(
    r'(@NativeClassQualifiedName'
    r'\(\"(?P<native_class_name>\S*?)\"\)\s+)?'
    r'(?P<qualifiers>\w+\s\w+|\w+|\s+)\s*native\s+'
    r'(?P<return_type>\S*)\s+'
    r'(?P<name>native\w+)\((?P<params>.*?)\);', re.DOTALL)


def _parse_non_proxy_natives(type_resolver, contents):
    """Parses "native" methods whose names start with "native".

    Returns:
      A sorted list of ParsedNative. Each name has the leading "native"
      removed.
    """
    ret = []
    for match in _NON_PROXY_NATIVES_REGEX.finditer(contents):
        # The regex guarantees the "native" prefix. Strip only that prefix:
        # str.replace() would also delete any later "native" in the name.
        name = match.group('name')[len('native'):]
        return_type = _parse_type(type_resolver, match.group('return_type'))
        params = _parse_param_list(type_resolver, match.group('params'))
        signature = java_types.JavaSignature.from_params(return_type, params)
        native_class_name = match.group('native_class_name')
        static = 'static' in match.group('qualifiers')
        ret.append(
            ParsedNative(name=name,
                         signature=signature,
                         native_class_name=native_class_name,
                         static=static))
    ret.sort()
    return ret


# Regex to match a string like "@CalledByNative public void foo(int bar)".
_CALLED_BY_NATIVE_REGEX = re.compile(
    r'@CalledByNative((?P<Unchecked>(?:Unchecked)?|ForTesting))'
    r'(?:\("(?P<annotation>.*)"\))?'
    r'(?:\s+@\w+(?:\(.*\))?)*'  # Ignore any other annotations.
    r'\s+(?P<modifiers>' + _MODIFIER_KEYWORDS + r')' +
    r'(?:\s*@\w+)?'  # Ignore annotations in return types.
    r'\s*(?P<return_type>\S*?)'
    r'\s*(?P<name>\w+)'
    r'\s*\((?P<params>[^\)]*)\)')


def _parse_called_by_natives(type_resolver, contents):
    """Parses every @CalledByNative method and constructor in |contents|.

    Returns:
      A sorted list of ParsedCalledByNative.

    Raises:
      ParseError: If a line still contains "@CalledByNative" after all regex
        matches have been removed.
    """
    ret = []
    for match in _CALLED_BY_NATIVE_REGEX.finditer(contents):
        return_type_str = match.group('return_type')
        name = match.group('name')
        if return_type_str:
            return_type = _parse_type(type_resolver, return_type_str)
        else:
            # No return type means this is a constructor.
            return_type = java_types.VOID
            name = '<init>'

        params = _parse_param_list(type_resolver, match.group('params'))
        signature = java_types.JavaSignature.from_params(return_type, params)
        # @CalledByNative("Inner") names the nested class owning the method.
        inner_class_name = match.group('annotation')
        java_class = type_resolver.java_class
        if inner_class_name:
            java_class = java_class.make_nested(inner_class_name)

        ret.append(
            ParsedCalledByNative(
                java_class=java_class,
                name=name,
                signature=signature,
                static='static' in match.group('modifiers'),
                unchecked='Unchecked' in match.group('Unchecked')))

    # Check for any @CalledByNative occurrences that were not matched.
    unmatched_lines = _CALLED_BY_NATIVE_REGEX.sub('', contents).splitlines()
    for i, line in enumerate(unmatched_lines):
        if '@CalledByNative' in line:
            context = '\n'.join(unmatched_lines[i:i + 5])
            raise ParseError(
                'Could not parse @CalledByNative method signature:\n' +
                context)

    ret.sort()
    return ret


_IMPORT_REGEX = re.compile(r'^import\s+([^\s*]+);', flags=re.MULTILINE)
_IMPORT_CLASS_NAME_REGEX = re.compile(r'^(.*?)\.([A-Z].*)')


def _parse_imports(contents):
    """Yields a JavaClass for each non-static, non-wildcard import.

    The name is split at the first dot that is followed by an uppercase
    letter: dots before it become '/', and dots after it become '$'.
    Imports without such a dot are skipped.
    """
    # Regex skips static imports as well as wildcard imports.
    names = _IMPORT_REGEX.findall(contents)
    for name in names:
        m = _IMPORT_CLASS_NAME_REGEX.match(name)
        if m:
            package, class_name = m.groups()
            yield java_types.JavaClass(
                package.replace('.', '/') + '/' +
                class_name.replace('.', '$'))


# Raw string: '\(' and '\)' are regex escapes, not string escapes.
_JNI_NAMESPACE_REGEX = re.compile(r'@JNINamespace\("(.*?)"\)')


def _parse_jni_namespace(contents):
    """Returns the @JNINamespace("...") value, or '' when there is none.

    Raises:
      ParseError: If more than one @JNINamespace annotation is found.
    """
    m = _JNI_NAMESPACE_REGEX.findall(contents)
    if not m:
        return ''
    if len(m) > 1:
        raise ParseError('Found multiple @JNINamespace annotations.')
    return m[0]


def _do_parse(filename, *, package_prefix):
    """Reads and parses the Java source file |filename| into a ParsedFile.

    Args:
      filename: Path of a .java file. Its base name must equal the name of
        the outer class.
      package_prefix: If truthy, passed to make_prefixed() on the outer and
        nested classes.

    Raises:
      ParseError: If the file cannot be parsed, or the outer class name does
        not match the file name.
    """
    assert not filename.endswith('.kt'), (
        f'Found {filename}, but Kotlin is not supported by JNI generator.')
    # Decode explicitly as UTF-8 rather than with the locale's default
    # encoding, so results do not depend on the machine's configuration.
    with open(filename, encoding='utf-8') as f:
        contents = f.read()
    # Comments and generics are stripped up front so that the regexes below
    # do not have to account for them.
    contents = _remove_comments(contents)
    contents = _remove_generics(contents)

    outer_class, nested_classes = _parse_java_classes(contents)

    expected_name = os.path.splitext(os.path.basename(filename))[0]
    if outer_class.name != expected_name:
        raise ParseError(
            f'Found class "{outer_class.name}" but expected "{expected_name}".')

    if package_prefix:
        outer_class = outer_class.make_prefixed(package_prefix)
        nested_classes = [
            c.make_prefixed(package_prefix) for c in nested_classes
        ]

    type_resolver = java_types.TypeResolver(outer_class)
    for java_class in _parse_imports(contents):
        type_resolver.add_import(java_class)
    for java_class in nested_classes:
        type_resolver.add_nested_class(java_class)

    parsed_proxy_natives = _parse_proxy_natives(type_resolver, contents)
    jni_namespace = _parse_jni_namespace(contents)

    non_proxy_methods = _parse_non_proxy_natives(type_resolver, contents)
    called_by_natives = _parse_called_by_natives(type_resolver, contents)

    ret = ParsedFile(filename=filename,
                     jni_namespace=jni_namespace,
                     type_resolver=type_resolver,
                     proxy_methods=[],
                     non_proxy_methods=non_proxy_methods,
                     called_by_natives=called_by_natives,
                     constant_fields=[])

    if parsed_proxy_natives:
        ret.module_name = parsed_proxy_natives.module_name
        ret.proxy_interface = outer_class.make_nested(
            parsed_proxy_natives.interface_name)
        ret.proxy_visibility = parsed_proxy_natives.visibility
        ret.proxy_methods = parsed_proxy_natives.methods

    return ret


def parse_java_file(filename, *, package_prefix=None):
    """Parses the Java source file |filename| into a ParsedFile.

    Raises:
      ParseError: On failure. The file name is appended to the message via
        the error's |suffix|.
    """
    try:
        return _do_parse(filename, package_prefix=package_prefix)
    except ParseError as e:
        e.suffix = f' (when parsing {filename})'
        raise


_JAVAP_CLASS_REGEX = re.compile(r'\b(?:class|interface) (\S+)')
_JAVAP_FINAL_FIELD_REGEX = re.compile(
    r'^\s+public static final \S+ (.*?) = (\d+);', flags=re.MULTILINE)
_JAVAP_METHOD_REGEX = re.compile(
    rf'^\s*({_MODIFIER_KEYWORDS}).*?(\S+?)\(.*\n\s+descriptor: (.*)',
    flags=re.MULTILINE)


def parse_javap(filename, contents):
    """Parses javap output into a ParsedFile.

    Only |called_by_natives| and |constant_fields| are populated; the native
    method lists are left empty.

    Args:
      filename: Stored in the returned ParsedFile; not opened here.
      contents: The javap output. Each method is expected to be followed by
        a "descriptor:" line.

    Raises:
      ParseError: If no class or interface declaration is found.
    """
    contents = _remove_generics(contents)
    class_match = _JAVAP_CLASS_REGEX.search(contents)
    if not class_match:
        raise ParseError('Could not find java class in javap output')
    java_class = java_types.JavaClass(class_match.group(1).replace('.', '/'))
    type_resolver = java_types.TypeResolver(java_class)

    constant_fields = [
        ParsedConstantField(name=field_match.group(1),
                            value=field_match.group(2))
        for field_match in _JAVAP_FINAL_FIELD_REGEX.finditer(contents)
    ]
    constant_fields.sort()

    called_by_natives = []
    for method_match in _JAVAP_METHOD_REGEX.finditer(contents):
        modifiers, name, descriptor = method_match.groups()
        # javap prints constructors under the fully-qualified class name.
        if name == java_class.full_name_with_dots:
            name = '<init>'
        signature = java_types.JavaSignature.from_descriptor(descriptor)

        called_by_natives.append(
            ParsedCalledByNative(java_class=java_class,
                                 name=name,
                                 signature=signature,
                                 static='static' in modifiers))
    called_by_natives.sort()

    # Although javac will not allow multiple methods with no args and
    # different return types, Class.class has just that, and it breaks with
    # our name-mangling logic which assumes this cannot happen.
    if java_class.full_name_with_slashes == 'java/lang/Class':
        called_by_natives = [
            x for x in called_by_natives if 'TypeDescriptor' not in (
                x.signature.return_type.non_array_full_name_with_slashes)
        ]

    return ParsedFile(filename=filename,
                      type_resolver=type_resolver,
                      proxy_methods=[],
                      non_proxy_methods=[],
                      called_by_natives=called_by_natives,
                      constant_fields=constant_fields)