# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import dataclasses
import os
import re
from typing import List
from typing import Optional

import java_types
import common

# Regex fragment matching zero or more whitespace-terminated Java modifier
# keywords. It is embedded into larger patterns elsewhere in this file.
_MODIFIER_KEYWORDS = r'(?:(?:{})\s+)*'.format('|'.join((
    'abstract',
    'default',
    'final',
    'native',
    'private',
    'protected',
    'public',
    'static',
    'synchronized',
)))


class ParseError(Exception):
  """Error raised when parsing fails.

  The string form is the base exception message followed by `suffix`.
  """
  # Text appended to the message by __str__(); empty unless overridden.
  suffix = ''

  def __str__(self):
    message = super().__str__()
    return message + self.suffix


@dataclasses.dataclass(order=True)
class ParsedNative:
  """One parsed native method declaration."""
  name: str
  signature: java_types.JavaSignature
  native_class_name: str
  static: bool = False


@dataclasses.dataclass(order=True)
class ParsedCalledByNative:
  """One parsed Java method or constructor that native code calls."""
  java_class: java_types.JavaClass
  name: str
  signature: java_types.JavaSignature
  static: bool
  unchecked: bool = False


@dataclasses.dataclass(order=True)
class ParsedConstantField:
  """A constant field name together with its value kept as a string."""
  name: str
  value: str


@dataclasses.dataclass
class ParsedFile:
  """Everything extracted from a single parsed input file."""
  filename: str
  type_resolver: java_types.TypeResolver
  proxy_methods: List[ParsedNative]
  non_proxy_methods: List[ParsedNative]
  called_by_natives: List[ParsedCalledByNative]
  constant_fields: List[ParsedConstantField]
  proxy_interface: Optional[java_types.JavaClass] = None
  proxy_visibility: Optional[str] = None
  module_name: Optional[str] = None  # E.g. @NativeMethods("module_name")
  jni_namespace: Optional[str] = None  # E.g. @JNINamespace("content")


@dataclasses.dataclass
class _ParsedProxyNatives:
  """Intermediate result describing one @NativeMethods interface."""
  interface_name: str
  visibility: str
  module_name: str
  methods: List[ParsedNative]


# Match single line comments, multiline comments, character literals, and
# double-quoted strings.
80_COMMENT_REMOVER_REGEX = re.compile( 81 r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', 82 re.DOTALL | re.MULTILINE) 83 84 85def _remove_comments(contents): 86 # We need to support both inline and block comments, and we need to handle 87 # strings that contain '//' or '/*'. 88 def replacer(match): 89 # Replace matches that are comments with nothing; return literals/strings 90 # unchanged. 91 s = match.group(0) 92 if s.startswith('/'): 93 return '' 94 else: 95 return s 96 97 return _COMMENT_REMOVER_REGEX.sub(replacer, contents) 98 99 100# Remove everything between and including <> except at the end of a string, e.g. 101# @JniType("std::vector<int>") 102# This will also break lines with comparison operators, but we don't care. 103_GENERICS_REGEX = re.compile(r'<[^<>\n]*>(?!>*")') 104 105 106def _remove_generics(value): 107 """Strips Java generics from a string.""" 108 while True: 109 ret = _GENERICS_REGEX.sub(' ', value) 110 if len(ret) == len(value): 111 return ret 112 value = ret 113 114 115_PACKAGE_REGEX = re.compile(r'^package\s+(\S+?);', flags=re.MULTILINE) 116 117 118def _parse_package(contents): 119 match = _PACKAGE_REGEX.search(contents) 120 if not match: 121 raise ParseError('Unable to find "package" line') 122 return match.group(1) 123 124 125_CLASSES_REGEX = re.compile( 126 r'^(.*?)(?:\b(?:public|protected|private)?\b)\s*' 127 r'(?:\b(?:static|abstract|final|sealed)\s+)*' 128 r'\b(?:class|interface|enum)\s+(\w+?)\b[^"]*?$', 129 flags=re.MULTILINE) 130 131 132# Does not handle doubly-nested classes. 
def _parse_java_classes(contents):
  """Finds the outer class and the classes nested inside it.

  The first class/interface/enum declaration found is taken as the outer
  class; every later declaration is recorded as a direct nested class of it.

  Args:
    contents: Java source text.

  Returns:
    A tuple (outer_class, nested_classes).

  Raises:
    ParseError: If there is no package line or no class declaration.
  """
  package = _parse_package(contents).replace('.', '/')
  outer_class = None
  nested_classes = []
  for m in _CLASSES_REGEX.finditer(contents):
    preamble, class_name = m.groups()
    # Ignore annotations like @Foo("contains the words class Bar")
    if preamble.count('"') % 2 != 0:
      continue
    if outer_class is None:
      outer_class = java_types.JavaClass(f'{package}/{class_name}')
    else:
      nested_classes.append(outer_class.make_nested(class_name))

  if outer_class is None:
    raise ParseError('No classes found.')

  return outer_class, nested_classes


_ANNOTATION_REGEX = re.compile(
    r'@(?P<annotation_name>[\w.]+)(?P<annotation_args>\(\s*(?:[^)]+)\s*\))?\s*')
# Only supports ("foo")
_ANNOTATION_ARGS_REGEX = re.compile(
    r'\(\s*"(?P<annotation_value>[^"]*?)"\s*\)\s*')


def _parse_annotations(value):
  """Extracts the annotations found in |value|.

  Returns:
    A tuple (annotations, remainder). |annotations| maps each annotation name
    to its single quoted argument, or to '' when the annotation has no
    argument of the form ("foo"). |remainder| is the text of |value| that
    follows the last annotation.
  """
  annotations = {}
  last_idx = 0
  for m in _ANNOTATION_REGEX.finditer(value):
    string_value = ''
    if match_args := m.group('annotation_args'):
      if match_arg_value := _ANNOTATION_ARGS_REGEX.match(match_args):
        string_value = match_arg_value.group('annotation_value')
    annotations[m.group('annotation_name')] = string_value
    last_idx = m.end()

  return annotations, value[last_idx:]


def _parse_type(type_resolver, value):
  """Parses a string into a JavaType.

  Args:
    type_resolver: Resolver used to turn a non-primitive name into a class.
    value: Type text, optionally preceded by annotations and followed by one
      or more "[]" suffixes.

  Raises:
    ParseError: If @JniType("std::vector") is used on a type for which the
      element type cannot be inferred.
  """
  annotations, parsed_value = _parse_annotations(value)
  array_dimensions = 0
  while parsed_value[-2:] == '[]':
    array_dimensions += 1
    # strip to remove possible spaces between type and [].
    parsed_value = parsed_value[:-2].strip()

  if parsed_value in java_types.PRIMITIVES:
    primitive_name = parsed_value
    java_class = None
  else:
    primitive_name = None
    java_class = type_resolver.resolve(parsed_value)

  converted_type = annotations.get('JniType', None)
  if converted_type == 'std::vector':
    # Allow "std::vector" as shorthand for types that can be inferred:
    if array_dimensions == 1 and primitive_name:
      # e.g.: std::vector<jint>
      converted_type += f'<j{primitive_name}>'
    elif array_dimensions > 0 or java_class in java_types.COLLECTION_CLASSES:
      # std::vector<jni_zero::ScopedJavaLocalRef<jobject>>
      converted_type += '<jni_zero::ScopedJavaLocalRef<jobject>>'
    else:
      raise ParseError('Found non-templatized @JniType("std::vector") on '
                       'non-array, non-List type: ' + value)

  # Annotations without a quoted argument are stored with the value '', so
  # test for the presence of @NonNull rather than reading the stored value.
  # This keeps |nullable| a real bool.
  nullable = 'NonNull' not in annotations

  return java_types.JavaType(array_dimensions=array_dimensions,
                             primitive_name=primitive_name,
                             java_class=java_class,
                             converted_type=converted_type,
                             nullable=nullable)


_FINAL_REGEX = re.compile(r'\bfinal\s')


def _parse_param_list(type_resolver, value) -> java_types.JavaParamList:
  """Parses the text between a method's parentheses into a JavaParamList.

  Args:
    type_resolver: Resolver passed through to _parse_type().
    value: Comma-separated parameter declarations; may be empty or blank.
  """
  if not value or value.isspace():
    return java_types.EMPTY_PARAM_LIST
  params = []
  value = _FINAL_REGEX.sub('', value)
  pending = ''
  for param_str in value.split(','):
    # Combine multiple entries when , is in an annotation.
    # E.g.: @JniType("std::map<std::string, std::string>") Map arg0
    if pending:
      pending += ',' + param_str
      if '"' not in param_str:
        continue
      param_str = pending
      pending = ''
    elif param_str.count('"') == 1:
      # An odd quote count means the split landed inside a string literal.
      pending = param_str
      continue
    param_str = param_str.strip()
    # The parameter name is whatever follows the last space.
    param_str, _, param_name = param_str.rpartition(' ')
    param_str = param_str.rstrip()

    # Handle varargs.
    if param_str.endswith('...'):
      param_str = param_str[:-3] + '[]'

    param_type = _parse_type(type_resolver, param_str)
    params.append(java_types.JavaParam(param_type, param_name))

  return java_types.JavaParamList(params)


_NATIVE_METHODS_INTERFACE_REGEX = re.compile(
    r'@NativeMethods(?:\(\s*"(?P<module_name>\w+)"\s*\))?[\S\s]+?'
    r'(?P<visibility>public)?\s*\binterface\s*'
    r'(?P<interface_name>\w*)\s*{(?P<interface_body>(\s*.*)+?\s*)}')

_PROXY_NATIVE_REGEX = re.compile(r'\s*(.*?)\s+(\w+)\((.*?)\);', flags=re.DOTALL)

_PUBLIC_REGEX = re.compile(r'\bpublic\s')


def _parse_proxy_natives(type_resolver, contents):
  """Parses the @NativeMethods interface in |contents|, if there is one.

  Returns:
    A _ParsedProxyNatives with its methods sorted, or None when no
    @NativeMethods interface is found.

  Raises:
    ParseError: If more than one @NativeMethods interface is found, or the
      interface declares no methods.
  """
  matches = list(_NATIVE_METHODS_INTERFACE_REGEX.finditer(contents))
  if not matches:
    return None
  if len(matches) > 1:
    raise ParseError(
        'Multiple @NativeMethod interfaces in one class is not supported.')

  match = matches[0]
  ret = _ParsedProxyNatives(interface_name=match.group('interface_name'),
                            visibility=match.group('visibility'),
                            module_name=match.group('module_name'),
                            methods=[])
  interface_body = match.group('interface_body')

  for m in _PROXY_NATIVE_REGEX.finditer(interface_body):
    # |preamble| is everything before the method name: annotations, an
    # optional "public", and the return type.
    preamble, name, params_part = m.groups()
    preamble = _PUBLIC_REGEX.sub('', preamble)
    annotations, _ = _parse_annotations(preamble)
    params = _parse_param_list(type_resolver, params_part)
    return_type = _parse_type(type_resolver, preamble)
    signature = java_types.JavaSignature.from_params(return_type, params)
    ret.methods.append(
        ParsedNative(
            name=name,
            signature=signature,
            native_class_name=annotations.get('NativeClassQualifiedName')))
  if not ret.methods:
    raise ParseError('Found no methods within @NativeMethod interface.')
  ret.methods.sort()
  return ret


_NON_PROXY_NATIVES_REGEX = re.compile(
    r'(@NativeClassQualifiedName'
    r'\(\"(?P<native_class_name>\S*?)\"\)\s+)?'
    r'(?P<qualifiers>\w+\s\w+|\w+|\s+)\s*native\s+'
    r'(?P<return_type>\S*)\s+'
    r'native(?P<name>\w+)\((?P<params>.*?)\);', re.DOTALL)


def _parse_non_proxy_natives(type_resolver, contents):
  """Parses "native <type> nativeFoo(...);" declarations in |contents|.

  Returns:
    A sorted list of ParsedNative. Each name is the method name with its
    leading "native" removed.
  """
  ret = []
  for match in _NON_PROXY_NATIVES_REGEX.finditer(contents):
    name = match.group('name')
    return_type = _parse_type(type_resolver, match.group('return_type'))
    params = _parse_param_list(type_resolver, match.group('params'))
    signature = java_types.JavaSignature.from_params(return_type, params)
    native_class_name = match.group('native_class_name')
    static = 'static' in match.group('qualifiers')
    ret.append(
        ParsedNative(name=name,
                     signature=signature,
                     native_class_name=native_class_name,
                     static=static))
  ret.sort()
  return ret


# Regex to match a string like "@CalledByNative public void foo(int bar)".
_CALLED_BY_NATIVE_REGEX = re.compile(
    r'@CalledByNative((?P<Unchecked>(?:Unchecked)?|ForTesting))'
    r'(?:\("(?P<annotation_value>.*)"\))?'
    r'(?P<method_annotations>(?:\s*@\w+(?:\(.*?\))?)+)?'
    r'\s+(?P<modifiers>' + _MODIFIER_KEYWORDS + r')' +
    r'(?P<return_type_annotations>(?:\s*@\w+(?:\(.*?\))?)+)?'
    r'\s*(?P<return_type>\S*?)'
    r'\s*(?P<name>\w+)'
    r'\s*\(\s*(?P<params>[^{;]*)\)'
    r'\s*(?:throws\s+[^{;]+)?'
    r'[{;]')


def _parse_called_by_natives(type_resolver, contents):
  """Parses every @CalledByNative method and constructor in |contents|.

  Returns:
    A sorted list of ParsedCalledByNative.

  Raises:
    ParseError: If an @CalledByNative annotation is present that the regex
      could not match to a method signature.
  """
  ret = []
  for match in _CALLED_BY_NATIVE_REGEX.finditer(contents):
    return_type_grp = match.group('return_type')
    name = match.group('name')
    if return_type_grp:
      pre_annotations = match.group('method_annotations') or ''
      post_annotations = match.group('return_type_annotations') or ''
      # Combine all the annotations before parsing the return type.
      return_type_str = (f'{pre_annotations} {post_annotations}'
                         f' {return_type_grp}').strip()
      return_type = _parse_type(type_resolver, return_type_str)
    else:
      # No return type was captured, so the match is treated as a constructor.
      return_type = java_types.VOID
      name = '<init>'

    params = _parse_param_list(type_resolver, match.group('params'))
    signature = java_types.JavaSignature.from_params(return_type, params)
    # A quoted annotation argument names the nested class owning the method.
    inner_class_name = match.group('annotation_value')
    java_class = type_resolver.java_class
    if inner_class_name:
      java_class = java_class.make_nested(inner_class_name)

    ret.append(
        ParsedCalledByNative(java_class=java_class,
                             name=name,
                             signature=signature,
                             static='static' in match.group('modifiers'),
                             unchecked='Unchecked' in match.group('Unchecked')))

  # Check for any @CalledByNative occurrences that were not matched.
  unmatched_lines = _CALLED_BY_NATIVE_REGEX.sub('', contents).splitlines()
  for i, line in enumerate(unmatched_lines):
    if '@CalledByNative' in line:
      context = '\n'.join(unmatched_lines[i:i + 5])
      raise ParseError('Could not parse @CalledByNative method signature:\n' +
                       context)

  ret.sort()
  return ret


_IMPORT_REGEX = re.compile(r'^import\s+([^\s*]+);', flags=re.MULTILINE)
_IMPORT_CLASS_NAME_REGEX = re.compile(r'^(.*?)\.([A-Z].*)')


def _parse_imports(contents):
  """Yields a JavaClass for each non-static, non-wildcard import.

  The class name is taken to start at the first dotted component that begins
  with an upper-case letter; any dots after that point become '$'.
  """
  # Regex skips static imports as well as wildcard imports.
  names = _IMPORT_REGEX.findall(contents)
  for name in names:
    m = _IMPORT_CLASS_NAME_REGEX.match(name)
    if m:
      package, class_name = m.groups()
      yield java_types.JavaClass(
          package.replace('.', '/') + '/' + class_name.replace('.', '$'))


_JNI_NAMESPACE_REGEX = re.compile(r'@JNINamespace\("(.*?)"\)')


def _parse_jni_namespace(contents):
  """Returns the @JNINamespace("...") value, or '' if there is none.

  Raises:
    ParseError: If more than one @JNINamespace annotation is found.
  """
  m = _JNI_NAMESPACE_REGEX.findall(contents)
  if not m:
    return ''
  if len(m) > 1:
    raise ParseError('Found multiple @JNINamespace annotations.')
  return m[0]


def _do_parse(filename, *, package_prefix, package_prefix_filter):
  """Reads and parses the Java file at |filename| into a ParsedFile.

  Args:
    filename: Path to a .java file; its base name must match the outer class.
    package_prefix: If truthy, prefix applied to the parsed classes when
      common.should_rename_package() says the package should be renamed.
    package_prefix_filter: Passed to common.should_rename_package().

  Raises:
    ParseError: If the file cannot be parsed or the outer class name does not
      match the file name.
  """
  assert not filename.endswith('.kt'), (
      f'Found {filename}, but Kotlin is not supported by JNI generator.')
  # Decode explicitly as UTF-8 so the result does not depend on the
  # platform's default encoding.
  with open(filename, encoding='utf-8') as f:
    contents = f.read()
  contents = _remove_comments(contents)
  contents = _remove_generics(contents)

  outer_class, nested_classes = _parse_java_classes(contents)

  expected_name = os.path.splitext(os.path.basename(filename))[0]
  if outer_class.name != expected_name:
    raise ParseError(
        f'Found class "{outer_class.name}" but expected "{expected_name}".')

  if package_prefix and common.should_rename_package(
      outer_class.package_with_dots, package_prefix_filter):
    outer_class = outer_class.make_prefixed(package_prefix)
    nested_classes = [c.make_prefixed(package_prefix) for c in nested_classes]

  type_resolver = java_types.TypeResolver(outer_class)
  for java_class in _parse_imports(contents):
    type_resolver.add_import(java_class)
  for java_class in nested_classes:
    type_resolver.add_nested_class(java_class)

  parsed_proxy_natives = _parse_proxy_natives(type_resolver, contents)
  jni_namespace = _parse_jni_namespace(contents)

  non_proxy_methods = _parse_non_proxy_natives(type_resolver, contents)
  called_by_natives = _parse_called_by_natives(type_resolver, contents)

  ret = ParsedFile(filename=filename,
                   jni_namespace=jni_namespace,
                   type_resolver=type_resolver,
                   proxy_methods=[],
                   non_proxy_methods=non_proxy_methods,
                   called_by_natives=called_by_natives,
                   constant_fields=[])

  if parsed_proxy_natives:
    ret.module_name = parsed_proxy_natives.module_name
    ret.proxy_interface = outer_class.make_nested(
        parsed_proxy_natives.interface_name)
    ret.proxy_visibility = parsed_proxy_natives.visibility
    ret.proxy_methods = parsed_proxy_natives.methods

  return ret


def parse_java_file(filename,
                    *,
                    package_prefix=None,
                    package_prefix_filter=None):
  """Parses a .java file and returns a ParsedFile.

  Any exception raised while parsing is re-raised with
  ' (when parsing <filename>)' added to its args, so the failing file is
  identified in the error output.
  """
  try:
    return _do_parse(filename,
                     package_prefix=package_prefix,
                     package_prefix_filter=package_prefix_filter)
  except Exception as e:
    note = f' (when parsing {filename})'
    if e.args and isinstance(e.args[0], str):
      # Append to the existing message so it reads as one sentence.
      e.args = (e.args[0] + note, *e.args[1:])
    else:
      e.args = e.args + (note, )
    raise


_JAVAP_CLASS_REGEX = re.compile(r'\b(?:class|interface) (\S+)')
_JAVAP_FINAL_FIELD_REGEX = re.compile(
    r'^\s+public static final \S+ (.*?) = (\d+);', flags=re.MULTILINE)
_JAVAP_METHOD_REGEX = re.compile(
    rf'^\s*({_MODIFIER_KEYWORDS}).*?(\S+?)\(.*\n\s+descriptor: (.*)',
    flags=re.MULTILINE)


def parse_javap(filename, contents):
  """Parses javap output into a ParsedFile.

  Every method that is followed by a "descriptor:" line is recorded as a
  ParsedCalledByNative, and every "public static final" field with a
  digits-only value is recorded as a ParsedConstantField. Both lists are
  sorted.

  Args:
    filename: Stored on the returned ParsedFile; not opened here.
    contents: Text output of javap.

  Raises:
    ParseError: If no class or interface declaration is found.
  """
  contents = _remove_generics(contents)
  match = _JAVAP_CLASS_REGEX.search(contents)
  if not match:
    raise ParseError('Could not find java class in javap output')
  java_class = java_types.JavaClass(match.group(1).replace('.', '/'))
  type_resolver = java_types.TypeResolver(java_class)

  constant_fields = []
  for match in _JAVAP_FINAL_FIELD_REGEX.finditer(contents):
    name, value = match.groups()
    constant_fields.append(ParsedConstantField(name=name, value=value))
  constant_fields.sort()

  called_by_natives = []
  for match in _JAVAP_METHOD_REGEX.finditer(contents):
    modifiers, name, descriptor = match.groups()
    # A method named after the class itself is a constructor.
    if name == java_class.full_name_with_dots:
      name = '<init>'
    signature = java_types.JavaSignature.from_descriptor(descriptor)

    called_by_natives.append(
        ParsedCalledByNative(java_class=java_class,
                             name=name,
                             signature=signature,
                             static='static' in modifiers))
  called_by_natives.sort()
  return ParsedFile(filename=filename,
                    type_resolver=type_resolver,
                    proxy_methods=[],
                    non_proxy_methods=[],
                    called_by_natives=called_by_natives,
                    constant_fields=constant_fields)