# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Utility for ICU4C code generation"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import os
import site
import sys
import textwrap
from collections import deque

THIS_DIR = os.path.dirname(os.path.realpath(__file__))
ANDROID_TOP = os.path.realpath(os.path.join(THIS_DIR, '../../../..'))


def android_path(*args):
    """Returns the absolute path to a directory within the Android tree.

    Args:
        *args: Path components, joined below ANDROID_TOP with os.path.join.
    """
    return os.path.join(ANDROID_TOP, *args)


# TODO: Include clang bindings in prebuilt package. http://b/119270767
site.addsitedir(android_path('external/clang/bindings/python'))
import clang.cindex  # pylint: disable=import-error,wrong-import-position

# TODO: Do not hardcode clang version. http://b/119270767
CLANG_REVISION = 'r346389b'
CLANG_LIB_VERSION = '8svn'
CLANG_HEADER_VERSION = '8.0.6'
CLANG_PATH = android_path('prebuilts/clang/host/linux-x86/clang-%s' % CLANG_REVISION)


class Function(object):
    """A visible function found in an ICU header."""

    def __init__(self, name, result_type, params, is_variadic, module):
        """
        Args:
            name: The function name.
            result_type: Spelling of the return type, e.g. 'void'.
            params: A list of (param_type, param_name) string pairs.
            is_variadic: True if the declaration ends with '...'.
            module: The ICU module name; used to build the `handle` attribute.
        """
        self.name = name
        self.result_type = result_type
        self.params = params
        self.is_variadic = is_variadic
        # A negative position means "no va_list callee configured yet";
        # see set_variadic_callee() and arg_str.
        self.va_list_insert_position = -1

        # callee will be used in dlsym and may be identical to others for
        # functions with variable argument lists.
        self.callee = self.name
        if self.is_variadic:
            # Name of the last named parameter (the one preceding '...').
            self.last_param = self.params[-1][1]
        self.handle = 'handle_' + module
        self.return_void = self.result_type == 'void'

    @property
    def param_str(self):
        """Returns a string usable as a parameter list in a function decl."""
        params = []
        for param_type, param_name in self.params:
            bracket = param_type.find('[')
            if bracket >= 0:
                # `int foo[42]` will be a param_type of `int [42]` and a
                # param_name of `foo`. We need to put these back in the right
                # order.
                param_name += param_type[bracket:]
                param_type = param_type[:bracket]
            params.append('{} {}'.format(param_type, param_name))
        if self.is_variadic:
            params.append('...')
        return ', '.join(params)

    @property
    def arg_str(self):
        """Returns a string usable as an argument list in a function call.

        For a variadic function the va_list argument (named `args`) is
        inserted at index `va_list_insert_position` of the argument list.

        Raises:
            ValueError: The function is variadic but no valid insert position
                has been set via set_variadic_callee().
        """
        args = [param_name for _, param_name in self.params]
        if self.is_variadic:
            if self.va_list_insert_position < 0:
                raise ValueError(
                    '{}({}) is variadic, but has no valid inserted position'.format(
                        self.name, self.param_str))
            args.insert(self.va_list_insert_position, 'args')
        return ', '.join(args)

    def set_variadic_callee(self, callee, inserted_position):
        """Set variadic callee with callee name and inserted position.

        Has no effect on non-variadic functions.

        Args:
            callee: Name of the function accepting a va_list to call instead.
            inserted_position: Index in the argument list at which the
                va_list argument is inserted.
        """
        if self.is_variadic:
            self.callee = callee
            self.va_list_insert_position = inserted_position


def logger():
    """Returns the module level logger."""
    return logging.getLogger(__name__)


class DeclaredFunctionsParser(object):
    """Parser to get declared functions from ICU4C headers."""

    def __init__(self, decl_filters, whitelisted_decl_filter):
        """
        Args:
            decl_filters: A list of filters for declared functions.
            whitelisted_decl_filter: A list of whitelisting filters for declared functions.
                If the function is whitelisted here, the function will not be filtered by
                the filters added in decl_filters.
        """
        self.decl_filters = decl_filters
        self.whitelisted_decl_filters = whitelisted_decl_filter
        self.va_functions_mapping = {}

        # properties to store the parsing result
        self.all_headers = []
        self.all_header_paths_to_copy = set()
        self.all_declared_functions = []
        self.seen_functions = set()

        # Configures libclang to load in our environment.
        # Point the bindings directly at the libclang shared object instead of
        # relying on LD_LIBRARY_PATH. Note that setting LD_LIBRARY_PATH with
        # os.putenv() sometimes doesn't help, and neither did
        # clang.cindex.Config.set_library_path(os.path.join(CLANG_PATH, 'lib64')).
        clang.cindex.Config.set_library_file(
            os.path.join(CLANG_PATH, 'lib64', 'libclang.so.%s' % CLANG_LIB_VERSION))

    def set_va_functions_mapping(self, mapping):
        """Set mapping from a variable argument function to an implementation.

        Functions w/ variable argument lists (...) need special care to call
        their corresponding v- versions that accept a va_list argument. Note that
        although '...' will always appear as the last parameter, its v- version
        may put the va_list arg in a different place. Hence we provide an index
        to indicate the position.
        e.g. 'umsg_format': ('umsg_vformat', 3) means in the wrapper function of
        'umsg_format', it will call 'umsg_vformat' instead, with the va_list arg
        inserted at (0-based) index 3 of the argument list.
        """
        self.va_functions_mapping = mapping

    @property
    def header_includes(self):
        """Return all headers declaring the functions returned in declared_functions.

        If all functions in the header are filtered, the header is not included in here.
        """
        return [self.short_header_path(header) for header in self.all_headers]

    @property
    def header_paths_to_copy(self):
        """Return all headers needed to be copied"""
        return self.all_header_paths_to_copy

    @property
    def declared_functions(self):
        """Return all declared functions after filtering"""
        return self.all_declared_functions

    def get_cflags(self):
        """Returns the cflags that should be used for parsing."""
        clang_flags = [
            '-x',
            'c',
            '-std=c99',
            '-DU_DISABLE_RENAMING=1',
            '-DU_SHOW_CPLUSPLUS_API=0',
            '-DU_HIDE_DRAFT_API',
            '-DU_HIDE_DEPRECATED_API',
            '-DU_HIDE_INTERNAL_API',
            '-DANDROID_LINK_SHARED_ICU4C',
        ]

        include_dirs = [
            # TODO: Do not hardcode clang version. http://b/119270767
            os.path.join(CLANG_PATH, 'lib64/clang/', CLANG_HEADER_VERSION, 'include/'),
            android_path('bionic/libc/include'),
            android_path('external/icu/android_icu4c/include'),
            android_path('external/icu/icu4c/source/common'),
            android_path('external/icu/icu4c/source/i18n'),
        ]

        clang_flags.extend('-I' + include_dir for include_dir in include_dirs)
        return clang_flags

    def get_all_cpp_headers(self):
        """Return all C++ header names in icu4c/source/test/hdrtst/cxxfiles.txt

        Blank lines and lines starting with '#' are skipped.
        """
        cpp_headers = []
        with open(android_path('external/icu/icu4c/source/test/hdrtst/cxxfiles.txt'),
                  'r') as f:
            for line in f:
                line = line.strip()
                if line and not line.startswith('#'):
                    cpp_headers.append(line)
        return cpp_headers

    def parse(self):
        """Parse the headers and collect the declared functions after filtering
        and the headers containing the functions.

        The results are stored on the instance and exposed through the
        declared_functions, header_includes and header_paths_to_copy
        properties.
        """
        index = clang.cindex.Index.create()
        # Both values are the same for every header, so compute them once
        # rather than once per file.
        cflags = self.get_cflags()
        cpp_headers = set(self.get_all_cpp_headers())

        icu_modules = (
            'common',
            'i18n',
        )
        header_dependencies = {}
        for module in icu_modules:
            path = android_path('external/icu/icu4c/source', module, 'unicode')
            # os.listdir() order is arbitrary; sort so headers are always
            # visited in the same order.
            for file_name in sorted(os.listdir(path)):
                if not file_name.endswith('.h'):
                    continue
                # Ignore C++ headers.
                if file_name in cpp_headers:
                    continue

                file_path = os.path.join(path, file_name)
                tunit = index.parse(file_path, cflags)
                self.handle_diagnostics(tunit)
                header_dependencies[file_path] = [
                    file_inclusion.include.name
                    for file_inclusion in tunit.get_includes()]
                visible_functions = self.get_visible_functions(
                    tunit.cursor, module, file_path)
                for function in visible_functions:
                    self.seen_functions.add(function.name)
                    self.all_declared_functions.append(function)
                if visible_functions:
                    self.all_headers.append(file_path)

        # Sort to produce a deterministic output
        self.all_declared_functions = sorted(self.all_declared_functions, key=lambda f: f.name)
        self.all_headers = sorted(self.all_headers)

        self._collect_header_paths_to_copy(header_dependencies)

    def _collect_header_paths_to_copy(self, header_dependencies):
        """Adds all_headers and the ICU4C headers they include to all_header_paths_to_copy.

        Args:
            header_dependencies: A map from icu4c header file path to a list of
                included headers. The key must be an ICU4C header, but the value
                could contain non-ICU4C headers, e.g.
                {
                  ".../icu4c/source/common/unicode/utype.h": [
                    ".../icu4c/source/common/unicode/uversion.h",
                    ".../bionic/libc/include/ctype.h",
                  ],
                  ...
                }
        """
        file_queue = deque()
        file_processed = set()
        for header in self.all_headers:
            file_queue.appendleft(header)
            self.all_header_paths_to_copy.add(header)
        while file_queue:
            f = file_queue.pop()
            if f in file_processed:
                continue
            file_processed.add(f)
            for header in header_dependencies[f]:
                # Only headers that are keys of header_dependencies are queued,
                # so the lookup above cannot miss. Non-icu4c headers are ignored.
                if header in header_dependencies:
                    self.all_header_paths_to_copy.add(header)
                    file_queue.appendleft(header)

    def handle_diagnostics(self, tunit):
        """Logs compiler diagnostics. Exits if errors occurred.

        Args:
            tunit: The translation unit whose diagnostics are reported.
        """
        severity_to_level = {
            clang.cindex.Diagnostic.Fatal: logging.CRITICAL,
            clang.cindex.Diagnostic.Error: logging.ERROR,
            clang.cindex.Diagnostic.Warning: logging.WARNING,
            clang.cindex.Diagnostic.Note: logging.INFO,
        }
        error_severities = (
            clang.cindex.Diagnostic.Fatal,
            clang.cindex.Diagnostic.Error,
        )
        errors = 0
        for diag in tunit.diagnostics:
            # Severities not listed above (e.g. Diagnostic.Ignored) are logged
            # at DEBUG instead of leaving the level undefined.
            level = severity_to_level.get(diag.severity, logging.DEBUG)
            if diag.severity in error_severities:
                errors += 1
            logger().log(
                level, '%s:%s:%s %s', diag.location.file, diag.location.line,
                diag.location.column, diag.spelling)
        if errors:
            sys.exit('Errors occurred during parsing. Exiting.')

    def get_visible_functions(self, cursor, module, file_name):
        """Returns a list of all visible functions in a header file.

        A function declared more than once in the same header is returned
        only once.
        """
        functions = []
        names_in_header = set()
        for child in cursor.get_children():
            if not self.should_process_decl(child, file_name):
                continue
            # self.seen_functions is only updated by parse() after this method
            # returns, so repeated declarations within one header have to be
            # de-duplicated here.
            if child.spelling in names_in_header:
                continue
            names_in_header.add(child.spelling)
            functions.append(self.from_cursor(child, module))
        return functions

    def should_process_decl(self, decl, file_name):
        """Returns True if this function needs to be processed."""
        if decl.kind != clang.cindex.CursorKind.FUNCTION_DECL:
            return False
        # A declaration without a source file cannot come from file_name.
        decl_file = decl.location.file
        if decl_file is None or decl_file.name != file_name:
            return False
        if decl.spelling in self.seen_functions:
            return False
        if not self.is_function_visible(decl):
            return False
        # A whitelisted declaration bypasses the regular filters.
        for whitelisted_decl_filter in self.whitelisted_decl_filters:
            if whitelisted_decl_filter(decl):
                return True
        for decl_filter in self.decl_filters:
            if not decl_filter(decl):
                return False
        return True

    def is_function_visible(self, decl):
        """Returns True if the function has default visibility.

        If the declaration carries several visibility attributes, the last
        one decides.
        """
        visible = False
        vis_attrs = self.get_children_by_kind(
            decl, clang.cindex.CursorKind.VISIBILITY_ATTR)
        for child in vis_attrs:
            visible = child.spelling == 'default'
        return visible

    def get_children_by_kind(self, cursor, kind):
        """Returns a generator of cursor's children of a specific kind."""
        for child in cursor.get_children():
            if child.kind == kind:
                yield child

    def short_header_path(self, name):
        """Trim the given file name to 'unicode/xyz.h'.

        The name is returned unchanged if it does not contain 'unicode/'.
        """
        start = name.rfind('unicode/')
        if start < 0:
            return name
        return name[start:]

    def from_cursor(self, cursor, module):
        """Creates a Function object from the decl at the cursor.

        Raises:
            ValueError: The cursor's type kind is not TypeKind.FUNCTIONPROTO.
        """
        if cursor.type.kind != clang.cindex.TypeKind.FUNCTIONPROTO:
            raise ValueError(textwrap.dedent("""\
                {}'s type kind is {}, expected TypeKind.FUNCTIONPROTO.
                {} Line {} Column {}""".format(
                    cursor.spelling,
                    cursor.type.kind,
                    cursor.location.file,
                    cursor.location.line,
                    cursor.location.column)))

        name = cursor.spelling
        result_type = cursor.result_type.spelling
        is_variadic = cursor.type.is_function_variadic()
        params = [(arg.type.spelling, arg.spelling)
                  for arg in cursor.get_arguments()]
        function = Function(name, result_type, params, is_variadic, module)
        # For variadic function, set the callee and va_list position
        if function.is_variadic and function.name in self.va_functions_mapping:
            callee, position = self.va_functions_mapping[function.name]
            function.set_variadic_callee(callee, position)
        return function


class StableDeclarationFilter(object):
    """Return true if it's @stable API"""

    def __call__(self, decl):
        """Returns True if the given decl has a doxygen stable tag."""
        if not decl.raw_comment:
            return False
        return '@stable' in decl.raw_comment


class WhitelistedDeclarationFilter(object):
    """A filter for whitelisting function declarations."""

    def __init__(self, whitelisted_function_names):
        self.whitelisted_function_names = whitelisted_function_names

    def __call__(self, decl):
        """Returns True if the given decl is whitelisted"""
        return decl.spelling in self.whitelisted_function_names


class BlacklistedlistedDeclarationFilter(object):
    """A filter for blacklisting function declarations."""

    def __init__(self, blacklisted_function_names):
        self.blacklisted_function_names = blacklisted_function_names

    def __call__(self, decl):
        """Returns True if the given decl is not blacklisted"""
        return decl.spelling not in self.blacklisted_function_names


# Functions w/ variable argument lists (...) need special care to call
# their corresponding v- versions that accept a va_list argument. Note that
# although '...' will always appear as the last parameter, its v- version
# may put the va_list arg in a different place. Hence we provide an index
# to indicate the position.
#
# e.g. 'umsg_format': ('umsg_vformat', 3) means in the wrapper function of
# 'umsg_format', it will call 'umsg_vformat' instead, with the va_list arg
# inserted at (0-based) index 3 of the argument list.

# We need to insert the va_list (named args) at the position
# indicated by the KNOWN_VA_FUNCTIONS map.
KNOWN_VA_FUNCTIONS = {
    'u_formatMessage': ('u_vformatMessage', 5),
    'u_parseMessage': ('u_vparseMessage', 5),
    'u_formatMessageWithError': ('u_vformatMessageWithError', 6),
    'u_parseMessageWithError': ('u_vparseMessageWithError', 5),
    'umsg_format': ('umsg_vformat', 3),
    'umsg_parse': ('umsg_vparse', 4),
    'utrace_format': ('utrace_vformat', 4),
}

# The following functions are not @stable
WHITELISTED_FUNCTION_NAMES = (
    # Not intended to be called directly, but are used by @stable macros.
    'utf8_nextCharSafeBody',
    'utf8_appendCharSafeBody',
    'utf8_prevCharSafeBody',
    'utf8_back1SafeBody',
)