#!/usr/bin/env python
# coding=utf-8
##############################################
# Copyright (c) 2021-2022 Huawei Device Co., Ltd.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################

import re
import os
import json
import clang.cindex
from clang.cindex import Config
from clang.cindex import Index
from clang.cindex import CursorKind
from clang.cindex import TypeKind
from utils.constants import StringConstant
from utils.constants import RegularExpressions
from typedef.parser.parser import NodeKind


line_dist = {}
calculation_times = 0


def find_parent(cursor):  # get the kind of the parent node
    cursor_parent = cursor.semantic_parent
    if cursor_parent is not None:
        if cursor_parent.kind == CursorKind.VAR_DECL:  # parent is VAR_DECL, used for integer variable nodes
            return cursor_parent.kind

        # determine which kind of aggregate the member belongs to
        elif cursor_parent.kind == CursorKind.STRUCT_DECL or cursor_parent.kind == CursorKind.UNION_DECL:
            return cursor_parent.kind
        else:
            parent_kind = processing_root_parent(cursor_parent)
            return parent_kind
    return None


def processing_root_parent(cursor_parent):
    parent = cursor_parent.semantic_parent
    if parent is not None:
        if parent.type.kind == TypeKind.INVALID:
            parent_kind = CursorKind.TRANSLATION_UNIT
            return parent_kind
        else:
            return parent.kind
    return None


def processing_no_child(cursor, data):  # handle nodes without children
    if cursor.kind == CursorKind.INTEGER_LITERAL:  # integer literal nodes have no children
        tokens = cursor.get_tokens()
        for token in tokens:
            data["integer_value"] = token.spelling  # get the integer value


def get_token(cursor):
    tokens = []
    for token in cursor.get_tokens():
        tokens.append(token.spelling)

    return tokens


def judgment_extern(cursor, data):  # check whether the declaration carries extern
    tokens = get_token(cursor)
    if cursor.kind == CursorKind.FUNCTION_DECL:
        if 'static' in tokens or 'deprecated' in tokens:
            is_extern = False
        else:
            is_extern = True
    elif cursor.kind == CursorKind.VAR_DECL:
        if 'extern' in tokens:
            is_extern = True
        else:
            is_extern = False
        if 'const' in tokens:
            data["is_const"] = True
        else:
            data["is_const"] = False
    else:
        is_extern = True

    data["is_extern"] = is_extern


def binary_operator(cursor, data):  # handle binary operators
    data["name"] = ""
    tokens = cursor.get_tokens()
    spelling_arr = ['<<', '>>', '+', '-', '*', '/']
    for token in tokens:
        if token.spelling in spelling_arr:
            data["operator"] = token.spelling


def distinction_member(cursor, data):  # distinguish struct members from union members
    parent_kind = find_parent(cursor)  # look up the parent node kind
    if parent_kind:
        if parent_kind == CursorKind.UNION_DECL:
            data["member"] = "union_member"
        elif parent_kind == CursorKind.STRUCT_DECL:
            data["member"] = "struct_member"
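

# Illustrative note (hypothetical input, not part of the parser): for a C
# snippet such as
#
#     struct Point { int x; };
#
# the FIELD_DECL cursor for `x` has a STRUCT_DECL semantic parent, so
# distinction_member() above sets data["member"] = "struct_member"; for a
# union member it would set "union_member" instead.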


def processing_parm(cursor, data):  # handle function parameter nodes
    if cursor.spelling:  # whether the parameter has a name
        data["name"] = cursor.spelling
    else:
        data["name"] = ""

    if cursor.type.get_pointee().kind == TypeKind.FUNCTIONPROTO:
        # the parameter is a function pointer; record its return type
        data["func_pointer_result_type"] = cursor.type.get_pointee().get_result().spelling


def processing_enum(cursor, data):  # get the enum constant value
    data["value"] = cursor.enum_value


def processing_def(cursor, data):  # handle macro definitions
    data['is_def_func'] = False
    data['name'] = cursor.spelling
    name_len = len(data['name'])
    str1_len = len(data['node_content']['content'])
    text = ''
    if name_len != str1_len:
        if data['node_content']['content']:
            if data['node_content']['content'][name_len] == '(':
                right_index = data['node_content']['content'].index(')')
                param = data['node_content']['content'][name_len:right_index + 1]
                text = data['node_content']['content'][right_index + 1:]
                data['is_def_func'] = True
                data['def_func_name'] = data['name']
                data['def_func_param'] = param
                data['name'] = data['name'] + param
            else:
                text = data['node_content']['content'][name_len:]
        else:
            print('macro_define error, its content is none')
    if text:
        text = text.strip()  # strip surrounding whitespace
        data['text'] = text
    data["type"] = "def_no_type"


def processing_func(cursor, data):  # handle functions
    data["return_type"] = cursor.result_type.spelling  # add the return-type key-value pair
    judgment_extern(cursor, data)


def processing_type(cursor, data):  # handle nodes without a type
    if cursor.kind == CursorKind.MACRO_INSTANTIATION:  # also macro related -- a macro instantiation
        data["type"] = "insta_no_type"

    elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE:  # inclusion directives have no type, by convention
        data["type"] = "inclusion_no_type"


def processing_name(cursor, data):  # handle nodes without a name
    if cursor.kind == CursorKind.PAREN_EXPR:  # parenthesized expression ()
        data["paren"] = "()"
        data["name"] = ""

    elif cursor.kind == CursorKind.UNEXPOSED_EXPR:  # unexposed expression, i.e. one without a clearly defined kind
        data["name"] = ""


def processing_char(cursor, data):  # handle character literal nodes
    tokens = list(cursor.get_tokens())
    char_value = (tokens[0].spelling.strip("'"))
    data["name"] = char_value


special_node_process = {
    CursorKind.ENUM_CONSTANT_DECL.name: processing_enum,
    CursorKind.MACRO_DEFINITION.name: processing_def,
    CursorKind.FUNCTION_DECL.name: processing_func,
    CursorKind.VAR_DECL.name: judgment_extern,
    CursorKind.PARM_DECL.name: processing_parm,
    CursorKind.FIELD_DECL.name: distinction_member,
    CursorKind.MACRO_INSTANTIATION.name: processing_type,
    CursorKind.INCLUSION_DIRECTIVE.name: processing_type,
    CursorKind.BINARY_OPERATOR.name: binary_operator,
    CursorKind.PAREN_EXPR.name: processing_name,
    CursorKind.UNEXPOSED_EXPR.name: processing_name,
    CursorKind.CHARACTER_LITERAL.name: processing_char
}
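

# A minimal sketch of how the table above is used (hypothetical cursor, for
# illustration only): processing_special_node() below looks up the handler by
# kind name, so a VAR_DECL cursor is routed as
#
#     handler = special_node_process[CursorKind.VAR_DECL.name]
#     handler(cursor, data)   # -> judgment_extern(cursor, data)
#
# Supporting a new node kind only requires registering another
# `kind name -> handler` pair in this dict.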


def get_api_unique_id(cursor, loc, data):
    unique_id = ''
    if cursor.kind == CursorKind.MACRO_DEFINITION:
        unique_id = '{}#{}'.format(loc["location_path"], cursor.spelling)
        return unique_id

    parent_of_cursor = cursor.semantic_parent
    struct_union_enum = [NodeKind.STRUCT_DECL.value, NodeKind.UNION_DECL.value,
                         NodeKind.ENUM_DECL.value]
    if parent_of_cursor:
        unique_name = cursor.spelling
        try:
            if parent_of_cursor.kind == CursorKind.TRANSLATION_UNIT:
                parent_name_str = ''
            elif parent_of_cursor.kind.name in struct_union_enum:
                parent_name_str = parent_of_cursor.type.spelling
                data['class_name'] = parent_of_cursor.spelling
            else:
                parent_name_str = parent_of_cursor.spelling
        except ValueError:
            parent_name_str = ''
        if cursor.kind.name in struct_union_enum:
            unique_name = cursor.type.spelling
        if not parent_name_str:
            unique_id = '{}#{}'.format(loc["location_path"], unique_name)
        else:
            unique_id = '{}#{}#{}'.format(loc["location_path"], parent_name_str, unique_name)
    return unique_id


def processing_special_node(cursor, data, key, gn_path):  # dispatch nodes that need special handling
    if key == 0:
        location_path = cursor.spelling
        kind_name = CursorKind.TRANSLATION_UNIT.name
    else:
        location_path = cursor.location.file.name
        kind_name = cursor.kind.name

    loc = {
        "location_path": '{}'.format(location_path),
        "location_line": cursor.location.line,
        "location_column": cursor.location.column
    }
    if gn_path:
        relative_path = os.path.relpath(location_path, gn_path)  # header file path relative to gn_path
        loc["location_path"] = relative_path
    data["location"] = loc
    data["unique_id"] = get_api_unique_id(cursor, loc, data)
    if key == 0:
        data["unique_id"] = data["name"]
    if kind_name in special_node_process:
        node_process = special_node_process[kind_name]
        node_process(cursor, data)  # call the handler for this node kind


def node_extent(cursor, current_file):
    start_offset = cursor.extent.start.offset
    end_offset = cursor.extent.end.offset
    with open(current_file, 'r', encoding='utf-8') as f:
        f.seek(start_offset)
        content = f.read(end_offset - start_offset)

    extent = {
        "start_offset": start_offset,
        "end_offset": end_offset,
        "content": content
    }
    return extent


def define_comment(cursor, current_file, data):
    line = cursor.location.line
    with open(current_file, mode='r', encoding='utf-8') as file:
        file_content = file.readlines()[:line]
        file_content = ''.join(file_content)
        pattern = '{} {})'.format(RegularExpressions.DEFINE_COMMENT.value, cursor.spelling)
        matches = re.search(pattern, file_content)
        if matches:
            data['comment'] = matches.group()


def get_default_node_data(cursor, gn_path):
    data = {
        "name": cursor.spelling,
        "kind": '',
        "type": cursor.type.spelling,
        "gn_path": gn_path,
        "node_content": {},
        "comment": '',
        "syscap": '',
        "since": '',
        "kit_name": '',
        "sub_system": '',
        "module_name": '',
        "permission": '',
        "class_name": 'global',
        "deprecate_since": '',
        "error_num": 'NA',
        "is_system_api": 'NA',
        "model_constraint": 'NA',
        "cross_platform": 'NA',
        "form": 'NA',
        "atomic_service": 'NA',
        "decorator": 'NA',
        "unique_id": ''
    }
    return data
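

# Illustrative sketch of how the default skeleton above gets filled in
# (hypothetical values): for a node whose doc comment contains
# "@syscap SystemCapability.Demo" and "@since 10", parser_data_assignment()
# below leaves untouched keys at their defaults ('' or 'NA') and produces,
# among others:
#
#     data["syscap"]  ->  ' SystemCapability.Demo'
#     data["since"]   ->  ' 10'
#
# (the leading space survives because the extractors only strip the @tag itself).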


def parser_data_assignment(cursor, current_file, gn_path, comment=None, key=0):
    data = get_default_node_data(cursor, gn_path)
    get_comment(cursor, data)
    if key == 0:
        data["kind"] = CursorKind.TRANSLATION_UNIT.name
        if comment:
            data["comment"] = comment
        if gn_path:
            relative_path = os.path.relpath(cursor.spelling, gn_path)
            data["name"] = relative_path
    else:
        content = node_extent(cursor, current_file)
        data["node_content"] = dict(content)
        data["kind"] = cursor.kind.name
        if cursor.kind.name == CursorKind.MACRO_DEFINITION.name:
            define_comment(cursor, current_file, data)
    struct_union_enum = [NodeKind.STRUCT_DECL.value, NodeKind.UNION_DECL.value,
                         NodeKind.ENUM_DECL.value]
    if data.get('kind') in struct_union_enum and 'class_name' in data:
        data['class_name'] = data.get('name')
    get_syscap_value(data)
    get_since_value(data)
    get_kit_value(data)
    get_permission_value(data)
    get_module_name_value(data)
    get_deprecate_since_value(data)
    processing_special_node(cursor, data, key, gn_path)  # node handling
    get_file_kit_or_system(data)

    return data


def ast_to_dict(cursor, current_file, last_data, gn_path, comment=None, key=0):  # organize the parsed data
    # common assignments
    data = parser_data_assignment(cursor, current_file, gn_path, comment, key)
    if last_data:
        data['module_name'] = last_data['module_name']
        data['kit_name'] = last_data['kit_name']
        data['syscap'] = last_data['syscap']
    children = list(cursor.get_children())  # if the node has children, append them under a suitable key
    if len(children) > 0:
        if key != 0:
            if cursor.kind == CursorKind.FUNCTION_DECL:  # function parameters
                name = "parm"
            elif (cursor.kind == CursorKind.ENUM_DECL
                  or cursor.kind == CursorKind.STRUCT_DECL
                  or cursor.kind == CursorKind.UNION_DECL):
                name = "members"
            else:
                name = "children"
        else:
            name = "children"
        data[name] = []

        for child in children:
            # drop redundant macro expansions and skip attribute nodes such as UNEXPOSED_ATTR
            if (child.location.file is not None) and (not child.kind.is_attribute()) \
                    and child.kind.name != CursorKind.MACRO_INSTANTIATION.name \
                    and child.kind.name != CursorKind.INCLUSION_DIRECTIVE.name \
                    and (child.location.file.name == current_file):
                processing_ast_node(child, current_file, data, name, gn_path)
    else:
        if cursor.kind == CursorKind.FUNCTION_DECL:  # keep the "parm" key even when clang reports no parameters
            data["parm"] = []
        processing_no_child(cursor, data)  # handle nodes without children
    return data


def get_syscap_value(data: dict):
    syscap_list = []
    if 'none_comment' != data["comment"]:
        pattern = r'@([Ss]yscap).*?(?=\n)'
        matches = re.finditer(pattern, data['comment'], re.DOTALL | re.MULTILINE)
        for mat in matches:
            syscap_list.append(mat.group())
    if len(syscap_list) > 1:
        data["syscap"] = re.sub('@syscap', '', syscap_list[-1], flags=re.IGNORECASE)
    elif 1 == len(syscap_list):
        data["syscap"] = re.sub('@syscap', '', syscap_list[0], flags=re.IGNORECASE)


def get_since_value(data: dict):
    since_list = []
    if 'none_comment' != data["comment"]:
        pattern = r'@(since).*?(?=\n)'
        matches = re.finditer(pattern, data['comment'], re.DOTALL | re.MULTILINE)
        for mat in matches:
            since_list.append(mat.group())
    if len(since_list) > 1:
        data["since"] = since_list[-1].replace('@since', '')
    elif 1 == len(since_list):
        data["since"] = since_list[0].replace('@since', '')


def get_kit_value(data: dict):
    kit_list = []
    if 'none_comment' != data["comment"]:
        pattern = r'@(kit).*?(?=\n)'
        matches = re.finditer(pattern, data['comment'], re.DOTALL | re.MULTILINE)
        for mat in matches:
            kit_list.append(mat.group())
    if len(kit_list) > 1:
        data["kit_name"] = kit_list[-1].replace('@kit', '')
    elif 1 == len(kit_list):
        data["kit_name"] = kit_list[0].replace('@kit', '')


def get_module_name_value(data: dict):
    module_name_list = []
    if 'none_comment' != data["comment"]:
        pattern = r'@(addtogroup).*?(?=\n)'
        matches = re.finditer(pattern, data['comment'], re.DOTALL | re.MULTILINE)
        for mat in matches:
            module_name_list.append(mat.group())
    if len(module_name_list) > 1:
        data["module_name"] = module_name_list[-1].replace('@addtogroup', '')
    elif 1 == len(module_name_list):
        data["module_name"] = module_name_list[0].replace('@addtogroup', '')


def get_permission_value(data: dict):
    permission_list = []
    if 'none_comment' != data["comment"]:
        pattern = r'@(permission).*?(?=\n)'
        matches = re.finditer(pattern, data['comment'], re.DOTALL | re.MULTILINE)
        for mat in matches:
            permission_list.append(mat.group())
    if len(permission_list) > 1:
        data["permission"] = permission_list[-1].replace('@permission', '')
    elif 1 == len(permission_list):
        data["permission"] = permission_list[0].replace('@permission', '')


def get_deprecate_since_value(data: dict):
    deprecate_list = []
    if 'none_comment' != data["comment"]:
        pattern = r'@(deprecated).*?(?=\n)'
        matches = re.finditer(pattern, data['comment'], re.DOTALL | re.MULTILINE)
        for mat in matches:
            deprecate_list.append(mat.group())
    if len(deprecate_list) > 1:
        data["deprecate_since"] = (deprecate_list[-1].replace('@deprecated', '')
                                   .replace('since', ''))
    elif 1 == len(deprecate_list):
        data["deprecate_since"] = (deprecate_list[0].replace('@deprecated', '')
                                   .replace('since', ''))
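

# The extractors above all follow one pattern: find every `@tag ...` line in
# the comment and keep the text of the last occurrence. A hypothetical generic
# form (a sketch only, not wired into the pipeline) could look like:
#
#     def get_tag_value(comment: str, tag: str) -> str:
#         matches = re.findall(r'@{}.*?(?=\n)'.format(tag), comment,
#                              re.DOTALL | re.MULTILINE)
#         return matches[-1].replace('@{}'.format(tag), '') if matches else ''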
data["comment"]: 433 pattern = r'@(permission).*?(?=\n)' 434 matches = re.finditer(pattern, data['comment'], re.DOTALL | re.MULTILINE) 435 for mat in matches: 436 permission_list.append(mat.group()) 437 if len(permission_list) > 1: 438 data["permission"] = permission_list[len(permission_list) - 1].replace('@permission', '') 439 elif 1 == len(permission_list): 440 data["permission"] = permission_list[0].replace('@permission', '') 441 442 443def get_deprecate_since_value(data: dict): 444 deprecate_list = [] 445 if 'none_comment' != data["comment"]: 446 pattern = r'@(deprecated).*?(?=\n)' 447 matches = re.finditer(pattern, data['comment'], re.DOTALL | re.MULTILINE) 448 for mat in matches: 449 deprecate_list.append(mat.group()) 450 if len(deprecate_list) > 1: 451 data["deprecate_since"] = (deprecate_list[len(deprecate_list) - 1].replace('@deprecated', '') 452 .replace('since', '')) 453 elif 1 == len(deprecate_list): 454 data["deprecate_since"] = (deprecate_list[0].replace('@deprecated', '') 455 .replace('since', '')) 456 457 458def get_file_kit_or_system(node_data): 459 current_file = os.path.dirname(__file__) 460 kit_json_file_path = os.path.abspath(os.path.join(current_file, 461 r"kit_sub_system/c_file_kit_sub_system.json")) 462 if 'kit_name' in node_data and 'sub_system' in node_data and \ 463 (not node_data['kit_name'] or not node_data['sub_system']): 464 relative_path = node_data.get('location').get('location_path').replace('\\', '/') 465 kit_name, sub_system = get_kit_system_data(kit_json_file_path, relative_path) 466 if not node_data['kit_name']: 467 node_data['kit_name'] = kit_name 468 if not node_data['sub_system']: 469 node_data['sub_system'] = sub_system 470 471 472def get_kit_system_data(json_path, relative_path): 473 kit_name = '' 474 sub_system_name = '' 475 with open(json_path, 'r', encoding='utf-8') as fs: 476 kit_system_data = json.load(fs) 477 for data in kit_system_data['data']: 478 if 'filePath' in data and relative_path in data['filePath']: 479 kit_name = data['kitName'] 480 sub_system_name = data['subSystem'] 481 break 482 return kit_name, sub_system_name 483 484 485def get_comment(cursor, data: dict): 486 if cursor.raw_comment: # 是否有注释信息,有就取,没有过 487 data["comment"] = cursor.raw_comment 488 else: 489 data["comment"] = 'none_comment' 490 491 492def processing_ast_node(child, current_file, data, name, gn_path): 493 child_data = ast_to_dict(child, current_file, data, gn_path, key=1) 494 if child.kind == CursorKind.TYPE_REF: 495 data["type_ref"] = child_data 496 else: 497 data[name].append(child_data) 498 499 500def preorder_travers_ast(cursor, comment, current_file, gn_path): # 获取属性 501 previous_data = {} 502 ast_dict = ast_to_dict(cursor, current_file, previous_data, gn_path, comment) # 获取节点属性 503 return ast_dict 504 505 506def get_start_comments(include_path): # 获取每个头文件的最开始注释 507 global line_dist 508 line_dist = {} 509 global calculation_times 510 with open(include_path, 'r', encoding='utf-8') as f: 511 file_line_data = f.readlines() 512 if file_line_data: 513 last_line = file_line_data[-1] 514 else: 515 last_line = -1 516 f.seek(0) 517 content = '' 518 mark = 0 519 max_line = 0 520 end_line_mark = r'#' 521 line = f.readline() 522 line_number = 1 523 line_list = [] 524 while line: 525 if line.startswith(end_line_mark): 526 mark = 1 527 max_line = line_number 528 line_dist[calculation_times] = line_list 529 calculation_times += 1 530 break 531 if line.startswith('/**'): 532 line_list.append(line_number) 533 line_number += 1 534 content += line 535 line = f.readline() 536 if 


def get_start_comments(include_path):  # collect the comment block at the top of each header file
    global line_dist
    global calculation_times
    line_dist = {}
    with open(include_path, 'r', encoding='utf-8') as f:
        file_line_data = f.readlines()
        if file_line_data:
            last_line = file_line_data[-1]
        else:
            last_line = -1
        f.seek(0)
        content = ''
        mark = 0
        max_line = 0
        end_line_mark = r'#'
        line = f.readline()
        line_number = 1
        line_list = []
        while line:
            if line.startswith(end_line_mark):
                mark = 1
                max_line = line_number
                line_dist[calculation_times] = line_list
                calculation_times += 1
                break
            if line.startswith('/**'):
                line_list.append(line_number)
            line_number += 1
            content += line
            line = f.readline()
            if line == last_line and last_line != -1:
                mark = 0
        if 0 == mark:
            content = ''
            line_dist[calculation_times] = []
            calculation_times += 1
        f.seek(0)
        content_all = f.read()
        pattern_high = RegularExpressions.END_COMMENT.value
        matches_high = re.finditer(pattern_high, content_all)
        for mat in matches_high:
            # get the line number where the match starts
            start_line = content_all.count('\n', 0, mat.start()) + 1
            # lines beyond the recorded header block are added to the result
            if start_line > max_line:
                line_list.append(start_line)
                content = "{}{}".format(content, '/** @} */\n')
    return content


def api_entrance(share_lib, include_path, gn_path, link_path):  # entry point for collection
    # clang.cindex needs the libclang shared library, so configure it
    if not Config.loaded:
        Config.set_library_file(share_lib)
        print("lib.dll: install path")
    # create the AST index
    index = Index.create()
    # this option keeps macro definitions in the parse results as well
    args = ['-I{}'.format(path) for path in link_path]
    args.append('-std=c99')
    options = clang.cindex.TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD

    data_total = []  # list used to collect the per-file results
    for item in include_path:  # process every header file
        tu = index.parse(item, args=args, options=options)
        ast_root_node = tu.cursor  # get the root node
        matches = get_start_comments(item)  # the comment block at the top of the file
        # preorder traversal of the AST
        file_result_data = preorder_travers_ast(ast_root_node, matches, item, gn_path)
        data_total.append(file_result_data)
        iter_line_dist = iter(line_dist)
        first = next(iter_line_dist)
        array_index = int(first)
        if len(data_total) - 1 >= array_index and first in line_dist:
            data_dist = data_total[array_index]
            data_dist['line_list'] = line_dist.get(first)

    return data_total


def get_include_file(include_file_path, link_path, gn_path):  # library path, .h file paths, include search paths
    # path to the libclang shared library
    libclang_path = StringConstant.LIB_CLG_PATH.value
    # paths of the C header files
    file_path = include_file_path
    # include search paths (may be passed in as a list)
    link_include_path = link_path
    data = api_entrance(libclang_path, file_path, gn_path, link_include_path)  # call the entry point
    return data
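

# A minimal usage sketch (hypothetical paths, for illustration only):
#
#     headers = ['interfaces/demo/demo.h']      # hypothetical header list
#     include_dirs = ['interfaces/demo']        # hypothetical include dirs
#     nodes = get_include_file(headers, include_dirs, gn_path='interfaces')
#     print(nodes[0]['kind'])                   # -> 'TRANSLATION_UNIT'
#
# get_include_file() resolves the libclang path from StringConstant.LIB_CLG_PATH
# and forwards everything to api_entrance().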