#!/usr/bin/env python
# coding=utf-8
##############################################
# Copyright (c) 2021-2022 Huawei Device Co., Ltd.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################

import re
import os
import clang.cindex
from clang.cindex import Config
from clang.cindex import Index
from clang.cindex import CursorKind
from clang.cindex import TypeKind
from utils.constants import StringConstant
from utils.constants import RegularExpressions


def find_parent(cursor):
    """Return the cursor kind used to classify the parent of *cursor*.

    Returns the semantic parent's kind when it is a VAR_DECL, STRUCT_DECL or
    UNION_DECL; otherwise defers to ``processing_root_parent``. Returns None
    when *cursor* has no semantic parent.
    """
    cursor_parent = cursor.semantic_parent
    if cursor_parent is None:
        return None
    # VAR_DECL is used for integer variable nodes; STRUCT_DECL / UNION_DECL
    # tell which kind of aggregate a member belongs to.
    if cursor_parent.kind in (CursorKind.VAR_DECL,
                              CursorKind.STRUCT_DECL,
                              CursorKind.UNION_DECL):
        return cursor_parent.kind
    return processing_root_parent(cursor_parent)


def processing_root_parent(cursor_parent):
    """Return the kind of the grandparent reached through *cursor_parent*.

    A grandparent whose type kind is INVALID is reported as TRANSLATION_UNIT.
    Returns None when *cursor_parent* has no semantic parent.
    """
    parent = cursor_parent.semantic_parent
    if parent is None:
        return None
    if parent.type.kind == TypeKind.INVALID:
        return CursorKind.TRANSLATION_UNIT
    return parent.kind


def processing_no_child(cursor, data):
    """Fill ``name`` / ``integer_value`` of *data* for a childless node.

    Only INTEGER_LITERAL cursors are handled; every other kind leaves *data*
    untouched.
    """
    if cursor.kind != CursorKind.INTEGER_LITERAL:
        return
    parent_kind = find_parent(cursor)  # decide which category the literal belongs to
    if not parent_kind:
        return
    if parent_kind == CursorKind.STRUCT_DECL:
        data["name"] = 'struct_int_no_spelling'
    elif parent_kind == CursorKind.UNION_DECL:
        data["name"] = 'union_int_no_spelling'
    elif parent_kind == CursorKind.ENUM_DECL:
        data["name"] = 'enum_int_no_spelling'
    elif parent_kind == CursorKind.VAR_DECL:
        data["name"] = 'var_int_no_spelling'
    else:
        data["name"] = "integer_no_spelling"
    # The last token seen wins and becomes the integer value.
    for token in cursor.get_tokens():
        data["integer_value"] = token.spelling


def get_token(cursor):
    """Return the spellings of all tokens covered by *cursor* as a list."""
    return [token.spelling for token in cursor.get_tokens()]


def judgment_extern(cursor, data):
    """Set ``data["is_extern"]`` (and ``is_const`` for variables).

    * FUNCTION_DECL: extern unless a ``static`` or ``deprecated`` token occurs.
    * VAR_DECL: extern only when an ``extern`` token occurs; ``is_const`` is
      set according to the presence of a ``const`` token.
    * Any other kind: extern.
    """
    tokens = get_token(cursor)
    if cursor.kind == CursorKind.FUNCTION_DECL:
        is_extern = not ('static' in tokens or 'deprecated' in tokens)
    elif cursor.kind == CursorKind.VAR_DECL:
        is_extern = 'extern' in tokens
        data["is_const"] = 'const' in tokens
    else:
        is_extern = True

    data["is_extern"] = is_extern


def binary_operator(cursor, data):
    """Name a binary-operator node and record its operator token.

    When several recognised operator tokens occur, the last one is kept.
    """
    data["name"] = "binary_ope_no_spelling"
    known_operators = ('<<', '>>', '+', '-', '*', '/')
    for token in cursor.get_tokens():
        if token.spelling in known_operators:
            data["operator"] = token.spelling


def distinction_member(cursor, data):
    """Tag a field as ``union_member`` or ``struct_member`` based on its parent."""
    parent_kind = find_parent(cursor)  # look up the parent node kind
    if not parent_kind:
        return
    if parent_kind == CursorKind.UNION_DECL:
        data["member"] = "union_member"
    elif parent_kind == CursorKind.STRUCT_DECL:
        data["member"] = "struct_member"


def processing_parm(cursor, data):
    """Fill *data* for a function-parameter node.

    Unnamed parameters get the placeholder name ``arg_no_spelling``. When the
    parameter type points to a function prototype, the result type spelling of
    that prototype is stored under ``func_pointer_result_type``.
    """
    data["name"] = cursor.spelling if cursor.spelling else "arg_no_spelling"

    pointee = cursor.type.get_pointee()
    if pointee.kind == TypeKind.FUNCTIONPROTO:  # parameter is a function pointer
        data["func_pointer_result_type"] = pointee.get_result().spelling


def processing_enum(cursor, data):
    """Store the numeric value of an enum constant under ``value``."""
    data["value"] = cursor.enum_value


def processing_def(cursor, data):
    """Fill *data* for a macro definition.

    Reads the macro source text from ``data['node_content']['content']``.
    If the character right after the macro name is ``(`` and a closing ``)``
    follows, the macro is treated as function-like: ``is_def_func`` is set,
    ``def_func_name`` / ``def_func_param`` are recorded and ``name`` becomes
    name plus parameter list. The remaining text, stripped, is stored under
    ``text`` when non-empty. ``type`` is always set to ``def_no_type``.
    """
    data['is_def_func'] = False
    data['name'] = cursor.spelling
    name = data['name']
    name_len = len(name)
    content = data['node_content']['content']
    text = ''
    if name_len != len(content):
        if content:
            # Slicing (not indexing) and find() keep malformed or truncated
            # macro text from raising IndexError / ValueError.
            right_index = -1
            if content[name_len:name_len + 1] == '(':
                right_index = content.find(')', name_len)
            if right_index != -1:
                param = content[name_len:right_index + 1]
                text = content[right_index + 1:]
                data['is_def_func'] = True
                data['def_func_name'] = name
                data['def_func_param'] = param
                data['name'] = name + param
            else:
                text = content[name_len:]
        else:
            print('mar_define error, its content is none')
    if text:
        data['text'] = text.strip()  # drop surrounding whitespace
    data["type"] = "def_no_type"


def processing_func(cursor, data):
    """Record the return type of a function and whether it counts as extern."""
    data["return_type"] = cursor.result_type.spelling
    judgment_extern(cursor, data)


def processing_type(cursor, data):
    """Assign a placeholder ``type`` to nodes that have no type of their own."""
    if cursor.kind == CursorKind.MACRO_INSTANTIATION:  # macro reference
        data["type"] = "insta_no_type"

    elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE:  # #include has no type either
        data["type"] = "inclusion_no_type"


def processing_name(cursor, data):
    """Assign a placeholder ``name`` to nodes that have no spelling."""
    if cursor.kind == CursorKind.PAREN_EXPR:  # parenthesised expression
        data["paren"] = "()"
        data["name"] = "paren_expr_no_spelling"

    elif cursor.kind == CursorKind.UNEXPOSED_EXPR:  # expression kind not exposed by libclang
        data["name"] = "unexposed_expr_no_spelling"


def processing_char(cursor, data):
    """Use the character literal (without its quotes) as the node name.

    If the cursor yields no tokens, *data* is left unchanged.
    """
    tokens = list(cursor.get_tokens())
    if not tokens:
        return
    data["name"] = tokens[0].spelling.strip("'")


# Maps a cursor kind name to the function that post-processes such nodes.
special_node_process = {
    CursorKind.ENUM_CONSTANT_DECL.name: processing_enum,
    CursorKind.MACRO_DEFINITION.name: processing_def,
    CursorKind.FUNCTION_DECL.name: processing_func,
    CursorKind.VAR_DECL.name: judgment_extern,
    CursorKind.PARM_DECL.name: processing_parm,
    CursorKind.FIELD_DECL.name: distinction_member,
    CursorKind.MACRO_INSTANTIATION.name: processing_type,
    CursorKind.INCLUSION_DIRECTIVE.name: processing_type,
    CursorKind.BINARY_OPERATOR.name: binary_operator,
    CursorKind.PAREN_EXPR.name: processing_name,
    CursorKind.UNEXPOSED_EXPR.name: processing_name,
    CursorKind.CHARACTER_LITERAL.name: processing_char
}


def processing_special_node(cursor, data, key, gn_path=None):
    """Attach location info to *data* and run the kind-specific handler.

    :param cursor: libclang cursor of the node.
    :param data: dict being built for the node; modified in place.
    :param key: 0 for the translation-unit root, anything else for other nodes.
    :param gn_path: optional base directory; when given, the stored location
        path is made relative to it.
    """
    if key == 0:
        location_path = cursor.spelling
        kind_name = CursorKind.TRANSLATION_UNIT.name
    else:
        location_path = cursor.location.file.name
        kind_name = cursor.kind.name

    loc = {
        "location_path": '{}'.format(location_path),
        "location_line": cursor.location.line,
        "location_column": cursor.location.column
    }
    if gn_path:
        # Store the header path relative to gn_path.
        loc["location_path"] = os.path.relpath(location_path, gn_path)
    data["location"] = loc

    node_process = special_node_process.get(kind_name)
    if node_process is not None:
        node_process(cursor, data)


def node_extent(cursor, current_file):
    """Return the source extent of *cursor* read from *current_file*.

    :returns: dict with ``start_offset``, ``end_offset`` and ``content`` (the
        text between the two offsets).
    """
    start_offset = cursor.extent.start.offset
    end_offset = cursor.extent.end.offset
    # The offsets are byte offsets, so the slice has to be taken on bytes:
    # seeking/reading in text mode counts characters and drifts as soon as
    # the file contains multi-byte UTF-8 sequences.
    with open(current_file, 'rb') as f:
        f.seek(start_offset)
        raw = f.read(end_offset - start_offset)
    # Normalise line endings the way text-mode reading would.
    content = raw.decode('utf-8').replace('\r\n', '\n').replace('\r', '\n')

    return {
        "start_offset": start_offset,
        "end_offset": end_offset,
        "content": content
    }


def ast_to_dict(cursor, current_file, gn_path=None, comment=None, key=0):
    """Convert *cursor* and its descendants in *current_file* into a dict.

    :param cursor: libclang cursor to convert.
    :param current_file: path of the header being parsed; children located in
        other files are skipped.
    :param gn_path: optional base directory used to relativise paths.
    :param comment: comment text to use for the root node (``key == 0``).
    :param key: 0 for the translation-unit root, 1 for nested nodes.
    :returns: the dict describing the node.
    """
    # Fields common to every node.
    data = {
        "name": cursor.spelling,
        "kind": '',
        "type": cursor.type.spelling,
        "gn_path": gn_path,
        "node_content": {},
        "comment": ''
    }
    # Take the raw comment when there is one, otherwise use a placeholder.
    data["comment"] = cursor.raw_comment if cursor.raw_comment else 'none_comment'

    if key == 0:
        data["kind"] = CursorKind.TRANSLATION_UNIT.name
        if comment:
            data["comment"] = comment
        if gn_path:
            data["name"] = os.path.relpath(cursor.spelling, gn_path)
    else:
        data["node_content"] = node_extent(cursor, current_file)
        data["kind"] = cursor.kind.name

    processing_special_node(cursor, data, key, gn_path)

    children = list(cursor.get_children())
    if not children:
        processing_no_child(cursor, data)  # handle nodes without children
        return data

    # Choose the key under which the children are collected.
    if key != 0 and cursor.kind == CursorKind.FUNCTION_DECL:
        name = "parm"  # function parameters
    elif key != 0 and cursor.kind in (CursorKind.ENUM_DECL,
                                      CursorKind.STRUCT_DECL,
                                      CursorKind.UNION_DECL):
        name = "members"
    else:
        name = "children"
    data[name] = []

    for child in children:
        # Drop nodes coming from other files (e.g. extra macro definitions)
        # and skip UNEXPOSED_ATTR nodes.
        if child.location.file is not None and child.kind != CursorKind.UNEXPOSED_ATTR \
                and child.location.file.name == current_file:
            processing_ast_node(child, current_file, data, name, gn_path)
    return data


def processing_ast_node(child, current_file, data, name, gn_path):
    """Convert *child* and store it in *data*.

    TYPE_REF children are stored under ``type_ref``; all others are appended
    to the list ``data[name]``.
    """
    child_data = ast_to_dict(child, current_file, gn_path, key=1)
    if child.kind == CursorKind.TYPE_REF:
        data["type_ref"] = child_data
    else:
        data[name].append(child_data)


def preorder_travers_ast(cursor, total, comment, current_file, gn_path=None):
    """Convert the AST rooted at *cursor* and append the result to *total*."""
    ast_dict = ast_to_dict(cursor, current_file, gn_path, comment)
    total.append(ast_dict)


def get_start_comments(include_path):
    """Collect the file-level comments of the header at *include_path*.

    START_COMMENT matches are searched in the text returned by ``open_file``;
    END_COMMENT matches are searched in the whole file. All matches are joined
    with newlines.
    """
    file_comment = []
    content = open_file(include_path)
    if content:
        pattern = RegularExpressions.START_COMMENT.value
        for mat in re.finditer(pattern, content, re.DOTALL | re.MULTILINE):
            file_comment.append(mat.group())

    with open(include_path, 'r', encoding='utf-8') as f:
        content = f.read()
    pattern_high = RegularExpressions.END_COMMENT.value
    matches_high = re.findall(pattern_high, content, re.DOTALL | re.MULTILINE)
    if matches_high:
        file_comment.extend(matches_high)
    return '\n'.join(file_comment)


def open_file(include_path):
    """Return the text preceding the first ``#ifdef __cplusplus`` line.

    Returns an empty string when the file has no line starting with
    ``#ifdef __cplusplus``.
    """
    leading_lines = []
    with open(include_path, 'r', encoding='utf-8') as f:
        for line in f:
            if line.startswith('#ifdef __cplusplus'):
                return ''.join(leading_lines)
            leading_lines.append(line)
    return ''


def api_entrance(share_lib, include_path, gn_path=None, link_path=None):
    """Parse every header in *include_path* and return the collected AST dicts.

    :param share_lib: path of the libclang shared library.
    :param include_path: iterable of header file paths to parse.
    :param gn_path: optional base directory used to relativise paths.
    :param link_path: optional iterable of include directories passed to clang
        as ``-I`` arguments; None means no extra include directories.
    :returns: list with one dict per parsed header.
    """
    # clang.cindex needs the libclang shared library, so configure it once.
    if Config.loaded:
        print("config.loaded == true")
    else:
        Config.set_library_file(share_lib)
        print("lib.dll: install path")
    # Create the AST index.
    index = Index.create()
    print('=' * 50)
    args = ['-I{}'.format(path) for path in (link_path or [])]
    args.append('-std=c99')
    # This option makes the parse result include macro definitions as well.
    options = clang.cindex.TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD

    data_total = []  # accumulates one entry per header
    for item in include_path:
        tu = index.parse(item, args=args, options=options)
        print(tu)
        print('=' * 50)
        ast_root_node = tu.cursor  # root node of the translation unit
        print(ast_root_node)
        matches = get_start_comments(item)  # comments at the top of the file
        # Walk the AST in pre-order.
        preorder_travers_ast(ast_root_node, data_total, matches, item, gn_path)
        print('=' * 50)

    return data_total


def get_include_file(include_file_path, link_path, gn_path=None):
    """Parse the given headers using the configured libclang library path.

    :param include_file_path: iterable of C header paths to parse.
    :param link_path: include directories for the headers (may be a list).
    :param gn_path: optional base directory used to relativise paths.
    :returns: the list produced by ``api_entrance``.
    """
    # Path of the libclang library.
    libclang_path = StringConstant.LIB_CLG_PATH.value
    # Paths of the C headers.
    file_path = include_file_path
    print(file_path)
    # Include directories for the headers; can be passed in as a list.
    link_include_path = link_path
    data = api_entrance(libclang_path, file_path, gn_path, link_include_path)

    return data