1# Copyright 2017 syzkaller project authors. All rights reserved. 2# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4''' 5This module provides classes which implement AST traversal in order to extract 6items belonging to a struct. 7''' 8 9import collections 10import logging 11 12from pycparser import c_ast 13from header_preprocessor import HeaderFilePreprocessor 14 15 16class StructWalkerException(Exception): 17 pass 18 19 20class StructWalker(c_ast.NodeVisitor): 21 ''' 22 Given an ast obtained by parsing a header file, return a hierarchy 23 dictionary. The ast is expected to be of type pycparser.c_ast.FileAST. 24 25 Usage : 26 27 >>> import tempfile 28 >>> t = tempfile.NamedTemporaryFile() 29 >>> contents = """ 30 ... #define STRUCT_SIZE 1337 31 ... struct ARRAY_OF_POINTERS_CONTAINER { 32 ... unsigned int *ptr[10]; 33 ... int **n; 34 ... }; 35 ... struct ARRAY_CONTAINER { 36 ... int g[10]; 37 ... int h[20][30]; 38 ... }; 39 ... struct REGULAR_STRUCT { 40 ... int x; 41 ... char *y; 42 ... void *ptr; 43 ... }; 44 ... struct STRUCT_WITH_STRUCT_PTR { 45 ... struct REGULAR_STRUCT *struct_ptr; 46 ... int z; 47 ... }; 48 ... struct STRUCT_WITH_STRUCT_INST { 49 ... struct REGULAR_STRUCT regular_struct_inst; 50 ... int a; 51 ... }; 52 ... struct STRUCT_WITH_STRUCT_ARRAY { 53 ... struct REGULAR_STRUCT regular_struct_array[100]; 54 ... int b; 55 ... }; 56 ... struct STRUCT_WITH_ANONYMOUS_STRUCT { 57 ... struct { 58 ... int g; 59 ... int h; 60 ... int i; 61 ... } anonymous_struct; 62 ... }; 63 ... struct STRUCT_WITH_ANONYMOUS_UNION { 64 ... union { 65 ... int t; 66 ... char r[100]; 67 ... } anonymous_union; 68 ... }; 69 ... struct STRUCT_WITH_STRUCT_ARRAY_SIZE_MACRO { 70 ... struct REGULAR_STRUCT regular_struct_array[STRUCT_SIZE]; 71 ... }; 72 ... struct STRUCT_WITH_2D_ARRAY_INST { 73 ... struct REGULAR_STRUCT regular_struct_array_2D[10][10]; 74 ... }; 75 ... struct NESTED_ANONYMOUS_STRUCT { 76 ... struct { 77 ... int x; 78 ... struct { 79 ... int y; 80 ... int z; 81 ... } level_2; 82 ... } level_1; 83 ... }; 84 ... """ 85 >>> t.write(contents) ; t.flush() 86 >>> struct_walker = StructWalker(filenames=[t.name]) 87 >>> local_hierarchy = struct_walker.generate_local_hierarchy() 88 >>> for k in local_hierarchy: 89 ... print k 90 ... print local_hierarchy[k] 91 ARRAY_OF_POINTERS_CONTAINER 92 [('unsigned int*[10]', 'ptr'), ('int**', 'n')] 93 STRUCT_WITH_STRUCT_ARRAY_SIZE_MACRO 94 [('struct REGULAR_STRUCT[1337]', 'regular_struct_array')] 95 STRUCT_WITH_2D_ARRAY_INST 96 [('struct REGULAR_STRUCT[10][10]', 'regular_struct_array_2D')] 97 STRUCT_WITH_STRUCT_ARRAY 98 [('struct REGULAR_STRUCT[100]', 'regular_struct_array'), ('int', 'b')] 99 NESTED_ANONYMOUS_STRUCT 100 [('int', 'level_1.x'), ('int', 'level_1.level_2.y'), ('int', 'level_1.level_2.z')] 101 STRUCT_WITH_ANONYMOUS_STRUCT 102 [('int', 'anonymous_struct.g'), ('int', 'anonymous_struct.h'), ('int', 'anonymous_struct.i')] 103 STRUCT_WITH_ANONYMOUS_UNION 104 [('int', 'anonymous_union.t'), ('char[100]', 'anonymous_union.r')] 105 STRUCT_WITH_STRUCT_INST 106 [('struct REGULAR_STRUCT', 'regular_struct_inst'), ('int', 'a')] 107 ARRAY_CONTAINER 108 [('int[10]', 'g'), ('int[20][30]', 'h')] 109 REGULAR_STRUCT 110 [('int', 'x'), ('char*', 'y'), ('void*', 'ptr')] 111 STRUCT_WITH_STRUCT_PTR 112 [('struct REGULAR_STRUCT*', 'struct_ptr'), ('int', 'z')] 113 ''' 114 115 def __init__(self, ast=None, filenames=[], include_lines='', loglvl=logging.INFO): 116 super(StructWalker, self).__init__() 117 self.ast = ast 118 self.filenames = filenames 119 120 if not filenames and not ast: 121 raise StructWalkerException('Specify either "filename" or "ast" to create' 122 'StructParser object') 123 124 if not self.ast: 125 self.ast = HeaderFilePreprocessor(self.filenames, include_lines=include_lines, 126 loglvl=loglvl).get_ast() 127 128 self.include_lines = include_lines 129 self.local_structs_hierarchy = {} 130 self._setuplogging(loglvl) 131 132 def _setuplogging(self, loglvl): 133 self.logger = logging.getLogger(self.__class__.__name__) 134 formatter = logging.Formatter('DEBUG:%(name)s:%(message)s') 135 sh = logging.StreamHandler() 136 sh.setFormatter(formatter) 137 sh.setLevel(loglvl) 138 self.logger.addHandler(sh) 139 self.logger.setLevel(loglvl) 140 141 def _format_item(self, processed_item): 142 fmt_type = processed_item['type'] 143 fmt_type = ' '.join(fmt_type) 144 145 self.logger.debug('_format_item : %s', processed_item) 146 147 if 'is_ptr' in processed_item and 'is_fnptr' not in processed_item: 148 fmt_type = '%s%s' % (fmt_type, '*' * processed_item['is_ptr']) 149 150 if 'is_array' in processed_item and 'array_size' in processed_item: 151 size_str = str(processed_item['array_size']).replace(', ', '][') 152 fmt_type = '%s%s' % (fmt_type, size_str) 153 154 fmt_identifier = processed_item['identifier'] 155 156 return [(fmt_type, fmt_identifier)] 157 158 def _recursive_process_item(self, item_ast, processed_item, parent): 159 self.logger.debug('--- _recursive_process_item : %s', type(item_ast)) 160 if isinstance(item_ast, c_ast.Decl): 161 processed_item['identifier'] = item_ast.name 162 return self._recursive_process_item(item_ast.type, processed_item, item_ast) 163 164 elif isinstance(item_ast, c_ast.TypeDecl): 165 return self._recursive_process_item(item_ast.type, processed_item, item_ast) 166 167 elif isinstance(item_ast, c_ast.IdentifierType): 168 if len(item_ast.names) > 0: 169 processed_item['type'] = item_ast.names 170 return self._format_item(processed_item) 171 172 elif (isinstance(item_ast, c_ast.Struct) or 173 isinstance(item_ast, c_ast.Union)): 174 if not item_ast.name: 175 nodename, _items_list = self._traverse_ast(item_ast, toplevel=False) 176 try: 177 items_list = [(i[0], '%s.%s' % (parent.declname, i[1])) for i in _items_list] 178 except AttributeError as e: 179 self.logger.info('-- Encountered anonymous_struct/anonymous_union with no name') 180 raise StructWalkerException('Encountered anonymous_struct/anonymous_union with no name') 181 182 return items_list 183 else: 184 processed_item['type'] = ['struct %s' % (item_ast.name)] 185 return self._format_item(processed_item) 186 187 elif isinstance(item_ast, c_ast.PtrDecl): 188 if 'is_ptr' not in processed_item: 189 processed_item['is_ptr'] = 0 190 processed_item['is_ptr'] = processed_item['is_ptr'] + 1 191 return self._recursive_process_item(item_ast.type, processed_item, item_ast) 192 193 elif isinstance(item_ast, c_ast.ArrayDecl): 194 processed_item['is_array'] = True 195 if 'array_size' not in processed_item: 196 processed_item['array_size'] = [] 197 processed_item['array_size'].append(int(item_ast.dim.value)) 198 return self._recursive_process_item(item_ast.type, processed_item, item_ast) 199 200 elif isinstance(item_ast, c_ast.Enum): 201 processed_item['type'] = ['enum %s' % (item_ast.name)] 202 return self._format_item(processed_item) 203 204 elif isinstance(item_ast, c_ast.FuncDecl): 205 processed_item['is_fnptr'] = True 206 processed_item['type'] = ['void (*)()'] 207 return self._format_item(processed_item) 208 209 def _traverse_ast(self, node, toplevel=True): 210 items_list = [] 211 212 # Argument structs are used as types, hence anonymous top-level 213 # structs are ignored. 214 if toplevel and not node.name: 215 return None 216 217 if not node.children(): 218 return None 219 220 self.logger.debug('>>> Struct name = %s, coord: %s', node.name, node.coord) 221 for child in node.children(): 222 item = self._recursive_process_item(child[1], {}, None) 223 items_list.extend(item) 224 225 self.logger.debug('_traverse_ast returns: %s', str((node.name, items_list))) 226 return (node.name, items_list) 227 228 def visit_Struct(self, node, *a): 229 if node.name in self.local_structs_hierarchy: 230 self.logger.info('Encountered %s again. Ignoring.', repr(node.name)) 231 return 232 233 try: 234 desc = self._traverse_ast(node) 235 except StructWalkerException as e: 236 self.logger.info('-- Exception raised by StructWalkerException in %s,' 237 'inspect manually.', 238 repr(node.name)) 239 self.logger.info(str(e)) 240 return 241 242 if not desc: 243 return 244 245 struct_name, struct_items = desc 246 self.local_structs_hierarchy[struct_name] = struct_items 247 248 def generate_local_hierarchy(self): 249 self.visit(self.ast) 250 return self.local_structs_hierarchy 251