• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2017 syzkaller project authors. All rights reserved.
2# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3
4'''
5This module provides classes which implement AST traversal in order to extract
6items belonging to a struct.
7'''
8
9import collections
10import logging
11
12from pycparser import c_ast
13from header_preprocessor import HeaderFilePreprocessor
14
15
16class StructWalkerException(Exception):
17    pass
18
19
20class StructWalker(c_ast.NodeVisitor):
21    '''
22    Given an ast obtained by parsing a header file, return a hierarchy
23    dictionary. The ast is expected to be of type pycparser.c_ast.FileAST.
24
25    Usage :
26
27    >>> import tempfile
28    >>> t = tempfile.NamedTemporaryFile()
29    >>> contents = """
30    ... #define STRUCT_SIZE 1337
31    ... struct ARRAY_OF_POINTERS_CONTAINER {
32    ... 	unsigned int *ptr[10];
33    ... 	int **n;
34    ... };
35    ... struct ARRAY_CONTAINER {
36    ... 	  int g[10];
37    ... 		  int h[20][30];
38    ... };
39    ... struct REGULAR_STRUCT {
40    ... 	int x;
41    ... 	char *y;
42    ... 	void *ptr;
43    ... };
44    ... struct STRUCT_WITH_STRUCT_PTR {
45    ... 	struct REGULAR_STRUCT *struct_ptr;
46    ... 	int z;
47    ... };
48    ... struct STRUCT_WITH_STRUCT_INST {
49    ... 	struct REGULAR_STRUCT regular_struct_inst;
50    ... 	int a;
51    ... };
52    ... struct STRUCT_WITH_STRUCT_ARRAY {
53    ... 	struct REGULAR_STRUCT regular_struct_array[100];
54    ... 	int b;
55    ... };
56    ... struct STRUCT_WITH_ANONYMOUS_STRUCT {
57    ... 	struct {
58    ... 		int g;
59    ... 		int h;
60    ... 		int i;
61    ... 	} anonymous_struct;
62    ... };
63    ... struct STRUCT_WITH_ANONYMOUS_UNION {
64    ... 	union {
65    ... 		int t;
66    ... 		char r[100];
67    ... 	} anonymous_union;
68    ... };
69    ... struct STRUCT_WITH_STRUCT_ARRAY_SIZE_MACRO {
70    ... 	struct REGULAR_STRUCT regular_struct_array[STRUCT_SIZE];
71    ... };
72    ... struct STRUCT_WITH_2D_ARRAY_INST {
73    ... 	struct REGULAR_STRUCT regular_struct_array_2D[10][10];
74    ... };
75    ... struct NESTED_ANONYMOUS_STRUCT {
76    ... 	struct {
77    ... 		int x;
78    ... 		struct {
79    ... 			int y;
80    ... 			int z;
81    ... 		} level_2;
82    ... 	} level_1;
83    ... };
84    ... """
85	>>> t.write(contents) ; t.flush()
86    >>> struct_walker = StructWalker(filenames=[t.name])
87    >>> local_hierarchy = struct_walker.generate_local_hierarchy()
88    >>> for k in local_hierarchy:
89    ...     print k
90    ...     print local_hierarchy[k]
91    ARRAY_OF_POINTERS_CONTAINER
92    [('unsigned int*[10]', 'ptr'), ('int**', 'n')]
93    STRUCT_WITH_STRUCT_ARRAY_SIZE_MACRO
94    [('struct REGULAR_STRUCT[1337]', 'regular_struct_array')]
95    STRUCT_WITH_2D_ARRAY_INST
96    [('struct REGULAR_STRUCT[10][10]', 'regular_struct_array_2D')]
97    STRUCT_WITH_STRUCT_ARRAY
98    [('struct REGULAR_STRUCT[100]', 'regular_struct_array'), ('int', 'b')]
99    NESTED_ANONYMOUS_STRUCT
100    [('int', 'level_1.x'), ('int', 'level_1.level_2.y'), ('int', 'level_1.level_2.z')]
101    STRUCT_WITH_ANONYMOUS_STRUCT
102    [('int', 'anonymous_struct.g'), ('int', 'anonymous_struct.h'), ('int', 'anonymous_struct.i')]
103    STRUCT_WITH_ANONYMOUS_UNION
104    [('int', 'anonymous_union.t'), ('char[100]', 'anonymous_union.r')]
105    STRUCT_WITH_STRUCT_INST
106    [('struct REGULAR_STRUCT', 'regular_struct_inst'), ('int', 'a')]
107    ARRAY_CONTAINER
108    [('int[10]', 'g'), ('int[20][30]', 'h')]
109    REGULAR_STRUCT
110    [('int', 'x'), ('char*', 'y'), ('void*', 'ptr')]
111    STRUCT_WITH_STRUCT_PTR
112    [('struct REGULAR_STRUCT*', 'struct_ptr'), ('int', 'z')]
113    '''
114
115    def __init__(self, ast=None, filenames=[], include_lines='', loglvl=logging.INFO):
116        super(StructWalker, self).__init__()
117        self.ast = ast
118        self.filenames = filenames
119
120        if not filenames and not ast:
121            raise StructWalkerException('Specify either "filename" or "ast" to create'
122                                        'StructParser object')
123
124        if not self.ast:
125            self.ast = HeaderFilePreprocessor(self.filenames, include_lines=include_lines,
126                                              loglvl=loglvl).get_ast()
127
128        self.include_lines = include_lines
129        self.local_structs_hierarchy = {}
130        self._setuplogging(loglvl)
131
132    def _setuplogging(self, loglvl):
133        self.logger = logging.getLogger(self.__class__.__name__)
134        formatter = logging.Formatter('DEBUG:%(name)s:%(message)s')
135        sh = logging.StreamHandler()
136        sh.setFormatter(formatter)
137        sh.setLevel(loglvl)
138        self.logger.addHandler(sh)
139        self.logger.setLevel(loglvl)
140
141    def _format_item(self, processed_item):
142        fmt_type = processed_item['type']
143        fmt_type = ' '.join(fmt_type)
144
145        self.logger.debug('_format_item : %s', processed_item)
146
147        if 'is_ptr' in processed_item and 'is_fnptr' not in processed_item:
148            fmt_type = '%s%s' % (fmt_type, '*' * processed_item['is_ptr'])
149
150        if 'is_array' in processed_item and 'array_size' in processed_item:
151            size_str = str(processed_item['array_size']).replace(', ', '][')
152            fmt_type = '%s%s' % (fmt_type, size_str)
153
154        fmt_identifier = processed_item['identifier']
155
156        return [(fmt_type, fmt_identifier)]
157
158    def _recursive_process_item(self, item_ast, processed_item, parent):
159        self.logger.debug('--- _recursive_process_item : %s', type(item_ast))
160        if isinstance(item_ast, c_ast.Decl):
161            processed_item['identifier'] = item_ast.name
162            return self._recursive_process_item(item_ast.type, processed_item, item_ast)
163
164        elif isinstance(item_ast, c_ast.TypeDecl):
165            return self._recursive_process_item(item_ast.type, processed_item, item_ast)
166
167        elif isinstance(item_ast, c_ast.IdentifierType):
168            if len(item_ast.names) > 0:
169                processed_item['type'] = item_ast.names
170                return self._format_item(processed_item)
171
172        elif (isinstance(item_ast, c_ast.Struct) or
173              isinstance(item_ast, c_ast.Union)):
174            if not item_ast.name:
175                nodename, _items_list = self._traverse_ast(item_ast, toplevel=False)
176                try:
177                    items_list = [(i[0], '%s.%s' % (parent.declname, i[1])) for i in _items_list]
178                except AttributeError as e:
179                    self.logger.info('-- Encountered anonymous_struct/anonymous_union with no name')
180                    raise StructWalkerException('Encountered anonymous_struct/anonymous_union with no name')
181
182                return items_list
183            else:
184                processed_item['type'] = ['struct %s' % (item_ast.name)]
185                return self._format_item(processed_item)
186
187        elif isinstance(item_ast, c_ast.PtrDecl):
188            if 'is_ptr' not in processed_item:
189                processed_item['is_ptr'] = 0
190            processed_item['is_ptr'] = processed_item['is_ptr'] + 1
191            return self._recursive_process_item(item_ast.type, processed_item, item_ast)
192
193        elif isinstance(item_ast, c_ast.ArrayDecl):
194            processed_item['is_array'] = True
195            if 'array_size' not in processed_item:
196                processed_item['array_size'] = []
197            processed_item['array_size'].append(int(item_ast.dim.value))
198            return self._recursive_process_item(item_ast.type, processed_item, item_ast)
199
200        elif isinstance(item_ast, c_ast.Enum):
201            processed_item['type'] = ['enum %s' % (item_ast.name)]
202            return self._format_item(processed_item)
203
204        elif isinstance(item_ast, c_ast.FuncDecl):
205            processed_item['is_fnptr'] = True
206            processed_item['type'] = ['void (*)()']
207            return self._format_item(processed_item)
208
209    def _traverse_ast(self, node, toplevel=True):
210        items_list = []
211
212        # Argument structs are used as types, hence anonymous top-level
213        # structs are ignored.
214        if toplevel and not node.name:
215            return None
216
217        if not node.children():
218            return None
219
220        self.logger.debug('>>> Struct name = %s, coord: %s', node.name, node.coord)
221        for child in node.children():
222            item = self._recursive_process_item(child[1], {}, None)
223            items_list.extend(item)
224
225        self.logger.debug('_traverse_ast returns: %s', str((node.name, items_list)))
226        return (node.name, items_list)
227
228    def visit_Struct(self, node, *a):
229        if node.name in self.local_structs_hierarchy:
230            self.logger.info('Encountered %s again. Ignoring.', repr(node.name))
231            return
232
233        try:
234            desc = self._traverse_ast(node)
235        except StructWalkerException as e:
236            self.logger.info('-- Exception raised by StructWalkerException in %s,'
237                             'inspect manually.',
238                             repr(node.name))
239            self.logger.info(str(e))
240            return
241
242        if not desc:
243            return
244
245        struct_name, struct_items = desc
246        self.local_structs_hierarchy[struct_name] = struct_items
247
248    def generate_local_hierarchy(self):
249        self.visit(self.ast)
250        return self.local_structs_hierarchy
251