#------------------------------------------------------------------------------
# pycparser: c_json.py
#
# by Michael White (@mypalmike)
#
# This example includes functions to serialize and deserialize an ast
# to and from json format. Serializing involves walking the ast and converting
# each node from a python Node object into a python dict. Deserializing
# involves the opposite conversion, walking the tree formed by the
# dict and converting each dict into the specific Node object it represents.
# The dict itself is serialized and deserialized using the python json module.
#
# The dict representation is a fairly direct transformation of the object
# attributes. Each node in the dict gets one metadata field referring to the
# specific node class name, _nodetype. Each local attribute (i.e. not linking
# to child nodes) has a string value or array of string values. Each child
# attribute is either another dict or an array of dicts, exactly as in the
# Node object representation. The "coord" attribute, representing the
# node's location within the source code, is serialized/deserialized from
# a Coord object into a string of the format "filename:line[:column]".
#
# Example TypeDecl node, with IdentifierType child node, represented as a dict:
#     "type": {
#         "_nodetype": "TypeDecl",
#         "coord": "c_files/funky.c:8",
#         "declname": "o",
#         "quals": [],
#         "type": {
#             "_nodetype": "IdentifierType",
#             "coord": "c_files/funky.c:8",
#             "names": [
#                 "char"
#             ]
#         }
#     }
#------------------------------------------------------------------------------
from __future__ import print_function

import json
import sys
import re

# This is not required if you've installed pycparser into
# your site-packages/ with setup.py
#
sys.path.extend(['.', '..'])

from pycparser import parse_file, c_ast
from pycparser.plyparser import Coord


# Matches child names of the form "name[index]" as yielded by Node.children().
RE_CHILD_ARRAY = re.compile(r'(.*)\[(.*)\]')
# Matches dunder slot names (e.g. "__weakref__"), which are not ast attributes.
RE_INTERNAL_ATTR = re.compile('__.*__')
# Matches a coord field made only of ASCII digits (a line or column number).
RE_COORD_NUMBER = re.compile(r'[0-9]+\Z')


class CJsonError(Exception):
    """ Raised when an ast cannot be converted to its dict representation. """
    pass


def memodict(fn):
    """
    Fast memoization decorator for a function taking a single argument.

    The cache is a dict subclass whose __missing__ hook calls fn(key) on the
    first lookup of a key and stores the result. The decorated name is bound
    to that dict's __getitem__, so the argument must be hashable and must be
    passed positionally.

    """
    class memodict(dict):
        def __missing__(self, key):
            ret = self[key] = fn(key)
            return ret
    return memodict().__getitem__


@memodict
def child_attrs_of(klass):
    """
    Given a Node class, get a set of child attrs.
    Memoized to avoid highly repetitive string manipulation

    The result is every name in klass.__slots__ that is neither a dunder
    name nor listed in klass.attr_names.

    """
    non_child_attrs = set(klass.attr_names)
    all_attrs = set(
        name for name in klass.__slots__ if not RE_INTERNAL_ATTR.match(name))
    return all_attrs - non_child_attrs


def to_dict(node):
    """
    Recursively convert an ast into dict representation.

    The returned dict holds '_nodetype' (the node's class name), one entry
    per name in the class's attr_names, 'coord' (str(node.coord), or None
    when the node has no coord) and one entry per child attribute: a dict
    for a single child, a list of dicts for an indexed child, or None when
    the child is absent.

    Raises CJsonError if node.children() yields indexed children whose
    indices are not consecutive starting from zero.

    """
    klass = node.__class__

    # Metadata
    result = {'_nodetype': klass.__name__}

    # Local node attributes
    for attr in klass.attr_names:
        result[attr] = getattr(node, attr)

    # Coord object
    result['coord'] = str(node.coord) if node.coord else None

    # Child attributes
    for child_name, child in node.children():
        # Child strings are either simple (e.g. 'value') or arrays
        # (e.g. 'block_items[1]')
        match = RE_CHILD_ARRAY.match(child_name)
        if match:
            array_name, array_index = match.groups()
            array_index = int(array_index)
            # arrays come in order, so we verify and append.
            siblings = result.setdefault(array_name, [])
            if array_index != len(siblings):
                raise CJsonError('Internal ast error. Array {} out of order. '
                                 'Expected index {}, got {}'.format(
                                     array_name, len(siblings), array_index))
            siblings.append(to_dict(child))
        else:
            result[child_name] = to_dict(child)

    # Any child attributes that were missing need "None" values in the json.
    for child_attr in child_attrs_of(klass):
        if child_attr not in result:
            result[child_attr] = None

    return result


def to_json(node, **kwargs):
    """
    Convert ast node to json string.

    Keyword arguments are forwarded unchanged to json.dumps.

    """
    return json.dumps(to_dict(node), **kwargs)


def file_to_dict(filename):
    """ Load C file into dict representation of ast """
    ast = parse_file(filename, use_cpp=True)
    return to_dict(ast)


def file_to_json(filename, **kwargs):
    """
    Load C file into json string representation of ast.

    Keyword arguments are forwarded unchanged to json.dumps.

    """
    ast = parse_file(filename, use_cpp=True)
    return to_json(ast, **kwargs)


def _parse_coord(coord_str):
    """
    Parse coord string (file:line[:column]) into Coord object.

    Returns None when coord_str is None. Up to two trailing all-digit
    fields are taken as line and column and converted to int; whatever
    precedes them is the file name, so colons inside the file name
    (e.g. a Windows drive letter) are kept intact.

    """
    if coord_str is None:
        return None

    fields = coord_str.split(':')
    numbers = []
    # Peel numeric fields off the right-hand end, always leaving at least
    # one field behind to serve as the file name.
    while (len(numbers) < 2 and len(fields) > 1
           and RE_COORD_NUMBER.match(fields[-1])):
        numbers.append(int(fields.pop()))
    numbers.reverse()

    line = numbers[0] if numbers else None
    column = numbers[1] if len(numbers) > 1 else None
    return Coord(':'.join(fields), line, column)


def _convert_to_obj(value):
    """
    Convert an object in the dict representation into an object.
    Note: Mutually recursive with from_dict.

    Dicts become ast nodes, lists are converted element by element, and
    any other value is returned unchanged.

    """
    # isinstance (rather than an exact type comparison) also accepts dict
    # and list subclasses, e.g. an OrderedDict produced by json.loads with
    # an object_pairs_hook.
    if isinstance(value, dict):
        return from_dict(value)
    if isinstance(value, list):
        return [_convert_to_obj(item) for item in value]
    # String (or None)
    return value


def from_dict(node_dict):
    """
    Recursively build an ast from dict representation.

    node_dict is left unmodified, so the same dict can be converted again.

    Raises KeyError if node_dict has no '_nodetype' entry and AttributeError
    if c_ast has no attribute of that name.

    """
    class_name = node_dict['_nodetype']

    klass = getattr(c_ast, class_name)

    # Create a new dict containing the key-value pairs which we can pass
    # to node constructors.
    objs = {}
    for key, value in node_dict.items():
        if key == '_nodetype':
            # Metadata only; not a constructor argument.
            continue
        if key == 'coord':
            objs[key] = _parse_coord(value)
        else:
            objs[key] = _convert_to_obj(value)

    # Use keyword parameters, which works thanks to beautifully consistent
    # ast Node initializers.
    return klass(**objs)


def from_json(ast_json):
    """ Build an ast from json string representation """
    return from_dict(json.loads(ast_json))


#------------------------------------------------------------------------------
if __name__ == "__main__":
    if len(sys.argv) > 1:
        # Some test code...
        # Do trip from C -> ast -> dict -> ast -> json, then print.
        ast_dict = file_to_dict(sys.argv[1])
        ast = from_dict(ast_dict)
        print(to_json(ast, sort_keys=True, indent=4))
    else:
        print("Please provide a filename as argument")