import bisect

import gdb


def parse_address_to_int(address):
    """
    Convert an address into a Python int.

    Parameters
    ---------
    address : gdb.Value or str
        Either a gdb.Value (converted directly with int()) or a textual
        expression such as "0x602010" that gdb can evaluate.

    Returns
    -------
    int : the numeric address

    Raises
    ------
    gdb.error if gdb cannot evaluate/convert the address,
    ValueError / IndexError if gdb's textual answer cannot be parsed.
    """
    if isinstance(address, gdb.Value):
        # Formatting a gdb.Value into a command string relies on the value's
        # text being a valid expression; converting the value directly avoids
        # that and does not add an entry to the value history.
        return int(address)

    int_address_string = gdb.execute(
        'p/d {}'.format(address), to_string=True)
    return int(parse_gdb_equals(int_address_string))


def parse_gdb_equals(str):
    """
    str is "$1 = value", so it returns "value".

    Only the first "=" is treated as the separator, so a value that itself
    contains "=" is returned whole. Raises IndexError when str has no "=".
    """
    return str.split("=", 1)[1].strip()


def _frame_arg_values():
    """
    Return the textual values reported by `info args` for the selected
    frame, in order. Lines without "=" are ignored, so the result is empty
    when no "name = value" line is printed.
    """
    output = gdb.execute('info args', to_string=True)
    return [parse_gdb_equals(line)
            for line in output.splitlines() if "=" in line]


class HeapMapping:
    """
    Wrapper class for dictionary to have customization for the dictionary
    and one entry point
    """

    # base address (int) -> number of bytes allocated at that address
    address_length_mapping = {}
    # set of the base addresses stored in address_length_mapping
    address_set = set()

    @staticmethod
    def put(address, length):
        """Record that `length` bytes are allocated starting at `address`."""
        HeapMapping.address_length_mapping[address] = length
        HeapMapping.address_set.add(address)

    @staticmethod
    def get(address):
        """
        Gets the length of the dynamic array corresponding to address. Suppose
        dynamic array is {1,2,3,4,5} and starting address is 400 which is
        passed as address to this method, then method would return 20
        (i.e. 5 * sizeof(int)). When this address is offsetted, for eg 408 is
        passed to this method, then it will return the remaining number of
        bytes allocated, here it would be 12 (i.e. 420 - 408).

        Algorithm tries to find address in address_length_mapping. If it
        doesn't find it then it looks for the closest lower base address and
        checks whether that allocation still covers the address. If no such
        mapping exists it returns None.
        """
        mapping = HeapMapping.address_length_mapping

        length_found = mapping.get(address)
        # `is not None` so that an exact hit on a zero-length allocation is
        # reported as 0 instead of falling through to the range search.
        if length_found is not None:
            return length_found

        base_addresses = sorted(HeapMapping.address_set)
        # The exact match was handled above, so every entry left of the
        # insertion point is strictly lower than address.
        index = bisect.bisect_left(base_addresses, address) - 1
        if index < 0:
            return None

        base_address = base_addresses[index]
        remaining = base_address + mapping[base_address] - address
        return remaining if remaining > 0 else None

    @staticmethod
    def remove(address):
        """Forget the allocation starting at `address` (no-op if unknown)."""
        HeapMapping.address_length_mapping.pop(address, None)
        HeapMapping.address_set.discard(address)


class AllocationFinishedBreakpoint(gdb.FinishBreakpoint):
    """
    Sets temporary breakpoints on returns (specifically returns of memory
    allocations) to record address allocated.
    It gets instantiated from AllocationBreakpoint and ReallocationBreakpoint.
    When it is instantiated from ReallocationBreakpoint, it carries
    prev_address.
    """

    def __init__(self, length, prev_address=None):
        super().__init__(internal=True)
        self.length = length
        self.prev_address = prev_address

    def stop(self):
        """
        Called when the return address in the current frame is hit. If a
        non-null address was returned, it stores address and length into
        HeapMapping (replacing the mapping of prev_address, if any).
        Always returns False so the inferior keeps running.
        """
        return_address = self.return_value
        if return_address is None:
            # gdb could not compute a return value; nothing to record.
            return False

        try:
            int_address = parse_address_to_int(return_address)
        except (ValueError, IndexError, gdb.error):
            return False

        if int_address == 0:
            # Allocation failed: record nothing and keep the mapping of
            # prev_address, since no new block replaced it.
            return False

        if self.prev_address is not None:
            HeapMapping.remove(self.prev_address)
        HeapMapping.put(int_address, self.length)
        return False


class AllocationBreakpoint(gdb.Breakpoint):
    """
    Handler class when malloc and operator new[] gets hit
    """

    def __init__(self, spec):
        super().__init__(spec, internal=True)

    def stop(self):
        """
        Read the requested size (first argument) and arm a finish breakpoint
        that records the returned address. Always returns False.
        """
        values = _frame_arg_values()
        if values:
            try:
                length = int(values[0])
            except ValueError:
                # Size is not a plain number (e.g. "<optimized out>");
                # skip this allocation instead of raising out of stop().
                return False
            AllocationFinishedBreakpoint(length)
        return False


class ReallocationBreakpoint(gdb.Breakpoint):
    """
    Handler class when realloc gets hit
    """

    def __init__(self, spec):
        super().__init__(spec, internal=True)

    def stop(self):
        """
        Read the old address (first argument) and the new size (second
        argument) and arm a finish breakpoint that moves the mapping.
        Always returns False.
        """
        values = _frame_arg_values()
        if len(values) >= 2:
            try:
                int_address = parse_address_to_int(values[0])
                length = int(values[1])
            except (ValueError, IndexError, gdb.error):
                return False
            AllocationFinishedBreakpoint(length, int_address)
        return False


class DeallocationBreakpoint(gdb.Breakpoint):
    """
    Handler class when free and operator delete[] gets hit
    """

    def __init__(self, spec):
        super().__init__(spec, internal=True)

    def stop(self):
        """
        Drop the mapping of the address being released (first argument).
        Always returns False.
        """
        values = _frame_arg_values()
        if values:
            # Only the first argument is the pointer; any further arguments
            # (e.g. a size) are ignored.
            try:
                int_address = parse_address_to_int(values[0])
            except (ValueError, IndexError, gdb.error):
                return False
            HeapMapping.remove(int_address)
        return False


class WatchHeap(gdb.Command):
    """
    Custom Command to keep track of Heap Memory Allocation.
    Currently keeps tracks of memory allocated/deallocated using
    malloc, realloc, free, operator new[] and operator delete[]
    """

    def __init__(self):
        super(WatchHeap, self).__init__("watch_heap", gdb.COMMAND_USER)
        # Set once the tracking breakpoints exist, so that invoking the
        # command again does not install a second set.
        self._installed = False

    def complete(self, text, word):
        # The command takes no arguments, so there is nothing to complete.
        return gdb.COMPLETE_NONE

    def invoke(self, args, from_tty):
        """Install the breakpoints that keep HeapMapping up to date."""
        if self._installed:
            print("Heap tracking is already enabled.")
            return

        # TODO : Check whether break location methods are defined
        AllocationBreakpoint("malloc")
        AllocationBreakpoint("operator new[]")
        ReallocationBreakpoint("realloc")
        DeallocationBreakpoint("free")
        DeallocationBreakpoint("operator delete[]")
        self._installed = True


class PrintHeapPointer(gdb.Command):
    """
    Custom command to print memory allocated at dynamic time
    """

    def __init__(self):
        super(PrintHeapPointer, self).__init__("print_ptr", gdb.COMMAND_USER)

    def complete(self, text, word):
        # The argument is an expression naming a pointer, so offer symbols.
        return gdb.COMPLETE_SYMBOL

    def invoke(self, args, from_tty=True):
        """
        Evaluate `args` as an expression; if it is a pointer, print its type,
        its address and the tracked heap bytes it points to.
        """
        try:
            value = gdb.parse_and_eval(args)
        except gdb.error:
            print('No symbol found!')
            return

        # strip_typedefs() so that typedef'd pointer types are accepted too.
        if value.type.strip_typedefs().code != gdb.TYPE_CODE_PTR:
            return

        print("Type : ", value.type)
        try:
            int_address = parse_address_to_int(value)
            print("Address: ", hex(int_address))
            # print memory
            self.print_heap(int_address)
        except gdb.error as err:
            print('Unable to inspect pointer:', err)

    def print_heap(self, address):
        """
        Prints the memory that is being pointed by address in hex format

        Parameters
        ---------
        address : raw pointer (int)

        Raises
        ------
        gdb.error if the tracked range cannot be read from the inferior.
        """
        memory_size = HeapMapping.get(address)
        if not memory_size:
            # Covers both "not tracked" (None) and zero-length allocations,
            # which have no bytes to show.
            print("No address mapping found!")
            return

        print('Length :', memory_size)
        # One read for the whole range instead of one gdb command per byte.
        memory = gdb.selected_inferior().read_memory(address, memory_size)
        print("".join("0x{:02x} ".format(byte) for byte in bytes(memory)))


if __name__ == '__main__':
    WatchHeap()
    PrintHeapPointer()