1#!/usr/bin/env python 2# Merge or print the coverage data collected by asan's coverage. 3# Input files are sequences of 4-byte integers. 4# We need to merge these integers into a set and then 5# either print them (as hex) or dump them into another file. 6import array 7import bisect 8import glob 9import os.path 10import struct 11import subprocess 12import sys 13 14prog_name = "" 15 16def Usage(): 17 print >> sys.stderr, "Usage: \n" + \ 18 " " + prog_name + " merge FILE [FILE...] > OUTPUT\n" \ 19 " " + prog_name + " print FILE [FILE...]\n" \ 20 " " + prog_name + " unpack FILE [FILE...]\n" \ 21 " " + prog_name + " rawunpack FILE [FILE ...]\n" \ 22 " " + prog_name + " missing BINARY < LIST_OF_PCS\n" 23 exit(1) 24 25def CheckBits(bits): 26 if bits != 32 and bits != 64: 27 raise Exception("Wrong bitness: %d" % bits) 28 29def TypeCodeForBits(bits): 30 CheckBits(bits) 31 return 'L' if bits == 64 else 'I' 32 33kMagic32SecondHalf = 0xFFFFFF32; 34kMagic64SecondHalf = 0xFFFFFF64; 35kMagicFirstHalf = 0xC0BFFFFF; 36 37def MagicForBits(bits): 38 CheckBits(bits) 39 if sys.byteorder == 'little': 40 return [kMagic64SecondHalf if bits == 64 else kMagic32SecondHalf, kMagicFirstHalf] 41 else: 42 return [kMagicFirstHalf, kMagic64SecondHalf if bits == 64 else kMagic32SecondHalf] 43 44def ReadMagicAndReturnBitness(f, path): 45 magic_bytes = f.read(8) 46 magic_words = struct.unpack('II', magic_bytes); 47 bits = 0 48 idx = 1 if sys.byteorder == 'little' else 0 49 if magic_words[idx] == kMagicFirstHalf: 50 if magic_words[1-idx] == kMagic64SecondHalf: 51 bits = 64 52 elif magic_words[1-idx] == kMagic32SecondHalf: 53 bits = 32 54 if bits == 0: 55 raise Exception('Bad magic word in %s' % path) 56 return bits 57 58def ReadOneFile(path): 59 with open(path, mode="rb") as f: 60 f.seek(0, 2) 61 size = f.tell() 62 f.seek(0, 0) 63 if size < 8: 64 raise Exception('File %s is short (< 8 bytes)' % path) 65 bits = ReadMagicAndReturnBitness(f, path) 66 size -= 8 67 s = array.array(TypeCodeForBits(bits), f.read(size)) 68 print >>sys.stderr, "%s: read %d %d-bit PCs from %s" % (prog_name, size * 8 / bits, bits, path) 69 return s 70 71def Merge(files): 72 s = set() 73 for f in files: 74 s = s.union(set(ReadOneFile(f))) 75 print >> sys.stderr, "%s: %d files merged; %d PCs total" % \ 76 (prog_name, len(files), len(s)) 77 return sorted(s) 78 79def PrintFiles(files): 80 if len(files) > 1: 81 s = Merge(files) 82 else: # If there is just on file, print the PCs in order. 83 s = ReadOneFile(files[0]) 84 print >> sys.stderr, "%s: 1 file merged; %d PCs total" % \ 85 (prog_name, len(s)) 86 for i in s: 87 print "0x%x" % i 88 89def MergeAndPrint(files): 90 if sys.stdout.isatty(): 91 Usage() 92 s = Merge(files) 93 bits = 32 94 if max(s) > 0xFFFFFFFF: 95 bits = 64 96 array.array('I', MagicForBits(bits)).tofile(sys.stdout) 97 a = array.array(TypeCodeForBits(bits), s) 98 a.tofile(sys.stdout) 99 100 101def UnpackOneFile(path): 102 with open(path, mode="rb") as f: 103 print >> sys.stderr, "%s: unpacking %s" % (prog_name, path) 104 while True: 105 header = f.read(12) 106 if not header: return 107 if len(header) < 12: 108 break 109 pid, module_length, blob_size = struct.unpack('iII', header) 110 module = f.read(module_length) 111 blob = f.read(blob_size) 112 assert(len(module) == module_length) 113 assert(len(blob) == blob_size) 114 extracted_file = "%s.%d.sancov" % (module, pid) 115 print >> sys.stderr, "%s: extracting %s" % \ 116 (prog_name, extracted_file) 117 # The packed file may contain multiple blobs for the same pid/module 118 # pair. Append to the end of the file instead of overwriting. 119 with open(extracted_file, 'ab') as f2: 120 f2.write(blob) 121 # fail 122 raise Exception('Error reading file %s' % path) 123 124 125def Unpack(files): 126 for f in files: 127 UnpackOneFile(f) 128 129def UnpackOneRawFile(path, map_path): 130 mem_map = [] 131 with open(map_path, mode="rt") as f_map: 132 print >> sys.stderr, "%s: reading map %s" % (prog_name, map_path) 133 bits = int(f_map.readline()) 134 if bits != 32 and bits != 64: 135 raise Exception('Wrong bits size in the map') 136 for line in f_map: 137 parts = line.rstrip().split() 138 mem_map.append((int(parts[0], 16), 139 int(parts[1], 16), 140 int(parts[2], 16), 141 ' '.join(parts[3:]))) 142 mem_map.sort(key=lambda m : m[0]) 143 mem_map_keys = [m[0] for m in mem_map] 144 145 with open(path, mode="rb") as f: 146 print >> sys.stderr, "%s: unpacking %s" % (prog_name, path) 147 148 f.seek(0, 2) 149 size = f.tell() 150 f.seek(0, 0) 151 pcs = array.array(TypeCodeForBits(bits), f.read(size)) 152 mem_map_pcs = [[] for i in range(0, len(mem_map))] 153 154 for pc in pcs: 155 if pc == 0: continue 156 map_idx = bisect.bisect(mem_map_keys, pc) - 1 157 (start, end, base, module_path) = mem_map[map_idx] 158 assert pc >= start 159 if pc >= end: 160 print >> sys.stderr, "warning: %s: pc %x outside of any known mapping" % (prog_name, pc) 161 continue 162 mem_map_pcs[map_idx].append(pc - base) 163 164 for ((start, end, base, module_path), pc_list) in zip(mem_map, mem_map_pcs): 165 if len(pc_list) == 0: continue 166 assert path.endswith('.sancov.raw') 167 dst_path = module_path + '.' + os.path.basename(path)[:-4] 168 print >> sys.stderr, "%s: writing %d PCs to %s" % (prog_name, len(pc_list), dst_path) 169 arr = array.array(TypeCodeForBits(bits)) 170 arr.fromlist(sorted(pc_list)) 171 with open(dst_path, 'ab') as f2: 172 array.array('I', MagicForBits(bits)).tofile(f2) 173 arr.tofile(f2) 174 175def RawUnpack(files): 176 for f in files: 177 if not f.endswith('.sancov.raw'): 178 raise Exception('Unexpected raw file name %s' % f) 179 f_map = f[:-3] + 'map' 180 UnpackOneRawFile(f, f_map) 181 182def GetInstrumentedPCs(binary): 183 # This looks scary, but all it does is extract all offsets where we call: 184 # - __sanitizer_cov() or __sanitizer_cov_with_check(), 185 # - with call or callq, 186 # - directly or via PLT. 187 cmd = "objdump -d %s | " \ 188 "grep '^\s\+[0-9a-f]\+:.*\scall\(q\|\)\s\+[0-9a-f]\+ <__sanitizer_cov\(_with_check\|\)\(@plt\|\)>' | " \ 189 "grep '^\s\+[0-9a-f]\+' -o" % binary 190 proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, 191 shell=True) 192 proc.stdin.close() 193 # The PCs we get from objdump are off by 4 bytes, as they point to the 194 # beginning of the callq instruction. Empirically this is true on x86 and 195 # x86_64. 196 return set(int(line.strip(), 16) + 4 for line in proc.stdout) 197 198def PrintMissing(binary): 199 if not os.path.isfile(binary): 200 raise Exception('File not found: %s' % binary) 201 instrumented = GetInstrumentedPCs(binary) 202 print >> sys.stderr, "%s: found %d instrumented PCs in %s" % (prog_name, 203 len(instrumented), 204 binary) 205 covered = set(int(line, 16) for line in sys.stdin) 206 print >> sys.stderr, "%s: read %d PCs from stdin" % (prog_name, len(covered)) 207 missing = instrumented - covered 208 print >> sys.stderr, "%s: %d PCs missing from coverage" % (prog_name, len(missing)) 209 if (len(missing) > len(instrumented) - len(covered)): 210 print >> sys.stderr, \ 211 "%s: WARNING: stdin contains PCs not found in binary" % prog_name 212 for pc in sorted(missing): 213 print "0x%x" % pc 214 215if __name__ == '__main__': 216 prog_name = sys.argv[0] 217 if len(sys.argv) <= 2: 218 Usage(); 219 220 if sys.argv[1] == "missing": 221 if len(sys.argv) != 3: 222 Usage() 223 PrintMissing(sys.argv[2]) 224 exit(0) 225 226 file_list = [] 227 for f in sys.argv[2:]: 228 file_list += glob.glob(f) 229 if not file_list: 230 Usage() 231 232 if sys.argv[1] == "print": 233 PrintFiles(file_list) 234 elif sys.argv[1] == "merge": 235 MergeAndPrint(file_list) 236 elif sys.argv[1] == "unpack": 237 Unpack(file_list) 238 elif sys.argv[1] == "rawunpack": 239 RawUnpack(file_list) 240 else: 241 Usage() 242