#!/usr/bin/env python3
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Analyze ext4 trace with custom open trace."""
import collections
import math
import os
import re
import string
import sys

DBG = False        # verbose logging of ignored / unmatched events
DBG_ISSUE = False  # verbose logging of ext4 <-> block layer matching

# hard coded maps to detect partition for given device or the other way around
# this can be different per each device. This works for marlin.
DEVICE_TO_PARTITION = { "253,0": "/system/", "253,1": "/vendor/", "259,19": "/data/" }
PARTITION_TO_DEVICE = {}
for key, value in DEVICE_TO_PARTITION.items():  # .items() works on py2 and py3
  PARTITION_TO_DEVICE[value] = key

# init-1 [003] .... 2.703964: do_sys_open: init: open("/sys/fs/selinux/null", 131074, 0) fd = 0, inode = 22
RE_DO_SYS_OPEN = r"""\s+\S+-([0-9]+).*\s+([0-9]+\.[0-9]+):\s+do_sys_open:\s+(\S+):\sopen..(\S+).,\s([0-9]+).\s+.+inode\s=\s([0-9]+)"""
# init-1 [003] ...1 2.703991: ext4_ext_map_blocks_enter: dev 253,0 ino 2719 lblk 154 len 30 flags
RE_EXT4_MA_BLOCKS_ENTER = r"""\s+(\S+)-([0-9]+).+\s+([0-9]+\.[0-9]+):\s+ext4_ext_map_blocks_enter:\s+dev\s+(\S+)\s+ino\s+([0-9]+)\s+lblk\s+([0-9]+)\s+len\s+([0-9]+)"""
# init-1 [002] ...1 2.687205: ext4_ext_map_blocks_exit: dev 253,0 ino 8 flags lblk 0 pblk 196608 len 1 mflags M ret 1
RE_EXT4_MA_BLOCKS_EXIT = r"""\s+(\S+)-([0-9]+).+\s+([0-9]+\.[0-9]+):\s+ext4_ext_map_blocks_exit:\s+dev\s+(\S+)\s+ino\s+([0-9]+)\sflags.*\slblk\s+([0-9]+)\spblk\s([0-9]+)\slen\s+([0-9]+).*mflags\s(\S*)\sret\s([0-9]+)"""
# init-1 [002] ...1 2.887119: block_bio_remap: 8,0 R 10010384 + 8 <- (259,18) 3998944
RE_BLOCK_BIO_REMAP = r""".+block_bio_remap:\s\d+,\d+\s\S+\s(\d+)\s\+\s\d+\s<-\s\([^\)]+\)\s(\d+)"""
# kworker/u9:1-83 [003] d..2 2.682991: block_rq_issue: 8,0 RA 0 () 10140208 + 32 [kworker/u9:1]
RE_BLOCK_RQ_ISSUE = r"""\s+\S+-([0-9]+).*\s+([0-9]+\.[0-9]+):\s+block_rq_issue:\s+([0-9]+)\,([0-9]+)\s+([RW]\S*)\s[0-9]+\s\([^\)]*\)\s([0-9]+)\s+\+\s+([0-9]+)\s+\[([^\]]+)\]"""

EXT4_SIZE_TO_BLOCK_SIZE = 8 # ext4: 4KB, block device block size: 512B

class FileAccess:
  """Accumulated read or write accesses made to a single file.

  Offsets and sizes are in ext4 blocks (4 KB) unless noted otherwise.
  """

  def __init__(self, file):
    self.file = file                        # owning FileEvent
    self.accesses = []                      # list of (time, offset, size, process_name)
    self.total_access = 0                   # total ext4 blocks accessed
    self.ext4_access_size_histogram = {}    # key: read size, value: occurrence
    self.block_access_size_histogram = {}   # same, per block-layer request
    self.ext4_single_block_accesses = {}    # key: process name, value: offsets
    self.block_single_block_accesses = {}   # key: process name, value: offsets
    self.blocks_histogram = {}              # K: offset, V: read counter

  def add_if_single_block(self, container, size, offset, process_name):
    """Records offset under process_name in container when size is one block."""
    if size != 1:
      return
    offsets = container.get(process_name)
    if not offsets:
      offsets = []
      container[process_name] = offsets
    offsets.append(offset)

  def add_access(self, time, offset, size, process_name, read_sizes):
    """Adds one completed ext4 access that the block layer served in the
    (possibly smaller) chunks listed in read_sizes."""
    self.accesses.append((time, offset, size, process_name))
    self.total_access += size
    self.ext4_access_size_histogram[size] = \
        self.ext4_access_size_histogram.get(size, 0) + 1
    read_offset = offset
    for s in read_sizes:
      self.block_access_size_histogram[s] = \
          self.block_access_size_histogram.get(s, 0) + 1
      self.add_if_single_block(self.block_single_block_accesses, s, read_offset,
                               process_name)
      read_offset += s
    for i in range(size):
      self.blocks_histogram[offset + i] = \
          self.blocks_histogram.get(offset + i, 0) + 1
    self.add_if_single_block(self.ext4_single_block_accesses, size, offset,
                             process_name)

  def add_merged_access(self, time, offsets, lens, process_names):
    """Adds several ext4 accesses that one block-layer request completed.

    offsets/lens/process_names are parallel lists, one entry per merged
    ext4 access.
    """
    # Each read can overlap. So count only once for block level counting.
    total_read_offsets = set()
    for i in range(len(offsets)):
      self.accesses.append((time, offsets[i], lens[i], process_names[i]))
      self.ext4_access_size_histogram[lens[i]] = \
          self.ext4_access_size_histogram.get(lens[i], 0) + 1
      self.add_if_single_block(self.ext4_single_block_accesses, lens[i],
                               offsets[i], process_names[i])
      # BUG FIX: iterate over this access' own length (lens[i]), not the
      # number of merged accesses (len(lens)), when collecting the set of
      # ext4 blocks it covers.
      for j in range(lens[i]):
        total_read_offsets.add(offsets[i] + j)
    total_lens = len(total_read_offsets)
    start_offset = min(total_read_offsets)
    self.total_access += total_lens
    self.block_access_size_histogram[total_lens] = \
        self.block_access_size_histogram.get(total_lens, 0) + 1
    self.add_if_single_block(self.block_single_block_accesses, total_lens,
                             start_offset, process_names[0])
    for s in range(total_lens):
      self.blocks_histogram[start_offset + s] = \
          self.blocks_histogram.get(start_offset + s, 0) + 1

  def dump(self):
    """Prints histograms and the repeated-access summary for this file."""
    if len(self.ext4_access_size_histogram) > 1:
      print(" Ext4 access size histograms:", collections.OrderedDict(
          sorted(self.ext4_access_size_histogram.items(),
                 key = lambda item: item[0])))
    if len(self.ext4_single_block_accesses) > 0 and self.total_access > 1:
      print(" Ext4 single block accesses:", collections.OrderedDict(
          sorted(self.ext4_single_block_accesses.items(),
                 key = lambda item: item[1], reverse = True)))
    if len(self.block_access_size_histogram) > 1:
      print(" Block access size histograms:", collections.OrderedDict(
          sorted(self.block_access_size_histogram.items(),
                 key = lambda item: item[0])))
    if len(self.block_single_block_accesses) > 0 and self.total_access > 1:
      print(" Block single block accesses:", collections.OrderedDict(
          sorted(self.block_single_block_accesses.items(),
                 key = lambda item: item[1], reverse = True)))
    if self.total_access > 1:
      sorted_blocks_histogram = sorted(self.blocks_histogram.items(),
                                       key = lambda item: item[1],
                                       reverse = True)
      prints = []
      repeating_reads_counter = 0
      for entry in sorted_blocks_histogram:
        offset = entry[0]
        counter = entry[1]
        if counter == 1:
          break  # sorted by count; everything after is a single access
        prints.append(str(offset) + ":" + str(counter))
        repeating_reads_counter += (counter - 1)
      if len(prints) > 0:
        print(" repeating accesses", repeating_reads_counter,
              " offset:count ->", ','.join(prints))

class FileEvent:
  """All open/read/write activity observed for one inode."""

  def __init__(self, open_time, file_name, process_name, inode, flags):
    self.file_name = file_name
    self.inode = inode
    self.total_open = 1
    self.processes = []  # list of (open_time, process_name, flags)
    self.processes.append((open_time, process_name, flags))
    self.read = FileAccess(self)
    self.write = FileAccess(self)

  def add_open(self, open_time, process_name, flags):
    """Records an additional open of this file."""
    self.processes.append((open_time, process_name, flags))
    self.total_open += 1

  def add_access(self, is_read, time, offset, size, process_name, read_sizes):
    """Routes one completed access to the read or write accumulator."""
    if is_read:
      self.read.add_access(time, offset, size, process_name, read_sizes)
    else:
      self.write.add_access(time, offset, size, process_name, read_sizes)

  def add_merged_access(self, is_read, time, offsets, lens, process_names):
    """Routes a merged access batch to the read or write accumulator."""
    if is_read:
      self.read.add_merged_access(time, offsets, lens, process_names)
    else:
      self.write.add_merged_access(time, offsets, lens, process_names)

  def dump(self, name_to_pid_map):
    """Prints full access details; name_to_pid_map maps process name -> pid."""
    print(" ***filename %s, total reads %d, total writes %d, total open %d inode %s"
          % (self.file_name, self.read.total_access, self.write.total_access,
             self.total_open, self.inode))
    process_names = []
    for opener in self.processes:
      process_names.append(opener[1] + "-" + name_to_pid_map.get(opener[1], '?')
                           + " t:" + str(opener[0]) + " flags:" + opener[2])
    print(" Processes opened this file:", ','.join(process_names))
    if self.read.total_access > 0:
      print(" ****Reads:")
      self.read.dump()
    if self.write.total_access > 0:
      print(" ****Writes:")
      self.write.dump()

  def dump_short(self):
    """Prints a one-line read/write summary."""
    print(" filename %s, total reads %d, total writes %d" % (self.file_name,
        self.read.total_access, self.write.total_access))

class PendingAccess:
  """An ext4 map_blocks request waiting for matching block-layer requests.

  Tracks which ext4 blocks of the request have already been seen at the
  block layer so the completed access can be attributed to its file.
  """

  def __init__(self, process_name, pid, time, dev, inode, lblk, pblk, len,
               fevent):
    # NOTE: parameter 'len' shadows the builtin; name kept for compatibility.
    self.process_name = process_name
    self.pid = pid
    self.time = time          # time the ext4 request was seen
    self.dev = dev
    self.inode = inode
    self.lblk = lblk          # logical block (file offset, ext4 blocks)
    self.pblk = pblk          # physical address in 512B device sectors
    self.blk_len = len * EXT4_SIZE_TO_BLOCK_SIZE  # length in 512B sectors
    self.len = len            # length in ext4 blocks
    self.fevent = fevent
    self.pending_accesses = set()  # ext4 block indexes not yet seen
    for i in range(len):
      self.pending_accesses.add(i)
    self.access_sizes = []  # valid read for this file in block dev level.
    self.block_access_counter = 0

  def get_valid_access(self, block_offset, block_len):
    """Clips a block-layer span (in 512B sectors, relative to pblk) to this
    request. Returns (offset, length) in ext4 blocks; length 0 if disjoint."""
    # Floor division keeps these ints under python 3 as well.
    ext4_offset = block_offset // EXT4_SIZE_TO_BLOCK_SIZE
    if ext4_offset > self.len:
      return 0, 0
    ext4_len = block_len // EXT4_SIZE_TO_BLOCK_SIZE
    if (ext4_offset + ext4_len) > self.len:
      ext4_len = self.len - ext4_offset
    return ext4_offset, ext4_len

  def queue_block_access(self, ext4_offset, ext4_len):
    """Marks [ext4_offset, ext4_offset + ext4_len) as seen at block layer."""
    if ext4_len <= 0:
      return
    self.block_access_counter += 1
    ext4_blocks_accessed = 0
    for i in range(ext4_len):
      ext4_block_i = i + ext4_offset
      if ext4_block_i in self.pending_accesses:
        ext4_blocks_accessed += 1
        self.pending_accesses.remove(ext4_block_i)
    if ext4_blocks_accessed > 0:
      self.access_sizes.append(ext4_blocks_accessed)

  def handle_req_complete(self, time, is_read):
    """Reports the fully-seen request to the owning FileEvent.

    Reported with the enqueue time (self.time), not the completion time."""
    self.fevent.add_access(is_read, self.time, self.lblk, self.len,
                           self.process_name, self.access_sizes)

  def handle_merged_req(self, time, offsets, lens, names, is_read):
    """Reports several requests completed together by one block request."""
    self.fevent.add_merged_access(is_read, time, offsets, lens, names)

  def is_req_complete(self):
    """True when every ext4 block of the request has been seen."""
    return len(self.pending_accesses) == 0

  def is_req_started(self):
    """True when at least one ext4 block of the request has been seen."""
    # BUG FIX: compare values with != ; 'is not' on ints is only accidentally
    # correct for small interned integers.
    return self.len != len(self.pending_accesses)

class Trace:
  """Parses trace lines and correlates ext4 requests with block requests."""

  def __init__(self):
    self.files_per_device = {} # key: device, value: { key: inode, value; FileEvent }
    self.re_open = re.compile(RE_DO_SYS_OPEN)
    self.re_ext4_access = re.compile(RE_EXT4_MA_BLOCKS_EXIT)
    self.re_bio_remap = re.compile(RE_BLOCK_BIO_REMAP)
    self.re_block_issue = re.compile(RE_BLOCK_RQ_ISSUE)
    # req from ext4 that has not gone down to block level yet, K:block address,
    # V: list of PendingRead
    self.pending_accesses = {}
    self.remap = {}  # K: remapped sector, V: original sector
    self.process_names = {} # K: PID, V : name

  def handle_line(self, line):
    """Dispatches one raw trace line to the matching handler, if any."""
    match = self.re_open.match(line)
    if match:
      self.handle_open(match)
      return
    match = self.re_ext4_access.match(line)
    if match:
      self.handle_ext4_block_exit(match)
      return
    match = self.re_bio_remap.match(line)
    if match:
      self.handle_bio_remap(match)
      return
    match = self.re_block_issue.match(line)
    if match:
      self.handle_block_issue(match)
      return

  def handle_open(self, match):
    """Registers a do_sys_open event for a file on a known partition."""
    pid = int(match.group(1))
    time = match.group(2)
    process_name = match.group(3)
    file_name = match.group(4)
    flag = match.group(5)
    inode = int(match.group(6))
    dev_name = None
    self.process_names[pid] = process_name
    for p in PARTITION_TO_DEVICE:
      if file_name.startswith(p):
        dev_name = PARTITION_TO_DEVICE[p]
    if not dev_name:
      if DBG:
        print("Ignore open for file", file_name)
      return
    # BUG FIX: an open can arrive before any ext4 access created the
    # per-device map; plain indexing raised KeyError here.
    files = self.files_per_device.setdefault(dev_name, {})
    fevent = files.get(inode)
    if not fevent:
      fevent = FileEvent(time, file_name, process_name, inode, flag)
      files[inode] = fevent
    else:
      fevent.add_open(time, process_name, flag)

  def handle_ext4_block_exit(self, match):
    """Queues an ext4 map_blocks result as a pending block-layer access."""
    process_name = match.group(1)
    pid = int(match.group(2))
    time = float(match.group(3))
    dev = match.group(4)
    inode = int(match.group(5))
    lblk = int(match.group(6))
    # convert ext4 block address into 512B device sectors
    pblk = int(match.group(7)) * EXT4_SIZE_TO_BLOCK_SIZE
    l = int(match.group(8))
    mflags = match.group(9)
    ret = int(match.group(10))
    if ret <= 0: # no block access
      return
    process_name = self.process_names.get(pid, process_name)
    if process_name == '<...>':
      process_name = "pid-" + str(pid)
    if DBG_ISSUE:
      print("ext4", pblk, l, inode, process_name)
    files = self.files_per_device.get(dev)
    if not files:
      if DEVICE_TO_PARTITION.get(dev):
        files = {}
        self.files_per_device[dev] = files
      else:
        if DBG:
          print("access ignored for device", dev)
        return
    fevent = files.get(inode)
    if not fevent:
      if DBG:
        print('no open for device %s with inode %s' % (dev, inode))
      fevent = FileEvent(time, "unknown", process_name, inode, "-")
      files[inode] = fevent
    pending_access = PendingAccess(process_name, pid, time, dev, inode, lblk,
                                   pblk, l, fevent)
    access_list = self.pending_accesses.get(pblk, [])
    access_list.append(pending_access)
    self.pending_accesses[pblk] = access_list

  def handle_bio_remap(self, match):
    """Records a sector remap so later issues can be mapped back."""
    new_addr = int(match.group(1))
    old_addr = int(match.group(2))
    self.remap[new_addr] = old_addr
    if DBG_ISSUE:
      print("remap", new_addr, old_addr)

  def handle_block_issue(self, match):
    """Matches one issued block request against pending ext4 accesses."""
    pid = int(match.group(1))
    time = float(match.group(2))
    dev_major = match.group(3)
    dev_minor = match.group(4)
    access = match.group(5)
    new_address = int(match.group(6))
    l = int(match.group(7))
    name = match.group(8)
    name = self.process_names.get(pid, name)
    if name == '<...>':
      name = "pid-" + str(pid)
    is_read = not 'W' in access
    accesses_per_inodes = {} # K:inodes, V: list of three lists: offsets, lengths, names
    addrs_to_remove = []
    completed_reqs = []
    address = self.remap.get(new_address, new_address)
    if DBG_ISSUE:
      print("issue", address, l, is_read, access)
    for access_addr, access_list in self.pending_accesses.items():
      if (address >= access_addr) and (address + l) > access_addr:
        reqs_to_remove = []
        for pending in access_list:
          offset, valid_access_size = pending.get_valid_access(
              address - access_addr, l)
          if valid_access_size > 0:
            if pending.is_req_started(): # spread across multiple reads. complete alone
              pending.queue_block_access(offset, valid_access_size)
              if pending.is_req_complete():
                pending.handle_req_complete(time, is_read)
                reqs_to_remove.append(pending)
            else: # possible multiple reads completed in this read. complete them together
              pending.queue_block_access(offset, valid_access_size)
              if pending.is_req_complete():
                reads = accesses_per_inodes.get(pending.inode, [[], [], []])
                reads[0].append(offset + pending.lblk)
                reads[1].append(valid_access_size)
                reads[2].append(pending.process_name)
                accesses_per_inodes[pending.inode] = reads
                completed_reqs.append(pending)
                reqs_to_remove.append(pending)
        for to_remove in reqs_to_remove:
          access_list.remove(to_remove)
        if len(access_list) == 0:
          addrs_to_remove.append(access_addr)
    for addr in addrs_to_remove:
      del self.pending_accesses[addr]
    for pending in completed_reqs: # these will be reported as batch
      accesses = accesses_per_inodes.get(pending.inode)
      if not accesses: # merged one already dispatched
        continue
      if len(accesses[0]) == 1:
        pending.handle_req_complete(time, is_read)
      else: #merged
        pending.handle_merged_req(time, accesses[0], accesses[1], accesses[2],
                                  is_read)
      del accesses_per_inodes[pending.inode]

  def dump_partition(self, partition_name, files):
    """Prints per-file stats for one partition.

    Returns (total_reads, total_writes, number_of_files)."""
    name_to_pid_map = {}
    for pid, name in self.process_names.items():
      name_to_pid_map[name] = str(pid)
    print("**Dump partition:", partition_name, "total number of files:",
          len(files))
    total_reads = 0
    total_writes = 0
    # dict views are not sortable in place under python 3; use sorted().
    files_sorted_by_read = sorted(files.values(),
                                  key=lambda f : f.read.total_access,
                                  reverse = True)
    files_sorted_by_write = sorted(files.values(),
                                   key=lambda f : f.write.total_access,
                                   reverse = True)
    print(" Top 10 readers:")
    for i in range(min(10, len(files_sorted_by_read))):
      files_sorted_by_read[i].dump_short()
    print(" Top 10 writers:")
    for i in range(min(10, len(files_sorted_by_write))):
      files_sorted_by_write[i].dump_short()
    for f in files_sorted_by_read:
      f.dump(name_to_pid_map)
      total_reads += f.read.total_access
      total_writes += f.write.total_access
    print(" Total reads:", total_reads, " total writes:", total_writes)
    return total_reads, total_writes, len(files)

  def dump(self):
    """Prints per-partition details followed by a global summary."""
    print("*Dump R/W per each partition")
    total_reads = 0
    total_writes = 0
    summaries = []
    for d in self.files_per_device:
      reads, writes, num_files = self.dump_partition(
          DEVICE_TO_PARTITION[d], self.files_per_device[d])
      total_reads += reads
      total_writes += writes
      summaries.append((DEVICE_TO_PARTITION[d], reads, writes, num_files))
    print("*Summary*")
    print("Total blocks read", total_reads)
    print("Total blocks wrote", total_writes)
    print("Partition total_reads total_writes num_files")
    for s in summaries:
      print(s[0], s[1], s[2], s[3])

def main(argv):
  """Entry point: parses the trace file named in argv[1] and dumps stats."""
  if (len(argv) < 2):
    print("check_file_read.py filename")
    return
  filename = argv[1]
  trace = Trace()
  with open(filename) as f:
    for l in f:
      trace.handle_line(l)
  trace.dump()

if __name__ == '__main__':
  main(sys.argv)