• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#!/usr/bin/env python
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Analyze ext4 trace with custom open trace"""
import collections
import math
import os
import re
import string
import sys

DBG = False        # verbose logging for ignored files/devices
DBG_ISSUE = False  # extra logging for block issue / remap events

# Hard-coded maps to detect the partition for a given device number, and the
# other way around. These can differ per device; this works for marlin.
DEVICE_TO_PARTITION = { "253,0": "/system/", "253,1": "/vendor/", "259,19": "/data/" }
PARTITION_TO_DEVICE = {}
# .items() instead of py2-only .iteritems() so the script also runs under python3.
for key, value in DEVICE_TO_PARTITION.items():
  PARTITION_TO_DEVICE[value] = key
34
# Example line:
# init-1     [003] ....     2.703964: do_sys_open: init: open("/sys/fs/selinux/null", 131074, 0) fd = 0, inode = 22
# Captured groups: pid, timestamp, process name, file name, flags, inode.
RE_DO_SYS_OPEN = r"""\s+\S+-([0-9]+).*\s+([0-9]+\.[0-9]+):\s+do_sys_open:\s+(\S+):\sopen..(\S+).,\s([0-9]+).\s+.+inode\s=\s([0-9]+)"""
# init-1     [003] ...1     2.703991: ext4_ext_map_blocks_enter: dev 253,0 ino 2719 lblk 154 len 30 flags
# Captured groups: process, pid, timestamp, dev, inode, logical block, length.
# NOTE(review): this pattern is defined but never compiled or used below.
RE_EXT4_MA_BLOCKS_ENTER = r"""\s+(\S+)-([0-9]+).+\s+([0-9]+\.[0-9]+):\s+ext4_ext_map_blocks_enter:\s+dev\s+(\S+)\s+ino\s+([0-9]+)\s+lblk\s+([0-9]+)\s+len\s+([0-9]+)"""
# init-1     [002] ...1     2.687205: ext4_ext_map_blocks_exit: dev 253,0 ino 8 flags  lblk 0 pblk 196608 len 1 mflags M ret 1
# Captured groups: process, pid, timestamp, dev, inode, lblk, pblk, len, mflags, ret.
RE_EXT4_MA_BLOCKS_EXIT = r"""\s+(\S+)-([0-9]+).+\s+([0-9]+\.[0-9]+):\s+ext4_ext_map_blocks_exit:\s+dev\s+(\S+)\s+ino\s+([0-9]+)\sflags.*\slblk\s+([0-9]+)\spblk\s([0-9]+)\slen\s+([0-9]+).*mflags\s(\S*)\sret\s([0-9]+)"""
#  init-1     [002] ...1     2.887119: block_bio_remap: 8,0 R 10010384 + 8 <- (259,18) 3998944
# Captured groups: remapped (new) sector address, original sector address.
RE_BLOCK_BIO_REMAP = r""".+block_bio_remap:\s\d+,\d+\s\S+\s(\d+)\s\+\s\d+\s<-\s\([^\)]+\)\s(\d+)"""
# kworker/u9:1-83    [003] d..2     2.682991: block_rq_issue: 8,0 RA 0 () 10140208 + 32 [kworker/u9:1]
# Captured groups: pid, timestamp, dev major, dev minor, access type (R/W/...),
# start sector, length, issuing process name.
RE_BLOCK_RQ_ISSUE = r"""\s+\S+-([0-9]+).*\s+([0-9]+\.[0-9]+):\s+block_rq_issue:\s+([0-9]+)\,([0-9]+)\s+([RW]\S*)\s[0-9]+\s\([^\)]*\)\s([0-9]+)\s+\+\s+([0-9]+)\s+\[([^\]]+)\]"""

EXT4_SIZE_TO_BLOCK_SIZE = 8 # ext4: 4KB, block device block size: 512B
47
class FileAccess:
  """Read or write access statistics for a single file.

  One instance tracks either the reads or the writes of a FileEvent, both at
  ext4 level (one unit = one 4KB ext4 block) and at block-device level.
  """

  def __init__(self, file):
    self.file = file  # owning FileEvent
    self.accesses = []  # list of (time, offset, size, process_name)
    self.total_access = 0  # total number of ext4 blocks accessed
    self.ext4_access_size_histogram = {} # key: access size, value: occurrence
    self.block_access_size_histogram = {} # key: block-level access size, value: occurrence
    self.ext4_single_block_accesses = {} # key: process name, value: list of offsets
    self.block_single_block_accesses = {} # key: process name, value: list of offsets
    self.blocks_histogram = {} # key: offset, value: access counter

  def add_if_single_block(self, container, size, offset, process_name):
    """Track offset per process in container, but only for single-block accesses."""
    if size != 1:
      return
    offsets = container.get(process_name)
    if not offsets:
      offsets = []
      container[process_name] = offsets
    offsets.append(offset)

  def add_access(self, time, offset, size, process_name, read_sizes):
    """Record one ext4-level access plus its block-device-level pieces.

    read_sizes lists the sizes of the individual block-level requests that
    served this ext4 access, laid out consecutively starting at offset.
    """
    self.accesses.append((time, offset, size, process_name))
    self.total_access += size
    self.ext4_access_size_histogram[size] = self.ext4_access_size_histogram.get(size, 0) + 1
    read_offset = offset
    for s in read_sizes:
      self.block_access_size_histogram[s] = self.block_access_size_histogram.get(s, 0) + 1
      self.add_if_single_block(self.block_single_block_accesses, s, read_offset, process_name)
      read_offset += s
    for i in range(size):
      self.blocks_histogram[offset + i] = self.blocks_histogram.get(offset + i, 0) + 1
    self.add_if_single_block(self.ext4_single_block_accesses, size, offset, process_name)

  def add_merged_access(self, time, offsets, lens, process_names):
    """Record several ext4 accesses that were served by one merged block request."""
    total_read_offsets = set() # reads can overlap, so count each block only once
    for i in range(len(offsets)):
      self.accesses.append((time, offsets[i], lens[i], process_names[i]))
      self.ext4_access_size_histogram[lens[i]] = self.ext4_access_size_histogram.get(lens[i], 0) + 1
      self.add_if_single_block(self.ext4_single_block_accesses, lens[i], offsets[i], process_names[i])
      # BUG FIX: iterate over the length of THIS access (lens[i]) when collecting
      # the touched blocks, not over the number of merged accesses (len(lens)).
      for j in range(lens[i]):
        total_read_offsets.add(offsets[i] + j)
    total_lens = len(total_read_offsets)
    start_offset = min(total_read_offsets)
    self.total_access += total_lens
    self.block_access_size_histogram[total_lens] = \
        self.block_access_size_histogram.get(total_lens, 0) + 1
    self.add_if_single_block(self.block_single_block_accesses, total_lens, start_offset,
                             process_names[0])
    for s in range(total_lens):
      self.blocks_histogram[start_offset + s] = self.blocks_histogram.get(start_offset + s, 0) + 1

  def dump(self):
    """Print histograms and repeated-access statistics for this file.

    Uses single-argument print(...) so the output is byte-identical under
    python2 and python3.
    """
    if len(self.ext4_access_size_histogram) > 1:
      print("   Ext4 access size histograms: %s" % collections.OrderedDict(
        sorted(self.ext4_access_size_histogram.items(), key = lambda item: item[0])))
    if len(self.ext4_single_block_accesses) > 0 and self.total_access > 1:
      print("   Ext4 single block accesses: %s" % collections.OrderedDict(
        sorted(self.ext4_single_block_accesses.items(), key = lambda item: item[1], reverse = True)))
    if len(self.block_access_size_histogram) > 1:
      print("   Block access size histograms: %s" % collections.OrderedDict(
        sorted(self.block_access_size_histogram.items(), key = lambda item: item[0])))
    if len(self.block_single_block_accesses) > 0 and self.total_access > 1:
      print("   Block single block accesses: %s" % collections.OrderedDict(
        sorted(self.block_single_block_accesses.items(), key = lambda item: item[1], reverse = True)))
    if self.total_access > 1:
      sorted_blocks_histogram = sorted(self.blocks_histogram.items(), key = lambda item: item[1],
                                       reverse = True)
      prints = []
      repeating_reads_counter = 0
      for offset, counter in sorted_blocks_histogram:
        if counter == 1:
          break # sorted by count, so every remaining entry is a single access
        prints.append(str(offset) + ":" + str(counter))
        repeating_reads_counter += (counter - 1)
      if len(prints) > 0:
        print("   repeating accesses %d  offset:count -> %s"
              % (repeating_reads_counter, ','.join(prints)))
127
class FileEvent:
  """Per-inode record: open events plus read and write access statistics."""

  def __init__(self, open_time, file_name, process_name, inode, flags):
    self.file_name = file_name
    self.inode = inode
    self.total_open = 1
    self.processes = []  # list of (open_time, process_name, flags)
    self.processes.append((open_time, process_name, flags))
    self.read = FileAccess(self)
    self.write = FileAccess(self)

  def add_open(self, open_time, process_name, flags):
    """Record an additional open of the same inode."""
    self.processes.append((open_time, process_name, flags))
    self.total_open += 1

  def add_access(self, is_read, time, offset, size, process_name, read_sizes):
    """Route a single ext4 access to the read or write statistics."""
    if is_read:
      self.read.add_access(time, offset, size, process_name, read_sizes)
    else:
      self.write.add_access(time, offset, size, process_name, read_sizes)

  def add_merged_access(self, is_read, time, offsets, lens, process_names):
    """Route a merged block-level access to the read or write statistics."""
    if is_read:
      self.read.add_merged_access(time, offsets, lens, process_names)
    else:
      self.write.add_merged_access(time, offsets, lens, process_names)

  def dump(self, name_to_pid_map):
    """Print full statistics for this file; print(...) form is py2/py3 safe."""
    print(" ***filename %s, total reads %d, total writes %d, total open %d inode %s"
          % (self.file_name, self.read.total_access, self.write.total_access, self.total_open,
             self.inode))
    process_names = []
    for opener in self.processes:
      process_names.append(opener[1] + "-" + name_to_pid_map.get(opener[1], '?') + " t:" +
                           str(opener[0]) + " flags:" + opener[2])
    print("  Processes opened this file: %s" % ','.join(process_names))
    if self.read.total_access > 0:
      print("  ****Reads:")
      self.read.dump()
    if self.write.total_access > 0:
      print("  ****Writes:")
      self.write.dump()

  def dump_short(self):
    """One-line summary used for the per-partition top-10 lists."""
    print("    filename %s, total reads %d, total writes %d" % (self.file_name,
      self.read.total_access, self.write.total_access))
174
class PendingAccess:
  """An ext4 map_blocks request waiting to be matched with block-level I/O.

  len is the request length in ext4 (4KB) blocks; blk_len is the same length
  expressed in 512B block-device sectors.
  """

  def __init__(self, process_name, pid, time, dev, inode, lblk, pblk, len, fevent):
    # NOTE: parameter name 'len' shadows the builtin inside __init__ only; it is
    # kept for interface compatibility with existing callers.
    self.process_name = process_name
    self.pid = pid
    self.time = time
    self.dev = dev
    self.inode = inode
    self.lblk = lblk  # logical (file-relative) ext4 block of the request
    self.pblk = pblk  # physical address, already converted to 512B sector units
    self.blk_len = len * EXT4_SIZE_TO_BLOCK_SIZE
    self.len = len
    self.fevent = fevent
    self.pending_accesses = set()  # ext4 block indexes not yet seen at block level
    for i in range(len):
      self.pending_accesses.add(i)
    self.access_sizes = [] # valid read sizes for this file at block-device level
    self.block_access_counter = 0

  def get_valid_access(self, block_offset, block_len):
    """Translate a block-level (offset, len) into this request's ext4 range.

    Returns (ext4_offset, ext4_len); ext4_len is 0 when nothing overlaps.
    Uses // so the integer division also works under python3.
    """
    ext4_offset = block_offset // EXT4_SIZE_TO_BLOCK_SIZE
    if ext4_offset > self.len:
      return 0, 0
    ext4_len = block_len // EXT4_SIZE_TO_BLOCK_SIZE
    if (ext4_offset + ext4_len) > self.len:
      ext4_len = self.len - ext4_offset
    return ext4_offset, ext4_len

  def queue_block_access(self, ext4_offset, ext4_len):
    """Mark [ext4_offset, ext4_offset + ext4_len) as served by one block request."""
    if ext4_len <= 0:
      return
    self.block_access_counter += 1
    ext4_blocks_accessed = 0
    for i in range(ext4_len):
      ext4_block_i = i + ext4_offset
      if ext4_block_i in self.pending_accesses:
        ext4_blocks_accessed += 1
        self.pending_accesses.remove(ext4_block_i)
    if ext4_blocks_accessed > 0:
      self.access_sizes.append(ext4_blocks_accessed)

  def handle_req_complete(self, time, is_read):
    """Report this fully-served request to its FileEvent."""
    self.fevent.add_access(is_read, self.time, self.lblk, self.len, self.process_name,
                           self.access_sizes)

  def handle_merged_req(self, time, offsets, lens, names, is_read):
    """Report several requests that completed in one merged block request."""
    self.fevent.add_merged_access(is_read, time, offsets, lens, names)

  def is_req_complete(self):
    """True when every ext4 block of the request has been seen at block level."""
    return len(self.pending_accesses) == 0

  def is_req_started(self):
    """True when at least one (but possibly not all) block access arrived."""
    # BUG FIX: use != instead of 'is not'. Identity comparison of ints is only
    # reliable for CPython's small cached integers (<= 256), so requests longer
    # than 256 blocks could be wrongly reported as started.
    return self.len != len(self.pending_accesses)
227
class Trace:
  """Parses ftrace lines and aggregates per-device, per-inode file accesses."""

  def __init__(self):
    self.files_per_device = {} # key: device, value: { key: inode, value: FileEvent }
    self.re_open = re.compile(RE_DO_SYS_OPEN)
    self.re_ext4_access = re.compile(RE_EXT4_MA_BLOCKS_EXIT)
    self.re_bio_remap = re.compile(RE_BLOCK_BIO_REMAP)
    self.re_block_issue = re.compile(RE_BLOCK_RQ_ISSUE)
    # req from ext4 that has not gone down to block level yet, K: block address,
    #  V: list of PendingAccess
    self.pending_accesses = {}
    self.remap = {} # K: remapped sector address, V: original sector address
    self.process_names = {} # K: PID, V: name

  def handle_line(self, line):
    """Dispatch one trace line to the matching handler, if any."""
    match = self.re_open.match(line)
    if match:
      self.handle_open(match)
      return
    match = self.re_ext4_access.match(line)
    if match:
      self.handle_ext4_block_exit(match)
      return
    match = self.re_bio_remap.match(line)
    if match:
      self.handle_bio_remap(match)
      return
    match = self.re_block_issue.match(line)
    if match:
      self.handle_block_issue(match)
      return

  def handle_open(self, match):
    """Record a do_sys_open event for files living on a known partition."""
    pid = int(match.group(1))
    time = match.group(2)
    process_name = match.group(3)
    file_name = match.group(4)
    flag = match.group(5)
    inode = int(match.group(6))
    dev_name = None
    self.process_names[pid] = process_name
    for p in PARTITION_TO_DEVICE:
      if file_name.startswith(p):
        dev_name = PARTITION_TO_DEVICE[p]
    if not dev_name:
      if DBG:
        print("Ignore open for file %s" % file_name)
      return
    # BUG FIX: setdefault instead of plain indexing; an open seen before any
    # ext4 event for this device used to raise KeyError here.
    files = self.files_per_device.setdefault(dev_name, {})
    fevent = files.get(inode)
    if not fevent:
      fevent = FileEvent(time, file_name, process_name, inode, flag)
      files[inode] = fevent
    else:
      fevent.add_open(time, process_name, flag)

  def handle_ext4_block_exit(self, match):
    """Queue a PendingAccess for an ext4_ext_map_blocks_exit event."""
    process_name = match.group(1)
    pid = int(match.group(2))
    time = float(match.group(3))
    dev = match.group(4)
    inode = int(match.group(5))
    lblk = int(match.group(6))
    pblk = int(match.group(7)) * EXT4_SIZE_TO_BLOCK_SIZE # ext4 block addr -> 512B sectors
    l = int(match.group(8))
    mflags = match.group(9)
    ret = int(match.group(10))
    if ret <= 0: # no block access
      return
    process_name = self.process_names.get(pid, process_name)
    if process_name == '<...>': # trace sometimes drops the name; synthesize one
      process_name = "pid-" + str(pid)
    if DBG_ISSUE:
      print("ext4 %s %s %s %s" % (pblk, l, inode, process_name))
    files = self.files_per_device.get(dev)
    if not files:
      if DEVICE_TO_PARTITION.get(dev):
        files = {}
        self.files_per_device[dev] = files
      else:
        if DBG:
          print("access ignored for device %s" % dev)
        return
    fevent = files.get(inode)
    if not fevent:
      if DBG:
        print('no open for device %s with inode %s' % (dev, inode))
      fevent = FileEvent(time, "unknown", process_name, inode, "-")
      files[inode] = fevent
    pending_access = PendingAccess(process_name, pid, time, dev, inode, lblk, pblk, l,
                                   fevent)
    self.pending_accesses.setdefault(pblk, []).append(pending_access)

  def handle_bio_remap(self, match):
    """Remember a block_bio_remap so issued addresses map back to ext4 addresses."""
    new_addr = int(match.group(1))
    old_addr = int(match.group(2))
    self.remap[new_addr] = old_addr
    if DBG_ISSUE:
      print("remap %s %s" % (new_addr, old_addr))

  def handle_block_issue(self, match):
    """Match a block_rq_issue event against queued ext4 accesses and report
    completed requests (individually or merged per inode)."""
    pid = int(match.group(1))
    time = float(match.group(2))
    dev_major = match.group(3)
    dev_minor = match.group(4)
    access = match.group(5)
    new_address = int(match.group(6))
    l = int(match.group(7))
    name = match.group(8)
    name = self.process_names.get(pid, name)
    if name == '<...>':
      name = "pid-" + str(pid)
    is_read = 'W' not in access
    accesses_per_inodes = {} # K: inode, V: [offsets, lengths, process names]
    addrs_to_remove = []
    completed_reqs = []
    address = self.remap.get(new_address, new_address)
    if DBG_ISSUE:
      print("issue %s %s %s %s" % (address, l, is_read, access))
    for access_addr, access_list in self.pending_accesses.items():
      # NOTE(review): the second condition is implied by the first when l > 0;
      # kept as-is to preserve behavior.
      if (address >= access_addr) and (address + l) > access_addr:
        reqs_to_remove = []
        for pending in access_list:
          offset, valid_access_size = pending.get_valid_access(address - access_addr, l)
          if valid_access_size > 0:
            if pending.is_req_started(): # spread across multiple reads; complete alone
              pending.queue_block_access(offset, valid_access_size)
              if pending.is_req_complete():
                pending.handle_req_complete(time, is_read)
                reqs_to_remove.append(pending)
            else: # possibly multiple reads completed in this one; complete them together
              pending.queue_block_access(offset, valid_access_size)
              if pending.is_req_complete():
                reads = accesses_per_inodes.get(pending.inode, [[], [], []])
                reads[0].append(offset + pending.lblk)
                reads[1].append(valid_access_size)
                reads[2].append(pending.process_name)
                accesses_per_inodes[pending.inode] = reads
                completed_reqs.append(pending)
                reqs_to_remove.append(pending)
        for to_remove in reqs_to_remove:
          access_list.remove(to_remove)
        if len(access_list) == 0:
          addrs_to_remove.append(access_addr)
    for addr in addrs_to_remove:
      del self.pending_accesses[addr]
    for pending in completed_reqs: # these will be reported as a batch
      accesses = accesses_per_inodes.get(pending.inode)
      if not accesses: # merged one already dispatched
        continue
      if len(accesses[0]) == 1:
        pending.handle_req_complete(time, is_read)
      else: # merged
        pending.handle_merged_req(time, accesses[0], accesses[1], accesses[2], is_read)
        del accesses_per_inodes[pending.inode]

  def dump_partition(self, partition_name, files):
    """Dump all files of one partition; returns (reads, writes, file count)."""
    name_to_pid_map = {}
    for pid, name in self.process_names.items():
      name_to_pid_map[name] = str(pid)
    print("**Dump partition: %s total number of files: %d" % (partition_name, len(files)))
    total_reads = 0
    total_writes = 0
    # sorted() instead of list.sort() so python3's dict views also work.
    files_sorted_by_read = sorted(files.values(), key=lambda f: f.read.total_access,
                                  reverse=True)
    files_sorted_by_write = sorted(files.values(), key=lambda f: f.write.total_access,
                                   reverse=True)
    print(" Top 10 readers:")
    for i in range(min(10, len(files_sorted_by_read))):
      files_sorted_by_read[i].dump_short()
    print(" Top 10 writers:")
    for i in range(min(10, len(files_sorted_by_write))):
      files_sorted_by_write[i].dump_short()
    for f in files_sorted_by_read:
      f.dump(name_to_pid_map)
      total_reads += f.read.total_access
      total_writes += f.write.total_access
    print(" Total reads: %d  total writes: %d" % (total_reads, total_writes))
    return total_reads, total_writes, len(files)

  def dump(self):
    """Dump statistics for every partition, followed by an overall summary."""
    print("*Dump R/W per each partition")
    total_reads = 0
    total_writes = 0
    summaries = []
    for d in self.files_per_device:
      reads, writes, num_files = self.dump_partition(DEVICE_TO_PARTITION[d],
                                                     self.files_per_device[d])
      total_reads += reads
      total_writes += writes
      summaries.append((DEVICE_TO_PARTITION[d], reads, writes, num_files))
    print("*Summary*")
    print("Total blocks read %d" % total_reads)
    print("Total blocks wrote %d" % total_writes)
    print("Partition total_reads total_writes num_files")
    for s in summaries:
      print("%s %s %s %s" % (s[0], s[1], s[2], s[3]))
428
def main(argv):
  """Entry point: parse the ftrace file given as argv[1] and dump statistics."""
  if len(argv) < 2:
    print("check_file_read.py filename")
    return
  filename = argv[1]
  trace = Trace()
  with open(filename) as f:
    for l in f:
      trace.handle_line(l)
  trace.dump()
439
# Run only when executed as a script (not when imported).
if __name__ == '__main__':
  main(sys.argv)
442