• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python3
2# Copyright (C) 2019 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16import re
17import sys
18
19from sqlalchemy import create_engine
20from sqlalchemy import Column, Date, Integer, Float, String, ForeignKey
21from sqlalchemy.ext.declarative import declarative_base
22from sqlalchemy.orm import relationship
23
24from sqlalchemy.orm import sessionmaker
25
26import sqlalchemy
27
28from typing import Optional, Tuple
29
30_DEBUG = False        # print sql commands to console
31_FLUSH_LIMIT = 10000  # how many entries are parsed before flushing to DB from memory
32
33Base = declarative_base()
34
35class RawFtraceEntry(Base):
36  __tablename__ = 'raw_ftrace_entries'
37
38  id = Column(Integer, primary_key=True)
39  task_name = Column(String, nullable=True) # <...> -> None.
40  task_pid = Column(String, nullable=False)
41  tgid = Column(Integer, nullable=True)     # ----- -> None.
42  cpu = Column(Integer, nullable=False)
43  timestamp = Column(Float, nullable=False)
44  function = Column(String, nullable=False)
45  function_args = Column(String, nullable=False)
46
47  # 1:1 relation with MmFilemapAddToPageCache.
48  mm_filemap_add_to_page_cache = relationship("MmFilemapAddToPageCache",
49                                              back_populates="raw_ftrace_entry")
50
51  @staticmethod
52  def parse_dict(line):
53    # '           <...>-5521  (-----) [003] ...1 17148.446877: tracing_mark_write: trace_event_clock_sync: parent_ts=17148.447266'
54    m = re.match('\s*(.*)-(\d+)\s+\(([^\)]+)\)\s+\[(\d+)\]\s+([\w.]{4})\s+(\d+[.]\d+):\s+(\w+):\s+(.*)', line)
55    if not m:
56      return None
57
58    groups = m.groups()
59    # groups example:
60    # ('<...>',
61    #  '5521',
62    #  '-----',
63    #  '003',
64    #  '...1',
65    #  '17148.446877',
66    #  'tracing_mark_write',
67    #  'trace_event_clock_sync: parent_ts=17148.447266')
68    task_name = groups[0]
69    if task_name == '<...>':
70      task_name = None
71
72    task_pid = int(groups[1])
73    tgid = groups[2]
74    if tgid == '-----':
75      tgid = None
76
77    cpu = int(groups[3])
78    # irq_flags = groups[4]
79    timestamp = float(groups[5])
80    function = groups[6]
81    function_args = groups[7]
82
83    return {'task_name': task_name, 'task_pid': task_pid, 'tgid': tgid, 'cpu': cpu, 'timestamp': timestamp, 'function': function, 'function_args': function_args}
84
85class SchedSwitch(Base):
86  __tablename__ = 'sched_switches'
87
88  id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True)
89
90  prev_comm = Column(String, nullable=False)
91  prev_pid = Column(Integer, nullable=False)
92  prev_prio = Column(Integer, nullable=False)
93  prev_state = Column(String, nullable=False)
94
95  next_comm = Column(String, nullable=False)
96  next_pid = Column(Integer, nullable=False)
97  next_prio = Column(Integer, nullable=False)
98
99  @staticmethod
100  def parse_dict(function_args, id = None):
101    # 'prev_comm=kworker/u16:5 prev_pid=13971 prev_prio=120 prev_state=S ==> next_comm=swapper/4 next_pid=0 next_prio=120'
102    m = re.match("prev_comm=(.*) prev_pid=(\d+) prev_prio=(\d+) prev_state=(.*) ==> next_comm=(.*) next_pid=(\d+) next_prio=(\d+) ?", function_args)
103    if not m:
104      return None
105
106    groups = m.groups()
107    # ('kworker/u16:5', '13971', '120', 'S', 'swapper/4', '0', '120')
108    d = {}
109    if id is not None:
110      d['id'] = id
111    d['prev_comm'] = groups[0]
112    d['prev_pid'] = int(groups[1])
113    d['prev_prio'] = int(groups[2])
114    d['prev_state'] = groups[3]
115    d['next_comm'] = groups[4]
116    d['next_pid'] = int(groups[5])
117    d['next_prio'] = int(groups[6])
118
119    return d
120
121class SchedBlockedReason(Base):
122  __tablename__ = 'sched_blocked_reasons'
123
124  id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True)
125
126  pid = Column(Integer, nullable=False)
127  iowait = Column(Integer, nullable=False)
128  caller = Column(String, nullable=False)
129
130  @staticmethod
131  def parse_dict(function_args, id = None):
132    # 'pid=2289 iowait=1 caller=wait_on_page_bit_common+0x2a8/0x5f'
133    m = re.match("pid=(\d+) iowait=(\d+) caller=(.*) ?", function_args)
134    if not m:
135      return None
136
137    groups = m.groups()
138    # ('2289', '1', 'wait_on_page_bit_common+0x2a8/0x5f8')
139    d = {}
140    if id is not None:
141      d['id'] = id
142    d['pid'] = int(groups[0])
143    d['iowait'] = int(groups[1])
144    d['caller'] = groups[2]
145
146    return d
147
148class MmFilemapAddToPageCache(Base):
149  __tablename__ = 'mm_filemap_add_to_page_caches'
150
151  id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True)
152
153  dev = Column(Integer, nullable=False)        # decoded from ${major}:${minor} syntax.
154  dev_major = Column(Integer, nullable=False)  # original ${major} value.
155  dev_minor = Column(Integer, nullable=False)  # original ${minor} value.
156
157  ino = Column(Integer, nullable=False)  # decoded from hex to base 10
158  page = Column(Integer, nullable=False) # decoded from hex to base 10
159
160  pfn = Column(Integer, nullable=False)
161  ofs = Column(Integer, nullable=False)
162
163  # 1:1 relation with RawFtraceEntry.
164  raw_ftrace_entry = relationship("RawFtraceEntry", uselist=False)
165
166  @staticmethod
167  def parse_dict(function_args, id = None):
168    # dev 253:6 ino b2c7 page=00000000ec787cd9 pfn=1478539 ofs=4096
169    m = re.match("dev (\d+):(\d+) ino ([0-9a-fA-F]+) page=([0-9a-fA-F]+) pfn=(\d+) ofs=(\d+)", function_args)
170    if not m:
171      return None
172
173    groups = m.groups()
174    # ('253', '6', 'b2c7', '00000000ec787cd9', '1478539', '4096')
175    d = {}
176    if id is not None:
177      d['id'] = id
178
179    device_major = d['dev_major'] = int(groups[0])
180    device_minor = d['dev_minor'] = int(groups[1])
181    d['dev'] = device_major << 8 | device_minor
182    d['ino'] = int(groups[2], 16)
183    d['page'] = int(groups[3], 16)
184    d['pfn'] = int(groups[4])
185    d['ofs'] = int(groups[5])
186
187    return d
188
189class Trace2Db:
190  def __init__(self, db_filename: str):
191    (s, e) = self._init_sqlalchemy(db_filename)
192    self._session = s
193    self._engine = e
194    self._raw_ftrace_entry_filter = lambda x: True
195
196  def set_raw_ftrace_entry_filter(self, flt):
197    """
198    Install a function dict(RawFtraceEntry) -> bool
199
200    If this returns 'false', then we skip adding the RawFtraceEntry to the database.
201    """
202    self._raw_ftrace_entry_filter = flt
203
204  @staticmethod
205  def _init_sqlalchemy(db_filename: str) -> Tuple[object, object]:
206    global _DEBUG
207    engine = create_engine('sqlite:///' + db_filename, echo=_DEBUG)
208
209    # CREATE ... (tables)
210    Base.metadata.create_all(engine)
211
212    Session = sessionmaker(bind=engine)
213    session = Session()
214    return (session, engine)
215
216  def parse_file_into_db(self, filename: str, limit: Optional[int] = None):
217    """
218    Parse the ftrace/systrace at 'filename',
219    inserting the values into the current sqlite database.
220
221    :return: number of RawFtraceEntry inserted.
222    """
223    return parse_file(filename, self._session, self._engine, self._raw_ftrace_entry_filter, limit)
224
225  def parse_file_buf_into_db(self, file_buf, limit: Optional[int] = None):
226    """
227    Parse the ftrace/systrace at 'filename',
228    inserting the values into the current sqlite database.
229
230    :return: number of RawFtraceEntry inserted.
231    """
232    return parse_file_buf(file_buf, self._session, self._engine, self._raw_ftrace_entry_filter, limit)
233
234
235  @property
236  def session(self):
237    return self._session
238
239def insert_pending_entries(engine, kls, lst):
240  if len(lst) > 0:
241    # for some reason, it tries to generate an empty INSERT statement with len=0,
242    # which of course violates the first non-null constraint.
243    try:
244      # Performance-sensitive parsing according to:
245      # https://docs.sqlalchemy.org/en/13/faq/performance.html#i-m-inserting-400-000-rows-with-the-orm-and-it-s-really-slow
246      engine.execute(kls.__table__.insert(), lst)
247      lst.clear()
248    except sqlalchemy.exc.IntegrityError as err:
249      # possibly violating some SQL constraint, print data here.
250      print(err)
251      print(lst)
252      raise
253
254def parse_file(filename: str, *args, **kwargs) -> int:
255  # use explicit encoding to avoid UnicodeDecodeError.
256  with open(filename, encoding="ISO-8859-1") as f:
257    return parse_file_buf(f, *args, **kwargs)
258
259def parse_file_buf(filebuf, session, engine, raw_ftrace_entry_filter, limit=None) -> int:
260  global _FLUSH_LIMIT
261  count = 0
262  # count and id are not equal, because count still increases for invalid lines.
263  id = 0
264
265  pending_entries = []
266  pending_sched_switch = []
267  pending_sched_blocked_reasons = []
268  pending_mm_filemap_add_to_pagecaches = []
269
270  def insert_all_pending_entries():
271    insert_pending_entries(engine, RawFtraceEntry, pending_entries)
272    insert_pending_entries(engine, SchedSwitch, pending_sched_switch)
273    insert_pending_entries(engine, SchedBlockedReason, pending_sched_blocked_reasons)
274    insert_pending_entries(engine, MmFilemapAddToPageCache, pending_mm_filemap_add_to_pagecaches)
275
276  # for trace.html files produced by systrace,
277  # the actual ftrace is in the 'second' trace-data script class.
278  parsing_trace_data = 0
279  parsing_systrace_file = False
280
281  f = filebuf
282  for l in f:
283    if parsing_trace_data == 0 and l == "<!DOCTYPE html>\n":
284      parsing_systrace_file = True
285      continue
286    if parsing_trace_data != 2 and parsing_systrace_file:
287      if l == '  <script class="trace-data" type="application/text">\n':
288        parsing_trace_data = parsing_trace_data + 1
289      continue
290
291    if parsing_systrace_file and parsing_trace_data != 2:
292      continue
293    elif parsing_systrace_file and parsing_trace_data == 2 and l == "  </script>\n":
294      # the rest of this file is just random html
295      break
296
297    # now parsing the ftrace data.
298    if len(l) > 1 and l[0] == '#':
299      continue
300
301    count = count + 1
302
303    if limit and count >= limit:
304      break
305
306    raw_ftrace_entry = RawFtraceEntry.parse_dict(l)
307    if not raw_ftrace_entry:
308      print("WARNING: Failed to parse raw ftrace entry: " + l)
309      continue
310
311    if not raw_ftrace_entry_filter(raw_ftrace_entry):
312      # Skip processing raw ftrace entries that don't match a filter.
313      # This is an optimization for when Trace2Db is used programatically
314      # to avoid having an overly large database.
315      continue
316
317    pending_entries.append(raw_ftrace_entry)
318    id = id + 1
319
320    if raw_ftrace_entry['function'] == 'sched_switch':
321      sched_switch = SchedSwitch.parse_dict(raw_ftrace_entry['function_args'], id)
322
323      if not sched_switch:
324        print("WARNING: Failed to parse sched_switch: " + l)
325      else:
326        pending_sched_switch.append(sched_switch)
327
328    elif raw_ftrace_entry['function'] == 'sched_blocked_reason':
329      sbr = SchedBlockedReason.parse_dict(raw_ftrace_entry['function_args'], id)
330
331      if not sbr:
332        print("WARNING: Failed to parse sched_blocked_reason: " + l)
333      else:
334        pending_sched_blocked_reasons.append(sbr)
335
336    elif raw_ftrace_entry['function'] == 'mm_filemap_add_to_page_cache':
337      d = MmFilemapAddToPageCache.parse_dict(raw_ftrace_entry['function_args'],
338                                             id)
339      if not d:
340        print("WARNING: Failed to parse mm_filemap_add_to_page_cache: " + l)
341      else:
342        pending_mm_filemap_add_to_pagecaches.append(d)
343
344    # Objects are cached in python memory, not yet sent to SQL database.
345
346    # Send INSERT/UPDATE/etc statements to the underlying SQL database.
347    if count % _FLUSH_LIMIT == 0:
348      insert_all_pending_entries()
349
350  insert_all_pending_entries()
351
352  # Ensure underlying database commits changes from memory to disk.
353  session.commit()
354
355  return count
356