1#!/usr/bin/python3 2# Copyright (C) 2019 The Android Open Source Project 3# 4# Licensed under the Apache License, Version 2.0 (the "License"); 5# you may not use this file except in compliance with the License. 6# You may obtain a copy of the License at 7# 8# http://www.apache.org/licenses/LICENSE-2.0 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS IS" BASIS, 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13# See the License for the specific language governing permissions and 14# limitations under the License. 15 16import re 17import sys 18 19from sqlalchemy import create_engine 20from sqlalchemy import Column, Date, Integer, Float, String, ForeignKey 21from sqlalchemy.ext.declarative import declarative_base 22from sqlalchemy.orm import relationship 23 24from sqlalchemy.orm import sessionmaker 25 26import sqlalchemy 27 28from typing import Optional, Tuple 29 30_DEBUG = False # print sql commands to console 31_FLUSH_LIMIT = 10000 # how many entries are parsed before flushing to DB from memory 32 33Base = declarative_base() 34 35class RawFtraceEntry(Base): 36 __tablename__ = 'raw_ftrace_entries' 37 38 id = Column(Integer, primary_key=True) 39 task_name = Column(String, nullable=True) # <...> -> None. 40 task_pid = Column(String, nullable=False) 41 tgid = Column(Integer, nullable=True) # ----- -> None. 42 cpu = Column(Integer, nullable=False) 43 timestamp = Column(Float, nullable=False) 44 function = Column(String, nullable=False) 45 function_args = Column(String, nullable=False) 46 47 # 1:1 relation with MmFilemapAddToPageCache. 48 mm_filemap_add_to_page_cache = relationship("MmFilemapAddToPageCache", 49 back_populates="raw_ftrace_entry") 50 51 @staticmethod 52 def parse_dict(line): 53 # ' <...>-5521 (-----) [003] ...1 17148.446877: tracing_mark_write: trace_event_clock_sync: parent_ts=17148.447266' 54 m = re.match('\s*(.*)-(\d+)\s+\(([^\)]+)\)\s+\[(\d+)\]\s+([\w.]{4})\s+(\d+[.]\d+):\s+(\w+):\s+(.*)', line) 55 if not m: 56 return None 57 58 groups = m.groups() 59 # groups example: 60 # ('<...>', 61 # '5521', 62 # '-----', 63 # '003', 64 # '...1', 65 # '17148.446877', 66 # 'tracing_mark_write', 67 # 'trace_event_clock_sync: parent_ts=17148.447266') 68 task_name = groups[0] 69 if task_name == '<...>': 70 task_name = None 71 72 task_pid = int(groups[1]) 73 tgid = groups[2] 74 if tgid == '-----': 75 tgid = None 76 77 cpu = int(groups[3]) 78 # irq_flags = groups[4] 79 timestamp = float(groups[5]) 80 function = groups[6] 81 function_args = groups[7] 82 83 return {'task_name': task_name, 'task_pid': task_pid, 'tgid': tgid, 'cpu': cpu, 'timestamp': timestamp, 'function': function, 'function_args': function_args} 84 85class SchedSwitch(Base): 86 __tablename__ = 'sched_switches' 87 88 id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True) 89 90 prev_comm = Column(String, nullable=False) 91 prev_pid = Column(Integer, nullable=False) 92 prev_prio = Column(Integer, nullable=False) 93 prev_state = Column(String, nullable=False) 94 95 next_comm = Column(String, nullable=False) 96 next_pid = Column(Integer, nullable=False) 97 next_prio = Column(Integer, nullable=False) 98 99 @staticmethod 100 def parse_dict(function_args, id = None): 101 # 'prev_comm=kworker/u16:5 prev_pid=13971 prev_prio=120 prev_state=S ==> next_comm=swapper/4 next_pid=0 next_prio=120' 102 m = re.match("prev_comm=(.*) prev_pid=(\d+) prev_prio=(\d+) prev_state=(.*) ==> next_comm=(.*) next_pid=(\d+) next_prio=(\d+) ?", function_args) 103 if not m: 104 return None 105 106 groups = m.groups() 107 # ('kworker/u16:5', '13971', '120', 'S', 'swapper/4', '0', '120') 108 d = {} 109 if id is not None: 110 d['id'] = id 111 d['prev_comm'] = groups[0] 112 d['prev_pid'] = int(groups[1]) 113 d['prev_prio'] = int(groups[2]) 114 d['prev_state'] = groups[3] 115 d['next_comm'] = groups[4] 116 d['next_pid'] = int(groups[5]) 117 d['next_prio'] = int(groups[6]) 118 119 return d 120 121class SchedBlockedReason(Base): 122 __tablename__ = 'sched_blocked_reasons' 123 124 id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True) 125 126 pid = Column(Integer, nullable=False) 127 iowait = Column(Integer, nullable=False) 128 caller = Column(String, nullable=False) 129 130 @staticmethod 131 def parse_dict(function_args, id = None): 132 # 'pid=2289 iowait=1 caller=wait_on_page_bit_common+0x2a8/0x5f' 133 m = re.match("pid=(\d+) iowait=(\d+) caller=(.*) ?", function_args) 134 if not m: 135 return None 136 137 groups = m.groups() 138 # ('2289', '1', 'wait_on_page_bit_common+0x2a8/0x5f8') 139 d = {} 140 if id is not None: 141 d['id'] = id 142 d['pid'] = int(groups[0]) 143 d['iowait'] = int(groups[1]) 144 d['caller'] = groups[2] 145 146 return d 147 148class MmFilemapAddToPageCache(Base): 149 __tablename__ = 'mm_filemap_add_to_page_caches' 150 151 id = Column(Integer, ForeignKey('raw_ftrace_entries.id'), primary_key=True) 152 153 dev = Column(Integer, nullable=False) # decoded from ${major}:${minor} syntax. 154 dev_major = Column(Integer, nullable=False) # original ${major} value. 155 dev_minor = Column(Integer, nullable=False) # original ${minor} value. 156 157 ino = Column(Integer, nullable=False) # decoded from hex to base 10 158 page = Column(Integer, nullable=False) # decoded from hex to base 10 159 160 pfn = Column(Integer, nullable=False) 161 ofs = Column(Integer, nullable=False) 162 163 # 1:1 relation with RawFtraceEntry. 164 raw_ftrace_entry = relationship("RawFtraceEntry", uselist=False) 165 166 @staticmethod 167 def parse_dict(function_args, id = None): 168 # dev 253:6 ino b2c7 page=00000000ec787cd9 pfn=1478539 ofs=4096 169 m = re.match("dev (\d+):(\d+) ino ([0-9a-fA-F]+) page=([0-9a-fA-F]+) pfn=(\d+) ofs=(\d+)", function_args) 170 if not m: 171 return None 172 173 groups = m.groups() 174 # ('253', '6', 'b2c7', '00000000ec787cd9', '1478539', '4096') 175 d = {} 176 if id is not None: 177 d['id'] = id 178 179 device_major = d['dev_major'] = int(groups[0]) 180 device_minor = d['dev_minor'] = int(groups[1]) 181 d['dev'] = device_major << 8 | device_minor 182 d['ino'] = int(groups[2], 16) 183 d['page'] = int(groups[3], 16) 184 d['pfn'] = int(groups[4]) 185 d['ofs'] = int(groups[5]) 186 187 return d 188 189class Trace2Db: 190 def __init__(self, db_filename: str): 191 (s, e) = self._init_sqlalchemy(db_filename) 192 self._session = s 193 self._engine = e 194 self._raw_ftrace_entry_filter = lambda x: True 195 196 def set_raw_ftrace_entry_filter(self, flt): 197 """ 198 Install a function dict(RawFtraceEntry) -> bool 199 200 If this returns 'false', then we skip adding the RawFtraceEntry to the database. 201 """ 202 self._raw_ftrace_entry_filter = flt 203 204 @staticmethod 205 def _init_sqlalchemy(db_filename: str) -> Tuple[object, object]: 206 global _DEBUG 207 engine = create_engine('sqlite:///' + db_filename, echo=_DEBUG) 208 209 # CREATE ... (tables) 210 Base.metadata.create_all(engine) 211 212 Session = sessionmaker(bind=engine) 213 session = Session() 214 return (session, engine) 215 216 def parse_file_into_db(self, filename: str, limit: Optional[int] = None): 217 """ 218 Parse the ftrace/systrace at 'filename', 219 inserting the values into the current sqlite database. 220 221 :return: number of RawFtraceEntry inserted. 222 """ 223 return parse_file(filename, self._session, self._engine, self._raw_ftrace_entry_filter, limit) 224 225 def parse_file_buf_into_db(self, file_buf, limit: Optional[int] = None): 226 """ 227 Parse the ftrace/systrace at 'filename', 228 inserting the values into the current sqlite database. 229 230 :return: number of RawFtraceEntry inserted. 231 """ 232 return parse_file_buf(file_buf, self._session, self._engine, self._raw_ftrace_entry_filter, limit) 233 234 235 @property 236 def session(self): 237 return self._session 238 239def insert_pending_entries(engine, kls, lst): 240 if len(lst) > 0: 241 # for some reason, it tries to generate an empty INSERT statement with len=0, 242 # which of course violates the first non-null constraint. 243 try: 244 # Performance-sensitive parsing according to: 245 # https://docs.sqlalchemy.org/en/13/faq/performance.html#i-m-inserting-400-000-rows-with-the-orm-and-it-s-really-slow 246 engine.execute(kls.__table__.insert(), lst) 247 lst.clear() 248 except sqlalchemy.exc.IntegrityError as err: 249 # possibly violating some SQL constraint, print data here. 250 print(err) 251 print(lst) 252 raise 253 254def parse_file(filename: str, *args, **kwargs) -> int: 255 # use explicit encoding to avoid UnicodeDecodeError. 256 with open(filename, encoding="ISO-8859-1") as f: 257 return parse_file_buf(f, *args, **kwargs) 258 259def parse_file_buf(filebuf, session, engine, raw_ftrace_entry_filter, limit=None) -> int: 260 global _FLUSH_LIMIT 261 count = 0 262 # count and id are not equal, because count still increases for invalid lines. 263 id = 0 264 265 pending_entries = [] 266 pending_sched_switch = [] 267 pending_sched_blocked_reasons = [] 268 pending_mm_filemap_add_to_pagecaches = [] 269 270 def insert_all_pending_entries(): 271 insert_pending_entries(engine, RawFtraceEntry, pending_entries) 272 insert_pending_entries(engine, SchedSwitch, pending_sched_switch) 273 insert_pending_entries(engine, SchedBlockedReason, pending_sched_blocked_reasons) 274 insert_pending_entries(engine, MmFilemapAddToPageCache, pending_mm_filemap_add_to_pagecaches) 275 276 # for trace.html files produced by systrace, 277 # the actual ftrace is in the 'second' trace-data script class. 278 parsing_trace_data = 0 279 parsing_systrace_file = False 280 281 f = filebuf 282 for l in f: 283 if parsing_trace_data == 0 and l == "<!DOCTYPE html>\n": 284 parsing_systrace_file = True 285 continue 286 if parsing_trace_data != 2 and parsing_systrace_file: 287 if l == ' <script class="trace-data" type="application/text">\n': 288 parsing_trace_data = parsing_trace_data + 1 289 continue 290 291 if parsing_systrace_file and parsing_trace_data != 2: 292 continue 293 elif parsing_systrace_file and parsing_trace_data == 2 and l == " </script>\n": 294 # the rest of this file is just random html 295 break 296 297 # now parsing the ftrace data. 298 if len(l) > 1 and l[0] == '#': 299 continue 300 301 count = count + 1 302 303 if limit and count >= limit: 304 break 305 306 raw_ftrace_entry = RawFtraceEntry.parse_dict(l) 307 if not raw_ftrace_entry: 308 print("WARNING: Failed to parse raw ftrace entry: " + l) 309 continue 310 311 if not raw_ftrace_entry_filter(raw_ftrace_entry): 312 # Skip processing raw ftrace entries that don't match a filter. 313 # This is an optimization for when Trace2Db is used programatically 314 # to avoid having an overly large database. 315 continue 316 317 pending_entries.append(raw_ftrace_entry) 318 id = id + 1 319 320 if raw_ftrace_entry['function'] == 'sched_switch': 321 sched_switch = SchedSwitch.parse_dict(raw_ftrace_entry['function_args'], id) 322 323 if not sched_switch: 324 print("WARNING: Failed to parse sched_switch: " + l) 325 else: 326 pending_sched_switch.append(sched_switch) 327 328 elif raw_ftrace_entry['function'] == 'sched_blocked_reason': 329 sbr = SchedBlockedReason.parse_dict(raw_ftrace_entry['function_args'], id) 330 331 if not sbr: 332 print("WARNING: Failed to parse sched_blocked_reason: " + l) 333 else: 334 pending_sched_blocked_reasons.append(sbr) 335 336 elif raw_ftrace_entry['function'] == 'mm_filemap_add_to_page_cache': 337 d = MmFilemapAddToPageCache.parse_dict(raw_ftrace_entry['function_args'], 338 id) 339 if not d: 340 print("WARNING: Failed to parse mm_filemap_add_to_page_cache: " + l) 341 else: 342 pending_mm_filemap_add_to_pagecaches.append(d) 343 344 # Objects are cached in python memory, not yet sent to SQL database. 345 346 # Send INSERT/UPDATE/etc statements to the underlying SQL database. 347 if count % _FLUSH_LIMIT == 0: 348 insert_all_pending_entries() 349 350 insert_all_pending_entries() 351 352 # Ensure underlying database commits changes from memory to disk. 353 session.commit() 354 355 return count 356