1#! /usr/bin/env python3 2# 3# Class for profiling python code. rev 1.0 6/2/94 4# 5# Written by James Roskind 6# Based on prior profile module by Sjoerd Mullender... 7# which was hacked somewhat by: Guido van Rossum 8 9"""Class for profiling Python code.""" 10 11# Copyright Disney Enterprises, Inc. All Rights Reserved. 12# Licensed to PSF under a Contributor Agreement 13# 14# Licensed under the Apache License, Version 2.0 (the "License"); 15# you may not use this file except in compliance with the License. 16# You may obtain a copy of the License at 17# 18# http://www.apache.org/licenses/LICENSE-2.0 19# 20# Unless required by applicable law or agreed to in writing, software 21# distributed under the License is distributed on an "AS IS" BASIS, 22# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 23# either express or implied. See the License for the specific language 24# governing permissions and limitations under the License. 25 26 27import importlib.machinery 28import io 29import sys 30import time 31import marshal 32 33__all__ = ["run", "runctx", "Profile"] 34 35# Sample timer for use with 36#i_count = 0 37#def integer_timer(): 38# global i_count 39# i_count = i_count + 1 40# return i_count 41#itimes = integer_timer # replace with C coded timer returning integers 42 43class _Utils: 44 """Support class for utility functions which are shared by 45 profile.py and cProfile.py modules. 46 Not supposed to be used directly. 47 """ 48 49 def __init__(self, profiler): 50 self.profiler = profiler 51 52 def run(self, statement, filename, sort): 53 prof = self.profiler() 54 try: 55 prof.run(statement) 56 except SystemExit: 57 pass 58 finally: 59 self._show(prof, filename, sort) 60 61 def runctx(self, statement, globals, locals, filename, sort): 62 prof = self.profiler() 63 try: 64 prof.runctx(statement, globals, locals) 65 except SystemExit: 66 pass 67 finally: 68 self._show(prof, filename, sort) 69 70 def _show(self, prof, filename, sort): 71 if filename is not None: 72 prof.dump_stats(filename) 73 else: 74 prof.print_stats(sort) 75 76 77#************************************************************************** 78# The following are the static member functions for the profiler class 79# Note that an instance of Profile() is *not* needed to call them. 80#************************************************************************** 81 82def run(statement, filename=None, sort=-1): 83 """Run statement under profiler optionally saving results in filename 84 85 This function takes a single argument that can be passed to the 86 "exec" statement, and an optional file name. In all cases this 87 routine attempts to "exec" its first argument and gather profiling 88 statistics from the execution. If no file name is present, then this 89 function automatically prints a simple profiling report, sorted by the 90 standard name string (file/line/function-name) that is presented in 91 each line. 92 """ 93 return _Utils(Profile).run(statement, filename, sort) 94 95def runctx(statement, globals, locals, filename=None, sort=-1): 96 """Run statement under profiler, supplying your own globals and locals, 97 optionally saving results in filename. 98 99 statement and filename have the same semantics as profile.run 100 """ 101 return _Utils(Profile).runctx(statement, globals, locals, filename, sort) 102 103 104class Profile: 105 """Profiler class. 106 107 self.cur is always a tuple. Each such tuple corresponds to a stack 108 frame that is currently active (self.cur[-2]). The following are the 109 definitions of its members. We use this external "parallel stack" to 110 avoid contaminating the program that we are profiling. (old profiler 111 used to write into the frames local dictionary!!) Derived classes 112 can change the definition of some entries, as long as they leave 113 [-2:] intact (frame and previous tuple). In case an internal error is 114 detected, the -3 element is used as the function name. 115 116 [ 0] = Time that needs to be charged to the parent frame's function. 117 It is used so that a function call will not have to access the 118 timing data for the parent frame. 119 [ 1] = Total time spent in this frame's function, excluding time in 120 subfunctions (this latter is tallied in cur[2]). 121 [ 2] = Total time spent in subfunctions, excluding time executing the 122 frame's function (this latter is tallied in cur[1]). 123 [-3] = Name of the function that corresponds to this frame. 124 [-2] = Actual frame that we correspond to (used to sync exception handling). 125 [-1] = Our parent 6-tuple (corresponds to frame.f_back). 126 127 Timing data for each function is stored as a 5-tuple in the dictionary 128 self.timings[]. The index is always the name stored in self.cur[-3]. 129 The following are the definitions of the members: 130 131 [0] = The number of times this function was called, not counting direct 132 or indirect recursion, 133 [1] = Number of times this function appears on the stack, minus one 134 [2] = Total time spent internal to this function 135 [3] = Cumulative time that this function was present on the stack. In 136 non-recursive functions, this is the total execution time from start 137 to finish of each invocation of a function, including time spent in 138 all subfunctions. 139 [4] = A dictionary indicating for each function name, the number of times 140 it was called by us. 141 """ 142 143 bias = 0 # calibration constant 144 145 def __init__(self, timer=None, bias=None): 146 self.timings = {} 147 self.cur = None 148 self.cmd = "" 149 self.c_func_name = "" 150 151 if bias is None: 152 bias = self.bias 153 self.bias = bias # Materialize in local dict for lookup speed. 154 155 if not timer: 156 self.timer = self.get_time = time.process_time 157 self.dispatcher = self.trace_dispatch_i 158 else: 159 self.timer = timer 160 t = self.timer() # test out timer function 161 try: 162 length = len(t) 163 except TypeError: 164 self.get_time = timer 165 self.dispatcher = self.trace_dispatch_i 166 else: 167 if length == 2: 168 self.dispatcher = self.trace_dispatch 169 else: 170 self.dispatcher = self.trace_dispatch_l 171 # This get_time() implementation needs to be defined 172 # here to capture the passed-in timer in the parameter 173 # list (for performance). Note that we can't assume 174 # the timer() result contains two values in all 175 # cases. 176 def get_time_timer(timer=timer, sum=sum): 177 return sum(timer()) 178 self.get_time = get_time_timer 179 self.t = self.get_time() 180 self.simulate_call('profiler') 181 182 # Heavily optimized dispatch routine for time.process_time() timer 183 184 def trace_dispatch(self, frame, event, arg): 185 timer = self.timer 186 t = timer() 187 t = t[0] + t[1] - self.t - self.bias 188 189 if event == "c_call": 190 self.c_func_name = arg.__name__ 191 192 if self.dispatch[event](self, frame,t): 193 t = timer() 194 self.t = t[0] + t[1] 195 else: 196 r = timer() 197 self.t = r[0] + r[1] - t # put back unrecorded delta 198 199 # Dispatch routine for best timer program (return = scalar, fastest if 200 # an integer but float works too -- and time.process_time() relies on that). 201 202 def trace_dispatch_i(self, frame, event, arg): 203 timer = self.timer 204 t = timer() - self.t - self.bias 205 206 if event == "c_call": 207 self.c_func_name = arg.__name__ 208 209 if self.dispatch[event](self, frame, t): 210 self.t = timer() 211 else: 212 self.t = timer() - t # put back unrecorded delta 213 214 # Dispatch routine for macintosh (timer returns time in ticks of 215 # 1/60th second) 216 217 def trace_dispatch_mac(self, frame, event, arg): 218 timer = self.timer 219 t = timer()/60.0 - self.t - self.bias 220 221 if event == "c_call": 222 self.c_func_name = arg.__name__ 223 224 if self.dispatch[event](self, frame, t): 225 self.t = timer()/60.0 226 else: 227 self.t = timer()/60.0 - t # put back unrecorded delta 228 229 # SLOW generic dispatch routine for timer returning lists of numbers 230 231 def trace_dispatch_l(self, frame, event, arg): 232 get_time = self.get_time 233 t = get_time() - self.t - self.bias 234 235 if event == "c_call": 236 self.c_func_name = arg.__name__ 237 238 if self.dispatch[event](self, frame, t): 239 self.t = get_time() 240 else: 241 self.t = get_time() - t # put back unrecorded delta 242 243 # In the event handlers, the first 3 elements of self.cur are unpacked 244 # into vrbls w/ 3-letter names. The last two characters are meant to be 245 # mnemonic: 246 # _pt self.cur[0] "parent time" time to be charged to parent frame 247 # _it self.cur[1] "internal time" time spent directly in the function 248 # _et self.cur[2] "external time" time spent in subfunctions 249 250 def trace_dispatch_exception(self, frame, t): 251 rpt, rit, ret, rfn, rframe, rcur = self.cur 252 if (rframe is not frame) and rcur: 253 return self.trace_dispatch_return(rframe, t) 254 self.cur = rpt, rit+t, ret, rfn, rframe, rcur 255 return 1 256 257 258 def trace_dispatch_call(self, frame, t): 259 if self.cur and frame.f_back is not self.cur[-2]: 260 rpt, rit, ret, rfn, rframe, rcur = self.cur 261 if not isinstance(rframe, Profile.fake_frame): 262 assert rframe.f_back is frame.f_back, ("Bad call", rfn, 263 rframe, rframe.f_back, 264 frame, frame.f_back) 265 self.trace_dispatch_return(rframe, 0) 266 assert (self.cur is None or \ 267 frame.f_back is self.cur[-2]), ("Bad call", 268 self.cur[-3]) 269 fcode = frame.f_code 270 fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name) 271 self.cur = (t, 0, 0, fn, frame, self.cur) 272 timings = self.timings 273 if fn in timings: 274 cc, ns, tt, ct, callers = timings[fn] 275 timings[fn] = cc, ns + 1, tt, ct, callers 276 else: 277 timings[fn] = 0, 0, 0, 0, {} 278 return 1 279 280 def trace_dispatch_c_call (self, frame, t): 281 fn = ("", 0, self.c_func_name) 282 self.cur = (t, 0, 0, fn, frame, self.cur) 283 timings = self.timings 284 if fn in timings: 285 cc, ns, tt, ct, callers = timings[fn] 286 timings[fn] = cc, ns+1, tt, ct, callers 287 else: 288 timings[fn] = 0, 0, 0, 0, {} 289 return 1 290 291 def trace_dispatch_return(self, frame, t): 292 if frame is not self.cur[-2]: 293 assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3]) 294 self.trace_dispatch_return(self.cur[-2], 0) 295 296 # Prefix "r" means part of the Returning or exiting frame. 297 # Prefix "p" means part of the Previous or Parent or older frame. 298 299 rpt, rit, ret, rfn, frame, rcur = self.cur 300 rit = rit + t 301 frame_total = rit + ret 302 303 ppt, pit, pet, pfn, pframe, pcur = rcur 304 self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur 305 306 timings = self.timings 307 cc, ns, tt, ct, callers = timings[rfn] 308 if not ns: 309 # This is the only occurrence of the function on the stack. 310 # Else this is a (directly or indirectly) recursive call, and 311 # its cumulative time will get updated when the topmost call to 312 # it returns. 313 ct = ct + frame_total 314 cc = cc + 1 315 316 if pfn in callers: 317 callers[pfn] = callers[pfn] + 1 # hack: gather more 318 # stats such as the amount of time added to ct courtesy 319 # of this specific call, and the contribution to cc 320 # courtesy of this call. 321 else: 322 callers[pfn] = 1 323 324 timings[rfn] = cc, ns - 1, tt + rit, ct, callers 325 326 return 1 327 328 329 dispatch = { 330 "call": trace_dispatch_call, 331 "exception": trace_dispatch_exception, 332 "return": trace_dispatch_return, 333 "c_call": trace_dispatch_c_call, 334 "c_exception": trace_dispatch_return, # the C function returned 335 "c_return": trace_dispatch_return, 336 } 337 338 339 # The next few functions play with self.cmd. By carefully preloading 340 # our parallel stack, we can force the profiled result to include 341 # an arbitrary string as the name of the calling function. 342 # We use self.cmd as that string, and the resulting stats look 343 # very nice :-). 344 345 def set_cmd(self, cmd): 346 if self.cur[-1]: return # already set 347 self.cmd = cmd 348 self.simulate_call(cmd) 349 350 class fake_code: 351 def __init__(self, filename, line, name): 352 self.co_filename = filename 353 self.co_line = line 354 self.co_name = name 355 self.co_firstlineno = 0 356 357 def __repr__(self): 358 return repr((self.co_filename, self.co_line, self.co_name)) 359 360 class fake_frame: 361 def __init__(self, code, prior): 362 self.f_code = code 363 self.f_back = prior 364 365 def simulate_call(self, name): 366 code = self.fake_code('profile', 0, name) 367 if self.cur: 368 pframe = self.cur[-2] 369 else: 370 pframe = None 371 frame = self.fake_frame(code, pframe) 372 self.dispatch['call'](self, frame, 0) 373 374 # collect stats from pending stack, including getting final 375 # timings for self.cmd frame. 376 377 def simulate_cmd_complete(self): 378 get_time = self.get_time 379 t = get_time() - self.t 380 while self.cur[-1]: 381 # We *can* cause assertion errors here if 382 # dispatch_trace_return checks for a frame match! 383 self.dispatch['return'](self, self.cur[-2], t) 384 t = 0 385 self.t = get_time() - t 386 387 388 def print_stats(self, sort=-1): 389 import pstats 390 if not isinstance(sort, tuple): 391 sort = (sort,) 392 pstats.Stats(self).strip_dirs().sort_stats(*sort).print_stats() 393 394 def dump_stats(self, file): 395 with open(file, 'wb') as f: 396 self.create_stats() 397 marshal.dump(self.stats, f) 398 399 def create_stats(self): 400 self.simulate_cmd_complete() 401 self.snapshot_stats() 402 403 def snapshot_stats(self): 404 self.stats = {} 405 for func, (cc, ns, tt, ct, callers) in self.timings.items(): 406 callers = callers.copy() 407 nc = 0 408 for callcnt in callers.values(): 409 nc += callcnt 410 self.stats[func] = cc, nc, tt, ct, callers 411 412 413 # The following two methods can be called by clients to use 414 # a profiler to profile a statement, given as a string. 415 416 def run(self, cmd): 417 import __main__ 418 dict = __main__.__dict__ 419 return self.runctx(cmd, dict, dict) 420 421 def runctx(self, cmd, globals, locals): 422 self.set_cmd(cmd) 423 sys.setprofile(self.dispatcher) 424 try: 425 exec(cmd, globals, locals) 426 finally: 427 sys.setprofile(None) 428 return self 429 430 # This method is more useful to profile a single function call. 431 def runcall(self, func, /, *args, **kw): 432 self.set_cmd(repr(func)) 433 sys.setprofile(self.dispatcher) 434 try: 435 return func(*args, **kw) 436 finally: 437 sys.setprofile(None) 438 439 440 #****************************************************************** 441 # The following calculates the overhead for using a profiler. The 442 # problem is that it takes a fair amount of time for the profiler 443 # to stop the stopwatch (from the time it receives an event). 444 # Similarly, there is a delay from the time that the profiler 445 # re-starts the stopwatch before the user's code really gets to 446 # continue. The following code tries to measure the difference on 447 # a per-event basis. 448 # 449 # Note that this difference is only significant if there are a lot of 450 # events, and relatively little user code per event. For example, 451 # code with small functions will typically benefit from having the 452 # profiler calibrated for the current platform. This *could* be 453 # done on the fly during init() time, but it is not worth the 454 # effort. Also note that if too large a value specified, then 455 # execution time on some functions will actually appear as a 456 # negative number. It is *normal* for some functions (with very 457 # low call counts) to have such negative stats, even if the 458 # calibration figure is "correct." 459 # 460 # One alternative to profile-time calibration adjustments (i.e., 461 # adding in the magic little delta during each event) is to track 462 # more carefully the number of events (and cumulatively, the number 463 # of events during sub functions) that are seen. If this were 464 # done, then the arithmetic could be done after the fact (i.e., at 465 # display time). Currently, we track only call/return events. 466 # These values can be deduced by examining the callees and callers 467 # vectors for each functions. Hence we *can* almost correct the 468 # internal time figure at print time (note that we currently don't 469 # track exception event processing counts). Unfortunately, there 470 # is currently no similar information for cumulative sub-function 471 # time. It would not be hard to "get all this info" at profiler 472 # time. Specifically, we would have to extend the tuples to keep 473 # counts of this in each frame, and then extend the defs of timing 474 # tuples to include the significant two figures. I'm a bit fearful 475 # that this additional feature will slow the heavily optimized 476 # event/time ratio (i.e., the profiler would run slower, fur a very 477 # low "value added" feature.) 478 #************************************************************** 479 480 def calibrate(self, m, verbose=0): 481 if self.__class__ is not Profile: 482 raise TypeError("Subclasses must override .calibrate().") 483 484 saved_bias = self.bias 485 self.bias = 0 486 try: 487 return self._calibrate_inner(m, verbose) 488 finally: 489 self.bias = saved_bias 490 491 def _calibrate_inner(self, m, verbose): 492 get_time = self.get_time 493 494 # Set up a test case to be run with and without profiling. Include 495 # lots of calls, because we're trying to quantify stopwatch overhead. 496 # Do not raise any exceptions, though, because we want to know 497 # exactly how many profile events are generated (one call event, + 498 # one return event, per Python-level call). 499 500 def f1(n): 501 for i in range(n): 502 x = 1 503 504 def f(m, f1=f1): 505 for i in range(m): 506 f1(100) 507 508 f(m) # warm up the cache 509 510 # elapsed_noprofile <- time f(m) takes without profiling. 511 t0 = get_time() 512 f(m) 513 t1 = get_time() 514 elapsed_noprofile = t1 - t0 515 if verbose: 516 print("elapsed time without profiling =", elapsed_noprofile) 517 518 # elapsed_profile <- time f(m) takes with profiling. The difference 519 # is profiling overhead, only some of which the profiler subtracts 520 # out on its own. 521 p = Profile() 522 t0 = get_time() 523 p.runctx('f(m)', globals(), locals()) 524 t1 = get_time() 525 elapsed_profile = t1 - t0 526 if verbose: 527 print("elapsed time with profiling =", elapsed_profile) 528 529 # reported_time <- "CPU seconds" the profiler charged to f and f1. 530 total_calls = 0.0 531 reported_time = 0.0 532 for (filename, line, funcname), (cc, ns, tt, ct, callers) in \ 533 p.timings.items(): 534 if funcname in ("f", "f1"): 535 total_calls += cc 536 reported_time += tt 537 538 if verbose: 539 print("'CPU seconds' profiler reported =", reported_time) 540 print("total # calls =", total_calls) 541 if total_calls != m + 1: 542 raise ValueError("internal error: total calls = %d" % total_calls) 543 544 # reported_time - elapsed_noprofile = overhead the profiler wasn't 545 # able to measure. Divide by twice the number of calls (since there 546 # are two profiler events per call in this test) to get the hidden 547 # overhead per event. 548 mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls 549 if verbose: 550 print("mean stopwatch overhead per profile event =", mean) 551 return mean 552 553#**************************************************************************** 554 555def main(): 556 import os 557 from optparse import OptionParser 558 559 usage = "profile.py [-o output_file_path] [-s sort] [-m module | scriptfile] [arg] ..." 560 parser = OptionParser(usage=usage) 561 parser.allow_interspersed_args = False 562 parser.add_option('-o', '--outfile', dest="outfile", 563 help="Save stats to <outfile>", default=None) 564 parser.add_option('-m', dest="module", action="store_true", 565 help="Profile a library module.", default=False) 566 parser.add_option('-s', '--sort', dest="sort", 567 help="Sort order when printing to stdout, based on pstats.Stats class", 568 default=-1) 569 570 if not sys.argv[1:]: 571 parser.print_usage() 572 sys.exit(2) 573 574 (options, args) = parser.parse_args() 575 sys.argv[:] = args 576 577 # The script that we're profiling may chdir, so capture the absolute path 578 # to the output file at startup. 579 if options.outfile is not None: 580 options.outfile = os.path.abspath(options.outfile) 581 582 if len(args) > 0: 583 if options.module: 584 import runpy 585 code = "run_module(modname, run_name='__main__')" 586 globs = { 587 'run_module': runpy.run_module, 588 'modname': args[0] 589 } 590 else: 591 progname = args[0] 592 sys.path.insert(0, os.path.dirname(progname)) 593 with io.open_code(progname) as fp: 594 code = compile(fp.read(), progname, 'exec') 595 spec = importlib.machinery.ModuleSpec(name='__main__', loader=None, 596 origin=progname) 597 globs = { 598 '__spec__': spec, 599 '__file__': spec.origin, 600 '__name__': spec.name, 601 '__package__': None, 602 '__cached__': None, 603 } 604 try: 605 runctx(code, globs, None, options.outfile, options.sort) 606 except BrokenPipeError as exc: 607 # Prevent "Exception ignored" during interpreter shutdown. 608 sys.stdout = None 609 sys.exit(exc.errno) 610 else: 611 parser.print_usage() 612 return parser 613 614# When invoked as main program, invoke the profiler on a script 615if __name__ == '__main__': 616 main() 617