• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#! /usr/bin/env python
2#
3# Class for profiling python code. rev 1.0  6/2/94
4#
5# Written by James Roskind
6# Based on prior profile module by Sjoerd Mullender...
7#   which was hacked somewhat by: Guido van Rossum
8
9"""Class for profiling Python code."""
10
11# Copyright Disney Enterprises, Inc.  All Rights Reserved.
12# Licensed to PSF under a Contributor Agreement
13#
14# Licensed under the Apache License, Version 2.0 (the "License");
15# you may not use this file except in compliance with the License.
16# You may obtain a copy of the License at
17#
18# http://www.apache.org/licenses/LICENSE-2.0
19#
20# Unless required by applicable law or agreed to in writing, software
21# distributed under the License is distributed on an "AS IS" BASIS,
22# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
23# either express or implied.  See the License for the specific language
24# governing permissions and limitations under the License.
25
26
27import sys
28import os
29import time
30import marshal
31from optparse import OptionParser
32
33__all__ = ["run", "runctx", "help", "Profile"]
34
35# Sample timer for use with
36#i_count = 0
37#def integer_timer():
38#       global i_count
39#       i_count = i_count + 1
40#       return i_count
41#itimes = integer_timer # replace with C coded timer returning integers
42
43#**************************************************************************
44# The following are the static member functions for the profiler class
45# Note that an instance of Profile() is *not* needed to call them.
46#**************************************************************************
47
48def run(statement, filename=None, sort=-1):
49    """Run statement under profiler optionally saving results in filename
50
51    This function takes a single argument that can be passed to the
52    "exec" statement, and an optional file name.  In all cases this
53    routine attempts to "exec" its first argument and gather profiling
54    statistics from the execution. If no file name is present, then this
55    function automatically prints a simple profiling report, sorted by the
56    standard name string (file/line/function-name) that is presented in
57    each line.
58    """
59    prof = Profile()
60    try:
61        prof = prof.run(statement)
62    except SystemExit:
63        pass
64    if filename is not None:
65        prof.dump_stats(filename)
66    else:
67        return prof.print_stats(sort)
68
69def runctx(statement, globals, locals, filename=None, sort=-1):
70    """Run statement under profiler, supplying your own globals and locals,
71    optionally saving results in filename.
72
73    statement and filename have the same semantics as profile.run
74    """
75    prof = Profile()
76    try:
77        prof = prof.runctx(statement, globals, locals)
78    except SystemExit:
79        pass
80
81    if filename is not None:
82        prof.dump_stats(filename)
83    else:
84        return prof.print_stats(sort)
85
86# Backwards compatibility.
87def help():
88    print "Documentation for the profile module can be found "
89    print "in the Python Library Reference, section 'The Python Profiler'."
90
91if hasattr(os, "times"):
92    def _get_time_times(timer=os.times):
93        t = timer()
94        return t[0] + t[1]
95
96# Using getrusage(3) is better than clock(3) if available:
97# on some systems (e.g. FreeBSD), getrusage has a higher resolution
98# Furthermore, on a POSIX system, returns microseconds, which
99# wrap around after 36min.
100_has_res = 0
101try:
102    import resource
103    resgetrusage = lambda: resource.getrusage(resource.RUSAGE_SELF)
104    def _get_time_resource(timer=resgetrusage):
105        t = timer()
106        return t[0] + t[1]
107    _has_res = 1
108except ImportError:
109    pass
110
111class Profile:
112    """Profiler class.
113
114    self.cur is always a tuple.  Each such tuple corresponds to a stack
115    frame that is currently active (self.cur[-2]).  The following are the
116    definitions of its members.  We use this external "parallel stack" to
117    avoid contaminating the program that we are profiling. (old profiler
118    used to write into the frames local dictionary!!) Derived classes
119    can change the definition of some entries, as long as they leave
120    [-2:] intact (frame and previous tuple).  In case an internal error is
121    detected, the -3 element is used as the function name.
122
123    [ 0] = Time that needs to be charged to the parent frame's function.
124           It is used so that a function call will not have to access the
125           timing data for the parent frame.
126    [ 1] = Total time spent in this frame's function, excluding time in
127           subfunctions (this latter is tallied in cur[2]).
128    [ 2] = Total time spent in subfunctions, excluding time executing the
129           frame's function (this latter is tallied in cur[1]).
130    [-3] = Name of the function that corresponds to this frame.
131    [-2] = Actual frame that we correspond to (used to sync exception handling).
132    [-1] = Our parent 6-tuple (corresponds to frame.f_back).
133
134    Timing data for each function is stored as a 5-tuple in the dictionary
135    self.timings[].  The index is always the name stored in self.cur[-3].
136    The following are the definitions of the members:
137
138    [0] = The number of times this function was called, not counting direct
139          or indirect recursion,
140    [1] = Number of times this function appears on the stack, minus one
141    [2] = Total time spent internal to this function
142    [3] = Cumulative time that this function was present on the stack.  In
143          non-recursive functions, this is the total execution time from start
144          to finish of each invocation of a function, including time spent in
145          all subfunctions.
146    [4] = A dictionary indicating for each function name, the number of times
147          it was called by us.
148    """
149
150    bias = 0  # calibration constant
151
152    def __init__(self, timer=None, bias=None):
153        self.timings = {}
154        self.cur = None
155        self.cmd = ""
156        self.c_func_name = ""
157
158        if bias is None:
159            bias = self.bias
160        self.bias = bias     # Materialize in local dict for lookup speed.
161
162        if not timer:
163            if _has_res:
164                self.timer = resgetrusage
165                self.dispatcher = self.trace_dispatch
166                self.get_time = _get_time_resource
167            elif hasattr(time, 'clock'):
168                self.timer = self.get_time = time.clock
169                self.dispatcher = self.trace_dispatch_i
170            elif hasattr(os, 'times'):
171                self.timer = os.times
172                self.dispatcher = self.trace_dispatch
173                self.get_time = _get_time_times
174            else:
175                self.timer = self.get_time = time.time
176                self.dispatcher = self.trace_dispatch_i
177        else:
178            self.timer = timer
179            t = self.timer() # test out timer function
180            try:
181                length = len(t)
182            except TypeError:
183                self.get_time = timer
184                self.dispatcher = self.trace_dispatch_i
185            else:
186                if length == 2:
187                    self.dispatcher = self.trace_dispatch
188                else:
189                    self.dispatcher = self.trace_dispatch_l
190                # This get_time() implementation needs to be defined
191                # here to capture the passed-in timer in the parameter
192                # list (for performance).  Note that we can't assume
193                # the timer() result contains two values in all
194                # cases.
195                def get_time_timer(timer=timer, sum=sum):
196                    return sum(timer())
197                self.get_time = get_time_timer
198        self.t = self.get_time()
199        self.simulate_call('profiler')
200
201    # Heavily optimized dispatch routine for os.times() timer
202
203    def trace_dispatch(self, frame, event, arg):
204        timer = self.timer
205        t = timer()
206        t = t[0] + t[1] - self.t - self.bias
207
208        if event == "c_call":
209            self.c_func_name = arg.__name__
210
211        if self.dispatch[event](self, frame,t):
212            t = timer()
213            self.t = t[0] + t[1]
214        else:
215            r = timer()
216            self.t = r[0] + r[1] - t # put back unrecorded delta
217
218    # Dispatch routine for best timer program (return = scalar, fastest if
219    # an integer but float works too -- and time.clock() relies on that).
220
221    def trace_dispatch_i(self, frame, event, arg):
222        timer = self.timer
223        t = timer() - self.t - self.bias
224
225        if event == "c_call":
226            self.c_func_name = arg.__name__
227
228        if self.dispatch[event](self, frame, t):
229            self.t = timer()
230        else:
231            self.t = timer() - t  # put back unrecorded delta
232
233    # Dispatch routine for macintosh (timer returns time in ticks of
234    # 1/60th second)
235
236    def trace_dispatch_mac(self, frame, event, arg):
237        timer = self.timer
238        t = timer()/60.0 - self.t - self.bias
239
240        if event == "c_call":
241            self.c_func_name = arg.__name__
242
243        if self.dispatch[event](self, frame, t):
244            self.t = timer()/60.0
245        else:
246            self.t = timer()/60.0 - t  # put back unrecorded delta
247
248    # SLOW generic dispatch routine for timer returning lists of numbers
249
250    def trace_dispatch_l(self, frame, event, arg):
251        get_time = self.get_time
252        t = get_time() - self.t - self.bias
253
254        if event == "c_call":
255            self.c_func_name = arg.__name__
256
257        if self.dispatch[event](self, frame, t):
258            self.t = get_time()
259        else:
260            self.t = get_time() - t # put back unrecorded delta
261
262    # In the event handlers, the first 3 elements of self.cur are unpacked
263    # into vrbls w/ 3-letter names.  The last two characters are meant to be
264    # mnemonic:
265    #     _pt  self.cur[0] "parent time"   time to be charged to parent frame
266    #     _it  self.cur[1] "internal time" time spent directly in the function
267    #     _et  self.cur[2] "external time" time spent in subfunctions
268
269    def trace_dispatch_exception(self, frame, t):
270        rpt, rit, ret, rfn, rframe, rcur = self.cur
271        if (rframe is not frame) and rcur:
272            return self.trace_dispatch_return(rframe, t)
273        self.cur = rpt, rit+t, ret, rfn, rframe, rcur
274        return 1
275
276
277    def trace_dispatch_call(self, frame, t):
278        if self.cur and frame.f_back is not self.cur[-2]:
279            rpt, rit, ret, rfn, rframe, rcur = self.cur
280            if not isinstance(rframe, Profile.fake_frame):
281                assert rframe.f_back is frame.f_back, ("Bad call", rfn,
282                                                       rframe, rframe.f_back,
283                                                       frame, frame.f_back)
284                self.trace_dispatch_return(rframe, 0)
285                assert (self.cur is None or \
286                        frame.f_back is self.cur[-2]), ("Bad call",
287                                                        self.cur[-3])
288        fcode = frame.f_code
289        fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
290        self.cur = (t, 0, 0, fn, frame, self.cur)
291        timings = self.timings
292        if fn in timings:
293            cc, ns, tt, ct, callers = timings[fn]
294            timings[fn] = cc, ns + 1, tt, ct, callers
295        else:
296            timings[fn] = 0, 0, 0, 0, {}
297        return 1
298
299    def trace_dispatch_c_call (self, frame, t):
300        fn = ("", 0, self.c_func_name)
301        self.cur = (t, 0, 0, fn, frame, self.cur)
302        timings = self.timings
303        if fn in timings:
304            cc, ns, tt, ct, callers = timings[fn]
305            timings[fn] = cc, ns+1, tt, ct, callers
306        else:
307            timings[fn] = 0, 0, 0, 0, {}
308        return 1
309
310    def trace_dispatch_return(self, frame, t):
311        if frame is not self.cur[-2]:
312            assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
313            self.trace_dispatch_return(self.cur[-2], 0)
314
315        # Prefix "r" means part of the Returning or exiting frame.
316        # Prefix "p" means part of the Previous or Parent or older frame.
317
318        rpt, rit, ret, rfn, frame, rcur = self.cur
319        rit = rit + t
320        frame_total = rit + ret
321
322        ppt, pit, pet, pfn, pframe, pcur = rcur
323        self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
324
325        timings = self.timings
326        cc, ns, tt, ct, callers = timings[rfn]
327        if not ns:
328            # This is the only occurrence of the function on the stack.
329            # Else this is a (directly or indirectly) recursive call, and
330            # its cumulative time will get updated when the topmost call to
331            # it returns.
332            ct = ct + frame_total
333            cc = cc + 1
334
335        if pfn in callers:
336            callers[pfn] = callers[pfn] + 1  # hack: gather more
337            # stats such as the amount of time added to ct courtesy
338            # of this specific call, and the contribution to cc
339            # courtesy of this call.
340        else:
341            callers[pfn] = 1
342
343        timings[rfn] = cc, ns - 1, tt + rit, ct, callers
344
345        return 1
346
347
348    dispatch = {
349        "call": trace_dispatch_call,
350        "exception": trace_dispatch_exception,
351        "return": trace_dispatch_return,
352        "c_call": trace_dispatch_c_call,
353        "c_exception": trace_dispatch_return,  # the C function returned
354        "c_return": trace_dispatch_return,
355        }
356
357
358    # The next few functions play with self.cmd. By carefully preloading
359    # our parallel stack, we can force the profiled result to include
360    # an arbitrary string as the name of the calling function.
361    # We use self.cmd as that string, and the resulting stats look
362    # very nice :-).
363
364    def set_cmd(self, cmd):
365        if self.cur[-1]: return   # already set
366        self.cmd = cmd
367        self.simulate_call(cmd)
368
369    class fake_code:
370        def __init__(self, filename, line, name):
371            self.co_filename = filename
372            self.co_line = line
373            self.co_name = name
374            self.co_firstlineno = 0
375
376        def __repr__(self):
377            return repr((self.co_filename, self.co_line, self.co_name))
378
379    class fake_frame:
380        def __init__(self, code, prior):
381            self.f_code = code
382            self.f_back = prior
383
384    def simulate_call(self, name):
385        code = self.fake_code('profile', 0, name)
386        if self.cur:
387            pframe = self.cur[-2]
388        else:
389            pframe = None
390        frame = self.fake_frame(code, pframe)
391        self.dispatch['call'](self, frame, 0)
392
393    # collect stats from pending stack, including getting final
394    # timings for self.cmd frame.
395
396    def simulate_cmd_complete(self):
397        get_time = self.get_time
398        t = get_time() - self.t
399        while self.cur[-1]:
400            # We *can* cause assertion errors here if
401            # dispatch_trace_return checks for a frame match!
402            self.dispatch['return'](self, self.cur[-2], t)
403            t = 0
404        self.t = get_time() - t
405
406
407    def print_stats(self, sort=-1):
408        import pstats
409        pstats.Stats(self).strip_dirs().sort_stats(sort). \
410                  print_stats()
411
412    def dump_stats(self, file):
413        f = open(file, 'wb')
414        self.create_stats()
415        marshal.dump(self.stats, f)
416        f.close()
417
418    def create_stats(self):
419        self.simulate_cmd_complete()
420        self.snapshot_stats()
421
422    def snapshot_stats(self):
423        self.stats = {}
424        for func, (cc, ns, tt, ct, callers) in self.timings.iteritems():
425            callers = callers.copy()
426            nc = 0
427            for callcnt in callers.itervalues():
428                nc += callcnt
429            self.stats[func] = cc, nc, tt, ct, callers
430
431
432    # The following two methods can be called by clients to use
433    # a profiler to profile a statement, given as a string.
434
435    def run(self, cmd):
436        import __main__
437        dict = __main__.__dict__
438        return self.runctx(cmd, dict, dict)
439
440    def runctx(self, cmd, globals, locals):
441        self.set_cmd(cmd)
442        sys.setprofile(self.dispatcher)
443        try:
444            exec cmd in globals, locals
445        finally:
446            sys.setprofile(None)
447        return self
448
449    # This method is more useful to profile a single function call.
450    def runcall(self, func, *args, **kw):
451        self.set_cmd(repr(func))
452        sys.setprofile(self.dispatcher)
453        try:
454            return func(*args, **kw)
455        finally:
456            sys.setprofile(None)
457
458
459    #******************************************************************
460    # The following calculates the overhead for using a profiler.  The
461    # problem is that it takes a fair amount of time for the profiler
462    # to stop the stopwatch (from the time it receives an event).
463    # Similarly, there is a delay from the time that the profiler
464    # re-starts the stopwatch before the user's code really gets to
465    # continue.  The following code tries to measure the difference on
466    # a per-event basis.
467    #
468    # Note that this difference is only significant if there are a lot of
469    # events, and relatively little user code per event.  For example,
470    # code with small functions will typically benefit from having the
471    # profiler calibrated for the current platform.  This *could* be
472    # done on the fly during init() time, but it is not worth the
473    # effort.  Also note that if too large a value specified, then
474    # execution time on some functions will actually appear as a
475    # negative number.  It is *normal* for some functions (with very
476    # low call counts) to have such negative stats, even if the
477    # calibration figure is "correct."
478    #
479    # One alternative to profile-time calibration adjustments (i.e.,
480    # adding in the magic little delta during each event) is to track
481    # more carefully the number of events (and cumulatively, the number
482    # of events during sub functions) that are seen.  If this were
483    # done, then the arithmetic could be done after the fact (i.e., at
484    # display time).  Currently, we track only call/return events.
485    # These values can be deduced by examining the callees and callers
486    # vectors for each functions.  Hence we *can* almost correct the
487    # internal time figure at print time (note that we currently don't
488    # track exception event processing counts).  Unfortunately, there
489    # is currently no similar information for cumulative sub-function
490    # time.  It would not be hard to "get all this info" at profiler
491    # time.  Specifically, we would have to extend the tuples to keep
492    # counts of this in each frame, and then extend the defs of timing
493    # tuples to include the significant two figures. I'm a bit fearful
494    # that this additional feature will slow the heavily optimized
495    # event/time ratio (i.e., the profiler would run slower, fur a very
496    # low "value added" feature.)
497    #**************************************************************
498
499    def calibrate(self, m, verbose=0):
500        if self.__class__ is not Profile:
501            raise TypeError("Subclasses must override .calibrate().")
502
503        saved_bias = self.bias
504        self.bias = 0
505        try:
506            return self._calibrate_inner(m, verbose)
507        finally:
508            self.bias = saved_bias
509
510    def _calibrate_inner(self, m, verbose):
511        get_time = self.get_time
512
513        # Set up a test case to be run with and without profiling.  Include
514        # lots of calls, because we're trying to quantify stopwatch overhead.
515        # Do not raise any exceptions, though, because we want to know
516        # exactly how many profile events are generated (one call event, +
517        # one return event, per Python-level call).
518
519        def f1(n):
520            for i in range(n):
521                x = 1
522
523        def f(m, f1=f1):
524            for i in range(m):
525                f1(100)
526
527        f(m)    # warm up the cache
528
529        # elapsed_noprofile <- time f(m) takes without profiling.
530        t0 = get_time()
531        f(m)
532        t1 = get_time()
533        elapsed_noprofile = t1 - t0
534        if verbose:
535            print "elapsed time without profiling =", elapsed_noprofile
536
537        # elapsed_profile <- time f(m) takes with profiling.  The difference
538        # is profiling overhead, only some of which the profiler subtracts
539        # out on its own.
540        p = Profile()
541        t0 = get_time()
542        p.runctx('f(m)', globals(), locals())
543        t1 = get_time()
544        elapsed_profile = t1 - t0
545        if verbose:
546            print "elapsed time with profiling =", elapsed_profile
547
548        # reported_time <- "CPU seconds" the profiler charged to f and f1.
549        total_calls = 0.0
550        reported_time = 0.0
551        for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
552                p.timings.items():
553            if funcname in ("f", "f1"):
554                total_calls += cc
555                reported_time += tt
556
557        if verbose:
558            print "'CPU seconds' profiler reported =", reported_time
559            print "total # calls =", total_calls
560        if total_calls != m + 1:
561            raise ValueError("internal error: total calls = %d" % total_calls)
562
563        # reported_time - elapsed_noprofile = overhead the profiler wasn't
564        # able to measure.  Divide by twice the number of calls (since there
565        # are two profiler events per call in this test) to get the hidden
566        # overhead per event.
567        mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
568        if verbose:
569            print "mean stopwatch overhead per profile event =", mean
570        return mean
571
572#****************************************************************************
573def Stats(*args):
574    print 'Report generating functions are in the "pstats" module\a'
575
576def main():
577    usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..."
578    parser = OptionParser(usage=usage)
579    parser.allow_interspersed_args = False
580    parser.add_option('-o', '--outfile', dest="outfile",
581        help="Save stats to <outfile>", default=None)
582    parser.add_option('-s', '--sort', dest="sort",
583        help="Sort order when printing to stdout, based on pstats.Stats class",
584        default=-1)
585
586    if not sys.argv[1:]:
587        parser.print_usage()
588        sys.exit(2)
589
590    (options, args) = parser.parse_args()
591    sys.argv[:] = args
592
593    if len(args) > 0:
594        progname = args[0]
595        sys.path.insert(0, os.path.dirname(progname))
596        with open(progname, 'rb') as fp:
597            code = compile(fp.read(), progname, 'exec')
598        globs = {
599            '__file__': progname,
600            '__name__': '__main__',
601            '__package__': None,
602        }
603        runctx(code, globs, None, options.outfile, options.sort)
604    else:
605        parser.print_usage()
606    return parser
607
608# When invoked as main program, invoke the profiler on a script
609if __name__ == '__main__':
610    main()
611