• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#! /usr/bin/env python3
2#
3# Class for profiling python code. rev 1.0  6/2/94
4#
5# Written by James Roskind
6# Based on prior profile module by Sjoerd Mullender...
7#   which was hacked somewhat by: Guido van Rossum
8
9"""Class for profiling Python code."""
10
11# Copyright Disney Enterprises, Inc.  All Rights Reserved.
12# Licensed to PSF under a Contributor Agreement
13#
14# Licensed under the Apache License, Version 2.0 (the "License");
15# you may not use this file except in compliance with the License.
16# You may obtain a copy of the License at
17#
18# http://www.apache.org/licenses/LICENSE-2.0
19#
20# Unless required by applicable law or agreed to in writing, software
21# distributed under the License is distributed on an "AS IS" BASIS,
22# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
23# either express or implied.  See the License for the specific language
24# governing permissions and limitations under the License.
25
26
27import importlib.machinery
28import io
29import sys
30import time
31import marshal
32
33__all__ = ["run", "runctx", "Profile"]
34
35# Sample timer for use with
36#i_count = 0
37#def integer_timer():
38#       global i_count
39#       i_count = i_count + 1
40#       return i_count
41#itimes = integer_timer # replace with C coded timer returning integers
42
43class _Utils:
44    """Support class for utility functions which are shared by
45    profile.py and cProfile.py modules.
46    Not supposed to be used directly.
47    """
48
49    def __init__(self, profiler):
50        self.profiler = profiler
51
52    def run(self, statement, filename, sort):
53        prof = self.profiler()
54        try:
55            prof.run(statement)
56        except SystemExit:
57            pass
58        finally:
59            self._show(prof, filename, sort)
60
61    def runctx(self, statement, globals, locals, filename, sort):
62        prof = self.profiler()
63        try:
64            prof.runctx(statement, globals, locals)
65        except SystemExit:
66            pass
67        finally:
68            self._show(prof, filename, sort)
69
70    def _show(self, prof, filename, sort):
71        if filename is not None:
72            prof.dump_stats(filename)
73        else:
74            prof.print_stats(sort)
75
76
77#**************************************************************************
78# The following are the static member functions for the profiler class
79# Note that an instance of Profile() is *not* needed to call them.
80#**************************************************************************
81
82def run(statement, filename=None, sort=-1):
83    """Run statement under profiler optionally saving results in filename
84
85    This function takes a single argument that can be passed to the
86    "exec" statement, and an optional file name.  In all cases this
87    routine attempts to "exec" its first argument and gather profiling
88    statistics from the execution. If no file name is present, then this
89    function automatically prints a simple profiling report, sorted by the
90    standard name string (file/line/function-name) that is presented in
91    each line.
92    """
93    return _Utils(Profile).run(statement, filename, sort)
94
95def runctx(statement, globals, locals, filename=None, sort=-1):
96    """Run statement under profiler, supplying your own globals and locals,
97    optionally saving results in filename.
98
99    statement and filename have the same semantics as profile.run
100    """
101    return _Utils(Profile).runctx(statement, globals, locals, filename, sort)
102
103
104class Profile:
105    """Profiler class.
106
107    self.cur is always a tuple.  Each such tuple corresponds to a stack
108    frame that is currently active (self.cur[-2]).  The following are the
109    definitions of its members.  We use this external "parallel stack" to
110    avoid contaminating the program that we are profiling. (old profiler
111    used to write into the frames local dictionary!!) Derived classes
112    can change the definition of some entries, as long as they leave
113    [-2:] intact (frame and previous tuple).  In case an internal error is
114    detected, the -3 element is used as the function name.
115
116    [ 0] = Time that needs to be charged to the parent frame's function.
117           It is used so that a function call will not have to access the
118           timing data for the parent frame.
119    [ 1] = Total time spent in this frame's function, excluding time in
120           subfunctions (this latter is tallied in cur[2]).
121    [ 2] = Total time spent in subfunctions, excluding time executing the
122           frame's function (this latter is tallied in cur[1]).
123    [-3] = Name of the function that corresponds to this frame.
124    [-2] = Actual frame that we correspond to (used to sync exception handling).
125    [-1] = Our parent 6-tuple (corresponds to frame.f_back).
126
127    Timing data for each function is stored as a 5-tuple in the dictionary
128    self.timings[].  The index is always the name stored in self.cur[-3].
129    The following are the definitions of the members:
130
131    [0] = The number of times this function was called, not counting direct
132          or indirect recursion,
133    [1] = Number of times this function appears on the stack, minus one
134    [2] = Total time spent internal to this function
135    [3] = Cumulative time that this function was present on the stack.  In
136          non-recursive functions, this is the total execution time from start
137          to finish of each invocation of a function, including time spent in
138          all subfunctions.
139    [4] = A dictionary indicating for each function name, the number of times
140          it was called by us.
141    """
142
143    bias = 0  # calibration constant
144
145    def __init__(self, timer=None, bias=None):
146        self.timings = {}
147        self.cur = None
148        self.cmd = ""
149        self.c_func_name = ""
150
151        if bias is None:
152            bias = self.bias
153        self.bias = bias     # Materialize in local dict for lookup speed.
154
155        if not timer:
156            self.timer = self.get_time = time.process_time
157            self.dispatcher = self.trace_dispatch_i
158        else:
159            self.timer = timer
160            t = self.timer() # test out timer function
161            try:
162                length = len(t)
163            except TypeError:
164                self.get_time = timer
165                self.dispatcher = self.trace_dispatch_i
166            else:
167                if length == 2:
168                    self.dispatcher = self.trace_dispatch
169                else:
170                    self.dispatcher = self.trace_dispatch_l
171                # This get_time() implementation needs to be defined
172                # here to capture the passed-in timer in the parameter
173                # list (for performance).  Note that we can't assume
174                # the timer() result contains two values in all
175                # cases.
176                def get_time_timer(timer=timer, sum=sum):
177                    return sum(timer())
178                self.get_time = get_time_timer
179        self.t = self.get_time()
180        self.simulate_call('profiler')
181
182    # Heavily optimized dispatch routine for time.process_time() timer
183
184    def trace_dispatch(self, frame, event, arg):
185        timer = self.timer
186        t = timer()
187        t = t[0] + t[1] - self.t - self.bias
188
189        if event == "c_call":
190            self.c_func_name = arg.__name__
191
192        if self.dispatch[event](self, frame,t):
193            t = timer()
194            self.t = t[0] + t[1]
195        else:
196            r = timer()
197            self.t = r[0] + r[1] - t # put back unrecorded delta
198
199    # Dispatch routine for best timer program (return = scalar, fastest if
200    # an integer but float works too -- and time.process_time() relies on that).
201
202    def trace_dispatch_i(self, frame, event, arg):
203        timer = self.timer
204        t = timer() - self.t - self.bias
205
206        if event == "c_call":
207            self.c_func_name = arg.__name__
208
209        if self.dispatch[event](self, frame, t):
210            self.t = timer()
211        else:
212            self.t = timer() - t  # put back unrecorded delta
213
214    # Dispatch routine for macintosh (timer returns time in ticks of
215    # 1/60th second)
216
217    def trace_dispatch_mac(self, frame, event, arg):
218        timer = self.timer
219        t = timer()/60.0 - self.t - self.bias
220
221        if event == "c_call":
222            self.c_func_name = arg.__name__
223
224        if self.dispatch[event](self, frame, t):
225            self.t = timer()/60.0
226        else:
227            self.t = timer()/60.0 - t  # put back unrecorded delta
228
229    # SLOW generic dispatch routine for timer returning lists of numbers
230
231    def trace_dispatch_l(self, frame, event, arg):
232        get_time = self.get_time
233        t = get_time() - self.t - self.bias
234
235        if event == "c_call":
236            self.c_func_name = arg.__name__
237
238        if self.dispatch[event](self, frame, t):
239            self.t = get_time()
240        else:
241            self.t = get_time() - t # put back unrecorded delta
242
243    # In the event handlers, the first 3 elements of self.cur are unpacked
244    # into vrbls w/ 3-letter names.  The last two characters are meant to be
245    # mnemonic:
246    #     _pt  self.cur[0] "parent time"   time to be charged to parent frame
247    #     _it  self.cur[1] "internal time" time spent directly in the function
248    #     _et  self.cur[2] "external time" time spent in subfunctions
249
250    def trace_dispatch_exception(self, frame, t):
251        rpt, rit, ret, rfn, rframe, rcur = self.cur
252        if (rframe is not frame) and rcur:
253            return self.trace_dispatch_return(rframe, t)
254        self.cur = rpt, rit+t, ret, rfn, rframe, rcur
255        return 1
256
257
258    def trace_dispatch_call(self, frame, t):
259        if self.cur and frame.f_back is not self.cur[-2]:
260            rpt, rit, ret, rfn, rframe, rcur = self.cur
261            if not isinstance(rframe, Profile.fake_frame):
262                assert rframe.f_back is frame.f_back, ("Bad call", rfn,
263                                                       rframe, rframe.f_back,
264                                                       frame, frame.f_back)
265                self.trace_dispatch_return(rframe, 0)
266                assert (self.cur is None or \
267                        frame.f_back is self.cur[-2]), ("Bad call",
268                                                        self.cur[-3])
269        fcode = frame.f_code
270        fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
271        self.cur = (t, 0, 0, fn, frame, self.cur)
272        timings = self.timings
273        if fn in timings:
274            cc, ns, tt, ct, callers = timings[fn]
275            timings[fn] = cc, ns + 1, tt, ct, callers
276        else:
277            timings[fn] = 0, 0, 0, 0, {}
278        return 1
279
280    def trace_dispatch_c_call (self, frame, t):
281        fn = ("", 0, self.c_func_name)
282        self.cur = (t, 0, 0, fn, frame, self.cur)
283        timings = self.timings
284        if fn in timings:
285            cc, ns, tt, ct, callers = timings[fn]
286            timings[fn] = cc, ns+1, tt, ct, callers
287        else:
288            timings[fn] = 0, 0, 0, 0, {}
289        return 1
290
291    def trace_dispatch_return(self, frame, t):
292        if frame is not self.cur[-2]:
293            assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
294            self.trace_dispatch_return(self.cur[-2], 0)
295
296        # Prefix "r" means part of the Returning or exiting frame.
297        # Prefix "p" means part of the Previous or Parent or older frame.
298
299        rpt, rit, ret, rfn, frame, rcur = self.cur
300        rit = rit + t
301        frame_total = rit + ret
302
303        ppt, pit, pet, pfn, pframe, pcur = rcur
304        self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
305
306        timings = self.timings
307        cc, ns, tt, ct, callers = timings[rfn]
308        if not ns:
309            # This is the only occurrence of the function on the stack.
310            # Else this is a (directly or indirectly) recursive call, and
311            # its cumulative time will get updated when the topmost call to
312            # it returns.
313            ct = ct + frame_total
314            cc = cc + 1
315
316        if pfn in callers:
317            callers[pfn] = callers[pfn] + 1  # hack: gather more
318            # stats such as the amount of time added to ct courtesy
319            # of this specific call, and the contribution to cc
320            # courtesy of this call.
321        else:
322            callers[pfn] = 1
323
324        timings[rfn] = cc, ns - 1, tt + rit, ct, callers
325
326        return 1
327
328
329    dispatch = {
330        "call": trace_dispatch_call,
331        "exception": trace_dispatch_exception,
332        "return": trace_dispatch_return,
333        "c_call": trace_dispatch_c_call,
334        "c_exception": trace_dispatch_return,  # the C function returned
335        "c_return": trace_dispatch_return,
336        }
337
338
339    # The next few functions play with self.cmd. By carefully preloading
340    # our parallel stack, we can force the profiled result to include
341    # an arbitrary string as the name of the calling function.
342    # We use self.cmd as that string, and the resulting stats look
343    # very nice :-).
344
345    def set_cmd(self, cmd):
346        if self.cur[-1]: return   # already set
347        self.cmd = cmd
348        self.simulate_call(cmd)
349
350    class fake_code:
351        def __init__(self, filename, line, name):
352            self.co_filename = filename
353            self.co_line = line
354            self.co_name = name
355            self.co_firstlineno = 0
356
357        def __repr__(self):
358            return repr((self.co_filename, self.co_line, self.co_name))
359
360    class fake_frame:
361        def __init__(self, code, prior):
362            self.f_code = code
363            self.f_back = prior
364
365    def simulate_call(self, name):
366        code = self.fake_code('profile', 0, name)
367        if self.cur:
368            pframe = self.cur[-2]
369        else:
370            pframe = None
371        frame = self.fake_frame(code, pframe)
372        self.dispatch['call'](self, frame, 0)
373
374    # collect stats from pending stack, including getting final
375    # timings for self.cmd frame.
376
377    def simulate_cmd_complete(self):
378        get_time = self.get_time
379        t = get_time() - self.t
380        while self.cur[-1]:
381            # We *can* cause assertion errors here if
382            # dispatch_trace_return checks for a frame match!
383            self.dispatch['return'](self, self.cur[-2], t)
384            t = 0
385        self.t = get_time() - t
386
387
388    def print_stats(self, sort=-1):
389        import pstats
390        if not isinstance(sort, tuple):
391            sort = (sort,)
392        pstats.Stats(self).strip_dirs().sort_stats(*sort).print_stats()
393
394    def dump_stats(self, file):
395        with open(file, 'wb') as f:
396            self.create_stats()
397            marshal.dump(self.stats, f)
398
399    def create_stats(self):
400        self.simulate_cmd_complete()
401        self.snapshot_stats()
402
403    def snapshot_stats(self):
404        self.stats = {}
405        for func, (cc, ns, tt, ct, callers) in self.timings.items():
406            callers = callers.copy()
407            nc = 0
408            for callcnt in callers.values():
409                nc += callcnt
410            self.stats[func] = cc, nc, tt, ct, callers
411
412
413    # The following two methods can be called by clients to use
414    # a profiler to profile a statement, given as a string.
415
416    def run(self, cmd):
417        import __main__
418        dict = __main__.__dict__
419        return self.runctx(cmd, dict, dict)
420
421    def runctx(self, cmd, globals, locals):
422        self.set_cmd(cmd)
423        sys.setprofile(self.dispatcher)
424        try:
425            exec(cmd, globals, locals)
426        finally:
427            sys.setprofile(None)
428        return self
429
430    # This method is more useful to profile a single function call.
431    def runcall(self, func, /, *args, **kw):
432        self.set_cmd(repr(func))
433        sys.setprofile(self.dispatcher)
434        try:
435            return func(*args, **kw)
436        finally:
437            sys.setprofile(None)
438
439
440    #******************************************************************
441    # The following calculates the overhead for using a profiler.  The
442    # problem is that it takes a fair amount of time for the profiler
443    # to stop the stopwatch (from the time it receives an event).
444    # Similarly, there is a delay from the time that the profiler
445    # re-starts the stopwatch before the user's code really gets to
446    # continue.  The following code tries to measure the difference on
447    # a per-event basis.
448    #
449    # Note that this difference is only significant if there are a lot of
450    # events, and relatively little user code per event.  For example,
451    # code with small functions will typically benefit from having the
452    # profiler calibrated for the current platform.  This *could* be
453    # done on the fly during init() time, but it is not worth the
454    # effort.  Also note that if too large a value specified, then
455    # execution time on some functions will actually appear as a
456    # negative number.  It is *normal* for some functions (with very
457    # low call counts) to have such negative stats, even if the
458    # calibration figure is "correct."
459    #
460    # One alternative to profile-time calibration adjustments (i.e.,
461    # adding in the magic little delta during each event) is to track
462    # more carefully the number of events (and cumulatively, the number
463    # of events during sub functions) that are seen.  If this were
464    # done, then the arithmetic could be done after the fact (i.e., at
465    # display time).  Currently, we track only call/return events.
466    # These values can be deduced by examining the callees and callers
467    # vectors for each functions.  Hence we *can* almost correct the
468    # internal time figure at print time (note that we currently don't
469    # track exception event processing counts).  Unfortunately, there
470    # is currently no similar information for cumulative sub-function
471    # time.  It would not be hard to "get all this info" at profiler
472    # time.  Specifically, we would have to extend the tuples to keep
473    # counts of this in each frame, and then extend the defs of timing
474    # tuples to include the significant two figures. I'm a bit fearful
475    # that this additional feature will slow the heavily optimized
476    # event/time ratio (i.e., the profiler would run slower, fur a very
477    # low "value added" feature.)
478    #**************************************************************
479
480    def calibrate(self, m, verbose=0):
481        if self.__class__ is not Profile:
482            raise TypeError("Subclasses must override .calibrate().")
483
484        saved_bias = self.bias
485        self.bias = 0
486        try:
487            return self._calibrate_inner(m, verbose)
488        finally:
489            self.bias = saved_bias
490
491    def _calibrate_inner(self, m, verbose):
492        get_time = self.get_time
493
494        # Set up a test case to be run with and without profiling.  Include
495        # lots of calls, because we're trying to quantify stopwatch overhead.
496        # Do not raise any exceptions, though, because we want to know
497        # exactly how many profile events are generated (one call event, +
498        # one return event, per Python-level call).
499
500        def f1(n):
501            for i in range(n):
502                x = 1
503
504        def f(m, f1=f1):
505            for i in range(m):
506                f1(100)
507
508        f(m)    # warm up the cache
509
510        # elapsed_noprofile <- time f(m) takes without profiling.
511        t0 = get_time()
512        f(m)
513        t1 = get_time()
514        elapsed_noprofile = t1 - t0
515        if verbose:
516            print("elapsed time without profiling =", elapsed_noprofile)
517
518        # elapsed_profile <- time f(m) takes with profiling.  The difference
519        # is profiling overhead, only some of which the profiler subtracts
520        # out on its own.
521        p = Profile()
522        t0 = get_time()
523        p.runctx('f(m)', globals(), locals())
524        t1 = get_time()
525        elapsed_profile = t1 - t0
526        if verbose:
527            print("elapsed time with profiling =", elapsed_profile)
528
529        # reported_time <- "CPU seconds" the profiler charged to f and f1.
530        total_calls = 0.0
531        reported_time = 0.0
532        for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
533                p.timings.items():
534            if funcname in ("f", "f1"):
535                total_calls += cc
536                reported_time += tt
537
538        if verbose:
539            print("'CPU seconds' profiler reported =", reported_time)
540            print("total # calls =", total_calls)
541        if total_calls != m + 1:
542            raise ValueError("internal error: total calls = %d" % total_calls)
543
544        # reported_time - elapsed_noprofile = overhead the profiler wasn't
545        # able to measure.  Divide by twice the number of calls (since there
546        # are two profiler events per call in this test) to get the hidden
547        # overhead per event.
548        mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
549        if verbose:
550            print("mean stopwatch overhead per profile event =", mean)
551        return mean
552
553#****************************************************************************
554
555def main():
556    import os
557    from optparse import OptionParser
558
559    usage = "profile.py [-o output_file_path] [-s sort] [-m module | scriptfile] [arg] ..."
560    parser = OptionParser(usage=usage)
561    parser.allow_interspersed_args = False
562    parser.add_option('-o', '--outfile', dest="outfile",
563        help="Save stats to <outfile>", default=None)
564    parser.add_option('-m', dest="module", action="store_true",
565        help="Profile a library module.", default=False)
566    parser.add_option('-s', '--sort', dest="sort",
567        help="Sort order when printing to stdout, based on pstats.Stats class",
568        default=-1)
569
570    if not sys.argv[1:]:
571        parser.print_usage()
572        sys.exit(2)
573
574    (options, args) = parser.parse_args()
575    sys.argv[:] = args
576
577    # The script that we're profiling may chdir, so capture the absolute path
578    # to the output file at startup.
579    if options.outfile is not None:
580        options.outfile = os.path.abspath(options.outfile)
581
582    if len(args) > 0:
583        if options.module:
584            import runpy
585            code = "run_module(modname, run_name='__main__')"
586            globs = {
587                'run_module': runpy.run_module,
588                'modname': args[0]
589            }
590        else:
591            progname = args[0]
592            sys.path.insert(0, os.path.dirname(progname))
593            with io.open_code(progname) as fp:
594                code = compile(fp.read(), progname, 'exec')
595            spec = importlib.machinery.ModuleSpec(name='__main__', loader=None,
596                                                  origin=progname)
597            globs = {
598                '__spec__': spec,
599                '__file__': spec.origin,
600                '__name__': spec.name,
601                '__package__': None,
602                '__cached__': None,
603            }
604        try:
605            runctx(code, globs, None, options.outfile, options.sort)
606        except BrokenPipeError as exc:
607            # Prevent "Exception ignored" during interpreter shutdown.
608            sys.stdout = None
609            sys.exit(exc.errno)
610    else:
611        parser.print_usage()
612    return parser
613
614# When invoked as main program, invoke the profiler on a script
615if __name__ == '__main__':
616    main()
617