• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Branch predictor simulation                  cg_branchpred.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Cachegrind, a Valgrind tool for cache
8    profiling programs.
9 
10    Copyright (C) 2002-2010 Nicholas Nethercote
11       njn@valgrind.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 
32 /* This file contains the actual branch predictor simulator and its
33    associated state.  As with cg_sim.c it is #included directly into
34    cg_main.c.  It provides:
35 
36    - a taken/not-taken predictor for conditional branches
37    - a branch target address predictor for indirect branches
38 
39    Function return-address prediction is not modelled, on the basis
40    that return stack predictors almost always predict correctly, and
41    also that it is difficult for Valgrind to robustly identify
42    function calls and returns.
43 */
44 
45 /* How many bits at the bottom of an instruction address are
46    guaranteed to be zero? */
47 #if defined(VGA_ppc32) || defined(VGA_ppc64) || defined(VGA_arm)
48 #  define N_IADDR_LO_ZERO_BITS 2
49 #elif defined(VGA_x86) || defined(VGA_amd64)
50 #  define N_IADDR_LO_ZERO_BITS 0
51 #else
52 #  error "Unsupported architecture"
53 #endif
54 
55 
56 /* Get a taken/not-taken prediction for the instruction (presumably a
57    conditional branch) at instr_addr.  Once that's done, update the
58    predictor state based on whether or not it was actually taken, as
59    indicated by 'taken'.  Finally, return 1 for a mispredict and 0 for
60    a successful predict.
61 
62    The predictor is an array of 16k (== 2^14) 2-bit saturating
63    counters.  Given the address of the branch instruction, the array
64    index to use is computed both from the low order bits of the branch
65    instruction's address, and the global history - that is, from the
66    taken/not-taken behaviour of the most recent few branches.  This
67    makes the predictor able to correlate this branch's behaviour with
68    that of other branches.
69 
70    TODO: use predictor written by someone who understands this stuff.
71    Perhaps it would be better to move to a standard GShare predictor
72    and/or tournament predictor.
73 */
74 /* The index is composed of N_HIST bits at the top and N_IADD bits at
75    the bottom.  These numbers chosen somewhat arbitrarily, but note
76    that making N_IADD_BITS too small (eg 4) can cause large amounts of
77    aliasing, and hence misprediction, particularly if the history bits
78    are mostly unchanging. */
79 #define N_HIST_BITS 7
80 #define N_IADD_BITS 7
81 
82 #define N_BITS     (N_HIST_BITS + N_IADD_BITS)
83 #define N_COUNTERS (1 << N_BITS)
84 
85 static UWord shift_register = 0;   /* Contains global history */
86 static UChar counters[N_COUNTERS]; /* Counter array; presumably auto-zeroed */
87 
88 
do_cond_branch_predict(Addr instr_addr,Word takenW)89 static ULong do_cond_branch_predict ( Addr instr_addr, Word takenW )
90 {
91    UWord indx;
92    Bool  predicted_taken, actually_taken, mispredict;
93 
94    const UWord hist_mask = (1 << N_HIST_BITS) - 1;
95    const UWord iadd_mask = (1 << N_IADD_BITS) - 1;
96          UWord hist_bits = shift_register & hist_mask;
97          UWord iadd_bits = (instr_addr >> N_IADDR_LO_ZERO_BITS)
98                            & iadd_mask;
99 
100    tl_assert(hist_bits <= hist_mask);
101    tl_assert(iadd_bits <= iadd_mask);
102    indx = (hist_bits << N_IADD_BITS) | iadd_bits;
103    tl_assert(indx < N_COUNTERS);
104    if (0) VG_(printf)("index = %d\n", (Int)indx);
105 
106    tl_assert(takenW <= 1);
107    predicted_taken = counters[ indx ] >= 2;
108    actually_taken  = takenW > 0;
109 
110    mispredict = (actually_taken && (!predicted_taken))
111                 || ((!actually_taken) && predicted_taken);
112 
113    shift_register <<= 1;
114    shift_register |= (actually_taken ? 1 : 0);
115 
116    if (actually_taken) {
117       if (counters[indx] < 3)
118          counters[indx]++;
119    } else {
120       if (counters[indx] > 0)
121          counters[indx]--;
122    }
123 
124    tl_assert(counters[indx] <= 3);
125 
126    return mispredict ? 1 : 0;
127 }
128 
129 
130 /* A very simple indirect branch predictor.  Use the branch's address
131    to index a table which records the previous target address for this
132    branch (or whatever aliased with it) and use that as the
133    prediction. */
134 #define N_BTAC_BITS 9
135 #define N_BTAC      (1 << N_BTAC_BITS)
136 static Addr btac[N_BTAC]; /* BTAC; presumably auto-zeroed */
137 
do_ind_branch_predict(Addr instr_addr,Addr actual)138 static ULong do_ind_branch_predict ( Addr instr_addr, Addr actual )
139 {
140    Bool mispredict;
141    const UWord mask = (1 << N_BTAC_BITS) - 1;
142          UWord indx = (instr_addr >> N_IADDR_LO_ZERO_BITS)
143                       & mask;
144    tl_assert(indx < N_BTAC);
145    mispredict = btac[indx] != actual;
146    btac[indx] = actual;
147    return mispredict ? 1 : 0;
148 }
149 
150 
151 /*--------------------------------------------------------------------*/
152 /*--- end                                          cg_branchpred.c ---*/
153 /*--------------------------------------------------------------------*/
154 
155