/**
 * @file op_alloc_counter.c
 * hardware counter allocation
 *
 * You can have silliness here.
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 */

#include <stdlib.h>
#include <ctype.h>
#include <dirent.h>

#include "op_events.h"
#include "op_libiberty.h"


typedef struct counter_arc_head {
	/** the head of the list of counters allowed for this event */
	struct list_head next;
} counter_arc_head;


typedef struct counter_arc {
	/** counter number */
	int counter;
	/** the next counter allowed for this event */
	struct list_head next;
} counter_arc;


/**
 * @param pev an array of events
 * @param nr_events number of entries in pev
 *
 * Build an array of allowed-counter lists, one for each event:
 * counter_arc_head[i] is the list of counters allowed for event pev[i].
 * The returned pointer is an array of nr_events entries.
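 * For example, an event whose counter_mask is 0x5 yields the list {0, 2}.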
 */
static counter_arc_head *
build_counter_arc(struct op_event const * pev[], int nr_events)
{
	counter_arc_head * ctr_arc;
	int i;

	ctr_arc = xmalloc(nr_events * sizeof(*ctr_arc));

	for (i = 0; i < nr_events; ++i) {
		int j;
		u32 mask = pev[i]->counter_mask;

		list_init(&ctr_arc[i].next);
		for (j = 0; mask; ++j) {
			if (mask & (1 << j)) {
				counter_arc * arc =
					xmalloc(sizeof(counter_arc));
				arc->counter = j;
				/* We loop by increasing counter number and
				 * allocation uses a left to right tree walk,
				 * so we add at the tail to ensure counters
				 * are allocated in increasing order: not
				 * required, but a bit less surprising when
				 * debugging.
				 */
				list_add_tail(&arc->next, &ctr_arc[i].next);
				mask &= ~(1 << j);
			}
		}
	}

	return ctr_arc;
}


/**
 * @param ctr_arc the array to deallocate
 * @param nr_events number of entries in the array
 *
 * Deallocate all resources previously allocated by build_counter_arc().
 */
static void delete_counter_arc(counter_arc_head * ctr_arc, int nr_events)
{
	int i;
	for (i = 0; i < nr_events; ++i) {
		struct list_head * pos, * pos2;
		list_for_each_safe(pos, pos2, &ctr_arc[i].next) {
			counter_arc * arc = list_entry(pos, counter_arc, next);
			list_del(&arc->next);
			free(arc);
		}
	}
	free(ctr_arc);
}


/**
 * @param ctr_arc tree description, ctr_arc[i] is the i-th level of the tree
 * @param max_depth number of entries in ctr_arc == depth of the tree
 * @param depth current level we are exploring
 * @param allocated_mask bitmask of counters already allocated
 * @param counter_map array of counter number mappings, returned results go
 *   here
 *
 * Return non zero on success, in which case counter_map is filled with the
 * counter mapping numbers.
 *
 * The solution is searched for through simple backtracking, recursively
 * exploring all possible solutions until one is found; pruning is done in
 * O(1) by tracking a bitmask of already allocated counters. Nodes are walked
 * in preorder, left to right.
 *
 * In case of extended events (which require no physical counter), the
 * associated counter_map entry will be -1.
 *
 * Possible improvement if necessary: partition counters into classes, where
 * two counters belong to the same class if they allow exactly the same set
 * of events. A variant of the backtracking algorithm can then work on
 * classes of counters rather than on counters (this is not an improvement
 * if each counter goes in its own class).
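 *
 * Illustrative example (not part of the original comment): with two events
 * allowed on counters {0, 1} and {0} respectively, the search first tries
 * counter 0 for the first event, fails to place the second, backtracks,
 * and succeeds with counter 1 for the first event and counter 0 for the
 * second.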
 */
static int
allocate_counter(counter_arc_head const * ctr_arc, int max_depth, int depth,
		 u32 allocated_mask, size_t * counter_map)
{
	struct list_head * pos;

	if (depth == max_depth)
		return 1;

	/* An empty list means no physical counter is available for this
	 * event (extended event): map it to -1 and go on with the next one.
	 */
	if ((&ctr_arc[depth].next)->next == &ctr_arc[depth].next) {
		counter_map[depth] = -1;
		if (allocate_counter(ctr_arc, max_depth, depth + 1,
				     allocated_mask,
				     counter_map))
			return 1;
	} else {
		list_for_each(pos, &ctr_arc[depth].next) {
			counter_arc const * arc =
				list_entry(pos, counter_arc, next);

			if (allocated_mask & (1 << arc->counter))
				continue;

			counter_map[depth] = arc->counter;

			if (allocate_counter(ctr_arc, max_depth, depth + 1,
					     allocated_mask | (1 << arc->counter),
					     counter_map))
				return 1;
		}
	}

	return 0;
}

/* determine which directories are counter directories */
static int perfcounterdir(const struct dirent * entry)
{
	return (isdigit(entry->d_name[0]));
}


/**
 * @param mask pointer where to place the bit mask of unavailable counters
 *
 * return >= 0  number of counters that are available
 *        < 0   could not determine the number of counters
 *
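 * For example, if /dev/oprofile/0 and /dev/oprofile/1 are the only counter
 * directories, *mask is set to ~0x3 and 2 is returned.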
 */
static int op_get_counter_mask(u32 * mask)
{
	struct dirent ** counterlist;
	int count, i;
	/* assume nothing is available */
	u32 available = 0;

	count = scandir("/dev/oprofile", &counterlist, perfcounterdir,
			alphasort);
	if (count < 0)
		/* unable to determine bit mask */
		return -1;
	/* convert to bit map (0 where counter exists) */
	for (i = 0; i < count; ++i) {
		available |= 1 << atoi(counterlist[i]->d_name);
		free(counterlist[i]);
	}
	*mask = ~available;
	free(counterlist);
	return count;
}

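/**
 * @param pev array of events to map
 * @param nr_events number of entries in pev
 * @param cpu_type cpu type the events are for
 *
 * Map each event in pev to a hardware counter. On success an xmalloc'ed
 * array of nr_events counter numbers is returned (the caller is expected
 * to free() it); extended events get a (size_t)-1 entry. NULL is returned
 * when no valid mapping exists or when there are more PMC events than
 * physical counters.
 */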
size_t * map_event_to_counter(struct op_event const * pev[], int nr_events,
			      op_cpu cpu_type)
{
	counter_arc_head * ctr_arc;
	size_t * counter_map;
	int i, nr_counters, nr_pmc_events;
	op_cpu curr_cpu_type;
	u32 unavailable_counters = 0;

	/* Either ophelp or one of the libop tests may invoke this
	 * function with a non-native cpu_type. If so, we should not
	 * call op_get_counter_mask because that will look for real counter
	 * information in oprofilefs.
	 */
	curr_cpu_type = op_get_cpu_type();
	if (cpu_type != curr_cpu_type)
		nr_counters = op_get_nr_counters(cpu_type);
	else
		nr_counters = op_get_counter_mask(&unavailable_counters);

	/* no counters: probably perfmon is managing the perfmon hw */
	if (nr_counters <= 0) {
		nr_counters = op_get_nr_counters(cpu_type);
		unavailable_counters = (~0) << nr_counters;
	}

	/* check that we have enough physical counters to map the events */
	for (i = 0, nr_pmc_events = 0; i < nr_events; i++)
		if (pev[i]->ext == NULL)
			if (++nr_pmc_events > nr_counters)
				return 0;

	ctr_arc = build_counter_arc(pev, nr_events);

	counter_map = xmalloc(nr_events * sizeof(size_t));

	if (!allocate_counter(ctr_arc, nr_events, 0, unavailable_counters,
			      counter_map)) {
		free(counter_map);
		counter_map = 0;
	}

	delete_counter_arc(ctr_arc, nr_events);
	return counter_map;
}

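/*
 * Usage sketch (illustrative only, not part of the build): the event
 * array would normally be filled from the op_events lookup routines;
 * apart from map_event_to_counter(), op_get_cpu_type() and free(), the
 * names below are placeholders.
 *
 *	struct op_event const * pev[2];
 *	size_t * counter_map;
 *
 *	pev[0] = first_resolved_event;
 *	pev[1] = second_resolved_event;
 *	counter_map = map_event_to_counter(pev, 2, op_get_cpu_type());
 *	if (counter_map) {
 *		... pev[i] runs on counter counter_map[i] ...
 *		free(counter_map);
 *	}
 */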