1 /*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\
2 |*
3 |* The LLVM Compiler Infrastructure
4 |*
5 |* This file is distributed under the University of Illinois Open Source
6 |* License. See LICENSE.TXT for details.
7 |*
8 \*===----------------------------------------------------------------------===*/
9
10 #include "InstrProfiling.h"
11 #include "InstrProfilingInternal.h"
12 #include "InstrProfilingUtil.h" /* For PS4 getenv shim. */
13 #include <limits.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #define INSTR_PROF_VALUE_PROF_DATA
18 #define INSTR_PROF_COMMON_API_IMPL
19 #include "InstrProfData.inc"
20
21 static int hasStaticCounters = 1;
22 static int OutOfNodesWarnings = 0;
23 static int hasNonDefaultValsPerSite = 0;
24 #define INSTR_PROF_MAX_VP_WARNS 10
25 #define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 8
26 #define INSTR_PROF_VNODE_POOL_SIZE 1024
27
28 #ifndef _MSC_VER
29 /* A shared static pool in addition to the vnodes statically
30 * allocated by the compiler. */
31 COMPILER_RT_VISIBILITY ValueProfNode
32 lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION(
33 COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME_STR);
34 #endif
35
36 COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =
37 INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;
38
lprofSetupValueProfiler()39 COMPILER_RT_VISIBILITY void lprofSetupValueProfiler() {
40 const char *Str = 0;
41 Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE");
42 if (Str && Str[0]) {
43 VPMaxNumValsPerSite = atoi(Str);
44 hasNonDefaultValsPerSite = 1;
45 }
46 if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE)
47 VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
48 }
49
lprofSetMaxValsPerSite(uint32_t MaxVals)50 COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
51 VPMaxNumValsPerSite = MaxVals;
52 hasNonDefaultValsPerSite = 1;
53 }
54
55 /* This method is only used in value profiler mock testing. */
56 COMPILER_RT_VISIBILITY void
__llvm_profile_set_num_value_sites(__llvm_profile_data * Data,uint32_t ValueKind,uint16_t NumValueSites)57 __llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
58 uint32_t ValueKind, uint16_t NumValueSites) {
59 *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
60 }
61
62 /* This method is only used in value profiler mock testing. */
63 COMPILER_RT_VISIBILITY const __llvm_profile_data *
__llvm_profile_iterate_data(const __llvm_profile_data * Data)64 __llvm_profile_iterate_data(const __llvm_profile_data *Data) {
65 return Data + 1;
66 }
67
68 /* This method is only used in value profiler mock testing. */
69 COMPILER_RT_VISIBILITY void *
__llvm_get_function_addr(const __llvm_profile_data * Data)70 __llvm_get_function_addr(const __llvm_profile_data *Data) {
71 return Data->FunctionPointer;
72 }
73
74 /* Allocate an array that holds the pointers to the linked lists of
75 * value profile counter nodes. The number of element of the array
76 * is the total number of value profile sites instrumented. Returns
77 * 0 if allocation fails.
78 */
79
allocateValueProfileCounters(__llvm_profile_data * Data)80 static int allocateValueProfileCounters(__llvm_profile_data *Data) {
81 uint64_t NumVSites = 0;
82 uint32_t VKI;
83
84 /* This function will never be called when value site array is allocated
85 statically at compile time. */
86 hasStaticCounters = 0;
87 /* When dynamic allocation is enabled, allow tracking the max number of
88 * values allowd. */
89 if (!hasNonDefaultValsPerSite)
90 VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
91
92 for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
93 NumVSites += Data->NumValueSites[VKI];
94
95 ValueProfNode **Mem =
96 (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *));
97 if (!Mem)
98 return 0;
99 if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) {
100 free(Mem);
101 return 0;
102 }
103 return 1;
104 }
105
allocateOneNode(__llvm_profile_data * Data,uint32_t Index,uint64_t Value)106 static ValueProfNode *allocateOneNode(__llvm_profile_data *Data, uint32_t Index,
107 uint64_t Value) {
108 ValueProfNode *Node;
109
110 if (!hasStaticCounters)
111 return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));
112
113 /* Early check to avoid value wrapping around. */
114 if (CurrentVNode + 1 > EndVNode) {
115 if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) {
116 PROF_WARN("Unable to track new values: %s. "
117 " Consider using option -mllvm -vp-counters-per-site=<n> to "
118 "allocate more"
119 " value profile counters at compile time. \n",
120 "Running out of static counters");
121 }
122 return 0;
123 }
124 Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);
125 /* Due to section padding, EndVNode point to a byte which is one pass
126 * an incomplete VNode, so we need to skip the last incomplete node. */
127 if (Node + 1 > EndVNode)
128 return 0;
129
130 return Node;
131 }
132
133 COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_target(uint64_t TargetValue,void * Data,uint32_t CounterIndex)134 __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
135 uint32_t CounterIndex) {
136 __llvm_profile_data *PData = (__llvm_profile_data *)Data;
137 if (!PData)
138 return;
139
140 if (!PData->Values) {
141 if (!allocateValueProfileCounters(PData))
142 return;
143 }
144
145 ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;
146 ValueProfNode *PrevVNode = NULL;
147 ValueProfNode *MinCountVNode = NULL;
148 ValueProfNode *CurVNode = ValueCounters[CounterIndex];
149 uint64_t MinCount = UINT64_MAX;
150
151 uint8_t VDataCount = 0;
152 while (CurVNode) {
153 if (TargetValue == CurVNode->Value) {
154 CurVNode->Count++;
155 return;
156 }
157 if (CurVNode->Count < MinCount) {
158 MinCount = CurVNode->Count;
159 MinCountVNode = CurVNode;
160 }
161 PrevVNode = CurVNode;
162 CurVNode = CurVNode->Next;
163 ++VDataCount;
164 }
165
166 if (VDataCount >= VPMaxNumValsPerSite) {
167 /* Bump down the min count node's count. If it reaches 0,
168 * evict it. This eviction/replacement policy makes hot
169 * targets more sticky while cold targets less so. In other
170 * words, it makes it less likely for the hot targets to be
171 * prematurally evicted during warmup/establishment period,
172 * when their counts are still low. In a special case when
173 * the number of values tracked is reduced to only one, this
174 * policy will guarantee that the dominating target with >50%
175 * total count will survive in the end. Note that this scheme
176 * allows the runtime to track the min count node in an adaptive
177 * manner. It can correct previous mistakes and eventually
178 * lock on a cold target that is alread in stable state.
179 *
180 * In very rare cases, this replacement scheme may still lead
181 * to target loss. For instance, out of \c N value slots, \c N-1
182 * slots are occupied by luke warm targets during the warmup
183 * period and the remaining one slot is competed by two or more
184 * very hot targets. If those hot targets occur in an interleaved
185 * way, none of them will survive (gain enough weight to throw out
186 * other established entries) due to the ping-pong effect.
187 * To handle this situation, user can choose to increase the max
188 * number of tracked values per value site. Alternatively, a more
189 * expensive eviction mechanism can be implemented. It requires
190 * the runtime to track the total number of evictions per-site.
191 * When the total number of evictions reaches certain threshold,
192 * the runtime can wipe out more than one lowest count entries
193 * to give space for hot targets.
194 */
195 if (!(--MinCountVNode->Count)) {
196 CurVNode = MinCountVNode;
197 CurVNode->Value = TargetValue;
198 CurVNode->Count++;
199 }
200 return;
201 }
202
203 CurVNode = allocateOneNode(PData, CounterIndex, TargetValue);
204 if (!CurVNode)
205 return;
206 CurVNode->Value = TargetValue;
207 CurVNode->Count++;
208
209 uint32_t Success = 0;
210 if (!ValueCounters[CounterIndex])
211 Success =
212 COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode);
213 else if (PrevVNode && !PrevVNode->Next)
214 Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode);
215
216 if (!Success && !hasStaticCounters) {
217 free(CurVNode);
218 return;
219 }
220 }
221
222 /*
223 * A wrapper struct that represents value profile runtime data.
224 * Like InstrProfRecord class which is used by profiling host tools,
225 * ValueProfRuntimeRecord also implements the abstract intefaces defined in
226 * ValueProfRecordClosure so that the runtime data can be serialized using
227 * shared C implementation.
228 */
229 typedef struct ValueProfRuntimeRecord {
230 const __llvm_profile_data *Data;
231 ValueProfNode **NodesKind[IPVK_Last + 1];
232 uint8_t **SiteCountArray;
233 } ValueProfRuntimeRecord;
234
235 /* ValueProfRecordClosure Interface implementation. */
236
getNumValueSitesRT(const void * R,uint32_t VK)237 static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
238 return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK];
239 }
240
getNumValueDataRT(const void * R,uint32_t VK)241 static uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
242 uint32_t S = 0, I;
243 const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
244 if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR)
245 return 0;
246 for (I = 0; I < Record->Data->NumValueSites[VK]; I++)
247 S += Record->SiteCountArray[VK][I];
248 return S;
249 }
250
getNumValueDataForSiteRT(const void * R,uint32_t VK,uint32_t S)251 static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK,
252 uint32_t S) {
253 const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
254 return Record->SiteCountArray[VK][S];
255 }
256
257 static ValueProfRuntimeRecord RTRecord;
258 static ValueProfRecordClosure RTRecordClosure = {
259 &RTRecord, INSTR_PROF_NULLPTR, /* GetNumValueKinds */
260 getNumValueSitesRT, getNumValueDataRT, getNumValueDataForSiteRT,
261 INSTR_PROF_NULLPTR, /* RemapValueData */
262 INSTR_PROF_NULLPTR, /* GetValueForSite, */
263 INSTR_PROF_NULLPTR /* AllocValueProfData */
264 };
265
266 static uint32_t
initializeValueProfRuntimeRecord(const __llvm_profile_data * Data,uint8_t * SiteCountArray[])267 initializeValueProfRuntimeRecord(const __llvm_profile_data *Data,
268 uint8_t *SiteCountArray[]) {
269 unsigned I, J, S = 0, NumValueKinds = 0;
270 ValueProfNode **Nodes = (ValueProfNode **)Data->Values;
271 RTRecord.Data = Data;
272 RTRecord.SiteCountArray = SiteCountArray;
273 for (I = 0; I <= IPVK_Last; I++) {
274 uint16_t N = Data->NumValueSites[I];
275 if (!N)
276 continue;
277
278 NumValueKinds++;
279
280 RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR;
281 for (J = 0; J < N; J++) {
282 /* Compute value count for each site. */
283 uint32_t C = 0;
284 ValueProfNode *Site =
285 Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR;
286 while (Site) {
287 C++;
288 Site = Site->Next;
289 }
290 if (C > UCHAR_MAX)
291 C = UCHAR_MAX;
292 RTRecord.SiteCountArray[I][J] = C;
293 }
294 S += N;
295 }
296 return NumValueKinds;
297 }
298
getNextNValueData(uint32_t VK,uint32_t Site,InstrProfValueData * Dst,ValueProfNode * StartNode,uint32_t N)299 static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site,
300 InstrProfValueData *Dst,
301 ValueProfNode *StartNode, uint32_t N) {
302 unsigned I;
303 ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site];
304 for (I = 0; I < N; I++) {
305 Dst[I].Value = VNode->Value;
306 Dst[I].Count = VNode->Count;
307 VNode = VNode->Next;
308 }
309 return VNode;
310 }
311
getValueProfDataSizeWrapper(void)312 static uint32_t getValueProfDataSizeWrapper(void) {
313 return getValueProfDataSize(&RTRecordClosure);
314 }
315
getNumValueDataForSiteWrapper(uint32_t VK,uint32_t S)316 static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) {
317 return getNumValueDataForSiteRT(&RTRecord, VK, S);
318 }
319
320 static VPDataReaderType TheVPDataReader = {
321 initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,
322 getFirstValueProfRecord, getNumValueDataForSiteWrapper,
323 getValueProfDataSizeWrapper, getNextNValueData};
324
lprofGetVPDataReader()325 COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader() {
326 return &TheVPDataReader;
327 }
328