// Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// instrumentation.h: contains the definitions needed to
// instrument code for profiling:
//   ScopedProfilingLabel, RegisterCurrentThreadForProfiling.
//
// profiler.h is only needed to drive the profiler:
//   StartProfiling, FinishProfiling.
//
// See the usage example in profiler.h.
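//
// A minimal end-to-end sketch (not from this file; Compute() and the
// surrounding code are hypothetical, and StartProfiling/FinishProfiling
// come from profiler.h):
//
//   gemmlowp::RegisterCurrentThreadForProfiling();
//   gemmlowp::StartProfiling();
//   {
//     gemmlowp::ScopedProfilingLabel label("Compute");
//     Compute();  // samples taken here are attributed to "Compute"
//   }
//   gemmlowp::FinishProfiling();  // reports the accumulated profile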

#ifndef GEMMLOWP_PROFILING_INSTRUMENTATION_H_
#define GEMMLOWP_PROFILING_INSTRUMENTATION_H_

#include <cstdio>

#ifndef GEMMLOWP_USE_STLPORT
#include <cstdint>
#else
#include <stdint.h>
namespace std {
using ::int16_t;
using ::int32_t;
using ::int8_t;
using ::size_t;
using ::uint16_t;
using ::uint32_t;
using ::uint8_t;
using ::uintptr_t;
}  // namespace std
#endif

#include <algorithm>
#include <cassert>
#include <cstdlib>

#ifdef GEMMLOWP_PROFILING
#include <cstring>
#include <set>
#endif

#include "./pthread_everywhere.h"

namespace gemmlowp {

inline void ReleaseBuildAssertion(bool condition, const char* msg) {
  if (!condition) {
    fprintf(stderr, "gemmlowp error: %s\n", msg);
    abort();
  }
}

class Mutex {
 public:
  Mutex(const Mutex&) = delete;
  Mutex& operator=(const Mutex&) = delete;

  Mutex() { pthread_mutex_init(&m, NULL); }
  ~Mutex() { pthread_mutex_destroy(&m); }

  void Lock() { pthread_mutex_lock(&m); }
  void Unlock() { pthread_mutex_unlock(&m); }

 private:
  pthread_mutex_t m;
};

class GlobalMutexes {
 public:
  static Mutex* Profiler() {
    static Mutex m;
    return &m;
  }

  static Mutex* EightBitIntGemm() {
    static Mutex m;
    return &m;
  }
};

// A very simple RAII helper to lock and unlock a Mutex
struct ScopedLock {
  ScopedLock(Mutex* m) : _m(m) { _m->Lock(); }
  ~ScopedLock() { _m->Unlock(); }

 private:
  Mutex* _m;
};
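
// Example (sketch): the mutex is held exactly for the lifetime of the
// ScopedLock object, so an extra block scope delimits the critical section.
//
//   {
//     ScopedLock sl(GlobalMutexes::Profiler());
//     // critical section; unlocked when `sl` goes out of scope
//   }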

// Profiling definitions. Two paths: when profiling is enabled,
// and when profiling is disabled.
#ifdef GEMMLOWP_PROFILING
// This code path is when profiling is enabled.

// A pseudo-call-stack. Contrary to a real call-stack, this only
// contains pointers to literal strings that were manually entered
// in the instrumented code (see ScopedProfilingLabel).
struct ProfilingStack {
  static const std::size_t kMaxSize = 14;
  typedef const char* LabelsArrayType[kMaxSize];
  LabelsArrayType labels;
  std::size_t size;
  Mutex* lock;

  ProfilingStack() { memset(this, 0, sizeof(ProfilingStack)); }

  void Push(const char* label) {
    ScopedLock sl(lock);
    ReleaseBuildAssertion(size < kMaxSize, "ProfilingStack overflow");
    labels[size] = label;
    size++;
  }

  void Pop() {
    ScopedLock sl(lock);
    ReleaseBuildAssertion(size > 0, "ProfilingStack underflow");
    size--;
  }

  void UpdateTop(const char* new_label) {
    ScopedLock sl(lock);
    assert(size);
    labels[size - 1] = new_label;
  }

  ProfilingStack& operator=(const ProfilingStack& other) {
    memcpy(this, &other, sizeof(ProfilingStack));
    return *this;
  }

  bool operator==(const ProfilingStack& other) const {
    return !memcmp(this, &other, sizeof(ProfilingStack));
  }
};

static_assert(
    !(sizeof(ProfilingStack) & (sizeof(ProfilingStack) - 1)),
    "ProfilingStack should have power-of-two size to fit in cache lines");

struct ThreadInfo;

// The global set of threads being profiled.
inline std::set<ThreadInfo*>& ThreadsUnderProfiling() {
  static std::set<ThreadInfo*> v;
  return v;
}

struct ThreadInfo {
  pthread_key_t key;  // used only to get a callback at thread exit.
  ProfilingStack stack;

  ThreadInfo() {
    pthread_key_create(&key, ThreadExitCallback);
    pthread_setspecific(key, this);
    stack.lock = new Mutex();
  }

  static void ThreadExitCallback(void* ptr) {
    ScopedLock sl(GlobalMutexes::Profiler());
    ThreadInfo* self = static_cast<ThreadInfo*>(ptr);
    ThreadsUnderProfiling().erase(self);
    pthread_key_delete(self->key);
    delete self->stack.lock;
  }
};

inline ThreadInfo& ThreadLocalThreadInfo() {
  static pthread_key_t key;
  static auto DeleteThreadInfo = [](void* threadInfoPtr) {
    ThreadInfo* threadInfo = static_cast<ThreadInfo*>(threadInfoPtr);
    if (threadInfo) {
      delete threadInfo;
    }
  };

  static int key_result = pthread_key_create(&key, DeleteThreadInfo);

  ThreadInfo* threadInfo = static_cast<ThreadInfo*>(pthread_getspecific(key));
  if (!threadInfo) {
    threadInfo = new ThreadInfo();
    pthread_setspecific(key, threadInfo);
  }
  return *threadInfo;
}

// ScopedProfilingLabel is how one instruments code for profiling
// with this profiler. Construct local ScopedProfilingLabel variables,
// passing a literal string describing the local code. Profile
// samples will then be annotated with this label, while it is in scope
// (whence the name --- also known as RAII).
// See the example in profiler.h.
class ScopedProfilingLabel {
  ProfilingStack* profiling_stack_;

 public:
  explicit ScopedProfilingLabel(const char* label)
      : profiling_stack_(&ThreadLocalThreadInfo().stack) {
    profiling_stack_->Push(label);
  }

  ~ScopedProfilingLabel() { profiling_stack_->Pop(); }

  void Update(const char* new_label) { profiling_stack_->UpdateTop(new_label); }
};
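
// Example (sketch, with a hypothetical ProcessBlocks function): labels nest,
// and Update() relabels the innermost scope without popping it.
//
//   void ProcessBlocks() {
//     ScopedProfilingLabel label("process blocks");
//     for (int pass = 0; pass < 2; pass++) {
//       label.Update(pass == 0 ? "first pass" : "second pass");
//       // samples taken here are attributed to the current label
//     }
//   }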

// To be called once on each thread to be profiled.
inline void RegisterCurrentThreadForProfiling() {
  ScopedLock sl(GlobalMutexes::Profiler());
  ThreadsUnderProfiling().insert(&ThreadLocalThreadInfo());
}
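
// Example (sketch, with a hypothetical worker entry point): each worker
// thread registers itself once, before doing any work to be profiled.
//
//   void* WorkerThreadFunc(void*) {
//     gemmlowp::RegisterCurrentThreadForProfiling();
//     // ... profiled work ...
//     return nullptr;
//   }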

#else  // not GEMMLOWP_PROFILING
// This code path is when profiling is disabled.

// This empty definition of ScopedProfilingLabel ensures that
// it has zero runtime overhead when profiling is disabled.
struct ScopedProfilingLabel {
  explicit ScopedProfilingLabel(const char*) {}
  void Update(const char*) {}
};

inline void RegisterCurrentThreadForProfiling() {}

#endif

}  // end namespace gemmlowp

#endif  // GEMMLOWP_PROFILING_INSTRUMENTATION_H_