1 /*
2
3 ***************************************************************************************************
4 **
5 ** profile.cpp
6 **
7 ** Real-Time Hierarchical Profiling for Game Programming Gems 3
8 **
9 ** by Greg Hjelstrom & Byon Garrabrant
10 **
11 ***************************************************************************************************/
12
13 // Credits: The Clock class was inspired by the Timer classes in
14 // Ogre (www.ogre3d.org).
15
16 #include "btQuickprof.h"
17
18 #ifndef BT_NO_PROFILE
19
20
21 static btClock gProfileClock;
22
23
24 #ifdef __CELLOS_LV2__
25 #include <sys/sys_time.h>
26 #include <sys/time_util.h>
27 #include <stdio.h>
28 #endif
29
30 #if defined (SUNOS) || defined (__SUNOS__)
31 #include <stdio.h>
32 #endif
33
34 #if defined(WIN32) || defined(_WIN32)
35
36 #define BT_USE_WINDOWS_TIMERS
37 #define WIN32_LEAN_AND_MEAN
38 #define NOWINRES
39 #define NOMCX
40 #define NOIME
41
42 #ifdef _XBOX
43 #include <Xtl.h>
44 #else //_XBOX
45 #include <windows.h>
46
47 #if WINVER <0x0602
48 #define GetTickCount64 GetTickCount
49 #endif
50
51 #endif //_XBOX
52
53 #include <time.h>
54
55
56 #else //_WIN32
57 #include <sys/time.h>
58 #endif //_WIN32
59
60 #define mymin(a,b) (a > b ? a : b)
61
62 struct btClockData
63 {
64
65 #ifdef BT_USE_WINDOWS_TIMERS
66 LARGE_INTEGER mClockFrequency;
67 LONGLONG mStartTick;
68 LONGLONG mPrevElapsedTime;
69 LARGE_INTEGER mStartTime;
70 #else
71 #ifdef __CELLOS_LV2__
72 uint64_t mStartTime;
73 #else
74 struct timeval mStartTime;
75 #endif
76 #endif //__CELLOS_LV2__
77
78 };
79
80 ///The btClock is a portable basic clock that measures accurate time in seconds, use for profiling.
btClock()81 btClock::btClock()
82 {
83 m_data = new btClockData;
84 #ifdef BT_USE_WINDOWS_TIMERS
85 QueryPerformanceFrequency(&m_data->mClockFrequency);
86 #endif
87 reset();
88 }
89
~btClock()90 btClock::~btClock()
91 {
92 delete m_data;
93 }
94
btClock(const btClock & other)95 btClock::btClock(const btClock& other)
96 {
97 m_data = new btClockData;
98 *m_data = *other.m_data;
99 }
100
operator =(const btClock & other)101 btClock& btClock::operator=(const btClock& other)
102 {
103 *m_data = *other.m_data;
104 return *this;
105 }
106
107
108 /// Resets the initial reference time.
reset()109 void btClock::reset()
110 {
111 #ifdef BT_USE_WINDOWS_TIMERS
112 QueryPerformanceCounter(&m_data->mStartTime);
113 m_data->mStartTick = GetTickCount64();
114 m_data->mPrevElapsedTime = 0;
115 #else
116 #ifdef __CELLOS_LV2__
117
118 typedef uint64_t ClockSize;
119 ClockSize newTime;
120 //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
121 SYS_TIMEBASE_GET( newTime );
122 m_data->mStartTime = newTime;
123 #else
124 gettimeofday(&m_data->mStartTime, 0);
125 #endif
126 #endif
127 }
128
129 /// Returns the time in ms since the last call to reset or since
130 /// the btClock was created.
getTimeMilliseconds()131 unsigned long int btClock::getTimeMilliseconds()
132 {
133 #ifdef BT_USE_WINDOWS_TIMERS
134 LARGE_INTEGER currentTime;
135 QueryPerformanceCounter(¤tTime);
136 LONGLONG elapsedTime = currentTime.QuadPart -
137 m_data->mStartTime.QuadPart;
138 // Compute the number of millisecond ticks elapsed.
139 unsigned long msecTicks = (unsigned long)(1000 * elapsedTime /
140 m_data->mClockFrequency.QuadPart);
141 // Check for unexpected leaps in the Win32 performance counter.
142 // (This is caused by unexpected data across the PCI to ISA
143 // bridge, aka south bridge. See Microsoft KB274323.)
144 unsigned long elapsedTicks = (unsigned long)(GetTickCount64() - m_data->mStartTick);
145 signed long msecOff = (signed long)(msecTicks - elapsedTicks);
146 if (msecOff < -100 || msecOff > 100)
147 {
148 // Adjust the starting time forwards.
149 LONGLONG msecAdjustment = mymin(msecOff *
150 m_data->mClockFrequency.QuadPart / 1000, elapsedTime -
151 m_data->mPrevElapsedTime);
152 m_data->mStartTime.QuadPart += msecAdjustment;
153 elapsedTime -= msecAdjustment;
154
155 // Recompute the number of millisecond ticks elapsed.
156 msecTicks = (unsigned long)(1000 * elapsedTime /
157 m_data->mClockFrequency.QuadPart);
158 }
159
160 // Store the current elapsed time for adjustments next time.
161 m_data->mPrevElapsedTime = elapsedTime;
162
163 return msecTicks;
164 #else
165
166 #ifdef __CELLOS_LV2__
167 uint64_t freq=sys_time_get_timebase_frequency();
168 double dFreq=((double) freq) / 1000.0;
169 typedef uint64_t ClockSize;
170 ClockSize newTime;
171 SYS_TIMEBASE_GET( newTime );
172 //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
173
174 return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq);
175 #else
176
177 struct timeval currentTime;
178 gettimeofday(¤tTime, 0);
179 return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000 +
180 (currentTime.tv_usec - m_data->mStartTime.tv_usec) / 1000;
181 #endif //__CELLOS_LV2__
182 #endif
183 }
184
185 /// Returns the time in us since the last call to reset or since
186 /// the Clock was created.
getTimeMicroseconds()187 unsigned long int btClock::getTimeMicroseconds()
188 {
189 #ifdef BT_USE_WINDOWS_TIMERS
190 LARGE_INTEGER currentTime;
191 QueryPerformanceCounter(¤tTime);
192 LONGLONG elapsedTime = currentTime.QuadPart -
193 m_data->mStartTime.QuadPart;
194
195 // Compute the number of millisecond ticks elapsed.
196 unsigned long msecTicks = (unsigned long)(1000 * elapsedTime /
197 m_data->mClockFrequency.QuadPart);
198
199 // Check for unexpected leaps in the Win32 performance counter.
200 // (This is caused by unexpected data across the PCI to ISA
201 // bridge, aka south bridge. See Microsoft KB274323.)
202 unsigned long elapsedTicks = (unsigned long)(GetTickCount64() - m_data->mStartTick);
203 signed long msecOff = (signed long)(msecTicks - elapsedTicks);
204 if (msecOff < -100 || msecOff > 100)
205 {
206 // Adjust the starting time forwards.
207 LONGLONG msecAdjustment = mymin(msecOff *
208 m_data->mClockFrequency.QuadPart / 1000, elapsedTime -
209 m_data->mPrevElapsedTime);
210 m_data->mStartTime.QuadPart += msecAdjustment;
211 elapsedTime -= msecAdjustment;
212 }
213
214 // Store the current elapsed time for adjustments next time.
215 m_data->mPrevElapsedTime = elapsedTime;
216
217 // Convert to microseconds.
218 unsigned long usecTicks = (unsigned long)(1000000 * elapsedTime /
219 m_data->mClockFrequency.QuadPart);
220
221 return usecTicks;
222 #else
223
224 #ifdef __CELLOS_LV2__
225 uint64_t freq=sys_time_get_timebase_frequency();
226 double dFreq=((double) freq)/ 1000000.0;
227 typedef uint64_t ClockSize;
228 ClockSize newTime;
229 //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
230 SYS_TIMEBASE_GET( newTime );
231
232 return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq);
233 #else
234
235 struct timeval currentTime;
236 gettimeofday(¤tTime, 0);
237 return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000000 +
238 (currentTime.tv_usec - m_data->mStartTime.tv_usec);
239 #endif//__CELLOS_LV2__
240 #endif
241 }
242
243
244
245 /// Returns the time in s since the last call to reset or since
246 /// the Clock was created.
getTimeSeconds()247 btScalar btClock::getTimeSeconds()
248 {
249 static const btScalar microseconds_to_seconds = btScalar(0.000001);
250 return btScalar(getTimeMicroseconds()) * microseconds_to_seconds;
251 }
252
253
254
Profile_Get_Ticks(unsigned long int * ticks)255 inline void Profile_Get_Ticks(unsigned long int * ticks)
256 {
257 *ticks = gProfileClock.getTimeMicroseconds();
258 }
259
Profile_Get_Tick_Rate(void)260 inline float Profile_Get_Tick_Rate(void)
261 {
262 // return 1000000.f;
263 return 1000.f;
264
265 }
266
267
268
269 /***************************************************************************************************
270 **
271 ** CProfileNode
272 **
273 ***************************************************************************************************/
274
275 /***********************************************************************************************
276 * INPUT: *
277 * name - pointer to a static string which is the name of this profile node *
278 * parent - parent pointer *
279 * *
280 * WARNINGS: *
281 * The name is assumed to be a static pointer, only the pointer is stored and compared for *
282 * efficiency reasons. *
283 *=============================================================================================*/
CProfileNode(const char * name,CProfileNode * parent)284 CProfileNode::CProfileNode( const char * name, CProfileNode * parent ) :
285 Name( name ),
286 TotalCalls( 0 ),
287 TotalTime( 0 ),
288 StartTime( 0 ),
289 RecursionCounter( 0 ),
290 Parent( parent ),
291 Child( NULL ),
292 Sibling( NULL ),
293 m_userPtr(0)
294 {
295 Reset();
296 }
297
298
CleanupMemory()299 void CProfileNode::CleanupMemory()
300 {
301 delete ( Child);
302 Child = NULL;
303 delete ( Sibling);
304 Sibling = NULL;
305 }
306
~CProfileNode(void)307 CProfileNode::~CProfileNode( void )
308 {
309 CleanupMemory();
310 }
311
312
313 /***********************************************************************************************
314 * INPUT: *
315 * name - static string pointer to the name of the node we are searching for *
316 * *
317 * WARNINGS: *
318 * All profile names are assumed to be static strings so this function uses pointer compares *
319 * to find the named node. *
320 *=============================================================================================*/
Get_Sub_Node(const char * name)321 CProfileNode * CProfileNode::Get_Sub_Node( const char * name )
322 {
323 // Try to find this sub node
324 CProfileNode * child = Child;
325 while ( child ) {
326 if ( child->Name == name ) {
327 return child;
328 }
329 child = child->Sibling;
330 }
331
332 // We didn't find it, so add it
333
334 CProfileNode * node = new CProfileNode( name, this );
335 node->Sibling = Child;
336 Child = node;
337 return node;
338 }
339
340
Reset(void)341 void CProfileNode::Reset( void )
342 {
343 TotalCalls = 0;
344 TotalTime = 0.0f;
345
346
347 if ( Child ) {
348 Child->Reset();
349 }
350 if ( Sibling ) {
351 Sibling->Reset();
352 }
353 }
354
355
Call(void)356 void CProfileNode::Call( void )
357 {
358 TotalCalls++;
359 if (RecursionCounter++ == 0) {
360 Profile_Get_Ticks(&StartTime);
361 }
362 }
363
364
Return(void)365 bool CProfileNode::Return( void )
366 {
367 if ( --RecursionCounter == 0 && TotalCalls != 0 ) {
368 unsigned long int time;
369 Profile_Get_Ticks(&time);
370 time-=StartTime;
371 TotalTime += (float)time / Profile_Get_Tick_Rate();
372 }
373 return ( RecursionCounter == 0 );
374 }
375
376
377 /***************************************************************************************************
378 **
379 ** CProfileIterator
380 **
381 ***************************************************************************************************/
CProfileIterator(CProfileNode * start)382 CProfileIterator::CProfileIterator( CProfileNode * start )
383 {
384 CurrentParent = start;
385 CurrentChild = CurrentParent->Get_Child();
386 }
387
388
First(void)389 void CProfileIterator::First(void)
390 {
391 CurrentChild = CurrentParent->Get_Child();
392 }
393
394
Next(void)395 void CProfileIterator::Next(void)
396 {
397 CurrentChild = CurrentChild->Get_Sibling();
398 }
399
400
Is_Done(void)401 bool CProfileIterator::Is_Done(void)
402 {
403 return CurrentChild == NULL;
404 }
405
406
Enter_Child(int index)407 void CProfileIterator::Enter_Child( int index )
408 {
409 CurrentChild = CurrentParent->Get_Child();
410 while ( (CurrentChild != NULL) && (index != 0) ) {
411 index--;
412 CurrentChild = CurrentChild->Get_Sibling();
413 }
414
415 if ( CurrentChild != NULL ) {
416 CurrentParent = CurrentChild;
417 CurrentChild = CurrentParent->Get_Child();
418 }
419 }
420
421
Enter_Parent(void)422 void CProfileIterator::Enter_Parent( void )
423 {
424 if ( CurrentParent->Get_Parent() != NULL ) {
425 CurrentParent = CurrentParent->Get_Parent();
426 }
427 CurrentChild = CurrentParent->Get_Child();
428 }
429
430
431 /***************************************************************************************************
432 **
433 ** CProfileManager
434 **
435 ***************************************************************************************************/
436
437 CProfileNode CProfileManager::Root( "Root", NULL );
438 CProfileNode * CProfileManager::CurrentNode = &CProfileManager::Root;
439 int CProfileManager::FrameCounter = 0;
440 unsigned long int CProfileManager::ResetTime = 0;
441
442
443 /***********************************************************************************************
444 * CProfileManager::Start_Profile -- Begin a named profile *
445 * *
446 * Steps one level deeper into the tree, if a child already exists with the specified name *
447 * then it accumulates the profiling; otherwise a new child node is added to the profile tree. *
448 * *
449 * INPUT: *
450 * name - name of this profiling record *
451 * *
452 * WARNINGS: *
453 * The string used is assumed to be a static string; pointer compares are used throughout *
454 * the profiling code for efficiency. *
455 *=============================================================================================*/
Start_Profile(const char * name)456 void CProfileManager::Start_Profile( const char * name )
457 {
458 if (name != CurrentNode->Get_Name()) {
459 CurrentNode = CurrentNode->Get_Sub_Node( name );
460 }
461
462 CurrentNode->Call();
463 }
464
465
466 /***********************************************************************************************
467 * CProfileManager::Stop_Profile -- Stop timing and record the results. *
468 *=============================================================================================*/
Stop_Profile(void)469 void CProfileManager::Stop_Profile( void )
470 {
471 // Return will indicate whether we should back up to our parent (we may
472 // be profiling a recursive function)
473 if (CurrentNode->Return()) {
474 CurrentNode = CurrentNode->Get_Parent();
475 }
476 }
477
478
479 /***********************************************************************************************
480 * CProfileManager::Reset -- Reset the contents of the profiling system *
481 * *
482 * This resets everything except for the tree structure. All of the timing data is reset. *
483 *=============================================================================================*/
Reset(void)484 void CProfileManager::Reset( void )
485 {
486 gProfileClock.reset();
487 Root.Reset();
488 Root.Call();
489 FrameCounter = 0;
490 Profile_Get_Ticks(&ResetTime);
491 }
492
493
494 /***********************************************************************************************
495 * CProfileManager::Increment_Frame_Counter -- Increment the frame counter *
496 *=============================================================================================*/
Increment_Frame_Counter(void)497 void CProfileManager::Increment_Frame_Counter( void )
498 {
499 FrameCounter++;
500 }
501
502
503 /***********************************************************************************************
504 * CProfileManager::Get_Time_Since_Reset -- returns the elapsed time since last reset *
505 *=============================================================================================*/
Get_Time_Since_Reset(void)506 float CProfileManager::Get_Time_Since_Reset( void )
507 {
508 unsigned long int time;
509 Profile_Get_Ticks(&time);
510 time -= ResetTime;
511 return (float)time / Profile_Get_Tick_Rate();
512 }
513
514 #include <stdio.h>
515
dumpRecursive(CProfileIterator * profileIterator,int spacing)516 void CProfileManager::dumpRecursive(CProfileIterator* profileIterator, int spacing)
517 {
518 profileIterator->First();
519 if (profileIterator->Is_Done())
520 return;
521
522 float accumulated_time=0,parent_time = profileIterator->Is_Root() ? CProfileManager::Get_Time_Since_Reset() : profileIterator->Get_Current_Parent_Total_Time();
523 int i;
524 int frames_since_reset = CProfileManager::Get_Frame_Count_Since_Reset();
525 for (i=0;i<spacing;i++) printf(".");
526 printf("----------------------------------\n");
527 for (i=0;i<spacing;i++) printf(".");
528 printf("Profiling: %s (total running time: %.3f ms) ---\n", profileIterator->Get_Current_Parent_Name(), parent_time );
529 float totalTime = 0.f;
530
531
532 int numChildren = 0;
533
534 for (i = 0; !profileIterator->Is_Done(); i++,profileIterator->Next())
535 {
536 numChildren++;
537 float current_total_time = profileIterator->Get_Current_Total_Time();
538 accumulated_time += current_total_time;
539 float fraction = parent_time > SIMD_EPSILON ? (current_total_time / parent_time) * 100 : 0.f;
540 {
541 int i; for (i=0;i<spacing;i++) printf(".");
542 }
543 printf("%d -- %s (%.2f %%) :: %.3f ms / frame (%d calls)\n",i, profileIterator->Get_Current_Name(), fraction,(current_total_time / (double)frames_since_reset),profileIterator->Get_Current_Total_Calls());
544 totalTime += current_total_time;
545 //recurse into children
546 }
547
548 if (parent_time < accumulated_time)
549 {
550 //printf("what's wrong\n");
551 }
552 for (i=0;i<spacing;i++) printf(".");
553 printf("%s (%.3f %%) :: %.3f ms\n", "Unaccounted:",parent_time > SIMD_EPSILON ? ((parent_time - accumulated_time) / parent_time) * 100 : 0.f, parent_time - accumulated_time);
554
555 for (i=0;i<numChildren;i++)
556 {
557 profileIterator->Enter_Child(i);
558 dumpRecursive(profileIterator,spacing+3);
559 profileIterator->Enter_Parent();
560 }
561 }
562
563
564
dumpAll()565 void CProfileManager::dumpAll()
566 {
567 CProfileIterator* profileIterator = 0;
568 profileIterator = CProfileManager::Get_Iterator();
569
570 dumpRecursive(profileIterator,0);
571
572 CProfileManager::Release_Iterator(profileIterator);
573 }
574
575
576
577
578 #endif //BT_NO_PROFILE
579