1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Gareth Hughes
26 */
27
28 #ifndef __M_DEBUG_UTIL_H__
29 #define __M_DEBUG_UTIL_H__
30
31
32 #ifdef DEBUG_MATH /* This code only used for debugging */
33
34
35 #include "c99_math.h"
36
37
/* Cycle-counter benchmarking switch.  Remove (or comment out) the
 * #define below to deactivate the cycle counter.
 *
 * The counter is only enabled for GCC-compatible compilers on i386
 * (with USE_X86_ASM) or SPARC (with USE_SPARC_ASM).
 * NOTE: on x86 it works only on CPUs which know the 'rdtsc' instruction
 * (586 or higher) (hope, you don't try to debug Mesa on a 386 ;)
 *
 * NOTE(review): __x86_64__ is not part of this condition, so the x86_64
 * counter macros further down are only selected if RUN_DEBUG_BENCHMARK
 * is defined from outside this header - confirm whether that is intended.
 */
#if defined(__GNUC__) && \
    ((defined(__i386__) && defined(USE_X86_ASM)) || \
     (defined(__sparc__) && defined(USE_SPARC_ASM)))
#define RUN_DEBUG_BENCHMARK
#endif
47
#define TEST_COUNT 128          /* size of the tested vector array */

/* Precision limits, in significand bits.  significand_match() reports
 * MAX_PRECISION for an exact match.
 * NOTE(review): REQUIRED_PRECISION is presumably the pass threshold the
 * test code compares against; with these values up to 24 - 10 = 14 low
 * bits may differ (an older remark here said "4 bits") - confirm intent.
 */
#define REQUIRED_PRECISION 10   /* min. number of matching bits required */
#define MAX_PRECISION 24        /* max. precision possible */
52
53
#ifdef RUN_DEBUG_BENCHMARK

#include <limits.h>   /* LONG_MAX, used by the counter macros below */

/* Overhead of profiling counter in cycles.  Automatically adjusted to
 * your machine at run time - counter initialization should give very
 * consistent results.
 */
extern long counter_overhead;

/* This is the value of the environment variable MESA_PROFILE, and is
 * used to determine if we should benchmark the functions as well as
 * verify their correctness.
 */
extern char *mesa_profile;

/* Cycle-counting macros, one set per supported CPU:
 *
 *   INIT_COUNTER()  - calibrate counter_overhead (on x86: the smallest
 *                     delta seen between two back-to-back timestamp
 *                     reads; on SPARC: a fixed constant).
 *   BEGIN_RACE(x)   - set x to LONG_MAX and open a timing loop; the code
 *                     to be measured goes between BEGIN_RACE and END_RACE.
 *   END_RACE(x)     - close the loop, leaving in x the smallest delta
 *                     observed, minus counter_overhead.
 *
 * BEGIN_RACE/END_RACE are not self-contained statements: BEGIN_RACE opens
 * a 'for' block that END_RACE closes, and both use an integer loop
 * variable named 'cycle_i' that the caller must declare.
 *
 * Modify the number of tests if you like.
 * We take the minimum of all results, because every error should be
 * positive (time used by other processes, task switches etc).
 * It is assumed that all calculations are done in the cache.
 */

#if defined(__i386__)

#if 1 /* PPro, PII, PIII version */

/* Profiling on the P6 architecture requires a little more work, due to
 * the internal out-of-order execution.  We must perform a serializing
 * 'cpuid' instruction before and after the 'rdtsc' instructions to make
 * sure no other uops are executed when we sample the timestamp counter.
 *
 * Only the low 32 bits of the timestamp (%eax) are stored.
 *
 * NOTE(review): the asm pushes %ebx and then stores to an "=m" operand.
 * If the compiler addresses that operand relative to %esp (e.g. when the
 * frame pointer is omitted) the store would land at the wrong offset -
 * confirm the build flags used with this code.
 */
#define INIT_COUNTER()                                                  \
   do {                                                                 \
      int cycle_i;                                                      \
      counter_overhead = LONG_MAX;                                      \
      for ( cycle_i = 0 ; cycle_i < 8 ; cycle_i++ ) {                   \
         long cycle_tmp1 = 0, cycle_tmp2 = 0;                           \
         __asm__ __volatile__ ( "push %%ebx       \n"                   \
                                "xor %%eax, %%eax \n"                   \
                                "cpuid            \n"                   \
                                "rdtsc            \n"                   \
                                "mov %%eax, %0    \n"                   \
                                "xor %%eax, %%eax \n"                   \
                                "cpuid            \n"                   \
                                "pop %%ebx        \n"                   \
                                "push %%ebx       \n"                   \
                                "xor %%eax, %%eax \n"                   \
                                "cpuid            \n"                   \
                                "rdtsc            \n"                   \
                                "mov %%eax, %1    \n"                   \
                                "xor %%eax, %%eax \n"                   \
                                "cpuid            \n"                   \
                                "pop %%ebx        \n"                   \
                                : "=m" (cycle_tmp1), "=m" (cycle_tmp2)  \
                                : : "eax", "ecx", "edx" );              \
         if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) {          \
            counter_overhead = cycle_tmp2 - cycle_tmp1;                 \
         }                                                              \
      }                                                                 \
   } while (0)

#define BEGIN_RACE(x)                                                   \
   x = LONG_MAX;                                                        \
   for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) {                     \
      long cycle_tmp1 = 0, cycle_tmp2 = 0;                              \
      __asm__ __volatile__ ( "push %%ebx       \n"                      \
                             "xor %%eax, %%eax \n"                      \
                             "cpuid            \n"                      \
                             "rdtsc            \n"                      \
                             "mov %%eax, %0    \n"                      \
                             "xor %%eax, %%eax \n"                      \
                             "cpuid            \n"                      \
                             "pop %%ebx        \n"                      \
                             : "=m" (cycle_tmp1)                        \
                             : : "eax", "ecx", "edx" );

#define END_RACE(x)                                                     \
      __asm__ __volatile__ ( "push %%ebx       \n"                      \
                             "xor %%eax, %%eax \n"                      \
                             "cpuid            \n"                      \
                             "rdtsc            \n"                      \
                             "mov %%eax, %0    \n"                      \
                             "xor %%eax, %%eax \n"                      \
                             "cpuid            \n"                      \
                             "pop %%ebx        \n"                      \
                             : "=m" (cycle_tmp2)                        \
                             : : "eax", "ecx", "edx" );                 \
      if ( x > (cycle_tmp2 - cycle_tmp1) ) {                            \
         x = cycle_tmp2 - cycle_tmp1;                                   \
      }                                                                 \
   }                                                                    \
   x -= counter_overhead;

#else /* PPlain, PMMX version */

/* To ensure accurate results, we stall the pipelines with the
 * non-pairable 'cdq' instruction.  This ensures all the code being
 * profiled is complete when the 'rdtsc' instruction executes.
 *
 * NOTE(review): this disabled variant declares INIT_COUNTER(x) with a
 * parameter, whereas every other variant in this header is
 * INIT_COUNTER() and writes counter_overhead directly.  Reconcile the
 * two before re-enabling this branch.
 */
#define INIT_COUNTER(x)                                                 \
   do {                                                                 \
      int cycle_i;                                                      \
      x = LONG_MAX;                                                     \
      for ( cycle_i = 0 ; cycle_i < 32 ; cycle_i++ ) {                  \
         long cycle_tmp1, cycle_tmp2, dummy;                            \
         __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) );               \
         __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) );               \
         __asm__ ( "cdq" );                                             \
         __asm__ ( "cdq" );                                             \
         __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) );         \
         __asm__ ( "cdq" );                                             \
         __asm__ ( "cdq" );                                             \
         __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) );         \
         if ( x > (cycle_tmp2 - cycle_tmp1) )                           \
            x = cycle_tmp2 - cycle_tmp1;                                \
      }                                                                 \
   } while (0)

#define BEGIN_RACE(x)                                                   \
   x = LONG_MAX;                                                        \
   for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) {                     \
      long cycle_tmp1, cycle_tmp2, dummy;                               \
      __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) );                  \
      __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) );                  \
      __asm__ ( "cdq" );                                                \
      __asm__ ( "cdq" );                                                \
      __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) );


#define END_RACE(x)                                                     \
      __asm__ ( "cdq" );                                                \
      __asm__ ( "cdq" );                                                \
      __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) );            \
      if ( x > (cycle_tmp2 - cycle_tmp1) )                              \
         x = cycle_tmp2 - cycle_tmp1;                                   \
   }                                                                    \
   x -= counter_overhead;

#endif

#elif defined(__x86_64__)

/* Read the full 64-bit timestamp counter into val (low half from %eax,
 * high half from %edx).
 */
#define rdtscll(val) do {                                               \
     unsigned int a,d;                                                  \
     __asm__ volatile("rdtsc" : "=a" (a), "=d" (d));                    \
     (val) = ((unsigned long)a) | (((unsigned long)d)<<32);             \
} while(0)

/* Modelled on the i386 P6 version above, but using plain 'rdtsc' reads
 * without the serializing 'cpuid' instructions.
 */
#define INIT_COUNTER()                                                  \
   do {                                                                 \
      int cycle_i;                                                      \
      counter_overhead = LONG_MAX;                                      \
      for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) {                  \
         unsigned long cycle_tmp1, cycle_tmp2;                          \
         rdtscll(cycle_tmp1);                                           \
         rdtscll(cycle_tmp2);                                           \
         if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) {          \
            counter_overhead = cycle_tmp2 - cycle_tmp1;                 \
         }                                                              \
      }                                                                 \
   } while (0)


#define BEGIN_RACE(x)                                                   \
   x = LONG_MAX;                                                        \
   for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) {                     \
      unsigned long cycle_tmp1, cycle_tmp2;                             \
      rdtscll(cycle_tmp1);

#define END_RACE(x)                                                     \
      rdtscll(cycle_tmp2);                                              \
      if ( x > (cycle_tmp2 - cycle_tmp1) ) {                            \
         x = cycle_tmp2 - cycle_tmp1;                                   \
      }                                                                 \
   }                                                                    \
   x -= counter_overhead;

#elif defined(__sparc__)

/* On SPARC the overhead is not measured; a fixed value is used. */
#define INIT_COUNTER()                                                  \
   do { counter_overhead = 5; } while(0)

/* The timestamps are pinned to %l0 / %l1 because the 'rd %tick'
 * instructions are emitted as pre-encoded .word values that name those
 * registers.
 */
#define BEGIN_RACE(x)                                                   \
   x = LONG_MAX;                                                        \
   for (cycle_i = 0; cycle_i <10; cycle_i++) {                          \
      register long cycle_tmp1 __asm__("l0");                           \
      register long cycle_tmp2 __asm__("l1");                           \
      /* rd %tick, %l0 */                                               \
      __asm__ __volatile__ (".word 0xa1410000" : "=r" (cycle_tmp1)); /* save timestamp */

#define END_RACE(x)                                                     \
      /* rd %tick, %l1 */                                               \
      __asm__ __volatile__ (".word 0xa3410000" : "=r" (cycle_tmp2));    \
      if (x > (cycle_tmp2-cycle_tmp1)) x = cycle_tmp2 - cycle_tmp1;     \
   }                                                                    \
   x -= counter_overhead;

#else
#error Your processor is not supported for RUN_DEBUG_BENCHMARK
#endif

#else

/* Benchmarking disabled: the race markers expand to nothing, so the code
 * between them runs exactly once and x is left untouched.
 */
#define BEGIN_RACE(x)
#define END_RACE(x)

#endif
259
260
261 /* =============================================================
262 * Helper functions
263 */
264
rnd(void)265 static GLfloat rnd( void )
266 {
267 GLfloat f = (GLfloat)rand() / (GLfloat)RAND_MAX;
268 GLfloat gran = (GLfloat)(1 << 13);
269
270 f = (GLfloat)(GLint)(f * gran) / gran;
271
272 return f * 2.0 - 1.0;
273 }
274
/* Estimate how many leading significand bits of a and b agree.
 *
 * Returns MAX_PRECISION when the values are equal, 0 when exactly one of
 * them is zero or when the comparison involves a non-finite value, and
 * otherwise the gap between the smaller of the two operands' binary
 * exponents and the exponent of their difference.
 */
static int significand_match( GLfloat a, GLfloat b )
{
   GLfloat d = a - b;
   int a_ex = 0, b_ex = 0, d_ex = 0;

   /* 'a == b' additionally covers equal infinities, for which a - b
    * is NaN rather than zero.
    */
   if ( d == 0.0F || a == b ) {
      return MAX_PRECISION;   /* Exact match */
   }

   if ( a == 0.0F || b == 0.0F ) {
      /* It would probably be better to check if the
       * non-zero number is denormalized and return
       * the index of the highest set bit here.
       */
      return 0;
   }

   /* frexpf() leaves the exponent unspecified for infinities and NaNs,
    * so treat any non-finite operand or difference as "no bits match"
    * instead of computing with an unspecified exponent.
    */
   if ( !isfinite( a ) || !isfinite( b ) || !isfinite( d ) ) {
      return 0;
   }

   frexpf( a, &a_ex );
   frexpf( b, &b_ex );
   frexpf( d, &d_ex );

   if ( a_ex < b_ex ) {
      return a_ex - d_ex;
   } else {
      return b_ex - d_ex;
   }
}
302
/* Symbolic entry kinds: zero, plus one, minus one, or variable.
 * NOTE(review): presumably used by the test code to describe matrix
 * templates; the users are not visible in this header - confirm.
 */
enum { NIL = 0, ONE = 1, NEG = -1, VAR = 2 };
304
/* Ensure our arrays are correctly aligned.
 *
 * ALIGN16(type, array) declares 'array' (a declarator, e.g. "v[4]") of
 * the given type with 16-byte alignment where the compiler supports it.
 * On unknown compilers it degrades to a plain declaration, so code using
 * the macro still compiles, just without the alignment guarantee.
 */
#if defined(__GNUC__)
#  define ALIGN16(type, array)   type array __attribute__ ((aligned (16)))
#elif defined(_MSC_VER)
/* __declspec is a declaration specifier and must precede the declarator. */
#  define ALIGN16(type, array)   __declspec(align(16)) type array
#elif defined(__xlC__)
#  define ALIGN16(type, array)   type __align (16) array
#else
#  warning "ALIGN16 will not 16-byte align!\n"
#  define ALIGN16(type, array)   type array
#endif
317
318
319 #endif /* DEBUG_MATH */
320
321 #endif /* __M_DEBUG_UTIL_H__ */
322