1
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <assert.h>
5
/* Set to 1 to make send() print the running CRC and the contents of outBuf
   for every line it checksums; main() suggests this when the final CRC
   does not match the expected value. */
#define VERBOSE 0
7
/* Short aliases for the basic integer types used throughout this file. */
typedef  unsigned int            UInt;
typedef  unsigned char           UChar;
typedef  unsigned long long int  ULong;
typedef  signed long long int    Long;
typedef  signed int              Int;
typedef  unsigned short          UShort;
typedef  unsigned long           UWord;
typedef  char                    HChar;
16
17 /////////////////////////////////////////////////////////////////
18 // BEGIN crc32 stuff //
19 /////////////////////////////////////////////////////////////////
20
/* 256-entry lookup table used by UPDATE_CRC: the index is the top byte of
   the running CRC XORed with the next input byte.
   NOTE(review): the values look like the usual MSB-first CRC-32 table for
   polynomial 0x04c11db7 (which is entry 1) -- confirm before relying on
   that. */
static const UInt crc32Table[256] = {

   /*-- Ugly, innit? --*/

   0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
   0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
   0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
   0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
   0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
   0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
   0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
   0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
   0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
   0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
   0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
   0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
   0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
   0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
   0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
   0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
   0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
   0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
   0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
   0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
   0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
   0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
   0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
   0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
   0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
   0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
   0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
   0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
   0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
   0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
   0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
   0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
   0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
   0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
   0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
   0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
   0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
   0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
   0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
   0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
   0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
   0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
   0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
   0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
   0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
   0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
   0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
   0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
   0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
   0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
   0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
   0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
   0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
   0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
   0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
   0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
   0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
   0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
   0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
   0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
   0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
   0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
   0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
   0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
};
90
/* Advance the running CRC held in the lvalue crcVar by one input byte cha:
   shift the CRC left by 8 and XOR in the table entry selected by the old
   top byte XORed with the input byte.
   Wrapped in do { } while (0) so that "UPDATE_CRC(a, b);" is a single
   statement in every context, and with both arguments parenthesised so
   that the (UChar) cast applies to the whole of cha rather than only to
   its first token.  Note that crcVar is evaluated more than once. */
#define UPDATE_CRC(crcVar,cha)                       \
do {                                                 \
   (crcVar) = ((crcVar) << 8) ^                      \
              crc32Table[((crcVar) >> 24) ^          \
                         ((UChar)(cha))];            \
} while (0)
97
/* Fold nBytes bytes, starting at bytes[0] and proceeding in ascending
   order, into the running CRC crcIn.  Returns the updated CRC; nothing is
   written through bytes. */
static UInt crcBytes ( UChar* bytes, UWord nBytes, UInt crcIn )
{
   UInt  acc = crcIn;
   UWord idx;

   for (idx = 0; idx < nBytes; idx++) {
      UPDATE_CRC(acc, bytes[idx]);
   }
   return acc;
}
116
/* Final step of the CRC computation: returns crc with every bit inverted. */
static UInt crcFinalise ( UInt crc )
{
   const UInt allOnes = ~(UInt)0;
   return crc ^ allOnes;
}
120
121 ////////
122
/* Running CRC over every line passed to send(); starts out as all-ones and
   is finalised and checked in main(). */
static UInt theCRC = 0xFFFFFFFF;

/* Scratch buffer: callers format one line of output into it (with sprintf)
   and then hand the length to send(). */
static HChar outBuf[1024];
126 // take output that's in outBuf, length as specified, and
127 // update the running crc.
send(int nbytes)128 static void send ( int nbytes )
129 {
130 assert( ((unsigned int)nbytes) < sizeof(outBuf)-1);
131 assert(outBuf[nbytes] == 0);
132 theCRC = crcBytes( (UChar*)&outBuf[0], nbytes, theCRC );
133 if (VERBOSE) printf("SEND %08x %s", theCRC, outBuf);
134 }
135
136
137 /////////////////////////////////////////////////////////////////
138 // END crc32 stuff //
139 /////////////////////////////////////////////////////////////////
140
/* val[] holds the NVALS operand values that the locked-instruction tests
   iterate over.  Each table is three groups of the same low-byte values:
   first with all higher bits clear, then with 0xFF in bits 8..15, then
   with all of bits 8..31 set.  The "#if 0" branch is a longer table that
   is currently compiled out; the "#else" branch is the one in use. */
#if 0

// full version
#define NVALS 57

static unsigned int val[NVALS]
    = { 0x00, 0x01, 0x02, 0x03,
        0x3F, 0x40, 0x41,
        0x7E, 0x7F, 0x80, 0x81, 0x82,
        0xBF, 0xC0, 0xC1,
        0xFC, 0xFD, 0xFE, 0xFF,

        0xFF00, 0xFF01, 0xFF02, 0xFF03,
        0xFF3F, 0xFF40, 0xFF41,
        0xFF7E, 0xFF7F, 0xFF80, 0xFF81, 0xFF82,
        0xFFBF, 0xFFC0, 0xFFC1,
        0xFFFC, 0xFFFD, 0xFFFE, 0xFFFF,

        0xFFFFFF00, 0xFFFFFF01, 0xFFFFFF02, 0xFFFFFF03,
        0xFFFFFF3F, 0xFFFFFF40, 0xFFFFFF41,
        0xFFFFFF7E, 0xFFFFFF7F, 0xFFFFFF80, 0xFFFFFF81, 0xFFFFFF82,
        0xFFFFFFBF, 0xFFFFFFC0, 0xFFFFFFC1,
        0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, 0xFFFFFFFF
      };

#else

// shortened version, for use as valgrind regtest
#define NVALS 27

static unsigned int val[NVALS]
    = { 0x00, 0x01,
        0x3F, 0x40,
        0x7F, 0x80,
        0xBF, 0xC0,
        0xFF,

        0xFF00, 0xFF01,
        0xFF3F, 0xFF40,
        0xFF7F, 0xFF80,
        0xFFBF, 0xFFC0,
        0xFFFF,

        0xFFFFFF00, 0xFFFFFF01,
        0xFFFFFF3F, 0xFFFFFF40,
        0xFFFFFF7F, 0xFFFFFF80,
        0xFFFFFFBF, 0xFFFFFFC0,
        0xFFFFFFFF
      };

#endif
192
193 /////////////////////////////////////
194
/* Bit masks for the six condition-code flags the tests preload (via
   pushl/popfl) and read back (via pushfl/popl).  The single-letter
   suffixes match the o/s/z/a/c/p loop variables in the GEN_* macros. */
#define CC_C    0x0001
#define CC_P    0x0004
#define CC_A    0x0010
#define CC_Z    0x0040
#define CC_S    0x0080
#define CC_O    0x0800

/* All six flags together; flag words read back from the CPU are ANDed with
   this before being printed, so no other bits reach the output. */
#define CC_MASK (CC_C | CC_P | CC_A | CC_Z | CC_S | CC_O)
203
/* GEN_do_locked_G_E(_name,_eax) defines the function
   do_locked_G_E_<_name>(), which exercises "lock; <_name> %<_eax>,(%ebx)",
   i.e. a locked read-modify-write with a register source and a memory
   destination.

   For every pair (val[v1], val[v2]) and every on/off combination of the
   six CC_* flags, it:
     - fills block[] with { flags_in, g_val, address of e_val, 0 };
     - in the asm: loads block[0] into the flags register with pushl/popfl,
       loads g_val into %eax and the address of e_val into %ebx, runs the
       locked instruction, then stores the resulting flags into block[3];
     - formats the inputs, the new value of e_val and the resulting flags
       (masked with CC_MASK) into outBuf and passes the line to send().

   _name is the instruction mnemonic and _eax the name of the source
   register (al/ax/eax in the instantiations in this file). */
#define GEN_do_locked_G_E(_name,_eax)   \
  \
  __attribute__((noinline)) void do_locked_G_E_##_name ( void )  \
  {   \
    volatile int e_val, g_val, e_val_before;   \
    int o, s, z, a, c, p, v1, v2, flags_in;       \
    int block[4];  \
    \
    for (v1 = 0; v1 < NVALS; v1++) {   \
    for (v2 = 0; v2 < NVALS; v2++) {   \
    \
    for (o = 0; o < 2; o++) {   \
    for (s = 0; s < 2; s++) {   \
    for (z = 0; z < 2; z++) {   \
    for (a = 0; a < 2; a++) {   \
    for (c = 0; c < 2; c++) {   \
    for (p = 0; p < 2; p++) {   \
      \
      flags_in = (o ? CC_O : 0)   \
               | (s ? CC_S : 0)   \
               | (z ? CC_Z : 0)   \
               | (a ? CC_A : 0)   \
               | (c ? CC_C : 0)   \
               | (p ? CC_P : 0);   \
      \
      g_val = val[v1];   \
      e_val = val[v2];   \
      e_val_before = e_val;   \
      \
      block[0] = flags_in;   \
      block[1] = g_val;   \
      block[2] = (int)(long)&e_val;   \
      block[3] = 0;   \
      __asm__ __volatile__(   \
          "movl 0(%0), %%eax\n\t"   \
          "pushl %%eax\n\t"   \
          "popfl\n\t"   \
          "movl 4(%0), %%eax\n\t"   \
          "movl 8(%0), %%ebx\n\t"   \
          "lock; " #_name " %%" #_eax ",(%%ebx)\n\t"   \
          "pushfl\n\t"   \
          "popl %%eax\n\t"   \
          "movl %%eax, 12(%0)\n\t"   \
          : : "r"(&block[0]) : "eax","ebx","cc","memory"   \
      );   \
      \
      send( \
      sprintf(outBuf, \
             "%s G=%08x E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n",       \
             #_name, g_val, e_val_before, flags_in,   \
              e_val, block[3] & CC_MASK) );            \
      \
    }}}}}}   \
    \
    }}   \
  }
260
/* Instantiate the register-to-memory tests: byte (al), word (ax) and long
   (eax) forms of add, or, adc, sbb, and, sub and xor -- 21 functions, each
   containing one lock-prefixed instruction. */
GEN_do_locked_G_E(addb,al)
GEN_do_locked_G_E(addw,ax)
GEN_do_locked_G_E(addl,eax)

GEN_do_locked_G_E(orb, al)
GEN_do_locked_G_E(orw, ax)
GEN_do_locked_G_E(orl, eax)

GEN_do_locked_G_E(adcb,al)
GEN_do_locked_G_E(adcw,ax)
GEN_do_locked_G_E(adcl,eax)

GEN_do_locked_G_E(sbbb,al)
GEN_do_locked_G_E(sbbw,ax)
GEN_do_locked_G_E(sbbl,eax)

GEN_do_locked_G_E(andb,al)
GEN_do_locked_G_E(andw,ax)
GEN_do_locked_G_E(andl,eax)

GEN_do_locked_G_E(subb,al)
GEN_do_locked_G_E(subw,ax)
GEN_do_locked_G_E(subl,eax)

GEN_do_locked_G_E(xorb,al)
GEN_do_locked_G_E(xorw,ax)
GEN_do_locked_G_E(xorl,eax)
288
289
290
291
/* GEN_do_locked_imm_E(_name,_eax,_imm) defines the function
   do_locked_imm_E_<_name>_<_imm>(), which exercises
   "lock; <_name> $<_imm>,(%ebx)", i.e. a locked read-modify-write with an
   immediate source and a memory destination.

   For every entry of val[] and every on/off combination of the six CC_*
   flags, it:
     - fills block[] with { flags_in, address of e_val, 0 };
     - in the asm: loads block[0] into the flags register with pushl/popfl,
       loads the address of e_val into %ebx, runs the locked instruction,
       then stores the resulting flags into block[2];
     - formats the immediate (as text), the old and new values of e_val and
       the flags in/out (output masked with CC_MASK) into outBuf and passes
       the line to send().

   The _eax argument is not referenced anywhere in the expansion. */
#define GEN_do_locked_imm_E(_name,_eax,_imm)        \
  \
  __attribute__((noinline)) void do_locked_imm_E_##_name##_##_imm ( void )  \
  {   \
    volatile int e_val, e_val_before;   \
    int o, s, z, a, c, p, v2, flags_in;   \
    int block[3];  \
    \
    for (v2 = 0; v2 < NVALS; v2++) {   \
    \
    for (o = 0; o < 2; o++) {   \
    for (s = 0; s < 2; s++) {   \
    for (z = 0; z < 2; z++) {   \
    for (a = 0; a < 2; a++) {   \
    for (c = 0; c < 2; c++) {   \
    for (p = 0; p < 2; p++) {   \
      \
      flags_in = (o ? CC_O : 0)   \
               | (s ? CC_S : 0)   \
               | (z ? CC_Z : 0)   \
               | (a ? CC_A : 0)   \
               | (c ? CC_C : 0)   \
               | (p ? CC_P : 0);   \
      \
      e_val = val[v2];   \
      e_val_before = e_val;   \
      \
      block[0] = flags_in;   \
      block[1] = (int)(long)&e_val;   \
      block[2] = 0;   \
      __asm__ __volatile__(   \
          "movl 0(%0), %%eax\n\t"   \
          "pushl %%eax\n\t"   \
          "popfl\n\t"   \
          "movl 4(%0), %%ebx\n\t"   \
          "lock; " #_name " $" #_imm ",(%%ebx)\n\t"   \
          "pushfl\n\t"   \
          "popl %%eax\n\t"   \
          "movl %%eax, 8(%0)\n\t"   \
          : : "r"(&block[0]) : "eax","ebx","cc","memory"   \
      );   \
      \
      send( \
           sprintf(outBuf, \
           "%s I=%s E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n",        \
             #_name, #_imm, e_val_before, flags_in,   \
             e_val, block[2] & CC_MASK) );             \
      \
    }}}}}}   \
    \
    }   \
  }
344
/* Instantiate the immediate-to-memory tests: for each of add, or, adc,
   sbb, and, sub and xor, two immediates per operand size (byte, word,
   long) -- 42 functions, each containing one lock-prefixed instruction. */
GEN_do_locked_imm_E(addb,al,0x7F)
GEN_do_locked_imm_E(addb,al,0xF1)
GEN_do_locked_imm_E(addw,ax,0x7E)
GEN_do_locked_imm_E(addw,ax,0x9325)
GEN_do_locked_imm_E(addl,eax,0x7D)
GEN_do_locked_imm_E(addl,eax,0x31415927)

GEN_do_locked_imm_E(orb,al,0x7F)
GEN_do_locked_imm_E(orb,al,0xF1)
GEN_do_locked_imm_E(orw,ax,0x7E)
GEN_do_locked_imm_E(orw,ax,0x9325)
GEN_do_locked_imm_E(orl,eax,0x7D)
GEN_do_locked_imm_E(orl,eax,0x31415927)

GEN_do_locked_imm_E(adcb,al,0x7F)
GEN_do_locked_imm_E(adcb,al,0xF1)
GEN_do_locked_imm_E(adcw,ax,0x7E)
GEN_do_locked_imm_E(adcw,ax,0x9325)
GEN_do_locked_imm_E(adcl,eax,0x7D)
GEN_do_locked_imm_E(adcl,eax,0x31415927)

GEN_do_locked_imm_E(sbbb,al,0x7F)
GEN_do_locked_imm_E(sbbb,al,0xF1)
GEN_do_locked_imm_E(sbbw,ax,0x7E)
GEN_do_locked_imm_E(sbbw,ax,0x9325)
GEN_do_locked_imm_E(sbbl,eax,0x7D)
GEN_do_locked_imm_E(sbbl,eax,0x31415927)

GEN_do_locked_imm_E(andb,al,0x7F)
GEN_do_locked_imm_E(andb,al,0xF1)
GEN_do_locked_imm_E(andw,ax,0x7E)
GEN_do_locked_imm_E(andw,ax,0x9325)
GEN_do_locked_imm_E(andl,eax,0x7D)
GEN_do_locked_imm_E(andl,eax,0x31415927)

GEN_do_locked_imm_E(subb,al,0x7F)
GEN_do_locked_imm_E(subb,al,0xF1)
GEN_do_locked_imm_E(subw,ax,0x7E)
GEN_do_locked_imm_E(subw,ax,0x9325)
GEN_do_locked_imm_E(subl,eax,0x7D)
GEN_do_locked_imm_E(subl,eax,0x31415927)

GEN_do_locked_imm_E(xorb,al,0x7F)
GEN_do_locked_imm_E(xorb,al,0xF1)
GEN_do_locked_imm_E(xorw,ax,0x7E)
GEN_do_locked_imm_E(xorw,ax,0x9325)
GEN_do_locked_imm_E(xorl,eax,0x7D)
GEN_do_locked_imm_E(xorl,eax,0x31415927)
393
/* GEN_do_locked_unary_E(_name,_eax) defines the function
   do_locked_unary_E_<_name>(), which exercises "lock; <_name> (%ebx)",
   i.e. a locked single-operand read-modify-write on memory.

   For every entry of val[] and every on/off combination of the six CC_*
   flags, it:
     - fills block[] with { flags_in, address of e_val, 0 };
     - in the asm: loads block[0] into the flags register with pushl/popfl,
       loads the address of e_val into %ebx, runs the locked instruction,
       then stores the resulting flags into block[2];
     - formats the old and new values of e_val and the flags in/out (output
       masked with CC_MASK) into outBuf and passes the line to send().

   The _eax argument is not referenced anywhere in the expansion. */
#define GEN_do_locked_unary_E(_name,_eax)        \
  \
  __attribute__((noinline)) void do_locked_unary_E_##_name ( void )  \
  {   \
    volatile int e_val, e_val_before;   \
    int o, s, z, a, c, p, v2, flags_in;     \
    int block[3];  \
    \
    for (v2 = 0; v2 < NVALS; v2++) {   \
    \
    for (o = 0; o < 2; o++) {   \
    for (s = 0; s < 2; s++) {   \
    for (z = 0; z < 2; z++) {   \
    for (a = 0; a < 2; a++) {   \
    for (c = 0; c < 2; c++) {   \
    for (p = 0; p < 2; p++) {   \
      \
      flags_in = (o ? CC_O : 0)   \
               | (s ? CC_S : 0)   \
               | (z ? CC_Z : 0)   \
               | (a ? CC_A : 0)   \
               | (c ? CC_C : 0)   \
               | (p ? CC_P : 0);   \
      \
      e_val = val[v2];   \
      e_val_before = e_val;   \
      \
      block[0] = flags_in;   \
      block[1] = (int)(long)&e_val;   \
      block[2] = 0;   \
      __asm__ __volatile__(   \
          "movl 0(%0), %%eax\n\t"   \
          "pushl %%eax\n\t"   \
          "popfl\n\t"   \
          "movl 4(%0), %%ebx\n\t"   \
          "lock; " #_name " (%%ebx)\n\t"   \
          "pushfl\n\t"   \
          "popl %%eax\n\t"   \
          "movl %%eax, 8(%0)\n\t"   \
          : : "r"(&block[0]) : "eax","ebx","cc","memory"   \
      );   \
      \
      send( \
           sprintf(outBuf, \
            "%s E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n", \
             #_name, e_val_before, flags_in,         \
            e_val, block[2] & CC_MASK));   \
      \
    }}}}}}   \
    \
    }   \
  }
446
/* Instantiate the single-operand tests: byte, word and long forms of dec,
   inc, neg and not -- 12 functions, each containing one lock-prefixed
   instruction. */
GEN_do_locked_unary_E(decb,al)
GEN_do_locked_unary_E(decw,ax)
GEN_do_locked_unary_E(decl,eax)

GEN_do_locked_unary_E(incb,al)
GEN_do_locked_unary_E(incw,ax)
GEN_do_locked_unary_E(incl,eax)

GEN_do_locked_unary_E(negb,al)
GEN_do_locked_unary_E(negw,ax)
GEN_do_locked_unary_E(negl,eax)

GEN_do_locked_unary_E(notb,al)
GEN_do_locked_unary_E(notw,ax)
GEN_do_locked_unary_E(notl,eax)
462
463
464 /////////////////////////////////////////////////////////////////
465
466 unsigned int btsl_mem ( UChar* base, int bitno )
467 {
468 unsigned char res;
469 __asm__
470 __volatile__("lock; btsl\t%2, %0\n\t"
471 "setc\t%1"
472 : "=m" (*base), "=q" (res)
473 : "r" (bitno));
474 /* Pretty meaningless to dereference base here, but that's what you
475 have to do to get a btsl insn which refers to memory starting at
476 base. */
477 return res;
478 }
/* Run "lock; btsw <bitno>, <mem at base>" with the bit number held in a
   16-bit register (%w form of the operand), and return the carry flag as
   captured by the setc that follows it (0 or 1).

   The *base operand only names the starting address; the instruction can
   touch memory away from that byte, hence the "memory" clobber (matching
   btl_mem) so the compiler does not assume the rest of the block is
   unchanged. */
unsigned int btsw_mem ( UChar* base, int bitno )
{
   unsigned char carry;
   __asm__ __volatile__(
      "lock; btsw\t%w[bit], %[mem]\n\t"
      "setc\t%[cf]"
      : [mem] "=m" (*base), [cf] "=q" (carry)
      : [bit] "r" (bitno)
      : "cc", "memory");
   return carry;
}
489
/* Run "lock; btrl <bitno>, <mem at base>" with the bit number held in a
   32-bit register, and return the carry flag as captured by the setc that
   follows it (0 or 1).

   The *base operand only names the starting address; the instruction can
   touch memory away from that byte, hence the "memory" clobber (matching
   btl_mem) so the compiler does not assume the rest of the block is
   unchanged. */
unsigned int btrl_mem ( UChar* base, int bitno )
{
   unsigned char carry;
   __asm__ __volatile__(
      "lock; btrl\t%[bit], %[mem]\n\t"
      "setc\t%[cf]"
      : [mem] "=m" (*base), [cf] "=q" (carry)
      : [bit] "r" (bitno)
      : "cc", "memory");
   return carry;
}
/* Run "lock; btrw <bitno>, <mem at base>" with the bit number held in a
   16-bit register (%w form of the operand), and return the carry flag as
   captured by the setc that follows it (0 or 1).

   The *base operand only names the starting address; the instruction can
   touch memory away from that byte, hence the "memory" clobber (matching
   btl_mem) so the compiler does not assume the rest of the block is
   unchanged. */
unsigned int btrw_mem ( UChar* base, int bitno )
{
   unsigned char carry;
   __asm__ __volatile__(
      "lock; btrw\t%w[bit], %[mem]\n\t"
      "setc\t%[cf]"
      : [mem] "=m" (*base), [cf] "=q" (carry)
      : [bit] "r" (bitno)
      : "cc", "memory");
   return carry;
}
510
/* Run "lock; btcl <bitno>, <mem at base>" with the bit number held in a
   32-bit register, and return the carry flag as captured by the setc that
   follows it (0 or 1).

   The *base operand only names the starting address; the instruction can
   touch memory away from that byte, hence the "memory" clobber (matching
   btl_mem) so the compiler does not assume the rest of the block is
   unchanged. */
unsigned int btcl_mem ( UChar* base, int bitno )
{
   unsigned char carry;
   __asm__ __volatile__(
      "lock; btcl\t%[bit], %[mem]\n\t"
      "setc\t%[cf]"
      : [mem] "=m" (*base), [cf] "=q" (carry)
      : [bit] "r" (bitno)
      : "cc", "memory");
   return carry;
}
/* Run "lock; btcw <bitno>, <mem at base>" with the bit number held in a
   16-bit register (%w form of the operand), and return the carry flag as
   captured by the setc that follows it (0 or 1).

   The *base operand only names the starting address; the instruction can
   touch memory away from that byte, hence the "memory" clobber (matching
   btl_mem) so the compiler does not assume the rest of the block is
   unchanged. */
unsigned int btcw_mem ( UChar* base, int bitno )
{
   unsigned char carry;
   __asm__ __volatile__(
      "lock; btcw\t%w[bit], %[mem]\n\t"
      "setc\t%[cf]"
      : [mem] "=m" (*base), [cf] "=q" (carry)
      : [bit] "r" (bitno)
      : "cc", "memory");
   return carry;
}
531
/* Run "btl <bitno>, <mem at base>" (no lock prefix) with the bit number
   held in a 32-bit register, and return the carry flag as captured by the
   setc that follows it (0 or 1).  The asm declares "cc" and "memory"
   clobbers. */
unsigned int btl_mem ( UChar* base, int bitno )
{
   unsigned char carry;
   __asm__ __volatile__(
      "btl\t%[bit], %[mem]\n\t"
      "setc\t%[cf]"
      : [mem] "=m" (*base), [cf] "=q" (carry)
      : [bit] "r" (bitno)
      : "cc", "memory");
   return carry;
}
/* Run "btw <bitno>, <mem at base>" (no lock prefix) with the bit number
   held in a 16-bit register (%w form of the operand), and return the carry
   flag as captured by the setc that follows it (0 or 1).

   The *base operand only names the starting address; the instruction can
   read memory away from that byte, so the asm declares "cc" and "memory"
   clobbers exactly as btl_mem does, forcing earlier stores to the block to
   be completed before the instruction runs. */
unsigned int btw_mem ( UChar* base, int bitno )
{
   unsigned char carry;
   __asm__ __volatile__(
      "btw\t%w[bit], %[mem]\n\t"
      "setc\t%[cf]"
      : [mem] "=m" (*base), [cf] "=q" (carry)
      : [bit] "r" (bitno)
      : "cc", "memory");
   return carry;
}
553
/* Returns x shifted left by one bit with bit 63 of the original value
   moved into bit 0 (a rotate-left-by-one, given that ULong is 64 bits
   wide). */
ULong rol1 ( ULong x )
{
   const ULong wrapped = x >> 63;
   const ULong shifted = x << 1;
   return shifted | wrapped;
}
558
do_bt_G_E_tests(void)559 void do_bt_G_E_tests ( void )
560 {
561 UInt n, bitoff, op;
562 UInt c;
563 UChar* block;
564 ULong carrydep, res;;
565
566 /*------------------------ MEM-L -----------------------*/
567
568 carrydep = 0;
569 block = calloc(200,1);
570 block += 100;
571 /* Valid bit offsets are -800 .. 799 inclusive. */
572
573 for (n = 0; n < 10000; n++) {
574 bitoff = (random() % 1600) - 800;
575 op = random() % 4;
576 c = 2;
577 switch (op) {
578 case 0: c = btsl_mem(block, bitoff); break;
579 case 1: c = btrl_mem(block, bitoff); break;
580 case 2: c = btcl_mem(block, bitoff); break;
581 case 3: c = btl_mem(block, bitoff); break;
582 }
583 c &= 255;
584 assert(c == 0 || c == 1);
585 carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
586 }
587
588 /* Compute final result */
589 block -= 100;
590 res = 0;
591 for (n = 0; n < 200; n++) {
592 UChar ch = block[n];
593 /* printf("%d ", (int)block[n]); */
594 res = rol1(res) ^ (ULong)ch;
595 }
596
597 send( sprintf(outBuf,
598 "bt{s,r,c}l: final res 0x%llx, carrydep 0x%llx\n",
599 res, carrydep ));
600 free(block);
601
602 /*------------------------ MEM-W -----------------------*/
603
604 carrydep = 0;
605 block = calloc(200,1);
606 block += 100;
607 /* Valid bit offsets are -800 .. 799 inclusive. */
608
609 for (n = 0; n < 10000; n++) {
610 bitoff = (random() % 1600) - 800;
611 op = random() % 4;
612 c = 2;
613 switch (op) {
614 case 0: c = btsw_mem(block, bitoff); break;
615 case 1: c = btrw_mem(block, bitoff); break;
616 case 2: c = btcw_mem(block, bitoff); break;
617 case 3: c = btw_mem(block, bitoff); break;
618 }
619 c &= 255;
620 assert(c == 0 || c == 1);
621 carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
622 }
623
624 /* Compute final result */
625 block -= 100;
626 res = 0;
627 for (n = 0; n < 200; n++) {
628 UChar ch = block[n];
629 /* printf("%d ", (int)block[n]); */
630 res = rol1(res) ^ (ULong)ch;
631 }
632
633 send( sprintf(outBuf,
634 "bt{s,r,c}w: final res 0x%llx, carrydep 0x%llx\n",
635 res, carrydep ));
636 free(block);
637 }
638
639
640 /////////////////////////////////////////////////////////////////
641
642 /* Given a word, do bt/bts/btr/btc on bits 0, 1, 2 and 3 of it, and
643 also reconstruct the original bits 0, 1, 2, 3 by looking at the
644 carry flag. Returned result has mashed bits 0-3 at the bottom and
645 the reconstructed original bits 0-3 as 4-7. */
646
mash_mem_L(UInt * origp)647 UInt mash_mem_L ( UInt* origp )
648 {
649 UInt reconstructed, mashed;
650 __asm__ __volatile__ (
651 "movl %2, %%edx\n\t"
652 ""
653 "movl $0, %%eax\n\t"
654 "\n\t"
655 "btl $0, (%%edx)\n\t"
656 "setb %%cl\n\t"
657 "movzbl %%cl, %%ecx\n\t"
658 "orl %%ecx, %%eax\n\t"
659 "\n\t"
660 "lock; btsl $1, (%%edx)\n\t"
661 "setb %%cl\n\t"
662 "movzbl %%cl, %%ecx\n\t"
663 "shll $1, %%ecx\n\t"
664 "orl %%ecx, %%eax\n\t"
665 "\n\t"
666 "lock; btrl $2, (%%edx)\n\t"
667 "setb %%cl\n\t"
668 "movzbl %%cl, %%ecx\n\t"
669 "shll $2, %%ecx\n\t"
670 "orl %%ecx, %%eax\n\t"
671 "\n\t"
672 "lock; btcl $3, (%%edx)\n\t"
673 "setb %%cl\n\t"
674 "movzbl %%cl, %%ecx\n\t"
675 "shll $3, %%ecx\n\t"
676 "orl %%ecx, %%eax\n\t"
677 "\n\t"
678 "movl %%eax, %0\n\t"
679 "movl (%%edx), %1"
680
681 : "=r" (reconstructed), "=r" (mashed)
682 : "r" (origp)
683 : "eax", "ecx", "edx", "cc");
684 return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
685 }
686
mash_mem_W(UShort * origp)687 UInt mash_mem_W ( UShort* origp )
688 {
689 UInt reconstructed, mashed;
690 __asm__ __volatile__ (
691 "movl %2, %%edx\n\t"
692 ""
693 "movl $0, %%eax\n\t"
694 "\n\t"
695 "btw $0, (%%edx)\n\t"
696 "setb %%cl\n\t"
697 "movzbl %%cl, %%ecx\n\t"
698 "orl %%ecx, %%eax\n\t"
699 "\n\t"
700 "lock; btsw $1, (%%edx)\n\t"
701 "setb %%cl\n\t"
702 "movzbl %%cl, %%ecx\n\t"
703 "shll $1, %%ecx\n\t"
704 "orl %%ecx, %%eax\n\t"
705 "\n\t"
706 "lock; btrw $2, (%%edx)\n\t"
707 "setb %%cl\n\t"
708 "movzbl %%cl, %%ecx\n\t"
709 "shll $2, %%ecx\n\t"
710 "orl %%ecx, %%eax\n\t"
711 "\n\t"
712 "lock; btcw $3, (%%edx)\n\t"
713 "setb %%cl\n\t"
714 "movzbl %%cl, %%ecx\n\t"
715 "shll $3, %%ecx\n\t"
716 "orl %%ecx, %%eax\n\t"
717 "\n\t"
718 "movl %%eax, %0\n\t"
719 "movzwl (%%edx), %1"
720
721 : "=r" (reconstructed), "=r" (mashed)
722 : "r" (origp)
723 : "eax", "ecx", "edx", "cc");
724 return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
725 }
726
727
do_bt_imm_E_tests(void)728 void do_bt_imm_E_tests( void )
729 {
730 int i;
731 UInt* iil = malloc(sizeof(UInt));
732 UShort* iiw = malloc(sizeof(UShort));
733 for (i = 0; i < 0x10; i++) {
734 *iil = i;
735 *iiw = i;
736 send( sprintf(outBuf, "0x%x -> 0x%02x 0x%02x\n", i,
737 mash_mem_L(iil), mash_mem_W(iiw)));
738 }
739 free(iil);
740 free(iiw);
741 }
742
743
744
745 /////////////////////////////////////////////////////////////////
746
main(void)747 int main ( void )
748 {
749 do_locked_G_E_addb();
750 do_locked_G_E_addw();
751 do_locked_G_E_addl();
752
753 do_locked_G_E_orb();
754 do_locked_G_E_orw();
755 do_locked_G_E_orl();
756
757 do_locked_G_E_adcb();
758 do_locked_G_E_adcw();
759 do_locked_G_E_adcl();
760
761 do_locked_G_E_sbbb();
762 do_locked_G_E_sbbw();
763 do_locked_G_E_sbbl();
764
765 do_locked_G_E_andb();
766 do_locked_G_E_andw();
767 do_locked_G_E_andl();
768
769 do_locked_G_E_subb();
770 do_locked_G_E_subw();
771 do_locked_G_E_subl();
772
773 do_locked_G_E_xorb();
774 do_locked_G_E_xorw();
775 do_locked_G_E_xorl();
776 //21
777 do_locked_imm_E_addb_0x7F();
778 do_locked_imm_E_addb_0xF1();
779 do_locked_imm_E_addw_0x7E();
780 do_locked_imm_E_addw_0x9325();
781 do_locked_imm_E_addl_0x7D();
782 do_locked_imm_E_addl_0x31415927();
783
784 do_locked_imm_E_orb_0x7F();
785 do_locked_imm_E_orb_0xF1();
786 do_locked_imm_E_orw_0x7E();
787 do_locked_imm_E_orw_0x9325();
788 do_locked_imm_E_orl_0x7D();
789 do_locked_imm_E_orl_0x31415927();
790
791 do_locked_imm_E_adcb_0x7F();
792 do_locked_imm_E_adcb_0xF1();
793 do_locked_imm_E_adcw_0x7E();
794 do_locked_imm_E_adcw_0x9325();
795 do_locked_imm_E_adcl_0x7D();
796 do_locked_imm_E_adcl_0x31415927();
797
798 do_locked_imm_E_sbbb_0x7F();
799 do_locked_imm_E_sbbb_0xF1();
800 do_locked_imm_E_sbbw_0x7E();
801 do_locked_imm_E_sbbw_0x9325();
802 do_locked_imm_E_sbbl_0x7D();
803 do_locked_imm_E_sbbl_0x31415927();
804
805 do_locked_imm_E_andb_0x7F();
806 do_locked_imm_E_andb_0xF1();
807 do_locked_imm_E_andw_0x7E();
808 do_locked_imm_E_andw_0x9325();
809 do_locked_imm_E_andl_0x7D();
810 do_locked_imm_E_andl_0x31415927();
811
812 do_locked_imm_E_subb_0x7F();
813 do_locked_imm_E_subb_0xF1();
814 do_locked_imm_E_subw_0x7E();
815 do_locked_imm_E_subw_0x9325();
816 do_locked_imm_E_subl_0x7D();
817 do_locked_imm_E_subl_0x31415927();
818
819 do_locked_imm_E_xorb_0x7F();
820 do_locked_imm_E_xorb_0xF1();
821 do_locked_imm_E_xorw_0x7E();
822 do_locked_imm_E_xorw_0x9325();
823 do_locked_imm_E_xorl_0x7D();
824 do_locked_imm_E_xorl_0x31415927();
825 // 63
826 do_locked_unary_E_decb();
827 do_locked_unary_E_decw();
828 do_locked_unary_E_decl();
829
830 do_locked_unary_E_incb();
831 do_locked_unary_E_incw();
832 do_locked_unary_E_incl();
833
834 do_locked_unary_E_negb();
835 do_locked_unary_E_negw();
836 do_locked_unary_E_negl();
837
838 do_locked_unary_E_notb();
839 do_locked_unary_E_notw();
840 do_locked_unary_E_notl();
841 // 75
842 do_bt_G_E_tests();
843 // 81
844 do_bt_imm_E_tests();
845 // 87
846 // So there should be 87 lock-prefixed instructions in the
847 // disassembly of this compilation unit.
848 // confirm with
849 // objdump -d ./x86locked | grep lock | grep -v do_lock | grep -v elf32 | wc
850
851 { UInt crcExpd = 0x8235DC9C;
852 theCRC = crcFinalise( theCRC );
853 if (theCRC == crcExpd) {
854 printf("x86locked: PASS: CRCs actual 0x%08X expected 0x%08X\n",
855 theCRC, crcExpd);
856 } else {
857 printf("x86locked: FAIL: CRCs actual 0x%08X expected 0x%08X\n",
858 theCRC, crcExpd);
859 printf("x86locked: set #define VERBOSE 1 to diagnose\n");
860 }
861 }
862
863 return 0;
864 }
865