1
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <assert.h>
5
/* Set to 1 to make send() print every record together with the running
   CRC; with 0 only the final PASS/FAIL line is printed. */
#define VERBOSE 0

/* Short type aliases used throughout this test. */

/* Unsigned integers, narrowest first. */
typedef unsigned char      UChar;
typedef unsigned short     UShort;
typedef unsigned int       UInt;
typedef unsigned long long ULong;

/* Signed integers. */
typedef int                Int;
typedef long long          Long;

/* Used by crcBytes() as its byte-count type. */
typedef unsigned long      UWord;

/* Plain character, used for the text output buffer. */
typedef char               HChar;
16
/* Deterministic pseudo-random source for the tests: two multiply-with-carry
   generators whose states are combined into one value.  The state lives in
   function-local statics seeded with fixed constants, so every run of the
   program produces the same sequence. */
unsigned myrandom(void)
{
   static unsigned state_lo = 11;   /* contributes the low part of the result */
   static unsigned state_hi = 13;   /* contributes the high part of the result */

   const unsigned hi_digit = state_hi & 0xFFFFu;
   const unsigned lo_digit = state_lo & 0xFFFFu;

   /* new state = multiplier * low 16 bits + carry (the old high 16 bits) */
   state_hi = 36969u * hi_digit + (state_hi >> 16);
   state_lo = 18000u * lo_digit + (state_lo >> 16);

   return state_lo + (state_hi << 16);
}
28
29 /////////////////////////////////////////////////////////////////
30 // BEGIN crc32 stuff //
31 /////////////////////////////////////////////////////////////////
32
33 static const UInt crc32Table[256] = {
34
35 /*-- Ugly, innit? --*/
36
37 0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
38 0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
39 0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
40 0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
41 0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
42 0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
43 0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
44 0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
45 0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
46 0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
47 0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
48 0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
49 0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
50 0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
51 0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
52 0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
53 0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
54 0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
55 0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
56 0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
57 0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
58 0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
59 0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
60 0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
61 0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
62 0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
63 0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
64 0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
65 0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
66 0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
67 0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
68 0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
69 0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
70 0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
71 0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
72 0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
73 0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
74 0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
75 0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
76 0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
77 0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
78 0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
79 0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
80 0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
81 0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
82 0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
83 0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
84 0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
85 0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
86 0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
87 0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
88 0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
89 0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
90 0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
91 0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
92 0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
93 0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
94 0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
95 0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
96 0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
97 0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
98 0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
99 0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
100 0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
101 };
102
/* Fold one byte into a running CRC: shift the CRC left by 8 bits and XOR
   in the crc32Table entry selected by (top byte of the CRC) ^ (input byte).

   crcVar  modifiable UInt lvalue; it is evaluated more than once, so it
           must not have side effects.
   cha     the input byte; converted to UChar before use.

   Wrapped in do { } while (0) so that "UPDATE_CRC(a, b);" is exactly one
   statement (safe in an unbraced if/else), and every argument is
   parenthesised so that expression arguments expand as intended. */
#define UPDATE_CRC(crcVar,cha)                     \
   do {                                            \
      (crcVar) = ((crcVar) << 8) ^                 \
                 crc32Table[((crcVar) >> 24) ^     \
                            ((UChar)(cha))];       \
   } while (0)
109
/* Extend a running CRC over a byte range.

   bytes   first byte to process; only read, hence const-qualified
   nBytes  number of bytes to process (0 is allowed: crcIn is returned)
   crcIn   CRC value accumulated so far

   Returns the updated CRC.  The caller applies crcFinalise() once all
   data has been fed in. */
static UInt crcBytes ( const UChar* bytes, UWord nBytes, UInt crcIn )
{
   UInt         crc = crcIn;
   const UChar* end = bytes + nBytes;

   /* One table step per byte; the order of the bytes matters. */
   for (; bytes != end; bytes++) {
      UPDATE_CRC(crc, *bytes);
   }
   return crc;
}
128
/* Last step of the CRC computation: invert every bit of the running value. */
static UInt crcFinalise ( UInt crc )
{
   const UInt finished = ~crc;
   return finished;
}
132
133 ////////
134
135 static UInt theCRC = 0xFFFFFFFF;
136
137 static HChar outBuf[1024];
138 // take output that's in outBuf, length as specified, and
139 // update the running crc.
send(int nbytes)140 static void send ( int nbytes )
141 {
142 assert( ((unsigned int)nbytes) < sizeof(outBuf)-1);
143 assert(outBuf[nbytes] == 0);
144 theCRC = crcBytes( (UChar*)&outBuf[0], nbytes, theCRC );
145 if (VERBOSE) printf("SEND %08x %s", theCRC, outBuf);
146 }
147
148
149 /////////////////////////////////////////////////////////////////
150 // END crc32 stuff //
151 /////////////////////////////////////////////////////////////////
152
153 #if 0
154
155 // full version
156 #define NVALS 76
157
158 static ULong val[NVALS]
159 = { 0x00ULL, 0x01ULL, 0x02ULL, 0x03ULL,
160 0x3FULL, 0x40ULL, 0x41ULL,
161 0x7EULL, 0x7FULL, 0x80ULL, 0x81ULL, 0x82ULL,
162 0xBFULL, 0xC0ULL, 0xC1ULL,
163 0xFCULL, 0xFDULL, 0xFEULL, 0xFFULL,
164
165 0xFF00ULL, 0xFF01ULL, 0xFF02ULL, 0xFF03ULL,
166 0xFF3FULL, 0xFF40ULL, 0xFF41ULL,
167 0xFF7EULL, 0xFF7FULL, 0xFF80ULL, 0xFF81ULL, 0xFF82ULL,
168 0xFFBFULL, 0xFFC0ULL, 0xFFC1ULL,
169 0xFFFCULL, 0xFFFDULL, 0xFFFEULL, 0xFFFFULL,
170
171 0xFFFFFF00ULL, 0xFFFFFF01ULL, 0xFFFFFF02ULL, 0xFFFFFF03ULL,
172 0xFFFFFF3FULL, 0xFFFFFF40ULL, 0xFFFFFF41ULL,
173 0xFFFFFF7EULL, 0xFFFFFF7FULL, 0xFFFFFF80ULL, 0xFFFFFF81ULL, 0xFFFFFF82ULL,
174 0xFFFFFFBFULL, 0xFFFFFFC0ULL, 0xFFFFFFC1ULL,
175 0xFFFFFFFCULL, 0xFFFFFFFDULL, 0xFFFFFFFEULL, 0xFFFFFFFFULL,
176
177 0xFFFFFFFFFFFFFF00ULL, 0xFFFFFFFFFFFFFF01ULL, 0xFFFFFFFFFFFFFF02ULL,
178 0xFFFFFFFFFFFFFF03ULL,
179 0xFFFFFFFFFFFFFF3FULL, 0xFFFFFFFFFFFFFF40ULL, 0xFFFFFFFFFFFFFF41ULL,
180 0xFFFFFFFFFFFFFF7EULL, 0xFFFFFFFFFFFFFF7FULL, 0xFFFFFFFFFFFFFF80ULL,
181 0xFFFFFFFFFFFFFF81ULL, 0xFFFFFFFFFFFFFF82ULL,
182 0xFFFFFFFFFFFFFFBFULL, 0xFFFFFFFFFFFFFFC0ULL, 0xFFFFFFFFFFFFFFC1ULL,
183 0xFFFFFFFFFFFFFFFCULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFEULL,
184 0xFFFFFFFFFFFFFFFFULL
185 };
186
187 #else
188
189 // shortened version, for use as valgrind regtest
190 #define NVALS 36
191
192 static ULong val[NVALS]
193 = { 0x00ULL, 0x01ULL,
194 0x3FULL, 0x40ULL,
195 0x7FULL, 0x80ULL,
196 0xBFULL, 0xC0ULL,
197 0xFFULL,
198
199 0xFF00ULL, 0xFF01ULL,
200 0xFF3FULL, 0xFF40ULL,
201 0xFF7FULL, 0xFF80ULL,
202 0xFFBFULL, 0xFFC0ULL,
203 0xFFFFULL,
204
205 0xFFFFFF00ULL, 0xFFFFFF01ULL,
206 0xFFFFFF3FULL, 0xFFFFFF40ULL,
207 0xFFFFFF7EULL, 0xFFFFFF7FULL,
208 0xFFFFFFBFULL, 0xFFFFFFC0ULL,
209 0xFFFFFFFFULL,
210
211 0xFFFFFFFFFFFFFF00ULL, 0xFFFFFFFFFFFFFF01ULL,
212 0xFFFFFFFFFFFFFF3FULL, 0xFFFFFFFFFFFFFF40ULL,
213 0xFFFFFFFFFFFFFF7FULL, 0xFFFFFFFFFFFFFF80ULL,
214 0xFFFFFFFFFFFFFFBFULL, 0xFFFFFFFFFFFFFFC0ULL,
215 0xFFFFFFFFFFFFFFFFULL
216 };
217
218 #endif
219
220 /////////////////////////////////////
221
/* Bit positions, within the flags word that the tests load with popfq and
   read back with pushfq, of the six condition codes being exercised. */
#define CC_C (1 << 0)    /* 0x0001 */
#define CC_P (1 << 2)    /* 0x0004 */
#define CC_A (1 << 4)    /* 0x0010 */
#define CC_Z (1 << 6)    /* 0x0040 */
#define CC_S (1 << 7)    /* 0x0080 */
#define CC_O (1 << 11)   /* 0x0800 */

/* All six bits together; applied to the flags read back after each test. */
#define CC_MASK (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C)
230
231 #define GEN_do_locked_G_E(_name,_eax) \
232 \
233 __attribute__((noinline)) void do_locked_G_E_##_name ( void ) \
234 { \
235 volatile Long e_val, g_val, e_val_before; \
236 Long o, s, z, a, c, p, v1, v2, flags_in; \
237 Long block[4]; \
238 \
239 for (v1 = 0; v1 < NVALS; v1++) { \
240 for (v2 = 0; v2 < NVALS; v2++) { \
241 \
242 for (o = 0; o < 2; o++) { \
243 for (s = 0; s < 2; s++) { \
244 for (z = 0; z < 2; z++) { \
245 for (a = 0; a < 2; a++) { \
246 for (c = 0; c < 2; c++) { \
247 for (p = 0; p < 2; p++) { \
248 \
249 flags_in = (o ? CC_O : 0) \
250 | (s ? CC_S : 0) \
251 | (z ? CC_Z : 0) \
252 | (a ? CC_A : 0) \
253 | (c ? CC_C : 0) \
254 | (p ? CC_P : 0); \
255 \
256 g_val = val[v1]; \
257 e_val = val[v2]; \
258 e_val_before = e_val; \
259 \
260 block[0] = flags_in; \
261 block[1] = g_val; \
262 block[2] = (long)&e_val; \
263 block[3] = 0; \
264 __asm__ __volatile__( \
265 "movq 0(%0), %%rax\n\t" \
266 "pushq %%rax\n\t" \
267 "popfq\n\t" \
268 "movq 8(%0), %%rax\n\t" \
269 "movq 16(%0), %%rbx\n\t" \
270 "lock; " #_name " %%" #_eax ",(%%rbx)\n\t" \
271 "pushfq\n\t" \
272 "popq %%rax\n\t" \
273 "movq %%rax, 24(%0)\n\t" \
274 : : "r"(&block[0]) : "rax","rbx","cc","memory" \
275 ); \
276 \
277 send( \
278 sprintf(outBuf, \
279 "%s G=%016llx E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n", \
280 #_name, g_val, e_val_before, flags_in, \
281 e_val, block[3] & CC_MASK)); \
282 \
283 }}}}}} \
284 \
285 }} \
286 }
287
GEN_do_locked_G_E(addb,al)288 GEN_do_locked_G_E(addb,al)
289 GEN_do_locked_G_E(addw,ax)
290 GEN_do_locked_G_E(addl,eax)
291 GEN_do_locked_G_E(addq,rax)
292
293 GEN_do_locked_G_E(orb, al)
294 GEN_do_locked_G_E(orw, ax)
295 GEN_do_locked_G_E(orl, eax)
296 GEN_do_locked_G_E(orq, rax)
297
298 GEN_do_locked_G_E(adcb,al)
299 GEN_do_locked_G_E(adcw,ax)
300 GEN_do_locked_G_E(adcl,eax)
301 GEN_do_locked_G_E(adcq,rax)
302
303 GEN_do_locked_G_E(sbbb,al)
304 GEN_do_locked_G_E(sbbw,ax)
305 GEN_do_locked_G_E(sbbl,eax)
306 GEN_do_locked_G_E(sbbq,rax)
307
308 GEN_do_locked_G_E(andb,al)
309 GEN_do_locked_G_E(andw,ax)
310 GEN_do_locked_G_E(andl,eax)
311 GEN_do_locked_G_E(andq,rax)
312
313 GEN_do_locked_G_E(subb,al)
314 GEN_do_locked_G_E(subw,ax)
315 GEN_do_locked_G_E(subl,eax)
316 GEN_do_locked_G_E(subq,rax)
317
318 GEN_do_locked_G_E(xorb,al)
319 GEN_do_locked_G_E(xorw,ax)
320 GEN_do_locked_G_E(xorl,eax)
321 GEN_do_locked_G_E(xorq,rax)
322
323
324
325
326 #define GEN_do_locked_imm_E(_name,_eax,_imm) \
327 \
328 __attribute__((noinline)) void do_locked_imm_E_##_name##_##_imm ( void ) \
329 { \
330 volatile Long e_val, e_val_before; \
331 Long o, s, z, a, c, p, v2, flags_in; \
332 Long block[3]; \
333 \
334 for (v2 = 0; v2 < NVALS; v2++) { \
335 \
336 for (o = 0; o < 2; o++) { \
337 for (s = 0; s < 2; s++) { \
338 for (z = 0; z < 2; z++) { \
339 for (a = 0; a < 2; a++) { \
340 for (c = 0; c < 2; c++) { \
341 for (p = 0; p < 2; p++) { \
342 \
343 flags_in = (o ? CC_O : 0) \
344 | (s ? CC_S : 0) \
345 | (z ? CC_Z : 0) \
346 | (a ? CC_A : 0) \
347 | (c ? CC_C : 0) \
348 | (p ? CC_P : 0); \
349 \
350 e_val = val[v2]; \
351 e_val_before = e_val; \
352 \
353 block[0] = flags_in; \
354 block[1] = (long)&e_val; \
355 block[2] = 0; \
356 __asm__ __volatile__( \
357 "movq 0(%0), %%rax\n\t" \
358 "pushq %%rax\n\t" \
359 "popfq\n\t" \
360 "movq 8(%0), %%rbx\n\t" \
361 "lock; " #_name " $" #_imm ",(%%rbx)\n\t" \
362 "pushfq\n\t" \
363 "popq %%rax\n\t" \
364 "movq %%rax, 16(%0)\n\t" \
365 : : "r"(&block[0]) : "rax","rbx","cc","memory" \
366 ); \
367 \
368 send( \
369 sprintf(outBuf, \
370 "%s I=%s E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n", \
371 #_name, #_imm, e_val_before, flags_in, \
372 e_val, block[2] & CC_MASK)); \
373 \
374 }}}}}} \
375 \
376 } \
377 }
378
379 GEN_do_locked_imm_E(addb,al,0x7F)
380 GEN_do_locked_imm_E(addb,al,0xF1)
381 GEN_do_locked_imm_E(addw,ax,0x7E)
382 GEN_do_locked_imm_E(addw,ax,0x9325)
383 GEN_do_locked_imm_E(addl,eax,0x7D)
384 GEN_do_locked_imm_E(addl,eax,0x31415927)
385 GEN_do_locked_imm_E(addq,rax,0x7D)
386 GEN_do_locked_imm_E(addq,rax,0x31415927)
387
388 GEN_do_locked_imm_E(orb,al,0x7F)
389 GEN_do_locked_imm_E(orb,al,0xF1)
390 GEN_do_locked_imm_E(orw,ax,0x7E)
391 GEN_do_locked_imm_E(orw,ax,0x9325)
392 GEN_do_locked_imm_E(orl,eax,0x7D)
393 GEN_do_locked_imm_E(orl,eax,0x31415927)
394 GEN_do_locked_imm_E(orq,rax,0x7D)
395 GEN_do_locked_imm_E(orq,rax,0x31415927)
396
397 GEN_do_locked_imm_E(adcb,al,0x7F)
398 GEN_do_locked_imm_E(adcb,al,0xF1)
399 GEN_do_locked_imm_E(adcw,ax,0x7E)
400 GEN_do_locked_imm_E(adcw,ax,0x9325)
401 GEN_do_locked_imm_E(adcl,eax,0x7D)
402 GEN_do_locked_imm_E(adcl,eax,0x31415927)
403 GEN_do_locked_imm_E(adcq,rax,0x7D)
404 GEN_do_locked_imm_E(adcq,rax,0x31415927)
405
406 GEN_do_locked_imm_E(sbbb,al,0x7F)
407 GEN_do_locked_imm_E(sbbb,al,0xF1)
408 GEN_do_locked_imm_E(sbbw,ax,0x7E)
409 GEN_do_locked_imm_E(sbbw,ax,0x9325)
410 GEN_do_locked_imm_E(sbbl,eax,0x7D)
411 GEN_do_locked_imm_E(sbbl,eax,0x31415927)
412 GEN_do_locked_imm_E(sbbq,rax,0x7D)
413 GEN_do_locked_imm_E(sbbq,rax,0x31415927)
414
415 GEN_do_locked_imm_E(andb,al,0x7F)
416 GEN_do_locked_imm_E(andb,al,0xF1)
417 GEN_do_locked_imm_E(andw,ax,0x7E)
418 GEN_do_locked_imm_E(andw,ax,0x9325)
419 GEN_do_locked_imm_E(andl,eax,0x7D)
420 GEN_do_locked_imm_E(andl,eax,0x31415927)
421 GEN_do_locked_imm_E(andq,rax,0x7D)
422 GEN_do_locked_imm_E(andq,rax,0x31415927)
423
424 GEN_do_locked_imm_E(subb,al,0x7F)
425 GEN_do_locked_imm_E(subb,al,0xF1)
426 GEN_do_locked_imm_E(subw,ax,0x7E)
427 GEN_do_locked_imm_E(subw,ax,0x9325)
428 GEN_do_locked_imm_E(subl,eax,0x7D)
429 GEN_do_locked_imm_E(subl,eax,0x31415927)
430 GEN_do_locked_imm_E(subq,rax,0x7D)
431 GEN_do_locked_imm_E(subq,rax,0x31415927)
432
433 GEN_do_locked_imm_E(xorb,al,0x7F)
434 GEN_do_locked_imm_E(xorb,al,0xF1)
435 GEN_do_locked_imm_E(xorw,ax,0x7E)
436 GEN_do_locked_imm_E(xorw,ax,0x9325)
437 GEN_do_locked_imm_E(xorl,eax,0x7D)
438 GEN_do_locked_imm_E(xorl,eax,0x31415927)
439 GEN_do_locked_imm_E(xorq,rax,0x7D)
440 GEN_do_locked_imm_E(xorq,rax,0x31415927)
441
442 #define GEN_do_locked_unary_E(_name,_eax) \
443 \
444 __attribute__((noinline)) void do_locked_unary_E_##_name ( void ) \
445 { \
446 volatile Long e_val, e_val_before; \
447 Long o, s, z, a, c, p, v2, flags_in; \
448 Long block[3]; \
449 \
450 for (v2 = 0; v2 < NVALS; v2++) { \
451 \
452 for (o = 0; o < 2; o++) { \
453 for (s = 0; s < 2; s++) { \
454 for (z = 0; z < 2; z++) { \
455 for (a = 0; a < 2; a++) { \
456 for (c = 0; c < 2; c++) { \
457 for (p = 0; p < 2; p++) { \
458 \
459 flags_in = (o ? CC_O : 0) \
460 | (s ? CC_S : 0) \
461 | (z ? CC_Z : 0) \
462 | (a ? CC_A : 0) \
463 | (c ? CC_C : 0) \
464 | (p ? CC_P : 0); \
465 \
466 e_val = val[v2]; \
467 e_val_before = e_val; \
468 \
469 block[0] = flags_in; \
470 block[1] = (long)&e_val; \
471 block[2] = 0; \
472 __asm__ __volatile__( \
473 "movq 0(%0), %%rax\n\t" \
474 "pushq %%rax\n\t" \
475 "popfq\n\t" \
476 "movq 8(%0), %%rbx\n\t" \
477 "lock; " #_name " (%%rbx)\n\t" \
478 "pushfq\n\t" \
479 "popq %%rax\n\t" \
480 "movq %%rax, 16(%0)\n\t" \
481 : : "r"(&block[0]) : "rax","rbx","cc","memory" \
482 ); \
483 \
484 send( \
485 sprintf(outBuf, \
486 "%s E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n", \
487 #_name, e_val_before, flags_in, \
488 e_val, block[2] & CC_MASK)); \
489 \
490 }}}}}} \
491 \
492 } \
493 }
494
495 GEN_do_locked_unary_E(decb,al)
496 GEN_do_locked_unary_E(decw,ax)
497 GEN_do_locked_unary_E(decl,eax)
498 GEN_do_locked_unary_E(decq,rax)
499
500 GEN_do_locked_unary_E(incb,al)
501 GEN_do_locked_unary_E(incw,ax)
502 GEN_do_locked_unary_E(incl,eax)
503 GEN_do_locked_unary_E(incq,rax)
504
505 GEN_do_locked_unary_E(negb,al)
506 GEN_do_locked_unary_E(negw,ax)
507 GEN_do_locked_unary_E(negl,eax)
508 GEN_do_locked_unary_E(negq,rax)
509
510 GEN_do_locked_unary_E(notb,al)
511 GEN_do_locked_unary_E(notw,ax)
512 GEN_do_locked_unary_E(notl,eax)
513 GEN_do_locked_unary_E(notq,rax)
514
515
516 /////////////////////////////////////////////////////////////////
517
518 ULong btsq_mem ( UChar* base, int bitno )
519 {
520 ULong res;
521 __asm__
522 __volatile__("lock; btsq\t%2, %0\n\t"
523 "setc %%dl\n\t"
524 "movzbq %%dl,%1\n"
525 : "=m" (*base), "=r" (res)
526 : "r" ((ULong)bitno) : "rdx","cc","memory" );
527 /* Pretty meaningless to dereference base here, but that's what you
528 have to do to get a btsl insn which refers to memory starting at
529 base. */
530 return res;
531 }
btsl_mem(UChar * base,int bitno)532 ULong btsl_mem ( UChar* base, int bitno )
533 {
534 ULong res;
535 __asm__
536 __volatile__("lock; btsl\t%2, %0\n\t"
537 "setc %%dl\n\t"
538 "movzbq %%dl,%1\n"
539 : "=m" (*base), "=r" (res)
540 : "r" ((UInt)bitno));
541 return res;
542 }
btsw_mem(UChar * base,int bitno)543 ULong btsw_mem ( UChar* base, int bitno )
544 {
545 ULong res;
546 __asm__
547 __volatile__("lock; btsw\t%w2, %0\n\t"
548 "setc %%dl\n\t"
549 "movzbq %%dl,%1\n"
550 : "=m" (*base), "=r" (res)
551 : "r" ((ULong)bitno));
552 return res;
553 }
554
btrq_mem(UChar * base,int bitno)555 ULong btrq_mem ( UChar* base, int bitno )
556 {
557 ULong res;
558 __asm__
559 __volatile__("lock; btrq\t%2, %0\n\t"
560 "setc %%dl\n\t"
561 "movzbq %%dl,%1\n"
562 : "=m" (*base), "=r" (res)
563 : "r" ((ULong)bitno));
564 return res;
565 }
btrl_mem(UChar * base,int bitno)566 ULong btrl_mem ( UChar* base, int bitno )
567 {
568 ULong res;
569 __asm__
570 __volatile__("lock; btrl\t%2, %0\n\t"
571 "setc %%dl\n\t"
572 "movzbq %%dl,%1\n"
573 : "=m" (*base), "=r" (res)
574 : "r" ((UInt)bitno));
575 return res;
576 }
btrw_mem(UChar * base,int bitno)577 ULong btrw_mem ( UChar* base, int bitno )
578 {
579 ULong res;
580 __asm__
581 __volatile__("lock; btrw\t%w2, %0\n\t"
582 "setc %%dl\n\t"
583 "movzbq %%dl,%1\n"
584 : "=m" (*base), "=r" (res)
585 : "r" ((ULong)bitno));
586 return res;
587 }
588
btcq_mem(UChar * base,int bitno)589 ULong btcq_mem ( UChar* base, int bitno )
590 {
591 ULong res;
592 __asm__
593 __volatile__("lock; btcq\t%2, %0\n\t"
594 "setc %%dl\n\t"
595 "movzbq %%dl,%1\n"
596 : "=m" (*base), "=r" (res)
597 : "r" ((ULong)bitno));
598 return res;
599 }
btcl_mem(UChar * base,int bitno)600 ULong btcl_mem ( UChar* base, int bitno )
601 {
602 ULong res;
603 __asm__
604 __volatile__("lock; btcl\t%2, %0\n\t"
605 "setc %%dl\n\t"
606 "movzbq %%dl,%1\n"
607 : "=m" (*base), "=r" (res)
608 : "r" ((UInt)bitno));
609 return res;
610 }
btcw_mem(UChar * base,int bitno)611 ULong btcw_mem ( UChar* base, int bitno )
612 {
613 ULong res;
614 __asm__
615 __volatile__("lock; btcw\t%w2, %0\n\t"
616 "setc %%dl\n\t"
617 "movzbq %%dl,%1\n"
618 : "=m" (*base), "=r" (res)
619 : "r" ((ULong)bitno));
620 return res;
621 }
622
btq_mem(UChar * base,int bitno)623 ULong btq_mem ( UChar* base, int bitno )
624 {
625 ULong res;
626 __asm__
627 __volatile__("btq\t%2, %0\n\t"
628 "setc %%dl\n\t"
629 "movzbq %%dl,%1\n"
630 : "=m" (*base), "=r" (res)
631 : "r" ((ULong)bitno)
632 : "cc", "memory");
633 return res;
634 }
btl_mem(UChar * base,int bitno)635 ULong btl_mem ( UChar* base, int bitno )
636 {
637 ULong res;
638 __asm__
639 __volatile__("btl\t%2, %0\n\t"
640 "setc %%dl\n\t"
641 "movzbq %%dl,%1\n"
642 : "=m" (*base), "=r" (res)
643 : "r" ((UInt)bitno)
644 : "cc", "memory");
645 return res;
646 }
btw_mem(UChar * base,int bitno)647 ULong btw_mem ( UChar* base, int bitno )
648 {
649 ULong res;
650 __asm__
651 __volatile__("btw\t%w2, %0\n\t"
652 "setc %%dl\n\t"
653 "movzbq %%dl,%1\n"
654 : "=m" (*base), "=r" (res)
655 : "r" ((ULong)bitno));
656 return res;
657 }
658
/* Rotate the 64-bit value x left by one bit: the top bit wraps round to
   become bit 0. */
ULong rol1 ( ULong x )
{
   const ULong wrapped = x >> 63;
   const ULong shifted = x << 1;
   return shifted | wrapped;
}
663
do_bt_G_E_tests(void)664 void do_bt_G_E_tests ( void )
665 {
666 ULong n, bitoff, op;
667 ULong c;
668 UChar* block;
669 ULong carrydep, res;;
670
671 /*------------------------ MEM-Q -----------------------*/
672
673 carrydep = 0;
674 block = calloc(200,1);
675 block += 100;
676 /* Valid bit offsets are -800 .. 799 inclusive. */
677
678 for (n = 0; n < 10000; n++) {
679 bitoff = (myrandom() % 1600) - 800;
680 op = myrandom() % 4;
681 c = 2;
682 switch (op) {
683 case 0: c = btsq_mem(block, bitoff); break;
684 case 1: c = btrq_mem(block, bitoff); break;
685 case 2: c = btcq_mem(block, bitoff); break;
686 case 3: c = btq_mem(block, bitoff); break;
687 }
688 c &= 255;
689 assert(c == 0 || c == 1);
690 carrydep = c ? (rol1(carrydep) ^ (Long)bitoff) : carrydep;
691 }
692
693 /* Compute final result */
694 block -= 100;
695 res = 0;
696 for (n = 0; n < 200; n++) {
697 UChar ch = block[n];
698 /* printf("%d ", (int)block[n]); */
699 res = rol1(res) ^ (ULong)ch;
700 }
701
702 send( sprintf(outBuf,
703 "bt{s,r,c}q: final res 0x%llx, carrydep 0x%llx\n",
704 res, carrydep));
705 free(block);
706
707 /*------------------------ MEM-L -----------------------*/
708
709 carrydep = 0;
710 block = calloc(200,1);
711 block += 100;
712 /* Valid bit offsets are -800 .. 799 inclusive. */
713
714 for (n = 0; n < 10000; n++) {
715 bitoff = (myrandom() % 1600) - 800;
716 op = myrandom() % 4;
717 c = 2;
718 switch (op) {
719 case 0: c = btsl_mem(block, bitoff); break;
720 case 1: c = btrl_mem(block, bitoff); break;
721 case 2: c = btcl_mem(block, bitoff); break;
722 case 3: c = btl_mem(block, bitoff); break;
723 }
724 c &= 255;
725 assert(c == 0 || c == 1);
726 carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
727 }
728
729 /* Compute final result */
730 block -= 100;
731 res = 0;
732 for (n = 0; n < 200; n++) {
733 UChar ch = block[n];
734 /* printf("%d ", (int)block[n]); */
735 res = rol1(res) ^ (ULong)ch;
736 }
737
738 send( sprintf(outBuf,
739 "bt{s,r,c}l: final res 0x%llx, carrydep 0x%llx\n",
740 res, carrydep));
741 free(block);
742
743 /*------------------------ MEM-W -----------------------*/
744
745 carrydep = 0;
746 block = calloc(200,1);
747 block += 100;
748 /* Valid bit offsets are -800 .. 799 inclusive. */
749
750 for (n = 0; n < 10000; n++) {
751 bitoff = (myrandom() % 1600) - 800;
752 op = myrandom() % 4;
753 c = 2;
754 switch (op) {
755 case 0: c = btsw_mem(block, bitoff); break;
756 case 1: c = btrw_mem(block, bitoff); break;
757 case 2: c = btcw_mem(block, bitoff); break;
758 case 3: c = btw_mem(block, bitoff); break;
759 }
760 c &= 255;
761 assert(c == 0 || c == 1);
762 carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
763 }
764
765 /* Compute final result */
766 block -= 100;
767 res = 0;
768 for (n = 0; n < 200; n++) {
769 UChar ch = block[n];
770 /* printf("%d ", (int)block[n]); */
771 res = rol1(res) ^ (ULong)ch;
772 }
773
774 send(sprintf(outBuf,
775 "bt{s,r,c}w: final res 0x%llx, carrydep 0x%llx\n",
776 res, carrydep));
777 free(block);
778 }
779
780
781 /////////////////////////////////////////////////////////////////
782
783 /* Given a word, do bt/bts/btr/btc on bits 0, 1, 2 and 3 of it, and
784 also reconstruct the original bits 0, 1, 2, 3 by looking at the
785 carry flag. Returned result has mashed bits 0-3 at the bottom and
786 the reconstructed original bits 0-3 as 4-7. */
787
mash_mem_Q(ULong * origp)788 ULong mash_mem_Q ( ULong* origp )
789 {
790 ULong reconstructed, mashed;
791 __asm__ __volatile__ (
792 "movq %2, %%rdx\n\t"
793 ""
794 "movq $0, %%rax\n\t"
795 "\n\t"
796 "btq $0, (%%rdx)\n\t"
797 "setb %%cl\n\t"
798 "movzbq %%cl, %%rcx\n\t"
799 "orq %%rcx, %%rax\n\t"
800 "\n\t"
801 "lock; btsq $1, (%%rdx)\n\t"
802 "setb %%cl\n\t"
803 "movzbq %%cl, %%rcx\n\t"
804 "shlq $1, %%rcx\n\t"
805 "orq %%rcx, %%rax\n\t"
806 "\n\t"
807 "lock; btrq $2, (%%rdx)\n\t"
808 "setb %%cl\n\t"
809 "movzbq %%cl, %%rcx\n\t"
810 "shlq $2, %%rcx\n\t"
811 "orq %%rcx, %%rax\n\t"
812 "\n\t"
813 "lock; btcq $3, (%%rdx)\n\t"
814 "setb %%cl\n\t"
815 "movzbq %%cl, %%rcx\n\t"
816 "shlq $3, %%rcx\n\t"
817 "orq %%rcx, %%rax\n\t"
818 "\n\t"
819 "movq %%rax, %0\n\t"
820 "movq (%%rdx), %1"
821 : "=r" (reconstructed), "=r" (mashed)
822 : "r" (origp)
823 : "rax", "rcx", "rdx", "cc");
824 return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
825 }
826
mash_mem_L(UInt * origp)827 ULong mash_mem_L ( UInt* origp )
828 {
829 ULong reconstructed; UInt mashed;
830 __asm__ __volatile__ (
831 "movq %2, %%rdx\n\t"
832 ""
833 "movq $0, %%rax\n\t"
834 "\n\t"
835 "btl $0, (%%rdx)\n\t"
836 "setb %%cl\n\t"
837 "movzbq %%cl, %%rcx\n\t"
838 "orq %%rcx, %%rax\n\t"
839 "\n\t"
840 "lock; btsl $1, (%%rdx)\n\t"
841 "setb %%cl\n\t"
842 "movzbq %%cl, %%rcx\n\t"
843 "shlq $1, %%rcx\n\t"
844 "orq %%rcx, %%rax\n\t"
845 "\n\t"
846 "lock; btrl $2, (%%rdx)\n\t"
847 "setb %%cl\n\t"
848 "movzbq %%cl, %%rcx\n\t"
849 "shlq $2, %%rcx\n\t"
850 "orq %%rcx, %%rax\n\t"
851 "\n\t"
852 "lock; btcl $3, (%%rdx)\n\t"
853 "setb %%cl\n\t"
854 "movzbq %%cl, %%rcx\n\t"
855 "shlq $3, %%rcx\n\t"
856 "orq %%rcx, %%rax\n\t"
857 "\n\t"
858 "movq %%rax, %0\n\t"
859 "movl (%%rdx), %1"
860 : "=r" (reconstructed), "=r" (mashed)
861 : "r" (origp)
862 : "rax", "rcx", "rdx", "cc");
863 return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
864 }
865
mash_mem_W(UShort * origp)866 ULong mash_mem_W ( UShort* origp )
867 {
868 ULong reconstructed, mashed;
869 __asm__ __volatile__ (
870 "movq %2, %%rdx\n\t"
871 ""
872 "movq $0, %%rax\n\t"
873 "\n\t"
874 "btw $0, (%%rdx)\n\t"
875 "setb %%cl\n\t"
876 "movzbq %%cl, %%rcx\n\t"
877 "orq %%rcx, %%rax\n\t"
878 "\n\t"
879 "lock; btsw $1, (%%rdx)\n\t"
880 "setb %%cl\n\t"
881 "movzbq %%cl, %%rcx\n\t"
882 "shlq $1, %%rcx\n\t"
883 "orq %%rcx, %%rax\n\t"
884 "\n\t"
885 "lock; btrw $2, (%%rdx)\n\t"
886 "setb %%cl\n\t"
887 "movzbq %%cl, %%rcx\n\t"
888 "shlq $2, %%rcx\n\t"
889 "orq %%rcx, %%rax\n\t"
890 "\n\t"
891 "lock; btcw $3, (%%rdx)\n\t"
892 "setb %%cl\n\t"
893 "movzbq %%cl, %%rcx\n\t"
894 "shlq $3, %%rcx\n\t"
895 "orq %%rcx, %%rax\n\t"
896 "\n\t"
897 "movq %%rax, %0\n\t"
898 "movzwq (%%rdx), %1"
899 : "=r" (reconstructed), "=r" (mashed)
900 : "r" (origp)
901 : "rax", "rcx", "rdx", "cc");
902 return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
903 }
904
905
do_bt_imm_E_tests(void)906 void do_bt_imm_E_tests( void )
907 {
908 ULong i;
909 ULong* iiq = malloc(sizeof(ULong));
910 UInt* iil = malloc(sizeof(UInt));
911 UShort* iiw = malloc(sizeof(UShort));
912 for (i = 0; i < 0x10; i++) {
913 *iiq = i;
914 *iil = i;
915 *iiw = i;
916 send(sprintf(outBuf,"0x%llx -> 0x%02llx 0x%02llx 0x%02llx\n", i,
917 mash_mem_Q(iiq), mash_mem_L(iil), mash_mem_W(iiw)));
918 }
919 free(iiq);
920 free(iil);
921 free(iiw);
922 }
923
924
925 /////////////////////////////////////////////////////////////////
926
main(void)927 int main ( void )
928 {
929 do_locked_G_E_addb();
930 do_locked_G_E_addw();
931 do_locked_G_E_addl();
932 do_locked_G_E_addq();
933
934 do_locked_G_E_orb();
935 do_locked_G_E_orw();
936 do_locked_G_E_orl();
937 do_locked_G_E_orq();
938
939 do_locked_G_E_adcb();
940 do_locked_G_E_adcw();
941 do_locked_G_E_adcl();
942 do_locked_G_E_adcq();
943
944 do_locked_G_E_sbbb();
945 do_locked_G_E_sbbw();
946 do_locked_G_E_sbbl();
947 do_locked_G_E_sbbq();
948
949 do_locked_G_E_andb();
950 do_locked_G_E_andw();
951 do_locked_G_E_andl();
952 do_locked_G_E_andq();
953
954 do_locked_G_E_subb();
955 do_locked_G_E_subw();
956 do_locked_G_E_subl();
957 do_locked_G_E_subq();
958
959 do_locked_G_E_xorb();
960 do_locked_G_E_xorw();
961 do_locked_G_E_xorl();
962 do_locked_G_E_xorq();
963 // 4 * 7
964
965 do_locked_imm_E_addb_0x7F();
966 do_locked_imm_E_addb_0xF1();
967 do_locked_imm_E_addw_0x7E();
968 do_locked_imm_E_addw_0x9325();
969 do_locked_imm_E_addl_0x7D();
970 do_locked_imm_E_addl_0x31415927();
971 do_locked_imm_E_addq_0x7D();
972 do_locked_imm_E_addq_0x31415927();
973
974 do_locked_imm_E_orb_0x7F();
975 do_locked_imm_E_orb_0xF1();
976 do_locked_imm_E_orw_0x7E();
977 do_locked_imm_E_orw_0x9325();
978 do_locked_imm_E_orl_0x7D();
979 do_locked_imm_E_orl_0x31415927();
980 do_locked_imm_E_orq_0x7D();
981 do_locked_imm_E_orq_0x31415927();
982
983 do_locked_imm_E_adcb_0x7F();
984 do_locked_imm_E_adcb_0xF1();
985 do_locked_imm_E_adcw_0x7E();
986 do_locked_imm_E_adcw_0x9325();
987 do_locked_imm_E_adcl_0x7D();
988 do_locked_imm_E_adcl_0x31415927();
989 do_locked_imm_E_adcq_0x7D();
990 do_locked_imm_E_adcq_0x31415927();
991
992 do_locked_imm_E_sbbb_0x7F();
993 do_locked_imm_E_sbbb_0xF1();
994 do_locked_imm_E_sbbw_0x7E();
995 do_locked_imm_E_sbbw_0x9325();
996 do_locked_imm_E_sbbl_0x7D();
997 do_locked_imm_E_sbbl_0x31415927();
998 do_locked_imm_E_sbbq_0x7D();
999 do_locked_imm_E_sbbq_0x31415927();
1000
1001 do_locked_imm_E_andb_0x7F();
1002 do_locked_imm_E_andb_0xF1();
1003 do_locked_imm_E_andw_0x7E();
1004 do_locked_imm_E_andw_0x9325();
1005 do_locked_imm_E_andl_0x7D();
1006 do_locked_imm_E_andl_0x31415927();
1007 do_locked_imm_E_andq_0x7D();
1008 do_locked_imm_E_andq_0x31415927();
1009
1010 do_locked_imm_E_subb_0x7F();
1011 do_locked_imm_E_subb_0xF1();
1012 do_locked_imm_E_subw_0x7E();
1013 do_locked_imm_E_subw_0x9325();
1014 do_locked_imm_E_subl_0x7D();
1015 do_locked_imm_E_subl_0x31415927();
1016 do_locked_imm_E_subq_0x7D();
1017 do_locked_imm_E_subq_0x31415927();
1018
1019 do_locked_imm_E_xorb_0x7F();
1020 do_locked_imm_E_xorb_0xF1();
1021 do_locked_imm_E_xorw_0x7E();
1022 do_locked_imm_E_xorw_0x9325();
1023 do_locked_imm_E_xorl_0x7D();
1024 do_locked_imm_E_xorl_0x31415927();
1025 do_locked_imm_E_xorq_0x7D();
1026 do_locked_imm_E_xorq_0x31415927();
1027 // 4 * 7 + 8 * 7 == 84
1028
1029 do_locked_unary_E_decb();
1030 do_locked_unary_E_decw();
1031 do_locked_unary_E_decl();
1032 do_locked_unary_E_decq();
1033
1034 do_locked_unary_E_incb();
1035 do_locked_unary_E_incw();
1036 do_locked_unary_E_incl();
1037 do_locked_unary_E_incq();
1038
1039 do_locked_unary_E_negb();
1040 do_locked_unary_E_negw();
1041 do_locked_unary_E_negl();
1042 do_locked_unary_E_negq();
1043
1044 do_locked_unary_E_notb();
1045 do_locked_unary_E_notw();
1046 do_locked_unary_E_notl();
1047 do_locked_unary_E_notq();
1048 // 100
1049
1050 do_bt_G_E_tests();
1051 // 109
1052 do_bt_imm_E_tests();
1053 // 118
1054
1055 // So there should be 118 lock-prefixed instructions in the
1056 // disassembly of this compilation unit.
1057 // confirm with
1058 // objdump -d ./amd64locked | grep lock | grep -v do_lock | grep -v elf64 | wc
1059
1060
1061 { UInt crcExpd = 0xDF0656F1;
1062 theCRC = crcFinalise( theCRC );
1063 if (theCRC == crcExpd) {
1064 printf("amd64locked: PASS: CRCs actual 0x%08X expected 0x%08X\n",
1065 theCRC, crcExpd);
1066 } else {
1067 printf("amd64locked: FAIL: CRCs actual 0x%08X expected 0x%08X\n",
1068 theCRC, crcExpd);
1069 printf("amd64locked: set #define VERBOSE 1 to diagnose\n");
1070 }
1071 }
1072
1073 return 0;
1074 }
1075