• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <assert.h>
5 
6 #define VERBOSE 0
7 
8 typedef  unsigned int            UInt;
9 typedef  unsigned char           UChar;
10 typedef  unsigned long long int  ULong;
11 typedef  signed long long int    Long;
12 typedef  signed int              Int;
13 typedef  unsigned short          UShort;
14 typedef  unsigned long           UWord;
15 typedef  char                    HChar;
16 
/* Deterministic pseudo-random source for the tests: a simple
   multiply-with-carry generator built from two 16-bit lanes.  The state
   is private to this function and starts from fixed seeds (11 and 13),
   so every run of the program sees the same sequence. */
unsigned myrandom(void)
{
   static unsigned lane_w = 11;
   static unsigned lane_z = 13;
   unsigned z_low  = lane_z & 65535;
   unsigned z_high = lane_z >> 16;
   unsigned w_low  = lane_w & 65535;
   unsigned w_high = lane_w >> 16;

   /* Each lane: multiply the low half, add the high half as the carry. */
   lane_z = 36969 * z_low + z_high;
   lane_w = 18000 * w_low + w_high;

   /* Combine: z supplies the upper bits, w the lower ones. */
   return (lane_z << 16) + lane_w;
}
28 
29 /////////////////////////////////////////////////////////////////
30 // BEGIN crc32 stuff                                           //
31 /////////////////////////////////////////////////////////////////
32 
/* Lookup table for the byte-at-a-time CRC that checksums the test
   output.  UPDATE_CRC indexes it with (crc >> 24) ^ byte, so it holds
   256 entries, one per possible index value.
   NOTE(review): entry [1] is 0x04c11db7, which looks like the usual
   CRC-32 generator polynomial in MSB-first form -- not verified here. */
33 static const UInt crc32Table[256] = {
34 
35    /*-- Ugly, innit? --*/
36 
37    0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
38    0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
39    0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
40    0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
41    0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
42    0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
43    0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
44    0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
45    0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
46    0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
47    0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
48    0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
49    0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
50    0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
51    0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
52    0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
53    0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
54    0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
55    0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
56    0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
57    0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
58    0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
59    0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
60    0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
61    0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
62    0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
63    0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
64    0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
65    0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
66    0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
67    0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
68    0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
69    0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
70    0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
71    0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
72    0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
73    0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
74    0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
75    0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
76    0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
77    0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
78    0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
79    0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
80    0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
81    0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
82    0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
83    0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
84    0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
85    0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
86    0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
87    0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
88    0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
89    0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
90    0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
91    0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
92    0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
93    0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
94    0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
95    0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
96    0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
97    0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
98    0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
99    0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
100    0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
101 };
102 
/* Fold one byte `cha` into the running CRC held in the lvalue `crcVar`:
   shift the CRC left by 8 and XOR in the table entry selected by the
   CRC's top byte XORed with the new byte.
   `crcVar` is evaluated more than once, so pass a plain variable.
   Arguments are parenthesised and the body is wrapped in do/while(0) so
   the macro behaves like a single statement wherever it is used. */
#define UPDATE_CRC(crcVar,cha)                        \
   do {                                               \
      (crcVar) = ((crcVar) << 8) ^                    \
                 crc32Table[((crcVar) >> 24) ^        \
                            ((UChar)(cha))];          \
   } while (0)
109 
/* Feed `nBytes` bytes starting at `bytes` into a running CRC.
   `crcIn` is the CRC accumulated so far; the updated value is returned.
   With nBytes == 0 the input CRC is returned unchanged and `bytes` is
   never read. */
static UInt crcBytes ( UChar* bytes, UWord nBytes, UInt crcIn )
{
   UInt  acc = crcIn;
   UWord idx;

   /* One table step per input byte, in order. */
   for (idx = 0; idx < nBytes; idx++) {
      UPDATE_CRC(acc, bytes[idx]);
   }
   return acc;
}
128 
/* Final CRC step: return the running value with every bit inverted. */
static UInt crcFinalise ( UInt crc ) {
   const UInt allOnes = ~(UInt)0;
   return crc ^ allOnes;
}
132 
133 ////////
134 
135 static UInt theCRC = 0xFFFFFFFF;
136 
137 static HChar outBuf[1024];
138 // take output that's in outBuf, length as specified, and
139 // update the running crc.
send(int nbytes)140 static void send ( int nbytes )
141 {
142    assert( ((unsigned int)nbytes) < sizeof(outBuf)-1);
143    assert(outBuf[nbytes] == 0);
144    theCRC = crcBytes( (UChar*)&outBuf[0], nbytes, theCRC );
145    if (VERBOSE) printf("SEND %08x %s", theCRC, outBuf);
146 }
147 
148 
149 /////////////////////////////////////////////////////////////////
150 // END crc32 stuff                                             //
151 /////////////////////////////////////////////////////////////////
152 
/* Operand values fed to the locked-instruction tests (val[] is indexed
   by the v1/v2 loops in the GEN_do_locked_* macros).  Two variants
   exist; the `#if 0` below disables the full 76-entry table and selects
   the shortened 36-entry one.
   NOTE(review): in the shortened table the 0xFFFFFFxx group uses
   ..7E/..7F where the other three groups use ..7F/..80.  main() compares
   against a fixed expected CRC, so changing any value here changes the
   result -- do not "tidy" this without regenerating that constant. */
153 #if 0
154 
155 // full version
156 #define NVALS 76
157 
158 static ULong val[NVALS]
159     = { 0x00ULL, 0x01ULL, 0x02ULL, 0x03ULL,
160         0x3FULL, 0x40ULL, 0x41ULL,
161         0x7EULL, 0x7FULL, 0x80ULL, 0x81ULL, 0x82ULL,
162         0xBFULL, 0xC0ULL, 0xC1ULL,
163         0xFCULL, 0xFDULL, 0xFEULL, 0xFFULL,
164 
165         0xFF00ULL, 0xFF01ULL, 0xFF02ULL, 0xFF03ULL,
166         0xFF3FULL, 0xFF40ULL, 0xFF41ULL,
167         0xFF7EULL, 0xFF7FULL, 0xFF80ULL, 0xFF81ULL, 0xFF82ULL,
168         0xFFBFULL, 0xFFC0ULL, 0xFFC1ULL,
169         0xFFFCULL, 0xFFFDULL, 0xFFFEULL, 0xFFFFULL,
170 
171         0xFFFFFF00ULL, 0xFFFFFF01ULL, 0xFFFFFF02ULL, 0xFFFFFF03ULL,
172         0xFFFFFF3FULL, 0xFFFFFF40ULL, 0xFFFFFF41ULL,
173         0xFFFFFF7EULL, 0xFFFFFF7FULL, 0xFFFFFF80ULL, 0xFFFFFF81ULL, 0xFFFFFF82ULL,
174         0xFFFFFFBFULL, 0xFFFFFFC0ULL, 0xFFFFFFC1ULL,
175         0xFFFFFFFCULL, 0xFFFFFFFDULL, 0xFFFFFFFEULL, 0xFFFFFFFFULL,
176 
177         0xFFFFFFFFFFFFFF00ULL, 0xFFFFFFFFFFFFFF01ULL, 0xFFFFFFFFFFFFFF02ULL,
178                                0xFFFFFFFFFFFFFF03ULL,
179         0xFFFFFFFFFFFFFF3FULL, 0xFFFFFFFFFFFFFF40ULL, 0xFFFFFFFFFFFFFF41ULL,
180         0xFFFFFFFFFFFFFF7EULL, 0xFFFFFFFFFFFFFF7FULL, 0xFFFFFFFFFFFFFF80ULL,
181                                0xFFFFFFFFFFFFFF81ULL, 0xFFFFFFFFFFFFFF82ULL,
182         0xFFFFFFFFFFFFFFBFULL, 0xFFFFFFFFFFFFFFC0ULL, 0xFFFFFFFFFFFFFFC1ULL,
183         0xFFFFFFFFFFFFFFFCULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFEULL,
184                                0xFFFFFFFFFFFFFFFFULL
185       };
186 
187 #else
188 
189 // shortened version, for use as valgrind regtest
190 #define NVALS 36
191 
192 static ULong val[NVALS]
193     = { 0x00ULL, 0x01ULL,
194         0x3FULL, 0x40ULL,
195         0x7FULL, 0x80ULL,
196         0xBFULL, 0xC0ULL,
197         0xFFULL,
198 
199         0xFF00ULL, 0xFF01ULL,
200         0xFF3FULL, 0xFF40ULL,
201         0xFF7FULL, 0xFF80ULL,
202         0xFFBFULL, 0xFFC0ULL,
203         0xFFFFULL,
204 
205         0xFFFFFF00ULL, 0xFFFFFF01ULL,
206         0xFFFFFF3FULL, 0xFFFFFF40ULL,
207         0xFFFFFF7EULL, 0xFFFFFF7FULL,
208         0xFFFFFFBFULL, 0xFFFFFFC0ULL,
209         0xFFFFFFFFULL,
210 
211         0xFFFFFFFFFFFFFF00ULL, 0xFFFFFFFFFFFFFF01ULL,
212         0xFFFFFFFFFFFFFF3FULL, 0xFFFFFFFFFFFFFF40ULL,
213         0xFFFFFFFFFFFFFF7FULL, 0xFFFFFFFFFFFFFF80ULL,
214         0xFFFFFFFFFFFFFFBFULL, 0xFFFFFFFFFFFFFFC0ULL,
215         0xFFFFFFFFFFFFFFFFULL
216       };
217 
218 #endif
219 
220 /////////////////////////////////////
221 
/* Bit masks for the six condition-code flags the tests drive.  The
   GEN_do_locked_* macros OR a subset of these into the flags word they
   load with popfq, and AND the word read back via pushfq with CC_MASK
   so that only these six bits appear in the reported CCOUT value. */
222 #define CC_C    0x0001
223 #define CC_P    0x0004
224 #define CC_A    0x0010
225 #define CC_Z    0x0040
226 #define CC_S    0x0080
227 #define CC_O    0x0800
228 
229 #define CC_MASK (CC_C | CC_P | CC_A | CC_Z | CC_S | CC_O)
230 
/* GEN_do_locked_G_E(_name,_eax) defines a noinline function
   do_locked_G_E_<_name>(void) that tests the register-to-memory form of
   one lock-prefixed instruction.
     _name : instruction mnemonic, pasted into the asm and the output.
     _eax  : name of the accumulator sub-register of the matching width
             (al/ax/eax/rax), used as the source operand.
   For every pair (val[v1], val[v2]) and every combination of the six
   O/S/Z/A/C/P input flags it:
     - stores the flags word, the G value and the address of the E value
       in block[0..2];
     - in asm: loads the flags via pushq/popfq, loads G into %rax and the
       E address into %rbx, runs "lock; <_name> %<_eax>,(%rbx)", then
       saves the resulting flags into block[3] via pushfq/popq;
     - formats inputs and outputs (CCOUT masked with CC_MASK) into outBuf
       and hands sprintf's length to send().
   e_val is volatile and the asm declares a "memory" clobber because the
   instruction modifies e_val through the pointer held in %rbx. */
231 #define GEN_do_locked_G_E(_name,_eax)   \
232   \
233   __attribute__((noinline)) void do_locked_G_E_##_name ( void )  \
234   {   \
235     volatile Long e_val, g_val, e_val_before;   \
236     Long o, s, z, a, c, p, v1, v2, flags_in;       \
237     Long block[4];   \
238     \
239     for (v1 = 0; v1 < NVALS; v1++) {   \
240     for (v2 = 0; v2 < NVALS; v2++) {   \
241     \
242     for (o = 0; o < 2; o++) {   \
243     for (s = 0; s < 2; s++) {   \
244     for (z = 0; z < 2; z++) {   \
245     for (a = 0; a < 2; a++) {   \
246     for (c = 0; c < 2; c++) {   \
247     for (p = 0; p < 2; p++) {   \
248       \
249       flags_in = (o ? CC_O : 0)   \
250                | (s ? CC_S : 0)   \
251                | (z ? CC_Z : 0)   \
252                | (a ? CC_A : 0)   \
253                | (c ? CC_C : 0)   \
254                | (p ? CC_P : 0);   \
255       \
256       g_val = val[v1];   \
257       e_val = val[v2];   \
258       e_val_before = e_val;   \
259       \
260       block[0] = flags_in;   \
261       block[1] = g_val;   \
262       block[2] = (long)&e_val;   \
263       block[3] = 0;   \
264       __asm__ __volatile__(   \
265           "movq 0(%0), %%rax\n\t"   \
266           "pushq %%rax\n\t"   \
267           "popfq\n\t"   \
268           "movq 8(%0), %%rax\n\t"   \
269           "movq 16(%0), %%rbx\n\t"   \
270           "lock; " #_name " %%" #_eax ",(%%rbx)\n\t"   \
271           "pushfq\n\t"   \
272           "popq %%rax\n\t"   \
273           "movq %%rax, 24(%0)\n\t"   \
274           : : "r"(&block[0]) : "rax","rbx","cc","memory"   \
275       );   \
276       \
277       send( \
278       sprintf(outBuf, \
279              "%s G=%016llx E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n",       \
280              #_name, g_val, e_val_before, flags_in,   \
281               e_val, block[3] & CC_MASK));            \
282       \
283     }}}}}}   \
284     \
285     }}   \
286   }
287 
/* Instantiate do_locked_G_E_<insn>() for add/or/adc/sbb/and/sub/xor at
   each of the four operand widths (b/w/l/q), pairing every mnemonic
   with the accumulator sub-register of the same width.  That is 7 x 4
   functions, each containing one lock-prefixed instruction. */
GEN_do_locked_G_E(addb,al)288 GEN_do_locked_G_E(addb,al)
289 GEN_do_locked_G_E(addw,ax)
290 GEN_do_locked_G_E(addl,eax)
291 GEN_do_locked_G_E(addq,rax)
292 
293 GEN_do_locked_G_E(orb, al)
294 GEN_do_locked_G_E(orw, ax)
295 GEN_do_locked_G_E(orl, eax)
296 GEN_do_locked_G_E(orq, rax)
297 
298 GEN_do_locked_G_E(adcb,al)
299 GEN_do_locked_G_E(adcw,ax)
300 GEN_do_locked_G_E(adcl,eax)
301 GEN_do_locked_G_E(adcq,rax)
302 
303 GEN_do_locked_G_E(sbbb,al)
304 GEN_do_locked_G_E(sbbw,ax)
305 GEN_do_locked_G_E(sbbl,eax)
306 GEN_do_locked_G_E(sbbq,rax)
307 
308 GEN_do_locked_G_E(andb,al)
309 GEN_do_locked_G_E(andw,ax)
310 GEN_do_locked_G_E(andl,eax)
311 GEN_do_locked_G_E(andq,rax)
312 
313 GEN_do_locked_G_E(subb,al)
314 GEN_do_locked_G_E(subw,ax)
315 GEN_do_locked_G_E(subl,eax)
316 GEN_do_locked_G_E(subq,rax)
317 
318 GEN_do_locked_G_E(xorb,al)
319 GEN_do_locked_G_E(xorw,ax)
320 GEN_do_locked_G_E(xorl,eax)
321 GEN_do_locked_G_E(xorq,rax)
322 
323 
324 
325 
/* GEN_do_locked_imm_E(_name,_eax,_imm) defines a noinline function
   do_locked_imm_E_<_name>_<_imm>(void) that tests the immediate-to-memory
   form of one lock-prefixed instruction.
     _name : instruction mnemonic, pasted into the asm and the output.
     _eax  : accepted but not referenced anywhere in the macro body.
     _imm  : immediate operand; pasted into the asm as "$<_imm>", into the
             generated function name, and into the output line.
   For every val[v2] and every combination of the six O/S/Z/A/C/P input
   flags it:
     - stores the flags word and the address of the E value in
       block[0..1];
     - in asm: loads the flags via pushq/popfq, loads the E address into
       %rbx, runs "lock; <_name> $<_imm>,(%rbx)", then saves the resulting
       flags into block[2] via pushfq/popq;
     - formats inputs and outputs (CCOUT masked with CC_MASK) into outBuf
       and hands sprintf's length to send(). */
326 #define GEN_do_locked_imm_E(_name,_eax,_imm)        \
327   \
328   __attribute__((noinline)) void do_locked_imm_E_##_name##_##_imm ( void )  \
329   {   \
330     volatile Long e_val, e_val_before;   \
331     Long o, s, z, a, c, p, v2, flags_in;   \
332     Long block[3];   \
333     \
334     for (v2 = 0; v2 < NVALS; v2++) {   \
335     \
336     for (o = 0; o < 2; o++) {   \
337     for (s = 0; s < 2; s++) {   \
338     for (z = 0; z < 2; z++) {   \
339     for (a = 0; a < 2; a++) {   \
340     for (c = 0; c < 2; c++) {   \
341     for (p = 0; p < 2; p++) {   \
342       \
343       flags_in = (o ? CC_O : 0)   \
344                | (s ? CC_S : 0)   \
345                | (z ? CC_Z : 0)   \
346                | (a ? CC_A : 0)   \
347                | (c ? CC_C : 0)   \
348                | (p ? CC_P : 0);   \
349       \
350       e_val = val[v2];   \
351       e_val_before = e_val;   \
352       \
353       block[0] = flags_in;   \
354       block[1] = (long)&e_val;   \
355       block[2] = 0;   \
356       __asm__ __volatile__(   \
357           "movq 0(%0), %%rax\n\t"   \
358           "pushq %%rax\n\t"   \
359           "popfq\n\t"   \
360           "movq 8(%0), %%rbx\n\t"   \
361           "lock; " #_name " $" #_imm ",(%%rbx)\n\t"   \
362           "pushfq\n\t"   \
363           "popq %%rax\n\t"   \
364           "movq %%rax, 16(%0)\n\t"   \
365           : : "r"(&block[0]) : "rax","rbx","cc","memory"   \
366       );   \
367       \
368       send( \
369            sprintf(outBuf, \
370            "%s I=%s E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n",    \
371              #_name, #_imm, e_val_before, flags_in,         \
372                    e_val, block[2] & CC_MASK));             \
373       \
374     }}}}}}   \
375     \
376     }   \
377   }
378 
/* Instantiate do_locked_imm_E_<insn>_<imm>() for add/or/adc/sbb/and/
   sub/xor.  Each mnemonic gets two immediates per operand width
   (b/w/l/q), i.e. 8 functions per operation and 7 x 8 in total, each
   containing one lock-prefixed instruction. */
379 GEN_do_locked_imm_E(addb,al,0x7F)
380 GEN_do_locked_imm_E(addb,al,0xF1)
381 GEN_do_locked_imm_E(addw,ax,0x7E)
382 GEN_do_locked_imm_E(addw,ax,0x9325)
383 GEN_do_locked_imm_E(addl,eax,0x7D)
384 GEN_do_locked_imm_E(addl,eax,0x31415927)
385 GEN_do_locked_imm_E(addq,rax,0x7D)
386 GEN_do_locked_imm_E(addq,rax,0x31415927)
387 
388 GEN_do_locked_imm_E(orb,al,0x7F)
389 GEN_do_locked_imm_E(orb,al,0xF1)
390 GEN_do_locked_imm_E(orw,ax,0x7E)
391 GEN_do_locked_imm_E(orw,ax,0x9325)
392 GEN_do_locked_imm_E(orl,eax,0x7D)
393 GEN_do_locked_imm_E(orl,eax,0x31415927)
394 GEN_do_locked_imm_E(orq,rax,0x7D)
395 GEN_do_locked_imm_E(orq,rax,0x31415927)
396 
397 GEN_do_locked_imm_E(adcb,al,0x7F)
398 GEN_do_locked_imm_E(adcb,al,0xF1)
399 GEN_do_locked_imm_E(adcw,ax,0x7E)
400 GEN_do_locked_imm_E(adcw,ax,0x9325)
401 GEN_do_locked_imm_E(adcl,eax,0x7D)
402 GEN_do_locked_imm_E(adcl,eax,0x31415927)
403 GEN_do_locked_imm_E(adcq,rax,0x7D)
404 GEN_do_locked_imm_E(adcq,rax,0x31415927)
405 
406 GEN_do_locked_imm_E(sbbb,al,0x7F)
407 GEN_do_locked_imm_E(sbbb,al,0xF1)
408 GEN_do_locked_imm_E(sbbw,ax,0x7E)
409 GEN_do_locked_imm_E(sbbw,ax,0x9325)
410 GEN_do_locked_imm_E(sbbl,eax,0x7D)
411 GEN_do_locked_imm_E(sbbl,eax,0x31415927)
412 GEN_do_locked_imm_E(sbbq,rax,0x7D)
413 GEN_do_locked_imm_E(sbbq,rax,0x31415927)
414 
415 GEN_do_locked_imm_E(andb,al,0x7F)
416 GEN_do_locked_imm_E(andb,al,0xF1)
417 GEN_do_locked_imm_E(andw,ax,0x7E)
418 GEN_do_locked_imm_E(andw,ax,0x9325)
419 GEN_do_locked_imm_E(andl,eax,0x7D)
420 GEN_do_locked_imm_E(andl,eax,0x31415927)
421 GEN_do_locked_imm_E(andq,rax,0x7D)
422 GEN_do_locked_imm_E(andq,rax,0x31415927)
423 
424 GEN_do_locked_imm_E(subb,al,0x7F)
425 GEN_do_locked_imm_E(subb,al,0xF1)
426 GEN_do_locked_imm_E(subw,ax,0x7E)
427 GEN_do_locked_imm_E(subw,ax,0x9325)
428 GEN_do_locked_imm_E(subl,eax,0x7D)
429 GEN_do_locked_imm_E(subl,eax,0x31415927)
430 GEN_do_locked_imm_E(subq,rax,0x7D)
431 GEN_do_locked_imm_E(subq,rax,0x31415927)
432 
433 GEN_do_locked_imm_E(xorb,al,0x7F)
434 GEN_do_locked_imm_E(xorb,al,0xF1)
435 GEN_do_locked_imm_E(xorw,ax,0x7E)
436 GEN_do_locked_imm_E(xorw,ax,0x9325)
437 GEN_do_locked_imm_E(xorl,eax,0x7D)
438 GEN_do_locked_imm_E(xorl,eax,0x31415927)
439 GEN_do_locked_imm_E(xorq,rax,0x7D)
440 GEN_do_locked_imm_E(xorq,rax,0x31415927)
441 
/* GEN_do_locked_unary_E(_name,_eax) defines a noinline function
   do_locked_unary_E_<_name>(void) that tests a lock-prefixed
   single-operand instruction on memory.
     _name : instruction mnemonic, pasted into the asm and the output.
     _eax  : accepted but not referenced anywhere in the macro body.
   For every val[v2] and every combination of the six O/S/Z/A/C/P input
   flags it:
     - stores the flags word and the address of the E value in
       block[0..1];
     - in asm: loads the flags via pushq/popfq, loads the E address into
       %rbx, runs "lock; <_name> (%rbx)", then saves the resulting flags
       into block[2] via pushfq/popq;
     - formats inputs and outputs (CCOUT masked with CC_MASK) into outBuf
       and hands sprintf's length to send(). */
442 #define GEN_do_locked_unary_E(_name,_eax)        \
443   \
444   __attribute__((noinline)) void do_locked_unary_E_##_name ( void )  \
445   {   \
446     volatile Long e_val, e_val_before;   \
447     Long o, s, z, a, c, p, v2, flags_in;     \
448     Long block[3];   \
449     \
450     for (v2 = 0; v2 < NVALS; v2++) {   \
451     \
452     for (o = 0; o < 2; o++) {   \
453     for (s = 0; s < 2; s++) {   \
454     for (z = 0; z < 2; z++) {   \
455     for (a = 0; a < 2; a++) {   \
456     for (c = 0; c < 2; c++) {   \
457     for (p = 0; p < 2; p++) {   \
458       \
459       flags_in = (o ? CC_O : 0)   \
460                | (s ? CC_S : 0)   \
461                | (z ? CC_Z : 0)   \
462                | (a ? CC_A : 0)   \
463                | (c ? CC_C : 0)   \
464                | (p ? CC_P : 0);   \
465       \
466       e_val = val[v2];   \
467       e_val_before = e_val;   \
468       \
469       block[0] = flags_in;   \
470       block[1] = (long)&e_val;   \
471       block[2] = 0;   \
472       __asm__ __volatile__(   \
473           "movq 0(%0), %%rax\n\t"   \
474           "pushq %%rax\n\t"   \
475           "popfq\n\t"   \
476           "movq 8(%0), %%rbx\n\t"   \
477           "lock; " #_name " (%%rbx)\n\t"   \
478           "pushfq\n\t"   \
479           "popq %%rax\n\t"   \
480           "movq %%rax, 16(%0)\n\t"   \
481           : : "r"(&block[0]) : "rax","rbx","cc","memory"   \
482       );   \
483       \
484       send( \
485            sprintf(outBuf, \
486             "%s E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n", \
487              #_name, e_val_before, flags_in,         \
488             e_val, block[2] & CC_MASK));                       \
489       \
490     }}}}}}   \
491     \
492     }   \
493   }
494 
/* Instantiate do_locked_unary_E_<insn>() for dec/inc/neg/not at each of
   the four operand widths (b/w/l/q): 4 x 4 functions, each containing
   one lock-prefixed instruction. */
495 GEN_do_locked_unary_E(decb,al)
496 GEN_do_locked_unary_E(decw,ax)
497 GEN_do_locked_unary_E(decl,eax)
498 GEN_do_locked_unary_E(decq,rax)
499 
500 GEN_do_locked_unary_E(incb,al)
501 GEN_do_locked_unary_E(incw,ax)
502 GEN_do_locked_unary_E(incl,eax)
503 GEN_do_locked_unary_E(incq,rax)
504 
505 GEN_do_locked_unary_E(negb,al)
506 GEN_do_locked_unary_E(negw,ax)
507 GEN_do_locked_unary_E(negl,eax)
508 GEN_do_locked_unary_E(negq,rax)
509 
510 GEN_do_locked_unary_E(notb,al)
511 GEN_do_locked_unary_E(notw,ax)
512 GEN_do_locked_unary_E(notl,eax)
513 GEN_do_locked_unary_E(notq,rax)
514 
515 
516 /////////////////////////////////////////////////////////////////
517 
518 ULong btsq_mem ( UChar* base, int bitno )
519 {
520    ULong res;
521    __asm__
522    __volatile__("lock; btsq\t%2, %0\n\t"
523                 "setc   %%dl\n\t"
524                 "movzbq %%dl,%1\n"
525                 : "=m" (*base), "=r" (res)
526                 : "r" ((ULong)bitno) : "rdx","cc","memory" );
527    /* Pretty meaningless to dereference base here, but that's what you
528       have to do to get a btsl insn which refers to memory starting at
529       base. */
530    return res;
531 }
btsl_mem(UChar * base,int bitno)532 ULong btsl_mem ( UChar* base, int bitno )
533 {
534    ULong res;
535    __asm__
536    __volatile__("lock; btsl\t%2, %0\n\t"
537                 "setc   %%dl\n\t"
538                 "movzbq %%dl,%1\n"
539                 : "=m" (*base), "=r" (res)
540                 : "r" ((UInt)bitno));
541    return res;
542 }
btsw_mem(UChar * base,int bitno)543 ULong btsw_mem ( UChar* base, int bitno )
544 {
545    ULong res;
546    __asm__
547    __volatile__("lock; btsw\t%w2, %0\n\t"
548                 "setc   %%dl\n\t"
549                 "movzbq %%dl,%1\n"
550                 : "=m" (*base), "=r" (res)
551                 : "r" ((ULong)bitno));
552    return res;
553 }
554 
btrq_mem(UChar * base,int bitno)555 ULong btrq_mem ( UChar* base, int bitno )
556 {
557    ULong res;
558    __asm__
559    __volatile__("lock; btrq\t%2, %0\n\t"
560                 "setc   %%dl\n\t"
561                 "movzbq %%dl,%1\n"
562                 : "=m" (*base), "=r" (res)
563                 : "r" ((ULong)bitno));
564    return res;
565 }
btrl_mem(UChar * base,int bitno)566 ULong btrl_mem ( UChar* base, int bitno )
567 {
568    ULong res;
569    __asm__
570    __volatile__("lock; btrl\t%2, %0\n\t"
571                 "setc   %%dl\n\t"
572                 "movzbq %%dl,%1\n"
573                 : "=m" (*base), "=r" (res)
574                 : "r" ((UInt)bitno));
575    return res;
576 }
btrw_mem(UChar * base,int bitno)577 ULong btrw_mem ( UChar* base, int bitno )
578 {
579    ULong res;
580    __asm__
581    __volatile__("lock; btrw\t%w2, %0\n\t"
582                 "setc   %%dl\n\t"
583                 "movzbq %%dl,%1\n"
584                 : "=m" (*base), "=r" (res)
585                 : "r" ((ULong)bitno));
586    return res;
587 }
588 
btcq_mem(UChar * base,int bitno)589 ULong btcq_mem ( UChar* base, int bitno )
590 {
591    ULong res;
592    __asm__
593    __volatile__("lock; btcq\t%2, %0\n\t"
594                 "setc   %%dl\n\t"
595                 "movzbq %%dl,%1\n"
596                 : "=m" (*base), "=r" (res)
597                 : "r" ((ULong)bitno));
598    return res;
599 }
btcl_mem(UChar * base,int bitno)600 ULong btcl_mem ( UChar* base, int bitno )
601 {
602    ULong res;
603    __asm__
604    __volatile__("lock; btcl\t%2, %0\n\t"
605                 "setc   %%dl\n\t"
606                 "movzbq %%dl,%1\n"
607                 : "=m" (*base), "=r" (res)
608                 : "r" ((UInt)bitno));
609    return res;
610 }
btcw_mem(UChar * base,int bitno)611 ULong btcw_mem ( UChar* base, int bitno )
612 {
613    ULong res;
614    __asm__
615    __volatile__("lock; btcw\t%w2, %0\n\t"
616                 "setc   %%dl\n\t"
617                 "movzbq %%dl,%1\n"
618                 : "=m" (*base), "=r" (res)
619                 : "r" ((ULong)bitno));
620    return res;
621 }
622 
btq_mem(UChar * base,int bitno)623 ULong btq_mem ( UChar* base, int bitno )
624 {
625    ULong res;
626    __asm__
627    __volatile__("btq\t%2, %0\n\t"
628                 "setc   %%dl\n\t"
629                 "movzbq %%dl,%1\n"
630                 : "=m" (*base), "=r" (res)
631                 : "r" ((ULong)bitno)
632                 : "cc", "memory");
633    return res;
634 }
btl_mem(UChar * base,int bitno)635 ULong btl_mem ( UChar* base, int bitno )
636 {
637    ULong res;
638    __asm__
639    __volatile__("btl\t%2, %0\n\t"
640                 "setc   %%dl\n\t"
641                 "movzbq %%dl,%1\n"
642                 : "=m" (*base), "=r" (res)
643                 : "r" ((UInt)bitno)
644                 : "cc", "memory");
645    return res;
646 }
btw_mem(UChar * base,int bitno)647 ULong btw_mem ( UChar* base, int bitno )
648 {
649    ULong res;
650    __asm__
651    __volatile__("btw\t%w2, %0\n\t"
652                 "setc   %%dl\n\t"
653                 "movzbq %%dl,%1\n"
654                 : "=m" (*base), "=r" (res)
655                 : "r" ((ULong)bitno));
656    return res;
657 }
658 
/* Rotate x left by one bit position: the top bit (bit 63) wraps around
   to become bit 0. */
ULong rol1 ( ULong x )
{
  const ULong wrapped_top = x >> 63;
  const ULong shifted     = x << 1;
  return shifted | wrapped_top;
}
663 
do_bt_G_E_tests(void)664 void do_bt_G_E_tests ( void )
665 {
666    ULong  n, bitoff, op;
667    ULong  c;
668    UChar* block;
669    ULong  carrydep, res;;
670 
671    /*------------------------ MEM-Q -----------------------*/
672 
673    carrydep = 0;
674    block = calloc(200,1);
675    block += 100;
676    /* Valid bit offsets are -800 .. 799 inclusive. */
677 
678    for (n = 0; n < 10000; n++) {
679       bitoff = (myrandom() % 1600) - 800;
680       op = myrandom() % 4;
681       c = 2;
682       switch (op) {
683          case 0: c = btsq_mem(block, bitoff); break;
684          case 1: c = btrq_mem(block, bitoff); break;
685          case 2: c = btcq_mem(block, bitoff); break;
686          case 3: c = btq_mem(block, bitoff); break;
687       }
688       c &= 255;
689       assert(c == 0 || c == 1);
690       carrydep = c ? (rol1(carrydep) ^ (Long)bitoff) : carrydep;
691    }
692 
693    /* Compute final result */
694    block -= 100;
695    res = 0;
696    for (n = 0; n < 200; n++) {
697       UChar ch = block[n];
698       /* printf("%d ", (int)block[n]); */
699       res = rol1(res) ^ (ULong)ch;
700    }
701 
702    send( sprintf(outBuf,
703                  "bt{s,r,c}q: final res 0x%llx, carrydep 0x%llx\n",
704                  res, carrydep));
705    free(block);
706 
707    /*------------------------ MEM-L -----------------------*/
708 
709    carrydep = 0;
710    block = calloc(200,1);
711    block += 100;
712    /* Valid bit offsets are -800 .. 799 inclusive. */
713 
714    for (n = 0; n < 10000; n++) {
715       bitoff = (myrandom() % 1600) - 800;
716       op = myrandom() % 4;
717       c = 2;
718       switch (op) {
719          case 0: c = btsl_mem(block, bitoff); break;
720          case 1: c = btrl_mem(block, bitoff); break;
721          case 2: c = btcl_mem(block, bitoff); break;
722          case 3: c = btl_mem(block, bitoff); break;
723       }
724       c &= 255;
725       assert(c == 0 || c == 1);
726       carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
727    }
728 
729    /* Compute final result */
730    block -= 100;
731    res = 0;
732    for (n = 0; n < 200; n++) {
733       UChar ch = block[n];
734       /* printf("%d ", (int)block[n]); */
735       res = rol1(res) ^ (ULong)ch;
736    }
737 
738    send( sprintf(outBuf,
739                  "bt{s,r,c}l: final res 0x%llx, carrydep 0x%llx\n",
740                  res, carrydep));
741    free(block);
742 
743    /*------------------------ MEM-W -----------------------*/
744 
745    carrydep = 0;
746    block = calloc(200,1);
747    block += 100;
748    /* Valid bit offsets are -800 .. 799 inclusive. */
749 
750    for (n = 0; n < 10000; n++) {
751       bitoff = (myrandom() % 1600) - 800;
752       op = myrandom() % 4;
753       c = 2;
754       switch (op) {
755          case 0: c = btsw_mem(block, bitoff); break;
756          case 1: c = btrw_mem(block, bitoff); break;
757          case 2: c = btcw_mem(block, bitoff); break;
758          case 3: c = btw_mem(block, bitoff); break;
759       }
760       c &= 255;
761       assert(c == 0 || c == 1);
762       carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
763    }
764 
765    /* Compute final result */
766    block -= 100;
767    res = 0;
768    for (n = 0; n < 200; n++) {
769       UChar ch = block[n];
770       /* printf("%d ", (int)block[n]); */
771       res = rol1(res) ^ (ULong)ch;
772    }
773 
774    send(sprintf(outBuf,
775                 "bt{s,r,c}w: final res 0x%llx, carrydep 0x%llx\n",
776                 res, carrydep));
777    free(block);
778 }
779 
780 
781 /////////////////////////////////////////////////////////////////
782 
783 /* Given a word, do bt/bts/btr/btc on bits 0, 1, 2 and 3 of it, and
784    also reconstruct the original bits 0, 1, 2, 3 by looking at the
785    carry flag.  Returned result has mashed bits 0-3 at the bottom and
786    the reconstructed original bits 0-3 as 4-7. */
787 
mash_mem_Q(ULong * origp)788 ULong mash_mem_Q ( ULong* origp )
789 {
790   ULong reconstructed, mashed;
791   __asm__ __volatile__ (
792      "movq %2, %%rdx\n\t"
793      ""
794      "movq $0, %%rax\n\t"
795      "\n\t"
796      "btq  $0, (%%rdx)\n\t"
797      "setb %%cl\n\t"
798      "movzbq %%cl, %%rcx\n\t"
799      "orq %%rcx, %%rax\n\t"
800      "\n\t"
801      "lock; btsq $1, (%%rdx)\n\t"
802      "setb %%cl\n\t"
803      "movzbq %%cl, %%rcx\n\t"
804      "shlq $1, %%rcx\n\t"
805      "orq %%rcx, %%rax\n\t"
806      "\n\t"
807      "lock; btrq $2, (%%rdx)\n\t"
808      "setb %%cl\n\t"
809      "movzbq %%cl, %%rcx\n\t"
810      "shlq $2, %%rcx\n\t"
811      "orq %%rcx, %%rax\n\t"
812      "\n\t"
813      "lock; btcq $3, (%%rdx)\n\t"
814      "setb %%cl\n\t"
815      "movzbq %%cl, %%rcx\n\t"
816      "shlq $3, %%rcx\n\t"
817      "orq %%rcx, %%rax\n\t"
818      "\n\t"
819      "movq %%rax, %0\n\t"
820      "movq (%%rdx), %1"
821      : "=r" (reconstructed), "=r" (mashed)
822      : "r" (origp)
823      : "rax", "rcx", "rdx", "cc");
824   return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
825 }
826 
mash_mem_L(UInt * origp)827 ULong mash_mem_L ( UInt* origp )
828 {
829   ULong reconstructed; UInt mashed;
830   __asm__ __volatile__ (
831      "movq %2, %%rdx\n\t"
832      ""
833      "movq $0, %%rax\n\t"
834      "\n\t"
835      "btl  $0, (%%rdx)\n\t"
836      "setb %%cl\n\t"
837      "movzbq %%cl, %%rcx\n\t"
838      "orq %%rcx, %%rax\n\t"
839      "\n\t"
840      "lock; btsl $1, (%%rdx)\n\t"
841      "setb %%cl\n\t"
842      "movzbq %%cl, %%rcx\n\t"
843      "shlq $1, %%rcx\n\t"
844      "orq %%rcx, %%rax\n\t"
845      "\n\t"
846      "lock; btrl $2, (%%rdx)\n\t"
847      "setb %%cl\n\t"
848      "movzbq %%cl, %%rcx\n\t"
849      "shlq $2, %%rcx\n\t"
850      "orq %%rcx, %%rax\n\t"
851      "\n\t"
852      "lock; btcl $3, (%%rdx)\n\t"
853      "setb %%cl\n\t"
854      "movzbq %%cl, %%rcx\n\t"
855      "shlq $3, %%rcx\n\t"
856      "orq %%rcx, %%rax\n\t"
857      "\n\t"
858      "movq %%rax, %0\n\t"
859      "movl (%%rdx), %1"
860      : "=r" (reconstructed), "=r" (mashed)
861      : "r" (origp)
862      : "rax", "rcx", "rdx", "cc");
863   return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
864 }
865 
mash_mem_W(UShort * origp)866 ULong mash_mem_W ( UShort* origp )
867 {
868   ULong reconstructed, mashed;
869   __asm__ __volatile__ (
870      "movq %2, %%rdx\n\t"
871      ""
872      "movq $0, %%rax\n\t"
873      "\n\t"
874      "btw  $0, (%%rdx)\n\t"
875      "setb %%cl\n\t"
876      "movzbq %%cl, %%rcx\n\t"
877      "orq %%rcx, %%rax\n\t"
878      "\n\t"
879      "lock; btsw $1, (%%rdx)\n\t"
880      "setb %%cl\n\t"
881      "movzbq %%cl, %%rcx\n\t"
882      "shlq $1, %%rcx\n\t"
883      "orq %%rcx, %%rax\n\t"
884      "\n\t"
885      "lock; btrw $2, (%%rdx)\n\t"
886      "setb %%cl\n\t"
887      "movzbq %%cl, %%rcx\n\t"
888      "shlq $2, %%rcx\n\t"
889      "orq %%rcx, %%rax\n\t"
890      "\n\t"
891      "lock; btcw $3, (%%rdx)\n\t"
892      "setb %%cl\n\t"
893      "movzbq %%cl, %%rcx\n\t"
894      "shlq $3, %%rcx\n\t"
895      "orq %%rcx, %%rax\n\t"
896      "\n\t"
897      "movq %%rax, %0\n\t"
898      "movzwq (%%rdx), %1"
899      : "=r" (reconstructed), "=r" (mashed)
900      : "r" (origp)
901      : "rax", "rcx", "rdx", "cc");
902   return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
903 }
904 
905 
do_bt_imm_E_tests(void)906 void do_bt_imm_E_tests( void )
907 {
908   ULong i;
909   ULong*  iiq = malloc(sizeof(ULong));
910   UInt*   iil = malloc(sizeof(UInt));
911   UShort* iiw = malloc(sizeof(UShort));
912   for (i = 0; i < 0x10; i++) {
913     *iiq = i;
914     *iil = i;
915     *iiw = i;
916     send(sprintf(outBuf,"0x%llx -> 0x%02llx 0x%02llx 0x%02llx\n", i,
917                  mash_mem_Q(iiq), mash_mem_L(iil), mash_mem_W(iiw)));
918   }
919   free(iiq);
920   free(iil);
921   free(iiw);
922 }
923 
924 
925 /////////////////////////////////////////////////////////////////
926 
main(void)927 int main ( void )
928 {
929   do_locked_G_E_addb();
930   do_locked_G_E_addw();
931   do_locked_G_E_addl();
932   do_locked_G_E_addq();
933 
934   do_locked_G_E_orb();
935   do_locked_G_E_orw();
936   do_locked_G_E_orl();
937   do_locked_G_E_orq();
938 
939   do_locked_G_E_adcb();
940   do_locked_G_E_adcw();
941   do_locked_G_E_adcl();
942   do_locked_G_E_adcq();
943 
944   do_locked_G_E_sbbb();
945   do_locked_G_E_sbbw();
946   do_locked_G_E_sbbl();
947   do_locked_G_E_sbbq();
948 
949   do_locked_G_E_andb();
950   do_locked_G_E_andw();
951   do_locked_G_E_andl();
952   do_locked_G_E_andq();
953 
954   do_locked_G_E_subb();
955   do_locked_G_E_subw();
956   do_locked_G_E_subl();
957   do_locked_G_E_subq();
958 
959   do_locked_G_E_xorb();
960   do_locked_G_E_xorw();
961   do_locked_G_E_xorl();
962   do_locked_G_E_xorq();
963   // 4 * 7
964 
965   do_locked_imm_E_addb_0x7F();
966   do_locked_imm_E_addb_0xF1();
967   do_locked_imm_E_addw_0x7E();
968   do_locked_imm_E_addw_0x9325();
969   do_locked_imm_E_addl_0x7D();
970   do_locked_imm_E_addl_0x31415927();
971   do_locked_imm_E_addq_0x7D();
972   do_locked_imm_E_addq_0x31415927();
973 
974   do_locked_imm_E_orb_0x7F();
975   do_locked_imm_E_orb_0xF1();
976   do_locked_imm_E_orw_0x7E();
977   do_locked_imm_E_orw_0x9325();
978   do_locked_imm_E_orl_0x7D();
979   do_locked_imm_E_orl_0x31415927();
980   do_locked_imm_E_orq_0x7D();
981   do_locked_imm_E_orq_0x31415927();
982 
983   do_locked_imm_E_adcb_0x7F();
984   do_locked_imm_E_adcb_0xF1();
985   do_locked_imm_E_adcw_0x7E();
986   do_locked_imm_E_adcw_0x9325();
987   do_locked_imm_E_adcl_0x7D();
988   do_locked_imm_E_adcl_0x31415927();
989   do_locked_imm_E_adcq_0x7D();
990   do_locked_imm_E_adcq_0x31415927();
991 
992   do_locked_imm_E_sbbb_0x7F();
993   do_locked_imm_E_sbbb_0xF1();
994   do_locked_imm_E_sbbw_0x7E();
995   do_locked_imm_E_sbbw_0x9325();
996   do_locked_imm_E_sbbl_0x7D();
997   do_locked_imm_E_sbbl_0x31415927();
998   do_locked_imm_E_sbbq_0x7D();
999   do_locked_imm_E_sbbq_0x31415927();
1000 
1001   do_locked_imm_E_andb_0x7F();
1002   do_locked_imm_E_andb_0xF1();
1003   do_locked_imm_E_andw_0x7E();
1004   do_locked_imm_E_andw_0x9325();
1005   do_locked_imm_E_andl_0x7D();
1006   do_locked_imm_E_andl_0x31415927();
1007   do_locked_imm_E_andq_0x7D();
1008   do_locked_imm_E_andq_0x31415927();
1009 
1010   do_locked_imm_E_subb_0x7F();
1011   do_locked_imm_E_subb_0xF1();
1012   do_locked_imm_E_subw_0x7E();
1013   do_locked_imm_E_subw_0x9325();
1014   do_locked_imm_E_subl_0x7D();
1015   do_locked_imm_E_subl_0x31415927();
1016   do_locked_imm_E_subq_0x7D();
1017   do_locked_imm_E_subq_0x31415927();
1018 
1019   do_locked_imm_E_xorb_0x7F();
1020   do_locked_imm_E_xorb_0xF1();
1021   do_locked_imm_E_xorw_0x7E();
1022   do_locked_imm_E_xorw_0x9325();
1023   do_locked_imm_E_xorl_0x7D();
1024   do_locked_imm_E_xorl_0x31415927();
1025   do_locked_imm_E_xorq_0x7D();
1026   do_locked_imm_E_xorq_0x31415927();
1027   // 4 * 7 + 8 * 7 == 84
1028 
1029   do_locked_unary_E_decb();
1030   do_locked_unary_E_decw();
1031   do_locked_unary_E_decl();
1032   do_locked_unary_E_decq();
1033 
1034   do_locked_unary_E_incb();
1035   do_locked_unary_E_incw();
1036   do_locked_unary_E_incl();
1037   do_locked_unary_E_incq();
1038 
1039   do_locked_unary_E_negb();
1040   do_locked_unary_E_negw();
1041   do_locked_unary_E_negl();
1042   do_locked_unary_E_negq();
1043 
1044   do_locked_unary_E_notb();
1045   do_locked_unary_E_notw();
1046   do_locked_unary_E_notl();
1047   do_locked_unary_E_notq();
1048   // 100
1049 
1050   do_bt_G_E_tests();
1051   // 109
1052   do_bt_imm_E_tests();
1053   // 118
1054 
1055   // So there should be 118 lock-prefixed instructions in the
1056   // disassembly of this compilation unit.
1057   // confirm with
1058   // objdump -d ./amd64locked | grep lock | grep -v do_lock | grep -v elf64 | wc
1059 
1060 
1061   { UInt crcExpd = 0xDF0656F1;
1062     theCRC = crcFinalise( theCRC );
1063     if (theCRC == crcExpd) {
1064        printf("amd64locked: PASS: CRCs actual 0x%08X expected 0x%08X\n",
1065               theCRC, crcExpd);
1066     } else {
1067        printf("amd64locked: FAIL: CRCs actual 0x%08X expected 0x%08X\n",
1068               theCRC, crcExpd);
1069        printf("amd64locked: set #define VERBOSE 1 to diagnose\n");
1070     }
1071   }
1072 
1073   return 0;
1074 }
1075