1
2 /* Contrary to what the next comment says, this is now an amd64 CPU
3 test. */
4
5 /*
6 * x86 CPU test
7 *
8 * Copyright (c) 2003 Fabrice Bellard
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <inttypes.h>
29 #include <math.h>
30 #include <stdarg.h>
31 #include <assert.h>
32
33
34 //////////////////////////////////////////////////////////////////
35 //////////////////////////////////////////////////////////////////
36
37 /*
38 * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
39 * MD5 Message-Digest Algorithm (RFC 1321).
40 *
41 * Homepage:
42 * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
43 *
44 * Author:
45 * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
46 *
47 * This software was written by Alexander Peslyak in 2001. No copyright is
48 * claimed, and the software is hereby placed in the public domain.
49 * In case this attempt to disclaim copyright and place the software in the
50 * public domain is deemed null and void, then the software is
51 * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
52 * general public under the following terms:
53 *
54 * Redistribution and use in source and binary forms, with or without
55 * modification, are permitted.
56 *
57 * There's ABSOLUTELY NO WARRANTY, express or implied.
58 *
59 * (This is a heavily cut-down "BSD license".)
60 *
61 * This differs from Colin Plumb's older public domain implementation in that
62 * no exactly 32-bit integer data type is required (any 32-bit or wider
63 * unsigned integer data type will do), there's no compile-time endianness
64 * configuration, and the function prototypes match OpenSSL's. No code from
65 * Colin Plumb's implementation has been reused; this comment merely compares
66 * the properties of the two independent implementations.
67 *
68 * The primary goals of this implementation are portability and ease of use.
69 * It is meant to be fast, but not as fast as possible. Some known
70 * optimizations are not included to reduce source code size and avoid
71 * compile-time configuration.
72 */
73
74 #include <string.h>
75
76 // BEGIN #include "md5.h"
/*
 * MD5_u32plus may be any unsigned integer type that is at least 32 bits
 * wide; it does not have to be exactly 32 bits.
 */
typedef unsigned int MD5_u32plus;

/* Streaming MD5 state shared by MD5_Init(), MD5_Update() and MD5_Final(). */
typedef struct {
    MD5_u32plus lo;             /* low 29 bits of the running byte count */
    MD5_u32plus hi;             /* the remaining high part of that count */
    MD5_u32plus a;              /* four chaining words of the digest ... */
    MD5_u32plus b;
    MD5_u32plus c;
    MD5_u32plus d;              /* ... written out by MD5_Final() */
    unsigned char buffer[64];   /* input bytes of a not yet complete block */
    MD5_u32plus block[16];      /* decoded words for the portable SET()/GET() */
} MD5_CTX;

/* Reset ctx so that a new message can be hashed. */
void MD5_Init(MD5_CTX *ctx);

/* Feed size bytes starting at data into the running hash. */
void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size);

/* Finish the hash, store the 16-byte digest in result and wipe ctx. */
void MD5_Final(unsigned char *result, MD5_CTX *ctx);
90 // END #include "md5.h"
91
92 /*
93 * The basic MD5 functions.
94 *
95 * F and G are optimized compared to their RFC 1321 definitions for
96 * architectures that lack an AND-NOT instruction, just like in Colin Plumb's
97 * implementation.
98 */
/* F: bitwise select -- y where x has a 1 bit, z where x has a 0 bit. */
#define F(x, y, z)   ((((y) ^ (z)) & (x)) ^ (z))
/* G: the same selection driven by z -- x where z is 1, y where z is 0. */
#define G(x, y, z)   ((((x) ^ (y)) & (z)) ^ (y))
/* H and H2: three-way XOR; they differ only in which pair is grouped. */
#define H(x, y, z)   ((z) ^ ((x) ^ (y)))
#define H2(x, y, z)  (((y) ^ (z)) ^ (x))
/* I: y XOR (x OR NOT z). */
#define I(x, y, z)   ((~(z) | (x)) ^ (y))
104
105 /*
106 * The MD5 transformation for all four rounds.
107 */
/*
 * One step: a = b + rotate_left_32(a + f(b, c, d) + x + t, s).
 *
 * The "& 0xffffffff" keeps the rotate correct when MD5_u32plus is wider than
 * 32 bits.
 *
 * The expansion is three complete statements, trailing semicolon included,
 * so body() invokes STEP without a semicolon.  For the same reason it must
 * not be used as the body of an unbraced if/else/loop.
 */
#define STEP(f, a, b, c, d, x, t, s) \
    (a) += f((b), (c), (d)) + (x) + (t); \
    (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
    (a) += (b);
112
/*
 * SET(n) reads input bytes 4n .. 4n+3 in little-endian byte order, stores
 * the resulting word in ctx->block[n] and evaluates to that word.
 * GET(n) re-reads a word previously stored by SET(n).
 *
 * Both macros expect `ptr` (const unsigned char *, start of the current
 * 64-byte block) and `ctx` (MD5_CTX *) to be in scope at the point of use.
 *
 * An earlier x86/VAX-only shortcut dereferenced the input directly as
 * (MD5_u32plus *).  That cast drops const, may read a misaligned address and
 * violates the strict aliasing rules whenever the caller's buffer is not of
 * a compatible type.  Keeping the MD5 code in its own translation unit was
 * the stated workaround, but the callers now live in this very file, so the
 * byte-wise form is used on every architecture.  The computed digest is
 * unchanged.
 */
#define SET(n) \
    (ctx->block[(n)] = \
    (MD5_u32plus)ptr[(n) * 4] | \
    ((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \
    ((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \
    ((MD5_u32plus)ptr[(n) * 4 + 3] << 24))
#define GET(n) \
    (ctx->block[(n)])
143
144 /*
145 * This processes one or more 64-byte data blocks, but does NOT update the bit
146 * counters. There are no alignment requirements.
147 */
/*
 * Core compression loop.
 *
 * ctx   hash state; only the chaining words a..d are read and written here
 *       (the portable SET()/GET() additionally use ctx->block as scratch).
 * data  start of the input blocks.
 * size  number of bytes to process.  It must be a non-zero multiple of 64:
 *       the do/while handles one block before testing, and "size -= 64" only
 *       reaches zero for exact multiples.
 *
 * Returns a pointer to the first byte after the processed data.
 */
static const void *body(MD5_CTX *ctx, const void *data, unsigned long size)
{
    const unsigned char *ptr;
    MD5_u32plus a, b, c, d;
    MD5_u32plus saved_a, saved_b, saved_c, saved_d;

    ptr = (const unsigned char *)data;

    /* Work on local copies of the chaining words. */
    a = ctx->a;
    b = ctx->b;
    c = ctx->c;
    d = ctx->d;

    do {
        /* Remember the state at block entry; it is added back in below. */
        saved_a = a;
        saved_b = b;
        saved_c = c;
        saved_d = d;

/* Round 1 */
        /* SET() decodes each input word once; later rounds reuse it via GET(). */
        STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
        STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
        STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
        STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
        STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
        STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
        STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
        STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
        STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
        STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
        STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
        STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
        STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
        STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
        STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
        STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)

/* Round 2 */
        STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
        STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
        STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
        STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
        STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
        STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
        STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
        STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
        STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
        STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
        STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
        STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
        STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
        STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
        STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
        STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)

/* Round 3 */
        /* H and H2 alternate; they compute the same XOR with different grouping. */
        STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
        STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11)
        STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
        STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23)
        STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
        STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11)
        STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
        STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23)
        STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
        STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11)
        STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
        STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23)
        STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
        STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11)
        STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
        STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23)

/* Round 4 */
        STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
        STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
        STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
        STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
        STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
        STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
        STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
        STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
        STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
        STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
        STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
        STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
        STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
        STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
        STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
        STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)

        /* Feed-forward: add the block-entry state to the result. */
        a += saved_a;
        b += saved_b;
        c += saved_c;
        d += saved_d;

        ptr += 64;
    } while (size -= 64);

    /* Publish the updated chaining words. */
    ctx->a = a;
    ctx->b = b;
    ctx->c = c;
    ctx->d = d;

    return ptr;
}
254
/*
 * Prepare ctx for hashing a new message: clear the byte counters and load
 * the initial chaining values of MD5 (RFC 1321).
 */
void MD5_Init(MD5_CTX *ctx)
{
    /* Nothing has been consumed yet. */
    ctx->hi = 0;
    ctx->lo = 0;

    /* Initial chaining words. */
    ctx->d = 0x10325476;
    ctx->c = 0x98badcfe;
    ctx->b = 0xefcdab89;
    ctx->a = 0x67452301;
}
265
/*
 * Absorb `size` bytes starting at `data` into the running hash.
 *
 * Complete 64-byte blocks are compressed immediately by body(); whatever is
 * left over (fewer than 64 bytes) is parked in ctx->buffer until more input
 * arrives or MD5_Final() is called.
 */
void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size)
{
    const unsigned char *src = (const unsigned char *)data;
    const MD5_u32plus prev_lo = ctx->lo;
    unsigned long buffered = prev_lo & 0x3f;   /* bytes already parked */

    /* lo keeps 29 bits of the byte count; a wrap carries into hi. */
    ctx->lo = (prev_lo + size) & 0x1fffffff;
    if (ctx->lo < prev_lo)
        ctx->hi++;
    ctx->hi += size >> 29;

    if (buffered != 0) {
        unsigned long room = 64 - buffered;

        /* Not enough to complete the parked block: just append. */
        if (size < room) {
            memcpy(&ctx->buffer[buffered], src, size);
            return;
        }

        /* Complete the parked block and compress it. */
        memcpy(&ctx->buffer[buffered], src, room);
        src += room;
        size -= room;
        body(ctx, ctx->buffer, 64);
    }

    /* Compress all whole blocks straight from the caller's memory. */
    if (size >= 64) {
        src = (const unsigned char *)body(ctx, src, size & ~(unsigned long)0x3f);
        size &= 0x3f;
    }

    /* Park the tail for the next call. */
    memcpy(ctx->buffer, src, size);
}
299
300 #define OUT(dst, src) \
301 (dst)[0] = (unsigned char)(src); \
302 (dst)[1] = (unsigned char)((src) >> 8); \
303 (dst)[2] = (unsigned char)((src) >> 16); \
304 (dst)[3] = (unsigned char)((src) >> 24);
305
MD5_Final(unsigned char * result,MD5_CTX * ctx)306 void MD5_Final(unsigned char *result, MD5_CTX *ctx)
307 {
308 unsigned long used, available;
309
310 used = ctx->lo & 0x3f;
311
312 ctx->buffer[used++] = 0x80;
313
314 available = 64 - used;
315
316 if (available < 8) {
317 memset(&ctx->buffer[used], 0, available);
318 body(ctx, ctx->buffer, 64);
319 used = 0;
320 available = 64;
321 }
322
323 memset(&ctx->buffer[used], 0, available - 8);
324
325 ctx->lo <<= 3;
326 OUT(&ctx->buffer[56], ctx->lo)
327 OUT(&ctx->buffer[60], ctx->hi)
328
329 body(ctx, ctx->buffer, 64);
330
331 OUT(&result[0], ctx->a)
332 OUT(&result[4], ctx->b)
333 OUT(&result[8], ctx->c)
334 OUT(&result[12], ctx->d)
335
336 memset(ctx, 0, sizeof(*ctx));
337 }
338
339
340 //////////////////////////////////////////////////////////////////
341 //////////////////////////////////////////////////////////////////
342
/* Running MD5 over everything the tests log through xxprintf(). */
static MD5_CTX md5ctx;

/* Start a fresh output hash; call once before the first xxprintf(). */
void xxprintf_start(void)
{
    MD5_Init(&md5ctx);
}
349
xxprintf_done(void)350 void xxprintf_done(void)
351 {
352 const char hexchar[16] = "0123456789abcdef";
353 unsigned char result[100];
354 memset(result, 0, sizeof(result));
355 MD5_Final(&result[0], &md5ctx);
356 printf("final MD5 = ");
357 int i;
358 for (i = 0; i < 16; i++) {
359 printf("%c%c", hexchar[0xF & (result[i] >> 4)],
360 hexchar[0xF & (result[i] >> 0)]);
361 }
362 printf("\n");
363 }
364
365 __attribute__((format(__printf__, 1, 2)))
xxprintf(const char * format,...)366 void xxprintf (const char *format, ...)
367 {
368 char buf[128];
369 memset(buf, 0, sizeof(buf));
370
371 va_list vargs;
372 va_start(vargs, format);
373 int n = vsnprintf(buf, sizeof(buf)-1, format, vargs);
374 va_end(vargs);
375
376 assert(n < sizeof(buf)-1);
377 assert(buf[sizeof(buf)-1] == 0);
378 assert(buf[sizeof(buf)-2] == 0);
379
380 MD5_Update(&md5ctx, buf, strlen(buf));
381 if (0) printf("QQQ %s", buf);
382 }
383
384 //////////////////////////////////////////////////////////////////
385 //////////////////////////////////////////////////////////////////
386
387
388 /* Setting this to 1 creates a very comprehensive test of
389 integer condition codes. */
/* NOTE(review): not referenced in this file; presumably consumed by the
   fb_test_amd64*.h headers included below -- confirm there. */
#define TEST_INTEGER_VERBOSE 1

/* 64-bit signed integer used for register-sized operands and saved flags. */
typedef long long int int64;

//#define LINUX_VM86_IOPL_FIX
//#define TEST_P4_FLAGS

/* Token pasting and stringification with macro expansion of the arguments. */
#define xglue(x, y) x ## y
#define glue(x, y) xglue(x, y)
#define stringify(s) tostring(s)
#define tostring(s) #s

/* Bit masks of the arithmetic status flags in RFLAGS. */
#define CC_C 0x0001   /* carry */
#define CC_P 0x0004   /* parity */
#define CC_A 0x0010   /* auxiliary carry */
#define CC_Z 0x0040   /* zero */
#define CC_S 0x0080   /* sign */
#define CC_O 0x0800   /* overflow */

/* Flags that are compared/logged by the tests that follow.  CC_MASK is
   redefined several times further down to suit each instruction group. */
#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)
410
/*
 * Instantiate the integer ALU tests by including fb_test_amd64.h once per
 * instruction, with OP naming the instruction.
 *
 * NOTE(review): the header is not visible here.  Presumably each inclusion
 * defines test_<OP>() (main() calls test_add(), test_sub(), ...) and
 * #undefs OP, OP_CC, OP1 and NSH at its end, since they are #defined again
 * before every inclusion without an #undef in between -- confirm in the
 * header.  The exact meaning of OP_CC, OP1 and NSH is defined there too.
 */

/* Two-operand instructions. */
#define OP add
#include "fb_test_amd64.h"

#define OP sub
#include "fb_test_amd64.h"

#define OP xor
#include "fb_test_amd64.h"

#define OP and
#include "fb_test_amd64.h"

#define OP or
#include "fb_test_amd64.h"

#define OP cmp
#include "fb_test_amd64.h"

/* Instructions instantiated with OP_CC. */
#define OP adc
#define OP_CC
#include "fb_test_amd64.h"

#define OP sbb
#define OP_CC
#include "fb_test_amd64.h"

/* adcx/adox additionally set NSH. */
#define OP adcx
#define NSH
#define OP_CC
#include "fb_test_amd64.h"

#define OP adox
#define NSH
#define OP_CC
#include "fb_test_amd64.h"

/* Instructions instantiated with OP1 (these are the one-operand ones). */
#define OP inc
#define OP_CC
#define OP1
#include "fb_test_amd64.h"

#define OP dec
#define OP_CC
#define OP1
#include "fb_test_amd64.h"

#define OP neg
#define OP_CC
#define OP1
#include "fb_test_amd64.h"

#define OP not
#define OP_CC
#define OP1
#include "fb_test_amd64.h"
466
/* The shift and rotate tests do not compare the auxiliary-carry flag. */
#undef CC_MASK
#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O)

/*
 * Instantiate the shift/rotate tests, one inclusion of
 * fb_test_amd64_shift.h per instruction.
 * NOTE(review): header not visible here; presumably it defines test_<OP>()
 * (main() calls test_shl(), test_rcr(), ...) and #undefs OP/OP_CC.
 */
#define OP shl
#include "fb_test_amd64_shift.h"

#define OP shr
#include "fb_test_amd64_shift.h"

#define OP sar
#include "fb_test_amd64_shift.h"

#define OP rol
#include "fb_test_amd64_shift.h"

#define OP ror
#include "fb_test_amd64_shift.h"

/* The rotate-through-carry variants are instantiated with OP_CC. */
#define OP rcr
#define OP_CC
#include "fb_test_amd64_shift.h"

#define OP rcl
#define OP_CC
#include "fb_test_amd64_shift.h"

/* XXX: should be more precise ? */
/* Only the carry flag from here on.  Nothing between this point and the
   next redefinition of CC_MASK (before the mul tests) references it. */
#undef CC_MASK
#define CC_MASK (CC_C)
496
497 /* lea test (modrm support) */
/*
 * Execute "leaq STR, <reg>" and log the computed address.
 *
 * STR is an AT&T-syntax memory operand written for an asm template, i.e.
 * with register names spelled "%%rax".  The same string is logged through
 * %s, so the doubled percent signs appear verbatim in the hashed text.
 *
 * The caller must have int64 locals rax, rbx, rcx, rdx, rsi, rdi and res in
 * scope; the constraints load each variable into the register of the same
 * name before the instruction executes.
 */
#define TEST_LEA(STR)\
{\
    asm("leaq " STR ", %0"\
        : "=r" (res)\
        : "a" (rax), "b" (rbx), "c" (rcx), "d" (rdx), "S" (rsi), "D" (rdi));\
    xxprintf("lea %s = %016llx\n", STR, res);\
}
505
/*
 * 16-bit addressing variant of TEST_LEA.
 *
 * NOTE(review): not used anywhere in this file.  It expects locals named
 * eax, ebx, ... which test_lea() does not declare, and it ends by switching
 * the assembler back to .code32 -- this looks like a leftover from a 32-bit
 * version of the test; do not use it in 64-bit code without reworking it.
 */
#define TEST_LEA16(STR)\
{\
    asm(".code16 ; .byte 0x67 ; leal " STR ", %0 ; .code32"\
        : "=wq" (res)\
        : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
    xxprintf("lea %s = %08x\n", STR, res);\
}
513
514
/*
 * Exercise the addressing modes of leaq: absolute, base, base+displacement,
 * base+index, scaled index, and their combinations.
 *
 * Each register holds a different single bit, so every logged result shows
 * which registers (and scale factors) took part in the address.
 * The order of the TEST_LEA lines determines the hashed output; keep it.
 */
void test_lea(void)
{
    int64 rax, rbx, rcx, rdx, rsi, rdi, res;
    rax = 0x0001;
    rbx = 0x0002;
    rcx = 0x0004;
    rdx = 0x0008;
    rsi = 0x0010;
    rdi = 0x0020;

    /* absolute address */
    TEST_LEA("0x4000");

    /* base register only */
    TEST_LEA("(%%rax)");
    TEST_LEA("(%%rbx)");
    TEST_LEA("(%%rcx)");
    TEST_LEA("(%%rdx)");
    TEST_LEA("(%%rsi)");
    TEST_LEA("(%%rdi)");

    /* base + small displacement */
    TEST_LEA("0x40(%%rax)");
    TEST_LEA("0x40(%%rbx)");
    TEST_LEA("0x40(%%rcx)");
    TEST_LEA("0x40(%%rdx)");
    TEST_LEA("0x40(%%rsi)");
    TEST_LEA("0x40(%%rdi)");

    /* base + larger displacement */
    TEST_LEA("0x4000(%%rax)");
    TEST_LEA("0x4000(%%rbx)");
    TEST_LEA("0x4000(%%rcx)");
    TEST_LEA("0x4000(%%rdx)");
    TEST_LEA("0x4000(%%rsi)");
    TEST_LEA("0x4000(%%rdi)");

    /* base + index */
    TEST_LEA("(%%rax, %%rcx)");
    TEST_LEA("(%%rbx, %%rdx)");
    TEST_LEA("(%%rcx, %%rcx)");
    TEST_LEA("(%%rdx, %%rcx)");
    TEST_LEA("(%%rsi, %%rcx)");
    TEST_LEA("(%%rdi, %%rcx)");

    /* base + index + displacement */
    TEST_LEA("0x40(%%rax, %%rcx)");
    TEST_LEA("0x4000(%%rbx, %%rdx)");

    /* base + scaled index */
    TEST_LEA("(%%rcx, %%rcx, 2)");
    TEST_LEA("(%%rdx, %%rcx, 4)");
    TEST_LEA("(%%rsi, %%rcx, 8)");

    /* scaled index without a base */
    TEST_LEA("(,%%rax, 2)");
    TEST_LEA("(,%%rbx, 4)");
    TEST_LEA("(,%%rcx, 8)");

    TEST_LEA("0x40(,%%rax, 2)");
    TEST_LEA("0x40(,%%rbx, 4)");
    TEST_LEA("0x40(,%%rcx, 8)");


    /* negative displacement + base + scaled index */
    TEST_LEA("-10(%%rcx, %%rcx, 2)");
    TEST_LEA("-10(%%rdx, %%rcx, 4)");
    TEST_LEA("-10(%%rsi, %%rcx, 8)");

    TEST_LEA("0x4000(%%rcx, %%rcx, 2)");
    TEST_LEA("0x4000(%%rdx, %%rcx, 4)");
    TEST_LEA("0x4000(%%rsi, %%rcx, 8)");
}
579
/*
 * Test one condition code JCC (e.g. "ne") after "cmpl v2, v1" in four forms
 * and log each result:
 *   jJCC      res = 1 if the branch is taken, else 0
 *   setJCC    res = 1 if the condition holds, else 0
 *   cmovJCCl  res = 1 (loaded from memory) if it holds, else 0x12345678
 *   cmovJCCw  as above, but only the low 16 bits of res are replaced
 *
 * Every template writes %0 before it reads the inputs %1/%2, so the output
 * is marked early-clobber ("=&r").  Without that the compiler is free to
 * give res the same register as v1 or v2, and the comparison then sees the
 * constant that was just stored -- the result depended on the optimisation
 * level.  "cc" documents that the flags are modified.
 *
 * __asm__ is used instead of asm so that the macro also compiles in strict
 * ISO modes (-std=c11).
 */
#define TEST_JCC(JCC, v1, v2)\
{   int one = 1; \
    int res;\
    __asm__("movl $1, %0\n\t"\
        "cmpl %2, %1\n\t"\
        "j" JCC " 1f\n\t"\
        "movl $0, %0\n\t"\
        "1:\n\t"\
        : "=&r" (res)\
        : "r" (v1), "r" (v2)\
        : "cc");\
    xxprintf("%-10s %d\n", "j" JCC, res);\
\
    __asm__("movl $0, %0\n\t"\
        "cmpl %2, %1\n\t"\
        "set" JCC " %b0\n\t"\
        : "=&r" (res)\
        : "r" (v1), "r" (v2)\
        : "cc");\
    xxprintf("%-10s %d\n", "set" JCC, res);\
    {\
        __asm__("movl $0x12345678, %0\n\t"\
            "cmpl %2, %1\n\t"\
            "cmov" JCC "l %3, %0\n\t"\
            : "=&r" (res)\
            : "r" (v1), "r" (v2), "m" (one)\
            : "cc");\
        xxprintf("%-10s R=0x%08x\n", "cmov" JCC "l", res);\
        __asm__("movl $0x12345678, %0\n\t"\
            "cmpl %2, %1\n\t"\
            "cmov" JCC "w %w3, %w0\n\t"\
            : "=&r" (res)\
            : "r" (v1), "r" (v2), "r" (one)\
            : "cc");\
        xxprintf("%-10s R=0x%08x\n", "cmov" JCC "w", res);\
    } \
}
613
614 /* various jump tests */
/* various jump tests */
/*
 * Runs TEST_JCC for each condition code with operand pairs chosen to make
 * the condition both true and false.  TEST_JCC(cc, v1, v2) evaluates the
 * condition after "cmpl v2, v1", i.e. on the flags of v1 - v2.
 * The order of the lines determines the hashed output; keep it.
 */
void test_jcc(void)
{
    /* equality */
    TEST_JCC("ne", 1, 1);
    TEST_JCC("ne", 1, 0);

    TEST_JCC("e", 1, 1);
    TEST_JCC("e", 1, 0);

    /* signed comparisons */
    TEST_JCC("l", 1, 1);
    TEST_JCC("l", 1, 0);
    TEST_JCC("l", 1, -1);

    TEST_JCC("le", 1, 1);
    TEST_JCC("le", 1, 0);
    TEST_JCC("le", 1, -1);

    TEST_JCC("ge", 1, 1);
    TEST_JCC("ge", 1, 0);
    TEST_JCC("ge", -1, 1);

    TEST_JCC("g", 1, 1);
    TEST_JCC("g", 1, 0);
    TEST_JCC("g", 1, -1);

    /* unsigned comparisons (-1 acts as the largest unsigned value) */
    TEST_JCC("b", 1, 1);
    TEST_JCC("b", 1, 0);
    TEST_JCC("b", 1, -1);

    TEST_JCC("be", 1, 1);
    TEST_JCC("be", 1, 0);
    TEST_JCC("be", 1, -1);

    TEST_JCC("ae", 1, 1);
    TEST_JCC("ae", 1, 0);
    TEST_JCC("ae", 1, -1);

    TEST_JCC("a", 1, 1);
    TEST_JCC("a", 1, 0);
    TEST_JCC("a", 1, -1);


    /* parity */
    TEST_JCC("p", 1, 1);
    TEST_JCC("p", 1, 0);

    TEST_JCC("np", 1, 1);
    TEST_JCC("np", 1, 0);

    /* overflow: 0x7fffffff - (-1) overflows, 0x7fffffff - 0 does not */
    TEST_JCC("o", 0x7fffffff, 0);
    TEST_JCC("o", 0x7fffffff, -1);

    TEST_JCC("no", 0x7fffffff, 0);
    TEST_JCC("no", 0x7fffffff, -1);

    /* sign */
    TEST_JCC("s", 0, 1);
    TEST_JCC("s", 0, -1);
    TEST_JCC("s", 0, 0);

    TEST_JCC("ns", 0, 1);
    TEST_JCC("ns", 0, -1);
    TEST_JCC("ns", 0, 0);
}
676
/* Flags compared by the multiply tests: only overflow and carry unless
   TEST_P4_FLAGS is defined (it is commented out near the top of the file). */
#undef CC_MASK
#ifdef TEST_P4_FLAGS
#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)
#else
#define CC_MASK (CC_O | CC_C)
#endif

/* NOTE(review): fb_test_amd64_muldiv.h is not visible here; presumably it
   defines the test_<OP>b/w/l/q() functions called from test_mul(). */
#define OP mul
#include "fb_test_amd64_muldiv.h"

#define OP imul
#include "fb_test_amd64_muldiv.h"
689
/*
 * Two-operand 16-bit imul: multiplies the low word of op0 by the low word
 * of op1 with RFLAGS cleared beforehand, then logs both operands, the
 * resulting register value and the flags selected by CC_MASK.
 */
void test_imulw2(int64 op0, int64 op1)
{
    int64 flags = 0;      /* loaded into RFLAGS before, read back after */
    int64 res = op0;      /* destination register starts out as op0 */

    asm ("pushq %4\n\t"
         "popfq\n\t"
         "imulw %w2, %w0\n\t"
         "pushfq\n\t"
         "popq %1\n\t"
         : "=q" (res), "=g" (flags)
         : "q" (op1), "0" (res), "1" (flags));

    xxprintf("%-10s A=%016llx B=%016llx R=%016llx CC=%04llx\n",
             "imulw", op0, op1, res, flags & CC_MASK);
}
707
/*
 * Two-operand 32-bit imul: both operands are first narrowed to int, then
 * multiplied with RFLAGS cleared beforehand.  Logs the original op0, the
 * narrowed op1, the 32-bit product and the flags selected by CC_MASK.
 */
void test_imull2(int64 op0, int64 op1)
{
    int64 flags = 0;              /* loaded into RFLAGS before, read back after */
    int multiplier = (int)op1;
    int res = (int)op0;           /* destination register starts out as op0 */

    asm ("pushq %4\n\t"
         "popfq\n\t"
         "imull %2, %0\n\t"
         "pushfq\n\t"
         "popq %1\n\t"
         : "=q" (res), "=g" (flags)
         : "q" (multiplier), "0" (res), "1" (flags));

    xxprintf("%-10s A=%016llx B=%08x R=%08x CC=%04llx\n",
             "imull", op0, multiplier, res, flags & CC_MASK);
}
726
/*
 * Three-operand imul with an immediate: "imul<size> $op0, <op1>, <res>".
 *
 * size   instruction suffix as a string, e.g. "w"
 * size1  operand-size modifier for the register operands, e.g. "w"
 * op0    immediate; pasted into the template with #op0, so it must be a
 *        literal
 * op1    register source operand
 *
 * RFLAGS is cleared before the instruction and read back afterwards; the
 * flags selected by the CC_MASK in effect at the point of use are logged.
 */
#define TEST_IMUL_IM(size, size1, op0, op1)\
{\
    int64 res, flags;\
    flags = 0;\
    res = 0;\
    asm ("pushq %3\n\t"\
         "popfq\n\t"\
         "imul" size " $" #op0 ", %" size1 "2, %" size1 "0\n\t" \
         "pushfq\n\t"\
         "popq %1\n\t"\
         : "=r" (res), "=g" (flags)\
         : "r" (op1), "1" (flags), "0" (res));\
    xxprintf("%-10s A=%08x B=%08x R=%016llx CC=%04llx\n",\
             "imul" size, op0, op1, res, flags & CC_MASK);\
}
742
/*
 * 32-bit three-operand imul with an immediate: "imul $op0, <op1>, <dst>".
 * op0 is pasted into the template with #op0, so it must be a literal.
 *
 * NOTE(review): the product is written to res64, but the xxprintf below
 * logs res, which the asm never modifies -- so "R=" is always 00000000.
 * This looks like a bug.  It is left as is because fixing it changes the
 * logged text and therefore the expected MD5 recorded in main(); change
 * both together.
 */
#define TEST_IMUL_IM_L(op0, op1)\
{\
    int64 flags = 0;\
    int res = 0;\
    int res64 = 0;\
    asm ("pushq %3\n\t"\
         "popfq\n\t"\
         "imul $" #op0 ", %2, %0\n\t" \
         "pushfq\n\t"\
         "popq %1\n\t"\
         : "=r" (res64), "=g" (flags)\
         : "r" (op1), "1" (flags), "0" (res));\
    xxprintf("%-10s A=%08x B=%08x R=%08x CC=%04llx\n",\
             "imull", op0, op1, res, flags & CC_MASK);\
}
758
759
/* The divide tests compare no flags at all. */
#undef CC_MASK
#define CC_MASK (0)

/* NOTE(review): header not visible here; presumably these inclusions define
   the test_div*() / test_idiv*() functions called from test_mul(). */
#define OP div
#include "fb_test_amd64_muldiv.h"

#define OP idiv
#include "fb_test_amd64_muldiv.h"
768
/*
 * Run the multiply and divide tests for every operand width.
 *
 * The test_<op><width>() helpers are not defined in this file (they
 * presumably come from fb_test_amd64_muldiv.h).  The order of the calls
 * determines the hashed output; keep it.
 *
 * TEST_IMUL_IM / TEST_IMUL_IM_L expand here, after CC_MASK has been
 * redefined to 0, so their logged CC field is always 0.
 */
void test_mul(void)
{
    /* one-operand imul */
    test_imulb(0x1234561d, 4);
    test_imulb(3, -4);
    test_imulb(0x80, 0x80);
    test_imulb(0x10, 0x10);

    test_imulw(0, 0, 0);
    test_imulw(0, 0xFF, 0xFF);
    test_imulw(0, 0xFF, 0x100);
    test_imulw(0, 0x1234001d, 45);
    test_imulw(0, 23, -45);
    test_imulw(0, 0x8000, 0x8000);
    test_imulw(0, 0x100, 0x100);

    test_imull(0, 0, 0);
    test_imull(0, 0xFFFF, 0xFFFF);
    test_imull(0, 0xFFFF, 0x10000);
    test_imull(0, 0x1234001d, 45);
    test_imull(0, 23, -45);
    test_imull(0, 0x80000000, 0x80000000);
    test_imull(0, 0x10000, 0x10000);

    /* unsigned mul */
    test_mulb(0x1234561d, 4);
    test_mulb(3, -4);
    test_mulb(0x80, 0x80);
    test_mulb(0x10, 0x10);

    test_mulw(0, 0x1234001d, 45);
    test_mulw(0, 23, -45);
    test_mulw(0, 0x8000, 0x8000);
    test_mulw(0, 0x100, 0x100);

    test_mull(0, 0x1234001d, 45);
    test_mull(0, 23, -45);
    test_mull(0, 0x80000000, 0x80000000);
    test_mull(0, 0x10000, 0x10000);

    /* two-operand imul */
    test_imulw2(0x1234001d, 45);
    test_imulw2(23, -45);
    test_imulw2(0x8000, 0x8000);
    test_imulw2(0x100, 0x100);

    test_imull2(0x1234001d, 45);
    test_imull2(23, -45);
    test_imull2(0x80000000, 0x80000000);
    test_imull2(0x10000, 0x10000);

    /* three-operand imul with immediate */
    TEST_IMUL_IM("w", "w", 45, 0x1234);
    TEST_IMUL_IM("w", "w", -45, 23);
    TEST_IMUL_IM("w", "w", 0x8000, 0x80000000);
    TEST_IMUL_IM("w", "w", 0x7fff, 0x1000);

    TEST_IMUL_IM_L(45, 0x1234);
    TEST_IMUL_IM_L(-45, 23);
    TEST_IMUL_IM_L(0x8000, 0x80000000);
    TEST_IMUL_IM_L(0x7fff, 0x1000);

    /* signed divide */
    test_idivb(0x12341678, 0x127e);
    test_idivb(0x43210123, -5);
    test_idivb(0x12340004, -1);

    test_idivw(0, 0x12345678, 12347);
    test_idivw(0, -23223, -45);
    test_idivw(0, 0x12348000, -1);
    test_idivw(0x12343, 0x12345678, 0x81238567);

    test_idivl(0, 0x12345678, 12347);
    test_idivl(0, -233223, -45);
    test_idivl(0, 0x80000000, -1);
    test_idivl(0x12343, 0x12345678, 0x81234567);

    test_idivq(0, 0x12345678, 12347);
    test_idivq(0, -233223, -45);
    test_idivq(0, 0x80000000, -1);
    test_idivq(0x12343, 0x12345678, 0x81234567);

    /* unsigned divide */
    test_divb(0x12341678, 0x127e);
    test_divb(0x43210123, -5);
    test_divb(0x12340004, -1);

    test_divw(0, 0x12345678, 12347);
    test_divw(0, -23223, -45);
    test_divw(0, 0x12348000, -1);
    test_divw(0x12343, 0x12345678, 0x81238567);

    test_divl(0, 0x12345678, 12347);
    test_divl(0, -233223, -45);
    test_divl(0, 0x80000000, -1);
    test_divl(0x12343, 0x12345678, 0x81234567);

    test_divq(0, 0x12345678, 12347);
    test_divq(0, -233223, -45);
    test_divq(0, 0x80000000, -1);
    test_divq(0x12343, 0x12345678, 0x81234567);
}
865
/*
 * Test a bit-scan instruction (bsf/bsr).
 *
 * op    mnemonic including the size suffix, e.g. bsrl (stringified with #op)
 * size  operand-size modifier for the register operands ("w" or "")
 * op0   value to scan
 *
 * Logs the input, the destination register and ZF (1 when the input was
 * zero).  The destination is preloaded with 0x12345678 so that the log
 * shows what the instruction leaves in it for a zero input and in the
 * untouched upper half of a 16-bit scan.
 *
 * Both outputs are written before the input %2 is read, so they are marked
 * early-clobber ("=&r", "=&q").  Without that the compiler may place val in
 * the same register as res or resz and the scan then sees a clobbered
 * value -- the result depended on the optimisation level.  "cc" documents
 * that the flags are modified.
 *
 * __asm__ is used instead of asm so that the macro also compiles in strict
 * ISO modes (-std=c11).
 */
#define TEST_BSX(op, size, op0)\
{\
    int res, val, resz;\
    val = op0;\
    __asm__("xorl %1, %1\n"\
        "movl $0x12345678, %0\n"\
        #op " %" size "2, %" size "0 ; setz %b1" \
        : "=&r" (res), "=&q" (resz)\
        : "r" (val)\
        : "cc");\
    xxprintf("%-10s A=%08x R=%08x %d\n", #op, val, res, resz);\
}
877
/*
 * Bit-scan tests: bsr and bsf in 16- and 32-bit form, each with a zero
 * input (sets ZF) and a non-zero input.
 */
void test_bsx(void)
{
    TEST_BSX(bsrw, "w", 0);
    TEST_BSX(bsrw, "w", 0x12340128);
    TEST_BSX(bsrl, "", 0);
    TEST_BSX(bsrl, "", 0x00340128);
    TEST_BSX(bsfw, "w", 0);
    TEST_BSX(bsfw, "w", 0x12340128);
    TEST_BSX(bsfl, "", 0);
    TEST_BSX(bsfl, "", 0x00340128);
}
889
890 /**********************************************/
891
/*
 * Log the results of basic double arithmetic and a selection of libm
 * functions for the operand pair (a, b).
 */
void test_fops(double a, double b)
{
    /* The trigonometric values are needed twice each: once on their own
       and once as the argument of the inverse function. */
    const double sin_a = sin(a);
    const double cos_a = cos(a);
    const double tan_a = tan(a);

    /* arithmetic */
    xxprintf("a=%f b=%f a+b=%f\n", a, b, a + b);
    xxprintf("a=%f b=%f a-b=%f\n", a, b, a - b);
    xxprintf("a=%f b=%f a*b=%f\n", a, b, a * b);
    xxprintf("a=%f b=%f a/b=%f\n", a, b, a / b);
    xxprintf("a=%f b=%f fmod(a, b)=%f\n", a, b, fmod(a, b));

    /* elementary functions */
    xxprintf("a=%f sqrt(a)=%f\n", a, sqrt(a));
    xxprintf("a=%f sin(a)=%f\n", a, sin_a);
    xxprintf("a=%f cos(a)=%f\n", a, cos_a);
    xxprintf("a=%f tan(a)=%f\n", a, tan_a);
    xxprintf("a=%f log(a)=%f\n", a, log(a));
    xxprintf("a=%f exp(a)=%f\n", a, exp(a));
    xxprintf("a=%f b=%f atan2(a, b)=%f\n", a, b, atan2(a, b));

    /* just to test some op combining */
    xxprintf("a=%f asin(sin(a))=%f\n", a, asin(sin_a));
    xxprintf("a=%f acos(cos(a))=%f\n", a, acos(cos_a));
    xxprintf("a=%f atan(tan(a))=%f\n", a, atan(tan_a));
}
911
/*
 * Log the result of each C comparison operator applied to (a, b), followed
 * by the ZF/PF/CF flags produced by an x87 fcomi of the same operands.
 */
void test_fcmp(double a, double b)
{
    xxprintf("(%f<%f)=%d\n",
             a, b, a < b);
    xxprintf("(%f<=%f)=%d\n",
             a, b, a <= b);
    xxprintf("(%f==%f)=%d\n",
             a, b, a == b);
    xxprintf("(%f>%f)=%d\n",
             a, b, a > b);
    /* NOTE(review): the label says "<=" but the value is a >= b.  The text
       is part of the hashed output, so correcting the label changes the
       expected MD5 recorded in main(); change both together. */
    xxprintf("(%f<=%f)=%d\n",
             a, b, a >= b);
    {
        unsigned long long int rflags;
        /* test f(u)comi instruction */
        /* a is passed in st(0) and b in st(1); RFLAGS is read back through
           the stack right after the compare. */
        asm("fcomi %2, %1\n"
            "pushfq\n"
            "popq %0\n"
            : "=r" (rflags)
            : "t" (a), "u" (b));
        xxprintf("fcomi(%f %f)=%016llx\n", a, b, rflags & (CC_Z | CC_P | CC_C));
    }
}
935
test_fcvt(double a)936 void test_fcvt(double a)
937 {
938 float fa;
939 long double la;
940 int16_t fpuc;
941 int i;
942 int64 lla;
943 int ia;
944 int16_t wa;
945 double ra;
946
947 fa = a;
948 la = a;
949 xxprintf("(float)%f = %f\n", a, fa);
950 xxprintf("(long double)%f = %Lf\n", a, la);
951 xxprintf("a=%016llx\n", *(unsigned long long int *) &a);
952 xxprintf("la=%016llx %04x\n", *(unsigned long long int *) &la,
953 *(unsigned short *) ((char *)(&la) + 8));
954
955 /* test all roundings */
956 asm volatile ("fstcw %0" : "=m" (fpuc));
957 for(i=0;i<4;i++) {
958 short zz = (fpuc & ~0x0c00) | (i << 10);
959 asm volatile ("fldcw %0" : : "m" (zz));
960 asm volatile ("fists %0" : "=m" (wa) : "t" (a));
961 asm volatile ("fistl %0" : "=m" (ia) : "t" (a));
962 asm volatile ("fistpll %0" : "=m" (lla) : "t" (a) : "st");
963 asm volatile ("frndint ; fstl %0" : "=m" (ra) : "t" (a));
964 asm volatile ("fldcw %0" : : "m" (fpuc));
965 xxprintf("(short)a = %d\n", wa);
966 xxprintf("(int)a = %d\n", ia);
967 xxprintf("(int64_t)a = %lld\n", lla);
968 xxprintf("rint(a) = %f\n", ra);
969 }
970 }
971
/*
 * Execute the x87 constant-load instruction "fld<N>" and log the value it
 * leaves in st(0).  Expects a double named `a` in scope.  The expansion is
 * two statements and already ends in a semicolon.
 */
#define TEST(N) \
    asm("fld" #N : "=t" (a)); \
    xxprintf("fld" #N "= %f\n", a);

/* Log every x87 built-in constant: fld1, fldl2t, fldl2e, fldpi, fldlg2,
   fldln2 and fldz. */
void test_fconst(void)
{
    double a;
    TEST(1);
    TEST(l2t);
    TEST(l2e);
    TEST(pi);
    TEST(lg2);
    TEST(ln2);
    TEST(z);
}
987
test_fbcd(double a)988 void test_fbcd(double a)
989 {
990 unsigned short bcd[5];
991 double b;
992
993 asm("fbstp %0" : "=m" (bcd[0]) : "t" (a) : "st");
994 asm("fbld %1" : "=t" (b) : "m" (bcd[0]));
995 xxprintf("a=%f bcd=%04x%04x%04x%04x%04x b=%f\n",
996 a, bcd[4], bcd[3], bcd[2], bcd[1], bcd[0], b);
997 }
998
/*
 * Save and restore the x87 environment with the instruction pair
 * (save, restore), e.g. ("fnstenv", "fldenv"), while five values are on the
 * register stack.  Afterwards the five values are popped and logged together
 * with the control word, the upper byte of the status word and the tag word
 * found in the saved image.
 *
 * Expects in scope: int i; double dtab[] (>= 5 inputs) and rtab[] (>= 5
 * slots).  env points to the save area, which is filled with 0xaa first.
 *
 * The save instruction writes *env, so *env is declared as a read-write
 * operand ("+m").  It used to be an input-only "m" operand, which allowed
 * the compiler to assume that the 0xaa fill was still in place when the
 * fields are read for logging.  "+m" (rather than "=m") is used because the
 * instruction may not overwrite the whole object.  volatile is spelled out
 * because an asm with outputs is no longer implicitly volatile.
 *
 * __asm__ is used instead of asm so that the macro also compiles in strict
 * ISO modes (-std=c11).
 */
#define TEST_ENV(env, save, restore)\
{\
    memset((env), 0xaa, sizeof(*(env)));\
    for(i=0;i<5;i++)\
        __asm__ volatile ("fldl %0" : : "m" (dtab[i]));\
    __asm__ volatile (save " %0\n" : "+m" (*(env)));\
    __asm__ volatile (restore " %0\n": : "m" (*(env)));\
    for(i=0;i<5;i++)\
        __asm__ volatile ("fstpl %0" : "=m" (rtab[i]));\
    for(i=0;i<5;i++)\
        xxprintf("res[%d]=%f\n", i, rtab[i]);\
    xxprintf("fpuc=%04x fpus=%04x fptag=%04x\n",\
             (env)->fpuc,\
             (env)->fpus & 0xff00,\
             (env)->fptag);\
}
1015
test_fenv(void)1016 void test_fenv(void)
1017 {
1018 struct __attribute__((packed)) {
1019 uint16_t fpuc;
1020 uint16_t dummy1;
1021 uint16_t fpus;
1022 uint16_t dummy2;
1023 uint16_t fptag;
1024 uint16_t dummy3;
1025 uint32_t ignored[4];
1026 long double fpregs[8];
1027 } float_env32;
1028 double dtab[8];
1029 double rtab[8];
1030 int i;
1031
1032 for(i=0;i<8;i++)
1033 dtab[i] = i + 1;
1034
1035 TEST_ENV(&float_env32, "fnstenv", "fldenv");
1036 TEST_ENV(&float_env32, "fnsave", "frstor");
1037
1038 /* test for ffree */
1039 for(i=0;i<5;i++)
1040 asm volatile ("fldl %0" : : "m" (dtab[i]));
1041 asm volatile("ffree %st(2)");
1042 asm volatile ("fnstenv %0\n" : : "m" (float_env32));
1043 asm volatile ("fninit");
1044 xxprintf("fptag=%04x\n", float_env32.fptag);
1045 }
1046
1047
/*
 * Load rflags into RFLAGS, execute "fcmov<CC> %st(1), %st" with a in st(0)
 * and b in st(1), and log the flags value together with the resulting st(0):
 * b if the condition held, a otherwise.
 *
 * rflags must have type int64 (it is pushed with pushq and logged with
 * %llx); CC is the condition suffix as a string literal.
 */
#define TEST_FCMOV(a, b, rflags, CC)\
{\
    double res;\
    asm("pushq %3\n"\
        "popfq\n"\
        "fcmov" CC " %2, %0\n"\
        : "=t" (res)\
        : "0" (a), "u" (b), "g" (rflags));\
    xxprintf("fcmov%s rflags=0x%04llx-> %f\n", \
             CC, rflags, res);\
}
1059
test_fcmov(void)1060 void test_fcmov(void)
1061 {
1062 double a, b;
1063 int64 rflags, i;
1064
1065 a = 1.0;
1066 b = 2.0;
1067 for(i = 0; i < 4; i++) {
1068 rflags = 0;
1069 if (i & 1)
1070 rflags |= CC_C;
1071 if (i & 2)
1072 rflags |= CC_Z;
1073 TEST_FCMOV(a, b, rflags, "b");
1074 TEST_FCMOV(a, b, rflags, "e");
1075 TEST_FCMOV(a, b, rflags, "be");
1076 TEST_FCMOV(a, b, rflags, "nb");
1077 TEST_FCMOV(a, b, rflags, "ne");
1078 TEST_FCMOV(a, b, rflags, "nbe");
1079 }
1080 TEST_FCMOV(a, b, (int64)0, "u");
1081 TEST_FCMOV(a, b, (int64)CC_P, "u");
1082 TEST_FCMOV(a, b, (int64)0, "nu");
1083 TEST_FCMOV(a, b, (int64)CC_P, "nu");
1084 }
1085
/*
 * Driver for the floating-point tests.  The order of the calls determines
 * the hashed output; keep it.
 *
 * test_fconst() is also called directly from main(), so its output is
 * hashed twice per run.  test_fenv() and test_fcmov() are commented out
 * here but are called directly from main().
 */
void test_floats(void)
{
    test_fops(2, 3);
    test_fops(1.4, -5);
    test_fcmp(2, -1);
    test_fcmp(2, 2);
    test_fcmp(2, 3);
    test_fcvt(0.5);
    test_fcvt(-0.5);
    test_fcvt(1.0/7.0);
    test_fcvt(-1.0/9.0);
    test_fcvt(32768);
    test_fcvt(-1e20);
    test_fconst();
    // REINSTATE (maybe): test_fbcd(1234567890123456);
    // REINSTATE (maybe): test_fbcd(-123451234567890);
    // REINSTATE: test_fenv();
    // REINSTATE: test_fcmov();
}
1105
1106 /**********************************************/
1107
/*
 * Execute "<op> op0, op1" on two fixed 32-bit values and log both operands
 * afterwards.
 *
 * size     operand-size modifier for both operands ("b", "w" or "")
 * opconst  output constraint string for op1, e.g. "=q" or "=m"
 *
 * NOTE(review): this macro is not used anywhere in this file.
 */
#define TEST_XCHG(op, size, opconst)\
{\
    int op0, op1;\
    op0 = 0x12345678;\
    op1 = 0xfbca7654;\
    asm(#op " %" size "0, %" size "1" \
        : "=q" (op0), opconst (op1) \
        : "0" (op0), "1" (op1));\
    xxprintf("%-10s A=%08x B=%08x\n",\
             #op, op0, op1);\
}
1119
/*
 * Like TEST_XCHG, but additionally loads `eax` into the accumulator before
 * the instruction and logs it.
 *
 * NOTE(review): this macro is not used anywhere in this file.  The
 * accumulator is only declared as an input, although cmpxchg may write to
 * it -- revisit the constraints before using the macro.
 */
#define TEST_CMPXCHG(op, size, opconst, eax)\
{\
    int op0, op1;\
    op0 = 0x12345678;\
    op1 = 0xfbca7654;\
    asm(#op " %" size "0, %" size "1" \
        : "=q" (op0), opconst (op1) \
        : "0" (op0), "1" (op1), "a" (eax));\
    xxprintf("%-10s EAX=%08x A=%08x C=%08x\n",\
             #op, eax, op0, op1);\
}
1131
1132
1133 /**********************************************/
1134 /* segmentation tests */
1135
/* NOTE(review): these two symbols are declared but never referenced in this
   file; presumably left over from the segmentation tests of the original. */
extern char func_lret32;
extern char func_iret32;

/* Scratch memory for the string-instruction tests; they operate around its
   midpoint. */
uint8_t str_buffer[4096];
1140
/*
 * Run one string instruction and log the registers and flags it leaves.
 *
 * OP    mnemonic without size suffix (stos, lods, movs, scas, cmps)
 * size  size suffix as a string ("b", "w", "l")
 * DF    "std" to run with the direction flag set, "" to leave it clear
 * REP   repeat prefix including a trailing space, or ""
 *
 * Setup: RSI points to the middle of str_buffer, RDI 16 bytes further on,
 * RAX = 0x12345678, RCX = 17, RFLAGS cleared.  The direction flag is
 * cleared again (cld) before the flags are read back.
 *
 * The instructions read and write str_buffer behind the compiler's back, so
 * the asm carries a "memory" clobber.  Without it the compiler may keep
 * earlier stores to str_buffer pending across the asm or reuse stale loads,
 * which made the result depend on the optimisation level.  "cc" documents
 * that the flags are modified.
 *
 * __asm__ is used instead of asm so that the macro also compiles in strict
 * ISO modes (-std=c11).
 */
#define TEST_STRING1(OP, size, DF, REP)\
{\
    int64 rsi, rdi, rax, rcx, rflags;\
\
    rsi = (long)(str_buffer + sizeof(str_buffer) / 2);\
    rdi = (long)(str_buffer + sizeof(str_buffer) / 2) + 16;\
    rax = 0x12345678;\
    rcx = 17;\
\
    __asm__ volatile ("pushq $0\n\t"\
                  "popfq\n\t"\
                  DF "\n\t"\
                  REP #OP size "\n\t"\
                  "cld\n\t"\
                  "pushfq\n\t"\
                  "popq %4\n\t"\
                  : "=S" (rsi), "=D" (rdi), "=a" (rax), "=c" (rcx), "=g" (rflags)\
                  : "0" (rsi), "1" (rdi), "2" (rax), "3" (rcx)\
                  : "memory", "cc");\
    xxprintf("%-10s ESI=%016llx EDI=%016llx EAX=%016llx ECX=%016llx EFL=%04llx\n",\
             REP #OP size, rsi, rdi, rax, rcx,\
             rflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));\
}
1163
/*
 * Run TEST_STRING1 for byte, word and long operand sizes, first with the
 * direction flag clear and then with it set ("std").
 * The expansion ends without a semicolon; the caller supplies it.
 */
#define TEST_STRING(OP, REP)\
    TEST_STRING1(OP, "b", "", REP);\
    TEST_STRING1(OP, "w", "", REP);\
    TEST_STRING1(OP, "l", "", REP);\
    TEST_STRING1(OP, "b", "std", REP);\
    TEST_STRING1(OP, "w", "std", REP);\
    TEST_STRING1(OP, "l", "std", REP)
1171
/*
 * String-instruction tests.  str_buffer is first filled with a known byte
 * pattern (index + 0x56, truncated to 8 bits); the tests then run in a fixed
 * order because the stores made by stos and movs are what the following
 * lods passes read back.
 */
void test_string(void)
{
    int64 i;
    for(i = 0;i < sizeof(str_buffer); i++)
        str_buffer[i] = i + 0x56;
    TEST_STRING(stos, "");
    TEST_STRING(stos, "rep ");
    TEST_STRING(lods, ""); /* to verify stos */
    // TEST_STRING(lods, "rep ");
    TEST_STRING(movs, "");
    TEST_STRING(movs, "rep ");
    TEST_STRING(lods, ""); /* to verify movs */

    /* XXX: better tests */
    TEST_STRING(scas, "");
    TEST_STRING(scas, "repz ");
    TEST_STRING(scas, "repnz ");
    // REINSTATE? TEST_STRING(cmps, "");
    TEST_STRING(cmps, "repz ");
    // REINSTATE? TEST_STRING(cmps, "repnz ");
}
1193
/*
 * Test driver.  Every test logs through xxprintf(), which hashes the text
 * instead of printing it; the only visible output is the "final MD5 = ..."
 * line written by xxprintf_done().  argc and argv are not used.
 *
 * The digest depends on which tests run and in which order, so any change
 * to the call list below (or to any logged text) requires a new expected
 * value in the comment at the end.
 */
int main(int argc, char **argv)
{
    // The three commented out test cases produce different results at different
    // compiler optimisation levels. This suggests to me that their inline
    // assembly is incorrect. I don't have time to investigate now, though. So
    // they are disabled.
    xxprintf_start();
    test_adc();
    test_adcx();
    test_add();
    test_adox();
    test_and();
    // test_bsx();
    test_cmp();
    test_dec();
    test_fcmov();
    test_fconst();   // also run a second time from within test_floats()
    test_fenv();
    test_floats();
    test_inc();
    // test_jcc();
    test_lea();
    test_mul();
    test_neg();
    test_not();
    test_or();
    test_rcl();
    test_rcr();
    test_rol();
    test_ror();
    test_sar();
    test_sbb();
    test_shl();
    test_shr();
    // test_string();
    test_sub();
    test_xor();
    xxprintf_done();
    // the expected MD5SUM is 66802c845574c7c69f30d29ef85f7ca3
    return 0;
}
1235