1
2 /* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}. Does not
3 check the core arithmetic in any detail. This file checks the 16-bit
4 character versions (w is for wide) */
5
6 #include <string.h>
7 #include <stdio.h>
8 #include <assert.h>
9
/* Short aliases for the basic integer types used by this test. */
typedef  unsigned char           UChar;
typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned long long int  ULong;

/* Image of one 128-bit xmm register, held as 16 raw bytes. */
typedef  UChar  V128[16];

/* Byte-sized boolean and its two values. */
typedef  UChar  Bool;
#define  False  ((Bool)0)
#define  True   ((Bool)1)
18
/* Print the 16 bytes of *vec as 32 lowercase hex digits, starting with
   byte 15 and ending with byte 0.  No separator or newline is
   emitted. */
void show_V128 ( V128* vec )
{
   Int remaining = 16;
   while (remaining-- > 0) {
      /* After the post-decrement, 'remaining' is the index to print. */
      printf("%02x", (UInt)( (*vec)[remaining] ));
   }
}
25
/* Build a 128-bit vector from a 16-character hex summary.  Each hex
   digit d (either case) becomes the byte 0xdd.  The summary reads like
   show_V128's output: its last character produces byte 0 of *dst and
   its first character produces byte 15.  Asserts that the summary is
   exactly 16 characters long and contains only hex digits. */
void expand ( V128* dst, char* summary )
{
   Int pos;
   assert( strlen(summary) == 16 );

   /* Walk the summary from its last character to its first, so that
      the destination bytes are filled in ascending order. */
   for (pos = 15; pos >= 0; pos--) {
      UChar ch     = summary[pos];
      UChar nibble = 0;

      if ('0' <= ch && ch <= '9')
         nibble = ch - '0';
      else if ('A' <= ch && ch <= 'F')
         nibble = ch - 'A' + 10;
      else if ('a' <= ch && ch <= 'f')
         nibble = ch - 'a' + 10;
      else
         assert(0);

      assert(nibble < 16);

      /* Replicate the digit into both halves of the byte. */
      (*dst)[15 - pos] = (nibble << 4) | nibble;
   }
}
44
/* Stage the inputs in 'block', execute one pcmpXstrX variant on them and
   print the resulting xmm0, rcx and status flags.

   NAME is the mnemonic without its "pcmp" prefix (e.g. "estri") and IMM
   is the immediate control byte (e.g. "0x4B").  Both must be string
   literals, because the instruction has to be spelled out inside the
   asm template; they are also used to build the printed label.

   The macro is private to one_test and relies on its locals: block,
   blockC, argL, argR, rdxIN and raxIN. */
#define RUN_PCMPXSTRX(NAME, IMM)                                          \
   do {                                                                   \
      /* 0x55 filler makes bytes the instruction left alone visible. */   \
      memset(blockC, 0x55, sizeof(block));                                \
      memcpy(blockC + 0,  &argL,  16);                                    \
      memcpy(blockC + 16, &argR,  16);                                    \
      /* rdx and rax must land in the slots the asm loads them from */    \
      /* (32 and 40).  Any other offset would either overwrite part  */   \
      /* of argR or hand the two values to the wrong registers.      */   \
      memcpy(blockC + 32, &rdxIN, 8);                                     \
      memcpy(blockC + 40, &raxIN, 8);                                     \
      __asm__ __volatile__(                                               \
         "movupd 0(%0), %%xmm2"                     "\n\t"                \
         "movupd 16(%0), %%xmm13"                   "\n\t"                \
         "movq 32(%0), %%rdx"                       "\n\t"                \
         "movq 40(%0), %%rax"                       "\n\t"                \
         "movupd 48(%0), %%xmm0"                    "\n\t"                \
         "movw 64(%0), %%cx"                        "\n\t"                \
         "pcmp" NAME " $" IMM ", %%xmm2, %%xmm13"   "\n\t"                \
         "movupd %%xmm0, 48(%0)"                    "\n\t"                \
         "movw %%cx, 64(%0)"                        "\n\t"                \
         "pushfq"                                   "\n\t"                \
         "popq %%r15"                               "\n\t"                \
         "movq %%r15, 72(%0)"                       "\n\t"                \
         : /*out*/                                                        \
         : /*in*/"r"(blockC)                                              \
         : /*trash*/"memory","cc","xmm2","xmm13","xmm0",                  \
                    "rdx","rax","rcx","r15"                               \
      );                                                                  \
      printf(" " NAME " $" IMM ": ");                                     \
      printf(" xmm0 ");                                                   \
      show_V128( (V128*)(blockC+48) );                                    \
      /* Only cx (16 bits) is stored back, so the upper bytes of the */   \
      /* printed rcx word still hold the 0x55 filler.  0x8D5 keeps   */   \
      /* just the OF, SF, ZF, AF, PF and CF bits of rflags.          */   \
      printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);  \
   } while (0)

/* Run all eight pcmp{e,i}str{i,m} variants (immediates 0x4B and 0x0B)
   on one set of inputs and print the results.

   summL, summR: 16-character hex summaries, expanded with expand() into
                 the two vector operands; summL is loaded into xmm2 and
                 summR into xmm13.
   rdxIN, raxIN: values placed in rdx and rax before each instruction.

   The first output line echoes the inputs; one line per variant
   follows, showing xmm0, rcx and the masked status flags after the
   instruction.  The two immediates differ only in bit 6 (0x40); see the
   Intel SDM description of the control byte for its meaning.

   NOTE(review): rdxIN/raxIN are now staged at the offsets the asm reads
   (32/40).  If a reference output was recorded while they were staged
   at 24/32/40 it no longer matches and must be regenerated. */
void one_test ( char* summL, ULong rdxIN, char* summR, ULong raxIN )
{
   V128 argL, argR;
   expand( &argL, summL );
   expand( &argR, summR );
   printf("\n");
   printf("rdx %016llx argL ", rdxIN);
   show_V128(&argL);
   printf(" rax %016llx argR ", raxIN);
   show_V128(&argR);
   printf("\n");

   /* Scratch area shared with the asm.  Layout:

         what            ULong index   byte offset
         in:    argL          0             0
         in:    argR          2            16
         in:    rdx           4            32
         in:    rax           5            40
         inout: xmm0          6            48
         inout: rcx           8            64
         out:   rflags        9            72
   */
   ULong block[ 2 + 2 + 1 + 1 + 2 + 1 + 1 ];
   assert(sizeof(block) == 80);

   UChar* blockC = (UChar*)&block[0];

   RUN_PCMPXSTRX("istri", "0x4B");
   RUN_PCMPXSTRX("istri", "0x0B");

   RUN_PCMPXSTRX("istrm", "0x4B");
   RUN_PCMPXSTRX("istrm", "0x0B");

   RUN_PCMPXSTRX("estri", "0x4B");
   RUN_PCMPXSTRX("estri", "0x0B");

   RUN_PCMPXSTRX("estrm", "0x4B");
   RUN_PCMPXSTRX("estrm", "0x0B");
}

#undef RUN_PCMPXSTRX
304
/* Drive one_test over a fixed table of operand and register inputs.
   Always returns 0. */
int main ( void )
{
   /* One row per one_test call, in call order: left summary, rdx,
      right summary, rax.  Negative values convert to large unsigned
      64-bit values, exactly as they would when passed directly. */
   static const struct {
      char*              summL;
      unsigned long long rdxIN;
      char*              summR;
      unsigned long long raxIN;
   } cases[] = {
      /* Right operand containing a zero element; rdx = rax = 0. */
      { "aaaaaaaaaaaaaaaa",   0, "aaaaaaaa00aaaaaa",   0 },
      { "0000000000000000",   0, "aaaaaaaa00aaaaaa",   0 },

      /* Identical operands, one register non-zero at a time. */
      { "aaaaaaaaaaaaaaaa",   0, "aaaaaaaaaaaaaaaa",   0 },
      { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa",   0 },
      { "aaaaaaaaaaaaaaaa",   0, "aaaaaaaaaaaaaaaa",   6 },

      /* rdx fixed at 5, positive rax values around 16. */
      { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa",   6 },
      { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa",  15 },
      { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa",  16 },
      { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa",  17 },

      /* rdx fixed at 5, negative rax values around -16. */
      { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa",  -6 },
      { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa", -15 },
      { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa", -16 },
      { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa", -17 },

      /* rax fixed at 6, positive rdx values around 16. */
      { "aaaaaaaaaaaaaaaa",   5, "aaaaaaaaaaaaaaaa",   6 },
      { "aaaaaaaaaaaaaaaa",  15, "aaaaaaaaaaaaaaaa",   6 },
      { "aaaaaaaaaaaaaaaa",  16, "aaaaaaaaaaaaaaaa",   6 },
      { "aaaaaaaaaaaaaaaa",  17, "aaaaaaaaaaaaaaaa",   6 },

      /* rax fixed at 6, negative rdx values around -16. */
      { "aaaaaaaaaaaaaaaa",  -5, "aaaaaaaaaaaaaaaa",   6 },
      { "aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa",   6 },
      { "aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa",   6 },
      { "aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa",   6 },
   };

   unsigned int k;
   for (k = 0; k < sizeof(cases) / sizeof(cases[0]); k++) {
      one_test( cases[k].summL, cases[k].rdxIN,
                cases[k].summR, cases[k].raxIN );
   }

   return 0;
}
336