1 #include <stdio.h>
2 #include <string.h>
3
#define N 64

/*
 * Single-precision test vectors.  The object is 32-byte aligned and every
 * array is N floats long, so each array starts on a 32-byte boundary, as
 * the aligned vector loads and stores in the packed tests require.
 */
struct float_test {
   float x[N];          /* first operand handed to the FMA instructions */
   float y[N];          /* second operand                               */
   float z[N];          /* third operand; tests flip its sign in place  */
   float expected[N];   /* value each result is compared against        */
   float res[N];        /* results stored by the packed tests           */
} ft __attribute__((aligned (32)));
8
9 struct double_test {
10 double x[N], y[N], z[N], expected[N], res[N];
11 } dt __attribute__((aligned (32)));
12
/* Judging by their names these hold +0.0, +infinity, -infinity and a NaN.
   Nothing in this part of the file assigns them; presumably the driver
   sets them up before filling the test vectors -- TODO confirm. */
float plus_zero;
float plus_infty;
float minus_infty;
float nan_value;
14
/*
 * Compare a computed single-precision value `x` with the expected value
 * `y` by looking at their bit patterns.
 *
 * Returns 0 when they agree and 1 when they differ.
 *
 * If `x` has every exponent bit and the top mantissa bit set (the
 * 0x7fc00000 pattern), `y` only needs to have those same bits set: the
 * sign and the remaining mantissa bits are ignored.  In every other case
 * all 32 bits must be equal, so +0.0 and -0.0 count as different.
 */
static int testf( float x, float y )
{
   const unsigned int qnan_bits = 0x7fc00000U;
   unsigned int got, want;

   /* Copy the raw representations out; this avoids pointer-cast punning. */
   memcpy( &got, &x, sizeof (got) );
   memcpy( &want, &y, sizeof (want) );

   if ((got & qnan_bits) != qnan_bits)
      return got != want;

   return (want & qnan_bits) != qnan_bits;
}
24
test_fmaf(void)25 static int test_fmaf( void )
26 {
27 int res = 0, i, j;
28 float w;
29 for (i = 0; i < N; i++) {
30 int thisres = 0;
31 __asm __volatile__ ("vfmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i]));
32 thisres |= testf( w, ft.expected[i] );
33 __asm __volatile__ ("vfmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i]));
34 thisres |= testf( w, ft.expected[i] );
35 __asm __volatile__ ("vfmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i]));
36 thisres |= testf( w, ft.expected[i] );
37 __asm __volatile__ ("vfmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i]));
38 thisres |= testf( w, ft.expected[i] );
39 __asm __volatile__ ("vfmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i]));
40 thisres |= testf( w, ft.expected[i] );
41 __asm __volatile__ ("vfmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i]));
42 thisres |= testf( w, ft.expected[i] );
43 if (thisres)
44 printf( "Failure 1 %d %a %a\n", i, w, ft.expected[i] );
45 res |= thisres;
46 thisres = 0;
47 __asm __volatile__ ("vfnmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i]));
48 thisres |= testf( -w, ft.expected[i] );
49 __asm __volatile__ ("vfnmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i]));
50 thisres |= testf( -w, ft.expected[i] );
51 __asm __volatile__ ("vfnmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i]));
52 thisres |= testf( -w, ft.expected[i] );
53 __asm __volatile__ ("vfnmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i]));
54 thisres |= testf( -w, ft.expected[i] );
55 __asm __volatile__ ("vfnmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i]));
56 thisres |= testf( -w, ft.expected[i] );
57 __asm __volatile__ ("vfnmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i]));
58 thisres |= testf( -w, ft.expected[i] );
59 if (thisres)
60 printf( "Failure 2 %d %a %a\n", i, w, ft.expected[i] );
61 res |= thisres;
62 }
63 for (i = 0; i < N; i++)
64 ft.z[i] = -ft.z[i];
65 for (i = 0; i < N; i++) {
66 int thisres = 0;
67 __asm __volatile__ ("vfmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i]));
68 thisres |= testf( w, ft.expected[i] );
69 __asm __volatile__ ("vfmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i]));
70 thisres |= testf( w, ft.expected[i] );
71 __asm __volatile__ ("vfmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i]));
72 thisres |= testf( w, ft.expected[i] );
73 __asm __volatile__ ("vfmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i]));
74 thisres |= testf( w, ft.expected[i] );
75 __asm __volatile__ ("vfmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i]));
76 thisres |= testf( w, ft.expected[i] );
77 __asm __volatile__ ("vfmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i]));
78 thisres |= testf( w, ft.expected[i] );
79 if (thisres)
80 printf( "Failure 3 %d %a %a\n", i, w, ft.expected[i] );
81 res |= thisres;
82 thisres = 0;
83 __asm __volatile__ ("vfnmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i]));
84 thisres |= testf( -w, ft.expected[i] );
85 __asm __volatile__ ("vfnmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i]));
86 thisres |= testf( -w, ft.expected[i] );
87 __asm __volatile__ ("vfnmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i]));
88 thisres |= testf( -w, ft.expected[i] );
89 __asm __volatile__ ("vfnmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i]));
90 thisres |= testf( -w, ft.expected[i] );
91 __asm __volatile__ ("vfnmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i]));
92 thisres |= testf( -w, ft.expected[i] );
93 __asm __volatile__ ("vfnmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i]));
94 thisres |= testf( -w, ft.expected[i] );
95 if (thisres)
96 printf( "Failure 4 %d %a %a\n", i, w, ft.expected[i] );
97 res |= thisres;
98 }
99 for (i = 0; i < N; i++)
100 ft.z[i] = -ft.z[i];
101 for (i = 0; i < N; i += 4) {
102 int thisres = 0;
103 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;"
104 "vfmadd132ps %%xmm7, %%xmm8, %%xmm9;"
105 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
106 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
107 for (j = 0; j < 4; j++)
108 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
109 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;"
110 "vfmadd132ps (%2), %%xmm8, %%xmm9;"
111 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
112 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
113 for (j = 0; j < 4; j++)
114 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
115 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;"
116 "vfmadd213ps %%xmm7, %%xmm8, %%xmm9;"
117 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
118 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
119 for (j = 0; j < 4; j++)
120 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
121 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;"
122 "vfmadd213ps (%3), %%xmm8, %%xmm9;"
123 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
124 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
125 for (j = 0; j < 4; j++)
126 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
127 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;"
128 "vfmadd231ps %%xmm7, %%xmm8, %%xmm9;"
129 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
130 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
131 for (j = 0; j < 4; j++)
132 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
133 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;"
134 "vfmadd231ps (%2), %%xmm8, %%xmm9;"
135 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
136 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
137 for (j = 0; j < 4; j++)
138 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
139 if (thisres) {
140 printf( "Failure 5 %d", i );
141 for (j = 0; j < 4; j++)
142 printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
143 printf( "\n" );
144 }
145 res |= thisres;
146 thisres = 0;
147 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;"
148 "vfnmsub132ps %%xmm7, %%xmm8, %%xmm9;"
149 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
150 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
151 for (j = 0; j < 4; j++)
152 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
153 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;"
154 "vfnmsub132ps (%2), %%xmm8, %%xmm9;"
155 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
156 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
157 for (j = 0; j < 4; j++)
158 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
159 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;"
160 "vfnmsub213ps %%xmm7, %%xmm8, %%xmm9;"
161 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
162 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
163 for (j = 0; j < 4; j++)
164 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
165 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;"
166 "vfnmsub213ps (%3), %%xmm8, %%xmm9;"
167 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
168 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
169 for (j = 0; j < 4; j++)
170 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
171 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;"
172 "vfnmsub231ps %%xmm7, %%xmm8, %%xmm9;"
173 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
174 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
175 for (j = 0; j < 4; j++)
176 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
177 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;"
178 "vfnmsub231ps (%2), %%xmm8, %%xmm9;"
179 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
180 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
181 for (j = 0; j < 4; j++)
182 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
183 if (thisres) {
184 printf( "Failure 6 %d", i );
185 for (j = 0; j < 4; j++)
186 printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
187 printf( "\n" );
188 }
189 res |= thisres;
190 }
191 for (i = 0; i < N; i++)
192 ft.z[i] = -ft.z[i];
193 for (i = 0; i < N; i += 4) {
194 int thisres = 0;
195 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;"
196 "vfmsub132ps %%xmm7, %%xmm8, %%xmm9;"
197 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
198 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
199 for (j = 0; j < 4; j++)
200 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
201 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;"
202 "vfmsub132ps (%2), %%xmm8, %%xmm9;"
203 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
204 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
205 for (j = 0; j < 4; j++)
206 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
207 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;"
208 "vfmsub213ps %%xmm7, %%xmm8, %%xmm9;"
209 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
210 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
211 for (j = 0; j < 4; j++)
212 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
213 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;"
214 "vfmsub213ps (%3), %%xmm8, %%xmm9;"
215 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
216 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
217 for (j = 0; j < 4; j++)
218 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
219 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;"
220 "vfmsub231ps %%xmm7, %%xmm8, %%xmm9;"
221 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
222 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
223 for (j = 0; j < 4; j++)
224 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
225 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;"
226 "vfmsub231ps (%2), %%xmm8, %%xmm9;"
227 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
228 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
229 for (j = 0; j < 4; j++)
230 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
231 if (thisres) {
232 printf( "Failure 7 %d", i );
233 for (j = 0; j < 4; j++)
234 printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
235 printf( "\n" );
236 }
237 res |= thisres;
238 thisres = 0;
239 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;"
240 "vfnmadd132ps %%xmm7, %%xmm8, %%xmm9;"
241 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
242 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
243 for (j = 0; j < 4; j++)
244 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
245 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;"
246 "vfnmadd132ps (%2), %%xmm8, %%xmm9;"
247 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
248 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
249 for (j = 0; j < 4; j++)
250 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
251 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;"
252 "vfnmadd213ps %%xmm7, %%xmm8, %%xmm9;"
253 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
254 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
255 for (j = 0; j < 4; j++)
256 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
257 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;"
258 "vfnmadd213ps (%3), %%xmm8, %%xmm9;"
259 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
260 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
261 for (j = 0; j < 4; j++)
262 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
263 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;"
264 "vfnmadd231ps %%xmm7, %%xmm8, %%xmm9;"
265 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
266 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
267 for (j = 0; j < 4; j++)
268 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
269 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;"
270 "vfnmadd231ps (%2), %%xmm8, %%xmm9;"
271 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
272 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
273 for (j = 0; j < 4; j++)
274 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
275 if (thisres) {
276 printf( "Failure 8 %d", i );
277 for (j = 0; j < 4; j++)
278 printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
279 printf( "\n" );
280 }
281 res |= thisres;
282 }
283 for (i = 1; i < N; i += 2)
284 ft.z[i] = -ft.z[i];
285 for (i = 0; i < N; i += 4) {
286 int thisres = 0;
287 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;"
288 "vfmaddsub132ps %%xmm7, %%xmm8, %%xmm9;"
289 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
290 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
291 for (j = 0; j < 4; j++)
292 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
293 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;"
294 "vfmaddsub132ps (%2), %%xmm8, %%xmm9;"
295 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
296 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
297 for (j = 0; j < 4; j++)
298 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
299 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;"
300 "vfmaddsub213ps %%xmm7, %%xmm8, %%xmm9;"
301 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
302 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
303 for (j = 0; j < 4; j++)
304 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
305 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;"
306 "vfmaddsub213ps (%3), %%xmm8, %%xmm9;"
307 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
308 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
309 for (j = 0; j < 4; j++)
310 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
311 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;"
312 "vfmaddsub231ps %%xmm7, %%xmm8, %%xmm9;"
313 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
314 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
315 for (j = 0; j < 4; j++)
316 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
317 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;"
318 "vfmaddsub231ps (%2), %%xmm8, %%xmm9;"
319 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
320 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
321 for (j = 0; j < 4; j++)
322 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
323 if (thisres) {
324 printf( "Failure 9 %d", i );
325 for (j = 0; j < 4; j++)
326 printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
327 printf( "\n" );
328 }
329 res |= thisres;
330 }
331 for (i = 0; i < N; i++)
332 ft.z[i] = -ft.z[i];
333 for (i = 0; i < N; i += 4) {
334 int thisres = 0;
335 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;"
336 "vfmsubadd132ps %%xmm7, %%xmm8, %%xmm9;"
337 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
338 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
339 for (j = 0; j < 4; j++)
340 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
341 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;"
342 "vfmsubadd132ps (%2), %%xmm8, %%xmm9;"
343 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
344 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
345 for (j = 0; j < 4; j++)
346 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
347 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;"
348 "vfmsubadd213ps %%xmm7, %%xmm8, %%xmm9;"
349 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
350 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
351 for (j = 0; j < 4; j++)
352 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
353 __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;"
354 "vfmsubadd213ps (%3), %%xmm8, %%xmm9;"
355 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
356 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
357 for (j = 0; j < 4; j++)
358 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
359 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;"
360 "vfmsubadd231ps %%xmm7, %%xmm8, %%xmm9;"
361 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
362 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
363 for (j = 0; j < 4; j++)
364 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
365 __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;"
366 "vfmsubadd231ps (%2), %%xmm8, %%xmm9;"
367 "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
368 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
369 for (j = 0; j < 4; j++)
370 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
371 if (thisres) {
372 printf( "Failure 10 %d", i );
373 for (j = 0; j < 4; j++)
374 printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
375 printf( "\n" );
376 }
377 res |= thisres;
378 }
379 for (i = 1; i < N; i += 2)
380 ft.z[i] = -ft.z[i];
381 for (i = 0; i < N; i += 8) {
382 int thisres = 0;
383 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;"
384 "vfmadd132ps %%ymm7, %%ymm8, %%ymm9;"
385 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
386 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
387 for (j = 0; j < 8; j++)
388 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
389 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;"
390 "vfmadd132ps (%2), %%ymm8, %%ymm9;"
391 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
392 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
393 for (j = 0; j < 8; j++)
394 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
395 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;"
396 "vfmadd213ps %%ymm7, %%ymm8, %%ymm9;"
397 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
398 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
399 for (j = 0; j < 8; j++)
400 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
401 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;"
402 "vfmadd213ps (%3), %%ymm8, %%ymm9;"
403 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
404 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
405 for (j = 0; j < 8; j++)
406 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
407 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;"
408 "vfmadd231ps %%ymm7, %%ymm8, %%ymm9;"
409 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
410 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
411 for (j = 0; j < 8; j++)
412 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
413 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;"
414 "vfmadd231ps (%2), %%ymm8, %%ymm9;"
415 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
416 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
417 for (j = 0; j < 8; j++)
418 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
419 if (thisres) {
420 printf( "Failure 11 %d", i );
421 for (j = 0; j < 8; j++)
422 printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
423 printf( "\n" );
424 }
425 res |= thisres;
426 thisres = 0;
427 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;"
428 "vfnmsub132ps %%ymm7, %%ymm8, %%ymm9;"
429 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
430 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
431 for (j = 0; j < 8; j++)
432 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
433 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;"
434 "vfnmsub132ps (%2), %%ymm8, %%ymm9;"
435 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
436 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
437 for (j = 0; j < 8; j++)
438 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
439 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;"
440 "vfnmsub213ps %%ymm7, %%ymm8, %%ymm9;"
441 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
442 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
443 for (j = 0; j < 8; j++)
444 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
445 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;"
446 "vfnmsub213ps (%3), %%ymm8, %%ymm9;"
447 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
448 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
449 for (j = 0; j < 8; j++)
450 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
451 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;"
452 "vfnmsub231ps %%ymm7, %%ymm8, %%ymm9;"
453 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
454 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
455 for (j = 0; j < 8; j++)
456 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
457 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;"
458 "vfnmsub231ps (%2), %%ymm8, %%ymm9;"
459 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
460 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
461 for (j = 0; j < 8; j++)
462 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
463 if (thisres) {
464 printf( "Failure 12 %d", i );
465 for (j = 0; j < 8; j++)
466 printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
467 printf( "\n" );
468 }
469 res |= thisres;
470 }
471 for (i = 0; i < N; i++)
472 ft.z[i] = -ft.z[i];
473 for (i = 0; i < N; i += 8) {
474 int thisres = 0;
475 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;"
476 "vfmsub132ps %%ymm7, %%ymm8, %%ymm9;"
477 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
478 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
479 for (j = 0; j < 8; j++)
480 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
481 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;"
482 "vfmsub132ps (%2), %%ymm8, %%ymm9;"
483 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
484 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
485 for (j = 0; j < 8; j++)
486 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
487 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;"
488 "vfmsub213ps %%ymm7, %%ymm8, %%ymm9;"
489 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
490 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
491 for (j = 0; j < 8; j++)
492 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
493 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;"
494 "vfmsub213ps (%3), %%ymm8, %%ymm9;"
495 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
496 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
497 for (j = 0; j < 8; j++)
498 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
499 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;"
500 "vfmsub231ps %%ymm7, %%ymm8, %%ymm9;"
501 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
502 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
503 for (j = 0; j < 8; j++)
504 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
505 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;"
506 "vfmsub231ps (%2), %%ymm8, %%ymm9;"
507 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
508 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
509 for (j = 0; j < 8; j++)
510 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
511 if (thisres) {
512 printf( "Failure 13 %d", i );
513 for (j = 0; j < 8; j++)
514 printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
515 printf( "\n" );
516 }
517 res |= thisres;
518 thisres = 0;
519 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;"
520 "vfnmadd132ps %%ymm7, %%ymm8, %%ymm9;"
521 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
522 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
523 for (j = 0; j < 8; j++)
524 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
525 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;"
526 "vfnmadd132ps (%2), %%ymm8, %%ymm9;"
527 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
528 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
529 for (j = 0; j < 8; j++)
530 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
531 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;"
532 "vfnmadd213ps %%ymm7, %%ymm8, %%ymm9;"
533 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
534 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
535 for (j = 0; j < 8; j++)
536 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
537 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;"
538 "vfnmadd213ps (%3), %%ymm8, %%ymm9;"
539 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
540 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
541 for (j = 0; j < 8; j++)
542 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
543 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;"
544 "vfnmadd231ps %%ymm7, %%ymm8, %%ymm9;"
545 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
546 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
547 for (j = 0; j < 8; j++)
548 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
549 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;"
550 "vfnmadd231ps (%2), %%ymm8, %%ymm9;"
551 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
552 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
553 for (j = 0; j < 8; j++)
554 thisres |= testf( -ft.res[i+j], ft.expected[i+j] );
555 if (thisres) {
556 printf( "Failure 14 %d", i );
557 for (j = 0; j < 8; j++)
558 printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
559 printf( "\n" );
560 }
561 res |= thisres;
562 }
563 for (i = 1; i < N; i += 2)
564 ft.z[i] = -ft.z[i];
565 for (i = 0; i < N; i += 8) {
566 int thisres = 0;
567 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;"
568 "vfmaddsub132ps %%ymm7, %%ymm8, %%ymm9;"
569 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
570 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
571 for (j = 0; j < 8; j++)
572 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
573 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;"
574 "vfmaddsub132ps (%2), %%ymm8, %%ymm9;"
575 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
576 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
577 for (j = 0; j < 8; j++)
578 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
579 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;"
580 "vfmaddsub213ps %%ymm7, %%ymm8, %%ymm9;"
581 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
582 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
583 for (j = 0; j < 8; j++)
584 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
585 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;"
586 "vfmaddsub213ps (%3), %%ymm8, %%ymm9;"
587 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
588 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
589 for (j = 0; j < 8; j++)
590 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
591 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;"
592 "vfmaddsub231ps %%ymm7, %%ymm8, %%ymm9;"
593 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
594 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
595 for (j = 0; j < 8; j++)
596 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
597 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;"
598 "vfmaddsub231ps (%2), %%ymm8, %%ymm9;"
599 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
600 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
601 for (j = 0; j < 8; j++)
602 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
603 if (thisres) {
604 printf( "Failure 15 %d", i );
605 for (j = 0; j < 8; j++)
606 printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
607 printf( "\n" );
608 }
609 res |= thisres;
610 }
611 for (i = 0; i < N; i++)
612 ft.z[i] = -ft.z[i];
613 for (i = 0; i < N; i += 8) {
614 int thisres = 0;
615 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;"
616 "vfmsubadd132ps %%ymm7, %%ymm8, %%ymm9;"
617 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
618 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
619 for (j = 0; j < 8; j++)
620 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
621 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;"
622 "vfmsubadd132ps (%2), %%ymm8, %%ymm9;"
623 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
624 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
625 for (j = 0; j < 8; j++)
626 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
627 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;"
628 "vfmsubadd213ps %%ymm7, %%ymm8, %%ymm9;"
629 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
630 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
631 for (j = 0; j < 8; j++)
632 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
633 __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;"
634 "vfmsubadd213ps (%3), %%ymm8, %%ymm9;"
635 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
636 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
637 for (j = 0; j < 8; j++)
638 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
639 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;"
640 "vfmsubadd231ps %%ymm7, %%ymm8, %%ymm9;"
641 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
642 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
643 for (j = 0; j < 8; j++)
644 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
645 __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;"
646 "vfmsubadd231ps (%2), %%ymm8, %%ymm9;"
647 "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]),
648 "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9");
649 for (j = 0; j < 8; j++)
650 thisres |= testf( ft.res[i+j], ft.expected[i+j] );
651 if (thisres) {
652 printf( "Failure 16 %d", i );
653 for (j = 0; j < 8; j++)
654 printf( " %a %a", ft.res[i+j], ft.expected[i+j] );
655 printf( "\n" );
656 }
657 res |= thisres;
658 }
659 for (i = 1; i < N; i += 2)
660 ft.z[i] = -ft.z[i];
661 return res;
662 }
663
/*
 * Compare a computed double against the expected one, bit for bit.
 *
 * x is the value produced by the instruction under test, y the expected
 * value.  Returns 0 when they match and non-zero when they differ.
 *
 * Both values are copied into integers with memcpy so that the comparison
 * looks at the raw encoding: +0.0 and -0.0 are therefore different.  The one
 * relaxation is for NaNs: when x has all exponent bits and the top mantissa
 * bit set (a quiet NaN), y only needs to have those same bits set; the sign
 * and the remaining payload bits are ignored.
 */
static int test( double x, double y )
{
   /* Exponent all ones plus the most significant mantissa bit. */
   const unsigned long long qnan_mask = 0x7ff8000000000000ULL;
   unsigned long long a, b;

   memcpy( &a, &x, sizeof (a) );
   memcpy( &b, &y, sizeof (b) );
   if ((a & qnan_mask) == qnan_mask)
      return (b & qnan_mask) != qnan_mask;
   return a != b;
}
673
test_fma(void)674 static int test_fma( void )
675 {
676 int res = 0, i, j;
677 double w;
678 for (i = 0; i < N; i++) {
679 int thisres = 0;
680 __asm __volatile__ ("vfmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
681 thisres |= test( w, dt.expected[i] );
682 __asm __volatile__ ("vfmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i]));
683 thisres |= test( w, dt.expected[i] );
684 __asm __volatile__ ("vfmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
685 thisres |= test( w, dt.expected[i] );
686 __asm __volatile__ ("vfmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i]));
687 thisres |= test( w, dt.expected[i] );
688 __asm __volatile__ ("vfmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i]));
689 thisres |= test( w, dt.expected[i] );
690 __asm __volatile__ ("vfmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i]));
691 thisres |= test( w, dt.expected[i] );
692 if (thisres)
693 printf( "Failure 1 %d %a %a\n", i, w, dt.expected[i] );
694 res |= thisres;
695 thisres = 0;
696 __asm __volatile__ ("vfnmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
697 thisres |= test( -w, dt.expected[i] );
698 __asm __volatile__ ("vfnmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i]));
699 thisres |= test( -w, dt.expected[i] );
700 __asm __volatile__ ("vfnmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
701 thisres |= test( -w, dt.expected[i] );
702 __asm __volatile__ ("vfnmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i]));
703 thisres |= test( -w, dt.expected[i] );
704 __asm __volatile__ ("vfnmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i]));
705 thisres |= test( -w, dt.expected[i] );
706 __asm __volatile__ ("vfnmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i]));
707 thisres |= test( -w, dt.expected[i] );
708 if (thisres)
709 printf( "Failure 2 %d %a %a\n", i, w, dt.expected[i] );
710 res |= thisres;
711 }
712 for (i = 0; i < N; i++)
713 dt.z[i] = -dt.z[i];
714 for (i = 0; i < N; i++) {
715 int thisres = 0;
716 __asm __volatile__ ("vfmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
717 thisres |= test( w, dt.expected[i] );
718 __asm __volatile__ ("vfmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i]));
719 thisres |= test( w, dt.expected[i] );
720 __asm __volatile__ ("vfmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
721 thisres |= test( w, dt.expected[i] );
722 __asm __volatile__ ("vfmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i]));
723 thisres |= test( w, dt.expected[i] );
724 __asm __volatile__ ("vfmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i]));
725 thisres |= test( w, dt.expected[i] );
726 __asm __volatile__ ("vfmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i]));
727 thisres |= test( w, dt.expected[i] );
728 if (thisres)
729 printf( "Failure 3 %d %a %a\n", i, w, dt.expected[i] );
730 res |= thisres;
731 thisres = 0;
732 __asm __volatile__ ("vfnmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
733 thisres |= test( -w, dt.expected[i] );
734 __asm __volatile__ ("vfnmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i]));
735 thisres |= test( -w, dt.expected[i] );
736 __asm __volatile__ ("vfnmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i]));
737 thisres |= test( -w, dt.expected[i] );
738 __asm __volatile__ ("vfnmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i]));
739 thisres |= test( -w, dt.expected[i] );
740 __asm __volatile__ ("vfnmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i]));
741 thisres |= test( -w, dt.expected[i] );
742 __asm __volatile__ ("vfnmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i]));
743 thisres |= test( -w, dt.expected[i] );
744 if (thisres)
745 printf( "Failure 4 %d %a %a\n", i, w, dt.expected[i] );
746 res |= thisres;
747 }
748 for (i = 0; i < N; i++)
749 dt.z[i] = -dt.z[i];
750 for (i = 0; i < N; i += 2) {
751 int thisres = 0;
752 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
753 "vfmadd132pd %%xmm7, %%xmm8, %%xmm9;"
754 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
755 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
756 for (j = 0; j < 2; j++)
757 thisres |= test( dt.res[i+j], dt.expected[i+j] );
758 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
759 "vfmadd132pd (%2), %%xmm8, %%xmm9;"
760 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
761 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
762 for (j = 0; j < 2; j++)
763 thisres |= test( dt.res[i+j], dt.expected[i+j] );
764 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
765 "vfmadd213pd %%xmm7, %%xmm8, %%xmm9;"
766 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
767 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
768 for (j = 0; j < 2; j++)
769 thisres |= test( dt.res[i+j], dt.expected[i+j] );
770 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
771 "vfmadd213pd (%3), %%xmm8, %%xmm9;"
772 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
773 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
774 for (j = 0; j < 2; j++)
775 thisres |= test( dt.res[i+j], dt.expected[i+j] );
776 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
777 "vfmadd231pd %%xmm7, %%xmm8, %%xmm9;"
778 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
779 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
780 for (j = 0; j < 2; j++)
781 thisres |= test( dt.res[i+j], dt.expected[i+j] );
782 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
783 "vfmadd231pd (%2), %%xmm8, %%xmm9;"
784 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
785 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
786 for (j = 0; j < 2; j++)
787 thisres |= test( dt.res[i+j], dt.expected[i+j] );
788 if (thisres) {
789 printf( "Failure 5 %d", i );
790 for (j = 0; j < 2; j++)
791 printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
792 printf( "\n" );
793 }
794 res |= thisres;
795 thisres = 0;
796 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
797 "vfnmsub132pd %%xmm7, %%xmm8, %%xmm9;"
798 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
799 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
800 for (j = 0; j < 2; j++)
801 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
802 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
803 "vfnmsub132pd (%2), %%xmm8, %%xmm9;"
804 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
805 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
806 for (j = 0; j < 2; j++)
807 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
808 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
809 "vfnmsub213pd %%xmm7, %%xmm8, %%xmm9;"
810 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
811 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
812 for (j = 0; j < 2; j++)
813 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
814 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
815 "vfnmsub213pd (%3), %%xmm8, %%xmm9;"
816 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
817 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
818 for (j = 0; j < 2; j++)
819 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
820 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
821 "vfnmsub231pd %%xmm7, %%xmm8, %%xmm9;"
822 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
823 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
824 for (j = 0; j < 2; j++)
825 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
826 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
827 "vfnmsub231pd (%2), %%xmm8, %%xmm9;"
828 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
829 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
830 for (j = 0; j < 2; j++)
831 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
832 if (thisres) {
833 printf( "Failure 6 %d", i );
834 for (j = 0; j < 2; j++)
835 printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
836 printf( "\n" );
837 }
838 res |= thisres;
839 }
840 for (i = 0; i < N; i++)
841 dt.z[i] = -dt.z[i];
842 for (i = 0; i < N; i += 2) {
843 int thisres = 0;
844 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
845 "vfmsub132pd %%xmm7, %%xmm8, %%xmm9;"
846 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
847 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
848 for (j = 0; j < 2; j++)
849 thisres |= test( dt.res[i+j], dt.expected[i+j] );
850 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
851 "vfmsub132pd (%2), %%xmm8, %%xmm9;"
852 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
853 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
854 for (j = 0; j < 2; j++)
855 thisres |= test( dt.res[i+j], dt.expected[i+j] );
856 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
857 "vfmsub213pd %%xmm7, %%xmm8, %%xmm9;"
858 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
859 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
860 for (j = 0; j < 2; j++)
861 thisres |= test( dt.res[i+j], dt.expected[i+j] );
862 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
863 "vfmsub213pd (%3), %%xmm8, %%xmm9;"
864 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
865 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
866 for (j = 0; j < 2; j++)
867 thisres |= test( dt.res[i+j], dt.expected[i+j] );
868 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
869 "vfmsub231pd %%xmm7, %%xmm8, %%xmm9;"
870 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
871 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
872 for (j = 0; j < 2; j++)
873 thisres |= test( dt.res[i+j], dt.expected[i+j] );
874 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
875 "vfmsub231pd (%2), %%xmm8, %%xmm9;"
876 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
877 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
878 for (j = 0; j < 2; j++)
879 thisres |= test( dt.res[i+j], dt.expected[i+j] );
880 if (thisres) {
881 printf( "Failure 7 %d", i );
882 for (j = 0; j < 2; j++)
883 printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
884 printf( "\n" );
885 }
886 res |= thisres;
887 thisres = 0;
888 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
889 "vfnmadd132pd %%xmm7, %%xmm8, %%xmm9;"
890 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
891 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
892 for (j = 0; j < 2; j++)
893 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
894 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
895 "vfnmadd132pd (%2), %%xmm8, %%xmm9;"
896 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
897 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
898 for (j = 0; j < 2; j++)
899 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
900 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
901 "vfnmadd213pd %%xmm7, %%xmm8, %%xmm9;"
902 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
903 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
904 for (j = 0; j < 2; j++)
905 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
906 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
907 "vfnmadd213pd (%3), %%xmm8, %%xmm9;"
908 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
909 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
910 for (j = 0; j < 2; j++)
911 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
912 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
913 "vfnmadd231pd %%xmm7, %%xmm8, %%xmm9;"
914 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
915 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
916 for (j = 0; j < 2; j++)
917 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
918 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
919 "vfnmadd231pd (%2), %%xmm8, %%xmm9;"
920 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
921 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
922 for (j = 0; j < 2; j++)
923 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
924 if (thisres) {
925 printf( "Failure 8 %d", i );
926 for (j = 0; j < 2; j++)
927 printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
928 printf( "\n" );
929 }
930 res |= thisres;
931 }
932 for (i = 1; i < N; i += 2)
933 dt.z[i] = -dt.z[i];
934 for (i = 0; i < N; i += 2) {
935 int thisres = 0;
936 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
937 "vfmaddsub132pd %%xmm7, %%xmm8, %%xmm9;"
938 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
939 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
940 for (j = 0; j < 2; j++)
941 thisres |= test( dt.res[i+j], dt.expected[i+j] );
942 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
943 "vfmaddsub132pd (%2), %%xmm8, %%xmm9;"
944 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
945 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
946 for (j = 0; j < 2; j++)
947 thisres |= test( dt.res[i+j], dt.expected[i+j] );
948 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
949 "vfmaddsub213pd %%xmm7, %%xmm8, %%xmm9;"
950 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
951 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
952 for (j = 0; j < 2; j++)
953 thisres |= test( dt.res[i+j], dt.expected[i+j] );
954 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
955 "vfmaddsub213pd (%3), %%xmm8, %%xmm9;"
956 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
957 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
958 for (j = 0; j < 2; j++)
959 thisres |= test( dt.res[i+j], dt.expected[i+j] );
960 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
961 "vfmaddsub231pd %%xmm7, %%xmm8, %%xmm9;"
962 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
963 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
964 for (j = 0; j < 2; j++)
965 thisres |= test( dt.res[i+j], dt.expected[i+j] );
966 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
967 "vfmaddsub231pd (%2), %%xmm8, %%xmm9;"
968 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
969 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
970 for (j = 0; j < 2; j++)
971 thisres |= test( dt.res[i+j], dt.expected[i+j] );
972 if (thisres) {
973 printf( "Failure 9 %d", i );
974 for (j = 0; j < 2; j++)
975 printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
976 printf( "\n" );
977 }
978 res |= thisres;
979 }
980 for (i = 0; i < N; i++)
981 dt.z[i] = -dt.z[i];
982 for (i = 0; i < N; i += 2) {
983 int thisres = 0;
984 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
985 "vfmsubadd132pd %%xmm7, %%xmm8, %%xmm9;"
986 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
987 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
988 for (j = 0; j < 2; j++)
989 thisres |= test( dt.res[i+j], dt.expected[i+j] );
990 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
991 "vfmsubadd132pd (%2), %%xmm8, %%xmm9;"
992 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
993 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
994 for (j = 0; j < 2; j++)
995 thisres |= test( dt.res[i+j], dt.expected[i+j] );
996 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
997 "vfmsubadd213pd %%xmm7, %%xmm8, %%xmm9;"
998 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
999 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1000 for (j = 0; j < 2; j++)
1001 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1002 __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
1003 "vfmsubadd213pd (%3), %%xmm8, %%xmm9;"
1004 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1005 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1006 for (j = 0; j < 2; j++)
1007 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1008 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
1009 "vfmsubadd231pd %%xmm7, %%xmm8, %%xmm9;"
1010 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1011 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1012 for (j = 0; j < 2; j++)
1013 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1014 __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
1015 "vfmsubadd231pd (%2), %%xmm8, %%xmm9;"
1016 "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1017 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1018 for (j = 0; j < 2; j++)
1019 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1020 if (thisres) {
1021 printf( "Failure 10 %d", i );
1022 for (j = 0; j < 2; j++)
1023 printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
1024 printf( "\n" );
1025 }
1026 res |= thisres;
1027 }
1028 for (i = 1; i < N; i += 2)
1029 dt.z[i] = -dt.z[i];
1030 for (i = 0; i < N; i += 4) {
1031 int thisres = 0;
1032 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
1033 "vfmadd132pd %%ymm7, %%ymm8, %%ymm9;"
1034 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1035 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1036 for (j = 0; j < 4; j++)
1037 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1038 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
1039 "vfmadd132pd (%2), %%ymm8, %%ymm9;"
1040 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1041 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1042 for (j = 0; j < 4; j++)
1043 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1044 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
1045 "vfmadd213pd %%ymm7, %%ymm8, %%ymm9;"
1046 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1047 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1048 for (j = 0; j < 4; j++)
1049 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1050 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
1051 "vfmadd213pd (%3), %%ymm8, %%ymm9;"
1052 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1053 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1054 for (j = 0; j < 4; j++)
1055 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1056 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
1057 "vfmadd231pd %%ymm7, %%ymm8, %%ymm9;"
1058 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1059 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1060 for (j = 0; j < 4; j++)
1061 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1062 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
1063 "vfmadd231pd (%2), %%ymm8, %%ymm9;"
1064 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1065 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1066 for (j = 0; j < 4; j++)
1067 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1068 if (thisres) {
1069 printf( "Failure 11 %d", i );
1070 for (j = 0; j < 4; j++)
1071 printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
1072 printf( "\n" );
1073 }
1074 res |= thisres;
1075 thisres = 0;
1076 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
1077 "vfnmsub132pd %%ymm7, %%ymm8, %%ymm9;"
1078 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1079 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1080 for (j = 0; j < 4; j++)
1081 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1082 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
1083 "vfnmsub132pd (%2), %%ymm8, %%ymm9;"
1084 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1085 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1086 for (j = 0; j < 4; j++)
1087 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1088 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
1089 "vfnmsub213pd %%ymm7, %%ymm8, %%ymm9;"
1090 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1091 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1092 for (j = 0; j < 4; j++)
1093 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1094 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
1095 "vfnmsub213pd (%3), %%ymm8, %%ymm9;"
1096 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1097 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1098 for (j = 0; j < 4; j++)
1099 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1100 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
1101 "vfnmsub231pd %%ymm7, %%ymm8, %%ymm9;"
1102 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1103 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1104 for (j = 0; j < 4; j++)
1105 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1106 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
1107 "vfnmsub231pd (%2), %%ymm8, %%ymm9;"
1108 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1109 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1110 for (j = 0; j < 4; j++)
1111 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1112 if (thisres) {
1113 printf( "Failure 12 %d", i );
1114 for (j = 0; j < 4; j++)
1115 printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
1116 printf( "\n" );
1117 }
1118 res |= thisres;
1119 }
1120 for (i = 0; i < N; i++)
1121 dt.z[i] = -dt.z[i];
1122 for (i = 0; i < N; i += 4) {
1123 int thisres = 0;
1124 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
1125 "vfmsub132pd %%ymm7, %%ymm8, %%ymm9;"
1126 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1127 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1128 for (j = 0; j < 4; j++)
1129 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1130 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
1131 "vfmsub132pd (%2), %%ymm8, %%ymm9;"
1132 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1133 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1134 for (j = 0; j < 4; j++)
1135 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1136 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
1137 "vfmsub213pd %%ymm7, %%ymm8, %%ymm9;"
1138 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1139 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1140 for (j = 0; j < 4; j++)
1141 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1142 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
1143 "vfmsub213pd (%3), %%ymm8, %%ymm9;"
1144 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1145 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1146 for (j = 0; j < 4; j++)
1147 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1148 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
1149 "vfmsub231pd %%ymm7, %%ymm8, %%ymm9;"
1150 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1151 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1152 for (j = 0; j < 4; j++)
1153 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1154 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
1155 "vfmsub231pd (%2), %%ymm8, %%ymm9;"
1156 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1157 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1158 for (j = 0; j < 4; j++)
1159 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1160 if (thisres) {
1161 printf( "Failure 13 %d", i );
1162 for (j = 0; j < 4; j++)
1163 printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
1164 printf( "\n" );
1165 }
1166 res |= thisres;
1167 thisres = 0;
1168 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
1169 "vfnmadd132pd %%ymm7, %%ymm8, %%ymm9;"
1170 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1171 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1172 for (j = 0; j < 4; j++)
1173 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1174 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
1175 "vfnmadd132pd (%2), %%ymm8, %%ymm9;"
1176 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1177 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1178 for (j = 0; j < 4; j++)
1179 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1180 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
1181 "vfnmadd213pd %%ymm7, %%ymm8, %%ymm9;"
1182 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1183 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1184 for (j = 0; j < 4; j++)
1185 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1186 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
1187 "vfnmadd213pd (%3), %%ymm8, %%ymm9;"
1188 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1189 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1190 for (j = 0; j < 4; j++)
1191 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1192 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
1193 "vfnmadd231pd %%ymm7, %%ymm8, %%ymm9;"
1194 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1195 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1196 for (j = 0; j < 4; j++)
1197 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1198 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
1199 "vfnmadd231pd (%2), %%ymm8, %%ymm9;"
1200 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1201 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1202 for (j = 0; j < 4; j++)
1203 thisres |= test( -dt.res[i+j], dt.expected[i+j] );
1204 if (thisres) {
1205 printf( "Failure 14 %d", i );
1206 for (j = 0; j < 4; j++)
1207 printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
1208 printf( "\n" );
1209 }
1210 res |= thisres;
1211 }
1212 for (i = 1; i < N; i += 2)
1213 dt.z[i] = -dt.z[i];
1214 for (i = 0; i < N; i += 4) {
1215 int thisres = 0;
1216 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
1217 "vfmaddsub132pd %%ymm7, %%ymm8, %%ymm9;"
1218 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1219 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1220 for (j = 0; j < 4; j++)
1221 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1222 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
1223 "vfmaddsub132pd (%2), %%ymm8, %%ymm9;"
1224 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1225 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1226 for (j = 0; j < 4; j++)
1227 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1228 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
1229 "vfmaddsub213pd %%ymm7, %%ymm8, %%ymm9;"
1230 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1231 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1232 for (j = 0; j < 4; j++)
1233 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1234 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
1235 "vfmaddsub213pd (%3), %%ymm8, %%ymm9;"
1236 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1237 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1238 for (j = 0; j < 4; j++)
1239 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1240 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
1241 "vfmaddsub231pd %%ymm7, %%ymm8, %%ymm9;"
1242 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1243 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1244 for (j = 0; j < 4; j++)
1245 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1246 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
1247 "vfmaddsub231pd (%2), %%ymm8, %%ymm9;"
1248 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1249 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1250 for (j = 0; j < 4; j++)
1251 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1252 if (thisres) {
1253 printf( "Failure 15 %d", i );
1254 for (j = 0; j < 4; j++)
1255 printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
1256 printf( "\n" );
1257 }
1258 res |= thisres;
1259 }
1260 for (i = 0; i < N; i++)
1261 dt.z[i] = -dt.z[i];
1262 for (i = 0; i < N; i += 4) {
1263 int thisres = 0;
1264 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
1265 "vfmsubadd132pd %%ymm7, %%ymm8, %%ymm9;"
1266 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1267 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1268 for (j = 0; j < 4; j++)
1269 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1270 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
1271 "vfmsubadd132pd (%2), %%ymm8, %%ymm9;"
1272 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1273 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1274 for (j = 0; j < 4; j++)
1275 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1276 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
1277 "vfmsubadd213pd %%ymm7, %%ymm8, %%ymm9;"
1278 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1279 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1280 for (j = 0; j < 4; j++)
1281 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1282 __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
1283 "vfmsubadd213pd (%3), %%ymm8, %%ymm9;"
1284 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1285 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1286 for (j = 0; j < 4; j++)
1287 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1288 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
1289 "vfmsubadd231pd %%ymm7, %%ymm8, %%ymm9;"
1290 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1291 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1292 for (j = 0; j < 4; j++)
1293 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1294 __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
1295 "vfmsubadd231pd (%2), %%ymm8, %%ymm9;"
1296 "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]),
1297 "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9");
1298 for (j = 0; j < 4; j++)
1299 thisres |= test( dt.res[i+j], dt.expected[i+j] );
1300 if (thisres) {
1301 printf( "Failure 16 %d", i );
1302 for (j = 0; j < 4; j++)
1303 printf( " %a %a", dt.res[i+j], dt.expected[i+j] );
1304 printf( "\n" );
1305 }
1306 res |= thisres;
1307 }
1308 for (i = 1; i < N; i += 2)
1309 dt.z[i] = -dt.z[i];
1310 return res;
1311 }
1312
main()1313 int main( )
1314 {
1315 int res = 0;
1316 int i = 0;
1317 plus_zero = 0.0;
1318 __asm __volatile__ ("" : : "r" (&plus_zero) : "memory");
1319 nan_value = plus_zero / plus_zero;
1320 plus_infty = 3.40282346638528859812e+38F * 16.0F;
1321 minus_infty = -plus_infty;
1322 #define TEST_F( a, b, c, d ) \
1323 do { \
1324 ft.x[i] = a; \
1325 ft.y[i] = b; \
1326 ft.z[i] = c; \
1327 ft.expected[i] = d; \
1328 i++; \
1329 } while (0)
1330 TEST_F( 1.0, 2.0, 3.0, 5.0 );
1331 TEST_F( nan_value, 2.0, 3.0, nan_value );
1332 TEST_F( 1.0, nan_value, 3.0, nan_value );
1333 TEST_F( 1.0, 2.0, nan_value, nan_value );
1334 TEST_F( plus_infty, 0.0, nan_value, nan_value );
1335 TEST_F( minus_infty, 0.0, nan_value, nan_value );
1336 TEST_F( 0.0, plus_infty, nan_value, nan_value );
1337 TEST_F( 0.0, minus_infty, nan_value, nan_value );
1338 TEST_F( plus_infty, 0.0, 1.0, nan_value );
1339 TEST_F( minus_infty, 0.0, 1.0, nan_value );
1340 TEST_F( 0.0, plus_infty, 1.0, nan_value );
1341 TEST_F( 0.0, minus_infty, 1.0, nan_value );
1342 TEST_F( plus_infty, plus_infty, minus_infty, nan_value );
1343 TEST_F( minus_infty, plus_infty, plus_infty, nan_value );
1344 TEST_F( plus_infty, minus_infty, plus_infty, nan_value );
1345 TEST_F( minus_infty, minus_infty, minus_infty, nan_value );
1346 TEST_F( plus_infty, 3.5L, minus_infty, nan_value );
1347 TEST_F( minus_infty, -7.5L, minus_infty, nan_value );
1348 TEST_F( -13.5L, plus_infty, plus_infty, nan_value );
1349 TEST_F( minus_infty, 7.5L, plus_infty, nan_value );
1350 TEST_F( 1.25L, 0.75L, 0.0625L, 1.0L );
1351 TEST_F( -3.40282346638528859812e+38F, -3.40282346638528859812e+38F, minus_infty, minus_infty );
1352 TEST_F( 3.40282346638528859812e+38F / 2, 3.40282346638528859812e+38F / 2, minus_infty, minus_infty );
1353 TEST_F( -3.40282346638528859812e+38F, 3.40282346638528859812e+38F, plus_infty, plus_infty );
1354 TEST_F( 3.40282346638528859812e+38F / 2, -3.40282346638528859812e+38F / 4, plus_infty, plus_infty );
1355 TEST_F( plus_infty, 4, plus_infty, plus_infty );
1356 TEST_F( 2, minus_infty, minus_infty, minus_infty );
1357 TEST_F( minus_infty, minus_infty, plus_infty, plus_infty );
1358 TEST_F( plus_infty, minus_infty, minus_infty, minus_infty );
1359 TEST_F( 0x1.7ff8p+13, 0x1.000002p+0, 0x1.ffffp-24, 0x1.7ff802p+13 );
1360 TEST_F( 0x1.fffp+0, 0x1.00001p+0, -0x1.fffp+0, 0x1.fffp-20 );
1361 TEST_F( 0x1.9abcdep+127, 0x0.9abcdep-126, -0x1.f08948p+0, 0x1.bb421p-25 );
1362 TEST_F( 0x1.9abcdep+100, 0x0.9abcdep-126, -0x1.f08948p-27, 0x1.bb421p-52 );
1363 TEST_F( 0x1.fffffep+127, 0x1.001p+0, -0x1.fffffep+127, 0x1.fffffep+115 );
1364 TEST_F( -0x1.fffffep+127, 0x1.fffffep+0, 0x1.fffffep+127, -0x1.fffffap+127 );
1365 TEST_F( 0x1.fffffep+127, 2.0, -0x1.fffffep+127, 0x1.fffffep+127 );
1366
1367 res |= test_fmaf( );
1368 i = 0;
1369 #define TEST( a, b, c, d ) \
1370 do { \
1371 dt.x[i] = a; \
1372 dt.y[i] = b; \
1373 dt.z[i] = c; \
1374 dt.expected[i] = d; \
1375 i++; \
1376 } while (0)
1377 TEST( 1.0, 2.0, 3.0, 5.0 );
1378 TEST( nan_value, 2.0, 3.0, nan_value );
1379 TEST( 1.0, nan_value, 3.0, nan_value );
1380 TEST( 1.0, 2.0, nan_value, nan_value );
1381 TEST( plus_infty, 0.0, nan_value, nan_value );
1382 TEST( minus_infty, 0.0, nan_value, nan_value );
1383 TEST( 0.0, plus_infty, nan_value, nan_value );
1384 TEST( 0.0, minus_infty, nan_value, nan_value );
1385 TEST( plus_infty, 0.0, 1.0, nan_value );
1386 TEST( minus_infty, 0.0, 1.0, nan_value );
1387 TEST( 0.0, plus_infty, 1.0, nan_value );
1388 TEST( 0.0, minus_infty, 1.0, nan_value );
1389 TEST( plus_infty, plus_infty, minus_infty, nan_value );
1390 TEST( minus_infty, plus_infty, plus_infty, nan_value );
1391 TEST( plus_infty, minus_infty, plus_infty, nan_value );
1392 TEST( minus_infty, minus_infty, minus_infty, nan_value );
1393 TEST( plus_infty, 3.5L, minus_infty, nan_value );
1394 TEST( minus_infty, -7.5L, minus_infty, nan_value );
1395 TEST( -13.5L, plus_infty, plus_infty, nan_value );
1396 TEST( minus_infty, 7.5L, plus_infty, nan_value );
1397 TEST( 1.25L, 0.75L, 0.0625L, 1.0L );
1398 TEST( -1.79769313486231570815e+308L, -1.79769313486231570815e+308L, minus_infty, minus_infty );
1399 TEST( 1.79769313486231570815e+308L / 2, 1.79769313486231570815e+308L / 2, minus_infty, minus_infty );
1400 TEST( -1.79769313486231570815e+308L, 1.79769313486231570815e+308L, plus_infty, plus_infty );
1401 TEST( 1.79769313486231570815e+308L / 2, -1.79769313486231570815e+308L / 4, plus_infty, plus_infty );
1402 TEST( plus_infty, 4, plus_infty, plus_infty );
1403 TEST( 2, minus_infty, minus_infty, minus_infty );
1404 TEST( minus_infty, minus_infty, plus_infty, plus_infty );
1405 TEST( plus_infty, minus_infty, minus_infty, minus_infty );
1406 TEST( 0x1.7fp+13, 0x1.0000000000001p+0, 0x1.ffep-48, 0x1.7f00000000001p+13 );
1407 TEST( 0x1.fffp+0, 0x1.0000000000001p+0, -0x1.fffp+0, 0x1.fffp-52 );
1408 TEST( 0x1.0000002p+0, 0x1.ffffffcp-1, 0x1p-300, 1.0 );
1409 TEST( 0x1.0000002p+0, 0x1.ffffffcp-1, -0x1p-300, 0x1.fffffffffffffp-1 );
1410 TEST( 0x1.deadbeef2feedp+1023, 0x0.deadbeef2feedp-1022, -0x1.a05f8c01a4bfbp+1, 0x1.0989687bc9da4p-53 );
1411 TEST( 0x1.deadbeef2feedp+900, 0x0.deadbeef2feedp-1022, -0x1.a05f8c01a4bfbp-122, 0x1.0989687bc9da4p-176 );
1412 TEST( 0x1.fffffffffffffp+1023, 0x1.001p+0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1011 );
1413 TEST( -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+0, 0x1.fffffffffffffp+1023, -0x1.ffffffffffffdp+1023 );
1414 TEST( 0x1.fffffffffffffp+1023, 2.0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023 );
1415 TEST( 0x1.6a09e667f3bccp-538, 0x1.6a09e667f3bccp-538, 0.0, 0.0 );
1416 TEST( 0x1.deadbeef2feedp-495, 0x1.deadbeef2feedp-495, -0x1.bf86a5786a574p-989, 0x0.0000042625a1fp-1022 );
1417 TEST( 0x1.deadbeef2feedp-503, 0x1.deadbeef2feedp-503, -0x1.bf86a5786a574p-1005, 0x0.0000000004262p-1022 );
1418 TEST( 0x1p-537, 0x1p-538, 0x1p-1074, 0x0.0000000000002p-1022 );
1419 TEST( 0x1.7fffff8p-968, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000001p-1022 );
1420 TEST( 0x1.4000004p-967, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000003p-1022 );
1421 TEST( 0x1.4p-967, -0x1p-106, -0x0.000001p-1022, -0x0.0000010000002p-1022 );
1422 TEST( -0x1.19cab66d73e17p-959, 0x1.c7108a8c5ff51p-107, -0x0.80b0ad65d9b64p-1022, -0x0.80b0ad65d9d59p-1022 );
1423 TEST( -0x1.d2eaed6e8e9d3p-979, -0x1.4e066c62ac9ddp-63, -0x0.9245e6b003454p-1022, -0x0.9245c09c5fb5dp-1022 );
1424 TEST( 0x1.153d650bb9f06p-907, 0x1.2d01230d48407p-125, -0x0.b278d5acfc3cp-1022, -0x0.b22757123bbe9p-1022 );
1425 TEST( -0x1.fffffffffffffp-711, 0x1.fffffffffffffp-275, 0x1.fffffe00007ffp-983, 0x1.7ffffe00007ffp-983 );
1426
1427 res |= test_fma( );
1428 if (res == 0)
1429 printf( "Testing successful\n");
1430 return 0;
1431 }
1432