Lines Matching refs:AVX
2 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
6 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX-64
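; The IR bodies of these functions contain no "AVX" text and so are not part of this
; listing. Judging from the checked asm (a vpaddb/vpaddw/vpaddd/vpaddq of a repeated
; constant followed by a vpand with the same constant), each integer fNxM_iK test is
; assumed to have the shape sketched below for @f16xi8_i16, where the constant is the
; iK bit pattern tiled across the whole vector:

define <16 x i8> @f16xi8_i16(<16 x i8> %a) {
  %r1 = add <16 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1,
                       i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>, %a
  %r2 = and <16 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1,
                       i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>, %r1
  ret <16 x i8> %r2
}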
20 ; AVX-LABEL: f16xi8_i16:
21 ; AVX: # %bb.0:
22 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
23 ; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
24 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
25 ; AVX-NEXT: retl
34 ; AVX-64-LABEL: f16xi8_i16:
35 ; AVX-64: # %bb.0:
36 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
37 ; AVX-64-NEXT: vpaddb %xmm1, %xmm0, %xmm0
38 ; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
39 ; AVX-64-NEXT: retq
54 ; AVX-LABEL: f16xi8_i32:
55 ; AVX: # %bb.0:
56 ; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [50462976,50462976,50462976,50462976]
57 ; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
58 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
59 ; AVX-NEXT: retl
68 ; AVX-64-LABEL: f16xi8_i32:
69 ; AVX-64: # %bb.0:
70 ; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm1 = [50462976,50462976,50462976,50462976]
71 ; AVX-64-NEXT: vpaddb %xmm1, %xmm0, %xmm0
72 ; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
73 ; AVX-64-NEXT: retq
88 ; AVX-LABEL: f16xi8_i64:
89 ; AVX: # %bb.0:
90 ; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [506097522914230528,506097522914230528]
91 ; AVX-NEXT: # xmm1 = mem[0,0]
92 ; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
93 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
94 ; AVX-NEXT: retl
103 ; AVX-64-LABEL: f16xi8_i64:
104 ; AVX-64: # %bb.0:
105 ; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = [506097522914230528,506097522914230528]
106 ; AVX-64-NEXT: # xmm1 = mem[0,0]
107 ; AVX-64-NEXT: vpaddb %xmm1, %xmm0, %xmm0
108 ; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
109 ; AVX-64-NEXT: retq
124 ; AVX-LABEL: f32xi8_i16:
125 ; AVX: # %bb.0:
126 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
127 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
128 ; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
129 ; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
130 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
131 ; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
132 ; AVX-NEXT: retl
141 ; AVX-64-LABEL: f32xi8_i16:
142 ; AVX-64: # %bb.0:
143 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
144 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
145 ; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1
146 ; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0
147 ; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
148 ; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
149 ; AVX-64-NEXT: retq
164 ; AVX-LABEL: f32xi8_i32:
165 ; AVX: # %bb.0:
166 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
167 ; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [50462976,50462976,50462976,50462976]
168 ; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
169 ; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
170 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
171 ; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
172 ; AVX-NEXT: retl
181 ; AVX-64-LABEL: f32xi8_i32:
182 ; AVX-64: # %bb.0:
183 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
184 ; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm2 = [50462976,50462976,50462976,50462976]
185 ; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1
186 ; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0
187 ; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
188 ; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
189 ; AVX-64-NEXT: retq
204 ; AVX-LABEL: f32xi8_i64:
205 ; AVX: # %bb.0:
206 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
207 ; AVX-NEXT: vmovddup {{.*#+}} xmm2 = [506097522914230528,506097522914230528]
208 ; AVX-NEXT: # xmm2 = mem[0,0]
209 ; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
210 ; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
211 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
212 ; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
213 ; AVX-NEXT: retl
222 ; AVX-64-LABEL: f32xi8_i64:
223 ; AVX-64: # %bb.0:
224 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
225 ; AVX-64-NEXT: vmovddup {{.*#+}} xmm2 = [506097522914230528,506097522914230528]
226 ; AVX-64-NEXT: # xmm2 = mem[0,0]
227 ; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1
228 ; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0
229 ; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
230 ; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
231 ; AVX-64-NEXT: retq
246 ; AVX-LABEL: f32xi8_i128:
247 ; AVX: # %bb.0:
248 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
249 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
250 ; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
251 ; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
252 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
253 ; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
254 ; AVX-NEXT: retl
264 ; AVX-64-LABEL: f32xi8_i128:
265 ; AVX-64: # %bb.0:
266 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
267 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
268 ; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1
269 ; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0
270 ; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
271 ; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
272 ; AVX-64-NEXT: retq
288 ; AVX-LABEL: f64xi8_i16:
289 ; AVX: # %bb.0:
290 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
291 ; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
292 ; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
293 ; AVX-NEXT: vpaddb %xmm3, %xmm1, %xmm1
294 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
295 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
296 ; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
297 ; AVX-NEXT: vpaddb %xmm3, %xmm0, %xmm0
298 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
299 ; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1…
300 ; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
301 ; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
302 ; AVX-NEXT: retl
320 ; AVX-64-LABEL: f64xi8_i16:
321 ; AVX-64: # %bb.0:
322 ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
323 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
324 ; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
325 ; AVX-64-NEXT: vpaddb %xmm3, %xmm1, %xmm1
326 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
327 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
328 ; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
329 ; AVX-64-NEXT: vpaddb %xmm3, %xmm0, %xmm0
330 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
331 ; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,…
332 ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
333 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
334 ; AVX-64-NEXT: retq
358 ; AVX-LABEL: f64i8_i32:
359 ; AVX: # %bb.0:
360 ; AVX-NEXT: vbroadcastss {{.*#+}} ymm2 = [50462976,50462976,50462976,50462976,50462976,50462976,…
361 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3
362 ; AVX-NEXT: vpaddb %xmm2, %xmm3, %xmm3
363 ; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
364 ; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
365 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm3
366 ; AVX-NEXT: vpaddb %xmm2, %xmm3, %xmm3
367 ; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
368 ; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
369 ; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
370 ; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
371 ; AVX-NEXT: retl
389 ; AVX-64-LABEL: f64i8_i32:
390 ; AVX-64: # %bb.0:
391 ; AVX-64-NEXT: vbroadcastss {{.*#+}} ymm2 = [50462976,50462976,50462976,50462976,50462976,504629…
392 ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3
393 ; AVX-64-NEXT: vpaddb %xmm2, %xmm3, %xmm3
394 ; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1
395 ; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
396 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3
397 ; AVX-64-NEXT: vpaddb %xmm2, %xmm3, %xmm3
398 ; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0
399 ; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
400 ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
401 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
402 ; AVX-64-NEXT: retq
426 ; AVX-LABEL: f64xi8_i64:
427 ; AVX: # %bb.0:
428 ; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [506097522914230528,506097522914230528,5060975229142305…
429 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3
430 ; AVX-NEXT: vpaddb %xmm2, %xmm3, %xmm3
431 ; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
432 ; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
433 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm3
434 ; AVX-NEXT: vpaddb %xmm2, %xmm3, %xmm3
435 ; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
436 ; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
437 ; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
438 ; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
439 ; AVX-NEXT: retl
457 ; AVX-64-LABEL: f64xi8_i64:
458 ; AVX-64: # %bb.0:
459 ; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [506097522914230528,506097522914230528,5060975229142…
460 ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3
461 ; AVX-64-NEXT: vpaddb %xmm2, %xmm3, %xmm3
462 ; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1
463 ; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
464 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3
465 ; AVX-64-NEXT: vpaddb %xmm2, %xmm3, %xmm3
466 ; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0
467 ; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
468 ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
469 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
470 ; AVX-64-NEXT: retq
494 ; AVX-LABEL: f64xi8_i128:
495 ; AVX: # %bb.0:
496 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
497 ; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
498 ; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
499 ; AVX-NEXT: vpaddb %xmm3, %xmm1, %xmm1
500 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
501 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
502 ; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
503 ; AVX-NEXT: vpaddb %xmm3, %xmm0, %xmm0
504 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
505 ; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
506 ; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
507 ; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
508 ; AVX-NEXT: retl
528 ; AVX-64-LABEL: f64xi8_i128:
529 ; AVX-64: # %bb.0:
530 ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
531 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
532 ; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
533 ; AVX-64-NEXT: vpaddb %xmm3, %xmm1, %xmm1
534 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
535 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
536 ; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
537 ; AVX-64-NEXT: vpaddb %xmm3, %xmm0, %xmm0
538 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
539 ; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
540 ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
541 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
542 ; AVX-64-NEXT: retq
568 ; AVX-LABEL: f64xi8_i256:
569 ; AVX: # %bb.0:
570 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
571 ; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
572 ; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
573 ; AVX-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
574 ; AVX-NEXT: vpaddb %xmm4, %xmm1, %xmm1
575 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
576 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
577 ; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
578 ; AVX-NEXT: vpaddb %xmm4, %xmm0, %xmm0
579 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
580 ; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,…
581 ; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
582 ; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
583 ; AVX-NEXT: retl
602 ; AVX-64-LABEL: f64xi8_i256:
603 ; AVX-64: # %bb.0:
604 ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
605 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
606 ; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
607 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
608 ; AVX-64-NEXT: vpaddb %xmm4, %xmm1, %xmm1
609 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
610 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
611 ; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
612 ; AVX-64-NEXT: vpaddb %xmm4, %xmm0, %xmm0
613 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
614 ; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,…
615 ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
616 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
617 ; AVX-64-NEXT: retq
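; Note that for the 256-bit repeat patterns (f64xi8_i256 above, and f32xi16_i256 and
; f8xi64_i256 further down) there is no smaller element for a single broadcast to
; replicate, so the add constant is materialized as two separate 128-bit halves
; (xmm3 and xmm4) and the AND mask as a full 256-bit vmovaps constant load.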
642 ; AVX-LABEL: f8xi16_i32:
643 ; AVX: # %bb.0:
644 ; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [65536,65536,65536,65536]
645 ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
646 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
647 ; AVX-NEXT: retl
656 ; AVX-64-LABEL: f8xi16_i32:
657 ; AVX-64: # %bb.0:
658 ; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm1 = [65536,65536,65536,65536]
659 ; AVX-64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
660 ; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
661 ; AVX-64-NEXT: retq
676 ; AVX-LABEL: f8xi16_i64:
677 ; AVX: # %bb.0:
678 ; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [844433520132096,844433520132096]
679 ; AVX-NEXT: # xmm1 = mem[0,0]
680 ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
681 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
682 ; AVX-NEXT: retl
691 ; AVX-64-LABEL: f8xi16_i64:
692 ; AVX-64: # %bb.0:
693 ; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = [844433520132096,844433520132096]
694 ; AVX-64-NEXT: # xmm1 = mem[0,0]
695 ; AVX-64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
696 ; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
697 ; AVX-64-NEXT: retq
712 ; AVX-LABEL: f16xi16_i32:
713 ; AVX: # %bb.0:
714 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
715 ; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [65536,65536,65536,65536]
716 ; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
717 ; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
718 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
719 ; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
720 ; AVX-NEXT: retl
729 ; AVX-64-LABEL: f16xi16_i32:
730 ; AVX-64: # %bb.0:
731 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
732 ; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm2 = [65536,65536,65536,65536]
733 ; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1
734 ; AVX-64-NEXT: vpaddw %xmm2, %xmm0, %xmm0
735 ; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
736 ; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
737 ; AVX-64-NEXT: retq
752 ; AVX-LABEL: f16xi16_i64:
753 ; AVX: # %bb.0:
754 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
755 ; AVX-NEXT: vmovddup {{.*#+}} xmm2 = [844433520132096,844433520132096]
756 ; AVX-NEXT: # xmm2 = mem[0,0]
757 ; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
758 ; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
759 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
760 ; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
761 ; AVX-NEXT: retl
770 ; AVX-64-LABEL: f16xi16_i64:
771 ; AVX-64: # %bb.0:
772 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
773 ; AVX-64-NEXT: vmovddup {{.*#+}} xmm2 = [844433520132096,844433520132096]
774 ; AVX-64-NEXT: # xmm2 = mem[0,0]
775 ; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1
776 ; AVX-64-NEXT: vpaddw %xmm2, %xmm0, %xmm0
777 ; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
778 ; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
779 ; AVX-64-NEXT: retq
794 ; AVX-LABEL: f16xi16_i128:
795 ; AVX: # %bb.0:
796 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
797 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7]
798 ; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
799 ; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
800 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
801 ; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
802 ; AVX-NEXT: retl
812 ; AVX-64-LABEL: f16xi16_i128:
813 ; AVX-64: # %bb.0:
814 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
815 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7]
816 ; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1
817 ; AVX-64-NEXT: vpaddw %xmm2, %xmm0, %xmm0
818 ; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
819 ; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
820 ; AVX-64-NEXT: retq
836 ; AVX-LABEL: f32xi16_i32:
837 ; AVX: # %bb.0:
838 ; AVX-NEXT: vbroadcastss {{.*#+}} ymm2 = [65536,65536,65536,65536,65536,65536,65536,65536]
839 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3
840 ; AVX-NEXT: vpaddw %xmm2, %xmm3, %xmm3
841 ; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
842 ; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
843 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm3
844 ; AVX-NEXT: vpaddw %xmm2, %xmm3, %xmm3
845 ; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
846 ; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
847 ; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
848 ; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
849 ; AVX-NEXT: retl
867 ; AVX-64-LABEL: f32xi16_i32:
868 ; AVX-64: # %bb.0:
869 ; AVX-64-NEXT: vbroadcastss {{.*#+}} ymm2 = [65536,65536,65536,65536,65536,65536,65536,65536]
870 ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3
871 ; AVX-64-NEXT: vpaddw %xmm2, %xmm3, %xmm3
872 ; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1
873 ; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
874 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3
875 ; AVX-64-NEXT: vpaddw %xmm2, %xmm3, %xmm3
876 ; AVX-64-NEXT: vpaddw %xmm2, %xmm0, %xmm0
877 ; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
878 ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
879 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
880 ; AVX-64-NEXT: retq
904 ; AVX-LABEL: f32xi16_i64:
905 ; AVX: # %bb.0:
906 ; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [844433520132096,844433520132096,844433520132096,844433…
907 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3
908 ; AVX-NEXT: vpaddw %xmm2, %xmm3, %xmm3
909 ; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
910 ; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
911 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm3
912 ; AVX-NEXT: vpaddw %xmm2, %xmm3, %xmm3
913 ; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
914 ; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
915 ; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
916 ; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
917 ; AVX-NEXT: retl
935 ; AVX-64-LABEL: f32xi16_i64:
936 ; AVX-64: # %bb.0:
937 ; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [844433520132096,844433520132096,844433520132096,844…
938 ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3
939 ; AVX-64-NEXT: vpaddw %xmm2, %xmm3, %xmm3
940 ; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1
941 ; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
942 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3
943 ; AVX-64-NEXT: vpaddw %xmm2, %xmm3, %xmm3
944 ; AVX-64-NEXT: vpaddw %xmm2, %xmm0, %xmm0
945 ; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
946 ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
947 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
948 ; AVX-64-NEXT: retq
972 ; AVX-LABEL: f32xi16_i128:
973 ; AVX: # %bb.0:
974 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
975 ; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7]
976 ; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
977 ; AVX-NEXT: vpaddw %xmm3, %xmm1, %xmm1
978 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
979 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
980 ; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
981 ; AVX-NEXT: vpaddw %xmm3, %xmm0, %xmm0
982 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
983 ; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
984 ; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
985 ; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
986 ; AVX-NEXT: retl
1006 ; AVX-64-LABEL: f32xi16_i128:
1007 ; AVX-64: # %bb.0:
1008 ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
1009 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7]
1010 ; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
1011 ; AVX-64-NEXT: vpaddw %xmm3, %xmm1, %xmm1
1012 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1013 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
1014 ; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
1015 ; AVX-64-NEXT: vpaddw %xmm3, %xmm0, %xmm0
1016 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1017 ; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
1018 ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
1019 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
1020 ; AVX-64-NEXT: retq
1046 ; AVX-LABEL: f32xi16_i256:
1047 ; AVX: # %bb.0:
1048 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
1049 ; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [8,9,10,11,12,13,14,15]
1050 ; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
1051 ; AVX-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7]
1052 ; AVX-NEXT: vpaddw %xmm4, %xmm1, %xmm1
1053 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1054 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
1055 ; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
1056 ; AVX-NEXT: vpaddw %xmm4, %xmm0, %xmm0
1057 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1058 ; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
1059 ; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
1060 ; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
1061 ; AVX-NEXT: retl
1080 ; AVX-64-LABEL: f32xi16_i256:
1081 ; AVX-64: # %bb.0:
1082 ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
1083 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [8,9,10,11,12,13,14,15]
1084 ; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
1085 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7]
1086 ; AVX-64-NEXT: vpaddw %xmm4, %xmm1, %xmm1
1087 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1088 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
1089 ; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
1090 ; AVX-64-NEXT: vpaddw %xmm4, %xmm0, %xmm0
1091 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1092 ; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
1093 ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
1094 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
1095 ; AVX-64-NEXT: retq
1120 ; AVX-LABEL: f4xi32_i64:
1121 ; AVX: # %bb.0:
1122 ; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [4294967296,4294967296]
1123 ; AVX-NEXT: # xmm1 = mem[0,0]
1124 ; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
1125 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
1126 ; AVX-NEXT: retl
1135 ; AVX-64-LABEL: f4xi32_i64:
1136 ; AVX-64: # %bb.0:
1137 ; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = [4294967296,4294967296]
1138 ; AVX-64-NEXT: # xmm1 = mem[0,0]
1139 ; AVX-64-NEXT: vpaddd %xmm1, %xmm0, %xmm0
1140 ; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
1141 ; AVX-64-NEXT: retq
1156 ; AVX-LABEL: f8xi32_i64:
1157 ; AVX: # %bb.0:
1158 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
1159 ; AVX-NEXT: vmovddup {{.*#+}} xmm2 = [4294967296,4294967296]
1160 ; AVX-NEXT: # xmm2 = mem[0,0]
1161 ; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1162 ; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0
1163 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1164 ; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
1165 ; AVX-NEXT: retl
1174 ; AVX-64-LABEL: f8xi32_i64:
1175 ; AVX-64: # %bb.0:
1176 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
1177 ; AVX-64-NEXT: vmovddup {{.*#+}} xmm2 = [4294967296,4294967296]
1178 ; AVX-64-NEXT: # xmm2 = mem[0,0]
1179 ; AVX-64-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1180 ; AVX-64-NEXT: vpaddd %xmm2, %xmm0, %xmm0
1181 ; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1182 ; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1183 ; AVX-64-NEXT: retq
1198 ; AVX-LABEL: f8xi32_i128:
1199 ; AVX: # %bb.0:
1200 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
1201 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3]
1202 ; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1203 ; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0
1204 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1205 ; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
1206 ; AVX-NEXT: retl
1216 ; AVX-64-LABEL: f8xi32_i128:
1217 ; AVX-64: # %bb.0:
1218 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
1219 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3]
1220 ; AVX-64-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1221 ; AVX-64-NEXT: vpaddd %xmm2, %xmm0, %xmm0
1222 ; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1223 ; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1224 ; AVX-64-NEXT: retq
1240 ; AVX-LABEL: f16xi32_i64:
1241 ; AVX: # %bb.0:
1242 ; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4294967296,4294967296,4294967296,4294967296]
1243 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3
1244 ; AVX-NEXT: vpaddd %xmm2, %xmm3, %xmm3
1245 ; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1246 ; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
1247 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm3
1248 ; AVX-NEXT: vpaddd %xmm2, %xmm3, %xmm3
1249 ; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0
1250 ; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
1251 ; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
1252 ; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
1253 ; AVX-NEXT: retl
1271 ; AVX-64-LABEL: f16xi32_i64:
1272 ; AVX-64: # %bb.0:
1273 ; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4294967296,4294967296,4294967296,4294967296]
1274 ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3
1275 ; AVX-64-NEXT: vpaddd %xmm2, %xmm3, %xmm3
1276 ; AVX-64-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1277 ; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
1278 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3
1279 ; AVX-64-NEXT: vpaddd %xmm2, %xmm3, %xmm3
1280 ; AVX-64-NEXT: vpaddd %xmm2, %xmm0, %xmm0
1281 ; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
1282 ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
1283 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
1284 ; AVX-64-NEXT: retq
1308 ; AVX-LABEL: f16xi32_i128:
1309 ; AVX: # %bb.0:
1310 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
1311 ; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,2,3]
1312 ; AVX-NEXT: vpaddd %xmm3, %xmm2, %xmm2
1313 ; AVX-NEXT: vpaddd %xmm3, %xmm1, %xmm1
1314 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1315 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
1316 ; AVX-NEXT: vpaddd %xmm3, %xmm2, %xmm2
1317 ; AVX-NEXT: vpaddd %xmm3, %xmm0, %xmm0
1318 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1319 ; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
1320 ; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
1321 ; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
1322 ; AVX-NEXT: retl
1342 ; AVX-64-LABEL: f16xi32_i128:
1343 ; AVX-64: # %bb.0:
1344 ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
1345 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,2,3]
1346 ; AVX-64-NEXT: vpaddd %xmm3, %xmm2, %xmm2
1347 ; AVX-64-NEXT: vpaddd %xmm3, %xmm1, %xmm1
1348 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1349 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
1350 ; AVX-64-NEXT: vpaddd %xmm3, %xmm2, %xmm2
1351 ; AVX-64-NEXT: vpaddd %xmm3, %xmm0, %xmm0
1352 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1353 ; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
1354 ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
1355 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
1356 ; AVX-64-NEXT: retq
1382 ; AVX-LABEL: f4xi64_i128:
1383 ; AVX: # %bb.0:
1384 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
1385 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,1,0]
1386 ; AVX-NEXT: vpaddq %xmm2, %xmm1, %xmm1
1387 ; AVX-NEXT: vpaddq %xmm2, %xmm0, %xmm0
1388 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1389 ; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
1390 ; AVX-NEXT: retl
1400 ; AVX-64-LABEL: f4xi64_i128:
1401 ; AVX-64: # %bb.0:
1402 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
1403 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
1404 ; AVX-64-NEXT: vpaddq %xmm2, %xmm1, %xmm1
1405 ; AVX-64-NEXT: vpaddq %xmm2, %xmm0, %xmm0
1406 ; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1407 ; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
1408 ; AVX-64-NEXT: retq
1424 ; AVX-LABEL: f8xi64_i128:
1425 ; AVX: # %bb.0:
1426 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
1427 ; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,0,1,0]
1428 ; AVX-NEXT: vpaddq %xmm3, %xmm2, %xmm2
1429 ; AVX-NEXT: vpaddq %xmm3, %xmm1, %xmm1
1430 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1431 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
1432 ; AVX-NEXT: vpaddq %xmm3, %xmm2, %xmm2
1433 ; AVX-NEXT: vpaddq %xmm3, %xmm0, %xmm0
1434 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1435 ; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
1436 ; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
1437 ; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
1438 ; AVX-NEXT: retl
1458 ; AVX-64-LABEL: f8xi64_i128:
1459 ; AVX-64: # %bb.0:
1460 ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
1461 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
1462 ; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2
1463 ; AVX-64-NEXT: vpaddq %xmm3, %xmm1, %xmm1
1464 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1465 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
1466 ; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2
1467 ; AVX-64-NEXT: vpaddq %xmm3, %xmm0, %xmm0
1468 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1469 ; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1…
1470 ; AVX-64-NEXT: # ymm2 = mem[0,1,0,1]
1471 ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
1472 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
1473 ; AVX-64-NEXT: retq
1499 ; AVX-LABEL: f8xi64_i256:
1500 ; AVX: # %bb.0:
1501 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
1502 ; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [2,0,3,0]
1503 ; AVX-NEXT: vpaddq %xmm3, %xmm2, %xmm2
1504 ; AVX-NEXT: vmovdqa {{.*#+}} xmm4 = [0,0,1,0]
1505 ; AVX-NEXT: vpaddq %xmm4, %xmm1, %xmm1
1506 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1507 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
1508 ; AVX-NEXT: vpaddq %xmm3, %xmm2, %xmm2
1509 ; AVX-NEXT: vpaddq %xmm4, %xmm0, %xmm0
1510 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1511 ; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,0,1,0,2,0,3,0]
1512 ; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
1513 ; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
1514 ; AVX-NEXT: retl
1533 ; AVX-64-LABEL: f8xi64_i256:
1534 ; AVX-64: # %bb.0:
1535 ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
1536 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [2,3]
1537 ; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2
1538 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm4 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
1539 ; AVX-64-NEXT: vpaddq %xmm4, %xmm1, %xmm1
1540 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1541 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
1542 ; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2
1543 ; AVX-64-NEXT: vpaddq %xmm4, %xmm0, %xmm0
1544 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1545 ; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3]
1546 ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
1547 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
1548 ; AVX-64-NEXT: retq
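; The floating-point cases below follow the same pattern: an fadd of a repeated
; constant, then an fdiv with that constant as the numerator (vaddps/vaddpd followed
; by vdivps/vdivpd). The 4575657222482165760 splat is 0x3F80000040000000, i.e. the
; float pair <2.0, 1.0>, so @f4xf32_f64 is assumed to look like:

define <4 x float> @f4xf32_f64(<4 x float> %a) {
  %r1 = fadd <4 x float> <float 2.0, float 1.0, float 2.0, float 1.0>, %a
  %r2 = fdiv <4 x float> <float 2.0, float 1.0, float 2.0, float 1.0>, %r1
  ret <4 x float> %r2
}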
1573 ; AVX-LABEL: f4xf32_f64:
1574 ; AVX: # %bb.0:
1575 ; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [4575657222482165760,4575657222482165760]
1576 ; AVX-NEXT: # xmm1 = mem[0,0]
1577 ; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
1578 ; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm0
1579 ; AVX-NEXT: retl
1589 ; AVX-64-LABEL: f4xf32_f64:
1590 ; AVX-64: # %bb.0:
1591 ; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = [4575657222482165760,4575657222482165760]
1592 ; AVX-64-NEXT: # xmm1 = mem[0,0]
1593 ; AVX-64-NEXT: vaddps %xmm1, %xmm0, %xmm0
1594 ; AVX-64-NEXT: vdivps %xmm0, %xmm1, %xmm0
1595 ; AVX-64-NEXT: retq
1611 ; AVX-LABEL: f8xf32_f64:
1612 ; AVX: # %bb.0:
1613 ; AVX-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4575657222482165760,4575657222482165760,45756572224821…
1614 ; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
1615 ; AVX-NEXT: vdivps %ymm0, %ymm1, %ymm0
1616 ; AVX-NEXT: retl
1625 ; AVX-64-LABEL: f8xf32_f64:
1626 ; AVX-64: # %bb.0:
1627 ; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4575657222482165760,4575657222482165760,45756572224…
1628 ; AVX-64-NEXT: vaddps %ymm1, %ymm0, %ymm0
1629 ; AVX-64-NEXT: vdivps %ymm0, %ymm1, %ymm0
1630 ; AVX-64-NEXT: retq
1645 ; AVX-LABEL: f8xf32_f128:
1646 ; AVX: # %bb.0:
1647 ; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0…
1648 ; AVX-NEXT: # ymm1 = mem[0,1,0,1]
1649 ; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
1650 ; AVX-NEXT: vdivps %ymm0, %ymm1, %ymm0
1651 ; AVX-NEXT: retl
1661 ; AVX-64-LABEL: f8xf32_f128:
1662 ; AVX-64: # %bb.0:
1663 ; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,…
1664 ; AVX-64-NEXT: # ymm1 = mem[0,1,0,1]
1665 ; AVX-64-NEXT: vaddps %ymm1, %ymm0, %ymm0
1666 ; AVX-64-NEXT: vdivps %ymm0, %ymm1, %ymm0
1667 ; AVX-64-NEXT: retq
1683 ; AVX-LABEL: f16xf32_f64:
1684 ; AVX: # %bb.0:
1685 ; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4575657222482165760,4575657222482165760,45756572224821…
1686 ; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1
1687 ; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0
1688 ; AVX-NEXT: vdivps %ymm0, %ymm2, %ymm0
1689 ; AVX-NEXT: vdivps %ymm1, %ymm2, %ymm1
1690 ; AVX-NEXT: retl
1708 ; AVX-64-LABEL: f16xf32_f64:
1709 ; AVX-64: # %bb.0:
1710 ; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4575657222482165760,4575657222482165760,45756572224…
1711 ; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
1712 ; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
1713 ; AVX-64-NEXT: vdivps %ymm0, %ymm2, %ymm0
1714 ; AVX-64-NEXT: vdivps %ymm1, %ymm2, %ymm1
1715 ; AVX-64-NEXT: retq
1739 ; AVX-LABEL: f16xf32_f128:
1740 ; AVX: # %bb.0:
1741 ; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0…
1742 ; AVX-NEXT: # ymm2 = mem[0,1,0,1]
1743 ; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1
1744 ; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0
1745 ; AVX-NEXT: vdivps %ymm0, %ymm2, %ymm0
1746 ; AVX-NEXT: vdivps %ymm1, %ymm2, %ymm1
1747 ; AVX-NEXT: retl
1767 ; AVX-64-LABEL: f16xf32_f128:
1768 ; AVX-64: # %bb.0:
1769 ; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,…
1770 ; AVX-64-NEXT: # ymm2 = mem[0,1,0,1]
1771 ; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
1772 ; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
1773 ; AVX-64-NEXT: vdivps %ymm0, %ymm2, %ymm0
1774 ; AVX-64-NEXT: vdivps %ymm1, %ymm2, %ymm1
1775 ; AVX-64-NEXT: retq
1801 ; AVX-LABEL: f16xf32_f256:
1802 ; AVX: # %bb.0:
1803 ; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [8.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0]
1804 ; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1
1805 ; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0
1806 ; AVX-NEXT: vdivps %ymm0, %ymm2, %ymm0
1807 ; AVX-NEXT: vdivps %ymm1, %ymm2, %ymm1
1808 ; AVX-NEXT: retl
1827 ; AVX-64-LABEL: f16xf32_f256:
1828 ; AVX-64: # %bb.0:
1829 ; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [8.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0]
1830 ; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
1831 ; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
1832 ; AVX-64-NEXT: vdivps %ymm0, %ymm2, %ymm0
1833 ; AVX-64-NEXT: vdivps %ymm1, %ymm2, %ymm1
1834 ; AVX-64-NEXT: retq
1859 ; AVX-LABEL: f4xf64_f128:
1860 ; AVX: # %bb.0:
1861 ; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0]
1862 ; AVX-NEXT: # ymm1 = mem[0,1,0,1]
1863 ; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
1864 ; AVX-NEXT: vdivpd %ymm0, %ymm1, %ymm0
1865 ; AVX-NEXT: retl
1875 ; AVX-64-LABEL: f4xf64_f128:
1876 ; AVX-64: # %bb.0:
1877 ; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0]
1878 ; AVX-64-NEXT: # ymm1 = mem[0,1,0,1]
1879 ; AVX-64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
1880 ; AVX-64-NEXT: vdivpd %ymm0, %ymm1, %ymm0
1881 ; AVX-64-NEXT: retq
1897 ; AVX-LABEL: f8xf64_f128:
1898 ; AVX: # %bb.0:
1899 ; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0]
1900 ; AVX-NEXT: # ymm2 = mem[0,1,0,1]
1901 ; AVX-NEXT: vaddpd %ymm2, %ymm1, %ymm1
1902 ; AVX-NEXT: vaddpd %ymm2, %ymm0, %ymm0
1903 ; AVX-NEXT: vdivpd %ymm0, %ymm2, %ymm0
1904 ; AVX-NEXT: vdivpd %ymm1, %ymm2, %ymm1
1905 ; AVX-NEXT: retl
1925 ; AVX-64-LABEL: f8xf64_f128:
1926 ; AVX-64: # %bb.0:
1927 ; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0]
1928 ; AVX-64-NEXT: # ymm2 = mem[0,1,0,1]
1929 ; AVX-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1
1930 ; AVX-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
1931 ; AVX-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0
1932 ; AVX-64-NEXT: vdivpd %ymm1, %ymm2, %ymm1
1933 ; AVX-64-NEXT: retq
1966 ; AVX-LABEL: f8xf64_f256:
1967 ; AVX: # %bb.0:
1968 ; AVX-NEXT: vmovapd {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0]
1969 ; AVX-NEXT: vaddpd %ymm2, %ymm1, %ymm1
1970 ; AVX-NEXT: vaddpd %ymm2, %ymm0, %ymm0
1971 ; AVX-NEXT: vdivpd %ymm0, %ymm2, %ymm0
1972 ; AVX-NEXT: vdivpd %ymm1, %ymm2, %ymm1
1973 ; AVX-NEXT: retl
1992 ; AVX-64-LABEL: f8xf64_f256:
1993 ; AVX-64: # %bb.0:
1994 ; AVX-64-NEXT: vmovapd {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0]
1995 ; AVX-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1
1996 ; AVX-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
1997 ; AVX-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0
1998 ; AVX-64-NEXT: vdivpd %ymm1, %ymm2, %ymm1
1999 ; AVX-64-NEXT: retq
2024 ; AVX-LABEL: f8xi16_i32_NaN:
2025 ; AVX: # %bb.0:
2026 ; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [4290379776,4290379776,4290379776,4290379776]
2027 ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
2028 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
2029 ; AVX-NEXT: retl
2038 ; AVX-64-LABEL: f8xi16_i32_NaN:
2039 ; AVX-64: # %bb.0:
2040 ; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm1 = [4290379776,4290379776,4290379776,4290379776]
2041 ; AVX-64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
2042 ; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
2043 ; AVX-64-NEXT: retq
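; The 4290379776 splat in f8xi16_i32_NaN above is 0xFFBA0000, i.e. the repeated i16
; pair <0, -70>. Reinterpreted as a 32-bit float that bit pattern is a NaN (sign set,
; all-ones exponent, non-zero mantissa), which is presumably the point of the _NaN
; suffix: the checks confirm the constant is still emitted as a plain vbroadcastss of
; the raw bits.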