; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -mtriple=x86_64-linux-generic -mattr=avx < %s | FileCheck %s

; Bug 45563:
; The AVX masked load branch of the LowerMLOAD() method should
; use the operand vector type rather than the mask type.
; Given, for example:
;   v4f64,ch = masked_load ..
; The select should be:
;   v4f64 = vselect ..
; instead of:
;   v4i64 = vselect ..

define <16 x double> @bug45563(<16 x double>* %addr, <16 x double> %dst, <16 x i64> %e, <16 x i64> %f) {
; CHECK-LABEL: bug45563:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    .cfi_def_cfa_register %rbp
; CHECK-NEXT:    andq $-32, %rsp
; CHECK-NEXT:    subq $32, %rsp
; CHECK-NEXT:    vextractf128 $1, %ymm7, %xmm8
; CHECK-NEXT:    vmovdqa 112(%rbp), %xmm9
; CHECK-NEXT:    vmovdqa 128(%rbp), %xmm10
; CHECK-NEXT:    vpcmpgtq %xmm8, %xmm10, %xmm8
; CHECK-NEXT:    vpcmpgtq %xmm7, %xmm9, %xmm7
; CHECK-NEXT:    vinsertf128 $1, %xmm8, %ymm7, %ymm8
; CHECK-NEXT:    vextractf128 $1, %ymm6, %xmm10
; CHECK-NEXT:    vmovdqa 80(%rbp), %xmm9
; CHECK-NEXT:    vmovdqa 96(%rbp), %xmm7
; CHECK-NEXT:    vpcmpgtq %xmm10, %xmm7, %xmm7
; CHECK-NEXT:    vpcmpgtq %xmm6, %xmm9, %xmm6
; CHECK-NEXT:    vinsertf128 $1, %xmm7, %ymm6, %ymm10
; CHECK-NEXT:    vextractf128 $1, %ymm5, %xmm7
; CHECK-NEXT:    vmovdqa 48(%rbp), %xmm9
; CHECK-NEXT:    vmovdqa 64(%rbp), %xmm6
; CHECK-NEXT:    vpcmpgtq %xmm7, %xmm6, %xmm6
; CHECK-NEXT:    vpcmpgtq %xmm5, %xmm9, %xmm5
; CHECK-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
; CHECK-NEXT:    vextractf128 $1, %ymm4, %xmm6
; CHECK-NEXT:    vmovdqa 16(%rbp), %xmm9
; CHECK-NEXT:    vmovdqa 32(%rbp), %xmm7
; CHECK-NEXT:    vpcmpgtq %xmm6, %xmm7, %xmm6
; CHECK-NEXT:    vpcmpgtq %xmm4, %xmm9, %xmm4
; CHECK-NEXT:    vinsertf128 $1, %xmm6, %ymm4, %ymm4
; CHECK-NEXT:    vmaskmovpd (%rdi), %ymm4, %ymm6
; CHECK-NEXT:    vblendvpd %ymm4, %ymm6, %ymm0, %ymm0
; CHECK-NEXT:    vmaskmovpd 32(%rdi), %ymm5, %ymm4
; CHECK-NEXT:    vblendvpd %ymm5, %ymm4, %ymm1, %ymm1
; CHECK-NEXT:    vmaskmovpd 64(%rdi), %ymm10, %ymm4
; CHECK-NEXT:    vblendvpd %ymm10, %ymm4, %ymm2, %ymm2
; CHECK-NEXT:    vmaskmovpd 96(%rdi), %ymm8, %ymm4
; CHECK-NEXT:    vblendvpd %ymm8, %ymm4, %ymm3, %ymm3
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
; CHECK-NEXT:    retq
  %mask = icmp slt <16 x i64> %e, %f
  %res = call <16 x double> @llvm.masked.load.v16f64.p0v16f64(<16 x double>* %addr, i32 4, <16 x i1> %mask, <16 x double> %dst)
  ret <16 x double> %res
}

declare <16 x double> @llvm.masked.load.v16f64.p0v16f64(<16 x double>* %addr, i32 %align, <16 x i1> %mask, <16 x double> %dst)
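; A reduced, single-register form of the pattern described in the comment at
; the top of this file: a v4f64 masked load whose passthru blend must be a
; v4f64 vselect rather than a v4i64 one. This function and the accompanying
; declare are illustrative only; they are not exercised by the CHECK lines
; above, and the names are placeholders.
define <4 x double> @bug45563_reduced(<4 x double>* %addr, <4 x double> %dst, <4 x i64> %e, <4 x i64> %f) {
  ; Build a <4 x i1> mask from a signed compare, mirroring the main test.
  %mask = icmp slt <4 x i64> %e, %f
  ; As in the checked function above, AVX lowers this to vmaskmovpd followed
  ; by vblendvpd; the blend (vselect) must use the loaded value type (v4f64),
  ; not the mask type (v4i64).
  %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 4, <4 x i1> %mask, <4 x double> %dst)
  ret <4 x double> %res
}

declare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 %align, <4 x i1> %mask, <4 x double> %dst)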