; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f -O0 | FileCheck %s

; Builds a <16 x float> return value from 16 scalar float arguments.
; At -O0 every argument is spilled to its own stack slot, reloaded, and the
; vector is assembled element by element with insertelement, which lowers on
; AVX-512 to vinsertps (xmm lanes), vinsertf128 (ymm halves), and
; vinsertf64x4 (zmm halves). The assertions below pin that -O0 lowering.
define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6, float %f7, float %f8, float %f9, float %f10, float %f11, float %f12, float %f13, float %f14, float %f15, float %f16) #0 {
; CHECK-LABEL: makefloat:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    .cfi_def_cfa_register %rbp
; CHECK-NEXT:    andq $-64, %rsp
; CHECK-NEXT:    subq $256, %rsp # imm = 0x100
; CHECK-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm2, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm4, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm5, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm6, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss {{.*#+}} xmm15 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm14 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm11 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss %xmm15, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm14, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm13, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm12, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm11, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm10, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm9, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm8, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm6, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm5, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm4, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm2, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[0]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm0[0,1,2],xmm2[0]
; CHECK-NEXT:    # implicit-def: $ymm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm0[0,1,2],xmm2[0]
; CHECK-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0],xmm0[3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm3 = xmm0[0,1,2],xmm3[0]
; CHECK-NEXT:    # implicit-def: $ymm0
; CHECK-NEXT:    vmovaps %xmm3, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm2
; CHECK-NEXT:    # implicit-def: $zmm0
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT:    vmovaps %zmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovaps {{[0-9]+}}(%rsp), %zmm0
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
; CHECK-NEXT:    retq
entry:
  ; Stack slots for the (inlined) vector-builder's parameters, __A..__P.
  %__A.addr.i = alloca float, align 4
  %__B.addr.i = alloca float, align 4
  %__C.addr.i = alloca float, align 4
  %__D.addr.i = alloca float, align 4
  %__E.addr.i = alloca float, align 4
  %__F.addr.i = alloca float, align 4
  %__G.addr.i = alloca float, align 4
  %__H.addr.i = alloca float, align 4
  %__I.addr.i = alloca float, align 4
  %__J.addr.i = alloca float, align 4
  %__K.addr.i = alloca float, align 4
  %__L.addr.i = alloca float, align 4
  %__M.addr.i = alloca float, align 4
  %__N.addr.i = alloca float, align 4
  %__O.addr.i = alloca float, align 4
  %__P.addr.i = alloca float, align 4
  %.compoundliteral.i = alloca <16 x float>, align 64
  ; Stack slots for the function's own 16 float arguments.
  %f1.addr = alloca float, align 4
  %f2.addr = alloca float, align 4
  %f3.addr = alloca float, align 4
  %f4.addr = alloca float, align 4
  %f5.addr = alloca float, align 4
  %f6.addr = alloca float, align 4
  %f7.addr = alloca float, align 4
  %f8.addr = alloca float, align 4
  %f9.addr = alloca float, align 4
  %f10.addr = alloca float, align 4
  %f11.addr = alloca float, align 4
  %f12.addr = alloca float, align 4
  %f13.addr = alloca float, align 4
  %f14.addr = alloca float, align 4
  %f15.addr = alloca float, align 4
  %f16.addr = alloca float, align 4
  ; Spill every argument to its slot (unoptimized frontend output).
  store float %f1, float* %f1.addr, align 4
  store float %f2, float* %f2.addr, align 4
  store float %f3, float* %f3.addr, align 4
  store float %f4, float* %f4.addr, align 4
  store float %f5, float* %f5.addr, align 4
  store float %f6, float* %f6.addr, align 4
  store float %f7, float* %f7.addr, align 4
  store float %f8, float* %f8.addr, align 4
  store float %f9, float* %f9.addr, align 4
  store float %f10, float* %f10.addr, align 4
  store float %f11, float* %f11.addr, align 4
  store float %f12, float* %f12.addr, align 4
  store float %f13, float* %f13.addr, align 4
  store float %f14, float* %f14.addr, align 4
  store float %f15, float* %f15.addr, align 4
  store float %f16, float* %f16.addr, align 4
  ; Reload in reverse (f16..f1) and forward into __A..__P, mirroring a
  ; _mm512_set_ps-style call whose first parameter is the highest lane.
  %0 = load float, float* %f16.addr, align 4
  %1 = load float, float* %f15.addr, align 4
  %2 = load float, float* %f14.addr, align 4
  %3 = load float, float* %f13.addr, align 4
  %4 = load float, float* %f12.addr, align 4
  %5 = load float, float* %f11.addr, align 4
  %6 = load float, float* %f10.addr, align 4
  %7 = load float, float* %f9.addr, align 4
  %8 = load float, float* %f8.addr, align 4
  %9 = load float, float* %f7.addr, align 4
  %10 = load float, float* %f6.addr, align 4
  %11 = load float, float* %f5.addr, align 4
  %12 = load float, float* %f4.addr, align 4
  %13 = load float, float* %f3.addr, align 4
  %14 = load float, float* %f2.addr, align 4
  %15 = load float, float* %f1.addr, align 4
  store float %0, float* %__A.addr.i, align 4
  store float %1, float* %__B.addr.i, align 4
  store float %2, float* %__C.addr.i, align 4
  store float %3, float* %__D.addr.i, align 4
  store float %4, float* %__E.addr.i, align 4
  store float %5, float* %__F.addr.i, align 4
  store float %6, float* %__G.addr.i, align 4
  store float %7, float* %__H.addr.i, align 4
  store float %8, float* %__I.addr.i, align 4
  store float %9, float* %__J.addr.i, align 4
  store float %10, float* %__K.addr.i, align 4
  store float %11, float* %__L.addr.i, align 4
  store float %12, float* %__M.addr.i, align 4
  store float %13, float* %__N.addr.i, align 4
  store float %14, float* %__O.addr.i, align 4
  store float %15, float* %__P.addr.i, align 4
  ; Assemble the vector one lane at a time: __P lands in element 0 and
  ; __A in element 15, so lane i ultimately holds argument f(i+1).
  %16 = load float, float* %__P.addr.i, align 4
  %vecinit.i = insertelement <16 x float> undef, float %16, i32 0
  %17 = load float, float* %__O.addr.i, align 4
  %vecinit1.i = insertelement <16 x float> %vecinit.i, float %17, i32 1
  %18 = load float, float* %__N.addr.i, align 4
  %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %18, i32 2
  %19 = load float, float* %__M.addr.i, align 4
  %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %19, i32 3
  %20 = load float, float* %__L.addr.i, align 4
  %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %20, i32 4
  %21 = load float, float* %__K.addr.i, align 4
  %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %21, i32 5
  %22 = load float, float* %__J.addr.i, align 4
  %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %22, i32 6
  %23 = load float, float* %__I.addr.i, align 4
  %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %23, i32 7
  %24 = load float, float* %__H.addr.i, align 4
  %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %24, i32 8
  %25 = load float, float* %__G.addr.i, align 4
  %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %25, i32 9
  %26 = load float, float* %__F.addr.i, align 4
  %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %26, i32 10
  %27 = load float, float* %__E.addr.i, align 4
  %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %27, i32 11
  %28 = load float, float* %__D.addr.i, align 4
  %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %28, i32 12
  %29 = load float, float* %__C.addr.i, align 4
  %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %29, i32 13
  %30 = load float, float* %__B.addr.i, align 4
  %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %30, i32 14
  %31 = load float, float* %__A.addr.i, align 4
  %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %31, i32 15
  ; Round-trip through the 64-byte-aligned compound literal slot, then return.
  store <16 x float> %vecinit15.i, <16 x float>* %.compoundliteral.i, align 64
  %32 = load <16 x float>, <16 x float>* %.compoundliteral.i, align 64
  ret <16 x float> %32
}