; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s

; Instruction-selection tests for move instructions when compiling for
; -mcpu=knl. Every function below is preceded by the FileCheck directives
; that pin the instruction expected in its body. Labels are written as
; "testN:" so that each one matches exactly one symbol definition (a bare
; "test1" would also match "test10", "test11", ...).

;===----------------------------------------------------------------------===;
; Scalar moves: GPR <-> XMM, and scalar loads/stores through XMM.
; Each check also pins the first encoding byte to 0x62.
;===----------------------------------------------------------------------===;

; float -> i32 bitcast: XMM to 32-bit GPR.
; CHECK-LABEL: test1:
; CHECK: vmovd %xmm0, %eax ## encoding: [0x62
; CHECK: ret
define i32 @test1(float %x) {
  %res = bitcast float %x to i32
  ret i32 %res
}

; i32 inserted into lane 0 of an undef vector: 32-bit GPR to XMM.
; CHECK-LABEL: test2:
; CHECK: vmovd %edi, %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x i32> @test2(i32 %x) {
  %res = insertelement <4 x i32>undef, i32 %x, i32 0
  ret <4 x i32>%res
}

; i64 inserted into lane 0 of an undef vector: 64-bit GPR to XMM.
; CHECK-LABEL: test3:
; CHECK: vmovq %rdi, %xmm0 ## encoding: [0x62
; CHECK: ret
define <2 x i64> @test3(i64 %x) {
  %res = insertelement <2 x i64>undef, i64 %x, i32 0
  ret <2 x i64>%res
}

; i32 loaded from memory and inserted into lane 0 of an undef vector.
; CHECK-LABEL: test4:
; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x i32> @test4(i32* %x) {
  %y = load i32, i32* %x
  %res = insertelement <4 x i32>undef, i32 %y, i32 0
  ret <4 x i32>%res
}

; Scalar float store.
; CHECK-LABEL: test5:
; CHECK: vmovss %xmm0, (%rdi) ## encoding: [0x62
; CHECK: ret
define void @test5(float %x, float* %y) {
  store float %x, float* %y, align 4
  ret void
}

; Scalar double store.
; CHECK-LABEL: test6:
; CHECK: vmovsd %xmm0, (%rdi) ## encoding: [0x62
; CHECK: ret
define void @test6(double %x, double* %y) {
  store double %x, double* %y, align 8
  ret void
}

; i32 load whose only use is a bitcast to float: expected to load straight
; into XMM as a scalar float.
; CHECK-LABEL: test7:
; CHECK: vmovss (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define float @test7(i32* %x) {
  %y = load i32, i32* %x
  %res = bitcast i32 %y to float
  ret float %res
}

; Extract lane 0 of <4 x i32>: XMM to 32-bit GPR.
; CHECK-LABEL: test8:
; CHECK: vmovd %xmm0, %eax ## encoding: [0x62
; CHECK: ret
define i32 @test8(<4 x i32> %x) {
  %res = extractelement <4 x i32> %x, i32 0
  ret i32 %res
}

; Extract lane 0 of <2 x i64>: XMM to 64-bit GPR.
; CHECK-LABEL: test9:
; CHECK: vmovq %xmm0, %rax ## encoding: [0x62
; CHECK: ret
define i64 @test9(<2 x i64> %x) {
  %res = extractelement <2 x i64> %x, i32 0
  ret i64 %res
}

;===----------------------------------------------------------------------===;
; Same scalar moves, but inserting into a zeroinitializer vector instead of
; undef, so the remaining lanes must be zero.
;===----------------------------------------------------------------------===;

; CHECK-LABEL: test10:
; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x i32> @test10(i32* %x) {
  %y = load i32, i32* %x, align 4
  %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
  ret <4 x i32>%res
}

; CHECK-LABEL: test11:
; CHECK: vmovss (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x float> @test11(float* %x) {
  %y = load float, float* %x, align 4
  %res = insertelement <4 x float>zeroinitializer, float %y, i32 0
  ret <4 x float>%res
}

; CHECK-LABEL: test12:
; CHECK: vmovsd (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <2 x double> @test12(double* %x) {
  %y = load double, double* %x, align 8
  %res = insertelement <2 x double>zeroinitializer, double %y, i32 0
  ret <2 x double>%res
}

; CHECK-LABEL: test13:
; CHECK: vmovq %rdi, %xmm0 ## encoding: [0x62
; CHECK: ret
define <2 x i64> @test13(i64 %x) {
  %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0
  ret <2 x i64>%res
}

; CHECK-LABEL: test14:
; CHECK: vmovd %edi, %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x i32> @test14(i32 %x) {
  %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0
  ret <4 x i32>%res
}

; NOTE(review): the body is identical to test10; kept as-is.
; CHECK-LABEL: test15:
; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x i32> @test15(i32* %x) {
  %y = load i32, i32* %x, align 4
  %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
  ret <4 x i32>%res
}

;===----------------------------------------------------------------------===;
; 512-bit integer vector loads and stores.
; "align 64" is expected to select the aligned form (vmovdqa32/64) and
; "align 1" the unaligned form (vmovdqu32/64); the 32/64 suffix follows the
; element width of the vector type.
;===----------------------------------------------------------------------===;

; CHECK-LABEL: test16:
; CHECK: vmovdqu32
; CHECK: ret
define <16 x i32> @test16(i8 * %addr) {
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %res = load <16 x i32>, <16 x i32>* %vaddr, align 1
  ret <16 x i32>%res
}

; CHECK-LABEL: test17:
; CHECK: vmovdqa32
; CHECK: ret
define <16 x i32> @test17(i8 * %addr) {
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %res = load <16 x i32>, <16 x i32>* %vaddr, align 64
  ret <16 x i32>%res
}

; CHECK-LABEL: test18:
; CHECK: vmovdqa64
; CHECK: ret
define void @test18(i8 * %addr, <8 x i64> %data) {
  %vaddr = bitcast i8* %addr to <8 x i64>*
  store <8 x i64>%data, <8 x i64>* %vaddr, align 64
  ret void
}

; CHECK-LABEL: test19:
; CHECK: vmovdqu32
; CHECK: ret
define void @test19(i8 * %addr, <16 x i32> %data) {
  %vaddr = bitcast i8* %addr to <16 x i32>*
  store <16 x i32>%data, <16 x i32>* %vaddr, align 1
  ret void
}

; CHECK-LABEL: test20:
; CHECK: vmovdqa32
; CHECK: ret
define void @test20(i8 * %addr, <16 x i32> %data) {
  %vaddr = bitcast i8* %addr to <16 x i32>*
  store <16 x i32>%data, <16 x i32>* %vaddr, align 64
  ret void
}

; CHECK-LABEL: test21:
; CHECK: vmovdqa64
; CHECK: ret
define <8 x i64> @test21(i8 * %addr) {
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %res = load <8 x i64>, <8 x i64>* %vaddr, align 64
  ret <8 x i64>%res
}

; CHECK-LABEL: test22:
; CHECK: vmovdqu64
; CHECK: ret
define void @test22(i8 * %addr, <8 x i64> %data) {
  %vaddr = bitcast i8* %addr to <8 x i64>*
  store <8 x i64>%data, <8 x i64>* %vaddr, align 1
  ret void
}

; CHECK-LABEL: test23:
; CHECK: vmovdqu64
; CHECK: ret
define <8 x i64> @test23(i8 * %addr) {
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %res = load <8 x i64>, <8 x i64>* %vaddr, align 1
  ret <8 x i64>%res
}

;===----------------------------------------------------------------------===;
; 512-bit floating-point vector loads and stores.
; "align 64" is expected to select vmovaps/vmovapd and "align 1"
; vmovups/vmovupd; ps/pd follows the float/double element type.
;===----------------------------------------------------------------------===;

; CHECK-LABEL: test24:
; CHECK: vmovapd
; CHECK: ret
define void @test24(i8 * %addr, <8 x double> %data) {
  %vaddr = bitcast i8* %addr to <8 x double>*
  store <8 x double>%data, <8 x double>* %vaddr, align 64
  ret void
}

; CHECK-LABEL: test25:
; CHECK: vmovapd
; CHECK: ret
define <8 x double> @test25(i8 * %addr) {
  %vaddr = bitcast i8* %addr to <8 x double>*
  %res = load <8 x double>, <8 x double>* %vaddr, align 64
  ret <8 x double>%res
}

; CHECK-LABEL: test26:
; CHECK: vmovaps
; CHECK: ret
define void @test26(i8 * %addr, <16 x float> %data) {
  %vaddr = bitcast i8* %addr to <16 x float>*
  store <16 x float>%data, <16 x float>* %vaddr, align 64
  ret void
}

; CHECK-LABEL: test27:
; CHECK: vmovaps
; CHECK: ret
define <16 x float> @test27(i8 * %addr) {
  %vaddr = bitcast i8* %addr to <16 x float>*
  %res = load <16 x float>, <16 x float>* %vaddr, align 64
  ret <16 x float>%res
}

; CHECK-LABEL: test28:
; CHECK: vmovupd
; CHECK: ret
define void @test28(i8 * %addr, <8 x double> %data) {
  %vaddr = bitcast i8* %addr to <8 x double>*
  store <8 x double>%data, <8 x double>* %vaddr, align 1
  ret void
}

; CHECK-LABEL: test29:
; CHECK: vmovupd
; CHECK: ret
define <8 x double> @test29(i8 * %addr) {
  %vaddr = bitcast i8* %addr to <8 x double>*
  %res = load <8 x double>, <8 x double>* %vaddr, align 1
  ret <8 x double>%res
}

; CHECK-LABEL: test30:
; CHECK: vmovups
; CHECK: ret
define void @test30(i8 * %addr, <16 x float> %data) {
  %vaddr = bitcast i8* %addr to <16 x float>*
  store <16 x float>%data, <16 x float>* %vaddr, align 1
  ret void
}

; CHECK-LABEL: test31:
; CHECK: vmovups
; CHECK: ret
define <16 x float> @test31(i8 * %addr) {
  %vaddr = bitcast i8* %addr to <16 x float>*
  %res = load <16 x float>, <16 x float>* %vaddr, align 1
  ret <16 x float>%res
}

;===----------------------------------------------------------------------===;
; Masked 512-bit integer loads.
; The mask is "icmp ne %mask1, 0". A select between the loaded value and
; %old is expected to fold into the move with a {%kN} write-mask; a select
; against zeroinitializer is expected to additionally carry {z}.
;===----------------------------------------------------------------------===;

; CHECK-LABEL: test32:
; CHECK: vmovdqa32{{.*{%k[1-7]} }}
; CHECK: ret
define <16 x i32> @test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
  ret <16 x i32>%res
}

; CHECK-LABEL: test33:
; CHECK: vmovdqu32{{.*{%k[1-7]} }}
; CHECK: ret
define <16 x i32> @test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
  ret <16 x i32>%res
}

; CHECK-LABEL: test34:
; CHECK: vmovdqa32{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <16 x i32> @test34(i8 * %addr, <16 x i32> %mask1) {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
  ret <16 x i32>%res
}

; CHECK-LABEL: test35:
; CHECK: vmovdqu32{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <16 x i32> @test35(i8 * %addr, <16 x i32> %mask1) {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
  ret <16 x i32>%res
}

; CHECK-LABEL: test36:
; CHECK: vmovdqa64{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x i64> @test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
  ret <8 x i64>%res
}

; CHECK-LABEL: test37:
; CHECK: vmovdqu64{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x i64> @test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
  ret <8 x i64>%res
}

; CHECK-LABEL: test38:
; CHECK: vmovdqa64{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x i64> @test38(i8 * %addr, <8 x i64> %mask1) {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
  ret <8 x i64>%res
}

; CHECK-LABEL: test39:
; CHECK: vmovdqu64{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x i64> @test39(i8 * %addr, <8 x i64> %mask1) {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
  ret <8 x i64>%res
}

;===----------------------------------------------------------------------===;
; Masked 512-bit floating-point loads.
; Same structure as the integer cases, with the mask produced by
; "fcmp one %mask1, 0".
;===----------------------------------------------------------------------===;

; CHECK-LABEL: test40:
; CHECK: vmovaps{{.*{%k[1-7]} }}
; CHECK: ret
define <16 x float> @test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
  ret <16 x float>%res
}

; CHECK-LABEL: test41:
; CHECK: vmovups{{.*{%k[1-7]} }}
; CHECK: ret
define <16 x float> @test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
  ret <16 x float>%res
}

; CHECK-LABEL: test42:
; CHECK: vmovaps{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <16 x float> @test42(i8 * %addr, <16 x float> %mask1) {
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
  ret <16 x float>%res
}

; CHECK-LABEL: test43:
; CHECK: vmovups{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <16 x float> @test43(i8 * %addr, <16 x float> %mask1) {
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
  ret <16 x float>%res
}

; CHECK-LABEL: test44:
; CHECK: vmovapd{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x double> @test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
  ret <8 x double>%res
}

; CHECK-LABEL: test45:
; CHECK: vmovupd{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x double> @test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
  ret <8 x double>%res
}

; CHECK-LABEL: test46:
; CHECK: vmovapd{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x double> @test46(i8 * %addr, <8 x double> %mask1) {
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
  ret <8 x double>%res
}

; CHECK-LABEL: test47:
; CHECK: vmovupd{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x double> @test47(i8 * %addr, <8 x double> %mask1) {
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
  ret <8 x double>%res
}