; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s

; Unmasked unaligned 512-bit byte-vector load (vmovdqu8).
define <64 x i8> @test1(i8 * %addr) {
; CHECK-LABEL: test1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <64 x i8>*
  %res = load <64 x i8>, <64 x i8>* %vaddr, align 1
  ret <64 x i8>%res
}

; Unmasked unaligned 512-bit byte-vector store (vmovdqu8).
define void @test2(i8 * %addr, <64 x i8> %data) {
; CHECK-LABEL: test2:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu8 %zmm0, (%rdi)
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <64 x i8>*
  store <64 x i8>%data, <64 x i8>* %vaddr, align 1
  ret void
}

; icmp+select on a loaded value folds to a masked blend load (vpblendmb).
define <64 x i8> @test3(i8 * %addr, <64 x i8> %old, <64 x i8> %mask1) {
; CHECK-LABEL: test3:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqb %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpblendmb (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <64 x i8> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <64 x i8>*
  %r = load <64 x i8>, <64 x i8>* %vaddr, align 1
  %res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> %old
  ret <64 x i8>%res
}

; Select against zero folds to a zero-masked load (vmovdqu8 {z}).
define <64 x i8> @test4(i8 * %addr, <64 x i8> %mask1) {
; CHECK-LABEL: test4:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; CHECK-NEXT:    vpcmpneqb %zmm1, %zmm0, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <64 x i8> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <64 x i8>*
  %r = load <64 x i8>, <64 x i8>* %vaddr, align 1
  %res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> zeroinitializer
  ret <64 x i8>%res
}

; Unmasked unaligned 512-bit word-vector load (vmovdqu16).
define <32 x i16> @test5(i8 * %addr) {
; CHECK-LABEL: test5:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <32 x i16>*
  %res = load <32 x i16>, <32 x i16>* %vaddr, align 1
  ret <32 x i16>%res
}

; Unmasked unaligned 512-bit word-vector store (vmovdqu16).
define void @test6(i8 * %addr, <32 x i16> %data) {
; CHECK-LABEL: test6:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu16 %zmm0, (%rdi)
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <32 x i16>*
  store <32 x i16>%data, <32 x i16>* %vaddr, align 1
  ret void
}

; Word-granularity masked blend load (vpblendmw).
define <32 x i16> @test7(i8 * %addr, <32 x i16> %old, <32 x i16> %mask1) {
; CHECK-LABEL: test7:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqw %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpblendmw (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <32 x i16>*
  %r = load <32 x i16>, <32 x i16>* %vaddr, align 1
  %res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> %old
  ret <32 x i16>%res
}

; Word-granularity zero-masked load (vmovdqu16 {z}).
define <32 x i16> @test8(i8 * %addr, <32 x i16> %mask1) {
; CHECK-LABEL: test8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; CHECK-NEXT:    vpcmpneqw %zmm1, %zmm0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <32 x i16>*
  %r = load <32 x i16>, <32 x i16>* %vaddr, align 1
  %res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> zeroinitializer
  ret <32 x i16>%res
}

; llvm.masked.load on sub-512-bit byte vectors: the <16 x i1> mask is widened
; to a k-register via kshiftl/kshiftr and the load runs on the full zmm.
define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
; CHECK-LABEL: test_mask_load_16xi8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kshiftlq $48, %k0, %k0
; CHECK-NEXT:    kshiftrq $48, %k0, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; CHECK-NEXT:    retq
  %res = call <16 x i8> @llvm.masked.load.v16i8(<16 x i8>* %addr, i32 4, <16 x i1>%mask, <16 x i8> undef)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.masked.load.v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)

define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
; CHECK-LABEL: test_mask_load_32xi8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kshiftlq $32, %k0, %k0
; CHECK-NEXT:    kshiftrq $32, %k0, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; CHECK-NEXT:    retq
  %res = call <32 x i8> @llvm.masked.load.v32i8(<32 x i8>* %addr, i32 4, <32 x i1>%mask, <32 x i8> zeroinitializer)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.masked.load.v32i8(<32 x i8>*, i32, <32 x i1>, <32 x i8>)

; Same widening pattern for word vectors (vpmovw2m / kshiftd).
define <8 x i16> @test_mask_load_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
; CHECK-LABEL: test_mask_load_8xi16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %zmm0, %k0
; CHECK-NEXT:    kshiftld $24, %k0, %k0
; CHECK-NEXT:    kshiftrd $24, %k0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %addr, i32 4, <8 x i1>%mask, <8 x i16> undef)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.masked.load.v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)

define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
; CHECK-LABEL: test_mask_load_16xi16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kshiftld $16, %k0, %k0
; CHECK-NEXT:    kshiftrd $16, %k0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.masked.load.v16i16(<16 x i16>* %addr, i32 4, <16 x i1>%mask, <16 x i16> zeroinitializer)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.masked.load.v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>)

; llvm.masked.store on sub-512-bit vectors: same k-register widening, then a
; masked vmovdqu8/vmovdqu16 store of the zmm-extended value.
define void @test_mask_store_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
; CHECK-LABEL: test_mask_store_16xi8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kshiftlq $48, %k0, %k0
; CHECK-NEXT:    kshiftrq $48, %k0, %k1
; CHECK-NEXT:    vmovdqu8 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v16i8(<16 x i8> %val, <16 x i8>* %addr, i32 4, <16 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)

define void @test_mask_store_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
; CHECK-LABEL: test_mask_store_32xi8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kshiftlq $32, %k0, %k0
; CHECK-NEXT:    kshiftrq $32, %k0, %k1
; CHECK-NEXT:    vmovdqu8 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v32i8(<32 x i8> %val, <32 x i8>* %addr, i32 4, <32 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)

define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
; CHECK-LABEL: test_mask_store_8xi16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %zmm0, %k0
; CHECK-NEXT:    kshiftld $24, %k0, %k0
; CHECK-NEXT:    kshiftrd $24, %k0, %k1
; CHECK-NEXT:    vmovdqu16 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v8i16(<8 x i16> %val, <8 x i16>* %addr, i32 4, <8 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)

define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
; CHECK-LABEL: test_mask_store_16xi16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kshiftld $16, %k0, %k0
; CHECK-NEXT:    kshiftrd $16, %k0, %k1
; CHECK-NEXT:    vmovdqu16 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v16i16(<16 x i16> %val, <16 x i16>* %addr, i32 4, <16 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)