; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw,avx512vl < %s | FileCheck %s

; Skylake-avx512 target supports masked load/store for i8 and i16 vectors

; Masked loads: an undef/zero passthru lowers to a zero-masked vmovdqu8/16,
; a live passthru value lowers to vpblendmb/vpblendmw.

define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
; CHECK-LABEL: test_mask_load_16xi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %xmm0, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %addr, i32 4, <16 x i1>%mask, <16 x i8> undef)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)

define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
; CHECK-LABEL: test_mask_load_32xi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT:    vpmovb2m %ymm0, %k1
; CHECK-NEXT:    vpblendmb (%rdi), %ymm1, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* %addr, i32 4, <32 x i1>%mask, <32 x i8> %val)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>*, i32, <32 x i1>, <32 x i8>)

define <64 x i8> @test_mask_load_64xi8(<64 x i1> %mask, <64 x i8>* %addr, <64 x i8> %val) {
; CHECK-LABEL: test_mask_load_64xi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %zmm0, %zmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k1
; CHECK-NEXT:    vpblendmb (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* %addr, i32 4, <64 x i1>%mask, <64 x i8> %val)
  ret <64 x i8> %res
}
declare <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>*, i32, <64 x i1>, <64 x i8>)

define <8 x i16> @test_mask_load_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
; CHECK-LABEL: test_mask_load_8xi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %xmm0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %addr, i32 4, <8 x i1>%mask, <8 x i16> undef)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)

define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
; CHECK-LABEL: test_mask_load_16xi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %xmm0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* %addr, i32 4, <16 x i1>%mask, <16 x i16> zeroinitializer)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>)

define <32 x i16> @test_mask_load_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i16> %val) {
; CHECK-LABEL: test_mask_load_32xi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT:    vpmovb2m %ymm0, %k1
; CHECK-NEXT:    vpblendmw (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* %addr, i32 4, <32 x i1>%mask, <32 x i16> %val)
  ret <32 x i16> %res
}
declare <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>*, i32, <32 x i1>, <32 x i16>)

; Masked stores: lower to masked vmovdqu8/16; 256/512-bit cases also emit
; vzeroupper before returning.

define void @test_mask_store_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
; CHECK-LABEL: test_mask_store_16xi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %xmm0, %k1
; CHECK-NEXT:    vmovdqu8 %xmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %val, <16 x i8>* %addr, i32 4, <16 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)

define void @test_mask_store_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
; CHECK-LABEL: test_mask_store_32xi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT:    vpmovb2m %ymm0, %k1
; CHECK-NEXT:    vmovdqu8 %ymm1, (%rdi) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> %val, <32 x i8>* %addr, i32 4, <32 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v32i8.p0v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)

define void @test_mask_store_64xi8(<64 x i1> %mask, <64 x i8>* %addr, <64 x i8> %val) {
; CHECK-LABEL: test_mask_store_64xi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %zmm0, %zmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k1
; CHECK-NEXT:    vmovdqu8 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> %val, <64 x i8>* %addr, i32 4, <64 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v64i8.p0v64i8(<64 x i8>, <64 x i8>*, i32, <64 x i1>)

define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
; CHECK-LABEL: test_mask_store_8xi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %xmm0, %k1
; CHECK-NEXT:    vmovdqu16 %xmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %val, <8 x i16>* %addr, i32 4, <8 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)

define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
; CHECK-LABEL: test_mask_store_16xi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %xmm0, %k1
; CHECK-NEXT:    vmovdqu16 %ymm1, (%rdi) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> %val, <16 x i16>* %addr, i32 4, <16 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v16i16.p0v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)

define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i16> %val) {
; CHECK-LABEL: test_mask_store_32xi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT:    vpmovb2m %ymm0, %k1
; CHECK-NEXT:    vmovdqu16 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> %val, <32 x i16>* %addr, i32 4, <32 x i1>%mask)
  ret void
}

declare void @llvm.masked.store.v32i16.p0v32i16(<32 x i16>, <32 x i16>*, i32, <32 x i1>)