; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s

; Every function below accesses memory through an i8* that is bitcast to a
; vector pointer and then loaded or stored with "align 1". The first eight
; functions use 256-bit vectors (ymm registers in the expected output), the
; last eight use 128-bit vectors (xmm registers). Within each group, tests 1-4
; use byte elements and tests 5-8 use word elements.

; 256-bit, byte elements: plain load; a single vmovdqu8 into ymm0 is expected.
define <32 x i8> @test_256_1(i8* %addr) {
; CHECK-LABEL: test_256_1:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu8 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7f,0x28,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %vptr = bitcast i8* %addr to <32 x i8>*
  %vec = load <32 x i8>, <32 x i8>* %vptr, align 1
  ret <32 x i8> %vec
}

; 256-bit, byte elements: plain store; a single vmovdqu8 from ymm0 is expected.
define void @test_256_2(i8* %addr, <32 x i8> %data) {
; CHECK-LABEL: test_256_2:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu8 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7f,0x28,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %vptr = bitcast i8* %addr to <32 x i8>*
  store <32 x i8> %data, <32 x i8>* %vptr, align 1
  ret void
}

; 256-bit, byte elements: lanes where %mask1 is non-zero take the loaded value,
; all other lanes keep %old. The expected output folds the load into a masked
; vpblendmb under %k1.
define <32 x i8> @test_256_3(i8* %addr, <32 x i8> %old, <32 x i8> %mask1) {
; CHECK-LABEL: test_256_3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT: vpcmpneqb %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x3f,0xca,0x04]
; CHECK-NEXT: vpblendmb (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x66,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %nonzero = icmp ne <32 x i8> %mask1, zeroinitializer
  %vptr = bitcast i8* %addr to <32 x i8>*
  %loaded = load <32 x i8>, <32 x i8>* %vptr, align 1
  %merged = select <32 x i1> %nonzero, <32 x i8> %loaded, <32 x i8> %old
  ret <32 x i8> %merged
}

; 256-bit, byte elements: lanes where %mask1 is non-zero take the loaded value,
; all other lanes become zero. The expected output is a vmovdqu8 load carrying
; both the {%k1} and {z} modifiers.
define <32 x i8> @test_256_4(i8* %addr, <32 x i8> %mask1) {
; CHECK-LABEL: test_256_4:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc9,0x04]
; CHECK-NEXT: vmovdqu8 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %nonzero = icmp ne <32 x i8> %mask1, zeroinitializer
  %vptr = bitcast i8* %addr to <32 x i8>*
  %loaded = load <32 x i8>, <32 x i8>* %vptr, align 1
  %zeroed = select <32 x i1> %nonzero, <32 x i8> %loaded, <32 x i8> zeroinitializer
  ret <32 x i8> %zeroed
}

; 256-bit, word elements: plain load; a single vmovdqu16 into ymm0 is expected.
define <16 x i16> @test_256_5(i8* %addr) {
; CHECK-LABEL: test_256_5:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu16 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xff,0x28,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %vptr = bitcast i8* %addr to <16 x i16>*
  %vec = load <16 x i16>, <16 x i16>* %vptr, align 1
  ret <16 x i16> %vec
}

; 256-bit, word elements: plain store; a single vmovdqu16 from ymm0 is expected.
define void @test_256_6(i8* %addr, <16 x i16> %data) {
; CHECK-LABEL: test_256_6:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu16 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xff,0x28,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %vptr = bitcast i8* %addr to <16 x i16>*
  store <16 x i16> %data, <16 x i16>* %vptr, align 1
  ret void
}

; 256-bit, word elements: lanes where %mask1 is non-zero take the loaded value,
; all other lanes keep %old. The expected output folds the load into a masked
; vpblendmw under %k1.
define <16 x i16> @test_256_7(i8* %addr, <16 x i16> %old, <16 x i16> %mask1) {
; CHECK-LABEL: test_256_7:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT: vpcmpneqw %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x3f,0xca,0x04]
; CHECK-NEXT: vpblendmw (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x66,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %nonzero = icmp ne <16 x i16> %mask1, zeroinitializer
  %vptr = bitcast i8* %addr to <16 x i16>*
  %loaded = load <16 x i16>, <16 x i16>* %vptr, align 1
  %merged = select <16 x i1> %nonzero, <16 x i16> %loaded, <16 x i16> %old
  ret <16 x i16> %merged
}

; 256-bit, word elements: lanes where %mask1 is non-zero take the loaded value,
; all other lanes become zero. The expected output is a vmovdqu16 load carrying
; both the {%k1} and {z} modifiers.
define <16 x i16> @test_256_8(i8* %addr, <16 x i16> %mask1) {
; CHECK-LABEL: test_256_8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc9,0x04]
; CHECK-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %nonzero = icmp ne <16 x i16> %mask1, zeroinitializer
  %vptr = bitcast i8* %addr to <16 x i16>*
  %loaded = load <16 x i16>, <16 x i16>* %vptr, align 1
  %zeroed = select <16 x i1> %nonzero, <16 x i16> %loaded, <16 x i16> zeroinitializer
  ret <16 x i16> %zeroed
}

; 128-bit, byte elements: plain load; a single vmovdqu8 into xmm0 is expected.
define <16 x i8> @test_128_1(i8* %addr) {
; CHECK-LABEL: test_128_1:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu8 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %vptr = bitcast i8* %addr to <16 x i8>*
  %vec = load <16 x i8>, <16 x i8>* %vptr, align 1
  ret <16 x i8> %vec
}

; 128-bit, byte elements: plain store; a single vmovdqu8 from xmm0 is expected.
define void @test_128_2(i8* %addr, <16 x i8> %data) {
; CHECK-LABEL: test_128_2:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu8 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7f,0x08,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %vptr = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %data, <16 x i8>* %vptr, align 1
  ret void
}

; 128-bit, byte elements: lanes where %mask1 is non-zero take the loaded value,
; all other lanes keep %old. The expected output folds the load into a masked
; vpblendmb under %k1.
define <16 x i8> @test_128_3(i8* %addr, <16 x i8> %old, <16 x i8> %mask1) {
; CHECK-LABEL: test_128_3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT: vpcmpneqb %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x3f,0xca,0x04]
; CHECK-NEXT: vpblendmb (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x66,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %nonzero = icmp ne <16 x i8> %mask1, zeroinitializer
  %vptr = bitcast i8* %addr to <16 x i8>*
  %loaded = load <16 x i8>, <16 x i8>* %vptr, align 1
  %merged = select <16 x i1> %nonzero, <16 x i8> %loaded, <16 x i8> %old
  ret <16 x i8> %merged
}

; 128-bit, byte elements: lanes where %mask1 is non-zero take the loaded value,
; all other lanes become zero. The expected output is a vmovdqu8 load carrying
; both the {%k1} and {z} modifiers.
define <16 x i8> @test_128_4(i8* %addr, <16 x i8> %mask1) {
; CHECK-LABEL: test_128_4:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc9,0x04]
; CHECK-NEXT: vmovdqu8 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %nonzero = icmp ne <16 x i8> %mask1, zeroinitializer
  %vptr = bitcast i8* %addr to <16 x i8>*
  %loaded = load <16 x i8>, <16 x i8>* %vptr, align 1
  %zeroed = select <16 x i1> %nonzero, <16 x i8> %loaded, <16 x i8> zeroinitializer
  ret <16 x i8> %zeroed
}

; 128-bit, word elements: plain load; a single vmovdqu16 into xmm0 is expected.
define <8 x i16> @test_128_5(i8* %addr) {
; CHECK-LABEL: test_128_5:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu16 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %vptr = bitcast i8* %addr to <8 x i16>*
  %vec = load <8 x i16>, <8 x i16>* %vptr, align 1
  ret <8 x i16> %vec
}

; 128-bit, word elements: plain store; a single vmovdqu16 from xmm0 is expected.
define void @test_128_6(i8* %addr, <8 x i16> %data) {
; CHECK-LABEL: test_128_6:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu16 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xff,0x08,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %vptr = bitcast i8* %addr to <8 x i16>*
  store <8 x i16> %data, <8 x i16>* %vptr, align 1
  ret void
}

; 128-bit, word elements: lanes where %mask1 is non-zero take the loaded value,
; all other lanes keep %old. The expected output folds the load into a masked
; vpblendmw under %k1.
define <8 x i16> @test_128_7(i8* %addr, <8 x i16> %old, <8 x i16> %mask1) {
; CHECK-LABEL: test_128_7:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT: vpcmpneqw %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x3f,0xca,0x04]
; CHECK-NEXT: vpblendmw (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x66,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %nonzero = icmp ne <8 x i16> %mask1, zeroinitializer
  %vptr = bitcast i8* %addr to <8 x i16>*
  %loaded = load <8 x i16>, <8 x i16>* %vptr, align 1
  %merged = select <8 x i1> %nonzero, <8 x i16> %loaded, <8 x i16> %old
  ret <8 x i16> %merged
}

; 128-bit, word elements: lanes where %mask1 is non-zero take the loaded value,
; all other lanes become zero. The expected output is a vmovdqu16 load carrying
; both the {%k1} and {z} modifiers.
define <8 x i16> @test_128_8(i8* %addr, <8 x i16> %mask1) {
; CHECK-LABEL: test_128_8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc9,0x04]
; CHECK-NEXT: vmovdqu16 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %nonzero = icmp ne <8 x i16> %mask1, zeroinitializer
  %vptr = bitcast i8* %addr to <8 x i16>*
  %loaded = load <8 x i16>, <8 x i16>* %vptr, align 1
  %zeroed = select <8 x i1> %nonzero, <8 x i16> %loaded, <8 x i16> zeroinitializer
  ret <8 x i16> %zeroed
}