; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding | FileCheck %s

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlbw-builtins.c


;
; Signed Saturation
;
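
; For reference: each test name encodes the variant being exercised. "rr" is
; reg/reg and "rm" is reg/mem; a trailing "k" merges the result into the
; %passThru operand under %mask, while "kz" zeroes the inactive lanes instead.
; The mask's integer width always matches the lane count: i8 for <8 x i16>,
; i16 for <16 x i16> and <16 x i8>, i32 for <32 x i8>. These presumably
; correspond to the _mm/_mm256 mask/maskz adds/subs builtins exercised by
; avx512vlbw-builtins.c (see the NOTE above), though that mapping is not
; verified here.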

define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epi16_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}
declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi16_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epi16_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %1
}
declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>)

define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi16_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epi16_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %sub
}
declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> %passThru
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi16_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %sub
}

define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> %passThru
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epi16_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %sub
}
declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>)

define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> %passThru
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi16_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %sub
}

define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> %passThru
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epi8_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}
declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi8_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epi8_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %1
}
declare <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>)

define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi8_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epi8_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %sub
}
declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> %passThru
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi8_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %sub
}

define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> %passThru
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epi8_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %sub
}
declare <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>)

define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> %passThru
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi8_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %sub
}

define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> %passThru
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

;
; Unsigned Saturation
;
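
; The unsigned tests below mirror the signed ones above: @llvm.uadd.sat and
; @llvm.usub.sat clamp each N-bit lane to [0, 2^N - 1], whereas the signed
; intrinsics clamp to [-2^(N-1), 2^(N-1) - 1].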

define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epu16_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}
declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdd,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu16_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdd,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epu16_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %1
}
declare <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16>, <16 x i16>)

define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu16_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epu16_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %sub
}
declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> %passThru
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu16_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %sub
}

define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> %passThru
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epu16_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %sub
}
declare <16 x i16> @llvm.usub.sat.v16i16(<16 x i16>, <16 x i16>)

define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> %passThru
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu16_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %sub
}

define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> %passThru
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epu8_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}
declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdc,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu8_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdc,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epu8_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %1
}
declare <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8>, <32 x i8>)

define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu8_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epu8_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %sub
}
declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> %passThru
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu8_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %sub
}

define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> %passThru
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epu8_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %sub
}
declare <32 x i8> @llvm.usub.sat.v32i8(<32 x i8>, <32 x i8>)

define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> %passThru
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu8_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %sub
}

define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> %passThru
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}