; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX
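
; Sanity tests for 512-bit bitwise logic operations (and/andn/or/xor).
; KNL (AVX512F only) and SKX (AVX512F + BW/DQ/VL) are checked separately
; since the available instruction forms differ between the two CPUs.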

define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: vpandd:
; ALL:       ## %bb.0: ## %entry
; ALL-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; ALL-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <16 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2,
                            i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  %x = and <16 x i32> %a2, %b
  ret <16 x i32> %x
}

define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: vpandnd:
; ALL:       ## %bb.0: ## %entry
; ALL-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; ALL-NEXT:    vpandnq %zmm0, %zmm1, %zmm0
; ALL-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <16 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
                            i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %b2 = xor <16 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1,
                            i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %x = and <16 x i32> %a2, %b2
  ret <16 x i32> %x
}

define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: vpord:
; ALL:       ## %bb.0: ## %entry
; ALL-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; ALL-NEXT:    vporq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4,
                            i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  %x = or <16 x i32> %a2, %b
  ret <16 x i32> %x
}

define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: vpxord:
; ALL:       ## %bb.0: ## %entry
; ALL-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; ALL-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5,
                            i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  %x = xor <16 x i32> %a2, %b
  ret <16 x i32> %x
}

define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: vpandq:
; ALL:       ## %bb.0: ## %entry
; ALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; ALL-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <8 x i64> %a, <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>
  %x = and <8 x i64> %a2, %b
  ret <8 x i64> %x
}

define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: vpandnq:
; ALL:       ## %bb.0: ## %entry
; ALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; ALL-NEXT:    vpandnq %zmm0, %zmm1, %zmm0
; ALL-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  %b2 = xor <8 x i64> %b, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
  %x = and <8 x i64> %a2, %b2
  ret <8 x i64> %x
}

define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: vporq:
; ALL:       ## %bb.0: ## %entry
; ALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; ALL-NEXT:    vporq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <8 x i64> %a, <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
  %x = or <8 x i64> %a2, %b
  ret <8 x i64> %x
}

define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: vpxorq:
; ALL:       ## %bb.0: ## %entry
; ALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; ALL-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <8 x i64> %a, <i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9>
  %x = xor <8 x i64> %a2, %b
  ret <8 x i64> %x
}

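; A constant or memory operand should be folded into the logic instruction,
; using an embedded broadcast where the operand is a splat.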
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; KNL-LABEL: orq_broadcast:
; KNL:       ## %bb.0:
; KNL-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: orq_broadcast:
; SKX:       ## %bb.0:
; SKX-NEXT:    vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT:    retq
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; KNL-LABEL: andd512fold:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    vpandq (%rdi), %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: andd512fold:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vandps (%rdi), %zmm0, %zmm0
; SKX-NEXT:    retq
entry:
  %a = load <16 x i32>, <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}

define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; KNL-LABEL: andqbrst:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: andqbrst:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0
; SKX-NEXT:    retq
entry:
  %a = load i64, i64* %ap, align 8
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64> %d
}

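; KNL lacks AVX512BW, so <64 x i8> and <32 x i16> operations are split into
; two 256-bit (ymm) halves; SKX executes them as single 512-bit (zmm) ops.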
define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) {
; KNL-LABEL: and_v64i8:
; KNL:       ## %bb.0:
; KNL-NEXT:    vandps %ymm2, %ymm0, %ymm0
; KNL-NEXT:    vandps %ymm3, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: and_v64i8:
; SKX:       ## %bb.0:
; SKX-NEXT:    vandps %zmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %res = and <64 x i8> %a, %b
  ret <64 x i8> %res
}

define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) {
; KNL-LABEL: andn_v64i8:
; KNL:       ## %bb.0:
; KNL-NEXT:    vandnps %ymm0, %ymm2, %ymm0
; KNL-NEXT:    vandnps %ymm1, %ymm3, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: andn_v64i8:
; SKX:       ## %bb.0:
; SKX-NEXT:    vandnps %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %b2 = xor <64 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
                           i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
                           i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
                           i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %res = and <64 x i8> %a, %b2
  ret <64 x i8> %res
}

define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) {
; KNL-LABEL: or_v64i8:
; KNL:       ## %bb.0:
; KNL-NEXT:    vorps %ymm2, %ymm0, %ymm0
; KNL-NEXT:    vorps %ymm3, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: or_v64i8:
; SKX:       ## %bb.0:
; SKX-NEXT:    vorps %zmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %res = or <64 x i8> %a, %b
  ret <64 x i8> %res
}

define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) {
; KNL-LABEL: xor_v64i8:
; KNL:       ## %bb.0:
; KNL-NEXT:    vxorps %ymm2, %ymm0, %ymm0
; KNL-NEXT:    vxorps %ymm3, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: xor_v64i8:
; SKX:       ## %bb.0:
; SKX-NEXT:    vxorps %zmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %res = xor <64 x i8> %a, %b
  ret <64 x i8> %res
}

define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: and_v32i16:
; KNL:       ## %bb.0:
; KNL-NEXT:    vandps %ymm2, %ymm0, %ymm0
; KNL-NEXT:    vandps %ymm3, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: and_v32i16:
; SKX:       ## %bb.0:
; SKX-NEXT:    vandps %zmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %res = and <32 x i16> %a, %b
  ret <32 x i16> %res
}

define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: andn_v32i16:
; KNL:       ## %bb.0:
; KNL-NEXT:    vandnps %ymm0, %ymm2, %ymm0
; KNL-NEXT:    vandnps %ymm1, %ymm3, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: andn_v32i16:
; SKX:       ## %bb.0:
; SKX-NEXT:    vandnps %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %b2 = xor <32 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1,
                            i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %res = and <32 x i16> %a, %b2
  ret <32 x i16> %res
}

define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: or_v32i16:
; KNL:       ## %bb.0:
; KNL-NEXT:    vorps %ymm2, %ymm0, %ymm0
; KNL-NEXT:    vorps %ymm3, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: or_v32i16:
; SKX:       ## %bb.0:
; SKX-NEXT:    vorps %zmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %res = or <32 x i16> %a, %b
  ret <32 x i16> %res
}

define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: xor_v32i16:
; KNL:       ## %bb.0:
; KNL-NEXT:    vxorps %ymm2, %ymm0, %ymm0
; KNL-NEXT:    vxorps %ymm3, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: xor_v32i16:
; SKX:       ## %bb.0:
; SKX-NEXT:    vxorps %zmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %res = xor <32 x i16> %a, %b
  ret <32 x i16> %res
}

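; Masked FP logic: integer logic wrapped in bitcasts with a select on the
; mask. KNL lacks AVX512DQ and must use the integer forms (vpandd/vpandq);
; SKX can stay in the FP domain (vandps/vandpd).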
define <16 x float> @masked_and_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
; KNL-LABEL: masked_and_v16f32:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandd %zmm1, %zmm0, %zmm2 {%k1}
; KNL-NEXT:    vaddps %zmm2, %zmm3, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: masked_and_v16f32:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1}
; SKX-NEXT:    vaddps %zmm2, %zmm3, %zmm0
; SKX-NEXT:    retq
  %a1 = bitcast <16 x float> %a to <16 x i32>
  %b1 = bitcast <16 x float> %b to <16 x i32>
  %passThru1 = bitcast <16 x float> %passThru to <16 x i32>
  %mask1 = bitcast i16 %mask to <16 x i1>
  %op = and <16 x i32> %a1, %b1
  %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1
  %cast = bitcast <16 x i32> %select to <16 x float>
  %add = fadd <16 x float> %c, %cast
  ret <16 x float> %add
}

define <16 x float> @masked_or_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
; KNL-LABEL: masked_or_v16f32:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandd %zmm1, %zmm0, %zmm2 {%k1}
; KNL-NEXT:    vaddps %zmm2, %zmm3, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: masked_or_v16f32:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1}
; SKX-NEXT:    vaddps %zmm2, %zmm3, %zmm0
; SKX-NEXT:    retq
  %a1 = bitcast <16 x float> %a to <16 x i32>
  %b1 = bitcast <16 x float> %b to <16 x i32>
  %passThru1 = bitcast <16 x float> %passThru to <16 x i32>
  %mask1 = bitcast i16 %mask to <16 x i1>
  %op = and <16 x i32> %a1, %b1
  %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1
  %cast = bitcast <16 x i32> %select to <16 x float>
  %add = fadd <16 x float> %c, %cast
  ret <16 x float> %add
}

define <16 x float> @masked_xor_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
; KNL-LABEL: masked_xor_v16f32:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandd %zmm1, %zmm0, %zmm2 {%k1}
; KNL-NEXT:    vaddps %zmm2, %zmm3, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: masked_xor_v16f32:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1}
; SKX-NEXT:    vaddps %zmm2, %zmm3, %zmm0
; SKX-NEXT:    retq
  %a1 = bitcast <16 x float> %a to <16 x i32>
  %b1 = bitcast <16 x float> %b to <16 x i32>
  %passThru1 = bitcast <16 x float> %passThru to <16 x i32>
  %mask1 = bitcast i16 %mask to <16 x i1>
  %op = and <16 x i32> %a1, %b1
  %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1
  %cast = bitcast <16 x i32> %select to <16 x float>
  %add = fadd <16 x float> %c, %cast
  ret <16 x float> %add
}

define <8 x double> @masked_and_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
; KNL-LABEL: masked_and_v8f64:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandq %zmm1, %zmm0, %zmm2 {%k1}
; KNL-NEXT:    vaddpd %zmm2, %zmm3, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: masked_and_v8f64:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandpd %zmm1, %zmm0, %zmm2 {%k1}
; SKX-NEXT:    vaddpd %zmm2, %zmm3, %zmm0
; SKX-NEXT:    retq
  %a1 = bitcast <8 x double> %a to <8 x i64>
  %b1 = bitcast <8 x double> %b to <8 x i64>
  %passThru1 = bitcast <8 x double> %passThru to <8 x i64>
  %mask1 = bitcast i8 %mask to <8 x i1>
  %op = and <8 x i64> %a1, %b1
  %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1
  %cast = bitcast <8 x i64> %select to <8 x double>
  %add = fadd <8 x double> %c, %cast
  ret <8 x double> %add
}

define <8 x double> @masked_or_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
; KNL-LABEL: masked_or_v8f64:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandq %zmm1, %zmm0, %zmm2 {%k1}
; KNL-NEXT:    vaddpd %zmm2, %zmm3, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: masked_or_v8f64:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandpd %zmm1, %zmm0, %zmm2 {%k1}
; SKX-NEXT:    vaddpd %zmm2, %zmm3, %zmm0
; SKX-NEXT:    retq
  %a1 = bitcast <8 x double> %a to <8 x i64>
  %b1 = bitcast <8 x double> %b to <8 x i64>
  %passThru1 = bitcast <8 x double> %passThru to <8 x i64>
  %mask1 = bitcast i8 %mask to <8 x i1>
  %op = and <8 x i64> %a1, %b1
  %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1
  %cast = bitcast <8 x i64> %select to <8 x double>
  %add = fadd <8 x double> %c, %cast
  ret <8 x double> %add
}

define <8 x double> @masked_xor_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
; KNL-LABEL: masked_xor_v8f64:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandq %zmm1, %zmm0, %zmm2 {%k1}
; KNL-NEXT:    vaddpd %zmm2, %zmm3, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: masked_xor_v8f64:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandpd %zmm1, %zmm0, %zmm2 {%k1}
; SKX-NEXT:    vaddpd %zmm2, %zmm3, %zmm0
; SKX-NEXT:    retq
  %a1 = bitcast <8 x double> %a to <8 x i64>
  %b1 = bitcast <8 x double> %b to <8 x i64>
  %passThru1 = bitcast <8 x double> %passThru to <8 x i64>
  %mask1 = bitcast i8 %mask to <8 x i1>
  %op = and <8 x i64> %a1, %b1
  %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1
  %cast = bitcast <8 x i64> %select to <8 x double>
  %add = fadd <8 x double> %c, %cast
  ret <8 x double> %add
}

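; The test_mm512_* functions below follow the IR that clang emits for the
; corresponding _mm512_mask_*/_mm512_maskz_* intrinsics.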
define <8 x i64> @test_mm512_mask_and_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
; KNL-LABEL: test_mm512_mask_and_epi32:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandd %zmm2, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm512_mask_and_epi32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandps %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
entry:
  %and1.i.i = and <8 x i64> %__a, %__b
  %0 = bitcast <8 x i64> %and1.i.i to <16 x i32>
  %1 = bitcast <8 x i64> %__src to <16 x i32>
  %2 = bitcast i16 %__k to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_or_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
; KNL-LABEL: test_mm512_mask_or_epi32:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpord %zmm2, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm512_mask_or_epi32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vorps %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
entry:
  %or1.i.i = or <8 x i64> %__a, %__b
  %0 = bitcast <8 x i64> %or1.i.i to <16 x i32>
  %1 = bitcast <8 x i64> %__src to <16 x i32>
  %2 = bitcast i16 %__k to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_xor_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
; KNL-LABEL: test_mm512_mask_xor_epi32:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpxord %zmm2, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm512_mask_xor_epi32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vxorps %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
entry:
  %xor1.i.i = xor <8 x i64> %__a, %__b
  %0 = bitcast <8 x i64> %xor1.i.i to <16 x i32>
  %1 = bitcast <8 x i64> %__src to <16 x i32>
  %2 = bitcast i16 %__k to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; KNL-LABEL: test_mm512_mask_xor_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpxorq %zmm2, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm512_mask_xor_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vxorpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x double> %__A to <8 x i64>
  %1 = bitcast <8 x double> %__B to <8 x i64>
  %xor.i.i = xor <8 x i64> %0, %1
  %2 = bitcast <8 x i64> %xor.i.i to <8 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
  ret <8 x double> %4
}

define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; KNL-LABEL: test_mm512_maskz_xor_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpxorq %zmm1, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm512_maskz_xor_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x double> %__A to <8 x i64>
  %1 = bitcast <8 x double> %__B to <8 x i64>
  %xor.i.i = xor <8 x i64> %0, %1
  %2 = bitcast <8 x i64> %xor.i.i to <8 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
  ret <8 x double> %4
}

define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; KNL-LABEL: test_mm512_mask_xor_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpxord %zmm2, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm512_mask_xor_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vxorps %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <16 x float> %__A to <16 x i32>
  %1 = bitcast <16 x float> %__B to <16 x i32>
  %xor.i.i = xor <16 x i32> %0, %1
  %2 = bitcast <16 x i32> %xor.i.i to <16 x float>
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
  ret <16 x float> %4
}

define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; KNL-LABEL: test_mm512_maskz_xor_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpxord %zmm1, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm512_maskz_xor_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vxorps %zmm1, %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <16 x float> %__A to <16 x i32>
  %1 = bitcast <16 x float> %__B to <16 x i32>
  %xor.i.i = xor <16 x i32> %0, %1
  %2 = bitcast <16 x i32> %xor.i.i to <16 x float>
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
  ret <16 x float> %4
}

define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; KNL-LABEL: test_mm512_mask_or_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vporq %zmm1, %zmm2, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm512_mask_or_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vorpd %zmm1, %zmm2, %zmm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x double> %__A to <8 x i64>
  %1 = bitcast <8 x double> %__B to <8 x i64>
  %or.i.i = or <8 x i64> %1, %0
  %2 = bitcast <8 x i64> %or.i.i to <8 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
  ret <8 x double> %4
}

define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; KNL-LABEL: test_mm512_maskz_or_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vporq %zmm0, %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm512_maskz_or_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vorpd %zmm0, %zmm1, %zmm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x double> %__A to <8 x i64>
  %1 = bitcast <8 x double> %__B to <8 x i64>
  %or.i.i = or <8 x i64> %1, %0
  %2 = bitcast <8 x i64> %or.i.i to <8 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
  ret <8 x double> %4
}

define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; KNL-LABEL: test_mm512_mask_or_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpord %zmm1, %zmm2, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm512_mask_or_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vorps %zmm1, %zmm2, %zmm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <16 x float> %__A to <16 x i32>
  %1 = bitcast <16 x float> %__B to <16 x i32>
  %or.i.i = or <16 x i32> %1, %0
  %2 = bitcast <16 x i32> %or.i.i to <16 x float>
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
  ret <16 x float> %4
}

define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; KNL-LABEL: test_mm512_maskz_or_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpord %zmm0, %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm512_maskz_or_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vorps %zmm0, %zmm1, %zmm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <16 x float> %__A to <16 x i32>
  %1 = bitcast <16 x float> %__B to <16 x i32>
  %or.i.i = or <16 x i32> %1, %0
  %2 = bitcast <16 x i32> %or.i.i to <16 x float>
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
  ret <16 x float> %4
}

define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; KNL-LABEL: test_mm512_mask_and_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandq %zmm1, %zmm2, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm512_mask_and_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandpd %zmm1, %zmm2, %zmm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x double> %__A to <8 x i64>
  %1 = bitcast <8 x double> %__B to <8 x i64>
  %and.i.i = and <8 x i64> %1, %0
  %2 = bitcast <8 x i64> %and.i.i to <8 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
  ret <8 x double> %4
}

define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; KNL-LABEL: test_mm512_maskz_and_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandq %zmm0, %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm512_maskz_and_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandpd %zmm0, %zmm1, %zmm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x double> %__A to <8 x i64>
  %1 = bitcast <8 x double> %__B to <8 x i64>
  %and.i.i = and <8 x i64> %1, %0
  %2 = bitcast <8 x i64> %and.i.i to <8 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
  ret <8 x double> %4
}

define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; KNL-LABEL: test_mm512_mask_and_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandd %zmm1, %zmm2, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm512_mask_and_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandps %zmm1, %zmm2, %zmm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <16 x float> %__A to <16 x i32>
  %1 = bitcast <16 x float> %__B to <16 x i32>
  %and.i.i = and <16 x i32> %1, %0
  %2 = bitcast <16 x i32> %and.i.i to <16 x float>
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
  ret <16 x float> %4
}

define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; KNL-LABEL: test_mm512_maskz_and_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandd %zmm0, %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm512_maskz_and_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandps %zmm0, %zmm1, %zmm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <16 x float> %__A to <16 x i32>
  %1 = bitcast <16 x float> %__B to <16 x i32>
  %and.i.i = and <16 x i32> %1, %0
  %2 = bitcast <16 x i32> %and.i.i to <16 x float>
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
  ret <16 x float> %4
}

define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; KNL-LABEL: test_mm512_mask_andnot_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandnq %zmm2, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm512_mask_andnot_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandnpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x double> %__A to <8 x i64>
  %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
  %1 = bitcast <8 x double> %__B to <8 x i64>
  %and.i.i = and <8 x i64> %1, %neg.i.i
  %2 = bitcast <8 x i64> %and.i.i to <8 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
  ret <8 x double> %4
}

define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; KNL-LABEL: test_mm512_maskz_andnot_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandnq %zmm1, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm512_maskz_andnot_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x double> %__A to <8 x i64>
  %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
  %1 = bitcast <8 x double> %__B to <8 x i64>
  %and.i.i = and <8 x i64> %1, %neg.i.i
  %2 = bitcast <8 x i64> %and.i.i to <8 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
  ret <8 x double> %4
}

define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; KNL-LABEL: test_mm512_mask_andnot_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandnd %zmm2, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm512_mask_andnot_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandnps %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <16 x float> %__A to <16 x i32>
  %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = bitcast <16 x float> %__B to <16 x i32>
  %and.i.i = and <16 x i32> %1, %neg.i.i
  %2 = bitcast <16 x i32> %and.i.i to <16 x float>
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
  ret <16 x float> %4
}

define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; KNL-LABEL: test_mm512_maskz_andnot_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandnd %zmm1, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm512_maskz_andnot_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandnps %zmm1, %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <16 x float> %__A to <16 x i32>
  %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = bitcast <16 x float> %__B to <16 x i32>
  %and.i.i = and <16 x i32> %1, %neg.i.i
  %2 = bitcast <16 x i32> %and.i.i to <16 x float>
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
  ret <16 x float> %4
}