; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32 --check-prefix=X32_AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X32 --check-prefix=X32_AVX512VL
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X32 --check-prefix=X32_AVX512VLDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX512VLDQ

; FIXME: Drop the regex pattern matching of 'nan' once we drop support for MSVC
; 2013.

define <2 x double> @fabs_v2f64(<2 x double> %p) {
; X32-LABEL: fabs_v2f64:
; X32:       # %bb.0:
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: fabs_v2f64:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    retq
  %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p)

define <4 x float> @fabs_v4f32(<4 x float> %p) {
; X32_AVX-LABEL: fabs_v4f32:
; X32_AVX:       # %bb.0:
; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32_AVX-NEXT:    retl
;
; X32_AVX512VL-LABEL: fabs_v4f32:
; X32_AVX512VL:       # %bb.0:
; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
; X32_AVX512VL-NEXT:    retl
;
; X32_AVX512VLDQ-LABEL: fabs_v4f32:
; X32_AVX512VLDQ:       # %bb.0:
; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
; X32_AVX512VLDQ-NEXT:    retl
;
; X64_AVX-LABEL: fabs_v4f32:
; X64_AVX:       # %bb.0:
; X64_AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX-NEXT:    retq
;
; X64_AVX512VL-LABEL: fabs_v4f32:
; X64_AVX512VL:       # %bb.0:
; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; X64_AVX512VL-NEXT:    retq
;
; X64_AVX512VLDQ-LABEL: fabs_v4f32:
; X64_AVX512VLDQ:       # %bb.0:
; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip){1to4}, %xmm0, %xmm0
; X64_AVX512VLDQ-NEXT:    retq
  %t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)

define <4 x double> @fabs_v4f64(<4 x double> %p) {
; X32_AVX-LABEL: fabs_v4f64:
; X32_AVX:       # %bb.0:
; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %ymm0, %ymm0
; X32_AVX-NEXT:    retl
;
; X32_AVX512VL-LABEL: fabs_v4f64:
; X32_AVX512VL:       # %bb.0:
; X32_AVX512VL-NEXT:    vpandq {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
; X32_AVX512VL-NEXT:    retl
;
; X32_AVX512VLDQ-LABEL: fabs_v4f64:
; X32_AVX512VLDQ:       # %bb.0:
; X32_AVX512VLDQ-NEXT:    vandpd {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
; X32_AVX512VLDQ-NEXT:    retl
;
; X64_AVX-LABEL: fabs_v4f64:
; X64_AVX:       # %bb.0:
; X64_AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; X64_AVX-NEXT:    retq
;
; X64_AVX512VL-LABEL: fabs_v4f64:
; X64_AVX512VL:       # %bb.0:
; X64_AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0
; X64_AVX512VL-NEXT:    retq
;
; X64_AVX512VLDQ-LABEL: fabs_v4f64:
; X64_AVX512VLDQ:       # %bb.0:
; X64_AVX512VLDQ-NEXT:    vandpd {{.*}}(%rip){1to4}, %ymm0, %ymm0
; X64_AVX512VLDQ-NEXT:    retq
  %t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)

define <8 x float> @fabs_v8f32(<8 x float> %p) {
; X32_AVX-LABEL: fabs_v8f32:
; X32_AVX:       # %bb.0:
; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %ymm0, %ymm0
; X32_AVX-NEXT:    retl
;
; X32_AVX512VL-LABEL: fabs_v8f32:
; X32_AVX512VL:       # %bb.0:
; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
; X32_AVX512VL-NEXT:    retl
;
; X32_AVX512VLDQ-LABEL: fabs_v8f32:
; X32_AVX512VLDQ:       # %bb.0:
; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
; X32_AVX512VLDQ-NEXT:    retl
;
; X64_AVX-LABEL: fabs_v8f32:
; X64_AVX:       # %bb.0:
; X64_AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; X64_AVX-NEXT:    retq
;
; X64_AVX512VL-LABEL: fabs_v8f32:
; X64_AVX512VL:       # %bb.0:
; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
; X64_AVX512VL-NEXT:    retq
;
; X64_AVX512VLDQ-LABEL: fabs_v8f32:
; X64_AVX512VLDQ:       # %bb.0:
; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip){1to8}, %ymm0, %ymm0
; X64_AVX512VLDQ-NEXT:    retq
  %t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)

define <8 x double> @fabs_v8f64(<8 x double> %p) {
; X32_AVX-LABEL: fabs_v8f64:
; X32_AVX:       # %bb.0:
; X32_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
; X32_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X32_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X32_AVX-NEXT:    retl
;
; X32_AVX512VL-LABEL: fabs_v8f64:
; X32_AVX512VL:       # %bb.0:
; X32_AVX512VL-NEXT:    vpandq {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
; X32_AVX512VL-NEXT:    retl
;
; X32_AVX512VLDQ-LABEL: fabs_v8f64:
; X32_AVX512VLDQ:       # %bb.0:
; X32_AVX512VLDQ-NEXT:    vandpd {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
; X32_AVX512VLDQ-NEXT:    retl
;
; X64_AVX-LABEL: fabs_v8f64:
; X64_AVX:       # %bb.0:
; X64_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
; X64_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X64_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X64_AVX-NEXT:    retq
;
; X64_AVX512VL-LABEL: fabs_v8f64:
; X64_AVX512VL:       # %bb.0:
; X64_AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; X64_AVX512VL-NEXT:    retq
;
; X64_AVX512VLDQ-LABEL: fabs_v8f64:
; X64_AVX512VLDQ:       # %bb.0:
; X64_AVX512VLDQ-NEXT:    vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; X64_AVX512VLDQ-NEXT:    retq
  %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
  ret <8 x double> %t
}
declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)

define <16 x float> @fabs_v16f32(<16 x float> %p) {
; X32_AVX-LABEL: fabs_v16f32:
; X32_AVX:       # %bb.0:
; X32_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
; X32_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X32_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X32_AVX-NEXT:    retl
;
; X32_AVX512VL-LABEL: fabs_v16f32:
; X32_AVX512VL:       # %bb.0:
; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
; X32_AVX512VL-NEXT:    retl
;
; X32_AVX512VLDQ-LABEL: fabs_v16f32:
; X32_AVX512VLDQ:       # %bb.0:
; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
; X32_AVX512VLDQ-NEXT:    retl
;
; X64_AVX-LABEL: fabs_v16f32:
; X64_AVX:       # %bb.0:
; X64_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
; X64_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X64_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X64_AVX-NEXT:    retq
;
; X64_AVX512VL-LABEL: fabs_v16f32:
; X64_AVX512VL:       # %bb.0:
; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; X64_AVX512VL-NEXT:    retq
;
; X64_AVX512VLDQ-LABEL: fabs_v16f32:
; X64_AVX512VLDQ:       # %bb.0:
; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; X64_AVX512VLDQ-NEXT:    retq
  %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
  ret <16 x float> %t
}
declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)

; PR20354: when generating code for a vector fabs op,
; make sure that we're only turning off the sign bit of each float value.
; No constant pool loads or vector ops are needed for the fabs of a
; bitcasted integer constant; we should just return an integer constant
; that has the sign bits turned off.
;
; So instead of something like this:
;    movabsq (constant pool load of mask for sign bits)
;    vmovq   (move from integer register to vector/fp register)
;    vandps  (mask off sign bits)
;    vmovq   (move vector/fp register back to integer return register)
;
; We should generate:
;    mov     (put constant value in return register)

define i64 @fabs_v2f32_1() {
; X32-LABEL: fabs_v2f32_1:
; X32:       # %bb.0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    movl $2147483647, %edx # imm = 0x7FFFFFFF
; X32-NEXT:    retl
;
; X64-LABEL: fabs_v2f32_1:
; X64:       # %bb.0:
; X64-NEXT:    movabsq $9223372032559808512, %rax # imm = 0x7FFFFFFF00000000
; X64-NEXT:    retq
  %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
  %ret = bitcast <2 x float> %fabs to i64
  ret i64 %ret
}

define i64 @fabs_v2f32_2() {
; X32-LABEL: fabs_v2f32_2:
; X32:       # %bb.0:
; X32-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
;
; X64-LABEL: fabs_v2f32_2:
; X64:       # %bb.0:
; X64-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-NEXT:    retq
  %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
  %ret = bitcast <2 x float> %fabs to i64
  ret i64 %ret
}

declare <2 x float> @llvm.fabs.v2f32(<2 x float> %p)