; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=I754 -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -amdgpu-fast-fdiv < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=I754 -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=UNSAFE-FP -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s

; These tests check that fdiv is expanded correctly and also test that the
; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate
; instruction groups.

; These tests also check fdiv lowering with unsafe_fp_math, coarse fp div, and
; IEEE754 fp div.

; Scalar f32 division without fast-math flags. The I754 runs expect the
; div_scale / rcp / fma / mul / div_fixup sequence; the remaining runs expect a
; reciprocal followed by a multiply.
; FUNC-LABEL: {{^}}fdiv_f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS

; UNSAFE-FP: v_rcp_f32
; UNSAFE-FP: v_mul_f32_e32

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32

; I754-DAG: v_div_scale_f32
; I754-DAG: v_rcp_f32
; I754-DAG: v_fma_f32
; I754-DAG: v_mul_f32
; I754-DAG: v_fma_f32
; I754-DAG: v_div_fixup_f32
define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) {
entry:
  %quot = fdiv float %a, %b
  store float %quot, float addrspace(1)* %out
  ret void
}

; Scalar f32 division carrying the 'fast' flag. Only the reciprocal-and-multiply
; form is checked here; no expectations are listed for the I754 runs.
; FUNC-LABEL: {{^}}fdiv_f32_fast_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS

; UNSAFE-FP: v_rcp_f32
; UNSAFE-FP: v_mul_f32_e32

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) {
entry:
  %quot = fdiv fast float %a, %b
  store float %quot, float addrspace(1)* %out
  ret void
}

; Scalar f32 division carrying the 'arcp' flag. Same expectations as the 'fast'
; variant above.
; FUNC-LABEL: {{^}}fdiv_f32_arcp_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS

; UNSAFE-FP: v_rcp_f32
; UNSAFE-FP: v_mul_f32_e32

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) {
entry:
  %quot = fdiv arcp float %a, %b
  store float %quot, float addrspace(1)* %out
  ret void
}

; <2 x float> division without fast-math flags. Each expectation of the scalar
; case is repeated per vector element; the I754 runs look for four div_scale
; and two div_fixup instructions.
; FUNC-LABEL: {{^}}fdiv_v2f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS

; UNSAFE-FP: v_rcp_f32
; UNSAFE-FP: v_rcp_f32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32

; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_fixup_f32
; I754: v_div_fixup_f32
define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
  %quot = fdiv <2 x float> %a, %b
  store <2 x float> %quot, <2 x float> addrspace(1)* %out
  ret void
}

; <2 x float> division carrying the 'fast' flag.
; FUNC-LABEL: {{^}}fdiv_v2f32_fast_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS

; UNSAFE-FP: v_rcp_f32
; UNSAFE-FP: v_rcp_f32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_v2f32_fast_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
  %quot = fdiv fast <2 x float> %a, %b
  store <2 x float> %quot, <2 x float> addrspace(1)* %out
  ret void
}

; <2 x float> division carrying the 'arcp' flag.
; FUNC-LABEL: {{^}}fdiv_v2f32_arcp_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS

; UNSAFE-FP: v_rcp_f32
; UNSAFE-FP: v_rcp_f32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
  %quot = fdiv arcp <2 x float> %a, %b
  store <2 x float> %quot, <2 x float> addrspace(1)* %out
  ret void
}

; <4 x float> division without fast-math flags. Both operands are loaded from
; consecutive <4 x float> slots of %in (index 0 and index 1). The I754 runs
; look for eight div_scale and four div_fixup instructions.
; FUNC-LABEL: {{^}}fdiv_v4f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS

; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32

; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_fixup_f32
; I754: v_div_fixup_f32
; I754: v_div_fixup_f32
; I754: v_div_fixup_f32
define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
  %den.ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %num = load <4 x float>, <4 x float> addrspace(1)* %in
  %den = load <4 x float>, <4 x float> addrspace(1)* %den.ptr
  %quot = fdiv <4 x float> %num, %den
  store <4 x float> %quot, <4 x float> addrspace(1)* %out
  ret void
}

; <4 x float> division carrying the 'fast' flag; operands are loaded the same
; way as in fdiv_v4f32.
; FUNC-LABEL: {{^}}fdiv_v4f32_fast_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS

; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
  %den.ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %num = load <4 x float>, <4 x float> addrspace(1)* %in
  %den = load <4 x float>, <4 x float> addrspace(1)* %den.ptr
  %quot = fdiv fast <4 x float> %num, %den
  store <4 x float> %quot, <4 x float> addrspace(1)* %out
  ret void
}

; <4 x float> division carrying the 'arcp' flag; operands are loaded the same
; way as in fdiv_v4f32.
; FUNC-LABEL: {{^}}fdiv_v4f32_arcp_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS

; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
  %den.ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %num = load <4 x float>, <4 x float> addrspace(1)* %in
  %den = load <4 x float>, <4 x float> addrspace(1)* %den.ptr
  %quot = fdiv arcp <4 x float> %num, %den
  store <4 x float> %quot, <4 x float> addrspace(1)* %out
  ret void
}