; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck %s
; RUN: opt -S -amdgpu-codegenprepare %s | FileCheck -check-prefix=NOOP %s
; Make sure this doesn't crash with no triple

; Exercised by the second run line (no -mtriple): the fdiv and its !fpmath
; metadata are expected to come out of the pass unchanged.
; NOOP-LABEL: @noop_fdiv_fpmath(
; NOOP: %md.25ulp = fdiv float %a, %b, !fpmath !0
define amdgpu_kernel void @noop_fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #3 {
  %md.25ulp = fdiv float %a, %b, !fpmath !0
  store volatile float %md.25ulp, float addrspace(1)* %out
  ret void
}

; Scalar f32 fdiv in a function carrying attribute group #1
; ("denormal-fp-math-f32"="preserve-sign,preserve-sign").
; Expected shape of the output, per the directives that follow:
;   - no metadata, 0.5 ulp, 1 ulp : the fdiv stays in place
;   - 2.5 ulp, 3 ulp              : call to @llvm.amdgcn.fdiv.fast
;   - fast / afn flags            : @llvm.amdgcn.rcp.f32 of %b followed by an
;                                   fmul carrying the same flags
; CHECK-LABEL: @fdiv_fpmath(
; CHECK: %no.md = fdiv float %a, %b{{$}}
; CHECK: %md.half.ulp = fdiv float %a, %b
; CHECK: %md.1ulp = fdiv float %a, %b
; CHECK: %md.25ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b)
; CHECK: %md.3ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b)
; CHECK: %[[FAST_RCP:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %b)
; CHECK: %fast.md.25ulp = fmul fast float %a, %[[FAST_RCP]]
; CHECK: %[[AFN_RCP:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %b)
; CHECK: %afn.md.25ulp = fmul afn float %a, %[[AFN_RCP]]
define amdgpu_kernel void @fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 {
  %no.md = fdiv float %a, %b
  store volatile float %no.md, float addrspace(1)* %out

  %md.half.ulp = fdiv float %a, %b, !fpmath !1
  store volatile float %md.half.ulp, float addrspace(1)* %out

  %md.1ulp = fdiv float %a, %b, !fpmath !2
  store volatile float %md.1ulp, float addrspace(1)* %out

  %md.25ulp = fdiv float %a, %b, !fpmath !0
  store volatile float %md.25ulp, float addrspace(1)* %out

  %md.3ulp = fdiv float %a, %b, !fpmath !3
  store volatile float %md.3ulp, float addrspace(1)* %out

  %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
  store volatile float %fast.md.25ulp, float addrspace(1)* %out

  %afn.md.25ulp = fdiv afn float %a, %b, !fpmath !0
  store volatile float %afn.md.25ulp, float addrspace(1)* %out

  ret void
}

; Scalar reciprocal (1.0 / %x) in a function carrying attribute group #1.
; Expected shape of the output, per the directives that follow:
;   - no metadata, 0.5 ulp : the fdiv stays in place
;   - 2.5 ulp              : a single call to @llvm.amdgcn.rcp.f32
;   - afn / fast flags     : a single flagged call to @llvm.amdgcn.rcp.f32,
;                            with or without !fpmath on the input
; CHECK-LABEL: @rcp_fdiv_fpmath(
; CHECK: %no.md = fdiv float 1.000000e+00, %x{{$}}
; CHECK: %md.25ulp = call float @llvm.amdgcn.rcp.f32(float %x)
; CHECK: %md.half.ulp = fdiv float 1.000000e+00, %x
; CHECK: %afn.no.md = call afn float @llvm.amdgcn.rcp.f32(float %x)
; CHECK: %afn.25ulp = call afn float @llvm.amdgcn.rcp.f32(float %x)
; CHECK: %fast.no.md = call fast float @llvm.amdgcn.rcp.f32(float %x)
; CHECK: %fast.25ulp = call fast float @llvm.amdgcn.rcp.f32(float %x)
define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #1 {
  %no.md = fdiv float 1.0, %x
  store volatile float %no.md, float addrspace(1)* %out

  %md.25ulp = fdiv float 1.0, %x, !fpmath !0
  store volatile float %md.25ulp, float addrspace(1)* %out

  %md.half.ulp = fdiv float 1.0, %x, !fpmath !1
  store volatile float %md.half.ulp, float addrspace(1)* %out

  %afn.no.md = fdiv afn float 1.0, %x
  store volatile float %afn.no.md, float addrspace(1)* %out

  %afn.25ulp = fdiv afn float 1.0, %x, !fpmath !0
  store volatile float %afn.25ulp, float addrspace(1)* %out

  %fast.no.md = fdiv fast float 1.0, %x
  store volatile float %fast.no.md, float addrspace(1)* %out

  %fast.25ulp = fdiv fast float 1.0, %x, !fpmath !0
  store volatile float %fast.25ulp, float addrspace(1)* %out

  ret void
}

; <2 x float> fdiv in a function carrying attribute group #1.
; The directives expect every vector fdiv to be split into per-element
; extractelement / scalar op / insertelement sequences; the scalar op is a
; plain fdiv for the no-metadata, 0.5 ulp and 1 ulp cases and a call to
; @llvm.amdgcn.fdiv.fast for the 2.5 ulp case.
; CHECK-LABEL: @fdiv_fpmath_vector(
; CHECK: %[[NO_A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
; CHECK: %[[NO_B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
; CHECK: %[[NO_FDIV0:[0-9]+]] = fdiv float %[[NO_A0]], %[[NO_B0]]
; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[NO_FDIV0]], i64 0
; CHECK: %[[NO_A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
; CHECK: %[[NO_B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
; CHECK: %[[NO_FDIV1:[0-9]+]] = fdiv float %[[NO_A1]], %[[NO_B1]]
; CHECK: %no.md = insertelement <2 x float> %[[NO_INS0]], float %[[NO_FDIV1]], i64 1
; CHECK: store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

; CHECK: %[[HALF_A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
; CHECK: %[[HALF_B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
; CHECK: %[[HALF_FDIV0:[0-9]+]] = fdiv float %[[HALF_A0]], %[[HALF_B0]]
; CHECK: %[[HALF_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[HALF_FDIV0]], i64 0
; CHECK: %[[HALF_A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
; CHECK: %[[HALF_B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
; CHECK: %[[HALF_FDIV1:[0-9]+]] = fdiv float %[[HALF_A1]], %[[HALF_B1]]
; CHECK: %md.half.ulp = insertelement <2 x float> %[[HALF_INS0]], float %[[HALF_FDIV1]], i64 1
; CHECK: store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out

; CHECK: %[[ONE_A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
; CHECK: %[[ONE_B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
; CHECK: %[[ONE_FDIV0:[0-9]+]] = fdiv float %[[ONE_A0]], %[[ONE_B0]]
; CHECK: %[[ONE_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[ONE_FDIV0]], i64 0
; CHECK: %[[ONE_A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
; CHECK: %[[ONE_B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
; CHECK: %[[ONE_FDIV1:[0-9]+]] = fdiv float %[[ONE_A1]], %[[ONE_B1]]
; CHECK: %md.1ulp = insertelement <2 x float> %[[ONE_INS0]], float %[[ONE_FDIV1]], i64 1
; CHECK: store volatile <2 x float> %md.1ulp, <2 x float> addrspace(1)* %out

; CHECK: %[[A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
; CHECK: %[[B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
; CHECK: %[[FDIV0:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A0]], float %[[B0]])
; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FDIV0]], i64 0
; CHECK: %[[A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
; CHECK: %[[B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
; CHECK: %[[FDIV1:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A1]], float %[[B1]])
; CHECK: %md.25ulp = insertelement <2 x float> %[[INS0]], float %[[FDIV1]], i64 1
define amdgpu_kernel void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #1 {
  %no.md = fdiv <2 x float> %a, %b
  store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

  %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
  store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out

  %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2
  store volatile <2 x float> %md.1ulp, <2 x float> addrspace(1)* %out

  %md.25ulp = fdiv <2 x float> %a, %b, !fpmath !0
  store volatile <2 x float> %md.25ulp, <2 x float> addrspace(1)* %out

  ret void
}

; <2 x float> reciprocal with a splat <1.0, 1.0> numerator, attribute group #1.
; The directives expect per-element splitting; each element is a plain
; fdiv for the no-metadata and 0.5 ulp cases and a flagged call to
; @llvm.amdgcn.rcp.f32 for every afn / fast case.
; CHECK-LABEL: @rcp_fdiv_fpmath_vector(
; CHECK: %[[NO0:[0-9]+]] = extractelement <2 x float> %x, i64 0
; CHECK: %[[NO_FDIV0:[0-9]+]] = fdiv float 1.000000e+00, %[[NO0]]
; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[NO_FDIV0]], i64 0
; CHECK: %[[NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
; CHECK: %[[NO_FDIV1:[0-9]+]] = fdiv float 1.000000e+00, %[[NO1]]
; CHECK: %no.md = insertelement <2 x float> %[[NO_INS0]], float %[[NO_FDIV1]], i64 1
; CHECK: store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

; CHECK: %[[HALF0:[0-9]+]] = extractelement <2 x float> %x, i64 0
; CHECK: %[[HALF_FDIV0:[0-9]+]] = fdiv float 1.000000e+00, %[[HALF0]]
; CHECK: %[[HALF_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[HALF_FDIV0]], i64 0
; CHECK: %[[HALF1:[0-9]+]] = extractelement <2 x float> %x, i64 1
; CHECK: %[[HALF_FDIV1:[0-9]+]] = fdiv float 1.000000e+00, %[[HALF1]]
; CHECK: %md.half.ulp = insertelement <2 x float> %[[HALF_INS0]], float %[[HALF_FDIV1]], i64 1
; CHECK: store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out

; CHECK: %[[AFN_NO0:[0-9]+]] = extractelement <2 x float> %x, i64 0
; CHECK: %[[AFN_NO_FDIV0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO0]])
; CHECK: %[[AFN_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_NO_FDIV0]], i64 0
; CHECK: %[[AFN_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
; CHECK: %[[AFN_NO_FDIV1:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO1]])
; CHECK: %afn.no.md = insertelement <2 x float> %[[AFN_NO_INS0]], float %[[AFN_NO_FDIV1]], i64 1
; CHECK: store volatile <2 x float> %afn.no.md, <2 x float> addrspace(1)* %out

; CHECK: %[[FAST_NO0:[0-9]+]] = extractelement <2 x float> %x, i64 0
; CHECK: %[[FAST_NO_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO0]])
; CHECK: %[[FAST_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_NO_RCP0]], i64 0
; CHECK: %[[FAST_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
; CHECK: %[[FAST_NO_RCP1:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO1]])
; CHECK: %fast.no.md = insertelement <2 x float> %[[FAST_NO_INS0]], float %[[FAST_NO_RCP1]], i64 1
; CHECK: store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out

; CHECK: %[[AFN_250:[0-9]+]] = extractelement <2 x float> %x, i64 0
; CHECK: %[[AFN_25_RCP0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_250]])
; CHECK: %[[AFN_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_25_RCP0]], i64 0
; CHECK: %[[AFN_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
; CHECK: %[[AFN_25_RCP1:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_251]])
; CHECK: %afn.25ulp = insertelement <2 x float> %[[AFN_25_INS0]], float %[[AFN_25_RCP1]], i64 1
; CHECK: store volatile <2 x float> %afn.25ulp, <2 x float> addrspace(1)* %out

; CHECK: %[[FAST_250:[0-9]+]] = extractelement <2 x float> %x, i64 0
; CHECK: %[[FAST_25_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_250]])
; CHECK: %[[FAST_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_25_RCP0]], i64 0
; CHECK: %[[FAST_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
; CHECK: %[[FAST_25_RCP1:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_251]])
; CHECK: %fast.25ulp = insertelement <2 x float> %[[FAST_25_INS0]], float %[[FAST_25_RCP1]], i64 1
; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
  %no.md = fdiv <2 x float> <float 1.0, float 1.0>, %x
  store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

  %md.half.ulp = fdiv <2 x float> <float 1.0, float 1.0>, %x, !fpmath !1
  store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out

  %afn.no.md = fdiv afn <2 x float> <float 1.0, float 1.0>, %x
  store volatile <2 x float> %afn.no.md, <2 x float> addrspace(1)* %out

  %fast.no.md = fdiv fast <2 x float> <float 1.0, float 1.0>, %x
  store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out

  %afn.25ulp = fdiv afn <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
  store volatile <2 x float> %afn.25ulp, <2 x float> addrspace(1)* %out

  %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
  store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out

  ret void
}

; <2 x float> division with a non-splat <1.0, 2.0> numerator, attribute
; group #1. For the afn / fast cases the directives expect element 0
; (numerator 1.0) to become a bare @llvm.amdgcn.rcp.f32 call and element 1
; (numerator 2.0) to become rcp followed by an fmul by 2.0.
; CHECK-LABEL: @rcp_fdiv_fpmath_vector_nonsplat(
; CHECK: %[[NO0:[0-9]+]] = extractelement <2 x float> %x, i64 0
; CHECK: %[[NO_FDIV0:[0-9]+]] = fdiv float 1.000000e+00, %[[NO0]]
; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[NO_FDIV0]], i64 0
; CHECK: %[[NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
; CHECK: %[[NO_FDIV1:[0-9]+]] = fdiv float 2.000000e+00, %[[NO1]]
; CHECK: %no.md = insertelement <2 x float> %[[NO_INS0]], float %[[NO_FDIV1]], i64 1
; CHECK: store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

; CHECK: %[[AFN_NO0:[0-9]+]] = extractelement <2 x float> %x, i64 0
; CHECK: %[[AFN_NO_FDIV0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO0]])
; CHECK: %[[AFN_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_NO_FDIV0]], i64 0
; CHECK: %[[AFN_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
; CHECK: %[[AFN_NO_FDIV1:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO1]])
; CHECK: %[[AFN_NO_MUL1:[0-9]+]] = fmul afn float 2.000000e+00, %[[AFN_NO_FDIV1]]
; CHECK: %afn.no.md = insertelement <2 x float> %[[AFN_NO_INS0]], float %[[AFN_NO_MUL1]], i64 1
; CHECK: store volatile <2 x float> %afn.no.md, <2 x float> addrspace(1)* %out

; CHECK: %[[FAST_NO0:[0-9]+]] = extractelement <2 x float> %x, i64 0
; CHECK: %[[FAST_NO_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO0]])
; CHECK: %[[FAST_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_NO_RCP0]], i64 0
; CHECK: %[[FAST_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
; CHECK: %[[FAST_NO_RCP1:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO1]])
; CHECK: %[[FAST_NO_MUL1:[0-9]+]] = fmul fast float 2.000000e+00, %[[FAST_NO_RCP1]]
; CHECK: %fast.no.md = insertelement <2 x float> %[[FAST_NO_INS0]], float %[[FAST_NO_MUL1]], i64 1
; CHECK: store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out

; CHECK: %[[AFN_250:[0-9]+]] = extractelement <2 x float> %x, i64 0
; CHECK: %[[AFN_25_RCP0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_250]])
; CHECK: %[[AFN_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_25_RCP0]], i64 0
; CHECK: %[[AFN_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
; CHECK: %[[AFN_25_RCP1:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_251]])
; CHECK: %[[AFN_25_MUL1:[0-9]+]] = fmul afn float 2.000000e+00, %[[AFN_25_RCP1]]
; CHECK: %afn.25ulp = insertelement <2 x float> %[[AFN_25_INS0]], float %[[AFN_25_MUL1]], i64 1
; CHECK: store volatile <2 x float> %afn.25ulp, <2 x float> addrspace(1)* %out

; CHECK: %[[FAST_250:[0-9]+]] = extractelement <2 x float> %x, i64 0
; CHECK: %[[FAST_25_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_250]])
; CHECK: %[[FAST_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_25_RCP0]], i64 0
; CHECK: %[[FAST_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
; CHECK: %[[FAST_25_RCP1:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_251]])
; CHECK: %[[FAST_25_MUL1:[0-9]+]] = fmul fast float 2.000000e+00, %[[FAST_25_RCP1]]
; CHECK: %fast.25ulp = insertelement <2 x float> %[[FAST_25_INS0]], float %[[FAST_25_MUL1]], i64 1
; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
define amdgpu_kernel void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
  %no.md = fdiv <2 x float> <float 1.0, float 2.0>, %x
  store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

  %afn.no.md = fdiv afn <2 x float> <float 1.0, float 2.0>, %x
  store volatile <2 x float> %afn.no.md, <2 x float> addrspace(1)* %out

  %fast.no.md = fdiv fast <2 x float> <float 1.0, float 2.0>, %x
  store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out

  %afn.25ulp = fdiv afn <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
  store volatile <2 x float> %afn.25ulp, <2 x float> addrspace(1)* %out

  %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
  store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out

  ret void
}

; Numerator is %x with element 0 overwritten by 1.0 via insertelement, so
; the constant is not visible as a vector constant operand of the fdiv.
; The directives expect both elements to be lowered the same way: rcp of
; the divisor element followed by an fmul with the extracted numerator.
; CHECK-LABEL: @rcp_fdiv_fpmath_vector_partial_constant(
; CHECK: %[[AFN_A0:[0-9]+]] = extractelement <2 x float> %x.insert, i64 0
; CHECK: %[[AFN_B0:[0-9]+]] = extractelement <2 x float> %y, i64 0
; CHECK: %[[AFN_RCP0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_B0]])
; CHECK: %[[AFN_MUL0:[0-9]+]] = fmul afn float %[[AFN_A0]], %[[AFN_RCP0]]
; CHECK: %[[AFN_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_MUL0]], i64 0
; CHECK: %[[AFN_A1:[0-9]+]] = extractelement <2 x float> %x.insert, i64 1
; CHECK: %[[AFN_B1:[0-9]+]] = extractelement <2 x float> %y, i64 1
; CHECK: %[[AFN_RCP1:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_B1]])
; CHECK: %[[AFN_MUL1:[0-9]+]] = fmul afn float %[[AFN_A1]], %[[AFN_RCP1]]
; CHECK: %afn.25ulp = insertelement <2 x float> %[[AFN_INS0]], float %[[AFN_MUL1]], i64 1
; CHECK: store volatile <2 x float> %afn.25ulp

; CHECK: %[[FAST_A0:[0-9]+]] = extractelement <2 x float> %x.insert, i64 0
; CHECK: %[[FAST_B0:[0-9]+]] = extractelement <2 x float> %y, i64 0
; CHECK: %[[FAST_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_B0]])
; CHECK: %[[FAST_MUL0:[0-9]+]] = fmul fast float %[[FAST_A0]], %[[FAST_RCP0]]
; CHECK: %[[FAST_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_MUL0]], i64 0
; CHECK: %[[FAST_A1:[0-9]+]] = extractelement <2 x float> %x.insert, i64 1
; CHECK: %[[FAST_B1:[0-9]+]] = extractelement <2 x float> %y, i64 1
; CHECK: %[[FAST_RCP1:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_B1]])
; CHECK: %[[FAST_MUL1:[0-9]+]] = fmul fast float %[[FAST_A1]], %[[FAST_RCP1]]
; CHECK: %fast.25ulp = insertelement <2 x float> %[[FAST_INS0]], float %[[FAST_MUL1]], i64 1
; CHECK: store volatile <2 x float> %fast.25ulp
define amdgpu_kernel void @rcp_fdiv_fpmath_vector_partial_constant(<2 x float> addrspace(1)* %out, <2 x float> %x, <2 x float> %y) #1 {
  %x.insert = insertelement <2 x float> %x, float 1.0, i32 0

  %afn.25ulp = fdiv afn <2 x float> %x.insert, %y, !fpmath !0
  store volatile <2 x float> %afn.25ulp, <2 x float> addrspace(1)* %out

  %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0
  store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out

  ret void
}

; Same inputs as @fdiv_fpmath, but in a function carrying attribute group #2
; ("denormal-fp-math-f32"="ieee,ieee"). The directives expect every fdiv
; without fast-math flags to stay in place regardless of its !fpmath value,
; while the fast / afn cases still become rcp followed by fmul.
; CHECK-LABEL: @fdiv_fpmath_f32_denormals(
; CHECK: %no.md = fdiv float %a, %b{{$}}
; CHECK: %md.half.ulp = fdiv float %a, %b
; CHECK: %md.1ulp = fdiv float %a, %b
; CHECK: %md.25ulp = fdiv float %a, %b
; CHECK: %md.3ulp = fdiv float %a, %b
; CHECK: %[[RCP_FAST:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %b)
; CHECK: %fast.md.25ulp = fmul fast float %a, %[[RCP_FAST]]
; CHECK: %[[RCP_AFN:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %b)
; CHECK: %afn.md.25ulp = fmul afn float %a, %[[RCP_AFN]]
define amdgpu_kernel void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
  %no.md = fdiv float %a, %b
  store volatile float %no.md, float addrspace(1)* %out

  %md.half.ulp = fdiv float %a, %b, !fpmath !1
  store volatile float %md.half.ulp, float addrspace(1)* %out

  %md.1ulp = fdiv float %a, %b, !fpmath !2
  store volatile float %md.1ulp, float addrspace(1)* %out

  %md.25ulp = fdiv float %a, %b, !fpmath !0
  store volatile float %md.25ulp, float addrspace(1)* %out

  %md.3ulp = fdiv float %a, %b, !fpmath !3
  store volatile float %md.3ulp, float addrspace(1)* %out

  %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
  store volatile float %fast.md.25ulp, float addrspace(1)* %out

  %afn.md.25ulp = fdiv afn float %a, %b, !fpmath !0
  store volatile float %afn.md.25ulp, float addrspace(1)* %out

  ret void
}

; Attribute groups. #1 and #2 differ only in the f32 denormal mode; #3 is the
; group referenced by @noop_fdiv_fpmath and is defined here so that reference
; is not left dangling.
attributes #0 = { nounwind optnone noinline }
attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #2 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
attributes #3 = { nounwind }

; !fpmath operands used above: !0 = 2.5, !1 = 0.5, !2 = 1.0, !3 = 3.0.
!0 = !{float 2.500000e+00}
!1 = !{float 5.000000e-01}
!2 = !{float 1.000000e+00}
!3 = !{float 3.000000e+00}