; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s --check-prefix=DEFAULT

target triple = "nvptx64-unknown-cuda"

;; Make sure we are generating proper instruction sequences for fused ops.
;; If fusion is allowed, we try to form fma.rn at the PTX level, and emit
;; add.f32 otherwise.  Without an explicit rounding mode on add.f32, ptxas
;; is free to fuse with a multiply if it is able.  If fusion is not allowed,
;; we do not form fma.rn at the PTX level and explicitly generate add.rn
;; for all adds to prevent ptxas from fusing the ops.

;; A multiply whose result feeds an add.  With -fp-contract=fast the pair is
;; expected to become a single fma.rn.f32; in the default run it must stay as
;; separate mul.rn.f32 and add.rn.f32 instructions.
;; FAST-LABEL: @t0
;; DEFAULT-LABEL: @t0
define float @t0(float %a, float %b, float %c) {
;; FAST:      fma.rn.f32
;; DEFAULT:   mul.rn.f32
;; DEFAULT:   add.rn.f32
  %v0 = fmul float %a, %b
  %v1 = fadd float %v0, %c
  ret float %v1
}

;; A lone add with no multiply to fuse with.
;; FAST-LABEL: @t1
;; DEFAULT-LABEL: @t1
define float @t1(float %a, float %b) {
;; We cannot form an fma here.  In the default run, make sure we explicitly
;; emit add.rn.f32 to prevent ptxas from fusing this with anything else; with
;; -fp-contract=fast a plain add.f32 (no rounding mode) is expected instead.
;; FAST:      add.f32
;; DEFAULT:   add.rn.f32
  %v1 = fadd float %a, %b
  ret float %v1
}