; Test f32 multiply-and-subtract, written as fma(%f1, %f2, -%acc), for
; -mcpu=z10 (CHECK-SCALAR) and -mcpu=z14 (CHECK-VECTOR).
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \
; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s

declare float @llvm.fma.f32(float %f1, float %f2, float %f3)

; Check the register form: MSEBR is expected for z10 and WFMSSB for z14.
define float @f1(float %f1, float %f2, float %acc) {
; CHECK-LABEL: f1:
; CHECK-SCALAR: msebr %f4, %f0, %f2
; CHECK-SCALAR: ler %f0, %f4
; CHECK-VECTOR: wfmssb %f0, %f0, %f2, %f4
; CHECK: br %r14
  %negacc = fsub float -0.0, %acc
  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
  ret float %res
}

; Check the memory form (MSEB) with a zero displacement.  Both CPUs are
; expected to use MSEB; only the final register copy differs (LER vs. LDR).
define float @f2(float %f1, float *%ptr, float %acc) {
; CHECK-LABEL: f2:
; CHECK: mseb %f2, %f0, 0(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %f2 = load float, float *%ptr
  %negacc = fsub float -0.0, %acc
  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
  ret float %res
}

; Check element 1023 (byte offset 4092), which is expected to be folded
; directly into the MSEB displacement.
define float @f3(float %f1, float *%base, float %acc) {
; CHECK-LABEL: f3:
; CHECK: mseb %f2, %f0, 4092(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %ptr = getelementptr float, float *%base, i64 1023
  %f2 = load float, float *%ptr
  %negacc = fsub float -0.0, %acc
  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
  ret float %res
}

; Check element 1024 (byte offset 4096), where the address is expected to be
; computed separately (AGHI) before an MSEB with zero displacement.
define float @f4(float %f1, float *%base, float %acc) {
; The important thing here is that we don't generate an out-of-range
; displacement.  Other sequences besides this one would be OK.
;
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: mseb %f2, %f0, 0(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %ptr = getelementptr float, float *%base, i64 1024
  %f2 = load float, float *%ptr
  %negacc = fsub float -0.0, %acc
  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
  ret float %res
}

; Check element -1 (byte offset -4), where the address is again expected to
; be adjusted separately (AGHI) rather than encoded as a displacement.
define float @f5(float %f1, float *%base, float %acc) {
; Here too the important thing is that we don't generate an out-of-range
; displacement.  Other sequences besides this one would be OK.
;
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -4
; CHECK: mseb %f2, %f0, 0(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %ptr = getelementptr float, float *%base, i64 -1
  %f2 = load float, float *%ptr
  %negacc = fsub float -0.0, %acc
  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
  ret float %res
}

; Check a variable index: the index is scaled by 4 (SLLG by 2) and MSEB is
; expected to use the base+index addressing form.
define float @f6(float %f1, float *%base, i64 %index, float %acc) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 2
; CHECK: mseb %f2, %f0, 0(%r1,%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %ptr = getelementptr float, float *%base, i64 %index
  %f2 = load float, float *%ptr
  %negacc = fsub float -0.0, %acc
  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
  ret float %res
}

; Check a variable index plus 1023 elements: the constant part (4092 bytes)
; is expected to be folded into the MSEB displacement alongside base+index.
define float @f7(float %f1, float *%base, i64 %index, float %acc) {
; CHECK-LABEL: f7:
; CHECK: sllg %r1, %r3, 2
; CHECK: mseb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}})
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %index2 = add i64 %index, 1023
  %ptr = getelementptr float, float *%base, i64 %index2
  %f2 = load float, float *%ptr
  %negacc = fsub float -0.0, %acc
  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
  ret float %res
}

; Check a variable index plus 1024 elements: the 4096-byte offset is expected
; to be formed with LAY first, leaving MSEB with a zero displacement.
define float @f8(float %f1, float *%base, i64 %index, float %acc) {
; CHECK-LABEL: f8:
; CHECK: sllg %r1, %r3, 2
; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
; CHECK: mseb %f2, %f0, 0(%r1)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %index2 = add i64 %index, 1024
  %ptr = getelementptr float, float *%base, i64 %index2
  %f2 = load float, float *%ptr
  %negacc = fsub float -0.0, %acc
  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
  ret float %res
}