• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
3
; Scalar case: the f64 kernel argument is negated with the (-0.0 - x) form and
; stored to the global-memory pointer %out. The check expects one v_xor_b32 in
; the generated code (presumably the sign-bit flip on the high dword; the check
; itself does not pin the operands).
; FUNC-LABEL: {{^}}fneg_f64:
; GCN: v_xor_b32
define amdgpu_kernel void @fneg_f64(double addrspace(1)* %out, double %in) {
  %fneg = fsub double -0.000000e+00, %in   ; -0.0 - x, the fsub spelling of a negate
  store double %fneg, double addrspace(1)* %out
  ret void
}
11
; Two-element vector case: every lane of the <2 x double> argument is negated
; with the (-0.0 - x) form. The checks expect two v_xor_b32 instructions,
; which matches one per vector element (assumed; the checks only count them).
; FUNC-LABEL: {{^}}fneg_v2f64:
; GCN: v_xor_b32
; GCN: v_xor_b32
define amdgpu_kernel void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> %in) {
  ; The minuend is a splat of -0.0, so each lane computes -0.0 - in[i].
  %fneg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %in
  store <2 x double> %fneg, <2 x double> addrspace(1)* %out
  ret void
}
20
; Four-element vector case: every lane of the <4 x double> argument is negated
; with the (-0.0 - x) form. The checks expect four v_xor_b32 instructions,
; which matches one per vector element (assumed; the checks only count them).
;
; Checks that used the R600 prefix were removed from this case: neither RUN
; line in this file enables that prefix, so FileCheck never evaluated them.
; FUNC-LABEL: {{^}}fneg_v4f64:
; GCN: v_xor_b32
; GCN: v_xor_b32
; GCN: v_xor_b32
; GCN: v_xor_b32
define amdgpu_kernel void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> %in) {
  ; The minuend is a splat of -0.0, so each lane computes -0.0 - in[i].
  %fneg = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %in
  store <4 x double> %fneg, <4 x double> addrspace(1)* %out
  ret void
}
36
; DAGCombiner will transform:
; (fneg (f64 bitcast (i64 a))) => (f64 bitcast (xor (i64 a), 0x8000000000000000))
; unless the target returns true for isNegFree()
40
; The negated value reaches the fsub through an i64 -> double bitcast of the
; kernel argument. The check expects a single v_add_f64 whose first source is a
; negated scalar register pair and whose last operand is the literal 0 at end
; of line, i.e. the negation is carried as a source modifier on the add.
; Note the minuend here is +0.0, so the IR computes (0.0 - x) rather than the
; (-0.0 - x) form used by the other cases in this file.
; FUNC-LABEL: {{^}}fneg_free_f64:
; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, -{{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
define amdgpu_kernel void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
  %bc = bitcast i64 %in to double          ; reinterpret the integer bits as f64
  %fsub = fsub double 0.0, %bc
  store double %fsub, double addrspace(1)* %out
  ret void
}
49
; The negated value is consumed by an fmul with the original value. The checks
; expect the argument to be loaded once with s_load_dwordx2, no xor instruction
; after that load, and a v_mul_f64 that uses the loaded register pair twice:
; once negated, once plain. In other words the negate is folded into the
; multiply as a source modifier.
;
; The label directive uses the FUNC prefix like every other case in this file;
; both RUN lines enable FUNC and GCN together, so matching is unchanged.
;
; The expected load offsets differ per subtarget (0x13 on SI, 0x4c on VI);
; 0x13 * 4 == 0x4c, consistent with one location expressed in dwords vs bytes
; (assumption about the encodings -- confirm against the ISA docs).
; FUNC-LABEL: {{^}}fneg_fold_f64:
; SI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; VI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
; GCN-NOT: xor
; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
define amdgpu_kernel void @fneg_fold_f64(double addrspace(1)* %out, [8 x i32], double %in) {
  ; The unnamed [8 x i32] argument sits between %out and %in; presumably it is
  ; padding that fixes where %in lands in the kernel argument area.
  %fsub = fsub double -0.0, %in
  %fmul = fmul double %fsub, %in
  store double %fmul, double addrspace(1)* %out
  ret void
}
61