• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2
; Intrinsic declarations shared by the test functions in this file.
; Both are tagged with attribute group #0.
; Called at the top of every test function to obtain the per-lane index
; (presumably the work-item id in the X dimension -- confirm against the
; intrinsic's documentation).
3declare i32 @llvm.r600.read.tidig.x() #0
; Float absolute value; used by the *fabs* tests to produce |x| operands.
4declare float @llvm.fabs.f32(float) #0
5
6; FUNC-LABEL: {{^}}mad_sub_f32:
7; SI: buffer_load_dword [[REGA:v[0-9]+]]
8; SI: buffer_load_dword [[REGB:v[0-9]+]]
9; SI: buffer_load_dword [[REGC:v[0-9]+]]
10; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]]
11; SI: buffer_store_dword [[RESULT]]
; Computes out[tid] = ptr[tid] * ptr[tid + 1] - ptr[tid + 2] in f32.
; The checks above expect the fmul + fsub pair to become a single v_mad_f32
; whose third source operand is negated.
12define void @mad_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
  ; Index source: result of the tidig.x intrinsic, sign-extended to i64.
13  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
14  %tid.ext = sext i32 %tid to i64
  ; Addresses of three consecutive floats starting at ptr[tid].
15  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
16  %add1 = add i64 %tid.ext, 1
17  %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
18  %add2 = add i64 %tid.ext, 2
19  %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
  ; Destination slot out[tid].
20  %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
21  %a = load float, float addrspace(1)* %gep0, align 4
22  %b = load float, float addrspace(1)* %gep1, align 4
23  %c = load float, float addrspace(1)* %gep2, align 4
  ; a * b - c: the pattern under test.
24  %mul = fmul float %a, %b
25  %sub = fsub float %mul, %c
26  store float %sub, float addrspace(1)* %outgep, align 4
27  ret void
28}
29
30; FUNC-LABEL: {{^}}mad_sub_inv_f32:
31; SI: buffer_load_dword [[REGA:v[0-9]+]]
32; SI: buffer_load_dword [[REGB:v[0-9]+]]
33; SI: buffer_load_dword [[REGC:v[0-9]+]]
34; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]]
35; SI: buffer_store_dword [[RESULT]]
; Computes out[tid] = ptr[tid + 2] - ptr[tid] * ptr[tid + 1] in f32
; (operands of the subtraction swapped relative to mad_sub_f32).
; The checks above expect a single v_mad_f32 with the first multiplicand
; negated and the addend left unmodified.
36define void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
  ; Index source: result of the tidig.x intrinsic, sign-extended to i64.
37  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
38  %tid.ext = sext i32 %tid to i64
  ; Addresses of three consecutive floats starting at ptr[tid].
39  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
40  %add1 = add i64 %tid.ext, 1
41  %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
42  %add2 = add i64 %tid.ext, 2
43  %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
  ; Destination slot out[tid].
44  %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
45  %a = load float, float addrspace(1)* %gep0, align 4
46  %b = load float, float addrspace(1)* %gep1, align 4
47  %c = load float, float addrspace(1)* %gep2, align 4
  ; c - a * b: the pattern under test.
48  %mul = fmul float %a, %b
49  %sub = fsub float %c, %mul
50  store float %sub, float addrspace(1)* %outgep, align 4
51  ret void
52}
53
54; FUNC-LABEL: {{^}}mad_sub_f64:
55; SI: v_mul_f64
56; SI: v_add_f64
; Double-precision variant: out[tid] = ptr[tid] * ptr[tid + 1] - ptr[tid + 2].
; Unlike the f32 tests, the checks above expect two separate instructions
; (v_mul_f64 followed by v_add_f64) rather than one combined operation.
57define void @mad_sub_f64(double addrspace(1)* noalias nocapture %out, double addrspace(1)* noalias nocapture readonly %ptr) #1 {
  ; Index source: result of the tidig.x intrinsic, sign-extended to i64.
58  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
59  %tid.ext = sext i32 %tid to i64
  ; Addresses of three consecutive doubles starting at ptr[tid].
60  %gep0 = getelementptr double, double addrspace(1)* %ptr, i64 %tid.ext
61  %add1 = add i64 %tid.ext, 1
62  %gep1 = getelementptr double, double addrspace(1)* %ptr, i64 %add1
63  %add2 = add i64 %tid.ext, 2
64  %gep2 = getelementptr double, double addrspace(1)* %ptr, i64 %add2
  ; Destination slot out[tid].
65  %outgep = getelementptr double, double addrspace(1)* %out, i64 %tid.ext
66  %a = load double, double addrspace(1)* %gep0, align 8
67  %b = load double, double addrspace(1)* %gep1, align 8
68  %c = load double, double addrspace(1)* %gep2, align 8
  ; a * b - c in f64.
69  %mul = fmul double %a, %b
70  %sub = fsub double %mul, %c
71  store double %sub, double addrspace(1)* %outgep, align 8
72  ret void
73}
74
75; FUNC-LABEL: {{^}}mad_sub_fabs_f32:
76; SI: buffer_load_dword [[REGA:v[0-9]+]]
77; SI: buffer_load_dword [[REGB:v[0-9]+]]
78; SI: buffer_load_dword [[REGC:v[0-9]+]]
79; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]|
80; SI: buffer_store_dword [[RESULT]]
; Computes out[tid] = ptr[tid] * ptr[tid + 1] - |ptr[tid + 2]| in f32.
; The checks above expect a single v_mad_f32 whose third source operand
; carries both the absolute-value and the negate modifier (-|c|).
81define void @mad_sub_fabs_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
  ; Index source: result of the tidig.x intrinsic, sign-extended to i64.
82  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
83  %tid.ext = sext i32 %tid to i64
  ; Addresses of three consecutive floats starting at ptr[tid].
84  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
85  %add1 = add i64 %tid.ext, 1
86  %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
87  %add2 = add i64 %tid.ext, 2
88  %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
  ; Destination slot out[tid].
89  %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
90  %a = load float, float addrspace(1)* %gep0, align 4
91  %b = load float, float addrspace(1)* %gep1, align 4
92  %c = load float, float addrspace(1)* %gep2, align 4
  ; |c| is taken before the subtraction so both modifiers land on one operand.
93  %c.abs = call float @llvm.fabs.f32(float %c) #0
94  %mul = fmul float %a, %b
95  %sub = fsub float %mul, %c.abs
96  store float %sub, float addrspace(1)* %outgep, align 4
97  ret void
98}
99
100; FUNC-LABEL: {{^}}mad_sub_fabs_inv_f32:
101; SI: buffer_load_dword [[REGA:v[0-9]+]]
102; SI: buffer_load_dword [[REGB:v[0-9]+]]
103; SI: buffer_load_dword [[REGC:v[0-9]+]]
104; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|
105; SI: buffer_store_dword [[RESULT]]
; Computes out[tid] = |ptr[tid + 2]| - ptr[tid] * ptr[tid + 1] in f32.
; The checks above expect a single v_mad_f32 with the first multiplicand
; negated and the addend wrapped in an absolute-value modifier (|c|).
106define void @mad_sub_fabs_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
  ; Index source: result of the tidig.x intrinsic, sign-extended to i64.
107  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
108  %tid.ext = sext i32 %tid to i64
  ; Addresses of three consecutive floats starting at ptr[tid].
109  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
110  %add1 = add i64 %tid.ext, 1
111  %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
112  %add2 = add i64 %tid.ext, 2
113  %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
  ; Destination slot out[tid].
114  %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
115  %a = load float, float addrspace(1)* %gep0, align 4
116  %b = load float, float addrspace(1)* %gep1, align 4
117  %c = load float, float addrspace(1)* %gep2, align 4
  ; |c| - a * b: the pattern under test.
118  %c.abs = call float @llvm.fabs.f32(float %c) #0
119  %mul = fmul float %a, %b
120  %sub = fsub float %c.abs, %mul
121  store float %sub, float addrspace(1)* %outgep, align 4
122  ret void
123}
124
125; FUNC-LABEL: {{^}}neg_neg_mad_f32:
126; SI: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; Computes out[tid] = (-ptr[tid]) * (-ptr[tid + 1]) + ptr[tid + 2] in f32.
; The check above matches a v_mad_f32 whose operands are all plain vector
; registers, i.e. the two negations are expected to cancel out.
127define void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
  ; Index source: result of the tidig.x intrinsic, sign-extended to i64.
128  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
129  %tid.ext = sext i32 %tid to i64
  ; Addresses of three consecutive floats starting at ptr[tid].
130  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
131  %add1 = add i64 %tid.ext, 1
132  %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
133  %add2 = add i64 %tid.ext, 2
134  %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
  ; Destination slot out[tid].
135  %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
136  %a = load float, float addrspace(1)* %gep0, align 4
137  %b = load float, float addrspace(1)* %gep1, align 4
138  %c = load float, float addrspace(1)* %gep2, align 4
  ; Negation is spelled as (-0.0 - x) for both multiplicands.
139  %nega = fsub float -0.000000e+00, %a
140  %negb = fsub float -0.000000e+00, %b
141  %mul = fmul float %nega, %negb
  ; Note: this value is named %sub but is produced by an fadd.
142  %sub = fadd float %mul, %c
143  store float %sub, float addrspace(1)* %outgep, align 4
144  ret void
145}
146
147; FUNC-LABEL: {{^}}mad_fabs_sub_f32:
148; SI: buffer_load_dword [[REGA:v[0-9]+]]
149; SI: buffer_load_dword [[REGB:v[0-9]+]]
150; SI: buffer_load_dword [[REGC:v[0-9]+]]
151; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]]
152; SI: buffer_store_dword [[RESULT]]
; Computes out[tid] = ptr[tid] * |ptr[tid + 1]| - ptr[tid + 2] in f32.
; The checks above expect a single v_mad_f32 with an absolute-value modifier
; on the second multiplicand and a negate modifier on the third operand.
153define void @mad_fabs_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
  ; Index source: result of the tidig.x intrinsic, sign-extended to i64.
154  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
155  %tid.ext = sext i32 %tid to i64
  ; Addresses of three consecutive floats starting at ptr[tid].
156  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
157  %add1 = add i64 %tid.ext, 1
158  %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
159  %add2 = add i64 %tid.ext, 2
160  %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
  ; Destination slot out[tid].
161  %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
162  %a = load float, float addrspace(1)* %gep0, align 4
163  %b = load float, float addrspace(1)* %gep1, align 4
164  %c = load float, float addrspace(1)* %gep2, align 4
  ; a * |b| - c: the absolute value sits on a multiplicand, not the addend.
165  %b.abs = call float @llvm.fabs.f32(float %b) #0
166  %mul = fmul float %a, %b.abs
167  %sub = fsub float %mul, %c
168  store float %sub, float addrspace(1)* %outgep, align 4
169  ret void
170}
171
172; FUNC-LABEL: {{^}}fsub_c_fadd_a_a:
173; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
174; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
175; SI: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]]
176; SI: buffer_store_dword [[RESULT]]
; Computes r2 - (r1 + r1), where r1 = out[tid] and r2 = out[tid + 1], and
; stores the result back to out[tid].
; The checks above expect the fadd + fsub pair to become a single v_mad_f32
; with a -2.0 constant multiplicand: -2.0 * r1 + r2.
; NOTE(review): %in is never used; both loads read from %out. Confirm this
; is intentional.
177define void @fsub_c_fadd_a_a(float addrspace(1)* %out, float addrspace(1)* %in) {
  ; Here the i32 index is used directly (no sign-extension to i64).
178  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
179  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  ; One element past %gep.0; the second load check above expects offset:4.
180  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  ; Same address computation as %gep.0; used as the store destination.
181  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
182
183  %r1 = load float, float addrspace(1)* %gep.0
184  %r2 = load float, float addrspace(1)* %gep.1
185
  ; r1 + r1 is the doubling that the expected 2.0 constant stands for.
186  %add = fadd float %r1, %r1
187  %r3 = fsub float %r2, %add
188
189  store float %r3, float addrspace(1)* %gep.out
190  ret void
191}
192
193; FUNC-LABEL: {{^}}fsub_fadd_a_a_c:
194; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
195; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
196; SI: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
197; SI: buffer_store_dword [[RESULT]]
; Computes (r1 + r1) - r2, where r1 = out[tid] and r2 = out[tid + 1], and
; stores the result back to out[tid].
; The checks above expect the fadd + fsub pair to become a single v_mad_f32
; with a 2.0 constant multiplicand and a negated addend: 2.0 * r1 - r2.
; NOTE(review): %in is never used; both loads read from %out. Confirm this
; is intentional.
198define void @fsub_fadd_a_a_c(float addrspace(1)* %out, float addrspace(1)* %in) {
  ; Here the i32 index is used directly (no sign-extension to i64).
199  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
200  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  ; One element past %gep.0; the second load check above expects offset:4.
201  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  ; Same address computation as %gep.0; used as the store destination.
202  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
203
204  %r1 = load float, float addrspace(1)* %gep.0
205  %r2 = load float, float addrspace(1)* %gep.1
206
  ; r1 + r1 is the doubling that the expected 2.0 constant stands for.
207  %add = fadd float %r1, %r1
208  %r3 = fsub float %add, %r2
209
210  store float %r3, float addrspace(1)* %gep.out
211  ret void
212}
213
; Attribute groups referenced throughout this file.
; #0 is applied to the two intrinsic declarations and to their call sites
; in the functions that use the "#0" shorthand.
214attributes #0 = { nounwind readnone }
; #1 is applied to the test function definitions that take a %ptr argument;
; the two fsub_*fadd* functions carry no attribute group.
215attributes #1 = { nounwind }
216