Lines Matching +full:1 +full:- +full:9
1 …RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -chec…
2 … RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -chec…
9 ; (fadd (fmul x, y), z) -> (fma x, y, z)
10 ; FUNC-LABEL: {{^}}combine_to_fma_f64_0:
11 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
12 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
13 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
14 ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
16 …fine void @combine_to_fma_f64_0(double addrspace(1)* noalias %out, double addrspace(1)* noalias %i…
18 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
19 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
20 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
21 %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
23 %a = load volatile double, double addrspace(1)* %gep.0
24 %b = load volatile double, double addrspace(1)* %gep.1
25 %c = load volatile double, double addrspace(1)* %gep.2
29 store double %fma, double addrspace(1)* %gep.out
33 ; (fadd (fmul x, y), z) -> (fma x, y, z)
34 ; FUNC-LABEL: {{^}}combine_to_fma_f64_0_2use:
35 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
36 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
37 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
38 ; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
39 ; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
40 ; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[D]]
41 ; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr6…
42 ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr6…
44 …void @combine_to_fma_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %i…
46 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
47 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
48 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
49 %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
50 %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
51 %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
53 %a = load volatile double, double addrspace(1)* %gep.0
54 %b = load volatile double, double addrspace(1)* %gep.1
55 %c = load volatile double, double addrspace(1)* %gep.2
56 %d = load volatile double, double addrspace(1)* %gep.3
61 store volatile double %fma0, double addrspace(1)* %gep.out.0
62 store volatile double %fma1, double addrspace(1)* %gep.out.1
66 ; (fadd x, (fmul y, z)) -> (fma y, z, x)
67 ; FUNC-LABEL: {{^}}combine_to_fma_f64_1:
68 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
69 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
70 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
71 ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
73 …fine void @combine_to_fma_f64_1(double addrspace(1)* noalias %out, double addrspace(1)* noalias %i…
75 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
76 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
77 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
78 %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
80 %a = load volatile double, double addrspace(1)* %gep.0
81 %b = load volatile double, double addrspace(1)* %gep.1
82 %c = load volatile double, double addrspace(1)* %gep.2
86 store double %fma, double addrspace(1)* %gep.out
90 ; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
91 ; FUNC-LABEL: {{^}}combine_to_fma_fsub_0_f64:
92 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
93 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
94 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
95 ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]]
97 …void @combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %i…
99 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
100 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
101 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
102 %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
104 %a = load volatile double, double addrspace(1)* %gep.0
105 %b = load volatile double, double addrspace(1)* %gep.1
106 %c = load volatile double, double addrspace(1)* %gep.2
110 store double %fma, double addrspace(1)* %gep.out
114 ; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
115 ; FUNC-LABEL: {{^}}combine_to_fma_fsub_f64_0_2use:
116 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
117 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
118 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
119 ; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
120 ; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]]
121 ; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[D]]
122 ; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr6…
123 ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr6…
125 …@combine_to_fma_fsub_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %i…
127 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
128 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
129 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
130 %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
131 %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
132 %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
134 %a = load volatile double, double addrspace(1)* %gep.0
135 %b = load volatile double, double addrspace(1)* %gep.1
136 %c = load volatile double, double addrspace(1)* %gep.2
137 %d = load volatile double, double addrspace(1)* %gep.3
142 store volatile double %fma0, double addrspace(1)* %gep.out.0
143 store volatile double %fma1, double addrspace(1)* %gep.out.1
147 ; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
148 ; FUNC-LABEL: {{^}}combine_to_fma_fsub_1_f64:
149 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
150 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
151 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
152 ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]]
154 …void @combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %i…
156 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
157 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
158 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
159 %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
161 %a = load volatile double, double addrspace(1)* %gep.0
162 %b = load volatile double, double addrspace(1)* %gep.1
163 %c = load volatile double, double addrspace(1)* %gep.2
167 store double %fma, double addrspace(1)* %gep.out
171 ; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
172 ; FUNC-LABEL: {{^}}combine_to_fma_fsub_1_f64_2use:
173 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
174 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
175 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
176 ; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
177 ; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]]
178 ; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[D]]
179 ; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr6…
180 ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr6…
182 …@combine_to_fma_fsub_1_f64_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %i…
184 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
185 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
186 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
187 %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
188 %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
189 %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
191 %a = load volatile double, double addrspace(1)* %gep.0
192 %b = load volatile double, double addrspace(1)* %gep.1
193 %c = load volatile double, double addrspace(1)* %gep.2
194 %d = load volatile double, double addrspace(1)* %gep.3
199 store volatile double %fma0, double addrspace(1)* %gep.out.0
200 store volatile double %fma1, double addrspace(1)* %gep.out.1
204 ; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
205 ; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64:
206 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
207 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
208 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
209 ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
211 …void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %i…
213 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
214 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
215 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
216 %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
218 %a = load volatile double, double addrspace(1)* %gep.0
219 %b = load volatile double, double addrspace(1)* %gep.1
220 %c = load volatile double, double addrspace(1)* %gep.2
223 %mul.neg = fsub double -0.0, %mul
226 store double %fma, double addrspace(1)* %gep.out
230 ; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
231 ; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64_2uses_neg:
232 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
233 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
234 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
235 ; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
236 ; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[D]]
237 ; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr6…
238 ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr6…
240 …ine_to_fma_fsub_2_f64_2uses_neg(double addrspace(1)* noalias %out, double addrspace(1)* noalias %i…
242 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
243 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
244 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
245 %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
246 %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
247 %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
249 %a = load volatile double, double addrspace(1)* %gep.0
250 %b = load volatile double, double addrspace(1)* %gep.1
251 %c = load volatile double, double addrspace(1)* %gep.2
252 %d = load volatile double, double addrspace(1)* %gep.3
255 %mul.neg = fsub double -0.0, %mul
259 store volatile double %fma0, double addrspace(1)* %gep.out.0
260 store volatile double %fma1, double addrspace(1)* %gep.out.1
264 ; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
265 ; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64_2uses_mul:
266 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
267 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
268 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
269 ; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
270 ; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[D]]
271 ; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr6…
272 ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr6…
274 …ine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %out, double addrspace(1)* noalias %i…
276 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
277 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
278 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
279 %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
280 %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
281 %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
283 %a = load volatile double, double addrspace(1)* %gep.0
284 %b = load volatile double, double addrspace(1)* %gep.1
285 %c = load volatile double, double addrspace(1)* %gep.2
286 %d = load volatile double, double addrspace(1)* %gep.3
289 %mul.neg = fsub double -0.0, %mul
293 store volatile double %fma0, double addrspace(1)* %gep.out.0
294 store volatile double %fma1, double addrspace(1)* %gep.out.1
298 ; fold (fsub (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, (fneg z)))
300 ; FUNC-LABEL: {{^}}aggressive_combine_to_fma_fsub_0_f64:
301 ; SI-DAG: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
302 ; SI-DAG: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
303 ; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
304 ; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
305 ; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
306 ; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]], -[[Z]]
307 ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[FMA0]]
309 …ssive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %i…
311 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
312 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
313 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
314 %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
315 %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
316 %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
318 %x = load volatile double, double addrspace(1)* %gep.0
319 %y = load volatile double, double addrspace(1)* %gep.1
320 %z = load volatile double, double addrspace(1)* %gep.2
321 %u = load volatile double, double addrspace(1)* %gep.3
322 %v = load volatile double, double addrspace(1)* %gep.4
328 store double %tmp2, double addrspace(1)* %gep.out
333 ; -> (fma (fneg y), z, (fma (fneg u), v, x))
335 ; FUNC-LABEL: {{^}}aggressive_combine_to_fma_fsub_1_f64:
336 ; SI-DAG: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
337 ; SI-DAG: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
338 ; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
339 ; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
340 ; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\…
341 ; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[U]], [[V]], [[X]]
342 ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]]
344 …ssive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %i…
346 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
347 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
348 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
349 %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
350 %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
351 %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
353 %x = load volatile double, double addrspace(1)* %gep.0
354 %y = load volatile double, double addrspace(1)* %gep.1
355 %z = load volatile double, double addrspace(1)* %gep.2
356 %u = load volatile double, double addrspace(1)* %gep.3
357 %v = load volatile double, double addrspace(1)* %gep.4
363 store double %tmp2, double addrspace(1)* %gep.out
371 ; FUNC-LABEL: {{^}}test_f32_mul_add_x_one_y:
372 ; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY:v[0-9]]], [[VX:v[0-9]]]
373 define void @test_f32_mul_add_x_one_y(float addrspace(1)* %out,
374 float addrspace(1)* %in1,
375 float addrspace(1)* %in2) {
376 %x = load volatile float, float addrspace(1)* %in1
377 %y = load volatile float, float addrspace(1)* %in2
380 store float %m, float addrspace(1)* %out
384 ; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_one:
385 ; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY:v[0-9]]], [[VX:v[0-9]]]
386 define void @test_f32_mul_y_add_x_one(float addrspace(1)* %out,
387 float addrspace(1)* %in1,
388 float addrspace(1)* %in2) {
389 %x = load volatile float, float addrspace(1)* %in1
390 %y = load volatile float, float addrspace(1)* %in2
393 store float %m, float addrspace(1)* %out
397 ; FUNC-LABEL: {{^}}test_f32_mul_add_x_negone_y:
398 ; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
399 define void @test_f32_mul_add_x_negone_y(float addrspace(1)* %out,
400 float addrspace(1)* %in1,
401 float addrspace(1)* %in2) {
402 %x = load float, float addrspace(1)* %in1
403 %y = load float, float addrspace(1)* %in2
404 %a = fadd float %x, -1.0
406 store float %m, float addrspace(1)* %out
410 ; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_negone:
411 ; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
412 define void @test_f32_mul_y_add_x_negone(float addrspace(1)* %out,
413 float addrspace(1)* %in1,
414 float addrspace(1)* %in2) {
415 %x = load float, float addrspace(1)* %in1
416 %y = load float, float addrspace(1)* %in2
417 %a = fadd float %x, -1.0
419 store float %m, float addrspace(1)* %out
423 ; FUNC-LABEL: {{^}}test_f32_mul_sub_one_x_y:
424 ; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], [[VY]]
425 define void @test_f32_mul_sub_one_x_y(float addrspace(1)* %out,
426 float addrspace(1)* %in1,
427 float addrspace(1)* %in2) {
428 %x = load float, float addrspace(1)* %in1
429 %y = load float, float addrspace(1)* %in2
432 store float %m, float addrspace(1)* %out
436 ; FUNC-LABEL: {{^}}test_f32_mul_y_sub_one_x:
437 ; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], [[VY]]
438 define void @test_f32_mul_y_sub_one_x(float addrspace(1)* %out,
439 float addrspace(1)* %in1,
440 float addrspace(1)* %in2) {
441 %x = load float, float addrspace(1)* %in1
442 %y = load float, float addrspace(1)* %in2
445 store float %m, float addrspace(1)* %out
449 ; FUNC-LABEL: {{^}}test_f32_mul_sub_negone_x_y:
450 ; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], -[[VY]]
451 define void @test_f32_mul_sub_negone_x_y(float addrspace(1)* %out,
452 float addrspace(1)* %in1,
453 float addrspace(1)* %in2) {
454 %x = load float, float addrspace(1)* %in1
455 %y = load float, float addrspace(1)* %in2
456 %s = fsub float -1.0, %x
458 store float %m, float addrspace(1)* %out
462 ; FUNC-LABEL: {{^}}test_f32_mul_y_sub_negone_x:
463 ; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], -[[VY]]
464 define void @test_f32_mul_y_sub_negone_x(float addrspace(1)* %out,
465 float addrspace(1)* %in1,
466 float addrspace(1)* %in2) {
467 %x = load float, float addrspace(1)* %in1
468 %y = load float, float addrspace(1)* %in2
469 %s = fsub float -1.0, %x
471 store float %m, float addrspace(1)* %out
475 ; FUNC-LABEL: {{^}}test_f32_mul_sub_x_one_y:
476 ; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
477 define void @test_f32_mul_sub_x_one_y(float addrspace(1)* %out,
478 float addrspace(1)* %in1,
479 float addrspace(1)* %in2) {
480 %x = load float, float addrspace(1)* %in1
481 %y = load float, float addrspace(1)* %in2
484 store float %m, float addrspace(1)* %out
488 ; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_one:
489 ; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
490 define void @test_f32_mul_y_sub_x_one(float addrspace(1)* %out,
491 float addrspace(1)* %in1,
492 float addrspace(1)* %in2) {
493 %x = load float, float addrspace(1)* %in1
494 %y = load float, float addrspace(1)* %in2
497 store float %m, float addrspace(1)* %out
501 ; FUNC-LABEL: {{^}}test_f32_mul_sub_x_negone_y:
502 ; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]]
503 define void @test_f32_mul_sub_x_negone_y(float addrspace(1)* %out,
504 float addrspace(1)* %in1,
505 float addrspace(1)* %in2) {
506 %x = load float, float addrspace(1)* %in1
507 %y = load float, float addrspace(1)* %in2
508 %s = fsub float %x, -1.0
510 store float %m, float addrspace(1)* %out
514 ; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_negone:
515 ; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]]
516 define void @test_f32_mul_y_sub_x_negone(float addrspace(1)* %out,
517 float addrspace(1)* %in1,
518 float addrspace(1)* %in2) {
519 %x = load float, float addrspace(1)* %in1
520 %y = load float, float addrspace(1)* %in2
521 %s = fsub float %x, -1.0
523 store float %m, float addrspace(1)* %out
531 ; FUNC-LABEL: {{^}}test_f32_interp:
532 ; SI: v_mad_f32 [[VR:v[0-9]]], -[[VT:v[0-9]]], [[VY:v[0-9]]], [[VY]]
533 ; SI: v_mac_f32_e32 [[VR]], [[VT]], [[VX:v[0-9]]]
534 define void @test_f32_interp(float addrspace(1)* %out,
535 float addrspace(1)* %in1,
536 float addrspace(1)* %in2,
537 float addrspace(1)* %in3) {
538 %x = load float, float addrspace(1)* %in1
539 %y = load float, float addrspace(1)* %in2
540 %t = load float, float addrspace(1)* %in3
545 store float %r, float addrspace(1)* %out
549 ; FUNC-LABEL: {{^}}test_f64_interp:
550 ; SI: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], -[[VT:v\[[0-9]+:[0-9]+\]]], [[VY:v\[[0-9]+:[0-9]+\]]], […
551 ; SI: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], [[VX:v\[[0-9]+:[0-9]+\]]], [[VT]], [[VR]]
552 define void @test_f64_interp(double addrspace(1)* %out,
553 double addrspace(1)* %in1,
554 double addrspace(1)* %in2,
555 double addrspace(1)* %in3) {
556 %x = load double, double addrspace(1)* %in1
557 %y = load double, double addrspace(1)* %in2
558 %t = load double, double addrspace(1)* %in3
563 store double %r, double addrspace(1)* %out
568 attributes #1 = { nounwind }