• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 #include "../sfn_optimizer.h"
3 #include "../sfn_ra.h"
4 #include "../sfn_scheduler.h"
5 #include "../sfn_shader.h"
6 #include "../sfn_split_address_loads.h"
7 #include "sfn_test_shaders.h"
8 
9 using namespace r600;
10 using std::ostringstream;
11 
/* Run dead_code_elimination() once on the shader parsed from
 * red_triangle_fs_expect_from_nir and check the result against
 * red_triangle_fs_expect_from_nir_dce. */
TEST_F(TestShaderFromNir, SimpleDCE)
{
   auto shader = from_string(red_triangle_fs_expect_from_nir);

   dead_code_elimination(*shader);
   check(shader, red_triangle_fs_expect_from_nir_dce);
}
19 
/* Apply copy_propagation_fwd() to add_add_1_expect_from_nir and check
 * the result. Note that, despite the test name, neither a backward
 * pass nor DCE is run here. */
TEST_F(TestShaderFromNir, CopyPropagationForwardBackward)
{
   auto shader = from_string(add_add_1_expect_from_nir);

   copy_propagation_fwd(*shader);

   check(shader, add_add_1_expect_from_nir_copy_prop_fwd);
}
26 
/* Forward copy propagation followed by dead code elimination on
 * add_add_1_expect_from_nir. */
TEST_F(TestShaderFromNir, CopyPropagationForwardDCE)
{
   auto shader = from_string(add_add_1_expect_from_nir);

   copy_propagation_fwd(*shader);
   dead_code_elimination(*shader);

   check(shader, add_add_1_expect_from_nir_copy_prop_fwd_dce);
}
34 
/* Start from the forward-propagated, DCE'd shader text and apply
 * backward copy propagation plus another DCE round. */
TEST_F(TestShaderFromNir, CopyPropagationBackwardDCE)
{
   auto shader = from_string(add_add_1_expect_from_nir_copy_prop_fwd_dce);

   copy_propagation_backward(*shader);
   dead_code_elimination(*shader);

   check(shader, add_add_1_expect_from_nir_copy_prop_fwd_dce_bwd);
}
42 
/* Repeat the four-pass sequence (forward copy propagation, DCE,
 * backward copy propagation, DCE) until a complete round reports no
 * progress, then check the shader against the optimized expectation. */
TEST_F(TestShaderFromNir, FullOPtimize)
{
   auto shader = from_string(basic_interpolation_orig);

   /* The loop body always runs at least once. */
   for (bool changed = true; changed;) {
      changed = copy_propagation_fwd(*shader);
      changed |= dead_code_elimination(*shader);
      changed |= copy_propagation_backward(*shader);
      changed |= dead_code_elimination(*shader);
   }

   check(shader, basic_interpolation_expect_from_nir_opt);
}
59 
/* Run the copy-propagation/DCE sequence to a fixed point on
 * shader_group_chan_pin_to_combine and check the result against
 * shader_group_chan_pin_combined. */
TEST_F(TestShaderFromNir, CombinePinFlags)
{
   auto shader = from_string(shader_group_chan_pin_to_combine);

   /* The loop body always runs at least once. */
   for (bool changed = true; changed;) {
      changed = copy_propagation_fwd(*shader);
      changed |= dead_code_elimination(*shader);
      changed |= copy_propagation_backward(*shader);
      changed |= dead_code_elimination(*shader);
   }

   check(shader, shader_group_chan_pin_combined);
}
76 
/* Run optimize() on vs_nexted_loop_from_nir_expect and check it
 * against the corresponding _opt expectation. */
TEST_F(TestShaderFromNir, FullOPtimizeLoop)
{
   auto shader = from_string(vs_nexted_loop_from_nir_expect);
   optimize(*shader);
   check(shader, vs_nexted_loop_from_nir_expect_opt);
}
85 
/* The input copies R2.x/R3.x into S1.xy@group before the TEX op. In the
 * expectation those MOVs are gone, the ADD/MUL write R1024.x/y@group
 * directly, and the TEX op reads R1024.xy__. */
TEST_F(TestShaderFromNir, CombineRegisterToTexSrc)
{
   const char *source_text =
      R"(FS
CHIPCLASS EVERGREEN
REGISTERS R0.x R1.x R2.x R3.x
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
ALU ADD R2.x : R0.x R2.x {W}
ALU MUL R3.x : R0.x R3.x {WL}
ALU MOV S1.x@group : R2.x {W}
ALU MOV S1.y@group : R3.x {WL}
TEX SAMPLE S2.xyzw : S1.xy__ RID:18 SID:0 NNNN
EXPORT_DONE PIXEL 0 S2.xyzw
)";

   const char *expected_text =
      R"(FS
CHIPCLASS EVERGREEN
REGISTERS R1024.x@group R1024.y@group R0.x
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER

ALU ADD R1024.x@group : R0.x R1024.x@group {W}
ALU MUL R1024.y@group : R0.x R1024.y@group {WL}
TEX SAMPLE S2.xyzw : R1024.xy__ RID:18 SID:0 NNNN
EXPORT_DONE PIXEL 0 S2.xyzw
)";

   auto shader = from_string(source_text);
   optimize(*shader);
   check(shader, expected_text);
}
127 
/* A MOV from register R0.x into S2.x feeds a MUL. The expectation has
 * the MOV removed and the MUL reading R0.x for both sources. */
TEST_F(TestShaderFromNir, CopyPropRegDest)
{
   const char *source_text =
      R"(FS
CHIPCLASS EVERGREEN
REGISTERS R0.x
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
ALU MOV S2.x : R0.x {W}
ALU MUL S3.x : S2.x S2.x {W}
EXPORT_DONE PIXEL 0 S3.xxxx
)";

   const char *expected_text =
      R"(FS
CHIPCLASS EVERGREEN
REGISTERS R0.x
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
ALU MUL S3.x : R0.x R0.x {W}
EXPORT_DONE PIXEL 0 S3.xxxx
)";

   auto shader = from_string(source_text);
   optimize(*shader);
   check(shader, expected_text);
}
163 
/* R0.x is copied to S2.x and then overwritten with L[2.0] before the
 * MUL. The expectation keeps the copy into S2.x (the MUL still reads
 * S2.x) and only replaces the MUL's R0.x source with the literal. */
TEST_F(TestShaderFromNir, CopyPropRegDestAndOverwrite)
{
   const char *source_text =
      R"(FS
CHIPCLASS EVERGREEN
REGISTERS R0.x
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
ALU MOV S2.x : R0.x {W}
ALU MOV R0.x : L[2.0] {W}
ALU MUL S3.x : S2.x R0.x {W}
EXPORT_DONE PIXEL 0 S3.xxxx
)";

   const char *expected_text =
      R"(FS
CHIPCLASS EVERGREEN
REGISTERS R0.x
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
ALU MOV S2.x : R0.x {W}
ALU MOV R0.x : L[2.0] {W}
ALU MUL S3.x : S2.x L[2.0] {W}
EXPORT_DONE PIXEL 0 S3.xxxx
)";

   auto shader = from_string(source_text);
   optimize(*shader);
   check(shader, expected_text);
}
202 
/* An indirect write A0[R0.x].x sits between the read of A0[0].x and the
 * read of A0[1].x. The expected text is identical to the input, i.e.
 * optimize() is expected to leave this shader unchanged. */
TEST_F(TestShaderFromNir, CopyPropRegDestAndIndirectOverwrite)
{
   const char *source_text =
      R"(FS
CHIPCLASS EVERGREEN
REGISTERS R0.x
ARRAYS A0[2].x
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
ALU MOV S2.x : A0[0].x {W}
ALU MOV A0[R0.x].x : L[2.0] {W}
ALU MUL S3.x : S2.x A0[1].x {W}
EXPORT_DONE PIXEL 0 S3.xxxx
)";

   const char *expected_text =
      R"(FS
CHIPCLASS EVERGREEN
REGISTERS R0.x
ARRAYS A0[2].x
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
ALU MOV S2.x : A0[0].x {W}
ALU MOV A0[R0.x].x : L[2.0] {W}
ALU MUL S3.x : S2.x A0[1].x {W}
EXPORT_DONE PIXEL 0 S3.xxxx
)";

   auto shader = from_string(source_text);
   optimize(*shader);
   check(shader, expected_text);
}
243 
244 
/* Array writes, an indirect read (A0[R0.x]), more writes, an indirect
 * write (A0[R0.y]) and final direct reads. The shader goes through
 * split_address_loads() and schedule(); in the expectation the groups
 * keep the order of the input instructions and the indirect accesses
 * use AR loaded by MOVA_INT.
 *
 * NOTE(review): the expectation ends with BLOCK_START immediately
 * followed by BLOCK_END before the EXPORT_DONE, which is the reverse of
 * the BLOCK_END/BLOCK_START order used by other scheduled expectations
 * in this file -- confirm this is intentional. */
TEST_F(TestShaderFromNir, CopyPropAndIndirectReadOrder)
{
   const char *source_text =
      R"(FS
CHIPCLASS EVERGREEN
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:1
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A0[4].x
REGISTERS R0.xy
SHADER
BLOCK_START
  ALU MOV A0[0].x : I[0] {W}
  ALU MOV A0[1].x : I[1] {W}
  ALU MOV A0[2].x : I[0] {W}
  ALU MOV A0[3].x : I[0] {W}
  ALU MOV S2.x{s} : A0[R0.x].x {W}
  ALU MOV A0[1].x : L[0x2] {W}
  ALU MOV A0[2].x : L[0x2] {W}
  ALU MOV A0[3].x : L[0x3] {W}
  ALU MOV A0[R0.y].x : I[1] {W}
  ALU MOV S3.x : A0[0].x {W}
  ALU MOV S3.y : A0[1].x {W}
  ALU MOV S3.z : A0[2].x {W}
  ALU MOV S3.w : A0[3].x {W}
  EXPORT_DONE PIXEL 0 S3.xyzw
BLOCK_END
)";

   const char *expected_text =
      R"(FS
CHIPCLASS EVERGREEN
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:1
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A0[4].x
REGISTERS R0.xy
SHADER
BLOCK_START
ALU_GROUP_BEGIN
  ALU MOVA_INT AR : R0.x {}
  ALU MOV A0[0].x : I[0] {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
  ALU MOV A0[1].x : I[1] {W}
  ALU MOV A0[2].x : I[0] {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
  ALU MOV A0[3].x : I[0] {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
    ALU MOV S2.x : A0[AR].x {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
  ALU MOVA_INT AR : R0.y {}
  ALU MOV A0[1].x : L[0x2] {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
  ALU MOV A0[2].x : L[0x2] {W}
  ALU MOV A0[3].x : L[0x3] {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
  ALU MOV A0[AR].x : I[1] {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
  ALU MOV S3.x@chgr : A0[0].x {W}
  ALU MOV S3.y@chgr : A0[1].x {W}
  ALU MOV S3.z@chgr : A0[2].x {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
  ALU MOV S3.w@chgr : A0[3].x {WL}
ALU_GROUP_END
BLOCK_START
BLOCK_END
  EXPORT_DONE PIXEL 0 S3.xyzw
BLOCK_END
)";

   auto shader = from_string(source_text);
   split_address_loads(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, expected_text);
}
331 
/* optimize() applied to shader_with_dest_array, checked against
 * shader_with_dest_array_opt_expect. */
TEST_F(TestShaderFromNir, OptimizeWithDestArrayValue)
{
   auto shader = from_string(shader_with_dest_array);
   optimize(*shader);
   check(shader, shader_with_dest_array_opt_expect);
}
340 
/* Take the optimized dest-array shader text, split its address loads
 * and check the scheduled result. */
TEST_F(TestShaderFromNir, ScheduleOPtimizedWithDestArrayValue)
{
   auto shader = from_string(shader_with_dest_array_opt_expect);
   split_address_loads(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, shader_with_dest_array_opt_scheduled);
}
347 
/* Split address loads in shader_with_dest_array2 and check the
 * scheduled result against shader_with_dest_array2_scheduled. */
TEST_F(TestShaderFromNir, ScheduleWithArrayWriteAndRead)
{
   auto shader = from_string(shader_with_dest_array2);
   split_address_loads(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, shader_with_dest_array2_scheduled);
}
354 
/* Evaluate live ranges on the scheduled dest-array shader, run
 * register_allocation() on them (expected to return true) and compare
 * the shader with the _ra expectation via ra_check(). */
TEST_F(TestShaderFromNir, RA_with_dest_array)
{
   auto shader = from_string(shader_with_dest_array2_scheduled);

   r600::LiveRangeEvaluator evaluator;
   auto lrm = evaluator.run(*shader);
   EXPECT_TRUE(r600::register_allocation(lrm));

   ra_check(shader, shader_with_dest_array2_scheduled_ra);
}
364 
/* Same sequence as RA_with_dest_array, applied to
 * shader_group_chan_pin_combined_scheduled. */
TEST_F(TestShaderFromNir, RA_with_chan_group)
{
   auto shader = from_string(shader_group_chan_pin_combined_scheduled);

   r600::LiveRangeEvaluator evaluator;
   auto lrm = evaluator.run(*shader);
   EXPECT_TRUE(r600::register_allocation(lrm));

   ra_check(shader, shader_group_chan_pin_combined_scheduled_ra);
}
373 
/* optimize() applied to tes_pre_op, checked against tes_optimized. */
TEST_F(TestShaderFromNir, TES_opt)
{
   auto shader = from_string(tes_pre_op);
   optimize(*shader);
   check(shader, tes_optimized);
}
382 
/* schedule() applied to tes_optimized_pre_sched, checked against
 * tes_optimized_sched. */
TEST_F(TestShaderFromNir, TES_scheduled)
{
   auto shader = from_string(tes_optimized_pre_sched);
   auto scheduled = schedule(shader);
   check(scheduled, tes_optimized_sched);
}
389 
390 /*
391 TEST_F(TestShaderFromNir, ShaderClone)
392 {
393    auto sh = from_string(red_triangle_fs_expect_from_nir);
394 
395    auto sh_cloned = sh->clone();
396 
397    MemoryPool::instance().push();
398    dead_code_elimination(*sh);
399 
400    check(sh, red_triangle_fs_expect_from_nir_dce);
401 
402    check(sh_cloned, red_triangle_fs_expect_from_nir);
403 
404    MemoryPool::instance().pop();
405    check(sh, red_triangle_fs_expect_from_nir_dce);
406 }
407 */
408 
/* schedule() applied directly to basic_interpolation_orig (no
 * optimization step beforehand). */
TEST_F(TestShaderFromNir, ShaderSchedule)
{
   auto shader = from_string(basic_interpolation_orig);
   auto scheduled = schedule(shader);
   check(scheduled, basic_interpolation_expect_from_nir_sched);
}
415 
/* schedule() applied directly to basic_interpolation_orig_cayman. */
TEST_F(TestShaderFromNir, ShaderScheduleCayman)
{
   auto shader = from_string(basic_interpolation_orig_cayman);
   auto scheduled = schedule(shader);
   check(scheduled, basic_interpolation_expect_from_nir_sched_cayman);
}
422 
/* optimize() followed by schedule() on basic_interpolation_orig_cayman. */
TEST_F(TestShaderFromNir, ShaderScheduleOptimizedCayman)
{
   auto shader = from_string(basic_interpolation_orig_cayman);
   optimize(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, basic_interpolation_expect_opt_sched_cayman);
}
431 
/* One forward copy propagation pass and one DCE pass on dot4_pre,
 * checked against dot4_copy_prop_dce. */
TEST_F(TestShaderFromNir, CopyPropLegalConst)
{
   auto shader = from_string(dot4_pre);

   copy_propagation_fwd(*shader);
   dead_code_elimination(*shader);

   check(shader, dot4_copy_prop_dce);
}
441 
/* optimize() applied to glxgears_vs2_from_nir_expect, checked against
 * the corresponding _optimized expectation. */
TEST_F(TestShaderFromNir, FullOPtimize_glxgears_vs2)
{
   auto shader = from_string(glxgears_vs2_from_nir_expect);

   optimize(*shader);

   check(shader, glxgears_vs2_from_nir_expect_optimized);
}
449 
/* optimize() followed by schedule() on the test_schedule_group shader
 * text, checked against test_schedule_group_expect. */
TEST_F(TestShaderFromNir, test_schedule_group)
{
   auto shader = from_string(test_schedule_group);
   optimize(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, test_schedule_group_expect);
}
457 
/* optimize() followed by schedule() on
 * shader_copy_prop_dont_kill_double_use. */
TEST_F(TestShaderFromNir, test_dont_kill_dual_use)
{
   auto shader = from_string(shader_copy_prop_dont_kill_double_use);
   optimize(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, shader_copy_prop_dont_kill_double_use_expect);
}
464 
/* optimize() followed by schedule() on shader_with_bany_expect_eg. */
TEST_F(TestShaderFromNir, test_schedule_with_bany)
{
   auto shader = from_string(shader_with_bany_expect_eg);
   optimize(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, shader_with_bany_expect_opt_sched_eg);
}
472 
/* optimize() applied to shader_group_chan_pin_to_combine_2, checked
 * against its _opt expectation. */
TEST_F(TestShaderFromNir, GroupAndChanCombine)
{
   auto shader = from_string(shader_group_chan_pin_to_combine_2);

   optimize(*shader);

   check(shader, shader_group_chan_pin_to_combine_2_opt);
}
479 
/* schedule() applied to fs_with_loop_multislot_reuse, checked against
 * fs_with_loop_multislot_reuse_scheduled. */
TEST_F(TestShaderFromNir, RemoveUseAfterSplitgroup)
{
   auto shader = from_string(fs_with_loop_multislot_reuse);

   auto scheduled = schedule(shader);
   check(scheduled, fs_with_loop_multislot_reuse_scheduled);
}
485 
/* optimize() applied to vtx_for_tcs_inp, checked against
 * vtx_for_tcs_opt. */
TEST_F(TestShaderFromNir, OptimizeVSforTCS)
{
   auto shader = from_string(vtx_for_tcs_inp);

   optimize(*shader);

   check(shader, vtx_for_tcs_opt);
}
492 
/* schedule() applied to vtx_for_tcs_pre_sched, checked against
 * vtx_for_tcs_sched. */
TEST_F(TestShaderFromNir, ScheduleVSforTCS)
{
   auto shader = from_string(vtx_for_tcs_pre_sched);

   auto scheduled = schedule(shader);
   check(scheduled, vtx_for_tcs_sched);
}
498 
/* optimize() applied to fs_opt_tex_coord_init, checked against
 * fs_opt_tex_coord_expect. */
TEST_F(TestShaderFromNir, fs_opt_tex_coord)
{
   auto shader = from_string(fs_opt_tex_coord_init);
   optimize(*shader);
   check(shader, fs_opt_tex_coord_expect);
}
507 
/* schedule() applied to fs_sched_tex_coord_init, checked against
 * fs_sched_tex_coord_expect. */
TEST_F(TestShaderFromNir, fs_shed_tex_coord)
{
   auto shader = from_string(fs_sched_tex_coord_init);

   auto scheduled = schedule(shader);
   check(scheduled, fs_sched_tex_coord_expect);
}
513 
TEST_F(TestShaderFromNir,OptimizeAddWChanetoTrans)514 TEST_F(TestShaderFromNir, OptimizeAddWChanetoTrans)
515 {
516    const char *input =
517 R"(VS
518 CHIPCLASS CAYMAN
519 INPUT LOC:0
520 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
521 OUTPUT LOC:1 VARYING_SLOT:32 MASK:15
522 SHADER
523 BLOCK_START
524   ALU MOV S2.x@free{s} : I[0] {WL}
525   ALU MOV S3.y@free{s} : L[0x40c00000] {WL}
526   ALU MOV S4.z@free{s} : L[0xc1140000] {WL}
527   ALU MOV S5.w@free{s} : L[0xbfe00000] {WL}
528   ALU MOV S6.x@free{s} : L[0x3fa00000] {WL}
529   ALU MOV S7.x{s} : KC0[0].x {W}
530   ALU MOV S7.y{s} : KC0[0].y {W}
531   ALU MOV S7.z{s} : KC0[0].z {W}
532   ALU MOV S7.w{s} : KC0[0].w {WL}
533   ALU ADD S8.y@free{s} : S3.y@free{s} -S7.x{s} {WL}
534   ALU ADD S9.z@free{s} : S4.z@free{s} -S7.y{s} {WL}
535   ALU ADD S10.w@free{s} : S5.w@free{s} -S7.z{s} {WL}
536   ALU ADD S11.x@free{s} : S6.x@free{s} -S7.w{s} {WL}
537   ALU EXP_IEEE S12.y@free{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {WL}
538   ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {WL}
539   ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {WL}
540   ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
541   ALU MOV S17.x{s} : S12.y@free{s} {W}
542   ALU MOV S17.y{s} : S13.z@free{s} {W}
543   ALU MOV S17.z{s} : S14.x@free{s} {W}
544   ALU MOV S17.w{s} : S15.y@free{s} {WL}
545   ALU MOV S18.x@group{s} : S17.x{s} {W}
546   ALU MOV S18.y@group{s} : S17.y{s} {W}
547   ALU MOV S18.z@group{s} : S17.z{s} {W}
548   ALU MOV S18.w@group{s} : S17.w{s} {WL}
549   EXPORT_DONE PARAM 0 S18.xyzw
550 BLOCK_END)";
551 
552    const char *expect =
553 R"(VS
554 CHIPCLASS CAYMAN
555 INPUT LOC:0
556 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
557 OUTPUT LOC:1 VARYING_SLOT:32 MASK:15
558 SHADER
559 BLOCK_START
560   ALU ADD S8.y@free{s} : L[0x40c00000] -KC0[0].x {WL}
561   ALU ADD S9.z@free{s} : L[0xc1140000] -KC0[0].y {WL}
562   ALU ADD S10.w@free{s} : L[0xbfe00000] -KC0[0].z {WL}
563   ALU ADD S11.x@free{s} : L[0x3fa00000] -KC0[0].w {WL}
564   ALU EXP_IEEE S18.x@group{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {W}
565   ALU EXP_IEEE S18.y@group{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W}
566   ALU EXP_IEEE S18.z@group{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W}
567   ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
568   EXPORT_DONE PARAM 0 S18.xyzw
569 BLOCK_END
570 )";
571    auto sh = from_string(input);
572    optimize(*sh);
573    check(sh, expect);
574 };
575 
TEST_F(TestShaderFromNir,PeeholeSoureModsSimple)576 TEST_F(TestShaderFromNir, PeeholeSoureModsSimple)
577 {
578    const char *input =
579 R"(VS
580 CHIPCLASS CAYMAN
581 INPUT LOC:0
582 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
583 OUTPUT LOC:1 VARYING_SLOT:32 MASK:15
584 SHADER
585 BLOCK_START
586   ALU MOV S2.x@free{s} : I[0] {WL}
587   ALU MOV S3.y@free{s} : L[0x40c00000] {WL}
588   ALU MOV S4.z@free{s} : L[0xc1140000] {WL}
589   ALU MOV S5.w@free{s} : L[0xbfe00000] {WL}
590   ALU MOV S6.x@free{s} : L[0x3fa00000] {WL}
591   ALU MOV S7.x{s} : |KC0[0].x| {W}
592   ALU MOV S7.y{s} : -KC0[0].y {W}
593   ALU MOV S7.z{s} : -|KC0[0].z| {W}
594   ALU MOV S7.w{s} : KC0[0].w {WL}
595   ALU ADD S8.y@free{s} : S3.y@free{s} S7.x{s} {WL}
596   ALU ADD S9.z@free{s} : S4.z@free{s} S7.y{s} {WL}
597   ALU ADD S10.w@free{s} : S5.w@free{s} S7.z{s} {WL}
598   ALU ADD S11.x@free{s} : S6.x@free{s} S7.w{s} {WL}
599   ALU EXP_IEEE S12.y@free{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {WL}
600   ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {WL}
601   ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {WL}
602   ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
603   ALU MOV S17.x{s} : S12.y@free{s} {W}
604   ALU MOV S17.y{s} : S13.z@free{s} {W}
605   ALU MOV S17.z{s} : S14.x@free{s} {W}
606   ALU MOV S17.w{s} : S15.y@free{s} {WL}
607   ALU MOV S18.x@group{s} : S17.x{s} {W}
608   ALU MOV S18.y@group{s} : S17.y{s} {W}
609   ALU MOV S18.z@group{s} : S17.z{s} {W}
610   ALU MOV S18.w@group{s} : S17.w{s} {WL}
611   EXPORT_DONE PARAM 0 S18.xyzw
612 BLOCK_END)";
613 
614    const char *expect =
615 R"(VS
616 CHIPCLASS CAYMAN
617 INPUT LOC:0
618 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
619 OUTPUT LOC:1 VARYING_SLOT:32 MASK:15
620 SHADER
621 BLOCK_START
622   ALU ADD S8.y@free{s} : L[0x40c00000] |KC0[0].x| {WL}
623   ALU ADD S9.z@free{s} : L[0xc1140000] -KC0[0].y {WL}
624   ALU ADD S10.w@free{s} : L[0xbfe00000] -|KC0[0].z| {WL}
625   ALU ADD S11.x@free{s} : L[0x3fa00000] KC0[0].w {WL}
626   ALU EXP_IEEE S18.x@group{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {W}
627   ALU EXP_IEEE S18.y@group{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W}
628   ALU EXP_IEEE S18.z@group{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W}
629   ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
630   EXPORT_DONE PARAM 0 S18.xyzw
631 BLOCK_END
632 )";
633    auto sh = from_string(input);
634    optimize(*sh);
635    check(sh, expect);
636 };
637 
TEST_F(TestShaderFromNir,PeeholeSoureModsAbsNegTwice)638 TEST_F(TestShaderFromNir, PeeholeSoureModsAbsNegTwice)
639 {
640    const char *input =
641 R"(VS
642 CHIPCLASS CAYMAN
643 INPUT LOC:0
644 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
645 OUTPUT LOC:1 VARYING_SLOT:32 MASK:15
646 SHADER
647 BLOCK_START
648   ALU MOV S2.x@free{s} : I[0] {WL}
649   ALU MOV S3.y@free{s} : L[0x40c00000] {WL}
650   ALU MOV S4.z@free{s} : L[0xc1140000] {WL}
651   ALU MOV S5.w@free{s} : L[0xbfe00000] {WL}
652   ALU MOV S6.x@free{s} : L[0x3fa00000] {WL}
653   ALU MOV S7.x{s} : |KC0[0].x| {W}
654   ALU MOV S7.y{s} : -KC0[0].y {W}
655   ALU MOV S7.z{s} : -|KC0[0].z| {W}
656   ALU MOV S7.w{s} : KC0[0].w {WL}
657   ALU MOV S8.x : |S7.x| {W}
658   ALU MOV S8.y : -S7.y {W}
659   ALU MOV S8.z : -|S7.z| {W}
660   ALU MOV S8.w : -|S7.x| {WL}
661   ALU ADD S19.y@free{s} : S3.y@free{s} S8.x {WL}
662   ALU ADD S9.z@free{s} : S4.z@free{s} S8.y {WL}
663   ALU ADD S10.w@free{s} : S5.w@free{s} S8.z {WL}
664   ALU ADD S11.x@free{s} : S6.x@free{s} S8.w {WL}
665   ALU EXP_IEEE S12.y@free{s} : S19.y@free{s} + S19.y@free{s} + S19.y@free{s} {WL}
666   ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {WL}
667   ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {WL}
668   ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
669   ALU MOV S17.x{s} : S12.y@free{s} {W}
670   ALU MOV S17.y{s} : S13.z@free{s} {W}
671   ALU MOV S17.z{s} : S14.x@free{s} {W}
672   ALU MOV S17.w{s} : S15.y@free{s} {WL}
673   ALU MOV S18.x@group{s} : S17.x{s} {W}
674   ALU MOV S18.y@group{s} : S17.y{s} {W}
675   ALU MOV S18.z@group{s} : S17.z{s} {W}
676   ALU MOV S18.w@group{s} : S17.w{s} {WL}
677   EXPORT_DONE PARAM 0 S18.xyzw
678 BLOCK_END)";
679 
680    const char *expect =
681 R"(VS
682 CHIPCLASS CAYMAN
683 INPUT LOC:0
684 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
685 OUTPUT LOC:1 VARYING_SLOT:32 MASK:15
686 SHADER
687 BLOCK_START
688   ALU ADD S19.y@free{s} : L[0x40c00000] |KC0[0].x| {WL}
689   ALU ADD S9.z@free{s} : L[0xc1140000] KC0[0].y {WL}
690   ALU ADD S10.w@free{s} : L[0xbfe00000] |KC0[0].z| {WL}
691   ALU ADD S11.x@free{s} : L[0x3fa00000] -|KC0[0].x| {WL}
692   ALU EXP_IEEE S18.x@group{s} : S19.y@free{s} + S19.y@free{s} + S19.y@free{s} {W}
693   ALU EXP_IEEE S18.y@group{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W}
694   ALU EXP_IEEE S18.z@group{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W}
695   ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
696   EXPORT_DONE PARAM 0 S18.xyzw
697 BLOCK_END
698 )";
699    auto sh = from_string(input);
700    optimize(*sh);
701    check(sh, expect);
702 };
703 
/* A MOV CLAMP of an ADD result. In the expectation the clamp sits on
 * the ADD itself, the ADD reads |KC0[0].x| and -KC0[0].y directly, and
 * the export uses S3 instead of S4. */
TEST_F(TestShaderFromNir, PeeholeSoureModsClamp)
{
   const char *input =
R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
SHADER
BLOCK_START
  ALU MOV S1.x{s} : |KC0[0].x| {W}
  ALU MOV S2.y{s} : -KC0[0].y {W}
  ALU ADD S3.x : S1.x S2.y {W}
  ALU MOV CLAMP S4.x : S3.x {W}
  EXPORT_DONE PARAM 0 S4.xxxx
BLOCK_END)";

   const char *expect =
R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
SHADER
BLOCK_START
  ALU ADD CLAMP S3.x : |KC0[0].x| -KC0[0].y {W}
  EXPORT_DONE PARAM 0 S3.xxxx
BLOCK_END
)";
   auto sh = from_string(input);
   optimize(*sh);
   check(sh, expect);
}
735 
/* Eight MOVs with source modifiers feed one DOT4. In the expectation
 * the MOVs are gone and every DOT4 source reads KC0[0].* or R1.*
 * directly with the modifier of its former MOV. */
TEST_F(TestShaderFromNir, PeeholeSoureModsMuliSlot)
{
   const char *input =
R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
REGISTERS R1.xyzw
SHADER
BLOCK_START
  ALU MOV S1.x{s} : |KC0[0].x| {W}
  ALU MOV S1.y{s} : -KC0[0].y {W}
  ALU MOV S1.z{s} : |KC0[0].z| {W}
  ALU MOV S1.w{s} : KC0[0].w {W}
  ALU MOV S2.x{s} : |R1.x| {W}
  ALU MOV S2.y{s} : R1.y {W}
  ALU MOV S2.z{s} : -R1.z {W}
  ALU MOV S2.w{s} : -R1.w {W}
  ALU DOT4 S5.x : S1.x S2.x + S1.y S2.y  + S1.z S2.z + S1.w S2.w {W}
  EXPORT_DONE PARAM 0 S5.xxxx
BLOCK_END)";

   const char *expect =
R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
REGISTERS R1.xyzw
SHADER
BLOCK_START
  ALU DOT4 S5.x : |KC0[0].x| |R1.x| + -KC0[0].y R1.y  + |KC0[0].z| -R1.z + KC0[0].w -R1.w {W}
  EXPORT_DONE PARAM 0 S5.xxxx
BLOCK_END
)";
   auto sh = from_string(input);
   optimize(*sh);
   check(sh, expect);
}
774 
775 
TEST_F(TestShaderFromNir,OptimizeIntoGroup)776 TEST_F(TestShaderFromNir, OptimizeIntoGroup)
777 {
778    const char *input =
779 R"(VS
780 CHIPCLASS CAYMAN
781 INPUT LOC:0
782 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
783 REGISTERS R0.x R1.x R2.x
784 SHADER
785 BLOCK_START
786   ALU MOV S4.x : R0.x {W}
787   ALU MOV S5.y : R1.x {W}
788   ALU MOV S6.z : R2.x {WL}
789   ALU_GROUP_BEGIN
790     ALU CUBE S7.x@chgr{s} : S6.z S5.y {W}
791     ALU CUBE S7.y@chgr{s} : S6.z S4.x {W}
792     ALU CUBE S7.z@chgr{s} : S4.x S6.z {W}
793     ALU CUBE S7.w@chgr{s} : S5.y S6.z {WL}
794   ALU_GROUP_END
795   EXPORT_DONE PARAM 0 S7.xyzw
796 BLOCK_END)";
797 
798    const char *expect =
799 R"(VS
800 CHIPCLASS CAYMAN
801 INPUT LOC:0
802 OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
803 REGISTERS R0.x R1.x R2.x
804 SHADER
805 BLOCK_START
806   ALU_GROUP_BEGIN
807     ALU CUBE S7.x@chgr{s} : R2.x@chan R1.x@chan {W}
808     ALU CUBE S7.y@chgr{s} : R2.x@chan R0.x@chan {W}
809     ALU CUBE S7.z@chgr{s} : R0.x@chan R2.x@chan {W}
810     ALU CUBE S7.w@chgr{s} : R1.x@chan R2.x@chan {WL}
811   ALU_GROUP_END
812   EXPORT_DONE PARAM 0 S7.xyzw
813 BLOCK_END
814 )";
815    auto sh = from_string(input);
816    optimize(*sh);
817    check(sh, expect);
818 };
819 
TEST_F(TestShaderFromNir,ScheduleSplitLoadIndexConst)820 TEST_F(TestShaderFromNir, ScheduleSplitLoadIndexConst)
821 {
822    const char *input =
823 R"(
824 FS
825 CHIPCLASS CAYMAN
826 PROP MAX_COLOR_EXPORTS:1
827 PROP COLOR_EXPORTS:1
828 PROP COLOR_EXPORT_MASK:15
829 PROP WRITE_ALL_COLORS:0
830 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
831 SHADER
832 BLOCK_START
833   ALU MIN_UINT S3.w@free{s} : KC0[0].x L[0x2] {WL}
834   ALU MOVA_INT IDX0 : S3.w@free{s} {}
835   ALU MOV S4.x@group{s} : KC1[IDX0][0].x {W}
836   ALU MOV S4.y@group{s} : KC1[IDX0][0].y {W}
837   ALU MOV S4.z@group{s} : KC1[IDX0][0].z {W}
838   ALU MOV S4.w@group{s} : KC1[IDX0][0].w {WL}
839   EXPORT_DONE PIXEL 0 S4.xyzw
840 BLOCK_END
841 )";
842 
843    const char *expect =
844 R"(
845 FS
846 CHIPCLASS CAYMAN
847 PROP MAX_COLOR_EXPORTS:1
848 PROP COLOR_EXPORTS:1
849 PROP COLOR_EXPORT_MASK:15
850 PROP WRITE_ALL_COLORS:0
851 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
852 SHADER
853 BLOCK_START
854 ALU_GROUP_BEGIN
855      ALU MIN_UINT S3.w@free{s} : KC0[0].x L[0x2] {WL}
856 ALU_GROUP_END
857 ALU_GROUP_BEGIN
858      ALU MOVA_INT IDX0 : S3.w@free{s} {L}
859 ALU_GROUP_END
860 BLOCK_END
861 BLOCK_START
862 ALU_GROUP_BEGIN
863      ALU MOV S4.x@chgr : KC1[IDX0][0].x {W}
864      ALU MOV S4.y@chgr : KC1[IDX0][0].y {W}
865      ALU MOV S4.z@chgr : KC1[IDX0][0].z {W}
866      ALU MOV S4.w@chgr : KC1[IDX0][0].w {WL}
867 ALU_GROUP_END
868 ALU_GROUP_BEGIN
869 BLOCK_END
870 BLOCK_START
871      EXPORT_DONE PIXEL 0 S4.xyzw
872 BLOCK_END
873 )";
874 
875    auto sh = from_string(input);
876    check(schedule(sh), expect);
877 }
878 
/* CHIPCLASS R600 / FAMILY R600: an indirect array write followed by a
 * direct read of the same array. After split_address_loads(),
 * optimize() and schedule() the expectation has a MOVA_INT AR group,
 * the indirect write, a NOP group, and only then the ADD reading
 * A1[1].x. */
TEST_F(TestShaderFromNir, ScheduleSplitLoadAddrAndNOPAfterIndirectDest)
{
   const char *source_text =
R"(FS
CHIPCLASS R600
FAMILY R600
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
  ALU MOV S1.x : KC0[0].x {WL}
  ALU MOV A1[S1.x].x : KC0[0].y {WL}
  ALU ADD S2.x : A1[1].x KC0[1].x {WL}
  EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   const char *expected_text =
R"(FS
CHIPCLASS R600
FAMILY R600
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU_GROUP_BEGIN
   ALU MOVA_INT AR : KC0[0].x {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
    ALU MOV A1[AR].x : KC0[0].y {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
    ALU NOP __.x : {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
    ALU ADD S2.x@chgr : A1[1].x KC0[1].x {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
  EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   auto shader = from_string(source_text);
   split_address_loads(*shader);
   optimize(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, expected_text);
}
935 
/* CHIPCLASS R600 / FAMILY R600: a direct array write followed by an
 * indirect read of the same array. The expectation puts the MOVA_INT AR
 * in the same group as the direct write and has a NOP group before the
 * ADD that reads A1[AR].x. */
TEST_F(TestShaderFromNir, ScheduleSplitLoadAddrAndNOPBeforIndirectSrc)
{
   const char *source_text =
R"(FS
CHIPCLASS R600
FAMILY R600
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
  ALU MOV S1.x : KC0[0].x {WL}
  ALU MOV A1[0].x : KC0[0].y {WL}
  ALU ADD S2.x : A1[S1.x].x KC0[1].x {WL}
  EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   const char *expected_text =
R"(FS
CHIPCLASS R600
FAMILY R600
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU_GROUP_BEGIN
   ALU MOVA_INT AR : KC0[0].x {}
   ALU MOV A1[0].x : KC0[0].y {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
    ALU NOP __.x : {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
    ALU ADD S2.x@chgr : A1[AR].x KC0[1].x {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
  EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   auto shader = from_string(source_text);
   split_address_loads(*shader);
   optimize(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, expected_text);
}
990 
991 
/* Same shader body as ScheduleSplitLoadAddrAndNOPAfterIndirectDest but
 * with FAMILY RV670: here the expectation has no NOP group between the
 * indirect write and the ADD reading A1[1].x. */
TEST_F(TestShaderFromNir, ScheduleSplitLoadAddrAndNOPAfterIndirectDestRV670)
{
   const char *source_text =
R"(FS
CHIPCLASS R600
FAMILY RV670
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
  ALU MOV S1.x : KC0[0].x {WL}
  ALU MOV A1[S1.x].x : KC0[0].y {WL}
  ALU ADD S2.x : A1[1].x KC0[1].x {WL}
  EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   const char *expected_text =
R"(FS
CHIPCLASS R600
FAMILY RV670
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU_GROUP_BEGIN
   ALU MOVA_INT AR : KC0[0].x {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
    ALU MOV A1[AR].x : KC0[0].y {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
    ALU ADD S2.x@chgr : A1[1].x KC0[1].x {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
  EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   auto shader = from_string(source_text);
   split_address_loads(*shader);
   optimize(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, expected_text);
}
1045 
/* Same shader body as ScheduleSplitLoadAddrAndNOPAfterIndirectDest but
 * with CHIPCLASS EVERGREEN / FAMILY BARTS: the expectation has no NOP
 * group between the indirect write and the ADD reading A1[1].x. */
TEST_F(TestShaderFromNir, ScheduleSplitLoadAddrAndNOPAfterIndirectDestEG)
{
   const char *source_text =
R"(FS
CHIPCLASS EVERGREEN
FAMILY BARTS
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
  ALU MOV S1.x : KC0[0].x {WL}
  ALU MOV A1[S1.x].x : KC0[0].y {WL}
  ALU ADD S2.x : A1[1].x KC0[1].x {WL}
  EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   const char *expected_text =
R"(FS
CHIPCLASS EVERGREEN
FAMILY BARTS
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU_GROUP_BEGIN
   ALU MOVA_INT AR : KC0[0].x {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
    ALU MOV A1[AR].x : KC0[0].y {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
    ALU ADD S2.x@chgr : A1[1].x KC0[1].x {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
  EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   auto shader = from_string(source_text);
   split_address_loads(*shader);
   optimize(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, expected_text);
}
1099 
/* Indirect array write followed by a direct read of the same array,
 * declared for CHIPCLASS R700 / FAMILY RV770.
 *
 * The input shader text is parsed, run through split_address_loads() and
 * optimize(), scheduled, and the result compared with `expect`.
 *
 * Unlike the RV670 and EVERGREEN variants of this test, the expected text
 * here contains an extra ALU group holding a single NOP between the
 * A1[AR] write and the group that reads A1[1].x.
 */
TEST_F(TestShaderFromNir, ScheduleSplitLoadAddrAndNOPAfterIndirectDestRV770)
{
   /* Unscheduled shader: the array destination is indexed by S1.x. */
   const char *input =
R"(FS
CHIPCLASS R700
FAMILY RV770
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
  ALU MOV S1.x : KC0[0].x {WL}
  ALU MOV A1[S1.x].x : KC0[0].y {WL}
  ALU ADD S2.x : A1[1].x KC0[1].x {WL}
  EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   /* Scheduled shader: a NOP group separates the indirect write from the
    * array read. */
   const char *expect =
R"(FS
CHIPCLASS R700
FAMILY RV770
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU_GROUP_BEGIN
   ALU MOVA_INT AR : KC0[0].x {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
    ALU MOV A1[AR].x : KC0[0].y {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
    ALU NOP __.x : {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
    ALU ADD S2.x@chgr : A1[1].x KC0[1].x {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
  EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   auto sh = from_string(input);
   split_address_loads(*sh);
   optimize(*sh);
   check(schedule(sh), expect);
}
1156 
TEST_F(TestShaderFromNir,ScheduleSplitLoadAddrAndNOPAfterIndirectDestRV770_2)1157 TEST_F(TestShaderFromNir, ScheduleSplitLoadAddrAndNOPAfterIndirectDestRV770_2)
1158 {
1159    const char *input =
1160 R"(FS
1161 CHIPCLASS R700
1162 FAMILY RV770
1163 PROP MAX_COLOR_EXPORTS:1
1164 PROP COLOR_EXPORTS:1
1165 PROP COLOR_EXPORT_MASK:15
1166 PROP WRITE_ALL_COLORS:0
1167 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
1168 ARRAYS A1[2].x
1169 SHADER
1170 BLOCK_START
1171   ALU MOV S1.x : KC0[0].x {W}
1172   ALU MOV S1.y : KC0[0].y {W}
1173   ALU ADD S1.z : KC0[0].z KC0[2].z {W}
1174   ALU ADD S1.w : KC0[0].w KC0[2].w {W}
1175   ALU MOV A1[S1.x].x : KC0[1].y {WL}
1176   ALU ADD S2.x : A1[1].x S1.y {W}
1177   ALU ADD S2.y : KC0[1].y S1.z {W}
1178   ALU ADD S2.z : KC0[1].z S1.w {W}
1179   ALU ADD S2.w : KC0[1].w S1.w {WL}
1180   EXPORT_DONE PIXEL 0 S2.xyzw
1181 BLOCK_END)";
1182 
1183 
1184    const char *expect =
1185 R"(FS
1186 CHIPCLASS R700
1187 FAMILY RV770
1188 PROP MAX_COLOR_EXPORTS:1
1189 PROP COLOR_EXPORTS:1
1190 PROP COLOR_EXPORT_MASK:15
1191 PROP WRITE_ALL_COLORS:0
1192 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
1193 ARRAYS A1[2].x
1194 SHADER
1195 BLOCK_START
1196 ALU_GROUP_BEGIN
1197    ALU MOVA_INT AR : KC0[0].x {L}
1198 ALU_GROUP_END
1199 ALU_GROUP_BEGIN
1200     ALU MOV A1[AR].x : KC0[1].y {WL}
1201 ALU_GROUP_END
1202 ALU_GROUP_BEGIN
1203    ALU ADD S1.z : KC0[0].z KC0[2].z {W}
1204    ALU ADD S1.w : KC0[0].w KC0[2].w {WL}
1205 ALU_GROUP_END
1206 ALU_GROUP_BEGIN
1207    ALU ADD S2.x@chgr : A1[1].x KC0[0].y {W}
1208    ALU ADD S2.y@chgr : KC0[1].y S1.z{s} {WL}
1209 ALU_GROUP_END
1210 ALU_GROUP_BEGIN
1211    ALU ADD S2.z@chgr : KC0[1].z S1.w{s} {W}
1212    ALU ADD S2.w@chgr : KC0[1].w S1.w{s} {WL}
1213 ALU_GROUP_END
1214 BLOCK_END
1215 BLOCK_START
1216   EXPORT_DONE PIXEL 0 S2.xyzw
1217 BLOCK_END)";
1218 
1219    auto sh = from_string(input);
1220    split_address_loads(*sh);
1221    optimize(*sh);
1222    check(schedule(sh), expect);
1223 
1224 }
1225