1
2 #include "../sfn_optimizer.h"
3 #include "../sfn_ra.h"
4 #include "../sfn_scheduler.h"
5 #include "../sfn_shader.h"
6 #include "../sfn_split_address_loads.h"
7 #include "sfn_test_shaders.h"
8
9 using namespace r600;
10 using std::ostringstream;
11
/* Dead-code elimination alone: the shader parsed from
 * red_triangle_fs_expect_from_nir must match the *_dce reference afterwards. */
TEST_F(TestShaderFromNir, SimpleDCE)
{
   auto shader = from_string(red_triangle_fs_expect_from_nir);

   dead_code_elimination(*shader);
   check(shader, red_triangle_fs_expect_from_nir_dce);
}
19
/* Runs only the forward copy-propagation pass on add_add_1 and compares
 * against the copy_prop_fwd reference (no backward pass is invoked here). */
TEST_F(TestShaderFromNir, CopyPropagationForwardBackward)
{
   auto shader = from_string(add_add_1_expect_from_nir);

   copy_propagation_fwd(*shader);

   check(shader, add_add_1_expect_from_nir_copy_prop_fwd);
}
26
/* Forward copy propagation followed by dead-code elimination on add_add_1. */
TEST_F(TestShaderFromNir, CopyPropagationForwardDCE)
{
   auto shader = from_string(add_add_1_expect_from_nir);

   copy_propagation_fwd(*shader);
   dead_code_elimination(*shader);

   check(shader, add_add_1_expect_from_nir_copy_prop_fwd_dce);
}
34
/* Starts from the forward-propagated + DCE'd add_add_1 shader, then applies
 * backward copy propagation and another dead-code elimination. */
TEST_F(TestShaderFromNir, CopyPropagationBackwardDCE)
{
   auto shader = from_string(add_add_1_expect_from_nir_copy_prop_fwd_dce);

   copy_propagation_backward(*shader);
   dead_code_elimination(*shader);

   check(shader, add_add_1_expect_from_nir_copy_prop_fwd_dce_bwd);
}
42
/* Iterates forward/backward copy propagation, each followed by dead-code
 * elimination, until a full round reports no progress; the result is
 * compared with basic_interpolation_expect_from_nir_opt. */
TEST_F(TestShaderFromNir, FullOPtimize)
{
   auto shader = from_string(basic_interpolation_orig);

   bool changed = false;
   do {
      /* Every pass runs in each round; |= does not short-circuit. */
      changed = copy_propagation_fwd(*shader);
      changed |= dead_code_elimination(*shader);
      changed |= copy_propagation_backward(*shader);
      changed |= dead_code_elimination(*shader);
   } while (changed);

   check(shader, basic_interpolation_expect_from_nir_opt);
}
59
/* Runs the copy-propagation / dead-code-elimination rounds to a fixed point
 * on shader_group_chan_pin_to_combine and expects
 * shader_group_chan_pin_combined. */
TEST_F(TestShaderFromNir, CombinePinFlags)
{
   auto shader = from_string(shader_group_chan_pin_to_combine);

   bool changed = false;
   do {
      /* Every pass runs in each round; |= does not short-circuit. */
      changed = copy_propagation_fwd(*shader);
      changed |= dead_code_elimination(*shader);
      changed |= copy_propagation_backward(*shader);
      changed |= dead_code_elimination(*shader);
   } while (changed);

   check(shader, shader_group_chan_pin_combined);
}
76
/* optimize() applied to the nested-loop vertex shader fixture. */
TEST_F(TestShaderFromNir, FullOPtimizeLoop)
{
   auto shader = from_string(vs_nexted_loop_from_nir_expect);
   optimize(*shader);
   check(shader, vs_nexted_loop_from_nir_expect_opt);
}
85
/* The input copies R2.x/R3.x into the grouped value S1.xy that feeds a TEX
 * instruction. After optimize() the expectation has no MOVs left: the ALU
 * ops write R1024.x/.y@group and the TEX reads R1024.xy directly. */
TEST_F(TestShaderFromNir, CombineRegisterToTexSrc)
{
   const char *unoptimized =
      R"(FS
CHIPCLASS EVERGREEN
REGISTERS R0.x R1.x R2.x R3.x
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
ALU ADD R2.x : R0.x R2.x {W}
ALU MUL R3.x : R0.x R3.x {WL}
ALU MOV S1.x@group : R2.x {W}
ALU MOV S1.y@group : R3.x {WL}
TEX SAMPLE S2.xyzw : S1.xy__ RID:18 SID:0 NNNN
EXPORT_DONE PIXEL 0 S2.xyzw
)";

   const char *expected =
      R"(FS
CHIPCLASS EVERGREEN
REGISTERS R1024.x@group R1024.y@group R0.x
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER

ALU ADD R1024.x@group : R0.x R1024.x@group {W}
ALU MUL R1024.y@group : R0.x R1024.y@group {WL}
TEX SAMPLE S2.xyzw : R1024.xy__ RID:18 SID:0 NNNN
EXPORT_DONE PIXEL 0 S2.xyzw
)";

   auto shader = from_string(unoptimized);
   optimize(*shader);
   check(shader, expected);
}
127
/* A MOV from register R0.x into S2.x feeds a MUL; the expectation after
 * optimize() is that the MOV is gone and the MUL reads R0.x directly. */
TEST_F(TestShaderFromNir, CopyPropRegDest)
{
   const char *unoptimized =
      R"(FS
CHIPCLASS EVERGREEN
REGISTERS R0.x
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
ALU MOV S2.x : R0.x {W}
ALU MUL S3.x : S2.x S2.x {W}
EXPORT_DONE PIXEL 0 S3.xxxx
)";

   const char *expected =
      R"(FS
CHIPCLASS EVERGREEN
REGISTERS R0.x
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
ALU MUL S3.x : R0.x R0.x {W}
EXPORT_DONE PIXEL 0 S3.xxxx
)";

   auto shader = from_string(unoptimized);
   optimize(*shader);
   check(shader, expected);
}
163
/* R0.x is copied to S2.x and then overwritten with a literal before the MUL.
 * The expectation keeps the copy into S2.x (the MUL still reads S2.x) and
 * only replaces the MUL's second operand with the literal L[2.0]. */
TEST_F(TestShaderFromNir, CopyPropRegDestAndOverwrite)
{
   const char *unoptimized =
      R"(FS
CHIPCLASS EVERGREEN
REGISTERS R0.x
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
ALU MOV S2.x : R0.x {W}
ALU MOV R0.x : L[2.0] {W}
ALU MUL S3.x : S2.x R0.x {W}
EXPORT_DONE PIXEL 0 S3.xxxx
)";

   const char *expected =
      R"(FS
CHIPCLASS EVERGREEN
REGISTERS R0.x
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
ALU MOV S2.x : R0.x {W}
ALU MOV R0.x : L[2.0] {W}
ALU MUL S3.x : S2.x L[2.0] {W}
EXPORT_DONE PIXEL 0 S3.xxxx
)";

   auto shader = from_string(unoptimized);
   optimize(*shader);
   check(shader, expected);
}
202
/* An array element is read, then the array is written through an indirect
 * index, then another element is read. The expected text is identical to the
 * input, i.e. optimize() must leave these instructions as they are. */
TEST_F(TestShaderFromNir, CopyPropRegDestAndIndirectOverwrite)
{
   const char *unoptimized =
      R"(FS
CHIPCLASS EVERGREEN
REGISTERS R0.x
ARRAYS A0[2].x
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
ALU MOV S2.x : A0[0].x {W}
ALU MOV A0[R0.x].x : L[2.0] {W}
ALU MUL S3.x : S2.x A0[1].x {W}
EXPORT_DONE PIXEL 0 S3.xxxx
)";

   const char *expected =
      R"(FS
CHIPCLASS EVERGREEN
REGISTERS R0.x
ARRAYS A0[2].x
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
ALU MOV S2.x : A0[0].x {W}
ALU MOV A0[R0.x].x : L[2.0] {W}
ALU MUL S3.x : S2.x A0[1].x {W}
EXPORT_DONE PIXEL 0 S3.xxxx
)";

   auto shader = from_string(unoptimized);
   optimize(*shader);
   check(shader, expected);
}
243
244
/* Mixes direct array writes with an indirect read (A0[R0.x]) and an indirect
 * write (A0[R0.y]). Only split_address_loads() and schedule() are run here;
 * the expected schedule loads AR with MOVA_INT before each indirect access
 * and keeps the array accesses in their original relative order. */
TEST_F(TestShaderFromNir, CopyPropAndIndirectReadOrder)
{
   const char *unscheduled =
      R"(FS
CHIPCLASS EVERGREEN
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:1
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A0[4].x
REGISTERS R0.xy
SHADER
BLOCK_START
ALU MOV A0[0].x : I[0] {W}
ALU MOV A0[1].x : I[1] {W}
ALU MOV A0[2].x : I[0] {W}
ALU MOV A0[3].x : I[0] {W}
ALU MOV S2.x{s} : A0[R0.x].x {W}
ALU MOV A0[1].x : L[0x2] {W}
ALU MOV A0[2].x : L[0x2] {W}
ALU MOV A0[3].x : L[0x3] {W}
ALU MOV A0[R0.y].x : I[1] {W}
ALU MOV S3.x : A0[0].x {W}
ALU MOV S3.y : A0[1].x {W}
ALU MOV S3.z : A0[2].x {W}
ALU MOV S3.w : A0[3].x {W}
EXPORT_DONE PIXEL 0 S3.xyzw
BLOCK_END
)";

   const char *expected =
      R"(FS
CHIPCLASS EVERGREEN
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:1
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A0[4].x
REGISTERS R0.xy
SHADER
BLOCK_START
ALU_GROUP_BEGIN
ALU MOVA_INT AR : R0.x {}
ALU MOV A0[0].x : I[0] {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV A0[1].x : I[1] {W}
ALU MOV A0[2].x : I[0] {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV A0[3].x : I[0] {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV S2.x : A0[AR].x {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOVA_INT AR : R0.y {}
ALU MOV A0[1].x : L[0x2] {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV A0[2].x : L[0x2] {W}
ALU MOV A0[3].x : L[0x3] {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV A0[AR].x : I[1] {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV S3.x@chgr : A0[0].x {W}
ALU MOV S3.y@chgr : A0[1].x {W}
ALU MOV S3.z@chgr : A0[2].x {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV S3.w@chgr : A0[3].x {WL}
ALU_GROUP_END
BLOCK_START
BLOCK_END
EXPORT_DONE PIXEL 0 S3.xyzw
BLOCK_END
)";

   auto shader = from_string(unscheduled);
   split_address_loads(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, expected);
}
331
/* optimize() on the shader_with_dest_array fixture. */
TEST_F(TestShaderFromNir, OptimizeWithDestArrayValue)
{
   auto shader = from_string(shader_with_dest_array);
   optimize(*shader);
   check(shader, shader_with_dest_array_opt_expect);
}
340
/* Takes the already optimized dest-array shader, splits the address loads
 * and checks the resulting schedule. */
TEST_F(TestShaderFromNir, ScheduleOPtimizedWithDestArrayValue)
{
   auto shader = from_string(shader_with_dest_array_opt_expect);
   split_address_loads(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, shader_with_dest_array_opt_scheduled);
}
347
/* Address-load splitting plus scheduling on shader_with_dest_array2. */
TEST_F(TestShaderFromNir, ScheduleWithArrayWriteAndRead)
{
   auto shader = from_string(shader_with_dest_array2);
   split_address_loads(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, shader_with_dest_array2_scheduled);
}
354
/* Evaluates live ranges of the scheduled dest-array shader, expects register
 * allocation to report success, and compares the allocated shader against
 * the *_ra reference. */
TEST_F(TestShaderFromNir, RA_with_dest_array)
{
   auto shader = from_string(shader_with_dest_array2_scheduled);

   auto live_ranges = r600::LiveRangeEvaluator().run(*shader);
   const bool allocated = r600::register_allocation(live_ranges);
   EXPECT_TRUE(allocated);

   ra_check(shader, shader_with_dest_array2_scheduled_ra);
}
364
/* Same register-allocation check as above, on the scheduled shader with
 * combined group/chan pins. */
TEST_F(TestShaderFromNir, RA_with_chan_group)
{
   auto shader = from_string(shader_group_chan_pin_combined_scheduled);

   auto live_ranges = r600::LiveRangeEvaluator().run(*shader);
   const bool allocated = r600::register_allocation(live_ranges);
   EXPECT_TRUE(allocated);

   ra_check(shader, shader_group_chan_pin_combined_scheduled_ra);
}
373
/* optimize() on the tes_pre_op fixture. */
TEST_F(TestShaderFromNir, TES_opt)
{
   auto shader = from_string(tes_pre_op);
   optimize(*shader);
   check(shader, tes_optimized);
}
382
/* Schedules the tes_optimized_pre_sched fixture without running any
 * optimizer pass first. */
TEST_F(TestShaderFromNir, TES_scheduled)
{
   auto shader = from_string(tes_optimized_pre_sched);
   auto scheduled = schedule(shader);
   check(scheduled, tes_optimized_sched);
}
389
390 /*
391 TEST_F(TestShaderFromNir, ShaderClone)
392 {
393 auto sh = from_string(red_triangle_fs_expect_from_nir);
394
395 auto sh_cloned = sh->clone();
396
397 MemoryPool::instance().push();
398 dead_code_elimination(*sh);
399
400 check(sh, red_triangle_fs_expect_from_nir_dce);
401
402 check(sh_cloned, red_triangle_fs_expect_from_nir);
403
404 MemoryPool::instance().pop();
405 check(sh, red_triangle_fs_expect_from_nir_dce);
406 }
407 */
408
/* Schedules the unoptimized basic interpolation shader. */
TEST_F(TestShaderFromNir, ShaderSchedule)
{
   auto shader = from_string(basic_interpolation_orig);
   auto scheduled = schedule(shader);
   check(scheduled, basic_interpolation_expect_from_nir_sched);
}
415
/* Schedules the Cayman variant of the basic interpolation shader. */
TEST_F(TestShaderFromNir, ShaderScheduleCayman)
{
   auto shader = from_string(basic_interpolation_orig_cayman);
   auto scheduled = schedule(shader);
   check(scheduled, basic_interpolation_expect_from_nir_sched_cayman);
}
422
/* Optimizes and then schedules the Cayman basic interpolation shader. */
TEST_F(TestShaderFromNir, ShaderScheduleOptimizedCayman)
{
   auto shader = from_string(basic_interpolation_orig_cayman);
   optimize(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, basic_interpolation_expect_opt_sched_cayman);
}
431
/* Forward copy propagation plus dead-code elimination on the dot4_pre
 * fixture. */
TEST_F(TestShaderFromNir, CopyPropLegalConst)
{
   auto shader = from_string(dot4_pre);

   copy_propagation_fwd(*shader);
   dead_code_elimination(*shader);

   check(shader, dot4_copy_prop_dce);
}
441
/* optimize() on the glxgears vertex shader fixture. */
TEST_F(TestShaderFromNir, FullOPtimize_glxgears_vs2)
{
   auto shader = from_string(glxgears_vs2_from_nir_expect);

   optimize(*shader);

   check(shader, glxgears_vs2_from_nir_expect_optimized);
}
449
/* Optimizes the test_schedule_group fixture and checks its schedule. */
TEST_F(TestShaderFromNir, test_schedule_group)
{
   auto shader = from_string(test_schedule_group);
   optimize(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, test_schedule_group_expect);
}
457
/* Optimizes and schedules shader_copy_prop_dont_kill_double_use and compares
 * with its *_expect reference. */
TEST_F(TestShaderFromNir, test_dont_kill_dual_use)
{
   auto shader = from_string(shader_copy_prop_dont_kill_double_use);
   optimize(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, shader_copy_prop_dont_kill_double_use_expect);
}
464
/* Optimizes and schedules the shader_with_bany_expect_eg fixture. */
TEST_F(TestShaderFromNir, test_schedule_with_bany)
{
   auto shader = from_string(shader_with_bany_expect_eg);
   optimize(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, shader_with_bany_expect_opt_sched_eg);
}
472
/* optimize() on shader_group_chan_pin_to_combine_2. */
TEST_F(TestShaderFromNir, GroupAndChanCombine)
{
   auto shader = from_string(shader_group_chan_pin_to_combine_2);

   optimize(*shader);

   check(shader, shader_group_chan_pin_to_combine_2_opt);
}
479
/* Schedules fs_with_loop_multislot_reuse without prior optimization. */
TEST_F(TestShaderFromNir, RemoveUseAfterSplitgroup)
{
   auto shader = from_string(fs_with_loop_multislot_reuse);
   auto scheduled = schedule(shader);
   check(scheduled, fs_with_loop_multislot_reuse_scheduled);
}
485
/* optimize() on the vtx_for_tcs_inp fixture. */
TEST_F(TestShaderFromNir, OptimizeVSforTCS)
{
   auto shader = from_string(vtx_for_tcs_inp);

   optimize(*shader);

   check(shader, vtx_for_tcs_opt);
}
492
/* Schedules the vtx_for_tcs_pre_sched fixture. */
TEST_F(TestShaderFromNir, ScheduleVSforTCS)
{
   auto shader = from_string(vtx_for_tcs_pre_sched);
   auto scheduled = schedule(shader);
   check(scheduled, vtx_for_tcs_sched);
}
498
/* optimize() on the fs_opt_tex_coord_init fixture. */
TEST_F(TestShaderFromNir, fs_opt_tex_coord)
{
   auto shader = from_string(fs_opt_tex_coord_init);
   optimize(*shader);
   check(shader, fs_opt_tex_coord_expect);
}
507
/* Schedules the fs_sched_tex_coord_init fixture. */
TEST_F(TestShaderFromNir, fs_shed_tex_coord)
{
   auto shader = from_string(fs_sched_tex_coord_init);
   auto scheduled = schedule(shader);
   check(scheduled, fs_sched_tex_coord_expect);
}
513
/* Cayman vertex shader with chains of MOVs around ADD and EXP_IEEE.
 * After optimize() the expectation has the ADDs read the literals and the
 * KC0 constants directly and the EXP_IEEE results written straight into the
 * exported group S18; all intermediate MOVs are gone. */
TEST_F(TestShaderFromNir, OptimizeAddWChanetoTrans)
{
   const char *input =
      R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
OUTPUT LOC:1 VARYING_SLOT:32 MASK:15
SHADER
BLOCK_START
ALU MOV S2.x@free{s} : I[0] {WL}
ALU MOV S3.y@free{s} : L[0x40c00000] {WL}
ALU MOV S4.z@free{s} : L[0xc1140000] {WL}
ALU MOV S5.w@free{s} : L[0xbfe00000] {WL}
ALU MOV S6.x@free{s} : L[0x3fa00000] {WL}
ALU MOV S7.x{s} : KC0[0].x {W}
ALU MOV S7.y{s} : KC0[0].y {W}
ALU MOV S7.z{s} : KC0[0].z {W}
ALU MOV S7.w{s} : KC0[0].w {WL}
ALU ADD S8.y@free{s} : S3.y@free{s} -S7.x{s} {WL}
ALU ADD S9.z@free{s} : S4.z@free{s} -S7.y{s} {WL}
ALU ADD S10.w@free{s} : S5.w@free{s} -S7.z{s} {WL}
ALU ADD S11.x@free{s} : S6.x@free{s} -S7.w{s} {WL}
ALU EXP_IEEE S12.y@free{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {WL}
ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {WL}
ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {WL}
ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
ALU MOV S17.x{s} : S12.y@free{s} {W}
ALU MOV S17.y{s} : S13.z@free{s} {W}
ALU MOV S17.z{s} : S14.x@free{s} {W}
ALU MOV S17.w{s} : S15.y@free{s} {WL}
ALU MOV S18.x@group{s} : S17.x{s} {W}
ALU MOV S18.y@group{s} : S17.y{s} {W}
ALU MOV S18.z@group{s} : S17.z{s} {W}
ALU MOV S18.w@group{s} : S17.w{s} {WL}
EXPORT_DONE PARAM 0 S18.xyzw
BLOCK_END)";

   const char *expect =
      R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
OUTPUT LOC:1 VARYING_SLOT:32 MASK:15
SHADER
BLOCK_START
ALU ADD S8.y@free{s} : L[0x40c00000] -KC0[0].x {WL}
ALU ADD S9.z@free{s} : L[0xc1140000] -KC0[0].y {WL}
ALU ADD S10.w@free{s} : L[0xbfe00000] -KC0[0].z {WL}
ALU ADD S11.x@free{s} : L[0x3fa00000] -KC0[0].w {WL}
ALU EXP_IEEE S18.x@group{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {W}
ALU EXP_IEEE S18.y@group{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W}
ALU EXP_IEEE S18.z@group{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W}
ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
EXPORT_DONE PARAM 0 S18.xyzw
BLOCK_END
)";
   auto sh = from_string(input);
   optimize(*sh);
   check(sh, expect);
}
575
/* MOVs carrying abs/neg source modifiers feed plain ADDs. The expectation
 * after optimize() is that each ADD reads the KC0 constant directly with the
 * MOV's modifier (|x|, -y, -|z|, w) applied on the ADD operand. */
TEST_F(TestShaderFromNir, PeeholeSoureModsSimple)
{
   const char *input =
      R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
OUTPUT LOC:1 VARYING_SLOT:32 MASK:15
SHADER
BLOCK_START
ALU MOV S2.x@free{s} : I[0] {WL}
ALU MOV S3.y@free{s} : L[0x40c00000] {WL}
ALU MOV S4.z@free{s} : L[0xc1140000] {WL}
ALU MOV S5.w@free{s} : L[0xbfe00000] {WL}
ALU MOV S6.x@free{s} : L[0x3fa00000] {WL}
ALU MOV S7.x{s} : |KC0[0].x| {W}
ALU MOV S7.y{s} : -KC0[0].y {W}
ALU MOV S7.z{s} : -|KC0[0].z| {W}
ALU MOV S7.w{s} : KC0[0].w {WL}
ALU ADD S8.y@free{s} : S3.y@free{s} S7.x{s} {WL}
ALU ADD S9.z@free{s} : S4.z@free{s} S7.y{s} {WL}
ALU ADD S10.w@free{s} : S5.w@free{s} S7.z{s} {WL}
ALU ADD S11.x@free{s} : S6.x@free{s} S7.w{s} {WL}
ALU EXP_IEEE S12.y@free{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {WL}
ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {WL}
ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {WL}
ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
ALU MOV S17.x{s} : S12.y@free{s} {W}
ALU MOV S17.y{s} : S13.z@free{s} {W}
ALU MOV S17.z{s} : S14.x@free{s} {W}
ALU MOV S17.w{s} : S15.y@free{s} {WL}
ALU MOV S18.x@group{s} : S17.x{s} {W}
ALU MOV S18.y@group{s} : S17.y{s} {W}
ALU MOV S18.z@group{s} : S17.z{s} {W}
ALU MOV S18.w@group{s} : S17.w{s} {WL}
EXPORT_DONE PARAM 0 S18.xyzw
BLOCK_END)";

   const char *expect =
      R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
OUTPUT LOC:1 VARYING_SLOT:32 MASK:15
SHADER
BLOCK_START
ALU ADD S8.y@free{s} : L[0x40c00000] |KC0[0].x| {WL}
ALU ADD S9.z@free{s} : L[0xc1140000] -KC0[0].y {WL}
ALU ADD S10.w@free{s} : L[0xbfe00000] -|KC0[0].z| {WL}
ALU ADD S11.x@free{s} : L[0x3fa00000] KC0[0].w {WL}
ALU EXP_IEEE S18.x@group{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {W}
ALU EXP_IEEE S18.y@group{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W}
ALU EXP_IEEE S18.z@group{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W}
ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
EXPORT_DONE PARAM 0 S18.xyzw
BLOCK_END
)";
   auto sh = from_string(input);
   optimize(*sh);
   check(sh, expect);
}
637
/* Two stacked levels of MOVs with abs/neg modifiers (S7 from KC0, S8 from
 * S7). The expected ADD operands show the combined modifiers:
 * |KC0.x|, KC0.y (negated twice), |KC0.z| and -|KC0.x|. */
TEST_F(TestShaderFromNir, PeeholeSoureModsAbsNegTwice)
{
   const char *input =
      R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
OUTPUT LOC:1 VARYING_SLOT:32 MASK:15
SHADER
BLOCK_START
ALU MOV S2.x@free{s} : I[0] {WL}
ALU MOV S3.y@free{s} : L[0x40c00000] {WL}
ALU MOV S4.z@free{s} : L[0xc1140000] {WL}
ALU MOV S5.w@free{s} : L[0xbfe00000] {WL}
ALU MOV S6.x@free{s} : L[0x3fa00000] {WL}
ALU MOV S7.x{s} : |KC0[0].x| {W}
ALU MOV S7.y{s} : -KC0[0].y {W}
ALU MOV S7.z{s} : -|KC0[0].z| {W}
ALU MOV S7.w{s} : KC0[0].w {WL}
ALU MOV S8.x : |S7.x| {W}
ALU MOV S8.y : -S7.y {W}
ALU MOV S8.z : -|S7.z| {W}
ALU MOV S8.w : -|S7.x| {WL}
ALU ADD S19.y@free{s} : S3.y@free{s} S8.x {WL}
ALU ADD S9.z@free{s} : S4.z@free{s} S8.y {WL}
ALU ADD S10.w@free{s} : S5.w@free{s} S8.z {WL}
ALU ADD S11.x@free{s} : S6.x@free{s} S8.w {WL}
ALU EXP_IEEE S12.y@free{s} : S19.y@free{s} + S19.y@free{s} + S19.y@free{s} {WL}
ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {WL}
ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {WL}
ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
ALU MOV S17.x{s} : S12.y@free{s} {W}
ALU MOV S17.y{s} : S13.z@free{s} {W}
ALU MOV S17.z{s} : S14.x@free{s} {W}
ALU MOV S17.w{s} : S15.y@free{s} {WL}
ALU MOV S18.x@group{s} : S17.x{s} {W}
ALU MOV S18.y@group{s} : S17.y{s} {W}
ALU MOV S18.z@group{s} : S17.z{s} {W}
ALU MOV S18.w@group{s} : S17.w{s} {WL}
EXPORT_DONE PARAM 0 S18.xyzw
BLOCK_END)";

   const char *expect =
      R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
OUTPUT LOC:1 VARYING_SLOT:32 MASK:15
SHADER
BLOCK_START
ALU ADD S19.y@free{s} : L[0x40c00000] |KC0[0].x| {WL}
ALU ADD S9.z@free{s} : L[0xc1140000] KC0[0].y {WL}
ALU ADD S10.w@free{s} : L[0xbfe00000] |KC0[0].z| {WL}
ALU ADD S11.x@free{s} : L[0x3fa00000] -|KC0[0].x| {WL}
ALU EXP_IEEE S18.x@group{s} : S19.y@free{s} + S19.y@free{s} + S19.y@free{s} {W}
ALU EXP_IEEE S18.y@group{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W}
ALU EXP_IEEE S18.z@group{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W}
ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
EXPORT_DONE PARAM 0 S18.xyzw
BLOCK_END
)";
   auto sh = from_string(input);
   optimize(*sh);
   check(sh, expect);
}
703
/* An ADD whose result goes through a "MOV CLAMP". The expectation folds the
 * clamp into the ADD (ADD CLAMP S3.x), pulls the source modifiers onto the
 * ADD operands, and lets the export read S3 instead of S4. */
TEST_F(TestShaderFromNir, PeeholeSoureModsClamp)
{
   const char *input =
      R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
SHADER
BLOCK_START
ALU MOV S1.x{s} : |KC0[0].x| {W}
ALU MOV S2.y{s} : -KC0[0].y {W}
ALU ADD S3.x : S1.x S2.y {W}
ALU MOV CLAMP S4.x : S3.x {W}
EXPORT_DONE PARAM 0 S4.xxxx
BLOCK_END)";

   const char *expect =
      R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
SHADER
BLOCK_START
ALU ADD CLAMP S3.x : |KC0[0].x| -KC0[0].y {W}
EXPORT_DONE PARAM 0 S3.xxxx
BLOCK_END
)";
   auto sh = from_string(input);
   optimize(*sh);
   check(sh, expect);
}
735
/* Eight modifier-carrying MOVs feed a single DOT4. The expectation is one
 * DOT4 reading KC0 and R1 directly, with each MOV's abs/neg modifier applied
 * to the corresponding DOT4 operand. */
TEST_F(TestShaderFromNir, PeeholeSoureModsMuliSlot)
{
   const char *input =
      R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
REGISTERS R1.xyzw
SHADER
BLOCK_START
ALU MOV S1.x{s} : |KC0[0].x| {W}
ALU MOV S1.y{s} : -KC0[0].y {W}
ALU MOV S1.z{s} : |KC0[0].z| {W}
ALU MOV S1.w{s} : KC0[0].w {W}
ALU MOV S2.x{s} : |R1.x| {W}
ALU MOV S2.y{s} : R1.y {W}
ALU MOV S2.z{s} : -R1.z {W}
ALU MOV S2.w{s} : -R1.w {W}
ALU DOT4 S5.x : S1.x S2.x + S1.y S2.y + S1.z S2.z + S1.w S2.w {W}
EXPORT_DONE PARAM 0 S5.xxxx
BLOCK_END)";

   const char *expect =
      R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
REGISTERS R1.xyzw
SHADER
BLOCK_START
ALU DOT4 S5.x : |KC0[0].x| |R1.x| + -KC0[0].y R1.y + |KC0[0].z| -R1.z + KC0[0].w -R1.w {W}
EXPORT_DONE PARAM 0 S5.xxxx
BLOCK_END
)";
   auto sh = from_string(input);
   optimize(*sh);
   check(sh, expect);
}
774
775
/* Register values are copied into S4/S5/S6 and consumed by a pre-built CUBE
 * ALU group. The expectation drops the MOVs and has the CUBE slots read the
 * registers directly, printed as R?.x@chan. */
TEST_F(TestShaderFromNir, OptimizeIntoGroup)
{
   const char *input =
      R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
REGISTERS R0.x R1.x R2.x
SHADER
BLOCK_START
ALU MOV S4.x : R0.x {W}
ALU MOV S5.y : R1.x {W}
ALU MOV S6.z : R2.x {WL}
ALU_GROUP_BEGIN
ALU CUBE S7.x@chgr{s} : S6.z S5.y {W}
ALU CUBE S7.y@chgr{s} : S6.z S4.x {W}
ALU CUBE S7.z@chgr{s} : S4.x S6.z {W}
ALU CUBE S7.w@chgr{s} : S5.y S6.z {WL}
ALU_GROUP_END
EXPORT_DONE PARAM 0 S7.xyzw
BLOCK_END)";

   const char *expect =
      R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0
OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
REGISTERS R0.x R1.x R2.x
SHADER
BLOCK_START
ALU_GROUP_BEGIN
ALU CUBE S7.x@chgr{s} : R2.x@chan R1.x@chan {W}
ALU CUBE S7.y@chgr{s} : R2.x@chan R0.x@chan {W}
ALU CUBE S7.z@chgr{s} : R0.x@chan R2.x@chan {W}
ALU CUBE S7.w@chgr{s} : R1.x@chan R2.x@chan {WL}
ALU_GROUP_END
EXPORT_DONE PARAM 0 S7.xyzw
BLOCK_END
)";
   auto sh = from_string(input);
   optimize(*sh);
   check(sh, expect);
}
819
/* Schedules a Cayman shader that loads IDX0 via MOVA_INT and then reads
 * KC1[IDX0]. In the expected schedule the index load sits in its own ALU
 * group and the indexed constant reads follow in a separate block. */
TEST_F(TestShaderFromNir, ScheduleSplitLoadIndexConst)
{
   const char *unscheduled =
      R"(
FS
CHIPCLASS CAYMAN
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
BLOCK_START
ALU MIN_UINT S3.w@free{s} : KC0[0].x L[0x2] {WL}
ALU MOVA_INT IDX0 : S3.w@free{s} {}
ALU MOV S4.x@group{s} : KC1[IDX0][0].x {W}
ALU MOV S4.y@group{s} : KC1[IDX0][0].y {W}
ALU MOV S4.z@group{s} : KC1[IDX0][0].z {W}
ALU MOV S4.w@group{s} : KC1[IDX0][0].w {WL}
EXPORT_DONE PIXEL 0 S4.xyzw
BLOCK_END
)";

   const char *expected =
      R"(
FS
CHIPCLASS CAYMAN
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
SHADER
BLOCK_START
ALU_GROUP_BEGIN
ALU MIN_UINT S3.w@free{s} : KC0[0].x L[0x2] {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOVA_INT IDX0 : S3.w@free{s} {L}
ALU_GROUP_END
BLOCK_END
BLOCK_START
ALU_GROUP_BEGIN
ALU MOV S4.x@chgr : KC1[IDX0][0].x {W}
ALU MOV S4.y@chgr : KC1[IDX0][0].y {W}
ALU MOV S4.z@chgr : KC1[IDX0][0].z {W}
ALU MOV S4.w@chgr : KC1[IDX0][0].w {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
BLOCK_END
BLOCK_START
EXPORT_DONE PIXEL 0 S4.xyzw
BLOCK_END
)";

   auto shader = from_string(unscheduled);
   auto scheduled = schedule(shader);
   check(scheduled, expected);
}
878
/* FAMILY R600: an indirect array write (A1[S1.x]) is followed by a direct
 * read of A1[1]. The expected schedule loads AR with MOVA_INT and places a
 * NOP group between the indirect write and the following array read. */
TEST_F(TestShaderFromNir, ScheduleSplitLoadAddrAndNOPAfterIndirectDest)
{
   const char *source_text =
      R"(FS
CHIPCLASS R600
FAMILY R600
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU MOV S1.x : KC0[0].x {WL}
ALU MOV A1[S1.x].x : KC0[0].y {WL}
ALU ADD S2.x : A1[1].x KC0[1].x {WL}
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   const char *expected_text =
      R"(FS
CHIPCLASS R600
FAMILY R600
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU_GROUP_BEGIN
ALU MOVA_INT AR : KC0[0].x {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV A1[AR].x : KC0[0].y {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU NOP __.x : {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU ADD S2.x@chgr : A1[1].x KC0[1].x {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   auto shader = from_string(source_text);
   split_address_loads(*shader);
   optimize(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, expected_text);
}
935
/* FAMILY R600: a direct array write (A1[0]) is followed by an indirect read
 * (A1[S1.x]). The expected schedule puts the MOVA_INT in the same group as
 * the array write and a NOP group before the ADD that reads A1[AR]. */
TEST_F(TestShaderFromNir, ScheduleSplitLoadAddrAndNOPBeforIndirectSrc)
{
   const char *source_text =
      R"(FS
CHIPCLASS R600
FAMILY R600
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU MOV S1.x : KC0[0].x {WL}
ALU MOV A1[0].x : KC0[0].y {WL}
ALU ADD S2.x : A1[S1.x].x KC0[1].x {WL}
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   const char *expected_text =
      R"(FS
CHIPCLASS R600
FAMILY R600
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU_GROUP_BEGIN
ALU MOVA_INT AR : KC0[0].x {}
ALU MOV A1[0].x : KC0[0].y {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU NOP __.x : {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU ADD S2.x@chgr : A1[AR].x KC0[1].x {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   auto shader = from_string(source_text);
   split_address_loads(*shader);
   optimize(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, expected_text);
}
990
991
/* Same instruction sequence as the R600 indirect-destination case, but with
 * FAMILY RV670: the expected schedule contains no NOP group between the
 * indirect write and the following array read. */
TEST_F(TestShaderFromNir, ScheduleSplitLoadAddrAndNOPAfterIndirectDestRV670)
{
   const char *source_text =
      R"(FS
CHIPCLASS R600
FAMILY RV670
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU MOV S1.x : KC0[0].x {WL}
ALU MOV A1[S1.x].x : KC0[0].y {WL}
ALU ADD S2.x : A1[1].x KC0[1].x {WL}
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   const char *expected_text =
      R"(FS
CHIPCLASS R600
FAMILY RV670
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU_GROUP_BEGIN
ALU MOVA_INT AR : KC0[0].x {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV A1[AR].x : KC0[0].y {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU ADD S2.x@chgr : A1[1].x KC0[1].x {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   auto shader = from_string(source_text);
   split_address_loads(*shader);
   optimize(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, expected_text);
}
1045
/* Indirect array write followed by a direct array read on CHIPCLASS
 * EVERGREEN / FAMILY BARTS: the expected schedule contains no NOP group
 * after the indirect write. */
TEST_F(TestShaderFromNir, ScheduleSplitLoadAddrAndNOPAfterIndirectDestEG)
{
   const char *source_text =
      R"(FS
CHIPCLASS EVERGREEN
FAMILY BARTS
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU MOV S1.x : KC0[0].x {WL}
ALU MOV A1[S1.x].x : KC0[0].y {WL}
ALU ADD S2.x : A1[1].x KC0[1].x {WL}
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   const char *expected_text =
      R"(FS
CHIPCLASS EVERGREEN
FAMILY BARTS
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU_GROUP_BEGIN
ALU MOVA_INT AR : KC0[0].x {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV A1[AR].x : KC0[0].y {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU ADD S2.x@chgr : A1[1].x KC0[1].x {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   auto shader = from_string(source_text);
   split_address_loads(*shader);
   optimize(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, expected_text);
}
1099
/* Indirect array write followed by a direct array read on CHIPCLASS R700 /
 * FAMILY RV770: the expected schedule has a NOP group between the indirect
 * write and the ADD that reads A1[1]. */
TEST_F(TestShaderFromNir, ScheduleSplitLoadAddrAndNOPAfterIndirectDestRV770)
{
   const char *source_text =
      R"(FS
CHIPCLASS R700
FAMILY RV770
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU MOV S1.x : KC0[0].x {WL}
ALU MOV A1[S1.x].x : KC0[0].y {WL}
ALU ADD S2.x : A1[1].x KC0[1].x {WL}
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   const char *expected_text =
      R"(FS
CHIPCLASS R700
FAMILY RV770
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU_GROUP_BEGIN
ALU MOVA_INT AR : KC0[0].x {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV A1[AR].x : KC0[0].y {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU NOP __.x : {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU ADD S2.x@chgr : A1[1].x KC0[1].x {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";

   auto shader = from_string(source_text);
   split_address_loads(*shader);
   optimize(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, expected_text);
}
1156
/* RV770 variant with additional independent ALU work. In the expected
 * schedule the group computing S1.z/S1.w sits between the indirect array
 * write and the group that reads A1[1], and no NOP group appears. */
TEST_F(TestShaderFromNir, ScheduleSplitLoadAddrAndNOPAfterIndirectDestRV770_2)
{
   const char *source_text =
      R"(FS
CHIPCLASS R700
FAMILY RV770
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU MOV S1.x : KC0[0].x {W}
ALU MOV S1.y : KC0[0].y {W}
ALU ADD S1.z : KC0[0].z KC0[2].z {W}
ALU ADD S1.w : KC0[0].w KC0[2].w {W}
ALU MOV A1[S1.x].x : KC0[1].y {WL}
ALU ADD S2.x : A1[1].x S1.y {W}
ALU ADD S2.y : KC0[1].y S1.z {W}
ALU ADD S2.z : KC0[1].z S1.w {W}
ALU ADD S2.w : KC0[1].w S1.w {WL}
EXPORT_DONE PIXEL 0 S2.xyzw
BLOCK_END)";

   const char *expected_text =
      R"(FS
CHIPCLASS R700
FAMILY RV770
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 FRAG_RESULT:2 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU_GROUP_BEGIN
ALU MOVA_INT AR : KC0[0].x {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV A1[AR].x : KC0[1].y {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU ADD S1.z : KC0[0].z KC0[2].z {W}
ALU ADD S1.w : KC0[0].w KC0[2].w {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU ADD S2.x@chgr : A1[1].x KC0[0].y {W}
ALU ADD S2.y@chgr : KC0[1].y S1.z{s} {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU ADD S2.z@chgr : KC0[1].z S1.w{s} {W}
ALU ADD S2.w@chgr : KC0[1].w S1.w{s} {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
EXPORT_DONE PIXEL 0 S2.xyzw
BLOCK_END)";

   auto shader = from_string(source_text);
   split_address_loads(*shader);
   optimize(*shader);

   auto scheduled = schedule(shader);
   check(scheduled, expected_text);
}
1225