// Copyright © 2022 Collabora, Ltd.
// SPDX-License-Identifier: MIT

use crate::api::{GetDebugFlags, ShaderBin, DEBUG};
use crate::hw_runner::{Runner, CB0};
use crate::ir::*;
use crate::sm50::ShaderModel50;
use crate::sm70::ShaderModel70;

use acorn::Acorn;
use compiler::bindings::MESA_SHADER_COMPUTE;
use compiler::cfg::CFGBuilder;
use nak_bindings::*;
use std::mem::offset_of;
use std::str::FromStr;
use std::sync::OnceLock;

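// Process-wide test context: the shader model for whatever GPU we find plus
// a runner for executing shaders on it.  Tests share a single instance so we
// only have to open the device once.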
struct RunSingleton {
    sm: Box<dyn ShaderModel + Send + Sync>,
    run: Runner,
}

static RUN_SINGLETON: OnceLock<RunSingleton> = OnceLock::new();

impl RunSingleton {
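    // Returns the shared context, initializing it on first use.  Set
    // NAK_TEST_DEVICE to a device index to test against a specific GPU.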
    pub fn get() -> &'static RunSingleton {
        RUN_SINGLETON.get_or_init(|| {
            let dev_id = match std::env::var("NAK_TEST_DEVICE") {
                Ok(s) => Some(usize::from_str(&s).unwrap()),
                Err(_) => None,
            };

            let run = Runner::new(dev_id);
            let sm_nr = run.dev_info().sm;
            let sm: Box<dyn ShaderModel + Send + Sync> = if sm_nr >= 70 {
                Box::new(ShaderModel70::new(sm_nr))
            } else if sm_nr >= 50 {
                Box::new(ShaderModel50::new(sm_nr))
            } else {
                panic!("Unsupported shader model");
            };
            RunSingleton { sm, run }
        })
    }
}

const LOCAL_SIZE_X: u16 = 32;

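// Builds a small compute shader around whatever instructions a test emits.
// Each invocation owns a slice of a global data buffer from which it can
// load test inputs and to which it can store results.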
pub struct TestShaderBuilder<'a> {
    sm: &'a dyn ShaderModel,
    alloc: SSAValueAllocator,
    b: InstrBuilder<'a>,
    start_block: BasicBlock,
    label: Label,
    data_addr: SSARef,
}

impl<'a> TestShaderBuilder<'a> {
    pub fn new(sm: &'a dyn ShaderModel) -> Self {
        let mut alloc = SSAValueAllocator::new();
        let mut label_alloc = LabelAllocator::new();
        let mut b = SSAInstrBuilder::new(sm, &mut alloc);

        // Fill out the start block
        let lane = b.alloc_ssa(RegFile::GPR, 1);
        b.push_op(OpS2R {
            dst: lane.into(),
            idx: NAK_SV_LANE_ID,
        });

        let cta = b.alloc_ssa(RegFile::GPR, 1);
        b.push_op(OpS2R {
            dst: cta.into(),
            idx: NAK_SV_CTAID,
        });

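        // invoc_id = ctaid * LOCAL_SIZE_X + lane_id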
        let invoc_id = b.alloc_ssa(RegFile::GPR, 1);
        b.push_op(OpIMad {
            dst: invoc_id.into(),
            srcs: [cta.into(), u32::from(LOCAL_SIZE_X).into(), lane.into()],
            signed: false,
        });

        let data_addr_lo = CBufRef {
            buf: CBuf::Binding(0),
            offset: offset_of!(CB0, data_addr_lo).try_into().unwrap(),
        };
        let data_addr_hi = CBufRef {
            buf: CBuf::Binding(0),
            offset: offset_of!(CB0, data_addr_hi).try_into().unwrap(),
        };
        let data_addr = b.alloc_ssa(RegFile::GPR, 2);
        b.copy_to(data_addr[0].into(), data_addr_lo.into());
        b.copy_to(data_addr[1].into(), data_addr_hi.into());

        let data_stride = CBufRef {
            buf: CBuf::Binding(0),
            offset: offset_of!(CB0, data_stride).try_into().unwrap(),
        };
        let invocations = CBufRef {
            buf: CBuf::Binding(0),
            offset: offset_of!(CB0, invocations).try_into().unwrap(),
        };

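        // Point data_addr at this invocation's slice of the data buffer:
        // data_addr += invoc_id * data_stride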
        let data_offset = SSARef::from([
            b.imul(invoc_id.into(), data_stride.into())[0],
            b.copy(0.into())[0],
        ]);
        let data_addr =
            b.iadd64(data_addr.into(), data_offset.into(), 0.into());

        // Finally, exit if we're OOB
        let oob = b.isetp(
            IntCmpType::U32,
            IntCmpOp::Ge,
            invoc_id.into(),
            invocations.into(),
        );
        b.predicate(oob[0].into()).push_op(OpExit {});

        let start_block = BasicBlock {
            label: label_alloc.alloc(),
            uniform: true,
            instrs: b.as_vec(),
        };

        TestShaderBuilder {
            sm,
            alloc,
            b: InstrBuilder::new(sm),
            start_block,
            label: label_alloc.alloc(),
            data_addr,
        }
    }

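    // Loads a mem_type worth of test data from the given byte offset in this
    // invocation's slice of the data buffer.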
    pub fn ld_test_data(&mut self, offset: u16, mem_type: MemType) -> SSARef {
        let access = MemAccess {
            mem_type,
            space: MemSpace::Global(MemAddrType::A64),
            order: MemOrder::Strong(MemScope::System),
            eviction_priority: MemEvictionPriority::Normal,
        };
        let comps: u8 = mem_type.bits().div_ceil(32).try_into().unwrap();
        let dst = self.alloc_ssa(RegFile::GPR, comps);
        self.push_op(OpLd {
            dst: dst.into(),
            addr: self.data_addr.into(),
            offset: offset.into(),
            access,
        });
        dst
    }

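    // Stores a mem_type worth of result data to the given byte offset in
    // this invocation's slice of the data buffer.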
    pub fn st_test_data(
        &mut self,
        offset: u16,
        mem_type: MemType,
        data: SSARef,
    ) {
        let access = MemAccess {
            mem_type,
            space: MemSpace::Global(MemAddrType::A64),
            order: MemOrder::Strong(MemScope::System),
            eviction_priority: MemEvictionPriority::Normal,
        };
        let comps: u8 = mem_type.bits().div_ceil(32).try_into().unwrap();
        assert!(data.comps() == comps);
        self.push_op(OpSt {
            addr: self.data_addr.into(),
            data: data.into(),
            offset: offset.into(),
            access,
        });
    }

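    // Finishes the shader and runs just enough of the backend (register
    // allocation, copy lowering, instruction dependency calculation, and
    // encoding) to produce something the hardware can execute.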
    pub fn compile(mut self) -> Box<ShaderBin> {
        self.b.push_op(OpExit {});
        let block = BasicBlock {
            label: self.label,
            uniform: true,
            instrs: self.b.as_vec(),
        };

        let mut cfg = CFGBuilder::new();
        cfg.add_node(0, self.start_block);
        cfg.add_node(1, block);
        cfg.add_edge(0, 1);

        let f = Function {
            ssa_alloc: self.alloc,
            phi_alloc: PhiAllocator::new(),
            blocks: cfg.as_cfg(),
        };

        let cs_info = ComputeShaderInfo {
            local_size: [LOCAL_SIZE_X.into(), 1, 1],
            smem_size: 0,
        };
        let info = ShaderInfo {
            num_gprs: 0,
            num_control_barriers: 0,
            num_instrs: 0,
            slm_size: 0,
            max_crs_depth: 0,
            uses_global_mem: true,
            writes_global_mem: true,
            uses_fp64: false,
            stage: ShaderStageInfo::Compute(cs_info),
            io: ShaderIoInfo::None,
        };
        let mut s = Shader {
            sm: self.sm,
            info,
            functions: vec![f],
        };

        // We do run a few passes
        s.opt_copy_prop();
        s.opt_dce();
        s.legalize();

        s.assign_regs();
        s.lower_par_copies();
        s.lower_copy_swap();
        s.calc_instr_deps();

        if DEBUG.print() {
            eprintln!("NAK shader: {s}");
        }

        s.gather_info();
        s.remove_annotations();

        let code = self.sm.encode_shader(&s);
        Box::new(ShaderBin::new(self.sm, &s.info, None, code, ""))
    }
}

impl Builder for TestShaderBuilder<'_> {
    fn push_instr(&mut self, instr: Box<Instr>) -> &mut Instr {
        self.b.push_instr(instr)
    }

    fn sm(&self) -> u8 {
        self.b.sm()
    }
}

impl SSABuilder for TestShaderBuilder<'_> {
    fn alloc_ssa(&mut self, file: RegFile, comps: u8) -> SSARef {
        self.alloc.alloc_vec(file, comps)
    }
}

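// Smoke test: a shader containing nothing but the OOB check and an exit
// should compile and run.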
#[test]
fn test_sanity() {
    let run = RunSingleton::get();
    let b = TestShaderBuilder::new(run.sm.as_ref());
    let bin = b.compile();
    unsafe {
        run.run
            .run_raw(&bin, LOCAL_SIZE_X.into(), 0, std::ptr::null_mut(), 0)
            .unwrap();
    }
}

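// Compare floats loosely: NaN == NaN counts as a match and everything else
// only has to agree to within a small absolute epsilon.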
fn f32_eq(a: f32, b: f32) -> bool {
    if a.is_nan() && b.is_nan() {
        true
    } else if a.is_nan() || b.is_nan() {
        // If one is NaN but not the other, fail
        false
    } else {
        (a - b).abs() < 0.000001
    }
}

fn f64_eq(a: f64, b: f64) -> bool {
    if a.is_nan() && b.is_nan() {
        true
    } else if a.is_nan() || b.is_nan() {
        // If one is NaN but not the other, fail
        false
    } else {
        (a - b).abs() < 0.000001
    }
}

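// Generic harness for testing constant folding: build a shader that executes
// `op` on random inputs, run it on the hardware, and check that the results
// match what `op.fold()` computes on the CPU.  Each invocation owns one row
// of the data buffer, laid out as the source u32s followed by the
// destination u32s.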
pub fn test_foldable_op_with(
    mut op: impl Foldable + Clone + Into<Op>,
    mut rand_u32: impl FnMut(usize) -> u32,
) {
    let run = RunSingleton::get();
    let mut b = TestShaderBuilder::new(run.sm.as_ref());

    let mut comps = 0_u16;
    let mut fold_src = Vec::new();
    let src_types = op.src_types();
    for (i, src) in op.srcs_as_mut_slice().iter_mut().enumerate() {
        match src_types[i] {
            SrcType::GPR
            | SrcType::ALU
            | SrcType::F16
            | SrcType::F16v2
            | SrcType::F32
            | SrcType::I32
            | SrcType::B32 => {
                let data = b.ld_test_data(comps * 4, MemType::B32);
                comps += 1;

                src.src_ref = data.into();
                fold_src.push(FoldData::U32(0));
            }
            SrcType::F64 => {
                todo!("Double ops aren't tested yet");
            }
            SrcType::Pred => {
                let data = b.ld_test_data(comps * 4, MemType::B32);
                comps += 1;

                let bit = b.lop2(LogicOp2::And, data.into(), 1.into());
                let pred = b.isetp(
                    IntCmpType::U32,
                    IntCmpOp::Ne,
                    bit.into(),
                    0.into(),
                );
                src.src_ref = pred.into();
                fold_src.push(FoldData::Pred(false));
            }
            SrcType::Carry => {
                let data = b.ld_test_data(comps * 4, MemType::B32);
                comps += 1;

                let bit = b.lop2(LogicOp2::And, data.into(), 1.into());
                let dst = b.alloc_ssa(RegFile::GPR, 1);
                let carry = b.alloc_ssa(RegFile::Carry, 1);
                b.push_op(OpIAdd2 {
                    dst: dst.into(),
                    carry_out: carry.into(),
                    srcs: [u32::MAX.into(), bit.into()],
                });
                src.src_ref = carry.into();
                fold_src.push(FoldData::Carry(false));
            }
            typ => panic!("Can't auto-generate {typ:?} data"),
        }
    }
    let src_comps = usize::from(comps);

    let mut fold_dst = Vec::new();
    let dst_types = op.dst_types();
    for (i, dst) in op.dsts_as_mut_slice().iter_mut().enumerate() {
        match dst_types[i] {
            DstType::Pred => {
                *dst = b.alloc_ssa(RegFile::Pred, 1).into();
                fold_dst.push(FoldData::Pred(false));
            }
            DstType::GPR | DstType::F32 => {
                *dst = b.alloc_ssa(RegFile::GPR, 1).into();
                fold_dst.push(FoldData::U32(0));
            }
            DstType::F64 => {
                *dst = b.alloc_ssa(RegFile::GPR, 2).into();
                fold_dst.push(FoldData::Vec2([0, 0]));
            }
            DstType::Carry => {
                *dst = b.alloc_ssa(RegFile::Carry, 1).into();
                fold_dst.push(FoldData::Carry(false));
            }
            typ => panic!("Can't auto-test {typ:?} data"),
        }
    }

    b.push_op(op.clone());
    let op = op; // Drop mutability

    for dst in op.dsts_as_slice() {
        let Dst::SSA(vec) = dst else {
            panic!("Should be an ssa value");
        };

        for ssa in &vec[..] {
            let u = match ssa.file() {
                RegFile::Pred => b.sel((*ssa).into(), 1.into(), 0.into()),
                RegFile::GPR => (*ssa).into(),
                RegFile::Carry => {
                    let gpr = b.alloc_ssa(RegFile::GPR, 1);
                    b.push_op(OpIAdd2X {
                        dst: gpr.into(),
                        carry_out: Dst::None,
                        srcs: [0.into(), 0.into()],
                        carry_in: (*ssa).into(),
                    });
                    gpr.into()
                }
                file => panic!("Can't auto-test {file:?} data"),
            };
            b.st_test_data(comps * 4, MemType::B32, u);
            comps += 1;
        }
    }
    let comps = usize::from(comps); // Drop mutability
    let dst_comps = comps - src_comps;

    let bin = b.compile();

    // We're throwing random data at it here, so the idea is that the number
    // of test cases we need for good coverage scales with the square of the
    // number of source components.  For a big op like IAdd3X, this gives us
    // 2500 iterations.
    let invocations = src_comps * src_comps * 100;

    let mut data = Vec::new();
    for _ in 0..invocations {
        for (i, src) in op.srcs_as_slice().iter().enumerate() {
            let SrcRef::SSA(vec) = &src.src_ref else {
                panic!("Should be an ssa value");
            };

            for _ in 0..vec.comps() {
                data.push(rand_u32(i));
            }
        }
        for _ in 0..dst_comps {
            data.push(0_u32);
        }
    }
    debug_assert!(data.len() == invocations * comps);

    unsafe {
        run.run
            .run_raw(
                &bin,
                invocations.try_into().unwrap(),
                (comps * 4).try_into().unwrap(),
                data.as_mut_ptr().cast(),
                data.len() * 4,
            )
            .unwrap();
    }

    // Now, check the results
    for invoc_id in 0..invocations {
        let data = &data[(invoc_id * comps)..((invoc_id + 1) * comps)];

        let mut c = 0_usize;
        for src in &mut fold_src {
            match src {
                FoldData::Pred(b) | FoldData::Carry(b) => {
                    let u = data[c];
                    *b = (u & 1) != 0;
                    c += 1;
                }
                FoldData::U32(u) => {
                    *u = data[c];
                    c += 1;
                }
                FoldData::Vec2(v) => {
                    *v = [data[c + 0], data[c + 1]];
                    c += 2;
                }
            }
        }
        debug_assert!(c == src_comps);

        let mut fold = OpFoldData {
            srcs: &fold_src,
            dsts: &mut fold_dst,
        };
        op.fold(&*run.sm, &mut fold);

        debug_assert!(fold_dst.len() == op.dsts_as_slice().len());
        for (i, dst) in fold_dst.iter().enumerate() {
            match dst {
                FoldData::Pred(b) | FoldData::Carry(b) => {
                    let d = data[c];
                    c += 1;
                    assert_eq!(*b, (d & 1) != 0);
                }
                FoldData::U32(u) => {
                    let d = data[c];
                    c += 1;

                    match dst_types[i] {
                        DstType::GPR => {
                            assert_eq!(*u, d);
                        }
                        DstType::F32 => {
                            assert!(f32_eq(
                                f32::from_bits(*u),
                                f32::from_bits(d)
                            ));
                        }
                        typ => panic!("Can't auto-test {typ:?} data"),
                    }
                }
                FoldData::Vec2(v) => {
                    let d = [data[c + 0], data[c + 1]];
                    c += 2;

                    match dst_types[i] {
                        DstType::F64 => {
                            let v_f64 = f64::from_bits(
                                u64::from(v[0]) | (u64::from(v[1]) << 32),
                            );
                            let d_f64 = f64::from_bits(
                                u64::from(d[0]) | (u64::from(d[1]) << 32),
                            );
                            assert!(f64_eq(v_f64, d_f64));
                        }
                        typ => panic!("Can't auto-test {typ:?} data"),
                    }
                }
            }
        }
        debug_assert!(c == comps);
    }
}

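// Convenience wrapper for ops that are happy with fully random 32-bit
// sources.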
pub fn test_foldable_op(op: impl Foldable + Clone + Into<Op>) {
    let mut a = Acorn::new();
    test_foldable_op_with(op, &mut |_| a.get_u32());
}

#[test]
fn test_op_flo() {
    for i in 0..4 {
        let op = OpFlo {
            dst: Dst::None,
            src: 0.into(),
            signed: i & 0x1 != 0,
            return_shift_amount: i & 0x2 != 0,
        };

        let mut a = Acorn::new();
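        // Shift a random value right by a random amount so FLO sees inputs
        // with varying numbers of leading zero (or sign) bits.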
        test_foldable_op_with(op, &mut |_| {
            let x = a.get_uint(36);
            let signed = x & (1 << 32) != 0;
            let shift = x >> 33;
            if signed {
                ((x as i32) >> shift) as u32
            } else {
                (x as u32) >> shift
            }
        });
    }
}

#[test]
fn test_op_iabs() {
    if RunSingleton::get().sm.sm() >= 70 {
        let op = OpIAbs {
            dst: Dst::None,
            src: 0.into(),
        };
        test_foldable_op(op);
    }
}

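// Random u32s biased toward the interesting carry/overflow boundary values:
// 0, 1, i32::MIN/MAX (as bits), u32::MAX, and u32::MAX - 1.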
fn get_iadd_int(a: &mut Acorn) -> u32 {
    let x = a.get_uint(36);
    match x >> 32 {
        0 => 0,
        1 => 1,
        2 => 1 << 31,
        3 => (1 << 31) - 1,
        4 => u32::MAX,
        5 => u32::MAX - 1,
        _ => x as u32,
    }
}

#[test]
fn test_op_iadd2() {
    if RunSingleton::get().sm.sm() < 70 {
        for i in 0..3 {
            let mut op = OpIAdd2 {
                dst: Dst::None,
                carry_out: Dst::None,
                srcs: [0.into(), 0.into()],
            };
            if i & 0x1 != 0 {
                op.srcs[0].src_mod = SrcMod::INeg;
            }
            if i & 0x2 != 0 {
                op.srcs[1].src_mod = SrcMod::INeg;
            }

            let mut a = Acorn::new();
            test_foldable_op_with(op, |_| get_iadd_int(&mut a));
        }
    }
}

#[test]
fn test_op_iadd2x() {
    if RunSingleton::get().sm.sm() < 70 {
        for i in 0..3 {
            let mut op = OpIAdd2X {
                dst: Dst::None,
                carry_out: Dst::None,
                srcs: [0.into(), 0.into()],
                carry_in: 0.into(),
            };
            if i & 0x1 != 0 {
                op.srcs[0].src_mod = SrcMod::BNot;
            }
            if i & 0x2 != 0 {
                op.srcs[1].src_mod = SrcMod::BNot;
            }

            let mut a = Acorn::new();
            test_foldable_op_with(op, |_| get_iadd_int(&mut a));
        }
    }
}

#[test]
fn test_op_iadd3() {
    if RunSingleton::get().sm.sm() >= 70 {
        for i in 0..6 {
            let mut op = OpIAdd3 {
                dst: Dst::None,
                overflow: [Dst::None, Dst::None],
                srcs: [0.into(), 0.into(), 0.into()],
            };
            if i % 3 == 1 {
                op.srcs[0].src_mod = SrcMod::INeg;
            } else if i % 3 == 2 {
                op.srcs[1].src_mod = SrcMod::INeg;
            }
            if i / 3 == 1 {
                op.srcs[2].src_mod = SrcMod::INeg;
            }

            let mut a = Acorn::new();
            test_foldable_op_with(op, |_| get_iadd_int(&mut a));
        }
    }
}

#[test]
fn test_op_iadd3x() {
    if RunSingleton::get().sm.sm() >= 70 {
        for i in 0..6 {
            let mut op = OpIAdd3X {
                dst: Dst::None,
                overflow: [Dst::None, Dst::None],
                srcs: [0.into(), 0.into(), 0.into()],
                carry: [false.into(), false.into()],
            };
            if i % 3 == 1 {
                op.srcs[0].src_mod = SrcMod::BNot;
            } else if i % 3 == 2 {
                op.srcs[1].src_mod = SrcMod::BNot;
            }
            if i / 3 == 1 {
                op.srcs[2].src_mod = SrcMod::BNot;
            }

            let mut a = Acorn::new();
            test_foldable_op_with(op, |_| get_iadd_int(&mut a));
        }
    }
}

#[test]
fn test_op_isetp() {
    let set_ops = [PredSetOp::And, PredSetOp::Or, PredSetOp::Xor];
    let cmp_ops = [
        IntCmpOp::Eq,
        IntCmpOp::Ne,
        IntCmpOp::Lt,
        IntCmpOp::Le,
        IntCmpOp::Gt,
        IntCmpOp::Ge,
    ];
    let cmp_types = [IntCmpType::U32, IntCmpType::I32];

    for mut i in 0..(set_ops.len() * cmp_ops.len() * cmp_types.len() * 2) {
        let set_op = set_ops[i % set_ops.len()];
        i /= set_ops.len();

        let cmp_op = cmp_ops[i % cmp_ops.len()];
        i /= cmp_ops.len();

        let cmp_type = cmp_types[i % cmp_types.len()];
        i /= cmp_types.len();

        let ex = i != 0;

        if ex && RunSingleton::get().sm.sm() < 70 {
            continue;
        }

        let op = OpISetP {
            dst: Dst::None,
            set_op,
            cmp_op,
            cmp_type,
            ex,
            srcs: [0.into(), 0.into()],
            accum: 0.into(),
            low_cmp: 0.into(),
        };

        let src0_idx = op.src_idx(&op.srcs[0]);
        let mut a = Acorn::new();
        let mut src0 = 0_u32;
        test_foldable_op_with(op, &mut |i| {
            let x = a.get_u32();
            if i == src0_idx {
                src0 = x;
            }

            // Make src0 and src1 equal about half the time so the equality
            // paths actually get exercised.
            if i == src0_idx + 1 && a.get_bool() {
                src0
            } else {
                x
            }
        });
    }
}

#[test]
fn test_op_lop2() {
    if RunSingleton::get().sm.sm() < 70 {
        let logic_ops =
            [LogicOp2::And, LogicOp2::Or, LogicOp2::Xor, LogicOp2::PassB];

        let src_mods = [
            (SrcMod::None, SrcMod::None),
            (SrcMod::BNot, SrcMod::None),
            (SrcMod::None, SrcMod::BNot),
            (SrcMod::BNot, SrcMod::BNot),
        ];

        for logic_op in logic_ops {
            for (x_mod, y_mod) in src_mods {
                let mut op = OpLop2 {
                    dst: Dst::None,
                    srcs: [0.into(), 0.into()],
                    op: logic_op,
                };
                op.srcs[0].src_mod = x_mod;
                op.srcs[1].src_mod = y_mod;

                test_foldable_op(op);
            }
        }
    }
}

#[test]
fn test_op_lop3() {
    if RunSingleton::get().sm.sm() >= 70 {
        for lut in 0..=255 {
            let op = OpLop3 {
                dst: Dst::None,
                srcs: [0.into(), 0.into(), 0.into()],
                op: LogicOp3 { lut },
            };
            test_foldable_op(op);
        }
    }
}

#[test]
fn test_op_popc() {
    let src_mods = [SrcMod::None, SrcMod::BNot];
    for src_mod in src_mods {
        let mut op = OpPopC {
            dst: Dst::None,
            src: 0.into(),
        };
        op.src.src_mod = src_mod;
        test_foldable_op(op);
    }
}

#[test]
fn test_op_shf() {
    let sm = &RunSingleton::get().sm;

    let types = [IntType::U32, IntType::I32, IntType::U64, IntType::I64];

    for i in 0..32 {
        let op = OpShf {
            dst: Dst::None,
            low: 0.into(),
            high: 0.into(),
            shift: 0.into(),
            data_type: types[i & 0x3],
            right: i & 0x4 != 0,
            wrap: i & 0x8 != 0,
            dst_high: i & 0x10 != 0,
        };

        if sm.sm() < 70 && !(op.dst_high || op.right) {
            continue;
        }

        let shift_idx = op.src_idx(&op.shift);
        let mut a = Acorn::new();
        test_foldable_op_with(op, &mut |i| {
            if i == shift_idx {
                a.get_uint(6) as u32
            } else {
                a.get_u32()
            }
        });
    }
}

#[test]
fn test_op_prmt() {
    let op = OpPrmt {
        dst: Dst::None,
        srcs: [0.into(), 0.into()],
        sel: 0.into(),
        mode: PrmtMode::Index,
    };
    test_foldable_op(op);
}

#[test]
fn test_op_psetp() {
    if RunSingleton::get().sm.sm() < 70 {
        let set_ops = [PredSetOp::And, PredSetOp::Or, PredSetOp::Xor];
        let src_mods = [SrcMod::None, SrcMod::BNot];
        for mut i in 0..(3 * 3 * 2 * 2 * 2) {
            let op1 = set_ops[i % 3];
            i /= 3;
            let op2 = set_ops[i % 3];
            i /= 3;
            let mut op = OpPSetP {
                dsts: [Dst::None, Dst::None],
                ops: [op1, op2],
                srcs: [true.into(), true.into(), true.into()],
            };
            op.srcs[0].src_mod = src_mods[(i >> 0) & 1];
            op.srcs[1].src_mod = src_mods[(i >> 1) & 1];
            op.srcs[2].src_mod = src_mods[(i >> 2) & 1];

            test_foldable_op(op);
        }
    }
}

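// The 64-bit helpers below aren't single Foldable ops, so they get
// hand-rolled tests: run them on random data and compare against a CPU
// reference computed with u64 arithmetic.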
#[test]
fn test_iadd64() {
    let run = RunSingleton::get();
    let invocations = 100;

    let cases = [
        (SrcMod::None, SrcMod::None),
        (SrcMod::INeg, SrcMod::None),
        (SrcMod::None, SrcMod::INeg),
    ];

    for (x_mod, y_mod) in cases {
        let mut b = TestShaderBuilder::new(run.sm.as_ref());

        let mut x = Src::from([
            b.ld_test_data(0, MemType::B32)[0],
            b.ld_test_data(4, MemType::B32)[0],
        ]);
        x.src_mod = x_mod;

        let mut y = Src::from([
            b.ld_test_data(8, MemType::B32)[0],
            b.ld_test_data(12, MemType::B32)[0],
        ]);
        y.src_mod = y_mod;

        let dst = b.iadd64(x, y, 0.into());
        b.st_test_data(16, MemType::B32, dst[0].into());
        b.st_test_data(20, MemType::B32, dst[1].into());

        let bin = b.compile();

        let mut a = Acorn::new();
        let mut data = Vec::new();
        for _ in 0..invocations {
            data.push([
                get_iadd_int(&mut a),
                get_iadd_int(&mut a),
                get_iadd_int(&mut a),
                get_iadd_int(&mut a),
                0,
                0,
            ]);
        }

        run.run.run(&bin, &mut data).unwrap();

        for d in &data {
            let mut x = u64::from(d[0]) | (u64::from(d[1]) << 32);
            let mut y = u64::from(d[2]) | (u64::from(d[3]) << 32);
            if x_mod.is_ineg() {
                x = x.wrapping_neg();
            }
            if y_mod.is_ineg() {
                y = y.wrapping_neg();
            }
            let dst = x.wrapping_add(y);
            assert_eq!(d[4], dst as u32);
            assert_eq!(d[5], (dst >> 32) as u32);
        }
    }
}

#[test]
fn test_ineg64() {
    let run = RunSingleton::get();
    let invocations = 100;

    let mut b = TestShaderBuilder::new(run.sm.as_ref());

    let x = SSARef::from([
        b.ld_test_data(0, MemType::B32)[0],
        b.ld_test_data(4, MemType::B32)[0],
    ]);
    let dst = b.ineg64(x.into());
    b.st_test_data(8, MemType::B32, dst[0].into());
    b.st_test_data(12, MemType::B32, dst[1].into());

    let bin = b.compile();

    let mut a = Acorn::new();
    let mut data = Vec::new();
    for _ in 0..invocations {
        data.push([a.get_u32(), a.get_u32(), 0, 0]);
    }

    run.run.run(&bin, &mut data).unwrap();

    for d in &data {
        let x = u64::from(d[0]) | (u64::from(d[1]) << 32);
        let dst = x.wrapping_neg();
        assert_eq!(d[2], dst as u32);
        assert_eq!(d[3], (dst >> 32) as u32);
    }
}

#[test]
fn test_isetp64() {
    let run = RunSingleton::get();
    let invocations = 100;

    let types = [IntCmpType::U32, IntCmpType::I32];
    let ops = [
        IntCmpOp::Eq,
        IntCmpOp::Ne,
        IntCmpOp::Lt,
        IntCmpOp::Le,
        IntCmpOp::Gt,
        IntCmpOp::Ge,
    ];

    for i in 0..(ops.len() * 2) {
        let mut b = TestShaderBuilder::new(run.sm.as_ref());

        let cmp_type = types[i % 2];
        let cmp_op = ops[i / 2];

        let x = SSARef::from([
            b.ld_test_data(0, MemType::B32)[0],
            b.ld_test_data(4, MemType::B32)[0],
        ]);
        let y = SSARef::from([
            b.ld_test_data(8, MemType::B32)[0],
            b.ld_test_data(12, MemType::B32)[0],
        ]);
        let p = b.isetp64(cmp_type, cmp_op, x.into(), y.into());
        let dst = b.sel(p.into(), 1.into(), 0.into());
        b.st_test_data(16, MemType::B32, dst.into());

        let bin = b.compile();

        let mut a = Acorn::new();
        let mut data = Vec::new();
        for _ in 0..invocations {
            match a.get_u32() % 4 {
                0 => {
                    // Equal
                    let high = a.get_u32();
                    let low = a.get_u32();
                    data.push([low, high, low, high, 0]);
                }
                1 => {
                    // High bits are equal
                    let high = a.get_u32();
                    data.push([a.get_u32(), high, a.get_u32(), high, 0]);
                }
                _ => {
                    data.push([
                        a.get_u32(),
                        a.get_u32(),
                        a.get_u32(),
                        a.get_u32(),
                        0,
                    ]);
                }
            }
        }

        run.run.run(&bin, &mut data).unwrap();

        for d in &data {
            let x = u64::from(d[0]) | (u64::from(d[1]) << 32);
            let y = u64::from(d[2]) | (u64::from(d[3]) << 32);
            let p = if cmp_type.is_signed() {
                let x = x as i64;
                let y = y as i64;
                match cmp_op {
                    IntCmpOp::Eq => x == y,
                    IntCmpOp::Ne => x != y,
                    IntCmpOp::Lt => x < y,
                    IntCmpOp::Le => x <= y,
                    IntCmpOp::Gt => x > y,
                    IntCmpOp::Ge => x >= y,
                }
            } else {
                match cmp_op {
                    IntCmpOp::Eq => x == y,
                    IntCmpOp::Ne => x != y,
                    IntCmpOp::Lt => x < y,
                    IntCmpOp::Le => x <= y,
                    IntCmpOp::Gt => x > y,
                    IntCmpOp::Ge => x >= y,
                }
            };
            let dst = p as u32;
            assert_eq!(d[4], dst);
        }
    }
}

#[test]
fn test_shl64() {
    let run = RunSingleton::get();
    let invocations = 100;

    let mut b = TestShaderBuilder::new(run.sm.as_ref());

    let srcs = SSARef::from([
        b.ld_test_data(0, MemType::B32)[0],
        b.ld_test_data(4, MemType::B32)[0],
    ]);
    let shift = b.ld_test_data(8, MemType::B32);
    let dst = b.shl64(srcs.into(), shift.into());
    b.st_test_data(12, MemType::B32, dst[0].into());
    b.st_test_data(16, MemType::B32, dst[1].into());

    let bin = b.compile();

    let mut a = Acorn::new();
    let mut data = Vec::new();
    for _ in 0..invocations {
        data.push([a.get_u32(), a.get_u32(), a.get_uint(7) as u32, 0, 0]);
    }

    run.run.run(&bin, &mut data).unwrap();

    for d in &data {
        let src = u64::from(d[0]) | (u64::from(d[1]) << 32);
        let dst = src << (d[2] & 0x3f);
        assert_eq!(d[3], dst as u32);
        assert_eq!(d[4], (dst >> 32) as u32);
    }
}

#[test]
fn test_shr64() {
    let run = RunSingleton::get();
    let invocations = 100;

    let cases = [true, false];

    for signed in cases {
        let mut b = TestShaderBuilder::new(run.sm.as_ref());

        let srcs = SSARef::from([
            b.ld_test_data(0, MemType::B32)[0],
            b.ld_test_data(4, MemType::B32)[0],
        ]);
        let shift = b.ld_test_data(8, MemType::B32);
        let dst = b.shr64(srcs.into(), shift.into(), signed);
        b.st_test_data(12, MemType::B32, dst[0].into());
        b.st_test_data(16, MemType::B32, dst[1].into());

        let bin = b.compile();

        let mut a = Acorn::new();
        let mut data = Vec::new();
        for _ in 0..invocations {
            data.push([a.get_u32(), a.get_u32(), a.get_uint(7) as u32, 0, 0]);
        }

        run.run.run(&bin, &mut data).unwrap();

        for d in &data {
            let src = u64::from(d[0]) | (u64::from(d[1]) << 32);
            let dst = if signed {
                ((src as i64) >> (d[2] & 0x3f)) as u64
            } else {
                src >> (d[2] & 0x3f)
            };
            assert_eq!(d[3], dst as u32);
            assert_eq!(d[4], (dst >> 32) as u32);
        }
    }
}

#[test]
fn test_f2fp_pack_ab() {
    let run = RunSingleton::get();
    let mut b = TestShaderBuilder::new(run.sm.as_ref());

    let srcs = SSARef::from([
        b.ld_test_data(0, MemType::B32)[0],
        b.ld_test_data(4, MemType::B32)[0],
    ]);

    let dst = b.alloc_ssa(RegFile::GPR, 1);
    b.push_op(OpF2FP {
        dst: dst.into(),
        srcs: [srcs[0].into(), srcs[1].into()],
        rnd_mode: FRndMode::NearestEven,
    });
    b.st_test_data(8, MemType::B32, dst[0].into());

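    // Same pack again, but with an immediate 2.0 as the second source and
    // round-toward-zero.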
    let dst = b.alloc_ssa(RegFile::GPR, 1);
    b.push_op(OpF2FP {
        dst: dst.into(),
        srcs: [srcs[0].into(), 2.0.into()],
        rnd_mode: FRndMode::Zero,
    });
    b.st_test_data(12, MemType::B32, dst[0].into());

    let bin = b.compile();

    let zero = 0_f32.to_bits();
    let one = 1_f32.to_bits();
    let two = 2_f32.to_bits();
    let complex = 1.4556_f32.to_bits();

    let mut data = Vec::new();
    data.push([one, two, 0, 0]);
    data.push([one, zero, 0, 0]);
    data.push([complex, zero, 0, 0]);
    run.run.run(&bin, &mut data).unwrap();

    // { 1.0fp16, 2.0fp16 }
    assert_eq!(data[0][2], 0x3c004000);
    // { 1.0fp16, 2.0fp16 }
    assert_eq!(data[0][3], 0x3c004000);
    // { 1.0fp16, 0.0fp16 }
    assert_eq!(data[1][2], 0x3c000000);
    // { 1.0fp16, 2.0fp16 }
    assert_eq!(data[1][3], 0x3c004000);
    // { 1.456fp16, 0.0fp16 }
    assert_eq!(data[2][2], 0x3dd30000);
    // { 1.455fp16, 2.0fp16 }
    assert_eq!(data[2][3], 0x3dd24000);
}

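// gpr_limit_from_local_size() must never report more GPRs than the hardware
// can actually allocate at a given workgroup size.  Claim exactly that many
// GPRs at every size and make sure the launch still succeeds.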
#[test]
pub fn test_gpr_limit_from_local_size() {
    let run = RunSingleton::get();
    let b = TestShaderBuilder::new(run.sm.as_ref());
    let mut bin = b.compile();

    for local_size in 1..=1024 {
        let info = &mut bin.bin.info;
        let cs_info = unsafe {
            assert_eq!(info.stage, MESA_SHADER_COMPUTE);
            &mut info.__bindgen_anon_1.cs
        };
        cs_info.local_size = [local_size, 1, 1];
        let num_gprs = gpr_limit_from_local_size(&cs_info.local_size);
        info.num_gprs = num_gprs.try_into().unwrap();

        run.run.run::<u8>(&bin, &mut [0; 4096]).unwrap_or_else(|_| {
            panic!("Failed with local_size {local_size}, num_gprs {num_gprs}")
        });
    }
}