• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * Copyright 2023 Asahi Lina
3 * SPDX-License-Identifier: MIT
4 */
5#include "compiler/libcl/libcl.h"
6#include "helper.h"
7#include "libagx_intrinsics.h"
8
/*
 * Doorbell values passed to nir_doorbell_agx() by the helper loop:
 *   DB_NEXT - rung at the top of every iteration, before the op id is read
 *   DB_ACK  - rung after an op has been carried out
 *   DB_NACK - rung when an op is refused
 */
#define DB_NEXT 0x20
#define DB_ACK  0x30
#define DB_NACK 0x31
12
/* Op ids read via nir_load_helper_op_id_agx() and dispatched by libagx_helper(). */
enum helper_op {
   OP_STACK_ALLOC       = 0x0, /* map a blocklist entry into stack slots 0-3 */
   OP_STACK_FREE        = 0x1, /* unmap stack slots 0-3 back into the blocklist */
   OP_THREADGROUP_ALLOC = 0x4, /* currently always refused */
   OP_THREADGROUP_FREE  = 0x5, /* currently always refused */
   OP_END               = 0xf, /* leave the helper loop */
};
20
21KERNEL(1)
22libagx_helper(void)
23{
24   uint64_t arg =
25      nir_load_helper_arg_lo_agx() | (((uint64_t)nir_load_helper_arg_hi_agx()) << 32);
26
27   global struct agx_helper_header *hdr =
28      (global struct agx_helper_header *)arg;
29
30   uint32_t core_index = nir_load_core_id_agx();
31   uint32_t subgroups = hdr->subgroups;
32   global struct agx_helper_core *core = &hdr->cores[core_index];
33
34   while (1) {
35      nir_doorbell_agx(DB_NEXT);
36      uint32_t op = nir_load_helper_op_id_agx();
37      uint32_t arg = nir_load_helper_arg_lo_agx();
38
39      switch (op) {
40      case OP_STACK_ALLOC: {
41         uint32_t idx = core->alloc_cur;
42         if (idx >= subgroups) {
43            core->alloc_failed++;
44            nir_doorbell_agx(DB_NACK);
45            break;
46         }
47         core->alloc_max = max(core->alloc_max, ++core->alloc_cur);
48         core->alloc_count[arg]++;
49
50         nir_stack_map_agx(0, core->blocklist[idx].blocks[0]);
51         nir_stack_map_agx(1, core->blocklist[idx].blocks[1]);
52         nir_stack_map_agx(2, core->blocklist[idx].blocks[2]);
53         nir_stack_map_agx(3, core->blocklist[idx].blocks[3]);
54         nir_doorbell_agx(DB_ACK);
55         break;
56      }
57
58      case OP_STACK_FREE: {
59         if (!core->alloc_cur) { // underflow
60            nir_doorbell_agx(DB_NACK);
61            break;
62         }
63         uint32_t idx = --core->alloc_cur;
64         core->blocklist[idx].blocks[0] = nir_stack_unmap_agx(0);
65         core->blocklist[idx].blocks[1] = nir_stack_unmap_agx(1);
66         core->blocklist[idx].blocks[2] = nir_stack_unmap_agx(2);
67         core->blocklist[idx].blocks[3] = nir_stack_unmap_agx(3);
68         nir_doorbell_agx(DB_ACK);
69         break;
70      }
71
72      // TODO: Implement threadgroup allocs (for compute preemption)
73      case OP_THREADGROUP_ALLOC: {
74         nir_doorbell_agx(DB_NACK);
75         break;
76      }
77
78      case OP_THREADGROUP_FREE: {
79         nir_doorbell_agx(DB_NACK);
80         break;
81      }
82
83      case OP_END: {
84         nir_fence_helper_exit_agx();
85         return;
86      }
87
88      default:
89         *(global uint32_t *)((uintptr_t)(0xdead0000 | (op << 8))) = 0;
90         nir_fence_helper_exit_agx();
91         return;
92      }
93   }
94}
95