/*
 * Copyright 2024 Valve Corporation
 * Copyright 2024 Alyssa Rosenzweig
 * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "libagx/query.h"
#include "vulkan/vulkan_core.h"
#include "agx_helpers.h"
#include "agx_linker.h"
#include "agx_nir_lower_gs.h"
#include "agx_pack.h"
#include "agx_scratch.h"
#include "agx_tilebuffer.h"
#include "hk_buffer.h"
#include "hk_cmd_buffer.h"
#include "hk_descriptor_set.h"
#include "hk_device.h"
#include "hk_entrypoints.h"
#include "hk_physical_device.h"
#include "hk_shader.h"
#include "libagx_dgc.h"
#include "libagx_shaders.h"
#include "pool.h"

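/* Per-command-buffer compute state needs no initialization at
 * vkBeginCommandBuffer time, so this hook is deliberately empty.
 */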
void
hk_cmd_buffer_begin_compute(struct hk_cmd_buffer *cmd,
                            const VkCommandBufferBeginInfo *pBeginInfo)
{
}

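/* Throw away all cached compute state, forcing shaders and descriptors to be
 * re-bound before the next dispatch.
 */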
void
hk_cmd_invalidate_compute_state(struct hk_cmd_buffer *cmd)
{
   memset(&cmd->state.cs, 0, sizeof(cmd->state.cs));
}

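/* Record the bound compute shader. It is consumed at dispatch time rather
 * than bind time, so no hardware state is emitted here.
 */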
void
hk_cmd_bind_compute_shader(struct hk_cmd_buffer *cmd,
                           struct hk_api_shader *shader)
{
   cmd->state.cs.shader = shader;
}

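/* Emit a barrier into the CDM (compute) control stream to flush caches. The
 * caller is responsible for reserving at least AGX_CDM_BARRIER_LENGTH bytes
 * first, as the assert below checks.
 */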
void
hk_cdm_cache_flush(struct hk_device *dev, struct hk_cs *cs)
{
   assert(cs->type == HK_CS_CDM);
   assert(cs->current + AGX_CDM_BARRIER_LENGTH < cs->end &&
          "caller must ensure space");

   cs->current = agx_cdm_barrier(cs->current, dev->dev.chip);
   cs->stats.flushes++;
}

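/* Encode a compute dispatch into the CDM control stream from a pre-packed
 * launch word and the address of the shader's USC words, then flush the CDM
 * caches so later work in the stream observes the dispatch's results.
 */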
void
hk_dispatch_with_usc_launch(struct hk_device *dev, struct hk_cs *cs,
                            struct agx_cdm_launch_word_0_packed launch,
                            uint32_t usc, struct agx_grid grid,
                            struct agx_workgroup wg)
{
   assert(cs->current + 0x2000 < cs->end && "should have ensured space");
   cs->stats.cmds++;

   cs->current =
      agx_cdm_launch(cs->current, dev->dev.chip, grid, wg, launch, usc);

   hk_cdm_cache_flush(dev, cs);
}

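/* Convenience wrapper that packs the CDM launch word from the compiled
 * shader's register counts before dispatching.
 */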
void
hk_dispatch_with_usc(struct hk_device *dev, struct hk_cs *cs,
                     struct agx_shader_info *info, uint32_t usc,
                     struct agx_grid grid, struct agx_workgroup local_size)
{
   struct agx_cdm_launch_word_0_packed launch;
   agx_pack(&launch, CDM_LAUNCH_WORD_0, cfg) {
      cfg.sampler_state_register_count = 1;
      cfg.uniform_register_count = info->push_count;
      cfg.preshader_register_count = info->nr_preamble_gprs;
   }

   hk_dispatch_with_usc_launch(dev, cs, launch, usc, grid, local_size);
}

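/* Common path for direct and indirect dispatches: bump the compute shader
 * invocation counter if a pipeline statistics query is active, then encode
 * the dispatch itself.
 */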
static void
dispatch(struct hk_cmd_buffer *cmd, struct agx_grid grid)
{
   struct hk_device *dev = hk_cmd_buffer_device(cmd);
   struct hk_shader *s = hk_only_variant(cmd->state.cs.shader);
   struct hk_cs *cs = hk_cmd_buffer_get_cs(cmd, true /* compute */);
   if (!cs)
      return;

   struct agx_workgroup local_size =
      agx_workgroup(s->b.info.workgroup_size[0], s->b.info.workgroup_size[1],
                    s->b.info.workgroup_size[2]);

   uint64_t stat = hk_pipeline_stat_addr(
      cmd, VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT);

   if (stat) {
      perf_debug(dev, "CS invocation statistic");

      uint64_t group_count_addr =
         cmd->state.cs.descriptors.root.cs.group_count_addr;

      libagx_increment_cs_invocations(cs, agx_1d(1), group_count_addr,
                                      AGX_BARRIER_ALL, stat,
                                      agx_workgroup_threads(local_size));
   }

   hk_ensure_cs_has_space(cmd, cs, 0x2000 /* TODO */);

   /* Direct dispatches are encoded in threads, not workgroups */
   if (!agx_is_indirect(grid)) {
      grid.count[0] *= local_size.x;
      grid.count[1] *= local_size.y;
      grid.count[2] *= local_size.z;
   }

   hk_dispatch_with_local_size(cmd, cs, s, grid, local_size);
   cs->stats.calls++;
}

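/* Direct dispatch entry point. This also handles the base workgroup offsets
 * from vkCmdDispatchBase; plain vkCmdDispatch lands here with zero bases.
 */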
VKAPI_ATTR void VKAPI_CALL
hk_CmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t baseGroupX,
                   uint32_t baseGroupY, uint32_t baseGroupZ,
                   uint32_t groupCountX, uint32_t groupCountY,
                   uint32_t groupCountZ)
{
   VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
   struct hk_descriptor_state *desc = &cmd->state.cs.descriptors;
   if (desc->push_dirty)
      hk_cmd_buffer_flush_push_descriptors(cmd, desc);

   desc->root.cs.base_group[0] = baseGroupX;
   desc->root.cs.base_group[1] = baseGroupY;
   desc->root.cs.base_group[2] = baseGroupZ;

   /* We don't want to key the shader to whether we're indirectly dispatching,
    * so treat everything as indirect: upload the direct group count to memory
    * and let the shader read it from there, exactly as it would for a real
    * indirect dispatch.
    */
   VkDispatchIndirectCommand group_count = {
      .x = groupCountX,
      .y = groupCountY,
      .z = groupCountZ,
   };

   desc->root.cs.group_count_addr =
      hk_pool_upload(cmd, &group_count, sizeof(group_count), 8);

   dispatch(cmd, agx_3d(groupCountX, groupCountY, groupCountZ));
}

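/* Indirect dispatch entry point. The group count lives in GPU memory and is
 * unknown at record time, so both the grid and the root descriptor point at
 * the VkDispatchIndirectCommand in the buffer.
 */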
VKAPI_ATTR void VKAPI_CALL
hk_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer,
                       VkDeviceSize offset)
{
   VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(hk_buffer, buffer, _buffer);
   struct hk_descriptor_state *desc = &cmd->state.cs.descriptors;
   if (desc->push_dirty)
      hk_cmd_buffer_flush_push_descriptors(cmd, desc);

   desc->root.cs.base_group[0] = 0;
   desc->root.cs.base_group[1] = 0;
   desc->root.cs.base_group[2] = 0;

   uint64_t dispatch_addr = hk_buffer_address(buffer, offset);
   assert(dispatch_addr != 0);

   desc->root.cs.group_count_addr = dispatch_addr;
   dispatch(cmd, agx_grid_indirect(dispatch_addr));
}