/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_private.h"
#include "nvk_device.h"
#include "nvk_physical_device.h"
#include "nvk_pipeline.h"
#include "nvk_shader.h"

#include "vk_nir.h"
#include "vk_pipeline.h"
#include "vk_pipeline_layout.h"

#include "nouveau_bo.h"
#include "nouveau_context.h"

#include "compiler/spirv/nir_spirv.h"

#include "drf.h"
#include "cla0c0.h"
#include "cla0c0qmd.h"
#include "clc0c0.h"
#include "clc0c0qmd.h"
#include "clc3c0.h"
#include "clc3c0qmd.h"
#include "clc6c0.h"
#include "clc6c0qmd.h"
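
/* Per-class helpers for writing Queue Meta Data (QMD) fields.  Each compute
 * class carries its own QMD layout/version, so these wrappers just bind the
 * class and QMD version for the multi-word NVVAL/NVDEF setters.
 */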
#define NVA0C0_QMDV00_06_VAL_SET(p,a...) NVVAL_MW_SET((p), NVA0C0, QMDV00_06, ##a)
#define NVA0C0_QMDV00_06_DEF_SET(p,a...) NVDEF_MW_SET((p), NVA0C0, QMDV00_06, ##a)
#define NVC0C0_QMDV02_01_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC0C0, QMDV02_01, ##a)
#define NVC0C0_QMDV02_01_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC0C0, QMDV02_01, ##a)
#define NVC3C0_QMDV02_02_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC3C0, QMDV02_02, ##a)
#define NVC3C0_QMDV02_02_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC3C0, QMDV02_02, ##a)
#define NVC6C0_QMDV03_00_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC6C0, QMDV03_00, ##a)
#define NVC6C0_QMDV03_00_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC6C0, QMDV03_00, ##a)

#define QMD_DEF_SET(qmd, class_id, version_major, version_minor, a...) \
   NVDEF_MW_SET((qmd), NV##class_id, QMDV##version_major##_##version_minor, ##a)
#define QMD_VAL_SET(qmd, class_id, version_major, version_minor, a...) \
   NVVAL_MW_SET((qmd), NV##class_id, QMDV##version_major##_##version_minor, ##a)

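/* Round a per-workgroup shared memory size up to the next configuration the
 * SM supports (8, 16, 32, 64, or 96 KiB) and encode it the way the Volta+
 * QMD *_SM_CONFIG_SHARED_MEM_SIZE fields appear to expect: the size in
 * 4 KiB units, plus one.
 */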
static int
gv100_sm_config_smem_size(uint32_t size)
{
   if (size > 64 * 1024) size = 96 * 1024;
   else if (size > 32 * 1024) size = 64 * 1024;
   else if (size > 16 * 1024) size = 32 * 1024;
   else if (size > 8 * 1024) size = 16 * 1024;
   else size = 8 * 1024;
   return (size / 4096) + 1;
}

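/* QMD fields common to every supported QMD version: API call limit, barrier
 * count, CTA (workgroup) dimensions, QMD version numbers, sampler mode, and
 * the local/shared memory sizes taken from the compiled shader.
 */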
#define base_compute_setup_launch_desc_template(qmd, shader, class_id, version_major, version_minor) \
do { \
   QMD_DEF_SET(qmd, class_id, version_major, version_minor, API_VISIBLE_CALL_LIMIT, NO_CHECK); \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, BARRIER_COUNT, shader->info.num_barriers); \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION0, \
               shader->info.cs.local_size[0]); \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION1, \
               shader->info.cs.local_size[1]); \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION2, \
               shader->info.cs.local_size[2]); \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, QMD_MAJOR_VERSION, version_major); \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, QMD_VERSION, version_minor); \
   QMD_DEF_SET(qmd, class_id, version_major, version_minor, SAMPLER_INDEX, INDEPENDENTLY); \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHADER_LOCAL_MEMORY_LOW_SIZE, \
               align(shader->info.slm_size, 0x10)); \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHARED_MEMORY_SIZE, \
               align(shader->info.cs.smem_size, 0x100)); \
} while (0)

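/* Kepler (A0C0) launch descriptor template, QMD version 0.6. */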
static void
nva0c0_compute_setup_launch_desc_template(uint32_t *qmd,
                                          struct nvk_shader *shader)
{
   base_compute_setup_launch_desc_template(qmd, shader, A0C0, 00, 06);

   if (shader->info.cs.smem_size <= (16 << 10))
      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB);
   else if (shader->info.cs.smem_size <= (32 << 10))
      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB);
   else if (shader->info.cs.smem_size <= (48 << 10))
      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB);
   else
      unreachable("Invalid shared memory size");

   uint64_t addr = shader->hdr_addr;
   assert(addr < 0xffffffff);
   NVA0C0_QMDV00_06_VAL_SET(qmd, PROGRAM_OFFSET, addr);
   NVA0C0_QMDV00_06_VAL_SET(qmd, REGISTER_COUNT, shader->info.num_gprs);
   NVA0C0_QMDV00_06_VAL_SET(qmd, SASS_VERSION, 0x30);
}

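/* Pascal (C0C0) launch descriptor template, QMD version 2.1. */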
static void
nvc0c0_compute_setup_launch_desc_template(uint32_t *qmd,
                                          struct nvk_shader *shader)
{
   base_compute_setup_launch_desc_template(qmd, shader, C0C0, 02, 01);

   uint64_t addr = shader->hdr_addr;
   assert(addr < 0xffffffff);

   NVC0C0_QMDV02_01_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
   NVC0C0_QMDV02_01_VAL_SET(qmd, PROGRAM_OFFSET, addr);
   NVC0C0_QMDV02_01_VAL_SET(qmd, REGISTER_COUNT, shader->info.num_gprs);
}

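/* Volta (C3C0) launch descriptor template, QMD version 2.2. */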
static void
nvc3c0_compute_setup_launch_desc_template(uint32_t *qmd,
                                          struct nvk_shader *shader)
{
   base_compute_setup_launch_desc_template(qmd, shader, C3C0, 02, 02);

   NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
   /* These fields only exist in QMD 2.2+ */
   NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
                            gv100_sm_config_smem_size(shader->info.cs.smem_size));
   NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
                            gv100_sm_config_smem_size(NVK_MAX_SHARED_SIZE));
   NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
                            gv100_sm_config_smem_size(shader->info.cs.smem_size));

   NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, shader->info.num_gprs);

   uint64_t addr = shader->hdr_addr;
   NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, addr & 0xffffffff);
   NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, addr >> 32);
}

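/* Ampere and newer (C6C0) launch descriptor template, QMD version 3.0. */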
static void
nvc6c0_compute_setup_launch_desc_template(uint32_t *qmd,
                                          struct nvk_shader *shader)
{
   base_compute_setup_launch_desc_template(qmd, shader, C6C0, 03, 00);

   NVC6C0_QMDV03_00_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
   /* These fields only exist in QMD 2.2+ */
   NVC6C0_QMDV03_00_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
                            gv100_sm_config_smem_size(shader->info.cs.smem_size));
   NVC6C0_QMDV03_00_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
                            gv100_sm_config_smem_size(NVK_MAX_SHARED_SIZE));
   NVC6C0_QMDV03_00_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
                            gv100_sm_config_smem_size(shader->info.cs.smem_size));

   NVC6C0_QMDV03_00_VAL_SET(qmd, REGISTER_COUNT_V, shader->info.num_gprs);

   uint64_t addr = shader->hdr_addr;
   NVC6C0_QMDV03_00_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, addr & 0xffffffff);
   NVC6C0_QMDV03_00_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, addr >> 32);
}

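/* Create a single compute pipeline: look the compute shader up in the
 * pipeline cache (or compile it from SPIR-V on a miss), upload it, and bake
 * a QMD template for the device's compute class.
 */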
VkResult
nvk_compute_pipeline_create(struct nvk_device *dev,
                            struct vk_pipeline_cache *cache,
                            const VkComputePipelineCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkPipeline *pPipeline)
{
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, pCreateInfo->layout);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   struct nvk_compute_pipeline *pipeline;
   VkResult result;

   pipeline = (void *)nvk_pipeline_zalloc(dev, NVK_PIPELINE_COMPUTE,
                                          sizeof(*pipeline), pAllocator);
   if (pipeline == NULL)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT);

   VkPipelineCreateFlags2KHR pipeline_flags =
      vk_compute_pipeline_create_flags(pCreateInfo);

   if (pipeline_flags &
       VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)
      cache = NULL;

   struct vk_pipeline_robustness_state robustness;
   vk_pipeline_robustness_state_fill(&dev->vk, &robustness,
                                     pCreateInfo->pNext,
                                     pCreateInfo->stage.pNext);

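   /* This hash doubles as the pipeline-cache key for the compiled compute
    * shader.
    */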
   unsigned char sha1[SHA1_DIGEST_LENGTH];
   nvk_hash_shader(sha1, &pCreateInfo->stage, &robustness, false,
                   pipeline_layout, NULL);

   bool cache_hit = false;
   struct vk_pipeline_cache_object *cache_obj = NULL;

   if (cache) {
      cache_obj = vk_pipeline_cache_lookup_object(cache, &sha1, sizeof(sha1),
                                                  &nvk_shader_ops, &cache_hit);
      if (cache_obj != NULL) {
         pipeline->base.shaders[MESA_SHADER_COMPUTE] =
            container_of(cache_obj, struct nvk_shader, base);
         result = VK_SUCCESS;
      }
   }

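   /* No usable cache entry: build the shader from the SPIR-V stage now. */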
   if (!cache_obj) {
      if (pipeline_flags &
          VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR) {
         result = VK_PIPELINE_COMPILE_REQUIRED;
         goto fail;
      }

      nir_shader *nir;
      result = nvk_shader_stage_to_nir(dev, &pCreateInfo->stage, &robustness,
                                       cache, NULL, &nir);
      if (result != VK_SUCCESS)
         goto fail;

      struct nvk_shader *shader =
         nvk_shader_init(dev, sha1, SHA1_DIGEST_LENGTH);
      if (shader == NULL) {
         ralloc_free(nir);
         result = vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto fail;
      }

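      /* Lower the NIR against the pipeline layout (building the
       * descriptor-to-cbuf mapping as we go), then compile it down to the
       * final shader binary.
       */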
      nvk_lower_nir(dev, nir, &robustness, false,
                    pipeline_layout->set_count,
                    pipeline_layout->set_layouts,
                    &shader->cbuf_map);

      result = nvk_compile_nir(dev, nir, pipeline_flags, &robustness,
                               NULL, cache, shader);

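      /* Hand the compiled shader to the pipeline cache.  The cache may
       * return an equivalent object that already exists, so always use its
       * return value from here on.
       */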
      if (result == VK_SUCCESS) {
         cache_obj = &shader->base;

         if (cache)
            cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);

         pipeline->base.shaders[MESA_SHADER_COMPUTE] =
            container_of(cache_obj, struct nvk_shader, base);
      }

      ralloc_free(nir);
   }

   if (result != VK_SUCCESS)
      goto fail;

   struct nvk_shader *shader =
      container_of(cache_obj, struct nvk_shader, base);

   result = nvk_shader_upload(dev, shader);
   if (result != VK_SUCCESS)
      goto fail;

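   /* Pre-bake the launch descriptor (QMD) template for this GPU's compute
    * class so that recording a dispatch only has to fill in per-dispatch
    * state.
    */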
   if (pdev->info.cls_compute >= AMPERE_COMPUTE_A)
      nvc6c0_compute_setup_launch_desc_template(pipeline->qmd_template, shader);
   else if (pdev->info.cls_compute >= VOLTA_COMPUTE_A)
      nvc3c0_compute_setup_launch_desc_template(pipeline->qmd_template, shader);
   else if (pdev->info.cls_compute >= PASCAL_COMPUTE_A)
      nvc0c0_compute_setup_launch_desc_template(pipeline->qmd_template, shader);
   else if (pdev->info.cls_compute >= KEPLER_COMPUTE_A)
      nva0c0_compute_setup_launch_desc_template(pipeline->qmd_template, shader);
   else
      unreachable("Fermi and older not supported!");

   *pPipeline = nvk_pipeline_to_handle(&pipeline->base);
   return VK_SUCCESS;

fail:
   nvk_pipeline_free(dev, &pipeline->base, pAllocator);
   return result;
}