/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_private.h"
#include "nvk_device.h"
#include "nvk_physical_device.h"
#include "nvk_pipeline.h"
#include "nvk_shader.h"

#include "vk_nir.h"
#include "vk_pipeline.h"
#include "vk_pipeline_layout.h"

#include "nouveau_bo.h"
#include "nouveau_context.h"

#include "compiler/spirv/nir_spirv.h"

#include "drf.h"
#include "cla0c0.h"
#include "cla0c0qmd.h"
#include "clc0c0.h"
#include "clc0c0qmd.h"
#include "clc3c0.h"
#include "clc3c0qmd.h"
#include "clc6c0.h"
#include "clc6c0qmd.h"
#define NVA0C0_QMDV00_06_VAL_SET(p,a...) NVVAL_MW_SET((p), NVA0C0, QMDV00_06, ##a)
#define NVA0C0_QMDV00_06_DEF_SET(p,a...) NVDEF_MW_SET((p), NVA0C0, QMDV00_06, ##a)
#define NVC0C0_QMDV02_01_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC0C0, QMDV02_01, ##a)
#define NVC0C0_QMDV02_01_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC0C0, QMDV02_01, ##a)
#define NVC3C0_QMDV02_02_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC3C0, QMDV02_02, ##a)
#define NVC3C0_QMDV02_02_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC3C0, QMDV02_02, ##a)
#define NVC6C0_QMDV03_00_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC6C0, QMDV03_00, ##a)
#define NVC6C0_QMDV03_00_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC6C0, QMDV03_00, ##a)

#define QMD_DEF_SET(qmd, class_id, version_major, version_minor, a...) \
   NVDEF_MW_SET((qmd), NV##class_id, QMDV##version_major##_##version_minor, ##a)
#define QMD_VAL_SET(qmd, class_id, version_major, version_minor, a...) \
   NVVAL_MW_SET((qmd), NV##class_id, QMDV##version_major##_##version_minor, ##a)

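/*
 * Convert a shared memory size in bytes to the SM shared-memory configuration
 * encoding used by the Volta+ QMD: round up to the next supported carve-out
 * (8, 16, 32, 64 or 96 KiB) and express it in 4 KiB units, biased by one.
 */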
static int
gv100_sm_config_smem_size(uint32_t size)
{
   if      (size > 64 * 1024) size = 96 * 1024;
   else if (size > 32 * 1024) size = 64 * 1024;
   else if (size > 16 * 1024) size = 32 * 1024;
   else if (size >  8 * 1024) size = 16 * 1024;
   else                       size =  8 * 1024;
   return (size / 4096) + 1;
}

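/*
 * Fill the QMD fields common to every compute class we support: barrier
 * count, CTA (workgroup) dimensions, QMD version, and local/shared memory
 * sizes.  Class- and version-specific fields are set by the per-class
 * helpers below.
 */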
#define base_compute_setup_launch_desc_template(qmd, shader, class_id, version_major, version_minor)   \
do {                                                                                                   \
   QMD_DEF_SET(qmd, class_id, version_major, version_minor, API_VISIBLE_CALL_LIMIT, NO_CHECK);         \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, BARRIER_COUNT, shader->info.num_barriers); \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION0,                     \
                                                            shader->info.cs.local_size[0]);            \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION1,                     \
                                                            shader->info.cs.local_size[1]);            \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION2,                     \
                                                            shader->info.cs.local_size[2]);            \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, QMD_MAJOR_VERSION, version_major);         \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, QMD_VERSION, version_minor);               \
   QMD_DEF_SET(qmd, class_id, version_major, version_minor, SAMPLER_INDEX, INDEPENDENTLY);             \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);         \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHADER_LOCAL_MEMORY_LOW_SIZE,              \
                                                            align(shader->info.slm_size, 0x10));       \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHARED_MEMORY_SIZE,                        \
                                                            align(shader->info.cs.smem_size, 0x100));  \
} while (0)

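/* Kepler (A0C0), QMD version 0.6 */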
static void
nva0c0_compute_setup_launch_desc_template(uint32_t *qmd,
                                          struct nvk_shader *shader)
{
   base_compute_setup_launch_desc_template(qmd, shader, A0C0, 00, 06);

   if (shader->info.cs.smem_size <= (16 << 10))
      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB);
   else if (shader->info.cs.smem_size <= (32 << 10))
      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB);
   else if (shader->info.cs.smem_size <= (48 << 10))
      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB);
   else
      unreachable("Invalid shared memory size");

   uint64_t addr = shader->hdr_addr;
   assert(addr < 0xffffffff);
   NVA0C0_QMDV00_06_VAL_SET(qmd, PROGRAM_OFFSET, addr);
   NVA0C0_QMDV00_06_VAL_SET(qmd, REGISTER_COUNT, shader->info.num_gprs);
   NVA0C0_QMDV00_06_VAL_SET(qmd, SASS_VERSION, 0x30);
}

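/* Pascal (C0C0), QMD version 2.1 */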
static void
nvc0c0_compute_setup_launch_desc_template(uint32_t *qmd,
                                          struct nvk_shader *shader)
{
   base_compute_setup_launch_desc_template(qmd, shader, C0C0, 02, 01);

   uint64_t addr = shader->hdr_addr;
   assert(addr < 0xffffffff);

   NVC0C0_QMDV02_01_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
   NVC0C0_QMDV02_01_VAL_SET(qmd, PROGRAM_OFFSET, addr);
   NVC0C0_QMDV02_01_VAL_SET(qmd, REGISTER_COUNT, shader->info.num_gprs);
}

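/* Volta (C3C0), QMD version 2.2 */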
static void
nvc3c0_compute_setup_launch_desc_template(uint32_t *qmd,
                                          struct nvk_shader *shader)
{
   base_compute_setup_launch_desc_template(qmd, shader, C3C0, 02, 02);

   NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
   /* those are all QMD 2.2+ */
   NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
                            gv100_sm_config_smem_size(shader->info.cs.smem_size));
   NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
                            gv100_sm_config_smem_size(NVK_MAX_SHARED_SIZE));
   NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
                            gv100_sm_config_smem_size(shader->info.cs.smem_size));

   NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, shader->info.num_gprs);

   uint64_t addr = shader->hdr_addr;
   NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, addr & 0xffffffff);
   NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, addr >> 32);
}

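/* Ampere (C6C0), QMD version 3.0 */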
static void
nvc6c0_compute_setup_launch_desc_template(uint32_t *qmd,
                                          struct nvk_shader *shader)
{
   base_compute_setup_launch_desc_template(qmd, shader, C6C0, 03, 00);

   NVC6C0_QMDV03_00_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
   /* those are all QMD 2.2+ */
   NVC6C0_QMDV03_00_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
                            gv100_sm_config_smem_size(shader->info.cs.smem_size));
   NVC6C0_QMDV03_00_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
                            gv100_sm_config_smem_size(NVK_MAX_SHARED_SIZE));
   NVC6C0_QMDV03_00_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
                            gv100_sm_config_smem_size(shader->info.cs.smem_size));

   NVC6C0_QMDV03_00_VAL_SET(qmd, REGISTER_COUNT_V, shader->info.num_gprs);

   uint64_t addr = shader->hdr_addr;
   NVC6C0_QMDV03_00_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, addr & 0xffffffff);
   NVC6C0_QMDV03_00_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, addr >> 32);
}

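/*
 * Create a compute pipeline: look the shader up in the pipeline cache (or
 * compile it from SPIR-V via NIR), upload it, and bake a QMD launch
 * descriptor template for the device's compute class.
 */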
VkResult
nvk_compute_pipeline_create(struct nvk_device *dev,
                            struct vk_pipeline_cache *cache,
                            const VkComputePipelineCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkPipeline *pPipeline)
{
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, pCreateInfo->layout);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   struct nvk_compute_pipeline *pipeline;
   VkResult result;

   pipeline = (void *)nvk_pipeline_zalloc(dev, NVK_PIPELINE_COMPUTE,
                                          sizeof(*pipeline), pAllocator);
   if (pipeline == NULL)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT);

   VkPipelineCreateFlags2KHR pipeline_flags =
      vk_compute_pipeline_create_flags(pCreateInfo);

   if (pipeline_flags &
       VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)
      cache = NULL;

   struct vk_pipeline_robustness_state robustness;
   vk_pipeline_robustness_state_fill(&dev->vk, &robustness,
                                     pCreateInfo->pNext,
                                     pCreateInfo->stage.pNext);

   unsigned char sha1[SHA1_DIGEST_LENGTH];
   nvk_hash_shader(sha1, &pCreateInfo->stage, &robustness, false,
                   pipeline_layout, NULL);

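   /*
    * Shaders are cached by a hash of the shader stage, robustness state, and
    * pipeline layout.  Try the cache first and only fall back to compiling
    * from NIR if there is no hit.
    */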
   bool cache_hit = false;
   struct vk_pipeline_cache_object *cache_obj = NULL;

   if (cache) {
      cache_obj = vk_pipeline_cache_lookup_object(cache, &sha1, sizeof(sha1),
                                                  &nvk_shader_ops, &cache_hit);
      pipeline->base.shaders[MESA_SHADER_COMPUTE] =
         container_of(cache_obj, struct nvk_shader, base);
      result = VK_SUCCESS;
   }

   if (!cache_obj) {
      if (pipeline_flags &
          VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR) {
         result = VK_PIPELINE_COMPILE_REQUIRED;
         goto fail;
      }

      nir_shader *nir;
      result = nvk_shader_stage_to_nir(dev, &pCreateInfo->stage, &robustness,
                                       cache, NULL, &nir);
      if (result != VK_SUCCESS)
         goto fail;

      struct nvk_shader *shader = nvk_shader_init(dev, sha1, SHA1_DIGEST_LENGTH);
      if (shader == NULL) {
         ralloc_free(nir);
         result = vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto fail;
      }

      nvk_lower_nir(dev, nir, &robustness, false,
                    pipeline_layout->set_count,
                    pipeline_layout->set_layouts,
                    &shader->cbuf_map);

      result = nvk_compile_nir(dev, nir, pipeline_flags, &robustness, NULL, cache, shader);

      if (result == VK_SUCCESS) {
         cache_obj = &shader->base;

         if (cache)
            cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);

         pipeline->base.shaders[MESA_SHADER_COMPUTE] =
            container_of(cache_obj, struct nvk_shader, base);
      }

      ralloc_free(nir);
   }

   if (result != VK_SUCCESS)
      goto fail;

   struct nvk_shader *shader = container_of(cache_obj, struct nvk_shader, base);

   result = nvk_shader_upload(dev, shader);
   if (result != VK_SUCCESS)
      goto fail;

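   /*
    * Bake the launch descriptor (QMD) template for this device's compute
    * class; the remaining per-dispatch fields are presumably filled in when
    * dispatches are recorded.
    */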
   if (pdev->info.cls_compute >= AMPERE_COMPUTE_A)
      nvc6c0_compute_setup_launch_desc_template(pipeline->qmd_template, shader);
   else if (pdev->info.cls_compute >= VOLTA_COMPUTE_A)
      nvc3c0_compute_setup_launch_desc_template(pipeline->qmd_template, shader);
   else if (pdev->info.cls_compute >= PASCAL_COMPUTE_A)
      nvc0c0_compute_setup_launch_desc_template(pipeline->qmd_template, shader);
   else if (pdev->info.cls_compute >= KEPLER_COMPUTE_A)
      nva0c0_compute_setup_launch_desc_template(pipeline->qmd_template, shader);
   else
      unreachable("Fermi and older not supported!");

   *pPipeline = nvk_pipeline_to_handle(&pipeline->base);
   return VK_SUCCESS;

fail:
   nvk_pipeline_free(dev, &pipeline->base, pAllocator);
   return result;
}
264