/*
 * Copyright 2012 Francisco Jerez
 * Copyright 2015 Samuel Pitoiset
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "nv50/nv50_context.h"
#include "nv50/nv50_compute.xml.h"

#include "codegen/nv50_ir_driver.h"

int
nv50_screen_compute_setup(struct nv50_screen *screen,
                          struct nouveau_pushbuf *push)
{
   struct nouveau_device *dev = screen->base.device;
   struct nouveau_object *chan = screen->base.channel;
   struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data;
   unsigned obj_class;
   int i, ret;

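   /* Pick the compute object class for this chipset: the NVA3/NVA5/NVA8
    * Tesla variants expose NVA3_COMPUTE_CLASS, every other supported NV50
    * family chip uses NV50_COMPUTE_CLASS. */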
   switch (dev->chipset & 0xf0) {
   case 0x50:
   case 0x80:
   case 0x90:
      obj_class = NV50_COMPUTE_CLASS;
      break;
   case 0xa0:
      switch (dev->chipset) {
      case 0xa3:
      case 0xa5:
      case 0xa8:
         obj_class = NVA3_COMPUTE_CLASS;
         break;
      default:
         obj_class = NV50_COMPUTE_CLASS;
         break;
      }
      break;
   default:
      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
      return -1;
   }

   ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0,
                            &screen->compute);
   if (ret)
      return ret;

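   /* Bind the new compute object to its subchannel. */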
   BEGIN_NV04(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->compute->handle);

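   /* Point the compute engine at the screen's stack buffer;
    * STACK_SIZE_LOG takes a log2-encoded size. */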
   BEGIN_NV04(push, NV50_CP(UNK02A0), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(DMA_STACK), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(STACK_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->stack_bo->offset);
   PUSH_DATA (push, screen->stack_bo->offset);
   BEGIN_NV04(push, NV50_CP(STACK_SIZE_LOG), 1);
   PUSH_DATA (push, 4);

   BEGIN_NV04(push, NV50_CP(UNK0290), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(LANES32_ENABLE), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(REG_MODE), 1);
   PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED);
   BEGIN_NV04(push, NV50_CP(UNK0384), 1);
   PUSH_DATA (push, 0x100);
   BEGIN_NV04(push, NV50_CP(DMA_GLOBAL), 1);
   PUSH_DATA (push, fifo->vram);

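   /* Reset global memory windows 0-14; window 15 below is set up as an
    * unrestricted linear window (limit ~0). */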
   for (i = 0; i < 15; i++) {
      BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(i)), 2);
      PUSH_DATA (push, 0);
      PUSH_DATA (push, 0);
      BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(i)), 1);
      PUSH_DATA (push, 0);
      BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(i)), 1);
      PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
   }

   BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(15)), 2);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(15)), 1);
   PUSH_DATA (push, ~0);
   BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(15)), 1);
   PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);

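   /* Size local and stack allocations for up to 2^7 warps (the LOG_ALLOC
    * methods are log2-encoded) with clamping disabled, and start with no
    * user parameters. */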
   BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_LOG_ALLOC), 1);
   PUSH_DATA (push, 7);
   BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_NO_CLAMP), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(STACK_WARPS_LOG_ALLOC), 1);
   PUSH_DATA (push, 7);
   BEGIN_NV04(push, NV50_CP(STACK_WARPS_NO_CLAMP), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
   PUSH_DATA (push, 0);

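   /* Texture setup: the TIC (texture image control) and TSC (sampler
    * control) tables both live in screen->txc, with the TSC entries
    * starting 64 KiB in. */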
   BEGIN_NV04(push, NV50_CP(DMA_TEXTURE), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(TEX_LIMITS), 1);
   PUSH_DATA (push, 0x54);
   BEGIN_NV04(push, NV50_CP(LINKED_TSC), 1);
   PUSH_DATA (push, 0);

   BEGIN_NV04(push, NV50_CP(DMA_TIC), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(TIC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset);
   PUSH_DATA (push, screen->txc->offset);
   PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);

   BEGIN_NV04(push, NV50_CP(DMA_TSC), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(TSC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset + 65536);
   PUSH_DATA (push, screen->txc->offset + 65536);
   PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);

   BEGIN_NV04(push, NV50_CP(DMA_CODE_CB), 1);
   PUSH_DATA (push, fifo->vram);

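   /* Local (per-thread temporary) memory is placed 64 KiB into the TLS
    * buffer; LOCAL_SIZE_LOG is log2-encoded and derived from the
    * available TLS space. */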
   BEGIN_NV04(push, NV50_CP(DMA_LOCAL), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(LOCAL_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->tls_bo->offset + 65536);
   PUSH_DATA (push, screen->tls_bo->offset + 65536);
   BEGIN_NV04(push, NV50_CP(LOCAL_SIZE_LOG), 1);
   PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2));

   return 0;
}

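/* Re-mark every bound global buffer as resident in the compute bufctx so
 * it gets validated along with the next grid launch. */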
static void
nv50_compute_validate_globals(struct nv50_context *nv50)
{
   unsigned i;

   for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *);
        ++i) {
      struct pipe_resource *res = *util_dynarray_element(
         &nv50->global_residents, struct pipe_resource *, i);
      if (res)
         nv50_add_bufctx_resident(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL,
                                  nv04_resource(res), NOUVEAU_BO_RDWR);
   }
}

static struct nv50_state_validate
validate_list_cp[] = {
   { nv50_compprog_validate,        NV50_NEW_CP_PROGRAM },
   { nv50_compute_validate_globals, NV50_NEW_CP_GLOBALS },
};

static bool
nv50_state_validate_cp(struct nv50_context *nv50, uint32_t mask)
{
   bool ret;

   /* TODO: validate textures, samplers, surfaces */
   ret = nv50_state_validate(nv50, mask, validate_list_cp,
                             ARRAY_SIZE(validate_list_cp), &nv50->dirty_cp,
                             nv50->bufctx_cp);

   if (unlikely(nv50->state.flushed))
      nv50_bufctx_fence(nv50->bufctx_cp, true);
   return ret;
}

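/* Upload the kernel input parameters into the USER_PARAM registers, going
 * through a temporary GART buffer that is referenced from the pushbuf. */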
static void
nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
{
   struct nv50_screen *screen = nv50->screen;
   struct nouveau_pushbuf *push = screen->base.pushbuf;
   unsigned size = align(nv50->compprog->parm_size, 0x4);

   BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
   PUSH_DATA (push, (size / 4) << 8);

   if (size) {
      struct nouveau_mm_allocation *mm;
      struct nouveau_bo *bo = NULL;
      unsigned offset;

      mm = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &offset);
      assert(mm);

      nouveau_bo_map(bo, 0, screen->base.client);
      memcpy(bo->map + offset, input, size);

      nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
      nouveau_pushbuf_bufctx(push, nv50->bufctx);
      nouveau_pushbuf_validate(push);

      BEGIN_NV04(push, NV50_CP(USER_PARAM(0)), size / 4);
      nouveau_pushbuf_data(push, bo, offset, size);

      nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm);
      nouveau_bo_ref(NULL, &bo);
      nouveau_bufctx_reset(nv50->bufctx, 0);
   }
}

void
nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   unsigned block_size = info->block[0] * info->block[1] * info->block[2];
   struct nv50_program *cp = nv50->compprog;
   bool ret;

   ret = !nv50_state_validate_cp(nv50, ~0);
   if (ret) {
      NOUVEAU_ERR("Failed to launch grid!\n");
      return;
   }

   nv50_compute_upload_input(nv50, info->input);

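   /* Point the compute engine at the kernel's entry offset in the code
    * segment. */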
   BEGIN_NV04(push, NV50_CP(CP_START_ID), 1);
   PUSH_DATA (push, cp->code_base);

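   /* Shared memory also has to cover the uploaded kernel parameters, hence
    * the parm_size term; the extra 0x10 bytes are presumably reserved for
    * driver-internal data. */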
   BEGIN_NV04(push, NV50_CP(SHARED_SIZE), 1);
   PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40));
   BEGIN_NV04(push, NV50_CP(CP_REG_ALLOC_TEMP), 1);
   PUSH_DATA (push, cp->max_gpr);

   /* grid/block setup */
   BEGIN_NV04(push, NV50_CP(BLOCKDIM_XY), 2);
   PUSH_DATA (push, info->block[1] << 16 | info->block[0]);
   PUSH_DATA (push, info->block[2]);
   BEGIN_NV04(push, NV50_CP(BLOCK_ALLOC), 1);
   PUSH_DATA (push, 1 << 16 | block_size);
   BEGIN_NV04(push, NV50_CP(BLOCKDIM_LATCH), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(GRIDDIM), 1);
   PUSH_DATA (push, info->grid[1] << 16 | info->grid[0]);
   BEGIN_NV04(push, NV50_CP(GRIDID), 1);
   PUSH_DATA (push, 1);

   /* kernel launching */
   BEGIN_NV04(push, NV50_CP(LAUNCH), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
   PUSH_DATA (push, 0);

   /* binding a compute shader clobbers fragment shader state */
   nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG;
}