1 /*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "r600_cs.h"
25 #include "util/u_viewport.h"
26 #include "tgsi/tgsi_scan.h"
27
/* Register offsets not exposed by the shared headers.  The guard-band
 * (PA_CL_GB_*) block lives at a different offset on Cayman than on
 * R600..Evergreen. */
#define R600_R_028C0C_PA_CL_GB_VERT_CLIP_ADJ 0x028C0C
#define CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ 0x28be8
#define R_02843C_PA_CL_VPORT_XSCALE 0x02843C

/* PA_SC_VPORT_SCISSOR_0_TL/BR: S_* packs a field, G_* extracts it and
 * C_* is the clear mask.  Coordinates are 15-bit unsigned values. */
#define R_028250_PA_SC_VPORT_SCISSOR_0_TL 0x028250
#define S_028250_TL_X(x) (((unsigned)(x) & 0x7FFF) << 0)
#define G_028250_TL_X(x) (((x) >> 0) & 0x7FFF)
#define C_028250_TL_X 0xFFFF8000
#define S_028250_TL_Y(x) (((unsigned)(x) & 0x7FFF) << 16)
#define G_028250_TL_Y(x) (((x) >> 16) & 0x7FFF)
#define C_028250_TL_Y 0x8000FFFF
#define S_028250_WINDOW_OFFSET_DISABLE(x) (((unsigned)(x) & 0x1) << 31)
#define G_028250_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1)
#define C_028250_WINDOW_OFFSET_DISABLE 0x7FFFFFFF
#define S_028254_BR_X(x) (((unsigned)(x) & 0x7FFF) << 0)
#define G_028254_BR_X(x) (((x) >> 0) & 0x7FFF)
#define C_028254_BR_X 0xFFFF8000
#define S_028254_BR_Y(x) (((unsigned)(x) & 0x7FFF) << 16)
#define G_028254_BR_Y(x) (((x) >> 16) & 0x7FFF)
#define C_028254_BR_Y 0x8000FFFF
#define R_0282D0_PA_SC_VPORT_ZMIN_0 0x0282D0
#define R_0282D4_PA_SC_VPORT_ZMAX_0 0x0282D4

/* Largest scissor coordinate supported by the hardware generation.
 * Note: the macro parameter is parenthesized so GET_MAX_SCISSOR can be
 * used with any pointer expression. */
#define GET_MAX_SCISSOR(rctx) ((rctx)->chip_class >= EVERGREEN ? 16384 : 8192)
52
r600_set_scissor_states(struct pipe_context * ctx,unsigned start_slot,unsigned num_scissors,const struct pipe_scissor_state * state)53 static void r600_set_scissor_states(struct pipe_context *ctx,
54 unsigned start_slot,
55 unsigned num_scissors,
56 const struct pipe_scissor_state *state)
57 {
58 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
59 int i;
60
61 for (i = 0; i < num_scissors; i++)
62 rctx->scissors.states[start_slot + i] = state[i];
63
64 if (!rctx->scissor_enabled)
65 return;
66
67 rctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
68 rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
69 }
70
71 /* Since the guard band disables clipping, we have to clip per-pixel
72 * using a scissor.
73 */
r600_get_scissor_from_viewport(struct r600_common_context * rctx,const struct pipe_viewport_state * vp,struct r600_signed_scissor * scissor)74 static void r600_get_scissor_from_viewport(struct r600_common_context *rctx,
75 const struct pipe_viewport_state *vp,
76 struct r600_signed_scissor *scissor)
77 {
78 float tmp, minx, miny, maxx, maxy;
79
80 /* Convert (-1, -1) and (1, 1) from clip space into window space. */
81 minx = -vp->scale[0] + vp->translate[0];
82 miny = -vp->scale[1] + vp->translate[1];
83 maxx = vp->scale[0] + vp->translate[0];
84 maxy = vp->scale[1] + vp->translate[1];
85
86 /* r600_draw_rectangle sets this. Disable the scissor. */
87 if (minx == -1 && miny == -1 && maxx == 1 && maxy == 1) {
88 scissor->minx = scissor->miny = 0;
89 scissor->maxx = scissor->maxy = GET_MAX_SCISSOR(rctx);
90 return;
91 }
92
93 /* Handle inverted viewports. */
94 if (minx > maxx) {
95 tmp = minx;
96 minx = maxx;
97 maxx = tmp;
98 }
99 if (miny > maxy) {
100 tmp = miny;
101 miny = maxy;
102 maxy = tmp;
103 }
104
105 /* Convert to integer and round up the max bounds. */
106 scissor->minx = minx;
107 scissor->miny = miny;
108 scissor->maxx = ceilf(maxx);
109 scissor->maxy = ceilf(maxy);
110 }
111
r600_clamp_scissor(struct r600_common_context * rctx,struct pipe_scissor_state * out,struct r600_signed_scissor * scissor)112 static void r600_clamp_scissor(struct r600_common_context *rctx,
113 struct pipe_scissor_state *out,
114 struct r600_signed_scissor *scissor)
115 {
116 unsigned max_scissor = GET_MAX_SCISSOR(rctx);
117 out->minx = CLAMP(scissor->minx, 0, max_scissor);
118 out->miny = CLAMP(scissor->miny, 0, max_scissor);
119 out->maxx = CLAMP(scissor->maxx, 0, max_scissor);
120 out->maxy = CLAMP(scissor->maxy, 0, max_scissor);
121 }
122
r600_clip_scissor(struct pipe_scissor_state * out,struct pipe_scissor_state * clip)123 static void r600_clip_scissor(struct pipe_scissor_state *out,
124 struct pipe_scissor_state *clip)
125 {
126 out->minx = MAX2(out->minx, clip->minx);
127 out->miny = MAX2(out->miny, clip->miny);
128 out->maxx = MIN2(out->maxx, clip->maxx);
129 out->maxy = MIN2(out->maxy, clip->maxy);
130 }
131
r600_scissor_make_union(struct r600_signed_scissor * out,struct r600_signed_scissor * in)132 static void r600_scissor_make_union(struct r600_signed_scissor *out,
133 struct r600_signed_scissor *in)
134 {
135 out->minx = MIN2(out->minx, in->minx);
136 out->miny = MIN2(out->miny, in->miny);
137 out->maxx = MAX2(out->maxx, in->maxx);
138 out->maxy = MAX2(out->maxy, in->maxy);
139 }
140
evergreen_apply_scissor_bug_workaround(struct r600_common_context * rctx,struct pipe_scissor_state * scissor)141 void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
142 struct pipe_scissor_state *scissor)
143 {
144 if (rctx->chip_class == EVERGREEN || rctx->chip_class == CAYMAN) {
145 if (scissor->maxx == 0)
146 scissor->minx = 1;
147 if (scissor->maxy == 0)
148 scissor->miny = 1;
149
150 if (rctx->chip_class == CAYMAN &&
151 scissor->maxx == 1 && scissor->maxy == 1)
152 scissor->maxx = 2;
153 }
154 }
155
r600_emit_one_scissor(struct r600_common_context * rctx,struct radeon_cmdbuf * cs,struct r600_signed_scissor * vp_scissor,struct pipe_scissor_state * scissor)156 static void r600_emit_one_scissor(struct r600_common_context *rctx,
157 struct radeon_cmdbuf *cs,
158 struct r600_signed_scissor *vp_scissor,
159 struct pipe_scissor_state *scissor)
160 {
161 struct pipe_scissor_state final;
162
163 if (rctx->vs_disables_clipping_viewport) {
164 final.minx = final.miny = 0;
165 final.maxx = final.maxy = GET_MAX_SCISSOR(rctx);
166 } else {
167 r600_clamp_scissor(rctx, &final, vp_scissor);
168 }
169
170 if (scissor)
171 r600_clip_scissor(&final, scissor);
172
173 evergreen_apply_scissor_bug_workaround(rctx, &final);
174
175 radeon_emit(cs, S_028250_TL_X(final.minx) |
176 S_028250_TL_Y(final.miny) |
177 S_028250_WINDOW_OFFSET_DISABLE(1));
178 radeon_emit(cs, S_028254_BR_X(final.maxx) |
179 S_028254_BR_Y(final.maxy));
180 }
181
/* The viewport range is [-MAX, MAX].  The macro parameter is
 * parenthesized so the macro is safe with any pointer expression. */
#define GET_MAX_VIEWPORT_RANGE(rctx) ((rctx)->chip_class >= EVERGREEN ? 32768 : 16384)
184
r600_emit_guardband(struct r600_common_context * rctx,struct r600_signed_scissor * vp_as_scissor)185 static void r600_emit_guardband(struct r600_common_context *rctx,
186 struct r600_signed_scissor *vp_as_scissor)
187 {
188 struct radeon_cmdbuf *cs = rctx->gfx.cs;
189 struct pipe_viewport_state vp;
190 float left, top, right, bottom, max_range, guardband_x, guardband_y;
191
192 /* Reconstruct the viewport transformation from the scissor. */
193 vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0;
194 vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0;
195 vp.scale[0] = vp_as_scissor->maxx - vp.translate[0];
196 vp.scale[1] = vp_as_scissor->maxy - vp.translate[1];
197
198 /* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
199 if (vp_as_scissor->minx == vp_as_scissor->maxx)
200 vp.scale[0] = 0.5;
201 if (vp_as_scissor->miny == vp_as_scissor->maxy)
202 vp.scale[1] = 0.5;
203
204 /* Find the biggest guard band that is inside the supported viewport
205 * range. The guard band is specified as a horizontal and vertical
206 * distance from (0,0) in clip space.
207 *
208 * This is done by applying the inverse viewport transformation
209 * on the viewport limits to get those limits in clip space.
210 *
211 * Use a limit one pixel smaller to allow for some precision error.
212 */
213 max_range = GET_MAX_VIEWPORT_RANGE(rctx) - 1;
214 left = (-max_range - vp.translate[0]) / vp.scale[0];
215 right = ( max_range - vp.translate[0]) / vp.scale[0];
216 top = (-max_range - vp.translate[1]) / vp.scale[1];
217 bottom = ( max_range - vp.translate[1]) / vp.scale[1];
218
219 assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1);
220
221 guardband_x = MIN2(-left, right);
222 guardband_y = MIN2(-top, bottom);
223
224 /* If any of the GB registers is updated, all of them must be updated. */
225 if (rctx->chip_class >= CAYMAN)
226 radeon_set_context_reg_seq(cs, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
227 else
228 radeon_set_context_reg_seq(cs, R600_R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 4);
229
230 radeon_emit(cs, fui(guardband_y)); /* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
231 radeon_emit(cs, fui(1.0)); /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
232 radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
233 radeon_emit(cs, fui(1.0)); /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
234 }
235
r600_emit_scissors(struct r600_common_context * rctx,struct r600_atom * atom)236 static void r600_emit_scissors(struct r600_common_context *rctx, struct r600_atom *atom)
237 {
238 struct radeon_cmdbuf *cs = rctx->gfx.cs;
239 struct pipe_scissor_state *states = rctx->scissors.states;
240 unsigned mask = rctx->scissors.dirty_mask;
241 bool scissor_enabled = rctx->scissor_enabled;
242 struct r600_signed_scissor max_vp_scissor;
243 int i;
244
245 /* The simple case: Only 1 viewport is active. */
246 if (!rctx->vs_writes_viewport_index) {
247 struct r600_signed_scissor *vp = &rctx->viewports.as_scissor[0];
248
249 if (!(mask & 1))
250 return;
251
252 radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
253 r600_emit_one_scissor(rctx, cs, vp, scissor_enabled ? &states[0] : NULL);
254 r600_emit_guardband(rctx, vp);
255 rctx->scissors.dirty_mask &= ~1; /* clear one bit */
256 return;
257 }
258
259 /* Shaders can draw to any viewport. Make a union of all viewports. */
260 max_vp_scissor = rctx->viewports.as_scissor[0];
261 for (i = 1; i < R600_MAX_VIEWPORTS; i++)
262 r600_scissor_make_union(&max_vp_scissor,
263 &rctx->viewports.as_scissor[i]);
264
265 while (mask) {
266 int start, count, i;
267
268 u_bit_scan_consecutive_range(&mask, &start, &count);
269
270 radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
271 start * 4 * 2, count * 2);
272 for (i = start; i < start+count; i++) {
273 r600_emit_one_scissor(rctx, cs, &rctx->viewports.as_scissor[i],
274 scissor_enabled ? &states[i] : NULL);
275 }
276 }
277 r600_emit_guardband(rctx, &max_vp_scissor);
278 rctx->scissors.dirty_mask = 0;
279 }
280
r600_set_viewport_states(struct pipe_context * ctx,unsigned start_slot,unsigned num_viewports,const struct pipe_viewport_state * state)281 static void r600_set_viewport_states(struct pipe_context *ctx,
282 unsigned start_slot,
283 unsigned num_viewports,
284 const struct pipe_viewport_state *state)
285 {
286 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
287 unsigned mask;
288 int i;
289
290 for (i = 0; i < num_viewports; i++) {
291 unsigned index = start_slot + i;
292
293 rctx->viewports.states[index] = state[i];
294 r600_get_scissor_from_viewport(rctx, &state[i],
295 &rctx->viewports.as_scissor[index]);
296 }
297
298 mask = ((1 << num_viewports) - 1) << start_slot;
299 rctx->viewports.dirty_mask |= mask;
300 rctx->viewports.depth_range_dirty_mask |= mask;
301 rctx->scissors.dirty_mask |= mask;
302 rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
303 rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
304 }
305
r600_emit_one_viewport(struct r600_common_context * rctx,struct pipe_viewport_state * state)306 static void r600_emit_one_viewport(struct r600_common_context *rctx,
307 struct pipe_viewport_state *state)
308 {
309 struct radeon_cmdbuf *cs = rctx->gfx.cs;
310
311 radeon_emit(cs, fui(state->scale[0]));
312 radeon_emit(cs, fui(state->translate[0]));
313 radeon_emit(cs, fui(state->scale[1]));
314 radeon_emit(cs, fui(state->translate[1]));
315 radeon_emit(cs, fui(state->scale[2]));
316 radeon_emit(cs, fui(state->translate[2]));
317 }
318
r600_emit_viewports(struct r600_common_context * rctx)319 static void r600_emit_viewports(struct r600_common_context *rctx)
320 {
321 struct radeon_cmdbuf *cs = rctx->gfx.cs;
322 struct pipe_viewport_state *states = rctx->viewports.states;
323 unsigned mask = rctx->viewports.dirty_mask;
324
325 /* The simple case: Only 1 viewport is active. */
326 if (!rctx->vs_writes_viewport_index) {
327 if (!(mask & 1))
328 return;
329
330 radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
331 r600_emit_one_viewport(rctx, &states[0]);
332 rctx->viewports.dirty_mask &= ~1; /* clear one bit */
333 return;
334 }
335
336 while (mask) {
337 int start, count, i;
338
339 u_bit_scan_consecutive_range(&mask, &start, &count);
340
341 radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
342 start * 4 * 6, count * 6);
343 for (i = start; i < start+count; i++)
344 r600_emit_one_viewport(rctx, &states[i]);
345 }
346 rctx->viewports.dirty_mask = 0;
347 }
348
r600_emit_depth_ranges(struct r600_common_context * rctx)349 static void r600_emit_depth_ranges(struct r600_common_context *rctx)
350 {
351 struct radeon_cmdbuf *cs = rctx->gfx.cs;
352 struct pipe_viewport_state *states = rctx->viewports.states;
353 unsigned mask = rctx->viewports.depth_range_dirty_mask;
354 float zmin, zmax;
355
356 /* The simple case: Only 1 viewport is active. */
357 if (!rctx->vs_writes_viewport_index) {
358 if (!(mask & 1))
359 return;
360
361 util_viewport_zmin_zmax(&states[0], rctx->clip_halfz, &zmin, &zmax);
362
363 radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
364 radeon_emit(cs, fui(zmin));
365 radeon_emit(cs, fui(zmax));
366 rctx->viewports.depth_range_dirty_mask &= ~1; /* clear one bit */
367 return;
368 }
369
370 while (mask) {
371 int start, count, i;
372
373 u_bit_scan_consecutive_range(&mask, &start, &count);
374
375 radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 +
376 start * 4 * 2, count * 2);
377 for (i = start; i < start+count; i++) {
378 util_viewport_zmin_zmax(&states[i], rctx->clip_halfz, &zmin, &zmax);
379 radeon_emit(cs, fui(zmin));
380 radeon_emit(cs, fui(zmax));
381 }
382 }
383 rctx->viewports.depth_range_dirty_mask = 0;
384 }
385
/* Atom callback: emit the viewport transforms followed by the depth
 * ranges (both share the same atom). */
static void r600_emit_viewport_states(struct r600_common_context *rctx,
				      struct r600_atom *atom)
{
	r600_emit_viewports(rctx);
	r600_emit_depth_ranges(rctx);
}
392
393 /* Set viewport dependencies on pipe_rasterizer_state. */
r600_viewport_set_rast_deps(struct r600_common_context * rctx,bool scissor_enable,bool clip_halfz)394 void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
395 bool scissor_enable, bool clip_halfz)
396 {
397 if (rctx->scissor_enabled != scissor_enable) {
398 rctx->scissor_enabled = scissor_enable;
399 rctx->scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
400 rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
401 }
402 if (rctx->clip_halfz != clip_halfz) {
403 rctx->clip_halfz = clip_halfz;
404 rctx->viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
405 rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
406 }
407 }
408
409 /**
410 * Normally, we only emit 1 viewport and 1 scissor if no shader is using
411 * the VIEWPORT_INDEX output, and emitting the other viewports and scissors
412 * is delayed. When a shader with VIEWPORT_INDEX appears, this should be
413 * called to emit the rest.
414 */
r600_update_vs_writes_viewport_index(struct r600_common_context * rctx,struct tgsi_shader_info * info)415 void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
416 struct tgsi_shader_info *info)
417 {
418 bool vs_window_space;
419
420 if (!info)
421 return;
422
423 /* When the VS disables clipping and viewport transformation. */
424 vs_window_space =
425 info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
426
427 if (rctx->vs_disables_clipping_viewport != vs_window_space) {
428 rctx->vs_disables_clipping_viewport = vs_window_space;
429 rctx->scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
430 rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
431 }
432
433 /* Viewport index handling. */
434 rctx->vs_writes_viewport_index = info->writes_viewport_index;
435 if (!rctx->vs_writes_viewport_index)
436 return;
437
438 if (rctx->scissors.dirty_mask)
439 rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
440
441 if (rctx->viewports.dirty_mask ||
442 rctx->viewports.depth_range_dirty_mask)
443 rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
444 }
445
r600_init_viewport_functions(struct r600_common_context * rctx)446 void r600_init_viewport_functions(struct r600_common_context *rctx)
447 {
448 rctx->scissors.atom.emit = r600_emit_scissors;
449 rctx->viewports.atom.emit = r600_emit_viewport_states;
450
451 rctx->scissors.atom.num_dw = (2 + 16 * 2) + 6;
452 rctx->viewports.atom.num_dw = 2 + 16 * 6;
453
454 rctx->b.set_scissor_states = r600_set_scissor_states;
455 rctx->b.set_viewport_states = r600_set_viewport_states;
456 }
457