1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BLORP_PRIV_H
25 #define BLORP_PRIV_H
26
27 #include <stdint.h>
28
29 #include "common/intel_measure.h"
30 #include "compiler/nir/nir.h"
31
32 #include "blorp.h"
33
34 #ifdef __cplusplus
35 extern "C" {
36 #endif
37
38 void blorp_init(struct blorp_context *blorp, void *driver_ctx,
39 struct isl_device *isl_dev, const struct blorp_config *config);
40
41 struct blorp_compiler {
42 const struct brw_compiler *brw;
43 const struct elk_compiler *elk;
44
45 const nir_shader_compiler_options *(*nir_options)(struct blorp_context *blorp,
46 gl_shader_stage stage);
47
48 struct blorp_program (*compile_fs)(struct blorp_context *blorp, void *mem_ctx,
49 struct nir_shader *nir,
50 bool multisample_fbo,
51 bool is_fast_clear,
52 bool use_repclear);
53 struct blorp_program (*compile_vs)(struct blorp_context *blorp, void *mem_ctx,
54 struct nir_shader *nir);
55
56 struct blorp_program (*compile_cs)(struct blorp_context *blorp, void *mem_ctx,
57 struct nir_shader *nir);
58
59 bool (*ensure_sf_program)(struct blorp_batch *batch,
60 struct blorp_params *params);
61
62 bool (*params_get_layer_offset_vs)(struct blorp_batch *batch,
63 struct blorp_params *params);
64 };
65
66 /**
67 * Binding table indices used by BLORP.
68 */
69 enum {
70 BLORP_RENDERBUFFER_BT_INDEX,
71 BLORP_TEXTURE_BT_INDEX,
72 BLORP_NUM_BT_ENTRIES
73 };
74
75 #define BLORP_SAMPLER_INDEX 0
76
77 struct blorp_surface_info
78 {
79 bool enabled;
80
81 struct isl_surf surf;
82 struct blorp_address addr;
83
84 struct isl_surf aux_surf;
85 struct blorp_address aux_addr;
86 enum isl_aux_usage aux_usage;
87
88 union isl_color_value clear_color;
89 struct blorp_address clear_color_addr;
90
91 struct isl_view view;
92
93 /* Z offset into a 3-D texture or slice of a 2-D array texture. */
94 float z_offset;
95
96 uint32_t tile_x_sa, tile_y_sa;
97 };
98
99 void
100 blorp_surface_info_init(struct blorp_batch *batch,
101 struct blorp_surface_info *info,
102 const struct blorp_surf *surf,
103 unsigned int level, float layer,
104 enum isl_format format, bool is_dest);
105 void
106 blorp_surf_convert_to_single_slice(const struct isl_device *isl_dev,
107 struct blorp_surface_info *info);
108 void
109 surf_fake_rgb_with_red(const struct isl_device *isl_dev,
110 struct blorp_surface_info *info);
111 void
112 blorp_surf_convert_to_uncompressed(const struct isl_device *isl_dev,
113 struct blorp_surface_info *info,
114 uint32_t *x, uint32_t *y,
115 uint32_t *width, uint32_t *height);
116 void
117 blorp_surf_fake_interleaved_msaa(const struct isl_device *isl_dev,
118 struct blorp_surface_info *info);
119 void
120 blorp_surf_retile_w_to_y(const struct isl_device *isl_dev,
121 struct blorp_surface_info *info);
122
123
124 struct blorp_coord_transform
125 {
126 float multiplier;
127 float offset;
128 };
129
130 /**
131 * Bounding rectangle telling pixel discard which pixels are to be touched.
132 * This is needed in when surfaces are configured as something else what they
133 * really are:
134 *
135 * - writing W-tiled stencil as Y-tiled
136 * - writing interleaved multisampled as single sampled.
137 *
138 * See blorp_check_in_bounds().
139 */
140 struct blorp_bounds_rect
141 {
142 uint32_t x0;
143 uint32_t x1;
144 uint32_t y0;
145 uint32_t y1;
146 };
147
148 /**
149 * Grid needed for blended and scaled blits of integer formats, see
150 * blorp_nir_manual_blend_bilinear().
151 */
152 struct blorp_rect_grid
153 {
154 float x1;
155 float y1;
156 float pad[2];
157 };
158
159 struct blorp_surf_offset {
160 uint32_t x;
161 uint32_t y;
162 };
163
164 struct blorp_wm_inputs
165 {
166 uint32_t clear_color[4];
167
168 struct blorp_bounds_rect bounds_rect;
169 struct blorp_rect_grid rect_grid;
170 struct blorp_coord_transform coord_transform[2];
171
172 struct blorp_surf_offset src_offset;
173 struct blorp_surf_offset dst_offset;
174
175 /* (1/width, 1/height) for the source surface */
176 float src_inv_size[2];
177
178 /* Minimum layer setting works for all the textures types but texture_3d
179 * for which the setting has no effect. Use the z-coordinate instead.
180 */
181 float src_z;
182
183 /* Note: Pad out to an integral number of registers when extending, but
184 * make sure subgroup_id is the last 32-bit item.
185 */
186 /* uint32_t pad[?]; */
187 uint32_t subgroup_id;
188 };
189
190 static inline nir_variable *
blorp_create_nir_input(struct nir_shader * nir,const char * name,const struct glsl_type * type,unsigned int offset)191 blorp_create_nir_input(struct nir_shader *nir,
192 const char *name,
193 const struct glsl_type *type,
194 unsigned int offset)
195 {
196 nir_variable *input;
197 if (nir->info.stage == MESA_SHADER_COMPUTE) {
198 input = nir_variable_create(nir, nir_var_uniform, type, name);
199 input->data.driver_location = offset;
200 input->data.location = offset;
201 } else {
202 input = nir_variable_create(nir, nir_var_shader_in, type, name);
203 input->data.location = VARYING_SLOT_VAR0 + offset / (4 * sizeof(float));
204 input->data.location_frac = (offset / sizeof(float)) % 4;
205 }
206 if (nir->info.stage == MESA_SHADER_FRAGMENT)
207 input->data.interpolation = INTERP_MODE_FLAT;
208 return input;
209 }
210
211 #define BLORP_CREATE_NIR_INPUT(shader, name, type) \
212 blorp_create_nir_input((shader), #name, (type), \
213 offsetof(struct blorp_wm_inputs, name))
214
215 struct blorp_vs_inputs {
216 uint32_t base_layer;
217 uint32_t _instance_id; /* Set in hardware by SGVS */
218 uint32_t pad[2];
219 };
220
221 enum blorp_shader_type {
222 BLORP_SHADER_TYPE_COPY,
223 BLORP_SHADER_TYPE_BLIT,
224 BLORP_SHADER_TYPE_CLEAR,
225 BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE,
226 BLORP_SHADER_TYPE_LAYER_OFFSET_VS,
227 BLORP_SHADER_TYPE_GFX4_SF,
228 };
229
230 enum blorp_shader_pipeline {
231 BLORP_SHADER_PIPELINE_RENDER,
232 BLORP_SHADER_PIPELINE_COMPUTE,
233 };
234
235 struct blorp_params
236 {
237 enum blorp_op op;
238 uint32_t x0;
239 uint32_t y0;
240 uint32_t x1;
241 uint32_t y1;
242 float z;
243 uint8_t stencil_mask;
244 uint8_t stencil_ref;
245 struct blorp_surface_info depth;
246 struct blorp_surface_info stencil;
247 uint32_t depth_format;
248 struct blorp_surface_info src;
249 struct blorp_surface_info dst;
250 enum isl_aux_op hiz_op;
251 bool full_surface_hiz_op;
252 enum isl_aux_op fast_clear_op;
253 uint8_t color_write_disable;
254 struct blorp_wm_inputs wm_inputs;
255 struct blorp_vs_inputs vs_inputs;
256 bool dst_clear_color_as_input;
257 unsigned num_samples;
258 unsigned num_draw_buffers;
259 unsigned num_layers;
260 uint32_t vs_prog_kernel;
261 void *vs_prog_data;
262 uint32_t sf_prog_kernel;
263 void *sf_prog_data;
264 uint32_t wm_prog_kernel;
265 void *wm_prog_data;
266 uint32_t cs_prog_kernel;
267 void *cs_prog_data;
268
269 bool use_pre_baked_binding_table;
270 uint32_t pre_baked_binding_table_offset;
271 enum blorp_shader_type shader_type;
272 enum blorp_shader_pipeline shader_pipeline;
273 };
274
275 enum intel_measure_snapshot_type
276 blorp_op_to_intel_measure_snapshot(enum blorp_op op);
277
278 const char *blorp_op_to_name(enum blorp_op op);
279
280 void blorp_params_init(struct blorp_params *params);
281
282 struct blorp_base_key
283 {
284 char name[8];
285 enum blorp_shader_type shader_type;
286 enum blorp_shader_pipeline shader_pipeline;
287 };
288
289 #define BLORP_BASE_KEY_INIT(_type) \
290 (struct blorp_base_key) { \
291 .name = "blorp", \
292 .shader_type = _type, \
293 .shader_pipeline = BLORP_SHADER_PIPELINE_RENDER, \
294 }
295
296 struct blorp_blit_prog_key
297 {
298 struct blorp_base_key base;
299
300 /* Number of samples per pixel that have been configured in the surface
301 * state for texturing from.
302 */
303 unsigned tex_samples;
304
305 /* MSAA layout that has been configured in the surface state for texturing
306 * from.
307 */
308 enum isl_msaa_layout tex_layout;
309
310 enum isl_aux_usage tex_aux_usage;
311
312 /* Actual number of samples per pixel in the source image. */
313 unsigned src_samples;
314
315 /* Actual MSAA layout used by the source image. */
316 enum isl_msaa_layout src_layout;
317
318 /* The swizzle to apply to the source in the shader */
319 struct isl_swizzle src_swizzle;
320
321 /* The format of the source if format-specific workarounds are needed
322 * and 0 (ISL_FORMAT_R32G32B32A32_FLOAT) if the destination is natively
323 * renderable.
324 */
325 enum isl_format src_format;
326
327 /* True if the source requires normalized coordinates */
328 bool src_coords_normalized;
329
330 /* Number of samples per pixel that have been configured in the render
331 * target.
332 */
333 unsigned rt_samples;
334
335 /* MSAA layout that has been configured in the render target. */
336 enum isl_msaa_layout rt_layout;
337
338 /* Actual number of samples per pixel in the destination image. */
339 unsigned dst_samples;
340
341 /* Actual MSAA layout used by the destination image. */
342 enum isl_msaa_layout dst_layout;
343
344 /* The swizzle to apply to the destination in the shader */
345 struct isl_swizzle dst_swizzle;
346
347 /* The format of the destination if format-specific workarounds are needed
348 * and 0 (ISL_FORMAT_R32G32B32A32_FLOAT) if the destination is natively
349 * renderable.
350 */
351 enum isl_format dst_format;
352
353 /* Whether or not the format workarounds are a bitcast operation */
354 bool format_bit_cast;
355
356 /** True if we need to perform SINT -> UINT clamping. */
357 bool sint32_to_uint;
358
359 /** True if we need to perform UINT -> SINT clamping. */
360 bool uint32_to_sint;
361
362 /* Type of the data to be read from the texture (one of
363 * nir_type_(int|uint|float)).
364 */
365 nir_alu_type texture_data_type;
366
367 /* True if the source image is W tiled. If true, the surface state for the
368 * source image must be configured as Y tiled, and tex_samples must be 0.
369 */
370 bool src_tiled_w;
371
372 /* True if the destination image is W tiled. If true, the surface state
373 * for the render target must be configured as Y tiled, and rt_samples must
374 * be 0.
375 */
376 bool dst_tiled_w;
377
378 /* True if the destination is an RGB format. If true, the surface state
379 * for the render target must be configured as red with three times the
380 * normal width. We need to do this because you cannot render to
381 * non-power-of-two formats.
382 */
383 bool dst_rgb;
384
385 isl_surf_usage_flags_t dst_usage;
386
387 enum blorp_filter filter;
388
389 /* True if the rectangle being sent through the rendering pipeline might be
390 * larger than the destination rectangle, so the WM program should kill any
391 * pixels that are outside the destination rectangle.
392 */
393 bool use_kill;
394
395 /**
396 * True if the WM program should be run in MSDISPMODE_PERSAMPLE with more
397 * than one sample per pixel.
398 */
399 bool persample_msaa_dispatch;
400
401 /* True if this blit operation may involve intratile offsets on the source.
402 * In this case, we need to add the offset before texturing.
403 */
404 bool need_src_offset;
405
406 /* True if this blit operation may involve intratile offsets on the
407 * destination. In this case, we need to add the offset to gl_FragCoord.
408 */
409 bool need_dst_offset;
410
411 /* Scale factors between the pixel grid and the grid of samples. We're
412 * using grid of samples for bilinear filetring in multisample scaled blits.
413 */
414 float x_scale;
415 float y_scale;
416
417 /* If a compute shader is used, this is the local size y dimension.
418 */
419 uint8_t local_y;
420 };
421
422 /**
423 * \name BLORP internals
424 * \{
425 *
426 * Used internally by gfx6_blorp_exec() and gfx7_blorp_exec().
427 */
428
429 bool blorp_blitter_supports_aux(const struct intel_device_info *devinfo,
430 enum isl_aux_usage aux_usage);
431
432 const char *blorp_shader_type_to_name(enum blorp_shader_type type);
433 const char *blorp_shader_pipeline_to_name(enum blorp_shader_pipeline pipe);
434
435 struct blorp_program {
436 const void *kernel;
437 uint32_t kernel_size;
438
439 const void *prog_data;
440 uint32_t prog_data_size;
441 };
442
443 static inline struct blorp_program
blorp_compile_fs(struct blorp_context * blorp,void * mem_ctx,struct nir_shader * nir,bool multisample_fbo,bool is_fast_clear,bool use_repclear)444 blorp_compile_fs(struct blorp_context *blorp, void *mem_ctx,
445 struct nir_shader *nir,
446 bool multisample_fbo,
447 bool is_fast_clear,
448 bool use_repclear)
449 {
450 return blorp->compiler->compile_fs(blorp, mem_ctx, nir, multisample_fbo,
451 is_fast_clear, use_repclear);
452 }
453
454 static inline struct blorp_program
blorp_compile_vs(struct blorp_context * blorp,void * mem_ctx,struct nir_shader * nir)455 blorp_compile_vs(struct blorp_context *blorp, void *mem_ctx,
456 struct nir_shader *nir)
457 {
458 return blorp->compiler->compile_vs(blorp, mem_ctx, nir);
459 }
460
461 static inline bool
blorp_ensure_sf_program(struct blorp_batch * batch,struct blorp_params * params)462 blorp_ensure_sf_program(struct blorp_batch *batch,
463 struct blorp_params *params)
464 {
465 struct blorp_compiler *c = batch->blorp->compiler;
466 /* Absence of callback indicates it is not needed. This is the case for
467 * brw, which is Gfx9+.
468 */
469 return !c->ensure_sf_program || c->ensure_sf_program(batch, params);
470 }
471
472 static inline uint8_t
blorp_get_cs_local_y(struct blorp_params * params)473 blorp_get_cs_local_y(struct blorp_params *params)
474 {
475 uint32_t height = params->y1 - params->y0;
476 uint32_t or_ys = params->y0 | params->y1;
477 if (height > 32 || (or_ys & 3) == 0) {
478 return 4;
479 } else if ((or_ys & 1) == 0) {
480 return 2;
481 } else {
482 return 1;
483 }
484 }
485
486 static inline void
blorp_set_cs_dims(struct nir_shader * nir,uint8_t local_y)487 blorp_set_cs_dims(struct nir_shader *nir, uint8_t local_y)
488 {
489 assert(local_y != 0 && (16 % local_y == 0));
490 nir->info.workgroup_size[0] = 16 / local_y;
491 nir->info.workgroup_size[1] = local_y;
492 nir->info.workgroup_size[2] = 1;
493 }
494
495 static inline struct blorp_program
blorp_compile_cs(struct blorp_context * blorp,void * mem_ctx,struct nir_shader * nir)496 blorp_compile_cs(struct blorp_context *blorp, void *mem_ctx,
497 struct nir_shader *nir)
498 {
499 return blorp->compiler->compile_cs(blorp, mem_ctx, nir);
500 }
501
502 static inline bool
blorp_params_get_layer_offset_vs(struct blorp_batch * batch,struct blorp_params * params)503 blorp_params_get_layer_offset_vs(struct blorp_batch *batch,
504 struct blorp_params *params)
505 {
506 return batch->blorp->compiler->params_get_layer_offset_vs(batch, params);
507 }
508
509 /** \} */
510
511 #ifdef __cplusplus
512 } /* end extern "C" */
513 #endif /* __cplusplus */
514
515 #endif /* BLORP_PRIV_H */
516