1 /*
2 * Copyright © 2021 Valve Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "ac_nir.h"
8 #include "ac_nir_helpers.h"
9
10 #include "nir_builder.h"
11 #include "nir_xfb_info.h"
12
13 nir_shader *
ac_nir_create_gs_copy_shader(const nir_shader * gs_nir,enum amd_gfx_level gfx_level,uint32_t clip_cull_mask,const uint8_t * param_offsets,bool has_param_exports,bool disable_streamout,bool kill_pointsize,bool kill_layer,bool force_vrs,ac_nir_gs_output_info * output_info)14 ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
15 enum amd_gfx_level gfx_level,
16 uint32_t clip_cull_mask,
17 const uint8_t *param_offsets,
18 bool has_param_exports,
19 bool disable_streamout,
20 bool kill_pointsize,
21 bool kill_layer,
22 bool force_vrs,
23 ac_nir_gs_output_info *output_info)
24 {
25 nir_builder b = nir_builder_init_simple_shader(
26 MESA_SHADER_VERTEX, gs_nir->options, "gs_copy");
27
28 nir_foreach_shader_out_variable(var, gs_nir)
29 nir_shader_add_variable(b.shader, nir_variable_clone(var, b.shader));
30
31 b.shader->info.outputs_written = gs_nir->info.outputs_written;
32 b.shader->info.outputs_written_16bit = gs_nir->info.outputs_written_16bit;
33
34 nir_def *gsvs_ring = nir_load_ring_gsvs_amd(&b);
35
36 nir_xfb_info *info = ac_nir_get_sorted_xfb_info(gs_nir);
37 nir_def *stream_id = NULL;
38 if (!disable_streamout && info)
39 stream_id = nir_ubfe_imm(&b, nir_load_streamout_config_amd(&b), 24, 2);
40
41 nir_def *vtx_offset = nir_imul_imm(&b, nir_load_vertex_id_zero_base(&b), 4);
42 nir_def *zero = nir_imm_zero(&b, 1, 32);
43
44 for (unsigned stream = 0; stream < 4; stream++) {
45 if (stream > 0 && (!stream_id || !(info->streams_written & BITFIELD_BIT(stream))))
46 continue;
47
48 if (stream_id)
49 nir_push_if(&b, nir_ieq_imm(&b, stream_id, stream));
50
51 uint32_t offset = 0;
52 ac_nir_prerast_out out = {0};
53 if (output_info->types_16bit_lo)
54 memcpy(&out.types_16bit_lo, output_info->types_16bit_lo, sizeof(out.types_16bit_lo));
55 if (output_info->types_16bit_hi)
56 memcpy(&out.types_16bit_hi, output_info->types_16bit_hi, sizeof(out.types_16bit_hi));
57
58 u_foreach_bit64 (i, gs_nir->info.outputs_written) {
59 const uint8_t usage_mask = output_info->varying_mask[i] | output_info->sysval_mask[i];
60 out.infos[i].components_mask = usage_mask;
61 out.infos[i].as_varying_mask = output_info->varying_mask[i];
62 out.infos[i].as_sysval_mask = output_info->sysval_mask[i];
63
64 u_foreach_bit (j, usage_mask) {
65 if (((output_info->streams[i] >> (j * 2)) & 0x3) != stream)
66 continue;
67
68 out.outputs[i][j] =
69 nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
70 .base = offset,
71 .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
72
73 /* clamp legacy color output */
74 if (i == VARYING_SLOT_COL0 || i == VARYING_SLOT_COL1 ||
75 i == VARYING_SLOT_BFC0 || i == VARYING_SLOT_BFC1) {
76 nir_def *color = out.outputs[i][j];
77 nir_def *clamp = nir_load_clamp_vertex_color_amd(&b);
78 out.outputs[i][j] = nir_bcsel(&b, clamp, nir_fsat(&b, color), color);
79 }
80
81 offset += gs_nir->info.gs.vertices_out * 16 * 4;
82 }
83 }
84
85 u_foreach_bit (i, gs_nir->info.outputs_written_16bit) {
86 out.infos_16bit_lo[i].components_mask = output_info->varying_mask_16bit_lo[i];
87 out.infos_16bit_lo[i].as_varying_mask = output_info->varying_mask_16bit_lo[i];
88 out.infos_16bit_hi[i].components_mask = output_info->varying_mask_16bit_hi[i];
89 out.infos_16bit_hi[i].as_varying_mask = output_info->varying_mask_16bit_hi[i];
90
91 for (unsigned j = 0; j < 4; j++) {
92 out.infos[i].as_varying_mask = output_info->varying_mask[i];
93 out.infos[i].as_sysval_mask = output_info->sysval_mask[i];
94
95 bool has_lo_16bit = (output_info->varying_mask_16bit_lo[i] & (1 << j)) &&
96 ((output_info->streams_16bit_lo[i] >> (j * 2)) & 0x3) == stream;
97 bool has_hi_16bit = (output_info->varying_mask_16bit_hi[i] & (1 << j)) &&
98 ((output_info->streams_16bit_hi[i] >> (j * 2)) & 0x3) == stream;
99 if (!has_lo_16bit && !has_hi_16bit)
100 continue;
101
102 nir_def *data =
103 nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
104 .base = offset,
105 .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
106
107 if (has_lo_16bit)
108 out.outputs_16bit_lo[i][j] = nir_unpack_32_2x16_split_x(&b, data);
109
110 if (has_hi_16bit)
111 out.outputs_16bit_hi[i][j] = nir_unpack_32_2x16_split_y(&b, data);
112
113 offset += gs_nir->info.gs.vertices_out * 16 * 4;
114 }
115 }
116
117 if (stream_id)
118 ac_nir_emit_legacy_streamout(&b, stream, info, &out);
119
120 if (stream == 0) {
121 uint64_t export_outputs = b.shader->info.outputs_written | VARYING_BIT_POS;
122 if (kill_pointsize)
123 export_outputs &= ~VARYING_BIT_PSIZ;
124 if (kill_layer)
125 export_outputs &= ~VARYING_BIT_LAYER;
126
127 ac_nir_export_position(&b, gfx_level, clip_cull_mask, !has_param_exports,
128 force_vrs, true, export_outputs, &out, NULL);
129
130 if (has_param_exports) {
131 ac_nir_export_parameters(&b, param_offsets,
132 b.shader->info.outputs_written,
133 b.shader->info.outputs_written_16bit,
134 &out);
135 }
136 }
137
138 if (stream_id)
139 nir_push_else(&b, NULL);
140 }
141
142 b.shader->info.clip_distance_array_size = gs_nir->info.clip_distance_array_size;
143 b.shader->info.cull_distance_array_size = gs_nir->info.cull_distance_array_size;
144
145 return b.shader;
146 }
147