26 * This file drives the GLSL IR -> LIR translation, contains the
57 this->src = new fs_reg[MAX2(sources, 3)]; in init()
59 this->src[i] = src[i]; in init()
61 this->opcode = opcode; in init()
62 this->dst = dst; in init()
63 this->sources = sources; in init()
64 this->exec_size = exec_size; in init()
68 assert(this->exec_size != 0); in init()
70 this->conditional_mod = BRW_CONDITIONAL_NONE; in init()
78 this->size_written = dst.component_size(exec_size); in init()
81 this->size_written = 0; in init()
88 this->writes_accumulator = false; in init()
137 this->src = new fs_reg[MAX2(that.sources, 3)]; in fs_inst()
140 this->src[i] = that.src[i]; in fs_inst()
145 delete[] this->src; in ~fs_inst()
151 if (this->sources != num_sources) { in resize_sources()
154 for (unsigned i = 0; i < MIN2(this->sources, num_sources); ++i) in resize_sources()
155 src[i] = this->src[i]; in resize_sources()
157 delete[] this->src; in resize_sources()
158 this->src = src; in resize_sources()
159 this->sources = num_sources; in resize_sources()
184 * We also want to use a 32-bit data type for the dst of the load operation in VARYING_PULL_CONSTANT_LOAD()
198 inst->size_written = 4 * vec4_result.component_size(inst->exec_size); in VARYING_PULL_CONSTANT_LOAD()
325 * - Virtual opcodes that translate to multiple instructions in the
330 * - SIMD16 compressed instructions with certain regioning (see below).
409 * pixels. There's a similar issue for the pre-gfx6 in has_source_and_destination_hazard()
410 * pixel_x/pixel_y, which are registers of 16-bit values and thus in has_source_and_destination_hazard()
439 if (devinfo->ver >= 12 && (opcode == BRW_OPCODE_MUL || in can_do_source_mods()
467 * equality with a 32-bit value. See piglit fs-op-neg-uvec4. in can_do_cmod()
501 this->file = BAD_FILE; in fs_reg()
507 this->offset = 0; in fs_reg()
508 this->stride = 1; in fs_reg()
509 if (this->file == IMM && in fs_reg()
510 (this->type != BRW_REGISTER_TYPE_V && in fs_reg()
511 this->type != BRW_REGISTER_TYPE_UV && in fs_reg()
512 this->type != BRW_REGISTER_TYPE_VF)) { in fs_reg()
513 this->stride = 0; in fs_reg()
520 return (this->backend_reg::equals(r) && in equals()
527 return (this->backend_reg::negative_equals(r) && in negative_equals()
555 const unsigned w = MIN2(width, 1u << this->width); in component_size()
556 const unsigned h = width >> this->width; in component_size()
557 const unsigned vs = vstride ? 1 << (vstride - 1) : 0; in component_size()
558 const unsigned hs = hstride ? 1 << (hstride - 1) : 0; in component_size()
560 return ((MAX2(1, h) - 1) * vs + (w - 1) * hs + 1) * type_sz(type); in component_size()
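
The four lines above are the complete region-size formula: the hardware encodes vstride and hstride as log2-plus-one fields (0 meaning a stride of 0 elements) and width as a plain log2, so a region covers ((rows - 1) * vstride + (width - 1) * hstride + 1) elements of type_sz bytes. A minimal standalone sketch of that decode, with illustrative names rather than the Mesa helpers:

#include <algorithm>
#include <cassert>
#include <cstdio>

/* Standalone sketch of the region-size math above; names are illustrative,
 * not the Mesa helpers. vstride/hstride use the hardware encoding: 0 means
 * "stride 0", otherwise the stride is 1 << (field - 1) elements. */
static unsigned
region_size_bytes(unsigned exec_width, unsigned width_log2,
                  unsigned vstride, unsigned hstride, unsigned type_size)
{
   const unsigned w  = std::min(exec_width, 1u << width_log2); /* row width */
   const unsigned h  = exec_width >> width_log2;               /* row count */
   const unsigned vs = vstride ? 1u << (vstride - 1) : 0;
   const unsigned hs = hstride ? 1u << (hstride - 1) : 0;
   return ((std::max(1u, h) - 1) * vs + (w - 1) * hs + 1) * type_size;
}

int main()
{
   /* A SIMD8 float region <8;8,1> spans exactly one 32B register. */
   assert(region_size_bytes(8, 3 /* width 8 */, 4 /* vstride 8 */,
                            1 /* hstride 1 */, 4) == 32);
   printf("ok\n");
   return 0;
}
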
580 this->fail_msg = msg; in vfail()
632 if (this->predicate && !this->predicate_trivial && in is_partial_write()
633 this->opcode != BRW_OPCODE_SEL) in is_partial_write()
636 if (this->dst.offset % REG_SIZE != 0) in is_partial_write()
640 if (this->opcode == SHADER_OPCODE_SEND) in is_partial_write()
647 * ubld.UNDEF(tmp); <- partial write, even if it covers the whole register in is_partial_write()
649 if (this->opcode == SHADER_OPCODE_UNDEF) { in is_partial_write()
650 assert(this->dst.is_contiguous()); in is_partial_write()
651 return this->size_written < 32; in is_partial_write()
654 return this->exec_size * type_sz(this->dst.type) < 32 || in is_partial_write()
655 !this->dst.is_contiguous(); in is_partial_write()
874 if (arg < this->header_size) in size_read()
956 if (devinfo->ver >= 20) { in predicate_width()
981 if (devinfo->ver < 20 && (predicate == BRW_PREDICATE_ALIGN1_ANYV || in flags_read()
1055 this->file = file; in fs_reg()
1056 this->nr = nr; in fs_reg()
1057 this->type = BRW_REGISTER_TYPE_F; in fs_reg()
1058 this->stride = (file == UNIFORM ? 0 : 1); in fs_reg()
1064 this->file = file; in fs_reg()
1065 this->nr = nr; in fs_reg()
1066 this->type = type; in fs_reg()
1067 this->stride = (file == UNIFORM ? 0 : 1); in fs_reg()
1076 this->push_constant_loc = v->push_constant_loc; in import_uniforms()
1077 this->uniforms = v->uniforms; in import_uniforms()
1090 switch (intr->intrinsic) { in brw_barycentric_mode()
1120 return (enum brw_barycentric_mode) ((unsigned) bary - 1); in centroid_to_pixel()
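
centroid_to_pixel() above leans on an ordering invariant of the barycentric-mode enum: each centroid entry directly follows its pixel counterpart, so subtracting one converts centroid to pixel. A toy illustration of that assumption (enum names and values here are illustrative, not the real BRW_BARYCENTRIC_* definitions):

/* Sketch of the enum-ordering assumption behind centroid_to_pixel():
 * each *_PIXEL entry directly precedes its *_CENTROID variant, so
 * centroid - 1 == pixel. */
enum bary_mode {
   BARY_PERSP_PIXEL = 0,
   BARY_PERSP_CENTROID = 1,
   BARY_PERSP_SAMPLE = 2,
   BARY_NONPERSP_PIXEL = 3,
   BARY_NONPERSP_CENTROID = 4,
   BARY_NONPERSP_SAMPLE = 5,
};

static bary_mode centroid_to_pixel_sketch(bary_mode m)
{
   /* Only valid for the two centroid modes. */
   return (bary_mode)((unsigned)m - 1);
}

static_assert(BARY_PERSP_CENTROID - 1 == BARY_PERSP_PIXEL, "");
static_assert(BARY_NONPERSP_CENTROID - 1 == BARY_NONPERSP_PIXEL, "");

int main()
{
   return centroid_to_pixel_sketch(BARY_NONPERSP_CENTROID) ==
          BARY_NONPERSP_PIXEL ? 0 : 1;
}
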
1132 foreach_in_list_reverse(fs_inst, prev, &this->instructions) { in mark_last_urb_write_with_eot()
1133 if (prev->opcode == SHADER_OPCODE_URB_WRITE_LOGICAL) { in mark_last_urb_write_with_eot()
1134 prev->eot = true; in mark_last_urb_write_with_eot()
1137 foreach_in_list_reverse_safe(exec_node, dead, &this->instructions) { in mark_last_urb_write_with_eot()
1140 dead->remove(); in mark_last_urb_write_with_eot()
1143 } else if (prev->is_control_flow() || prev->has_side_effects()) { in mark_last_urb_write_with_eot()
1158 if (gs_compile->control_data_header_size_bits > 0) { in emit_gs_thread_end()
1159 emit_gs_control_data_bits(this->final_gs_vertex_count); in emit_gs_thread_end()
1165 if (gs_prog_data->static_vertex_count != -1) { in emit_gs_thread_end()
1180 srcs[URB_LOGICAL_SRC_DATA] = this->final_gs_vertex_count; in emit_gs_thread_end()
1185 inst->eot = true; in emit_gs_thread_end()
1186 inst->offset = 0; in emit_gs_thread_end()
1192 unsigned uniform_push_length = DIV_ROUND_UP(stage_prog_data->nr_params, 8); in assign_curb_setup()
1198 ubo_push_length += stage_prog_data->ubo_ranges[i].length; in assign_curb_setup()
1201 prog_data->curb_read_length = uniform_push_length + ubo_push_length; in assign_curb_setup()
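
Push constants are counted in 256-bit registers: eight 32-bit params pack into each register, and every pushed UBO range contributes its own register count, which together give curb_read_length. A self-contained sketch of that arithmetic (the struct name is made up; DIV_ROUND_UP matches the usual Mesa macro):

#include <cassert>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

struct ubo_range_sketch { unsigned length; }; /* in 32B registers */

static unsigned
curb_read_length_sketch(unsigned nr_params, /* count of 32-bit uniforms */
                        const ubo_range_sketch *ranges, unsigned n)
{
   unsigned uniform_push_length = DIV_ROUND_UP(nr_params, 8);
   unsigned ubo_push_length = 0;
   for (unsigned i = 0; i < n; i++)
      ubo_push_length += ranges[i].length;
   return uniform_push_length + ubo_push_length;
}

int main()
{
   const ubo_range_sketch r[] = { {2}, {1} };
   /* 13 dwords of uniforms -> 2 registers, plus 3 registers of UBO data. */
   assert(curb_read_length_sketch(13, r, 2) == 5);
   return 0;
}
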
1206 if (is_compute && brw_cs_prog_data(prog_data)->uses_inline_data) { in assign_curb_setup()
1212 assert(devinfo->verx10 >= 125); in assign_curb_setup()
1214 } else if (is_compute && devinfo->verx10 >= 125) { in assign_curb_setup()
1215 assert(devinfo->has_lsc); in assign_curb_setup()
1217 cfg->first_block(), cfg->first_block()->start()); in assign_curb_setup()
1227 /* On Gfx12-HP we load constants at the start of the program using A32 in assign_curb_setup()
1232 unsigned num_regs = MIN2(uniform_push_length - i, 8); in assign_curb_setup()
1250 send->sfid = GFX12_SFID_UGM; in assign_curb_setup()
1251 send->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, in assign_curb_setup()
1261 send->header_size = 0; in assign_curb_setup()
1262 send->mlen = lsc_msg_desc_src0_len(devinfo, send->desc); in assign_curb_setup()
1263 send->size_written = in assign_curb_setup()
1264 lsc_msg_desc_dest_len(devinfo, send->desc) * REG_SIZE; in assign_curb_setup()
1265 send->send_is_volatile = true; in assign_curb_setup()
1275 for (unsigned int i = 0; i < inst->sources; i++) { in assign_curb_setup()
1276 if (inst->src[i].file == UNIFORM) { in assign_curb_setup()
1277 int uniform_nr = inst->src[i].nr + inst->src[i].offset / 4; in assign_curb_setup()
1279 if (inst->src[i].nr >= UBO_START) { in assign_curb_setup()
1280 /* constant_nr is in 32-bit units, the rest are in bytes */ in assign_curb_setup()
1281 constant_nr = ubo_push_start[inst->src[i].nr - UBO_START] + in assign_curb_setup()
1282 inst->src[i].offset / 4; in assign_curb_setup()
1287 * "Out-of-bounds reads return undefined values, which include in assign_curb_setup()
1300 brw_reg.abs = inst->src[i].abs; in assign_curb_setup()
1301 brw_reg.negate = inst->src[i].negate; in assign_curb_setup()
1303 assert(inst->src[i].stride == 0); in assign_curb_setup()
1304 inst->src[i] = byte_offset( in assign_curb_setup()
1305 retype(brw_reg, inst->src[i].type), in assign_curb_setup()
1306 inst->src[i].offset % 4); in assign_curb_setup()
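
The block above rewrites a UNIFORM source into a fixed GRF reference: constant_nr is a dword index into the pushed block, and the elided lines between these fragments map it to a register and subregister before the remaining byte offset is applied. A hypothetical helper showing just that index split (eight dwords per 32-byte GRF; names are not the Mesa API):

#include <cassert>

struct grf_loc { unsigned nr; unsigned subnr_dw; };

/* Hypothetical sketch: map a dword index within the pushed constant
 * block to (GRF number, dword subregister), as the elided lines around
 * the fragment above do when building the fixed-GRF reference. */
static grf_loc
uniform_to_grf(unsigned payload_regs, unsigned constant_nr)
{
   return { payload_regs + constant_nr / 8, /* 8 dwords per 32B GRF */
            constant_nr % 8 };
}

int main()
{
   grf_loc l = uniform_to_grf(2, 11); /* dword 11 -> g3.3 */
   assert(l.nr == 3 && l.subnr_dw == 3);
   return 0;
}
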
1311 uint64_t want_zero = used & stage_prog_data->zero_push_reg; in assign_curb_setup()
1314 cfg->first_block(), cfg->first_block()->start()); in assign_curb_setup()
1316 /* push_reg_mask_param is in 32-bit units */ in assign_curb_setup()
1317 unsigned mask_param = stage_prog_data->push_reg_mask_param; in assign_curb_setup()
1336 assert(i < prog_data->curb_read_length); in assign_curb_setup()
1349 this->first_non_payload_grf = payload().num_regs + prog_data->curb_read_length; in assign_curb_setup()
1362 * skip per-primitive attributes here. in brw_compute_urb_setup_index()
1369 if (wm_prog_data->urb_setup[attr] >= 0) { in brw_compute_urb_setup_index()
1370 wm_prog_data->urb_setup_attribs[index++] = attr; in brw_compute_urb_setup_index()
1373 wm_prog_data->urb_setup_attribs_count = index; in brw_compute_urb_setup_index()
1383 memset(prog_data->urb_setup, -1, sizeof(prog_data->urb_setup)); in calculate_urb_setup()
1384 memset(prog_data->urb_setup_channel, 0, sizeof(prog_data->urb_setup_channel)); in calculate_urb_setup()
1389 nir->info.inputs_read & ~nir->info.per_primitive_inputs; in calculate_urb_setup()
1392 if (key->mesh_input != BRW_NEVER) { in calculate_urb_setup()
1393 /* Per-Primitive Attributes are laid out by Hardware before the regular in calculate_urb_setup()
1397 if (nir->info.per_primitive_inputs) { in calculate_urb_setup()
1399 nir->info.inputs_read & nir->info.per_primitive_inputs; in calculate_urb_setup()
1403 * Primitive Header, not Per-Primitive Attributes. in calculate_urb_setup()
1410 unsigned per_prim_start_dw = mue_map->per_primitive_start_dw; in calculate_urb_setup()
1411 unsigned per_prim_size_dw = mue_map->per_primitive_pitch_dw; in calculate_urb_setup()
1415 if (reads_header || mue_map->user_data_in_primitive_header) { in calculate_urb_setup()
1417 * 4-dword slot (psr is dword 0, layer is dword 1, and viewport in calculate_urb_setup()
1421 prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_SHADING_RATE] = 0; in calculate_urb_setup()
1424 prog_data->urb_setup[VARYING_SLOT_LAYER] = 0; in calculate_urb_setup()
1427 prog_data->urb_setup[VARYING_SLOT_VIEWPORT] = 0; in calculate_urb_setup()
1433 * calculating offset from start of per-prim data. in calculate_urb_setup()
1435 per_prim_start_dw += mue_map->per_primitive_header_size_dw; in calculate_urb_setup()
1436 per_prim_size_dw -= mue_map->per_primitive_header_size_dw; in calculate_urb_setup()
1440 int start = mue_map->start_dw[i]; in calculate_urb_setup()
1443 assert(mue_map->len_dw[i] > 0); in calculate_urb_setup()
1446 unsigned pos_dw = unsigned(start) - per_prim_start_dw; in calculate_urb_setup()
1448 prog_data->urb_setup[i] = urb_next + pos_dw / 4; in calculate_urb_setup()
1449 prog_data->urb_setup_channel[i] = pos_dw % 4; in calculate_urb_setup()
1455 * per-primitive attributes won't be packed either, so just lay in calculate_urb_setup()
1462 prog_data->urb_setup[i] = urb_next++; in calculate_urb_setup()
1470 prog_data->num_per_primitive_inputs = urb_next; in calculate_urb_setup()
1479 assert(!mue_map || mue_map->per_vertex_header_size_dw > 8); in calculate_urb_setup()
1484 unsigned per_vertex_start_dw = mue_map->per_vertex_start_dw; in calculate_urb_setup()
1485 unsigned per_vertex_size_dw = mue_map->per_vertex_pitch_dw; in calculate_urb_setup()
1487 /* Per-Vertex header is available to fragment shader only if there's in calculate_urb_setup()
1490 if (!mue_map->user_data_in_vertex_header) { in calculate_urb_setup()
1492 per_vertex_size_dw -= 8; in calculate_urb_setup()
1496 * they come from MUE Vertex Header, not Per-Vertex Attributes. in calculate_urb_setup()
1499 prog_data->urb_setup[VARYING_SLOT_CLIP_DIST0] = urb_next; in calculate_urb_setup()
1500 prog_data->urb_setup[VARYING_SLOT_CLIP_DIST1] = urb_next + 1; in calculate_urb_setup()
1501 } else if (mue_map && mue_map->per_vertex_header_size_dw > 8) { in calculate_urb_setup()
1504 per_vertex_size_dw -= 8; in calculate_urb_setup()
1507 * Per-Vertex attributes are laid out in order. Because we always link in calculate_urb_setup()
1511 int start = mue_map->start_dw[i]; in calculate_urb_setup()
1514 assert(mue_map->len_dw[i] > 0); in calculate_urb_setup()
1517 unsigned pos_dw = unsigned(start) - per_vertex_start_dw; in calculate_urb_setup()
1519 prog_data->urb_setup[i] = urb_next + pos_dw / 4; in calculate_urb_setup()
1520 prog_data->urb_setup_channel[i] = pos_dw % 4; in calculate_urb_setup()
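
Both the per-primitive and the per-vertex loops use the same split: a dword position inside the MUE becomes a 4-dword URB slot plus a channel within it, as sketched below:

#include <cassert>

/* Sketch of the slot/channel split used above: URB slots are 4 dwords
 * wide, so a dword offset decomposes into slot index and channel. */
static void
urb_slot_channel(unsigned urb_next, unsigned pos_dw,
                 unsigned *slot, unsigned *channel)
{
   *slot = urb_next + pos_dw / 4;
   *channel = pos_dw % 4;
}

int main()
{
   unsigned slot, chan;
   urb_slot_channel(2, 9, &slot, &chan); /* dword 9 -> slot 4, channel .y */
   assert(slot == 4 && chan == 1);
   return 0;
}
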
1529 prog_data->urb_setup[VARYING_SLOT_CLIP_DIST0] = urb_next++; in calculate_urb_setup()
1530 prog_data->urb_setup[VARYING_SLOT_CLIP_DIST1] = urb_next++; in calculate_urb_setup()
1535 prog_data->urb_setup[i] = urb_next++; in calculate_urb_setup()
1539 assert(!nir->info.per_primitive_inputs); in calculate_urb_setup()
1568 prog_data->urb_setup[VARYING_SLOT_PSIZ] = urb_next; in calculate_urb_setup()
1570 prog_data->urb_setup[VARYING_SLOT_LAYER] = urb_next; in calculate_urb_setup()
1572 prog_data->urb_setup[VARYING_SLOT_VIEWPORT] = urb_next; in calculate_urb_setup()
1580 prog_data->urb_setup[i] = urb_next++; in calculate_urb_setup()
1590 /* Re-compute the VUE map here in the case that the one coming from in calculate_urb_setup()
1596 key->input_slots_valid, in calculate_urb_setup()
1597 nir->info.separate_shader, 1); in calculate_urb_setup()
1610 prog_data->urb_setup[varying] = slot - first_slot; in calculate_urb_setup()
1613 urb_next = prev_stage_vue_map.num_slots - first_slot; in calculate_urb_setup()
1617 prog_data->num_varying_inputs = urb_next - prog_data->num_per_primitive_inputs; in calculate_urb_setup()
1618 prog_data->inputs = inputs_read; in calculate_urb_setup()
1627 struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data); in assign_urb_setup()
1629 int urb_start = payload().num_regs + prog_data->base.curb_read_length; in assign_urb_setup()
1635 for (int i = 0; i < inst->sources; i++) { in assign_urb_setup()
1636 if (inst->src[i].file == ATTR) { in assign_urb_setup()
1638 * inputs each of which consumes 16B on Gfx4-Gfx12. In in assign_urb_setup()
1644 * 0 Attr0.x a1-a0 a2-a0 N/A a0 in assign_urb_setup()
1645 * 1 Attr0.y a1-a0 a2-a0 N/A a0 in assign_urb_setup()
1646 * 2 Attr0.z a1-a0 a2-a0 N/A a0 in assign_urb_setup()
1647 * 3 Attr0.w a1-a0 a2-a0 N/A a0 in assign_urb_setup()
1648 * 4 Attr1.x a1-a0 a2-a0 N/A a0 in assign_urb_setup()
1654 * represented as a dispatch_width-wide vector: in assign_urb_setup()
1657 * 0 0 Attr0.x a1[0]-a0[0] ... a1[N]-a0[N] in assign_urb_setup()
1658 * 0 4 * dispatch_width Attr0.x a2[0]-a0[0] ... a2[N]-a0[N] in assign_urb_setup()
1661 * 1 0 Attr0.y a1[0]-a0[0] ... a1[N]-a0[N] in assign_urb_setup()
1681 * and ordered like "a0, a1-a0, a2-a0" instead of the in assign_urb_setup()
1695 * per-primitive constant data depending on whether we're in assign_urb_setup()
1699 const bool per_prim = inst->src[i].nr < prog_data->num_per_primitive_inputs; in assign_urb_setup()
1702 ALIGN(prog_data->num_per_primitive_inputs / 2, in assign_urb_setup()
1704 const unsigned idx = per_prim ? inst->src[i].nr : in assign_urb_setup()
1705 inst->src[i].nr - prog_data->num_per_primitive_inputs; in assign_urb_setup()
1707 /* Translate the offset within the param_width-wide in assign_urb_setup()
1712 if (devinfo->ver >= 20 && !per_prim) { in assign_urb_setup()
1717 assert(inst->src[i].offset / param_width < 12); in assign_urb_setup()
1719 inst->src[i].offset / (param_width * chan_sz) * chan_sz + in assign_urb_setup()
1720 inst->src[i].offset % chan_sz; in assign_urb_setup()
1721 reg = byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type), in assign_urb_setup()
1724 /* Earlier platforms and per-primitive block pack 2 logical in assign_urb_setup()
1728 assert(inst->src[i].offset / param_width < REG_SIZE / 2); in assign_urb_setup()
1730 inst->src[i].offset / (param_width * chan_sz) * chan_sz + in assign_urb_setup()
1731 inst->src[i].offset % chan_sz; in assign_urb_setup()
1732 reg = byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type), in assign_urb_setup()
1737 assert(devinfo->ver >= 12); in assign_urb_setup()
1739 * cross-channel access in the representation above are in assign_urb_setup()
1742 assert(inst->src[i].stride * type_sz(inst->src[i].type) == chan_sz); in assign_urb_setup()
1750 * SIMD-lowered instructions though. in assign_urb_setup()
1752 const unsigned chan = inst->src[i].offset % in assign_urb_setup()
1759 if (inst->exec_size > poly_width) { in assign_urb_setup()
1765 const unsigned vstride = reg_size / type_sz(inst->src[i].type); in assign_urb_setup()
1770 /* Accessing one parameter for a single polygon -- in assign_urb_setup()
1773 assert(chan % poly_width + inst->exec_size <= poly_width); in assign_urb_setup()
1778 const unsigned width = inst->src[i].stride == 0 ? in assign_urb_setup()
1779 1 : MIN2(inst->exec_size, 8); in assign_urb_setup()
1780 reg = stride(reg, width * inst->src[i].stride, in assign_urb_setup()
1781 width, inst->src[i].stride); in assign_urb_setup()
1784 reg.abs = inst->src[i].abs; in assign_urb_setup()
1785 reg.negate = inst->src[i].negate; in assign_urb_setup()
1786 inst->src[i] = reg; in assign_urb_setup()
1795 this->first_non_payload_grf += prog_data->num_varying_inputs * 2 * max_polygons; in assign_urb_setup()
1797 /* Unlike regular attributes, per-primitive attributes have all 4 channels in assign_urb_setup()
1800 assert(prog_data->num_per_primitive_inputs % 2 == 0); in assign_urb_setup()
1801 this->first_non_payload_grf += prog_data->num_per_primitive_inputs / 2 * max_polygons; in assign_urb_setup()
1807 for (int i = 0; i < inst->sources; i++) { in convert_attr_sources_to_hw_regs()
1808 if (inst->src[i].file == ATTR) { in convert_attr_sources_to_hw_regs()
1809 assert(inst->src[i].nr == 0); in convert_attr_sources_to_hw_regs()
1811 prog_data->curb_read_length + in convert_attr_sources_to_hw_regs()
1812 inst->src[i].offset / REG_SIZE; in convert_attr_sources_to_hw_regs()
1823 unsigned total_size = inst->exec_size * in convert_attr_sources_to_hw_regs()
1824 inst->src[i].stride * in convert_attr_sources_to_hw_regs()
1825 type_sz(inst->src[i].type); in convert_attr_sources_to_hw_regs()
1829 (total_size <= REG_SIZE) ? inst->exec_size : inst->exec_size / 2; in convert_attr_sources_to_hw_regs()
1831 unsigned width = inst->src[i].stride == 0 ? 1 : exec_size; in convert_attr_sources_to_hw_regs()
1833 stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type), in convert_attr_sources_to_hw_regs()
1834 inst->src[i].offset % REG_SIZE), in convert_attr_sources_to_hw_regs()
1835 exec_size * inst->src[i].stride, in convert_attr_sources_to_hw_regs()
1836 width, inst->src[i].stride); in convert_attr_sources_to_hw_regs()
1837 reg.abs = inst->src[i].abs; in convert_attr_sources_to_hw_regs()
1838 reg.negate = inst->src[i].negate; in convert_attr_sources_to_hw_regs()
1840 inst->src[i] = reg; in convert_attr_sources_to_hw_regs()
1853 this->first_non_payload_grf += 4 * vs_prog_data->nr_attribute_slots; in assign_vs_urb_setup()
1855 assert(vs_prog_data->base.urb_read_length <= 15); in assign_vs_urb_setup()
1881 first_non_payload_grf += 8 * vue_prog_data->urb_read_length; in assign_tes_urb_setup()
1897 8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in; in assign_gs_urb_setup()
1909 if (prog_data->nr_params == 0) in brw_get_subgroup_id_param_index()
1910 return -1; in brw_get_subgroup_id_param_index()
1912 if (devinfo->verx10 >= 125) in brw_get_subgroup_id_param_index()
1913 return -1; in brw_get_subgroup_id_param_index()
1916 uint32_t last_param = prog_data->param[prog_data->nr_params - 1]; in brw_get_subgroup_id_param_index()
1918 return prog_data->nr_params - 1; in brw_get_subgroup_id_param_index()
1920 return -1; in brw_get_subgroup_id_param_index()
1950 unsigned push_length = DIV_ROUND_UP(stage_prog_data->nr_params, 8); in assign_constant_locations()
1952 struct brw_ubo_range *range = &prog_data->ubo_ranges[i]; in assign_constant_locations()
1954 if (push_length + range->length > max_push_length) in assign_constant_locations()
1955 range->length = max_push_length - push_length; in assign_constant_locations()
1957 push_length += range->length; in assign_constant_locations()
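
The loop above visits the UBO ranges in priority order and trims the first one that would overflow the push budget; every later range then collapses to zero through the same clamp. A standalone sketch:

#include <cassert>

/* Standalone sketch of the clamping loop above: ranges are visited in
 * priority order and trimmed so the running total never exceeds the
 * push budget (all lengths in 32B registers). */
static void
clamp_push_ranges(unsigned *lengths, unsigned n,
                  unsigned push_length, /* registers already used */
                  unsigned max_push_length)
{
   for (unsigned i = 0; i < n; i++) {
      if (push_length + lengths[i] > max_push_length)
         lengths[i] = max_push_length - push_length;
      push_length += lengths[i];
   }
}

int main()
{
   unsigned lengths[] = { 3, 4, 2 };
   clamp_push_ranges(lengths, 3, 58, 64); /* 6 registers of budget left */
   assert(lengths[0] == 3 && lengths[1] == 3 && lengths[2] == 0);
   return 0;
}
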
1973 &prog_data->ubo_ranges[src.nr - UBO_START]; in get_pull_locs()
1976 if (src.offset / 32 < range->length) in get_pull_locs()
1979 *out_surf_index = range->block; in get_pull_locs()
1980 *out_pull_index = (32 * range->start + src.offset) / 4; in get_pull_locs()
1982 prog_data->has_ubo_pull = true; in get_pull_locs()
1994 brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; in emit_repclear_shader()
1998 assume(key->nr_color_regions > 0); in emit_repclear_shader()
2012 if (key->nr_color_regions > 1) { in emit_repclear_shader()
2018 for (int i = 0; i < key->nr_color_regions; ++i) { in emit_repclear_shader()
2023 write->resize_sources(3); in emit_repclear_shader()
2024 write->sfid = GFX6_SFID_DATAPORT_RENDER_CACHE; in emit_repclear_shader()
2025 write->src[0] = brw_imm_ud(0); in emit_repclear_shader()
2026 write->src[1] = brw_imm_ud(0); in emit_repclear_shader()
2027 write->src[2] = i == 0 ? color_output : header; in emit_repclear_shader()
2028 write->check_tdr = true; in emit_repclear_shader()
2029 write->send_has_side_effects = true; in emit_repclear_shader()
2030 write->desc = brw_fb_write_desc(devinfo, i, in emit_repclear_shader()
2032 i == key->nr_color_regions - 1, false); in emit_repclear_shader()
2035 write->header_size = i == 0 ? 0 : 2; in emit_repclear_shader()
2036 write->mlen = 1 + write->header_size; in emit_repclear_shader()
2038 write->eot = true; in emit_repclear_shader()
2039 write->last_rt = true; in emit_repclear_shader()
2043 this->first_non_payload_grf = payload().num_regs; in emit_repclear_shader()
2061 } else if (brw_wm_prog_data(s.stage_prog_data)->uses_kill) { in brw_sample_mask_reg()
2066 assert(s.devinfo->ver < 20); in brw_sample_mask_reg()
2078 if (prog_data->dual_src_blend) { in brw_fb_write_msg_control()
2079 assert(inst->exec_size == 8); in brw_fb_write_msg_control()
2081 if (inst->group % 16 == 0) in brw_fb_write_msg_control()
2083 else if (inst->group % 16 == 8) in brw_fb_write_msg_control()
2086 unreachable("Invalid dual-source FB write instruction group"); in brw_fb_write_msg_control()
2088 assert(inst->group == 0 || (inst->group == 16 && inst->exec_size == 16)); in brw_fb_write_msg_control()
2090 if (inst->exec_size == 16) in brw_fb_write_msg_control()
2092 else if (inst->exec_size == 8) in brw_fb_write_msg_control()
2107 assert(bld.shader->stage == MESA_SHADER_FRAGMENT && in brw_emit_predicate_on_sample_mask()
2108 bld.group() == inst->group && in brw_emit_predicate_on_sample_mask()
2109 bld.dispatch_width() == inst->exec_size); in brw_emit_predicate_on_sample_mask()
2115 if (brw_wm_prog_data(s.stage_prog_data)->uses_kill) { in brw_emit_predicate_on_sample_mask()
2119 subreg + inst->group / 16).subnr); in brw_emit_predicate_on_sample_mask()
2122 .MOV(brw_flag_subreg(subreg + inst->group / 16), sample_mask); in brw_emit_predicate_on_sample_mask()
2125 if (inst->predicate) { in brw_emit_predicate_on_sample_mask()
2126 assert(inst->predicate == BRW_PREDICATE_NORMAL); in brw_emit_predicate_on_sample_mask()
2127 assert(!inst->predicate_inverse); in brw_emit_predicate_on_sample_mask()
2128 assert(inst->flag_subreg == 0); in brw_emit_predicate_on_sample_mask()
2129 assert(s.devinfo->ver < 20); in brw_emit_predicate_on_sample_mask()
2133 inst->predicate = BRW_PREDICATE_ALIGN1_ALLV; in brw_emit_predicate_on_sample_mask()
2135 inst->flag_subreg = subreg; in brw_emit_predicate_on_sample_mask()
2136 inst->predicate = BRW_PREDICATE_NORMAL; in brw_emit_predicate_on_sample_mask()
2137 inst->predicate_inverse = false; in brw_emit_predicate_on_sample_mask()
2149 if (inst->is_control_flow_end()) in dump_instructions_to_file()
2150 cf_count -= 1; in dump_instructions_to_file()
2159 if (inst->is_control_flow_begin()) in dump_instructions_to_file()
2177 if (inst->predicate) { in dump_instruction_to_file()
2179 inst->predicate_inverse ? '-' : '+', in dump_instruction_to_file()
2180 inst->flag_subreg / 2, in dump_instruction_to_file()
2181 inst->flag_subreg % 2); in dump_instruction_to_file()
2184 fprintf(file, "%s", brw_instruction_name(&compiler->isa, inst->opcode)); in dump_instruction_to_file()
2185 if (inst->saturate) in dump_instruction_to_file()
2187 if (inst->conditional_mod) { in dump_instruction_to_file()
2188 fprintf(file, "%s", conditional_modifier[inst->conditional_mod]); in dump_instruction_to_file()
2189 if (!inst->predicate && in dump_instruction_to_file()
2190 (inst->opcode != BRW_OPCODE_SEL && in dump_instruction_to_file()
2191 inst->opcode != BRW_OPCODE_CSEL && in dump_instruction_to_file()
2192 inst->opcode != BRW_OPCODE_IF && in dump_instruction_to_file()
2193 inst->opcode != BRW_OPCODE_WHILE)) { in dump_instruction_to_file()
2194 fprintf(file, ".f%d.%d", inst->flag_subreg / 2, in dump_instruction_to_file()
2195 inst->flag_subreg % 2); in dump_instruction_to_file()
2198 fprintf(file, "(%d) ", inst->exec_size); in dump_instruction_to_file()
2200 if (inst->mlen) { in dump_instruction_to_file()
2201 fprintf(file, "(mlen: %d) ", inst->mlen); in dump_instruction_to_file()
2204 if (inst->ex_mlen) { in dump_instruction_to_file()
2205 fprintf(file, "(ex_mlen: %d) ", inst->ex_mlen); in dump_instruction_to_file()
2208 if (inst->eot) { in dump_instruction_to_file()
2212 switch (inst->dst.file) { in dump_instruction_to_file()
2214 fprintf(file, "vgrf%d", inst->dst.nr); in dump_instruction_to_file()
2217 fprintf(file, "g%d", inst->dst.nr); in dump_instruction_to_file()
2223 fprintf(file, "***u%d***", inst->dst.nr); in dump_instruction_to_file()
2226 fprintf(file, "***attr%d***", inst->dst.nr); in dump_instruction_to_file()
2229 switch (inst->dst.nr) { in dump_instruction_to_file()
2234 fprintf(file, "a0.%d", inst->dst.subnr); in dump_instruction_to_file()
2237 fprintf(file, "acc%d", inst->dst.subnr); in dump_instruction_to_file()
2240 fprintf(file, "f%d.%d", inst->dst.nr & 0xf, inst->dst.subnr); in dump_instruction_to_file()
2243 fprintf(file, "arf%d.%d", inst->dst.nr & 0xf, inst->dst.subnr); in dump_instruction_to_file()
2251 if (inst->dst.offset || in dump_instruction_to_file()
2252 (inst->dst.file == VGRF && in dump_instruction_to_file()
2253 alloc.sizes[inst->dst.nr] * REG_SIZE != inst->size_written)) { in dump_instruction_to_file()
2254 const unsigned reg_size = (inst->dst.file == UNIFORM ? 4 : REG_SIZE); in dump_instruction_to_file()
2255 fprintf(file, "+%d.%d", inst->dst.offset / reg_size, in dump_instruction_to_file()
2256 inst->dst.offset % reg_size); in dump_instruction_to_file()
2259 if (inst->dst.stride != 1) in dump_instruction_to_file()
2260 fprintf(file, "<%u>", inst->dst.stride); in dump_instruction_to_file()
2261 fprintf(file, ":%s, ", brw_reg_type_to_letters(inst->dst.type)); in dump_instruction_to_file()
2263 for (int i = 0; i < inst->sources; i++) { in dump_instruction_to_file()
2264 if (inst->src[i].negate) in dump_instruction_to_file()
2265 fprintf(file, "-"); in dump_instruction_to_file()
2266 if (inst->src[i].abs) in dump_instruction_to_file()
2268 switch (inst->src[i].file) { in dump_instruction_to_file()
2270 fprintf(file, "vgrf%d", inst->src[i].nr); in dump_instruction_to_file()
2273 fprintf(file, "g%d", inst->src[i].nr); in dump_instruction_to_file()
2276 fprintf(file, "attr%d", inst->src[i].nr); in dump_instruction_to_file()
2279 fprintf(file, "u%d", inst->src[i].nr); in dump_instruction_to_file()
2285 switch (inst->src[i].type) { in dump_instruction_to_file()
2287 fprintf(file, "%-ghf", _mesa_half_to_float(inst->src[i].ud & 0xffff)); in dump_instruction_to_file()
2290 fprintf(file, "%-gf", inst->src[i].f); in dump_instruction_to_file()
2293 fprintf(file, "%fdf", inst->src[i].df); in dump_instruction_to_file()
2297 fprintf(file, "%dd", inst->src[i].d); in dump_instruction_to_file()
2301 fprintf(file, "%uu", inst->src[i].ud); in dump_instruction_to_file()
2304 fprintf(file, "%" PRId64 "q", inst->src[i].d64); in dump_instruction_to_file()
2307 fprintf(file, "%" PRIu64 "uq", inst->src[i].u64); in dump_instruction_to_file()
2310 fprintf(file, "[%-gF, %-gF, %-gF, %-gF]", in dump_instruction_to_file()
2311 brw_vf_to_float((inst->src[i].ud >> 0) & 0xff), in dump_instruction_to_file()
2312 brw_vf_to_float((inst->src[i].ud >> 8) & 0xff), in dump_instruction_to_file()
2313 brw_vf_to_float((inst->src[i].ud >> 16) & 0xff), in dump_instruction_to_file()
2314 brw_vf_to_float((inst->src[i].ud >> 24) & 0xff)); in dump_instruction_to_file()
2318 fprintf(file, "%08x%s", inst->src[i].ud, in dump_instruction_to_file()
2319 inst->src[i].type == BRW_REGISTER_TYPE_V ? "V" : "UV"); in dump_instruction_to_file()
2327 switch (inst->src[i].nr) { in dump_instruction_to_file()
2332 fprintf(file, "a0.%d", inst->src[i].subnr); in dump_instruction_to_file()
2335 fprintf(file, "acc%d", inst->src[i].subnr); in dump_instruction_to_file()
2338 fprintf(file, "f%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr); in dump_instruction_to_file()
2341 fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr); in dump_instruction_to_file()
2347 if (inst->src[i].offset || in dump_instruction_to_file()
2348 (inst->src[i].file == VGRF && in dump_instruction_to_file()
2349 alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(i))) { in dump_instruction_to_file()
2350 const unsigned reg_size = (inst->src[i].file == UNIFORM ? 4 : REG_SIZE); in dump_instruction_to_file()
2351 fprintf(file, "+%d.%d", inst->src[i].offset / reg_size, in dump_instruction_to_file()
2352 inst->src[i].offset % reg_size); in dump_instruction_to_file()
2355 if (inst->src[i].abs) in dump_instruction_to_file()
2358 if (inst->src[i].file != IMM) { in dump_instruction_to_file()
2360 if (inst->src[i].file == ARF || inst->src[i].file == FIXED_GRF) { in dump_instruction_to_file()
2361 unsigned hstride = inst->src[i].hstride; in dump_instruction_to_file()
2362 stride = (hstride == 0 ? 0 : (1 << (hstride - 1))); in dump_instruction_to_file()
2364 stride = inst->src[i].stride; in dump_instruction_to_file()
2369 fprintf(file, ":%s", brw_reg_type_to_letters(inst->src[i].type)); in dump_instruction_to_file()
2372 if (i < inst->sources - 1 && inst->src[i + 1].file != BAD_FILE) in dump_instruction_to_file()
2378 if (inst->force_writemask_all) in dump_instruction_to_file()
2381 if (inst->exec_size != dispatch_width) in dump_instruction_to_file()
2382 fprintf(file, "group%d ", inst->group); in dump_instruction_to_file()
2389 const fs_live_variables &live = v->live_analysis.require(); in register_pressure()
2390 const unsigned num_instructions = v->cfg->num_blocks ? in register_pressure()
2391 v->cfg->blocks[v->cfg->num_blocks - 1]->end_ip + 1 : 0; in register_pressure()
2395 for (unsigned reg = 0; reg < v->alloc.count; reg++) { in register_pressure()
2397 regs_live_at_ip[ip] += v->alloc.sizes[reg]; in register_pressure()
2400 const unsigned payload_count = v->first_non_payload_grf; in register_pressure()
2403 v->calculate_payload_ranges(payload_count, payload_last_use_ip); in register_pressure()
2435 int ret = asprintf(&filename, "%s/%s%d-%s-%02d-%02d-%s", in debug_optimizer()
2437 _mesa_shader_stage_to_abbrev(stage), dispatch_width, nir->info.name, in debug_optimizer()
2439 if (ret == -1) in debug_optimizer()
2464 int num_insts = cfg->last_block()->end_ip + 1; in save_instruction_order()
2469 assert(ip >= block->start_ip && ip <= block->end_ip); in save_instruction_order()
2480 ASSERTED int num_insts = cfg->last_block()->end_ip + 1; in restore_instruction_order()
2484 block->instructions.make_empty(); in restore_instruction_order()
2486 assert(ip == block->start_ip); in restore_instruction_order()
2487 for (; ip <= block->end_ip; ip++) in restore_instruction_order()
2488 block->instructions.push_tail(inst_arr[ip]); in restore_instruction_order()
2506 [SCHEDULE_PRE] = "top-down", in allocate_registers()
2507 [SCHEDULE_PRE_NON_LIFO] = "non-lifo", in allocate_registers()
2543 this->shader_stats.scheduler_mode = scheduler_mode_name[sched_mode]; in allocate_registers()
2584 fprintf(stderr, "Spilling - using lowest-pressure mode \"%s\"\n", in allocate_registers()
2621 prog_data->total_scratch = MAX2(brw_get_scratch_size(last_scratch), in allocate_registers()
2622 prog_data->total_scratch); in allocate_registers()
2631 * See 3D-Media-GPGPU Engine > Media GPGPU Pipeline > in allocate_registers()
2634 assert(prog_data->total_scratch < max_scratch_size); in allocate_registers()
2674 struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base; in set_tcs_invocation_id()
2678 (devinfo->verx10 >= 125) ? INTEL_MASK(7, 0) : in set_tcs_invocation_id()
2679 (devinfo->ver >= 11) ? INTEL_MASK(22, 16) : in set_tcs_invocation_id()
2682 (devinfo->verx10 >= 125) ? 0 : (devinfo->ver >= 11) ? 16 : 17; in set_tcs_invocation_id()
2695 if (vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_MULTI_PATCH) { in set_tcs_invocation_id()
2701 assert(vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH); in set_tcs_invocation_id()
2708 if (tcs_prog_data->instances == 1) { in set_tcs_invocation_id()
2712 bld.SHR(instance_times_8, t, brw_imm_ud(instance_id_shift - 3)); in set_tcs_invocation_id()
2741 inst->eot = true; in emit_tcs_thread_end()
2752 assert(vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH || in run_tcs()
2753 vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_MULTI_PATCH); in run_tcs()
2761 vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH && in run_tcs()
2762 (nir->info.tess.tcs_vertices_out % 8) != 0; in run_tcs()
2767 brw_imm_ud(nir->info.tess.tcs_vertices_out), BRW_CONDITIONAL_L); in run_tcs()
2835 this->final_gs_vertex_count = vgrf(glsl_uint_type()); in run_gs()
2837 if (gs_compile->control_data_header_size_bits > 0) { in run_gs()
2839 this->control_data_bits = vgrf(glsl_uint_type()); in run_gs()
2845 if (gs_compile->control_data_header_size_bits <= 32) { in run_gs()
2848 abld.MOV(this->control_data_bits, brw_imm_ud(0u)); in run_gs()
2878 * only header phases (R0-R2)
2880 * WA: Enable a non-header phase (e.g. push constant) when dispatch would
2890 if (wm_prog_data->num_varying_inputs) in gfx9_ps_header_only_workaround()
2893 if (wm_prog_data->base.curb_read_length) in gfx9_ps_header_only_workaround()
2896 wm_prog_data->urb_setup[VARYING_SLOT_LAYER] = 0; in gfx9_ps_header_only_workaround()
2897 wm_prog_data->num_varying_inputs = 1; in gfx9_ps_header_only_workaround()
2905 struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(this->prog_data); in run_fs()
2906 brw_wm_prog_key *wm_key = (brw_wm_prog_key *) this->key; in run_fs()
2917 if (nir->info.inputs_read > 0 || in run_fs()
2918 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) || in run_fs()
2919 (nir->info.outputs_read > 0 && !wm_key->coherent_fb_fetch)) { in run_fs()
2923 /* We handle discards by keeping track of the still-live pixels in f0.1. in run_fs()
2926 if (wm_prog_data->uses_kill) { in run_fs()
2935 devinfo->ver >= 20 ? xe2_vec1_grf(i, 15) : in run_fs()
2943 if (nir->info.writes_memory) in run_fs()
2944 wm_prog_data->has_side_effects = true; in run_fs()
2959 if (devinfo->ver == 9) in run_fs()
2982 if (devinfo->platform == INTEL_PLATFORM_HSW && prog_data->total_shared > 0) { in run_cs()
3110 if (nir_src_parent_instr(src)->type != nir_instr_type_intrinsic) in is_used_in_not_interp_frag_coord()
3114 if (intrin->intrinsic != nir_intrinsic_load_frag_coord) in is_used_in_not_interp_frag_coord()
3138 if (instr->type != nir_instr_type_intrinsic) in brw_compute_barycentric_interp_modes()
3142 switch (intrin->intrinsic) { in brw_compute_barycentric_interp_modes()
3154 if (!is_used_in_not_interp_frag_coord(&intrin->def)) in brw_compute_barycentric_interp_modes()
3157 nir_intrinsic_op bary_op = intrin->intrinsic; in brw_compute_barycentric_interp_modes()
3163 if (devinfo->needs_unlit_centroid_workaround && in brw_compute_barycentric_interp_modes()
3177 prog_data->flat_inputs = 0; in brw_compute_flat_inputs()
3181 if (var->data.interpolation != INTERP_MODE_FLAT) in brw_compute_flat_inputs()
3184 if (var->data.per_primitive) in brw_compute_flat_inputs()
3187 unsigned slots = glsl_count_attribute_slots(var->type, false); in brw_compute_flat_inputs()
3189 int input_index = prog_data->urb_setup[var->data.location + s]; in brw_compute_flat_inputs()
3192 prog_data->flat_inputs |= 1 << input_index; in brw_compute_flat_inputs()
3200 if (shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { in computed_depth_mode()
3201 switch (shader->info.fs.depth_layout) { in computed_depth_mode()
3227 * Move load_interpolated_input with simple (payload-based) barycentric modes
3230 * This works around CSE not being able to handle non-dominating cases
3256 if (instr->type != nir_instr_type_intrinsic) in brw_nir_move_interpolation_to_top()
3260 if (intrin->intrinsic != nir_intrinsic_load_interpolated_input) in brw_nir_move_interpolation_to_top()
3263 nir_instr_as_intrinsic(intrin->src[0].ssa->parent_instr); in brw_nir_move_interpolation_to_top()
3264 nir_intrinsic_op op = bary_intrinsic->intrinsic; in brw_nir_move_interpolation_to_top()
3272 &bary_intrinsic->instr, in brw_nir_move_interpolation_to_top()
3273 intrin->src[1].ssa->parent_instr, in brw_nir_move_interpolation_to_top()
3278 if (move[i]->block != top) { in brw_nir_move_interpolation_to_top()
3303 prog_data->uses_kill = shader->info.fs.uses_discard || in brw_nir_populate_wm_prog_data()
3304 shader->info.fs.uses_demote; in brw_nir_populate_wm_prog_data()
3305 prog_data->uses_omask = !key->ignore_sample_mask_out && in brw_nir_populate_wm_prog_data()
3306 (shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)); in brw_nir_populate_wm_prog_data()
3307 prog_data->color_outputs_written = key->color_outputs_valid; in brw_nir_populate_wm_prog_data()
3308 prog_data->max_polygons = 1; in brw_nir_populate_wm_prog_data()
3309 prog_data->computed_depth_mode = computed_depth_mode(shader); in brw_nir_populate_wm_prog_data()
3310 prog_data->computed_stencil = in brw_nir_populate_wm_prog_data()
3311 shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL); in brw_nir_populate_wm_prog_data()
3313 prog_data->sample_shading = in brw_nir_populate_wm_prog_data()
3314 shader->info.fs.uses_sample_shading || in brw_nir_populate_wm_prog_data()
3315 shader->info.outputs_read; in brw_nir_populate_wm_prog_data()
3317 assert(key->multisample_fbo != BRW_NEVER || in brw_nir_populate_wm_prog_data()
3318 key->persample_interp == BRW_NEVER); in brw_nir_populate_wm_prog_data()
3320 prog_data->persample_dispatch = key->persample_interp; in brw_nir_populate_wm_prog_data()
3321 if (prog_data->sample_shading) in brw_nir_populate_wm_prog_data()
3322 prog_data->persample_dispatch = BRW_ALWAYS; in brw_nir_populate_wm_prog_data()
3325 prog_data->persample_dispatch = MIN2(prog_data->persample_dispatch, in brw_nir_populate_wm_prog_data()
3326 key->multisample_fbo); in brw_nir_populate_wm_prog_data()
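
persample_dispatch and its sibling fields are three-valued (never / sometimes / always), and the MIN2() above acts as a logical "and" on that lattice; brw_sometimes_invert(), used further down for coarse_pixel_dispatch, mirrors a value around the middle. A sketch assuming the usual NEVER < SOMETIMES < ALWAYS ordering (values illustrative):

#include <algorithm>
#include <cassert>

/* Sketch of the tristate logic, assuming the enum is ordered
 * NEVER < SOMETIMES < ALWAYS. */
enum tristate { T_NEVER = 0, T_SOMETIMES = 1, T_ALWAYS = 2 };

static tristate tri_and(tristate a, tristate b)
{
   return (tristate)std::min((int)a, (int)b); /* the MIN2() above */
}

static tristate tri_invert(tristate t) /* brw_sometimes_invert() analogue */
{
   return (tristate)(T_ALWAYS - (int)t);
}

int main()
{
   assert(tri_and(T_ALWAYS, T_SOMETIMES) == T_SOMETIMES);
   assert(tri_and(T_NEVER, T_ALWAYS) == T_NEVER);
   assert(tri_invert(T_SOMETIMES) == T_SOMETIMES);
   assert(tri_invert(T_ALWAYS) == T_NEVER);
   return 0;
}
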
3332 prog_data->alpha_to_coverage = key->alpha_to_coverage; in brw_nir_populate_wm_prog_data()
3333 assert(prog_data->alpha_to_coverage != BRW_SOMETIMES || in brw_nir_populate_wm_prog_data()
3334 prog_data->persample_dispatch == BRW_SOMETIMES); in brw_nir_populate_wm_prog_data()
3336 prog_data->uses_sample_mask = in brw_nir_populate_wm_prog_data()
3337 BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); in brw_nir_populate_wm_prog_data()
3345 * per-sample dispatch. If we need gl_SamplePosition and we don't have in brw_nir_populate_wm_prog_data()
3346 * persample dispatch, we hard-code it to 0.5. in brw_nir_populate_wm_prog_data()
3348 prog_data->uses_pos_offset = in brw_nir_populate_wm_prog_data()
3349 prog_data->persample_dispatch != BRW_NEVER && in brw_nir_populate_wm_prog_data()
3350 (BITSET_TEST(shader->info.system_values_read, in brw_nir_populate_wm_prog_data()
3352 BITSET_TEST(shader->info.system_values_read, in brw_nir_populate_wm_prog_data()
3355 prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests; in brw_nir_populate_wm_prog_data()
3356 prog_data->post_depth_coverage = shader->info.fs.post_depth_coverage; in brw_nir_populate_wm_prog_data()
3357 prog_data->inner_coverage = shader->info.fs.inner_coverage; in brw_nir_populate_wm_prog_data()
3359 prog_data->barycentric_interp_modes = in brw_nir_populate_wm_prog_data()
3365 * Sample or Non-perspective Sample barycentric coordinates." in brw_nir_populate_wm_prog_data()
3370 if (prog_data->persample_dispatch == BRW_NEVER) { in brw_nir_populate_wm_prog_data()
3371 prog_data->barycentric_interp_modes &= in brw_nir_populate_wm_prog_data()
3375 prog_data->uses_nonperspective_interp_modes |= in brw_nir_populate_wm_prog_data()
3376 (prog_data->barycentric_interp_modes & in brw_nir_populate_wm_prog_data()
3386 assert(!key->coarse_pixel || key->persample_interp != BRW_ALWAYS); in brw_nir_populate_wm_prog_data()
3388 prog_data->coarse_pixel_dispatch = in brw_nir_populate_wm_prog_data()
3389 brw_sometimes_invert(prog_data->persample_dispatch); in brw_nir_populate_wm_prog_data()
3390 if (!key->coarse_pixel || in brw_nir_populate_wm_prog_data()
3391 prog_data->uses_omask || in brw_nir_populate_wm_prog_data()
3392 prog_data->sample_shading || in brw_nir_populate_wm_prog_data()
3393 prog_data->uses_sample_mask || in brw_nir_populate_wm_prog_data()
3394 (prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF) || in brw_nir_populate_wm_prog_data()
3395 prog_data->computed_stencil) { in brw_nir_populate_wm_prog_data()
3396 prog_data->coarse_pixel_dispatch = BRW_NEVER; in brw_nir_populate_wm_prog_data()
3403 * pixel-rate evaluation is requested: in brw_nir_populate_wm_prog_data()
3412 * coarse-rate evaluation is requested: in brw_nir_populate_wm_prog_data()
3426 prog_data->coarse_pixel_dispatch = BRW_NEVER; in brw_nir_populate_wm_prog_data()
3431 prog_data->uses_vmask = devinfo->verx10 < 125 || in brw_nir_populate_wm_prog_data()
3432 shader->info.fs.needs_quad_helper_invocations || in brw_nir_populate_wm_prog_data()
3433 shader->info.uses_wide_subgroup_intrinsics || in brw_nir_populate_wm_prog_data()
3434 prog_data->coarse_pixel_dispatch != BRW_NEVER; in brw_nir_populate_wm_prog_data()
3436 prog_data->uses_src_w = in brw_nir_populate_wm_prog_data()
3437 BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD); in brw_nir_populate_wm_prog_data()
3438 prog_data->uses_src_depth = in brw_nir_populate_wm_prog_data()
3439 BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) && in brw_nir_populate_wm_prog_data()
3440 prog_data->coarse_pixel_dispatch != BRW_ALWAYS; in brw_nir_populate_wm_prog_data()
3441 prog_data->uses_depth_w_coefficients = in brw_nir_populate_wm_prog_data()
3442 BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) && in brw_nir_populate_wm_prog_data()
3443 prog_data->coarse_pixel_dispatch != BRW_NEVER; in brw_nir_populate_wm_prog_data()
3450 * Pre-gfx6, the register file of the EUs was shared between threads,
3451 * and each thread used some subset allocated on a 16-register block
3457 return ALIGN(reg_count, 16) / 16 - 1; in brw_register_blocks()
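
The encoding mirrors the comment above: GRF usage is rounded up to a 16-register block and stored as blocks-minus-one. A tiny sketch with worked values:

#include <cassert>

#define ALIGN(v, a) (((v) + (a) - 1) / (a) * (a))

/* Sketch of brw_register_blocks(): round the GRF count up to a
 * 16-register block and encode it as "number of blocks minus one". */
static unsigned register_blocks_sketch(unsigned grf_used)
{
   return ALIGN(grf_used, 16) / 16 - 1;
}

int main()
{
   assert(register_blocks_sketch(16) == 0); /* one block   */
   assert(register_blocks_sketch(17) == 1); /* two blocks  */
   assert(register_blocks_sketch(48) == 2); /* three blocks */
   return 0;
}
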
3464 struct nir_shader *nir = params->base.nir; in brw_compile_fs()
3465 const struct brw_wm_prog_key *key = params->key; in brw_compile_fs()
3466 struct brw_wm_prog_data *prog_data = params->prog_data; in brw_compile_fs()
3467 bool allow_spilling = params->allow_spilling; in brw_compile_fs()
3469 brw_should_print_shader(nir, params->base.debug_flag ? in brw_compile_fs()
3470 params->base.debug_flag : DEBUG_WM); in brw_compile_fs()
3472 prog_data->base.stage = MESA_SHADER_FRAGMENT; in brw_compile_fs()
3473 prog_data->base.ray_queries = nir->info.ray_queries; in brw_compile_fs()
3474 prog_data->base.total_scratch = 0; in brw_compile_fs()
3476 const struct intel_device_info *devinfo = compiler->devinfo; in brw_compile_fs()
3479 brw_nir_apply_key(nir, compiler, &key->base, max_subgroup_size); in brw_compile_fs()
3483 /* From the SKL PRM, Volume 7, "Alpha Coverage": in brw_compile_fs()
3487 if (key->alpha_to_coverage != BRW_NEVER) { in brw_compile_fs()
3498 key->base.robust_flags); in brw_compile_fs()
3500 brw_nir_populate_wm_prog_data(nir, compiler->devinfo, key, prog_data, in brw_compile_fs()
3501 params->mue_map); in brw_compile_fs()
3503 std::unique_ptr<fs_visitor> v8, v16, v32, vmulti; in brw_compile_fs() local
3509 if (devinfo->ver < 20) { in brw_compile_fs()
3510 v8 = std::make_unique<fs_visitor>(compiler, ¶ms->base, key, in brw_compile_fs()
3512 params->base.stats != NULL, in brw_compile_fs()
3514 if (!v8->run_fs(allow_spilling, false /* do_rep_send */)) { in brw_compile_fs()
3515 params->base.error_str = ralloc_strdup(params->base.mem_ctx, in brw_compile_fs()
3516 v8->fail_msg); in brw_compile_fs()
3519 simd8_cfg = v8->cfg; in brw_compile_fs()
3521 assert(v8->payload().num_regs % reg_unit(devinfo) == 0); in brw_compile_fs()
3522 prog_data->base.dispatch_grf_start_reg = v8->payload().num_regs / reg_unit(devinfo); in brw_compile_fs()
3524 prog_data->reg_blocks_8 = brw_register_blocks(v8->grf_used); in brw_compile_fs()
3525 const performance &perf = v8->performance_analysis.require(); in brw_compile_fs()
3527 has_spilled = v8->spilled_any_registers; in brw_compile_fs()
3532 if (key->coarse_pixel && devinfo->ver < 20) { in brw_compile_fs()
3533 if (prog_data->dual_src_blend) { in brw_compile_fs()
3534 v8->limit_dispatch_width(8, "SIMD16 coarse pixel shading cannot" in brw_compile_fs()
3537 v8->limit_dispatch_width(16, "SIMD32 not supported with coarse" in brw_compile_fs()
3541 if (nir->info.ray_queries > 0 && v8) in brw_compile_fs()
3542 v8->limit_dispatch_width(16, "SIMD32 with ray queries.\n"); in brw_compile_fs()
3545 (!v8 || v8->max_dispatch_width >= 16) && in brw_compile_fs()
3546 (INTEL_SIMD(FS, 16) || params->use_rep_send)) { in brw_compile_fs()
3548 v16 = std::make_unique<fs_visitor>(compiler, ¶ms->base, key, in brw_compile_fs()
3550 params->base.stats != NULL, in brw_compile_fs()
3552 if (v8) in brw_compile_fs()
3553 v16->import_uniforms(v8.get()); in brw_compile_fs()
3554 if (!v16->run_fs(allow_spilling, params->use_rep_send)) { in brw_compile_fs()
3555 brw_shader_perf_log(compiler, params->base.log_data, in brw_compile_fs()
3557 v16->fail_msg); in brw_compile_fs()
3559 simd16_cfg = v16->cfg; in brw_compile_fs()
3561 assert(v16->payload().num_regs % reg_unit(devinfo) == 0); in brw_compile_fs()
3562 prog_data->dispatch_grf_start_reg_16 = v16->payload().num_regs / reg_unit(devinfo); in brw_compile_fs()
3564 prog_data->reg_blocks_16 = brw_register_blocks(v16->grf_used); in brw_compile_fs()
3565 const performance &perf = v16->performance_analysis.require(); in brw_compile_fs()
3567 has_spilled = v16->spilled_any_registers; in brw_compile_fs()
3576 (!v8 || v8->max_dispatch_width >= 32) && in brw_compile_fs()
3577 (!v16 || v16->max_dispatch_width >= 32) && !params->use_rep_send && in brw_compile_fs()
3581 v32 = std::make_unique<fs_visitor>(compiler, ¶ms->base, key, in brw_compile_fs()
3583 params->base.stats != NULL, in brw_compile_fs()
3585 if (v8) in brw_compile_fs()
3586 v32->import_uniforms(v8.get()); in brw_compile_fs()
3588 v32->import_uniforms(v16.get()); in brw_compile_fs()
3590 if (!v32->run_fs(allow_spilling, false)) { in brw_compile_fs()
3591 brw_shader_perf_log(compiler, params->base.log_data, in brw_compile_fs()
3593 v32->fail_msg); in brw_compile_fs()
3595 const performance &perf = v32->performance_analysis.require(); in brw_compile_fs()
3598 brw_shader_perf_log(compiler, params->base.log_data, in brw_compile_fs()
3601 simd32_cfg = v32->cfg; in brw_compile_fs()
3603 assert(v32->payload().num_regs % reg_unit(devinfo) == 0); in brw_compile_fs()
3604 prog_data->dispatch_grf_start_reg_32 = v32->payload().num_regs / reg_unit(devinfo); in brw_compile_fs()
3606 prog_data->reg_blocks_32 = brw_register_blocks(v32->grf_used); in brw_compile_fs()
3612 if (devinfo->ver >= 12 && !has_spilled && in brw_compile_fs()
3613 params->max_polygons >= 2 && !key->coarse_pixel) { in brw_compile_fs()
3614 fs_visitor *vbase = v8 ? v8.get() : v16 ? v16.get() : v32.get(); in brw_compile_fs()
3617 if (devinfo->ver >= 20 && in brw_compile_fs()
3618 params->max_polygons >= 4 && in brw_compile_fs()
3619 vbase->max_dispatch_width >= 32 && in brw_compile_fs()
3620 4 * prog_data->num_varying_inputs <= MAX_VARYING && in brw_compile_fs()
3622 /* Try a quad-SIMD8 compile */ in brw_compile_fs()
3623 vmulti = std::make_unique<fs_visitor>(compiler, ¶ms->base, key, in brw_compile_fs()
3625 params->base.stats != NULL, in brw_compile_fs()
3627 vmulti->import_uniforms(vbase); in brw_compile_fs()
3628 if (!vmulti->run_fs(false, params->use_rep_send)) { in brw_compile_fs()
3629 brw_shader_perf_log(compiler, params->base.log_data, in brw_compile_fs()
3630 "Quad-SIMD8 shader failed to compile: %s\n", in brw_compile_fs()
3631 vmulti->fail_msg); in brw_compile_fs()
3633 multi_cfg = vmulti->cfg; in brw_compile_fs()
3634 assert(!vmulti->spilled_any_registers); in brw_compile_fs()
3638 if (!multi_cfg && devinfo->ver >= 20 && in brw_compile_fs()
3639 vbase->max_dispatch_width >= 32 && in brw_compile_fs()
3640 2 * prog_data->num_varying_inputs <= MAX_VARYING && in brw_compile_fs()
3642 /* Try a dual-SIMD16 compile */ in brw_compile_fs()
3643 vmulti = std::make_unique<fs_visitor>(compiler, ¶ms->base, key, in brw_compile_fs()
3645 params->base.stats != NULL, in brw_compile_fs()
3647 vmulti->import_uniforms(vbase); in brw_compile_fs()
3648 if (!vmulti->run_fs(false, params->use_rep_send)) { in brw_compile_fs()
3649 brw_shader_perf_log(compiler, params->base.log_data, in brw_compile_fs()
3650 "Dual-SIMD16 shader failed to compile: %s\n", in brw_compile_fs()
3651 vmulti->fail_msg); in brw_compile_fs()
3653 multi_cfg = vmulti->cfg; in brw_compile_fs()
3654 assert(!vmulti->spilled_any_registers); in brw_compile_fs()
3658 if (!multi_cfg && vbase->max_dispatch_width >= 16 && in brw_compile_fs()
3659 2 * prog_data->num_varying_inputs <= MAX_VARYING && in brw_compile_fs()
3661 /* Try a dual-SIMD8 compile */ in brw_compile_fs()
3662 vmulti = std::make_unique<fs_visitor>(compiler, ¶ms->base, key, in brw_compile_fs()
3664 params->base.stats != NULL, in brw_compile_fs()
3666 vmulti->import_uniforms(vbase); in brw_compile_fs()
3667 if (!vmulti->run_fs(allow_spilling, params->use_rep_send)) { in brw_compile_fs()
3668 brw_shader_perf_log(compiler, params->base.log_data, in brw_compile_fs()
3669 "Dual-SIMD8 shader failed to compile: %s\n", in brw_compile_fs()
3670 vmulti->fail_msg); in brw_compile_fs()
3672 multi_cfg = vmulti->cfg; in brw_compile_fs()
3677 assert(vmulti->payload().num_regs % reg_unit(devinfo) == 0); in brw_compile_fs()
3678 prog_data->base.dispatch_grf_start_reg = vmulti->payload().num_regs / reg_unit(devinfo); in brw_compile_fs()
3680 prog_data->reg_blocks_8 = brw_register_blocks(vmulti->grf_used); in brw_compile_fs()
3684 /* When the caller requests a repclear shader, they want SIMD16-only */ in brw_compile_fs()
3685 if (params->use_rep_send) in brw_compile_fs()
3688 fs_generator g(compiler, ¶ms->base, &prog_data->base, in brw_compile_fs()
3692 g.enable_debug(ralloc_asprintf(params->base.mem_ctx, in brw_compile_fs()
3694 nir->info.label ? in brw_compile_fs()
3695 nir->info.label : "unnamed", in brw_compile_fs()
3696 nir->info.name)); in brw_compile_fs()
3699 struct brw_compile_stats *stats = params->base.stats; in brw_compile_fs()
3703 prog_data->dispatch_multi = vmulti->dispatch_width; in brw_compile_fs()
3704 prog_data->max_polygons = vmulti->max_polygons; in brw_compile_fs()
3705 g.generate_code(multi_cfg, vmulti->dispatch_width, vmulti->shader_stats, in brw_compile_fs()
3706 vmulti->performance_analysis.require(), in brw_compile_fs()
3707 stats, vmulti->max_polygons); in brw_compile_fs()
3709 max_dispatch_width = vmulti->dispatch_width; in brw_compile_fs()
3712 prog_data->dispatch_8 = true; in brw_compile_fs()
3713 g.generate_code(simd8_cfg, 8, v8->shader_stats, in brw_compile_fs()
3714 v8->performance_analysis.require(), stats, 1); in brw_compile_fs()
3720 prog_data->dispatch_16 = true; in brw_compile_fs()
3721 prog_data->prog_offset_16 = g.generate_code( in brw_compile_fs()
3722 simd16_cfg, 16, v16->shader_stats, in brw_compile_fs()
3723 v16->performance_analysis.require(), stats, 1); in brw_compile_fs()
3729 prog_data->dispatch_32 = true; in brw_compile_fs()
3730 prog_data->prog_offset_32 = g.generate_code( in brw_compile_fs()
3731 simd32_cfg, 32, v32->shader_stats, in brw_compile_fs()
3732 v32->performance_analysis.require(), stats, 1); in brw_compile_fs()
3737 for (struct brw_compile_stats *s = params->base.stats; s != NULL && s != stats; s++) in brw_compile_fs()
3738 s->max_dispatch_width = max_dispatch_width; in brw_compile_fs()
3740 g.add_const_data(nir->constant_data, nir->constant_data_size); in brw_compile_fs()
3748 assert(cs_prog_data->push.per_thread.size % REG_SIZE == 0); in brw_cs_push_const_total_size()
3749 assert(cs_prog_data->push.cross_thread.size % REG_SIZE == 0); in brw_cs_push_const_total_size()
3750 return cs_prog_data->push.per_thread.size * threads + in brw_cs_push_const_total_size()
3751 cs_prog_data->push.cross_thread.size; in brw_cs_push_const_total_size()
3757 block->dwords = dwords; in fill_push_const_block_info()
3758 block->regs = DIV_ROUND_UP(dwords, 8); in fill_push_const_block_info()
3759 block->size = block->regs * 32; in fill_push_const_block_info()
3766 const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; in cs_fill_push_const_info()
3770 assert(subgroup_id_index == -1 || in cs_fill_push_const_info()
3771 subgroup_id_index == (int)prog_data->nr_params - 1); in cs_fill_push_const_info()
3775 /* Fill all but the last register with cross-thread payload */ in cs_fill_push_const_info()
3777 per_thread_dwords = prog_data->nr_params - cross_thread_dwords; in cs_fill_push_const_info()
3780 /* Fill all data using cross-thread payload */ in cs_fill_push_const_info()
3781 cross_thread_dwords = prog_data->nr_params; in cs_fill_push_const_info()
3785 fill_push_const_block_info(&cs_prog_data->push.cross_thread, cross_thread_dwords); in cs_fill_push_const_info()
3786 fill_push_const_block_info(&cs_prog_data->push.per_thread, per_thread_dwords); in cs_fill_push_const_info()
3788 assert(cs_prog_data->push.cross_thread.dwords % 8 == 0 || in cs_fill_push_const_info()
3789 cs_prog_data->push.per_thread.size == 0); in cs_fill_push_const_info()
3790 assert(cs_prog_data->push.cross_thread.dwords + in cs_fill_push_const_info()
3791 cs_prog_data->push.per_thread.dwords == in cs_fill_push_const_info()
3792 prog_data->nr_params); in cs_fill_push_const_info()
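
The split above works as follows: if a subgroup-id param exists it must be the last param (per the assert), so all full registers before it are cross-thread payload and the register containing it becomes per-thread; without one, everything is cross-thread. The total push size is then per_thread.size * threads + cross_thread.size, as in brw_cs_push_const_total_size(). A sketch under those assumptions (the round-down in the if-branch is inferred from the surrounding asserts, not shown in the listing):

#include <cassert>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

struct push_block { unsigned dwords, regs, size; };

/* Mirrors fill_push_const_block_info(): 8 dwords per 32B register. */
static push_block make_block(unsigned dwords)
{
   push_block b;
   b.dwords = dwords;
   b.regs = DIV_ROUND_UP(dwords, 8);
   b.size = b.regs * 32;
   return b;
}

static void
split_push_constants(unsigned nr_params, int subgroup_id_index,
                     push_block *cross, push_block *per)
{
   unsigned cross_dw, per_dw;
   if (subgroup_id_index >= 0) {
      cross_dw = 8 * (subgroup_id_index / 8); /* round down to a register */
      per_dw = nr_params - cross_dw;
   } else {
      cross_dw = nr_params; /* everything is uniform across threads */
      per_dw = 0;
   }
   *cross = make_block(cross_dw);
   *per = make_block(per_dw);
}

int main()
{
   push_block cross, per;
   split_push_constants(13, 12, &cross, &per);
   assert(cross.dwords == 8 && cross.regs == 1);
   assert(per.dwords == 5 && per.regs == 1 && per.size == 32);
   return 0;
}
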
3798 if (instr->type != nir_instr_type_intrinsic) in filter_simd()
3801 switch (nir_instr_as_intrinsic(instr)->intrinsic) { in filter_simd()
3816 switch (nir_instr_as_intrinsic(instr)->intrinsic) { in lower_simd()
3824 if (!b->shader->info.workgroup_size_variable) { in lower_simd()
3825 unsigned local_workgroup_size = b->shader->info.workgroup_size[0] * in lower_simd()
3826 b->shader->info.workgroup_size[1] * in lower_simd()
3827 b->shader->info.workgroup_size[2]; in lower_simd()
3849 const nir_shader *nir = params->base.nir; in brw_compile_cs()
3850 const struct brw_cs_prog_key *key = params->key; in brw_compile_cs()
3851 struct brw_cs_prog_data *prog_data = params->prog_data; in brw_compile_cs()
3854 brw_should_print_shader(nir, params->base.debug_flag ? in brw_compile_cs()
3855 params->base.debug_flag : DEBUG_CS); in brw_compile_cs()
3857 prog_data->base.stage = MESA_SHADER_COMPUTE; in brw_compile_cs()
3858 prog_data->base.total_shared = nir->info.shared_size; in brw_compile_cs()
3859 prog_data->base.ray_queries = nir->info.ray_queries; in brw_compile_cs()
3860 prog_data->base.total_scratch = 0; in brw_compile_cs()
3862 if (!nir->info.workgroup_size_variable) { in brw_compile_cs()
3863 prog_data->local_size[0] = nir->info.workgroup_size[0]; in brw_compile_cs()
3864 prog_data->local_size[1] = nir->info.workgroup_size[1]; in brw_compile_cs()
3865 prog_data->local_size[2] = nir->info.workgroup_size[2]; in brw_compile_cs()
3869 .devinfo = compiler->devinfo, in brw_compile_cs()
3871 .required_width = brw_required_dispatch_width(&nir->info), in brw_compile_cs()
3882 nir_shader *shader = nir_shader_clone(params->base.mem_ctx, nir); in brw_compile_cs()
3883 brw_nir_apply_key(shader, compiler, &key->base, in brw_compile_cs()
3893 key->base.robust_flags); in brw_compile_cs()
3895 v[simd] = std::make_unique<fs_visitor>(compiler, ¶ms->base, in brw_compile_cs()
3896 &key->base, in brw_compile_cs()
3897 &prog_data->base, in brw_compile_cs()
3899 params->base.stats != NULL, in brw_compile_cs()
3904 v[simd]->import_uniforms(v[first].get()); in brw_compile_cs()
3906 const bool allow_spilling = first < 0 || nir->info.workgroup_size_variable; in brw_compile_cs()
3908 if (v[simd]->run_cs(allow_spilling)) { in brw_compile_cs()
3909 cs_fill_push_const_info(compiler->devinfo, prog_data); in brw_compile_cs()
3911 brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers); in brw_compile_cs()
3913 simd_state.error[simd] = ralloc_strdup(params->base.mem_ctx, v[simd]->fail_msg); in brw_compile_cs()
3915 brw_shader_perf_log(compiler, params->base.log_data, in brw_compile_cs()
3917 dispatch_width, v[simd]->fail_msg); in brw_compile_cs()
3924 params->base.error_str = in brw_compile_cs()
3925 ralloc_asprintf(params->base.mem_ctx, in brw_compile_cs()
3935 if (!nir->info.workgroup_size_variable) in brw_compile_cs()
3936 prog_data->prog_mask = 1 << selected_simd; in brw_compile_cs()
3938 fs_generator g(compiler, ¶ms->base, &prog_data->base, in brw_compile_cs()
3941 char *name = ralloc_asprintf(params->base.mem_ctx, in brw_compile_cs()
3943 nir->info.label ? in brw_compile_cs()
3944 nir->info.label : "unnamed", in brw_compile_cs()
3945 nir->info.name); in brw_compile_cs()
3949 uint32_t max_dispatch_width = 8u << (util_last_bit(prog_data->prog_mask) - 1); in brw_compile_cs()
3951 struct brw_compile_stats *stats = params->base.stats; in brw_compile_cs()
3953 if (prog_data->prog_mask & (1u << simd)) { in brw_compile_cs()
3955 prog_data->prog_offset[simd] = in brw_compile_cs()
3956 g.generate_code(v[simd]->cfg, 8u << simd, v[simd]->shader_stats, in brw_compile_cs()
3957 v[simd]->performance_analysis.require(), stats); in brw_compile_cs()
3959 stats->max_dispatch_width = max_dispatch_width; in brw_compile_cs()
3965 g.add_const_data(nir->constant_data, nir->constant_data_size); in brw_compile_cs()
3979 prog_data->local_size; in brw_cs_get_dispatch_info()
3988 const uint32_t remainder = info.group_size & (info.simd_size - 1); in brw_cs_get_dispatch_info()
3990 info.right_mask = ~0u >> (32 - remainder); in brw_cs_get_dispatch_info()
3992 info.right_mask = ~0u >> (32 - info.simd_size); in brw_cs_get_dispatch_info()
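
right_mask is the execution mask of the last, possibly partial, SIMD chunk of a workgroup: with a nonzero remainder r (simd_size being a power of two), the low r bits are set, otherwise all simd_size bits are. A self-contained sketch:

#include <cassert>
#include <cstdint>

/* Sketch of the right_mask math above: group_size lanes executed in
 * chunks of simd_size (a power of two); the last chunk may be partial. */
static uint32_t right_mask_sketch(uint32_t group_size, uint32_t simd_size)
{
   const uint32_t remainder = group_size & (simd_size - 1);
   if (remainder > 0)
      return ~0u >> (32 - remainder); /* low `remainder` bits set */
   return ~0u >> (32 - simd_size);    /* full chunk */
}

int main()
{
   assert(right_mask_sketch(40, 16) == 0x00ff); /* 40 = 2*16 + 8 lanes */
   assert(right_mask_sketch(32, 16) == 0xffff); /* exact multiple */
   return 0;
}
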
4009 prog_data->base.stage = shader->info.stage; in compile_single_bs()
4010 prog_data->max_stack_size = MAX2(prog_data->max_stack_size, in compile_single_bs()
4011 shader->scratch_size); in compile_single_bs()
4014 brw_nir_apply_key(shader, compiler, &key->base, max_dispatch_width); in compile_single_bs()
4016 key->base.robust_flags); in compile_single_bs()
4019 .devinfo = compiler->devinfo, in compile_single_bs()
4025 .required_width = compiler->devinfo->ver >= 20 ? 16u : 8u, in compile_single_bs()
4036 if (dispatch_width == 8 && compiler->devinfo->ver >= 20) in compile_single_bs()
4039 v[simd] = std::make_unique<fs_visitor>(compiler, ¶ms->base, in compile_single_bs()
4040 &key->base, in compile_single_bs()
4041 &prog_data->base, shader, in compile_single_bs()
4047 if (v[simd]->run_bs(allow_spilling)) { in compile_single_bs()
4048 brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers); in compile_single_bs()
4050 simd_state.error[simd] = ralloc_strdup(params->base.mem_ctx, in compile_single_bs()
4051 v[simd]->fail_msg); in compile_single_bs()
4053 brw_shader_perf_log(compiler, params->base.log_data, in compile_single_bs()
4055 dispatch_width, v[simd]->fail_msg); in compile_single_bs()
4062 params->base.error_str = in compile_single_bs()
4063 ralloc_asprintf(params->base.mem_ctx, in compile_single_bs()
4074 const unsigned dispatch_width = selected->dispatch_width; in compile_single_bs()
4076 int offset = g->generate_code(selected->cfg, dispatch_width, selected->shader_stats, in compile_single_bs()
4077 selected->performance_analysis.require(), stats); in compile_single_bs()
4103 nir_shader *shader = params->base.nir; in brw_compile_bs()
4104 struct brw_bs_prog_data *prog_data = params->prog_data; in brw_compile_bs()
4105 unsigned num_resume_shaders = params->num_resume_shaders; in brw_compile_bs()
4106 nir_shader **resume_shaders = params->resume_shaders; in brw_compile_bs()
4109 prog_data->base.stage = shader->info.stage; in brw_compile_bs()
4110 prog_data->base.ray_queries = shader->info.ray_queries; in brw_compile_bs()
4111 prog_data->base.total_scratch = 0; in brw_compile_bs()
4113 prog_data->max_stack_size = 0; in brw_compile_bs()
4114 prog_data->num_resume_shaders = num_resume_shaders; in brw_compile_bs()
4116 fs_generator g(compiler, ¶ms->base, &prog_data->base, in brw_compile_bs()
4117 shader->info.stage); in brw_compile_bs()
4119 char *name = ralloc_asprintf(params->base.mem_ctx, in brw_compile_bs()
4121 shader->info.label ? in brw_compile_bs()
4122 shader->info.label : "unnamed", in brw_compile_bs()
4123 gl_shader_stage_name(shader->info.stage), in brw_compile_bs()
4124 shader->info.name); in brw_compile_bs()
4128 prog_data->simd_size = in brw_compile_bs()
4129 compile_single_bs(compiler, params, params->key, prog_data, in brw_compile_bs()
4130 shader, &g, params->base.stats, NULL); in brw_compile_bs()
4131 if (prog_data->simd_size == 0) in brw_compile_bs()
4134 uint64_t *resume_sbt = ralloc_array(params->base.mem_ctx, in brw_compile_bs()
4138 char *name = ralloc_asprintf(params->base.mem_ctx, in brw_compile_bs()
4140 shader->info.label ? in brw_compile_bs()
4141 shader->info.label : "unnamed", in brw_compile_bs()
4142 gl_shader_stage_name(shader->info.stage), in brw_compile_bs()
4143 i, shader->info.name); in brw_compile_bs()
4150 compile_single_bs(compiler, params, params->key, in brw_compile_bs()
4156 resume_sbt[i] = brw_bsr(compiler->devinfo, offset, simd_size, 0); in brw_compile_bs()
4163 assert(resume_shaders[i]->constant_data_size == in brw_compile_bs()
4164 shader->constant_data_size); in brw_compile_bs()
4165 assert(memcmp(resume_shaders[i]->constant_data, in brw_compile_bs()
4166 shader->constant_data, in brw_compile_bs()
4167 shader->constant_data_size) == 0); in brw_compile_bs()
4170 g.add_const_data(shader->constant_data, shader->constant_data_size); in brw_compile_bs()
4186 const gl_shader_stage stage = shader->stage; in brw_fs_test_dispatch_packing()
4189 brw_wm_prog_data(shader->stage_prog_data)->uses_vmask; in brw_fs_test_dispatch_packing()
4191 if (brw_stage_has_packed_dispatch(shader->devinfo, stage, in brw_fs_test_dispatch_packing()
4192 shader->max_polygons, in brw_fs_test_dispatch_packing()
4193 shader->stage_prog_data)) { in brw_fs_test_dispatch_packing()
4202 * form '2^n-1', in which case tmp will be non-zero. in brw_fs_test_dispatch_packing()
4215 return cs->local_size[0] * cs->local_size[1] * cs->local_size[2]; in workgroup_size()
4220 return INTEL_DEBUG(debug_flag) && (!shader->info.internal || NIR_DEBUG(PRINT_INTERNAL)); in brw_should_print_shader()
4258 else if (bld.shader->devinfo->ver >= 20) in fetch_barycentric_reg()
4286 inst->conditional_mod = BRW_CONDITIONAL_NZ; in check_dynamic_msaa_flag()