Lines Matching +full:clz +full:- +full:optimizations

52         {"inorder",   BIFROST_DBG_INORDER, 	"Force in-order bundling"},
80 if (bi_is_null(b->shader->preloaded[reg])) { in bi_preload()
83 b_.cursor = bi_before_block(bi_start_block(&b->shader->blocks)); in bi_preload()
86 b->shader->preloaded[reg] = bi_mov_i32(&b_, bi_register(reg)); in bi_preload()
89 return b->shader->preloaded[reg]; in bi_preload()
95 if (bi_is_null(b->shader->coverage)) in bi_coverage()
96 b->shader->coverage = bi_preload(b, 60); in bi_coverage()
98 return b->shader->coverage; in bi_coverage()
109 return bi_preload(b, (b->shader->arch >= 9) ? 60 : 61); in bi_vertex_id()
115 return bi_preload(b, (b->shader->arch >= 9) ? 61 : 62); in bi_instance_id()
123 switch (instr->type) { in bi_emit_jump()
125 branch->branch_target = b->shader->break_block; in bi_emit_jump()
128 branch->branch_target = b->shader->continue_block; in bi_emit_jump()
134 bi_block_add_successor(b->shader->current_block, branch->branch_target); in bi_emit_jump()
135 b->shader->current_block->unconditional_jumps = true; in bi_emit_jump()
138 /* Builds a 64-bit hash table key for an index */
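
The helper's body is not shown in this listing. As a minimal sketch only (the in-tree implementation may differ), the key can simply be the raw bits of the index, assuming bi_index packs into 64 bits as the comment implies:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /* Sketch: reuse the packed bi_index bits as the hash key. */
    static uint64_t
    bi_index_to_key_sketch(bi_index idx)
    {
            uint64_t key = 0;
            static_assert(sizeof(idx) <= sizeof(key), "bi_index must fit in 64 bits");
            memcpy(&key, &idx, sizeof(idx));
            return key;
    }
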
162 I->nr_dests = channel + 1; in bi_extract()
163 I->dest[channel] = bi_temp(b->shader); in bi_extract()
164 return I->dest[channel]; in bi_extract()
168 _mesa_hash_table_u64_search(b->shader->allocated_vec, in bi_extract()
188 bi_index *channels = ralloc_array(b->shader, bi_index, n); in bi_cache_collect()
191 _mesa_hash_table_u64_insert(b->shader->allocated_vec, in bi_cache_collect()
196 * Splits an n-component vector (vec) into n scalar destinations (dests) using a
197 * split pseudo-instruction.
199 * Pre-condition: dests is filled with bi_null().
206 dests[i] = bi_temp(b->shader); in bi_emit_split_i32()
214 I->nr_dests = n; in bi_emit_split_i32()
217 I->dest[j] = dests[j]; in bi_emit_split_i32()
231 * composed of 32-bit words, but it will be split at 32-bit word boundaries.
255 I->nr_srcs = n; in bi_emit_collect_to()
258 I->src[i] = chan[i]; in bi_emit_collect_to()
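
As a usage sketch (the builder b and the value vec2_val are illustrative, and the (builder, dests, vec, n) parameter order of the split helper is assumed from its doc comment above): split and collect are inverses built from these two helpers, with the stated pre-condition that the destination array starts out null:

    /* Split a 2-component 32-bit value into scalar temporaries... */
    bi_index dests[2] = { bi_null(), bi_null() };   /* pre-condition */
    bi_emit_split_i32(b, dests, vec2_val, 2);

    /* ...and collect scalars back into one contiguous vector. */
    bi_index chans[2] = { dests[0], dests[1] };
    bi_emit_collect_to(b, bi_temp(b->shader), chans, 2);
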
279 bi_index dst = bi_temp(b->shader); in bi_collect_v2i32()
287 switch (intr->intrinsic) { in bi_varying_src0_for_barycentric()
292 /* Need to put the sample ID in the top 16-bits */ in bi_varying_src0_for_barycentric()
295 bi_half(bi_src_index(&intr->src[0]), false)); in bi_varying_src0_for_barycentric()
298 * Y axes respectively, relative to top-left of pixel. In NIR, (0, 0) in bi_varying_src0_for_barycentric()
310 bi_index offset = bi_src_index(&intr->src[0]); in bi_varying_src0_for_barycentric()
312 unsigned sz = nir_src_bit_size(intr->src[0]); in bi_varying_src0_for_barycentric()
335 return b->shader->arch >= 9 ? bi_preload(b, 61) : bi_dontcare(b); in bi_varying_src0_for_barycentric()
356 /* auto, 64-bit omitted */
402 unsigned nr = instr->num_components; in bi_copy_component()
404 unsigned bitsize = nir_dest_bit_size(instr->dest); in bi_copy_component()
415 bi_make_vec_to(b, bi_dest_index(&instr->dest), in bi_copy_component()
416 srcs, channels, nr, nir_dest_bit_size(instr->dest)); in bi_copy_component()
426 enum bi_vecsize vecsize = (instr->num_components + component - 1); in bi_emit_load_attr()
431 bi_index dest = (component == 0) ? bi_dest_index(&instr->dest) : bi_temp(b->shader); in bi_emit_load_attr()
439 bi_index idx = bi_src_index(&instr->src[0]); in bi_emit_load_attr()
450 if (b->shader->arch >= 9) in bi_emit_load_attr()
451 I->table = PAN_TABLE_ATTRIBUTE; in bi_emit_load_attr()
466 uint32_t mask = ctx->inputs->fixed_varying_mask; in bi_varying_base_bytes()
470 unsigned general_index = (sem.location - VARYING_SLOT_VAR0); in bi_varying_base_bytes()
497 bool smooth = instr->intrinsic == nir_intrinsic_load_interpolated_input; in bi_emit_load_vary()
501 enum bi_vecsize vecsize = (instr->num_components + component - 1); in bi_emit_load_vary()
502 bi_index dest = (component == 0) ? bi_dest_index(&instr->dest) : bi_temp(b->shader); in bi_emit_load_vary()
504 unsigned sz = nir_dest_bit_size(instr->dest); in bi_emit_load_vary()
507 nir_intrinsic_instr *parent = nir_src_as_intrinsic(instr->src[0]); in bi_emit_load_vary()
510 sample = bi_interp_for_intrinsic(parent->intrinsic); in bi_emit_load_vary()
523 if (b->shader->arch >= 9) in bi_emit_load_vary()
527 b->shader->info.bifrost->uses_flat_shading = true; in bi_emit_load_vary()
538 if (b->shader->malloc_idvs && immediate) { in bi_emit_load_vary()
542 bi_varying_offset(b->shader, instr)); in bi_emit_load_vary()
553 if (b->shader->malloc_idvs) { in bi_emit_load_vary()
558 unsigned vbase = bi_varying_base_bytes(b->shader, instr); in bi_emit_load_vary()
582 /* Valhall usually uses machine-allocated IDVS. If this is disabled, use in bi_emit_load_vary()
583 * a simple Midgard-style ABI. in bi_emit_load_vary()
585 if (b->shader->arch >= 9 && I != NULL) in bi_emit_load_vary()
586 I->table = PAN_TABLE_ATTRIBUTE; in bi_emit_load_vary()
641 unreachable("8-bit mkvec not yet supported"); in bi_make_vec_to()
651 if (b->shader->arch >= 9) { in bi_load_ubo_to()
653 I->seg = BI_SEG_UBO; in bi_load_ubo_to()
666 unsigned sysval_ubo = b->shader->inputs->fixed_sysval_ubo >= 0 ? in bi_load_sysval_to()
667 b->shader->inputs->fixed_sysval_ubo : in bi_load_sysval_to()
668 b->shader->nir->info.num_ubos; in bi_load_sysval_to()
670 pan_lookup_sysval(b->shader->sysval_to_id, in bi_load_sysval_to()
671 b->shader->info.sysvals, in bi_load_sysval_to()
683 bi_load_sysval_to(b, bi_dest_index(&intr->dest), in bi_load_sysval_nir()
684 panfrost_sysval_for_instr(&intr->instr, NULL), in bi_load_sysval_nir()
692 bi_index tmp = bi_temp(b->shader); in bi_load_sysval()
702 * as zero), so use a 5-bit mask instead of 8-bits */ in bi_load_sample_id_to()
711 bi_index sample_id = bi_temp(b->shader); in bi_load_sample_id()
732 if (b->shader->inputs->blend.nr_samples > 1) in bi_pixel_indices()
738 /* Source color is passed through r0-r3, or r4-r7 for the second source when
739 * dual-source blending. Preload the corresponding vector.
754 bi_emit_collect_to(b, bi_dest_index(&instr->dest), srcs, size == 32 ? 4 : 2); in bi_emit_load_blend_input()
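
A sketch of what the elided body above does under the register convention in the comment (the second_source flag here is hypothetical; the real code derives it from the intrinsic's IO semantics):

    unsigned base = second_source ? 4 : 0;            /* r4-r7 vs r0-r3 */
    bi_index srcs[4] = {
            bi_preload(b, base + 0), bi_preload(b, base + 1),
            bi_preload(b, base + 2), bi_preload(b, base + 3),
    };
    /* 4 words cover a vec4 of f32; packed f16 only needs the first 2, hence
     * the "size == 32 ? 4 : 2" in the collect on line 754. */
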
762 bool bifrost = b->shader->arch <= 8; in bi_emit_blend_op()
769 const struct panfrost_compile_inputs *inputs = b->shader->inputs; in bi_emit_blend_op()
770 uint64_t blend_desc = inputs->blend.bifrost_blend_desc; in bi_emit_blend_op()
773 if (inputs->is_blend && inputs->blend.nr_samples > 1) { in bi_emit_blend_op()
779 } else if (b->shader->inputs->is_blend) { in bi_emit_blend_op()
780 uint64_t blend_desc = b->shader->inputs->blend.bifrost_blend_desc; in bi_emit_blend_op()
785 bi_blend_to(b, bifrost ? bi_temp(b->shader) : bi_null(), rgba, in bi_emit_blend_op()
795 bi_blend_to(b, bifrost ? bi_temp(b->shader) : bi_null(), rgba, in bi_emit_blend_op()
803 b->shader->info.bifrost->blend[rt].type = T; in bi_emit_blend_op()
806 b->shader->info.bifrost->blend_src1_type = T2; in bi_emit_blend_op()
811 * ATEST is not needed if early-z is forced, alpha-to-coverage is disabled, and
813 * all blit shaders, so we just care about early-z, which blit shaders force
819 return (ctx->inputs->is_blit && !emit_zs) || ctx->inputs->is_blend; in bi_skip_atest()
825 bi_instr *atest = bi_atest_to(b, bi_temp(b->shader), bi_coverage(b), alpha); in bi_emit_atest()
826 b->shader->emitted_atest = true; in bi_emit_atest()
827 b->shader->coverage = atest->dest[0]; in bi_emit_atest()
829 /* Pseudo-source to encode in the tuple */ in bi_emit_atest()
830 atest->src[2] = bi_fau(BIR_FAU_ATEST_PARAM, false); in bi_emit_atest()
836 bool combined = instr->intrinsic == in bi_emit_fragment_out()
846 nir_find_variable_with_driver_location(b->shader->nir, in bi_emit_fragment_out()
849 unsigned loc = var ? var->data.location : 0; in bi_emit_fragment_out()
851 bi_index src0 = bi_src_index(&instr->src[0]); in bi_emit_fragment_out()
860 b->shader->coverage = in bi_emit_fragment_out()
865 /* Emit ATEST if we have to, note ATEST requires a floating-point alpha in bi_emit_fragment_out()
867 * alpha value is only used for alpha-to-coverage, a stage which is in bi_emit_fragment_out()
870 if (!b->shader->emitted_atest && !bi_skip_atest(b->shader, emit_zs)) { in bi_emit_fragment_out()
873 bi_index rgba = bi_src_index(&instr->src[0]); in bi_emit_fragment_out()
879 /* Don't read out-of-bounds */ in bi_emit_fragment_out()
880 if (nir_src_num_components(instr->src[0]) < 4) in bi_emit_fragment_out()
890 z = bi_src_index(&instr->src[2]); in bi_emit_fragment_out()
893 s = bi_src_index(&instr->src[3]); in bi_emit_fragment_out()
895 b->shader->coverage = bi_zs_emit(b, z, s, bi_coverage(b), in bi_emit_fragment_out()
901 unsigned rt = loc ? (loc - FRAG_RESULT_DATA0) : 0; in bi_emit_fragment_out()
903 bi_index color = bi_src_index(&instr->src[0]); in bi_emit_fragment_out()
904 bi_index color2 = dual ? bi_src_index(&instr->src[4]) : bi_null(); in bi_emit_fragment_out()
907 /* Explicit copy since BLEND inputs are precoloured to R0-R3, in bi_emit_fragment_out()
912 nir_foreach_shader_out_variable(var, b->shader->nir) in bi_emit_fragment_out()
913 has_mrt |= (var->data.location > FRAG_RESULT_DATA0); in bi_emit_fragment_out()
918 color = bi_temp(b->shader); in bi_emit_fragment_out()
920 nir_src_num_components(instr->src[0]), in bi_emit_fragment_out()
928 if (b->shader->inputs->is_blend) { in bi_emit_fragment_out()
935 if (b->shader->arch >= 8) in bi_emit_fragment_out()
966 if (instr->type != nir_instr_type_intrinsic) in bifrost_nir_specialize_idvs()
971 if (intr->intrinsic != nir_intrinsic_store_output) in bifrost_nir_specialize_idvs()
985 /* In principle we can do better for 16-bit. At the moment we require in bi_emit_store_vary()
986 * 32-bit to permit the use of .auto, in order to force .u32 for flat in bi_emit_store_vary()
992 assert(T_size == 32 || (b->shader->arch >= 9 && T_size == 16)); in bi_emit_store_vary()
1007 bi_index data = bi_src_index(&instr->src[0]); in bi_emit_store_vary()
1016 assert(T_size == 32 && "todo: 16-bit trim"); in bi_emit_store_vary()
1019 split->nr_dests = nir_intrinsic_src_components(instr, 0); in bi_emit_store_vary()
1021 bi_index tmp = bi_temp(b->shader); in bi_emit_store_vary()
1023 collect->nr_srcs = nr; in bi_emit_store_vary()
1026 split->dest[w] = bi_temp(b->shader); in bi_emit_store_vary()
1027 collect->src[w] = split->dest[w]; in bi_emit_store_vary()
1037 if (b->shader->arch <= 8 && b->shader->idvs == BI_IDVS_POSITION) { in bi_emit_store_vary()
1041 unsigned identity = (b->shader->arch == 6) ? 0x688 : 0; in bi_emit_store_vary()
1046 bi_imm_u32(format), regfmt, nr - 1); in bi_emit_store_vary()
1047 } else if (b->shader->arch >= 9 && b->shader->idvs != BI_IDVS_NONE) { in bi_emit_store_vary()
1058 bool varying = (b->shader->idvs == BI_IDVS_VARYING); in bi_emit_store_vary()
1060 bi_store(b, nr * nir_src_bit_size(instr->src[0]), in bi_emit_store_vary()
1063 varying ? bi_varying_offset(b->shader, instr) : 0); in bi_emit_store_vary()
1070 bi_st_cvt(b, data, a[0], a[1], a[2], regfmt, nr - 1); in bi_emit_store_vary()
1082 bi_st_cvt(b, data, a[0], a[1], a[2], regfmt, nr - 1); in bi_emit_store_vary()
1094 bool kernel_input = (instr->intrinsic == nir_intrinsic_load_kernel_input); in bi_emit_load_ubo()
1096 bi_load_ubo_to(b, instr->num_components * nir_dest_bit_size(instr->dest), in bi_emit_load_ubo()
1097 bi_dest_index(&instr->dest), offset_is_const ? in bi_emit_load_ubo()
1099 kernel_input ? bi_zero() : bi_src_index(&instr->src[0])); in bi_emit_load_ubo()
1105 assert(b->shader->inputs->no_ubo_to_push && "can't mix push constant forms"); in bi_emit_load_push_constant()
1107 nir_src *offset = &instr->src[0]; in bi_emit_load_push_constant()
1112 unsigned bits = nir_dest_bit_size(instr->dest) * in bi_emit_load_push_constant()
1113 nir_dest_num_components(instr->dest); in bi_emit_load_push_constant()
1125 bi_emit_collect_to(b, bi_dest_index(&instr->dest), channels, n); in bi_emit_load_push_constant()
1139 if (b->shader->arch < 9 || seg == BI_SEG_NONE) in bi_handle_segment()
1153 … if (offset && addr_lo->type == BI_INDEX_CONSTANT && addr_lo->value == (int16_t) addr_lo->value) { in bi_handle_segment()
1154 *offset = addr_lo->value; in bi_handle_segment()
1168 unsigned bits = instr->num_components * nir_dest_bit_size(instr->dest); in bi_emit_load()
1169 bi_index dest = bi_dest_index(&instr->dest); in bi_emit_load()
1170 bi_index addr_lo = bi_extract(b, bi_src_index(&instr->src[0]), 0); in bi_emit_load()
1171 bi_index addr_hi = bi_addr_high(b, &instr->src[0]); in bi_emit_load()
1184 BITFIELD_MASK(instr->num_components)); in bi_emit_store()
1187 bi_index addr_lo = bi_extract(b, bi_src_index(&instr->src[1]), 0); in bi_emit_store()
1188 bi_index addr_hi = bi_addr_high(b, &instr->src[1]); in bi_emit_store()
1192 bi_store(b, instr->num_components * nir_src_bit_size(instr->src[0]), in bi_emit_store()
1193 bi_src_index(&instr->src[0]), in bi_emit_store()
1211 if (b->shader->arch >= 9) in bi_emit_axchg_to()
1238 /* 64-bit */ in bi_emit_acmpxchg_to()
1243 bi_index in = bi_temp(b->shader); in bi_emit_acmpxchg_to()
1247 if (b->shader->arch >= 9) in bi_emit_acmpxchg_to()
1323 if (!(arg.value == 1 || (arg.value == -1 && op == BI_ATOM_OPC_AADD))) in bi_promote_atom_c1()
1346 * Coordinates are 16-bit integers in Bifrost but 32-bit in NIR. We need to
1368 if (coord_comps == 3 && b->shader->arch >= 9) in bi_emit_image_coord()
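
A sketch of the 32-bit to 16-bit narrowing the comment describes, for the simple non-array 2D case (illustrative only; it assumes a bi_mkvec_v2i16 builder for the MKVEC.v2i16 instruction referenced later in this file, and the arch >= 9 and array cases need extra handling):

    /* Pack the low halves of two 32-bit NIR coordinates into one v2i16 pair */
    bi_index xy = bi_mkvec_v2i16(b,
            bi_half(bi_extract(b, coord, 0), false),
            bi_half(bi_extract(b, coord, 1), false));
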
1383 nir_src src = instr->src[0]; in bi_emit_image_index()
1385 bi_context *ctx = b->shader; in bi_emit_image_index()
1388 unsigned offset = (ctx->stage == MESA_SHADER_VERTEX) ? in bi_emit_image_index()
1389 util_bitcount64(ctx->nir->info.inputs_read) : 0; in bi_emit_image_index()
1407 bi_index coords = bi_src_index(&instr->src[1]); in bi_emit_image_load()
1410 bi_index dest = bi_dest_index(&instr->dest); in bi_emit_image_load()
1412 enum bi_vecsize vecsize = instr->num_components - 1; in bi_emit_image_load()
1417 if (b->shader->arch >= 9 && nir_src_is_const(instr->src[0])) { in bi_emit_image_load()
1419 nir_src_as_uint(instr->src[0])); in bi_emit_image_load()
1421 I->table = PAN_TABLE_IMAGE; in bi_emit_image_load()
1422 } else if (b->shader->arch >= 9) { in bi_emit_image_load()
1430 bi_split_dest(b, instr->dest); in bi_emit_image_load()
1444 enum bi_register_format type = (instr->intrinsic == nir_intrinsic_image_store) ? in bi_emit_lea_image()
1448 bi_index coords = bi_src_index(&instr->src[1]); in bi_emit_lea_image()
1451 bi_index dest = bi_temp(b->shader); in bi_emit_lea_image()
1453 if (b->shader->arch >= 9 && nir_src_is_const(instr->src[0])) { in bi_emit_lea_image()
1455 nir_src_as_uint(instr->src[0])); in bi_emit_lea_image()
1457 I->table = PAN_TABLE_IMAGE; in bi_emit_lea_image()
1458 } else if (b->shader->arch >= 9) { in bi_emit_lea_image()
1467 I->table = BI_TABLE_ATTRIBUTE_1; in bi_emit_lea_image()
1480 bi_st_cvt(b, bi_src_index(&instr->src[3]), a[0], a[1], a[2], in bi_emit_image_store()
1482 instr->num_components - 1); in bi_emit_image_store()
1491 bool bifrost = b->shader->arch <= 8; in bi_emit_atomic_i32_to()
1495 bi_index tmp_dest = bifrost ? bi_temp(b->shader) : dst; in bi_emit_atomic_i32_to()
1508 /* Post-process it */ in bi_emit_atomic_i32_to()
1539 bi_make_vec_to(b, bi_dest_index(&instr->dest), src, NULL, 4, 32); in bi_emit_load_frag_coord()
1545 bi_index dest = bi_dest_index(&instr->dest); in bi_emit_ld_tile()
1548 unsigned rt = b->shader->inputs->blend.rt; in bi_emit_ld_tile()
1549 unsigned size = nir_dest_bit_size(instr->dest); in bi_emit_ld_tile()
1550 unsigned nr = instr->num_components; in bi_emit_ld_tile()
1553 if (!b->shader->inputs->is_blend) { in bi_emit_ld_tile()
1555 nir_find_variable_with_driver_location(b->shader->nir, in bi_emit_ld_tile()
1557 unsigned loc = var->data.location; in bi_emit_ld_tile()
1559 rt = (loc - FRAG_RESULT_DATA0); in bi_emit_ld_tile()
1562 bi_index desc = b->shader->inputs->is_blend ? in bi_emit_ld_tile()
1563 bi_imm_u32(b->shader->inputs->blend.bifrost_blend_desc >> 32) : in bi_emit_ld_tile()
1564 b->shader->inputs->bifrost.static_rt_conv ? in bi_emit_ld_tile()
1565 bi_imm_u32(b->shader->inputs->bifrost.rt_conv[rt]) : in bi_emit_ld_tile()
1569 regfmt, nr - 1); in bi_emit_ld_tile()
1576 bi_index dst = nir_intrinsic_infos[instr->intrinsic].has_dest ? in bi_emit_intrinsic()
1577 bi_dest_index(&instr->dest) : bi_null(); in bi_emit_intrinsic()
1578 gl_shader_stage stage = b->shader->stage; in bi_emit_intrinsic()
1580 switch (instr->intrinsic) { in bi_emit_intrinsic()
1590 if (b->shader->inputs->is_blend) in bi_emit_intrinsic()
1658 assert(b->shader->stage != MESA_SHADER_FRAGMENT); in bi_emit_intrinsic()
1670 assert(nir_src_bit_size(instr->src[1]) == 32); in bi_emit_intrinsic()
1672 bi_index addr = bi_src_index(&instr->src[0]); in bi_emit_intrinsic()
1675 if (b->shader->arch >= 9) { in bi_emit_intrinsic()
1683 bi_emit_atomic_i32_to(b, dst, addr, bi_src_index(&instr->src[1]), in bi_emit_intrinsic()
1684 instr->intrinsic); in bi_emit_intrinsic()
1685 bi_split_dest(b, instr->dest); in bi_emit_intrinsic()
1697 assert(nir_src_bit_size(instr->src[3]) == 32); in bi_emit_intrinsic()
1701 bi_src_index(&instr->src[3]), in bi_emit_intrinsic()
1702 instr->intrinsic); in bi_emit_intrinsic()
1703 bi_split_dest(b, instr->dest); in bi_emit_intrinsic()
1714 assert(nir_src_bit_size(instr->src[1]) == 32); in bi_emit_intrinsic()
1717 bi_src_index(&instr->src[0]), in bi_emit_intrinsic()
1718 bi_src_index(&instr->src[1]), in bi_emit_intrinsic()
1719 instr->intrinsic); in bi_emit_intrinsic()
1721 bi_split_dest(b, instr->dest); in bi_emit_intrinsic()
1733 bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), in bi_emit_intrinsic()
1734 &instr->src[1], BI_SEG_NONE); in bi_emit_intrinsic()
1735 bi_split_dest(b, instr->dest); in bi_emit_intrinsic()
1740 &instr->src[3], BI_SEG_NONE); in bi_emit_intrinsic()
1741 bi_split_dest(b, instr->dest); in bi_emit_intrinsic()
1745 bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), in bi_emit_intrinsic()
1746 &instr->src[1], BI_SEG_WLS); in bi_emit_intrinsic()
1747 bi_split_dest(b, instr->dest); in bi_emit_intrinsic()
1751 bi_emit_acmpxchg_to(b, dst, bi_src_index(&instr->src[0]), in bi_emit_intrinsic()
1752 &instr->src[1], &instr->src[2], BI_SEG_NONE); in bi_emit_intrinsic()
1753 bi_split_dest(b, instr->dest); in bi_emit_intrinsic()
1758 &instr->src[3], &instr->src[4], BI_SEG_NONE); in bi_emit_intrinsic()
1759 bi_split_dest(b, instr->dest); in bi_emit_intrinsic()
1763 bi_emit_acmpxchg_to(b, dst, bi_src_index(&instr->src[0]), in bi_emit_intrinsic()
1764 &instr->src[1], &instr->src[2], BI_SEG_WLS); in bi_emit_intrinsic()
1765 bi_split_dest(b, instr->dest); in bi_emit_intrinsic()
1777 bi_discard_b32(b, bi_src_index(&instr->src[0])); in bi_emit_intrinsic()
1823 nir_dest_num_components(instr->dest), 0); in bi_emit_intrinsic()
1828 nir_dest_num_components(instr->dest), 0); in bi_emit_intrinsic()
1859 /* It appears vertex_id is zero-based with Bifrost geometry flows, but in bi_emit_intrinsic()
1860 * not with Valhall's memory-allocation IDVS geometry flow. Ostensibly in bi_emit_intrinsic()
1866 if (b->shader->malloc_idvs) { in bi_emit_intrinsic()
1880 assert(b->shader->nir->info.has_transform_feedback_varyings); in bi_emit_intrinsic()
1912 bi_split_dest(b, instr->dest); in bi_emit_intrinsic()
1916 … fprintf(stderr, "Unhandled intrinsic %s\n", nir_intrinsic_infos[instr->intrinsic].name); in bi_emit_intrinsic()
1925 assert(instr->def.num_components <= (32 / instr->def.bit_size)); in bi_emit_load_const()
1931 for (unsigned i = 0; i < instr->def.num_components; ++i) { in bi_emit_load_const()
1932 unsigned v = nir_const_value_as_uint(instr->value[i], instr->def.bit_size); in bi_emit_load_const()
1933 acc |= (v << (i * instr->def.bit_size)); in bi_emit_load_const()
1936 bi_mov_i32_to(b, bi_get_index(instr->def.index, false, 0), bi_imm_u32(acc)); in bi_emit_load_const()
1947 /* the bi_index carries the 32-bit (word) offset separate from the in bi_alu_src_index()
1977 /* 8-bit vectors not yet supported */ in bi_alu_src_index()
1978 assert(comps == 1 && "8-bit vectors not supported"); in bi_alu_src_index()
1979 assert(src.swizzle[0] < 4 && "8-bit vectors not supported"); in bi_alu_src_index()
2003 return bi_fma_f32(b, s0, s1, bi_imm_f32(-0.0f)); in bi_fmul_f32()
2007 * Newton-Raphson to improve precision */
2028 bi_imm_u32(-1), BI_SPECIAL_N); in bi_lower_frsq_32()
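
For reference, the Newton-Raphson refinements being applied are the textbook ones; the scalar models below show only the math, not the FMA_RSCALE sequences the lowering actually emits. Each step roughly doubles the number of correct bits in the seed:

    /* One refinement of a reciprocal-square-root seed x0 ~= 1/sqrt(a). */
    static float rsqrt_refine(float a, float x0)
    {
            return x0 * (1.5f - 0.5f * a * x0 * x0);
    }

    /* The analogous refinement for a reciprocal seed x0 ~= 1/a. */
    static float rcp_refine(float a, float x0)
    {
            return x0 * (2.0f - a * x0);
    }
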
2033 * https://gitlab.freedesktop.org/panfrost/mali-isa-docs/-/blob/master/Bifrost.adoc
2039 bi_index t1 = bi_temp(b->shader); in bi_lower_fexp2_32()
2041 t1_instr->clamp = BI_CLAMP_CLAMP_0_INF; in bi_lower_fexp2_32()
2045 bi_instr *a2 = bi_fadd_f32_to(b, bi_temp(b->shader), s0, bi_neg(t2)); in bi_lower_fexp2_32()
2046 a2->clamp = BI_CLAMP_CLAMP_M1_1; in bi_lower_fexp2_32()
2051 bi_index p1 = bi_fma_f32(b, a2->dest[0], bi_imm_u32(0x3d635635), in bi_lower_fexp2_32()
2053 bi_index p2 = bi_fma_f32(b, p1, a2->dest[0], bi_imm_u32(0x3f317218)); in bi_lower_fexp2_32()
2054 bi_index p3 = bi_fmul_f32(b, a2->dest[0], p2); in bi_lower_fexp2_32()
2055 bi_instr *x = bi_fma_rscale_f32_to(b, bi_temp(b->shader), in bi_lower_fexp2_32()
2057 x->clamp = BI_CLAMP_CLAMP_0_INF; in bi_lower_fexp2_32()
2059 bi_instr *max = bi_fmax_f32_to(b, dst, x->dest[0], s0); in bi_lower_fexp2_32()
2060 max->sem = BI_SEM_NAN_PROPAGATE; in bi_lower_fexp2_32()
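
A numerical note on the polynomial built just above (a check, not compiler code): the visible immediates match the Taylor coefficients of 2^y - 1, since 0x3f317218 is ln(2) ~= 0.693147 and 0x3d635635 ~= 0.0555 ~= ln(2)^3/6, so the Horner chain p1/p2/p3 evaluates an approximation of 2^a2 - 1 for the clamped residual a2:

    /* p3 = y*(c1 + y*(c2 + y*c3)) ~= exp2(y) - 1 for small y, because
     * 2^y - 1 = y*ln2 + (y*ln2)^2/2 + (y*ln2)^3/6 + ... */
    static float exp2m1_poly(float y, float c1, float c2, float c3)
    {
            return y * (c1 + y * (c2 + y * c3));
    }
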
2067 * fixed-point input */ in bi_fexp_32()
2070 bi_instr *fixed_pt = bi_f32_to_s32_to(b, bi_temp(b->shader), scale); in bi_fexp_32()
2071 fixed_pt->round = BI_ROUND_NONE; // XXX in bi_fexp_32()
2073 /* Compute the result for the fixed-point input, but pass along in bi_fexp_32()
2074 * the floating-point scale for correct NaN propagation */ in bi_fexp_32()
2075 bi_fexp_f32_to(b, dst, fixed_pt->dest[0], scale); in bi_fexp_32()
2086 /* xt estimates -log(r1), a coarse approximation of log(a1) */ in bi_lower_flog2_32()
2090 /* log(s0) = log(a1 * 2^e) = e + log(a1) = e + log(a1 * r1) - in bi_lower_flog2_32()
2091 * log(r1), so let x1 = e - log(r1) ~= e + xt and x2 = log(a1 * r1), in bi_lower_flog2_32()
2097 * 1, so set y = (a1 * r1) - 1.0 */ in bi_lower_flog2_32()
2098 bi_index y = bi_fma_f32(b, a1, r1, bi_imm_f32(-1.0)); in bi_lower_flog2_32()
2102 * y - y^2/2 + O(y^3) = y(1 - y/2) + O(y^3) */ in bi_lower_flog2_32()
2104 bi_fma_f32(b, y, bi_imm_f32(-0.5), bi_imm_f32(1.0))); in bi_lower_flog2_32()
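
A scalar model of the truncated series used above (assuming, as the structure implies, that the table keeps y = a1*r1 - 1 very small): the first dropped term is y^3/3, which is negligible at single precision once y is around 1e-4 or below, so a single FMA pair suffices:

    /* log(1 + y) ~= y - y*y/2 = y * (1 - y/2) for small y */
    static float log1p_small(float y)
    {
            return y * (1.0f - 0.5f * y);
    }
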
2117 bi_index add = bi_fadd_lscale_f32(b, bi_imm_f32(-1.0f), s0); in bi_flog2_32()
2129 log2_base = bi_temp(b->shader); in bi_lower_fpow_32()
2144 log2_base = bi_temp(b->shader); in bi_fpow_32()
2152 * FSIN/COS_TABLE.u6, which multiplies the bottom 6-bits by pi/32 and
2157 * sin(x + e) = sin(x) + e cos(x) - (e^2)/2 sin(x)
2158 * cos(x + e) = cos(x) - e sin(x) - (e^2)/2 cos(x)
2162 #define MPI_OVER_TWO bi_imm_f32(-3.14159f / 2.0)
2168 /* bottom 6-bits of result times pi/32 approximately s0 mod 2pi */ in bi_lower_fsincos_32()
2181 bi_imm_u32(-1), BI_SPECIAL_NONE); in bi_lower_fsincos_32()
2183 /* (-e^2)/2 f''(x) */ in bi_lower_fsincos_32()
2188 /* e f'(x) - (e^2/2) f''(x) */ in bi_lower_fsincos_32()
2189 bi_instr *I = bi_fma_f32_to(b, bi_temp(b->shader), e, in bi_lower_fsincos_32()
2192 I->clamp = BI_CLAMP_CLAMP_M1_1; in bi_lower_fsincos_32()
2194 /* f(x) + e f'(x) - (e^2/2) f''(x) */ in bi_lower_fsincos_32()
2195 bi_fadd_f32_to(b, dst, I->dest[0], cos ? cosx : sinx); in bi_lower_fsincos_32()
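
A scalar model of this argument-reduction scheme (reference math only; the FSIN/COS_TABLE lookups and the clamped FMA chain above are what actually run). It treats the table as returning exact sin/cos at multiples of pi/32:

    #include <math.h>

    static float sin_via_reduction(float x)
    {
            float k  = nearbyintf(x * 32.0f / (float)M_PI);
            float x0 = k * (float)M_PI / 32.0f;        /* table entry point */
            float e  = x - x0;                         /* small residual */
            float s  = sinf(x0), c = cosf(x0);         /* stands in for the table */
            /* sin(x0 + e) = sin(x0) + e*cos(x0) - (e*e/2)*sin(x0) + O(e^3) */
            return s + e * c - 0.5f * e * e * s;
    }
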
2206 if (!(b->shader->quirks & BIFROST_LIMITED_CLPER)) { in bi_clper_xor()
2263 for (unsigned i = 1; i < nir_src_num_components(src->src); ++i) { in bi_nir_is_replicated()
2264 if (src->swizzle[0] == src->swizzle[i]) in bi_nir_is_replicated()
2274 bi_index dst = bi_dest_index(&instr->dest.dest); in bi_emit_alu()
2275 unsigned srcs = nir_op_infos[instr->op].num_inputs; in bi_emit_alu()
2276 unsigned sz = nir_dest_bit_size(instr->dest.dest); in bi_emit_alu()
2277 unsigned comps = nir_dest_num_components(instr->dest.dest); in bi_emit_alu()
2278 unsigned src_sz = srcs > 0 ? nir_src_bit_size(instr->src[0].src) : 0; in bi_emit_alu()
2284 if (!instr->dest.dest.is_ssa) { in bi_emit_alu()
2286 assert(instr->dest.write_mask); in bi_emit_alu()
2290 * special-cased because they can operate on vectors even after in bi_emit_alu()
2292 * instruction is no "bigger" than SIMD-within-a-register. These moves in bi_emit_alu()
2295 switch (instr->op) { in bi_emit_alu()
2300 srcs > 0 ? bi_src_index(&instr->src[0].src) : bi_null(), in bi_emit_alu()
2301 srcs > 1 ? bi_src_index(&instr->src[1].src) : bi_null(), in bi_emit_alu()
2302 srcs > 2 ? bi_src_index(&instr->src[2].src) : bi_null(), in bi_emit_alu()
2303 srcs > 3 ? bi_src_index(&instr->src[3].src) : bi_null(), in bi_emit_alu()
2307 instr->src[0].swizzle[0], in bi_emit_alu()
2308 instr->src[1].swizzle[0], in bi_emit_alu()
2309 srcs > 2 ? instr->src[2].swizzle[0] : 0, in bi_emit_alu()
2310 srcs > 3 ? instr->src[3].swizzle[0] : 0, in bi_emit_alu()
2325 bi_index vec = bi_src_index(&instr->src[0].src); in bi_emit_alu()
2326 unsigned chan = instr->src[0].swizzle[0]; in bi_emit_alu()
2333 bi_mov_i32_to(b, dst, bi_extract(b, bi_src_index(&instr->src[0].src), 0)); in bi_emit_alu()
2337 bi_mov_i32_to(b, dst, bi_extract(b, bi_src_index(&instr->src[0].src), 1)); in bi_emit_alu()
2342 … bi_extract(b, bi_src_index(&instr->src[0].src), instr->src[0].swizzle[0]), in bi_emit_alu()
2343 … bi_extract(b, bi_src_index(&instr->src[1].src), instr->src[1].swizzle[0])); in bi_emit_alu()
2348 bi_extract(b, bi_src_index(&instr->src[0].src), 0), in bi_emit_alu()
2349 bi_extract(b, bi_src_index(&instr->src[0].src), 1)); in bi_emit_alu()
2353 bi_index src = bi_src_index(&instr->src[0].src); in bi_emit_alu()
2362 bi_index src = bi_src_index(&instr->src[0].src); in bi_emit_alu()
2373 bi_index idx = bi_src_index(&instr->src[0].src); in bi_emit_alu()
2377 comps > 0 ? instr->src[0].swizzle[0] : 0, in bi_emit_alu()
2378 comps > 1 ? instr->src[0].swizzle[1] : 0, in bi_emit_alu()
2379 comps > 2 ? instr->src[0].swizzle[2] : 0, in bi_emit_alu()
2380 comps > 3 ? instr->src[0].swizzle[3] : 0, in bi_emit_alu()
2388 assert(nir_src_num_components(instr->src[0].src) == 2); in bi_emit_alu()
2391 bi_index idx = bi_src_index(&instr->src[0].src); in bi_emit_alu()
2395 instr->src[0].swizzle[0], in bi_emit_alu()
2396 instr->src[0].swizzle[1] in bi_emit_alu()
2407 bi_index idx = bi_src_index(&instr->src[0].src); in bi_emit_alu()
2408 bi_index s0 = bi_extract(b, idx, instr->src[0].swizzle[0]); in bi_emit_alu()
2410 bi_extract(b, idx, instr->src[0].swizzle[1]) : s0; in bi_emit_alu()
2417 * mode may not be nearest-even. in bi_emit_alu()
2419 if (instr->op == nir_op_f2f16_rtz) in bi_emit_alu()
2420 I->round = BI_ROUND_RTZ; in bi_emit_alu()
2421 else if (instr->op == nir_op_f2f16_rtne) in bi_emit_alu()
2422 I->round = BI_ROUND_NONE; /* Nearest even */ in bi_emit_alu()
2433 bi_index idx = bi_src_index(&instr->src[0].src); in bi_emit_alu()
2434 bi_index s0 = bi_extract(b, idx, instr->src[0].swizzle[0]); in bi_emit_alu()
2435 bi_index s1 = bi_extract(b, idx, instr->src[0].swizzle[1]); in bi_emit_alu()
2452 nir_alu_src *src = &instr->src[0]; in bi_emit_alu()
2453 bi_index idx = bi_src_index(&src->src); in bi_emit_alu()
2454 bi_index s0 = bi_extract(b, idx, src->swizzle[0]); in bi_emit_alu()
2455 bi_index s1 = bi_extract(b, idx, src->swizzle[1]); in bi_emit_alu()
2457 bi_index t = (src->swizzle[0] == src->swizzle[1]) ? in bi_emit_alu()
2462 if (instr->op == nir_op_u2f16) in bi_emit_alu()
2473 /* Acts like an 8-bit swizzle */ in bi_emit_alu()
2474 bi_index idx = bi_src_index(&instr->src[0].src); in bi_emit_alu()
2479 chan[i] = instr->src[0].swizzle[i] * factor; in bi_emit_alu()
2491 * translated as MUX.v2i16, even though cond is a 32-bit vector. in bi_emit_alu()
2495 * corresponding half of the 32-bit source. NIR uses 0/~0 in bi_emit_alu()
2496 * booleans so that's guaranteed to work (that is, 32-bit NIR in bi_emit_alu()
2497 * booleans are 16-bit replicated). in bi_emit_alu()
2500 * insert a MKVEC.v2i16 first to convert down to 16-bit. in bi_emit_alu()
2502 bi_index idx = bi_src_index(&instr->src[0].src); in bi_emit_alu()
2503 bi_index s0 = bi_extract(b, idx, instr->src[0].swizzle[0]); in bi_emit_alu()
2504 bi_index s1 = bi_alu_src_index(b, instr->src[1], comps); in bi_emit_alu()
2505 bi_index s2 = bi_alu_src_index(b, instr->src[2], comps); in bi_emit_alu()
2507 if (!bi_nir_is_replicated(&instr->src[0])) { in bi_emit_alu()
2509 … bi_half(bi_extract(b, idx, instr->src[0].swizzle[1]), false)); in bi_emit_alu()
2520 bi_index s0 = srcs > 0 ? bi_alu_src_index(b, instr->src[0], comps) : bi_null(); in bi_emit_alu()
2521 bi_index s1 = srcs > 1 ? bi_alu_src_index(b, instr->src[1], comps) : bi_null(); in bi_emit_alu()
2522 bi_index s2 = srcs > 2 ? bi_alu_src_index(b, instr->src[2], comps) : bi_null(); in bi_emit_alu()
2524 switch (instr->op) { in bi_emit_alu()
2542 I->clamp = BI_CLAMP_CLAMP_0_1; in bi_emit_alu()
2548 I->clamp = BI_CLAMP_CLAMP_M1_1; in bi_emit_alu()
2554 I->clamp = BI_CLAMP_CLAMP_0_INF; in bi_emit_alu()
2577 if (b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS) in bi_emit_alu()
2587 if (b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS) in bi_emit_alu()
2597 if (b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS) in bi_emit_alu()
2636 if (b->shader->arch >= 9) in bi_emit_alu()
2644 bi_csel_to(b, nir_op_infos[instr->op].input_types[0], sz, dst, in bi_emit_alu()
2650 bi_csel_to(b, nir_op_infos[instr->op].input_types[0], sz, dst, in bi_emit_alu()
2656 bi_index bit = bi_imm_u32(instr->op == nir_op_fddx_must_abs_mali ? 1 : 2); in bi_emit_alu()
2669 switch (instr->op) { in bi_emit_alu()
2685 switch (instr->op) { in bi_emit_alu()
2710 if (b->shader->quirks & BIFROST_LIMITED_CLPER) { in bi_emit_alu()
2733 bi_instr *f16 = bi_v2f32_to_v2f16_to(b, bi_temp(b->shader), s0, s0); in bi_emit_alu()
2734 bi_instr *f32 = bi_f16_to_f32_to(b, dst, bi_half(f16->dest[0], false)); in bi_emit_alu()
2736 f16->ftz = f32->ftz = true; in bi_emit_alu()
2747 /* Note 32-bit sources => no vectorization, so 32-bit works */ in bi_emit_alu()
2883 … bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, bi_translate_cmpf(instr->op), BI_RESULT_TYPE_M1); in bi_emit_alu()
2892 … bi_icmp_to(b, nir_type_uint, sz, dst, s0, s1, bi_translate_cmpf(instr->op), BI_RESULT_TYPE_M1); in bi_emit_alu()
2903 bi_fcmp_to(b, sz, dst, s0, s1, bi_translate_cmpf(instr->op), BI_RESULT_TYPE_M1); in bi_emit_alu()
2910 bi_fround_to(b, sz, dst, s0, bi_nir_round(instr->op)); in bi_emit_alu()
2982 if (sz == 32 && b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS) in bi_emit_alu()
2989 if (sz == 32 && b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS) in bi_emit_alu()
3008 bi_index clz = bi_clz(b, src_sz, s0, false); in bi_emit_alu()
3011 clz = bi_byte(clz, 0); in bi_emit_alu()
3013 clz = bi_half(clz, false); in bi_emit_alu()
3015 bi_isub_u32_to(b, dst, bi_imm_u32(src_sz - 1), clz, false); in bi_emit_alu()
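
A scalar model of this lowering (reference only; __builtin_clz is the GCC/Clang intrinsic): the index of the most-significant set bit is the operand width minus one, minus the leading-zero count. If the hardware CLZ of zero returns the full width, the same subtraction also yields NIR's defined result of -1 for a zero input:

    #include <stdint.h>

    static int32_t ufind_msb32(uint32_t x)
    {
            int lz = (x == 0) ? 32 : __builtin_clz(x);   /* model clz(0) == width */
            return 31 - lz;   /* e.g. x = 0x00010000 -> 16, x = 0 -> -1 */
    }
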
3020 fprintf(stderr, "Unhandled ALU op %s\n", nir_op_infos[instr->op].name); in bi_emit_alu()
3092 /* Array indices are specified as 32-bit uints and need converting; they come in the .z component from NIR */
3106 * 0, dt - 1). So we use round RTE, clamping is handled at the data in bi_emit_texc_array_index()
3109 bi_instr *I = bi_f32_to_u32_to(b, bi_temp(b->shader), idx); in bi_emit_texc_array_index()
3110 I->round = BI_ROUND_NONE; in bi_emit_texc_array_index()
3111 return I->dest[0]; in bi_emit_texc_array_index()
3115 * 16-bit 8:8 fixed-point format. We lower as:
3117 * F32_TO_S32(clamp(x, -16.0, +16.0) * 256.0) & 0xFFFF =
3118 * MKVEC(F32_TO_S32(clamp(x * 1.0/16.0, -1.0, 1.0) * (16.0 * 256.0)), #0)
3128 int32_t s32 = CLAMP(x, -16.0f, 16.0f) * 256.0f; in bi_emit_texc_lod_88()
3137 bi_instr *fsat = bi_fma_f32_to(b, bi_temp(b->shader), in bi_emit_texc_lod_88()
3141 fsat->clamp = BI_CLAMP_CLAMP_M1_1; in bi_emit_texc_lod_88()
3143 bi_index fmul = bi_fma_f32(b, fsat->dest[0], bi_imm_f32(max_lod * 256.0f), in bi_emit_texc_lod_88()
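
A worked scalar model of the 8:8 encoding described above, following the constant-folded path (the non-constant path reaches the same value with one clamped FMA by pre-scaling into [-1, 1], as in the fsat/fmul pair just shown):

    #include <stdint.h>

    static uint16_t lod_to_8_8(float lod)
    {
            /* e.g. 2.5 -> 0x0280: integer part 0x02, fraction 0x80 (= 0.5) */
            float clamped = lod < -16.0f ? -16.0f : (lod > 16.0f ? 16.0f : lod);
            return (uint16_t)(int32_t)(clamped * 256.0f);
    }
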
3150 /* FETCH takes a 32-bit staging register containing the LOD as an integer in
3151 * the bottom 16-bits and (if present) the cube face index in the top 16-bits.
3174 (!nir_src_is_const(instr->src[offs_idx].src) || in bi_emit_texc_offset_ms_index()
3175 nir_src_as_uint(instr->src[offs_idx].src) != 0)) { in bi_emit_texc_offset_ms_index()
3176 unsigned nr = nir_src_num_components(instr->src[offs_idx].src); in bi_emit_texc_offset_ms_index()
3177 bi_index idx = bi_src_index(&instr->src[offs_idx].src); in bi_emit_texc_offset_ms_index()
3187 (!nir_src_is_const(instr->src[ms_idx].src) || in bi_emit_texc_offset_ms_index()
3188 nir_src_as_uint(instr->src[ms_idx].src) != 0)) { in bi_emit_texc_offset_ms_index()
3190 bi_src_index(&instr->src[ms_idx].src), dest, in bi_emit_texc_offset_ms_index()
3212 /* Components 0-2: offsets */ in bi_emit_valhall_offsets()
3214 (!nir_src_is_const(instr->src[offs_idx].src) || in bi_emit_valhall_offsets()
3215 nir_src_as_uint(instr->src[offs_idx].src) != 0)) { in bi_emit_valhall_offsets()
3216 unsigned nr = nir_src_num_components(instr->src[offs_idx].src); in bi_emit_valhall_offsets()
3217 bi_index idx = bi_src_index(&instr->src[offs_idx].src); in bi_emit_valhall_offsets()
3236 (!nir_src_is_const(instr->src[ms_idx].src) || in bi_emit_valhall_offsets()
3237 nir_src_as_uint(instr->src[ms_idx].src) != 0)) { in bi_emit_valhall_offsets()
3239 bi_src_index(&instr->src[ms_idx].src)); in bi_emit_valhall_offsets()
3242 /* Component 3: 8-bit LOD */ in bi_emit_valhall_offsets()
3244 (!nir_src_is_const(instr->src[lod_idx].src) || in bi_emit_valhall_offsets()
3245 nir_src_as_uint(instr->src[lod_idx].src) != 0) && in bi_emit_valhall_offsets()
3248 bi_src_index(&instr->src[lod_idx].src), dest, in bi_emit_valhall_offsets()
3260 bi_index maxxyz = bi_temp(b->shader); in bi_emit_cube_coord()
3261 *face = bi_temp(b->shader); in bi_emit_cube_coord()
3268 if (b->shader->arch <= 8) { in bi_emit_cube_coord()
3299 *s = bi_temp(b->shader); in bi_emit_cube_coord()
3300 *t = bi_temp(b->shader); in bi_emit_cube_coord()
3305 S->clamp = BI_CLAMP_CLAMP_0_1; in bi_emit_cube_coord()
3306 T->clamp = BI_CLAMP_CLAMP_0_1; in bi_emit_cube_coord()
3309 /* Emits a cube map descriptor, returning lower 32-bits and putting upper
3310 * 32-bits in passed pointer t. The packing of the face with the S coordinate
3387 .op = bi_tex_op(instr->op), in bi_emit_texc()
3389 .shadow_or_clamp_disable = instr->is_shadow, in bi_emit_texc()
3390 .array = instr->is_array, in bi_emit_texc()
3391 .dimension = bifrost_tex_format(instr->sampler_dim), in bi_emit_texc()
3392 ….format = bi_texture_format(instr->dest_type | nir_dest_bit_size(instr->dest), BI_CLAMP_NONE), /* … in bi_emit_texc()
3402 (instr->op == nir_texop_tg4 ? in bi_emit_texc()
3403 BIFROST_TEXTURE_FETCH_GATHER4_R + instr->component : in bi_emit_texc()
3410 /* 32-bit indices to be allocated as consecutive staging registers */ in bi_emit_texc()
3414 for (unsigned i = 0; i < instr->num_srcs; ++i) { in bi_emit_texc()
3415 bi_index index = bi_src_index(&instr->src[i].src); in bi_emit_texc()
3416 unsigned sz = nir_src_bit_size(instr->src[i].src); in bi_emit_texc()
3417 unsigned components = nir_src_num_components(instr->src[i].src); in bi_emit_texc()
3421 switch (instr->src[i].src_type) { in bi_emit_texc()
3423 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { in bi_emit_texc()
3428 cy = bi_extract(b, index, MIN2(1, components - 1)); in bi_emit_texc()
3442 … bi_extract(b, index, components - 1), T); in bi_emit_texc()
3449 nir_src_is_const(instr->src[i].src) && in bi_emit_texc()
3450 nir_src_as_uint(instr->src[i].src) == 0) { in bi_emit_texc()
3471 /* Upper 16-bits interpreted as a clamp, leave zero */ in bi_emit_texc()
3496 if (instr->texture_index) in bi_emit_texc()
3497 … index = bi_iadd_u32(b, index, bi_imm_u32(instr->texture_index), false); in bi_emit_texc()
3504 if (instr->sampler_index) in bi_emit_texc()
3505 … index = bi_iadd_u32(b, index, bi_imm_u32(instr->sampler_index), false); in bi_emit_texc()
3526 desc.immediate_indices = direct && (instr->sampler_index < 16); in bi_emit_texc()
3529 desc.sampler_index_or_mode = instr->sampler_index; in bi_emit_texc()
3530 desc.index = instr->texture_index; in bi_emit_texc()
3534 if (direct && instr->sampler_index == instr->texture_index) { in bi_emit_texc()
3536 desc.index = instr->texture_index; in bi_emit_texc()
3539 desc.index = instr->sampler_index; in bi_emit_texc()
3541 bi_imm_u32(instr->texture_index)); in bi_emit_texc()
3545 desc.index = instr->texture_index; in bi_emit_texc()
3549 desc.index = instr->sampler_index; in bi_emit_texc()
3566 unsigned res_size = nir_dest_bit_size(instr->dest) == 16 ? 2 : 4; in bi_emit_texc()
3568 bi_index sr = sr_count ? bi_temp(b->shader) : bi_null(); in bi_emit_texc()
3569 bi_index dst = bi_temp(b->shader); in bi_emit_texc()
3580 I->register_format = bi_reg_fmt_for_nir(instr->dest_type); in bi_emit_texc()
3584 bi_emit_collect_to(b, bi_dest_index(&instr->dest), w, in bi_emit_texc()
3585 DIV_ROUND_UP(nir_dest_num_components(instr->dest) * res_size, 4)); in bi_emit_texc()
3610 (instr->op == nir_texop_tex) || in bi_emit_tex_valhall()
3611 (instr->op == nir_texop_txl) || in bi_emit_tex_valhall()
3612 (instr->op == nir_texop_txb); in bi_emit_tex_valhall()
3614 /* 32-bit indices to be allocated as consecutive staging registers */ in bi_emit_tex_valhall()
3617 bi_index sampler = bi_imm_u32(instr->sampler_index); in bi_emit_tex_valhall()
3618 bi_index texture = bi_imm_u32(instr->texture_index); in bi_emit_tex_valhall()
3621 for (unsigned i = 0; i < instr->num_srcs; ++i) { in bi_emit_tex_valhall()
3622 bi_index index = bi_src_index(&instr->src[i].src); in bi_emit_tex_valhall()
3623 unsigned sz = nir_src_bit_size(instr->src[i].src); in bi_emit_tex_valhall()
3624 unsigned components = nir_src_num_components(instr->src[i].src); in bi_emit_tex_valhall()
3626 switch (instr->src[i].src_type) { in bi_emit_tex_valhall()
3628 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { in bi_emit_tex_valhall()
3641 if (components == 3 && !instr->is_array) { in bi_emit_tex_valhall()
3647 if (instr->is_array) { in bi_emit_tex_valhall()
3649 bi_extract(b, index, components - 1); in bi_emit_tex_valhall()
3655 if (nir_src_is_const(instr->src[i].src) && in bi_emit_tex_valhall()
3656 nir_src_as_uint(instr->src[i].src) == 0) { in bi_emit_tex_valhall()
3668 /* Upper 16-bits interpreted as a clamp, leave zero */ in bi_emit_tex_valhall()
3685 assert(instr->texture_index == 0); in bi_emit_tex_valhall()
3690 assert(instr->sampler_index == 0); in bi_emit_tex_valhall()
3717 bi_index idx = sr_count ? bi_temp(b->shader) : bi_null(); in bi_emit_tex_valhall()
3727 unsigned res_size = nir_dest_bit_size(instr->dest) == 16 ? 2 : 4; in bi_emit_tex_valhall()
3728 enum bi_register_format regfmt = bi_reg_fmt_for_nir(instr->dest_type); in bi_emit_tex_valhall()
3729 enum bi_dimension dim = valhall_tex_dimension(instr->sampler_dim); in bi_emit_tex_valhall()
3730 bi_index dest = bi_temp(b->shader); in bi_emit_tex_valhall()
3732 switch (instr->op) { in bi_emit_tex_valhall()
3737 instr->is_array, dim, regfmt, instr->is_shadow, in bi_emit_tex_valhall()
3743 instr->is_array, dim, regfmt, explicit_offset, in bi_emit_tex_valhall()
3748 instr->is_array, dim, instr->component, false, in bi_emit_tex_valhall()
3749 regfmt, instr->is_shadow, explicit_offset, in bi_emit_tex_valhall()
3758 bi_emit_collect_to(b, bi_dest_index(&instr->dest), w, in bi_emit_tex_valhall()
3759 DIV_ROUND_UP(nir_dest_num_components(instr->dest) * res_size, 4)); in bi_emit_tex_valhall()
3771 bi_index coords = bi_src_index(&instr->src[coord_idx].src); in bi_emit_texs()
3773 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { in bi_emit_texs()
3777 bi_texs_cube_to(b, nir_dest_bit_size(instr->dest), in bi_emit_texs()
3778 bi_dest_index(&instr->dest), in bi_emit_texs()
3780 instr->sampler_index, instr->texture_index); in bi_emit_texs()
3782 bi_texs_2d_to(b, nir_dest_bit_size(instr->dest), in bi_emit_texs()
3783 bi_dest_index(&instr->dest), in bi_emit_texs()
3786 instr->op != nir_texop_tex, /* zero LOD */ in bi_emit_texs()
3787 instr->sampler_index, instr->texture_index); in bi_emit_texs()
3790 bi_split_dest(b, instr->dest); in bi_emit_texs()
3796 if (instr->op != nir_texop_tex && instr->op != nir_texop_txl) in bi_is_simple_tex()
3799 if (instr->dest_type != nir_type_float32 && in bi_is_simple_tex()
3800 instr->dest_type != nir_type_float16) in bi_is_simple_tex()
3803 if (instr->is_shadow || instr->is_array) in bi_is_simple_tex()
3806 switch (instr->sampler_dim) { in bi_is_simple_tex()
3814 if (instr->op == nir_texop_txl) in bi_is_simple_tex()
3822 for (unsigned i = 0; i < instr->num_srcs; ++i) { in bi_is_simple_tex()
3823 if (instr->src[i].src_type != nir_tex_src_lod && in bi_is_simple_tex()
3824 instr->src[i].src_type != nir_tex_src_coord) in bi_is_simple_tex()
3829 unsigned idx_bits = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE ? 2 : 3; in bi_is_simple_tex()
3830 if (MAX2(instr->sampler_index, instr->texture_index) >= (1 << idx_bits)) in bi_is_simple_tex()
3837 nir_src lod = instr->src[lod_idx].src; in bi_is_simple_tex()
3844 switch (instr->op) { in bi_emit_tex()
3846 bi_load_sysval_to(b, bi_dest_index(&instr->dest), in bi_emit_tex()
3847 panfrost_sysval_for_instr(&instr->instr, NULL), in bi_emit_tex()
3848 nir_dest_num_components(instr->dest), 0); in bi_emit_tex()
3861 if (b->shader->arch >= 9) in bi_emit_tex()
3872 switch (instr->type) { in bi_emit_instr()
3903 util_dynarray_init(&blk->predecessors, blk); in create_empty_block()
3911 if (ctx->after_block) { in emit_block()
3912 ctx->current_block = ctx->after_block; in emit_block()
3913 ctx->after_block = NULL; in emit_block()
3915 ctx->current_block = create_empty_block(ctx); in emit_block()
3918 list_addtail(&ctx->current_block->link, &ctx->blocks); in emit_block()
3919 list_inithead(&ctx->current_block->instructions); in emit_block()
3921 bi_builder _b = bi_init_builder(ctx, bi_after_block(ctx->current_block)); in emit_block()
3925 ++ctx->instruction_count; in emit_block()
3928 return ctx->current_block; in emit_block()
3934 bi_block *before_block = ctx->current_block; in emit_if()
3937 bi_builder _b = bi_init_builder(ctx, bi_after_block(ctx->current_block)); in emit_if()
3939 bi_half(bi_src_index(&nif->condition), false), in emit_if()
3943 bi_block *then_block = emit_cf_list(ctx, &nif->then_list); in emit_if()
3944 bi_block *end_then_block = ctx->current_block; in emit_if()
3948 int count_in = ctx->instruction_count; in emit_if()
3949 bi_block *else_block = emit_cf_list(ctx, &nif->else_list); in emit_if()
3950 bi_block *end_else_block = ctx->current_block; in emit_if()
3951 ctx->after_block = create_empty_block(ctx); in emit_if()
3958 if (ctx->instruction_count == count_in) { in emit_if()
3959 then_branch->branch_target = ctx->after_block; in emit_if()
3960 bi_block_add_successor(end_then_block, ctx->after_block); /* fallthrough */ in emit_if()
3962 then_branch->branch_target = else_block; in emit_if()
3967 then_exit->branch_target = ctx->after_block; in emit_if()
3969 bi_block_add_successor(end_then_block, then_exit->branch_target); in emit_if()
3970 bi_block_add_successor(end_else_block, ctx->after_block); /* fallthrough */ in emit_if()
3973 bi_block_add_successor(before_block, then_branch->branch_target); /* then_branch */ in emit_if()
3981 bi_block *start_block = ctx->current_block; in emit_loop()
3983 bi_block *saved_break = ctx->break_block; in emit_loop()
3984 bi_block *saved_continue = ctx->continue_block; in emit_loop()
3986 ctx->continue_block = create_empty_block(ctx); in emit_loop()
3987 ctx->break_block = create_empty_block(ctx); in emit_loop()
3988 ctx->after_block = ctx->continue_block; in emit_loop()
3991 emit_cf_list(ctx, &nloop->body); in emit_loop()
3994 bi_builder _b = bi_init_builder(ctx, bi_after_block(ctx->current_block)); in emit_loop()
3996 I->branch_target = ctx->continue_block; in emit_loop()
3997 bi_block_add_successor(start_block, ctx->continue_block); in emit_loop()
3998 bi_block_add_successor(ctx->current_block, ctx->continue_block); in emit_loop()
4000 ctx->after_block = ctx->break_block; in emit_loop()
4003 ctx->break_block = saved_break; in emit_loop()
4004 ctx->continue_block = saved_continue; in emit_loop()
4005 ++ctx->loop_count; in emit_loop()
4014 switch (node->type) { in emit_cf_list()
4040 /* shader-db stuff */
4051 stats->nr_ins += (tuple->fma ? 1 : 0) + (tuple->add ? 1 : 0); in bi_count_tuple_stats()
4053 /* Non-message passing tuples are always arithmetic */ in bi_count_tuple_stats()
4054 if (tuple->add != clause->message) { in bi_count_tuple_stats()
4055 stats->nr_arith++; in bi_count_tuple_stats()
4060 if (tuple->fma) in bi_count_tuple_stats()
4061 stats->nr_arith++; in bi_count_tuple_stats()
4063 switch (clause->message_type) { in bi_count_tuple_stats()
4066 stats->nr_varying += (clause->message->vecsize + 1) * in bi_count_tuple_stats()
4067 (bi_is_regfmt_16(clause->message->register_format) ? 1 : 2); in bi_count_tuple_stats()
4072 stats->nr_varying += (2 * 2); in bi_count_tuple_stats()
4075 stats->nr_texture++; in bi_count_tuple_stats()
4082 stats->nr_ldst++; in bi_count_tuple_stats()
4113 for (unsigned i = 0; i < ARRAY_SIZE(ctx->info.bifrost->messages); ++i) { in bi_count_preload_cost()
4114 struct bifrost_message_preload msg = ctx->info.bifrost->messages[i]; in bi_count_preload_cost()
4117 /* 2 coordinate, 2 half-words each, plus texture */ in bi_count_preload_cost()
4130 if (ctx->idvs == BI_IDVS_VARYING) in bi_shader_stage_name()
4132 else if (ctx->idvs == BI_IDVS_POSITION) in bi_shader_stage_name()
4134 else if (ctx->inputs->is_blend) in bi_shader_stage_name()
4137 return gl_shader_stage_name(ctx->stage); in bi_shader_stage_name()
4150 * 16 x 16-bit varying channels interpolated/cycle in bi_print_stats()
4160 stats.nr_tuples += clause->tuple_count; in bi_print_stats()
4162 for (unsigned i = 0; i < clause->tuple_count; ++i) in bi_print_stats()
4163 bi_count_tuple_stats(clause, &clause->tuples[i], &stats); in bi_print_stats()
4176 bool full_threads = (ctx->arch == 7 && ctx->info.work_reg_count <= 32); in bi_print_stats()
4180 char *str = ralloc_asprintf(NULL, "%s - %s shader: " in bi_print_stats()
4184 ctx->nir->info.label ?: "", in bi_print_stats()
4191 if (ctx->arch == 7) { in bi_print_stats()
4196 ctx->loop_count, ctx->spills, ctx->fills); in bi_print_stats()
4219 * 8 x 32-bit varying channels interpolated per cycle in va_print_stats()
4238 unsigned nr_threads = (ctx->info.work_reg_count <= 32) ? 2 : 1; in va_print_stats()
4241 fprintf(stderr, "%s - %s shader: " in va_print_stats()
4245 ctx->nir->info.label ?: "", in va_print_stats()
4249 ctx->loop_count, ctx->spills, ctx->fills); in va_print_stats()
4267 switch (intr->intrinsic) { in should_split_wrmask()
4283 if (instr->type != nir_instr_type_alu) in bi_lower_bit_size()
4288 switch (alu->op) { in bi_lower_bit_size()
4294 return (nir_dest_bit_size(alu->dest.dest) == 32) ? 0 : 32; in bi_lower_bit_size()
4300 /* Although Bifrost generally supports packed 16-bit vec2 and 8-bit vec4,
4302 * (8-bit in Bifrost, 32-bit in NIR TODO - workaround!). Some conversions need
4309 if (instr->type != nir_instr_type_alu) in bi_vectorize_filter()
4314 switch (alu->op) { in bi_vectorize_filter()
4327 /* Vectorized instructions cannot write more than 32-bit */ in bi_vectorize_filter()
4328 int dst_bit_size = nir_dest_bit_size(alu->dest.dest); in bi_vectorize_filter()
4338 if (instr->type != nir_instr_type_alu) in bi_scalarize_filter()
4343 switch (alu->op) { in bi_scalarize_filter()
4355 * coalescing divergent with non-divergent nodes. */
4360 ssa->divergent = false; in nir_invalidate_divergence_ssa()
4391 if (instr->type != nir_instr_type_intrinsic) in bifrost_nir_lower_blend_components()
4396 if (intr->intrinsic != nir_intrinsic_store_output) in bifrost_nir_lower_blend_components()
4399 nir_ssa_def *in = intr->src[0].ssa; in bifrost_nir_lower_blend_components()
4409 b->cursor = nir_before_instr(&intr->instr); in bifrost_nir_lower_blend_components()
4419 nir_instr_rewrite_src_ssa(instr, &intr->src[0], replicated); in bifrost_nir_lower_blend_components()
4422 intr->num_components = 4; in bifrost_nir_lower_blend_components()
4502 /* TODO: Why is 64-bit getting rematerialized? in bi_optimize_nir()
4503 * KHR-GLES31.core.shader_image_load_store.basic-allTargets-atomicFS */ in bi_optimize_nir()
4507 * optimizations, since otherwise NIR can produce weird edge cases in bi_optimize_nir()
4540 if (nir->info.stage == MESA_SHADER_FRAGMENT) { in bi_optimize_nir()
4547 /* Backend scheduler is purely local, so do some global optimizations in bi_optimize_nir()
4559 nir->info.inputs_read_indirectly || in bi_optimize_nir()
4560 nir->info.outputs_accessed_indirectly || in bi_optimize_nir()
4561 nir->info.patch_inputs_read_indirectly || in bi_optimize_nir()
4562 nir->info.patch_outputs_accessed_indirectly || in bi_optimize_nir()
4563 nir->info.images_used[0]; in bi_optimize_nir()
4575 /* The cmdstream lowers 8-bit fragment output as 16-bit, so we need to do the
4576 * same lowering here to zero-extend correctly */
4582 if (nir_src_bit_size(intr->src[0]) != 8) in bifrost_nir_lower_i8_fragout_impl()
4590 b->cursor = nir_before_instr(&intr->instr); in bifrost_nir_lower_i8_fragout_impl()
4591 nir_ssa_def *cast = nir_convert_to_bit_size(b, intr->src[0].ssa, type, 16); in bifrost_nir_lower_i8_fragout_impl()
4594 nir_instr_rewrite_src_ssa(&intr->instr, &intr->src[0], cast); in bifrost_nir_lower_i8_fragout_impl()
4602 if (nir_dest_bit_size(intr->dest) != 8) in bifrost_nir_lower_i8_fragin_impl()
4610 b->cursor = nir_before_instr(&intr->instr); in bifrost_nir_lower_i8_fragin_impl()
4612 nir_load_output(b, intr->num_components, 16, intr->src[0].ssa, in bifrost_nir_lower_i8_fragin_impl()
4619 nir_ssa_def_rewrite_uses(&intr->dest.ssa, cast); in bifrost_nir_lower_i8_fragin_impl()
4627 if (instr->type != nir_instr_type_intrinsic) in bifrost_nir_lower_i8_frag()
4631 if (intr->intrinsic == nir_intrinsic_load_output) in bifrost_nir_lower_i8_frag()
4633 else if (intr->intrinsic == nir_intrinsic_store_output) in bifrost_nir_lower_i8_frag()
4643 if (ins->op == BI_OPCODE_MOV_I32 && bi_is_equiv(ins->dest[0], ins->src[0])) in bi_opt_post_ra()
4656 if (instr->type != nir_instr_type_intrinsic) in bifrost_nir_lower_store_component()
4661 if (intr->intrinsic != nir_intrinsic_store_output) in bifrost_nir_lower_store_component()
4672 nir_ssa_def *value = intr->src[0].ssa; in bifrost_nir_lower_store_component()
4673 b->cursor = nir_before_instr(&intr->instr); in bifrost_nir_lower_store_component()
4675 nir_ssa_def *undef = nir_ssa_undef(b, 1, value->bit_size); in bifrost_nir_lower_store_component()
4681 nir_ssa_def *prev_ssa = prev->src[0].ssa; in bifrost_nir_lower_store_component()
4694 intr->num_components = util_last_bit(mask); in bifrost_nir_lower_store_component()
4695 nir_instr_rewrite_src_ssa(instr, &intr->src[0], in bifrost_nir_lower_store_component()
4696 nir_vec(b, channels, intr->num_components)); in bifrost_nir_lower_store_component()
4703 nir_instr_remove(&prev->instr); in bifrost_nir_lower_store_component()
4710 /* Dead code elimination for branches at the end of a block - only one branch
4720 bool cull_terminal = (ctx->arch <= 8); in bi_lower_branch()
4725 if (!ins->branch_target) continue; in bi_lower_branch()
4728 assert(was_jump && (ins->op == BI_OPCODE_JUMP)); in bi_lower_branch()
4734 was_jump = ins->op == BI_OPCODE_JUMP; in bi_lower_branch()
4736 if (!bi_is_terminal_block(ins->branch_target)) in bi_lower_branch()
4740 ins->branch_target = NULL; in bi_lower_branch()
4741 else if (ins->branch_target) in bi_lower_branch()
4742 ins->branch_target->needs_nop = true; in bi_lower_branch()
4753 bi_block *first_block = list_first_entry(&ctx->blocks, bi_block, link); in bi_pack_clauses()
4756 unsigned first_deps = first_clause ? first_clause->dependencies : 0; in bi_pack_clauses()
4757 ctx->info.bifrost->wait_6 = (first_deps & (1 << 6)); in bi_pack_clauses()
4758 ctx->info.bifrost->wait_7 = (first_deps & (1 << 7)); in bi_pack_clauses()
4763 unsigned prefetch_size = BIFROST_SHADER_PREFETCH - final_clause; in bi_pack_clauses()
4765 if (binary->size - offset) { in bi_pack_clauses()
4773 * information is needed by lower_mediump_io, as we don't yet support 16-bit
4782 * we could implement 16-bit flat varyings. Consider if this case matters.
4791 if (instr->type != nir_instr_type_tex) in bi_gather_texcoords()
4800 nir_src src = tex->src[coord_idx].src; in bi_gather_texcoords()
4809 nir_instr *parent = x.def->parent_instr; in bi_gather_texcoords()
4811 if (parent->type != nir_instr_type_intrinsic) in bi_gather_texcoords()
4816 if (intr->intrinsic != nir_intrinsic_load_interpolated_input) in bi_gather_texcoords()
4829 assert(nir->info.stage == MESA_SHADER_FRAGMENT); in bi_fp32_varying_mask()
4832 if (var->data.interpolation == INTERP_MODE_FLAT) in bi_fp32_varying_mask()
4833 mask |= BITFIELD64_BIT(var->data.location); in bi_fp32_varying_mask()
4844 /* Lower gl_Position pre-optimisation, but after lowering vars to ssa in bi_finalize_nir()
4850 if (nir->info.stage == MESA_SHADER_VERTEX) { in bi_finalize_nir()
4858 psiz->data.precision = GLSL_PRECISION_MEDIUM; in bi_finalize_nir()
4866 * straddle 16-byte boundaries. As such, when packed TLS is in use in bi_finalize_nir()
4891 if (nir->info.stage == MESA_SHADER_FRAGMENT) { in bi_finalize_nir()
4914 if (nir->info.stage == MESA_SHADER_FRAGMENT) { in bi_finalize_nir()
4921 if (nir->xfb_info != NULL && nir->info.has_transform_feedback_varyings) { in bi_finalize_nir()
4942 unsigned offset = binary->size; in bi_compile_variant_nir()
4944 ctx->sysval_to_id = sysval_to_id; in bi_compile_variant_nir()
4945 ctx->inputs = inputs; in bi_compile_variant_nir()
4946 ctx->nir = nir; in bi_compile_variant_nir()
4947 ctx->stage = nir->info.stage; in bi_compile_variant_nir()
4948 ctx->quirks = bifrost_get_quirks(inputs->gpu_id); in bi_compile_variant_nir()
4949 ctx->arch = inputs->gpu_id >> 12; in bi_compile_variant_nir()
4950 ctx->info = info; in bi_compile_variant_nir()
4951 ctx->idvs = idvs; in bi_compile_variant_nir()
4952 ctx->malloc_idvs = (ctx->arch >= 9) && !inputs->no_idvs; in bi_compile_variant_nir()
4960 ctx->nir = nir = nir_shader_clone(ctx, nir); in bi_compile_variant_nir()
4978 /* We can only go out-of-SSA after specializing IDVS, as opt_dead_cf in bi_compile_variant_nir()
4984 ctx->ubo_mask = ~0; in bi_compile_variant_nir()
4986 list_inithead(&ctx->blocks); in bi_compile_variant_nir()
4988 bool skip_internal = nir->info.internal; in bi_compile_variant_nir()
4995 ctx->allocated_vec = _mesa_hash_table_u64_create(ctx); in bi_compile_variant_nir()
4998 if (!func->impl) in bi_compile_variant_nir()
5001 ctx->ssa_alloc += func->impl->ssa_alloc; in bi_compile_variant_nir()
5002 ctx->reg_alloc += func->impl->reg_alloc; in bi_compile_variant_nir()
5004 emit_cf_list(ctx, &func->impl->body); in bi_compile_variant_nir()
5005 break; /* TODO: Multi-function shaders */ in bi_compile_variant_nir()
5010 block->index = ctx->num_blocks++; in bi_compile_variant_nir()
5013 bi_validate(ctx, "NIR -> BIR"); in bi_compile_variant_nir()
5018 (ctx->stage == MESA_SHADER_FRAGMENT) && in bi_compile_variant_nir()
5019 !ctx->emitted_atest && in bi_compile_variant_nir()
5023 bi_block *end = list_last_entry(&ctx->blocks, bi_block, link); in bi_compile_variant_nir()
5035 if (optimize && !ctx->inputs->no_ubo_to_push) { in bi_compile_variant_nir()
5050 if (ctx->arch == 7 && ctx->stage == MESA_SHADER_FRAGMENT && in bi_compile_variant_nir()
5060 if (!ctx->inputs->no_ubo_to_push) in bi_compile_variant_nir()
5069 if (ctx->arch >= 9) { in bi_compile_variant_nir()
5101 if (ctx->stage == MESA_SHADER_FRAGMENT) in bi_compile_variant_nir()
5113 if (ctx->arch <= 8) { in bi_compile_variant_nir()
5136 if (ctx->arch >= 9) { in bi_compile_variant_nir()
5156 if (ctx->arch <= 8) { in bi_compile_variant_nir()
5163 if (ctx->arch <= 8) { in bi_compile_variant_nir()
5164 disassemble_bifrost(stdout, binary->data + offset, in bi_compile_variant_nir()
5165 binary->size - offset, in bi_compile_variant_nir()
5168 disassemble_valhall(stdout, binary->data + offset, in bi_compile_variant_nir()
5169 binary->size - offset, in bi_compile_variant_nir()
5176 if ((bifrost_debug & BIFROST_DBG_SHADERDB || inputs->shaderdb) && in bi_compile_variant_nir()
5178 if (ctx->arch >= 9) { in bi_compile_variant_nir()
5179 va_print_stats(ctx, binary->size - offset, stderr); in bi_compile_variant_nir()
5181 bi_print_stats(ctx, binary->size - offset, stderr); in bi_compile_variant_nir()
5197 .push = &info->push, in bi_compile_variant()
5198 .bifrost = &info->bifrost, in bi_compile_variant()
5199 .tls_size = info->tls_size, in bi_compile_variant()
5200 .sysvals = &info->sysvals, in bi_compile_variant()
5201 .push_offset = info->push.count in bi_compile_variant()
5204 unsigned offset = binary->size; in bi_compile_variant()
5221 bi_block *first_block = list_first_entry(&ctx->blocks, bi_block, link); in bi_compile_variant()
5222 uint64_t preload = first_block->reg_live_in; in bi_compile_variant()
5235 if (nir->info.stage == MESA_SHADER_FRAGMENT && ctx->arch >= 9) in bi_compile_variant()
5238 info->ubo_mask |= ctx->ubo_mask; in bi_compile_variant()
5239 info->tls_size = MAX2(info->tls_size, ctx->info.tls_size); in bi_compile_variant()
5242 info->vs.secondary_enable = (binary->size > offset); in bi_compile_variant()
5243 info->vs.secondary_offset = offset; in bi_compile_variant()
5244 info->vs.secondary_preload = preload; in bi_compile_variant()
5245 info->vs.secondary_work_reg_count = ctx->info.work_reg_count; in bi_compile_variant()
5247 info->preload = preload; in bi_compile_variant()
5248 info->work_reg_count = ctx->info.work_reg_count; in bi_compile_variant()
5252 !nir->info.internal && in bi_compile_variant()
5253 nir->info.outputs_written & BITFIELD_BIT(VARYING_SLOT_PSIZ)) { in bi_compile_variant()
5258 if (I->op == BI_OPCODE_STORE_I16 && I->seg == BI_SEG_POS) { in bi_compile_variant()
5267 if (write->flow) { in bi_compile_variant()
5270 nop->flow = write->flow; in bi_compile_variant()
5275 info->vs.no_psiz_offset = binary->size; in bi_compile_variant()
5282 /* Decide if Index-Driven Vertex Shading should be used for a given shader */
5286 /* Opt-out */ in bi_should_idvs()
5287 if (inputs->no_idvs || bifrost_debug & BIFROST_DBG_NOIDVS) in bi_should_idvs()
5291 if (nir->info.stage != MESA_SHADER_VERTEX) in bi_should_idvs()
5295 if ((inputs->gpu_id < 0x9000) && in bi_should_idvs()
5296 nir->info.outputs_written & BITFIELD_BIT(VARYING_SLOT_PSIZ)) in bi_should_idvs()
5311 bi_finalize_nir(nir, inputs->gpu_id, inputs->is_blend); in bifrost_compile_shader_nir()
5313 panfrost_init_sysvals(&info->sysvals, in bifrost_compile_shader_nir()
5314 inputs->fixed_sysval_layout, in bifrost_compile_shader_nir()
5317 info->tls_size = nir->scratch_size; in bifrost_compile_shader_nir()
5318 info->vs.idvs = bi_should_idvs(nir, inputs); in bifrost_compile_shader_nir()
5320 if (info->vs.idvs) { in bifrost_compile_shader_nir()
5327 if (gl_shader_stage_is_compute(nir->info.stage)) { in bifrost_compile_shader_nir()
5333 info->cs.allow_merging_workgroups = in bifrost_compile_shader_nir()
5334 (nir->info.shared_size == 0) && in bifrost_compile_shader_nir()
5335 !nir->info.uses_control_barrier && in bifrost_compile_shader_nir()
5336 !nir->info.uses_memory_barrier; in bifrost_compile_shader_nir()
5339 info->ubo_mask &= (1 << nir->info.num_ubos) - 1; in bifrost_compile_shader_nir()