Lines Matching +full:sync +full:- +full:branches
29 * bits on the wire (as well as fixup branches) */
37 unsigned dependency_wait = next_1 ? next_1->dependencies : 0; in bi_pack_header()
38 dependency_wait |= next_2 ? next_2->dependencies : 0; in bi_pack_header()
43 if (clause->message_type == BIFROST_MESSAGE_BARRIER) in bi_pack_header()
46 bool staging_barrier = next_1 ? next_1->staging_barrier : false; in bi_pack_header()
47 staging_barrier |= next_2 ? next_2->staging_barrier : 0; in bi_pack_header()
52 BIFROST_FLOW_END : clause->flow_control, in bi_pack_header()
53 .terminate_discarded_threads = clause->td, in bi_pack_header()
54 .next_clause_prefetch = clause->next_clause_prefetch && next_1, in bi_pack_header()
56 .staging_register = clause->staging_register, in bi_pack_header()
58 .dependency_slot = clause->scoreboard_id, in bi_pack_header()
59 .message_type = clause->message_type, in bi_pack_header()
60 .next_message_type = next_1 ? next_1->message_type : 0, in bi_pack_header()
61 .flush_to_zero = clause->ftz ? BIFROST_FTZ_ALWAYS : BIFROST_FTZ_DISABLE in bi_pack_header()
80 if (regs->slot[i] == src.value && regs->enabled[i]) in bi_assign_slot_read()
84 if (regs->slot[2] == src.value && regs->slot23.slot2 == BIFROST_OP_READ) in bi_assign_slot_read()
90 if (!regs->enabled[i]) { in bi_assign_slot_read()
91 regs->slot[i] = src.value; in bi_assign_slot_read()
92 regs->enabled[i] = true; in bi_assign_slot_read()
97 if (!regs->slot23.slot3) { in bi_assign_slot_read()
98 regs->slot[2] = src.value; in bi_assign_slot_read()
99 regs->slot23.slot2 = BIFROST_OP_READ; in bi_assign_slot_read()
114 bool read_dreg = now->add && bi_opcode_props[now->add->op].sr_read; in bi_assign_slots()
115 bool write_dreg = prev->add && bi_opcode_props[prev->add->op].sr_write; in bi_assign_slots()
119 if (now->fma) in bi_assign_slots()
120 bi_foreach_src(now->fma, src) in bi_assign_slots()
121 bi_assign_slot_read(&now->regs, (now->fma)->src[src]); in bi_assign_slots()
123 if (now->add) { in bi_assign_slots()
124 bi_foreach_src(now->add, src) { in bi_assign_slots()
128 if (now->add->op == BI_OPCODE_BLEND && src == 4) in bi_assign_slots()
132 bi_assign_slot_read(&now->regs, (now->add)->src[src]); in bi_assign_slots()
140 if (prev->add && (!write_dreg || prev->add->op == BI_OPCODE_ATEST)) { in bi_assign_slots()
141 bi_index idx = prev->add->dest[0]; in bi_assign_slots()
144 now->regs.slot[3] = idx.value; in bi_assign_slots()
145 now->regs.slot23.slot3 = BIFROST_OP_WRITE; in bi_assign_slots()
149 if (prev->fma) { in bi_assign_slots()
150 bi_index idx = (prev->fma)->dest[0]; in bi_assign_slots()
153 if (now->regs.slot23.slot3) { in bi_assign_slots()
155 assert(!now->regs.slot23.slot2); in bi_assign_slots()
156 now->regs.slot[2] = idx.value; in bi_assign_slots()
157 now->regs.slot23.slot2 = BIFROST_OP_WRITE; in bi_assign_slots()
159 now->regs.slot[3] = idx.value; in bi_assign_slots()
160 now->regs.slot23.slot3 = BIFROST_OP_WRITE; in bi_assign_slots()
161 now->regs.slot23.slot3_fma = true; in bi_assign_slots()
166 return now->regs; in bi_assign_slots()
193 /* Need to pack 5-bit mode as a 4-bit field. The decoder moves bit 3 to bit 4 for in bi_pack_registers()
221 /* Gotta save that bit!~ Required by the 63-x trick */ in bi_pack_registers()
225 /* Do the 63-x trick, see docs/disasm */ in bi_pack_registers()
227 regs.slot[0] = 63 - regs.slot[0]; in bi_pack_registers()
228 regs.slot[1] = 63 - regs.slot[1]; in bi_pack_registers()
275 /* We must ensure slot 1 > slot 0 for the 63-x trick to function, so we fix
281 if (regs->enabled[0] && regs->enabled[1] && regs->slot[1] < regs->slot[0]) { in bi_flip_slots()
282 unsigned temp = regs->slot[0]; in bi_flip_slots()
283 regs->slot[0] = regs->slot[1]; in bi_flip_slots()
284 regs->slot[1] = temp; in bi_flip_slots()
292 if (regs->slot[0] == reg && regs->enabled[0]) in bi_get_src_slot()
294 else if (regs->slot[1] == reg && regs->enabled[1]) in bi_get_src_slot()
296 else if (regs->slot[2] == reg && regs->slot23.slot2 == BIFROST_OP_READ) in bi_get_src_slot()
308 bi_index src = ins->src[s]; in bi_get_src_new()
324 tuple->regs.fau_idx = tuple->fau_idx; in bi_pack_tuple()
325 tuple->regs.first_instruction = first_tuple; in bi_pack_tuple()
327 bi_flip_slots(&tuple->regs); in bi_pack_tuple()
329 bool sr_read = tuple->add && in bi_pack_tuple()
330 bi_opcode_props[(tuple->add)->op].sr_read; in bi_pack_tuple()
332 uint64_t reg = bi_pack_registers(tuple->regs); in bi_pack_tuple()
333 uint64_t fma = bi_pack_fma(tuple->fma, in bi_pack_tuple()
334 bi_get_src_new(tuple->fma, &tuple->regs, 0), in bi_pack_tuple()
335 bi_get_src_new(tuple->fma, &tuple->regs, 1), in bi_pack_tuple()
336 bi_get_src_new(tuple->fma, &tuple->regs, 2), in bi_pack_tuple()
337 bi_get_src_new(tuple->fma, &tuple->regs, 3)); in bi_pack_tuple()
339 uint64_t add = bi_pack_add(tuple->add, in bi_pack_tuple()
340 bi_get_src_new(tuple->add, &tuple->regs, sr_read + 0), in bi_pack_tuple()
341 bi_get_src_new(tuple->add, &tuple->regs, sr_read + 1), in bi_pack_tuple()
342 bi_get_src_new(tuple->add, &tuple->regs, sr_read + 2), in bi_pack_tuple()
345 if (tuple->add) { in bi_pack_tuple()
346 bi_instr *add = tuple->add; in bi_pack_tuple()
348 bool sr_write = bi_opcode_props[add->op].sr_write && in bi_pack_tuple()
349 !bi_is_null(add->dest[0]); in bi_pack_tuple()
351 if (sr_read && !bi_is_null(add->src[0])) { in bi_pack_tuple()
352 assert(add->src[0].type == BI_INDEX_REGISTER); in bi_pack_tuple()
353 clause->staging_register = add->src[0].value; in bi_pack_tuple()
356 assert(bi_is_equiv(add->src[0], add->dest[0])); in bi_pack_tuple()
358 assert(add->dest[0].type == BI_INDEX_REGISTER); in bi_pack_tuple()
359 clause->staging_register = add->dest[0].value; in bi_pack_tuple()
371 /* A block contains at most one PC-relative constant, from a terminal branch.
373 * PC-relative constant to contain the absolute offset. This occurs at pack
381 if (list_is_empty(&block->clauses)) in bi_assign_branch_offset()
384 bi_clause *clause = list_last_entry(&block->clauses, bi_clause, link); in bi_assign_branch_offset()
387 if (!br->branch_target) in bi_assign_branch_offset()
391 int32_t qwords = bi_block_offset(ctx, clause, br->branch_target); in bi_assign_branch_offset()
401 /* Put in top 32-bits */ in bi_assign_branch_offset()
402 assert(clause->pcrel_idx < 8); in bi_assign_branch_offset()
403 clause->constants[clause->pcrel_idx] |= ((uint64_t) raw) << 32ull; in bi_assign_branch_offset()
430 assert((tuple_count - 1) < 8); in bi_pack_constants()
432 unsigned pos = pos_lookup[tuple_count - 1][word_idx]; in bi_pack_constants()
451 return (literal - BI_CLAUSE_SUBWORD_LITERAL_0); in bi_pack_literal()
461 /* top 3-bits of 78-bits is tuple >> 75 == (tuple >> 64) >> 11 */ in bi_clause_upper()
474 return bi_clause_upper(upper - BI_CLAUSE_SUBWORD_UPPER_0, tuples, in bi_pack_upper()
487 unsigned val = (idx - BI_CLAUSE_SUBWORD_TUPLE_0); in bi_pack_tuple_bits()
498 * = { ((hi << (64 - start)) | (lo >> start)) & m if start <= 64 in bi_pack_tuple_bits()
499 * { ((hi >> (start - 64)) | (lo >> start)) & m if start >= 64 in bi_pack_tuple_bits()
500 * = { ((hi << (64 - start)) & m) | ((lo >> start) & m) if start <= 64 in bi_pack_tuple_bits()
501 * { ((hi >> (start - 64)) & m) | ((lo >> start) & m) if start >= 64 in bi_pack_tuple_bits()
503 * By setting m = 2^64 - 1, we justify doing the respective shifts as in bi_pack_tuple_bits()
504 * 64-bit integers. Zero special cased to avoid undefined behaviour. in bi_pack_tuple_bits()
509 : (offset > 64) ? (tuple.hi >> (offset - 64)) in bi_pack_tuple_bits()
510 : (tuple.hi << (64 - offset)); in bi_pack_tuple_bits()
512 return (lo | hi) & ((1ULL << nbits) - 1); in bi_pack_tuple_bits()
533 uint8_t sync = in bi_pack_sync() local
538 sync |= z << 6; in bi_pack_sync()
540 sync |= bi_pack_literal(t1) << 6; in bi_pack_sync()
542 return sync; in bi_pack_sync()
566 return (header & ((1 << 30) - 1)); in bi_pack_subwords_56()
570 return (ec0 >> 15) & ((1 << 30) - 1); in bi_pack_subwords_56()
590 (ec0 & ((1 << 15) - 1)) : in bi_pack_subword()
605 /* EC0 is 60-bits (bottom 4 already shifted off) */
616 uint8_t sync = bi_pack_sync(format.tag_1, format.tag_2, format.tag_3, in bi_pack_format() local
629 /* Now that subwords are packed, split into 64-bit halves and emit */ in bi_pack_format()
630 uint64_t lo = sync | ((s0_s3 & ((1ull << 56) - 1)) << 8); in bi_pack_format()
644 for (unsigned i = 0; i < clause->tuple_count; ++i) { in bi_pack_clause()
645 unsigned prev = ((i == 0) ? clause->tuple_count : i) - 1; in bi_pack_clause()
646 ins[i] = bi_pack_tuple(clause, &clause->tuples[i], in bi_pack_clause()
647 &clause->tuples[prev], i == 0, stage); in bi_pack_clause()
649 bi_instr *add = clause->tuples[i].add; in bi_pack_clause()
654 if (add && add->op == BI_OPCODE_CLPER_OLD_I32) in bi_pack_clause()
655 assert(ctx->quirks & BIFROST_LIMITED_CLPER); in bi_pack_clause()
656 else if (add && add->op == BI_OPCODE_CLPER_I32) in bi_pack_clause()
657 assert(!(ctx->quirks & BIFROST_LIMITED_CLPER)); in bi_pack_clause()
660 bool ec0_packed = bi_ec0_packed(clause->tuple_count); in bi_pack_clause()
663 clause->constant_count = MAX2(clause->constant_count, 1); in bi_pack_clause()
666 DIV_ROUND_UP(clause->constant_count - (ec0_packed ? 1 : 0), 2); in bi_pack_clause()
669 uint64_t ec0 = (clause->constants[0] >> 4); in bi_pack_clause()
670 unsigned m0 = (clause->pcrel_idx == 0) ? 4 : 0; in bi_pack_clause()
687 unsigned count = counts[clause->tuple_count - 1]; in bi_pack_clause()
690 ASSERTED unsigned idx = indices[clause->tuple_count - 1][pos]; in bi_pack_clause()
693 (pos == count - 1)); in bi_pack_clause()
698 bi_pack_format(emission, indices[clause->tuple_count - 1][pos], in bi_pack_clause()
699 ins, clause->tuple_count, header, ec0, m0, in bi_pack_clause()
706 bi_pack_constants(clause->tuple_count, clause->constants, in bi_pack_clause()
716 if (ctx->inputs->is_blend) in bi_collect_blend_ret_addr()
719 const bi_tuple *tuple = &clause->tuples[clause->tuple_count - 1]; in bi_collect_blend_ret_addr()
720 const bi_instr *ins = tuple->add; in bi_collect_blend_ret_addr()
722 if (!ins || ins->op != BI_OPCODE_BLEND) in bi_collect_blend_ret_addr()
726 unsigned loc = tuple->regs.fau_idx - BIR_FAU_BLEND_0; in bi_collect_blend_ret_addr()
727 assert(loc < ARRAY_SIZE(ctx->info.bifrost->blend)); in bi_collect_blend_ret_addr()
728 assert(!ctx->info.bifrost->blend[loc].return_offset); in bi_collect_blend_ret_addr()
729 ctx->info.bifrost->blend[loc].return_offset = in bi_collect_blend_ret_addr()
731 assert(!(ctx->info.bifrost->blend[loc].return_offset & 0x7)); in bi_collect_blend_ret_addr()
737 unsigned previous_size = emission->size; in bi_pack()
743 bool is_last = (clause->link.next == &block->clauses); in bi_pack()
752 next = bi_next_clause(ctx, block->successors[0], NULL); in bi_pack()
753 next_2 = bi_next_clause(ctx, block->successors[1], NULL); in bi_pack()
759 previous_size = emission->size; in bi_pack()
761 bi_pack_clause(ctx, clause, next, next_2, emission, ctx->stage); in bi_pack()
768 return emission->size - previous_size; in bi_pack()