/*
 * Copyright 2022 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "include/core/SkStream.h"
#include "include/private/SkSLString.h"
#include "include/private/base/SkMalloc.h"
#include "include/private/base/SkTo.h"
#include "include/sksl/SkSLPosition.h"
#include "src/base/SkArenaAlloc.h"
#include "src/core/SkOpts.h"
#include "src/core/SkRasterPipelineOpContexts.h"
#include "src/core/SkRasterPipelineOpList.h"
#include "src/sksl/codegen/SkSLRasterPipelineBuilder.h"
#include "src/sksl/tracing/SkRPDebugTrace.h"
#include "src/sksl/tracing/SkSLDebugInfo.h"
#include "src/utils/SkBitSet.h"

#if !defined(SKSL_STANDALONE)
#include "src/core/SkRasterPipeline.h"
#endif

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstring>
#include <iterator>
#include <memory>
#include <string>
#include <utility>
#include <vector>

namespace SkSL {
namespace RP {

#define ALL_SINGLE_SLOT_UNARY_OP_CASES  \
         BuilderOp::atan_float:         \
    case BuilderOp::cos_float:          \
    case BuilderOp::exp_float:          \
    case BuilderOp::sin_float:          \
    case BuilderOp::sqrt_float:         \
    case BuilderOp::tan_float

#define ALL_MULTI_SLOT_UNARY_OP_CASES        \
         BuilderOp::abs_float:               \
    case BuilderOp::abs_int:                 \
    case BuilderOp::bitwise_not_int:         \
    case BuilderOp::cast_to_float_from_int:  \
    case BuilderOp::cast_to_float_from_uint: \
    case BuilderOp::cast_to_int_from_float:  \
    case BuilderOp::cast_to_uint_from_float: \
    case BuilderOp::ceil_float:              \
    case BuilderOp::floor_float

#define ALL_N_WAY_BINARY_OP_CASES   \
         BuilderOp::atan2_n_floats: \
    case BuilderOp::pow_n_floats

#define ALL_MULTI_SLOT_BINARY_OP_CASES  \
         BuilderOp::add_n_floats:       \
    case BuilderOp::add_n_ints:         \
    case BuilderOp::sub_n_floats:       \
    case BuilderOp::sub_n_ints:         \
    case BuilderOp::mul_n_floats:       \
    case BuilderOp::mul_n_ints:         \
    case BuilderOp::div_n_floats:       \
    case BuilderOp::div_n_ints:         \
    case BuilderOp::div_n_uints:        \
    case BuilderOp::bitwise_and_n_ints: \
    case BuilderOp::bitwise_or_n_ints:  \
    case BuilderOp::bitwise_xor_n_ints: \
    case BuilderOp::min_n_floats:       \
    case BuilderOp::min_n_ints:         \
    case BuilderOp::min_n_uints:        \
    case BuilderOp::max_n_floats:       \
    case BuilderOp::max_n_ints:         \
    case BuilderOp::max_n_uints:        \
    case BuilderOp::cmple_n_floats:     \
    case BuilderOp::cmple_n_ints:       \
    case BuilderOp::cmple_n_uints:      \
    case BuilderOp::cmplt_n_floats:     \
    case BuilderOp::cmplt_n_ints:       \
    case BuilderOp::cmplt_n_uints:      \
    case BuilderOp::cmpeq_n_floats:     \
    case BuilderOp::cmpeq_n_ints:       \
    case BuilderOp::cmpne_n_floats:     \
    case BuilderOp::cmpne_n_ints

#define ALL_MULTI_SLOT_TERNARY_OP_CASES \
         BuilderOp::mix_n_floats:       \
    case BuilderOp::mix_n_ints

void Builder::unary_op(BuilderOp op, int32_t slots) {
    switch (op) {
        case ALL_SINGLE_SLOT_UNARY_OP_CASES:
        case ALL_MULTI_SLOT_UNARY_OP_CASES:
            fInstructions.push_back({op, {}, slots});
            break;

        default:
            SkDEBUGFAIL("not a unary op");
            break;
    }
}

void Builder::binary_op(BuilderOp op, int32_t slots) {
    switch (op) {
        case ALL_N_WAY_BINARY_OP_CASES:
        case ALL_MULTI_SLOT_BINARY_OP_CASES:
            fInstructions.push_back({op, {}, slots});
            break;

        default:
            SkDEBUGFAIL("not a binary op");
            break;
    }
}

void Builder::ternary_op(BuilderOp op, int32_t slots) {
    switch (op) {
        case ALL_MULTI_SLOT_TERNARY_OP_CASES:
            fInstructions.push_back({op, {}, slots});
            break;

        default:
            SkDEBUGFAIL("not a ternary op");
            break;
    }
}

void Builder::dot_floats(int32_t slots) {
    switch (slots) {
        case 1: fInstructions.push_back({BuilderOp::mul_n_floats, {}, slots}); break;
        case 2: fInstructions.push_back({BuilderOp::dot_2_floats, {}, slots}); break;
        case 3: fInstructions.push_back({BuilderOp::dot_3_floats, {}, slots}); break;
        case 4: fInstructions.push_back({BuilderOp::dot_4_floats, {}, slots}); break;

        default:
            SkDEBUGFAIL("invalid number of slots");
            break;
    }
}
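// A worked illustration (hypothetical stack contents): `dot_floats(3)` expects two 3-slot
// vectors on the stack, six slots in all, and replaces them with one scalar, for a net
// stack effect of -5 (see `stack_usage` below). With slots == 1, a dot product is just an
// ordinary multiply, which is why that case reuses mul_n_floats instead of a dedicated op.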
void Builder::discard_stack(int32_t count) {
    // If we pushed something onto the stack and then immediately discarded part of it, we can
    // shrink or eliminate the push.
    while (count > 0 && !fInstructions.empty()) {
        Instruction& lastInstruction = fInstructions.back();

        switch (lastInstruction.fOp) {
            case BuilderOp::discard_stack:
                // Our last op was actually a separate discard_stack; combine the discards.
                lastInstruction.fImmA += count;
                return;

            case BuilderOp::push_zeros:
            case BuilderOp::push_clone:
            case BuilderOp::push_clone_from_stack:
            case BuilderOp::push_slots:
            case BuilderOp::push_uniform:
                // Our last op was a multi-slot push; cancel out one discard and eliminate the op
                // if its count reached zero.
                --count;
                --lastInstruction.fImmA;
                if (lastInstruction.fImmA == 0) {
                    fInstructions.pop_back();
                }
                continue;

            case BuilderOp::push_literal:
            case BuilderOp::push_condition_mask:
            case BuilderOp::push_loop_mask:
            case BuilderOp::push_return_mask:
                // Our last op was a single-slot push; cancel out one discard and eliminate the op.
                --count;
                fInstructions.pop_back();
                continue;

            default:
                break;
        }

        // This instruction wasn't a push.
        break;
    }

    if (count > 0) {
        fInstructions.push_back({BuilderOp::discard_stack, {}, count});
    }
}
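// For illustration, a hypothetical instruction trace (slot numbers invented):
//     push_slots v0..v2    (3 slots)
//     discard_stack(2)
// folds down to a single one-slot push:
//     push_slots v0..v0
// and a single-slot push (e.g. push_literal) followed by discard_stack(1) vanishes entirely.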
void Builder::label(int labelID) {
    SkASSERT(labelID >= 0 && labelID < fNumLabels);

    // If the previous instruction was a branch to this label, it's a no-op; jumping to the very
    // next instruction is effectively meaningless.
    while (!fInstructions.empty()) {
        Instruction& lastInstruction = fInstructions.back();
        switch (lastInstruction.fOp) {
            case BuilderOp::jump:
            case BuilderOp::branch_if_any_active_lanes:
            case BuilderOp::branch_if_no_active_lanes:
            case BuilderOp::branch_if_no_active_lanes_on_stack_top_equal:
                if (lastInstruction.fImmA == labelID) {
                    fInstructions.pop_back();
                    continue;
                }
                break;

            default:
                break;
        }
        break;
    }
    fInstructions.push_back({BuilderOp::label, {}, labelID});
}

void Builder::jump(int labelID) {
    SkASSERT(labelID >= 0 && labelID < fNumLabels);
    if (!fInstructions.empty() && fInstructions.back().fOp == BuilderOp::jump) {
        // The previous instruction was also `jump`, so this branch could never possibly occur.
        return;
    }
    fInstructions.push_back({BuilderOp::jump, {}, labelID});
}

void Builder::branch_if_any_active_lanes(int labelID) {
    if (!this->executionMaskWritesAreEnabled()) {
        this->jump(labelID);
        return;
    }

    SkASSERT(labelID >= 0 && labelID < fNumLabels);
    if (!fInstructions.empty() &&
        (fInstructions.back().fOp == BuilderOp::branch_if_any_active_lanes ||
         fInstructions.back().fOp == BuilderOp::jump)) {
        // The previous instruction was `jump` or `branch_if_any_active_lanes`, so this branch
        // could never possibly occur.
        return;
    }
    fInstructions.push_back({BuilderOp::branch_if_any_active_lanes, {}, labelID});
}

void Builder::branch_if_no_active_lanes(int labelID) {
    if (!this->executionMaskWritesAreEnabled()) {
        return;
    }

    SkASSERT(labelID >= 0 && labelID < fNumLabels);
    if (!fInstructions.empty() &&
        (fInstructions.back().fOp == BuilderOp::branch_if_no_active_lanes ||
         fInstructions.back().fOp == BuilderOp::jump)) {
        // The previous instruction was `jump` or `branch_if_no_active_lanes`, so this branch
        // could never possibly occur.
        return;
    }
    fInstructions.push_back({BuilderOp::branch_if_no_active_lanes, {}, labelID});
}

void Builder::branch_if_no_active_lanes_on_stack_top_equal(int value, int labelID) {
    SkASSERT(labelID >= 0 && labelID < fNumLabels);
    if (!fInstructions.empty() &&
        (fInstructions.back().fOp == BuilderOp::jump ||
         (fInstructions.back().fOp == BuilderOp::branch_if_no_active_lanes_on_stack_top_equal &&
          fInstructions.back().fImmB == value))) {
        // The previous instruction was `jump` or `branch_if_no_active_lanes_on_stack_top_equal`
        // (checking against the same value), so this branch could never possibly occur.
        return;
    }
    fInstructions.push_back({BuilderOp::branch_if_no_active_lanes_on_stack_top_equal,
                             {}, labelID, value});
}

void Builder::push_slots(SlotRange src) {
    SkASSERT(src.count >= 0);

    if (!fInstructions.empty()) {
        Instruction& lastInstruction = fInstructions.back();

        // If the previous instruction was pushing slots contiguous to this range, we can collapse
        // the two pushes into one larger push.
        if (lastInstruction.fOp == BuilderOp::push_slots &&
            lastInstruction.fSlotA + lastInstruction.fImmA == src.index) {
            lastInstruction.fImmA += src.count;
            return;
        }

        // If the previous instruction was discarding an equal number of slots...
        if (lastInstruction.fOp == BuilderOp::discard_stack &&
            lastInstruction.fImmA == src.count) {
            // ... and the instruction before that was copying from the stack to the same slots...
            Instruction& prevInstruction = fInstructions.fromBack(1);
            if ((prevInstruction.fOp == BuilderOp::copy_stack_to_slots ||
                 prevInstruction.fOp == BuilderOp::copy_stack_to_slots_unmasked) &&
                prevInstruction.fSlotA == src.index &&
                prevInstruction.fImmA == src.count) {
                // ... we are emitting `copy stack to X, discard stack, copy X to stack`. This is a
                // common pattern when multiple operations in a row affect the same variable. We can
                // eliminate the discard and just leave X on the stack.
                fInstructions.pop_back();
                return;
            }
        }
    }

    if (src.count > 0) {
        fInstructions.push_back({BuilderOp::push_slots, {src.index}, src.count});
    }
}

void Builder::push_uniform(SlotRange src) {
    SkASSERT(src.count >= 0);

    if (!fInstructions.empty()) {
        Instruction& lastInstruction = fInstructions.back();

        // If the previous instruction was pushing uniforms contiguous to this range, we can
        // collapse the two pushes into one larger push.
        if (lastInstruction.fOp == BuilderOp::push_uniform &&
            lastInstruction.fSlotA + lastInstruction.fImmA == src.index) {
            lastInstruction.fImmA += src.count;
            return;
        }
    }

    if (src.count > 0) {
        fInstructions.push_back({BuilderOp::push_uniform, {src.index}, src.count});
    }
}
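// For example (an assumed uniform layout, purely illustrative): two consecutive calls
// `push_uniform({u0, 2})` then `push_uniform({u2, 2})` coalesce into one instruction
// equivalent to `push_uniform({u0, 4})`, since the second range begins exactly where the
// first one ends. The same contiguous-range merging applies to push_slots above.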
void Builder::push_duplicates(int count) {
    if (!fInstructions.empty()) {
        Instruction& lastInstruction = fInstructions.back();

        // If the previous op is pushing a zero, we can just push more of them.
        if (lastInstruction.fOp == BuilderOp::push_zeros) {
            lastInstruction.fImmA += count;
            return;
        }
    }
    SkASSERT(count >= 0);
    if (count >= 3) {
        // Use a swizzle to splat the input into a 4-slot value.
        this->swizzle(/*consumedSlots=*/1, {0, 0, 0, 0});
        count -= 3;
    }
    for (; count >= 4; count -= 4) {
        // Clone the splatted value four slots at a time.
        this->push_clone(/*numSlots=*/4);
    }
    // Use a swizzle or clone to handle the trailing items.
    switch (count) {
        case 3: this->swizzle(/*consumedSlots=*/1, {0, 0, 0, 0}); break;
        case 2: this->swizzle(/*consumedSlots=*/1, {0, 0, 0});    break;
        case 1: this->push_clone(/*numSlots=*/1);                 break;
        default: break;
    }
}

void Builder::push_clone_from_stack(int numSlots, int otherStackIndex, int offsetFromStackTop) {
    offsetFromStackTop += numSlots;

    if (!fInstructions.empty()) {
        Instruction& lastInstruction = fInstructions.back();

        // If the previous op is also pushing a clone...
        if (lastInstruction.fOp == BuilderOp::push_clone_from_stack &&
            // ... from the same stack...
            lastInstruction.fImmB == otherStackIndex &&
            // ... and this clone starts at the same place that the last clone ends...
            lastInstruction.fImmC - lastInstruction.fImmA == offsetFromStackTop) {
            // ... just extend the existing clone-op.
            lastInstruction.fImmA += numSlots;
            return;
        }
    }

    fInstructions.push_back({BuilderOp::push_clone_from_stack, {},
                             numSlots, otherStackIndex, offsetFromStackTop});
}

void Builder::pop_slots(SlotRange dst) {
    if (!this->executionMaskWritesAreEnabled()) {
        this->pop_slots_unmasked(dst);
        return;
    }

    this->copy_stack_to_slots(dst);
    this->discard_stack(dst.count);
}

void Builder::simplifyPopSlotsUnmasked(SlotRange* dst) {
    if (!dst->count || fInstructions.empty()) {
        // There's nothing left to simplify.
        return;
    }

    Instruction& lastInstruction = fInstructions.back();

    // If the last instruction is pushing a constant, we can simplify it by copying the constant
    // directly into the destination slot.
    if (lastInstruction.fOp == BuilderOp::push_literal) {
        // Remove the constant-push instruction.
        int value = lastInstruction.fImmA;
        fInstructions.pop_back();

        // Consume one destination slot.
        dst->count--;
        Slot destinationSlot = dst->index + dst->count;

        // Continue simplifying if possible.
        this->simplifyPopSlotsUnmasked(dst);

        // Write the constant directly to the destination slot.
        this->copy_constant(destinationSlot, value);
        return;
    }

    // If the last instruction is pushing a zero, we can save a step by directly zeroing out
    // the destination slot.
    if (lastInstruction.fOp == BuilderOp::push_zeros) {
        // Remove one zero-push.
        lastInstruction.fImmA--;
        if (lastInstruction.fImmA == 0) {
            fInstructions.pop_back();
        }

        // Consume one destination slot.
        dst->count--;
        Slot destinationSlot = dst->index + dst->count;

        // Continue simplifying if possible.
        this->simplifyPopSlotsUnmasked(dst);

        // Zero the destination slot directly.
        this->zero_slots_unmasked({destinationSlot, 1});
        return;
    }

    // If the last instruction is pushing a slot, we can just copy that slot.
    if (lastInstruction.fOp == BuilderOp::push_slots) {
        // Get the last slot.
        Slot sourceSlot = lastInstruction.fSlotA + lastInstruction.fImmA - 1;
        lastInstruction.fImmA--;
        if (lastInstruction.fImmA == 0) {
            fInstructions.pop_back();
        }

        // Consume one destination slot.
        dst->count--;
        Slot destinationSlot = dst->index + dst->count;

        // Try once more.
        this->simplifyPopSlotsUnmasked(dst);

        // Copy the slot directly.
        if (destinationSlot != sourceSlot) {
            this->copy_slots_unmasked({destinationSlot, 1}, {sourceSlot, 1});
        }
        return;
    }
}
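// A sketch of the simplification above, using made-up slot numbers: popping into v5 right
// after
//     push_literal 0x3F800000    (1.0f)
// never needs the temp stack at all; the pair becomes
//     copy_constant v5 = 0x3F800000
// Likewise a trailing push_zeros becomes zero_slots_unmasked, and a trailing push_slots
// becomes a direct slot-to-slot copy (or nothing, when source and destination coincide).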
void Builder::pop_slots_unmasked(SlotRange dst) {
    SkASSERT(dst.count >= 0);

    // If we are popping immediately after a push, we can simplify the code by writing the pushed
    // value directly to the destination range.
    this->simplifyPopSlotsUnmasked(&dst);

    // Pop from the stack normally.
    if (dst.count > 0) {
        this->copy_stack_to_slots_unmasked(dst);
        this->discard_stack(dst.count);
    }
}

void Builder::copy_stack_to_slots(SlotRange dst, int offsetFromStackTop) {
    // If the execution mask is known to be all-true, then we can ignore the write mask.
    if (!this->executionMaskWritesAreEnabled()) {
        this->copy_stack_to_slots_unmasked(dst, offsetFromStackTop);
        return;
    }

    // If the last instruction copied the previous stack slots, just extend it.
    if (!fInstructions.empty()) {
        Instruction& lastInstruction = fInstructions.back();

        // If the last op is copy-stack-to-slots...
        if (lastInstruction.fOp == BuilderOp::copy_stack_to_slots &&
            // and this op's destination is immediately after the last copy-slots-op's destination
            lastInstruction.fSlotA + lastInstruction.fImmA == dst.index &&
            // and this op's source is immediately after the last copy-slots-op's source
            lastInstruction.fImmB - lastInstruction.fImmA == offsetFromStackTop) {
            // then we can just extend the copy!
            lastInstruction.fImmA += dst.count;
            return;
        }
    }

    fInstructions.push_back({BuilderOp::copy_stack_to_slots, {dst.index},
                             dst.count, offsetFromStackTop});
}

static bool slot_ranges_overlap(SlotRange x, SlotRange y) {
    return x.index < y.index + y.count &&
           y.index < x.index + x.count;
}

void Builder::copy_slots_unmasked(SlotRange dst, SlotRange src) {
    // If the last instruction copied adjacent slots, just extend it.
    if (!fInstructions.empty()) {
        Instruction& lastInstr = fInstructions.back();

        // If the last op is copy-slots-unmasked...
        if (lastInstr.fOp == BuilderOp::copy_slot_unmasked &&
            // and this op's destination is immediately after the last copy-slots-op's destination
            lastInstr.fSlotA + lastInstr.fImmA == dst.index &&
            // and this op's source is immediately after the last copy-slots-op's source
            lastInstr.fSlotB + lastInstr.fImmA == src.index &&
            // and the source/dest ranges will not overlap
            !slot_ranges_overlap({lastInstr.fSlotB, lastInstr.fImmA + dst.count},
                                 {lastInstr.fSlotA, lastInstr.fImmA + dst.count})) {
            // then we can just extend the copy!
            lastInstr.fImmA += dst.count;
            return;
        }
    }

    SkASSERT(dst.count == src.count);
    fInstructions.push_back({BuilderOp::copy_slot_unmasked, {dst.index, src.index}, dst.count});
}

void Builder::copy_stack_to_slots_unmasked(SlotRange dst, int offsetFromStackTop) {
    // If the last instruction copied the previous stack slots, just extend it.
    if (!fInstructions.empty()) {
        Instruction& lastInstruction = fInstructions.back();

        // If the last op is copy-stack-to-slots-unmasked...
        if (lastInstruction.fOp == BuilderOp::copy_stack_to_slots_unmasked &&
            // and this op's destination is immediately after the last copy-slots-op's destination
            lastInstruction.fSlotA + lastInstruction.fImmA == dst.index &&
            // and this op's source is immediately after the last copy-slots-op's source
            lastInstruction.fImmB - lastInstruction.fImmA == offsetFromStackTop) {
            // then we can just extend the copy!
            lastInstruction.fImmA += dst.count;
            return;
        }
    }

    fInstructions.push_back({BuilderOp::copy_stack_to_slots_unmasked, {dst.index},
                             dst.count, offsetFromStackTop});
}
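// Worked example of the overlap guard above, with made-up ranges: extending a copy of
// {dst=v0..v1, src=v2..v3} by two more slots would read v2..v5 and write v0..v3;
// slot_ranges_overlap({2, 4}, {0, 4}) is true (2 < 0+4 and 0 < 2+4), so the two copies stay
// separate rather than merging into one op whose source and destination collide.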
void Builder::pop_return_mask() {
    SkASSERT(this->executionMaskWritesAreEnabled());

    // This instruction is going to overwrite the return mask. If the previous instruction was
    // masking off the return mask, that's wasted work and it can be eliminated.
    if (!fInstructions.empty()) {
        Instruction& lastInstruction = fInstructions.back();

        if (lastInstruction.fOp == BuilderOp::mask_off_return_mask) {
            fInstructions.pop_back();
        }
    }

    fInstructions.push_back({BuilderOp::pop_return_mask, {}});
}

void Builder::zero_slots_unmasked(SlotRange dst) {
    if (!fInstructions.empty()) {
        Instruction& lastInstruction = fInstructions.back();

        if (lastInstruction.fOp == BuilderOp::zero_slot_unmasked) {
            if (lastInstruction.fSlotA + lastInstruction.fImmA == dst.index) {
                // The previous instruction was zeroing the range immediately before this range.
                // Combine the ranges.
                lastInstruction.fImmA += dst.count;
                return;
            }
        }

        if (lastInstruction.fOp == BuilderOp::zero_slot_unmasked) {
            if (lastInstruction.fSlotA == dst.index + dst.count) {
                // The previous instruction was zeroing the range immediately after this range.
                // Combine the ranges.
                lastInstruction.fSlotA = dst.index;
                lastInstruction.fImmA += dst.count;
                return;
            }
        }
    }

    fInstructions.push_back({BuilderOp::zero_slot_unmasked, {dst.index}, dst.count});
}

static int pack_nybbles(SkSpan<const int8_t> components) {
    // Pack up to 8 elements into nybbles, in reverse order.
    int packed = 0;
    for (auto iter = components.rbegin(); iter != components.rend(); ++iter) {
        SkASSERT(*iter >= 0 && *iter <= 0xF);
        packed <<= 4;
        packed |= *iter;
    }
    return packed;
}

static void unpack_nybbles_to_offsets(uint32_t components, SkSpan<uint16_t> offsets) {
    // Unpack component nybbles into byte-offsets pointing at stack slots.
    for (size_t index = 0; index < offsets.size(); ++index) {
        offsets[index] = (components & 0xF) * SkOpts::raster_pipeline_highp_stride * sizeof(float);
        components >>= 4;
    }
}
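// A worked round-trip, assuming a highp stride of 8 lanes: pack_nybbles({1, 2, 3, 4}) packs
// in reverse to 0x4321; unpack_nybbles_to_offsets(0x4321, ...) then yields byte offsets
// {1*8*4, 2*8*4, 3*8*4, 4*8*4} = {32, 64, 96, 128}, each pointing at the start of one
// vector-wide stack slot.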
void Builder::swizzle_copy_stack_to_slots(SlotRange dst,
                                          SkSpan<const int8_t> components,
                                          int offsetFromStackTop) {
    // An unmasked version of this op could squeeze out a little bit of extra speed, if needed.
    fInstructions.push_back({BuilderOp::swizzle_copy_stack_to_slots, {dst.index},
                             (int)components.size(), offsetFromStackTop,
                             pack_nybbles(components)});
}

void Builder::swizzle(int consumedSlots, SkSpan<const int8_t> components) {
    // Consumes `consumedSlots` elements on the stack, then generates `components.size()` elements.
    SkASSERT(consumedSlots >= 0);

    // We only allow up to 16 elements, and they can only reach 0-15 slots, due to nybble packing.
    int numElements = components.size();
    SkASSERT(numElements <= 16);
    SkASSERT(std::all_of(components.begin(), components.end(), [](int8_t e){ return e >= 0; }));
    SkASSERT(std::all_of(components.begin(), components.end(), [](int8_t e){ return e <= 0xF; }));

    // Make a local copy of the element array.
    int8_t elements[16] = {};
    std::copy(components.begin(), components.end(), std::begin(elements));

    while (numElements > 0) {
        // If the first element of the swizzle is zero...
        if (elements[0] != 0) {
            break;
        }
        // ...and zero isn't used elsewhere in the swizzle...
        if (std::any_of(&elements[1], &elements[numElements], [](int8_t e) { return e == 0; })) {
            break;
        }
        // We can omit the first slot from the swizzle entirely.
        // Slide everything forward by one slot, and reduce the element index by one.
        for (int index = 1; index < numElements; ++index) {
            elements[index - 1] = elements[index] - 1;
        }
        elements[numElements - 1] = 0;
        --consumedSlots;
        --numElements;
    }

    // A completely empty swizzle is a no-op.
    if (numElements == 0) {
        this->discard_stack(consumedSlots);
        return;
    }

    if (consumedSlots <= 4 && numElements <= 4) {
        // We can fit everything into a little swizzle.
        int op = (int)BuilderOp::swizzle_1 + numElements - 1;
        fInstructions.push_back({(BuilderOp)op, {}, consumedSlots,
                                 pack_nybbles(SkSpan(elements, numElements))});
        return;
    }

    // This is a big swizzle. We use the `shuffle` op to handle these.
    // Slot usage is packed into immA. The top 16 bits of immA count the consumed slots; the bottom
    // 16 bits count the generated slots.
    int slotUsage = consumedSlots << 16;
    slotUsage |= numElements;

    // Pack immB and immC with the shuffle list in packed-nybble form.
    fInstructions.push_back({BuilderOp::shuffle, {}, slotUsage,
                             pack_nybbles(SkSpan(&elements[0], 8)),
                             pack_nybbles(SkSpan(&elements[8], 8))});
}

void Builder::transpose(int columns, int rows) {
    // Transposes a matrix of size CxR on the stack (into a matrix of size RxC).
    int8_t elements[16] = {};
    size_t index = 0;
    for (int r = 0; r < rows; ++r) {
        for (int c = 0; c < columns; ++c) {
            elements[index++] = (c * rows) + r;
        }
    }
    this->swizzle(/*consumedSlots=*/columns * rows, SkSpan(elements, index));
}

void Builder::diagonal_matrix(int columns, int rows) {
    // Generates a CxR diagonal matrix from the top two scalars on the stack.
    int8_t elements[16] = {};
    size_t index = 0;
    for (int c = 0; c < columns; ++c) {
        for (int r = 0; r < rows; ++r) {
            elements[index++] = (c == r) ? 1 : 0;
        }
    }
    this->swizzle(/*consumedSlots=*/2, SkSpan(elements, index));
}

void Builder::matrix_resize(int origColumns, int origRows, int newColumns, int newRows) {
    // Resizes a CxR matrix at the top of the stack to C'xR'.
    int8_t elements[16] = {};
    size_t index = 0;
    size_t consumedSlots = origColumns * origRows;
    size_t zeroOffset = 0, oneOffset = 0;

    for (int c = 0; c < newColumns; ++c) {
        for (int r = 0; r < newRows; ++r) {
            if (c < origColumns && r < origRows) {
                // Push an element from the original matrix.
                elements[index++] = (c * origRows) + r;
            } else {
                // This element is outside the original matrix; push 1 or 0.
                if (c == r) {
                    // We need to synthesize a literal 1.
                    if (oneOffset == 0) {
                        this->push_literal_f(1.0f);
                        oneOffset = consumedSlots++;
                    }
                    elements[index++] = oneOffset;
                } else {
                    // We need to synthesize a literal 0.
                    if (zeroOffset == 0) {
                        this->push_zeros(1);
                        zeroOffset = consumedSlots++;
                    }
                    elements[index++] = zeroOffset;
                }
            }
        }
    }
    this->swizzle(consumedSlots, SkSpan(elements, index));
}
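// For illustration: transpose(/*columns=*/2, /*rows=*/3) emits the swizzle {0, 3, 1, 4, 2, 5},
// turning a column-major 2x3 matrix on the stack into its 3x2 transpose. Similarly,
// diagonal_matrix(2, 2) emits {1, 0, 0, 1}, selecting slot 1 (the diagonal value) where
// c == r and slot 0 (the zero) everywhere else.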
std::unique_ptr<Program> Builder::finish(int numValueSlots,
                                         int numUniformSlots,
                                         SkRPDebugTrace* debugTrace) {
    // Verify that calls to enableExecutionMaskWrites and disableExecutionMaskWrites are balanced.
    SkASSERT(fExecutionMaskWritesEnabled == 0);

    return std::make_unique<Program>(std::move(fInstructions), numValueSlots, numUniformSlots,
                                     fNumLabels, debugTrace);
}

void Program::optimize() {
    // TODO(johnstiles): perform any last-minute cleanup of the instruction stream here
}

static int stack_usage(const Instruction& inst) {
    switch (inst.fOp) {
        case BuilderOp::push_literal:
        case BuilderOp::push_condition_mask:
        case BuilderOp::push_loop_mask:
        case BuilderOp::push_return_mask:
            return 1;

        case BuilderOp::push_src_rgba:
        case BuilderOp::push_dst_rgba:
            return 4;

        case BuilderOp::push_slots:
        case BuilderOp::push_uniform:
        case BuilderOp::push_zeros:
        case BuilderOp::push_clone:
        case BuilderOp::push_clone_from_stack:
            return inst.fImmA;

        case BuilderOp::pop_condition_mask:
        case BuilderOp::pop_loop_mask:
        case BuilderOp::pop_and_reenable_loop_mask:
        case BuilderOp::pop_return_mask:
            return -1;

        case BuilderOp::pop_src_rg:
            return -2;

        case BuilderOp::pop_src_rgba:
        case BuilderOp::pop_dst_rgba:
            return -4;

        case ALL_N_WAY_BINARY_OP_CASES:
        case ALL_MULTI_SLOT_BINARY_OP_CASES:
        case BuilderOp::discard_stack:
        case BuilderOp::select:
            return -inst.fImmA;

        case ALL_MULTI_SLOT_TERNARY_OP_CASES:
            return 2 * -inst.fImmA;

        case BuilderOp::swizzle_1:
            return 1 - inst.fImmA;  // consumes immA slots and emits a scalar
        case BuilderOp::swizzle_2:
            return 2 - inst.fImmA;  // consumes immA slots and emits a 2-slot vector
        case BuilderOp::swizzle_3:
            return 3 - inst.fImmA;  // consumes immA slots and emits a 3-slot vector
        case BuilderOp::swizzle_4:
            return 4 - inst.fImmA;  // consumes immA slots and emits a 4-slot vector

        case BuilderOp::dot_2_floats:
            return -3;  // consumes two 2-slot vectors and emits one scalar
        case BuilderOp::dot_3_floats:
            return -5;  // consumes two 3-slot vectors and emits one scalar
        case BuilderOp::dot_4_floats:
            return -7;  // consumes two 4-slot vectors and emits one scalar

        case BuilderOp::shuffle: {
            int consumed = inst.fImmA >> 16;
            int generated = inst.fImmA & 0xFFFF;
            return generated - consumed;
        }
        case ALL_SINGLE_SLOT_UNARY_OP_CASES:
        case ALL_MULTI_SLOT_UNARY_OP_CASES:
        default:
            return 0;
    }
}

Program::StackDepthMap Program::tempStackMaxDepths() const {
    StackDepthMap largest;
    StackDepthMap current;

    int curIdx = 0;
    for (const Instruction& inst : fInstructions) {
        if (inst.fOp == BuilderOp::set_current_stack) {
            curIdx = inst.fImmA;
        }
        current[curIdx] += stack_usage(inst);
        largest[curIdx] = std::max(current[curIdx], largest[curIdx]);
        SkASSERTF(current[curIdx] >= 0, "unbalanced temp stack push/pop on stack %d", curIdx);
    }

    for (const auto& [stackIdx, depth] : current) {
        (void)stackIdx;
        SkASSERTF(depth == 0, "unbalanced temp stack push/pop");
    }

    return largest;
}

Program::Program(SkTArray<Instruction> instrs,
                 int numValueSlots,
                 int numUniformSlots,
                 int numLabels,
                 SkRPDebugTrace* debugTrace)
        : fInstructions(std::move(instrs))
        , fNumValueSlots(numValueSlots)
        , fNumUniformSlots(numUniformSlots)
        , fNumLabels(numLabels)
        , fDebugTrace(debugTrace) {
    this->optimize();

    fTempStackMaxDepths = this->tempStackMaxDepths();

    fNumTempStackSlots = 0;
    for (const auto& [stackIdx, depth] : fTempStackMaxDepths) {
        (void)stackIdx;
        fNumTempStackSlots += depth;
    }
}
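// A small worked example of this accounting (an invented program, all on stack 0):
//     push_slots(4)     -> depth 4
//     add_n_floats(2)   -> depth 2   (binary ops consume one whole operand)
//     discard_stack(2)  -> depth 0
// tempStackMaxDepths() records {0: 4}, so allocateSlotData() will reserve four vector-wide
// slots for that stack. Note that shuffle packs both of its counts into immA as
// (consumed << 16) | generated, which is why stack_usage() unpacks them separately.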
void Program::appendCopy(SkTArray<Stage>* pipeline,
                         SkArenaAlloc* alloc,
                         ProgramOp baseStage,
                         float* dst, int dstStride,
                         const float* src, int srcStride,
                         int numSlots) const {
    SkASSERT(numSlots >= 0);
    while (numSlots > 4) {
        this->appendCopy(pipeline, alloc, baseStage, dst, dstStride, src, srcStride,
                         /*numSlots=*/4);
        dst += 4 * dstStride;
        src += 4 * srcStride;
        numSlots -= 4;
    }

    if (numSlots > 0) {
        SkASSERT(numSlots <= 4);
        auto stage = (ProgramOp)((int)baseStage + numSlots - 1);
        auto* ctx = alloc->make<SkRasterPipeline_BinaryOpCtx>();
        ctx->dst = dst;
        ctx->src = src;
        pipeline->push_back({stage, ctx});
    }
}

void Program::appendCopySlotsUnmasked(SkTArray<Stage>* pipeline,
                                      SkArenaAlloc* alloc,
                                      float* dst,
                                      const float* src,
                                      int numSlots) const {
    this->appendCopy(pipeline, alloc, ProgramOp::copy_slot_unmasked,
                     dst, /*dstStride=*/SkOpts::raster_pipeline_highp_stride,
                     src, /*srcStride=*/SkOpts::raster_pipeline_highp_stride,
                     numSlots);
}

void Program::appendCopySlotsMasked(SkTArray<Stage>* pipeline,
                                    SkArenaAlloc* alloc,
                                    float* dst,
                                    const float* src,
                                    int numSlots) const {
    this->appendCopy(pipeline, alloc, ProgramOp::copy_slot_masked,
                     dst, /*dstStride=*/SkOpts::raster_pipeline_highp_stride,
                     src, /*srcStride=*/SkOpts::raster_pipeline_highp_stride,
                     numSlots);
}

void Program::appendCopyConstants(SkTArray<Stage>* pipeline,
                                  SkArenaAlloc* alloc,
                                  float* dst,
                                  const float* src,
                                  int numSlots) const {
    this->appendCopy(pipeline, alloc, ProgramOp::copy_constant,
                     dst, /*dstStride=*/SkOpts::raster_pipeline_highp_stride,
                     src, /*srcStride=*/1,
                     numSlots);
}

void Program::appendSingleSlotUnaryOp(SkTArray<Stage>* pipeline, ProgramOp stage,
                                      float* dst, int numSlots) const {
    SkASSERT(numSlots >= 0);
    while (numSlots--) {
        pipeline->push_back({stage, dst});
        dst += SkOpts::raster_pipeline_highp_stride;
    }
}

void Program::appendMultiSlotUnaryOp(SkTArray<Stage>* pipeline, ProgramOp baseStage,
                                     float* dst, int numSlots) const {
    SkASSERT(numSlots >= 0);
    while (numSlots > 4) {
        this->appendMultiSlotUnaryOp(pipeline, baseStage, dst, /*numSlots=*/4);
        dst += 4 * SkOpts::raster_pipeline_highp_stride;
        numSlots -= 4;
    }

    SkASSERT(numSlots <= 4);
    auto stage = (ProgramOp)((int)baseStage + numSlots - 1);
    pipeline->push_back({stage, dst});
}

void Program::appendAdjacentNWayBinaryOp(SkTArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                         ProgramOp stage,
                                         float* dst, const float* src, int numSlots) const {
    // The source and destination must be directly next to one another.
    SkASSERT(numSlots >= 0);
    SkASSERT((dst + SkOpts::raster_pipeline_highp_stride * numSlots) == src);

    if (numSlots > 0) {
        auto ctx = alloc->make<SkRasterPipeline_BinaryOpCtx>();
        ctx->dst = dst;
        ctx->src = src;
        pipeline->push_back({stage, ctx});
        return;
    }
}

void Program::appendAdjacentMultiSlotBinaryOp(SkTArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                              ProgramOp baseStage,
                                              float* dst, const float* src, int numSlots) const {
    // The source and destination must be directly next to one another.
    SkASSERT(numSlots >= 0);
    SkASSERT((dst + SkOpts::raster_pipeline_highp_stride * numSlots) == src);

    if (numSlots > 4) {
        this->appendAdjacentNWayBinaryOp(pipeline, alloc, baseStage, dst, src, numSlots);
        return;
    }
    if (numSlots > 0) {
        auto specializedStage = (ProgramOp)((int)baseStage + numSlots);
        pipeline->push_back({specializedStage, dst});
    }
}
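// Illustration of the stage specialization above, under an assumed op ordering in
// SkRasterPipelineOpList: for an op family laid out as {op_n, op_1, op_2, op_3, op_4},
// numSlots == 3 selects baseStage + 3 (the fixed 3-slot variant, which needs no context
// struct), while numSlots > 4 falls back to the N-way variant with an explicit
// SkRasterPipeline_BinaryOpCtx describing the dst/src pointers.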
void Program::appendAdjacentMultiSlotTernaryOp(SkTArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                               ProgramOp baseStage, float* dst,
                                               const float* src0, const float* src1,
                                               int numSlots) const {
    // The float pointers must all be immediately adjacent to each other.
    SkASSERT(numSlots >= 0);
    SkASSERT((dst  + SkOpts::raster_pipeline_highp_stride * numSlots) == src0);
    SkASSERT((src0 + SkOpts::raster_pipeline_highp_stride * numSlots) == src1);

    if (numSlots > 4) {
        auto ctx = alloc->make<SkRasterPipeline_TernaryOpCtx>();
        ctx->dst = dst;
        ctx->src0 = src0;
        ctx->src1 = src1;
        pipeline->push_back({baseStage, ctx});
        return;
    }
    if (numSlots > 0) {
        auto specializedStage = (ProgramOp)((int)baseStage + numSlots);
        pipeline->push_back({specializedStage, dst});
    }
}

void Program::appendStackRewind(SkTArray<Stage>* pipeline) const {
#if defined(SKSL_STANDALONE) || !SK_HAS_MUSTTAIL
    pipeline->push_back({ProgramOp::stack_rewind, nullptr});
#endif
}

static void* context_bit_pun(intptr_t val) {
    return sk_bit_cast<void*>(val);
}

Program::SlotData Program::allocateSlotData(SkArenaAlloc* alloc) const {
    // Allocate a contiguous slab of slot data for values and stack entries.
    const int N = SkOpts::raster_pipeline_highp_stride;
    const int vectorWidth = N * sizeof(float);
    const int allocSize = vectorWidth * (fNumValueSlots + fNumTempStackSlots);
    float* slotPtr = static_cast<float*>(alloc->makeBytesAlignedTo(allocSize, vectorWidth));
    sk_bzero(slotPtr, allocSize);

    // Store the temp stack immediately after the values.
    SlotData s;
    s.values = SkSpan{slotPtr,        N * fNumValueSlots};
    s.stack  = SkSpan{s.values.end(), N * fNumTempStackSlots};
    return s;
}
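// Layout sketch, with assumed sizes (N == SkOpts::raster_pipeline_highp_stride == 8 lanes,
// fNumValueSlots == 3, fNumTempStackSlots == 2); each box is one vector-wide slot of 32 bytes:
//
//     | value0 | value1 | value2 | stack0 | stack1 |
//
// s.values spans the first 3*N floats and s.stack begins at s.values.end(), so the temp
// stacks sit immediately after the value slots in one contiguous, zeroed allocation.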
#if !defined(SKSL_STANDALONE)

bool Program::appendStages(SkRasterPipeline* pipeline,
                           SkArenaAlloc* alloc,
                           RP::Callbacks* callbacks,
                           SkSpan<float> uniforms) const {
    // Convert our Instruction list to an array of ProgramOps.
    SkTArray<Stage> stages;
    this->makeStages(&stages, alloc, uniforms, this->allocateSlotData(alloc));

    // Allocate buffers for branch targets and labels; these are needed to convert labels into
    // actual offsets into the pipeline and fix up branches.
    SkTArray<SkRasterPipeline_BranchCtx*> branchContexts;
    branchContexts.reserve_back(fNumLabels);
    SkTArray<int> labelOffsets;
    labelOffsets.push_back_n(fNumLabels, -1);
    SkTArray<int> branchGoesToLabel;
    branchGoesToLabel.reserve_back(fNumLabels);

    for (const Stage& stage : stages) {
        switch (stage.op) {
            case ProgramOp::stack_rewind:
                pipeline->append_stack_rewind();
                break;

            case ProgramOp::invoke_shader:
                if (!callbacks || !callbacks->appendShader(sk_bit_cast<intptr_t>(stage.ctx))) {
                    return false;
                }
                break;

            case ProgramOp::invoke_color_filter:
                if (!callbacks ||
                    !callbacks->appendColorFilter(sk_bit_cast<intptr_t>(stage.ctx))) {
                    return false;
                }
                break;

            case ProgramOp::invoke_blender:
                if (!callbacks || !callbacks->appendBlender(sk_bit_cast<intptr_t>(stage.ctx))) {
                    return false;
                }
                break;

            case ProgramOp::label: {
                // Remember the absolute pipeline position of this label.
                int labelID = sk_bit_cast<intptr_t>(stage.ctx);
                SkASSERT(labelID >= 0 && labelID < fNumLabels);
                labelOffsets[labelID] = pipeline->getNumStages();
                break;
            }
            case ProgramOp::jump:
            case ProgramOp::branch_if_any_active_lanes:
            case ProgramOp::branch_if_no_active_lanes:
            case ProgramOp::branch_if_no_active_lanes_eq: {
                // The branch context contains a valid label ID at this point.
                auto* branchCtx = static_cast<SkRasterPipeline_BranchCtx*>(stage.ctx);
                int labelID = branchCtx->offset;
                SkASSERT(labelID >= 0 && labelID < fNumLabels);

                // Replace the label ID in the branch context with the absolute pipeline position.
                // We will go back over the branch targets at the end and fix them up.
                branchCtx->offset = pipeline->getNumStages();

                SkASSERT(branchContexts.size() == branchGoesToLabel.size());
                branchContexts.push_back(branchCtx);
                branchGoesToLabel.push_back(labelID);
                [[fallthrough]];
            }
            default:
                // Append a regular op to the program.
                SkASSERT((int)stage.op < kNumRasterPipelineHighpOps);
                pipeline->append((SkRasterPipelineOp)stage.op, stage.ctx);
                break;
        }
    }

    // Now that we have assembled the program and know the pipeline positions of each label and
    // branch, fix up every branch target.
    SkASSERT(branchContexts.size() == branchGoesToLabel.size());
    for (int index = 0; index < branchContexts.size(); ++index) {
        int branchFromIdx = branchContexts[index]->offset;
        int branchToIdx = labelOffsets[branchGoesToLabel[index]];
        branchContexts[index]->offset = branchToIdx - branchFromIdx;
    }

    return true;
}

#endif
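// Branch fix-up, by example (hypothetical stage indices): a branch recorded at pipeline
// position 10 that targets a label recorded at position 4 ends up with offset 4 - 10 = -6.
// Negative offsets are backwards branches (loops); positive offsets skip forward.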
void Program::makeStages(SkTArray<Stage>* pipeline,
                         SkArenaAlloc* alloc,
                         SkSpan<float> uniforms,
                         const SlotData& slots) const {
    SkASSERT(fNumUniformSlots == SkToInt(uniforms.size()));
    const int N = SkOpts::raster_pipeline_highp_stride;
    StackDepthMap tempStackDepth;
    int currentStack = 0;
    int mostRecentRewind = 0;

    // Assemble a map holding the current stack-top for each temporary stack. Position each temp
    // stack immediately after the previous temp stack; temp stacks are never allowed to overlap.
    int pos = 0;
    SkTHashMap<int, float*> tempStackMap;
    for (auto& [idx, depth] : fTempStackMaxDepths) {
        tempStackMap[idx] = slots.stack.begin() + (pos * N);
        pos += depth;
    }

    // Track labels that we have reached in processing.
    SkBitSet labelsEncountered(fNumLabels);

    auto EmitStackRewindForBackwardsBranch = [&](int labelID) {
        // If we have already encountered the label associated with this branch, this is a
        // backwards branch. Add a stack-rewind immediately before the branch to ensure that
        // long-running loops don't use an unbounded amount of stack space.
        if (labelsEncountered.test(labelID)) {
            this->appendStackRewind(pipeline);
            mostRecentRewind = pipeline->size();
        }
    };

    // We can reuse constants from our arena by placing them in this map.
    SkTHashMap<int, int*> constantLookupMap;

    // Write each BuilderOp to the pipeline array.
    pipeline->reserve_back(fInstructions.size());
    for (const Instruction& inst : fInstructions) {
        auto SlotA    = [&]() { return &slots.values[N * inst.fSlotA]; };
        auto SlotB    = [&]() { return &slots.values[N * inst.fSlotB]; };
        auto UniformA = [&]() { return &uniforms[inst.fSlotA]; };
        float*& tempStackPtr = tempStackMap[currentStack];

        switch (inst.fOp) {
            case BuilderOp::label:
                SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);
                labelsEncountered.set(inst.fImmA);
                pipeline->push_back({ProgramOp::label, context_bit_pun(inst.fImmA)});
                break;

            case BuilderOp::jump:
            case BuilderOp::branch_if_any_active_lanes:
            case BuilderOp::branch_if_no_active_lanes: {
                SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);
                EmitStackRewindForBackwardsBranch(inst.fImmA);

                auto* ctx = alloc->make<SkRasterPipeline_BranchCtx>();
                ctx->offset = inst.fImmA;
                pipeline->push_back({(ProgramOp)inst.fOp, ctx});
                break;
            }
            case BuilderOp::branch_if_no_active_lanes_on_stack_top_equal: {
                SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);
                EmitStackRewindForBackwardsBranch(inst.fImmA);

                auto* ctx = alloc->make<SkRasterPipeline_BranchIfEqualCtx>();
                ctx->offset = inst.fImmA;
                ctx->value = inst.fImmB;
                ctx->ptr = reinterpret_cast<int*>(tempStackPtr - N);
                pipeline->push_back({ProgramOp::branch_if_no_active_lanes_eq, ctx});
                break;
            }
            case BuilderOp::init_lane_masks:
                pipeline->push_back({ProgramOp::init_lane_masks, nullptr});
                break;

            case BuilderOp::store_src_rg:
                pipeline->push_back({ProgramOp::store_src_rg, SlotA()});
                break;

            case BuilderOp::store_src:
                pipeline->push_back({ProgramOp::store_src, SlotA()});
                break;

            case BuilderOp::store_dst:
                pipeline->push_back({ProgramOp::store_dst, SlotA()});
                break;

            case BuilderOp::store_device_xy01:
                pipeline->push_back({ProgramOp::store_device_xy01, SlotA()});
                break;

            case BuilderOp::load_src:
                pipeline->push_back({ProgramOp::load_src, SlotA()});
                break;

            case BuilderOp::load_dst:
                pipeline->push_back({ProgramOp::load_dst, SlotA()});
                break;

            case ALL_SINGLE_SLOT_UNARY_OP_CASES: {
                float* dst = tempStackPtr - (inst.fImmA * N);
                this->appendSingleSlotUnaryOp(pipeline, (ProgramOp)inst.fOp, dst, inst.fImmA);
                break;
            }
            case ALL_MULTI_SLOT_UNARY_OP_CASES: {
                float* dst = tempStackPtr - (inst.fImmA * N);
                this->appendMultiSlotUnaryOp(pipeline, (ProgramOp)inst.fOp, dst, inst.fImmA);
                break;
            }
            case ALL_N_WAY_BINARY_OP_CASES: {
                float* src = tempStackPtr - (inst.fImmA * N);
                float* dst = tempStackPtr - (inst.fImmA * 2 * N);
                this->appendAdjacentNWayBinaryOp(pipeline, alloc, (ProgramOp)inst.fOp,
                                                 dst, src, inst.fImmA);
                break;
            }
            case ALL_MULTI_SLOT_BINARY_OP_CASES: {
                float* src = tempStackPtr - (inst.fImmA * N);
                float* dst = tempStackPtr - (inst.fImmA * 2 * N);
                this->appendAdjacentMultiSlotBinaryOp(pipeline, alloc, (ProgramOp)inst.fOp,
                                                      dst, src, inst.fImmA);
                break;
            }
            case ALL_MULTI_SLOT_TERNARY_OP_CASES: {
                float* src1 = tempStackPtr - (inst.fImmA * N);
                float* src0 = tempStackPtr - (inst.fImmA * 2 * N);
                float* dst  = tempStackPtr - (inst.fImmA * 3 * N);
                this->appendAdjacentMultiSlotTernaryOp(pipeline, alloc, (ProgramOp)inst.fOp,
                                                       dst, src0, src1, inst.fImmA);
                break;
            }
            case BuilderOp::select: {
                float* src = tempStackPtr - (inst.fImmA * N);
                float* dst = tempStackPtr - (inst.fImmA * 2 * N);
                this->appendCopySlotsMasked(pipeline, alloc, dst, src, inst.fImmA);
                break;
            }
            case BuilderOp::copy_slot_masked:
                this->appendCopySlotsMasked(pipeline, alloc, SlotA(), SlotB(), inst.fImmA);
                break;

            case BuilderOp::copy_slot_unmasked:
                this->appendCopySlotsUnmasked(pipeline, alloc, SlotA(), SlotB(), inst.fImmA);
                break;

            case BuilderOp::zero_slot_unmasked:
                this->appendMultiSlotUnaryOp(pipeline, ProgramOp::zero_slot_unmasked, SlotA(),
                                             inst.fImmA);
                break;
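            // Stack addressing for the arithmetic cases above, sketched for a binary op with
            // immA == 2 (slot positions are invented for illustration):
            //
            //     ... | dst0 | dst1 | src0 | src1 |  <- tempStackPtr
            //
            // dst sits 2*immA slots below the stack top and src sits immA slots below; after
            // the op runs, stack_usage() shrinks the stack so the result in dst is the new top.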
            case BuilderOp::dot_2_floats:
            case BuilderOp::dot_3_floats:
            case BuilderOp::dot_4_floats: {
                float* dst = tempStackPtr - (inst.fImmA * 2 * N);
                pipeline->push_back({(ProgramOp)inst.fOp, dst});
                break;
            }
            case BuilderOp::swizzle_1:
            case BuilderOp::swizzle_2:
            case BuilderOp::swizzle_3:
            case BuilderOp::swizzle_4: {
                auto* ctx = alloc->make<SkRasterPipeline_SwizzleCtx>();
                ctx->ptr = tempStackPtr - (N * inst.fImmA);
                // Unpack component nybbles into byte-offsets pointing at stack slots.
                unpack_nybbles_to_offsets(inst.fImmB, SkSpan(ctx->offsets));
                pipeline->push_back({(ProgramOp)inst.fOp, ctx});
                break;
            }
            case BuilderOp::shuffle: {
                int consumed = inst.fImmA >> 16;
                int generated = inst.fImmA & 0xFFFF;

                auto* ctx = alloc->make<SkRasterPipeline_ShuffleCtx>();
                ctx->ptr = tempStackPtr - (N * consumed);
                ctx->count = generated;
                // Unpack immB and immC from nybble form into the offset array.
                unpack_nybbles_to_offsets(inst.fImmB, SkSpan(&ctx->offsets[0], 8));
                unpack_nybbles_to_offsets(inst.fImmC, SkSpan(&ctx->offsets[8], 8));
                pipeline->push_back({ProgramOp::shuffle, ctx});
                break;
            }
            case BuilderOp::push_src_rgba: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_src, dst});
                break;
            }
            case BuilderOp::push_dst_rgba: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_dst, dst});
                break;
            }
            case BuilderOp::pop_src_rg: {
                float* dst = tempStackPtr - (2 * N);
                pipeline->push_back({ProgramOp::load_src_rg, dst});
                break;
            }
            case BuilderOp::pop_src_rgba: {
                float* dst = tempStackPtr - (4 * N);
                pipeline->push_back({ProgramOp::load_src, dst});
                break;
            }
            case BuilderOp::pop_dst_rgba: {
                float* dst = tempStackPtr - (4 * N);
                pipeline->push_back({ProgramOp::load_dst, dst});
                break;
            }
            case BuilderOp::push_slots: {
                float* dst = tempStackPtr;
                this->appendCopySlotsUnmasked(pipeline, alloc, dst, SlotA(), inst.fImmA);
                break;
            }
            case BuilderOp::push_uniform: {
                float* dst = tempStackPtr;
                this->appendCopyConstants(pipeline, alloc, dst, UniformA(), inst.fImmA);
                break;
            }
            case BuilderOp::push_zeros: {
                float* dst = tempStackPtr;
                this->appendMultiSlotUnaryOp(pipeline, ProgramOp::zero_slot_unmasked, dst,
                                             inst.fImmA);
                break;
            }
            case BuilderOp::push_condition_mask: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_condition_mask, dst});
                break;
            }
            case BuilderOp::pop_condition_mask: {
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::load_condition_mask, src});
                break;
            }
            case BuilderOp::merge_condition_mask: {
                float* ptr = tempStackPtr - (2 * N);
                pipeline->push_back({ProgramOp::merge_condition_mask, ptr});
                break;
            }
            case BuilderOp::push_loop_mask: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_loop_mask, dst});
                break;
            }
            case BuilderOp::pop_loop_mask: {
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::load_loop_mask, src});
                break;
            }
            case BuilderOp::pop_and_reenable_loop_mask: {
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::reenable_loop_mask, src});
                break;
            }
            case BuilderOp::reenable_loop_mask:
                pipeline->push_back({ProgramOp::reenable_loop_mask, SlotA()});
                break;

            case BuilderOp::mask_off_loop_mask:
                pipeline->push_back({ProgramOp::mask_off_loop_mask, nullptr});
                break;

            case BuilderOp::merge_loop_mask: {
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::merge_loop_mask, src});
                break;
            }
            case BuilderOp::push_return_mask: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_return_mask, dst});
                break;
            }
            case BuilderOp::pop_return_mask: {
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::load_return_mask, src});
                break;
            }
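            // For context, one illustrative (not authoritative) lowering of `if (cond)`: the
            // builder brackets the conditional with push_condition_mask / merge_condition_mask
            // before the then-block and restores the prior mask with pop_condition_mask after
            // it; the pop cases above translate into load_* stages that read the saved mask
            // back from the temp stack.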
            case BuilderOp::mask_off_return_mask:
                pipeline->push_back({ProgramOp::mask_off_return_mask, nullptr});
                break;

            case BuilderOp::copy_constant:
            case BuilderOp::push_literal: {
                float* dst = (inst.fOp == BuilderOp::push_literal) ? tempStackPtr : SlotA();
                int* constantPtr;
                if (int** lookup = constantLookupMap.find(inst.fImmA)) {
                    constantPtr = *lookup;
                } else {
                    constantPtr = alloc->make<int>(inst.fImmA);
                    constantLookupMap[inst.fImmA] = constantPtr;
                }
                SkASSERT(constantPtr);
                this->appendCopyConstants(pipeline, alloc, dst, (float*)constantPtr,
                                          /*numSlots=*/1);
                break;
            }
            case BuilderOp::copy_stack_to_slots: {
                float* src = tempStackPtr - (inst.fImmB * N);
                this->appendCopySlotsMasked(pipeline, alloc, SlotA(), src, inst.fImmA);
                break;
            }
            case BuilderOp::copy_stack_to_slots_unmasked: {
                float* src = tempStackPtr - (inst.fImmB * N);
                this->appendCopySlotsUnmasked(pipeline, alloc, SlotA(), src, inst.fImmA);
                break;
            }
            case BuilderOp::swizzle_copy_stack_to_slots: {
                auto stage = (ProgramOp)((int)ProgramOp::swizzle_copy_slot_masked +
                                         inst.fImmA - 1);
                auto* ctx = alloc->make<SkRasterPipeline_SwizzleCopyCtx>();
                ctx->src = tempStackPtr - (inst.fImmB * N);
                ctx->dst = SlotA();
                unpack_nybbles_to_offsets(inst.fImmC, SkSpan(ctx->offsets));
                pipeline->push_back({stage, ctx});
                break;
            }
            case BuilderOp::push_clone: {
                float* src = tempStackPtr - (inst.fImmB * N);
                float* dst = tempStackPtr;
                this->appendCopySlotsUnmasked(pipeline, alloc, dst, src, inst.fImmA);
                break;
            }
            case BuilderOp::push_clone_from_stack: {
                float* sourceStackPtr = tempStackMap[inst.fImmB];
                float* src = sourceStackPtr - (inst.fImmC * N);
                float* dst = tempStackPtr;
                this->appendCopySlotsUnmasked(pipeline, alloc, dst, src, inst.fImmA);
                break;
            }
            case BuilderOp::case_op: {
                auto* ctx = alloc->make<SkRasterPipeline_CaseOpCtx>();
                ctx->ptr = reinterpret_cast<int*>(tempStackPtr - 2 * N);
                ctx->expectedValue = inst.fImmA;
                pipeline->push_back({ProgramOp::case_op, ctx});
                break;
            }
            case BuilderOp::discard_stack:
                break;

            case BuilderOp::set_current_stack:
                currentStack = inst.fImmA;
                break;

            case BuilderOp::invoke_shader:
            case BuilderOp::invoke_color_filter:
            case BuilderOp::invoke_blender:
                pipeline->push_back({(ProgramOp)inst.fOp, context_bit_pun(inst.fImmA)});
                break;

            default:
                SkDEBUGFAILF("Raster Pipeline: unsupported instruction %d", (int)inst.fOp);
                break;
        }

        tempStackPtr += stack_usage(inst) * N;
        SkASSERT(tempStackPtr >= slots.stack.begin());
        SkASSERT(tempStackPtr <= slots.stack.end());

        // Rewind the stack every 500 instructions. When SK_HAS_MUSTTAIL is set, rewinds are not
        // actually used; the appendStackRewind call becomes a no-op. On platforms that don't
        // support SK_HAS_MUSTTAIL, rewinding the stack periodically can prevent a potential
        // stack overflow when running a long program.
        int numPipelineStages = pipeline->size();
        if (numPipelineStages - mostRecentRewind > 500) {
            this->appendStackRewind(pipeline);
            mostRecentRewind = numPipelineStages;
        }
    }
}
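// Rewind cadence, illustrated with a hypothetical program: in a pipeline of ~1200 stages,
// rewinds land near stages 500 and 1000, plus one immediately before every backwards branch
// (see EmitStackRewindForBackwardsBranch above). When SK_HAS_MUSTTAIL is set, the stages
// tail-call one another and appendStackRewind emits nothing, so these insertions are free.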
// Finds duplicate names in the program and disambiguates them with subscripts.
SkTArray<std::string> build_unique_slot_name_list(const SkRPDebugTrace* debugTrace) {
    SkTArray<std::string> slotName;
    if (debugTrace) {
        slotName.reserve_back(debugTrace->fSlotInfo.size());

        // The map consists of <variable name, <source position, unique name>>.
        SkTHashMap<std::string, SkTHashMap<int, std::string>> uniqueNameMap;

        for (const SlotDebugInfo& slotInfo : debugTrace->fSlotInfo) {
            // Look up this variable by its name and source position.
            int pos = slotInfo.pos.valid() ? slotInfo.pos.startOffset() : 0;
            SkTHashMap<int, std::string>& positionMap = uniqueNameMap[slotInfo.name];
            std::string& uniqueName = positionMap[pos];

            // Have we seen this variable name/position combination before?
            if (uniqueName.empty()) {
                // This is a unique name/position pair.
                uniqueName = slotInfo.name;

                // But if it's not a unique _name_, it deserves a subscript to disambiguate it.
                // For example, a second distinct variable named `x` becomes `x₁`.
                int subscript = positionMap.count() - 1;
                if (subscript > 0) {
                    for (char digit : std::to_string(subscript)) {
                        // U+2080 through U+2089 (₀₁₂₃₄₅₆₇₈₉) in UTF8:
                        uniqueName.push_back((char)0xE2);
                        uniqueName.push_back((char)0x82);
                        uniqueName.push_back((char)(0x80 + digit - '0'));
                    }
                }
            }

            slotName.push_back(uniqueName);
        }
    }
    return slotName;
}

void Program::dump(SkWStream* out) const {
    // Allocate memory for the slot and uniform data, even though the program won't ever be
    // executed. The program requires pointer ranges for managing its data, and ASAN will report
    // errors if those pointers are pointing at unallocated memory.
    SkArenaAlloc alloc(/*firstHeapAllocation=*/1000);
    const int N = SkOpts::raster_pipeline_highp_stride;
    SlotData slots = this->allocateSlotData(&alloc);
    float* uniformPtr = alloc.makeArray<float>(fNumUniformSlots);
    SkSpan<float> uniforms = SkSpan(uniformPtr, fNumUniformSlots);

    // Turn this program into an array of Raster Pipeline stages.
    SkTArray<Stage> stages;
    this->makeStages(&stages, &alloc, uniforms, slots);

    // Find the labels in the program, and keep track of their offsets.
    SkTHashMap<int, int> labelToStageMap;
    //