/*
 * Copyright 2022 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SKSL_RASTERPIPELINEBUILDER
#define SKSL_RASTERPIPELINEBUILDER

#include "include/core/SkTypes.h"

#include "include/core/SkSpan.h"
#include "include/private/base/SkTArray.h"
#include "src/base/SkUtils.h"
#include "src/core/SkRasterPipelineOpList.h"

#include <cstddef>
#include <cstdint>
#include <memory>

class SkArenaAlloc;
class SkRasterPipeline;
class SkWStream;
using SkRPOffset = uint32_t;

namespace SkSL {

class DebugTracePriv;
class TraceHook;

namespace RP {

// A single scalar in our program consumes one slot.
using Slot = int;
constexpr Slot NA = -1;

// Scalars, vectors, and matrices can be represented as a range of slot indices.
struct SlotRange {
    Slot index = 0;
    int count = 0;
};

#define SKRP_EXTENDED_OPS(M)     \
    /* branch targets */         \
    M(label)                     \
                                 \
    /* child programs */         \
    M(invoke_shader)             \
    M(invoke_color_filter)       \
    M(invoke_blender)            \
                                 \
    /* color space transforms */ \
    M(invoke_to_linear_srgb)     \
    M(invoke_from_linear_srgb)

// An RP::Program will consist entirely of ProgramOps. The ProgramOps list is a superset of the
// native SkRasterPipelineOps op-list. It also has a few extra ops to indicate child-effect
// invocation, and a `label` op to indicate branch targets.
enum class ProgramOp {
    #define M(stage) stage,
        // A finished program can contain any native Raster Pipeline op...
        SK_RASTER_PIPELINE_OPS_ALL(M)

        // ... as well as our extended ops.
        SKRP_EXTENDED_OPS(M)
    #undef M
};
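// A minimal sketch of what the X-macro expansion above produces (the native op names shown are
// illustrative; the real list comes from SkRasterPipelineOpList.h):
//
//     enum class ProgramOp {
//         // SK_RASTER_PIPELINE_OPS_ALL(M) contributes entries such as:
//         load_src, store_src, /* ...every native op... */
//         // SKRP_EXTENDED_OPS(M) then appends the extended ops:
//         label,
//         invoke_shader, invoke_color_filter, invoke_blender,
//         invoke_to_linear_srgb, invoke_from_linear_srgb,
//     };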
// BuilderOps are a superset of ProgramOps. They are used by the RP::Builder, which works in terms
// of Instructions; Instructions are slightly more expressive than raw SkRasterPipelineOps. In
// particular, the Builder supports stacks for pushing and popping scratch values.
// RP::Program::makeStages is responsible for rewriting Instructions/BuilderOps into an array of
// RP::Program::Stages, which will contain only native SkRasterPipelineOps and (optionally)
// child-effect invocations.
enum class BuilderOp {
    #define M(stage) stage,
        // An in-flight program can contain all the native Raster Pipeline ops...
        SK_RASTER_PIPELINE_OPS_ALL(M)

        // ... and our extended ops...
        SKRP_EXTENDED_OPS(M)
    #undef M

    // ... and also has Builder-specific ops. These ops generally interface with the stack, and
    // are converted into ProgramOps during `makeStages`.
    push_clone,
    push_clone_from_stack,
    push_clone_indirect_from_stack,
    push_constant,
    push_immutable,
    push_immutable_indirect,
    push_slots,
    push_slots_indirect,
    push_uniform,
    push_uniform_indirect,
    copy_stack_to_slots,
    copy_stack_to_slots_unmasked,
    copy_stack_to_slots_indirect,
    copy_uniform_to_slots_unmasked,
    store_immutable_value,
    swizzle_copy_stack_to_slots,
    swizzle_copy_stack_to_slots_indirect,
    discard_stack,
    pad_stack,
    select,
    push_condition_mask,
    pop_condition_mask,
    push_loop_mask,
    pop_loop_mask,
    pop_and_reenable_loop_mask,
    push_return_mask,
    pop_return_mask,
    push_src_rgba,
    push_dst_rgba,
    push_device_xy01,
    pop_src_rgba,
    pop_dst_rgba,
    trace_var_indirect,
    branch_if_no_active_lanes_on_stack_top_equal,
    unsupported
};

// If the extended ops are not in sync between enums, program creation will not work.
static_assert((int)ProgramOp::label == (int)BuilderOp::label);

// Represents a single raster-pipeline SkSL instruction.
struct Instruction {
    BuilderOp fOp;
    Slot      fSlotA   = NA;
    Slot      fSlotB   = NA;
    int       fImmA    = 0;
    int       fImmB    = 0;
    int       fImmC    = 0;
    int       fImmD    = 0;
    int       fStackID = 0;
};
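// As an illustration of the encoding (a sketch only; the exact operand layout of each op is
// defined by the Builder's append calls, and `dstIndex`/`srcIndex` are hypothetical), a masked
// two-slot copy could be stored as:
//
//     Instruction inst = {BuilderOp::copy_slot_masked,
//                         /*fSlotA=*/dstIndex, /*fSlotB=*/srcIndex,
//                         /*fImmA=*/2};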
class Callbacks {
public:
    virtual ~Callbacks() = default;

    virtual bool appendShader(int index) = 0;
    virtual bool appendColorFilter(int index) = 0;
    virtual bool appendBlender(int index) = 0;

    virtual void toLinearSrgb(const void* color) = 0;
    virtual void fromLinearSrgb(const void* color) = 0;
};

class Program {
public:
    Program(skia_private::TArray<Instruction> instrs,
            int numValueSlots,
            int numUniformSlots,
            int numImmutableSlots,
            int numLabels,
            DebugTracePriv* debugTrace);
    ~Program();

    bool appendStages(SkRasterPipeline* pipeline,
                      SkArenaAlloc* alloc,
                      Callbacks* callbacks,
                      SkSpan<const float> uniforms) const;

    void dump(SkWStream* out, bool writeInstructionCount = false) const;

    int numUniforms() const { return fNumUniformSlots; }

private:
    using StackDepths = skia_private::TArray<int>;  // [stack index] = depth of stack

    struct SlotData {
        SkSpan<float> values;
        SkSpan<float> stack;
        SkSpan<float> immutable;
    };
    SlotData allocateSlotData(SkArenaAlloc* alloc) const;

    struct Stage {
        ProgramOp op;
        void*     ctx;
    };
    void makeStages(skia_private::TArray<Stage>* pipeline,
                    SkArenaAlloc* alloc,
                    SkSpan<const float> uniforms,
                    const SlotData& slots) const;
    void optimize();
    StackDepths tempStackMaxDepths() const;

    // These methods are used to split up multi-slot copies into multiple ops as needed.
    void appendCopy(skia_private::TArray<Stage>* pipeline,
                    SkArenaAlloc* alloc,
                    std::byte* basePtr,
                    ProgramOp baseStage,
                    SkRPOffset dst, int dstStride,
                    SkRPOffset src, int srcStride,
                    int numSlots) const;
    void appendCopyImmutableUnmasked(skia_private::TArray<Stage>* pipeline,
                                     SkArenaAlloc* alloc,
                                     std::byte* basePtr,
                                     SkRPOffset dst,
                                     SkRPOffset src,
                                     int numSlots) const;
    void appendCopySlotsUnmasked(skia_private::TArray<Stage>* pipeline,
                                 SkArenaAlloc* alloc,
                                 SkRPOffset dst,
                                 SkRPOffset src,
                                 int numSlots) const;
    void appendCopySlotsMasked(skia_private::TArray<Stage>* pipeline,
                               SkArenaAlloc* alloc,
                               SkRPOffset dst,
                               SkRPOffset src,
                               int numSlots) const;

    // Appends a single-slot single-input math operation to the pipeline. The op `stage` will be
    // appended `numSlots` times, starting at position `dst` and advancing one slot for each
    // subsequent invocation.
    void appendSingleSlotUnaryOp(skia_private::TArray<Stage>* pipeline, ProgramOp stage,
                                 float* dst, int numSlots) const;

    // Appends a multi-slot single-input math operation to the pipeline. `baseStage` must refer to
    // a single-slot "apply_op" stage, which must be immediately followed by specializations for
    // 2-4 slots. For instance, {`ceil_float`, `ceil_2_floats`, `ceil_3_floats`, `ceil_4_floats`}
    // must be contiguous ops in the stage list, listed in that order; pass `ceil_float` and we
    // pick the appropriate op based on `numSlots`.
    void appendMultiSlotUnaryOp(skia_private::TArray<Stage>* pipeline, ProgramOp baseStage,
                                float* dst, int numSlots) const;

    // Appends an immediate-mode binary operation to the pipeline. `baseStage` must refer to
    // a single-slot, immediate-mode "apply-imm" stage, which must be immediately preceded by
    // specializations for 2-4 slots if numSlots is greater than 1. For instance,
    // {`add_imm_4_ints`, `add_imm_3_ints`, `add_imm_2_ints`, `add_imm_int`} must be contiguous
    // ops in the stage list, listed in that order; pass `add_imm_int` and we pick the appropriate
    // op based on `numSlots`. Some immediate-mode binary ops are single-slot only in the interest
    // of code size; in this case, the multi-slot ops can be absent, but numSlots must be 1.
    void appendImmediateBinaryOp(skia_private::TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                 ProgramOp baseStage,
                                 SkRPOffset dst, int32_t value, int numSlots) const;

    // Appends a two-input math operation to the pipeline. `src` must be _immediately_ after `dst`
    // in memory. `stage` must refer to an unbounded "apply_to_n_slots" stage. A BinaryOpCtx
    // will be used to pass pointers to the destination and source; the delta between the two
    // pointers implicitly gives the number of slots.
    void appendAdjacentNWayBinaryOp(skia_private::TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                    ProgramOp stage,
                                    SkRPOffset dst, SkRPOffset src, int numSlots) const;
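    // For instance, `add_n_floats` is one such unbounded stage; a call along these lines
    // (a sketch; the offsets are illustrative) would add five adjacent slots starting at `src`
    // into the five slots starting at `dst`:
    //
    //     this->appendAdjacentNWayBinaryOp(pipeline, alloc, ProgramOp::add_n_floats,
    //                                      dst, src, /*numSlots=*/5);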
    // Appends a multi-slot two-input math operation to the pipeline. `src` must be _immediately_
    // after `dst` in memory. `baseStage` must refer to an unbounded "apply_to_n_slots" stage,
    // which must be immediately followed by specializations for 1-4 slots. For instance,
    // {`add_n_floats`, `add_float`, `add_2_floats`, `add_3_floats`, `add_4_floats`} must be
    // contiguous ops in the stage list, listed in that order; pass `add_n_floats` and we pick the
    // appropriate op based on `numSlots`.
    void appendAdjacentMultiSlotBinaryOp(skia_private::TArray<Stage>* pipeline,
                                         SkArenaAlloc* alloc,
                                         ProgramOp baseStage, std::byte* basePtr,
                                         SkRPOffset dst, SkRPOffset src, int numSlots) const;

    // Appends a multi-slot math operation having three inputs (dst, src0, src1) and one output
    // (dst) to the pipeline. The three inputs must be _immediately_ adjacent in memory.
    // `baseStage` must refer to an unbounded "apply_to_n_slots" stage, which must be immediately
    // followed by specializations for 1-4 slots.
    void appendAdjacentMultiSlotTernaryOp(skia_private::TArray<Stage>* pipeline,
                                          SkArenaAlloc* alloc, ProgramOp baseStage,
                                          std::byte* basePtr, SkRPOffset dst, SkRPOffset src0,
                                          SkRPOffset src1, int numSlots) const;

    // Appends a math operation having three inputs (dst, src0, src1) and one output (dst) to the
    // pipeline. The three inputs must be _immediately_ adjacent in memory. `stage` must refer to
    // an unbounded "apply_to_n_slots" stage. A TernaryOpCtx will be used to pass pointers to the
    // destination and sources; the delta between each pointer implicitly gives the slot count.
    void appendAdjacentNWayTernaryOp(skia_private::TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                     ProgramOp stage, std::byte* basePtr, SkRPOffset dst,
                                     SkRPOffset src0, SkRPOffset src1, int numSlots) const;

    // Appends a stack_rewind op on platforms where it is needed (when SK_HAS_MUSTTAIL is not
    // set).
    void appendStackRewind(skia_private::TArray<Stage>* pipeline) const;

    class Dumper;
    friend class Dumper;

    skia_private::TArray<Instruction> fInstructions;
    int fNumValueSlots = 0;
    int fNumUniformSlots = 0;
    int fNumImmutableSlots = 0;
    int fNumTempStackSlots = 0;
    int fNumLabels = 0;
    StackDepths fTempStackMaxDepths;
    DebugTracePriv* fDebugTrace = nullptr;
    std::unique_ptr<SkSL::TraceHook> fTraceHook;
};

class Builder {
public:
    /** Finalizes and optimizes the program. */
    std::unique_ptr<Program> finish(int numValueSlots,
                                    int numUniformSlots,
                                    int numImmutableSlots,
                                    DebugTracePriv* debugTrace = nullptr);
    /**
     * Peels off a label ID for use in the program. Set the label's position in the program with
     * the `label` instruction. Actually branch to the target with an instruction like
     * `branch_if_any_lanes_active` or `jump`.
     */
    int nextLabelID() {
        return fNumLabels++;
    }

    /**
     * The builder keeps track of the state of execution masks; when we know that the execution
     * mask is unaltered, we can generate simpler code. Code which alters the execution mask is
     * required to enable this flag.
     */
    void enableExecutionMaskWrites() {
        ++fExecutionMaskWritesEnabled;
    }

    void disableExecutionMaskWrites() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        --fExecutionMaskWritesEnabled;
    }

    bool executionMaskWritesAreEnabled() {
        return fExecutionMaskWritesEnabled > 0;
    }
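    // For example, code that alters the execution mask (say, an `if` statement) should bracket
    // its mask-writing instructions like this (a minimal sketch; the body is omitted):
    //
    //     builder.enableExecutionMaskWrites();
    //     builder.push_condition_mask();      // asserts that mask writes are enabled
    //     /* ...emit the condition and the masked body... */
    //     builder.pop_condition_mask();       // restore the saved mask
    //     builder.disableExecutionMaskWrites();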
    /** Assemble a program from the Raster Pipeline instructions below. */
    void init_lane_masks() {
        this->appendInstruction(BuilderOp::init_lane_masks, {});
    }

    void store_src_rg(SlotRange slots) {
        SkASSERT(slots.count == 2);
        this->appendInstruction(BuilderOp::store_src_rg, {slots.index});
    }

    void store_src(SlotRange slots) {
        SkASSERT(slots.count == 4);
        this->appendInstruction(BuilderOp::store_src, {slots.index});
    }

    void store_dst(SlotRange slots) {
        SkASSERT(slots.count == 4);
        this->appendInstruction(BuilderOp::store_dst, {slots.index});
    }

    void store_device_xy01(SlotRange slots) {
        SkASSERT(slots.count == 4);
        this->appendInstruction(BuilderOp::store_device_xy01, {slots.index});
    }

    void load_src(SlotRange slots) {
        SkASSERT(slots.count == 4);
        this->appendInstruction(BuilderOp::load_src, {slots.index});
    }

    void load_dst(SlotRange slots) {
        SkASSERT(slots.count == 4);
        this->appendInstruction(BuilderOp::load_dst, {slots.index});
    }

    void set_current_stack(int stackID) {
        fCurrentStackID = stackID;
    }

    // Inserts a label into the instruction stream.
    void label(int labelID);

    // Unconditionally branches to a label.
    void jump(int labelID);

    // Branches to a label if the execution mask is active in every lane.
    void branch_if_all_lanes_active(int labelID);

    // Branches to a label if the execution mask is active in any lane.
    void branch_if_any_lanes_active(int labelID);

    // Branches to a label if the execution mask is inactive across all lanes.
    void branch_if_no_lanes_active(int labelID);

    // Branches to a label if the top value on the stack is _not_ equal to `value` in any lane.
    void branch_if_no_active_lanes_on_stack_top_equal(int value, int labelID);

    // We use the same SkRasterPipeline op regardless of the literal type, and bitcast the value.
    void push_constant_i(int32_t val, int count = 1);

    void push_zeros(int count) {
        this->push_constant_i(/*val=*/0, count);
    }

    void push_constant_f(float val) {
        this->push_constant_i(sk_bit_cast<int32_t>(val), /*count=*/1);
    }

    void push_constant_u(uint32_t val, int count = 1) {
        this->push_constant_i(sk_bit_cast<int32_t>(val), count);
    }

    // Translates into copy_uniforms (from uniforms into temp stack) in Raster Pipeline.
    void push_uniform(SlotRange src);

    // Initializes the Raster Pipeline slot with a constant value when the program is first
    // created. Does not add any instructions to the program.
    void store_immutable_value_i(Slot slot, int32_t val) {
        this->appendInstruction(BuilderOp::store_immutable_value, {slot}, val);
    }

    // Translates into copy_uniforms (from uniforms into value-slots) in Raster Pipeline.
    void copy_uniform_to_slots_unmasked(SlotRange dst, SlotRange src);
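    // For example, pushing the constant vec4(0, 0, 0, 1) onto the temp stack could be emitted
    // like this (a minimal sketch):
    //
    //     builder.push_zeros(3);           // three zero slots for (0, 0, 0)
    //     builder.push_constant_f(1.0f);   // the float is bitcast to int32 internally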
    // Translates into copy_from_indirect_uniform_unmasked (from uniforms into temp stack) in
    // Raster Pipeline. `fixedRange` denotes a fixed set of slots; this range is pushed forward by
    // the value at the top of stack `dynamicStack`. Pass the range of the uniform being indexed
    // as `limitRange`; this is used as a hard cap, to avoid indexing outside of bounds.
    void push_uniform_indirect(SlotRange fixedRange, int dynamicStack, SlotRange limitRange);

    // Translates into copy_slots_unmasked (from values into temp stack) in Raster Pipeline.
    void push_slots(SlotRange src) {
        this->push_slots_or_immutable(src, BuilderOp::push_slots);
    }

    // Translates into copy_immutable_unmasked (from immutables into temp stack) in Raster
    // Pipeline.
    void push_immutable(SlotRange src) {
        this->push_slots_or_immutable(src, BuilderOp::push_immutable);
    }

    void push_slots_or_immutable(SlotRange src, BuilderOp op);

    // Translates into copy_from_indirect_unmasked (from values into temp stack) in Raster
    // Pipeline. `fixedRange` denotes a fixed set of slots; this range is pushed forward by the
    // value at the top of stack `dynamicStack`. Pass the slot range of the variable being indexed
    // as `limitRange`; this is used as a hard cap, to avoid indexing outside of bounds.
    void push_slots_indirect(SlotRange fixedRange, int dynamicStack, SlotRange limitRange) {
        this->push_slots_or_immutable_indirect(fixedRange, dynamicStack, limitRange,
                                               BuilderOp::push_slots_indirect);
    }

    void push_immutable_indirect(SlotRange fixedRange, int dynamicStack, SlotRange limitRange) {
        this->push_slots_or_immutable_indirect(fixedRange, dynamicStack, limitRange,
                                               BuilderOp::push_immutable_indirect);
    }

    void push_slots_or_immutable_indirect(SlotRange fixedRange, int dynamicStack,
                                          SlotRange limitRange, BuilderOp op);

    // Translates into copy_slots_masked (from temp stack to values) in Raster Pipeline.
    // Does not discard any values on the temp stack.
    void copy_stack_to_slots(SlotRange dst) {
        this->copy_stack_to_slots(dst, /*offsetFromStackTop=*/dst.count);
    }

    void copy_stack_to_slots(SlotRange dst, int offsetFromStackTop);

    // Translates into swizzle_copy_slots_masked (from temp stack to values) in Raster Pipeline.
    // Does not discard any values on the temp stack.
    void swizzle_copy_stack_to_slots(SlotRange dst,
                                     SkSpan<const int8_t> components,
                                     int offsetFromStackTop);

    // Translates into swizzle_copy_to_indirect_masked (from temp stack to values) in Raster
    // Pipeline. Does not discard any values on the temp stack.
    void swizzle_copy_stack_to_slots_indirect(SlotRange fixedRange,
                                              int dynamicStackID,
                                              SlotRange limitRange,
                                              SkSpan<const int8_t> components,
                                              int offsetFromStackTop);

    // Translates into copy_slots_unmasked (from temp stack to values) in Raster Pipeline.
    // Does not discard any values on the temp stack.
    void copy_stack_to_slots_unmasked(SlotRange dst) {
        this->copy_stack_to_slots_unmasked(dst, /*offsetFromStackTop=*/dst.count);
    }

    void copy_stack_to_slots_unmasked(SlotRange dst, int offsetFromStackTop);
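    // As an example, pushing `arr[i]` (where `arr` occupies value slots 10..13 and `i` lives on
    // top of stack 2) could be emitted like this (a minimal sketch; the slot and stack numbers
    // are illustrative):
    //
    //     builder.push_slots_indirect(/*fixedRange=*/{10, 1},
    //                                 /*dynamicStack=*/2,
    //                                 /*limitRange=*/{10, 4});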
    // Translates into copy_to_indirect_masked (from temp stack into values) in Raster Pipeline.
    // `fixedRange` denotes a fixed set of slots; this range is pushed forward by the value at the
    // top of stack `dynamicStack`. Pass the slot range of the variable being indexed as
    // `limitRange`; this is used as a hard cap, to avoid indexing outside of bounds.
    void copy_stack_to_slots_indirect(SlotRange fixedRange,
                                      int dynamicStackID,
                                      SlotRange limitRange);

    // Copies from temp stack to slots, including an indirect offset, then shrinks the temp stack.
    void pop_slots_indirect(SlotRange fixedRange, int dynamicStackID, SlotRange limitRange) {
        this->copy_stack_to_slots_indirect(fixedRange, dynamicStackID, limitRange);
        this->discard_stack(fixedRange.count);
    }

    // Performs a unary op (like `bitwise_not`), given a slot count of `slots`. The stack top is
    // replaced with the result.
    void unary_op(BuilderOp op, int32_t slots);

    // Performs a binary op (like `add_n_floats` or `cmpeq_n_ints`), given a slot count of
    // `slots`. Two n-slot input values are consumed, and the result is pushed onto the stack.
    void binary_op(BuilderOp op, int32_t slots);

    // Performs a ternary op (like `mix` or `smoothstep`), given a slot count of `slots`. Three
    // n-slot input values are consumed, and the result is pushed onto the stack.
    void ternary_op(BuilderOp op, int32_t slots);

    // Computes a dot product on the stack. The slots consumed (`slots`) must be between 1 and 4.
    // Two n-slot input vectors are consumed, and a scalar result is pushed onto the stack.
    void dot_floats(int32_t slots);

    // Computes refract(N, I, eta) on the stack. N and I are assumed to be 4-slot vectors, and can
    // be padded with zeros for smaller inputs. Eta is a scalar. The result is a 4-slot vector.
    void refract_floats();

    // Computes inverse(matN) on the stack. Pass 2, 3 or 4 for n to specify the matrix size.
    void inverse_matrix(int32_t n);

    // Shrinks the temp stack, discarding values on top.
    void discard_stack(int32_t count, int stackID);

    void discard_stack(int32_t count) {
        this->discard_stack(count, fCurrentStackID);
    }

    // Grows the temp stack, leaving any preexisting values in place.
    void pad_stack(int32_t count);

    // Copies values from the temp stack into slots, and then shrinks the temp stack.
    void pop_slots(SlotRange dst);

    // Creates many clones of the top single-slot item on the temp stack.
    void push_duplicates(int count);

    // Creates a single clone of an item on the current temp stack. The cloned item can consist of
    // any number of slots, and can be copied from an earlier position on the stack.
    void push_clone(int numSlots, int offsetFromStackTop = 0);

    // Clones a range of slots from another stack onto this stack.
    void push_clone_from_stack(SlotRange range, int otherStackID, int offsetFromStackTop);

    // Translates into copy_from_indirect_unmasked (from one temp stack to another) in Raster
    // Pipeline. `fixedOffset` denotes a range of slots within the top `offsetFromStackTop` slots
    // of `otherStackID`. This range is pushed forward by the value at the top of
    // `dynamicStackID`.
    void push_clone_indirect_from_stack(SlotRange fixedOffset,
                                        int dynamicStackID,
                                        int otherStackID,
                                        int offsetFromStackTop);
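    // For example, adding two float2 values could be emitted like this (a minimal sketch; the
    // slot indices are illustrative):
    //
    //     builder.push_slots({0, 2});                      // left operand
    //     builder.push_slots({2, 2});                      // right operand
    //     builder.binary_op(BuilderOp::add_n_floats, 2);   // consumes both, pushes the sum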
    // Compares the stack top with the passed-in value; if it matches, enables the loop mask.
    void case_op(int value) {
        this->appendInstruction(BuilderOp::case_op, {}, value);
    }

    // Performs a `continue` in a loop.
    void continue_op(int continueMaskStackID) {
        this->appendInstruction(BuilderOp::continue_op, {}, continueMaskStackID);
    }

    void select(int slots) {
        // Overlays the top two entries on the stack, making one hybrid entry. The execution mask
        // is used to select which lanes are preserved.
        SkASSERT(slots > 0);
        this->appendInstruction(BuilderOp::select, {}, slots);
    }

    // The opposite of push_slots; copies values from the temp stack into value slots, then
    // shrinks the temp stack.
    void pop_slots_unmasked(SlotRange dst);

    void copy_slots_masked(SlotRange dst, SlotRange src) {
        SkASSERT(dst.count == src.count);
        this->appendInstruction(BuilderOp::copy_slot_masked, {dst.index, src.index}, dst.count);
    }

    void copy_slots_unmasked(SlotRange dst, SlotRange src);

    void copy_immutable_unmasked(SlotRange dst, SlotRange src);

    // Directly writes a constant value into a slot.
    void copy_constant(Slot slot, int constantValue);

    // Stores zeros across the entire slot range.
    void zero_slots_unmasked(SlotRange dst);

    // Consumes `consumedSlots` elements on the stack, then generates `components.size()`
    // elements.
    void swizzle(int consumedSlots, SkSpan<const int8_t> components);

    // Transposes a matrix of size CxR on the stack (into a matrix of size RxC).
    void transpose(int columns, int rows);

    // Generates a CxR diagonal matrix from the top two scalars on the stack. The second scalar is
    // used as the diagonal value; the first scalar (usually zero) fills in the rest of the slots.
    void diagonal_matrix(int columns, int rows);

    // Resizes a CxR matrix at the top of the stack to C'xR'.
    void matrix_resize(int origColumns, int origRows, int newColumns, int newRows);

    // Multiplies a CxR matrix/vector against an adjacent CxR matrix/vector on the stack.
    void matrix_multiply(int leftColumns, int leftRows, int rightColumns, int rightRows);

    void push_condition_mask();

    void pop_condition_mask() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        this->appendInstruction(BuilderOp::pop_condition_mask, {});
    }

    void merge_condition_mask();

    void merge_inv_condition_mask() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        this->appendInstruction(BuilderOp::merge_inv_condition_mask, {});
    }

    void push_loop_mask() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        this->appendInstruction(BuilderOp::push_loop_mask, {});
    }

    void pop_loop_mask() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        this->appendInstruction(BuilderOp::pop_loop_mask, {});
    }
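    // As a rough sketch of how the loop-mask ops fit together (ordering only; the code generator
    // owns the exact sequence, and execution-mask writes must be enabled first):
    //
    //     int loopStart = builder.nextLabelID();
    //     builder.push_loop_mask();                       // save the loop mask
    //     builder.label(loopStart);
    //     /* ...emit the loop body; exiting lanes mask themselves off... */
    //     builder.branch_if_any_lanes_active(loopStart);  // loop while any lane is live
    //     builder.pop_loop_mask();                        // restore the saved mask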
    // Exchanges src.rgba with the four values at the top of the stack.
    void exchange_src();

    void push_src_rgba() {
        this->appendInstruction(BuilderOp::push_src_rgba, {});
    }

    void push_dst_rgba() {
        this->appendInstruction(BuilderOp::push_dst_rgba, {});
    }

    void push_device_xy01() {
        this->appendInstruction(BuilderOp::push_device_xy01, {});
    }

    void pop_src_rgba();

    void pop_dst_rgba() {
        this->appendInstruction(BuilderOp::pop_dst_rgba, {});
    }

    void mask_off_loop_mask() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        this->appendInstruction(BuilderOp::mask_off_loop_mask, {});
    }

    void reenable_loop_mask(SlotRange src) {
        SkASSERT(this->executionMaskWritesAreEnabled());
        SkASSERT(src.count == 1);
        this->appendInstruction(BuilderOp::reenable_loop_mask, {src.index});
    }

    void pop_and_reenable_loop_mask() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        this->appendInstruction(BuilderOp::pop_and_reenable_loop_mask, {});
    }

    void merge_loop_mask() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        this->appendInstruction(BuilderOp::merge_loop_mask, {});
    }

    void push_return_mask() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        this->appendInstruction(BuilderOp::push_return_mask, {});
    }

    void pop_return_mask();

    void mask_off_return_mask() {
        SkASSERT(this->executionMaskWritesAreEnabled());
        this->appendInstruction(BuilderOp::mask_off_return_mask, {});
    }

    void invoke_shader(int childIdx) {
        this->appendInstruction(BuilderOp::invoke_shader, {}, childIdx);
    }

    void invoke_color_filter(int childIdx) {
        this->appendInstruction(BuilderOp::invoke_color_filter, {}, childIdx);
    }

    void invoke_blender(int childIdx) {
        this->appendInstruction(BuilderOp::invoke_blender, {}, childIdx);
    }

    void invoke_to_linear_srgb() {
        // The intrinsics accept a three-component value; add a fourth padding element (which
        // will be ignored) since our RP ops deal in RGBA colors.
        this->pad_stack(1);
        this->appendInstruction(BuilderOp::invoke_to_linear_srgb, {});
        this->discard_stack(1);
    }

    void invoke_from_linear_srgb() {
        // The intrinsics accept a three-component value; add a fourth padding element (which
        // will be ignored) since our RP ops deal in RGBA colors.
        this->pad_stack(1);
        this->appendInstruction(BuilderOp::invoke_from_linear_srgb, {});
        this->discard_stack(1);
    }

    // Writes the current line number to the debug trace.
    void trace_line(int traceMaskStackID, int line) {
        this->appendInstruction(BuilderOp::trace_line, {}, traceMaskStackID, line);
    }

    // Writes a variable update to the debug trace.
    void trace_var(int traceMaskStackID, SlotRange r) {
        this->appendInstruction(BuilderOp::trace_var, {r.index}, traceMaskStackID, r.count);
    }
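    // For example, recording an update to a two-slot variable, gated by the trace mask on
    // stack 3, could be emitted as (a minimal sketch; the stack ID and slot range are
    // illustrative):
    //
    //     builder.trace_var(/*traceMaskStackID=*/3, /*r=*/SlotRange{8, 2});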
    // Writes a variable update (via indirection) to the debug trace.
    void trace_var_indirect(int traceMaskStackID, SlotRange fixedRange,
                            int dynamicStackID, SlotRange limitRange);

    // Writes a function-entrance to the debug trace.
    void trace_enter(int traceMaskStackID, int funcID) {
        this->appendInstruction(BuilderOp::trace_enter, {}, traceMaskStackID, funcID);
    }

    // Writes a function-exit to the debug trace.
    void trace_exit(int traceMaskStackID, int funcID) {
        this->appendInstruction(BuilderOp::trace_exit, {}, traceMaskStackID, funcID);
    }

    // Writes a scope-level change to the debug trace.
    void trace_scope(int traceMaskStackID, int delta) {
        this->appendInstruction(BuilderOp::trace_scope, {}, traceMaskStackID, delta);
    }

private:
    struct SlotList {
        SlotList(Slot a = NA, Slot b = NA) : fSlotA(a), fSlotB(b) {}
        Slot fSlotA = NA;
        Slot fSlotB = NA;
    };
    void appendInstruction(BuilderOp op, SlotList slots,
                           int a = 0, int b = 0, int c = 0, int d = 0);
    Instruction* lastInstruction(int fromBack = 0);
    Instruction* lastInstructionOnAnyStack(int fromBack = 0);
    void simplifyPopSlotsUnmasked(SlotRange* dst);
    bool simplifyImmediateUnmaskedOp();

    skia_private::TArray<Instruction> fInstructions;
    int fNumLabels = 0;
    int fExecutionMaskWritesEnabled = 0;
    int fCurrentStackID = 0;
};

}  // namespace RP
}  // namespace SkSL

#endif  // SKSL_RASTERPIPELINEBUILDER