• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2022 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #ifndef SKSL_RASTERPIPELINEBUILDER
9 #define SKSL_RASTERPIPELINEBUILDER
10 
11 #include "include/core/SkTypes.h"
12 
13 #include "include/core/SkSpan.h"
14 #include "include/core/SkTypes.h"
15 #include "include/private/base/SkTArray.h"
16 #include "src/base/SkUtils.h"
17 #include "src/core/SkRasterPipelineOpList.h"
18 
19 #include <cstddef>
20 #include <cstdint>
21 #include <memory>
22 
23 class SkArenaAlloc;
24 class SkRasterPipeline;
25 class SkWStream;
26 using SkRPOffset = uint32_t;
27 
28 namespace SkSL {
29 
30 class DebugTracePriv;
31 class TraceHook;
32 
33 namespace RP {
34 
// Every scalar value in a program occupies exactly one slot; a Slot is that slot's index.
using Slot = int;

// Placeholder slot index (-1). It is the default for slot operands that are not supplied.
constexpr Slot NA{-1};

// Scalars, vectors, and matrices are all addressed as a run of `count` consecutive slots which
// begins at `index`. A default-constructed range starts at slot 0 and has a count of zero.
struct SlotRange {
    Slot index{0};
    int  count{0};
};
44 
// X-macro naming the ops that SkSL Raster Pipeline programs use on top of the native
// SkRasterPipeline op list. `OP` is invoked once per op name, in exactly the order listed here.
#define SKRP_EXTENDED_OPS(OP)                                 \
    OP(label)                   /* branch targets */          \
    OP(invoke_shader)           /* child programs */          \
    OP(invoke_color_filter)                                   \
    OP(invoke_blender)                                        \
    OP(invoke_to_linear_srgb)   /* color space transforms */  \
    OP(invoke_from_linear_srgb)
57 
58 // An RP::Program will consist entirely of ProgramOps. The ProgramOps list is a superset of the
59 // native SkRasterPipelineOps op-list. It also has a few extra ops to indicate child-effect
60 // invocation, and a `label` op to indicate branch targets.
61 enum class ProgramOp {
62     #define M(stage) stage,
63         // A finished program can contain any native Raster Pipeline op...
64         SK_RASTER_PIPELINE_OPS_ALL(M)
65 
66         // ... as well as our extended ops.
67         SKRP_EXTENDED_OPS(M)
68     #undef M
69 };
70 
71 // BuilderOps are a superset of ProgramOps. They are used by the RP::Builder, which works in terms
72 // of Instructions; Instructions are slightly more expressive than raw SkRasterPipelineOps. In
73 // particular, the Builder supports stacks for pushing and popping scratch values.
74 // RP::Program::makeStages is responsible for rewriting Instructions/BuilderOps into an array of
75 // RP::Program::Stages, which will contain only native SkRasterPipelineOps and (optionally)
76 // child-effect invocations.
77 enum class BuilderOp {
78     #define M(stage) stage,
79         // An in-flight program can contain all the native Raster Pipeline ops...
80         SK_RASTER_PIPELINE_OPS_ALL(M)
81 
82         // ... and our extended ops...
83         SKRP_EXTENDED_OPS(M)
84     #undef M
85 
86     // ... and also has Builder-specific ops. These ops generally interface with the stack, and are
87     // converted into ProgramOps during `makeStages`.
88     push_clone,
89     push_clone_from_stack,
90     push_clone_indirect_from_stack,
91     push_constant,
92     push_immutable,
93     push_immutable_indirect,
94     push_slots,
95     push_slots_indirect,
96     push_uniform,
97     push_uniform_indirect,
98     copy_stack_to_slots,
99     copy_stack_to_slots_unmasked,
100     copy_stack_to_slots_indirect,
101     copy_uniform_to_slots_unmasked,
102     store_immutable_value,
103     swizzle_copy_stack_to_slots,
104     swizzle_copy_stack_to_slots_indirect,
105     discard_stack,
106     pad_stack,
107     select,
108     push_condition_mask,
109     pop_condition_mask,
110     push_loop_mask,
111     pop_loop_mask,
112     pop_and_reenable_loop_mask,
113     push_return_mask,
114     pop_return_mask,
115     push_src_rgba,
116     push_dst_rgba,
117     push_device_xy01,
118     pop_src_rgba,
119     pop_dst_rgba,
120     trace_var_indirect,
121     branch_if_no_active_lanes_on_stack_top_equal,
122     unsupported
123 };
124 
125 // If the extended ops are not in sync between enums, program creation will not work.
126 static_assert((int)ProgramOp::label == (int)BuilderOp::label);
127 
128 // Represents a single raster-pipeline SkSL instruction.
129 struct Instruction {
130     BuilderOp fOp;
131     Slot      fSlotA = NA;
132     Slot      fSlotB = NA;
133     int       fImmA = 0;
134     int       fImmB = 0;
135     int       fImmC = 0;
136     int       fImmD = 0;
137     int       fStackID = 0;
138 };
139 
// Abstract interface handed to RP::Program::appendStages. Implementations supply the behavior
// for child effects and for the linear-sRGB color-space conversions.
// NOTE(review): these hooks appear to back the `invoke_*` extended ops; confirm in the
// implementation of appendStages.
class Callbacks {
public:
    virtual ~Callbacks() = default;

    // Child-effect hooks; `index` selects the child.
    // NOTE(review): the bool result presumably reports whether the child could be appended.
    virtual bool appendShader(int index) = 0;
    virtual bool appendColorFilter(int index) = 0;
    virtual bool appendBlender(int index) = 0;

    // Color-space hooks; `color` is an opaque pointer to the color data being converted.
    // NOTE(review): the layout behind `color` is not visible in this header; verify at the caller.
    virtual void toLinearSrgb(const void* color) = 0;
    virtual void fromLinearSrgb(const void* color) = 0;
};
151 
152 class Program {
153 public:
154     Program(skia_private::TArray<Instruction> instrs,
155             int numValueSlots,
156             int numUniformSlots,
157             int numImmutableSlots,
158             int numLabels,
159             DebugTracePriv* debugTrace);
160     ~Program();
161 
162     bool appendStages(SkRasterPipeline* pipeline,
163                       SkArenaAlloc* alloc,
164                       Callbacks* callbacks,
165                       SkSpan<const float> uniforms) const;
166 
167     void dump(SkWStream* out, bool writeInstructionCount = false) const;
168 
numUniforms()169     int numUniforms() const { return fNumUniformSlots; }
170 
171 private:
172     using StackDepths = skia_private::TArray<int>; // [stack index] = depth of stack
173 
174     struct SlotData {
175         SkSpan<float> values;
176         SkSpan<float> stack;
177         SkSpan<float> immutable;
178     };
179     SlotData allocateSlotData(SkArenaAlloc* alloc) const;
180 
181     struct Stage {
182         ProgramOp op;
183         void*     ctx;
184     };
185     void makeStages(skia_private::TArray<Stage>* pipeline,
186                     SkArenaAlloc* alloc,
187                     SkSpan<const float> uniforms,
188                     const SlotData& slots) const;
189     void optimize();
190     StackDepths tempStackMaxDepths() const;
191 
192     // These methods are used to split up multi-slot copies into multiple ops as needed.
193     void appendCopy(skia_private::TArray<Stage>* pipeline,
194                     SkArenaAlloc* alloc,
195                     std::byte* basePtr,
196                     ProgramOp baseStage,
197                     SkRPOffset dst, int dstStride,
198                     SkRPOffset src, int srcStride,
199                     int numSlots) const;
200     void appendCopyImmutableUnmasked(skia_private::TArray<Stage>* pipeline,
201                                      SkArenaAlloc* alloc,
202                                      std::byte* basePtr,
203                                      SkRPOffset dst,
204                                      SkRPOffset src,
205                                      int numSlots) const;
206     void appendCopySlotsUnmasked(skia_private::TArray<Stage>* pipeline,
207                                  SkArenaAlloc* alloc,
208                                  SkRPOffset dst,
209                                  SkRPOffset src,
210                                  int numSlots) const;
211     void appendCopySlotsMasked(skia_private::TArray<Stage>* pipeline,
212                                SkArenaAlloc* alloc,
213                                SkRPOffset dst,
214                                SkRPOffset src,
215                                int numSlots) const;
216 
217     // Appends a single-slot single-input math operation to the pipeline. The op `stage` will
218     // appended `numSlots` times, starting at position `dst` and advancing one slot for each
219     // subsequent invocation.
220     void appendSingleSlotUnaryOp(skia_private::TArray<Stage>* pipeline, ProgramOp stage,
221                                  float* dst, int numSlots) const;
222 
223     // Appends a multi-slot single-input math operation to the pipeline. `baseStage` must refer to
224     // a single-slot "apply_op" stage, which must be immediately followed by specializations for
225     // 2-4 slots. For instance, {`ceil_float`, `ceil_2_floats`, `ceil_3_floats`, `ceil_4_floats`}
226     // must be contiguous ops in the stage list, listed in that order; pass `ceil_float` and we
227     // pick the appropriate op based on `numSlots`.
228     void appendMultiSlotUnaryOp(skia_private::TArray<Stage>* pipeline, ProgramOp baseStage,
229                                 float* dst, int numSlots) const;
230 
231     // Appends an immediate-mode binary operation to the pipeline. `baseStage` must refer to
232     // a single-slot, immediate-mode "apply-imm" stage, which must be immediately preceded by
233     // specializations for 2-4 slots if numSlots is greater than 1. For instance, {`add_imm_4_ints`,
234     // `add_imm_3_ints`, `add_imm_2_ints`, `add_imm_int`} must be contiguous ops in the stage list,
235     // listed in that order; pass `add_imm_int` and we pick the appropriate op based on `numSlots`.
236     // Some immediate-mode binary ops are single-slot only in the interest of code size; in this
237     // case, the multi-slot ops can be absent, but numSlots must be 1.
238     void appendImmediateBinaryOp(skia_private::TArray<Stage>* pipeline, SkArenaAlloc* alloc,
239                                  ProgramOp baseStage,
240                                  SkRPOffset dst, int32_t value, int numSlots) const;
241 
242     // Appends a two-input math operation to the pipeline. `src` must be _immediately_ after `dst`
243     // in memory. `baseStage` must refer to an unbounded "apply_to_n_slots" stage. A BinaryOpCtx
244     // will be used to pass pointers to the destination and source; the delta between the two
245     // pointers implicitly gives the number of slots.
246     void appendAdjacentNWayBinaryOp(skia_private::TArray<Stage>* pipeline, SkArenaAlloc* alloc,
247                                     ProgramOp stage,
248                                     SkRPOffset dst, SkRPOffset src, int numSlots) const;
249 
250     // Appends a multi-slot two-input math operation to the pipeline. `src` must be _immediately_
251     // after `dst` in memory. `baseStage` must refer to an unbounded "apply_to_n_slots" stage, which
252     // must be immediately followed by specializations for 1-4 slots. For instance, {`add_n_floats`,
253     // `add_float`, `add_2_floats`, `add_3_floats`, `add_4_floats`} must be contiguous ops in the
254     // stage list, listed in that order; pass `add_n_floats` and we pick the appropriate op based on
255     // `numSlots`.
256     void appendAdjacentMultiSlotBinaryOp(skia_private::TArray<Stage>* pipeline, SkArenaAlloc* alloc,
257                                          ProgramOp baseStage, std::byte* basePtr,
258                                          SkRPOffset dst, SkRPOffset src, int numSlots) const;
259 
260     // Appends a multi-slot math operation having three inputs (dst, src0, src1) and one output
261     // (dst) to the pipeline. The three inputs must be _immediately_ adjacent in memory. `baseStage`
262     // must refer to an unbounded "apply_to_n_slots" stage, which must be immediately followed by
263     // specializations for 1-4 slots.
264     void appendAdjacentMultiSlotTernaryOp(skia_private::TArray<Stage>* pipeline,
265                                           SkArenaAlloc* alloc, ProgramOp baseStage,
266                                           std::byte* basePtr, SkRPOffset dst, SkRPOffset src0,
267                                           SkRPOffset src1, int numSlots) const;
268 
269     // Appends a math operation having three inputs (dst, src0, src1) and one output (dst) to the
270     // pipeline. The three inputs must be _immediately_ adjacent in memory. `baseStage` must refer
271     // to an unbounded "apply_to_n_slots" stage. A TernaryOpCtx will be used to pass pointers to the
272     // destination and sources; the delta between the each pointer implicitly gives the slot count.
273     void appendAdjacentNWayTernaryOp(skia_private::TArray<Stage>* pipeline, SkArenaAlloc* alloc,
274                                      ProgramOp stage, std::byte* basePtr, SkRPOffset dst,
275                                      SkRPOffset src0, SkRPOffset src1, int numSlots) const;
276 
277     // Appends a stack_rewind op on platforms where it is needed (when SK_HAS_MUSTTAIL is not set).
278     void appendStackRewind(skia_private::TArray<Stage>* pipeline) const;
279 
280     class Dumper;
281     friend class Dumper;
282 
283     skia_private::TArray<Instruction> fInstructions;
284     int fNumValueSlots = 0;
285     int fNumUniformSlots = 0;
286     int fNumImmutableSlots = 0;
287     int fNumTempStackSlots = 0;
288     int fNumLabels = 0;
289     StackDepths fTempStackMaxDepths;
290     DebugTracePriv* fDebugTrace = nullptr;
291     std::unique_ptr<SkSL::TraceHook> fTraceHook;
292 };
293 
294 class Builder {
295 public:
296     /** Finalizes and optimizes the program. */
297     std::unique_ptr<Program> finish(int numValueSlots,
298                                     int numUniformSlots,
299                                     int numImmutableSlots,
300                                     DebugTracePriv* debugTrace = nullptr);
301     /**
302      * Peels off a label ID for use in the program. Set the label's position in the program with
303      * the `label` instruction. Actually branch to the target with an instruction like
304      * `branch_if_any_lanes_active` or `jump`.
305      */
nextLabelID()306     int nextLabelID() {
307         return fNumLabels++;
308     }
309 
310     /**
311      * The builder keeps track of the state of execution masks; when we know that the execution
312      * mask is unaltered, we can generate simpler code. Code which alters the execution mask is
313      * required to enable this flag.
314      */
enableExecutionMaskWrites()315     void enableExecutionMaskWrites() {
316         ++fExecutionMaskWritesEnabled;
317     }
318 
disableExecutionMaskWrites()319     void disableExecutionMaskWrites() {
320         SkASSERT(this->executionMaskWritesAreEnabled());
321         --fExecutionMaskWritesEnabled;
322     }
323 
executionMaskWritesAreEnabled()324     bool executionMaskWritesAreEnabled() {
325         return fExecutionMaskWritesEnabled > 0;
326     }
327 
328     /** Assemble a program from the Raster Pipeline instructions below. */
init_lane_masks()329     void init_lane_masks() {
330         this->appendInstruction(BuilderOp::init_lane_masks, {});
331     }
332 
store_src_rg(SlotRange slots)333     void store_src_rg(SlotRange slots) {
334         SkASSERT(slots.count == 2);
335         this->appendInstruction(BuilderOp::store_src_rg, {slots.index});
336     }
337 
store_src(SlotRange slots)338     void store_src(SlotRange slots) {
339         SkASSERT(slots.count == 4);
340         this->appendInstruction(BuilderOp::store_src, {slots.index});
341     }
342 
store_dst(SlotRange slots)343     void store_dst(SlotRange slots) {
344         SkASSERT(slots.count == 4);
345         this->appendInstruction(BuilderOp::store_dst, {slots.index});
346     }
347 
store_device_xy01(SlotRange slots)348     void store_device_xy01(SlotRange slots) {
349         SkASSERT(slots.count == 4);
350         this->appendInstruction(BuilderOp::store_device_xy01, {slots.index});
351     }
352 
load_src(SlotRange slots)353     void load_src(SlotRange slots) {
354         SkASSERT(slots.count == 4);
355         this->appendInstruction(BuilderOp::load_src, {slots.index});
356     }
357 
load_dst(SlotRange slots)358     void load_dst(SlotRange slots) {
359         SkASSERT(slots.count == 4);
360         this->appendInstruction(BuilderOp::load_dst, {slots.index});
361     }
362 
set_current_stack(int stackID)363     void set_current_stack(int stackID) {
364         fCurrentStackID = stackID;
365     }
366 
367     // Inserts a label into the instruction stream.
368     void label(int labelID);
369 
370     // Unconditionally branches to a label.
371     void jump(int labelID);
372 
373     // Branches to a label if the execution mask is active in every lane.
374     void branch_if_all_lanes_active(int labelID);
375 
376     // Branches to a label if the execution mask is active in any lane.
377     void branch_if_any_lanes_active(int labelID);
378 
379     // Branches to a label if the execution mask is inactive across all lanes.
380     void branch_if_no_lanes_active(int labelID);
381 
382     // Branches to a label if the top value on the stack is _not_ equal to `value` in any lane.
383     void branch_if_no_active_lanes_on_stack_top_equal(int value, int labelID);
384 
385     // We use the same SkRasterPipeline op regardless of the literal type, and bitcast the value.
386     void push_constant_i(int32_t val, int count = 1);
387 
push_zeros(int count)388     void push_zeros(int count) {
389         this->push_constant_i(/*val=*/0, count);
390     }
391 
push_constant_f(float val)392     void push_constant_f(float val) {
393         this->push_constant_i(sk_bit_cast<int32_t>(val), /*count=*/1);
394     }
395 
396     void push_constant_u(uint32_t val, int count = 1) {
397         this->push_constant_i(sk_bit_cast<int32_t>(val), count);
398     }
399 
400     // Translates into copy_uniforms (from uniforms into temp stack) in Raster Pipeline.
401     void push_uniform(SlotRange src);
402 
403     // Initializes the Raster Pipeline slot with a constant value when the program is first created.
404     // Does not add any instructions to the program.
store_immutable_value_i(Slot slot,int32_t val)405     void store_immutable_value_i(Slot slot, int32_t val) {
406         this->appendInstruction(BuilderOp::store_immutable_value, {slot}, val);
407     }
408 
409     // Translates into copy_uniforms (from uniforms into value-slots) in Raster Pipeline.
410     void copy_uniform_to_slots_unmasked(SlotRange dst, SlotRange src);
411 
412     // Translates into copy_from_indirect_uniform_unmasked (from values into temp stack) in Raster
413     // Pipeline. `fixedRange` denotes a fixed set of slots; this range is pushed forward by the
414     // value at the top of stack `dynamicStack`. Pass the range of the uniform being indexed as
415     // `limitRange`; this is used as a hard cap, to avoid indexing outside of bounds.
416     void push_uniform_indirect(SlotRange fixedRange, int dynamicStack, SlotRange limitRange);
417 
418 
419     // Translates into copy_slots_unmasked (from values into temp stack) in Raster Pipeline.
push_slots(SlotRange src)420     void push_slots(SlotRange src) {
421         this->push_slots_or_immutable(src, BuilderOp::push_slots);
422     }
423 
424     // Translates into copy_immutable_unmasked (from immutables into temp stack) in Raster Pipeline.
push_immutable(SlotRange src)425     void push_immutable(SlotRange src) {
426         this->push_slots_or_immutable(src, BuilderOp::push_immutable);
427     }
428 
429     void push_slots_or_immutable(SlotRange src, BuilderOp op);
430 
431     // Translates into copy_from_indirect_unmasked (from values into temp stack) in Raster Pipeline.
432     // `fixedRange` denotes a fixed set of slots; this range is pushed forward by the value at the
433     // top of stack `dynamicStack`. Pass the slot range of the variable being indexed as
434     // `limitRange`; this is used as a hard cap, to avoid indexing outside of bounds.
push_slots_indirect(SlotRange fixedRange,int dynamicStack,SlotRange limitRange)435     void push_slots_indirect(SlotRange fixedRange, int dynamicStack, SlotRange limitRange) {
436         this->push_slots_or_immutable_indirect(fixedRange, dynamicStack, limitRange,
437                                                BuilderOp::push_slots_indirect);
438     }
439 
push_immutable_indirect(SlotRange fixedRange,int dynamicStack,SlotRange limitRange)440     void push_immutable_indirect(SlotRange fixedRange, int dynamicStack, SlotRange limitRange) {
441         this->push_slots_or_immutable_indirect(fixedRange, dynamicStack, limitRange,
442                                                BuilderOp::push_immutable_indirect);
443     }
444 
445     void push_slots_or_immutable_indirect(SlotRange fixedRange, int dynamicStack,
446                                           SlotRange limitRange, BuilderOp op);
447 
448     // Translates into copy_slots_masked (from temp stack to values) in Raster Pipeline.
449     // Does not discard any values on the temp stack.
copy_stack_to_slots(SlotRange dst)450     void copy_stack_to_slots(SlotRange dst) {
451         this->copy_stack_to_slots(dst, /*offsetFromStackTop=*/dst.count);
452     }
453 
454     void copy_stack_to_slots(SlotRange dst, int offsetFromStackTop);
455 
456     // Translates into swizzle_copy_slots_masked (from temp stack to values) in Raster Pipeline.
457     // Does not discard any values on the temp stack.
458     void swizzle_copy_stack_to_slots(SlotRange dst,
459                                      SkSpan<const int8_t> components,
460                                      int offsetFromStackTop);
461 
462     // Translates into swizzle_copy_to_indirect_masked (from temp stack to values) in Raster
463     // Pipeline. Does not discard any values on the temp stack.
464     void swizzle_copy_stack_to_slots_indirect(SlotRange fixedRange,
465                                               int dynamicStackID,
466                                               SlotRange limitRange,
467                                               SkSpan<const int8_t> components,
468                                               int offsetFromStackTop);
469 
470     // Translates into copy_slots_unmasked (from temp stack to values) in Raster Pipeline.
471     // Does not discard any values on the temp stack.
copy_stack_to_slots_unmasked(SlotRange dst)472     void copy_stack_to_slots_unmasked(SlotRange dst) {
473         this->copy_stack_to_slots_unmasked(dst, /*offsetFromStackTop=*/dst.count);
474     }
475 
476     void copy_stack_to_slots_unmasked(SlotRange dst, int offsetFromStackTop);
477 
478     // Translates into copy_to_indirect_masked (from temp stack into values) in Raster Pipeline.
479     // `fixedRange` denotes a fixed set of slots; this range is pushed forward by the value at the
480     // top of stack `dynamicStack`. Pass the slot range of the variable being indexed as
481     // `limitRange`; this is used as a hard cap, to avoid indexing outside of bounds.
482     void copy_stack_to_slots_indirect(SlotRange fixedRange,
483                                       int dynamicStackID,
484                                       SlotRange limitRange);
485 
486     // Copies from temp stack to slots, including an indirect offset, then shrinks the temp stack.
pop_slots_indirect(SlotRange fixedRange,int dynamicStackID,SlotRange limitRange)487     void pop_slots_indirect(SlotRange fixedRange, int dynamicStackID, SlotRange limitRange) {
488         this->copy_stack_to_slots_indirect(fixedRange, dynamicStackID, limitRange);
489         this->discard_stack(fixedRange.count);
490     }
491 
492     // Performs a unary op (like `bitwise_not`), given a slot count of `slots`. The stack top is
493     // replaced with the result.
494     void unary_op(BuilderOp op, int32_t slots);
495 
496     // Performs a binary op (like `add_n_floats` or `cmpeq_n_ints`), given a slot count of
497     // `slots`. Two n-slot input values are consumed, and the result is pushed onto the stack.
498     void binary_op(BuilderOp op, int32_t slots);
499 
500     // Performs a ternary op (like `mix` or `smoothstep`), given a slot count of
501     // `slots`. Three n-slot input values are consumed, and the result is pushed onto the stack.
502     void ternary_op(BuilderOp op, int32_t slots);
503 
504     // Computes a dot product on the stack. The slots consumed (`slots`) must be between 1 and 4.
505     // Two n-slot input vectors are consumed, and a scalar result is pushed onto the stack.
506     void dot_floats(int32_t slots);
507 
508     // Computes refract(N, I, eta) on the stack. N and I are assumed to be 4-slot vectors, and can
509     // be padded with zeros for smaller inputs. Eta is a scalar. The result is a 4-slot vector.
510     void refract_floats();
511 
512     // Computes inverse(matN) on the stack. Pass 2, 3 or 4 for n to specify matrix size.
513     void inverse_matrix(int32_t n);
514 
515     // Shrinks the temp stack, discarding values on top.
516     void discard_stack(int32_t count, int stackID);
517 
discard_stack(int32_t count)518     void discard_stack(int32_t count) {
519         this->discard_stack(count, fCurrentStackID);
520     }
521 
522     // Grows the temp stack, leaving any preexisting values in place.
523     void pad_stack(int32_t count);
524 
525     // Copies vales from the temp stack into slots, and then shrinks the temp stack.
526     void pop_slots(SlotRange dst);
527 
528     // Creates many clones of the top single-slot item on the temp stack.
529     void push_duplicates(int count);
530 
531     // Creates a single clone of an item on the current temp stack. The cloned item can consist of
532     // any number of slots, and can be copied from an earlier position on the stack.
533     void push_clone(int numSlots, int offsetFromStackTop = 0);
534 
535     // Clones a range of slots from another stack onto this stack.
536     void push_clone_from_stack(SlotRange range, int otherStackID, int offsetFromStackTop);
537 
538     // Translates into copy_from_indirect_unmasked (from one temp stack to another) in Raster
539     // Pipeline. `fixedOffset` denotes a range of slots within the top `offsetFromStackTop` slots of
540     // `otherStackID`. This range is pushed forward by the value at the top of `dynamicStackID`.
541     void push_clone_indirect_from_stack(SlotRange fixedOffset,
542                                         int dynamicStackID,
543                                         int otherStackID,
544                                         int offsetFromStackTop);
545 
546     // Compares the stack top with the passed-in value; if it matches, enables the loop mask.
case_op(int value)547     void case_op(int value) {
548         this->appendInstruction(BuilderOp::case_op, {}, value);
549     }
550 
551     // Performs a `continue` in a loop.
continue_op(int continueMaskStackID)552     void continue_op(int continueMaskStackID) {
553         this->appendInstruction(BuilderOp::continue_op, {}, continueMaskStackID);
554     }
555 
select(int slots)556     void select(int slots) {
557         // Overlays the top two entries on the stack, making one hybrid entry. The execution mask
558         // is used to select which lanes are preserved.
559         SkASSERT(slots > 0);
560         this->appendInstruction(BuilderOp::select, {}, slots);
561     }
562 
563     // The opposite of push_slots; copies values from the temp stack into value slots, then
564     // shrinks the temp stack.
565     void pop_slots_unmasked(SlotRange dst);
566 
copy_slots_masked(SlotRange dst,SlotRange src)567     void copy_slots_masked(SlotRange dst, SlotRange src) {
568         SkASSERT(dst.count == src.count);
569         this->appendInstruction(BuilderOp::copy_slot_masked, {dst.index, src.index}, dst.count);
570     }
571 
572     void copy_slots_unmasked(SlotRange dst, SlotRange src);
573 
574     void copy_immutable_unmasked(SlotRange dst, SlotRange src);
575 
576     // Directly writes a constant value into a slot.
577     void copy_constant(Slot slot, int constantValue);
578 
579     // Stores zeros across the entire slot range.
580     void zero_slots_unmasked(SlotRange dst);
581 
582     // Consumes `consumedSlots` elements on the stack, then generates `components.size()` elements.
583     void swizzle(int consumedSlots, SkSpan<const int8_t> components);
584 
585     // Transposes a matrix of size CxR on the stack (into a matrix of size RxC).
586     void transpose(int columns, int rows);
587 
588     // Generates a CxR diagonal matrix from the top two scalars on the stack. The second scalar is
589     // used as the diagonal value; the first scalar (usually zero) fills in the rest of the slots.
590     void diagonal_matrix(int columns, int rows);
591 
592     // Resizes a CxR matrix at the top of the stack to C'xR'.
593     void matrix_resize(int origColumns, int origRows, int newColumns, int newRows);
594 
595     // Multiplies a CxR matrix/vector against an adjacent CxR matrix/vector on the stack.
596     void matrix_multiply(int leftColumns, int leftRows, int rightColumns, int rightRows);
597 
598     void push_condition_mask();
599 
pop_condition_mask()600     void pop_condition_mask() {
601         SkASSERT(this->executionMaskWritesAreEnabled());
602         this->appendInstruction(BuilderOp::pop_condition_mask, {});
603     }
604 
605     void merge_condition_mask();
606 
merge_inv_condition_mask()607     void merge_inv_condition_mask() {
608         SkASSERT(this->executionMaskWritesAreEnabled());
609         this->appendInstruction(BuilderOp::merge_inv_condition_mask, {});
610     }
611 
push_loop_mask()612     void push_loop_mask() {
613         SkASSERT(this->executionMaskWritesAreEnabled());
614         this->appendInstruction(BuilderOp::push_loop_mask, {});
615     }
616 
pop_loop_mask()617     void pop_loop_mask() {
618         SkASSERT(this->executionMaskWritesAreEnabled());
619         this->appendInstruction(BuilderOp::pop_loop_mask, {});
620     }
621 
622     // Exchanges src.rgba with the four values at the top of the stack.
623     void exchange_src();
624 
push_src_rgba()625     void push_src_rgba() {
626         this->appendInstruction(BuilderOp::push_src_rgba, {});
627     }
628 
push_dst_rgba()629     void push_dst_rgba() {
630         this->appendInstruction(BuilderOp::push_dst_rgba, {});
631     }
632 
push_device_xy01()633     void push_device_xy01() {
634         this->appendInstruction(BuilderOp::push_device_xy01, {});
635     }
636 
637     void pop_src_rgba();
638 
pop_dst_rgba()639     void pop_dst_rgba() {
640         this->appendInstruction(BuilderOp::pop_dst_rgba, {});
641     }
642 
mask_off_loop_mask()643     void mask_off_loop_mask() {
644         SkASSERT(this->executionMaskWritesAreEnabled());
645         this->appendInstruction(BuilderOp::mask_off_loop_mask, {});
646     }
647 
reenable_loop_mask(SlotRange src)648     void reenable_loop_mask(SlotRange src) {
649         SkASSERT(this->executionMaskWritesAreEnabled());
650         SkASSERT(src.count == 1);
651         this->appendInstruction(BuilderOp::reenable_loop_mask, {src.index});
652     }
653 
pop_and_reenable_loop_mask()654     void pop_and_reenable_loop_mask() {
655         SkASSERT(this->executionMaskWritesAreEnabled());
656         this->appendInstruction(BuilderOp::pop_and_reenable_loop_mask, {});
657     }
658 
merge_loop_mask()659     void merge_loop_mask() {
660         SkASSERT(this->executionMaskWritesAreEnabled());
661         this->appendInstruction(BuilderOp::merge_loop_mask, {});
662     }
663 
push_return_mask()664     void push_return_mask() {
665         SkASSERT(this->executionMaskWritesAreEnabled());
666         this->appendInstruction(BuilderOp::push_return_mask, {});
667     }
668 
669     void pop_return_mask();
670 
mask_off_return_mask()671     void mask_off_return_mask() {
672         SkASSERT(this->executionMaskWritesAreEnabled());
673         this->appendInstruction(BuilderOp::mask_off_return_mask, {});
674     }
675 
invoke_shader(int childIdx)676     void invoke_shader(int childIdx) {
677         this->appendInstruction(BuilderOp::invoke_shader, {}, childIdx);
678     }
679 
invoke_color_filter(int childIdx)680     void invoke_color_filter(int childIdx) {
681         this->appendInstruction(BuilderOp::invoke_color_filter, {}, childIdx);
682     }
683 
invoke_blender(int childIdx)684     void invoke_blender(int childIdx) {
685         this->appendInstruction(BuilderOp::invoke_blender, {}, childIdx);
686     }
687 
invoke_to_linear_srgb()688     void invoke_to_linear_srgb() {
689         // The intrinsics accept a three-component value; add a fourth padding element (which
690         // will be ignored) since our RP ops deal in RGBA colors.
691         this->pad_stack(1);
692         this->appendInstruction(BuilderOp::invoke_to_linear_srgb, {});
693         this->discard_stack(1);
694     }
695 
invoke_from_linear_srgb()696     void invoke_from_linear_srgb() {
697         // The intrinsics accept a three-component value; add a fourth padding element (which
698         // will be ignored) since our RP ops deal in RGBA colors.
699         this->pad_stack(1);
700         this->appendInstruction(BuilderOp::invoke_from_linear_srgb, {});
701         this->discard_stack(1);
702     }
703 
704     // Writes the current line number to the debug trace.
trace_line(int traceMaskStackID,int line)705     void trace_line(int traceMaskStackID, int line) {
706         this->appendInstruction(BuilderOp::trace_line, {}, traceMaskStackID, line);
707     }
708 
709     // Writes a variable update to the debug trace.
trace_var(int traceMaskStackID,SlotRange r)710     void trace_var(int traceMaskStackID, SlotRange r) {
711         this->appendInstruction(BuilderOp::trace_var, {r.index}, traceMaskStackID, r.count);
712     }
713 
714     // Writes a variable update (via indirection) to the debug trace.
715     void trace_var_indirect(int traceMaskStackID, SlotRange fixedRange,
716                             int dynamicStackID, SlotRange limitRange);
717 
718     // Writes a function-entrance to the debug trace.
trace_enter(int traceMaskStackID,int funcID)719     void trace_enter(int traceMaskStackID, int funcID) {
720         this->appendInstruction(BuilderOp::trace_enter, {}, traceMaskStackID, funcID);
721     }
722 
723     // Writes a function-exit to the debug trace.
trace_exit(int traceMaskStackID,int funcID)724     void trace_exit(int traceMaskStackID, int funcID) {
725         this->appendInstruction(BuilderOp::trace_exit, {}, traceMaskStackID, funcID);
726     }
727 
728     // Writes a scope-level change to the debug trace.
trace_scope(int traceMaskStackID,int delta)729     void trace_scope(int traceMaskStackID, int delta) {
730         this->appendInstruction(BuilderOp::trace_scope, {}, traceMaskStackID, delta);
731     }
732 
733 private:
734     struct SlotList {
fSlotASlotList735         SlotList(Slot a = NA, Slot b = NA) : fSlotA(a), fSlotB(b) {}
736         Slot fSlotA = NA;
737         Slot fSlotB = NA;
738     };
739     void appendInstruction(BuilderOp op, SlotList slots,
740                            int a = 0, int b = 0, int c = 0, int d = 0);
741     Instruction* lastInstruction(int fromBack = 0);
742     Instruction* lastInstructionOnAnyStack(int fromBack = 0);
743     void simplifyPopSlotsUnmasked(SlotRange* dst);
744     bool simplifyImmediateUnmaskedOp();
745 
746     skia_private::TArray<Instruction> fInstructions;
747     int fNumLabels = 0;
748     int fExecutionMaskWritesEnabled = 0;
749     int fCurrentStackID = 0;
750 };
751 
752 }  // namespace RP
753 }  // namespace SkSL
754 
755 #endif  // SKSL_RASTERPIPELINEBUILDER
756