1 /*
2 * Copyright 2022 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "include/core/SkStream.h"
9 #include "include/private/SkSLString.h"
10 #include "include/private/base/SkMalloc.h"
11 #include "include/private/base/SkTo.h"
12 #include "include/sksl/SkSLPosition.h"
13 #include "src/base/SkArenaAlloc.h"
14 #include "src/core/SkOpts.h"
15 #include "src/core/SkRasterPipelineOpContexts.h"
16 #include "src/core/SkRasterPipelineOpList.h"
17 #include "src/sksl/codegen/SkSLRasterPipelineBuilder.h"
18 #include "src/sksl/tracing/SkRPDebugTrace.h"
19 #include "src/sksl/tracing/SkSLDebugInfo.h"
20 #include "src/utils/SkBitSet.h"
21
22 #if !defined(SKSL_STANDALONE)
23 #include "src/core/SkRasterPipeline.h"
24 #endif
25
26 #include <algorithm>
27 #include <cmath>
28 #include <cstring>
29 #include <iterator>
30 #include <string>
31 #include <string_view>
32 #include <tuple>
33 #include <utility>
34 #include <vector>
35
36 namespace SkSL {
37 namespace RP {
38
// Groups of `case` labels for ops that share an implementation strategy, so the switch
// statements below can handle each whole family with a single case list.
#define ALL_SINGLE_SLOT_UNARY_OP_CASES       \
         BuilderOp::atan_float:              \
    case BuilderOp::cos_float:               \
    case BuilderOp::exp_float:               \
    case BuilderOp::sin_float:               \
    case BuilderOp::sqrt_float:              \
    case BuilderOp::tan_float

#define ALL_MULTI_SLOT_UNARY_OP_CASES        \
         BuilderOp::abs_float:               \
    case BuilderOp::abs_int:                 \
    case BuilderOp::bitwise_not_int:         \
    case BuilderOp::cast_to_float_from_int:  \
    case BuilderOp::cast_to_float_from_uint: \
    case BuilderOp::cast_to_int_from_float:  \
    case BuilderOp::cast_to_uint_from_float: \
    case BuilderOp::ceil_float:              \
    case BuilderOp::floor_float

#define ALL_N_WAY_BINARY_OP_CASES            \
         BuilderOp::atan2_n_floats:          \
    case BuilderOp::pow_n_floats

#define ALL_MULTI_SLOT_BINARY_OP_CASES       \
         BuilderOp::add_n_floats:            \
    case BuilderOp::add_n_ints:              \
    case BuilderOp::sub_n_floats:            \
    case BuilderOp::sub_n_ints:              \
    case BuilderOp::mul_n_floats:            \
    case BuilderOp::mul_n_ints:              \
    case BuilderOp::div_n_floats:            \
    case BuilderOp::div_n_ints:              \
    case BuilderOp::div_n_uints:             \
    case BuilderOp::bitwise_and_n_ints:      \
    case BuilderOp::bitwise_or_n_ints:       \
    case BuilderOp::bitwise_xor_n_ints:      \
    case BuilderOp::min_n_floats:            \
    case BuilderOp::min_n_ints:              \
    case BuilderOp::min_n_uints:             \
    case BuilderOp::max_n_floats:            \
    case BuilderOp::max_n_ints:              \
    case BuilderOp::max_n_uints:             \
    case BuilderOp::cmple_n_floats:          \
    case BuilderOp::cmple_n_ints:            \
    case BuilderOp::cmple_n_uints:           \
    case BuilderOp::cmplt_n_floats:          \
    case BuilderOp::cmplt_n_ints:            \
    case BuilderOp::cmplt_n_uints:           \
    case BuilderOp::cmpeq_n_floats:          \
    case BuilderOp::cmpeq_n_ints:            \
    case BuilderOp::cmpne_n_floats:          \
    case BuilderOp::cmpne_n_ints

#define ALL_MULTI_SLOT_TERNARY_OP_CASES      \
         BuilderOp::mix_n_floats:            \
    case BuilderOp::mix_n_ints
95
unary_op(BuilderOp op,int32_t slots)96 void Builder::unary_op(BuilderOp op, int32_t slots) {
97 switch (op) {
98 case ALL_SINGLE_SLOT_UNARY_OP_CASES:
99 case ALL_MULTI_SLOT_UNARY_OP_CASES:
100 fInstructions.push_back({op, {}, slots});
101 break;
102
103 default:
104 SkDEBUGFAIL("not a unary op");
105 break;
106 }
107 }
108
binary_op(BuilderOp op,int32_t slots)109 void Builder::binary_op(BuilderOp op, int32_t slots) {
110 switch (op) {
111 case ALL_N_WAY_BINARY_OP_CASES:
112 case ALL_MULTI_SLOT_BINARY_OP_CASES:
113 fInstructions.push_back({op, {}, slots});
114 break;
115
116 default:
117 SkDEBUGFAIL("not a binary op");
118 break;
119 }
120 }
121
ternary_op(BuilderOp op,int32_t slots)122 void Builder::ternary_op(BuilderOp op, int32_t slots) {
123 switch (op) {
124 case ALL_MULTI_SLOT_TERNARY_OP_CASES:
125 fInstructions.push_back({op, {}, slots});
126 break;
127
128 default:
129 SkDEBUGFAIL("not a ternary op");
130 break;
131 }
132 }
133
dot_floats(int32_t slots)134 void Builder::dot_floats(int32_t slots) {
135 switch (slots) {
136 case 1: fInstructions.push_back({BuilderOp::mul_n_floats, {}, slots}); break;
137 case 2: fInstructions.push_back({BuilderOp::dot_2_floats, {}, slots}); break;
138 case 3: fInstructions.push_back({BuilderOp::dot_3_floats, {}, slots}); break;
139 case 4: fInstructions.push_back({BuilderOp::dot_4_floats, {}, slots}); break;
140
141 default:
142 SkDEBUGFAIL("invalid number of slots");
143 break;
144 }
145 }
146
discard_stack(int32_t count)147 void Builder::discard_stack(int32_t count) {
148 // If we pushed something onto the stack and then immediately discarded part of it, we can
149 // shrink or eliminate the push.
150 while (count > 0 && !fInstructions.empty()) {
151 Instruction& lastInstruction = fInstructions.back();
152
153 switch (lastInstruction.fOp) {
154 case BuilderOp::discard_stack:
155 // Our last op was actually a separate discard_stack; combine the discards.
156 lastInstruction.fImmA += count;
157 return;
158
159 case BuilderOp::push_zeros:
160 case BuilderOp::push_clone:
161 case BuilderOp::push_clone_from_stack:
162 case BuilderOp::push_slots:
163 case BuilderOp::push_uniform:
164 // Our last op was a multi-slot push; cancel out one discard and eliminate the op
165 // if its count reached zero.
166 --count;
167 --lastInstruction.fImmA;
168 if (lastInstruction.fImmA == 0) {
169 fInstructions.pop_back();
170 }
171 continue;
172
173 case BuilderOp::push_literal:
174 case BuilderOp::push_condition_mask:
175 case BuilderOp::push_loop_mask:
176 case BuilderOp::push_return_mask:
177 // Our last op was a single-slot push; cancel out one discard and eliminate the op.
178 --count;
179 fInstructions.pop_back();
180 continue;
181
182 default:
183 break;
184 }
185
186 // This instruction wasn't a push.
187 break;
188 }
189
190 if (count > 0) {
191 fInstructions.push_back({BuilderOp::discard_stack, {}, count});
192 }
193 }
194
label(int labelID)195 void Builder::label(int labelID) {
196 SkASSERT(labelID >= 0 && labelID < fNumLabels);
197
198 // If the previous instruction was a branch to this label, it's a no-op; jumping to the very
199 // next instruction is effectively meaningless.
200 while (!fInstructions.empty()) {
201 Instruction& lastInstruction = fInstructions.back();
202 switch (lastInstruction.fOp) {
203 case BuilderOp::jump:
204 case BuilderOp::branch_if_any_active_lanes:
205 case BuilderOp::branch_if_no_active_lanes:
206 case BuilderOp::branch_if_no_active_lanes_on_stack_top_equal:
207 if (lastInstruction.fImmA == labelID) {
208 fInstructions.pop_back();
209 continue;
210 }
211 break;
212
213 default:
214 break;
215 }
216 break;
217 }
218 fInstructions.push_back({BuilderOp::label, {}, labelID});
219 }
220
jump(int labelID)221 void Builder::jump(int labelID) {
222 SkASSERT(labelID >= 0 && labelID < fNumLabels);
223 if (!fInstructions.empty() && fInstructions.back().fOp == BuilderOp::jump) {
224 // The previous instruction was also `jump`, so this branch could never possibly occur.
225 return;
226 }
227 fInstructions.push_back({BuilderOp::jump, {}, labelID});
228 }
229
branch_if_any_active_lanes(int labelID)230 void Builder::branch_if_any_active_lanes(int labelID) {
231 if (!this->executionMaskWritesAreEnabled()) {
232 this->jump(labelID);
233 return;
234 }
235
236 SkASSERT(labelID >= 0 && labelID < fNumLabels);
237 if (!fInstructions.empty() &&
238 (fInstructions.back().fOp == BuilderOp::branch_if_any_active_lanes ||
239 fInstructions.back().fOp == BuilderOp::jump)) {
240 // The previous instruction was `jump` or `branch_if_any_active_lanes`, so this branch
241 // could never possibly occur.
242 return;
243 }
244 fInstructions.push_back({BuilderOp::branch_if_any_active_lanes, {}, labelID});
245 }
246
branch_if_no_active_lanes(int labelID)247 void Builder::branch_if_no_active_lanes(int labelID) {
248 if (!this->executionMaskWritesAreEnabled()) {
249 return;
250 }
251
252 SkASSERT(labelID >= 0 && labelID < fNumLabels);
253 if (!fInstructions.empty() &&
254 (fInstructions.back().fOp == BuilderOp::branch_if_no_active_lanes ||
255 fInstructions.back().fOp == BuilderOp::jump)) {
256 // The previous instruction was `jump` or `branch_if_no_active_lanes`, so this branch
257 // could never possibly occur.
258 return;
259 }
260 fInstructions.push_back({BuilderOp::branch_if_no_active_lanes, {}, labelID});
261 }
262
branch_if_no_active_lanes_on_stack_top_equal(int value,int labelID)263 void Builder::branch_if_no_active_lanes_on_stack_top_equal(int value, int labelID) {
264 SkASSERT(labelID >= 0 && labelID < fNumLabels);
265 if (!fInstructions.empty() &&
266 (fInstructions.back().fOp == BuilderOp::jump ||
267 (fInstructions.back().fOp == BuilderOp::branch_if_no_active_lanes_on_stack_top_equal &&
268 fInstructions.back().fImmB == value))) {
269 // The previous instruction was `jump` or `branch_if_no_active_lanes_on_stack_top_equal`
270 // (checking against the same value), so this branch could never possibly occur.
271 return;
272 }
273 fInstructions.push_back({BuilderOp::branch_if_no_active_lanes_on_stack_top_equal,
274 {}, labelID, value});
275 }
276
push_slots(SlotRange src)277 void Builder::push_slots(SlotRange src) {
278 SkASSERT(src.count >= 0);
279 if (!fInstructions.empty()) {
280 Instruction& lastInstruction = fInstructions.back();
281
282 // If the previous instruction was pushing slots contiguous to this range, we can collapse
283 // the two pushes into one larger push.
284 if (lastInstruction.fOp == BuilderOp::push_slots &&
285 lastInstruction.fSlotA + lastInstruction.fImmA == src.index) {
286 lastInstruction.fImmA += src.count;
287 return;
288 }
289
290 // If the previous instruction was discarding an equal number of slots...
291 if (lastInstruction.fOp == BuilderOp::discard_stack && lastInstruction.fImmA == src.count) {
292 // ... and the instruction before that was copying from the stack to the same slots...
293 Instruction& prevInstruction = fInstructions.fromBack(1);
294 if ((prevInstruction.fOp == BuilderOp::copy_stack_to_slots ||
295 prevInstruction.fOp == BuilderOp::copy_stack_to_slots_unmasked) &&
296 prevInstruction.fSlotA == src.index &&
297 prevInstruction.fImmA == src.count) {
298 // ... we are emitting `copy stack to X, discard stack, copy X to stack`. This is a
299 // common pattern when multiple operations in a row affect the same variable. We can
300 // eliminate the discard and just leave X on the stack.
301 fInstructions.pop_back();
302 return;
303 }
304 }
305 }
306
307 if (src.count > 0) {
308 fInstructions.push_back({BuilderOp::push_slots, {src.index}, src.count});
309 }
310 }
311
push_uniform(SlotRange src)312 void Builder::push_uniform(SlotRange src) {
313 SkASSERT(src.count >= 0);
314 if (!fInstructions.empty()) {
315 Instruction& lastInstruction = fInstructions.back();
316
317 // If the previous instruction was pushing uniforms contiguous to this range, we can
318 // collapse the two pushes into one larger push.
319 if (lastInstruction.fOp == BuilderOp::push_uniform &&
320 lastInstruction.fSlotA + lastInstruction.fImmA == src.index) {
321 lastInstruction.fImmA += src.count;
322 return;
323 }
324 }
325
326 if (src.count > 0) {
327 fInstructions.push_back({BuilderOp::push_uniform, {src.index}, src.count});
328 }
329 }
330
push_duplicates(int count)331 void Builder::push_duplicates(int count) {
332 if (!fInstructions.empty()) {
333 Instruction& lastInstruction = fInstructions.back();
334
335 // If the previous op is pushing a zero, we can just push more of them.
336 if (lastInstruction.fOp == BuilderOp::push_zeros) {
337 lastInstruction.fImmA += count;
338 return;
339 }
340 }
341 SkASSERT(count >= 0);
342 if (count >= 3) {
343 // Use a swizzle to splat the input into a 4-slot value.
344 this->swizzle(/*consumedSlots=*/1, {0, 0, 0, 0});
345 count -= 3;
346 }
347 for (; count >= 4; count -= 4) {
348 // Clone the splatted value four slots at a time.
349 this->push_clone(/*numSlots=*/4);
350 }
351 // Use a swizzle or clone to handle the trailing items.
352 switch (count) {
353 case 3: this->swizzle(/*consumedSlots=*/1, {0, 0, 0, 0}); break;
354 case 2: this->swizzle(/*consumedSlots=*/1, {0, 0, 0}); break;
355 case 1: this->push_clone(/*numSlots=*/1); break;
356 default: break;
357 }
358 }
359
push_clone_from_stack(int numSlots,int otherStackIndex,int offsetFromStackTop)360 void Builder::push_clone_from_stack(int numSlots, int otherStackIndex, int offsetFromStackTop) {
361 offsetFromStackTop += numSlots;
362
363 if (!fInstructions.empty()) {
364 Instruction& lastInstruction = fInstructions.back();
365
366 // If the previous op is also pushing a clone...
367 if (lastInstruction.fOp == BuilderOp::push_clone_from_stack &&
368 // ... from the same stack...
369 lastInstruction.fImmB == otherStackIndex &&
370 // ... and this clone starts at the same place that the last clone ends...
371 lastInstruction.fImmC - lastInstruction.fImmA == offsetFromStackTop) {
372 // ... just extend the existing clone-op.
373 lastInstruction.fImmA += numSlots;
374 return;
375 }
376 }
377
378 fInstructions.push_back({BuilderOp::push_clone_from_stack, {},
379 numSlots, otherStackIndex, offsetFromStackTop});
380 }
381
pop_slots(SlotRange dst)382 void Builder::pop_slots(SlotRange dst) {
383 if (!this->executionMaskWritesAreEnabled()) {
384 this->pop_slots_unmasked(dst);
385 return;
386 }
387
388 this->copy_stack_to_slots(dst);
389 this->discard_stack(dst.count);
390 }
391
simplifyPopSlotsUnmasked(SlotRange * dst)392 void Builder::simplifyPopSlotsUnmasked(SlotRange* dst) {
393 if (!dst->count || fInstructions.empty()) {
394 // There's nothing left to simplify.
395 return;
396 }
397
398 Instruction& lastInstruction = fInstructions.back();
399
400 // If the last instruction is pushing a constant, we can simplify it by copying the constant
401 // directly into the destination slot.
402 if (lastInstruction.fOp == BuilderOp::push_literal) {
403 // Remove the constant-push instruction.
404 int value = lastInstruction.fImmA;
405 fInstructions.pop_back();
406
407 // Consume one destination slot.
408 dst->count--;
409 Slot destinationSlot = dst->index + dst->count;
410
411 // Continue simplifying if possible.
412 this->simplifyPopSlotsUnmasked(dst);
413
414 // Write the constant directly to the destination slot.
415 this->copy_constant(destinationSlot, value);
416 return;
417 }
418
419 // If the last instruction is pushing a zero, we can save a step by directly zeroing out
420 // the destination slot.
421 if (lastInstruction.fOp == BuilderOp::push_zeros) {
422 // Remove one zero-push.
423 lastInstruction.fImmA--;
424 if (lastInstruction.fImmA == 0) {
425 fInstructions.pop_back();
426 }
427
428 // Consume one destination slot.
429 dst->count--;
430 Slot destinationSlot = dst->index + dst->count;
431
432 // Continue simplifying if possible.
433 this->simplifyPopSlotsUnmasked(dst);
434
435 // Zero the destination slot directly.
436 this->zero_slots_unmasked({destinationSlot, 1});
437 return;
438 }
439
440 // If the last instruction is pushing a slot, we can just copy that slot.
441 if (lastInstruction.fOp == BuilderOp::push_slots) {
442 // Get the last slot.
443 Slot sourceSlot = lastInstruction.fSlotA + lastInstruction.fImmA - 1;
444 lastInstruction.fImmA--;
445 if (lastInstruction.fImmA == 0) {
446 fInstructions.pop_back();
447 }
448
449 // Consume one destination slot.
450 dst->count--;
451 Slot destinationSlot = dst->index + dst->count;
452
453 // Try once more.
454 this->simplifyPopSlotsUnmasked(dst);
455
456 // Copy the slot directly.
457 if (destinationSlot != sourceSlot) {
458 this->copy_slots_unmasked({destinationSlot, 1}, {sourceSlot, 1});
459 }
460 return;
461 }
462 }
463
pop_slots_unmasked(SlotRange dst)464 void Builder::pop_slots_unmasked(SlotRange dst) {
465 SkASSERT(dst.count >= 0);
466
467 // If we are popping immediately after a push, we can simplify the code by writing the pushed
468 // value directly to the destination range.
469 this->simplifyPopSlotsUnmasked(&dst);
470
471 // Pop from the stack normally.
472 if (dst.count > 0) {
473 this->copy_stack_to_slots_unmasked(dst);
474 this->discard_stack(dst.count);
475 }
476 }
477
copy_stack_to_slots(SlotRange dst,int offsetFromStackTop)478 void Builder::copy_stack_to_slots(SlotRange dst, int offsetFromStackTop) {
479 // If the execution mask is known to be all-true, then we can ignore the write mask.
480 if (!this->executionMaskWritesAreEnabled()) {
481 this->copy_stack_to_slots_unmasked(dst, offsetFromStackTop);
482 return;
483 }
484
485 // If the last instruction copied the previous stack slots, just extend it.
486 if (!fInstructions.empty()) {
487 Instruction& lastInstruction = fInstructions.back();
488
489 // If the last op is copy-stack-to-slots...
490 if (lastInstruction.fOp == BuilderOp::copy_stack_to_slots &&
491 // and this op's destination is immediately after the last copy-slots-op's destination
492 lastInstruction.fSlotA + lastInstruction.fImmA == dst.index &&
493 // and this op's source is immediately after the last copy-slots-op's source
494 lastInstruction.fImmB - lastInstruction.fImmA == offsetFromStackTop) {
495 // then we can just extend the copy!
496 lastInstruction.fImmA += dst.count;
497 return;
498 }
499 }
500
501 fInstructions.push_back({BuilderOp::copy_stack_to_slots, {dst.index},
502 dst.count, offsetFromStackTop});
503 }
504
slot_ranges_overlap(SlotRange x,SlotRange y)505 static bool slot_ranges_overlap(SlotRange x, SlotRange y) {
506 return x.index < y.index + y.count &&
507 y.index < x.index + x.count;
508 }
509
copy_slots_unmasked(SlotRange dst,SlotRange src)510 void Builder::copy_slots_unmasked(SlotRange dst, SlotRange src) {
511 // If the last instruction copied adjacent slots, just extend it.
512 if (!fInstructions.empty()) {
513 Instruction& lastInstr = fInstructions.back();
514
515 // If the last op is copy-slots-unmasked...
516 if (lastInstr.fOp == BuilderOp::copy_slot_unmasked &&
517 // and this op's destination is immediately after the last copy-slots-op's destination
518 lastInstr.fSlotA + lastInstr.fImmA == dst.index &&
519 // and this op's source is immediately after the last copy-slots-op's source
520 lastInstr.fSlotB + lastInstr.fImmA == src.index &&
521 // and the source/dest ranges will not overlap
522 !slot_ranges_overlap({lastInstr.fSlotB, lastInstr.fImmA + dst.count},
523 {lastInstr.fSlotA, lastInstr.fImmA + dst.count})) {
524 // then we can just extend the copy!
525 lastInstr.fImmA += dst.count;
526 return;
527 }
528 }
529
530 SkASSERT(dst.count == src.count);
531 fInstructions.push_back({BuilderOp::copy_slot_unmasked, {dst.index, src.index}, dst.count});
532 }
533
copy_stack_to_slots_unmasked(SlotRange dst,int offsetFromStackTop)534 void Builder::copy_stack_to_slots_unmasked(SlotRange dst, int offsetFromStackTop) {
535 // If the last instruction copied the previous stack slots, just extend it.
536 if (!fInstructions.empty()) {
537 Instruction& lastInstruction = fInstructions.back();
538
539 // If the last op is copy-stack-to-slots-unmasked...
540 if (lastInstruction.fOp == BuilderOp::copy_stack_to_slots_unmasked &&
541 // and this op's destination is immediately after the last copy-slots-op's destination
542 lastInstruction.fSlotA + lastInstruction.fImmA == dst.index &&
543 // and this op's source is immediately after the last copy-slots-op's source
544 lastInstruction.fImmB - lastInstruction.fImmA == offsetFromStackTop) {
545 // then we can just extend the copy!
546 lastInstruction.fImmA += dst.count;
547 return;
548 }
549 }
550
551 fInstructions.push_back({BuilderOp::copy_stack_to_slots_unmasked, {dst.index},
552 dst.count, offsetFromStackTop});
553 }
554
pop_return_mask()555 void Builder::pop_return_mask() {
556 SkASSERT(this->executionMaskWritesAreEnabled());
557
558 // This instruction is going to overwrite the return mask. If the previous instruction was
559 // masking off the return mask, that's wasted work and it can be eliminated.
560 if (!fInstructions.empty()) {
561 Instruction& lastInstruction = fInstructions.back();
562
563 if (lastInstruction.fOp == BuilderOp::mask_off_return_mask) {
564 fInstructions.pop_back();
565 }
566 }
567
568 fInstructions.push_back({BuilderOp::pop_return_mask, {}});
569 }
570
zero_slots_unmasked(SlotRange dst)571 void Builder::zero_slots_unmasked(SlotRange dst) {
572 if (!fInstructions.empty()) {
573 Instruction& lastInstruction = fInstructions.back();
574
575 if (lastInstruction.fOp == BuilderOp::zero_slot_unmasked) {
576 if (lastInstruction.fSlotA + lastInstruction.fImmA == dst.index) {
577 // The previous instruction was zeroing the range immediately before this range.
578 // Combine the ranges.
579 lastInstruction.fImmA += dst.count;
580 return;
581 }
582 }
583
584 if (lastInstruction.fOp == BuilderOp::zero_slot_unmasked) {
585 if (lastInstruction.fSlotA == dst.index + dst.count) {
586 // The previous instruction was zeroing the range immediately after this range.
587 // Combine the ranges.
588 lastInstruction.fSlotA = dst.index;
589 lastInstruction.fImmA += dst.count;
590 return;
591 }
592 }
593 }
594
595 fInstructions.push_back({BuilderOp::zero_slot_unmasked, {dst.index}, dst.count});
596 }
597
pack_nybbles(SkSpan<const int8_t> components)598 static int pack_nybbles(SkSpan<const int8_t> components) {
599 // Pack up to 8 elements into nybbles, in reverse order.
600 int packed = 0;
601 for (auto iter = components.rbegin(); iter != components.rend(); ++iter) {
602 SkASSERT(*iter >= 0 && *iter <= 0xF);
603 packed <<= 4;
604 packed |= *iter;
605 }
606 return packed;
607 }
608
unpack_nybbles_to_offsets(uint32_t components,SkSpan<uint16_t> offsets)609 static void unpack_nybbles_to_offsets(uint32_t components, SkSpan<uint16_t> offsets) {
610 // Unpack component nybbles into byte-offsets pointing at stack slots.
611 for (size_t index = 0; index < offsets.size(); ++index) {
612 offsets[index] = (components & 0xF) * SkOpts::raster_pipeline_highp_stride * sizeof(float);
613 components >>= 4;
614 }
615 }
616
swizzle_copy_stack_to_slots(SlotRange dst,SkSpan<const int8_t> components,int offsetFromStackTop)617 void Builder::swizzle_copy_stack_to_slots(SlotRange dst,
618 SkSpan<const int8_t> components,
619 int offsetFromStackTop) {
620 // An unmasked version of this op could squeeze out a little bit of extra speed, if needed.
621 fInstructions.push_back({BuilderOp::swizzle_copy_stack_to_slots, {dst.index},
622 (int)components.size(), offsetFromStackTop, pack_nybbles(components)});
623 }
624
swizzle(int consumedSlots,SkSpan<const int8_t> components)625 void Builder::swizzle(int consumedSlots, SkSpan<const int8_t> components) {
626 // Consumes `consumedSlots` elements on the stack, then generates `elementSpan.size()` elements.
627 SkASSERT(consumedSlots >= 0);
628
629 // We only allow up to 16 elements, and they can only reach 0-15 slots, due to nybble packing.
630 int numElements = components.size();
631 SkASSERT(numElements <= 16);
632 SkASSERT(std::all_of(components.begin(), components.end(), [](int8_t e){ return e >= 0; }));
633 SkASSERT(std::all_of(components.begin(), components.end(), [](int8_t e){ return e <= 0xF; }));
634
635 // Make a local copy of the element array.
636 int8_t elements[16] = {};
637 std::copy(components.begin(), components.end(), std::begin(elements));
638
639 while (numElements > 0) {
640 // If the first element of the swizzle is zero...
641 if (elements[0] != 0) {
642 break;
643 }
644 // ...and zero isn't used elsewhere in the swizzle...
645 if (std::any_of(&elements[1], &elements[numElements], [](int8_t e) { return e == 0; })) {
646 break;
647 }
648 // We can omit the first slot from the swizzle entirely.
649 // Slide everything forward by one slot, and reduce the element index by one.
650 for (int index = 1; index < numElements; ++index) {
651 elements[index - 1] = elements[index] - 1;
652 }
653 elements[numElements - 1] = 0;
654 --consumedSlots;
655 --numElements;
656 }
657
658 // A completely empty swizzle is a no-op.
659 if (numElements == 0) {
660 this->discard_stack(consumedSlots);
661 return;
662 }
663
664 if (consumedSlots <= 4 && numElements <= 4) {
665 // We can fit everything into a little swizzle.
666 int op = (int)BuilderOp::swizzle_1 + numElements - 1;
667 fInstructions.push_back({(BuilderOp)op, {}, consumedSlots,
668 pack_nybbles(SkSpan(elements, numElements))});
669 return;
670 }
671
672 // This is a big swizzle. We use the `shuffle` op to handle these.
673 // Slot usage is packed into immA. The top 16 bits of immA count the consumed slots; the bottom
674 // 16 bits count the generated slots.
675 int slotUsage = consumedSlots << 16;
676 slotUsage |= numElements;
677
678 // Pack immB and immC with the shuffle list in packed-nybble form.
679 fInstructions.push_back({BuilderOp::shuffle, {}, slotUsage,
680 pack_nybbles(SkSpan(&elements[0], 8)),
681 pack_nybbles(SkSpan(&elements[8], 8))});
682 }
683
transpose(int columns,int rows)684 void Builder::transpose(int columns, int rows) {
685 // Transposes a matrix of size CxR on the stack (into a matrix of size RxC).
686 int8_t elements[16] = {};
687 size_t index = 0;
688 for (int r = 0; r < rows; ++r) {
689 for (int c = 0; c < columns; ++c) {
690 elements[index++] = (c * rows) + r;
691 }
692 }
693 this->swizzle(/*consumedSlots=*/columns * rows, SkSpan(elements, index));
694 }
695
diagonal_matrix(int columns,int rows)696 void Builder::diagonal_matrix(int columns, int rows) {
697 // Generates a CxR diagonal matrix from the top two scalars on the stack.
698 int8_t elements[16] = {};
699 size_t index = 0;
700 for (int c = 0; c < columns; ++c) {
701 for (int r = 0; r < rows; ++r) {
702 elements[index++] = (c == r) ? 1 : 0;
703 }
704 }
705 this->swizzle(/*consumedSlots=*/2, SkSpan(elements, index));
706 }
707
matrix_resize(int origColumns,int origRows,int newColumns,int newRows)708 void Builder::matrix_resize(int origColumns, int origRows, int newColumns, int newRows) {
709 // Resizes a CxR matrix at the top of the stack to C'xR'.
710 int8_t elements[16] = {};
711 size_t index = 0;
712
713 size_t consumedSlots = origColumns * origRows;
714 size_t zeroOffset = 0, oneOffset = 0;
715
716 for (int c = 0; c < newColumns; ++c) {
717 for (int r = 0; r < newRows; ++r) {
718 if (c < origColumns && r < origRows) {
719 // Push an element from the original matrix.
720 elements[index++] = (c * origRows) + r;
721 } else {
722 // This element is outside the original matrix; push 1 or 0.
723 if (c == r) {
724 // We need to synthesize a literal 1.
725 if (oneOffset == 0) {
726 this->push_literal_f(1.0f);
727 oneOffset = consumedSlots++;
728 }
729 elements[index++] = oneOffset;
730 } else {
731 // We need to synthesize a literal 0.
732 if (zeroOffset == 0) {
733 this->push_zeros(1);
734 zeroOffset = consumedSlots++;
735 }
736 elements[index++] = zeroOffset;
737 }
738 }
739 }
740 }
741 this->swizzle(consumedSlots, SkSpan(elements, index));
742 }
743
finish(int numValueSlots,int numUniformSlots,SkRPDebugTrace * debugTrace)744 std::unique_ptr<Program> Builder::finish(int numValueSlots,
745 int numUniformSlots,
746 SkRPDebugTrace* debugTrace) {
747 // Verify that calls to enableExecutionMaskWrites and disableExecutionMaskWrites are balanced.
748 SkASSERT(fExecutionMaskWritesEnabled == 0);
749
750 return std::make_unique<Program>(std::move(fInstructions), numValueSlots, numUniformSlots,
751 fNumLabels, debugTrace);
752 }
753
optimize()754 void Program::optimize() {
755 // TODO(johnstiles): perform any last-minute cleanup of the instruction stream here
756 }
757
stack_usage(const Instruction & inst)758 static int stack_usage(const Instruction& inst) {
759 switch (inst.fOp) {
760 case BuilderOp::push_literal:
761 case BuilderOp::push_condition_mask:
762 case BuilderOp::push_loop_mask:
763 case BuilderOp::push_return_mask:
764 return 1;
765
766 case BuilderOp::push_src_rgba:
767 case BuilderOp::push_dst_rgba:
768 return 4;
769
770 case BuilderOp::push_slots:
771 case BuilderOp::push_uniform:
772 case BuilderOp::push_zeros:
773 case BuilderOp::push_clone:
774 case BuilderOp::push_clone_from_stack:
775 return inst.fImmA;
776
777 case BuilderOp::pop_condition_mask:
778 case BuilderOp::pop_loop_mask:
779 case BuilderOp::pop_and_reenable_loop_mask:
780 case BuilderOp::pop_return_mask:
781 return -1;
782
783 case BuilderOp::pop_src_rg:
784 return -2;
785
786 case BuilderOp::pop_src_rgba:
787 case BuilderOp::pop_dst_rgba:
788 return -4;
789
790 case ALL_N_WAY_BINARY_OP_CASES:
791 case ALL_MULTI_SLOT_BINARY_OP_CASES:
792 case BuilderOp::discard_stack:
793 case BuilderOp::select:
794 return -inst.fImmA;
795
796 case ALL_MULTI_SLOT_TERNARY_OP_CASES:
797 return 2 * -inst.fImmA;
798
799 case BuilderOp::swizzle_1:
800 return 1 - inst.fImmA; // consumes immA slots and emits a scalar
801 case BuilderOp::swizzle_2:
802 return 2 - inst.fImmA; // consumes immA slots and emits a 2-slot vector
803 case BuilderOp::swizzle_3:
804 return 3 - inst.fImmA; // consumes immA slots and emits a 3-slot vector
805 case BuilderOp::swizzle_4:
806 return 4 - inst.fImmA; // consumes immA slots and emits a 4-slot vector
807
808 case BuilderOp::dot_2_floats:
809 return -3; // consumes two 2-slot vectors and emits one scalar
810 case BuilderOp::dot_3_floats:
811 return -5; // consumes two 3-slot vectors and emits one scalar
812 case BuilderOp::dot_4_floats:
813 return -7; // consumes two 4-slot vectors and emits one scalar
814
815 case BuilderOp::shuffle: {
816 int consumed = inst.fImmA >> 16;
817 int generated = inst.fImmA & 0xFFFF;
818 return generated - consumed;
819 }
820 case ALL_SINGLE_SLOT_UNARY_OP_CASES:
821 case ALL_MULTI_SLOT_UNARY_OP_CASES:
822 default:
823 return 0;
824 }
825 }
826
tempStackMaxDepths() const827 Program::StackDepthMap Program::tempStackMaxDepths() const {
828 StackDepthMap largest;
829 StackDepthMap current;
830
831 int curIdx = 0;
832 for (const Instruction& inst : fInstructions) {
833 if (inst.fOp == BuilderOp::set_current_stack) {
834 curIdx = inst.fImmA;
835 }
836 current[curIdx] += stack_usage(inst);
837 largest[curIdx] = std::max(current[curIdx], largest[curIdx]);
838 SkASSERTF(current[curIdx] >= 0, "unbalanced temp stack push/pop on stack %d", curIdx);
839 }
840
841 for (const auto& [stackIdx, depth] : current) {
842 (void)stackIdx;
843 SkASSERTF(depth == 0, "unbalanced temp stack push/pop");
844 }
845
846 return largest;
847 }
848
Program(SkTArray<Instruction> instrs,int numValueSlots,int numUniformSlots,int numLabels,SkRPDebugTrace * debugTrace)849 Program::Program(SkTArray<Instruction> instrs,
850 int numValueSlots,
851 int numUniformSlots,
852 int numLabels,
853 SkRPDebugTrace* debugTrace)
854 : fInstructions(std::move(instrs))
855 , fNumValueSlots(numValueSlots)
856 , fNumUniformSlots(numUniformSlots)
857 , fNumLabels(numLabels)
858 , fDebugTrace(debugTrace) {
859 this->optimize();
860
861 fTempStackMaxDepths = this->tempStackMaxDepths();
862
863 fNumTempStackSlots = 0;
864 for (const auto& [stackIdx, depth] : fTempStackMaxDepths) {
865 (void)stackIdx;
866 fNumTempStackSlots += depth;
867 }
868 }
869
appendCopy(SkTArray<Stage> * pipeline,SkArenaAlloc * alloc,ProgramOp baseStage,float * dst,int dstStride,const float * src,int srcStride,int numSlots) const870 void Program::appendCopy(SkTArray<Stage>* pipeline,
871 SkArenaAlloc* alloc,
872 ProgramOp baseStage,
873 float* dst, int dstStride,
874 const float* src, int srcStride,
875 int numSlots) const {
876 SkASSERT(numSlots >= 0);
877 while (numSlots > 4) {
878 this->appendCopy(pipeline, alloc, baseStage, dst, dstStride, src, srcStride,/*numSlots=*/4);
879 dst += 4 * dstStride;
880 src += 4 * srcStride;
881 numSlots -= 4;
882 }
883
884 if (numSlots > 0) {
885 SkASSERT(numSlots <= 4);
886 auto stage = (ProgramOp)((int)baseStage + numSlots - 1);
887 auto* ctx = alloc->make<SkRasterPipeline_BinaryOpCtx>();
888 ctx->dst = dst;
889 ctx->src = src;
890 pipeline->push_back({stage, ctx});
891 }
892 }
893
appendCopySlotsUnmasked(SkTArray<Stage> * pipeline,SkArenaAlloc * alloc,float * dst,const float * src,int numSlots) const894 void Program::appendCopySlotsUnmasked(SkTArray<Stage>* pipeline,
895 SkArenaAlloc* alloc,
896 float* dst,
897 const float* src,
898 int numSlots) const {
899 this->appendCopy(pipeline, alloc,
900 ProgramOp::copy_slot_unmasked,
901 dst, /*dstStride=*/SkOpts::raster_pipeline_highp_stride,
902 src, /*srcStride=*/SkOpts::raster_pipeline_highp_stride,
903 numSlots);
904 }
905
appendCopySlotsMasked(SkTArray<Stage> * pipeline,SkArenaAlloc * alloc,float * dst,const float * src,int numSlots) const906 void Program::appendCopySlotsMasked(SkTArray<Stage>* pipeline,
907 SkArenaAlloc* alloc,
908 float* dst,
909 const float* src,
910 int numSlots) const {
911 this->appendCopy(pipeline, alloc,
912 ProgramOp::copy_slot_masked,
913 dst, /*dstStride=*/SkOpts::raster_pipeline_highp_stride,
914 src, /*srcStride=*/SkOpts::raster_pipeline_highp_stride,
915 numSlots);
916 }
917
appendCopyConstants(SkTArray<Stage> * pipeline,SkArenaAlloc * alloc,float * dst,const float * src,int numSlots) const918 void Program::appendCopyConstants(SkTArray<Stage>* pipeline,
919 SkArenaAlloc* alloc,
920 float* dst,
921 const float* src,
922 int numSlots) const {
923 this->appendCopy(pipeline, alloc,
924 ProgramOp::copy_constant,
925 dst, /*dstStride=*/SkOpts::raster_pipeline_highp_stride,
926 src, /*srcStride=*/1,
927 numSlots);
928 }
929
appendSingleSlotUnaryOp(SkTArray<Stage> * pipeline,ProgramOp stage,float * dst,int numSlots) const930 void Program::appendSingleSlotUnaryOp(SkTArray<Stage>* pipeline, ProgramOp stage,
931 float* dst, int numSlots) const {
932 SkASSERT(numSlots >= 0);
933 while (numSlots--) {
934 pipeline->push_back({stage, dst});
935 dst += SkOpts::raster_pipeline_highp_stride;
936 }
937 }
938
appendMultiSlotUnaryOp(SkTArray<Stage> * pipeline,ProgramOp baseStage,float * dst,int numSlots) const939 void Program::appendMultiSlotUnaryOp(SkTArray<Stage>* pipeline, ProgramOp baseStage,
940 float* dst, int numSlots) const {
941 SkASSERT(numSlots >= 0);
942 while (numSlots > 4) {
943 this->appendMultiSlotUnaryOp(pipeline, baseStage, dst, /*numSlots=*/4);
944 dst += 4 * SkOpts::raster_pipeline_highp_stride;
945 numSlots -= 4;
946 }
947
948 SkASSERT(numSlots <= 4);
949 auto stage = (ProgramOp)((int)baseStage + numSlots - 1);
950 pipeline->push_back({stage, dst});
951 }
952
appendAdjacentNWayBinaryOp(SkTArray<Stage> * pipeline,SkArenaAlloc * alloc,ProgramOp stage,float * dst,const float * src,int numSlots) const953 void Program::appendAdjacentNWayBinaryOp(SkTArray<Stage>* pipeline, SkArenaAlloc* alloc,
954 ProgramOp stage,
955 float* dst, const float* src, int numSlots) const {
956 // The source and destination must be directly next to one another.
957 SkASSERT(numSlots >= 0);
958 SkASSERT((dst + SkOpts::raster_pipeline_highp_stride * numSlots) == src);
959
960 if (numSlots > 0) {
961 auto ctx = alloc->make<SkRasterPipeline_BinaryOpCtx>();
962 ctx->dst = dst;
963 ctx->src = src;
964 pipeline->push_back({stage, ctx});
965 return;
966 }
967 }
968
appendAdjacentMultiSlotBinaryOp(SkTArray<Stage> * pipeline,SkArenaAlloc * alloc,ProgramOp baseStage,float * dst,const float * src,int numSlots) const969 void Program::appendAdjacentMultiSlotBinaryOp(SkTArray<Stage>* pipeline, SkArenaAlloc* alloc,
970 ProgramOp baseStage,
971 float* dst, const float* src, int numSlots) const {
972 // The source and destination must be directly next to one another.
973 SkASSERT(numSlots >= 0);
974 SkASSERT((dst + SkOpts::raster_pipeline_highp_stride * numSlots) == src);
975
976 if (numSlots > 4) {
977 this->appendAdjacentNWayBinaryOp(pipeline, alloc, baseStage, dst, src, numSlots);
978 return;
979 }
980 if (numSlots > 0) {
981 auto specializedStage = (ProgramOp)((int)baseStage + numSlots);
982 pipeline->push_back({specializedStage, dst});
983 }
984 }
985
appendAdjacentMultiSlotTernaryOp(SkTArray<Stage> * pipeline,SkArenaAlloc * alloc,ProgramOp baseStage,float * dst,const float * src0,const float * src1,int numSlots) const986 void Program::appendAdjacentMultiSlotTernaryOp(SkTArray<Stage>* pipeline, SkArenaAlloc* alloc,
987 ProgramOp baseStage, float* dst, const float* src0,
988 const float* src1, int numSlots) const {
989 // The float pointers must all be immediately adjacent to each other.
990 SkASSERT(numSlots >= 0);
991 SkASSERT((dst + SkOpts::raster_pipeline_highp_stride * numSlots) == src0);
992 SkASSERT((src0 + SkOpts::raster_pipeline_highp_stride * numSlots) == src1);
993
994 if (numSlots > 4) {
995 auto ctx = alloc->make<SkRasterPipeline_TernaryOpCtx>();
996 ctx->dst = dst;
997 ctx->src0 = src0;
998 ctx->src1 = src1;
999 pipeline->push_back({baseStage, ctx});
1000 return;
1001 }
1002 if (numSlots > 0) {
1003 auto specializedStage = (ProgramOp)((int)baseStage + numSlots);
1004 pipeline->push_back({specializedStage, dst});
1005 }
1006 }
1007
void Program::appendStackRewind(SkTArray<Stage>* pipeline) const {
    // A stack-rewind op is only emitted when the pipeline cannot rely on tail calls; with
    // SK_HAS_MUSTTAIL, tail-calling keeps the native stack flat and no rewind op is needed.
#if defined(SKSL_STANDALONE) || !SK_HAS_MUSTTAIL
    pipeline->push_back({ProgramOp::stack_rewind, nullptr});
#endif
}
1013
// Reinterprets a small integer as a context pointer, letting ops carry an immediate value in
// the context field without any allocation.
static void* context_bit_pun(intptr_t val) {
    return sk_bit_cast<void*>(val);
}
1017
allocateSlotData(SkArenaAlloc * alloc) const1018 Program::SlotData Program::allocateSlotData(SkArenaAlloc* alloc) const {
1019 // Allocate a contiguous slab of slot data for values and stack entries.
1020 const int N = SkOpts::raster_pipeline_highp_stride;
1021 const int vectorWidth = N * sizeof(float);
1022 const int allocSize = vectorWidth * (fNumValueSlots + fNumTempStackSlots);
1023 float* slotPtr = static_cast<float*>(alloc->makeBytesAlignedTo(allocSize, vectorWidth));
1024 sk_bzero(slotPtr, allocSize);
1025
1026 // Store the temp stack immediately after the values.
1027 SlotData s;
1028 s.values = SkSpan{slotPtr, N * fNumValueSlots};
1029 s.stack = SkSpan{s.values.end(), N * fNumTempStackSlots};
1030 return s;
1031 }
1032
1033 #if !defined(SKSL_STANDALONE)
1034
// Assembles this program into an executable SkRasterPipeline. Label ops are resolved into
// relative branch offsets in a second pass. Returns false if a required callback (shader,
// color-filter, or blender invocation) is missing or fails.
bool Program::appendStages(SkRasterPipeline* pipeline,
                           SkArenaAlloc* alloc,
                           RP::Callbacks* callbacks,
                           SkSpan<const float> uniforms) const {
    // Convert our Instruction list to an array of ProgramOps.
    SkTArray<Stage> stages;
    this->makeStages(&stages, alloc, uniforms, this->allocateSlotData(alloc));

    // Allocate buffers for branch targets and labels; these are needed to convert labels into
    // actual offsets into the pipeline and fix up branches.
    SkTArray<SkRasterPipeline_BranchCtx*> branchContexts;
    branchContexts.reserve_back(fNumLabels);
    SkTArray<int> labelOffsets;
    labelOffsets.push_back_n(fNumLabels, -1);
    SkTArray<int> branchGoesToLabel;
    branchGoesToLabel.reserve_back(fNumLabels);

    for (const Stage& stage : stages) {
        switch (stage.op) {
            case ProgramOp::stack_rewind:
                pipeline->append_stack_rewind();
                break;

            case ProgramOp::invoke_shader:
                // The context value for invoke ops is a bit-punned child index, not a pointer.
                if (!callbacks || !callbacks->appendShader(sk_bit_cast<intptr_t>(stage.ctx))) {
                    return false;
                }
                break;

            case ProgramOp::invoke_color_filter:
                if (!callbacks || !callbacks->appendColorFilter(sk_bit_cast<intptr_t>(stage.ctx))) {
                    return false;
                }
                break;

            case ProgramOp::invoke_blender:
                if (!callbacks || !callbacks->appendBlender(sk_bit_cast<intptr_t>(stage.ctx))) {
                    return false;
                }
                break;

            case ProgramOp::label: {
                // Remember the absolute pipeline position of this label. (Labels themselves are
                // not appended to the pipeline; they only mark positions.)
                int labelID = sk_bit_cast<intptr_t>(stage.ctx);
                SkASSERT(labelID >= 0 && labelID < fNumLabels);
                labelOffsets[labelID] = pipeline->getNumStages();
                break;
            }
            case ProgramOp::jump:
            case ProgramOp::branch_if_any_active_lanes:
            case ProgramOp::branch_if_no_active_lanes:
            case ProgramOp::branch_if_no_active_lanes_eq: {
                // The branch context contains a valid label ID at this point.
                auto* branchCtx = static_cast<SkRasterPipeline_BranchCtx*>(stage.ctx);
                int labelID = branchCtx->offset;
                SkASSERT(labelID >= 0 && labelID < fNumLabels);

                // Replace the label ID in the branch context with the absolute pipeline position.
                // We will go back over the branch targets at the end and fix them up.
                branchCtx->offset = pipeline->getNumStages();

                SkASSERT(branchContexts.size() == branchGoesToLabel.size());
                branchContexts.push_back(branchCtx);
                branchGoesToLabel.push_back(labelID);
                // Intentional fall-through: the branch op itself must still be appended.
                [[fallthrough]];
            }
            default:
                // Append a regular op to the program.
                SkASSERT((int)stage.op < kNumRasterPipelineHighpOps);
                pipeline->append((SkRasterPipelineOp)stage.op, stage.ctx);
                break;
        }
    }

    // Now that we have assembled the program and know the pipeline positions of each label and
    // branch, fix up every branch target.
    SkASSERT(branchContexts.size() == branchGoesToLabel.size());
    for (int index = 0; index < branchContexts.size(); ++index) {
        int branchFromIdx = branchContexts[index]->offset;
        int branchToIdx = labelOffsets[branchGoesToLabel[index]];
        // Branch offsets are stored relative to the branch op's own position.
        branchContexts[index]->offset = branchToIdx - branchFromIdx;
    }

    return true;
}
1120
1121 #endif
1122
// Lowers the optimized Instruction list into an array of Raster Pipeline stages, binding each
// op to concrete pointers into the value slots, uniforms, and temp-stack storage supplied by
// `slots` and `uniforms`. Branch ops are emitted with label IDs in their contexts; they are
// resolved to real offsets later (see appendStages).
void Program::makeStages(SkTArray<Stage>* pipeline,
                         SkArenaAlloc* alloc,
                         SkSpan<const float> uniforms,
                         const SlotData& slots) const {
    SkASSERT(fNumUniformSlots == SkToInt(uniforms.size()));

    const int N = SkOpts::raster_pipeline_highp_stride;
    StackDepthMap tempStackDepth;
    int currentStack = 0;
    int mostRecentRewind = 0;

    // Assemble a map holding the current stack-top for each temporary stack. Position each temp
    // stack immediately after the previous temp stack; temp stacks are never allowed to overlap.
    int pos = 0;
    SkTHashMap<int, float*> tempStackMap;
    for (auto& [idx, depth] : fTempStackMaxDepths) {
        tempStackMap[idx] = slots.stack.begin() + (pos * N);
        pos += depth;
    }

    // Track labels that we have reached in processing.
    SkBitSet labelsEncountered(fNumLabels);

    auto EmitStackRewindForBackwardsBranch = [&](int labelID) {
        // If we have already encountered the label associated with this branch, this is a
        // backwards branch. Add a stack-rewind immediately before the branch to ensure that
        // long-running loops don't use an unbounded amount of stack space.
        if (labelsEncountered.test(labelID)) {
            this->appendStackRewind(pipeline);
            mostRecentRewind = pipeline->size();
        }
    };

    // We can reuse constants from our arena by placing them in this map.
    SkTHashMap<int, int*> constantLookupMap; // <constant value, pointer into arena>

    // Write each BuilderOp to the pipeline array.
    pipeline->reserve_back(fInstructions.size());
    for (const Instruction& inst : fInstructions) {
        // Convenience lambdas that convert this instruction's slot indices into pointers.
        auto SlotA = [&]() { return &slots.values[N * inst.fSlotA]; };
        auto SlotB = [&]() { return &slots.values[N * inst.fSlotB]; };
        auto UniformA = [&]() { return &uniforms[inst.fSlotA]; };
        // Note: this is a reference into the map, so the stack-pointer adjustment at the bottom
        // of the loop persists across instructions.
        float*& tempStackPtr = tempStackMap[currentStack];

        switch (inst.fOp) {
            case BuilderOp::label:
                SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);
                labelsEncountered.set(inst.fImmA);
                pipeline->push_back({ProgramOp::label, context_bit_pun(inst.fImmA)});
                break;

            case BuilderOp::jump:
            case BuilderOp::branch_if_any_active_lanes:
            case BuilderOp::branch_if_no_active_lanes: {
                SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);
                EmitStackRewindForBackwardsBranch(inst.fImmA);

                // The branch context carries the label ID for now; it is patched into a real
                // offset when the program is appended to a pipeline.
                auto* ctx = alloc->make<SkRasterPipeline_BranchCtx>();
                ctx->offset = inst.fImmA;
                pipeline->push_back({(ProgramOp)inst.fOp, ctx});
                break;
            }
            case BuilderOp::branch_if_no_active_lanes_on_stack_top_equal: {
                SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels);
                EmitStackRewindForBackwardsBranch(inst.fImmA);

                auto* ctx = alloc->make<SkRasterPipeline_BranchIfEqualCtx>();
                ctx->offset = inst.fImmA;
                ctx->value = inst.fImmB;
                ctx->ptr = reinterpret_cast<int*>(tempStackPtr - N);
                pipeline->push_back({ProgramOp::branch_if_no_active_lanes_eq, ctx});
                break;
            }
            case BuilderOp::init_lane_masks:
                pipeline->push_back({ProgramOp::init_lane_masks, nullptr});
                break;

            case BuilderOp::store_src_rg:
                pipeline->push_back({ProgramOp::store_src_rg, SlotA()});
                break;

            case BuilderOp::store_src:
                pipeline->push_back({ProgramOp::store_src, SlotA()});
                break;

            case BuilderOp::store_dst:
                pipeline->push_back({ProgramOp::store_dst, SlotA()});
                break;

            case BuilderOp::store_device_xy01:
                pipeline->push_back({ProgramOp::store_device_xy01, SlotA()});
                break;

            case BuilderOp::load_src:
                pipeline->push_back({ProgramOp::load_src, SlotA()});
                break;

            case BuilderOp::load_dst:
                pipeline->push_back({ProgramOp::load_dst, SlotA()});
                break;

            case ALL_SINGLE_SLOT_UNARY_OP_CASES: {
                // Unary ops operate in place on the top fImmA slots of the current temp stack.
                float* dst = tempStackPtr - (inst.fImmA * N);
                this->appendSingleSlotUnaryOp(pipeline, (ProgramOp)inst.fOp, dst, inst.fImmA);
                break;
            }
            case ALL_MULTI_SLOT_UNARY_OP_CASES: {
                float* dst = tempStackPtr - (inst.fImmA * N);
                this->appendMultiSlotUnaryOp(pipeline, (ProgramOp)inst.fOp, dst, inst.fImmA);
                break;
            }
            case ALL_N_WAY_BINARY_OP_CASES: {
                // Binary ops consume the top two fImmA-slot groups; the result lands in `dst`.
                float* src = tempStackPtr - (inst.fImmA * N);
                float* dst = tempStackPtr - (inst.fImmA * 2 * N);
                this->appendAdjacentNWayBinaryOp(pipeline, alloc, (ProgramOp)inst.fOp,
                                                 dst, src, inst.fImmA);
                break;
            }
            case ALL_MULTI_SLOT_BINARY_OP_CASES: {
                float* src = tempStackPtr - (inst.fImmA * N);
                float* dst = tempStackPtr - (inst.fImmA * 2 * N);
                this->appendAdjacentMultiSlotBinaryOp(pipeline, alloc, (ProgramOp)inst.fOp,
                                                      dst, src, inst.fImmA);
                break;
            }
            case ALL_MULTI_SLOT_TERNARY_OP_CASES: {
                // Ternary ops consume the top three fImmA-slot groups.
                float* src1 = tempStackPtr - (inst.fImmA * N);
                float* src0 = tempStackPtr - (inst.fImmA * 2 * N);
                float* dst = tempStackPtr - (inst.fImmA * 3 * N);
                this->appendAdjacentMultiSlotTernaryOp(pipeline, alloc, (ProgramOp)inst.fOp,
                                                       dst, src0, src1, inst.fImmA);
                break;
            }
            case BuilderOp::select: {
                // Select is implemented as a masked copy from the top group onto the one below.
                float* src = tempStackPtr - (inst.fImmA * N);
                float* dst = tempStackPtr - (inst.fImmA * 2 * N);
                this->appendCopySlotsMasked(pipeline, alloc, dst, src, inst.fImmA);
                break;
            }
            case BuilderOp::copy_slot_masked:
                this->appendCopySlotsMasked(pipeline, alloc, SlotA(), SlotB(), inst.fImmA);
                break;

            case BuilderOp::copy_slot_unmasked:
                this->appendCopySlotsUnmasked(pipeline, alloc, SlotA(), SlotB(), inst.fImmA);
                break;

            case BuilderOp::zero_slot_unmasked:
                this->appendMultiSlotUnaryOp(pipeline, ProgramOp::zero_slot_unmasked,
                                             SlotA(), inst.fImmA);
                break;

            case BuilderOp::dot_2_floats:
            case BuilderOp::dot_3_floats:
            case BuilderOp::dot_4_floats: {
                float* dst = tempStackPtr - (inst.fImmA * 2 * N);
                pipeline->push_back({(ProgramOp)inst.fOp, dst});
                break;
            }
            case BuilderOp::swizzle_1:
            case BuilderOp::swizzle_2:
            case BuilderOp::swizzle_3:
            case BuilderOp::swizzle_4: {
                auto* ctx = alloc->make<SkRasterPipeline_SwizzleCtx>();
                ctx->ptr = tempStackPtr - (N * inst.fImmA);
                // Unpack component nybbles into byte-offsets pointing at stack slots.
                unpack_nybbles_to_offsets(inst.fImmB, SkSpan(ctx->offsets));
                pipeline->push_back({(ProgramOp)inst.fOp, ctx});
                break;
            }
            case BuilderOp::shuffle: {
                // fImmA packs two counts: consumed slots (high 16 bits), generated (low 16).
                int consumed = inst.fImmA >> 16;
                int generated = inst.fImmA & 0xFFFF;

                auto* ctx = alloc->make<SkRasterPipeline_ShuffleCtx>();
                ctx->ptr = tempStackPtr - (N * consumed);
                ctx->count = generated;
                // Unpack immB and immC from nybble form into the offset array.
                unpack_nybbles_to_offsets(inst.fImmB, SkSpan(&ctx->offsets[0], 8));
                unpack_nybbles_to_offsets(inst.fImmC, SkSpan(&ctx->offsets[8], 8));
                pipeline->push_back({ProgramOp::shuffle, ctx});
                break;
            }
            case BuilderOp::push_src_rgba: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_src, dst});
                break;
            }
            case BuilderOp::push_dst_rgba: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_dst, dst});
                break;
            }
            case BuilderOp::pop_src_rg: {
                float* dst = tempStackPtr - (2 * N);
                pipeline->push_back({ProgramOp::load_src_rg, dst});
                break;
            }
            case BuilderOp::pop_src_rgba: {
                float* dst = tempStackPtr - (4 * N);
                pipeline->push_back({ProgramOp::load_src, dst});
                break;
            }
            case BuilderOp::pop_dst_rgba: {
                float* dst = tempStackPtr - (4 * N);
                pipeline->push_back({ProgramOp::load_dst, dst});
                break;
            }
            case BuilderOp::push_slots: {
                float* dst = tempStackPtr;
                this->appendCopySlotsUnmasked(pipeline, alloc, dst, SlotA(), inst.fImmA);
                break;
            }
            case BuilderOp::push_uniform: {
                float* dst = tempStackPtr;
                this->appendCopyConstants(pipeline, alloc, dst, UniformA(), inst.fImmA);
                break;
            }
            case BuilderOp::push_zeros: {
                float* dst = tempStackPtr;
                this->appendMultiSlotUnaryOp(pipeline, ProgramOp::zero_slot_unmasked, dst,
                                             inst.fImmA);
                break;
            }
            case BuilderOp::push_condition_mask: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_condition_mask, dst});
                break;
            }
            case BuilderOp::pop_condition_mask: {
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::load_condition_mask, src});
                break;
            }
            case BuilderOp::merge_condition_mask: {
                float* ptr = tempStackPtr - (2 * N);
                pipeline->push_back({ProgramOp::merge_condition_mask, ptr});
                break;
            }
            case BuilderOp::push_loop_mask: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_loop_mask, dst});
                break;
            }
            case BuilderOp::pop_loop_mask: {
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::load_loop_mask, src});
                break;
            }
            case BuilderOp::pop_and_reenable_loop_mask: {
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::reenable_loop_mask, src});
                break;
            }
            case BuilderOp::reenable_loop_mask:
                pipeline->push_back({ProgramOp::reenable_loop_mask, SlotA()});
                break;

            case BuilderOp::mask_off_loop_mask:
                pipeline->push_back({ProgramOp::mask_off_loop_mask, nullptr});
                break;

            case BuilderOp::merge_loop_mask: {
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::merge_loop_mask, src});
                break;
            }
            case BuilderOp::push_return_mask: {
                float* dst = tempStackPtr;
                pipeline->push_back({ProgramOp::store_return_mask, dst});
                break;
            }
            case BuilderOp::pop_return_mask: {
                float* src = tempStackPtr - (1 * N);
                pipeline->push_back({ProgramOp::load_return_mask, src});
                break;
            }
            case BuilderOp::mask_off_return_mask:
                pipeline->push_back({ProgramOp::mask_off_return_mask, nullptr});
                break;

            case BuilderOp::copy_constant:
            case BuilderOp::push_literal: {
                float* dst = (inst.fOp == BuilderOp::push_literal) ? tempStackPtr : SlotA();
                // Identical constants share a single arena allocation via the lookup map.
                int* constantPtr;
                if (int** lookup = constantLookupMap.find(inst.fImmA)) {
                    constantPtr = *lookup;
                } else {
                    constantPtr = alloc->make<int>(inst.fImmA);
                    constantLookupMap[inst.fImmA] = constantPtr;
                }
                SkASSERT(constantPtr);
                this->appendCopyConstants(pipeline, alloc, dst, (float*)constantPtr,/*numSlots=*/1);
                break;
            }
            case BuilderOp::copy_stack_to_slots: {
                float* src = tempStackPtr - (inst.fImmB * N);
                this->appendCopySlotsMasked(pipeline, alloc, SlotA(), src, inst.fImmA);
                break;
            }
            case BuilderOp::copy_stack_to_slots_unmasked: {
                float* src = tempStackPtr - (inst.fImmB * N);
                this->appendCopySlotsUnmasked(pipeline, alloc, SlotA(), src, inst.fImmA);
                break;
            }
            case BuilderOp::swizzle_copy_stack_to_slots: {
                // Swizzle-copy stages are numbered consecutively by slot count from the base.
                auto stage = (ProgramOp)((int)ProgramOp::swizzle_copy_slot_masked + inst.fImmA - 1);
                auto* ctx = alloc->make<SkRasterPipeline_SwizzleCopyCtx>();
                ctx->src = tempStackPtr - (inst.fImmB * N);
                ctx->dst = SlotA();
                unpack_nybbles_to_offsets(inst.fImmC, SkSpan(ctx->offsets));
                pipeline->push_back({stage, ctx});
                break;
            }
            case BuilderOp::push_clone: {
                float* src = tempStackPtr - (inst.fImmB * N);
                float* dst = tempStackPtr;
                this->appendCopySlotsUnmasked(pipeline, alloc, dst, src, inst.fImmA);
                break;
            }
            case BuilderOp::push_clone_from_stack: {
                // Clones slots from a different temp stack (fImmB) onto the current one.
                float* sourceStackPtr = tempStackMap[inst.fImmB];
                float* src = sourceStackPtr - (inst.fImmC * N);
                float* dst = tempStackPtr;
                this->appendCopySlotsUnmasked(pipeline, alloc, dst, src, inst.fImmA);
                break;
            }
            case BuilderOp::case_op: {
                auto* ctx = alloc->make<SkRasterPipeline_CaseOpCtx>();
                ctx->ptr = reinterpret_cast<int*>(tempStackPtr - 2 * N);
                ctx->expectedValue = inst.fImmA;
                pipeline->push_back({ProgramOp::case_op, ctx});
                break;
            }
            case BuilderOp::discard_stack:
                // No stage is emitted; the stack-pointer adjustment below does all the work.
                break;

            case BuilderOp::set_current_stack:
                currentStack = inst.fImmA;
                break;

            case BuilderOp::invoke_shader:
            case BuilderOp::invoke_color_filter:
            case BuilderOp::invoke_blender:
                pipeline->push_back({(ProgramOp)inst.fOp, context_bit_pun(inst.fImmA)});
                break;

            default:
                SkDEBUGFAILF("Raster Pipeline: unsupported instruction %d", (int)inst.fOp);
                break;
        }

        // Account for this instruction's net push/pop effect on the current temp stack.
        tempStackPtr += stack_usage(inst) * N;
        SkASSERT(tempStackPtr >= slots.stack.begin());
        SkASSERT(tempStackPtr <= slots.stack.end());

        // Periodically rewind the stack every 500 instructions. When SK_HAS_MUSTTAIL is set,
        // rewinds are not actually used; the appendStackRewind call becomes a no-op. On platforms
        // that don't support SK_HAS_MUSTTAIL, rewinding the stack periodically can prevent a
        // potential stack overflow when running a long program.
        int numPipelineStages = pipeline->size();
        if (numPipelineStages - mostRecentRewind > 500) {
            this->appendStackRewind(pipeline);
            mostRecentRewind = numPipelineStages;
        }
    }
}
1490
1491 // Finds duplicate names in the program and disambiguates them with subscripts.
build_unique_slot_name_list(const SkRPDebugTrace * debugTrace)1492 SkTArray<std::string> build_unique_slot_name_list(const SkRPDebugTrace* debugTrace) {
1493 SkTArray<std::string> slotName;
1494 if (debugTrace) {
1495 slotName.reserve_back(debugTrace->fSlotInfo.size());
1496
1497 // The map consists of <variable name, <source position, unique name>>.
1498 SkTHashMap<std::string_view, SkTHashMap<int, std::string>> uniqueNameMap;
1499
1500 for (const SlotDebugInfo& slotInfo : debugTrace->fSlotInfo) {
1501 // Look up this variable by its name and source position.
1502 int pos = slotInfo.pos.valid() ? slotInfo.pos.startOffset() : 0;
1503 SkTHashMap<int, std::string>& positionMap = uniqueNameMap[slotInfo.name];
1504 std::string& uniqueName = positionMap[pos];
1505
1506 // Have we seen this variable name/position combination before?
1507 if (uniqueName.empty()) {
1508 // This is a unique name/position pair.
1509 uniqueName = slotInfo.name;
1510
1511 // But if it's not a unique _name_, it deserves a subscript to disambiguate it.
1512 int subscript = positionMap.count() - 1;
1513 if (subscript > 0) {
1514 for (char digit : std::to_string(subscript)) {
1515 // U+2080 through U+2089 (₀₁₂₃₄₅₆₇₈₉) in UTF8:
1516 uniqueName.push_back((char)0xE2);
1517 uniqueName.push_back((char)0x82);
1518 uniqueName.push_back((char)(0x80 + digit - '0'));
1519 }
1520 }
1521 }
1522
1523 slotName.push_back(uniqueName);
1524 }
1525 }
1526 return slotName;
1527 }
1528
dump(SkWStream * out) const1529 void Program::dump(SkWStream* out) const {
1530 // Allocate memory for the slot and uniform data, even though the program won't ever be
1531 // executed. The program requires pointer ranges for managing its data, and ASAN will report
1532 // errors if those pointers are pointing at unallocated memory.
1533 SkArenaAlloc alloc(/*firstHeapAllocation=*/1000);
1534 const int N = SkOpts::raster_pipeline_highp_stride;
1535 SlotData slots = this->allocateSlotData(&alloc);
1536 float* uniformPtr = alloc.makeArray<float>(fNumUniformSlots);
1537 SkSpan<float> uniforms = SkSpan(uniformPtr, fNumUniformSlots);
1538
1539 // Turn this program into an array of Raster Pipeline stages.
1540 SkTArray<Stage> stages;
1541 this->makeStages(&stages, &alloc, uniforms, slots);
1542
1543 // Find the labels in the program, and keep track of their offsets.
1544 SkTHashMap<int, int> labelToStageMap; // <label ID, stage index>
1545 for (int index = 0; index < stages.size(); ++index) {
1546 if (stages[index].op == ProgramOp::label) {
1547 int labelID = sk_bit_cast<intptr_t>(stages[index].ctx);
1548 SkASSERT(!labelToStageMap.find(labelID));
1549 labelToStageMap[labelID] = index;
1550 }
1551 }
1552
1553 // Assign unique names to each variable slot; our trace might have multiple variables with the
1554 // same name, which can make a dump hard to read.
1555 SkTArray<std::string> slotName = build_unique_slot_name_list(fDebugTrace);
1556
1557 // Emit the program's instruction list.
1558 for (int index = 0; index < stages.size(); ++index) {
1559 const Stage& stage = stages[index];
1560
1561 // Interpret the context value as a branch offset.
1562 auto BranchOffset = [&](const SkRasterPipeline_BranchCtx* ctx) -> std::string {
1563 // The context's offset field contains a label ID
1564 int labelID = ctx->offset;
1565 SkASSERT(labelToStageMap.find(labelID));
1566 int labelIndex = labelToStageMap[labelID];
1567 return SkSL::String::printf("%+d (label %d at #%d)",
1568 labelIndex - index, labelID, labelIndex + 1);
1569 };
1570
1571 // Print a 32-bit immediate value of unknown type (int/float).
1572 auto Imm = [&](float immFloat, bool showAsFloat = true) -> std::string {
1573 // Start with `0x3F800000` as a baseline.
1574 uint32_t immUnsigned;
1575 memcpy(&immUnsigned, &immFloat, sizeof(uint32_t));
1576 auto text = SkSL::String::printf("0x%08X", immUnsigned);
1577
1578 // Extend it to `0x3F800000 (1.0)` for finite floating point values.
1579 if (showAsFloat && std::isfinite(immFloat)) {
1580 text += " (";
1581 text += skstd::to_string(immFloat);
1582 text += ")";
1583 }
1584 return text;
1585 };
1586
1587 // Interpret the context pointer as a 32-bit immediate value of unknown type (int/float).
1588 auto ImmCtx = [&](const void* ctx, bool showAsFloat = true) -> std::string {
1589 float f;
1590 memcpy(&f, &ctx, sizeof(float));
1591 return Imm(f, showAsFloat);
1592 };
1593
1594 // Print `1` for single slots and `1..3` for ranges of slots.
1595 auto AsRange = [](int first, int count) -> std::string {
1596 std::string text = std::to_string(first);
1597 if (count > 1) {
1598 text += ".." + std::to_string(first + count - 1);
1599 }
1600 return text;
1601 };
1602
1603 // Come up with a reasonable name for a range of slots, e.g.:
1604 // `val`: slot range points at one variable, named val
1605 // `val(0..1)`: slot range points at the first and second slot of val (which has 3+ slots)
1606 // `foo, bar`: slot range fully covers two variables, named foo and bar
1607 // `foo(3), bar(0)`: slot range covers the fourth slot of foo and the first slot of bar
1608 auto SlotName = [&](SkSpan<const SlotDebugInfo> debugInfo,
1609 SkSpan<const std::string> names,
1610 SlotRange range) -> std::string {
1611 SkASSERT(range.index >= 0 && (range.index + range.count) <= (int)debugInfo.size());
1612
1613 std::string text;
1614 auto separator = SkSL::String::Separator();
1615 while (range.count > 0) {
1616 const SlotDebugInfo& slotInfo = debugInfo[range.index];
1617 text += separator();
1618 text += names.empty() ? slotInfo.name : names[range.index];
1619
1620 // Figure out how many slots we can chomp in this iteration.
1621 int entireVariable = slotInfo.columns * slotInfo.rows;
1622 int slotsToChomp = std::min(range.count, entireVariable - slotInfo.componentIndex);
1623 // If we aren't consuming an entire variable, from first slot to last...
1624 if (slotsToChomp != entireVariable) {
1625 // ... decorate it with a range suffix.
1626 text += "(" + AsRange(slotInfo.componentIndex, slotsToChomp) + ")";
1627 }
1628 range.index += slotsToChomp;
1629 range.count -= slotsToChomp;
1630 }
1631
1632 return text;
1633 };
1634
1635 // Attempts to interpret the passed-in pointer as a uniform range.
1636 auto UniformPtrCtx = [&](const float* ptr, int numSlots) -> std::string {
1637 const float* end = ptr + numSlots;
1638 if (ptr >= uniforms.begin() && end <= uniforms.end()) {
1639 int uniformIdx = ptr - uniforms.begin();
1640 if (fDebugTrace) {
1641 // Handle pointers to named uniform slots.
1642 std::string name = SlotName(fDebugTrace->fUniformInfo, /*names=*/{},
1643 {uniformIdx, numSlots});
1644 if (!name.empty()) {
1645 return name;
1646 }
1647 }
1648 // Handle pointers to uniforms (when no debug info exists).
1649 return "u" + AsRange(uniformIdx, numSlots);
1650 }
1651 return {};
1652 };
1653
1654 // Attempts to interpret the passed-in pointer as a value slot range.
1655 auto ValuePtrCtx = [&](const float* ptr, int numSlots) -> std::string {
1656 const float* end = ptr + (N * numSlots);
1657 if (ptr >= slots.values.begin() && end <= slots.values.end()) {
1658 int valueIdx = ptr - slots.values.begin();
1659 SkASSERT((valueIdx % N) == 0);
1660 valueIdx /= N;
1661 if (fDebugTrace) {
1662 // Handle pointers to named value slots.
1663 std::string name = SlotName(fDebugTrace->fSlotInfo, slotName,
1664 {valueIdx, numSlots});
1665 if (!name.empty()) {
1666 return name;
1667 }
1668 }
1669 // Handle pointers to value slots (when no debug info exists).
1670 return "v" + AsRange(valueIdx, numSlots);
1671 }
1672 return {};
1673 };
1674
1675 // Interpret the context value as a pointer to `count` immediate values.
1676 auto MultiImmCtx = [&](const float* ptr, int count) -> std::string {
1677 // If this is a uniform, print it by name.
1678 if (std::string text = UniformPtrCtx(ptr, count); !text.empty()) {
1679 return text;
1680 }
1681 // Emit a single unbracketed immediate.
1682 if (count == 1) {
1683 return Imm(*ptr);
1684 }
1685 // Emit a list like `[0x00000000 (0.0), 0x3F80000 (1.0)]`.
1686 std::string text = "[";
1687 auto separator = SkSL::String::Separator();
1688 while (count--) {
1689 text += separator();
1690 text += Imm(*ptr++);
1691 }
1692 return text + "]";
1693 };
1694
1695 // Interpret the context value as a generic pointer.
1696 auto PtrCtx = [&](const void* ctx, int numSlots) -> std::string {
1697 const float *ctxAsSlot = static_cast<const float*>(ctx);
1698 // Check for uniform and value pointers.
1699 if (std::string uniform = UniformPtrCtx(ctxAsSlot, numSlots); !uniform.empty()) {
1700 return uniform;
1701 }
1702 if (std::string value = ValuePtrCtx(ctxAsSlot, numSlots); !value.empty()) {
1703 return value;
1704 }
1705 // Handle pointers to temporary stack slots.
1706 if (ctxAsSlot >= slots.stack.begin() && ctxAsSlot < slots.stack.end()) {
1707 int stackIdx = ctxAsSlot - slots.stack.begin();
1708 SkASSERT((stackIdx % N) == 0);
1709 return "$" + AsRange(stackIdx / N, numSlots);
1710 }
1711 // This pointer is out of our expected bounds; this generally isn't expected to happen.
1712 return "ExternalPtr(" + AsRange(0, numSlots) + ")";
1713 };
1714
1715 // Interpret the context value as a pointer to two adjacent values.
1716 auto AdjacentPtrCtx = [&](const void* ctx,
1717 int numSlots) -> std::tuple<std::string, std::string> {
1718 const float *ctxAsSlot = static_cast<const float*>(ctx);
1719 return std::make_tuple(PtrCtx(ctxAsSlot, numSlots),
1720 PtrCtx(ctxAsSlot + (N * numSlots), numSlots));
1721 };
1722
1723 // Interpret the context value as a pointer to three adjacent values.
1724 auto Adjacent3PtrCtx = [&](const void* ctx, int numSlots) ->
1725 std::tuple<std::string, std::string, std::string> {
1726 const float *ctxAsSlot = static_cast<const float*>(ctx);
1727 return std::make_tuple(PtrCtx(ctxAsSlot, numSlots),
1728 PtrCtx(ctxAsSlot + (N * numSlots), numSlots),
1729 PtrCtx(ctxAsSlot + (2 * N * numSlots), numSlots));
1730 };
1731
1732 // Interpret the context value as a BinaryOp structure for copy_n_slots (numSlots is
1733 // dictated by the op itself).
1734 auto BinaryOpCtx = [&](const void* v,
1735 int numSlots) -> std::tuple<std::string, std::string> {
1736 const auto *ctx = static_cast<const SkRasterPipeline_BinaryOpCtx*>(v);
1737 return std::make_tuple(PtrCtx(ctx->dst, numSlots),
1738 PtrCtx(ctx->src, numSlots));
1739 };
1740
1741 // Interpret the context value as a BinaryOp structure for copy_n_constants (numSlots is
1742 // dictated by the op itself).
1743 auto CopyConstantCtx = [&](const void* v,
1744 int numSlots) -> std::tuple<std::string, std::string> {
1745 const auto *ctx = static_cast<const SkRasterPipeline_BinaryOpCtx*>(v);
1746 return std::make_tuple(PtrCtx(ctx->dst, numSlots),
1747 MultiImmCtx(ctx->src, numSlots));
1748 };
1749
1750 // Interpret the context value as a BinaryOp structure (numSlots is inferred from the
1751 // distance between pointers).
1752 auto AdjacentBinaryOpCtx = [&](const void* v) -> std::tuple<std::string, std::string> {
1753 const auto *ctx = static_cast<const SkRasterPipeline_BinaryOpCtx*>(v);
1754 int numSlots = (ctx->src - ctx->dst) / N;
1755 return AdjacentPtrCtx(ctx->dst, numSlots);
1756 };
1757
1758 // Interpret the context value as a TernaryOp structure (numSlots is inferred from the
1759 // distance between pointers).
1760 auto AdjacentTernaryOpCtx = [&](const void* v) ->
1761 std::tuple<std::string, std::string, std::string> {
1762 const auto* ctx = static_cast<const SkRasterPipeline_TernaryOpCtx*>(v);
1763 int numSlots = (ctx->src0 - ctx->dst) / N;
1764 return Adjacent3PtrCtx(ctx->dst, numSlots);
1765 };
1766
1767 // Stringize a swizzled pointer. Note that the slot-width of the original expression is not
1768 // preserved in the instruction encoding, so we need to do our best using the data we have.
1769 // (e.g., myFloat4.y would be indistinguishable from myFloat2.y.)
1770 auto SwizzlePtr = [&](const float* ptr, SkSpan<const uint16_t> offsets) {
1771 size_t highestComponent = *std::max_element(offsets.begin(), offsets.end()) /
1772 (N * sizeof(float));
1773
1774 std::string src = "(" + PtrCtx(ptr, std::max(offsets.size(), highestComponent + 1)) +
1775 ").";
1776 for (uint16_t offset : offsets) {
1777 if (offset == (0 * N * sizeof(float))) {
1778 src.push_back('x');
1779 } else if (offset == (1 * N * sizeof(float))) {
1780 src.push_back('y');
1781 } else if (offset == (2 * N * sizeof(float))) {
1782 src.push_back('z');
1783 } else if (offset == (3 * N * sizeof(float))) {
1784 src.push_back('w');
1785 } else {
1786 src.push_back('?');
1787 }
1788 }
1789 return src;
1790 };
1791
1792 // Interpret the context value as a Swizzle structure.
1793 auto SwizzleCtx = [&](ProgramOp op, const void* v) -> std::tuple<std::string, std::string> {
1794 const auto* ctx = static_cast<const SkRasterPipeline_SwizzleCtx*>(v);
1795 int destSlots = (int)op - (int)BuilderOp::swizzle_1 + 1;
1796
1797 return std::make_tuple(PtrCtx(ctx->ptr, destSlots),
1798 SwizzlePtr(ctx->ptr, SkSpan(ctx->offsets, destSlots)));
1799 };
1800
1801 // Interpret the context value as a SwizzleCopy structure.
1802 auto SwizzleCopyCtx = [&](ProgramOp op,
1803 const void* v) -> std::tuple<std::string, std::string> {
1804 const auto* ctx = static_cast<const SkRasterPipeline_SwizzleCopyCtx*>(v);
1805 int destSlots = (int)op - (int)BuilderOp::swizzle_copy_slot_masked + 1;
1806
1807 return std::make_tuple(SwizzlePtr(ctx->dst, SkSpan(ctx->offsets, destSlots)),
1808 PtrCtx(ctx->src, destSlots));
1809 };
1810
1811 // Interpret the context value as a Shuffle structure.
1812 auto ShuffleCtx = [&](const void* v) -> std::tuple<std::string, std::string> {
1813 const auto* ctx = static_cast<const SkRasterPipeline_ShuffleCtx*>(v);
1814
1815 std::string dst = PtrCtx(ctx->ptr, ctx->count);
1816 std::string src = "(" + dst + ")[";
1817 for (int index = 0; index < ctx->count; ++index) {
1818 if (ctx->offsets[index] % (N * sizeof(float))) {
1819 src.push_back('?');
1820 } else {
1821 src += std::to_string(ctx->offsets[index] / (N * sizeof(float)));
1822 }
1823 src.push_back(' ');
1824 }
1825 src.back() = ']';
1826 return std::make_tuple(dst, src);
1827 };
1828
        // Decode this op's context field into printable argument strings. opArg1-opArg3 are
        // filled in per-op here; each case knows how its context pointer is interpreted and
        // how many slots it covers.
        std::string opArg1, opArg2, opArg3;
        using POp = ProgramOp;
        switch (stage.op) {
            case POp::label:
            case POp::invoke_shader:
            case POp::invoke_color_filter:
            case POp::invoke_blender:
                // The context is an immediate value, not a pointer.
                opArg1 = ImmCtx(stage.ctx, /*showAsFloat=*/false);
                break;

            case POp::case_op: {
                const auto* ctx = static_cast<SkRasterPipeline_CaseOpCtx*>(stage.ctx);
                opArg1 = PtrCtx(ctx->ptr, 1);
                opArg2 = PtrCtx(ctx->ptr + N, 1);
                opArg3 = Imm(sk_bit_cast<float>(ctx->expectedValue), /*showAsFloat=*/false);
                break;
            }
            case POp::swizzle_1:
            case POp::swizzle_2:
            case POp::swizzle_3:
            case POp::swizzle_4:
                std::tie(opArg1, opArg2) = SwizzleCtx(stage.op, stage.ctx);
                break;

            case POp::swizzle_copy_slot_masked:
            case POp::swizzle_copy_2_slots_masked:
            case POp::swizzle_copy_3_slots_masked:
            case POp::swizzle_copy_4_slots_masked:
                std::tie(opArg1, opArg2) = SwizzleCopyCtx(stage.op, stage.ctx);
                break;

            case POp::dot_2_floats:
                opArg1 = PtrCtx(stage.ctx, 1);
                std::tie(opArg2, opArg3) = AdjacentPtrCtx(stage.ctx, 2);
                break;

            case POp::dot_3_floats:
                opArg1 = PtrCtx(stage.ctx, 1);
                std::tie(opArg2, opArg3) = AdjacentPtrCtx(stage.ctx, 3);
                break;

            case POp::dot_4_floats:
                opArg1 = PtrCtx(stage.ctx, 1);
                std::tie(opArg2, opArg3) = AdjacentPtrCtx(stage.ctx, 4);
                break;

            case POp::shuffle:
                std::tie(opArg1, opArg2) = ShuffleCtx(stage.ctx);
                break;

            // Single-slot ops: the context is a pointer to one slot.
            case POp::load_condition_mask:
            case POp::store_condition_mask:
            case POp::load_loop_mask:
            case POp::store_loop_mask:
            case POp::merge_loop_mask:
            case POp::reenable_loop_mask:
            case POp::load_return_mask:
            case POp::store_return_mask:
            case POp::zero_slot_unmasked:
            case POp::bitwise_not_int:
            case POp::cast_to_float_from_int: case POp::cast_to_float_from_uint:
            case POp::cast_to_int_from_float: case POp::cast_to_uint_from_float:
            case POp::abs_float: case POp::abs_int:
            case POp::atan_float:
            case POp::ceil_float:
            case POp::cos_float:
            case POp::exp_float:
            case POp::floor_float:
            case POp::sin_float:
            case POp::sqrt_float:
            case POp::tan_float:
                opArg1 = PtrCtx(stage.ctx, 1);
                break;

            // Two-slot ops: the context is a pointer to two slots.
            case POp::zero_2_slots_unmasked:
            case POp::bitwise_not_2_ints:
            case POp::load_src_rg: case POp::store_src_rg:
            case POp::cast_to_float_from_2_ints: case POp::cast_to_float_from_2_uints:
            case POp::cast_to_int_from_2_floats: case POp::cast_to_uint_from_2_floats:
            case POp::abs_2_floats: case POp::abs_2_ints:
            case POp::ceil_2_floats:
            case POp::floor_2_floats:
                opArg1 = PtrCtx(stage.ctx, 2);
                break;

            // Three-slot ops: the context is a pointer to three slots.
            case POp::zero_3_slots_unmasked:
            case POp::bitwise_not_3_ints:
            case POp::cast_to_float_from_3_ints: case POp::cast_to_float_from_3_uints:
            case POp::cast_to_int_from_3_floats: case POp::cast_to_uint_from_3_floats:
            case POp::abs_3_floats: case POp::abs_3_ints:
            case POp::ceil_3_floats:
            case POp::floor_3_floats:
                opArg1 = PtrCtx(stage.ctx, 3);
                break;

            // Four-slot ops: the context is a pointer to four slots.
            case POp::load_src:
            case POp::load_dst:
            case POp::store_src:
            case POp::store_dst:
            case POp::store_device_xy01:
            case POp::zero_4_slots_unmasked:
            case POp::bitwise_not_4_ints:
            case POp::cast_to_float_from_4_ints: case POp::cast_to_float_from_4_uints:
            case POp::cast_to_int_from_4_floats: case POp::cast_to_uint_from_4_floats:
            case POp::abs_4_floats: case POp::abs_4_ints:
            case POp::ceil_4_floats:
            case POp::floor_4_floats:
                opArg1 = PtrCtx(stage.ctx, 4);
                break;

            case POp::copy_constant:
                std::tie(opArg1, opArg2) = CopyConstantCtx(stage.ctx, 1);
                break;

            case POp::copy_2_constants:
                std::tie(opArg1, opArg2) = CopyConstantCtx(stage.ctx, 2);
                break;

            case POp::copy_3_constants:
                std::tie(opArg1, opArg2) = CopyConstantCtx(stage.ctx, 3);
                break;

            case POp::copy_4_constants:
                std::tie(opArg1, opArg2) = CopyConstantCtx(stage.ctx, 4);
                break;

            case POp::copy_slot_masked:
            case POp::copy_slot_unmasked:
                std::tie(opArg1, opArg2) = BinaryOpCtx(stage.ctx, 1);
                break;

            case POp::copy_2_slots_masked:
            case POp::copy_2_slots_unmasked:
                std::tie(opArg1, opArg2) = BinaryOpCtx(stage.ctx, 2);
                break;

            case POp::copy_3_slots_masked:
            case POp::copy_3_slots_unmasked:
                std::tie(opArg1, opArg2) = BinaryOpCtx(stage.ctx, 3);
                break;

            case POp::copy_4_slots_masked:
            case POp::copy_4_slots_unmasked:
                std::tie(opArg1, opArg2) = BinaryOpCtx(stage.ctx, 4);
                break;

            // Binary ops operating on two adjacent single slots.
            case POp::merge_condition_mask:
            case POp::add_float: case POp::add_int:
            case POp::sub_float: case POp::sub_int:
            case POp::mul_float: case POp::mul_int:
            case POp::div_float: case POp::div_int: case POp::div_uint:
            case POp::bitwise_and_int:
            case POp::bitwise_or_int:
            case POp::bitwise_xor_int:
            case POp::min_float: case POp::min_int: case POp::min_uint:
            case POp::max_float: case POp::max_int: case POp::max_uint:
            case POp::cmplt_float: case POp::cmplt_int: case POp::cmplt_uint:
            case POp::cmple_float: case POp::cmple_int: case POp::cmple_uint:
            case POp::cmpeq_float: case POp::cmpeq_int:
            case POp::cmpne_float: case POp::cmpne_int:
                std::tie(opArg1, opArg2) = AdjacentPtrCtx(stage.ctx, 1);
                break;

            case POp::mix_float: case POp::mix_int:
                std::tie(opArg1, opArg2, opArg3) = Adjacent3PtrCtx(stage.ctx, 1);
                break;

            // Binary ops operating on two adjacent two-slot ranges.
            case POp::add_2_floats: case POp::add_2_ints:
            case POp::sub_2_floats: case POp::sub_2_ints:
            case POp::mul_2_floats: case POp::mul_2_ints:
            case POp::div_2_floats: case POp::div_2_ints: case POp::div_2_uints:
            case POp::bitwise_and_2_ints:
            case POp::bitwise_or_2_ints:
            case POp::bitwise_xor_2_ints:
            case POp::min_2_floats: case POp::min_2_ints: case POp::min_2_uints:
            case POp::max_2_floats: case POp::max_2_ints: case POp::max_2_uints:
            case POp::cmplt_2_floats: case POp::cmplt_2_ints: case POp::cmplt_2_uints:
            case POp::cmple_2_floats: case POp::cmple_2_ints: case POp::cmple_2_uints:
            case POp::cmpeq_2_floats: case POp::cmpeq_2_ints:
            case POp::cmpne_2_floats: case POp::cmpne_2_ints:
                std::tie(opArg1, opArg2) = AdjacentPtrCtx(stage.ctx, 2);
                break;

            case POp::mix_2_floats: case POp::mix_2_ints:
                std::tie(opArg1, opArg2, opArg3) = Adjacent3PtrCtx(stage.ctx, 2);
                break;

            // Binary ops operating on two adjacent three-slot ranges.
            case POp::add_3_floats: case POp::add_3_ints:
            case POp::sub_3_floats: case POp::sub_3_ints:
            case POp::mul_3_floats: case POp::mul_3_ints:
            case POp::div_3_floats: case POp::div_3_ints: case POp::div_3_uints:
            case POp::bitwise_and_3_ints:
            case POp::bitwise_or_3_ints:
            case POp::bitwise_xor_3_ints:
            case POp::min_3_floats: case POp::min_3_ints: case POp::min_3_uints:
            case POp::max_3_floats: case POp::max_3_ints: case POp::max_3_uints:
            case POp::cmplt_3_floats: case POp::cmplt_3_ints: case POp::cmplt_3_uints:
            case POp::cmple_3_floats: case POp::cmple_3_ints: case POp::cmple_3_uints:
            case POp::cmpeq_3_floats: case POp::cmpeq_3_ints:
            case POp::cmpne_3_floats: case POp::cmpne_3_ints:
                std::tie(opArg1, opArg2) = AdjacentPtrCtx(stage.ctx, 3);
                break;

            case POp::mix_3_floats: case POp::mix_3_ints:
                std::tie(opArg1, opArg2, opArg3) = Adjacent3PtrCtx(stage.ctx, 3);
                break;

            // Binary ops operating on two adjacent four-slot ranges.
            case POp::add_4_floats: case POp::add_4_ints:
            case POp::sub_4_floats: case POp::sub_4_ints:
            case POp::mul_4_floats: case POp::mul_4_ints:
            case POp::div_4_floats: case POp::div_4_ints: case POp::div_4_uints:
            case POp::bitwise_and_4_ints:
            case POp::bitwise_or_4_ints:
            case POp::bitwise_xor_4_ints:
            case POp::min_4_floats: case POp::min_4_ints: case POp::min_4_uints:
            case POp::max_4_floats: case POp::max_4_ints: case POp::max_4_uints:
            case POp::cmplt_4_floats: case POp::cmplt_4_ints: case POp::cmplt_4_uints:
            case POp::cmple_4_floats: case POp::cmple_4_ints: case POp::cmple_4_uints:
            case POp::cmpeq_4_floats: case POp::cmpeq_4_ints:
            case POp::cmpne_4_floats: case POp::cmpne_4_ints:
                std::tie(opArg1, opArg2) = AdjacentPtrCtx(stage.ctx, 4);
                break;

            case POp::mix_4_floats: case POp::mix_4_ints:
                std::tie(opArg1, opArg2, opArg3) = Adjacent3PtrCtx(stage.ctx, 4);
                break;

            // N-way ops: the slot count is inferred from the context's pointer distance.
            case POp::add_n_floats: case POp::add_n_ints:
            case POp::sub_n_floats: case POp::sub_n_ints:
            case POp::mul_n_floats: case POp::mul_n_ints:
            case POp::div_n_floats: case POp::div_n_ints: case POp::div_n_uints:
            case POp::bitwise_and_n_ints:
            case POp::bitwise_or_n_ints:
            case POp::bitwise_xor_n_ints:
            case POp::min_n_floats: case POp::min_n_ints: case POp::min_n_uints:
            case POp::max_n_floats: case POp::max_n_ints: case POp::max_n_uints:
            case POp::cmplt_n_floats: case POp::cmplt_n_ints: case POp::cmplt_n_uints:
            case POp::cmple_n_floats: case POp::cmple_n_ints: case POp::cmple_n_uints:
            case POp::cmpeq_n_floats: case POp::cmpeq_n_ints:
            case POp::cmpne_n_floats: case POp::cmpne_n_ints:
            case POp::atan2_n_floats:
            case POp::pow_n_floats:
                std::tie(opArg1, opArg2) = AdjacentBinaryOpCtx(stage.ctx);
                break;

            case POp::mix_n_floats: case POp::mix_n_ints:
                std::tie(opArg1, opArg2, opArg3) = AdjacentTernaryOpCtx(stage.ctx);
                break;

            case POp::jump:
            case POp::branch_if_any_active_lanes:
            case POp::branch_if_no_active_lanes:
                opArg1 = BranchOffset(static_cast<SkRasterPipeline_BranchCtx*>(stage.ctx));
                break;

            case POp::branch_if_no_active_lanes_eq: {
                const auto* ctx = static_cast<SkRasterPipeline_BranchIfEqualCtx*>(stage.ctx);
                opArg1 = BranchOffset(ctx);
                opArg2 = PtrCtx(ctx->ptr, 1);
                opArg3 = Imm(sk_bit_cast<float>(ctx->value));
                break;
            }
            default:
                break;
        }
2094
        // Look up the op's mnemonic. Ops in SK_RASTER_PIPELINE_OPS_ALL stringize their own
        // enumerator names; the SkSL-specific ops are spelled out manually.
        const char* opName = "";
        switch (stage.op) {
    #define M(x) case POp::x: opName = #x; break;
            SK_RASTER_PIPELINE_OPS_ALL(M)
    #undef M
            case POp::label:                 opName = "label";               break;
            case POp::invoke_shader:         opName = "invoke_shader";       break;
            case POp::invoke_color_filter:   opName = "invoke_color_filter"; break;
            case POp::invoke_blender:        opName = "invoke_blender";      break;
        }
2105
        // Assemble a pseudo-code description of the op's effect (e.g. `$0 = abs($0)`) from the
        // argument strings. Ops with no case here are printed by name alone.
        std::string opText;
        switch (stage.op) {
            case POp::init_lane_masks:
                opText = "CondMask = LoopMask = RetMask = true";
                break;

            case POp::load_condition_mask:
                opText = "CondMask = " + opArg1;
                break;

            case POp::store_condition_mask:
                opText = opArg1 + " = CondMask";
                break;

            case POp::merge_condition_mask:
                opText = "CondMask = " + opArg1 + " & " + opArg2;
                break;

            case POp::load_loop_mask:
                opText = "LoopMask = " + opArg1;
                break;

            case POp::store_loop_mask:
                opText = opArg1 + " = LoopMask";
                break;

            case POp::mask_off_loop_mask:
                opText = "LoopMask &= ~(CondMask & LoopMask & RetMask)";
                break;

            case POp::reenable_loop_mask:
                opText = "LoopMask |= " + opArg1;
                break;

            case POp::merge_loop_mask:
                opText = "LoopMask &= " + opArg1;
                break;

            case POp::load_return_mask:
                opText = "RetMask = " + opArg1;
                break;

            case POp::store_return_mask:
                opText = opArg1 + " = RetMask";
                break;

            case POp::mask_off_return_mask:
                opText = "RetMask &= ~(CondMask & LoopMask & RetMask)";
                break;

            case POp::store_src_rg:
                opText = opArg1 + " = src.rg";
                break;

            case POp::store_src:
                opText = opArg1 + " = src.rgba";
                break;

            case POp::store_dst:
                opText = opArg1 + " = dst.rgba";
                break;

            case POp::store_device_xy01:
                opText = opArg1 + " = DeviceCoords.xy01";
                break;

            case POp::load_src_rg:
                opText = "src.rg = " + opArg1;
                break;

            case POp::load_src:
                opText = "src.rgba = " + opArg1;
                break;

            case POp::load_dst:
                opText = "dst.rgba = " + opArg1;
                break;

            case POp::bitwise_and_int:
            case POp::bitwise_and_2_ints:
            case POp::bitwise_and_3_ints:
            case POp::bitwise_and_4_ints:
            case POp::bitwise_and_n_ints:
                opText = opArg1 + " &= " + opArg2;
                break;

            case POp::bitwise_or_int:
            case POp::bitwise_or_2_ints:
            case POp::bitwise_or_3_ints:
            case POp::bitwise_or_4_ints:
            case POp::bitwise_or_n_ints:
                opText = opArg1 + " |= " + opArg2;
                break;

            case POp::bitwise_xor_int:
            case POp::bitwise_xor_2_ints:
            case POp::bitwise_xor_3_ints:
            case POp::bitwise_xor_4_ints:
            case POp::bitwise_xor_n_ints:
                opText = opArg1 + " ^= " + opArg2;
                break;

            case POp::bitwise_not_int:
            case POp::bitwise_not_2_ints:
            case POp::bitwise_not_3_ints:
            case POp::bitwise_not_4_ints:
                opText = opArg1 + " = ~" + opArg1;
                break;

            case POp::cast_to_float_from_int:
            case POp::cast_to_float_from_2_ints:
            case POp::cast_to_float_from_3_ints:
            case POp::cast_to_float_from_4_ints:
                opText = opArg1 + " = IntToFloat(" + opArg1 + ")";
                break;

            case POp::cast_to_float_from_uint:
            case POp::cast_to_float_from_2_uints:
            case POp::cast_to_float_from_3_uints:
            case POp::cast_to_float_from_4_uints:
                opText = opArg1 + " = UintToFloat(" + opArg1 + ")";
                break;

            case POp::cast_to_int_from_float:
            case POp::cast_to_int_from_2_floats:
            case POp::cast_to_int_from_3_floats:
            case POp::cast_to_int_from_4_floats:
                opText = opArg1 + " = FloatToInt(" + opArg1 + ")";
                break;

            case POp::cast_to_uint_from_float:
            case POp::cast_to_uint_from_2_floats:
            case POp::cast_to_uint_from_3_floats:
            case POp::cast_to_uint_from_4_floats:
                opText = opArg1 + " = FloatToUint(" + opArg1 + ")";
                break;

            case POp::copy_slot_masked: case POp::copy_2_slots_masked:
            case POp::copy_3_slots_masked: case POp::copy_4_slots_masked:
            case POp::swizzle_copy_slot_masked: case POp::swizzle_copy_2_slots_masked:
            case POp::swizzle_copy_3_slots_masked: case POp::swizzle_copy_4_slots_masked:
                opText = opArg1 + " = Mask(" + opArg2 + ")";
                break;

            case POp::copy_constant: case POp::copy_2_constants:
            case POp::copy_3_constants: case POp::copy_4_constants:
            case POp::copy_slot_unmasked: case POp::copy_2_slots_unmasked:
            case POp::copy_3_slots_unmasked: case POp::copy_4_slots_unmasked:
            case POp::swizzle_1: case POp::swizzle_2:
            case POp::swizzle_3: case POp::swizzle_4:
            case POp::shuffle:
                opText = opArg1 + " = " + opArg2;
                break;

            case POp::zero_slot_unmasked: case POp::zero_2_slots_unmasked:
            case POp::zero_3_slots_unmasked: case POp::zero_4_slots_unmasked:
                opText = opArg1 + " = 0";
                break;

            case POp::abs_float: case POp::abs_int:
            case POp::abs_2_floats: case POp::abs_2_ints:
            case POp::abs_3_floats: case POp::abs_3_ints:
            case POp::abs_4_floats: case POp::abs_4_ints:
                opText = opArg1 + " = abs(" + opArg1 + ")";
                break;

            case POp::atan_float:
                opText = opArg1 + " = atan(" + opArg1 + ")";
                break;

            case POp::atan2_n_floats:
                opText = opArg1 + " = atan2(" + opArg1 + ", " + opArg2 + ")";
                break;

            case POp::ceil_float:
            case POp::ceil_2_floats:
            case POp::ceil_3_floats:
            case POp::ceil_4_floats:
                opText = opArg1 + " = ceil(" + opArg1 + ")";
                break;

            case POp::cos_float:
                opText = opArg1 + " = cos(" + opArg1 + ")";
                break;

            case POp::dot_2_floats:
            case POp::dot_3_floats:
            case POp::dot_4_floats:
                opText = opArg1 + " = dot(" + opArg2 + ", " + opArg3 + ")";
                break;

            case POp::exp_float:
                opText = opArg1 + " = exp(" + opArg1 + ")";
                break;

            case POp::pow_n_floats:
                opText = opArg1 + " = pow(" + opArg1 + ", " + opArg2 + ")";
                break;

            case POp::sin_float:
                opText = opArg1 + " = sin(" + opArg1 + ")";
                break;

            case POp::sqrt_float:
                opText = opArg1 + " = sqrt(" + opArg1 + ")";
                break;

            case POp::tan_float:
                opText = opArg1 + " = tan(" + opArg1 + ")";
                break;

            case POp::floor_float:
            case POp::floor_2_floats:
            case POp::floor_3_floats:
            case POp::floor_4_floats:
                opText = opArg1 + " = floor(" + opArg1 + ")";
                break;

            case POp::add_float: case POp::add_int:
            case POp::add_2_floats: case POp::add_2_ints:
            case POp::add_3_floats: case POp::add_3_ints:
            case POp::add_4_floats: case POp::add_4_ints:
            case POp::add_n_floats: case POp::add_n_ints:
                opText = opArg1 + " += " + opArg2;
                break;

            case POp::sub_float: case POp::sub_int:
            case POp::sub_2_floats: case POp::sub_2_ints:
            case POp::sub_3_floats: case POp::sub_3_ints:
            case POp::sub_4_floats: case POp::sub_4_ints:
            case POp::sub_n_floats: case POp::sub_n_ints:
                opText = opArg1 + " -= " + opArg2;
                break;

            case POp::mul_float: case POp::mul_int:
            case POp::mul_2_floats: case POp::mul_2_ints:
            case POp::mul_3_floats: case POp::mul_3_ints:
            case POp::mul_4_floats: case POp::mul_4_ints:
            case POp::mul_n_floats: case POp::mul_n_ints:
                opText = opArg1 + " *= " + opArg2;
                break;

            case POp::div_float: case POp::div_int: case POp::div_uint:
            case POp::div_2_floats: case POp::div_2_ints: case POp::div_2_uints:
            case POp::div_3_floats: case POp::div_3_ints: case POp::div_3_uints:
            case POp::div_4_floats: case POp::div_4_ints: case POp::div_4_uints:
            case POp::div_n_floats: case POp::div_n_ints: case POp::div_n_uints:
                opText = opArg1 + " /= " + opArg2;
                break;

            case POp::min_float: case POp::min_int: case POp::min_uint:
            case POp::min_2_floats: case POp::min_2_ints: case POp::min_2_uints:
            case POp::min_3_floats: case POp::min_3_ints: case POp::min_3_uints:
            case POp::min_4_floats: case POp::min_4_ints: case POp::min_4_uints:
            case POp::min_n_floats: case POp::min_n_ints: case POp::min_n_uints:
                opText = opArg1 + " = min(" + opArg1 + ", " + opArg2 + ")";
                break;

            case POp::max_float: case POp::max_int: case POp::max_uint:
            case POp::max_2_floats: case POp::max_2_ints: case POp::max_2_uints:
            case POp::max_3_floats: case POp::max_3_ints: case POp::max_3_uints:
            case POp::max_4_floats: case POp::max_4_ints: case POp::max_4_uints:
            case POp::max_n_floats: case POp::max_n_ints: case POp::max_n_uints:
                opText = opArg1 + " = max(" + opArg1 + ", " + opArg2 + ")";
                break;

            case POp::cmplt_float: case POp::cmplt_int: case POp::cmplt_uint:
            case POp::cmplt_2_floats: case POp::cmplt_2_ints: case POp::cmplt_2_uints:
            case POp::cmplt_3_floats: case POp::cmplt_3_ints: case POp::cmplt_3_uints:
            case POp::cmplt_4_floats: case POp::cmplt_4_ints: case POp::cmplt_4_uints:
            case POp::cmplt_n_floats: case POp::cmplt_n_ints: case POp::cmplt_n_uints:
                opText = opArg1 + " = lessThan(" + opArg1 + ", " + opArg2 + ")";
                break;

            case POp::cmple_float: case POp::cmple_int: case POp::cmple_uint:
            case POp::cmple_2_floats: case POp::cmple_2_ints: case POp::cmple_2_uints:
            case POp::cmple_3_floats: case POp::cmple_3_ints: case POp::cmple_3_uints:
            case POp::cmple_4_floats: case POp::cmple_4_ints: case POp::cmple_4_uints:
            case POp::cmple_n_floats: case POp::cmple_n_ints: case POp::cmple_n_uints:
                opText = opArg1 + " = lessThanEqual(" + opArg1 + ", " + opArg2 + ")";
                break;

            case POp::cmpeq_float: case POp::cmpeq_int:
            case POp::cmpeq_2_floats: case POp::cmpeq_2_ints:
            case POp::cmpeq_3_floats: case POp::cmpeq_3_ints:
            case POp::cmpeq_4_floats: case POp::cmpeq_4_ints:
            case POp::cmpeq_n_floats: case POp::cmpeq_n_ints:
                opText = opArg1 + " = equal(" + opArg1 + ", " + opArg2 + ")";
                break;

            case POp::cmpne_float: case POp::cmpne_int:
            case POp::cmpne_2_floats: case POp::cmpne_2_ints:
            case POp::cmpne_3_floats: case POp::cmpne_3_ints:
            case POp::cmpne_4_floats: case POp::cmpne_4_ints:
            case POp::cmpne_n_floats: case POp::cmpne_n_ints:
                opText = opArg1 + " = notEqual(" + opArg1 + ", " + opArg2 + ")";
                break;

            case POp::mix_float: case POp::mix_int:
            case POp::mix_2_floats: case POp::mix_2_ints:
            case POp::mix_3_floats: case POp::mix_3_ints:
            case POp::mix_4_floats: case POp::mix_4_ints:
            case POp::mix_n_floats: case POp::mix_n_ints:
                opText = opArg1 + " = mix(" + opArg2 + ", " + opArg3 + ", " + opArg1 + ")";
                break;

            case POp::jump:
            case POp::branch_if_any_active_lanes:
            case POp::branch_if_no_active_lanes:
            case POp::invoke_shader:
            case POp::invoke_color_filter:
            case POp::invoke_blender:
                opText = std::string(opName) + " " + opArg1;
                break;

            case POp::branch_if_no_active_lanes_eq:
                opText = "branch " + opArg1 + " if no lanes of " + opArg2 + " == " + opArg3;
                break;

            case POp::label:
                opText = "label " + opArg1;
                break;

            case POp::case_op: {
                opText = "if (" + opArg1 + " == " + opArg3 +
                         ") { LoopMask = true; " + opArg2 + " = false; }";
                break;
            }
            default:
                break;
        }
2437
        // Emit `   N. op_name    description`; the description column is omitted when empty.
        std::string line = !opText.empty()
                ? SkSL::String::printf("% 5d. %-30s %s\n", index + 1, opName, opText.c_str())
                : SkSL::String::printf("% 5d. %s\n", index + 1, opName);

        out->writeText(line.c_str());
2443 }
2444 }
2445
2446 } // namespace RP
2447 } // namespace SkSL
2448